freebsd-dev/sys/cddl/compat/opensolaris/kern/opensolaris_kstat.c
Alexander Motin 3aef5b286a MFV r315290, r315291: 7303 dynamic metaslab selection
illumos/illumos-gate@8363e80ae7
https://github.com/illumos/illumos-gate/commit/8363e80ae72609660f6090766ca8c2c18

https://www.illumos.org/issues/7303

  This change introduces a new weighting algorithm to improve metaslab selection.
  The new weighting algorithm relies on the SPACEMAP_HISTOGRAM feature. As a result,
  the metaslab weight now encodes the type of weighting algorithm used
  (size-based vs segment-based).

  This also introduce a new allocation tracing facility and two new dcmds to help
  debug allocation problems. Each zio now contains a zio_alloc_list_t structure
  that is populated as the zio goes through the allocations stage. Here's an
  example of how to use the tracing facility:

> c5ec000::print zio_t io_alloc_list | ::walk list | ::metaslab_trace
  MSID    DVA    ASIZE      WEIGHT             RESULT               VDEV
     -      0      400           0    NOT_ALLOCATABLE           ztest.0a
     -      0      400           0    NOT_ALLOCATABLE           ztest.0a
     -      0      400           0             ENOSPC           ztest.0a
     -      0      200           0    NOT_ALLOCATABLE           ztest.0a
     -      0      200           0    NOT_ALLOCATABLE           ztest.0a
     -      0      200           0             ENOSPC           ztest.0a
     1      0      400      1 x 8M            17b1a00           ztest.0a

> 1ff2400::print zio_t io_alloc_list | ::walk list | ::metaslab_trace
  MSID    DVA    ASIZE      WEIGHT             RESULT               VDEV
     -      0      200           0    NOT_ALLOCATABLE           mirror-2
     -      0      200           0    NOT_ALLOCATABLE           mirror-0
     1      0      200      1 x 4M            112ae00           mirror-1
     -      1      200           0    NOT_ALLOCATABLE           mirror-2
     -      1      200           0    NOT_ALLOCATABLE           mirror-0
     1      1      200      1 x 4M            112b000           mirror-1
     -      2      200           0    NOT_ALLOCATABLE           mirror-2

  If the metaslab is using segment-based weighting then the WEIGHT column will
  display the number of segments available in the bucket where the allocation
  attempt was made.

Author: George Wilson <george.wilson@delphix.com>
Reviewed by: Alex Reece <alex@delphix.com>
Reviewed by: Chris Siden <christopher.siden@delphix.com>
Reviewed by: Dan Kimmel <dan.kimmel@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Paul Dagnelie <paul.dagnelie@delphix.com>
Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com>
Reviewed by: Prakash Surya <prakash.surya@delphix.com>
Reviewed by: Don Brady <don.brady@intel.com>
Approved by: Richard Lowe <richlowe@richlowe.net>
2017-03-24 09:37:00 +00:00

148 lines
4.3 KiB
C

/*-
* Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/sysctl.h>
#include <sys/kstat.h>
static MALLOC_DEFINE(M_KSTAT, "kstat_data", "Kernel statistics");
SYSCTL_ROOT_NODE(OID_AUTO, kstat, CTLFLAG_RW, 0, "Kernel statistics");
kstat_t *
kstat_create(char *module, int instance, char *name, char *class, uchar_t type,
ulong_t ndata, uchar_t flags)
{
struct sysctl_oid *root;
kstat_t *ksp;
KASSERT(instance == 0, ("instance=%d", instance));
KASSERT(type == KSTAT_TYPE_NAMED, ("type=%hhu", type));
KASSERT(flags == KSTAT_FLAG_VIRTUAL, ("flags=%02hhx", flags));
/*
* Allocate the main structure. We don't need to copy module/class/name
* stuff in here, because it is only used for sysctl node creation
* done in this function.
*/
ksp = malloc(sizeof(*ksp), M_KSTAT, M_WAITOK);
ksp->ks_ndata = ndata;
/*
* Create sysctl tree for those statistics:
*
* kstat.<module>.<class>.<name>.
*/
sysctl_ctx_init(&ksp->ks_sysctl_ctx);
root = SYSCTL_ADD_NODE(&ksp->ks_sysctl_ctx,
SYSCTL_STATIC_CHILDREN(_kstat), OID_AUTO, module, CTLFLAG_RW, 0,
"");
if (root == NULL) {
printf("%s: Cannot create kstat.%s tree!\n", __func__, module);
sysctl_ctx_free(&ksp->ks_sysctl_ctx);
free(ksp, M_KSTAT);
return (NULL);
}
root = SYSCTL_ADD_NODE(&ksp->ks_sysctl_ctx, SYSCTL_CHILDREN(root),
OID_AUTO, class, CTLFLAG_RW, 0, "");
if (root == NULL) {
printf("%s: Cannot create kstat.%s.%s tree!\n", __func__,
module, class);
sysctl_ctx_free(&ksp->ks_sysctl_ctx);
free(ksp, M_KSTAT);
return (NULL);
}
root = SYSCTL_ADD_NODE(&ksp->ks_sysctl_ctx, SYSCTL_CHILDREN(root),
OID_AUTO, name, CTLFLAG_RW, 0, "");
if (root == NULL) {
printf("%s: Cannot create kstat.%s.%s.%s tree!\n", __func__,
module, class, name);
sysctl_ctx_free(&ksp->ks_sysctl_ctx);
free(ksp, M_KSTAT);
return (NULL);
}
ksp->ks_sysctl_root = root;
return (ksp);
}
static int
kstat_sysctl(SYSCTL_HANDLER_ARGS)
{
kstat_named_t *ksent = arg1;
uint64_t val;
val = ksent->value.ui64;
return sysctl_handle_64(oidp, &val, 0, req);
}
void
kstat_install(kstat_t *ksp)
{
kstat_named_t *ksent;
u_int i;
ksent = ksp->ks_data;
for (i = 0; i < ksp->ks_ndata; i++, ksent++) {
KASSERT(ksent->data_type == KSTAT_DATA_UINT64,
("data_type=%d", ksent->data_type));
SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
SYSCTL_CHILDREN(ksp->ks_sysctl_root), OID_AUTO, ksent->name,
CTLTYPE_U64 | CTLFLAG_RD, ksent, sizeof(*ksent),
kstat_sysctl, "QU", ksent->desc);
}
}
void
kstat_delete(kstat_t *ksp)
{
sysctl_ctx_free(&ksp->ks_sysctl_ctx);
free(ksp, M_KSTAT);
}
void
kstat_set_string(char *dst, const char *src)
{
bzero(dst, KSTAT_STRLEN);
(void) strncpy(dst, src, KSTAT_STRLEN - 1);
}
void
kstat_named_init(kstat_named_t *knp, const char *name, uchar_t data_type)
{
kstat_set_string(knp->name, name);
knp->data_type = data_type;
}