To analyze the allocation of swap blocks by blist functions, add a method

for analyzing the radix tree structures and reporting on the number, and
sizes, of maximal intervals of free blocks.  The report includes the number
of maximal intervals, and also the number of them in each of several size
ranges, from small (size 1, or 3 to 4) to large (28657 to 46367) with size
boundaries defined by Fibonacci numbers.  The report is written in the test
tool with the 's' command, or in a running kernel by sysctl.

The analysis of the radix tree frequently computes the position of the lone
bit set in a u_daddr_t, a computation that also appears in leaf allocation.
That computation has been moved into a function of its own, and optimized
for cases where an inlined machine instruction can replace the usual binary
search.

Submitted by:	Doug Moore <dougm@rice.edu>
MFC after:	1 week
Differential Revision:	https://reviews.freebsd.org/D11906
This commit is contained in:
Alan Cox 2017-09-10 17:46:03 +00:00
parent 008a09355b
commit d027ed2e7a
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=323391
3 changed files with 269 additions and 12 deletions

View File

@ -90,6 +90,7 @@ __FBSDID("$FreeBSD$");
#include <sys/kernel.h>
#include <sys/blist.h>
#include <sys/malloc.h>
#include <sys/sbuf.h>
#include <sys/proc.h>
#include <sys/mutex.h>
@ -101,6 +102,7 @@ __FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <sys/malloc.h>
#include <sys/sbuf.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
@ -144,6 +146,7 @@ static MALLOC_DEFINE(M_SWAP, "SWAP", "Swap space");
#endif
CTASSERT(BLIST_BMAP_RADIX % BLIST_META_RADIX == 0);
#define BLIST_META_MASK (BLIST_META_RADIX - 1)
/*
* For a subtree that can represent the state of up to 'radix' blocks, the
@ -166,7 +169,35 @@ radix_to_skip(daddr_t radix)
{
return (radix /
((BLIST_BMAP_RADIX / BLIST_META_RADIX) * (BLIST_META_RADIX - 1)));
((BLIST_BMAP_RADIX / BLIST_META_RADIX) * BLIST_META_MASK));
}
/*
* Use binary search, or a faster method, to find the 1 bit in a u_daddr_t.
* Assumes that the argument has only one bit set.
*/
static inline int
bitpos(u_daddr_t mask)
{
int hi, lo, mid;
switch (sizeof(mask)) {
#ifdef HAVE_INLINE_FFSLL
case sizeof(long long):
return (ffsll(mask) - 1);
#endif
default:
lo = 0;
hi = BLIST_BMAP_RADIX;
while (lo + 1 < hi) {
mid = (lo + hi) >> 1;
if ((mask >> mid) != 0)
lo = mid;
else
hi = mid;
}
return (lo);
}
}
/*
@ -340,6 +371,192 @@ blist_print(blist_t bl)
#endif
static const u_daddr_t fib[] = {
1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610, 987, 1597, 2584,
4181, 6765, 10946, 17711, 28657, 46368, 75025, 121393, 196418, 317811,
514229, 832040, 1346269, 2178309, 3524578,
};
/*
* Use 'gap' to describe a maximal range of unallocated blocks/bits.
*/
struct gap_stats {
daddr_t start; /* current gap start, or SWAPBLK_NONE */
daddr_t num; /* number of gaps observed */
daddr_t max; /* largest gap size */
daddr_t avg; /* average gap size */
daddr_t err; /* sum - num * avg */
daddr_t histo[nitems(fib)]; /* # gaps in each size range */
int max_bucket; /* last histo elt with nonzero val */
};
/*
* gap_stats_counting() - is the state 'counting 1 bits'?
* or 'skipping 0 bits'?
*/
static inline bool
gap_stats_counting(const struct gap_stats *stats)
{
return (stats->start != SWAPBLK_NONE);
}
/*
* init_gap_stats() - initialize stats on gap sizes
*/
static inline void
init_gap_stats(struct gap_stats *stats)
{
bzero(stats, sizeof(*stats));
stats->start = SWAPBLK_NONE;
}
/*
* update_gap_stats() - update stats on gap sizes
*/
static void
update_gap_stats(struct gap_stats *stats, daddr_t posn)
{
daddr_t size;
int hi, lo, mid;
if (!gap_stats_counting(stats)) {
stats->start = posn;
return;
}
size = posn - stats->start;
stats->start = SWAPBLK_NONE;
if (size > stats->max)
stats->max = size;
/*
* Find the fibonacci range that contains size,
* expecting to find it in an early range.
*/
lo = 0;
hi = 1;
while (hi < nitems(fib) && fib[hi] <= size) {
lo = hi;
hi *= 2;
}
if (hi >= nitems(fib))
hi = nitems(fib);
while (lo + 1 != hi) {
mid = (lo + hi) >> 1;
if (fib[mid] <= size)
lo = mid;
else
hi = mid;
}
stats->histo[lo]++;
if (lo > stats->max_bucket)
stats->max_bucket = lo;
stats->err += size - stats->avg;
stats->num++;
stats->avg += stats->err / stats->num;
stats->err %= stats->num;
}
/*
* dump_gap_stats() - print stats on gap sizes
*/
static inline void
dump_gap_stats(const struct gap_stats *stats, struct sbuf *s)
{
int i;
sbuf_printf(s, "number of maximal free ranges: %jd\n",
(intmax_t)stats->num);
sbuf_printf(s, "largest free range: %jd\n", (intmax_t)stats->max);
sbuf_printf(s, "average maximal free range size: %jd\n",
(intmax_t)stats->avg);
sbuf_printf(s, "number of maximal free ranges of different sizes:\n");
sbuf_printf(s, " count | size range\n");
sbuf_printf(s, " ----- | ----------\n");
for (i = 0; i < stats->max_bucket; i++) {
if (stats->histo[i] != 0) {
sbuf_printf(s, "%20jd | ",
(intmax_t)stats->histo[i]);
if (fib[i] != fib[i + 1] - 1)
sbuf_printf(s, "%jd to %jd\n", (intmax_t)fib[i],
(intmax_t)fib[i + 1] - 1);
else
sbuf_printf(s, "%jd\n", (intmax_t)fib[i]);
}
}
sbuf_printf(s, "%20jd | ", (intmax_t)stats->histo[i]);
if (stats->histo[i] > 1)
sbuf_printf(s, "%jd to %jd\n", (intmax_t)fib[i],
(intmax_t)stats->max);
else
sbuf_printf(s, "%jd\n", (intmax_t)stats->max);
}
/*
* blist_stats() - dump radix tree stats
*/
void
blist_stats(blist_t bl, struct sbuf *s)
{
struct gap_stats gstats;
struct gap_stats *stats = &gstats;
daddr_t i, nodes, radix;
u_daddr_t bit, diff, mask;
init_gap_stats(stats);
nodes = 0;
i = bl->bl_radix;
while (i < bl->bl_radix + bl->bl_blocks) {
/*
* Find max size subtree starting at i.
*/
radix = BLIST_BMAP_RADIX;
while (((i / radix) & BLIST_META_MASK) == 0)
radix *= BLIST_META_RADIX;
/*
* Check for skippable subtrees starting at i.
*/
while (radix > BLIST_BMAP_RADIX) {
if (bl->bl_root[nodes].u.bmu_avail == 0) {
if (gap_stats_counting(stats))
update_gap_stats(stats, i);
break;
}
if (bl->bl_root[nodes].u.bmu_avail == radix) {
if (!gap_stats_counting(stats))
update_gap_stats(stats, i);
break;
}
/*
* Skip subtree root.
*/
nodes++;
radix /= BLIST_META_RADIX;
}
if (radix == BLIST_BMAP_RADIX) {
/*
* Scan leaf.
*/
mask = bl->bl_root[nodes].u.bmu_bitmap;
diff = mask ^ (mask << 1);
if (gap_stats_counting(stats))
diff ^= 1;
while (diff != 0) {
bit = diff & -diff;
update_gap_stats(stats, i + bitpos(bit));
diff ^= bit;
}
}
nodes += radix_to_skip(radix);
i += radix;
}
update_gap_stats(stats, i);
dump_gap_stats(stats, s);
}
/************************************************************************
* ALLOCATION SUPPORT FUNCTIONS *
************************************************************************
@ -355,13 +572,13 @@ blist_print(blist_t bl)
*
* This is the core of the allocator and is optimized for the
* BLIST_BMAP_RADIX block allocation case. Otherwise, execution
* time is proportional to log2(count) + log2(BLIST_BMAP_RADIX).
* time is proportional to log2(count) + bitpos time.
*/
static daddr_t
blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count, daddr_t cursor)
{
u_daddr_t mask;
int count1, hi, lo, mid, num_shifts, range1, range_ext;
int count1, lo, num_shifts, range1, range_ext;
if (count == BLIST_BMAP_RADIX) {
/*
@ -419,17 +636,10 @@ blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count, daddr_t cursor)
/*
* The least significant set bit in mask marks the start of the first
* available range of sufficient size. Clear all the bits but that one,
* and then perform a binary search to find its position.
* and then find its position.
*/
mask &= -mask;
hi = BLIST_BMAP_RADIX - count1;
while (lo + 1 < hi) {
mid = (lo + hi) >> 1;
if ((mask >> mid) != 0)
lo = mid;
else
hi = mid;
}
lo = bitpos(mask);
/*
* Set in mask exactly the bits being allocated, and clear them from
@ -980,6 +1190,7 @@ main(int ac, char **av)
int size = 1024;
int i;
blist_t bl;
struct sbuf *s;
for (i = 1; i < ac; ++i) {
const char *ptr = av[i];
@ -1014,6 +1225,13 @@ main(int ac, char **av)
case 'p':
blist_print(bl);
break;
case 's':
s = sbuf_new_auto();
blist_stats(bl, s);
sbuf_finish(s);
printf("%s", sbuf_data(s));
sbuf_delete(s);
break;
case 'a':
if (sscanf(buf + 1, "%lld", &count) == 1) {
daddr_t blk = blist_alloc(bl, count);
@ -1041,6 +1259,7 @@ main(int ac, char **av)
case 'h':
puts(
"p -print\n"
"s -stats\n"
"a %d -allocate\n"
"f %x %d -free\n"
"l %x %d -fill\n"

View File

@ -90,6 +90,8 @@ typedef struct blist {
#define BLIST_MAX_ALLOC BLIST_BMAP_RADIX
struct sbuf;
daddr_t blist_alloc(blist_t blist, daddr_t count);
daddr_t blist_avail(blist_t blist);
blist_t blist_create(daddr_t blocks, int flags);
@ -98,6 +100,7 @@ daddr_t blist_fill(blist_t bl, daddr_t blkno, daddr_t count);
void blist_free(blist_t blist, daddr_t blkno, daddr_t count);
void blist_print(blist_t blist);
void blist_resize(blist_t *pblist, daddr_t count, int freenew, int flags);
void blist_stats(blist_t blist, struct sbuf *s);
#endif /* _SYS_BLIST_H_ */

View File

@ -92,6 +92,7 @@ __FBSDID("$FreeBSD$");
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/blist.h>
@ -323,6 +324,10 @@ static int sysctl_swap_async_max(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_vm, OID_AUTO, swap_async_max, CTLTYPE_INT | CTLFLAG_RW |
CTLFLAG_MPSAFE, NULL, 0, sysctl_swap_async_max, "I",
"Maximum running async swap ops");
static int sysctl_swap_fragmentation(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_vm, OID_AUTO, swap_fragmentation, CTLTYPE_STRING | CTLFLAG_RD |
CTLFLAG_MPSAFE, NULL, 0, sysctl_swap_fragmentation, "A",
"Swap Fragmentation Info");
static struct sx sw_alloc_sx;
@ -779,6 +784,36 @@ swp_pager_freeswapspace(daddr_t blk, int npages)
panic("Swapdev not found");
}
/*
* SYSCTL_SWAP_FRAGMENTATION() - produce raw swap space stats
*/
static int
sysctl_swap_fragmentation(SYSCTL_HANDLER_ARGS)
{
struct sbuf sbuf;
struct swdevt *sp;
const char *devname;
int error;
error = sysctl_wire_old_buffer(req, 0);
if (error != 0)
return (error);
sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
mtx_lock(&sw_dev_mtx);
TAILQ_FOREACH(sp, &swtailq, sw_list) {
if (vn_isdisk(sp->sw_vp, NULL))
devname = devtoname(sp->sw_vp->v_rdev);
else
devname = "[file]";
sbuf_printf(&sbuf, "\nFree space on device %s:\n", devname);
blist_stats(sp->sw_blist, &sbuf);
}
mtx_unlock(&sw_dev_mtx);
error = sbuf_finish(&sbuf);
sbuf_delete(&sbuf);
return (error);
}
/*
* SWAP_PAGER_FREESPACE() - frees swap blocks associated with a page
* range within an object.