A new parameter to blist_alloc specifies an upper bound on the size of
the allocation request, so that the blocks allocated are from the next
set of free blocks big enough to satisfy the minimum requirements of
the request, and the number of blocks allocated is as large as
possible, up to the specified maximum. The implementation of
swp_pager_getswapspace uses this parameter to ask for a number of
blocks between the new halved request size and the previous failed
request size. Thus a request for 32 blocks may fail, but rather than
getting only 16 blocks, the caller asks for 16 to 31 next, and
might get 19 or 27, which is closer to what they originally wanted.

I expect this to lead to bigger block allocations and less block
fragmentation, at least in some cases.

Approved by: kib (mentor)
Differential Revision: https://reviews.freebsd.org/D20001
This commit is contained in:
dougm 2019-05-11 16:15:13 +00:00
parent d2bcd1b66d
commit 24c307c3c0
3 changed files with 84 additions and 64 deletions

View File

@ -130,9 +130,10 @@ __FBSDID("$FreeBSD$");
/* /*
* static support functions * static support functions
*/ */
static daddr_t blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count); static daddr_t blst_leaf_alloc(blmeta_t *scan, daddr_t blk,
static daddr_t blst_meta_alloc(blmeta_t *scan, daddr_t cursor, daddr_t count, int *count, int maxcount);
u_daddr_t radix); static daddr_t blst_meta_alloc(blmeta_t *scan, daddr_t cursor, int *count,
int maxcount, u_daddr_t radix);
static void blst_leaf_free(blmeta_t *scan, daddr_t relblk, int count); static void blst_leaf_free(blmeta_t *scan, daddr_t relblk, int count);
static void blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count, static void blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count,
u_daddr_t radix); u_daddr_t radix);
@ -293,12 +294,14 @@ blist_destroy(blist_t bl)
* not be allocated. * not be allocated.
*/ */
daddr_t daddr_t
blist_alloc(blist_t bl, daddr_t count) blist_alloc(blist_t bl, int *count, int maxcount)
{ {
daddr_t blk, cursor; daddr_t blk, cursor;
KASSERT(count <= BLIST_MAX_ALLOC, KASSERT(*count <= maxcount,
("allocation too large: %d", (int)count)); ("invalid parameters %d > %d", *count, maxcount));
KASSERT(maxcount <= BLIST_MAX_ALLOC,
("allocation too large: %d", maxcount));
/* /*
* This loop iterates at most twice. An allocation failure in the * This loop iterates at most twice. An allocation failure in the
@ -306,19 +309,18 @@ blist_alloc(blist_t bl, daddr_t count)
* non-zero. When the cursor is zero, an allocation failure will * non-zero. When the cursor is zero, an allocation failure will
* stop further iterations. * stop further iterations.
*/ */
cursor = bl->bl_cursor; for (cursor = bl->bl_cursor;; cursor = 0) {
for (;;) { blk = blst_meta_alloc(bl->bl_root, cursor, count, maxcount,
blk = blst_meta_alloc(bl->bl_root, cursor, count,
bl->bl_radix); bl->bl_radix);
if (blk != SWAPBLK_NONE) { if (blk != SWAPBLK_NONE) {
bl->bl_avail -= count; bl->bl_avail -= *count;
bl->bl_cursor = blk + count; bl->bl_cursor = blk + *count;
if (bl->bl_cursor == bl->bl_blocks) if (bl->bl_cursor == bl->bl_blocks)
bl->bl_cursor = 0; bl->bl_cursor = 0;
return (blk); return (blk);
} else if (cursor == 0) }
if (cursor == 0)
return (SWAPBLK_NONE); return (SWAPBLK_NONE);
cursor = 0;
} }
} }
@ -615,29 +617,34 @@ blist_stats(blist_t bl, struct sbuf *s)
* common ancestor to mark any subtrees made completely empty. * common ancestor to mark any subtrees made completely empty.
*/ */
static int static int
blst_next_leaf_alloc(blmeta_t *scan, daddr_t blk, int count) blst_next_leaf_alloc(blmeta_t *scan, daddr_t blk, int count, int maxcount)
{ {
blmeta_t *next; blmeta_t *next;
u_daddr_t radix; u_daddr_t radix;
int digit; int avail, digit;
next = scan + 1; next = scan + 1;
blk += BLIST_BMAP_RADIX; blk += BLIST_BMAP_RADIX;
radix = BLIST_BMAP_RADIX; radix = BLIST_BMAP_RADIX;
while ((digit = ((blk / radix) & BLIST_META_MASK)) == 0 && while ((next->bm_bitmap & 1) == 1 &&
(next->bm_bitmap & 1) == 1) { (digit = ((blk / radix) & BLIST_META_MASK)) == 0) {
next++; next++;
radix *= BLIST_META_RADIX; radix *= BLIST_META_RADIX;
} }
if (((next->bm_bitmap + 1) & ~((u_daddr_t)-1 << count)) != 0) { if ((next->bm_bitmap & 1) != 1)
/* return (0);
* The next leaf doesn't have enough free blocks at the avail = (~next->bm_bitmap != 0) ?
* beginning to complete the spanning allocation. bitpos(~next->bm_bitmap) : BLIST_BMAP_RADIX;
*/ if (avail < count) {
return (ENOMEM); /*
* The next leaf doesn't have enough free blocks at the
* beginning to complete the spanning allocation.
*/
return (0);
} }
count = imin(avail, maxcount);
/* Clear the first 'count' bits in the next leaf to allocate. */ /* Clear the first 'count' bits in the next leaf to allocate. */
next->bm_bitmap &= (u_daddr_t)-1 << count; next->bm_bitmap &= ~bitrange(0, count);
/* /*
* Update bitmaps of next-ancestors, up to least common ancestor. * Update bitmaps of next-ancestors, up to least common ancestor.
@ -650,7 +657,7 @@ blst_next_leaf_alloc(blmeta_t *scan, daddr_t blk, int count)
} }
next->bm_bitmap ^= 1; next->bm_bitmap ^= 1;
} }
return (0); return (count);
} }
/* /*
@ -674,13 +681,13 @@ flip_hibits(u_daddr_t mask)
* crosses a leaf boundary. * crosses a leaf boundary.
*/ */
static daddr_t static daddr_t
blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count) blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int *count, int maxcount)
{ {
u_daddr_t cursor_mask, mask; u_daddr_t cursor_mask, mask;
int count1, hi, lo, num_shifts, range1, range_ext; int count1, hi, lo, num_shifts, range1, range_ext;
range1 = 0; range1 = 0;
count1 = count - 1; count1 = *count - 1;
num_shifts = fls(count1); num_shifts = fls(count1);
mask = scan->bm_bitmap; mask = scan->bm_bitmap;
while (flip_hibits(mask) != 0 && num_shifts > 0) { while (flip_hibits(mask) != 0 && num_shifts > 0) {
@ -735,40 +742,50 @@ blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count)
/* /*
* The least significant set bit in mask marks the start of the first * The least significant set bit in mask marks the start of the first
* available range of sufficient size. Clear all the bits but that one, * available range of sufficient size. Find its position.
* and then find its position.
*/ */
mask &= -mask;
lo = bitpos(mask); lo = bitpos(mask);
hi = lo + count; /*
if (hi > BLIST_BMAP_RADIX) { * Find how much space is available starting at that position.
/* */
* An allocation within this leaf is impossible, so a successful if (flip_hibits(mask) != 0) {
* allocation depends on the next leaf providing some of the blocks. /* Count the 1 bits starting at position lo. */
*/ hi = bitpos(flip_hibits(mask)) + count1;
if (blst_next_leaf_alloc(scan, blk, hi - BLIST_BMAP_RADIX) != 0) if (maxcount < hi - lo)
hi = lo + maxcount;
*count = hi - lo;
mask = bitrange(lo, *count);
} else if (maxcount <= BLIST_BMAP_RADIX - lo) {
/* All the blocks we can use are available here. */
hi = lo + maxcount;
*count = maxcount;
mask = bitrange(lo, *count);
} else {
/* Check next leaf for some of the blocks we want or need. */
count1 = *count - (BLIST_BMAP_RADIX - lo);
maxcount -= BLIST_BMAP_RADIX - lo;
hi = blst_next_leaf_alloc(scan, blk, count1, maxcount);
if (hi < count1)
/* /*
* The hint cannot be updated, because the same * The next leaf cannot supply enough blocks to reach
* allocation request could be satisfied later, by this * the minimum required allocation. The hint cannot be
* leaf, if the state of the next leaf changes, and * updated, because the same allocation request could
* without any changes to this leaf. * be satisfied later, by this leaf, if the state of
* the next leaf changes, and without any changes to
* this leaf.
*/ */
return (SWAPBLK_NONE); return (SWAPBLK_NONE);
*count = BLIST_BMAP_RADIX - lo + hi;
hi = BLIST_BMAP_RADIX; hi = BLIST_BMAP_RADIX;
} }
/* Set the bits of mask at position 'lo' and higher. */
mask = -mask;
if (hi == BLIST_BMAP_RADIX) { if (hi == BLIST_BMAP_RADIX) {
/* /*
* Update bighint. There is no allocation bigger than range1 * Update bighint. There is no allocation bigger than range1
* available in this leaf after this allocation completes. * available in this leaf after this allocation completes.
*/ */
scan->bm_bighint = range1; scan->bm_bighint = range1;
} else {
/* Clear the bits of mask at position 'hi' and higher. */
mask &= (u_daddr_t)-1 >> (BLIST_BMAP_RADIX - hi);
} }
/* Clear the allocated bits from this leaf. */ /* Clear the allocated bits from this leaf. */
scan->bm_bitmap &= ~mask; scan->bm_bitmap &= ~mask;
@ -784,7 +801,8 @@ blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count)
* and we have a few optimizations strewn in as well. * and we have a few optimizations strewn in as well.
*/ */
static daddr_t static daddr_t
blst_meta_alloc(blmeta_t *scan, daddr_t cursor, daddr_t count, u_daddr_t radix) blst_meta_alloc(blmeta_t *scan, daddr_t cursor, int *count,
int maxcount, u_daddr_t radix)
{ {
daddr_t blk, i, r, skip; daddr_t blk, i, r, skip;
u_daddr_t mask; u_daddr_t mask;
@ -792,7 +810,7 @@ blst_meta_alloc(blmeta_t *scan, daddr_t cursor, daddr_t count, u_daddr_t radix)
int digit; int digit;
if (radix == BLIST_BMAP_RADIX) if (radix == BLIST_BMAP_RADIX)
return (blst_leaf_alloc(scan, cursor, count)); return (blst_leaf_alloc(scan, cursor, count, maxcount));
blk = cursor & -radix; blk = cursor & -radix;
scan_from_start = (cursor == blk); scan_from_start = (cursor == blk);
radix /= BLIST_META_RADIX; radix /= BLIST_META_RADIX;
@ -821,12 +839,12 @@ blst_meta_alloc(blmeta_t *scan, daddr_t cursor, daddr_t count, u_daddr_t radix)
do { do {
digit = bitpos(mask); digit = bitpos(mask);
i = 1 + digit * skip; i = 1 + digit * skip;
if (count <= scan[i].bm_bighint) { if (*count <= scan[i].bm_bighint) {
/* /*
* The allocation might fit beginning in the i'th subtree. * The allocation might fit beginning in the i'th subtree.
*/ */
r = blst_meta_alloc(&scan[i], cursor + digit * radix, r = blst_meta_alloc(&scan[i], cursor + digit * radix,
count, radix); count, maxcount, radix);
if (r != SWAPBLK_NONE) { if (r != SWAPBLK_NONE) {
if (scan[i].bm_bitmap == 0) if (scan[i].bm_bitmap == 0)
scan->bm_bitmap ^= bitrange(digit, 1); scan->bm_bitmap ^= bitrange(digit, 1);
@ -842,7 +860,7 @@ blst_meta_alloc(blmeta_t *scan, daddr_t cursor, daddr_t count, u_daddr_t radix)
*/ */
if (scan_from_start && !(digit == BLIST_META_RADIX - 1 && if (scan_from_start && !(digit == BLIST_META_RADIX - 1 &&
scan[i].bm_bighint == BLIST_MAX_ALLOC)) scan[i].bm_bighint == BLIST_MAX_ALLOC))
scan->bm_bighint = count - 1; scan->bm_bighint = *count - 1;
return (SWAPBLK_NONE); return (SWAPBLK_NONE);
} }
@ -1101,7 +1119,7 @@ main(int ac, char **av)
for (;;) { for (;;) {
char buf[1024]; char buf[1024];
long long da = 0; long long da = 0;
long long count = 0; int count = 0, maxcount = 0;
printf("%lld/%lld/%lld> ", (long long)blist_avail(bl), printf("%lld/%lld/%lld> ", (long long)blist_avail(bl),
(long long)size, (long long)bl->bl_radix); (long long)size, (long long)bl->bl_radix);
@ -1110,7 +1128,7 @@ main(int ac, char **av)
break; break;
switch(buf[0]) { switch(buf[0]) {
case 'r': case 'r':
if (sscanf(buf + 1, "%lld", &count) == 1) { if (sscanf(buf + 1, "%d", &count) == 1) {
blist_resize(&bl, count, 1, M_WAITOK); blist_resize(&bl, count, 1, M_WAITOK);
} else { } else {
printf("?\n"); printf("?\n");
@ -1126,22 +1144,23 @@ main(int ac, char **av)
sbuf_delete(s); sbuf_delete(s);
break; break;
case 'a': case 'a':
if (sscanf(buf + 1, "%lld", &count) == 1) { if (sscanf(buf + 1, "%d%d", &count, &maxcount) == 2) {
daddr_t blk = blist_alloc(bl, count); daddr_t blk = blist_alloc(bl, &count, maxcount);
printf(" R=%08llx\n", (long long)blk); printf(" R=%08llx, c=%08d\n",
(long long)blk, count);
} else { } else {
printf("?\n"); printf("?\n");
} }
break; break;
case 'f': case 'f':
if (sscanf(buf + 1, "%llx %lld", &da, &count) == 2) { if (sscanf(buf + 1, "%llx %d", &da, &count) == 2) {
blist_free(bl, da, count); blist_free(bl, da, count);
} else { } else {
printf("?\n"); printf("?\n");
} }
break; break;
case 'l': case 'l':
if (sscanf(buf + 1, "%llx %lld", &da, &count) == 2) { if (sscanf(buf + 1, "%llx %d", &da, &count) == 2) {
printf(" n=%jd\n", printf(" n=%jd\n",
(intmax_t)blist_fill(bl, da, count)); (intmax_t)blist_fill(bl, da, count));
} else { } else {
@ -1153,7 +1172,7 @@ main(int ac, char **av)
puts( puts(
"p -print\n" "p -print\n"
"s -stats\n" "s -stats\n"
"a %d -allocate\n" "a %d %d -allocate\n"
"f %x %d -free\n" "f %x %d -free\n"
"l %x %d -fill\n" "l %x %d -fill\n"
"r %d -resize\n" "r %d -resize\n"

View File

@ -33,7 +33,7 @@
* Usage: * Usage:
* blist = blist_create(blocks, flags) * blist = blist_create(blocks, flags)
* (void) blist_destroy(blist) * (void) blist_destroy(blist)
* blkno = blist_alloc(blist, count) * blkno = blist_alloc(blist, &count, maxcount)
* (void) blist_free(blist, blkno, count) * (void) blist_free(blist, blkno, count)
* nblks = blist_fill(blist, blkno, count) * nblks = blist_fill(blist, blkno, count)
* (void) blist_resize(&blist, count, freeextra, flags) * (void) blist_resize(&blist, count, freeextra, flags)
@ -92,7 +92,7 @@ typedef struct blist {
struct sbuf; struct sbuf;
daddr_t blist_alloc(blist_t blist, daddr_t count); daddr_t blist_alloc(blist_t blist, int *count, int maxcount);
daddr_t blist_avail(blist_t blist); daddr_t blist_avail(blist_t blist);
blist_t blist_create(daddr_t blocks, int flags); blist_t blist_create(daddr_t blocks, int flags);
void blist_destroy(blist_t blist); void blist_destroy(blist_t blist);

View File

@ -725,23 +725,24 @@ swp_pager_getswapspace(int *io_npages, int limit)
{ {
daddr_t blk; daddr_t blk;
struct swdevt *sp; struct swdevt *sp;
int npages; int mpages, npages;
blk = SWAPBLK_NONE; blk = SWAPBLK_NONE;
npages = *io_npages; npages = mpages = *io_npages;
mtx_lock(&sw_dev_mtx); mtx_lock(&sw_dev_mtx);
sp = swdevhd; sp = swdevhd;
while (!TAILQ_EMPTY(&swtailq)) { while (!TAILQ_EMPTY(&swtailq)) {
if (sp == NULL) if (sp == NULL)
sp = TAILQ_FIRST(&swtailq); sp = TAILQ_FIRST(&swtailq);
if ((sp->sw_flags & SW_CLOSING) == 0) if ((sp->sw_flags & SW_CLOSING) == 0)
blk = blist_alloc(sp->sw_blist, npages); blk = blist_alloc(sp->sw_blist, &npages, mpages);
if (blk != SWAPBLK_NONE) if (blk != SWAPBLK_NONE)
break; break;
sp = TAILQ_NEXT(sp, sw_list); sp = TAILQ_NEXT(sp, sw_list);
if (swdevhd == sp) { if (swdevhd == sp) {
if (npages <= limit) if (npages <= limit)
break; break;
mpages = npages - 1;
npages >>= 1; npages >>= 1;
} }
} }