This is David Schultz's swapoff code which I am finally able to commit.

This should be considered highly experimental for the moment.

Submitted by:	David Schultz <dschultz@uclink.Berkeley.EDU>
MFC after:	3 weeks
This commit is contained in:
Matthew Dillon 2002-12-15 19:17:57 +00:00
parent 389d2b6e21
commit 92da00bb24
19 changed files with 633 additions and 84 deletions

View File

@ -522,6 +522,7 @@ int setruid(uid_t);
void setusershell(void);
int strtofflags(char **, u_long *, u_long *);
int swapon(const char *);
int swapoff(const char *);
int syscall(int, ...);
off_t __syscall(quad_t, ...);
int ttyslot(void);

View File

@ -131,6 +131,7 @@ MLINKS+=shmat.2 shmdt.2
MLINKS+=stat.2 fstat.2 stat.2 lstat.2
MLINKS+=statfs.2 fstatfs.2
MLINKS+=syscall.2 __syscall.2
MLINKS+=swapon.2 swapoff.2
MLINKS+=truncate.2 ftruncate.2
MLINKS+=utimes.2 futimes.2 utimes.2 lutimes.2
MLINKS+=wait.2 wait3.2 wait.2 wait4.2 wait.2 waitpid.2

View File

@ -36,14 +36,16 @@
.Dt SWAPON 2
.Os
.Sh NAME
.Nm swapon
.Nd add a swap device for interleaved paging/swapping
.Nm swapon , swapoff
.Nd control devices for interleaved paging/swapping
.Sh LIBRARY
.Lb libc
.Sh SYNOPSIS
.In unistd.h
.Ft int
.Fn swapon "const char *special"
.Ft int
.Fn swapoff "const char *special"
.Sh DESCRIPTION
.Fn Swapon
makes the block device
@ -55,13 +57,22 @@ configuration time. The size of the swap area on
.Fa special
is calculated at the time the device is first made available
for swapping.
.Pp
The
.Fn swapoff
system call disables paging and swapping on the given device.
All associated swap metadata are deallocated, and the device
is made available for other purposes.
.Sh RETURN VALUES
If an error has occurred, a value of -1 is returned and
.Va errno
is set to indicate the error.
.Sh ERRORS
.Fn Swapon
succeeds unless:
Both
.Fn swapon
and
.Fn swapoff
can fail if:
.Bl -tag -width Er
.It Bq Er ENOTDIR
A component of the path prefix is not a directory.
@ -76,6 +87,19 @@ Search permission is denied for a component of the path prefix.
Too many symbolic links were encountered in translating the pathname.
.It Bq Er EPERM
The caller is not the super-user.
.It Bq Er EFAULT
.Fa Special
points outside the process's allocated address space.
.El
.Pp
Additionally,
.Fn swapon
can fail for the following reasons:
.Bl -tag -width Er
.It Bq Er EINVAL
The system has reached the boot-time limit on the number of
swap devices,
.Va vm.nswapdev .
.It Bq Er ENOTBLK
.Fa Special
is not a block device.
@ -84,11 +108,6 @@ The device specified by
.Fa special
has already
been made available for swapping
.It Bq Er EINVAL
The device configured by
.Fa special
was not
configured into the system as a swap device.
.It Bq Er ENXIO
The major device number of
.Fa special
@ -96,20 +115,28 @@ is out of range (this indicates no device driver exists
for the associated hardware).
.It Bq Er EIO
An I/O error occurred while opening the swap device.
.It Bq Er EFAULT
.Fa Special
points outside the process's allocated address space.
.El
.Pp
Lastly,
.Fn swapoff
can fail if:
.Bl -tag -width Er
.It Bq Er EINVAL
The system is not currently swapping to
.Fa special .
.It Bq Er ENOMEM
Not enough virtual memory is available to safely disable
paging and swapping to the given device.
.El
.Sh SEE ALSO
.Xr config 8 ,
.Xr swapon 8
.Sh BUGS
There is no way to stop swapping on a disk so that the pack may be
dismounted.
.Pp
This call will be upgraded in future versions of the system.
.Xr swapon 8 ,
.Xr sysctl 8
.Sh HISTORY
The
.Fn swapon
function call appeared in
.Bx 4.0 .
.Fn Swapoff
appeared in
.Fx 5.0 .

View File

@ -3,5 +3,7 @@
PROG= swapon
MAN= swapon.8
LINKS= ${BINDIR}/swapon ${BINDIR}/swapoff
MLINKS= swapon.8 swapoff.8
.include <bsd.prog.mk>

View File

@ -36,39 +36,46 @@
.Dt SWAPON 8
.Os
.Sh NAME
.Nm swapon
.Nd "specify additional device for paging and swapping"
.Nm swapon , swapoff
.Nd "specify devices for paging and swapping"
.Sh SYNOPSIS
.Nm
.Nm swap[on|off]
.Fl a
.Nm
.Nm swap[on|off]
.Ar special_file ...
.Sh DESCRIPTION
The
.Nm
.Nm swapon
utility is used to specify additional devices on which paging and swapping
are to take place.
The system begins by swapping and paging on only a single device
so that only one disk is required at bootstrap time.
Calls to
.Nm
.Nm swapon
normally occur in the system multi-user initialization file
.Pa /etc/rc
making all swap devices available, so that the paging and swapping
activity is interleaved across several devices.
.Pp
The
.Nm swapoff
utility disables paging and swapping on a device.
Calls to
.Nm swapoff
succeed only if disabling the device would leave enough
remaining virtual memory to accommodate all running programs.
.Pp
Normally, the first form is used:
.Bl -tag -width indent
.It Fl a
All devices marked as ``sw''
swap devices in
.Pa /etc/fstab
are made available unless their ``noauto'' option is also set.
are added to or removed from the pool of available swap
unless their ``noauto'' option is also set.
.El
.Pp
The second form gives individual block devices as given
in the system swap configuration table. The call makes only this space
available to the system for swap allocation.
The second form is used to configure or disable individual devices.
.Sh SEE ALSO
.Xr swapon 2 ,
.Xr fstab 5 ,
@ -85,12 +92,12 @@ memory disk devices
.It Pa /etc/fstab
ASCII file system description table
.El
.Sh BUGS
There is no way to stop paging and swapping on a device.
It is therefore not possible to dismount swap devices which are
mounted during system operation.
.Sh HISTORY
The
.Nm
.Nm swapon
utility appeared in
.Bx 4.0 .
The
.Nm swapoff
utility appeared in
.Fx 5.0 .

View File

@ -53,8 +53,9 @@ static const char rcsid[] =
#include <string.h>
#include <unistd.h>
static void usage(void);
int add(char *name, int ignoreebusy);
static void usage(const char *);
static int is_swapoff(const char *);
int swap_on_off(char *name, int ignoreebusy, int do_swapoff);
int
main(int argc, char **argv)
@ -62,6 +63,10 @@ main(int argc, char **argv)
struct fstab *fsp;
int stat;
int ch, doall;
int do_swapoff;
char *pname = argv[0];
do_swapoff = is_swapoff(pname);
doall = 0;
while ((ch = getopt(argc, argv, "a")) != -1)
@ -71,7 +76,7 @@ main(int argc, char **argv)
break;
case '?':
default:
usage();
usage(pname);
}
argv += optind;
@ -82,23 +87,24 @@ main(int argc, char **argv)
continue;
if (strstr(fsp->fs_mntops, "noauto"))
continue;
if (add(fsp->fs_spec, 1))
if (swap_on_off(fsp->fs_spec, 1, do_swapoff))
stat = 1;
else
printf("swapon: adding %s as swap device\n",
printf("%s: %sing %s as swap device\n",
pname, do_swapoff ? "remov" : "add",
fsp->fs_spec);
}
else if (!*argv)
usage();
usage(pname);
for (; *argv; ++argv)
stat |= add(*argv, 0);
stat |= swap_on_off(*argv, 0, do_swapoff);
exit(stat);
}
int
add(char *name, int ignoreebusy)
swap_on_off(char *name, int ignoreebusy, int do_swapoff)
{
if (swapon(name) == -1) {
if ((do_swapoff ? swapoff(name) : swapon(name)) == -1) {
switch (errno) {
case EBUSY:
if (!ignoreebusy)
@ -114,8 +120,23 @@ add(char *name, int ignoreebusy)
}
static void
usage()
usage(const char *pname)
{
fprintf(stderr, "usage: swapon [-a] [special_file ...]\n");
fprintf(stderr, "usage: %s [-a] [special_file ...]\n", pname);
exit(1);
}
/*
 * Decide which personality this binary was invoked as: return 1 when the
 * basename of the invocation path is exactly "swapoff", 0 otherwise.
 */
static int
is_swapoff(const char *s)
{
	const char *base;

	base = strrchr(s, '/');
	base = (base != NULL) ? base + 1 : s;
	return (strcmp(base, "swapoff") == 0);
}

View File

@ -594,3 +594,4 @@
421 UNIMPL BSD getcontext
422 UNIMPL BSD setcontext
423 UNIMPL BSD swapcontext
424 MNOPROTO BSD swapoff

View File

@ -594,3 +594,4 @@
421 UNIMPL BSD getcontext
422 UNIMPL BSD setcontext
423 UNIMPL BSD swapcontext
424 MNOPROTO BSD swapoff

View File

@ -594,3 +594,4 @@
421 UNIMPL BSD getcontext
422 UNIMPL BSD setcontext
423 UNIMPL BSD swapcontext
424 MNOPROTO BSD swapoff

View File

@ -93,7 +93,7 @@
#include <stdlib.h>
#include <stdarg.h>
#define malloc(a,b,c) malloc(a)
#define malloc(a,b,c) calloc(a, 1)
#define free(a,b) free(a)
typedef unsigned int u_daddr_t;
@ -116,6 +116,9 @@ static void blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count,
daddr_t radix, int skip, daddr_t blk);
static void blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix,
daddr_t skip, blist_t dest, daddr_t count);
static int blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count);
static int blst_meta_fill(blmeta_t *scan, daddr_t allocBlk, daddr_t count,
daddr_t radix, int skip, daddr_t blk);
static daddr_t blst_radix_init(blmeta_t *scan, daddr_t radix,
int skip, daddr_t count);
#ifndef _KERNEL
@ -165,13 +168,14 @@ blist_create(daddr_t blocks)
#if defined(BLIST_DEBUG)
printf(
"BLIST representing %d blocks (%d MB of swap)"
", requiring %dK of ram\n",
bl->bl_blocks,
bl->bl_blocks * 4 / 1024,
(bl->bl_rootblks * sizeof(blmeta_t) + 1023) / 1024
"BLIST representing %lld blocks (%lld MB of swap)"
", requiring %lldK of ram\n",
(long long)bl->bl_blocks,
(long long)bl->bl_blocks * 4 / 1024,
(long long)(bl->bl_rootblks * sizeof(blmeta_t) + 1023) / 1024
);
printf("BLIST raw radix tree contains %d records\n", bl->bl_rootblks);
printf("BLIST raw radix tree contains %lld records\n",
(long long)bl->bl_rootblks);
#endif
blst_radix_init(bl->bl_root, bl->bl_radix, bl->bl_skip, blocks);
@ -225,6 +229,30 @@ blist_free(blist_t bl, daddr_t blkno, daddr_t count)
}
}
/*
* blist_fill() - mark a region in the block bitmap as off-limits
* to the allocator (i.e. allocate it), ignoring any
* existing allocations. Return the number of blocks
* actually filled that were free before the call.
*/
/*
 * Mark the range [blkno, blkno + count) as allocated regardless of its
 * current state and return how many of those blocks were free beforehand.
 * A NULL blist is tolerated and reports zero blocks filled.
 */
int
blist_fill(blist_t bl, daddr_t blkno, daddr_t count)
{
	int nfilled;

	if (bl == NULL)
		return 0;

	if (bl->bl_radix == BLIST_BMAP_RADIX)
		nfilled = blst_leaf_fill(bl->bl_root, blkno, count);
	else
		nfilled = blst_meta_fill(bl->bl_root, blkno, count,
		    bl->bl_radix, bl->bl_skip, 0);
	bl->bl_free -= nfilled;
	return nfilled;
}
/*
* blist_resize() - resize an existing radix tree to handle the
* specified number of blocks. This will reallocate
@ -507,9 +535,9 @@ blst_meta_free(
int next_skip = (skip >> BLIST_META_RADIX_SHIFT);
#if 0
printf("FREE (%x,%d) FROM (%x,%d)\n",
freeBlk, count,
blk, radix
printf("FREE (%llx,%lld) FROM (%llx,%lld)\n",
(long long)freeBlk, (long long)count,
(long long)blk, (long long)radix
);
#endif
@ -678,6 +706,117 @@ static void blst_copy(
}
}
/*
* BLST_LEAF_FILL() - allocate specific blocks in leaf bitmap
*
* This routine allocates all blocks in the specified range
* regardless of any existing allocations in that range. Returns
* the number of blocks allocated by the call.
*/
/*
 * Force-allocate `count` blocks starting at `blk` within a leaf bitmap,
 * ignoring their current state.  Returns how many of those blocks were
 * free (bit set) before the call.
 */
static int
blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count)
{
	int offset = blk & (BLIST_BMAP_RADIX - 1);
	u_daddr_t mask;
	u_daddr_t freebits;
	int nfreed = 0;

	/* Build a mask covering bits [offset, offset + count). */
	mask = ((u_daddr_t)-1 << offset) &
	    ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - count - offset));

	/* Count the set (free) bits we are about to claim. */
	freebits = scan->u.bmu_bitmap & mask;
	while (freebits != 0) {
		freebits &= freebits - 1;	/* clear lowest set bit */
		nfreed++;
	}

	scan->u.bmu_bitmap &= ~mask;
	return nfreed;
}
/*
* BLIST_META_FILL() - allocate specific blocks at a meta node
*
* This routine allocates the specified range of blocks,
* regardless of any existing allocations in the range. The
* range must be within the extent of this node. Returns the
* number of blocks allocated by the call.
*/
static int
blst_meta_fill(
	blmeta_t *scan,
	daddr_t allocBlk,	/* first block of the range to fill */
	daddr_t count,		/* number of blocks to fill */
	daddr_t radix,		/* blocks covered by this meta node */
	int skip,
	daddr_t blk		/* first block covered by this node */
) {
	int i;
	int next_skip = (skip >> BLIST_META_RADIX_SHIFT);
	int nblks = 0;

	if (count == radix || scan->u.bmu_avail == 0) {
		/*
		 * ALL-ALLOCATED special case
		 */
		/* Everything still free under this node is consumed here. */
		nblks = scan->u.bmu_avail;
		scan->u.bmu_avail = 0;
		scan->bm_bighint = count;
		return nblks;
	}

	if (scan->u.bmu_avail == radix) {
		radix >>= BLIST_META_RADIX_SHIFT;

		/*
		 * ALL-FREE special case, initialize sublevel
		 */
		for (i = 1; i <= skip; i += next_skip) {
			/* (daddr_t)-1 marks a terminator past real children. */
			if (scan[i].bm_bighint == (daddr_t)-1)
				break;
			if (next_skip == 1) {
				/* Children are leaves: all bitmap bits free. */
				scan[i].u.bmu_bitmap = (u_daddr_t)-1;
				scan[i].bm_bighint = BLIST_BMAP_RADIX;
			} else {
				scan[i].bm_bighint = radix;
				scan[i].u.bmu_avail = radix;
			}
		}
	} else {
		radix >>= BLIST_META_RADIX_SHIFT;
	}

	if (count > radix)
		panic("blist_meta_fill: allocation too large");

	/* Locate the child covering allocBlk, then walk across the range. */
	i = (allocBlk - blk) / radix;
	blk += i * radix;
	i = i * next_skip + 1;

	while (i <= skip && blk < allocBlk + count) {
		daddr_t v;

		/* Portion of the request that lands inside this child. */
		v = blk + radix - allocBlk;
		if (v > count)
			v = count;

		if (scan->bm_bighint == (daddr_t)-1)
			panic("blst_meta_fill: filling unexpected range");

		if (next_skip == 1) {
			nblks += blst_leaf_fill(&scan[i], allocBlk, v);
		} else {
			nblks += blst_meta_fill(&scan[i], allocBlk, v,
			    radix, next_skip - 1, blk);
		}
		count -= v;
		allocBlk += v;
		blk += radix;
		i += next_skip;
	}
	/* Propagate the number of newly-allocated blocks up to this node. */
	scan->u.bmu_avail -= nblks;
	return nblks;
}
/*
* BLST_RADIX_INIT() - initialize radix tree
*
@ -768,41 +907,41 @@ blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, int skip, int tab)
if (radix == BLIST_BMAP_RADIX) {
printf(
"%*.*s(%04x,%d): bitmap %08x big=%d\n",
"%*.*s(%08llx,%lld): bitmap %08llx big=%lld\n",
tab, tab, "",
blk, radix,
scan->u.bmu_bitmap,
scan->bm_bighint
(long long)blk, (long long)radix,
(long long)scan->u.bmu_bitmap,
(long long)scan->bm_bighint
);
return;
}
if (scan->u.bmu_avail == 0) {
printf(
"%*.*s(%04x,%d) ALL ALLOCATED\n",
"%*.*s(%08llx,%lld) ALL ALLOCATED\n",
tab, tab, "",
blk,
radix
(long long)blk,
(long long)radix
);
return;
}
if (scan->u.bmu_avail == radix) {
printf(
"%*.*s(%04x,%d) ALL FREE\n",
"%*.*s(%08llx,%lld) ALL FREE\n",
tab, tab, "",
blk,
radix
(long long)blk,
(long long)radix
);
return;
}
printf(
"%*.*s(%04x,%d): subtree (%d/%d) big=%d {\n",
"%*.*s(%08llx,%lld): subtree (%lld/%lld) big=%lld {\n",
tab, tab, "",
blk, radix,
scan->u.bmu_avail,
radix,
scan->bm_bighint
(long long)blk, (long long)radix,
(long long)scan->u.bmu_avail,
(long long)radix,
(long long)scan->bm_bighint
);
radix >>= BLIST_META_RADIX_SHIFT;
@ -812,9 +951,9 @@ blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, int skip, int tab)
for (i = 1; i <= skip; i += next_skip) {
if (scan[i].bm_bighint == (daddr_t)-1) {
printf(
"%*.*s(%04x,%d): Terminator\n",
"%*.*s(%08llx,%lld): Terminator\n",
tab, tab, "",
blk, radix
(long long)blk, (long long)radix
);
lastState = 0;
break;
@ -866,13 +1005,14 @@ main(int ac, char **av)
daddr_t count = 0;
printf("%d/%d/%d> ", bl->bl_free, size, bl->bl_radix);
printf("%lld/%lld/%lld> ", (long long)bl->bl_free,
(long long)size, (long long)bl->bl_radix);
fflush(stdout);
if (fgets(buf, sizeof(buf), stdin) == NULL)
break;
switch(buf[0]) {
case 'r':
if (sscanf(buf + 1, "%d", &count) == 1) {
if (sscanf(buf + 1, "%lld", &count) == 1) {
blist_resize(&bl, count, 1);
} else {
printf("?\n");
@ -881,26 +1021,37 @@ main(int ac, char **av)
blist_print(bl);
break;
case 'a':
if (sscanf(buf + 1, "%d", &count) == 1) {
if (sscanf(buf + 1, "%lld", &count) == 1) {
daddr_t blk = blist_alloc(bl, count);
printf(" R=%04x\n", blk);
printf(" R=%08llx\n", (long long)blk);
} else {
printf("?\n");
}
break;
case 'f':
if (sscanf(buf + 1, "%x %d", &da, &count) == 2) {
if (sscanf(buf + 1, "%llx %lld",
(long long *)&da, (long long *)&count) == 2) {
blist_free(bl, da, count);
} else {
printf("?\n");
}
break;
case 'l':
if (sscanf(buf + 1, "%llx %lld",
(long long *)&da, (long long *)&count) == 2) {
printf(" n=%d\n",
blist_fill(bl, da, count));
} else {
printf("?\n");
}
break;
case '?':
case 'h':
puts(
"p -print\n"
"a %d -allocate\n"
"f %x %d -free\n"
"l %x %d -fill\n"
"r %d -resize\n"
"h/? -help"
);

View File

@ -612,6 +612,7 @@
422 MSTD BSD { int setcontext(const struct __ucontext *ucp); }
423 MSTD BSD { int swapcontext(struct __ucontext *oucp, \
const struct __ucontext *ucp); }
424 MSTD BSD { int swapoff(const char *name); }
; Please copy any additions and changes to the following compatibility tables:
; sys/ia64/ia32/syscalls.master (take a best guess)

View File

@ -9,6 +9,7 @@
* (void) blist_destroy(blist)
* blkno = blist_alloc(blist, count)
* (void) blist_free(blist, blkno, count)
* nblks = blist_fill(blist, blkno, count)
* (void) blist_resize(&blist, count, freeextra)
*
*
@ -78,6 +79,7 @@ extern blist_t blist_create(daddr_t blocks);
extern void blist_destroy(blist_t blist);
extern daddr_t blist_alloc(blist_t blist, daddr_t count);
extern void blist_free(blist_t blist, daddr_t blkno, daddr_t count);
extern int blist_fill(blist_t bl, daddr_t blkno, daddr_t count);
extern void blist_print(blist_t blist);
extern void blist_resize(blist_t *pblist, daddr_t count, int freenew);

View File

@ -274,6 +274,7 @@ struct swdevt {
};
#define SW_FREED 0x01
#define SW_SEQUENTIAL 0x02
#define SW_CLOSING 0x04
#define sw_freed sw_flags /* XXX compat */
#ifdef _KERNEL

View File

@ -274,6 +274,7 @@ struct swdevt {
};
#define SW_FREED 0x01
#define SW_SEQUENTIAL 0x02
#define SW_CLOSING 0x04
#define sw_freed sw_flags /* XXX compat */
#ifdef _KERNEL

View File

@ -206,6 +206,8 @@ static __inline daddr_t swp_pager_getswapspace(int npages);
/*
* Metadata functions
*/
static __inline struct swblock **
swp_pager_hash(vm_object_t object, vm_pindex_t index);
static void swp_pager_meta_build(vm_object_t, vm_pindex_t, daddr_t);
static void swp_pager_meta_free(vm_object_t, vm_pindex_t, daddr_t);
static void swp_pager_meta_free_all(vm_object_t);
@ -512,12 +514,22 @@ swp_pager_freeswapspace(blk, npages)
daddr_t blk;
int npages;
{
struct swdevt *sp = &swdevt[BLK2DEVIDX(blk)];
GIANT_REQUIRED;
/* per-swap area stats */
sp->sw_used -= npages;
/*
* If we are attempting to stop swapping on this device, we
* don't want to mark any blocks free lest they be reused.
*/
if (sp->sw_flags & SW_CLOSING)
return;
blist_free(swapblist, blk, npages);
vm_swap_size += npages;
/* per-swap area stats */
swdevt[BLK2DEVIDX(blk)].sw_used -= npages;
swp_sizecheck();
}
@ -1624,6 +1636,149 @@ swp_pager_async_iodone(bp)
splx(s);
}
/*
* swap_pager_isswapped:
*
* Return 1 if at least one page in the given object is paged
* out to the given swap device.
*
* This routine may not block.
*/
int
swap_pager_isswapped(vm_object_t object, int devidx)
{
	daddr_t index = 0;
	int bcount;
	int i;

	/*
	 * Walk the swap-metadata hash, one swblock (SWAP_META_PAGES worth
	 * of pindexes) at a time, up to the object's recorded block count.
	 */
	for (bcount = 0; bcount < object->un_pager.swp.swp_bcount; bcount++) {
		struct swblock *swap;

		if ((swap = *swp_pager_hash(object, index)) != NULL) {
			/* Check each page slot for a block on this device. */
			for (i = 0; i < SWAP_META_PAGES; ++i) {
				daddr_t v = swap->swb_pages[i];
				if (v != SWAPBLK_NONE &&
				    BLK2DEVIDX(v) == devidx)
					return 1;
			}
		}
		index += SWAP_META_PAGES;
		/*
		 * NOTE(review): bcount advances every iteration whether or
		 * not a block was found, so this bound plus the panic below
		 * caps the scan — confirm against the metadata accounting.
		 */
		if (index > 0x20000000)
			panic("swap_pager_isswapped: failed to locate all swap meta blocks");
	}
	return 0;
}
/*
* SWP_PAGER_FORCE_PAGEIN() - force a swap block to be paged in
*
* This routine dissociates the page at the given index within a
* swap block from its backing store, paging it in if necessary.
* If the page is paged in, it is placed in the inactive queue,
* since it had its backing store ripped out from under it.
* We also attempt to swap in all other pages in the swap block,
* we only guarantee that the one at the specified index is
* paged in.
*
* XXX - The code to page the whole block in doesn't work, so we
* revert to the one-by-one behavior for now. Sigh.
*/
static __inline void
swp_pager_force_pagein(struct swblock *swap, int idx)
{
	vm_object_t object;
	vm_page_t m;
	vm_pindex_t pindex;

	object = swap->swb_object;
	pindex = swap->swb_index;

	/* Hold a paging-in-progress reference for the duration. */
	vm_object_pip_add(object, 1);
	m = vm_page_grab(object, pindex + idx, VM_ALLOC_NORMAL|VM_ALLOC_RETRY);
	if (m->valid == VM_PAGE_BITS_ALL) {
		/*
		 * Page already fully resident: dirty it (its swap copy is
		 * about to disappear) and release the swap block.
		 */
		vm_object_pip_subtract(object, 1);
		vm_page_lock_queues();
		vm_page_activate(m);
		vm_page_dirty(m);
		vm_page_wakeup(m);
		vm_page_unlock_queues();
		vm_pager_page_unswapped(m);
		return;
	}

	/* Not resident: synchronously read it back from the swap device. */
	if (swap_pager_getpages(object, &m, 1, 0) !=
	    VM_PAGER_OK)
		panic("swap_pager_force_pagein: read from swap failed");/*XXX*/
	vm_object_pip_subtract(object, 1);

	vm_page_lock_queues();
	vm_page_dirty(m);
	/* Headed for the inactive queue; it lost its backing store. */
	vm_page_dontneed(m);
	vm_page_wakeup(m);
	vm_page_unlock_queues();
	vm_pager_page_unswapped(m);
}
/*
* swap_pager_swapoff:
*
* Page in all of the pages that have been paged out to the
* given device. The corresponding blocks in the bitmap must be
* marked as allocated and the device must be flagged SW_CLOSING.
* There may be no processes swapped out to the device.
*
* The sw_used parameter points to the field in the swdev structure
* that contains a count of the number of blocks still allocated
* on the device. If we encounter objects with a nonzero pip count
* in our scan, we use this number to determine if we're really done.
*
* This routine may block.
*/
void
swap_pager_swapoff(int devidx, int *sw_used)
{
	struct swblock **pswap;
	struct swblock *swap;
	vm_object_t waitobj;
	daddr_t v;
	int i, j;

	GIANT_REQUIRED;

full_rescan:
	waitobj = NULL;
	for (i = 0; i <= swhash_mask; i++) { /* '<=' is correct here */
restart:
		pswap = &swhash[i];
		while ((swap = *pswap) != NULL) {
			/* Does this swblock reference the closing device? */
			for (j = 0; j < SWAP_META_PAGES; ++j) {
				v = swap->swb_pages[j];
				if (v != SWAPBLK_NONE &&
				    BLK2DEVIDX(v) == devidx)
					break;
			}
			if (j < SWAP_META_PAGES) {
				/*
				 * NOTE(review): the pagein can mutate this
				 * hash chain (swblocks may be freed), hence
				 * the restart of the bucket — confirm.
				 */
				swp_pager_force_pagein(swap, j);
				goto restart;
			} else if (swap->swb_object->paging_in_progress) {
				/* Remember one busy object to wait on below. */
				if (!waitobj)
					waitobj = swap->swb_object;
			}
			pswap = &swap->swb_hnext;
		}
	}

	if (waitobj && *sw_used) {
		/*
		 * We wait on an arbitrary object to clock our rescans
		 * to the rate of paging completion.
		 */
		vm_object_pip_wait(waitobj, "swpoff");
		goto full_rescan;
	}
	/* Everything scanned and idle, yet blocks remain: metadata bug. */
	if (*sw_used)
		panic("swapoff: failed to locate %d swap blocks", *sw_used);
}
/************************************************************************
* SWAP META DATA *
************************************************************************

View File

@ -83,9 +83,11 @@ extern struct pagerlst swap_pager_un_object_list;
extern int swap_pager_full;
extern struct blist *swapblist;
extern struct uma_zone *swap_zone;
extern int nswap_lowat, nswap_hiwat;
void swap_pager_putpages(vm_object_t, vm_page_t *, int, boolean_t, int *);
boolean_t swap_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before, int *after);
void swap_pager_swapoff(int devidx, int *sw_used);
int swap_pager_swp_alloc(vm_object_t, int);
void swap_pager_copy(vm_object_t, vm_object_t, vm_pindex_t, int);

View File

@ -91,6 +91,7 @@
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/vm_pager.h>
#include <vm/swap_pager.h>
#include <sys/user.h>
@ -324,6 +325,45 @@ vm_proc_swapin(struct proc *p)
up = (vm_offset_t)p->p_uarea;
pmap_qenter(up, ma, UAREA_PAGES);
}
/*
* Swap in the UAREAs of all processes swapped out to the given device.
* The pages in the UAREA are marked dirty and their swap metadata is freed.
*/
void
vm_proc_swapin_all(int devidx)
{
	struct proc *p;
	vm_object_t object;
	vm_page_t m;

retry:
	sx_slock(&allproc_lock);
	FOREACH_PROC_IN_SYSTEM(p) {
		PROC_LOCK(p);
		mtx_lock_spin(&sched_lock);

		object = p->p_upages_obj;
		if (object != NULL &&
		    swap_pager_isswapped(p->p_upages_obj, devidx)) {
			/*
			 * Drop allproc_lock before faulting the process in;
			 * the list may change, so restart from the top
			 * afterwards.
			 */
			sx_sunlock(&allproc_lock);
			faultin(p);
			mtx_unlock_spin(&sched_lock);
			PROC_UNLOCK(p);
			vm_page_lock_queues();
			/* Dirty the UAREA pages; their swap copy goes away. */
			TAILQ_FOREACH(m, &object->memq, listq)
				vm_page_dirty(m);
			vm_page_unlock_queues();
			/* Free the object's swap metadata for this range. */
			swap_pager_freespace(object, 0,
			    object->un_pager.swp.swp_bcount);
			goto retry;
		}

		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
	}
	sx_sunlock(&allproc_lock);
}
#endif
/*

View File

@ -104,6 +104,12 @@ extern void pagedaemon_wakeup(void);
extern void vm_wait(void);
extern void vm_waitpfault(void);
/* XXX This is probably misplaced. */
#ifndef NO_SWAPPING
void vm_proc_swapin_all(int);
int swap_pager_isswapped(vm_object_t, int);
#endif /* !NO_SWAPPING */
#ifdef _KERNEL
void vm_pageout_page(vm_page_t, vm_object_t);
void vm_pageout_cluster(vm_page_t, vm_object_t);

View File

@ -36,6 +36,7 @@
#include "opt_mac.h"
#include "opt_swap.h"
#include "opt_vm.h"
#include <sys/param.h>
#include <sys/systm.h>
@ -58,6 +59,7 @@
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>
#include <vm/vm_pageout.h>
#include <vm/swap_pager.h>
#include <vm/uma.h>
@ -73,6 +75,8 @@ struct swdevt *swdevt = should_be_malloced;
static int nswap; /* first block after the interleaved devs */
int nswdev = NSWAPDEV;
int vm_swap_size;
static int swdev_syscall_active = 0; /* serialize swap(on|off) */
static int swapdev_strategy(struct vop_strategy_args *ap);
struct vnode *swapdev_vp;
@ -165,11 +169,12 @@ swapdev_strategy(ap)
/*
* Create a special vnode op vector for swapdev_vp - we only use
* VOP_STRATEGY(), everything else returns an error.
* VOP_STRATEGY() and reclaim; everything else returns an error.
*/
vop_t **swapdev_vnodeop_p;
static struct vnodeopv_entry_desc swapdev_vnodeop_entries[] = {
{ &vop_default_desc, (vop_t *) vop_defaultop },
{ &vop_reclaim_desc, (vop_t *) vop_null },
{ &vop_strategy_desc, (vop_t *) swapdev_strategy },
{ NULL, NULL }
};
@ -208,19 +213,23 @@ swapon(td, uap)
if (error)
goto done2;
while (swdev_syscall_active)
tsleep(&swdev_syscall_active, PUSER - 1, "swpon", 0);
swdev_syscall_active = 1;
/*
* Swap metadata may not fit in the KVM if we have physical
* memory of >1GB.
*/
if (swap_zone == NULL) {
error = ENOMEM;
goto done2;
goto done;
}
NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->name, td);
error = namei(&nd);
if (error)
goto done2;
goto done;
NDFREE(&nd, NDF_ONLY_PNBUF);
vp = nd.ni_vp;
@ -239,6 +248,9 @@ swapon(td, uap)
if (error)
vrele(vp);
done:
swdev_syscall_active = 0;
wakeup_one(&swdev_syscall_active);
done2:
mtx_unlock(&Giant);
return (error);
@ -252,8 +264,6 @@ swapon(td, uap)
*
* The new swap code uses page-sized blocks. The old swap code used
* DEV_BSIZE'd chunks.
*
* XXX locking when multiple swapon's run in parallel
*/
int
swaponvp(td, vp, dev, nblks)
@ -330,7 +340,7 @@ swaponvp(td, vp, dev, nblks)
sp->sw_vp = vp;
sp->sw_dev = dev2udev(dev);
sp->sw_device = dev;
sp->sw_flags |= SW_FREED;
sp->sw_flags = SW_FREED;
sp->sw_nblks = nblks;
sp->sw_used = 0;
@ -356,9 +366,127 @@ swaponvp(td, vp, dev, nblks)
vm_swap_size += blk;
}
swap_pager_full = 0;
return (0);
}
/*
* SYSCALL: swapoff(devname)
*
* Disable swapping on the given device.
*/
#ifndef _SYS_SYSPROTO_H_
struct swapoff_args {
char *name;
};
#endif
/*
* MPSAFE
*/
/* ARGSUSED */
int
swapoff(td, uap)
	struct thread *td;
	struct swapoff_args *uap;
{
	struct vnode *vp;
	struct nameidata nd;
	struct swdevt *sp;
	swblk_t dvbase, vsbase;
	u_long nblks, aligned_nblks, blk;
	int error, index;

	mtx_lock(&Giant);

	/* Only the super-user may disable a swap device. */
	error = suser(td);
	if (error)
		goto done2;

	/* Serialize against concurrent swapon()/swapoff() calls. */
	while (swdev_syscall_active)
		tsleep(&swdev_syscall_active, PUSER - 1, "swpoff", 0);
	swdev_syscall_active = 1;

	/* Translate the user-supplied pathname into a vnode. */
	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->name, td);
	error = namei(&nd);
	if (error)
		goto done;
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vp = nd.ni_vp;

	/* Find the configured swap device backed by this vnode. */
	for (sp = swdevt, index = 0 ; index < nswdev; index++, sp++) {
		if (sp->sw_vp == vp)
			goto found;
	}
	/* Not currently a swap device. */
	error = EINVAL;
	goto done;
found:
	nblks = sp->sw_nblks;

	/*
	 * We can turn off this swap device safely only if the
	 * available virtual memory in the system will fit the amount
	 * of data we will have to page back in, plus an epsilon so
	 * the system doesn't become critically low on swap space.
	 */
	if (cnt.v_free_count + cnt.v_cache_count + vm_swap_size <
	    nblks + nswap_lowat) {
		error = ENOMEM;
		goto done;
	}

	/*
	 * Prevent further allocations on this device.
	 */
	sp->sw_flags |= SW_CLOSING;
	/* Mark every interleaved stripe of this device as allocated. */
	for (dvbase = dmmax; dvbase < nblks; dvbase += dmmax) {
		blk = min(nblks - dvbase, dmmax);
		vsbase = index * dmmax + dvbase * nswdev;
		vm_swap_size -= blist_fill(swapblist, vsbase, blk);
	}

	/*
	 * Page in the contents of the device and close it.
	 */
#ifndef NO_SWAPPING
	vm_proc_swapin_all(index);
#endif /* !NO_SWAPPING */
	swap_pager_swapoff(index, &sp->sw_used);

	VOP_CLOSE(vp, FREAD | FWRITE, td->td_ucred, td);
	vrele(vp);
	sp->sw_vp = NULL;

	/*
	 * Resize the bitmap based on the new largest swap device,
	 * or free the bitmap if there are no more devices.
	 */
	for (sp = swdevt, nblks = 0; sp < swdevt + nswdev; sp++) {
		if (sp->sw_vp == NULL)
			continue;
		nblks = max(nblks, sp->sw_nblks);
	}
	/* Round up to the interleave unit before rebuilding the blist. */
	aligned_nblks = (nblks + (dmmax - 1)) & ~(u_long)(dmmax - 1);
	nswap = aligned_nblks * nswdev;

	if (nswap == 0) {
		blist_destroy(swapblist);
		swapblist = NULL;
		vrele(swapdev_vp);
		swapdev_vp = NULL;
	} else
		blist_resize(&swapblist, nswap, 0);

done:
	/* Allow the next swapon()/swapoff() invocation to proceed. */
	swdev_syscall_active = 0;
	wakeup_one(&swdev_syscall_active);
done2:
	mtx_unlock(&Giant);
	return (error);
}
static int
sysctl_vm_swap_info(SYSCTL_HANDLER_ARGS)
{