Submitted by: Matt Dillon <dillon@freebsd.org>

The old VN device broke in -4.x when the definition of B_PAGING
changed. This patch fixes the breakage and implements additional
capabilities: the new VN device can be backed by a file (as per normal)
or directly by swap.

Due to dependencies in VM include files (on opt_xxx options) the new
vn device cannot be a module yet. This will be fixed in a later commit.
This commit is delimited by the tags {PRE,POST}_MATT_VNDEV.
Julian Elischer 1999-03-14 09:20:01 +00:00
parent ac3101276b
commit a5296b05b4
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=44739
7 changed files with 821 additions and 247 deletions
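
For context, attaching a swap-backed vn from userland now amounts to issuing
VNIOCATTACH with a NULL vn_file and a page-scaled vn_size; this is the path
the new vnconfig -S option (see the vnconfig changes below) drives. A minimal
sketch, assuming the vn_ioctl layout from sys/vnioctl.h; the device path and
error handling are illustrative:

    #include <sys/types.h>
    #include <sys/ioctl.h>
    #include <sys/vnioctl.h>
    #include <err.h>
    #include <fcntl.h>
    #include <strings.h>
    #include <unistd.h>

    int
    main(void)
    {
            struct vn_ioctl vnio;
            int fd;

            if ((fd = open("/dev/rvn0", O_RDWR)) < 0)  /* illustrative path */
                    err(1, "open");
            bzero(&vnio, sizeof(vnio));
            vnio.vn_file = NULL;    /* NULL file -> swap backing store */
            vnio.vn_size = 8192;    /* PAGE_SIZE'd chunks: 32MB with 4K pages */
            if (ioctl(fd, VNIOCATTACH, &vnio) < 0)
                    err(1, "VNIOCATTACH");
            close(fd);
            return (0);
    }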

sys/dev/vn/vn.c

@ -38,7 +38,7 @@
* from: Utah Hdr: vn.c 1.13 94/04/02
*
* from: @(#)vn.c 8.6 (Berkeley) 4/1/94
* $Id: vn.c,v 1.73 1999/01/23 00:28:56 peter Exp $
* $Id: vn.c,v 1.74 1999/02/01 08:36:02 dillon Exp $
*/
/*
@ -91,6 +91,16 @@
#include <miscfs/specfs/specdev.h>
#include <sys/vnioctl.h>
#include <vm/vm.h>
#include <vm/vm_prot.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_pageout.h>
#include <vm/swap_pager.h>
#include <vm/vm_extern.h>
#include <vm/vm_zone.h>
static d_ioctl_t vnioctl;
static d_open_t vnopen;
static d_read_t vnread;
@ -99,17 +109,24 @@ static d_close_t vnclose;
static d_dump_t vndump;
static d_psize_t vnsize;
static d_strategy_t vnstrategy;
static d_parms_t vnparms;
#define CDEV_MAJOR 43
#define BDEV_MAJOR 15
/*
* cdevsw
* D_DISK we want to look like a disk
* ( D_NOCLUSTERRW removed - clustering should be ok )
* D_CANFREE We support B_FREEBUF
*/
static struct cdevsw vn_cdevsw = {
vnopen, vnclose, vnread, vnwrite,
vnioctl, nostop, nullreset, nodevtotty,
seltrue, nommap, vnstrategy, "vn",
NULL, -1, vndump, vnsize,
D_DISK|D_NOCLUSTERRW, 0, -1 };
vnparms, -1, vndump, vnsize,
D_DISK|D_CANFREE, 0, -1 };
#define vnunit(dev) dkunit(dev)
@ -121,14 +138,16 @@ static struct cdevsw vn_cdevsw = {
free((caddr_t)(bp), M_DEVBUF)
struct vn_softc {
int sc_flags; /* flags */
size_t sc_size; /* size of vn */
int sc_flags; /* flags */
int sc_size; /* size of vn, sc_secsize scale */
int sc_secsize; /* sector size */
struct diskslices *sc_slices;
struct vnode *sc_vp; /* vnode */
struct ucred *sc_cred; /* credentials */
int sc_maxactive; /* max # of active requests */
struct buf sc_tab; /* transfer queue */
u_long sc_options; /* options */
struct vnode *sc_vp; /* vnode if not NULL */
vm_object_t sc_object; /* backing object if not NULL */
struct ucred *sc_cred; /* credentials */
int sc_maxactive; /* max # of active requests */
struct buf sc_tab; /* transfer queue */
u_long sc_options; /* options */
#ifdef DEVFS
void *r_devfs_token;
void *devfs_token;
@ -143,10 +162,14 @@ static u_long vn_options;
#define IFOPT(vn,opt) if (((vn)->sc_options|vn_options) & (opt))
#if 0
static void vniodone (struct buf *bp);
#endif
static int vnsetcred (struct vn_softc *vn, struct ucred *cred);
static void vnclear (struct vn_softc *vn);
static int vn_modevent (module_t, int, void *);
static int vniocattach_file (struct vn_softc *, struct vn_ioctl *, dev_t dev, int flag, struct proc *p);
static int vniocattach_swap (struct vn_softc *, struct vn_ioctl *, dev_t dev, int flag, struct proc *p);
static int
vnclose(dev_t dev, int flags, int mode, struct proc *p)
@ -191,14 +214,13 @@ vnopen(dev_t dev, int flags, int mode, struct proc *p)
/* Build label for whole disk. */
bzero(&label, sizeof label);
label.d_secsize = DEV_BSIZE;
label.d_secsize = vn->sc_secsize;
label.d_nsectors = 32;
label.d_ntracks = 64;
label.d_ncylinders = vn->sc_size / (32 * 64);
label.d_secpercyl = 32 * 64;
label.d_secperunit =
label.d_partitions[RAW_PART].p_size =
vn->sc_size;
label.d_ntracks = 64 / (vn->sc_secsize / DEV_BSIZE);
label.d_secpercyl = label.d_nsectors * label.d_ntracks;
label.d_ncylinders = vn->sc_size / label.d_secpercyl;
label.d_secperunit = vn->sc_size;
label.d_partitions[RAW_PART].p_size = vn->sc_size;
return (dsopen("vn", dev, mode, 0, &vn->sc_slices,
&label, vnstrategy, (ds_setgeom_t *)NULL,
@ -225,20 +247,23 @@ vnwrite(dev_t dev, struct uio *uio, int ioflag)
}
/*
* this code does I/O calls through the appropriate VOP entry point...
* unless a swap_pager I/O request is being done. This strategy (-))
* allows for coherency with mmap except in the case of paging. This
* is necessary, because the VOP calls use lots of memory (and actually
* are not extremely efficient -- but we want to keep semantics correct),
* and the pageout daemon gets really unhappy (and so does the rest of the
* system) when it runs out of memory.
* vnstrategy:
*
* Run strategy routine for VN device. We use VOP_READ/VOP_WRITE calls
* for vnode-backed vn's, and the new vm_pager_strategy() call for
* vm_object-backed vn's.
*
* Currently B_ASYNC is only partially handled - for OBJT_SWAP I/O only.
*
* NOTE: bp->b_blkno is DEV_BSIZE'd. We must generate bp->b_pblkno for
* our uio or vn_pager_strategy() call that is vn->sc_secsize'd
*/
static void
vnstrategy(struct buf *bp)
{
int unit = vnunit(bp->b_dev);
register struct vn_softc *vn = vn_softc[unit];
register daddr_t bn;
struct vn_softc *vn = vn_softc[unit];
int error;
int isvplocked = 0;
long sz;
@ -254,34 +279,46 @@ vnstrategy(struct buf *bp)
biodone(bp);
return;
}
bp->b_resid = bp->b_bcount;
IFOPT(vn, VN_LABELS) {
bp->b_resid = bp->b_bcount;/* XXX best place to set this? */
if (vn->sc_slices != NULL && dscheck(bp, vn->sc_slices) <= 0) {
bp->b_flags |= B_INVAL;
biodone(bp);
return;
}
bn = bp->b_pblkno;
bp->b_resid = bp->b_bcount;/* XXX best place to set this? */
} else {
bn = bp->b_blkno;
sz = howmany(bp->b_bcount, DEV_BSIZE);
bp->b_resid = bp->b_bcount;
if (bn < 0 || bn + sz > vn->sc_size) {
if (bn != vn->sc_size) {
int pbn;
pbn = bp->b_blkno * (vn->sc_secsize / DEV_BSIZE);
sz = howmany(bp->b_bcount, vn->sc_secsize);
if (pbn < 0 || pbn + sz > vn->sc_size) {
if (pbn != vn->sc_size) {
bp->b_error = EINVAL;
bp->b_flags |= B_ERROR;
bp->b_flags |= B_ERROR | B_INVAL;
}
biodone(bp);
return;
}
bp->b_pblkno = pbn;
}
if( (bp->b_flags & B_PAGING) == 0) {
if (vn->sc_vp && (bp->b_flags & B_FREEBUF)) {
/*
* Not handled for vnode-backed element yet.
*/
biodone(bp);
} else if (vn->sc_vp) {
/*
* VNODE I/O
*/
aiov.iov_base = bp->b_data;
aiov.iov_len = bp->b_bcount;
auio.uio_iov = &aiov;
auio.uio_iovcnt = 1;
auio.uio_offset = dbtob(bn);
auio.uio_offset = (vm_ooffset_t)bp->b_pblkno * vn->sc_secsize;
auio.uio_segflg = UIO_SYSSPACE;
if( bp->b_flags & B_READ)
auio.uio_rw = UIO_READ;
@ -303,135 +340,42 @@ vnstrategy(struct buf *bp)
}
bp->b_resid = auio.uio_resid;
if( error )
if( error ) {
bp->b_error = error;
bp->b_flags |= B_ERROR;
biodone(bp);
} else {
long bsize, resid;
off_t byten;
int flags;
caddr_t addr;
struct buf *nbp;
nbp = getvnbuf();
bzero(nbp, sizeof(struct buf));
LIST_INIT(&nbp->b_dep);
byten = dbtob(bn);
bsize = vn->sc_vp->v_mount->mnt_stat.f_iosize;
addr = bp->b_data;
flags = bp->b_flags | B_CALL;
for (resid = bp->b_resid; resid; ) {
struct vnode *vp;
daddr_t nbn;
int off, s, nra;
nra = 0;
if (!VOP_ISLOCKED(vn->sc_vp)) {
isvplocked = 1;
vn_lock(vn->sc_vp, LK_EXCLUSIVE | LK_RETRY, curproc);
}
error = VOP_BMAP(vn->sc_vp, (daddr_t)(byten / bsize),
&vp, &nbn, &nra, NULL);
if (isvplocked) {
VOP_UNLOCK(vn->sc_vp, 0, curproc);
isvplocked = 0;
}
if (error == 0 && nbn == -1)
error = EIO;
IFOPT(vn, VN_DONTCLUSTER)
nra = 0;
off = byten % bsize;
if (off)
sz = bsize - off;
else
sz = (1 + nra) * bsize;
if (resid < sz)
sz = resid;
if (error) {
bp->b_resid -= (resid - sz);
bp->b_flags |= B_ERROR;
biodone(bp);
putvnbuf(nbp);
return;
}
IFOPT(vn,VN_IO)
printf(
/* XXX no %qx in kernel. Synthesize it. */
"vnstrategy: vp %p/%p bn 0x%lx%08lx/0x%lx sz 0x%lx\n",
(void *)vn->sc_vp, (void *)vp,
(u_long)(byten >> 32), (u_long)byten,
(u_long)nbn, sz);
nbp->b_flags = flags;
nbp->b_bcount = sz;
nbp->b_bufsize = sz;
nbp->b_error = 0;
if (vp->v_type == VBLK || vp->v_type == VCHR)
nbp->b_dev = vp->v_rdev;
else
nbp->b_dev = NODEV;
nbp->b_data = addr;
nbp->b_blkno = nbn + btodb(off);
nbp->b_offset = dbtob(nbn) + off;
nbp->b_proc = bp->b_proc;
nbp->b_iodone = vniodone;
nbp->b_vp = vp;
nbp->b_rcred = vn->sc_cred; /* XXX crdup? */
nbp->b_wcred = vn->sc_cred; /* XXX crdup? */
nbp->b_dirtyoff = bp->b_dirtyoff;
nbp->b_dirtyend = bp->b_dirtyend;
nbp->b_validoff = bp->b_validoff;
nbp->b_validend = bp->b_validend;
if ((nbp->b_flags & B_READ) == 0)
nbp->b_vp->v_numoutput++;
VOP_STRATEGY(vp, nbp);
s = splbio();
while ((nbp->b_flags & B_DONE) == 0) {
nbp->b_flags |= B_WANTED;
tsleep(nbp, PRIBIO, "vnwait", 0);
}
splx(s);
if( nbp->b_flags & B_ERROR) {
bp->b_flags |= B_ERROR;
bp->b_resid -= (resid - sz);
biodone(bp);
putvnbuf(nbp);
return;
}
byten += sz;
addr += sz;
resid -= sz;
}
bp->b_resid = resid;
biodone(bp);
putvnbuf(nbp);
} else if (vn->sc_object) {
/*
* OBJT_SWAP I/O
*
* ( handles read, write, freebuf )
*/
vm_pager_strategy(vn->sc_object, bp);
} else {
bp->b_flags |= B_ERROR;
bp->b_error = EINVAL;
biodone(bp);
}
}
#if 0
void
vniodone( struct buf *bp) {
bp->b_flags |= B_DONE;
wakeup((caddr_t) bp);
}
#endif
/* ARGSUSED */
static int
vnioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
{
struct vn_softc *vn = vn_softc[vnunit(dev)];
struct vn_ioctl *vio;
struct vattr vattr;
struct nameidata nd;
int error;
u_long *f;
@ -476,47 +420,11 @@ vnioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
case VNIOCATTACH:
if (vn->sc_flags & VNF_INITED)
return(EBUSY);
/*
* Always open for read and write.
* This is probably bogus, but it lets vn_open()
* weed out directories, sockets, etc. so we don't
* have to worry about them.
*/
NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vn_file, p);
error = vn_open(&nd, FREAD|FWRITE, 0);
if (error)
return(error);
error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p);
if (error) {
VOP_UNLOCK(nd.ni_vp, 0, p);
(void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p);
return(error);
}
VOP_UNLOCK(nd.ni_vp, 0, p);
vn->sc_vp = nd.ni_vp;
vn->sc_size = btodb(vattr.va_size); /* note truncation */
error = vnsetcred(vn, p->p_ucred);
if (error) {
(void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p);
return(error);
}
vio->vn_size = dbtob(vn->sc_size);
vn->sc_flags |= VNF_INITED;
IFOPT(vn, VN_LABELS) {
/*
* Reopen so that `ds' knows which devices are open.
* If this is the first VNIOCSET, then we've
* guaranteed that the device is the cdev and that
* no other slices or labels are open. Otherwise,
* we rely on VNIOCCLR not being abused.
*/
error = vnopen(dev, flag, S_IFCHR, p);
if (error)
vnclear(vn);
}
IFOPT(vn, VN_FOLLOW)
printf("vnioctl: SET vp %p size %x\n",
vn->sc_vp, vn->sc_size);
if (vio->vn_file == NULL)
error = vniocattach_swap(vn, vio, dev, flag, p);
else
error = vniocattach_file(vn, vio, dev, flag, p);
break;
case VNIOCDETACH:
@ -556,11 +464,140 @@ vnioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
break;
default:
return (ENOTTY);
error = ENOTTY;
break;
}
return(error);
}
/*
* vniocattach_file:
*
* Attach a file to a VN partition. Return the size in the vn_size
* field.
*/
static int
vniocattach_file(vn, vio, dev, flag, p)
struct vn_softc *vn;
struct vn_ioctl *vio;
dev_t dev;
int flag;
struct proc *p;
{
struct vattr vattr;
struct nameidata nd;
int error;
/*
* Always open for read and write.
* This is probably bogus, but it lets vn_open()
* weed out directories, sockets, etc. so we don't
* have to worry about them.
*/
NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vn_file, p);
error = vn_open(&nd, FREAD|FWRITE, 0);
if (error)
return(error);
error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p);
if (error) {
VOP_UNLOCK(nd.ni_vp, 0, p);
(void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p);
return(error);
}
VOP_UNLOCK(nd.ni_vp, 0, p);
vn->sc_secsize = DEV_BSIZE;
vn->sc_vp = nd.ni_vp;
vn->sc_size = vattr.va_size / vn->sc_secsize; /* note truncation */
error = vnsetcred(vn, p->p_ucred);
if (error) {
(void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p);
return(error);
}
vn->sc_flags |= VNF_INITED;
IFOPT(vn, VN_LABELS) {
/*
* Reopen so that `ds' knows which devices are open.
* If this is the first VNIOCSET, then we've
* guaranteed that the device is the cdev and that
* no other slices or labels are open. Otherwise,
* we rely on VNIOCCLR not being abused.
*/
error = vnopen(dev, flag, S_IFCHR, p);
if (error)
vnclear(vn);
}
IFOPT(vn, VN_FOLLOW)
printf("vnioctl: SET vp %p size %x blks\n",
vn->sc_vp, vn->sc_size);
return(0);
}
/*
* vniocattach_swap:
*
* Attach swap backing store to a VN partition of the size specified
* in vn_size.
*/
static int
vniocattach_swap(vn, vio, dev, flag, p)
struct vn_softc *vn;
struct vn_ioctl *vio;
dev_t dev;
int flag;
struct proc *p;
{
int error;
/*
* Range check. Disallow negative sizes or any size less than the
* size of a page. Then round to a page.
*/
if (vio->vn_size <= 0)
return(EDOM);
/*
* Allocate an OBJT_SWAP object.
*
* sc_secsize is PAGE_SIZE'd
*
* vio->vn_size is in PAGE_SIZE'd chunks.
* sc_size must be in PAGE_SIZE'd chunks.
* Note the truncation.
*/
vn->sc_secsize = PAGE_SIZE;
vn->sc_size = vio->vn_size;
vn->sc_object =
vm_pager_allocate(OBJT_SWAP, NULL, vn->sc_secsize * (vm_ooffset_t)vio->vn_size, VM_PROT_DEFAULT, 0);
vn->sc_flags |= VNF_INITED;
error = vnsetcred(vn, p->p_ucred);
if (error == 0) {
IFOPT(vn, VN_LABELS) {
/*
* Reopen so that `ds' knows which devices are open.
* If this is the first VNIOCSET, then we've
* guaranteed that the device is the cdev and that
* no other slices or labels are open. Otherwise,
* we rely on VNIOCCLR not being abused.
*/
error = vnopen(dev, flag, S_IFCHR, p);
}
}
if (error == 0) {
IFOPT(vn, VN_FOLLOW) {
printf("vnioctl: SET vp %p size %x\n",
vn->sc_vp, vn->sc_size);
}
}
if (error)
vnclear(vn);
return(error);
}
/*
* Duplicate the current process's credentials. Since we are called only
* as the result of a SET ioctl and only root can do that, any future access
@ -573,45 +610,61 @@ vnsetcred(struct vn_softc *vn, struct ucred *cred)
struct uio auio;
struct iovec aiov;
char *tmpbuf;
int error;
int error = 0;
/*
* Set credentials in our softc
*/
if (vn->sc_cred)
crfree(vn->sc_cred);
vn->sc_cred = crdup(cred);
tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK);
/* XXX: Horrible kludge to establish credentials for NFS */
aiov.iov_base = tmpbuf;
aiov.iov_len = min(DEV_BSIZE, dbtob(vn->sc_size));
auio.uio_iov = &aiov;
auio.uio_iovcnt = 1;
auio.uio_offset = 0;
auio.uio_rw = UIO_READ;
auio.uio_segflg = UIO_SYSSPACE;
auio.uio_resid = aiov.iov_len;
vn_lock(vn->sc_vp, LK_EXCLUSIVE | LK_RETRY, curproc);
error = VOP_READ(vn->sc_vp, &auio, 0, vn->sc_cred);
VOP_UNLOCK(vn->sc_vp, 0, curproc);
/*
* Horrible kludge to establish credentials for NFS XXX.
*/
free(tmpbuf, M_TEMP);
if (vn->sc_vp) {
tmpbuf = malloc(vn->sc_secsize, M_TEMP, M_WAITOK);
aiov.iov_base = tmpbuf;
aiov.iov_len = vn->sc_secsize;
auio.uio_iov = &aiov;
auio.uio_iovcnt = 1;
auio.uio_offset = 0;
auio.uio_rw = UIO_READ;
auio.uio_segflg = UIO_SYSSPACE;
auio.uio_resid = aiov.iov_len;
vn_lock(vn->sc_vp, LK_EXCLUSIVE | LK_RETRY, curproc);
error = VOP_READ(vn->sc_vp, &auio, 0, vn->sc_cred);
VOP_UNLOCK(vn->sc_vp, 0, curproc);
free(tmpbuf, M_TEMP);
}
return (error);
}
void
vnclear(struct vn_softc *vn)
{
register struct vnode *vp = vn->sc_vp;
struct proc *p = curproc; /* XXX */
IFOPT(vn, VN_FOLLOW)
printf("vnclear(%p): vp=%p\n", vn, vp);
printf("vnclear(%p): vp=%p\n", vn, vn->sc_vp);
if (vn->sc_slices != NULL)
dsgone(&vn->sc_slices);
vn->sc_flags &= ~VNF_INITED;
if (vp == (struct vnode *)0)
panic("vnclear: null vp");
(void) vn_close(vp, FREAD|FWRITE, vn->sc_cred, p);
crfree(vn->sc_cred);
vn->sc_vp = (struct vnode *)0;
vn->sc_cred = (struct ucred *)0;
if (vn->sc_vp != NULL) {
(void)vn_close(vn->sc_vp, FREAD|FWRITE, vn->sc_cred, p);
vn->sc_vp = NULL;
}
if (vn->sc_cred) {
crfree(vn->sc_cred);
vn->sc_cred = NULL;
}
if (vn->sc_object != NULL) {
vm_pager_deallocate(vn->sc_object);
vn->sc_object = NULL;
}
vn->sc_size = 0;
}
@ -619,11 +672,54 @@ static int
vnsize(dev_t dev)
{
int unit = vnunit(dev);
struct vn_softc *vn;
if (unit >= NVN || (!vn_softc[unit]) ||
(vn_softc[unit]->sc_flags & VNF_INITED) == 0)
if (unit < 0 || unit >= NVN)
return(-1);
return(vn_softc[unit]->sc_size);
vn = vn_softc[unit];
if ((vn->sc_flags & VNF_INITED) == 0)
return(-1);
return(vn->sc_size);
}
/*
* vnparms() - return requested device block info
*
* This is typically called by specfs with DBLK_MIN to get
* the minimum read/write block size. If the device does not
* exist or has not been configured, -1 is returned.
*/
static int
vnparms(dev_t dev, struct specinfo *sinfo, int ctl)
{
int unit = vnunit(dev);
int r = -1;
struct vn_softc *vn;
if (unit < 0 || unit >= NVN)
return(r);
if ((vn = vn_softc[unit]) == NULL || (vn->sc_flags & VNF_INITED) == 0)
return(r);
switch(ctl) {
case DPARM_GET:
/*
* Retrieve disk parameters. The system has already set
* the defaults, we simply override them as necessary.
*/
r = 0;
if (sinfo->si_bsize_phys < vn->sc_secsize)
sinfo->si_bsize_phys = vn->sc_secsize;
if (sinfo->si_bsize_best < vn->sc_secsize)
sinfo->si_bsize_best = vn->sc_secsize;
break;
default:
break;
}
return(r);
}
static int
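
Aside: the heart of the new vnstrategy() above is the rescaling from the
DEV_BSIZE'd b_blkno the system hands us to the sc_secsize'd b_pblkno that the
uio and vm_pager_strategy() paths expect. A standalone, kernel-style sketch of
just that step (the function name is hypothetical; DEV_BSIZE and howmany()
come from sys/param.h):

    /*
     * Convert a DEV_BSIZE-scaled block number to sector-scaled units and
     * bounds-check it against the device size, as vnstrategy() does above.
     * Returns -1 if the request falls outside the device.
     */
    static int
    vn_rescale_blkno(daddr_t blkno, long bcount, int secsize, int sc_size)
    {
            /* sc_secsize is always a multiple of DEV_BSIZE */
            int pbn = blkno * (secsize / DEV_BSIZE);
            int sz = howmany(bcount, secsize);

            if (pbn < 0 || pbn + sz > sc_size)
                    return (-1);
            return (pbn);
    }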

sys/modules/Makefile

@ -1,11 +1,11 @@
# $Id: Makefile,v 1.50 1999/02/03 04:18:25 semenu Exp $
# $Id: Makefile,v 1.51 1999/02/22 11:44:35 newton Exp $
# XXX present but broken: ip_mroute_mod
# XXX present but broken: ip_mroute_mod vn
# XXX not yet completed porting: acd
# XXX builds but not updated: atapi
SUBDIR= ccd cd9660 coda fdesc if_disc if_ppp if_sl if_tun ipfw kernfs \
mfs msdos nfs nullfs portal procfs umapfs union vinum vn ntfs
mfs msdos nfs nullfs portal procfs umapfs union vinum ntfs
# XXX some of these can move to the general case when de-i386'ed
.if ${MACHINE_ARCH} == "i386"

sys/vm/swap_pager.c

@ -64,7 +64,7 @@
*
* @(#)swap_pager.c 8.9 (Berkeley) 3/21/94
*
* $Id: swap_pager.c,v 1.115 1999/02/21 08:30:49 dillon Exp $
* $Id: swap_pager.c,v 1.116 1999/02/21 08:34:15 dillon Exp $
*/
#include <sys/param.h>
@ -150,6 +150,7 @@ static void swap_pager_dealloc __P((vm_object_t object));
static int swap_pager_getpages __P((vm_object_t, vm_page_t *, int, int));
static void swap_pager_init __P((void));
static void swap_pager_unswapped __P((vm_page_t));
static void swap_pager_strategy __P((vm_object_t, struct buf *));
struct pagerops swappagerops = {
swap_pager_init, /* early system initialization of pager */
@ -158,7 +159,8 @@ struct pagerops swappagerops = {
swap_pager_getpages, /* pagein */
swap_pager_putpages, /* pageout */
swap_pager_haspage, /* get backing store status for page */
swap_pager_unswapped /* remove swap related to page */
swap_pager_unswapped, /* remove swap related to page */
swap_pager_strategy /* pager strategy call */
};
/*
@ -747,6 +749,230 @@ swap_pager_unswapped(m)
swp_pager_meta_ctl(m->object, m->pindex, SWM_FREE);
}
/*
* SWAP_PAGER_STRATEGY() - read, write, free blocks
*
* This implements the vm_pager_strategy() interface to swap and allows
* other parts of the system to directly access swap as backing store
* through vm_objects of type OBJT_SWAP. This is intended to be a
* cacheless interface ( i.e. caching occurs at higher levels ).
* Therefore we do not maintain any resident pages. All I/O goes
* directly from and to the swap device.
*
* Note that b_blkno is scaled for PAGE_SIZE
*
* We currently attempt to run I/O synchronously or asynchronously as
* the caller requests. This isn't perfect because we lose error
* sequencing when we run multiple ops in parallel to satisfy a request.
* But this is swap, so we let it all hang out.
*/
static void
swap_pager_strategy(vm_object_t object, struct buf *bp)
{
vm_pindex_t start;
int count;
char *data;
struct buf *nbp = NULL;
if (bp->b_bcount & PAGE_MASK) {
bp->b_error = EINVAL;
bp->b_flags |= B_ERROR | B_INVAL;
biodone(bp);
printf("swap_pager_strategy: bp %p b_vp %p blk %d size %d, not page bounded\n", bp, bp->b_vp, (int)bp->b_pblkno, (int)bp->b_bcount);
return;
}
/*
* Clear error indication, initialize page index, count, data pointer.
*/
bp->b_error = 0;
bp->b_flags &= ~B_ERROR;
bp->b_resid = bp->b_bcount;
start = bp->b_pblkno;
count = howmany(bp->b_bcount, PAGE_SIZE);
data = bp->b_data;
/*
* Execute strategy function
*/
if (bp->b_flags & B_FREEBUF) {
/*
* FREE PAGE(s) - destroy underlying swap that is no longer
* needed.
*/
int s;
s = splvm();
swp_pager_meta_free(object, start, count);
splx(s);
bp->b_resid = 0;
} else if (bp->b_flags & B_READ) {
/*
* READ FROM SWAP - read directly from swap backing store,
* zero-fill as appropriate.
*
* Note: the count == 0 case is beyond the end of the
* buffer. This is a special case to close out any
* left over nbp.
*/
while (count > 0) {
daddr_t blk;
int s;
s = splvm();
blk = swp_pager_meta_ctl(object, start, 0);
splx(s);
/*
* Do we have to flush our current collection?
*/
if (
nbp && (
(blk & SWAPBLK_NONE) ||
nbp->b_blkno + btoc(nbp->b_bcount) != blk
)
) {
++cnt.v_swapin;
cnt.v_swappgsin += btoc(nbp->b_bcount);
flushchainbuf(nbp);
nbp = NULL;
}
/*
* Add to collection
*/
if (blk & SWAPBLK_NONE) {
s = splbio();
bp->b_resid -= PAGE_SIZE;
splx(s);
bzero(data, PAGE_SIZE);
} else {
if (nbp == NULL) {
nbp = getchainbuf(bp, swapdev_vp, B_READ|B_ASYNC);
nbp->b_blkno = blk;
nbp->b_data = data;
}
nbp->b_bcount += PAGE_SIZE;
}
--count;
++start;
data += PAGE_SIZE;
}
} else {
/*
* WRITE TO SWAP - [re]allocate swap and write.
*/
while (count > 0) {
int i;
int s;
int n;
daddr_t blk;
n = min(count, BLIST_MAX_ALLOC);
n = min(n, nsw_cluster_max);
s = splvm();
for (;;) {
blk = swp_pager_getswapspace(n);
if (blk != SWAPBLK_NONE)
break;
n >>= 1;
if (n == 0)
break;
}
if (n == 0) {
bp->b_error = ENOMEM;
bp->b_flags |= B_ERROR;
splx(s);
break;
}
/*
* Oops, too big if it crosses a stripe
*
* 1111000000
* 111111
* 1000001
*/
if ((blk ^ (blk + n)) & dmmax_mask) {
int j = ((blk + dmmax) & dmmax_mask) - blk;
swp_pager_freeswapspace(blk + j, n - j);
n = j;
}
swp_pager_meta_free(object, start, n);
splx(s);
if (nbp) {
++cnt.v_swapout;
cnt.v_swappgsout += btoc(nbp->b_bcount);
flushchainbuf(nbp);
}
nbp = getchainbuf(bp, swapdev_vp, B_ASYNC);
nbp->b_blkno = blk;
nbp->b_data = data;
nbp->b_bcount = PAGE_SIZE * n;
/*
* Must set dirty range for NFS to work. dirtybeg &
* off are already 0.
*/
nbp->b_dirtyend = nbp->b_bcount;
++cnt.v_swapout;
cnt.v_swappgsout += n;
s = splbio();
for (i = 0; i < n; ++i) {
swp_pager_meta_build(
object,
start + i,
blk + i,
1
);
}
splx(s);
count -= n;
start += n;
data += PAGE_SIZE * n;
}
}
/*
* Cleanup. Commit last nbp either async or sync, and either
* wait for it synchronously or make it auto-biodone itself and
* the parent bp.
*/
if (nbp) {
if ((bp->b_flags & B_ASYNC) == 0)
nbp->b_flags &= ~B_ASYNC;
if (nbp->b_flags & B_READ) {
++cnt.v_swapin;
cnt.v_swappgsin += btoc(nbp->b_bcount);
} else {
++cnt.v_swapout;
cnt.v_swappgsout += btoc(nbp->b_bcount);
}
flushchainbuf(nbp);
}
if (bp->b_flags & B_ASYNC) {
autochaindone(bp);
} else {
waitchainbuf(bp, 0, 1);
}
}
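As a usage note, the B_FREEBUF case above is what the vn driver's new
D_CANFREE support feeds: backing store is released without any data transfer.
A hypothetical caller (sketch only, not part of this commit) would hand the
pager a page-aligned buf like so:

    /*
     * Release the swap backing a range without moving data. b_pblkno and
     * b_bcount must be PAGE_SIZE'd, per the checks above.
     */
    static void
    example_free_backing(vm_object_t object, struct buf *bp)
    {
            bp->b_flags |= B_FREEBUF;       /* free, don't read or write */
            vm_pager_strategy(object, bp);  /* dispatches to swap_pager_strategy */
    }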
/*
* SWAP_PAGER_GETPAGES() - bring pages in from swap
*
@ -886,9 +1112,9 @@ swap_pager_getpages(object, m, count, reqpage)
bp->b_iodone = swp_pager_async_iodone;
bp->b_proc = &proc0; /* XXX (but without B_PHYS set this is ok) */
bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
bp->b_data = (caddr_t) kva;
crhold(bp->b_rcred);
crhold(bp->b_wcred);
bp->b_data = (caddr_t) kva;
/*
* b_blkno is in page-sized chunks. swapblk is valid, too, so
* we don't have to mask it against SWAPBLK_MASK.
@ -1039,7 +1265,7 @@ swap_pager_putpages(object, m, count, sync, rtvals)
/*
* Step 2
*
* Update nsw parameters from swap_async_max sysctl values.
* Update nsw parameters from swap_async_max sysctl values.
* Do not let the sysop crash the machine with bogus numbers.
*/
@ -1133,10 +1359,10 @@ swap_pager_putpages(object, m, count, sync, rtvals)
if (sync == TRUE) {
bp = getpbuf(&nsw_wcount_sync);
bp->b_flags = B_BUSY;
bp->b_flags = B_BUSY | B_CALL;
} else {
bp = getpbuf(&nsw_wcount_async);
bp->b_flags = B_BUSY | B_ASYNC;
bp->b_flags = B_BUSY | B_CALL | B_ASYNC;
}
bp->b_spc = NULL; /* not used, but NULL-out anyway */
@ -1144,17 +1370,15 @@ swap_pager_putpages(object, m, count, sync, rtvals)
bp->b_proc = &proc0; /* XXX (but without B_PHYS this is ok) */
bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
if (bp->b_rcred != NOCRED)
crhold(bp->b_rcred);
if (bp->b_wcred != NOCRED)
crhold(bp->b_wcred);
pbgetvp(swapdev_vp, bp);
bp->b_bcount = PAGE_SIZE * n;
bp->b_bufsize = PAGE_SIZE * n;
bp->b_blkno = blk;
crhold(bp->b_rcred);
crhold(bp->b_wcred);
pbgetvp(swapdev_vp, bp);
s = splvm();
for (j = 0; j < n; ++j) {
@ -1172,8 +1396,12 @@ swap_pager_putpages(object, m, count, sync, rtvals)
vm_page_flag_set(mreq, PG_SWAPINPROG);
bp->b_pages[j] = mreq;
}
bp->b_flags |= B_CALL;
bp->b_npages = n;
/*
* Must set dirty range for NFS to work.
*/
bp->b_dirtyoff = 0;
bp->b_dirtyend = bp->b_bcount;
cnt.v_swapout++;
cnt.v_swappgsout += bp->b_npages;
@ -1187,8 +1415,6 @@ swap_pager_putpages(object, m, count, sync, rtvals)
if (sync == FALSE) {
bp->b_iodone = swp_pager_async_iodone;
bp->b_dirtyoff = 0;
bp->b_dirtyend = bp->b_bcount;
VOP_STRATEGY(bp->b_vp, bp);
for (j = 0; j < n; ++j)
@ -1220,7 +1446,6 @@ swap_pager_putpages(object, m, count, sync, rtvals)
for (j = 0; j < n; ++j)
rtvals[i+j] = VM_PAGER_PEND;
/*
* Now that we are through with the bp, we can call the
* normal async completion, which frees everything up.

sys/vm/vm_pager.c

@ -61,7 +61,7 @@
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*
* $Id: vm_pager.c,v 1.42 1999/01/21 10:15:24 dillon Exp $
* $Id: vm_pager.c,v 1.43 1999/01/24 02:32:15 dillon Exp $
*/
/*
@ -72,9 +72,11 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/buf.h>
#include <sys/ucred.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
@ -251,6 +253,25 @@ vm_pager_deallocate(object)
(*pagertab[object->type]->pgo_dealloc) (object);
}
/*
* vm_pager_strategy:
*
* called with no specific spl
* Execute strategy routine directly to pager.
*/
void
vm_pager_strategy(vm_object_t object, struct buf *bp)
{
if (pagertab[object->type]->pgo_strategy) {
(*pagertab[object->type]->pgo_strategy)(object, bp);
} else {
bp->b_flags |= B_ERROR;
bp->b_error = ENXIO;
biodone(bp);
}
}
/*
* vm_pager_get_pages() - inline, see vm/vm_pager.h
* vm_pager_put_pages() - inline, see vm/vm_pager.h
@ -442,3 +463,136 @@ relpbuf(bp, pfreecnt)
}
splx(s);
}
/********************************************************
* CHAINING FUNCTIONS *
********************************************************
*
* These functions support recursion of I/O operations
* on bp's, typically by chaining one or more 'child' bp's
* to the parent. Synchronous, asynchronous, and semi-synchronous
* chaining is possible.
*/
/*
* vm_pager_chain_iodone:
*
* io completion routine for child bp. Currently we fudge a bit
* on dealing with b_resid. Since users of these routines may issue
* multiple children simultaneously, sequencing of the error can be lost.
*/
static void
vm_pager_chain_iodone(struct buf *nbp)
{
struct buf *bp;
if ((bp = nbp->b_chain.parent) != NULL) {
if (nbp->b_flags & B_ERROR) {
bp->b_flags |= B_ERROR;
bp->b_error = nbp->b_error;
} else if (nbp->b_resid != 0) {
bp->b_flags |= B_ERROR;
bp->b_error = EINVAL;
} else {
bp->b_resid -= nbp->b_bcount;
}
nbp->b_chain.parent = NULL;
--bp->b_chain.count;
if (bp->b_flags & B_WANTED) {
bp->b_flags &= ~B_WANTED;
wakeup(bp);
}
if (!bp->b_chain.count && (bp->b_flags & B_AUTOCHAINDONE)) {
bp->b_flags &= ~B_AUTOCHAINDONE;
if (bp->b_resid != 0 && !(bp->b_flags & B_ERROR)) {
bp->b_flags |= B_ERROR;
bp->b_error = EINVAL;
}
biodone(bp);
}
}
nbp->b_flags |= B_DONE;
nbp->b_flags &= ~(B_ASYNC|B_WANTED);
relpbuf(nbp, NULL);
}
/*
* getchainbuf:
*
* Obtain a physical buffer and chain it to its parent buffer. When
* I/O completes, the parent buffer will be B_SIGNAL'd. Errors are
* automatically propagated to the parent.
*/
struct buf *
getchainbuf(struct buf *bp, struct vnode *vp, int flags)
{
struct buf *nbp = getpbuf(NULL);
nbp->b_chain.parent = bp;
++bp->b_chain.count;
if (bp->b_chain.count > 4)
waitchainbuf(bp, 4, 0);
nbp->b_flags = B_BUSY | B_CALL | (bp->b_flags & B_ORDERED) | flags;
nbp->b_proc = &proc0;
nbp->b_rcred = nbp->b_proc->p_ucred;
nbp->b_wcred = nbp->b_proc->p_ucred;
nbp->b_iodone = vm_pager_chain_iodone;
crhold(nbp->b_rcred);
crhold(nbp->b_wcred);
if (vp)
pbgetvp(vp, nbp);
return(nbp);
}
void
flushchainbuf(struct buf *nbp)
{
if (nbp->b_bcount) {
nbp->b_bufsize = nbp->b_bcount;
if ((nbp->b_flags & B_READ) == 0)
nbp->b_dirtyend = nbp->b_bcount;
VOP_STRATEGY(nbp->b_vp, nbp);
} else {
biodone(nbp);
}
}
void
waitchainbuf(struct buf *bp, int count, int done)
{
int s;
s = splbio();
while (bp->b_chain.count > count) {
bp->b_flags |= B_WANTED;
tsleep(bp, PRIBIO + 4, "bpchain", 0);
}
if (done) {
if (bp->b_resid != 0 && !(bp->b_flags & B_ERROR)) {
bp->b_flags |= B_ERROR;
bp->b_error = EINVAL;
}
biodone(bp);
}
splx(s);
}
void
autochaindone(struct buf *bp)
{
int s;
s = splbio();
if (bp->b_chain.count == 0)
biodone(bp);
else
bp->b_flags |= B_AUTOCHAINDONE;
splx(s);
}
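Putting the chaining primitives together: a consumer drives them much as
swap_pager_strategy() does in the previous file. A condensed, hypothetical
sketch (page-scaled blocks as in the swap pager; error handling elided):

    static void
    example_chained_io(struct buf *bp, struct vnode *vp, daddr_t blk)
    {
            struct buf *nbp;
            caddr_t data = bp->b_data;
            long resid;

            /* split the parent into page-sized children */
            for (resid = bp->b_bcount; resid > 0; resid -= PAGE_SIZE) {
                    nbp = getchainbuf(bp, vp, bp->b_flags & (B_READ|B_ASYNC));
                    nbp->b_blkno = blk++;           /* page-scaled, as in swap */
                    nbp->b_data = data;
                    nbp->b_bcount = PAGE_SIZE;
                    flushchainbuf(nbp);             /* issues VOP_STRATEGY */
                    data += PAGE_SIZE;
            }
            if (bp->b_flags & B_ASYNC)
                    autochaindone(bp);      /* biodone parent when children finish */
            else
                    waitchainbuf(bp, 0, 1); /* wait for children, then biodone */
    }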

sys/vm/vm_pager.h

@ -36,7 +36,7 @@
* SUCH DAMAGE.
*
* @(#)vm_pager.h 8.4 (Berkeley) 1/12/94
* $Id: vm_pager.h,v 1.19 1999/01/21 10:15:47 dillon Exp $
* $Id: vm_pager.h,v 1.20 1999/01/24 02:32:15 dillon Exp $
*/
/*
@ -50,6 +50,8 @@
TAILQ_HEAD(pagerlst, vm_object);
struct buf;
struct pagerops {
void (*pgo_init) __P((void)); /* Initialize pager. */
vm_object_t (*pgo_alloc) __P((void *, vm_ooffset_t, vm_prot_t, vm_ooffset_t)); /* Allocate pager. */
@ -58,6 +60,7 @@ struct pagerops {
void (*pgo_putpages) __P((vm_object_t, vm_page_t *, int, int, int *)); /* Put (write) page. */
boolean_t (*pgo_haspage) __P((vm_object_t, vm_pindex_t, int *, int *)); /* Does pager have page? */
void (*pgo_pageunswapped) __P((vm_page_t));
void (*pgo_strategy) __P((vm_object_t, struct buf *));
};
/*
@ -101,6 +104,11 @@ vm_offset_t vm_pager_map_page __P((vm_page_t));
void vm_pager_sync __P((void));
void vm_pager_unmap_pages __P((vm_offset_t, int));
void vm_pager_unmap_page __P((vm_offset_t));
void vm_pager_strategy __P((vm_object_t object, struct buf *bp));
struct buf *getchainbuf(struct buf *bp, struct vnode *vp, int flags);
void flushchainbuf(struct buf *nbp);
void waitchainbuf(struct buf *bp, int count, int done);
void autochaindone(struct buf *bp);
static __inline int
vm_pager_get_pages(
@ -149,7 +157,6 @@ vm_pager_page_unswapped(vm_page_t m)
(*pagertab[m->object->type]->pgo_pageunswapped)(m);
}
#endif
#endif /* _VM_PAGER_ */

usr.sbin/vnconfig/vnconfig.8

@ -47,7 +47,8 @@
.Op Fl cdeguv
.Op Fl s Ar option
.Op Fl r Ar option
.Ar special_file Ar regular_file
.Op Fl S Ar value
.Ar special_file Op Ar regular_file
.Op Ar feature
.Nm vnconfig
.Fl a
@ -65,7 +66,10 @@ with the regular file
.Ar regular_file
allowing the latter to be accessed as though it were a disk.
Hence a regular file within the filesystem can be used for swapping
or can contain a filesystem that is mounted in the name space.
or can contain a filesystem that is mounted in the name space.
If you want to use swap backing store for the device instead of a file,
omit
.Ar regular_file
and specify the size of the block device with the
.Fl S
option.
.Pp
Options indicate an action to be performed:
.Bl -tag -width indent
@ -124,6 +128,11 @@ Set
The list of allowed flags and their meanings are the same as for the
.Fl r
option.
.It Fl S Ar value{k,m,g,t}
If no regular file is specified, VN will use swap for backing store.
This option specifies the size of the device; for example, '23m' for
23 megabytes. The VN device will round the size up to a machine page
boundary. Filesystems up to 7.9 terabytes are supported.
.It Fl u
Disable and ``unconfigure'' the device.
.It Fl v

usr.sbin/vnconfig/vnconfig.c

@ -43,7 +43,7 @@
static char sccsid[] = "@(#)vnconfig.c 8.1 (Berkeley) 12/15/93";
#endif
static const char rcsid[] =
"$Id: vnconfig.c,v 1.7 1997/10/27 07:55:31 charnier Exp $";
"$Id: vnconfig.c,v 1.8 1999/01/26 04:53:09 peter Exp $";
#endif /* not lint */
#include <err.h>
@ -66,7 +66,9 @@ static const char rcsid[] =
struct vndisk {
char *dev;
char *file;
char *autolabel;
int flags;
int size;
char *oarg;
} vndisks[MAXVNDISK];
@ -95,6 +97,8 @@ void getoptions __P((struct vndisk *, char *));
char *rawdevice __P((char *));
void readconfig __P((int));
static void usage __P((void));
static int getsize(const char *arg);
static void do_autolabel(const char *dev, const char *label);
int what_opt __P((char *, u_long *));
int
@ -103,9 +107,11 @@ main(argc, argv)
{
register int i, rv;
int flags = 0;
int size = 0;
char *autolabel = NULL;
configfile = _PATH_VNTAB;
while ((i = getopt(argc, argv, "acdef:gr:s:uv")) != -1)
while ((i = getopt(argc, argv, "acdef:gr:s:S:L:uv")) != -1)
switch (i) {
/* all -- use config file */
@ -166,6 +172,14 @@ main(argc, argv)
verbose++;
break;
case 'S':
size = getsize(optarg);
break;
case 'L':
autolabel = optarg;
break;
default:
usage();
}
@ -184,6 +198,8 @@ main(argc, argv)
vndisks[0].dev = argv[optind++];
vndisks[0].file = argv[optind++];
vndisks[0].flags = flags;
vndisks[0].size = size;
vndisks[0].autolabel = autolabel;
if (optind < argc)
getoptions(&vndisks[0], argv[optind]);
nvndisks = 1;
@ -235,6 +251,7 @@ config(vnp)
return(1);
}
vnio.vn_file = file;
vnio.vn_size = vnp->size; /* non-zero only if swap backed */
/*
* Disable the device
@ -274,9 +291,20 @@ config(vnp)
if (rv) {
warn("VNIOCATTACH");
flags &= ~VN_ENABLE;
} else if (verbose)
printf("%s: %d bytes on %s\n",
dev, vnio.vn_size, file);
} else {
if (verbose) {
printf(
"%s: %d bytes on %s\n",
dev, vnio.vn_size, file
);
}
/*
* autolabel
*/
if (vnp->autolabel) {
do_autolabel(vnp->dev, vnp->autolabel);
}
}
}
/*
* Set an option
@ -377,8 +405,14 @@ readconfig(flags)
while (!EOL(*cp) && !WHITE(*cp))
cp++;
*cp++ = '\0';
vndisks[ix].file = malloc(cp - sp);
strcpy(vndisks[ix].file, sp);
if (*sp == '%' && strtol(sp + 1, NULL, 0) > 0) {
vndisks[ix].size = getsize(sp + 1);
} else {
vndisks[ix].file = malloc(cp - sp);
strcpy(vndisks[ix].file, sp);
}
while (!EOL(*cp) && WHITE(*cp))
cp++;
vndisks[ix].flags = flags;
@ -452,3 +486,52 @@ usage()
"usage: vnconfig [-acdefguv] [-s option] [-r option] [special-device file]\n");
exit(1);
}
static int
getsize(const char *arg)
{
char *ptr;
int pgsize = getpagesize();
quad_t size = strtoq(arg, &ptr, 0);
switch(tolower(*ptr)) {
case 't':
/*
* GULP! Terabytes. It's actually possible to create
* a 7.9 TB VN device, though newfs can't handle any single
* filesystem larger than 1 TB.
*/
size *= 1024;
/* fall through */
case 'g':
size *= 1024;
/* fall through */
default:
case 'm':
size *= 1024;
/* fall through */
case 'k':
size *= 1024;
/* fall through */
case 'c':
break;
}
size = (size + pgsize - 1) / pgsize;
return((int)size);
}
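/*
 * Worked example (illustrative, assuming a 4K page size): getsize("23m")
 * yields 23 * 1024 * 1024 = 24117248 bytes, rounded up to whole pages:
 * (24117248 + 4095) / 4096 = 5888 pages -- the PAGE_SIZE'd count that
 * the kernel's vniocattach_swap() expects in vn_size.
 */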
/*
* DO_AUTOLABEL
*
* Automatically label the device. This will wipe any preexisting
* label.
*/
static void
do_autolabel(const char *dev, const char *label)
{
/* XXX not yet implemented */
fprintf(stderr, "autolabel not yet implemented, sorry\n");
exit(1);
}