This commit was generated by cvs2svn to compensate for changes in r39330,

which included commits to RCS files with non-trunk default branches.
This commit is contained in:
grog 1998-09-16 05:56:21 +00:00
commit eb613b0ffa
69 changed files with 24705 additions and 0 deletions

37
lkm/vinum/COPYRIGHT Normal file
View File

@ -0,0 +1,37 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: COPYRIGHT,v 1.1 1998/03/05 06:07:05 grog Exp grog $
*/

26
lkm/vinum/Makefile Normal file
View File

@ -0,0 +1,26 @@
# $Id: Makefile.lkm.lite,v 1.2 1998/08/13 06:07:29 grog Exp grog $
# Makefile for the vinum loadable kernel module; the generic module
# build rules come from <bsd.kmod.mk>, included at the bottom.

# Extra directory searched for source files.
# NOTE(review): this points at the ccd driver directory -- confirm it is still needed.
.PATH: ${.CURDIR}/../../sys/dev/ccd
# Module name and the sources/headers it is built from.
KMOD= vinum_mod
SRCS= vinum.c vinum.h vnode_if.h parser.c config.c io.c util.c vinumhdr.h request.h \
state.c memory.c request.c lock.c vinumext.h vinumio.h vinumkw.h \
vinumstate.h vinumvar.h revive.c vinumioctl.c interrupt.c
# Set empty on purpose; presumably consumed by the included bsd.*.mk rules -- verify.
NOMAN=
PSEUDO_LKM=
# Debug build: DEBUG is defined and symbols are kept (-g).
CFLAGS = -I. -O -g -I/usr/include/machine -DDEBUG -Wall -Wno-unused -Wno-parentheses
# Generated files to remove on clean.
CLEANFILES+= vinum.h vnode_if.h vnode_if.c
all:
# We don't need this, but the Makefile wants it
vinum.h:
touch $@
# state.h is produced by running the maketabs helper program.
state.h: maketabs vinumstate.h
./maketabs >state.h
# Build the maketabs helper from its single source file.
maketabs: maketabs.c
${CC} -g -o maketabs maketabs.c
.include <bsd.kmod.mk>

1712
lkm/vinum/config.c Normal file

File diff suppressed because it is too large Load Diff

190
lkm/vinum/interrupt.c Normal file
View File

@ -0,0 +1,190 @@
/* interrupt.c: bottom half of the driver */
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: interrupt.c,v 1.1 1998/08/13 06:12:27 grog Exp grog $
*/
#define REALLYKERNEL
#include "vinumhdr.h"
#include "request.h"
#include <miscfs/specfs/specdev.h>
#include <sys/resourcevar.h>
void complete_raid5_write(struct rqelement *);
void freerq(struct request *rq);
void free_rqg(struct rqgroup *rqg);
void complete_rqe(struct buf *bp);
void sdio_done(struct buf *bp);
/* Take a completed buffer, transfer the data back if
* it's a read, and complete the high-level request
* if this is the last subrequest.
*
* The bp parameter is in fact a struct rqelement, which
* includes a couple of extras at the end.
*/
void
complete_rqe(struct buf *bp)
{
BROKEN_GDB;
struct rqelement *rqe;
struct request *rq;
struct rqgroup *rqg;
struct buf *ubp; /* user buffer */
rqe = (struct rqelement *) bp; /* point to the element element that completed */
rqg = rqe->rqg; /* and the request group */
rq = rqg->rq; /* and the complete request */
if ((bp->b_flags & B_ERROR) != 0) { /* transfer in error */
if (bp->b_error != 0) /* did it return a number? */
rq->error = bp->b_error; /* yes, put it in. */
else if (rq->error == 0) /* no: do we have one already? */
rq->error = EIO; /* no: catchall "I/O error" */
if (rq->error == EIO) /* I/O error, */
set_sd_state(rqe->sdno, sd_crashed, setstate_force); /* take the subdisk down */
}
/* Now update the statistics */
if (bp->b_flags & B_READ) { /* read operation */
DRIVE[rqe->driveno].reads++;
DRIVE[rqe->driveno].bytes_read += bp->b_bcount;
SD[rqe->sdno].reads++;
SD[rqe->sdno].bytes_read += bp->b_bcount;
PLEX[rqe->rqg->plexno].reads++;
PLEX[rqe->rqg->plexno].bytes_read += bp->b_bcount;
} else { /* write operation */
DRIVE[rqe->driveno].writes++;
DRIVE[rqe->driveno].bytes_written += bp->b_bcount;
SD[rqe->sdno].writes++;
SD[rqe->sdno].bytes_written += bp->b_bcount;
PLEX[rqe->rqg->plexno].writes++;
PLEX[rqe->rqg->plexno].bytes_written += bp->b_bcount;
}
ubp = rq->bp; /* user buffer */
rqg->active--; /* one less request active */
if (rqg->active == 0) /* request group finished, */
rq->active--; /* one less */
if (rq->active == 0) { /* request finished, */
#if DEBUG
if (debug & 4) {
if (ubp->b_resid != 0) /* still something to transfer? */
Debugger("resid");
{
int i;
for (i = 0; i < ubp->b_bcount; i += 512) /* XXX debug */
if (((char *) ubp->b_data)[i] != '<') { /* and not what we expected */
printf("At 0x%x (offset 0x%x): '%c' (0x%x)\n",
(int) (&((char *) ubp->b_data)[i]),
i,
((char *) ubp->b_data)[i],
((char *) ubp->b_data)[i]);
Debugger("complete_request checksum");
}
}
}
#endif
if (rq->error) { /* did we have an error? */
ubp->b_flags |= B_ERROR; /* yes, propagate to user */
ubp->b_error = rq->error;
} else
ubp->b_resid = 0; /* completed our transfer */
if (rq->isplex == 0) /* volume request, */
VOL[rq->volplex.volno].active--; /* another request finished */
biodone(ubp); /* top level buffer completed */
freerq(rq); /* return the request storage */
}
}
/*
 * Release a request and everything chained to it: each request
 * group, plus the data buffer of any element flagged XFR_MALLOCED
 * whose buffer pointer is set.
 */
void
freerq(struct request *rq)
{
    BROKEN_GDB;
    struct rqgroup *rqg;
    struct rqgroup *following;                              /* successor, saved before the free */
    int idx;

    rqg = rq->rqg;
    while (rqg != NULL) {                                   /* walk the whole group chain */
        for (idx = 0; idx < rqg->count; idx++) {
            if (rqg->rqe[idx].flags & XFR_MALLOCED) {       /* data buffer was malloced, */
                if (rqg->rqe[idx].b.b_data)                 /* and the allocation succeeded */
                    Free(rqg->rqe[idx].b.b_data);
            }
        }
        following = rqg->next;                              /* rqg is about to disappear */
        Free(rqg);
        rqg = following;
    }
    Free(rq);                                               /* finally the request itself */
}
/*
 * Free the data buffer of the first element of a request group,
 * but only for groups flagged XFR_GROUPOP (RAID 5 requests) that
 * actually have a buffer attached.  The group itself is not freed.
 */
void
free_rqg(struct rqgroup *rqg)
{
    if ((rqg->flags & XFR_GROUPOP) == 0)                    /* not a RAID 5 request */
        return;
    if (!(rqg->rqe))                                        /* no buffer structure */
        return;
    if (rqg->rqe->b.b_data)                                 /* a buffer is allocated: */
        Free(rqg->rqe->b.b_data);                           /* release it */
}
/*
 * I/O on a subdisk completed.
 *
 * bp is the buffer header embedded in a struct sdbuf (the code casts
 * it back and then refers to the same header as sbp->b).  The
 * caller's original buffer is sbp->bp.  Copy the completion status
 * to the caller's buffer, complete it, update the drive and subdisk
 * statistics and free the sdbuf.
 */
void
sdio_done(struct buf *bp)
{
    struct sdbuf *sbp;

    sbp = (struct sdbuf *) bp;
    /*
     * The status must go to the caller's buffer (sbp->bp).  Writing
     * it to bp, as this code did before, only copies our own header
     * onto itself and the caller never sees the error or the residual.
     */
    if (sbp->b.b_flags & B_ERROR) {                         /* had an error */
        sbp->bp->b_flags |= B_ERROR;                        /* propagate upwards */
        sbp->bp->b_error = sbp->b.b_error;
    }
    sbp->bp->b_resid = sbp->b.b_resid;                      /* copy the residual count */
    biodone(sbp->bp);                                       /* complete the caller's I/O */
    /* Now update the statistics */
    if (bp->b_flags & B_READ) {                             /* read operation */
        DRIVE[sbp->driveno].reads++;
        DRIVE[sbp->driveno].bytes_read += bp->b_bcount;
        SD[sbp->sdno].reads++;
        SD[sbp->sdno].bytes_read += bp->b_bcount;
    } else {                                                /* write operation */
        DRIVE[sbp->driveno].writes++;
        DRIVE[sbp->driveno].bytes_written += bp->b_bcount;
        SD[sbp->sdno].writes++;
        SD[sbp->sdno].bytes_written += bp->b_bcount;
    }
    Free(sbp);                                              /* our private buffer is done with */
}

886
lkm/vinum/io.c Normal file
View File

@ -0,0 +1,886 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: io.c,v 1.16 1998/08/10 23:47:21 grog Exp grog $
*/
#define STATIC /* nothing while we're testing XXX */
#if __FreeBSD__ < 3 /* this is in sys/disklabel.h in 3.0 and on */
#define DTYPE_VINUM 12 /* vinum volume */
#endif
#define REALLYKERNEL
#include "vinumhdr.h"
#include <miscfs/specfs/specdev.h>
extern jmp_buf command_fail; /* return on a failed command */
struct _ioctl_reply *ioctl_reply; /* data pointer, for returning error messages */
#if __FreeBSD__ >= 3
/* Why aren't these declared anywhere? XXX */
int setjmp(jmp_buf);
void longjmp(jmp_buf, int);
#endif
/* pointer to ioctl p parameter, to save passing it around */
extern struct proc *myproc;
/* Open the device associated with the drive, and set drive's vp */
int
open_drive(struct drive *drive, struct proc *p)
{
BROKEN_GDB;
struct nameidata nd;
struct vattr va;
int error;
if (drive->devicename[0] == '\0') /* no device name */
sprintf(drive->devicename, "/dev/%s", drive->label.name); /* get it from the drive name */
NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, drive->devicename, p);
error = vn_open(&nd, FREAD | FWRITE, 0); /* open the device */
if (error != 0) { /* can't open? */
set_drive_state(drive->driveno, drive_down, 1);
drive->lasterror = error;
printf("vinum open_drive %s: failed with error %d\n", drive->devicename, error); /* XXX */
return error;
}
drive->vp = nd.ni_vp;
drive->p = p;
if (drive->vp->v_usecount > 1) { /* already in use? */
#if __FreeBSD__ == 2 /* pre-4.4BSD Lite/2 parameters */
VOP_UNLOCK(drive->vp);
#else
VOP_UNLOCK(drive->vp, 0, p);
#endif
close_drive(drive);
set_drive_state(drive->driveno, drive_down, 1);
drive->lasterror = EBUSY;
printf("vinum open_drive %s: Drive in use\n", drive->devicename); /* XXX */
return EBUSY;
}
error = VOP_GETATTR(drive->vp, &va, NOCRED, p);
if (error) {
#if __FreeBSD__ == 2 /* pre-4.4BSD Lite/2 parameters */
VOP_UNLOCK(drive->vp);
#else
VOP_UNLOCK(drive->vp, 0, p);
#endif
close_drive(drive);
set_drive_state(drive->driveno, drive_down, 1);
drive->lasterror = error;
printf("vinum open_drive %s: GETAATTR returns error %d\n", drive->devicename, error); /* XXX */
return error;
}
drive->dev = va.va_rdev; /* device */
if (va.va_type != VBLK) { /* only consider block devices */
#if __FreeBSD__ == 2 /* pre-4.4BSD Lite/2 parameters */
VOP_UNLOCK(drive->vp);
#else
VOP_UNLOCK(drive->vp, 0, p);
#endif
close_drive(drive);
set_drive_state(drive->driveno, drive_down, 1); /* this also closes the drive */
drive->lasterror = ENOTBLK;
printf("vinum open_drive %s: Not a block device\n", drive->devicename); /* XXX */
return ENOTBLK;
}
drive->vp->v_numoutput = 0;
#if __FreeBSD__ == 2 /* pre-4.4BSD Lite/2 parameters */
VOP_UNLOCK(drive->vp);
#else
VOP_UNLOCK(drive->vp, 0, p);
#endif
return 0;
}
/*
 * Derive the working parameters of a drive from its partition
 * information: block size, sectors per block, label host name and
 * creation time, drive size and the initial free list covering
 * everything from DATASTART onwards.
 *
 * Returns 0 and marks the drive up on success.  Returns ENOSPC if
 * the slice is smaller than MINVINUMSLICE (the drive is then marked
 * down) or if the free list cannot be allocated.
 */
int
set_drive_parms(struct drive *drive)
{
    drive->blocksize = BLKDEV_IOSIZE;                       /* XXX do we need this? */
    drive->secsperblock =                                   /* number of sectors per block */
        drive->blocksize / drive->partinfo.disklab->d_secsize;

    /* Fill in the label: host name and time of creation */
    bcopy(hostname, drive->label.sysname, VINUMHOSTNAMELEN);
#if __FreeBSD__ >= 3
    getmicrotime(&drive->label.date_of_birth);
#else
    drive->label.date_of_birth = time;
#endif

    /* Size of the drive in bytes, computed in 64 bits */
    drive->label.drive_size =
        ((u_int64_t) drive->partinfo.part->p_size)
        * ((u_int64_t) drive->partinfo.disklab->d_secsize);

    /* Sectors left over for subdisks once the header area is taken off */
    drive->sectors_available = drive->label.drive_size / DEV_BSIZE - DATASTART;

    /*
     * XXX Bug in 3.0 as of January 1998: you can open
     * non-existent slices.  They have a length of 0
     */
    if (drive->label.drive_size < MINVINUMSLICE) {          /* too small to worry about */
        set_drive_state(drive->driveno, drive_down, 1);
        printf("vinum open_drive %s: Drive too small\n", drive->devicename); /* XXX */
        drive->lasterror = ENOSPC;
        return ENOSPC;
    }

    /* Start with a free list of one entry: (almost) the complete drive */
    drive->freelist_size = INITIAL_DRIVE_FREELIST;
    drive->freelist = (struct drive_freelist *)
        Malloc(INITIAL_DRIVE_FREELIST * sizeof(struct drive_freelist));
    if (drive->freelist == NULL)                            /* out of memory */
        return ENOSPC;
    drive->freelist_entries = 1;
    drive->freelist[0].offset = DATASTART;
    drive->freelist[0].sectors = (drive->label.drive_size >> DEV_BSHIFT) - DATASTART;

    set_drive_state(drive->driveno, drive_up, 1);           /* our drive is accessible */
    return 0;
}
/*
 * Initialize a drive: open the device, fetch its partition
 * information with DIOCGPART, check that the partition type is 0
 * and finally set the derived parameters via set_drive_parms().
 *
 * Returns 0 on success or an error number; on failure
 * drive->lasterror holds the same number.
 */
int
init_drive(struct drive *drive)
{
    BROKEN_GDB;
    int rc;

    if (drive->devicename[0] == '\0') {                     /* nothing to open */
        drive->lasterror = EINVAL;
        printf("vinum: Can't open drive without drive name\n"); /* XXX */
        return EINVAL;
    }

    rc = open_drive(drive, myproc);                         /* open the drive */
    if (rc != 0)
        return rc;

    rc = VOP_IOCTL(drive->vp,                               /* get the partition information */
        DIOCGPART,
        (caddr_t) & drive->partinfo,
        FREAD,
        NOCRED,
        myproc);
    if (rc != 0) {
        printf("vinum open_drive %s: Can't get partition information, error %d\n",
            drive->devicename,
            rc);                                            /* XXX */
        close_drive(drive);
        drive->lasterror = rc;
        set_drive_state(drive->driveno, drive_down, 1);
        return rc;
    }

    if (drive->partinfo.part->p_fstype == 0)                /* plain partition: usable */
        return set_drive_parms(drive);                      /* set various odds and ends */

    drive->lasterror = EFTYPE;
    printf("vinum open_drive %s: Wrong partition type for vinum\n", drive->devicename); /* XXX */
    close_drive(drive);
    set_drive_state(drive->driveno, drive_down, 1);
    return EFTYPE;
}
/*
 * Close the drive's vnode if one is open and forget it.
 * Does nothing for a drive that is not open; reports no errors.
 */
void
close_drive(struct drive *drive)
{
    if (!drive->vp)                                         /* not open */
        return;
    vn_close(drive->vp, FREAD | FWRITE, NOCRED, drive->p);
    drive->vp = NULL;
}
/*
 * Remove a drive from the configuration.  The on-disk magic number
 * is overwritten with VINUM_NOMAGIC, the drive is closed and marked
 * unallocated, and the updated configuration is saved.
 *
 * The caller must ensure that the drive isn't active.
 */
void
remove_drive(int driveno)
{
    BROKEN_GDB;
    struct drive *victim = &vinum_conf.drive[driveno];
    long long int nomagic = VINUM_NOMAGIC;                  /* replacement for the magic number */

    /* obliterate the magic, but leave a hint */
    write_drive(victim, (char *) &nomagic, 8, VINUM_LABEL_OFFSET);
    close_drive(victim);
    victim->state = drive_unallocated;                      /* forget everything we knew about it */
    save_config();                                          /* and save the updated configuration */
}
/*
 * Transfer data directly between a drive and a kernel buffer by
 * building a private struct buf, handing it to the device's
 * strategy routine and sleeping until drive_io_done() reports
 * completion.
 *
 * flag is placed in b_flags to select the direction (the original
 * comment describes wrappers passing B_READ or B_WRITE).
 *
 * offset must be a multiple of the drive's sector size; EINVAL is
 * returned otherwise.  Returns 0 on success or the error number
 * reported in the buffer.
 */
int
driveio(struct drive *drive, void *buf, size_t length, off_t offset, int flag)
{
    BROKEN_GDB;
    int error;
    struct buf *bp;
    int spl;

    error = 0;
    /*
     * Reject transfers that don't start on a sector boundary.  This is
     * checked before allocating so that nothing needs to be released.
     */
    if (offset & (drive->partinfo.disklab->d_secsize - 1))  /* not on a block boundary */
        return EINVAL;

    /* Get a buffer */
    bp = (struct buf *) Malloc(sizeof(struct buf));         /* get a buffer */
    CHECKALLOC(bp, "Can't allocate memory");
    /*
     * Clear the new buffer header.  The previous code cleared the
     * caller's data pointer instead (bzero(&buf, sizeof(buf))), which
     * left the header uninitialized and set b_data to NULL below.
     */
    bzero(bp, sizeof(struct buf));
    bp->b_flags = B_BUSY | flag;                            /* tell us when it's done */
    bp->b_iodone = drive_io_done;                           /* here */
    bp->b_proc = myproc;                                    /* process */
    bp->b_dev = drive->vp->v_un.vu_specinfo->si_rdev;       /* device */
    /* The block number is always needed, not only for misaligned offsets */
    bp->b_blkno = offset / drive->partinfo.disklab->d_secsize; /* block number */
    bp->b_data = buf;
    bp->b_vp = drive->vp;                                   /* vnode */
    bp->b_bcount = length;
    bp->b_bufsize = length;
    (*bdevsw[major(bp->b_dev)]->d_strategy) (bp);           /* initiate the transfer */
    spl = splbio();
    while ((bp->b_flags & B_DONE) == 0) {
        bp->b_flags |= B_CALL;                              /* wake me again */
        tsleep((caddr_t) bp, PRIBIO, "driveio", 0);         /* and wait for it to complete */
    }
    splx(spl);
    if (bp->b_flags & B_ERROR)                              /* didn't work */
        error = bp->b_error;                                /* get the error return */
    Free(bp);                                               /* then return the buffer */
    return error;
}
/*
 * Read length bytes starting at byte offset from a drive into buf.
 *
 * The data is read block by block (drive->blocksize) with bread(),
 * or with breadn() and one block of read-ahead when the previous
 * read ended in the block before this one, and copied out with
 * uiomove().
 *
 * Returns 0 on success, ENODEV if the drive has no open vnode, or
 * the error number from the read or the copy.
 */
int
read_drive(struct drive *drive, void *buf, size_t length, off_t offset)
{
    BROKEN_GDB;
    int error;
    struct buf *bp;
    daddr_t nextbn;
    long bscale;
    struct uio uio;
    struct iovec iov;
    daddr_t blocknum;                                       /* block number */
    int blockoff;                                           /* offset in block */
    int count;                                              /* amount to transfer */

    if (drive->vp == NULL) {                                /* not open: same check as write_drive */
        drive->lasterror = ENODEV;
        return ENODEV;
    }
    iov.iov_base = buf;
    iov.iov_len = length;
    uio.uio_iov = &iov;
    uio.uio_iovcnt = 1;                                     /* one iovec, not `length' of them */
    uio.uio_offset = offset;
    uio.uio_resid = length;
    uio.uio_segflg = UIO_SYSSPACE;
    uio.uio_rw = UIO_READ;
    uio.uio_procp = myproc;
    bscale = btodb(drive->blocksize);                       /* mask off offset from block number */
    do {
        blocknum = btodb(uio.uio_offset) & ~(bscale - 1);   /* get the block number */
        blockoff = uio.uio_offset % drive->blocksize;       /* offset in block */
        count = min((unsigned) (drive->blocksize - blockoff), /* amount to transfer in this block */
            uio.uio_resid);
        /* XXX Check this.  I think the test is wrong */
        if (drive->vp->v_lastr + bscale == blocknum) {      /* did our last read finish in this block? */
            nextbn = blocknum + bscale;                     /* note the end of the transfer */
            error = breadn(drive->vp,                       /* and read with read-ahead */
                blocknum,
                (int) drive->blocksize,
                &nextbn,
                (int *) &drive->blocksize,
                1,
                NOCRED,
                &bp);
        } else                                              /* random read: just read this block */
            error = bread(drive->vp, blocknum, (int) drive->blocksize, NOCRED, &bp);
        drive->vp->v_lastr = blocknum;                      /* note the last block we read */
        if (error) {                                        /* check before trusting b_resid */
            brelse(bp);
            return error;
        }
        count = min(count, drive->blocksize - bp->b_resid); /* limit to what was actually read */
        error = uiomove((char *) bp->b_data + blockoff, count, &uio); /* move the data */
        brelse(bp);
    }
    while (error == 0 && uio.uio_resid > 0 && count != 0);
    return error;
}
/*
 * Write length bytes from buf to a drive, starting at byte offset.
 *
 * Whole blocks are obtained with getblk(); partial blocks are read
 * first with bread() and then modified.  A block filled to its end
 * is written with bawrite(), anything else with bdwrite().
 *
 * Returns 0 on success and also, deliberately, when the drive is
 * down (the write is ignored).  Returns ENODEV if the drive has no
 * open vnode, or the error number of the failing operation, which
 * is also stored in drive->lasterror.  EIO takes the drive down.
 */
int
write_drive(struct drive *drive, void *buf, size_t length, off_t offset)
{
    BROKEN_GDB;
    int error;
    struct buf *bp;
    struct uio uio;
    struct iovec iov;
    daddr_t blocknum;                                       /* block number */
    int blockoff;                                           /* offset in block */
    int count;                                              /* amount to transfer */
    int blockmask;                                          /* sector-number bits within a block */

    if (drive->state == drive_down)                         /* currently down */
        return 0;                                           /* ignore */
    if (drive->vp == NULL) {
        drive->lasterror = ENODEV;
        return ENODEV;                                      /* not configured yet */
    }
    iov.iov_base = buf;
    iov.iov_len = length;
    uio.uio_iov = &iov;
    uio.uio_iovcnt = 1;                                     /* one iovec, not `length' of them */
    uio.uio_offset = offset;
    uio.uio_resid = length;
    uio.uio_segflg = UIO_SYSSPACE;
    uio.uio_rw = UIO_WRITE;
    uio.uio_procp = myproc;
    error = 0;
    blockmask = btodb(drive->blocksize) - 1;                /* low bits to clear to get the
                                                             * first sector of the block */
    do {
        blocknum = btodb(uio.uio_offset) & ~blockmask;      /* get the block number */
        blockoff = uio.uio_offset % drive->blocksize;       /* offset in block */
        count = min((unsigned) (drive->blocksize - blockoff), /* amount to transfer in this block */
            uio.uio_resid);
        if (count == drive->blocksize)                      /* the whole block */
            bp = getblk(drive->vp, blocknum, drive->blocksize, 0, 0); /* just get it */
        else                                                /* partial block: */
            error = bread(drive->vp,                        /* read it first */
                blocknum,
                drive->blocksize,
                NOCRED,
                &bp);
        count = min(count, drive->blocksize - bp->b_resid); /* how much will we transfer now? */
        if (error == 0)
            error = uiomove((char *) bp->b_data + blockoff, /* move the data to the block */
                count,
                &uio);
        if (error) {
            brelse(bp);
            drive->lasterror = error;
            switch (error) {
            case EIO:
                set_drive_state(drive->driveno, drive_down, 1);
                break;
                /* XXX Add other possibilities here */
            default:
                break;                                      /* a label needs a statement after it */
            }
            return error;
        }
        if (count + blockoff == drive->blocksize)
            /*
             * The transfer goes to the end of the block.  There's
             * no need to wait for any more data to arrive.
             */
            bawrite(bp);                                    /* start the write now */
        else
            bdwrite(bp);                                    /* do a delayed write */
    }
    while (error == 0 && uio.uio_resid > 0 && count != 0);
    if (error)
        drive->lasterror = error;
    return error;                                           /* OK */
}
/*
 * I/O completion callback installed by driveio(): wake whoever is
 * sleeping on the buffer and clear B_CALL so that the callback is
 * not requested again.
 */
void
drive_io_done(struct buf *bp)
{
    BROKEN_GDB;
    wakeup((caddr_t) bp);                                   /* rouse the sleeper */
    bp->b_flags = bp->b_flags & ~B_CALL;                    /* one call is enough */
}
/* Check a drive for a vinum header. If found,
* update the drive information. We come here
* with a partially populated drive structure
* which includes the device name.
*
* Return information on what we found
*/
enum drive_label_info
read_drive_label(struct drive *drive)
{
BROKEN_GDB;
int error;
int result; /* result of our search */
struct vinum_hdr *vhdr; /* and as header */
error = init_drive(drive); /* find the drive */
if (error) /* find the drive */
return DL_CANT_OPEN; /* not ours */
vhdr = (struct vinum_hdr *) Malloc(VINUMHEADERLEN); /* allocate buffers */
CHECKALLOC(vhdr, "Can't allocate memory");
error = read_drive(drive, (void *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET);
if (vhdr->magic == VINUM_MAGIC) { /* ours! */
if (drive->label.name[0] /* we have a name for this drive */
&&(strcmp(drive->label.name, vhdr->label.name))) { /* but it doesn't match the real name */
drive->lasterror = EINVAL;
result = DL_WRONG_DRIVE; /* it's the wrong drive */
} else {
set_drive_parms(drive); /* and set other parameters */
result = DL_OURS;
}
/* We copy the drive anyway so that we have
* the correct name in the drive info. This
* may not be the name specified */
drive->label = vhdr->label; /* put in the label information */
} else if (vhdr->magic == VINUM_NOMAGIC) /* was ours, but we gave it away */
result = DL_DELETED_LABEL;
else
result = DL_NOT_OURS; /* we could have it, but we don't yet */
Free(vhdr); /* that's all. */
return result;
}
/* Check a drive for a vinum header. If found,
* read configuration information from the drive and
* incorporate the data into the configuration.
*
* Return error number
*/
int
check_drive(char *drivename)
{
BROKEN_GDB;
int error;
struct nameidata nd; /* mount point credentials */
char *config_text; /* read the config info from disk into here */
volatile char *cptr; /* pointer into config information */
char *eptr; /* end pointer into config information */
int driveno;
struct drive *drive;
char *config_line; /* copy the config line to */
driveno = find_drive_by_dev(drivename, 1); /* doesn't exist, create it */
drive = &vinum_conf.drive[driveno]; /* and get a pointer */
strcpy(drive->devicename, drivename); /* put in device name */
if (read_drive_label(drive) == DL_OURS) { /* ours! */
config_text = (char *) Malloc(MAXCONFIG * 2); /* allocate buffers */
CHECKALLOC(config_text, "Can't allocate memory");
config_line = (char *) Malloc(MAXCONFIGLINE * 2); /* allocate buffers */
CHECKALLOC(config_line, "Can't allocate memory");
/* Read in both copies of the configuration information */
error = read_drive(drive, config_text, MAXCONFIG * 2, VINUM_CONFIG_OFFSET);
if (error != 0) {
printf("vinum: Can't read device %s, error %d\n", drive->devicename, error);
Free(config_text);
Free(config_line);
free_drive(drive); /* give it back */
return error;
}
/* XXX At this point, check that the two copies are the same, and do something useful if not.
* In particular, consider which is newer, and what this means for the integrity of the
* data on the drive */
/* Parse the configuration, and add it to the global configuration */
for (cptr = config_text; *cptr != '\0';) { /* love this style(9) */
volatile int parse_status; /* return value from parse_config */
for (eptr = config_line; (*cptr != '\n') && (*cptr != '\0');) /* until the end of the line */
*eptr++ = *cptr++;
*eptr = '\0'; /* and delimit */
if (setjmp(command_fail) == 0) { /* come back here on error and continue */
parse_status = parse_config(config_line, &keyword_set); /* parse the config line */
if (parse_status < 0) { /* error in config */
/* This config should have been parsed in user
* space. If we run into problems here, something
* serious is afoot. Complain and let the user
* snarf the config to see what's wrong */
printf("vinum: Config error on drive %s, aborting integration\n", nd.ni_dirp);
Free(config_text);
Free(config_line);
free_drive(drive); /* give it back */
return EINVAL;
}
}
while (*cptr == '\n')
cptr++; /* skip to next line */
}
Free(config_text);
if ((vinum_conf.flags & VF_READING_CONFIG) == 0) /* not reading config */
updateconfig(0); /* update object states */
printf("vinum: read configuration from %s\n", drivename);
return 0; /* it all worked */
} else { /* no vinum label found */
if (drive->lasterror) {
set_drive_state(drive->driveno, drive_down, 1);
return drive->lasterror;
} else
return ENODEV; /* not our device */
}
}
/* Kludge: kernel printf doesn't handle longs correctly XXX */
static char *lltoa(long long l, char *s);
static char *sappend(char *txt, char *s);
/*
 * Write the decimal representation of l at s, with a leading '-'
 * for negative values.  No terminating NUL is written; the return
 * value points just past the last character stored, so the caller
 * can continue appending there.
 *
 * The magnitude is handled as an unsigned value so that the most
 * negative long long, which has no positive counterpart, is
 * converted correctly instead of overflowing on negation.
 */
static char *
lltoa(long long l, char *s)
{
    unsigned long long magnitude;
    char digits[24];                                        /* digits, least significant first */
    int ndigits = 0;

    if (l < 0) {
        *s++ = '-';
        magnitude = 0ULL - (unsigned long long) l;          /* well defined for every value */
    } else
        magnitude = (unsigned long long) l;
    do {                                                    /* always emit at least one digit */
        digits[ndigits++] = (char) ('0' + (int) (magnitude % 10));
        magnitude /= 10;
    } while (magnitude != 0);
    while (ndigits > 0)                                     /* store most significant first */
        *s++ = digits[--ndigits];
    return s;
}
/*
 * Copy the string txt, including its terminating NUL, to s and
 * return a pointer to the NUL just written, i.e. the place where
 * the next piece of text should be appended.
 */
static char *
sappend(char *txt, char *s)
{
    char ch;

    do {
        ch = *txt++;
        *s++ = ch;
    } while (ch != '\0');
    return s - 1;                                           /* step back onto the terminator */
}
/*
 * Format the current configuration as text into the buffer at
 * config, which is len bytes long and is cleared first.  Drives are
 * written first, then volumes, plexes and subdisks; unallocated
 * objects are skipped.
 *
 * After each object the write position is compared with a mark 80
 * bytes short of the end of the buffer; once past it, a message is
 * printed and formatting stops.
 * XXX this stinks.  Fix soon.
 */
void
format_config(char *config, int len)
{
    BROKEN_GDB;
    int i;
    int j;
    char *s = config;                                       /* current write position */

    bzero(config, len);

    /* Drives */
    for (i = 0; i < vinum_conf.drives_used; i++) {
        struct drive *drive = &vinum_conf.drive[i];

        if (drive->state == drive_unallocated)
            continue;
        sprintf(s,
            "drive %s state %s device %s\n",
            drive->label.name,
            drive_state(drive->state),
            drive->devicename);
        for (; *s != '\0'; s++)                             /* find the end */
            continue;
        if (s > config + len - 80) {
            printf("vinum: configuration data overflow\n");
            return;
        }
    }

    /* Volumes */
    for (i = 0; i < vinum_conf.volumes_used; i++) {
        struct volume *vol = &vinum_conf.volume[i];

        if (vol->state == volume_unallocated)
            continue;
        if (vol->preferred_plex < 0)                        /* default round-robin */
            sprintf(s,
                "volume %s state %s",
                vol->name,
                volume_state(vol->state));
        else                                                /* a plex is preferred */
            sprintf(s,
                "volume %s state %s readpol prefer %s",
                vol->name,
                volume_state(vol->state),
                vinum_conf.plex[vol->preferred_plex].name);
        for (; *s != '\0'; s++)                             /* find the end */
            continue;
        s = sappend("\n", s);
        if (s > config + len - 80) {
            printf("vinum: configuration data overflow\n");
            return;
        }
    }

    /* Plexes */
    for (i = 0; i < vinum_conf.plexes_used; i++) {
        struct plex *plex = &vinum_conf.plex[i];

        if (plex->state == plex_unallocated)
            continue;
        sprintf(s, "plex name %s state %s org %s ",
            plex->name,
            plex_state(plex->state),
            plex_org(plex->organization));
        for (; *s != '\0'; s++)                             /* find the end */
            continue;
        if (plex->organization == plex_striped) {           /* striped plexes carry a stripe size */
            sprintf(s, "%db ", (int) plex->stripesize);
            for (; *s != '\0'; s++)                         /* find the end */
                continue;
        }
        if (plex->volno >= 0)                               /* we have a volume */
            sprintf(s, "vol %s ", vinum_conf.volume[plex->volno].name);
        for (; *s != '\0'; s++)                             /* find the end */
            continue;
        /*
         * NOTE(review): s is not advanced inside this loop, so each
         * subdisk name overwrites the previous one and the newline
         * appended below overwrites the start of the last.  Kept as
         * is, since changing it would alter the saved text.
         */
        for (j = 0; j < plex->subdisks; j++)
            sprintf(s, " sd %s", vinum_conf.sd[plex->sdnos[j]].name);
        s = sappend("\n", s);
        if (s > config + len - 80) {
            printf("vinum: configuration data overflow\n");
            return;
        }
    }

    /* Subdisks */
    for (i = 0; i < vinum_conf.subdisks_used; i++) {
        struct sd *sd = &vinum_conf.sd[i];                  /* XXX */

        if (sd->state == sd_unallocated)
            continue;
        sprintf(s,
            "sd name %s drive %s plex %s state %s len ",
            sd->name,
            vinum_conf.drive[sd->driveno].label.name,
            vinum_conf.plex[sd->plexno].name,
            sd_state(sd->state));
        for (; *s != '\0'; s++)                             /* find the end */
            continue;
        /* The 64 bit quantities go through lltoa, not sprintf */
        s = lltoa(sd->sectors, s);
        s = sappend("b driveoffset ", s);
        s = lltoa(sd->driveoffset, s);
        s = sappend("b plexoffset ", s);
        s = lltoa(sd->plexoffset, s);
        s = sappend("b\n", s);
        if (s > config + len - 80) {
            printf("vinum: configuration data overflow\n");
            return;
        }
    }
}
/*
 * Write the configuration to all vinum slices.
 *
 * A header (magic number, config length and the drive's own label)
 * and the formatted configuration text are written to every drive
 * that is not down, unallocated or uninitialized.  Label writing
 * is enabled with DIOCWLABEL around the writes and disabled again
 * afterwards.  A drive that can't be written is marked down.
 *
 * Nothing is done, and 0 is returned, while VF_CONFIGURING is set.
 * Otherwise returns 0 if the configuration reached at least one
 * drive and 1 if it reached none.
 */
int
save_config(void)
{
    BROKEN_GDB;
    int error;
    int written_config;                                     /* set when we first write the config to disk */
    int driveno;
    struct drive *drive;                                    /* point to current drive info */
    struct vinum_hdr *vhdr;                                 /* and as header */
    char *config;                                           /* point to config data */
    int wlabel_on;                                          /* to set writing label on/off */

    /* don't save the configuration while we're still working on it */
    if (vinum_conf.flags & VF_CONFIGURING)
        return 0;
    written_config = 0;                                     /* no config written yet */
    /* Build a volume header */
    vhdr = (struct vinum_hdr *) Malloc(VINUMHEADERLEN);     /* get space for the header */
    CHECKALLOC(vhdr, "Can't allocate config data");
    /*
     * All VINUMHEADERLEN bytes are written to disk below, but only a
     * few fields are filled in.  Clear the rest so that stale kernel
     * memory doesn't end up on the drive.
     */
    bzero(vhdr, VINUMHEADERLEN);
    vhdr->magic = VINUM_MAGIC;                              /* magic number */
    vhdr->config_length = MAXCONFIG;                        /* length of following config info */
    config = Malloc(MAXCONFIG);                             /* get space for the config data */
    CHECKALLOC(config, "Can't allocate config data");
    format_config(config, MAXCONFIG);
    error = 0;                                              /* no errors yet */
    for (driveno = 0; driveno < vinum_conf.drives_used; driveno++) {
        drive = &vinum_conf.drive[driveno];                 /* point to drive */
        if (drive->state != drive_down) {
#if (__FreeBSD__ >= 3)
            getmicrotime(&drive->label.last_update);        /* time of last update is now */
#else
            drive->label.last_update = time;                /* time of last update is now */
#endif
            bcopy((char *) &drive->label,                   /* and the label info from the drive structure */
                (char *) &vhdr->label,
                sizeof(vhdr->label));
            if ((drive->state != drive_unallocated)
                && (drive->state != drive_uninit)) {
                wlabel_on = 1;                              /* enable writing the label */
                error = VOP_IOCTL(drive->vp,                /* make the label writeable */
                    DIOCWLABEL,
                    (caddr_t) & wlabel_on,
                    FWRITE,
                    NOCRED,
                    myproc);
                if (error == 0)
                    error = write_drive(drive, vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET);
                if (error == 0)
                    error = write_drive(drive, config, MAXCONFIG, VINUM_CONFIG_OFFSET);
                wlabel_on = 0;                              /* disable writing the label */
                VOP_IOCTL(drive->vp,                        /* make the label non-writeable again */
                    DIOCWLABEL,
                    (caddr_t) & wlabel_on,
                    FWRITE,
                    NOCRED,
                    myproc);
                if (error) {
                    printf("vinum: Can't write config to %s, error %d\n", drive->devicename, error);
                    set_drive_state(drive->driveno, drive_down, 1);
                } else
                    written_config = 1;                     /* we've written it on at least one drive */
            }
        }
    }
    Free(vhdr);
    Free(config);
    return written_config == 0;                             /* return 1 if we failed to write config */
}
/*
 * Disk labels are a mess.  The correct way to access them
 * is with the DIOC[GSW]DINFO ioctls, but some programs, such
 * as newfs, access the disk directly, so we have to write
 * things there.  We do this only on request.  If a user
 * request tries to read it directly, we fake up one on the fly.
 */

/*
 * get_volume_label builds a fake disk label for volume vol in *lp,
 * which is allocated by the caller.  The whole structure is cleared
 * first and the checksum is calculated last, after every other field
 * has been set.
 */
void
get_volume_label(struct volume *vol, struct disklabel *lp)
{
    bzero(lp, sizeof(struct disklabel));

    strncpy(lp->d_typename, "vinum", sizeof(lp->d_typename));
    lp->d_type = DTYPE_VINUM;
    /* strncpy() doesn't terminate the name if it fills the field */
    strncpy(lp->d_packname, vol->name, min(sizeof(lp->d_packname), sizeof(vol->name)));
    lp->d_rpm = 14400 * vol->plexes;                        /* to keep them guessing */
    lp->d_interleave = 1;
    lp->d_flags = 0;

    /*
     * Fitting unto the vine, a vinum has a single
     * track with all its sectors
     */
    lp->d_secsize = DEV_BSIZE;                              /* bytes per sector */
    lp->d_nsectors = vol->size;                             /* data sectors per track */
    lp->d_ntracks = 1;                                      /* tracks per cylinder */
    lp->d_ncylinders = 1;                                   /* data cylinders per unit */
    lp->d_secpercyl = vol->size;                            /* data sectors per cylinder */
    lp->d_secperunit = vol->size;                           /* data sectors per unit */

    lp->d_bbsize = BBSIZE;
    lp->d_sbsize = SBSIZE;

    lp->d_magic = DISKMAGIC;
    lp->d_magic2 = DISKMAGIC;

    /*
     * Set up partitions a, b and c to be identical
     * and the size of the volume.  a is UFS, b is
     * swap, c is nothing
     */
    lp->d_partitions[0].p_size = vol->size;
    lp->d_partitions[0].p_fstype = FS_BSDFFS;               /* FreeBSD File System :-) */
    lp->d_partitions[0].p_fsize = 1024;                     /* FS fragment size */
    lp->d_partitions[0].p_frag = 8;                         /* and fragments per block */
    lp->d_partitions[SWAP_PART].p_size = vol->size;
    lp->d_partitions[SWAP_PART].p_fstype = FS_SWAP;         /* swap partition */
    lp->d_partitions[LABEL_PART].p_size = vol->size;
    lp->d_npartitions = LABEL_PART + 1;

    lp->d_checksum = dkcksum(lp);                           /* must come last */
}
/*
 * Write a faked-up disk label (see get_volume_label) to the label
 * sector of volume volno.
 *
 * Returns 0 on success, ENOENT if volno is out of range or the volume
 * is unallocated, ENOMEM if no memory is available for the label, or
 * otherwise the value returned by biowait().
 */
int
write_volume_label(int volno)
{
    struct disklabel *lp;
    struct buf *bp;
    struct disklabel *dlp;
    struct volume *vol;
    int error;

    /* Check the volume before allocating, so the error returns have nothing to free */
    if ((unsigned) (volno) >= (unsigned) vinum_conf.volumes_used) /* invalid volume */
        return ENOENT;

    vol = &VOL[volno];                                      /* volume in question */
    if (vol->state == volume_unallocated)                   /* nothing there */
        return ENOENT;

    /*
     * Round the label size up to a whole number of sectors.  The mask
     * must be ~(DEV_BSIZE - 1): without the complement the result is
     * smaller than the label itself.
     */
    lp = (struct disklabel *) Malloc((sizeof(struct disklabel) + (DEV_BSIZE - 1)) & ~(DEV_BSIZE - 1));
    if (lp == 0)
        return ENOMEM;

    get_volume_label(vol, lp);                              /* get the label */

    /*
     * Now write to disk.  This code is derived from the
     * system writedisklabel (), which does silly things
     * like reading the label and refusing to write
     * unless it's already there.
     */
    bp = geteblk((int) lp->d_secsize);                      /* get a buffer */
    bp->b_dev = minor(vol->devno) | (CDEV_MAJOR << MAJORDEV_SHIFT); /* our own raw volume */
    bp->b_blkno = LABELSECTOR * ((int) lp->d_secsize / DEV_BSIZE);
    bp->b_bcount = lp->d_secsize;
    bzero(bp->b_data, lp->d_secsize);
    dlp = (struct disklabel *) bp->b_data;
    *dlp = *lp;
    Free(lp);                                               /* the buffer has its own copy now */
    bp->b_flags &= ~B_INVAL;
    bp->b_flags |= B_BUSY | B_WRITE;
    vinumstrategy(bp);                                      /* write it out */
    error = biowait(bp);
    bp->b_flags |= B_INVAL | B_AGE;
    brelse(bp);
    return error;
}
/*
 * Initialize a subdisk.
 *
 * Placeholder: sdno is not looked at and 0 is always returned.
 */
int
initsd(int sdno)
{
    (void) sdno;                                            /* not used yet */
    return 0;
}

137
lkm/vinum/lock.c Normal file
View File

@ -0,0 +1,137 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: lock.c,v 1.6 1998/07/28 06:32:57 grog Exp grog $
*/
#define REALLYKERNEL
#include "vinumhdr.h"
/* Lock routines. Currently, we lock either an individual volume
* or the global configuration. I don't think tsleep and
* wakeup are SMP safe. FIXME XXX */
/* Lock a volume, wait if it's in use */
int
lockvol(struct volume *vol)
{
int error;
while ((vol->flags & VF_LOCKED) != 0) {
vol->flags |= VF_LOCKING;
/* It would seem to make more sense to sleep on
* the address 'vol'. Unfortuntaly we can't
* guarantee that this address won't change due to
* table expansion. The address we choose won't change. */
if ((error = tsleep(&vinum_conf.volume + vol->devno,
PRIBIO | PCATCH,
"volock",
0)) != 0)
return error;
}
vol->flags |= VF_LOCKED;
return 0;
}
/*
 * Unlock a volume.  If VF_LOCKING shows that somebody went to sleep
 * waiting for it, clear that flag and wake the sleepers up.
 */
void
unlockvol(struct volume *vol)
{
    vol->flags &= ~VF_LOCKED;
    if ((vol->flags & VF_LOCKING) == 0)                     /* nobody waiting */
        return;
    vol->flags &= ~VF_LOCKING;
    wakeup(&vinum_conf.volume + vol->devno);                /* same address that lockvol sleeps on */
}
/* Lock a plex, wait if it's in use */
int
lockplex(struct plex *plex)
{
int error;
while ((plex->flags & VF_LOCKED) != 0) {
plex->flags |= VF_LOCKING;
/* It would seem to make more sense to sleep on
* the address 'plex'. Unfortuntaly we can't
* guarantee that this address won't change due to
* table expansion. The address we choose won't change. */
if ((error = tsleep(&vinum_conf.plex + plex->sdnos[0],
PRIBIO | PCATCH,
"plexlk",
0)) != 0)
return error;
}
plex->flags |= VF_LOCKED;
return 0;
}
/*
 * Unlock a plex.  If VF_LOCKING shows that somebody went to sleep
 * waiting for it, clear that flag and issue a wakeup on the address
 * derived from the plex number.
 */
void
unlockplex(struct plex *plex)
{
    plex->flags &= ~VF_LOCKED;
    if ((plex->flags & VF_LOCKING) == 0)                    /* nobody waiting */
        return;
    plex->flags &= ~VF_LOCKING;
    wakeup(&vinum_conf.plex + plex->plexno);
}
/* Get a lock for the global config, wait if it's not available */
int
lock_config(void)
{
int error;
while ((vinum_conf.flags & VF_LOCKED) != 0) {
vinum_conf.flags |= VF_LOCKING;
if ((error = tsleep(&vinum_conf, PRIBIO | PCATCH, "vincfg", 0)) != 0)
return error;
}
vinum_conf.flags |= VF_LOCKED;
return 0;
}
/*
 * Release the global configuration lock and wake up any waiters
 * recorded by VF_LOCKING.
 */
void
unlock_config(void)
{
    vinum_conf.flags &= ~VF_LOCKED;
    if ((vinum_conf.flags & VF_LOCKING) == 0)               /* nobody waiting */
        return;
    vinum_conf.flags &= ~VF_LOCKING;
    wakeup(&vinum_conf);
}

40
lkm/vinum/makestatetext Executable file
View File

@ -0,0 +1,40 @@
#!/bin/sh
# Make statetexts.h from vinumstate.h
#
# For each of the drive, subdisk, plex and volume state enums, pick the
# lines of vinumstate.h that contain an enumerator followed by a comma
# (lines containing "=" are skipped) and turn each one into a quoted string
# in the corresponding text array.
#
# $Id: makestatetext,v 1.4 1998/03/13 05:36:16 grog Exp grog $

infile=vinumstate.h
ofile=statetexts.h

cat <COPYRIGHT >"$ofile"
echo >>"$ofile" "/* Created by $0 on" `date`. "Do not edit */"
echo >>"$ofile"
echo >>"$ofile" "/* Drive state texts */"
echo >>"$ofile" "char *drivestatetext [] =
{ "
# The bracket expressions are spelt out in full: a range like A-z depends on
# the locale's collating order and only matches "_" by accident.
grep -E -e 'drive_[A-Za-z0-9_]*,' "$infile" | grep -v = | sed 's: *drive_\([^,]*\).*: \"\1\",:' >>"$ofile"
cat <<FOO >>"$ofile"
};
/* Subdisk state texts */
char *sdstatetext [] =
{
FOO
grep -E -e 'sd_[A-Za-z0-9_]*,' "$infile" | grep -v = | sed 's: *sd_\([^,]*\).*: \"\1\",:' >>"$ofile"
cat <<FOO >>"$ofile"
};
/* Plex state texts */
char *plexstatetext [] =
{
FOO
grep -E -e 'plex_[A-Za-z0-9_]*,' "$infile" | grep -v = | sed 's: *plex_\([^,]*\).*: \"\1\",:' >>"$ofile"
cat <<FOO >>"$ofile"
};
/* Volume state texts */
char *volstatetext [] =
{
FOO
grep -E -e 'volume_[A-Za-z0-9_]*,' "$infile" | grep -v = | sed 's: *volume_\([^,]*\).*: \"\1\",:' >>"$ofile"
cat <<FOO >>"$ofile"
};
FOO

186
lkm/vinum/memory.c Normal file
View File

@ -0,0 +1,186 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: memory.c,v 1.16 1998/08/08 04:43:22 grog Exp grog $
*/
#define REALLYKERNEL
#define USES_VM
#include "vinumhdr.h"
extern jmp_buf command_fail; /* return on a failed command */
#if __FreeBSD__ >= 3
/* Why aren't these declared anywhere? XXX */
int setjmp(jmp_buf);
void longjmp(jmp_buf, int);
#endif
void freedatabuf(struct mc *me);
caddr_t allocdatabuf(struct mc *me);
/*
 * Replace *table with a newly allocated table of newsize bytes.
 *
 * Nothing happens unless newsize is greater than oldsize.  If there
 * was a table before, its first oldsize bytes are copied to the new
 * one and the old one is freed.  The bytes beyond oldsize are not
 * initialized here; that's up to the caller.
 */
void
expand_table(void **table, int oldsize, int newsize)
{
    int *newtable;

    if (newsize <= oldsize)                                 /* big enough already */
        return;

    newtable = (int *) Malloc(newsize);                     /* allocate a new table */
    CHECKALLOC(newtable, "vinum: Can't expand table\n");
    if (*table != NULL) {                                   /* already something there, */
        bcopy((char *) *table, (char *) newtable, oldsize); /* copy it to the new table */
        Free(*table);
    }
    *table = newtable;
}
#ifndef DEBUG
/*
 * Increase the size of a request block by RQELTS elements and clear
 * the elements which have been added.
 *
 * NOTE(review): the sizes are calculated from prq->requests, but the
 * counter which gets bumped is prq->rqcount - confirm that this is
 * what is wanted.
 */
void
expandrq(struct plexrq *prq)
{
    int oldbytes = prq->requests * sizeof(struct rqelement);
    int newbytes = (prq->requests + RQELTS) * sizeof(struct rqelement);

    expand_table((void **) &prq->rqe, oldbytes, newbytes);
    bzero(&prq->rqe[prq->requests], RQELTS * sizeof(struct rqelement)); /* clear the new part */
    prq->rqcount += RQELTS;
}
#endif
#if DEBUG /* XXX debug */
#define MALLOCENTRIES 16384                                 /* size of the allocation trace table */
int malloccount = 0;                                        /* number of entries in use in malloced [] */
int highwater = 0;                                          /* highest index ever allocated */
static struct mc malloced[MALLOCENTRIES];                   /* one entry per allocation being traced */
static int total_malloced;                                  /* sum of the sizes in malloced [] */
/*
 * Debug version of Malloc: allocate size bytes and note the allocation
 * in malloced [], along with the source file and line which asked for
 * it.
 *
 * Returns the address of the memory, or 0 if the trace table is full
 * or malloc () returns NULL.  Debugger () is entered if the new memory
 * overlaps something which is already in the table.
 */
caddr_t
MMalloc(int size, char *file, int line)
{
    caddr_t result;
    int i;
    static int seq = 0;
    int s;

    if (malloccount >= MALLOCENTRIES) {                     /* too many */
        printf("vinum: can't allocate table space to trace memory allocation\n");
        return 0;                                           /* can't continue */
    }
    result = malloc(size, M_DEVBUF, M_WAITOK);              /* use malloc for smaller and irregular stuff */
    if (result == NULL) {
        printf("vinum: can't allocate %d bytes from %s:%d\n", size, file, line);
        return result;
    }
    s = splhigh();
    for (i = 0; i < malloccount; i++) {
        if (((result + size) > malloced[i].address)
            && (result < malloced[i].address + malloced[i].size)) /* overlap */
            Debugger("Malloc overlap");
    }
    i = malloccount++;
    total_malloced += size;
    /*
     * Clear the entry first.  This leaves flags at 0 (allocation via
     * malloc) and databuf empty, rather than copying them from an
     * uninitialized local, and it guarantees that the file name
     * below is terminated.
     */
    bzero(&malloced[i], sizeof(malloced[i]));
    malloced[i].address = result;
    malloced[i].size = size;
    malloced[i].line = line;
    malloced[i].seq = seq++;
    /* the name is treated as a 16 byte field: at most 15 characters and the \0 */
    bcopy(file, malloced[i].file, min(strlen(file), 16 - 1));
    if (malloccount > highwater)
        highwater = malloccount;
    splx(s);
    return result;
}
/*
 * Debug version of Free: look mem up in malloced [], clear the memory,
 * free it and close the gap in the table.
 *
 * If mem isn't in the table, nothing is freed: complain, naming the
 * calling file and line, and enter the debugger.
 */
void
FFree(void *mem, char *file, int line)
{
    int slot;
    int ipl;

    ipl = splhigh();
    for (slot = 0; slot < malloccount; slot++)
        if (malloced[slot].address == (caddr_t) mem)        /* found it */
            break;

    if (slot >= malloccount) {                              /* never heard of it */
        splx(ipl);
        printf("Freeing unallocated data at 0x%08x from %s, line %d\n", (int) mem, file, line);
        Debugger("Free");
        return;
    }
    bzero(mem, malloced[slot].size);                        /* XXX */
    free(mem, M_DEVBUF);
    malloccount--;
    total_malloced -= malloced[slot].size;
    if (slot < malloccount)                                 /* more coming after: move them down */
        bcopy(&malloced[slot + 1], &malloced[slot], (malloccount - slot) * sizeof(struct mc));
    splx(ipl);
}
/*
 * Fill in the struct meminfo at data with the current allocation
 * trace information: number of entries, total size, the address of
 * the trace table and the high water mark.
 */
void
vinum_meminfo(caddr_t data)
{
    struct meminfo *info = (struct meminfo *) data;

    info->highwater = highwater;
    info->malloced = malloced;
    info->total_malloced = total_malloced;
    info->mallocs = malloccount;
}
/*
 * Return one entry of the allocation trace table.
 *
 * On entry, the first word of data is the index of the entry wanted.
 * On return, data contains address, size, line, seq and file of that
 * entry as a struct mc.  Returns ENOENT if the index isn't in use,
 * otherwise 0.
 */
int
vinum_mallocinfo(caddr_t data)
{
    struct mc *reply = (struct mc *) data;
    unsigned int slot = *(int *) data;                      /* 1st word is index; get it before we overwrite it */
    struct mc *entry;

    if (slot >= malloccount)
        return ENOENT;

    entry = &malloced[slot];
    reply->address = entry->address;
    reply->size = entry->size;
    reply->line = entry->line;
    reply->seq = entry->seq;
    bcopy(entry->file, reply->file, 16);
    return 0;
}
#endif

206
lkm/vinum/parser.c Normal file
View File

@ -0,0 +1,206 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: parser.c,v 1.11 1998/08/10 08:50:42 grog Exp grog $
*/
/* This file contains the parser for the configuration routines. It's used
* both in the kernel and in the user interface program, thus the separate file. */
/* Go through a text and split up into text tokens. These are either non-blank
* sequences, or any sequence (except \0) enclosed in ' or ". Embedded ' or
* " characters may be escaped by \, which otherwise has no special meaning.
*
* Delimit by following with a \0, and return pointers to the starts at token [].
* Return the number of tokens found as the return value.
*
* This method has the restriction that a closing " or ' must be followed by
* grey space.
*
* Error conditions are end of line before end of quote, or no space after
* a closing quote. In this case, tokenize() returns -1. */
#include <sys/param.h>
#ifdef KERNEL
#undef KERNEL /* XXX */
#define REALLYKERNEL
#else
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#endif
/* All this mess for a single struct definition */
#include <sys/uio.h>
#include <sys/namei.h>
#include <sys/disklabel.h>
#include <sys/mount.h>
#include <sys/device.h>
#include <sys/disk.h>
#include "sys/buf.h"
#include <vinumvar.h>
#include "vinumkw.h"
#include "vinumio.h"
#include "vinumext.h"
#ifdef REALLYKERNEL
/*
 * Check for white space.  In the kernel only blank and tab count.
 * The argument is parenthesized so that any expression can be passed,
 * but it's still evaluated twice: don't pass anything with side effects.
 */
#define isspace(c) (((c) == ' ') || ((c) == '\t'))
#else /* get it from the headers */
#include <ctype.h>
#endif
/* enum keyword is defined in vinumvar.h */
#define keypair(x) { #x, kw_##x } /* create pair "foo", kw_foo */
#define flagkeypair(x) { "-"#x, kw_##x } /* create pair "-foo", kw_foo */
/* Create the initializer for a struct keywordset: number of entries in table x, then x itself */
#define KEYWORDSET(x) {sizeof (x) / sizeof (struct _keywords), x}
/* Normal keywords. These are all the words that vinum knows.
 * get_keyword () looks a word up in this table by comparing it with
 * each name in turn. */
struct _keywords keywords[] =
{keypair(drive),
keypair(sd),
keypair(subdisk),
keypair(plex),
keypair(volume),
keypair(vol),
keypair(setupstate),
keypair(readpol),
keypair(org),
keypair(name),
keypair(writethrough),
keypair(writeback),
keypair(raw),
keypair(device),
keypair(concat),
keypair(raid5),
keypair(striped),
keypair(plexoffset),
keypair(driveoffset),
keypair(length),
keypair(len),
keypair(state),
keypair(round),
keypair(prefer),
keypair(rename),
keypair(detached),
/* NOTE(review): if KERNEL was defined, it was #undef'd near the top of
 * this file, so this test looks as if it's always true here and the
 * kernel gets these keywords too. Confirm whether that is intended
 * before changing it to REALLYKERNEL. */
#ifndef KERNEL /* for vinum(8) only */
#ifdef DEBUG
keypair(debug),
#endif
keypair(attach),
keypair(detach),
keypair(printconfig),
keypair(replace),
keypair(create),
keypair(read),
keypair(modify),
keypair(list),
keypair(l),
keypair(ld),
keypair(ls),
keypair(lp),
keypair(lv),
keypair(info),
keypair(set),
keypair(rm),
keypair(init),
keypair(label),
keypair(resetconfig),
keypair(start),
keypair(stop),
keypair(resetstats)
#endif
};
/* The table above along with its size, in the form that get_keyword () takes */
struct keywordset keyword_set = KEYWORDSET(keywords);
#ifndef KERNEL
/* Flag keywords. flagkeypair puts a "-" in front of each name, so
 * these are "-f", "-d", "-v", "-s" and "-r". */
struct _keywords flag_keywords[] =
{flagkeypair(f),
flagkeypair(d),
flagkeypair(v),
flagkeypair(s),
flagkeypair(r)
};
/* The flag table along with its size, in the form that get_keyword () takes */
struct keywordset flag_set = KEYWORDSET(flag_keywords);
#endif
/*
 * Split the text at cptr into tokens.  The text is modified: the
 * character after each token is overwritten with \0.
 *
 * token [] gets a pointer to the start of each token.  There is no
 * check for overflow: the caller must supply enough entries.  A quoted
 * token starts at its opening quote and includes the closing quote.
 *
 * Returns the number of tokens found before the end of the text, a
 * newline or a '#'.  Returns -1 if a quote isn't closed before the
 * end of the line, or if a closing quote isn't followed by white
 * space.
 */
int
tokenize(char *cptr, char *token[])
{
    char delim;                                             /* delimiter for searching for the partner */
    int tokennr;                                            /* index of this token */

    tokennr = 0;                                            /* none found yet */

    for (;;) {
        /* the cast keeps the <ctype.h> version defined for characters >= 0x80 */
        while (isspace((unsigned char) *cptr))
            cptr++;                                         /* skip initial white space */
        if ((*cptr == '\0') || (*cptr == '\n') || (*cptr == '#')) /* end of line */
            return tokennr;                                 /* return number of tokens found */
        delim = *cptr;
        token[tokennr] = cptr;                              /* point to it */
        tokennr++;                                          /* one more */
        /* XXX this is broken.  It leaves superfluous \\ characters in the text */
        if ((delim == '\'') || (delim == '"')) {            /* delimitered */
            for (;;) {
                cptr++;
                if ((*cptr == delim) && (cptr[-1] != '\\')) { /* found the partner */
                    cptr++;                                 /* move on past */
                    if (!isspace((unsigned char) *cptr))    /* error, no space after closing quote */
                        return -1;
                    *cptr++ = '\0';                         /* delimit */
                    /*
                     * That's the end of this token.  If we don't
                     * leave the loop here, we carry on looking for
                     * another quote and end up returning -1 for
                     * every quoted token.
                     */
                    break;
                } else if ((*cptr == '\0') || (*cptr == '\n')) /* end of line */
                    return -1;
            }
        } else {                                            /* not quoted */
            while ((*cptr != '\0') && (!isspace((unsigned char) *cptr)) && (*cptr != '\n'))
                cptr++;
            if (*cptr != '\0')                              /* not end of the line, */
                *cptr++ = '\0';                             /* delimit and move to the next */
        }
    }
}
/* Find a keyword and return an index */
enum keyword
get_keyword(char *name, struct keywordset *keywordset)
{
int i;
struct _keywords *keywords = keywordset->k; /* point to the keywords */
for (i = 0; i < keywordset->size; i++)
if (!strcmp(name, keywords[i].name))
return (enum keyword) keywords[i].keyword;
return kw_invalid_keyword;
}

882
lkm/vinum/request.c Normal file
View File

@ -0,0 +1,882 @@
/* XXX to do:
* Decide where we need splbio ()
*/
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: request.c,v 1.17 1998/08/13 06:04:47 grog Exp grog $
*/
#define REALLYKERNEL
#include "vinumhdr.h"
#include "request.h"
#include <miscfs/specfs/specdev.h>
#include <sys/resourcevar.h>
/* pointer to ioctl p parameter, to save passing it around */
extern struct proc *myproc;
enum requeststatus bre(struct request *rq,
int plexno,
daddr_t * diskstart,
daddr_t diskend);
enum requeststatus bre5(struct request *rq,
int plexno,
daddr_t * diskstart,
daddr_t diskend);
enum requeststatus build_read_request(struct request *rq, int volplexno);
enum requeststatus build_write_request(struct request *rq);
enum requeststatus build_rq_buffer(struct rqelement *rqe, struct plex *plex);
void freerq(struct request *rq);
void free_rqg(struct rqgroup *rqg);
int find_alternate_sd(struct request *rq);
int check_range_covered(struct request *);
void complete_rqe(struct buf *bp);
void complete_raid5_write(struct rqelement *);
int abortrequest(struct request *rq, int error);
void sdio(struct buf *bp);
void sdio_done(struct buf *bp);
int vinum_bounds_check(struct buf *bp, struct volume *vol);
caddr_t allocdatabuf(struct rqelement *rqe);
void freedatabuf(struct rqelement *rqe);
/*
 * Strategy routine: decide what to do with a transfer, depending on
 * the kind of vinum object which the device number in bp refers to.
 *
 * Subdisk I/O is handed to sdio ().  Volume I/O is refused with EIO
 * unless the volume is up, and finished without a transfer if
 * vinum_bounds_check () doesn't return a positive value.  Otherwise
 * it is started in the same way as plex I/O, with vinumstart ().
 * Anything else, including drives, gets EIO.
 */
void
vinumstrategy(struct buf *bp)
{
    BROKEN_GDB;
    int volno;
    struct volume *vol = NULL;
    struct devcode *device = (struct devcode *) &bp->b_dev; /* decode device number */

    switch (device->type) {
    case VINUM_SD_TYPE:
        sdio(bp);
        return;

        /*
         * In fact, vinum doesn't handle drives: they're
         * handled directly by the disk drivers
         */
    case VINUM_DRIVE_TYPE:
    default:
        bp->b_error = EIO;                                  /* I/O error */
        bp->b_flags |= B_ERROR;
        biodone(bp);
        return;

    case VINUM_VOLUME_TYPE:                                 /* volume I/O */
        volno = VOLNO(bp->b_dev);
        vol = &VOL[volno];
        if (vol->state != volume_up) {                      /* can't access this volume */
            bp->b_error = EIO;                              /* I/O error */
            bp->b_flags |= B_ERROR;
            biodone(bp);
            return;
        }
        if (vinum_bounds_check(bp, vol) <= 0) {             /* don't like them bounds */
            biodone(bp);                                    /* have nothing to do with this */
            return;
        }
        /* FALLTHROUGH */

        /*
         * Plex I/O is pretty much the same as volume I/O
         * for a single plex.  vinumstart () works out for
         * itself from bp->b_dev which of the two it has.
         */
    case VINUM_PLEX_TYPE:
        bp->b_resid = bp->b_bcount;                         /* transfer everything */
        vinumstart(bp, 0);
        return;
    }
}
/*
 * Start a transfer.
 *
 * Build a struct request for the user buffer bp and hand it to
 * launch_requests ().  Returns -1 if the transfer can't be started,
 * in which case bp has been completed with biodone (); otherwise
 * returns what launch_requests () returns.
 *
 * Parameter reviveok is set when doing
 * transfers for revives: it allows transfers to
 * be started immediately when a revive is in
 * progress.  During revive, normal transfers
 * are queued if they share address space with
 * a currently active revive operation.
 */
int
vinumstart(struct buf *bp, int reviveok)
{
    BROKEN_GDB;
    int plexno;
    struct volume *vol;
    struct request *rq;                                     /* build up our request here */
    enum requeststatus status;
    daddr_t diskaddr;                                       /* start offset of plex transfer */

    /*
     * XXX In these routines, we're assuming that
     * we will always be called with bp->b_bcount
     * which is a multiple of the sector size.  This
     * is a reasonable assumption, since we are only
     * called from system routines.  Should we check
     * anyway?
     */
    if ((bp->b_bcount % DEV_BSIZE) != 0) {                  /* bad length */
        bp->b_error = EINVAL;                               /* invalid size */
        bp->b_flags |= B_ERROR;
        biodone(bp);
        return -1;
    }
    rq = (struct request *) Malloc(sizeof(struct request)); /* allocate a request struct */
    if (rq == NULL) {                                       /* can't do it */
        bp->b_error = ENOMEM;                               /* can't get memory */
        bp->b_flags |= B_ERROR;
        biodone(bp);
        return -1;
    }
    bzero(rq, sizeof(struct request));

    /*
     * Note the volume ID.  This can be NULL, which
     * the request building functions use as an
     * indication for single plex I/O
     */
    rq->bp = bp;                                            /* and the user buffer struct */

    if (DEVTYPE(bp->b_dev) == VINUM_VOLUME_TYPE) {          /* it's a volume, */
        rq->volplex.volno = VOLNO(bp->b_dev);               /* get the volume number */
        vol = &VOL[rq->volplex.volno];                      /* and point to it */
        vol->active++;                                      /* one more active request */
    } else {
        vol = NULL;                                         /* no volume */
        rq->volplex.plexno = PLEXNO(bp->b_dev);             /* point to the plex */
        rq->isplex = 1;                                     /* note that it's a plex */
    }

    if (bp->b_flags & B_READ) {
        /*
         * This is a read request.  Decide
         * which plex to read from.
         *
         * There's a potential race condition here,
         * since we're not locked, and we could end
         * up multiply incrementing the round-robin
         * counter.  This doesn't have any serious
         * effects, however.
         */
        if (vol != NULL) {
            vol->reads++;
            vol->bytes_read += bp->b_bcount;
            plexno = vol->preferred_plex;                   /* get the plex to use */
            if (plexno < 0) {                               /* round robin */
                plexno = vol->last_plex_read;
                vol->last_plex_read++;
                if (vol->last_plex_read == vol->plexes)     /* got to the end? */
                    vol->last_plex_read = 0;                /* wrap around */
            }
            status = build_read_request(rq, plexno);        /* build a request */
        } else {
            diskaddr = bp->b_blkno;                         /* start offset of transfer */
            status = bre(rq,                                /* build a request list */
                rq->volplex.plexno,
                &diskaddr,
                diskaddr + (bp->b_bcount / DEV_BSIZE));
        }
    } else {
        /*
         * This is a write operation.  We write to all
         * plexes.  If this is a RAID 5 plex, we must also
         * update the parity stripe.
         */
        if (vol != NULL) {
            vol->writes++;
            vol->bytes_written += bp->b_bcount;
            status = build_write_request(rq);
        } else {                                            /* plex I/O */
            diskaddr = bp->b_blkno;                         /* start offset of transfer */
            status = bre(rq,
                PLEXNO(bp->b_dev),
                &diskaddr,
                bp->b_blkno + (bp->b_bcount / DEV_BSIZE));  /* build requests for the plex */
        }
    }

    /*
     * Reads and writes fail in the same way.  The routines which
     * build the request may already have completed bp (bre () does
     * so when it can't get memory), so only call biodone () if
     * B_DONE isn't set yet: otherwise we'd complete the buffer twice.
     */
    if ((status > REQUEST_RECOVERED)                        /* can't satisfy it */
        ||(bp->b_flags & B_DONE)) {                         /* XXX shouldn't get this without bad status */
        if (status == REQUEST_DOWN) {                       /* not enough subdisks */
            bp->b_error = EIO;                              /* I/O error */
            bp->b_flags |= B_ERROR;
        }
        if ((bp->b_flags & B_DONE) == 0)
            biodone(bp);
        freerq(rq);
        return -1;
    }
    return launch_requests(rq, reviveok);                   /* now start the requests if we can */
}
/*
 * Call the low-level strategy routines to
 * perform the requests in a struct request.
 *
 * If the request is flagged XFR_REVIVECONFLICT and reviveok is not
 * set, the request is not started: it's put on the end of the waitlist
 * of the first reviving plex of the volume, and we return 0.
 *
 * Returns -1, after calling abortrequest (), if the request contains
 * no request groups, and 0 once all the elements which aren't flagged
 * XFR_BAD_SUBDISK have been passed to their drivers.
 */
int
launch_requests(struct request *rq, int reviveok)
{
    struct rqgroup *rqg;
    int rqno;                                               /* loop index */
    struct rqelement *rqe;                                  /* current element */
    int s;

    /*
     * First find out whether we're reviving, and the
     * request contains a conflict.  If so, we hang
     * the request off plex->waitlist of the first
     * plex we find which is reviving
     */
    if ((rq->flags & XFR_REVIVECONFLICT)                    /* possible revive conflict */
        &&(!reviveok)) {                                    /* and we don't want to do it now, */
        struct volume *vol = &VOL[VOLNO(rq->bp->b_dev)];
        struct plex *plex;
        int plexno;

        for (plexno = 0; plexno < vol->plexes; plexno++) {  /* find the reviving plex */
            plex = &PLEX[vol->plex[plexno]];
            if (plex->state == plex_reviving)               /* found it */
                break;
        }
        if (plexno < vol->plexes) {                         /* found it? */
            if (plex->waitlist == NULL)                     /* nothing waiting yet, */
                plex->waitlist = rq;                        /* we're the first */
            else {
                struct request *waitlist = plex->waitlist;  /* point to the waiting list */

                while (waitlist->next != NULL)              /* find the end */
                    waitlist = waitlist->next;
                waitlist->next = rq;                        /* hook our request there */
            }
            return 0;                                       /* and get out of here */
        } else                                              /* bad vinum, bad */
            printf("vinum: can't find reviving plex for volume %s\n", vol->name);
    }
    rq->active = 0;                                         /* nothing yet */
    /* XXX This is probably due to a bug */
    if (rq->rqg == NULL) {                                  /* no request */
        abortrequest(rq, EINVAL);
        return -1;
    }
#if DEBUG
    if (debug & DEBUG_ADDRESSES)
        printf("Request: %x\nWrite dev 0x%x, offset 0x%x, length %ld\n",
            (u_int) rq,
            rq->bp->b_dev,
            rq->bp->b_blkno,
            rq->bp->b_bcount);                              /* XXX */
    vinum_conf.lastrq = (int) rq;
    vinum_conf.lastbuf = rq->bp;
#endif
    for (rqg = rq->rqg; rqg != NULL; rqg = rqg->next) {     /* through the whole request chain */
        rqg->active = rqg->count;                           /* they're all active */
        rq->active++;                                       /* one more active request group */
        for (rqno = 0; rqno < rqg->count; rqno++) {
            rqe = &rqg->rqe[rqno];
            if (rqe->flags & XFR_BAD_SUBDISK)               /* this subdisk is bad, */
                rqg->active--;                              /* one less active request */
            else {
                if ((rqe->b.b_flags & B_READ) == 0)
                    rqe->b.b_vp->v_numoutput++;             /* one more output going */
#if DEBUG
                if (debug & DEBUG_ADDRESSES)
                    printf("  %s dev 0x%x, sd %d, offset 0x%x, devoffset 0x%x, length %ld\n",
                        rqe->b.b_flags & B_READ ? "Read" : "Write",
                        rqe->b.b_dev,
                        rqe->sdno,
                        (u_int) (rqe->b.b_blkno - SD[rqe->sdno].driveoffset),
                        rqe->b.b_blkno,
                        rqe->b.b_bcount);                   /* XXX */
                if (debug & DEBUG_NUMOUTPUT)
                    printf("  vinumstart sd %d numoutput %ld\n",
                        rqe->sdno,
                        rqe->b.b_vp->v_numoutput);
#endif
                /* fire off the request */
                s = splbio();
                (*bdevsw[major(rqe->b.b_dev)]->d_strategy) (&rqe->b);
                splx(s);
            }
            /* XXX Do we need caching?  Think about this more */
        }
    }
    return 0;
}
/*
 * Define the low-level requests needed to perform a high-level I/O
 * operation on the specific plex 'plexno'.
 *
 * rq is the high-level request; every request group built here is
 * allocated with allocrqg() and so hung off rq.  *diskaddr is the
 * plex-relative sector at which to start, and diskend the sector just
 * past the end of the transfer.  On return *diskaddr has been advanced
 * past the last sector for which a request element was built, so the
 * caller can see how far we got.
 *
 * Return values:
 *   REQUEST_OK      the requests were built.
 *   REQUEST_EOF     the transfer is at least partially outside the
 *                   bounds of the plex's subdisks.
 *   REQUEST_DOWN    the plex has an organization we don't know about.
 *   REQUEST_ENOMEM  out of memory; the user's buffer has already been
 *                   completed with ENOMEM.
 * If a subdisk or the plex is not up, whatever non-zero status
 * checksdstate() hands back is returned immediately, so that the
 * caller (build_read_request) can try alternatives.
 *
 * allocrqg() zeroes each new group, and 0 is a valid subdisk number,
 * so we explicitly set rqe->sdno in every element we use and never
 * look at the others.
 */
enum requeststatus
bre(struct request *rq,
    int plexno,
    daddr_t * diskaddr,
    daddr_t diskend)
{
    BROKEN_GDB;
    int sdno;
    struct sd *sd;
    struct rqgroup *rqg;
    struct buf *bp;                     /* user's bp */
    struct plex *plex;
    enum requeststatus status;          /* return value */
    daddr_t plexoffset;                 /* offset of transfer in plex */
    daddr_t stripebase;                 /* base address of stripe (1st subdisk) */
    daddr_t stripeoffset;               /* offset in stripe */
    daddr_t blockoffset;                /* offset in stripe on subdisk */
    struct rqelement *rqe;              /* point to this request information */
    daddr_t diskstart = *diskaddr;      /* remember where this transfer starts */

    bp = rq->bp;                        /* buffer pointer */
    status = REQUEST_OK;                /* return value: OK until proven otherwise */
    plex = &PLEX[plexno];               /* point to the plex */

    switch (plex->organization) {
    case plex_concat:
        for (sdno = 0; sdno < plex->subdisks; sdno++) {
            sd = &SD[plex->sdnos[sdno]];
            if ((*diskaddr < (sd->plexoffset + sd->sectors))    /* the request starts before the end of this */
                && (diskend > sd->plexoffset)) {                /* subdisk and ends after the start of this sd */
                if ((sd->state != sd_up) || (plex->state != plex_up)) {
                    enum requeststatus s;

                    s = checksdstate(sd, rq, *diskaddr, diskend);       /* do we need to change state? */
                    if (s)                                      /* give up? */
                        return s;                               /* yup */
                }
                rqg = allocrqg(rq, 1);                          /* space for the request */
                if (rqg == NULL) {                              /* malloc failed */
                    bp->b_flags |= B_ERROR;
                    bp->b_error = ENOMEM;
                    biodone(bp);
                    return REQUEST_ENOMEM;
                }
                rqg->plexno = plexno;

                rqe = &rqg->rqe[0];                             /* point to the element */
                rqe->rqg = rqg;                                 /* group */
                rqe->sdno = sd->sdno;                           /* put in the subdisk number */
                plexoffset = max(sd->plexoffset, *diskaddr);    /* start offset in plex */
                rqe->sdoffset = plexoffset - sd->plexoffset;    /* start offset in subdisk */
                rqe->useroffset = plexoffset - diskstart;       /* start offset in user buffer */
                rqe->dataoffset = 0;
                rqe->datalen = min(diskend - *diskaddr,         /* number of sectors to transfer in this sd */
                    sd->sectors - rqe->sdoffset);
                rqe->groupoffset = 0;                           /* no groups for concatenated plexes */
                rqe->grouplen = 0;
                rqe->buflen = rqe->datalen;                     /* buffer length is data buffer length */
                rqe->flags = 0;
                rqe->driveno = sd->driveno;
                *diskaddr += rqe->datalen;                      /* bump the address */
                /*
                 * NOTE(review): build_rq_buffer() itself calls
                 * abortrequest() before it reports failure; confirm
                 * that this path cannot complete bp twice.
                 */
                if (build_rq_buffer(rqe, plex)) {               /* build the buffer */
                    deallocrqg(rqg);
                    bp->b_flags |= B_ERROR;
                    bp->b_error = ENOMEM;
                    biodone(bp);
                    return REQUEST_ENOMEM;                      /* can't do it */
                }
            }
            /*
             * datalen never exceeds diskend - *diskaddr, so *diskaddr
             * can reach diskend but not pass it: test for equality too,
             * or we never leave the loop early.
             */
            if (*diskaddr >= diskend)                           /* we're finished, */
                break;                                          /* get out of here */
        }
        /*
         * We've been through all the subdisks.  If part of the
         * transfer is still not covered, it lies outside the plex.
         * Say so, as the striped case does, rather than claiming
         * success without having made progress.
         */
        if (*diskaddr < diskend)
            status = REQUEST_EOF;
        break;

    case plex_striped:
        while (*diskaddr < diskend) {                           /* until we get it all sorted out */
            /* The offset of the start address from
             * the start of the stripe */
            stripeoffset = *diskaddr % (plex->stripesize * plex->subdisks);

            /* The plex-relative address of the
             * start of the stripe */
            stripebase = *diskaddr - stripeoffset;

            /* The number of the subdisk in which
             * the start is located */
            sdno = stripeoffset / plex->stripesize;

            /* The offset from the beginning of the stripe
             * on this subdisk */
            blockoffset = stripeoffset % plex->stripesize;

            sd = &SD[plex->sdnos[sdno]];                        /* the subdisk in question */
            if ((sd->state != sd_up) || (plex->state != plex_up)) {
                enum requeststatus s;

                s = checksdstate(sd, rq, *diskaddr, diskend);   /* do we need to change state? */
                if (s)                                          /* give up? */
                    return s;                                   /* yup */
            }
            rqg = allocrqg(rq, 1);                              /* space for the request */
            if (rqg == NULL) {                                  /* malloc failed */
                bp->b_flags |= B_ERROR;
                bp->b_error = ENOMEM;
                biodone(bp);
                return REQUEST_ENOMEM;
            }
            rqg->plexno = plexno;

            rqe = &rqg->rqe[0];                                 /* point to the element */
            rqe->rqg = rqg;
            rqe->sdoffset = stripebase / plex->subdisks + blockoffset;  /* start offset in this subdisk */
            rqe->useroffset = *diskaddr - diskstart;            /* the offset of the start in the user buffer */
            rqe->dataoffset = 0;
            rqe->datalen = min(diskend - *diskaddr,             /* the amount remaining to transfer */
                plex->stripesize - blockoffset);                /* and the amount left in this stripe */
            rqe->groupoffset = 0;                               /* no groups for striped plexes */
            rqe->grouplen = 0;
            rqe->buflen = rqe->datalen;                         /* buffer length is data buffer length */
            rqe->flags = 0;
            rqe->sdno = sd->sdno;                               /* put in the subdisk number */
            rqe->driveno = sd->driveno;

            if (rqe->sdoffset >= sd->sectors) {                 /* starts beyond the end of the subdisk? */
                deallocrqg(rqg);
                return REQUEST_EOF;
            } else if (rqe->sdoffset + rqe->datalen > sd->sectors)      /* ends beyond the end of the subdisk? */
                rqe->datalen = sd->sectors - rqe->sdoffset;     /* yes, truncate */

            if (build_rq_buffer(rqe, plex)) {                   /* build the buffer */
                deallocrqg(rqg);
                bp->b_flags |= B_ERROR;
                bp->b_error = ENOMEM;
                biodone(bp);
                return REQUEST_ENOMEM;                          /* can't do it */
            }
            *diskaddr += rqe->datalen;                          /* look at the remainder */
            if (*diskaddr < diskend) {                          /* didn't finish the request on this stripe */
                plex->multiblock++;                             /* count another one */
                if (sdno == plex->subdisks - 1)                 /* last subdisk, */
                    plex->multistripe++;                        /* another stripe as well */
            }
        }
        break;

    default:
        printf("vinum: invalid plex type in bre\n");
        status = REQUEST_DOWN;                                  /* we can't access it */
    }

    return status;
}
/* Build up a request structure for reading volumes.
* This function is not needed for plex reads, since there's
* no recovery if a plex read can't be satisified. */
enum requeststatus
build_read_request(struct request *rq, /* request */
int plexindex)
{ /* index in the volume's plex table */
BROKEN_GDB;
struct buf *bp;
daddr_t startaddr; /* offset of previous part of transfer */
daddr_t diskaddr; /* offset of current part of transfer */
daddr_t diskend; /* and end offset of transfer */
int plexno; /* plex index in vinum_conf */
struct rqgroup *rqg; /* point to the request we're working on */
struct volume *vol; /* volume in question */
off_t oldstart; /* note where we started */
int recovered = 0; /* set if we recover a read */
enum requeststatus status = REQUEST_OK;
bp = rq->bp; /* buffer pointer */
diskaddr = bp->b_blkno; /* start offset of transfer */
diskend = diskaddr + (bp->b_bcount / DEV_BSIZE); /* and end offset of transfer */
rqg = &rq->rqg[plexindex]; /* plex request */
vol = &VOL[rq->volplex.volno]; /* point to volume */
while (diskaddr < diskend) { /* build up request components */
startaddr = diskaddr;
status = bre(rq, vol->plex[plexindex], &diskaddr, diskend); /* build up a request */
switch (status) {
case REQUEST_OK:
continue;
case REQUEST_RECOVERED:
recovered = 1;
break;
case REQUEST_EOF:
case REQUEST_ENOMEM:
return status;
/* if we get here, we have either had a failure or
* a RAID 5 recovery. We don't want to use the
* recovery, because it's expensive, so first we
* check if we have alternatives */
case REQUEST_DOWN: /* can't access the plex */
if (vol != NULL) { /* and this is volume I/O */
/* Try to satisfy the request
* from another plex */
for (plexno = 0; plexno < vol->plexes; plexno++) {
diskaddr = startaddr; /* start at the beginning again */
oldstart = startaddr; /* and note where that was */
if (plexno != plexindex) { /* don't try this plex again */
bre(rq, vol->plex[plexno], &diskaddr, diskend); /* try a request */
if (diskaddr > oldstart) { /* we satisfied another part */
recovered = 1; /* we recovered from the problem */
status = REQUEST_OK; /* don't complain about it */
break;
}
}
if (plexno == (vol->plexes - 1)) /* couldn't satisfy the request */
return REQUEST_DOWN; /* failed */
}
} else
return REQUEST_DOWN; /* bad luck */
}
if (recovered)
vol->recovered_reads += recovered; /* adjust our recovery count */
}
return status;
}
/* Build up a request structure for writes.
* Return 0 if all subdisks involved in the request are up, 1 if some
* subdisks are not up, and -1 if the request is at least partially
* outside the bounds of the subdisks. */
enum requeststatus
build_write_request(struct request *rq)
{ /* request */
BROKEN_GDB;
struct buf *bp;
daddr_t diskstart; /* offset of current part of transfer */
daddr_t diskend; /* and end offset of transfer */
int plexno; /* plex index in vinum_conf */
struct volume *vol; /* volume in question */
enum requeststatus status;
bp = rq->bp; /* buffer pointer */
vol = &VOL[rq->volplex.volno]; /* point to volume */
diskend = bp->b_blkno + (bp->b_bcount / DEV_BSIZE); /* end offset of transfer */
status = REQUEST_OK;
for (plexno = 0; plexno < vol->plexes; plexno++) {
diskstart = bp->b_blkno; /* start offset of transfer */
status = min(status, bre(rq, /* build requests for the plex */
vol->plex[plexno],
&diskstart,
diskend));
}
return status;
}
/* Fill in the struct buf part of a request element. */
enum requeststatus
build_rq_buffer(struct rqelement *rqe, struct plex *plex)
{
BROKEN_GDB;
struct sd *sd; /* point to subdisk */
struct volume *vol;
struct buf *bp;
struct buf *ubp; /* user (high level) buffer header */
vol = &VOL[rqe->rqg->rq->volplex.volno];
sd = &SD[rqe->sdno]; /* point to subdisk */
bp = &rqe->b;
ubp = rqe->rqg->rq->bp; /* pointer to user buffer header */
/* Initialize the buf struct */
bzero(&rqe->b, sizeof(struct buf));
bp->b_proc = ubp->b_proc; /* process pointer */
bp->b_flags = ubp->b_flags & (B_NOCACHE | B_READ | B_ASYNC); /* copy these flags from user bp */
bp->b_flags |= B_CALL | B_BUSY; /* inform us when it's done */
if (plex->state == plex_reviving)
bp->b_flags |= B_ORDERED; /* keep request order if we're reviving */
bp->b_iodone = complete_rqe; /* by calling us here */
bp->b_dev = DRIVE[rqe->driveno].dev; /* drive device */
bp->b_blkno = rqe->sdoffset + sd->driveoffset; /* start address */
bp->b_bcount = rqe->buflen << DEV_BSHIFT; /* number of bytes to transfer */
bp->b_resid = bp->b_bcount; /* and it's still all waiting */
bp->b_bufsize = bp->b_bcount; /* and buffer size */
bp->b_vp = DRIVE[rqe->driveno].vp; /* drive vnode */
bp->b_rcred = FSCRED; /* we have the file system credentials */
bp->b_wcred = FSCRED; /* we have the file system credentials */
if (rqe->flags & XFR_MALLOCED) { /* this operation requires a malloced buffer */
bp->b_data = Malloc(bp->b_bcount); /* get a buffer to put it in */
if (bp->b_data == NULL) { /* failed */
Debugger("XXX");
abortrequest(rqe->rqg->rq, ENOMEM);
return REQUEST_ENOMEM; /* no memory */
}
} else
/* Point directly to user buffer data. This means
* that we don't need to do anything when we have
* finished the transfer */
bp->b_data = ubp->b_data + rqe->useroffset * DEV_BSIZE;
return 0;
}
/* Abort a request: free resources and complete the
* user request with the specified error */
int
abortrequest(struct request *rq, int error)
{
struct buf *bp = rq->bp; /* user buffer */
bp->b_flags |= B_ERROR;
bp->b_error = error;
freerq(rq); /* free everything we're doing */
biodone(bp);
return error; /* and give up */
}
/*
 * Check whether our transfer covers the complete address
 * space of the user request.
 *
 * Returns 1 if it does, otherwise 0.  XXX Not implemented yet:
 * the request is not looked at, and the answer is always 1.
 */
int
check_range_covered(struct request *rq)
{
    (void) rq;                          /* XXX not examined yet */
    return 1;
}
/* Perform I/O on a subdisk */
void
sdio(struct buf *bp)
{
int s; /* spl */
struct sd *sd;
struct sdbuf *sbp;
daddr_t endoffset;
struct drive *drive;
sd = &SD[SDNO(bp->b_dev)]; /* point to the subdisk */
drive = &DRIVE[sd->driveno];
if (drive->state != drive_up) { /* XXX until we get the states fixed */
set_sd_state(SDNO(bp->b_dev), sd_obsolete, setstate_force);
bp->b_flags |= B_ERROR;
bp->b_error = EIO;
biodone(bp);
return;
}
/* XXX decide which states we will really accept here. up
* implies it could be involved with a plex, in which
* case we don't want to dick with it */
if ((sd->state != sd_up)
&& (sd->state != sd_initializing)
&& (sd->state != sd_reborn)) { /* we can't access it */
bp->b_flags |= B_ERROR;
bp->b_flags = EIO;
if (bp->b_flags & B_BUSY) /* XXX why isn't this always the case? */
biodone(bp);
return;
}
/* Get a buffer */
sbp = (struct sdbuf *) Malloc(sizeof(struct sdbuf));
if (sbp == NULL) {
bp->b_flags |= B_ERROR;
bp->b_error = ENOMEM;
biodone(bp);
return;
}
bcopy(bp, &sbp->b, sizeof(struct buf)); /* start with the user's buffer */
sbp->b.b_flags |= B_CALL; /* tell us when it's done */
sbp->b.b_iodone = sdio_done; /* here */
sbp->b.b_dev = DRIVE[sd->driveno].dev; /* device */
sbp->b.b_vp = DRIVE[sd->driveno].vp; /* vnode */
sbp->b.b_blkno += sd->driveoffset;
sbp->bp = bp; /* note the address of the original header */
sbp->sdno = sd->sdno; /* note for statistics */
sbp->driveno = sd->driveno;
endoffset = bp->b_blkno + sbp->b.b_bcount / DEV_BSIZE; /* final sector offset */
if (endoffset > sd->sectors) { /* beyond the end */
sbp->b.b_bcount -= (endoffset - sd->sectors) * DEV_BSIZE; /* trim */
if (sbp->b.b_bcount <= 0) { /* nothing to transfer */
bp->b_resid = bp->b_bcount; /* nothing transferred */
/* XXX Grrr. This doesn't seem to work. Return
* an error after all */
bp->b_flags |= B_ERROR;
bp->b_error = ENOSPC;
biodone(bp);
Free(sbp);
return;
}
}
if ((sbp->b.b_flags & B_READ) == 0) /* write */
sbp->b.b_vp->v_numoutput++; /* one more output going */
#if DEBUG
if (debug & DEBUG_ADDRESSES)
printf(" %s dev 0x%x, sd %d, offset 0x%x, devoffset 0x%x, length %ld\n",
sbp->b.b_flags & B_READ ? "Read" : "Write",
sbp->b.b_dev,
sbp->sdno,
(u_int) (sbp->b.b_blkno - SD[sbp->sdno].driveoffset),
(int) sbp->b.b_blkno,
sbp->b.b_bcount); /* XXX */
if (debug & DEBUG_NUMOUTPUT)
printf(" vinumstart sd %d numoutput %ld\n",
sbp->sdno,
sbp->b.b_vp->v_numoutput);
#endif
s = splbio();
(*bdevsw[major(sbp->b.b_dev)]->d_strategy) (&sbp->b);
splx(s);
}
/*
 * Simplified version of bounds_check_with_label.
 * Determine the size of the transfer, and make sure it is
 * within the boundaries of the partition.  Adjust transfer
 * if needed, and signal errors or early completion.
 *
 * Volumes are simpler than disk slices: they only contain
 * one component (though we call them a, b and c to make
 * system utilities happy), and they always take up the
 * complete space of the "partition".
 *
 * I'm still not happy with this: why should the label be
 * protected?  If it weren't so damned difficult to write
 * one in the first place (because it's protected), it wouldn't
 * be a problem.
 *
 * Returns:
 *   1   go ahead with the transfer; b_bcount may have been
 *       shortened to fit, and b_pblkno has been set.
 *   0   nothing to do: empty transfer, or it starts exactly at
 *       the end of the volume (b_resid is then set to b_bcount).
 *  -1   error; B_ERROR is set and b_error is EROFS (attempt to
 *       write the label without permission) or EINVAL (transfer
 *       is outside the volume).
 */
int
vinum_bounds_check(struct buf *bp, struct volume *vol)
{
    int maxsize = vol->size;            /* size of the partition (sectors) */
    int size = (bp->b_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT;    /* size of this request (sectors) */

    /* Would this transfer overwrite the disk label? */
    if (bp->b_blkno <= LABELSECTOR      /* starts before or at the label */
#if LABELSECTOR != 0
        && bp->b_blkno + size > LABELSECTOR     /* and finishes after */
#endif
        && (!(vol->flags & VF_RAW))     /* and it's not raw */
        && major(bp->b_dev) == BDEV_MAJOR       /* and it's the block device */
        && (bp->b_flags & B_READ) == 0  /* and it's a write */
        /*
         * ! binds more tightly than &, so the test must be
         * parenthesized to look at the two flag bits.
         */
        && (!(vol->flags & (VF_WLABEL | VF_LABELLING)))) {      /* and we're not allowed to write the label */
        bp->b_error = EROFS;            /* read-only */
        bp->b_flags |= B_ERROR;
        return -1;
    }
    if (size == 0)                      /* no transfer specified, */
        return 0;                       /* treat as EOF */
    /* beyond partition? */
    if (bp->b_blkno < 0                 /* negative start */
        || bp->b_blkno + size > maxsize) {      /* or goes beyond the end of the partition */
        /* if exactly at end of disk, return an EOF */
        if (bp->b_blkno == maxsize) {
            bp->b_resid = bp->b_bcount;
            return 0;
        }
        /* or truncate if part of it fits */
        size = maxsize - bp->b_blkno;
        if (size <= 0) {                /* nothing to transfer */
            bp->b_error = EINVAL;
            bp->b_flags |= B_ERROR;
            return -1;
        }
        bp->b_bcount = size << DEV_BSHIFT;
    }
    bp->b_pblkno = bp->b_blkno;
    return 1;
}
/*
 * Get memory for a request group with room for 'elements'
 * request elements, clear it, and add it to the end of the
 * list of groups belonging to rq.
 *
 * Returns the new group, or NULL if we couldn't get the
 * memory (in which case we also call the debugger).
 */
struct rqgroup *
allocrqg(struct request *rq, int elements)
{
    int nbytes;                         /* size of the group, elements included */
    struct rqgroup *newgroup;           /* the one we're going to allocate */

    nbytes = sizeof(struct rqgroup) + elements * sizeof(struct rqelement);
    newgroup = (struct rqgroup *) Malloc(nbytes);
    if (newgroup == NULL) {             /* malloc failed */
        Debugger("XXX");
        return newgroup;
    }
    bzero(newgroup, nbytes);            /* no old junk */
    newgroup->rq = rq;                  /* point back to the parent request */
    newgroup->count = elements;         /* number of requests in the group */

    if (rq->rqg == NULL)                /* first request */
        rq->rqg = newgroup;             /* at the start */
    else                                /* we already have requests */
        rq->lrqg->next = newgroup;      /* hang it off the end */
    rq->lrqg = newgroup;                /* this one is the last in the list */
    return newgroup;
}
/* Deallocate a request group out of a chain. We do
* this by linear search: the chain is short, this
* almost never happens, and currently it can only
* happen to the first member of the chain. */
void
deallocrqg(struct rqgroup *rqg)
{
struct rqgroup *rqgc = rqg->rq->rqg; /* point to the request chain */
if (rqg->rq->rqg == rqg) /* we're first in line */
rqg->rq->rqg = rqg->next; /* unhook ourselves */
else {
while (rqgc->next != rqg) /* find the group */
rqgc = rqgc->next;
rqgc->next = rqg->next;
}
Free(rqgc);
}
/*
 * Character device interface: read.
 *
 * The work is done by physio(), which is given vinumstrategy
 * as the strategy routine and minphys to limit the transfer
 * size.  The 1 selects the read direction (vinumwrite passes
 * 0).  ioflag is not used.  Returns whatever physio() returns.
 */
int
vinumread(dev_t dev, struct uio *uio, int ioflag)
{
    int error;

    error = physio(vinumstrategy, NULL, dev, 1, minphys, uio);
    return error;
}
/*
 * Character device interface: write.
 *
 * The work is done by physio(), which is given vinumstrategy
 * as the strategy routine and minphys to limit the transfer
 * size.  The 0 selects the write direction (vinumread passes
 * 1).  ioflag is not used.  Returns whatever physio() returns.
 */
int
vinumwrite(dev_t dev, struct uio *uio, int ioflag)
{
    int error;

    error = physio(vinumstrategy, NULL, dev, 0, minphys, uio);
    return error;
}

159
lkm/vinum/request.h Normal file
View File

@ -0,0 +1,159 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: request.h,v 1.10 1998/08/03 07:15:26 grog Exp grog $
*/
/* Information needed to set up a transfer */
/* Default number of request elements set aside
* for a transfer. struct buf is surprisingly big
* (about 300 bytes), and every request element
* contains one, so this value is really important.
* Most requests don't need more than 2 subrequests
* per plex. The table is automatically extended if
* this value is too small. */
#define RQELTS 2 /* default of 2 requests per transfer */
/*
 * Flags describing a transfer.  Each basic flag is a single bit,
 * so that they can be or'ed together; the names at the end are
 * masks which collect the operations with a common property.
 */
enum xferinfo {
    /* the kind of operation */
    XFR_NORMAL_READ = 1 << 0,
    XFR_NORMAL_WRITE = 1 << 1,          /* write request in normal mode */
    XFR_RECOVERY_READ = 1 << 2,
    XFR_DEGRADED_WRITE = 1 << 3,
    XFR_PARITYLESS_WRITE = 1 << 4,

    /* properties of the transfer */
    XFR_NO_PARITY_STRIPE = 1 << 5,      /* parity stripe is not available */
    XFR_DATA_BLOCK = 1 << 6,            /* data block in request */
    XFR_PARITY_BLOCK = 1 << 7,          /* parity block in request */
    XFR_BAD_SUBDISK = 1 << 8,           /* this subdisk is dead */
    XFR_MALLOCED = 1 << 9,              /* this buffer is malloced */
#if DEBUG
    XFR_PHASE2 = 1 << 11,               /* documentation only: 2nd phase write */
#endif
    XFR_REVIVECONFLICT = 1 << 12,       /* possible conflict with a revive operation */

    /* operations that need a parity block */
    XFR_PARITYOP = (XFR_DEGRADED_WRITE | XFR_RECOVERY_READ | XFR_NORMAL_WRITE),
    /* operations that use the group parameters */
    XFR_GROUPOP = (XFR_RECOVERY_READ | XFR_DEGRADED_WRITE),
    /* operations that use the data parameters */
    XFR_DATAOP = (XFR_PARITYLESS_WRITE | XFR_NORMAL_WRITE | XFR_NORMAL_READ),
    /* operations requiring read before write */
    XFR_RBW = (XFR_DEGRADED_WRITE | XFR_NORMAL_WRITE),
    /* operations that need a malloced buffer */
    XFR_NEEDS_MALLOC = (XFR_DEGRADED_WRITE | XFR_RECOVERY_READ | XFR_NORMAL_WRITE)
};
/* Describe one low-level request, part
* of a high-level request. This is an
* extended struct buf buffer, and the first
* element *must* be a struct buf. We pass this structure
* to the I/O routines instead of a struct buf in order
* to be able to locate the high-level request when it
* completes.
*
* All offsets and lengths are in "blocks", i.e. sectors */
struct rqelement {
struct buf b; /* buf structure */
struct rqgroup *rqg; /* pointer to our group */
/* Information about the transfer */
daddr_t sdoffset; /* offset in subdisk */
int useroffset; /* offset in user buffer of normal data */
/* dataoffset and datalen refer to "individual"
* data transfers (normal read, parityless write)
* and also degraded write.
*
* groupoffset and grouplen refer to the other
* "group" operations (normal write, recovery read)
* Both the offsets are relative to the start of the
* local buffer.
*
* NOTE(review): the masks XFR_DATAOP and XFR_GROUPOP in
* enum xferinfo divide the operations up differently
* (there, normal write is a data operation and degraded
* write a group operation). Confirm which is intended. */
int dataoffset; /* offset in buffer of the normal data */
int groupoffset; /* offset in buffer of group data */
short datalen; /* length of normal data (sectors) */
short grouplen; /* length of group data (sectors) */
short buflen; /* total buffer length to allocate (sectors) */
short flags; /* really enum xferinfo (see above) */
/* Ways to find other components */
short sdno; /* subdisk number */
short driveno; /* drive number */
};
/* A group of requests built to satisfy a certain
* component of a user request. The groups of a
* request form a singly linked list, and the
* request elements follow the group header
* directly in the same piece of memory (see
* allocrqg). */
struct rqgroup {
struct rqgroup *next; /* pointer to next group */
struct request *rq; /* pointer to the request */
short count; /* number of requests in this group */
short active; /* and number active */
short plexno; /* index of plex */
int badsdno; /* index of bad subdisk or -1 */
enum xferinfo flags; /* description of transfer */
struct rqelement rqe[0]; /* and the elements of this request */
};
/* Describe one high-level request and the
* work we have to do to satisfy it */
struct request {
struct buf *bp; /* pointer to the high-level request */
int flags;
union {
int volno; /* volume index */
int plexno; /* or plex index */
} volplex;
int error; /* current error indication */
short isplex; /* set if this is a plex request */
short active; /* number of subrequests still active (launch_requests counts one per request group) */
struct rqgroup *rqg; /* pointer to the first group of requests */
struct rqgroup *lrqg; /* and to the last group of requests (allocrqg adds new groups here) */
struct request *next; /* link of waiting requests */
};
/* Extended buffer header for subdisk I/O. Includes
* a pointer to the user I/O request. The struct buf
* comes first; sdio passes its address to the drive's
* strategy routine. */
struct sdbuf {
struct buf b; /* our buffer */
struct buf *bp; /* and pointer to parent */
short driveno; /* drive index */
short sdno; /* and subdisk index */
};
/*
 * Values returned by bre and friends.
 * Be careful with these: the numeric values matter.  They are
 * in order of increasing seriousness, and some routines check
 * for > REQUEST_RECOVERED to indicate a completely failed
 * request.
 */
enum requeststatus {
    REQUEST_OK = 0,                     /* request built OK */
    REQUEST_RECOVERED = 1,              /* request OK, but involves RAID5 recovery */
    REQUEST_EOF = 2,                    /* request failed: outside plex */
    REQUEST_DOWN = 3,                   /* request failed: subdisk down */
    REQUEST_ENOMEM = 4                  /* ran out of memory */
};

128
lkm/vinum/revive.c Normal file
View File

@ -0,0 +1,128 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: revive.c,v 1.1 1998/08/14 06:16:59 grog Exp grog $
*/
#define REALLYKERNEL
#include "vinumhdr.h"
#include "request.h"
/* revive a block of a plex. Return an error
* indication. EAGAIN means successful copy, but
* that more blocks remain to be copied.
* XXX We should specify a block size here. At the moment,
* just take a default value. FIXME */
int
revive_block(int plexno)
{
struct plex *plex = &PLEX[plexno];
struct buf *bp;
int error = EAGAIN;
int size;
int s; /* priority level */
if (plex->revive_blocksize == 0) {
if (plex->stripesize != 0) /* we're striped, don't revive more than */
plex->revive_blocksize = min(DEFAULT_REVIVE_BLOCKSIZE, plex->stripesize); /* one block at a time */
else
plex->revive_blocksize = DEFAULT_REVIVE_BLOCKSIZE;
}
size = min(plex->revive_blocksize, plex->length - plex->revived) << DEV_BSHIFT;
s = splbio();
/* Get a buffer */
bp = geteblk(size);
if (bp == NULL) {
splx(s);
return ENOMEM;
}
if (bp->b_qindex != 0) /* on a queue, */
bremfree(bp); /* remove it */
splx(s);
/* Amount to transfer: block size, unless it
* would overlap the end */
bp->b_bufsize = size;
bp->b_bcount = bp->b_bufsize;
bp->b_resid = 0x0;
bp->b_blkno = plex->revived; /* we've got this far */
/* XXX what about reviving anonymous plexes? */
/* First, read the data from the volume. We don't
* care which plex, that's bre's job */
bp->b_dev = VINUMBDEV(plex->volno, 0, 0, VINUM_VOLUME_TYPE); /* create the device number */
bp->b_flags = B_BUSY | B_READ;
vinumstart(bp, 1);
biowait(bp);
if (bp->b_flags & B_ERROR)
error = bp->b_error;
else
/* Now write to the plex */
{
s = splbio();
if (bp->b_qindex != 0) /* on a queue, */
bremfree(bp); /* remove it */
splx(s);
bp->b_dev = VINUMBDEV(plex->volno, plex->volplexno, 0, VINUM_PLEX_TYPE); /* create the device number */
bp->b_flags = B_BUSY; /* make this a write */
bp->b_resid = 0x0;
vinumstart(bp, 1);
biowait(bp);
if (bp->b_flags & B_ERROR)
error = bp->b_error;
else {
plex->revived += bp->b_bcount >> DEV_BSHIFT; /* moved this much further down */
if (plex->revived >= plex->length) { /* finished */
plex->revived = 0;
plex->state = plex_up; /* do we need to do more? */
if (plex->volno >= 0) /* we have a volume, */
set_volume_state(plex->volno, volume_up, 0);
printf("vinum: plex %s is %s\n", plex->name, plex_state(plex->state));
save_config(); /* and save the updated configuration */
error = 0; /* we're done */
}
}
while (plex->waitlist) { /* we have waiting requests */
launch_requests(plex->waitlist, 1); /* do them now */
plex->waitlist = plex->waitlist->next; /* and move on to the next */
}
}
if (bp->b_qindex == 0) /* not on a queue, */
brelse(bp); /* is this kosher? */
return error;
}

755
lkm/vinum/state.c Normal file
View File

@ -0,0 +1,755 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: state.c,v 2.6 1998/08/19 08:04:47 grog Exp grog $
*/
#define REALLYKERNEL
#include "vinumhdr.h"
#include "request.h"
/*
 * Update the state of drive 'driveno' to 'state'.
 *
 * A drive that is in use (opencount not 0) is only taken down
 * if some flag is set.  When a drive is brought up or is
 * coming up and we don't have it open, it is initialized,
 * which changes the state again.  Unless setstate_norecurse is
 * set, all subdisks on the drive are then forced down and the
 * configuration is saved.
 *
 * Returns 1 if the state was changed and the subdisks have
 * been dealt with, otherwise 0.  Note that this means that we
 * also return 0 after changing the state with
 * setstate_norecurse set.
 */
int
set_drive_state(int driveno, enum drivestate state, int flags)
{
    struct drive *drive = &DRIVE[driveno];
    int oldstate = drive->state;
    int sdno;

    if (drive->state == drive_unallocated)      /* no drive to do anything with, */
        return 0;
    if (state == oldstate)              /* nothing to change */
        return 0;

    if (state == drive_down) {          /* the drive's going down */
        if ((!flags) && (drive->opencount != 0))        /* still in use, and nobody is insisting */
            return 0;                   /* don't do it */
        close_drive(drive);
        drive->state = state;
        printf("vinum: drive %s is %s\n", drive->label.name, drive_state(drive->state));
    }
    drive->state = state;               /* set the state */

    if (((drive->state == drive_up)
            || (drive->state == drive_coming_up))
        && (drive->vp == NULL))         /* should be open, but we're not */
        init_drive(drive);              /* which changes the state again */

    if (flags & setstate_norecurse)     /* we don't want to recurse */
        return 0;

    for (sdno = 0; sdno < vinum_conf.subdisks_used; sdno++) {   /* find this drive's subdisks */
        if (SD[sdno].driveno == driveno)        /* belongs to this drive */
            set_sd_state(sdno, sd_down, setstate_force | setstate_recursing);  /* take it down */
    }
    save_config();                      /* and save the updated configuration */
    return 1;
}
/*
 * Try to set the subdisk state.
 *
 * sdno   index of the subdisk in SD
 * state  state we want to reach
 * flags  setstate_* flags
 *
 * Returns 1 if the state changed to what we wanted, -1 if it changed to
 * something else, and 0 if there was no change.
 *
 * This routine is called both from the user (up, down states only) and
 * internally.
 */
int
set_sd_state(int sdno, enum sdstate state, enum setstateflags flags)
{
    struct sd *sd = &SD[sdno];
    int oldstate = sd->state;
    int status = 1;                         /* status to return */

    if (state == oldstate)
        return 0;                           /* no change */
    if (sd->state == sd_unallocated)        /* no subdisk to do anything with */
        return 0;

    if (sd->driveoffset < 0) {              /* not allocated space */
        sd->state = sd_down;
        if (state != sd_down)
            return -1;
    } else {                                /* space allocated */
        switch (state) {
        case sd_down:
            /*
             * Without force we don't take down a subdisk which is
             * attached to a plex.  The flag test must be parenthesized:
             * `!flags & setstate_force' negates flags first.
             */
            if (((flags & setstate_force) == 0)
                && (sd->plexno >= 0))
                return 0;                   /* don't do it */
            break;

        case sd_up:
            if (DRIVE[sd->driveno].state != drive_up)   /* can't bring the sd up if the drive isn't, */
                return 0;                   /* not even by force */
            switch (sd->state) {
            case sd_obsolete:
            case sd_down:                   /* been down, no data lost */
                /*
                 * plexno is negative for an unattached subdisk, so the
                 * test is >= 0: plex 0 is a valid plex, and PLEX must
                 * not be indexed with a negative number.
                 */
                if ((sd->plexno >= 0)       /* we're associated with a plex */
                    && ((PLEX[sd->plexno].state < plex_firstup)     /* and it's not up */
                        || (PLEX[sd->plexno].subdisks > 1)))        /* or it has more than one subdisk */
                    break;
                /* XXX Get this right: make sure that other plexes in
                 * the volume cover this address space, otherwise
                 * we make this one sd_up */
                sd->state = sd_reborn;      /* here it is again */
                printf("vinum: subdisk %s is %s, not %s\n", sd->name, sd_state(sd->state), sd_state(state));
                status = -1;
                break;

            case sd_init:                   /* brand new */
                if (flags & setstate_configuring)       /* we're doing this while configuring */
                    break;
                sd->state = sd_empty;       /* nothing in it */
                printf("vinum: subdisk %s is %s, not %s\n", sd->name, sd_state(sd->state), sd_state(state));
                status = -1;
                break;

            case sd_initializing:
                break;                      /* go on and do it */

            case sd_empty:
                if ((sd->plexno >= 0)       /* we're associated with a plex */
                    && ((PLEX[sd->plexno].state < plex_firstup)     /* and it's not up */
                        || (PLEX[sd->plexno].subdisks > 1)))        /* or it has more than one subdisk */
                    break;
                return 0;                   /* can't do it */

            default:                        /* can't do it */
                /* There's no way to bring subdisks up directly from
                 * other states.  First they need to be initialized
                 * or revived */
                return 0;
            }
            break;

        default:                            /* other ones, only internal with force */
            /*
             * `flags & setstate_force == 0' parses as
             * `flags & (setstate_force == 0)', which is always 0.
             */
            if ((flags & setstate_force) == 0)          /* no force? What's this? */
                return 0;                   /* don't do it */
        }
    }

    /*
     * Note that in the sd_reborn and sd_empty cases above the requested
     * state is still stored here; status (-1) tells the caller about it.
     */
    sd->state = state;
    printf("vinum: subdisk %s is %s\n", sd->name, sd_state(sd->state));
    if (((flags & setstate_norecurse) == 0)
        && (sd->plexno >= 0))               /* only if we belong to a plex */
        set_plex_state(sd->plexno, plex_up, setstate_recursing);    /* update plex state */
    if ((flags & (setstate_configuring | setstate_recursing)) == 0) /* save config now */
        save_config();
    return status;
}
/*
 * Called from the request routines when they find a subdisk which is
 * not kosher.  Decide whether the request can go ahead and whether the
 * subdisk state needs to change.
 *
 * sd        the subdisk in question
 * rq        the request; XFR_REVIVECONFLICT may be set in rq->flags
 * diskaddr  start of the transfer
 * diskend   end of the transfer
 *
 * Returns REQUEST_OK if we can use the subdisk, REQUEST_DOWN if not.
 * A write which is refused because the subdisk is down makes the
 * subdisk sd_stale (drive up) or sd_obsolete (drive not up).
 */
enum requeststatus
checksdstate(struct sd *sd, struct request *rq, daddr_t diskaddr, daddr_t diskend)
{
    struct plex *plex = &PLEX[sd->plexno];
    int writeop = (rq->bp->b_flags & B_READ) == 0;      /* note if we're writing */

    /* first, see if the plex wants to be accessed */
    switch (plex->state) {
    case plex_reviving:
        /*
         * When writing, we'll write anything that starts up to the
         * current revive pointer, but we'll only accept a read which
         * finishes before the current revive pointer.
         */
        if (writeop) {
            if (diskaddr > plex->revived) { /* write starts after current revive pointer */
                /* writing to a consistent down disk: it's not consistent now */
                set_sd_state(sd->sdno,
                    (DRIVE[sd->driveno].state == drive_up) ? sd_stale : sd_obsolete,
                    setstate_force);
                return REQUEST_DOWN;        /* that part of the plex is still down */
            }
            if (diskend >= plex->revived)   /* write finishes beyond revive pointer */
                rq->flags |= XFR_REVIVECONFLICT;        /* note a potential conflict */
        } else if (diskend >= plex->revived)            /* read ends after current revive pointer */
            return REQUEST_DOWN;            /* that part of the plex is still down */
        /* FALLTHROUGH */

    case plex_up:
    case plex_degraded:
    case plex_flaky:
        /* We can access the plex: let's see how the subdisk feels */
        switch (sd->state) {
        case sd_up:
            return REQUEST_OK;

        case sd_reborn:
            /*
             * Always write to a reborn disk.  Reads are refused for the
             * moment.  XXX We don't want to reject a read request to a
             * reborn subdisk if that's all we have: handle the mapping.
             */
            return writeop ? REQUEST_OK : REQUEST_DOWN;

        case sd_down:
        case sd_crashed:
            if (writeop)                    /* writing to a consistent down disk */
                set_sd_state(sd->sdno,
                    (DRIVE[sd->driveno].state == drive_up) ? sd_stale : sd_obsolete,
                    setstate_force);        /* it's not consistent now */
            return REQUEST_DOWN;            /* and it's down one way or another */

        default:
            return REQUEST_DOWN;
        }

    default:
        return REQUEST_DOWN;
    }
}
/*
 * Append a region (offset, length) to the plex's table of defective
 * regions, growing the table with EXPAND when it is full.
 */
void
add_defective_region(struct plex *plex, off_t offset, size_t length)
{
    /* XXX get this ordered, and coalesce regions if necessary */
    plex->defective_regions++;                          /* one more entry */
    if (plex->defective_regions > plex->defective_region_count)     /* no room for it */
        EXPAND(plex->defective_region,
            struct plexregion,
            plex->defective_region_count,
            PLEX_REGION_TABLE_SIZE);

    /* fill in the entry we have just added */
    plex->defective_region[plex->defective_regions - 1].offset = offset;
    plex->defective_region[plex->defective_regions - 1].length = length;
}
/*
 * Append a region (offset, length) to the plex's table of unmapped
 * regions, growing the table with EXPAND when it is full.
 */
void
add_unmapped_region(struct plex *plex, off_t offset, size_t length)
{
    plex->unmapped_regions++;                           /* one more entry */
    if (plex->unmapped_regions > plex->unmapped_region_count)       /* no room for it */
        EXPAND(plex->unmapped_region,
            struct plexregion,
            plex->unmapped_region_count,
            PLEX_REGION_TABLE_SIZE);

    /* fill in the entry we have just added */
    plex->unmapped_region[plex->unmapped_regions - 1].offset = offset;
    plex->unmapped_region[plex->unmapped_regions - 1].length = length;
}
/*
 * Throw away the plex's unmapped and defective region tables and
 * rebuild them by walking its subdisks in sdnos order.  A subdisk which
 * overlaps its predecessor is a configuration error: the plex is forced
 * down.
 *
 * NOTE(review): the defective test is only reached for a subdisk which
 * starts exactly where the previous one ended; a subdisk which follows
 * a gap is never recorded as defective.  Confirm that this is intended.
 * NOTE(review): lastsdend is an int; if plexoffset and sectors are
 * wider than int this can truncate.  Check the declarations.
 */
void
rebuild_plex_unmappedlist(struct plex *plex)
{
    int sdno;
    struct sd *sd;
    int lastsdend = 0;                      /* end offset of last subdisk */

    /* Discard the old tables: we're going to rebuild them */
    if (plex->unmapped_region != NULL) {
        Free(plex->unmapped_region);
        plex->unmapped_region = NULL;
        plex->unmapped_regions = 0;
        plex->unmapped_region_count = 0;
    }
    if (plex->defective_region != NULL) {
        Free(plex->defective_region);
        plex->defective_region = NULL;
        plex->defective_regions = 0;
        plex->defective_region_count = 0;
    }

    for (sdno = 0; sdno < plex->subdisks; sdno++) {
        sd = &SD[plex->sdnos[sdno]];
        if (sd->plexoffset > lastsdend)     /* gap before this subdisk */
            add_unmapped_region(plex, lastsdend, sd->plexoffset - lastsdend);
        else if (sd->plexoffset < lastsdend) {          /* overlap */
            printf("vinum: Plex %s, subdisk %s overlaps previous\n", plex->name, sd->name);
            set_plex_state(plex->plexno, plex_down, setstate_force);   /* don't allow that */
        } else if (sd->state < sd_reborn)   /* contiguous, but this part defective */
            add_defective_region(plex, sd->plexoffset, sd->sectors);
        lastsdend = sd->plexoffset + sd->sectors;       /* where the next one should start */
    }
}
/* return a state map for the subdisks of a plex */
enum sdstates
sdstatemap(struct plex *plex, int *sddowncount)
{
int sdno;
enum sdstates statemap = 0; /* note the states we find */
*sddowncount = 0; /* no subdisks down yet */
for (sdno = 0; sdno < plex->subdisks; sdno++) {
struct sd *sd = &SD[plex->sdnos[sdno]]; /* point to the subdisk */
switch (sd->state) {
case sd_empty:
statemap |= sd_emptystate;
(*sddowncount)++; /* another unusable subdisk */
break;
case sd_init:
statemap |= sd_initstate;
(*sddowncount)++; /* another unusable subdisk */
break;
case sd_down:
statemap |= sd_downstate;
(*sddowncount)++; /* another unusable subdisk */
break;
case sd_crashed:
statemap |= sd_crashedstate;
(*sddowncount)++; /* another unusable subdisk */
break;
case sd_obsolete:
statemap |= sd_obsolete;
(*sddowncount)++; /* another unusable subdisk */
break;
case sd_stale:
statemap |= sd_stalestate;
(*sddowncount)++; /* another unusable subdisk */
break;
case sd_reborn:
statemap |= sd_rebornstate;
break;
case sd_up:
statemap |= sd_upstate;
break;
default:
statemap |= sd_otherstate;
break;
}
}
return statemap;
}
/*
 * Determine the state of the volume relative to this plex.
 *
 * Starting from volplex_onlyusdown, OR in volplex_onlyus if this plex
 * is up, volplex_otherup for each other plex which is up and
 * volplex_alldown for each other plex which is not.  A plex without a
 * volume yields volplex_onlyusdown.
 */
enum volplexstate
vpstate(struct plex *plex)
{
    struct volume *vol;
    enum volplexstate state = volplex_onlyusdown;       /* state to return */
    int plexno;

    if (plex->volno < 0)                    /* not associated with a volume */
        return volplex_onlyusdown;          /* assume the worst */

    vol = &VOL[plex->volno];                /* point to our volume */
    for (plexno = 0; plexno < vol->plexes; plexno++) {
        struct plex *candidate = &PLEX[vol->plex[plexno]];
        int isup = candidate->state == plex_up;

        if (candidate == plex) {            /* us */
            if (isup)
                state |= volplex_onlyus;
        } else if (isup)                    /* somebody else, and up */
            state |= volplex_otherup;
        else                                /* somebody else, and not up */
            state |= volplex_alldown;
    }
    return state;
}
/* Return 1 if every bit which is set in b is also set in a, otherwise 0 */
int allset(int a, int b);

int
allset(int a, int b)
{
    int missing = b & ~a;                   /* bits of b which a lacks */

    return missing == 0;
}
/*
 * Update the state of a plex dependent on its subdisks and on the other
 * plexes of its volume.
 *
 * plexno  index of the plex in PLEX
 * state   requested state.  A request for plex_up re-evaluates the plex
 *         from the state of its subdisks and its volume, so the result
 *         may be some other state.
 * flags   setstate_* flags
 *
 * Returns 0 if nothing was changed, EAGAIN if the plex has been put
 * into plex_reviving and must be revived first, otherwise 1.
 *
 * NOTE(review): an earlier version of this comment said that the
 * unmapped_region and defective_region tables are rebuilt here; nothing
 * in this function does that.
 */
int
set_plex_state(int plexno, enum plexstate state, enum setstateflags flags)
{
    int sddowncount = 0;                    /* number of down subdisks */
    struct plex *plex = &PLEX[plexno];      /* point to our plex */
    enum plexstate oldstate = plex->state;
    enum volplexstate vps = vpstate(plex);  /* how do we compare with the other plexes? */
    enum sdstates statemap = sdstatemap(plex, &sddowncount);    /* get a map of the subdisk states */

    if ((flags & setstate_force) && (oldstate == state))        /* we're there already, */
        return 0;                           /* no change */
    if (plex->state == plex_unallocated)    /* no plex to do anything with, */
        return 0;

    switch (state) {
    case plex_up:
        if ((plex->state == plex_initializing)          /* we're initializing */
            && (statemap != sd_upstate))    /* but SDs aren't up yet */
            return 0;                       /* do nothing */

        /* We don't really care what our state was before
         * if we want to come up.  We rely entirely on the
         * state of our subdisks and our volume */
        switch (vps) {
        case volplex_onlyusdown:
        case volplex_alldown:               /* another plex is down, and so are we */
            if (statemap == sd_upstate) {   /* all subdisks ready for action */
                /*
                 * vpstate() also returns volplex_onlyusdown for a plex
                 * without a volume, so check volno before indexing VOL.
                 */
                if ((plex->state == plex_init)          /* we're brand spanking new */
                    && (plex->volno >= 0)   /* and we belong to a volume */
                    && (VOL[plex->volno].flags & VF_CONFIG_SETUPSTATE)) {   /* which considers that up */
                    /* Conceptually, an empty plex does not contain valid data,
                     * but normally we'll see this state when we have just
                     * created a plex, and it's either consistent from earlier,
                     * or we don't care about the previous contents (we're going
                     * to create a file system or use it for swap).
                     *
                     * We need to do this in one swell foop: on the next call
                     * we will no longer be just empty.
                     *
                     * We'll still come back to this function for the remaining
                     * plexes in the volume.  They'll be up already, so that
                     * doesn't change anything, but it's not worth the additional
                     * code to stop doing it. */
                    struct volume *vol = &VOL[plex->volno];
                    int volplexno;          /* index into vol->plex; don't shadow plexno */

                    for (volplexno = 0; volplexno < vol->plexes; volplexno++)
                        PLEX[vol->plex[volplexno]].state = plex_up;
                }
                plex->state = plex_up;      /* bring up up, anyway */
            } else
                plex->state = plex_down;
            break;

        case volplex_onlyusup:              /* only we are up: others are down */
        case volplex_onlyus:                /* we're up and alone */
            /*
             * The masks below must use the bitmap flag sd_rebornstate,
             * not the subdisk state number sd_reborn.
             */
            if ((statemap == sd_upstate)    /* subdisks all up */
                || (statemap == sd_emptystate))         /* or all empty */
                plex->state = plex_up;      /* go for it */
            else if ((statemap & (sd_upstate | sd_rebornstate)) == statemap)    /* all up or reborn, */
                plex->state = plex_flaky;
            else if (statemap & (sd_upstate | sd_rebornstate))  /* some up or reborn, */
                plex->state = plex_degraded;            /* so far no corruption */
            else
                plex->state = plex_faulty;
            break;

        case volplex_otherup:               /* another plex is up */
        case volplex_otherupdown:           /* other plexes are up and down */
            if ((statemap == sd_upstate)    /* subdisks all up */
                || (statemap == sd_emptystate)) {       /* or all empty */
                /*
                 * Is the data in all subdisks valid?
                 *
                 * NOTE(review): this was written as
                 *   statemap == statemap & (sd_downstate | ...)
                 * which C parses as the expression below, i.e. a test
                 * of bit 0 of the mask, independent of statemap.  The
                 * intended test is presumably
                 *   statemap == (statemap & (...))
                 * but that would let a plex leave this branch without
                 * being revived, and the branch never sets plex_up.
                 * The parse is made explicit here and the behaviour is
                 * left unchanged until that has been decided.
                 */
                if ((statemap == statemap) & (sd_downstate | sd_rebornstate | sd_upstate))
                    break;                  /* yes, we can bring the plex up */
                plex->state = plex_reviving;            /* we need reviving */
                return EAGAIN;
            } else
                plex->state = plex_faulty;  /* still in error */
            break;

        case volplex_allup:                 /* all plexes are up */
        case volplex_someup:
            if ((statemap & (sd_upstate | sd_rebornstate)) == statemap) /* all up or reborn, */
                break;                      /* no change */
            else
                plex->state = plex_degraded;            /* we're not all there */
        }
        if (plex->state != oldstate)
            break;
        return 0;                           /* no change */

    case plex_down:                         /* want to take it down */
        if (((vps == volplex_onlyus)        /* we're the only one up */
                || (vps == volplex_onlyusup))           /* we're the only one up */
            && (!(flags & setstate_force))) /* and we don't want to use force */
            return 0;                       /* can't do it */
        plex->state = state;                /* do it */
        break;

        /* This is only requested by the driver.
         * Trust ourselves */
    case plex_faulty:
        plex->state = state;                /* do it */
        break;

    case plex_initializing:
        /* XXX consider what safeguards we need here */
        if ((flags & setstate_force) == 0)
            return 0;
        plex->state = state;                /* do it */
        break;

        /* What's this? */
    default:
        return 0;
    }

    printf("vinum: plex %s is %s\n", plex->name, plex_state(plex->state));

    /* Now see what we have left, and whether
     * we're taking the volume down */
    if (plex->volno >= 0) {                 /* we have a volume */
        struct volume *vol = &VOL[plex->volno];

        vps = vpstate(plex);                /* get our combined state again */
        if ((flags & setstate_norecurse) == 0) {        /* we can recurse */
            if ((vol->state == volume_up)
                && (vps == volplex_alldown))            /* and we're all down */
                set_volume_state(plex->volno, volume_down, setstate_recursing);    /* take our volume down */
            else if ((vol->state == volume_down)
                && (vps & (volplex_otherup | volplex_onlyusup)))        /* and at least one is up */
                set_volume_state(plex->volno, volume_up, setstate_recursing);      /* bring our volume up */
        }
    }
    if ((flags & (setstate_configuring | setstate_recursing)) == 0)     /* save config now */
        save_config();
    return 1;
}
/*
 * Update the state of a volume dependent on its plexes.
 *
 * volno  index of the volume in VOL
 * state  requested state (volume_up or volume_down; other requests are
 *        ignored)
 * flags  setstate_* flags
 *
 * A volume comes up only if at least one of its plexes is plex_up.  It
 * goes down only if it is not open or setstate_force is set.
 *
 * Returns 1 if the state was changed, otherwise 0.
 */
int
set_volume_state(int volno, enum volumestate state, enum setstateflags flags)
{
    int plexno;
    enum plexstates {
        plex_downstate = 1,                 /* found a plex which is down */
        plex_degradedstate = 2,             /* found a plex which is halfway up */
        plex_upstate = 4                    /* found a plex which is completely up */
    };
    int plexstatemap = 0;                   /* note the states we find */
    struct volume *vol = &VOL[volno];       /* point to our volume */

    if (vol->state == state)                /* we're there already */
        return 0;                           /* no change */
    if (vol->state == volume_unallocated)   /* no volume to do anything with, */
        return 0;

    /* Collect the states of our plexes */
    for (plexno = 0; plexno < vol->plexes; plexno++) {
        struct plex *plex = &PLEX[vol->plex[plexno]];   /* point to the plex */

        switch (plex->state) {
        case plex_degraded:
        case plex_flaky:
        case plex_reviving:
            plexstatemap |= plex_degradedstate;
            break;

        case plex_up:
            plexstatemap |= plex_upstate;
            break;

        default:
            plexstatemap |= plex_downstate;
            break;
        }
    }

    if (state == volume_up) {               /* want to come up */
        if (plexstatemap & plex_upstate) {  /* we have a plex which is completely up */
            vol->state = volume_up;         /* did it */
            printf("vinum: volume %s is %s\n", vol->name, volume_state(vol->state));
            if ((flags & (setstate_configuring | setstate_recursing)) == 0)     /* save config now */
                save_config();
            return 1;
        }
        /* Here we should check whether we have enough
         * coverage for the complete volume.  Writeme XXX */
    } else if (state == volume_down) {      /* want to go down */
        /*
         * The flag test needs its own parentheses:
         * `flags & setstate_force != 0' is `flags & (setstate_force != 0)'.
         */
        if ((vol->opencount == 0)           /* not open */
            || ((flags & setstate_force) != 0)) {       /* or we're forcing */
            vol->state = volume_down;
            printf("vinum: volume %s is %s\n", vol->name, volume_state(vol->state));
            if ((flags & (setstate_configuring | setstate_recursing)) == 0)     /* save config now */
                save_config();
            return 1;
        }
    }
    return 0;                               /* no change */
}
/* Start an object, in other words do what we can to get it up.
* This is called from vinumioctl (VINUMSTART).
* Return error indications via ioctl_reply
*/
void
start_object(struct vinum_ioctl_msg *data)
{
int status;
int realstatus; /* what we really have */
int objindex = data->index; /* data gets overwritten */
struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* format for returning replies */
switch (data->type) {
case drive_object:
status = set_drive_state(objindex, drive_up, setstate_none);
realstatus = DRIVE[objindex].state == drive_up; /* set status on whether we really did it */
break;
case sd_object:
status = set_sd_state(objindex, sd_up, setstate_none); /* set state */
realstatus = SD[objindex].state == sd_up; /* set status on whether we really did it */
break;
case plex_object:
if (PLEX[objindex].state == plex_reviving) { /* reviving, */
ioctl_reply->error = revive_block(objindex); /* revive another block */
ioctl_reply->msg[0] = '\0'; /* no comment */
return;
}
status = set_plex_state(objindex, plex_up, setstate_none);
realstatus = PLEX[objindex].state == plex_up; /* set status on whether we really did it */
break;
case volume_object:
status = set_volume_state(objindex, volume_up, setstate_none);
realstatus = VOL[objindex].state == volume_up; /* set status on whether we really did it */
break;
default:
ioctl_reply->error = EINVAL;
strcpy(ioctl_reply->msg, "Invalid object type");
return;
}
/* There's no point in saying anything here:
* the userland program does it better */
ioctl_reply->msg[0] = '\0';
if (realstatus == 0) /* couldn't do it */
ioctl_reply->error = EINVAL;
else
ioctl_reply->error = 0;
}
/* Stop an object, in other words do what we can to get it down
* This is called from vinumioctl (VINUMSTOP).
* Return error indications via ioctl_reply.
*/
void
stop_object(struct vinum_ioctl_msg *data)
{
int status = 1;
int objindex = data->index; /* save the number from change */
struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* format for returning replies */
switch (data->type) {
case drive_object:
status = set_drive_state(objindex, drive_down, data->force);
break;
case sd_object:
status = set_sd_state(objindex, sd_down, data->force);
break;
case plex_object:
status = set_plex_state(objindex, plex_down, data->force);
break;
case volume_object:
status = set_volume_state(objindex, volume_down, data->force);
break;
default:
ioctl_reply->error = EINVAL;
strcpy(ioctl_reply->msg, "Invalid object type");
return;
}
ioctl_reply->msg[0] = '\0';
if (status == 0) /* couldn't do it */
ioctl_reply->error = EINVAL;
else
ioctl_reply->error = 0;
}
/* VINUM_SETSTATE ioctl: set an object state
* msg is the message passed by the user */
void
setstate(struct vinum_ioctl_msg *msg)
{
int sdno;
struct sd *sd;
struct plex *plex;
struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) msg; /* format for returning replies */
switch (msg->state) {
case object_down:
stop_object(msg);
break;
case object_initializing:
switch (msg->type) {
case sd_object:
sd = &SD[msg->index];
if ((msg->index >= vinum_conf.subdisks_used)
|| (sd->state == sd_unallocated)) {
sprintf(ioctl_reply->msg, "Invalid subdisk %d", msg->index);
ioctl_reply->error = EFAULT;
return;
}
set_sd_state(msg->index, sd_initializing, msg->force);
if (sd->state != sd_initializing) {
strcpy(ioctl_reply->msg, "Can't set state");
ioctl_reply->error = EINVAL;
} else
ioctl_reply->error = 0;
break;
case plex_object:
plex = &PLEX[msg->index];
if ((msg->index >= vinum_conf.plexes_used)
|| (plex->state == plex_unallocated)) {
sprintf(ioctl_reply->msg, "Invalid subdisk %d", msg->index);
ioctl_reply->error = EFAULT;
return;
}
set_plex_state(msg->index, plex_initializing, msg->force);
if (plex->state != plex_initializing) {
strcpy(ioctl_reply->msg, "Can't set state");
ioctl_reply->error = EINVAL;
} else {
ioctl_reply->error = 0;
for (sdno = 0; sdno < plex->subdisks; sdno++) {
sd = &SD[plex->sdnos[sdno]];
set_sd_state(plex->sdnos[sdno], sd_initializing, msg->force);
if (sd->state != sd_initializing) {
strcpy(ioctl_reply->msg, "Can't set state");
ioctl_reply->error = EINVAL;
break;
}
}
}
break;
default:
strcpy(ioctl_reply->msg, "Invalid object");
ioctl_reply->error = EINVAL;
}
break;
case object_up:
start_object(msg);
}
}

88
lkm/vinum/statetexts.h Normal file
View File

@ -0,0 +1,88 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: COPYRIGHT,v 1.1 1998/03/05 06:07:05 grog Exp grog $
*/
/* Created by ./makestatetext on Tue 4 Aug 15:53:16 CST 1998. Do not edit */
/* Drive state texts */
/* drive_state() indexes this table with an enum drivestate value and
 * DriveState() returns the index of a matching text, so the order of
 * the entries has to follow the enum (declared elsewhere). */
char *drivestatetext[] =
{
"unallocated",
"uninit",
"down",
"coming_up",
"up",
};
/* Subdisk state texts */
/* sd_state() indexes this table with an enum sdstate value and
 * SdState() returns the index of a matching text, so the order of
 * the entries has to follow the enum (declared elsewhere). */
char *sdstatetext[] =
{
"unallocated",
"uninit",
"init",
"initializing",
"empty",
"obsolete",
"stale",
"crashed",
"down",
"reborn",
"up",
};
/* Plex state texts */
/* plex_state() indexes this table with an enum plexstate value and
 * PlexState() returns the index of a matching text, so the order of
 * the entries has to follow the enum (declared elsewhere). */
char *plexstatetext[] =
{
"unallocated",
"init",
"faulty",
"down",
"reviving",
"initializing",
"corrupt",
"degraded",
"flaky",
"up",
};
/* Volume state texts */
/* volume_state() indexes this table with an enum volumestate value and
 * VolState() returns the index of a matching text, so the order of
 * the entries has to follow the enum (declared elsewhere). */
char *volstatetext[] =
{
"unallocated",
"uninit",
"down",
"up",
};

211
lkm/vinum/util.c Normal file
View File

@ -0,0 +1,211 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: util.c,v 1.7 1998/08/07 09:23:10 grog Exp grog $
*/
/* This file contains utility routines used both in kernel and user context */
#include "vinumhdr.h"
#include "statetexts.h"
#ifndef REALLYKERNEL
#include <stdio.h>
extern jmp_buf command_fail; /* return on a failed command */
#endif
/* The *_state() and plex_org() functions format values they don't know
 * into this single buffer and return a pointer to it, so the text is
 * only valid until the next such call. */
static char numeric_state[32]; /* temporary buffer for ASCII conversions */
/* Number of entries in the table x##statetext (drivestatetext etc.) */
#define STATECOUNT(x) (sizeof (x##statetext) / sizeof (char *))
/*
 * Return drive state as a string.  For a value outside drivestatetext
 * the result is "Invalid state <n>" in the shared buffer numeric_state.
 */
char *
drive_state(enum drivestate state)
{
    if (((unsigned) state) < STATECOUNT(drive))         /* we have a text for it */
        return drivestatetext[state];
    sprintf(numeric_state, "Invalid state %d", (int) state);
    return numeric_state;
}
/*
 * Return volume state as a string.  For a value outside volstatetext
 * the result is "Invalid state <n>" in the shared buffer numeric_state.
 */
char *
volume_state(enum volumestate state)
{
    if (((unsigned) state) < STATECOUNT(vol))           /* we have a text for it */
        return volstatetext[state];
    sprintf(numeric_state, "Invalid state %d", (int) state);
    return numeric_state;
}
/*
 * Return plex state as a string.  For a value outside plexstatetext
 * the result is "Invalid state <n>" in the shared buffer numeric_state.
 */
char *
plex_state(enum plexstate state)
{
    if (((unsigned) state) < STATECOUNT(plex))          /* we have a text for it */
        return plexstatetext[state];
    sprintf(numeric_state, "Invalid state %d", (int) state);
    return numeric_state;
}
/*
 * Return plex organization as a string.  For an unknown organization
 * the result is "Invalid org <n>" in the shared buffer numeric_state.
 */
char *
plex_org(enum plexorg org)
{
    if (org == plex_disorg)                 /* disorganized */
        return "disorg";
    if (org == plex_concat)                 /* concatenated plex */
        return "concat";
    if (org == plex_striped)                /* striped plex */
        return "striped";
    sprintf(numeric_state, "Invalid org %d", (int) org);
    return numeric_state;
}
/*
 * Return subdisk state as a string.  For a value outside sdstatetext
 * the result is "Invalid state <n>" in the shared buffer numeric_state.
 */
char *
sd_state(enum sdstate state)
{
    if (((unsigned) state) < STATECOUNT(sd))            /* we have a text for it */
        return sdstatetext[state];
    sprintf(numeric_state, "Invalid state %d", (int) state);
    return numeric_state;
}
/* Now convert in the other direction */
/* These are currently used only internally,
 * so we don't do too much error checking */

/*
 * Return the drive state whose text in drivestatetext equals text,
 * or -1 if there is none.
 */
enum drivestate
DriveState(char *text)
{
    int idx = 0;

    while (idx < STATECOUNT(drive)) {
        if (strcmp(text, drivestatetext[idx]) == 0)     /* found it */
            return (enum drivestate) idx;
        idx++;
    }
    return -1;
}
/*
 * Return the subdisk state whose text in sdstatetext equals text,
 * or -1 if there is none.
 */
enum sdstate
SdState(char *text)
{
    int idx = 0;

    while (idx < STATECOUNT(sd)) {
        if (strcmp(text, sdstatetext[idx]) == 0)        /* found it */
            return (enum sdstate) idx;
        idx++;
    }
    return -1;
}
/*
 * Return the plex state whose text in plexstatetext equals text,
 * or -1 if there is none.
 */
enum plexstate
PlexState(char *text)
{
    int idx = 0;

    while (idx < STATECOUNT(plex)) {
        if (strcmp(text, plexstatetext[idx]) == 0)      /* found it */
            return (enum plexstate) idx;
        idx++;
    }
    return -1;
}
/*
 * Return the volume state whose text in volstatetext equals text,
 * or -1 if there is none.
 */
enum volumestate
VolState(char *text)
{
    int i;

    for (i = 0; i < STATECOUNT(vol); i++)
        if (strcmp(text, volstatetext[i]) == 0)         /* found it */
            return (enum volumestate) i;    /* the enum is volumestate, not volstate */
    return -1;
}
/*
 * Take a decimal number with an optional scale factor and convert it to
 * a number of bytes.  A number without a scale factor is returned
 * unchanged.
 *
 * The scale factors (upper or lower case) are:
 *
 * b blocks (of 512 bytes)
 * k kilobytes (1024 bytes)
 * m megabytes (of 1024 * 1024 bytes)
 * g gigabytes (of 1024 * 1024 * 1024 bytes)
 *
 * Only the character which follows the digits is looked at.  If spec
 * doesn't start with a digit, or that character is none of the above,
 * we complain: with throw_rude_remark in the kernel, otherwise with a
 * message on stderr and a longjmp to command_fail.
 */
u_int64_t
sizespec(char *spec)
{
    u_int64_t value = 0;
    char *cp = spec;

    if ((*cp >= '0') && (*cp <= '9')) {     /* it's numeric */
        do {
            value = value * 10 + *cp - '0'; /* convert it */
            cp++;
        } while ((*cp >= '0') && (*cp <= '9'));

        switch (*cp) {                      /* what comes after the number? */
        case '\0':
            return value;

        case 'B':
        case 'b':
            return value * 512;

        case 'K':
        case 'k':
            return value * 1024;

        case 'M':
        case 'm':
            return value * 1024 * 1024;

        case 'G':
        case 'g':
            return value * 1024 * 1024 * 1024;
        }
    }
#ifdef REALLYKERNEL
    throw_rude_remark(EINVAL, "Invalid length specification: %s", spec);
#else
    fprintf(stderr, "Invalid length specification: %s", spec);
    longjmp(command_fail, -1);
#endif
    /* NOTREACHED */
    return -1;
}

512
lkm/vinum/vinum.c Normal file
View File

@ -0,0 +1,512 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: vinum.c,v 1.19 1998/08/13 05:24:02 grog Exp grog $
*/
/* STATIC marks the definitions which are meant to be file-local; it
 * expands to nothing for now, so they all have external linkage. */
#define STATIC /* nothing while we're testing XXX */
#define REALLYKERNEL
#include "vinumhdr.h"
#include "sys/sysproto.h" /* for sync(2) */
#ifdef DEBUG
#include <sys/reboot.h>
/* debug flag word, only present in DEBUG builds; starts out as 0 */
int debug = 0;
#endif
/* pointer to ioctl p parameter, to save passing it around */
/* (vinumattach sets it to curproc) */
struct proc *myproc;
/* Device switch tables which vinumattach registers.  Before FreeBSD 3
 * we supply a bdevsw which points to an (uninitialized) cdevsw; from
 * FreeBSD 3 on there is only a cdevsw.
 * NOTE(review): the initializers are positional, so they depend on the
 * field order of struct bdevsw/struct cdevsw of the release we are
 * built on; check them against <sys/conf.h>. */
#if __FreeBSD__ < 3
STATIC struct cdevsw vinum_cdevsw;
STATIC struct bdevsw vinum_bdevsw =
{
vinumopen, vinumclose, vinumstrategy, vinumioctl,
vinumdump, vinumsize, 0,
"vinum", &vinum_cdevsw, -1
};
#else /* goodbye, bdevsw */
STATIC struct cdevsw vinum_cdevsw =
{
vinumopen, vinumclose, vinumread, vinumwrite,
vinumioctl, nostop, nullreset, nodevtotty,
seltrue, nommap, vinumstrategy, "vinum",
NULL, -1, vinumdump, vinumsize,
D_DISK, 0, -1
};
#endif
/* Called by main() during pseudo-device attachment. */
STATIC void vinumattach(void *);
STATIC void vinumgetdisklabel(dev_t);
void vinum_scandisk(void);
int vinum_inactive(void);
void free_vinum(int);
#if __FreeBSD__ >= 3
/* Why aren't these declared anywhere? XXX */
int setjmp(jmp_buf);
void longjmp(jmp_buf, int);
#endif
extern jmp_buf command_fail; /* return here if config fails */
struct _vinum_conf vinum_conf; /* configuration information */
/* NOTE(review): nothing in the part of the file reviewed here reads or
 * writes this flag; check whether it is still needed. */
STATIC int vinum_devsw_installed = 0;
/*
 * Called by main() during pseudo-device attachment.  All we need
 * to do is allocate enough space for devices to be configured later, and
 * add devsw entries.
 *
 * We panic if the driver is already marked as loaded.  The initial
 * drive, volume, plex and subdisk tables are allocated and their
 * "allocated" and "used" counts are set in vinum_conf.
 *
 * dummy is not used.
 */
void
vinumattach(void *dummy)
{
    BROKEN_GDB;
    char *buf;                              /* pointer to temporary buffer */
    struct _ioctl_reply *ioctl_reply;       /* struct to return */
    struct uio uio;
    struct iovec iovec;

    /* modload should prevent multiple loads, so this is worth a panic */
    if ((vinum_conf.flags & VF_LOADED) != 0)            /* flags is a bit mask, not a pointer */
        panic("vinum: already loaded");
    printf("vinum: loaded\n");
    vinum_conf.flags |= VF_LOADED;          /* we're loaded now */

    /* We don't have a p pointer here, so take it from curproc */
    myproc = curproc;
#if __FreeBSD__ < 3
    bdevsw_add_generic(BDEV_MAJOR, CDEV_MAJOR, &vinum_bdevsw);
#else
    cdevsw_add_generic(BDEV_MAJOR, CDEV_MAJOR, &vinum_cdevsw);
#endif
#ifdef DEVFS
#error DEVFS not finished yet
#endif

    /*
     * Set up a uio for reading one sector.
     * NOTE(review): nothing below uses uio, iovec or buf; this looks
     * like a leftover and can probably be removed altogether.
     */
    uio.uio_iov = &iovec;
    uio.uio_iovcnt = 1;                     /* just one buffer */
    uio.uio_offset = 0;                     /* start at the beginning */
    uio.uio_resid = 512;                    /* one sector */
    uio.uio_segflg = UIO_SYSSPACE;          /* we're in system space */
    uio.uio_rw = UIO_READ;                  /* do we need this? */
    uio.uio_procp = curproc;                /* do it for our own process */
    iovec.iov_len = 512;
    buf = (char *) Malloc(iovec.iov_len);   /* get a buffer */
    CHECKALLOC(buf, "vinum: no memory\n");  /* can't get 512 bytes? */
    iovec.iov_base = buf;                   /* read into buf */

    /* allocate space: drives... */
    DRIVE = (struct drive *) Malloc(sizeof(struct drive) * INITIAL_DRIVES);
    CHECKALLOC(DRIVE, "vinum: no memory\n");
    vinum_conf.drives_allocated = INITIAL_DRIVES;       /* number of drive slots allocated */
    vinum_conf.drives_used = 0;             /* and number in use */

    /* volumes, ... */
    VOL = (struct volume *) Malloc(sizeof(struct volume) * INITIAL_VOLUMES);
    CHECKALLOC(VOL, "vinum: no memory\n");
    vinum_conf.volumes_allocated = INITIAL_VOLUMES;     /* number of volume slots allocated */
    vinum_conf.volumes_used = 0;            /* and number in use */

    /* plexes, ... */
    PLEX = (struct plex *) Malloc(sizeof(struct plex) * INITIAL_PLEXES);
    CHECKALLOC(PLEX, "vinum: no memory\n");
    vinum_conf.plexes_allocated = INITIAL_PLEXES;       /* number of plex slots allocated */
    vinum_conf.plexes_used = 0;             /* and number in use */

    /* and subdisks */
    SD = (struct sd *) Malloc(sizeof(struct sd) * INITIAL_SUBDISKS);
    CHECKALLOC(SD, "vinum: no memory\n");
    vinum_conf.subdisks_allocated = INITIAL_SUBDISKS;   /* number of sd slots allocated */
    vinum_conf.subdisks_used = 0;           /* and number in use */

    ioctl_reply = NULL;                     /* no reply on longjmp */

    /* buf is only referenced by our locals: give it back or it leaks */
    Free(buf);
}
#ifdef ACTUALLY_LKM_NOT_KERNEL /* stuff for LKMs */
/*
 * Report whether the driver is idle.
 *
 * Walks the volumes in use with the configuration locked.  Returns 0
 * (not inactive) as soon as a volume with a recorded owner pid is found,
 * otherwise 1 (inactive).
 */
int
vinum_inactive(void)
{
    BROKEN_GDB;
    int volno = 0;
    int idle;

    lock_config();
    /* skip over volumes nobody has open */
    while ((volno < vinum_conf.volumes_used) && (VOL[volno].pid == 0))
        volno++;
    idle = (volno >= vinum_conf.volumes_used);	/* ran off the end: nothing open */
    unlock_config();
    return idle;
}
/*
 * Free all structures.
 *
 * If cleardrive is 0, save the configuration first and then release each
 * drive with free_drive(); otherwise remove the configuration from each
 * drive with remove_drive().  In both cases the drive table itself is
 * freed (previously it was leaked on the cleardrive path, because the
 * pointer was lost in the final bzero()).  The subdisk, plex and volume
 * tables are then freed, together with the per-plex subdisk and region
 * lists, and vinum_conf is zeroed.
 *
 * Before coming here, ensure that no volumes are open.
 */
void
free_vinum(int cleardrive)
{
    BROKEN_GDB;
    int i;

    if (!cleardrive)
        save_config();				/* keep the config */

    if (DRIVE != NULL) {
        for (i = 0; i < vinum_conf.drives_used; i++) {
            if (cleardrive)
                remove_drive(i);		/* remove the drive */
            else
                free_drive(&DRIVE[i]);		/* close files and things */
        }
        Free(DRIVE);
    }

    if (SD != NULL)
        Free(SD);

    if (PLEX != NULL) {
        for (i = 0; i < vinum_conf.plexes_used; i++) {
            struct plex *plex = &vinum_conf.plex[i];

            if (plex->state != plex_unallocated) {	/* we have real data there */
                if (plex->sdnos)
                    Free(plex->sdnos);
                /* the region lists are only freed when their counts are non-zero */
                if (plex->unmapped_regions)
                    Free(plex->unmapped_region);
                if (plex->defective_regions)
                    Free(plex->defective_region);
            }
        }
        Free(PLEX);
    }

    if (VOL != NULL)
        Free(VOL);

    /* forget everything, including the now-stale table pointers */
    bzero(&vinum_conf, sizeof(vinum_conf));
}
MOD_MISC(vinum);
/*
 * LKM load hook: attach the driver via vinumattach().
 * Neither argument is used.  Always returns 0.
 */
STATIC int
vinum_load(struct lkm_table *lkmtp, int cmd)
{
    BROKEN_GDB;

    vinumattach(NULL);				/* no attach argument needed */
    return 0;
}
/*
 * LKM unload hook.
 *
 * Refuses with EBUSY while vinum_inactive() reports something open.
 * Otherwise: sync, release all vinum structures (keeping the on-disk
 * configuration), clear our device switch slots and return 0.
 */
STATIC int
vinum_unload(struct lkm_table *lkmtp, int cmd)
{
    BROKEN_GDB;
    struct sync_args dummyarg =
    {0};
#if __FreeBSD__ < 3
    int retval;
#endif

    if (!vinum_inactive())			/* something is still open */
        return EBUSY;

    printf("vinum: unloaded\n");
#if __FreeBSD__ < 3
    sync(curproc, &dummyarg, &retval);		/* write out buffers */
#else
    sync(curproc, &dummyarg);			/* write out buffers */
#endif
    free_vinum(0);				/* clean up, keep the config */
#if __FreeBSD__ < 3
    bdevsw[BDEV_MAJOR] = NULL;			/* clear bdevsw */
#endif
    cdevsw[CDEV_MAJOR] = NULL;			/* and cdevsw */
    return 0;
}
/*
 * Module entry point: hand load, unload and stat requests to
 * MOD_DISPATCH, using vinum_load() and vinum_unload() for the first two
 * and lkm_nullcmd for stat.
 */
int
vinum_mod(struct lkm_table *lkmtp, int cmd, int ver)
{
    BROKEN_GDB;

    MOD_DISPATCH(vinum, lkmtp, cmd, ver,	/* module name, LKM table, command, version */
        vinum_load, vinum_unload,		/* load and unload handlers */
        lkm_nullcmd);				/* stat handler */
}
#else /* not LKM */
#error "This driver must be compiled as a loadable kernel module"
#endif /* LKM */
/* ARGSUSED */
/*
 * Open a vinum object.
 *
 * The object kind is taken from the type field of the device number:
 *
 *  - volume: must be up; the open count is bumped, the flags of the first
 *    opener are recorded, and the caller's pid is stored.
 *  - plex, subdisk: exclusive per process; EBUSY if another pid already
 *    owns it, otherwise the caller's pid is stored.
 *  - super device: root only; bumps the global open count.
 *  - drive or anything else: ENODEV.
 *
 * Returns 0 on success, otherwise ENXIO (no such object), EIO (volume
 * down), EINVAL (object in an unusable state), EBUSY, EPERM or ENODEV.
 *
 * fmt is not used.
 */
int
vinumopen(dev_t dev,
    int flags,
    int fmt,
    struct proc *p)
{
    BROKEN_GDB;
    int s;					/* spl */
    unsigned int index;
    struct volume *vol;
    struct plex *plex;
    struct sd *sd;
    struct devcode *device;

    device = (struct devcode *) &dev;

    /* First, decide what we're looking at */
    switch (device->type) {
    case VINUM_VOLUME_TYPE:
        index = VOLNO(dev);
        if (index >= vinum_conf.volumes_used)
            return ENXIO;			/* no such device */
        vol = &VOL[index];

        switch (vol->state) {
        case volume_unallocated:
        case volume_uninit:
            return ENXIO;

        case volume_up:
            /*
             * An unreachable "if (error) return error;" used to sit
             * here, inside the splhigh() section; had it ever fired it
             * would have returned without splx().  Nothing may return
             * between splhigh() and splx().
             */
            s = splhigh();			/* quick lock */
            if (vol->opencount == 0)
                vol->openflags = flags;		/* set our flags */
            vol->opencount++;
            vol->pid = p->p_pid;		/* and say who we are (do we need this? XXX) */
            splx(s);
            return 0;

        case volume_down:
            return EIO;

        default:
            return EINVAL;
        }

    case VINUM_PLEX_TYPE:
        if (VOLNO(dev) >= vinum_conf.volumes_used)
            return ENXIO;
        index = PLEXNO(dev);			/* get plex index in vinum_conf */
        if (index >= vinum_conf.plexes_used)
            return ENXIO;			/* no such device */
        plex = &PLEX[index];

        switch (plex->state) {
        case plex_unallocated:
            return EINVAL;

        default:
            s = splhigh();
            if (plex->pid			/* it's open already */
                && (plex->pid != p->p_pid)) {	/* and not by us, */
                splx(s);
                return EBUSY;			/* one at a time, please */
            }
            plex->pid = p->p_pid;		/* and say who we are (do we need this? XXX) */
            splx(s);
            return 0;
        }

    case VINUM_SD_TYPE:
        if ((VOLNO(dev) >= vinum_conf.volumes_used) ||	/* no such volume */
            (PLEXNO(dev) >= vinum_conf.plexes_used))	/* or no such plex */
            return ENXIO;			/* no such device */
        index = SDNO(dev);			/* get the subdisk number */
        if (index >= vinum_conf.subdisks_used)
            return ENXIO;			/* no such device */
        sd = &SD[index];

        /*
         * Opening a subdisk is always a special operation, so we
         * ignore the state as long as it represents a real subdisk
         */
        switch (sd->state) {
        case sd_unallocated:
        case sd_uninit:
            return EINVAL;

        default:
            s = splhigh();
            if (sd->pid				/* it's open already */
                && (sd->pid != p->p_pid)) {	/* and not by us, */
                splx(s);
                return EBUSY;			/* one at a time, please */
            }
            sd->pid = p->p_pid;			/* and say who we are (do we need this? XXX) */
            splx(s);
            return 0;
        }

    case VINUM_DRIVE_TYPE:
    default:
        return ENODEV;				/* don't know what to do with these */

    case VINUM_SUPERDEV_TYPE:
        if (p->p_ucred->cr_uid == 0) {		/* root calling, */
            vinum_conf.opencount++;		/* one more opener */
            return 0;				/* no worries opening super dev */
        } else
            return EPERM;			/* you can't do that! */
    }
}
/* ARGSUSED */
/*
 * Close a vinum object, dispatching on the type field of the device
 * number.
 *
 *  - volume (must be up): the open count is reset and the owner forgotten.
 *  - plex, subdisk: the owner pid is cleared.
 *  - super device: the global open count is decremented for root callers.
 *  - drive or anything else: ENODEV.
 *
 * Returns 0 on success, otherwise ENXIO, EIO, EINVAL or ENODEV.
 * flags and fmt are not used.
 */
int
vinumclose(dev_t dev,
    int flags,
    int fmt,
    struct proc *p)
{
    BROKEN_GDB;
    unsigned int objno;
    struct volume *volp;
    struct plex *plexp;
    struct sd *sdp;
    struct devcode *devcode = (struct devcode *) &dev;

    objno = VOLNO(dev);

    /* what kind of object is being closed? */
    switch (devcode->type) {
    case VINUM_SUPERDEV_TYPE:
        if (p->p_ucred->cr_uid == 0)		/* root calling, */
            vinum_conf.opencount--;		/* one less opener */
        return 0;				/* closing the super device always succeeds */

    case VINUM_VOLUME_TYPE:
        if (objno >= vinum_conf.volumes_used)
            return ENXIO;			/* no such device */
        volp = &VOL[objno];

        switch (volp->state) {
        case volume_up:
            volp->opencount = 0;		/* nobody has it open any more */
            volp->pid = 0;			/* and forget who owned us */
            return 0;

        case volume_down:
            return EIO;

        case volume_unallocated:
        case volume_uninit:
            return ENXIO;

        default:
            return EINVAL;
        }

    case VINUM_PLEX_TYPE:
        if (VOLNO(dev) >= vinum_conf.volumes_used)
            return ENXIO;
        objno = PLEXNO(dev);			/* plex index in vinum_conf */
        if (objno >= vinum_conf.plexes_used)
            return ENXIO;			/* no such device */
        plexp = &PLEX[objno];
        plexp->pid = 0;
        return 0;

    case VINUM_SD_TYPE:
        if (VOLNO(dev) >= vinum_conf.volumes_used)	/* no such volume */
            return ENXIO;
        if (PLEXNO(dev) >= vinum_conf.plexes_used)	/* or no such plex */
            return ENXIO;
        objno = SDNO(dev);			/* the subdisk number */
        if (objno >= vinum_conf.subdisks_used)
            return ENXIO;			/* no such device */
        sdp = &SD[objno];
        sdp->pid = 0;
        return 0;

    case VINUM_DRIVE_TYPE:
    default:
        return ENODEV;				/* don't know what to do with these */
    }
}
/* size routine */
int
vinumsize(dev_t dev)
{
BROKEN_GDB;
struct volume *vol;
int size;
/* XXX This is bogus. We don't need to open
* a device to find its size */
vol = &VOL[VOLNO(dev)];
if (vol->state == volume_up)
size = vol->size;
else
return 0; /* err on the size of conservatism */
return size;
}
/*
 * Dump entry point.  Dumping to a vinum device is not implemented, so
 * every request is refused with ENXIO; dev is ignored.
 */
int
vinumdump(dev_t dev)
{
    (void) dev;					/* unused */
    return ENXIO;
}

214
lkm/vinum/vinumext.h Normal file
View File

@ -0,0 +1,214 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: vinumext.h,v 1.14 1998/08/11 00:03:57 grog Exp grog $
*/
/* vinumext.h: external definitions */
extern struct _vinum_conf vinum_conf; /* configuration information */
#ifdef DEBUG
extern int debug; /* debug flags; the type is spelled out rather than relying on implicit int */
#endif
/*
 * Give up on the current command if an allocation failed: when ptr is
 * NULL, print msg and longjmp() to the command_fail recovery point with
 * value -1.  Does nothing when ptr is non-NULL.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement (safe in an unbraced if/else), the arguments are
 * parenthesized, and msg is printed through "%s" so that it is never
 * interpreted as a format string.  command_fail must be in scope and
 * must have been armed with setjmp() by the caller.
 */
#define CHECKALLOC(ptr, msg) \
  do \
    { \
      if ((ptr) == NULL) \
        { \
          printf ("%s", (msg)); \
          longjmp (command_fail, -1); \
        } \
    } \
  while (0)
#ifndef KERNEL
struct vnode;
struct proc;
#endif
#ifdef KERNEL
int give_sd_to_plex(int plexno, int sdno);
int give_plex_to_volume(int volno, int plexno);
int check_drive(char *);
enum drive_label_info read_drive_label(struct drive *drive);
int parse_config(char *, struct keywordset *);
int parse_user_config(char *cptr, struct keywordset *keyset);
u_int64_t sizespec(char *spec);
int volume_index(struct volume *volume);
int plex_index(struct plex *plex);
int sd_index(struct sd *sd);
int drive_index(struct drive *drive);
int my_plex(int volno, int plexno);
int my_sd(int plexno, int sdno);
int get_empty_drive(void);
int find_drive(const char *name, int create);
int find_drive_by_dev(const char *devname, int create);
int get_empty_sd(void);
int find_subdisk(const char *name, int create);
void free_sd(int sdno);
void free_volume(int volno);
int get_empty_plex(void);
int find_plex(const char *name, int create);
void free_plex(int plexno);
int get_empty_volume(void);
int find_volume(const char *name, int create);
void config_subdisk(void);
void config_plex(void);
void config_volume(void);
void config_drive(void);
void updateconfig(int);
void update_sd_config(int sdno, int kernelstate);
void update_plex_config(int plexno, int kernelstate);
void update_volume_config(int volno, int kernelstate);
void update_config(void);
void drive_io_done(struct buf *);
int save_config(void);
void write_config(char *, int);
int start_config(void);
void finish_config(int);
void remove(struct vinum_ioctl_msg *msg);
void remove_drive_entry(int driveno, int force, int recurse);
void remove_sd_entry(int sdno, int force, int recurse);
void remove_plex_entry(int plexno, int force, int recurse);
void remove_volume_entry(int volno, int force, int recurse);
/* NOTE(review): probably a misspelling of checkkernel(), which is declared
 * again further down in this header with the same signature -- confirm that
 * nothing defines or calls this spelling before removing it. */
void checkernel(char *);
int open_drive(struct drive *, struct proc *);
void close_drive(struct drive *drive);
int driveio(struct drive *, void *, size_t, off_t, int);
/* #define read_drive(a, b, c, d) driveio (a, b, c, d, B_READ)
#define write_drive(a, b, c, d) driveio (a, b, c, d, B_WRITE) */
int set_drive_parms(struct drive *drive);
int init_drive(struct drive *);
/* void throw_rude_remark (int, struct _ioctl_reply *, char *, ...); XXX */
void throw_rude_remark(int, char *,...);
int read_drive(struct drive *drive, void *buf, size_t length, off_t offset);
int write_drive(struct drive *drive, void *buf, size_t length, off_t offset);
void format_config(char *config, int len);
void checkkernel(char *op);
void free_drive(struct drive *drive);
void down_drive(struct drive *drive);
void remove_drive(int driveno);
/* I/O */
d_open_t vinumopen;
d_close_t vinumclose;
d_strategy_t vinumstrategy;
d_ioctl_t vinumioctl;
d_dump_t vinumdump;
d_psize_t vinumsize;
d_read_t vinumread;
d_write_t vinumwrite;
int vinumstart(struct buf *bp, int reviveok);
int launch_requests(struct request *rq, int reviveok);
/* XXX Do we need this? */
int vinumpart(dev_t);
/* Memory allocation */
void vinum_meminfo(caddr_t data);
int vinum_mallocinfo(caddr_t data);
void expand_table(void **, int, int);
void add_defective_region(struct plex *plex, off_t offset, size_t length);
void add_unmapped_region(struct plex *plex, off_t offset, size_t length);
void rebuild_plex_unmappedlist(struct plex *plex);
struct request;
struct rqgroup *allocrqg(struct request *rq, int elements);
void deallocrqg(struct rqgroup *rqg);
/* State transitions */
int set_drive_state(int driveno, enum drivestate state, int force);
int set_sd_state(int sdno, enum sdstate state, enum setstateflags flags);
enum requeststatus checksdstate(struct sd *sd, struct request *rq, daddr_t diskaddr, daddr_t diskend);
int set_plex_state(int plexno, enum plexstate state, enum setstateflags flags);
int set_volume_state(int volumeno, enum volumestate state, enum setstateflags flags);
void get_volume_label(struct volume *vol, struct disklabel *lp);
int write_volume_label(int);
void start_object(struct vinum_ioctl_msg *);
void stop_object(struct vinum_ioctl_msg *);
void setstate(struct vinum_ioctl_msg *msg);
void vinum_label(int);
int vinum_writedisklabel(struct volume *, struct disklabel *);
int initsd(int);
int restart_plex(int plexno);
int revive_block(int plexno);
/* Auxiliary functions */
enum sdstates sdstatemap(struct plex *plex, int *sddowncount);
enum volplexstate vpstate(struct plex *plex);
#endif
enum keyword get_keyword(char *, struct keywordset *);
void listconfig(void);
char *drive_state(enum drivestate);
char *volume_state(enum volumestate);
char *plex_state(enum plexstate);
char *plex_org(enum plexorg);
char *sd_state(enum sdstate);
enum drivestate DriveState(char *text);
enum sdstate SdState(char *text);
enum plexstate PlexState(char *text);
enum volumestate VolState(char *text);
struct drive *validdrive(int driveno, struct _ioctl_reply *);
struct sd *validsd(int sdno, struct _ioctl_reply *);
struct plex *validplex(int plexno, struct _ioctl_reply *);
struct volume *validvol(int volno, struct _ioctl_reply *);
int tokenize(char *, char *[]);
void resetstats(struct vinum_ioctl_msg *msg);
/* Locking */
int lockvol(struct volume *vol);
void unlockvol(struct volume *vol);
int lockplex(struct plex *plex);
void unlockplex(struct plex *plex);
int lockrange(struct plex *plex, off_t first, off_t last);
void unlockrange(struct plex *plex, off_t first, off_t last);
int lock_config(void);
void unlock_config(void);
/*
 * expandrq: grow the rqe table of the request pointed to by prq by
 * RQELTS elements and zero the newly added elements.
 *
 * With DEBUG defined this is a macro; it is wrapped in do { } while (0)
 * so that it behaves as a single statement, and prq is parenthesized.
 * prq is still evaluated several times, so do not pass an expression
 * with side effects.  Without DEBUG it is an ordinary function.
 *
 * NOTE(review): the sizes are computed from prq->requests but the counter
 * that is bumped is prq->rqcount -- confirm that this is intended.
 */
#ifdef DEBUG
#define expandrq(prq) \
  do \
    { \
      expand_table ((void **) &(prq)->rqe, \
                    (prq)->requests * sizeof (struct rqelement), \
                    ((prq)->requests + RQELTS) * sizeof (struct rqelement) ); \
      bzero (&(prq)->rqe [(prq)->requests], RQELTS * sizeof (struct rqelement)); \
      (prq)->rqcount += RQELTS; \
    } \
  while (0)
#else
void expandrq(struct plexrq *);
#endif

104
lkm/vinum/vinumhdr.h Normal file
View File

@ -0,0 +1,104 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
*/
/* Header files used by all modules */
/* $Id: vinumhdr.h,v 1.7 1998/08/07 04:41:18 grog Exp grog $ */
#ifdef KERNEL
#define REALLYKERNEL
#endif
#include <sys/param.h>
#ifdef REALLYKERNEL
#include <sys/systm.h>
#include <sys/kernel.h>
#endif
#ifdef DEVFS
#error "DEVFS code not complete yet"
#include <sys/devfsext.h>
#endif /*DEVFS */
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/dkstat.h>
#include <sys/buf.h>
#include <sys/malloc.h>
#include <sys/uio.h>
#include <sys/namei.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/disklabel.h>
#include <ufs/ffs/fs.h>
#include <sys/mount.h>
#include <sys/device.h>
#undef KERNEL /* XXX */
#include <sys/disk.h>
#ifdef REALLYKERNEL
#define KERNEL
#endif
#include <sys/syslog.h>
#include <sys/fcntl.h>
#include <sys/vnode.h>
#include <sys/dkbad.h>
#include <setjmp.h>
#include <stdarg.h>
#include <vm/vm.h>
#ifdef USES_VM
/* XXX Do we need this? */
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_prot.h>
/* #include <vm/vm_page.h> */
#include <sys/vmmeter.h>
/* #include <machine/pmap.h> */
#include <machine/cputypes.h>
#endif /* USES_VM */
#include <vinumvar.h>
#include <vinumio.h>
#include "vinumkw.h"
#include "vinumext.h"
/*
 * Allocation wrappers used throughout vinum.
 *
 * When REALLYKERNEL is defined, Malloc() and Free() expand to MMalloc()
 * and FFree(), passing the file name and line number of the call site
 * along with the size or address.  Otherwise they expand to plain
 * malloc() and free().
 */
#undef Free /* defined in some funny net stuff */
#ifdef REALLYKERNEL
#define Malloc(x) MMalloc ((x), __FILE__, __LINE__) /* show where we came from */
#define Free(x) FFree ((x), __FILE__, __LINE__) /* show where we came from */
caddr_t MMalloc (int size, char *, int);
void FFree (void *mem, char *, int);
#else
#define Malloc(x) malloc ((x)) /* just the size */
#define Free(x) free ((x)) /* just the address */
#endif

132
lkm/vinum/vinumio.h Normal file
View File

@ -0,0 +1,132 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: vinumio.h,v 1.10 1998/08/10 05:46:19 grog Exp grog $
*/
/* Size in bytes of the message text in an ioctl reply */
#define MAX_IOCTL_REPLY 256
/* NOTE(review): a one-letter macro name is easy to collide with; every
 * ioctl definition in this header depends on it, so rename with care. */
#define L 'F' /* ID letter of our ioctls */
/*
 * VINUM_CREATE returns a buffer of this kind: an error number followed
 * by a message text.
 *
 * NOTE(review): this structure is larger than MAX_IOCTL_REPLY (it also
 * holds the error field), yet several ioctls in this header are declared
 * with a transfer size of MAX_IOCTL_REPLY -- confirm that the tail of msg
 * is never needed or written.
 */
struct _ioctl_reply {
int error;
char msg[MAX_IOCTL_REPLY];
};
/* ioctl requests */
#define BUFSIZE 1024 /* size of buffer, including continuations */
#define VINUM_CREATE _IOC(IOC_IN | IOC_OUT, L, 64, BUFSIZE) /* configure vinum */
#define VINUM_GETCONFIG _IOR(L, 65, struct _vinum_conf) /* get global config */
#define VINUM_DRIVECONFIG _IOWR(L, 66, struct drive) /* get drive config */
#define VINUM_SDCONFIG _IOWR(L, 67, struct sd) /* get subdisk config */
#define VINUM_PLEXCONFIG _IOWR(L, 68, struct plex) /* get plex config */
#define VINUM_VOLCONFIG _IOWR(L, 69, struct volume) /* get volume config */
#define VINUM_PLEXSDCONFIG _IOWR(L, 70, struct sd) /* get sd config for plex (plex, sdno) */
#define VINUM_GETFREELIST _IOWR(L, 71, struct drive_freelist) /* get freelist element (drive, fe) */
#define VINUM_SAVECONFIG _IOC(0, L, 72, 0) /* release locks, update, write config to disk */
#define VINUM_RESETCONFIG _IOC(0, L, 73, 0) /* trash config on disk */
#define VINUM_INIT _IOC(0, L, 74, 0) /* read config from disk */
#ifdef DEBUG
struct debuginfo {
int changeit;
int param;
};
#define VINUM_DEBUG _IOWR(L, 75, struct debuginfo) /* call the debugger from ioctl () */
#endif
enum objecttype {
drive_object,
sd_object,
plex_object,
volume_object,
invalid_object
};
/* Set the state of an object.
*
* NOTE(review): an earlier version of this comment described the argument
* as two integers (index, object type).  The ioctl handler passes the
* argument to setstate() as a struct vinum_ioctl_msg (declared later in
* this file), so that structure appears to be the real argument format --
* confirm.
*
* Return ioctl_reply
*/
#define VINUM_SETSTATE _IOC(IOC_IN | IOC_OUT, L, 76, MAX_IOCTL_REPLY) /* set an object state */
/* The state to set with VINUM_SETSTATE. Since
* each object has a different set of states, we
* need to translate later */
enum objectstate {
object_down,
object_initializing,
object_up
};
/* This structure is used for modifying objects
* (VINUM_SETSTATE, VINUM_REMOVE, VINUM_RESETSTATS, VINUM_ATTACH,
* VINUM_DETACH, VINUM_REPLACE).
*/
struct vinum_ioctl_msg {
int index; /* index of the object to modify */
enum objecttype type; /* kind of object that index refers to */
enum objectstate state; /* state to set (VINUM_SETSTATE) */
int force; /* do it even if it doesn't make sense */
int recurse; /* recurse (VINUM_REMOVE) */
int otherobject; /* superordinate object (attach),
* replacement object (replace) */
int rename; /* rename object (attach) */
int64_t offset; /* offset of subdisk (for attach) */
};
/* Further ioctl requests.  Those declared with MAX_IOCTL_REPLY as their
 * size are listed above as users of struct vinum_ioctl_msg, except
 * VINUM_LABEL, whose handler reads a volume index from the argument. */
#define VINUM_RELEASECONFIG _IOC(0, L, 77, 0) /* release locks and write config to disk */
#define VINUM_STARTCONFIG _IOC(0, L, 78, 0) /* start a configuration operation */
#define VINUM_MEMINFO _IOR(L, 79, struct meminfo) /* get memory usage summary */
#define VINUM_MALLOCINFO _IOWR(L, 80, struct mc) /* get specific malloc information [i] */
#define VINUM_LABEL _IOC(IOC_IN | IOC_OUT, L, 81, MAX_IOCTL_REPLY) /* label a volume */
#define VINUM_INITSD _IOW(L, 82, int) /* initialize a subdisk */
#define VINUM_REMOVE _IOC(IOC_IN | IOC_OUT, L, 83, MAX_IOCTL_REPLY) /* remove an object */
#define VINUM_GETUNMAPPED _IOWR(L, 84, struct plexregion) /* get unmapped element (plex, re) */
#define VINUM_GETDEFECTIVE _IOWR(L, 85, struct plexregion) /* get defective element (plex, re) */
#define VINUM_RESETSTATS _IOC(IOC_IN | IOC_OUT, L, 86, MAX_IOCTL_REPLY) /* reset object stats */
#define VINUM_ATTACH _IOC(IOC_IN | IOC_OUT, L, 87, MAX_IOCTL_REPLY) /* attach an object to a superordinate object */
#define VINUM_DETACH _IOC(IOC_IN | IOC_OUT, L, 88, MAX_IOCTL_REPLY) /* detach an object */
/* Argument describing a rename request; presumably passed with
 * VINUM_RENAME -- confirm against the ioctl handler. */
struct vinum_rename_msg {
int index; /* index of the object to rename */
int recurse; /* rename subordinate objects too */
enum objecttype type; /* kind of object that index refers to */
char newname[MAXNAME]; /* new name to give to object */
};
#define VINUM_RENAME _IOC(IOC_IN | IOC_OUT, L, 89, MAX_IOCTL_REPLY) /* rename an object */
#define VINUM_REPLACE _IOC(IOC_IN | IOC_OUT, L, 90, MAX_IOCTL_REPLY) /* replace an object */

787
lkm/vinum/vinumioctl.c Normal file
View File

@ -0,0 +1,787 @@
/* XXX replace all the checks on object validity with
* calls to valid<object> */
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: vinumioctl.c,v 1.1 1998/08/14 08:46:10 grog Exp grog $
*/
#define STATIC /* nothing while we're testing XXX */
#define REALLYKERNEL
#include "vinumhdr.h"
#include "sys/sysproto.h" /* for sync(2) */
#ifdef DEBUG
#include <sys/reboot.h>
#endif
jmp_buf command_fail; /* return on a failed command */
#if __FreeBSD__ >= 3
/* Why aren't these declared anywhere? XXX */
int setjmp(jmp_buf);
void longjmp(jmp_buf, int);
#endif
/* pointer to ioctl p parameter, to save passing it around */
struct proc *myproc;
int vinum_inactive(void);
void free_vinum(int);
void attachobject(struct vinum_ioctl_msg *);
void detachobject(struct vinum_ioctl_msg *);
void renameobject(struct vinum_rename_msg *);
void replaceobject(struct vinum_ioctl_msg *);
/* ioctl routine */
int
vinumioctl(dev_t dev,
#if __FreeBSD__ >= 3
u_long cmd,
#else
int cmd,
#endif
caddr_t data,
int flag,
struct proc *p)
{
BROKEN_GDB;
unsigned int objno;
int error = 0;
struct volume *vol;
unsigned int index; /* for transferring config info */
unsigned int sdno; /* for transferring config info */
int fe; /* free list element number */
struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* struct to return */
struct devcode *device = (struct devcode *) &dev;
/* First, decide what we're looking at */
switch (device->type) {
case VINUM_SUPERDEV_TYPE:
myproc = p; /* save pointer to process */
ioctl_reply = (struct _ioctl_reply *) data; /* save the address to reply to */
error = setjmp(command_fail); /* come back here on error */
if (error) /* bombed out */
return 0; /* the reply will contain meaningful info */
switch (cmd) {
/* XXX #ifdef DEBUG */
case VINUM_DEBUG:
boothowto |= RB_GDB; /* serial debug line */
if (((struct debuginfo *) data)->changeit) /* change debug settings */
debug = (((struct debuginfo *) data)->param);
else
Debugger("vinum debug");
ioctl_reply = (struct _ioctl_reply *) data; /* reinstate the address to reply to */
ioctl_reply->error = 0;
return 0;
/* XXX #endif */
case VINUM_CREATE: /* create a vinum object */
error = lock_config(); /* get the config for us alone */
if (error) /* can't do it, */
return error; /* give up */
error = setjmp(command_fail); /* come back here on error */
if (error == 0) { /* first time, */
parse_user_config((char *) data, &keyword_set); /* update the config */
ioctl_reply->error = 0; /* no error if we make it here */
} else if (ioctl_reply->error == 0) { /* longjmp, but no error status */
ioctl_reply->error = EINVAL; /* note that something's up */
ioctl_reply->msg[0] = '\0'; /* no message? */
}
unlock_config();
return 0; /* must be 0 to return the real error info */
case VINUM_GETCONFIG: /* get the configuration information */
bcopy(&vinum_conf, data, sizeof(vinum_conf));
return 0;
/* start configuring the subsystem */
case VINUM_STARTCONFIG:
return start_config(); /* just lock it */
/* Move the individual parts of the config to user space.
* Specify the index of the object in the first word of data,
* and return the object there
*/
case VINUM_DRIVECONFIG:
index = *(int *) data; /* get the index */
if (index >= (unsigned) vinum_conf.drives_used) /* can't do it */
return EFAULT; /* bang */
bcopy(&DRIVE[index], data, sizeof(struct drive)); /* copy the config item out */
return 0;
case VINUM_SDCONFIG:
index = *(int *) data; /* get the index */
if (index >= (unsigned) vinum_conf.subdisks_used) /* can't do it */
return EFAULT; /* bang */
bcopy(&SD[index], data, sizeof(struct sd)); /* copy the config item out */
return 0;
case VINUM_PLEXCONFIG:
index = *(int *) data; /* get the index */
if (index >= (unsigned) vinum_conf.plexes_used) /* can't do it */
return EFAULT; /* bang */
bcopy(&PLEX[index], data, sizeof(struct plex)); /* copy the config item out */
return 0;
case VINUM_VOLCONFIG:
index = *(int *) data; /* get the index */
if (index >= (unsigned) vinum_conf.volumes_used) /* can't do it */
return EFAULT; /* bang */
bcopy(&VOL[index], data, sizeof(struct volume)); /* copy the config item out */
return 0;
case VINUM_PLEXSDCONFIG:
index = *(int *) data; /* get the plex index */
sdno = ((int *) data)[1]; /* and the sd index */
if ((index >= (unsigned) vinum_conf.plexes_used) /* plex doesn't exist */
||(sdno >= PLEX[index].subdisks)) /* or it doesn't have this many subdisks */
return EFAULT; /* bang */
bcopy(&SD[PLEX[index].sdnos[sdno]], /* copy the config item out */
data,
sizeof(struct sd));
return 0;
case VINUM_SAVECONFIG:
if (VFLAGS & VF_CONFIGURING) { /* must be us, the others are asleep */
finish_config(1); /* finish the configuration and update it */
error = save_config(); /* save configuration to disk */
} else
error = EINVAL; /* queue up for this one, please */
return error;
case VINUM_RELEASECONFIG: /* release the config */
if (VFLAGS & VF_CONFIGURING) { /* must be us, the others are asleep */
finish_config(0); /* finish the configuration, don't change it */
error = save_config(); /* save configuration to disk */
} else
error = EINVAL; /* release what config? */
return error;
case VINUM_INIT:
ioctl_reply = (struct _ioctl_reply *) data; /* reinstate the address to reply to */
ioctl_reply->error = 0;
return 0;
case VINUM_RESETCONFIG:
if (vinum_inactive() && (vinum_conf.opencount < 2)) { /* if we're not active */
/* Note the open count. We may be called from v, so we'll be open.
* Keep the count so we don't underflow */
int oc = vinum_conf.opencount;
free_vinum(1); /* clean up everything */
printf("vinum: CONFIGURATION OBLITERATED\n");
vinum_conf.opencount = oc;
ioctl_reply = (struct _ioctl_reply *) data; /* reinstate the address to reply to */
ioctl_reply->error = 0;
return 0;
}
return EBUSY;
case VINUM_SETSTATE:
setstate((struct vinum_ioctl_msg *) data); /* set an object state */
return 0;
case VINUM_MEMINFO:
vinum_meminfo(data);
return 0;
case VINUM_MALLOCINFO:
return vinum_mallocinfo(data);
case VINUM_LABEL: /* label a volume */
ioctl_reply->error = write_volume_label(*(int *) data); /* index of the volume to label */
ioctl_reply->msg[0] = '\0'; /* no message */
return 0;
case VINUM_REMOVE:
remove((struct vinum_ioctl_msg *) data); /* remove an object */
return 0;
case VINUM_GETFREELIST: /* get a drive free list element */
index = *(int *) data; /* get the drive index */
fe = ((int *) data)[1]; /* and the free list element */
if ((index >= (unsigned) vinum_conf.drives_used) /* plex doesn't exist */
||(DRIVE[index].state == drive_unallocated))
return ENODEV;
if (fe >= DRIVE[index].freelist_entries) /* no such entry */
return ENOENT;
bcopy(&DRIVE[index].freelist[fe],
data,
sizeof(struct drive_freelist));
return 0;
case VINUM_GETDEFECTIVE: /* get a plex defective area element */
index = *(int *) data; /* get the plex index */
fe = ((int *) data)[1]; /* and the region number */
if ((index >= (unsigned) vinum_conf.plexes_used) /* plex doesn't exist */
||(PLEX[index].state == plex_unallocated))
return ENODEV;
if (fe >= PLEX[index].defective_regions) /* no such entry */
return ENOENT;
bcopy(&PLEX[index].defective_region[fe],
data,
sizeof(struct plexregion));
return 0;
case VINUM_GETUNMAPPED: /* get a plex unmapped area element */
index = *(int *) data; /* get the plex index */
fe = ((int *) data)[1]; /* and the region number */
if ((index >= (unsigned) vinum_conf.plexes_used) /* plex doesn't exist */
||(PLEX[index].state == plex_unallocated))
return ENODEV;
if (fe >= PLEX[index].unmapped_regions) /* no such entry */
return ENOENT;
bcopy(&PLEX[index].unmapped_region[fe],
data,
sizeof(struct plexregion));
return 0;
case VINUM_RESETSTATS:
resetstats((struct vinum_ioctl_msg *) data); /* reset object stats */
return 0;
/* attach an object to a superordinate object */
case VINUM_ATTACH:
attachobject((struct vinum_ioctl_msg *) data);
return 0;
/* detach an object from a superordinate object */
case VINUM_DETACH:
detachobject((struct vinum_ioctl_msg *) data);
return 0;
/* rename an object */
case VINUM_RENAME:
renameobject((struct vinum_rename_msg *) data);
return 0;
/* replace an object */
case VINUM_REPLACE:
replaceobject((struct vinum_ioctl_msg *) data);
return 0;
default:
/* FALLTHROUGH */
}
default:
#if __FreeBSD__>=3
printf("vinumioctl: type %d, sd %d, plex %d, major %x, volume %d, command %lx\n",
device->type,
device->sd,
device->plex,
device->major,
device->volume,
cmd); /* XXX */
#else
printf("vinumioctl: type %d, sd %d, plex %d, major %x, volume %d, command %x\n",
device->type,
device->sd,
device->plex,
device->major,
device->volume,
cmd); /* XXX */
#endif
return EINVAL;
case VINUM_DRIVE_TYPE:
case VINUM_PLEX_TYPE:
return EAGAIN; /* try again next week */
case VINUM_SD_TYPE:
objno = SDNO(dev);
switch (cmd) {
case VINUM_INITSD: /* initialize subdisk */
return initsd(objno);
default:
return EINVAL;
}
break;
case VINUM_VOLUME_TYPE:
objno = VOLNO(dev);
if ((unsigned) objno >= (unsigned) vinum_conf.volumes_used) /* not a valid volume */
return ENXIO;
vol = &VOL[objno];
if (vol->state != volume_up) /* not up, */
return EIO; /* I/O error */
switch (cmd) {
case DIOCGDINFO: /* get disk label */
get_volume_label(vol, (struct disklabel *) data);
break;
/* Care! DIOCGPART returns *pointers* to
* the caller, so we need to store this crap as well.
* And yes, we need it. */
case DIOCGPART: /* get partition information */
get_volume_label(vol, &vol->label);
((struct partinfo *) data)->disklab = &vol->label;
((struct partinfo *) data)->part = &vol->label.d_partitions[0];
break;
/* We don't have this stuff on hardware,
* so just pretend to do it so that
* utilities don't get upset. */
case DIOCWDINFO: /* write partition info */
case DIOCSDINFO: /* set partition info */
return 0; /* not a titty */
case DIOCWLABEL: /* set or reset label writeable */
if ((flag & FWRITE) == 0) /* not writeable? */
return EACCES; /* no, die */
if (*(int *) data != 0) /* set it? */
vol->flags |= VF_WLABEL; /* yes */
else
vol->flags &= ~VF_WLABEL; /* no, reset */
break;
default:
return ENOTTY; /* not my kind of ioctl */
}
break;
}
return 0; /* XXX */
}
/* The following four functions check the supplied
 * object index and return a pointer to the object
 * if it exists.  Otherwise they store an error message
 * and ENOENT in the reply and return NULL */
/* Return a pointer to drive number driveno if the index lies
 * within the used part of the drive table and the entry is
 * allocated.  Otherwise store "No such drive" and ENOENT in
 * *reply and return NULL. */
struct drive *
validdrive(int driveno, struct _ioctl_reply *reply)
{
    /* The unsigned comparison also rejects negative indices,
     * which would otherwise index in front of the table */
    if (((unsigned) driveno < (unsigned) vinum_conf.drives_used)
        && (DRIVE[driveno].state != drive_unallocated))
        return &DRIVE[driveno];
    strcpy(reply->msg, "No such drive");
    reply->error = ENOENT;
    return NULL;
}
/* Return a pointer to subdisk number sdno if the index lies
 * within the used part of the subdisk table and the entry is
 * allocated.  Otherwise store "No such subdisk" and ENOENT in
 * *reply and return NULL. */
struct sd *
validsd(int sdno, struct _ioctl_reply *reply)
{
    /* The unsigned comparison also rejects negative indices,
     * which would otherwise index in front of the table */
    if (((unsigned) sdno < (unsigned) vinum_conf.subdisks_used)
        && (SD[sdno].state != sd_unallocated))
        return &SD[sdno];
    strcpy(reply->msg, "No such subdisk");
    reply->error = ENOENT;
    return NULL;
}
/* Return a pointer to plex number plexno if the index lies
 * within the used part of the plex table and the entry is
 * allocated.  Otherwise store "No such plex" and ENOENT in
 * *reply and return NULL. */
struct plex *
validplex(int plexno, struct _ioctl_reply *reply)
{
    /* The unsigned comparison also rejects negative indices,
     * which would otherwise index in front of the table */
    if (((unsigned) plexno < (unsigned) vinum_conf.plexes_used)
        && (PLEX[plexno].state != plex_unallocated))
        return &PLEX[plexno];
    strcpy(reply->msg, "No such plex");
    reply->error = ENOENT;
    return NULL;
}
/* Return a pointer to volume number volno if the index lies
 * within the used part of the volume table and the entry is
 * allocated.  Otherwise store "No such volume" and ENOENT in
 * *reply and return NULL. */
struct volume *
validvol(int volno, struct _ioctl_reply *reply)
{
    /* The unsigned comparison also rejects negative indices,
     * which would otherwise index in front of the table */
    if (((unsigned) volno < (unsigned) vinum_conf.volumes_used)
        && (VOL[volno].state != volume_unallocated))
        return &VOL[volno];
    strcpy(reply->msg, "No such volume");
    reply->error = ENOENT;
    return NULL;
}
/* Reset the statistics counters of the object identified by
 * msg->type and msg->index.
 *
 * The reply is written over the message itself: reply->error
 * is set to 0 if the counters were reset, and to EINVAL if the
 * object type is not one that has counters, the index is out
 * of range or the table entry is unallocated.  Every path sets
 * reply->error, and an out-of-range index never touches an
 * object of a different type. */
void
resetstats(struct vinum_ioctl_msg *msg)
{
    struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;

    switch (msg->type) {
    case drive_object:
        /* unsigned comparisons also reject negative indices */
        if ((unsigned) msg->index < (unsigned) vinum_conf.drives_used) {
            struct drive *drive = &DRIVE[msg->index];

            if (drive->state != drive_unallocated) {
                drive->reads = 0;                   /* number of reads on this drive */
                drive->writes = 0;                  /* number of writes on this drive */
                drive->bytes_read = 0;              /* number of bytes read */
                drive->bytes_written = 0;           /* number of bytes written */
                reply->error = 0;
                return;
            }
        }
        break;                                      /* don't fall into the subdisk case */

    case sd_object:
        if ((unsigned) msg->index < (unsigned) vinum_conf.subdisks_used) {
            struct sd *sd = &SD[msg->index];

            if (sd->state != sd_unallocated) {
                sd->reads = 0;                      /* number of reads on this subdisk */
                sd->writes = 0;                     /* number of writes on this subdisk */
                sd->bytes_read = 0;                 /* number of bytes read */
                sd->bytes_written = 0;              /* number of bytes written */
                reply->error = 0;
                return;
            }
        }
        break;

    case plex_object:
        if ((unsigned) msg->index < (unsigned) vinum_conf.plexes_used) {
            struct plex *plex = &PLEX[msg->index];

            if (plex->state != plex_unallocated) {
                plex->reads = 0;                    /* number of reads on this plex */
                plex->writes = 0;                   /* number of writes on this plex */
                plex->bytes_read = 0;               /* number of bytes read */
                plex->bytes_written = 0;            /* number of bytes written */
                plex->multiblock = 0;               /* requests that needed more than one block */
                plex->multistripe = 0;              /* requests that needed more than one stripe */
                reply->error = 0;
                return;
            }
        }
        break;

    case volume_object:
        if ((unsigned) msg->index < (unsigned) vinum_conf.volumes_used) {
            struct volume *vol = &VOL[msg->index];

            if (vol->state != volume_unallocated) {
                vol->bytes_read = 0;                /* number of bytes read */
                vol->bytes_written = 0;             /* number of bytes written */
                vol->reads = 0;                     /* number of reads on this volume */
                vol->writes = 0;                    /* number of writes on this volume */
                vol->recovered_reads = 0;           /* reads recovered from another plex */
                reply->error = 0;
                return;
            }
        }
        break;

    case invalid_object:                            /* can't get this */
        break;
    }
    reply->error = EINVAL;                          /* nothing was reset */
}
/* attach an object to a superior object */
void
attachobject(struct vinum_ioctl_msg *msg)
{
struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
struct sd *sd;
struct plex *plex;
struct volume *vol;
switch (msg->type) {
case drive_object: /* you can't attach a drive to anything */
case volume_object: /* nor a volume */
case invalid_object: /* "this can't happen" */
reply->error = EINVAL;
reply->msg[0] = '\0'; /* vinum(8) doesn't do this */
return;
case sd_object:
sd = validsd(msg->index, reply);
if (sd == NULL) /* not a valid subdisk */
return;
plex = validplex(msg->otherobject, reply);
if (plex) {
if (sd->plexno >= 0) { /* already belong to a plex */
reply->error = EBUSY; /* no message, the user should check */
reply->msg[0] = '\0';
return;
}
sd->plexoffset = msg->offset; /* this is where we want it */
set_sd_state(sd->sdno, sd_stale, setstate_force); /* make sure it's stale */
give_sd_to_plex(plex->plexno, sd->sdno); /* and give it to the plex */
update_sd_config(sd->sdno, 0);
save_config();
reply->error = 0;
}
break;
case plex_object:
plex = validplex(msg->index, reply); /* get plex */
if (plex == NULL)
return;
if (plex->organization != plex_concat) { /* can't attach to striped and raid-5 */
reply->error = EINVAL; /* no message, the user should check */
reply->msg[0] = '\0';
return;
}
vol = validvol(msg->otherobject, reply); /* and volume information */
if (vol) {
if ((vol->plexes == MAXPLEX) /* we have too many already */
||(plex->volno >= 0)) { /* or the plex has an owner */
reply->error = EINVAL; /* no message, the user should check */
reply->msg[0] = '\0';
return;
}
set_plex_state(plex->plexno, plex_down, setstate_force); /* make sure it's down */
give_plex_to_volume(msg->otherobject, msg->index); /* and give it to the volume */
update_plex_config(plex->plexno, 0);
save_config();
if (plex->state == plex_reviving)
reply->error = EAGAIN; /* need to revive it */
else
reply->error = 0;
}
}
}
/* Mark the name of a detached object: shift it right by three
 * characters and put "ex-" in front of it.  name is a buffer
 * of maxlen characters; the result is truncated if it does not
 * fit and is always NUL terminated. */
static void
mark_name_detached(char *name, int maxlen)
{
    /* Copy the terminating NUL as well, so that no stale
     * characters are left behind a short name.  Source and
     * destination overlap, as they did in the original code;
     * this relies on bcopy coping with overlapping areas. */
    bcopy(name, &name[3], min(strlen(name) + 1, maxlen - 3));
    bcopy("ex-", name, 3);
    name[maxlen - 1] = '\0';
}

/* Detach an object from its superordinate object: a subdisk
 * (msg->index) from its plex, or a plex (msg->index) from its
 * volume.  Unless msg->force is set, the request is refused
 * with EBUSY if the superordinate object still needs the
 * object.  Objects named after their former owner get an
 * "ex-" prefix; for plexes, msg->recurse extends this to the
 * subdisks of the plex.  The result is returned in the reply,
 * which overlays the message. */
void
detachobject(struct vinum_ioctl_msg *msg)
{
    struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
    struct sd *sd;
    struct plex *plex;
    struct volume *vol;
    int sdno;
    int plexno;

    switch (msg->type) {
    case drive_object:                              /* you can't detach a drive from anything */
    case volume_object:                             /* nor a volume */
    case invalid_object:                            /* "this can't happen" */
        reply->error = EINVAL;
        reply->msg[0] = '\0';                       /* vinum(8) doesn't do this */
        return;

    case sd_object:
        sd = validsd(msg->index, reply);
        if (sd == NULL)
            return;
        if (sd->plexno < 0) {                       /* doesn't belong to a plex */
            reply->error = ENOENT;
            strcpy(reply->msg, "Subdisk is not attached");
            return;
        }
        plex = &PLEX[sd->plexno];                   /* valid plex number */
        if ((!msg->force)                           /* don't force things */
            && ((plex->state == plex_up)            /* and the plex is up */
                || ((plex->state == plex_flaky) && sd->state == sd_up))) { /* or flaky with this sd up */
            reply->error = EBUSY;                   /* we need this sd */
            reply->msg[0] = '\0';
            return;
        }
        sd->plexno = -1;                            /* anonymous sd */
        if (plex->subdisks == 1) {                  /* this was the only subdisk */
            Free(plex->sdnos);                      /* free the subdisk array */
            plex->sdnos = NULL;                     /* and note the fact */
            plex->subdisks_allocated = 0;           /* no subdisk space */
        } else {
            for (sdno = 0; sdno < plex->subdisks; sdno++) {
                if (plex->sdnos[sdno] == msg->index) /* found our subdisk */
                    break;
            }
            if (sdno < (plex->subdisks - 1))        /* not the last one, compact */
                bcopy(&plex->sdnos[sdno + 1],
                    &plex->sdnos[sdno],
                    (plex->subdisks - 1 - sdno) * sizeof(int));
        }
        plex->subdisks--;
        rebuild_plex_unmappedlist(plex);            /* rebuild the unmapped list */
        if (!bcmp(plex->name, sd->name, strlen(plex->name))) /* this subdisk is named after the plex */
            mark_name_detached(sd->name, MAXSDNAME);
        update_plex_config(plex->plexno, 0);
        /* NOTE(review): the original tested plex_striped twice
         * here; the second test was presumably meant for RAID-5
         * plexes.  Confirm and add it if so. */
        if (plex->organization == plex_striped)     /* we've just mutilated our plex, */
            set_plex_state(plex->plexno,            /* the data no longer matches */
                plex_down,
                setstate_force | setstate_configuring);
        update_sd_config(sd->sdno, 0);
        save_config();
        reply->error = 0;
        return;

    case plex_object:
        plex = validplex(msg->index, reply);        /* get plex */
        if (plex == NULL)
            return;
        if (plex->volno >= 0) {
            int volno = plex->volno;

            vol = &VOL[volno];
            if ((!msg->force)                       /* don't force things */
                && ((vol->state == volume_up)       /* and the volume is up */
                    && (vol->plexes == 1))) {       /* and this is the last plex */
                /* XXX As elsewhere, check whether we will lose
                 * mapping by removing this plex */
                reply->error = EBUSY;               /* we need this plex */
                reply->msg[0] = '\0';
                return;
            }
            plex->volno = -1;                       /* anonymous plex */
            for (plexno = 0; plexno < vol->plexes; plexno++) {
                if (vol->plex[plexno] == msg->index) /* found our plex */
                    break;
            }
            /* Compact the volume's list of plex numbers.  The
             * entries moved are those of vol->plex[], not
             * struct volume elements. */
            if (plexno < (vol->plexes - 1))         /* not the last one, compact */
                bcopy(&vol->plex[plexno + 1],
                    &vol->plex[plexno],
                    (vol->plexes - 1 - plexno) * sizeof(int));
            vol->plexes--;
            if (!bcmp(vol->name, plex->name, strlen(vol->name))) { /* this plex is named after the volume */
                /* First, rename the subdisks which are named after the plex */
                if (msg->recurse) {
                    for (sdno = 0; sdno < plex->subdisks; sdno++) {
                        sd = &SD[plex->sdnos[sdno]];
                        if (!bcmp(plex->name, sd->name, strlen(plex->name))) /* subdisk is named after the plex */
                            mark_name_detached(sd->name, MAXSDNAME);
                    }
                }
                mark_name_detached(plex->name, MAXPLEXNAME);
            }
            update_plex_config(plex->plexno, 0);
            update_volume_config(volno, 0);
            save_config();
            reply->error = 0;
        } else {
            reply->error = ENOENT;
            strcpy(reply->msg, "Plex is not attached");
        }
    }
}
/* Rename the object identified by msg->type and msg->index to
 * msg->newname.  The request fails with EEXIST if an object of
 * the same type already has that name, and with the error set
 * by the valid*() function if the index does not refer to an
 * existing object.  The new name is truncated to the size of
 * the object's name field and is always NUL terminated.  The
 * result is returned in the reply, which overlays the message. */
void
renameobject(struct vinum_rename_msg *msg)
{
    struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
    struct drive *drive;
    struct sd *sd;
    struct plex *plex;
    struct volume *vol;

    switch (msg->type) {
    case drive_object:
        if (find_drive(msg->newname, 0) >= 0) {     /* we have that name already, */
            reply->error = EEXIST;
            reply->msg[0] = '\0';
            return;
        }
        drive = validdrive(msg->index, reply);
        if (drive) {
            bcopy(msg->newname, drive->label.name, MAXDRIVENAME);
            drive->label.name[MAXDRIVENAME - 1] = '\0'; /* bcopy doesn't terminate */
            save_config();
            reply->error = 0;
        }
        return;

    case sd_object:
        if (find_subdisk(msg->newname, 0) >= 0) {   /* we have that name already, */
            reply->error = EEXIST;
            reply->msg[0] = '\0';
            return;
        }
        sd = validsd(msg->index, reply);
        if (sd) {
            bcopy(msg->newname, sd->name, MAXSDNAME);
            sd->name[MAXSDNAME - 1] = '\0';         /* bcopy doesn't terminate */
            update_sd_config(sd->sdno, 0);
            save_config();
            reply->error = 0;
        }
        return;

    case plex_object:
        if (find_plex(msg->newname, 0) >= 0) {      /* we have that name already, */
            reply->error = EEXIST;
            reply->msg[0] = '\0';
            return;
        }
        plex = validplex(msg->index, reply);
        if (plex) {
            bcopy(msg->newname, plex->name, MAXPLEXNAME);
            plex->name[MAXPLEXNAME - 1] = '\0';     /* bcopy doesn't terminate */
            update_plex_config(plex->plexno, 0);
            save_config();
            reply->error = 0;
        }
        return;

    case volume_object:
        if (find_volume(msg->newname, 0) >= 0) {    /* we have that name already, */
            reply->error = EEXIST;
            reply->msg[0] = '\0';
            return;
        }
        vol = validvol(msg->index, reply);
        if (vol) {
            bcopy(msg->newname, vol->name, MAXVOLNAME);
            vol->name[MAXVOLNAME - 1] = '\0';       /* bcopy doesn't terminate */
            update_volume_config(msg->index, 0);
            save_config();
            reply->error = 0;
        }
        return;

    case invalid_object:
        reply->error = EINVAL;
        reply->msg[0] = '\0';
    }
}
/* Replace one object with another */
void
replaceobject(struct vinum_ioctl_msg *msg)
{
struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
reply->error = ENODEV; /* until I know how to do this */
strcpy(reply->msg, "replace not implemented yet");
/* save_config (); */
}

120
lkm/vinum/vinumkw.h Normal file
View File

@ -0,0 +1,120 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: vinumkw.h,v 1.7 1998/08/07 02:35:51 grog Exp grog $
*/
/* Command keywords that vinum knows. These include both user-level
* and kernel-level stuff */
/* Our complete vocabulary. The names of the commands are
* the same as the identifier without the kw_ at the beginning
* (i.e. kw_create defines the "create" keyword). Preprocessor
* magic in parser.c does the rest.
*
* Enumerators of the form kw_x = kw_y are aliases: they have
* the same value as kw_y, and the enumerator after them
* continues counting from that value. The values of the
* later enumerators therefore depend on the order of this
* list. */
enum keyword {
kw_create,
kw_modify,
kw_list,
kw_l = kw_list, /* alias for kw_list */
kw_ld, /* list drive */
kw_ls, /* list subdisk */
kw_lp, /* list plex */
kw_lv, /* list volume */
kw_set,
kw_rm,
kw_start,
kw_stop,
kw_drive,
kw_sd,
kw_subdisk = kw_sd, /* alias for kw_sd */
kw_plex,
kw_volume,
kw_vol = kw_volume, /* alias for kw_volume */
kw_read,
kw_readpol,
kw_org,
kw_name,
kw_concat,
kw_striped,
kw_raid5,
kw_driveoffset,
kw_plexoffset,
kw_len,
kw_length = kw_len, /* alias for kw_len */
kw_state,
kw_setupstate,
kw_d, /* flag names */
kw_f,
kw_r,
kw_s,
kw_v,
kw_round, /* round robin */
kw_prefer, /* prefer plex */
kw_device,
kw_init,
kw_label,
kw_resetconfig,
kw_writethrough,
kw_writeback,
kw_raw,
kw_resetstats,
kw_attach,
kw_detach,
kw_rename,
kw_printconfig,
kw_replace,
kw_detached,
/* The following keywords exist only if DEBUG is defined */
#ifdef DEBUG
kw_debug, /* go into debugger */
kw_info,
#endif
kw_invalid_keyword = -1 /* distinct from every keyword above, which are all >= 0 */
};
/* A keyword table entry: the text of a keyword together with
* the enum keyword value which represents it */
struct _keywords {
char *name; /* text of the keyword */
enum keyword keyword; /* and its enum value */
};
/* A set of keywords: a pointer to keyword table entries and
* a size */
struct keywordset {
int size; /* presumably the number of entries at k -- TODO confirm */
struct _keywords *k; /* the keyword table entries */
};
/* Keyword tables and keyword sets. They are only declared
* here; the definitions are not in this header. */
extern struct _keywords keywords[];
extern struct _keywords flag_keywords[];
extern struct keywordset keyword_set;
extern struct keywordset flag_set;

213
lkm/vinum/vinumstate.h Normal file
View File

@ -0,0 +1,213 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: vinumstate.h,v 1.11 1998/08/04 06:22:49 grog Exp grog $
*/
/* This file gets read by makestatetext to create text files
* with the names of the states, so don't change the file
* format */
/* volume states */
/* NOTE(review): the first two comments below appear to describe
* the enumerator on the line above them, and the third the
* enumerator below it (volume_up) -- confirm */
enum volumestate {
volume_unallocated,
/* present but unused. Must be 0 */
volume_uninit,
/* mentioned elsewhere but not defined */
volume_down,
/* The volume is up and functional, but not all plexes may be available */
volume_up,
volume_laststate = volume_up /* last value, for table dimensions */
};
/* plex states */
enum plexstate {
/* An empty entry, not a plex at all. */
plex_unallocated,
/* The plex has been allocated, but its configuration
* is not complete */
plex_init,
/* A plex which has gone completely down because of
* I/O errors. */
plex_faulty,
/* A plex which has been taken down by the
* administrator. */
plex_down,
/* A plex which is currently being brought up after
* being not up. This involves copying data from
* another plex */
plex_reviving,
/* A plex which is being initialized */
plex_initializing,
/* *** The remaining states represent plexes which are
* at least partially up. Keep these separate so that
* they can be checked more easily. */
/* A plex entry which is at least partially up. Not
* all subdisks are available, and an inconsistency
* has occurred. If no other plex is uncorrupted,
* the volume is no longer consistent. */
plex_corrupt,
plex_firstup = plex_corrupt, /* first "up" state */
/* A plex entry which is at least partially up. Not
* all subdisks are available, but so far no
* inconsistency has occurred (this will change with
* the first write to the address space occupied by
* a defective subdisk). A RAID 5 plex with one subdisk
* down will remain degraded even after a write */
plex_degraded,
/* A plex which is really up, but which has a reborn
* subdisk which we don't completely trust, and
* which we don't want to read if we can avoid it */
plex_flaky,
/* A plex entry which is completely up. All subdisks
* are up. */
plex_up,
plex_laststate = plex_up /* last value, for table dimensions */
};
/* subdisk states. The comments marked *** divide the list into groups of states */
enum sdstate {
/* An empty entry, not a subdisk at all. */
sd_unallocated,
/* A subdisk entry which has not been created
* completely. Some fields may be empty.
*/
sd_uninit,
/* A subdisk entry which has been created completely.
* All fields are correct, but the disk hasn't
* been updated.
*/
sd_init,
/* A subdisk entry which has been created completely and
* which is currently being initialized */
sd_initializing,
/* A subdisk entry which has been created completely.
* All fields are correct, and the disk has been
* updated, but there is no data on the disk.
*/
sd_empty,
/* *** The following states represent invalid data */
/* A subdisk entry which has been created completely.
* All fields are correct, the disk has been updated,
* and the data was valid, but since then the drive
* has gone down, and as a result updates have been
* missed.
*/
sd_obsolete,
/* A subdisk entry which has been created completely.
* All fields are correct, the disk has been updated,
* and the data was valid, but since then the drive
* has gone down, updates have been lost, and then
* the drive came up again.
*/
sd_stale,
/* *** The following states represent valid, inaccessible data */
/* A subdisk entry which has been created completely.
* All fields are correct, the disk has been updated,
* and the data was valid, but since then the drive
* has gone down. No attempt has been made to write
* to the subdisk since the crash.
*/
sd_crashed,
/* A subdisk entry which was up, which contained
* valid data, and which was taken down by the
* administrator. The data is valid. */
sd_down,
/* *** The following states represent accessible subdisks
* with valid data */
/* A subdisk entry which has been created completely.
* All fields are correct, the disk has been updated,
* and the data was valid, but since then the drive
* has gone down and up again. No updates were lost,
* but it is possible that the subdisk has been
* damaged. We won't read from this subdisk if we
* have a choice. If this is the only subdisk which
* covers this address space in the plex, we set its
* state to sd_up under these circumstances, so this
* status implies that there is another subdisk to
* fulfil the request.
*/
sd_reborn,
/* A subdisk entry which has been created completely.
* All fields are correct, the disk has been updated,
* and the data is valid.
*/
sd_up,
sd_laststate = sd_up /* last value, for table dimensions */
};
/* drive states. Each comment describes the enumerator on the line above it */
enum drivestate {
drive_unallocated,
/* present but unused. Must be 0 */
drive_uninit,
/* just mentioned in some other config entry */
drive_down,
/* not accessible */
drive_coming_up,
/* in the process of being brought up */
drive_up,
/* up and running */
drive_laststate = drive_up /* last value, for table dimensions */
};

510
lkm/vinum/vinumvar.h Normal file
View File

@ -0,0 +1,510 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: vinumvar.h,v 1.15 1998/08/14 06:36:41 grog Exp grog $
*/
/* XXX gdb can't find our global pointers, so use this kludge to
* point to them locally. Remove after testing.
*
* The macro expands to the declaration of a pointer VC which
* is initialized to the address of vinum_conf. The expansion
* has no trailing semicolon. */
#define BROKEN_GDB struct _vinum_conf *VC = &vinum_conf
#include <sys/time.h>
#include "vinumstate.h"
/* Some configuration maxima.  They're an enum because
 * we can't define global constants.  Sorry about that.
 *
 * These aren't as bad as they look: most of them
 * are soft limits.  Only the MAXCONFIG parameter is set in stone
 *
 * The macros between the enumerators build and take apart
 * device numbers.  Their arguments are parenthesized in the
 * expansions, so that expressions such as DEVTYPE(a | b) are
 * evaluated as a whole.
 */
enum constants {
    VINUM_HEADER = 512,                             /* size of header on disk */
    MAXCONFIGLINE = 1024,                           /* maximum size of a single config line */
    /* XXX Do we still need this? */
    MINVINUMSLICE = 1048576,                        /* minimum size of a slice */

    CDEV_MAJOR = 91,                                /* major number for character device */
    BDEV_MAJOR = 25,                                /* and block device */

    ROUND_ROBIN_READPOL = -1,                       /* round robin read policy */

    /* type field in minor number */
    VINUM_VOLUME_TYPE = 0,
    VINUM_PLEX_TYPE = 1,
    VINUM_SD_TYPE = 2,
    VINUM_DRIVE_TYPE = 3,
    VINUM_SUPERDEV_TYPE = 4,                        /* super device. */

    /* Shifts for the individual fields in the device */
    VINUM_TYPE_SHIFT = 28,
    VINUM_VOL_SHIFT = 0,
    VINUM_PLEX_SHIFT = 16,
    VINUM_SD_SHIFT = 20,
    VINUM_VOL_WIDTH = 8,
    VINUM_PLEX_WIDTH = 3,
    VINUM_SD_WIDTH = 8,
    MAJORDEV_SHIFT = 8,

/* Create a block device number from volume number v, plex
 * number p, subdisk number s and object type t */
#define VINUMBDEV(v,p,s,t) ((BDEV_MAJOR << MAJORDEV_SHIFT) \
    | ((v) << VINUM_VOL_SHIFT) \
    | ((p) << VINUM_PLEX_SHIFT) \
    | ((s) << VINUM_SD_SHIFT) \
    | ((t) << VINUM_TYPE_SHIFT) )

/* And a character device number */
#define VINUMCDEV(v,p,s,t) ((CDEV_MAJOR << MAJORDEV_SHIFT) \
    | ((v) << VINUM_VOL_SHIFT) \
    | ((p) << VINUM_PLEX_SHIFT) \
    | ((s) << VINUM_SD_SHIFT) \
    | ((t) << VINUM_TYPE_SHIFT) )

/* extract device type */
#define DEVTYPE(x) (((x) >> VINUM_TYPE_SHIFT) & 7)

/* extract volume number */
#define VOLNO(x) ((x) & ((1 << VINUM_VOL_WIDTH) - 1))

/* extract plex number: look up the plex field of the device
 * number in the plex list of the volume */
#define PLEXNO(x) (VOL [VOLNO (x)].plex [((x) >> VINUM_PLEX_SHIFT) & ((1 << VINUM_PLEX_WIDTH) - 1)])

/* extract subdisk number: look up the subdisk field of the
 * device number in the subdisk list of the plex */
#define SDNO(x) (PLEX [PLEXNO (x)].sdnos [((x) >> VINUM_SD_SHIFT) & ((1 << VINUM_SD_WIDTH) - 1)])

/* extract drive number */
#define DRIVENO(x) (SD [SDNO (x)].driveno)

    VINUM_SUPERDEV = VINUMBDEV(0, 0, 0, VINUM_SUPERDEV_TYPE), /* superdevice number */

    /* the number of object entries to cater for initially, and also the
     * value by which they are incremented.  It doesn't take long
     * to extend them, so theoretically we could start with 1 of each, but
     * it's untidy to allocate such small areas.  These values are
     * probably too small.
     */
    INITIAL_DRIVES = 4,
    INITIAL_VOLUMES = 4,
    INITIAL_PLEXES = 8,
    INITIAL_SUBDISKS = 16,
    INITIAL_SUBDISKS_IN_PLEX = 4,                   /* number of subdisks to allocate to a plex */
    INITIAL_SUBDISKS_IN_DRIVE = 4,                  /* number of subdisks to allocate to a drive */
    INITIAL_DRIVE_FREELIST = 16,                    /* number of entries in drive freelist */
    PLEX_REGION_TABLE_SIZE = 8,                     /* number of entries in plex region tables */
    INITIAL_LOCKS = 8,                              /* number of locks to allocate to a volume */
    DEFAULT_REVIVE_BLOCKSIZE = 32768,               /* size of block to transfer in one op */
};
/* device numbers */
/*
* 31 30 28 27 20 19 18 16 15 8 7 0
* |-----------------------------------------------------------------------------------------------|
* |X | Type | Subdisk number | X| Plex | Major number | volume number |
* |-----------------------------------------------------------------------------------------------|
*
* 0x2 03 1 19 06
*/
/* The device number as a set of bit fields. The field widths
* add up to 32 bits. */
struct devcode {
/* CARE. These fields assume a big-endian word. On a
* little-endian system, they're the wrong way around */
/* NOTE(review): the order in which bit fields are allocated
* within a word is implementation-defined; confirm for the
* compilers in use that this layout matches the diagram
* above. */
unsigned volume:8; /* up to 256 volumes */
unsigned major:8; /* this is where the major number fits */
unsigned plex:3; /* up to 8 plexes per volume */
unsigned unused:1; /* up for grabs */
unsigned sd:8; /* up to 256 subdisks per plex */
unsigned type:3; /* type of object */
/* type field (names as in enum constants)
VINUM_VOLUME_TYPE = 0,
VINUM_PLEX_TYPE = 1,
VINUM_SD_TYPE = 2,
VINUM_DRIVE_TYPE = 3,
VINUM_SUPERDEV_TYPE = 4, */
unsigned signbit:1; /* to make 32 bits */
};
/* Path names of the vinum device directories and of the
* control device */
#define VINUM_DIR "/dev/vinum"
#define VINUM_RDIR "/dev/rvinum"
#define VINUM_SUPERDEV_NAME VINUM_DIR"/control"
/* Sizes of object names. NOTE(review): these look like the
* sizes of the name fields including the terminating NUL --
* confirm against the structure definitions */
#define MAXDRIVENAME 32 /* maximum length of a device name */
#define MAXSDNAME 64 /* maximum length of a subdisk name */
#define MAXPLEXNAME 64 /* maximum length of a plex name */
#define MAXVOLNAME 64 /* maximum length of a volume name */
#define MAXNAME 64 /* maximum length of any name */
#define MAXVOLPLEX 8 /* maximum number of plexes in a volume */
/* Flags for all objects. Most of them only apply to
* specific objects, but we have space for all in any
* 32 bit flags word.
*
* Each flag is a single bit, so flags can be combined with |
* and tested with &. The value 4 is not assigned. */
enum objflags {
VF_LOCKED = 1, /* somebody has locked access to this object */
VF_LOCKING = 2, /* we want access to this object */
VF_WRITETHROUGH = 8, /* volume: write through */
VF_INITED = 0x10, /* unit has been initialized */
VF_WLABEL = 0x20, /* label area is writable */
VF_LABELLING = 0x40, /* unit is currently being labelled */
VF_WANTED = 0x80, /* someone is waiting to obtain a lock */
VF_RAW = 0x100, /* raw volume (no file system) */
VF_LOADED = 0x200, /* module is loaded */
VF_CONFIGURING = 0x400, /* somebody is changing the config */
VF_WILL_CONFIGURE = 0x800, /* somebody wants to change the config */
VF_CONFIG_INCOMPLETE = 0x1000, /* haven't finished changing the config */
VF_CONFIG_SETUPSTATE = 0x2000, /* set a volume up if all plexes are empty */
VF_READING_CONFIG = 0x4000, /* we're reading config database from disk */
VF_KERNELOP = 0x8000, /* we're performing ops from kernel space */
};
/* Global configuration information for the vinum subsystem.
 * Holds one table per object type together with the number of table
 * slots allocated and the number currently in use. */
struct _vinum_conf {
/* Pointers to vinum structures (the object tables) */
struct drive *drive;
struct sd *sd;
struct plex *plex;
struct volume *volume;
/* the number allocated */
int drives_allocated;
int subdisks_allocated;
int plexes_allocated;
int volumes_allocated;
/* and the number currently in use */
int drives_used;
int subdisks_used;
int plexes_used;
int volumes_used;
int flags; /* VF_* bits from enum objflags (e.g. VF_LOADED) */
int opencount; /* number of times we've been opened */
/* Tested with #if, so DEBUG has to be defined to a non-zero value
 * (for example with -DDEBUG) for these members to exist */
#if DEBUG
int lastrq;
struct buf *lastbuf;
#endif
};
/* Use these defines to simplify code: shorthand for the object tables
 * and the flags word in the global vinum_conf structure. They expand
 * to lvalues, so they can be assigned to as well as indexed. */
#define DRIVE vinum_conf.drive
#define SD vinum_conf.sd
#define PLEX vinum_conf.plex
#define VOL vinum_conf.volume
#define VFLAGS vinum_conf.flags
/* Slice header
 * Vinum drives start with this structure. Sector numbers assume
 * 512 byte sectors and follow from VINUM_LABEL_OFFSET (4096) and
 * VINUM_CONFIG_OFFSET (4608) below:
 *
 * Sector
 * |--------------------------------------|
 * | PDP-11 memorial boot block | 0
 * |--------------------------------------|
 * | Disk label, maybe | 1
 * |--------------------------------------|
 * | Slice definition (vinum_hdr) | 8
 * |--------------------------------------|
 * | |
 * | Configuration info, first copy | 9
 * | |
 * |--------------------------------------|
 * | |
 * | Configuration info, second copy | 9 + size of config
 * | |
 * |--------------------------------------|
 */
/* Sizes and offsets of our information.
 * The first four values are in bytes (label offset + label size gives
 * the offset of the first config copy: 4096 + 512 = 4608). DATASTART
 * is divided by DEV_BSIZE and is therefore in DEV_BSIZE blocks. */
enum {
VINUM_LABEL_OFFSET = 4096, /* offset of vinum label */
VINUMHEADERLEN = 512, /* size of vinum label */
VINUM_CONFIG_OFFSET = 4608, /* offset of first config copy */
MAXCONFIG = 65536, /* and size of config copy */
/* first block after the two config copies */
DATASTART = (MAXCONFIG * 2 + VINUM_CONFIG_OFFSET) / DEV_BSIZE /* this is where the data starts */
};
/* hostname is 256 bytes long, but we don't need to shlep
 * multiple copies in vinum. We use the host name just
 * to identify this system, and 32 bytes should be ample
 * for that purpose */
#define VINUMHOSTNAMELEN 32
/* Identification of a vinum drive. Embedded both in the on-disk header
 * (struct vinum_hdr) and in the in-core struct drive. */
struct vinum_label {
char sysname[VINUMHOSTNAMELEN]; /* system name at time of creation */
char name[MAXDRIVENAME]; /* our name of the drive */
struct timeval date_of_birth; /* the time it was created */
struct timeval last_update; /* and the time of last update */
off_t drive_size; /* total size in bytes of the drive.
* This value includes the headers */
};
/* Header identifying a vinum drive (the "slice definition" in the
 * layout diagram): a magic number, the size of each configuration copy
 * and the drive label. */
struct vinum_hdr {
long long magic; /* we're long on magic numbers */
/* XXX Get these right for big-endian */
/* The two magic values are macros and are visible file-wide even though
 * they are defined inside the structure body */
#define VINUM_MAGIC 22322600044678729LL /* should be this */
#define VINUM_NOMAGIC 22322600044678990LL /* becomes this after obliteration */
int config_length; /* size in bytes of each copy of the
* configuration info.
* This must be a multiple of the sector size. */
struct vinum_label label; /* unique label */
};
/* Information returned from read_drive_label: the outcome of looking
 * for a vinum label on a partition. Values are assigned implicitly,
 * starting at 0 for DL_CANT_OPEN. */
enum drive_label_info {
DL_CANT_OPEN, /* invalid partition */
DL_NOT_OURS, /* valid partition, but no vinum label */
DL_DELETED_LABEL, /* valid partition, deleted label found */
DL_WRONG_DRIVE, /* drive name doesn't match */
DL_OURS /* valid partition and label found */
};
/*** Drive definitions ***/
/* A drive corresponds to a disk slice. We use a different term to show
 * the difference in usage: it doesn't have to be a slice, and could
 * theoretically be a complete, unpartitioned disk.
 *
 * struct drive is the in-core description of one drive: its state,
 * statistics, the device it lives on, its label and a list of the free
 * space remaining on it. */
struct drive {
enum drivestate state; /* current state */
int subdisks_allocated; /* number of entries in sd */
int subdisks_used; /* and the number used */
int blocksize; /* size of fs blocks */
u_int64_t sectors_available; /* number of sectors still available */
int secsperblock; /* NOTE(review): presumably sectors per block -- confirm */
int lasterror; /* last error on drive */
int driveno; /* index of drive in vinum_conf */
int opencount; /* number of up subdisks */
u_int64_t reads; /* number of reads on this drive */
u_int64_t writes; /* number of writes on this drive */
u_int64_t bytes_read; /* number of bytes read */
u_int64_t bytes_written; /* number of bytes written */
dev_t dev; /* and device number */
char devicename[MAXDRIVENAME]; /* name of the slice it's on */
struct vnode *vp; /* vnode pointer */
struct proc *p; /* NOTE(review): presumably the process that opened the drive -- confirm */
struct vinum_label label; /* and the label information */
struct partinfo partinfo; /* partition information */
int freelist_size; /* number of entries alloced in free list */
int freelist_entries; /* number of entries used in free list */
/* One free extent on the drive; freelist points to a table of them */
struct drive_freelist { /* sorted list of free space on drive */
u_int64_t offset;
long sectors;
} *freelist;
};
/*** Subdisk definitions ***/
/* In-core description of one subdisk: where it lies on its drive and
 * in its plex, which objects it belongs to, and transfer statistics. */
struct sd {
enum sdstate state; /* state */
/* offsets in blocks */
int64_t driveoffset; /* offset on drive */
int64_t plexoffset; /* offset in plex */
u_int64_t sectors; /* and length in sectors */
int plexno; /* index of plex, if it belongs */
int driveno; /* index of the drive on which it is located */
int sdno; /* our index in vinum_conf */
int pid; /* pid of process which opened us */
u_int64_t reads; /* number of reads on this subdisk */
u_int64_t writes; /* number of writes on this subdisk */
u_int64_t bytes_read; /* number of bytes read */
u_int64_t bytes_written; /* number of bytes written */
char name[MAXSDNAME]; /* name of subdisk */
};
/*** Plex definitions ***/
/* kinds of plex organization; values are assigned implicitly,
 * starting at 0 for plex_disorg */
enum plexorg {
plex_disorg, /* disorganized */
plex_concat, /* concatenated plex */
plex_striped, /* striped plex */
plex_raid5 /* RAID5 plex */
};
/* Region in plex (either defective or unmapped), described as an
 * offset/length pair. struct plex keeps one table of these for
 * defective regions and one for unmapped regions. */
struct plexregion {
u_int64_t offset; /* start of region */
u_int64_t length; /* length */
};
/* In-core description of one plex: its organization and state, the
 * subdisks that make it up, range locks, statistics, the parameters of
 * a running revive operation and the tables of defective and unmapped
 * regions. */
struct plex {
enum plexorg organization; /* Plex organization */
enum plexstate state; /* and current state */
u_int64_t length; /* total length of plex (max offset) */
int flags;
int stripesize; /* size of stripe or raid band, in sectors */
int subdisks; /* number of associated subdisks */
int subdisks_allocated; /* number of subdisks allocated space for */
int *sdnos; /* list of component subdisks */
int plexno; /* index of plex in vinum_conf */
int volno; /* index of volume */
int volplexno; /* number of plex in volume */
int pid; /* pid of process which opened us */
/* Lock information */
int locks; /* number of locks used */
int alloclocks; /* number of locks allocated */
struct rangelock *lock; /* ranges of locked addresses */
/* Statistics */
u_int64_t reads; /* number of reads on this plex */
u_int64_t writes; /* number of writes on this plex */
u_int64_t bytes_read; /* number of bytes read */
u_int64_t bytes_written; /* number of bytes written */
u_int64_t multiblock; /* requests that needed more than one block */
u_int64_t multistripe; /* requests that needed more than one stripe */
/* revive parameters */
u_int64_t revived; /* block number of current revive request */
int revive_blocksize; /* revive block size (bytes) */
int revive_interval; /* and time to wait between transfers */
struct request *waitlist; /* list of requests waiting on revive op */
/* geometry control */
int defective_regions; /* number of regions which are defective */
int defective_region_count; /* number of entries in defective_region */
struct plexregion *defective_region; /* list of offset/length pairs: defective sds */
int unmapped_regions; /* number of regions which are missing */
int unmapped_region_count; /* number of entries in unmapped_region */
struct plexregion *unmapped_region; /* list of offset/length pairs: missing sds */
char name[MAXPLEXNAME]; /* name of plex */
};
/*** Volume definitions ***/
/* Dimension of the per-volume plex table (same value as MAXVOLPLEX) */
#define MAXPLEX 8 /* maximum number of plexes */
/* In-core description of one volume: state, open bookkeeping, size,
 * statistics and the indices of the plexes that belong to it. */
struct volume {
enum volumestate state; /* current state */
int plexes; /* number of plexes */
int preferred_plex; /* plex to read from, -1 for round-robin */
int last_plex_read; /* index of plex used for last read,
* for round-robin */
dev_t devno; /* device number */
int flags; /* status and configuration flags */
int opencount; /* number of opens (all the same process) */
int openflags; /* flags supplied to last open(2) */
u_int64_t size; /* size of volume */
int disk; /* disk index */
int blocksize; /* logical block size */
int active; /* number of outstanding requests active */
int subops; /* and the number of suboperations */
pid_t pid; /* pid of locker */
/* Statistics */
u_int64_t bytes_read; /* number of bytes read */
u_int64_t bytes_written; /* number of bytes written */
u_int64_t reads; /* number of reads on this volume */
u_int64_t writes; /* number of writes on this volume */
u_int64_t recovered_reads; /* reads recovered from another plex */
/* Unlike subdisks in the plex, space for the plex pointers is static */
int plex[MAXPLEX]; /* index of plexes */
char name[MAXVOLNAME]; /* name of volume */
struct disklabel label; /* for DIOCGPART */
};
/* Table expansion. Expand table, which contains oldcount
 * entries of type element, by increment entries, and change
 * oldcount accordingly.
 *
 * table and oldcount must be lvalues: the address of table is handed to
 * expand_table together with the old and new sizes in bytes, and
 * oldcount is incremented afterwards. oldcount and increment are each
 * evaluated twice, so do not pass expressions with side effects.
 *
 * Every argument is parenthesized in the expansion so that an
 * expression such as `a ? b : c' passed as increment yields the correct
 * new size. The expansion is a brace block, as before; take care when
 * using it as the unbraced body of an if that has an else. */
#define EXPAND(table, element, oldcount, increment) \
{ \
    expand_table ((void **) &(table), \
		  (oldcount) * sizeof (element), \
		  ((oldcount) + (increment)) * sizeof (element) ); \
    (oldcount) += (increment); \
}
/* Information on vinum's memory usage: summary counters plus a pointer
 * to the table of individual allocation records (struct mc) */
struct meminfo {
int mallocs; /* number of malloced blocks */
int total_malloced; /* total amount malloced */
int highwater; /* maximum number of mallocs */
struct mc *malloced; /* pointer to kernel table */
};
/* One entry in the table of allocations referenced by struct meminfo.
 * NOTE(review): field meanings below are inferred from their names
 * (sequence number, size, source line and file of the allocation) --
 * confirm against the allocator. */
struct mc {
int seq;
int size;
short line;
short flags;
/* Value for the flags member; the macro is visible file-wide */
#define ALLOC_KVA 1 /* allocated via kva calls */
int *databuf; /* really vm_object_t */
caddr_t address;
char file[16];
};
/* These enums are used by the state transition
 * routines. They're in bit map format:
 *
 * Bit 0: Other plexes in the volume are down
 * Bit 1: Other plexes in the volume are up
 * Bit 2: The current plex is up
 * Maybe they should be local to
 * state.c
 *
 * The enumerators rely on implicit numbering from 0 to 7, so each
 * name is simply one combination of the three bits above. */
enum volplexstate {
volplex_onlyusdown = 0, /* we're the only plex, and we're down */
volplex_alldown, /* 1: another plex is down, and so are we */
volplex_otherup, /* 2: another plex is up */
volplex_otherupdown, /* 3: other plexes are up and down */
volplex_onlyus, /* 4: we're up and alone */
volplex_onlyusup, /* 5: only we are up, others are down */
volplex_allup, /* 6: all plexes are up */
volplex_someup /* 7: some plexes are up, including us */
};
/* state map for plex.
 * Each value is a single bit, so several can be combined in one word.
 * NOTE(review): presumably used to summarize the states found among the
 * subdisks of a plex -- confirm against the state transition code. */
enum sdstates {
sd_emptystate = 1, /* by analogy with the others: found an SD which is empty */
sd_downstate = 2, /* found an SD which is down */
sd_crashedstate = 4, /* found an SD which is crashed */
sd_obsoletestate = 8, /* found an SD which is obsolete */
sd_stalestate = 16, /* found an SD which is stale */
sd_rebornstate = 32, /* found an SD which is reborn */
sd_upstate = 64, /* found an SD which is up */
sd_initstate = 128, /* found an SD which is init */
sd_otherstate = 256 /* found an SD in some other state */
};
/* This is really just a parameter to pass to
 * set_<foo>_state, but since it needs to be known
 * in the external definitions, we need to define
 * it here.
 * Apart from setstate_none, every value is a single bit, so the flags
 * can be OR-ed together. */
enum setstateflags {
setstate_none = 0, /* no flags */
setstate_force = 1, /* force the state change */
setstate_configuring = 2, /* we're currently configuring, don't save */
setstate_recursing = 4, /* we're called from another setstate function */
setstate_norecurse = 8 /* don't call other setstate functions */
};
#ifdef DEBUG
/* Debugging stuff: single-bit values.
 * NOTE(review): presumably bits for the global debug variable defined
 * in vinum.c -- confirm where they are tested. */
#define DEBUG_ADDRESSES 1
#define DEBUG_NUMOUTPUT 2
#endif

37
sys/dev/vinum/COPYRIGHT Normal file
View File

@ -0,0 +1,37 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: COPYRIGHT,v 1.1 1998/03/05 06:07:05 grog Exp grog $
*/

26
sys/dev/vinum/Makefile Normal file
View File

@ -0,0 +1,26 @@
# $Id: Makefile.lkm.lite,v 1.2 1998/08/13 06:07:29 grog Exp grog $
# Build description for the vinum kernel module (KMOD vinum_mod); the
# actual build rules come from <bsd.kmod.mk>, included at the end.
# NOTE(review): .PATH points at the ccd driver directory, which looks
# like a leftover from the Makefile this one was derived from -- confirm.
.PATH: ${.CURDIR}/../../sys/dev/ccd
KMOD= vinum_mod
SRCS= vinum.c vinum.h vnode_if.h parser.c config.c io.c util.c vinumhdr.h request.h \
state.c memory.c request.c lock.c vinumext.h vinumio.h vinumkw.h \
vinumstate.h vinumvar.h revive.c vinumioctl.c interrupt.c
NOMAN=
PSEUDO_LKM=
# CFLAGS is assigned, not appended to. -DDEBUG defines DEBUG as 1, which
# the "#if DEBUG" tests in the sources rely on.
CFLAGS = -I. -O -g -I/usr/include/machine -DDEBUG -Wall -Wno-unused -Wno-parentheses
CLEANFILES+= vinum.h vnode_if.h vnode_if.c
all:
# We don't need this, but the Makefile wants it
vinum.h:
touch $@
# state.h is the output of the maketabs helper program, which is built
# from maketabs.c by the rule that follows
state.h: maketabs vinumstate.h
./maketabs >state.h
maketabs: maketabs.c
${CC} -g -o maketabs maketabs.c
.include <bsd.kmod.mk>

40
sys/dev/vinum/makestatetext Executable file
View File

@ -0,0 +1,40 @@
#!/bin/sh
# Make statetexts.h from vinumstate.h
# $Id: makestatetext,v 1.4 1998/03/13 05:36:16 grog Exp grog $
#
# The output starts with a copy of the COPYRIGHT file and a "created by"
# line. For each object type (drive, sd, plex, volume) it then holds an
# array of strings, one per enumerator found in vinumstate.h, in the
# order in which the enumerators appear there.
#
# An enumerator is recognized as <prefix>_<identifier> followed by a
# comma; lines containing "=" are skipped. The identifier characters
# are spelled out as [A-Za-z0-9_]: the former range [A-z] only matched
# "_" (needed for names like coming_up) by accident of ASCII ordering,
# also admitted [ \ ] ^ and `, and is not reliable outside the C locale.
infile=vinumstate.h
ofile=statetexts.h
cat <COPYRIGHT > $ofile
echo >>$ofile "/* Created by $0 on" `date`. "Do not edit */"
echo >>$ofile
echo >>$ofile "/* Drive state texts */"
echo >>$ofile "char *drivestatetext [] =
{ "
# sed strips the prefix and everything after the name, and quotes the name
grep -E -e 'drive_[A-Za-z0-9_]*,' <$infile | grep -v = | sed 's: *drive_\([^,]*\).*: \"\1\",:' >>$ofile
# close the previous array and open the next one
cat <<FOO >> $ofile
};
/* Subdisk state texts */
char *sdstatetext [] =
{
FOO
grep -E -e 'sd_[A-Za-z0-9_]*,' $infile | grep -v = | sed 's: *sd_\([^,]*\).*: \"\1\",:' >>$ofile
cat <<FOO >> $ofile
};
/* Plex state texts */
char *plexstatetext [] =
{
FOO
grep -E -e 'plex_[A-Za-z0-9_]*,' $infile | grep -v = | sed 's: *plex_\([^,]*\).*: \"\1\",:' >>$ofile
cat <<FOO >> $ofile
};
/* Volume state texts */
char *volstatetext [] =
{
FOO
grep -E -e 'volume_[A-Za-z0-9_]*,' $infile | grep -v = | sed 's: *volume_\([^,]*\).*: \"\1\",:' >>$ofile
# close the last array
cat <<FOO >> $ofile
};
FOO

159
sys/dev/vinum/request.h Normal file
View File

@ -0,0 +1,159 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: request.h,v 1.10 1998/08/03 07:15:26 grog Exp grog $
*/
/* Information needed to set up a transfer */
/* struct buf is surprisingly big (about 300
 * bytes), and it's part of the request, so this
 * value is really important. Most requests
 * don't need more than 2 subrequests per
 * plex. The table is automatically extended if
 * this value is too small.
 * (Each struct rqelement embeds a struct buf, which is why the
 * number of elements matters for memory use.) */
#define RQELTS 2 /* default of 2 requests per transfer */
/* Description of a transfer. The first group of values are single
 * bits; the XFR_...OP, XFR_RBW and XFR_NEEDS_MALLOC values at the end
 * are masks built from them. Bit 0x400 is not assigned, and
 * XFR_PHASE2 only exists when DEBUG is defined to a non-zero value. */
enum xferinfo {
XFR_NORMAL_READ = 1,
XFR_NORMAL_WRITE = 2, /* write request in normal mode */
XFR_RECOVERY_READ = 4,
XFR_DEGRADED_WRITE = 8,
XFR_PARITYLESS_WRITE = 0x10,
XFR_NO_PARITY_STRIPE = 0x20, /* parity stripe is not available */
XFR_DATA_BLOCK = 0x40, /* data block in request */
XFR_PARITY_BLOCK = 0x80, /* parity block in request */
XFR_BAD_SUBDISK = 0x100, /* this subdisk is dead */
XFR_MALLOCED = 0x200, /* this buffer is malloced */
#if DEBUG
XFR_PHASE2 = 0x800, /* documentation only: 2nd phase write */
#endif
XFR_REVIVECONFLICT = 0x1000, /* possible conflict with a revive operation */
/* operations that need a parity block */
XFR_PARITYOP = (XFR_NORMAL_WRITE | XFR_RECOVERY_READ | XFR_DEGRADED_WRITE),
/* operations that use the group parameters */
XFR_GROUPOP = (XFR_DEGRADED_WRITE | XFR_RECOVERY_READ),
/* operations that use the data parameters */
XFR_DATAOP = (XFR_NORMAL_READ | XFR_NORMAL_WRITE | XFR_PARITYLESS_WRITE),
/* operations requiring read before write */
XFR_RBW = (XFR_NORMAL_WRITE | XFR_DEGRADED_WRITE),
/* operations that need a malloced buffer */
XFR_NEEDS_MALLOC = (XFR_NORMAL_WRITE | XFR_RECOVERY_READ | XFR_DEGRADED_WRITE)
};
/* Describe one low-level request, part
 * of a high-level request. This is an
 * extended struct buf buffer, and the first
 * element *must* be a struct buf. We pass this structure
 * to the I/O routines instead of a struct buf in order
 * to be able to locate the high-level request when it
 * completes.
 *
 * All offsets and lengths are in "blocks", i.e. sectors */
struct rqelement {
struct buf b; /* buf structure */
struct rqgroup *rqg; /* pointer to our group */
/* Information about the transfer */
daddr_t sdoffset; /* offset in subdisk */
int useroffset; /* offset in user buffer of normal data */
/* dataoffset and datalen refer to "individual"
 * data transfers (normal read, parityless write)
 * and also degraded write.
 *
 * groupoffset and grouplen refer to the other
 * "group" operations (normal write, recovery read)
 * Both the offsets are relative to the start of the
 * local buffer.
 *
 * NOTE(review): this grouping differs from XFR_DATAOP and XFR_GROUPOP
 * in enum xferinfo, which place normal write with the data operations
 * and degraded write with the group operations -- confirm which is
 * intended. */
int dataoffset; /* offset in buffer of the normal data */
int groupoffset; /* offset in buffer of group data */
short datalen; /* length of normal data (sectors) */
short grouplen; /* length of group data (sectors) */
short buflen; /* total buffer length to allocate */
short flags; /* really enum xferinfo (see above) */
/* Ways to find other components */
short sdno; /* subdisk number */
short driveno; /* drive number */
};
/* A group of requests built to satisfy a certain
 * component of a user request. Groups are chained through next and
 * point back to the owning request. */
struct rqgroup {
struct rqgroup *next; /* pointer to next group */
struct request *rq; /* pointer to the request */
short count; /* number of requests in this group */
short active; /* and number active */
short plexno; /* index of plex */
int badsdno; /* index of bad subdisk or -1 */
enum xferinfo flags; /* description of transfer */
/* Zero-length trailing array (a compiler extension): the elements are
 * stored directly behind the group header, so the allocation has to
 * include room for them */
struct rqelement rqe[0]; /* and the elements of this request */
};
/* Describe one high-level request and the
 * work we have to do to satisfy it */
struct request {
struct buf *bp; /* pointer to the high-level request */
int flags;
/* Which object the request addresses; isplex tells which member of
 * the union is meaningful */
union {
int volno; /* volume index */
int plexno; /* or plex index */
} volplex;
int error; /* current error indication */
short isplex; /* set if this is a plex request */
short active; /* number of subrequests still active */
struct rqgroup *rqg; /* pointer to the first group of requests */
/* NOTE(review): the comment below duplicates the one for rqg; going by
 * the name, lrqg is presumably the *last* group -- confirm. */
struct rqgroup *lrqg; /* and to the first group of requests */
struct request *next; /* link of waiting requests */
};
/* Extended buffer header for subdisk I/O. Includes
 * a pointer to the user I/O request. The struct buf comes first, as in
 * struct rqelement, so a pointer to the structure also points to its
 * buffer header. */
struct sdbuf {
struct buf b; /* our buffer */
struct buf *bp; /* and pointer to parent */
short driveno; /* drive index */
short sdno; /* and subdisk index */
};
/* Values returned by rqe and friends.
 * Be careful with these: they are in order of increasing
 * seriousness. Some routines check for > REQUEST_RECOVERED
 * to indicate a completely failed request.
 * The values are assigned implicitly from 0, so new entries must be
 * inserted at the position that matches their seriousness. */
enum requeststatus {
REQUEST_OK, /* request built OK */
REQUEST_RECOVERED, /* request OK, but involves RAID5 recovery */
REQUEST_EOF, /* request failed: outside plex */
REQUEST_DOWN, /* request failed: subdisk down */
REQUEST_ENOMEM /* ran out of memory */
};

View File

@ -0,0 +1,88 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: COPYRIGHT,v 1.1 1998/03/05 06:07:05 grog Exp grog $
*/
/* Created by ./makestatetext on Tue 4 Aug 15:53:16 CST 1998. Do not edit */
/* Drive state texts.
 * Generated by makestatetext from the drive_* enumerators in
 * vinumstate.h, in the order they appear there; regenerate rather than
 * edit. NOTE(review): presumably indexed by enum drivestate -- confirm. */
char *drivestatetext[] =
{
"unallocated",
"uninit",
"down",
"coming_up",
"up",
};
/* Subdisk state texts.
 * Generated by makestatetext from the sd_* enumerators in vinumstate.h,
 * in the order they appear there; regenerate rather than edit.
 * NOTE(review): presumably indexed by enum sdstate -- confirm. */
char *sdstatetext[] =
{
"unallocated",
"uninit",
"init",
"initializing",
"empty",
"obsolete",
"stale",
"crashed",
"down",
"reborn",
"up",
};
/* Plex state texts.
 * Generated by makestatetext from the plex_* enumerators in
 * vinumstate.h, in the order they appear there; regenerate rather than
 * edit. NOTE(review): presumably indexed by enum plexstate -- confirm. */
char *plexstatetext[] =
{
"unallocated",
"init",
"faulty",
"down",
"reviving",
"initializing",
"corrupt",
"degraded",
"flaky",
"up",
};
/* Volume state texts.
 * Generated by makestatetext from the volume_* enumerators in
 * vinumstate.h, in the order they appear there; regenerate rather than
 * edit. NOTE(review): presumably indexed by enum volumestate -- confirm. */
char *volstatetext[] =
{
"unallocated",
"uninit",
"down",
"up",
};

512
sys/dev/vinum/vinum.c Normal file
View File

@ -0,0 +1,512 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: vinum.c,v 1.19 1998/08/13 05:24:02 grog Exp grog $
*/
#define STATIC /* nothing while we're testing XXX */
#define REALLYKERNEL
#include "vinumhdr.h"
#include "sys/sysproto.h" /* for sync(2) */
#ifdef DEBUG
#include <sys/reboot.h>
int debug = 0;
#endif
/* pointer to ioctl p parameter, to save passing it around */
struct proc *myproc;
/* Device switch entries for vinum. Before FreeBSD 3 a block device
 * switch is defined which refers to a separately declared character
 * device switch; from FreeBSD 3 on only a character device switch is
 * defined. vinumattach hands the appropriate one to the devsw_add
 * routine. The initializers are positional, so their order has to
 * match the structure definition of the release being built. */
#if __FreeBSD__ < 3
STATIC struct cdevsw vinum_cdevsw;
STATIC struct bdevsw vinum_bdevsw =
{
vinumopen, vinumclose, vinumstrategy, vinumioctl,
vinumdump, vinumsize, 0,
"vinum", &vinum_cdevsw, -1
};
#else /* goodbye, bdevsw */
STATIC struct cdevsw vinum_cdevsw =
{
vinumopen, vinumclose, vinumread, vinumwrite,
vinumioctl, nostop, nullreset, nodevtotty,
seltrue, nommap, vinumstrategy, "vinum",
NULL, -1, vinumdump, vinumsize,
D_DISK, 0, -1
};
#endif
/* Forward declarations of functions used in this file */
/* Called by main() during pseudo-device attachment. */
STATIC void vinumattach(void *);
STATIC void vinumgetdisklabel(dev_t);
void vinum_scandisk(void);
int vinum_inactive(void);
void free_vinum(int);
#if __FreeBSD__ >= 3
/* Why aren't these declared anywhere? XXX */
int setjmp(jmp_buf);
void longjmp(jmp_buf, int);
#endif
extern jmp_buf command_fail; /* return here if config fails */
/* The one global instance; the DRIVE, SD, PLEX and VOL macros refer to it */
struct _vinum_conf vinum_conf; /* configuration information */
/* NOTE(review): not referenced in the part of the file visible here */
STATIC int vinum_devsw_installed = 0;
/*
 * Called by main() during pseudo-device attachment, and by vinum_load
 * when built as a loadable module. All we need to do is allocate
 * enough space for devices to be configured later, and add devsw
 * entries.
 *
 * Panics if the VF_LOADED flag is already set. Otherwise sets the flag,
 * records curproc in myproc, registers the device switch and allocates
 * the initial drive, volume, plex and subdisk tables, setting the
 * "allocated" counts to the INITIAL_* values and the "used" counts
 * to 0. The parameter is not used.
 *
 * Earlier versions also allocated a 512 byte buffer and filled in
 * uio/iovec structures that were never used; the buffer was never freed.
 * That dead code has been removed.
 */
void
vinumattach(void *dummy)
{
    BROKEN_GDB;

    /* modload should prevent multiple loads, so this is worth a panic */
    if ((vinum_conf.flags & VF_LOADED) != 0)	/* flags is an int: compare with 0, not NULL */
	panic("vinum: already loaded");

    printf("vinum: loaded\n");
    vinum_conf.flags |= VF_LOADED;		/* we're loaded now */

    /* We don't have a p pointer here, so take it from curproc */
    myproc = curproc;

#if __FreeBSD__ < 3
    bdevsw_add_generic(BDEV_MAJOR, CDEV_MAJOR, &vinum_bdevsw);
#else
    cdevsw_add_generic(BDEV_MAJOR, CDEV_MAJOR, &vinum_cdevsw);
#endif
#ifdef DEVFS
#error DEVFS not finished yet
#endif

    /*
     * Allocate the object tables. CHECKALLOC is handed the pointer and
     * a message; presumably it deals with a failed allocation -- see
     * its definition.
     */

    /* drives... */
    DRIVE = (struct drive *) Malloc(sizeof(struct drive) * INITIAL_DRIVES);
    CHECKALLOC(DRIVE, "vinum: no memory\n");
    vinum_conf.drives_allocated = INITIAL_DRIVES;	/* number of drive slots allocated */
    vinum_conf.drives_used = 0;				/* and number in use */

    /* volumes, ... */
    VOL = (struct volume *) Malloc(sizeof(struct volume) * INITIAL_VOLUMES);
    CHECKALLOC(VOL, "vinum: no memory\n");
    vinum_conf.volumes_allocated = INITIAL_VOLUMES;	/* number of volume slots allocated */
    vinum_conf.volumes_used = 0;			/* and number in use */

    /* plexes, ... */
    PLEX = (struct plex *) Malloc(sizeof(struct plex) * INITIAL_PLEXES);
    CHECKALLOC(PLEX, "vinum: no memory\n");
    vinum_conf.plexes_allocated = INITIAL_PLEXES;	/* number of plex slots allocated */
    vinum_conf.plexes_used = 0;				/* and number in use */

    /* and subdisks */
    SD = (struct sd *) Malloc(sizeof(struct sd) * INITIAL_SUBDISKS);
    CHECKALLOC(SD, "vinum: no memory\n");
    vinum_conf.subdisks_allocated = INITIAL_SUBDISKS;	/* number of sd slots allocated */
    vinum_conf.subdisks_used = 0;			/* and number in use */
}
#ifdef ACTUALLY_LKM_NOT_KERNEL /* stuff for LKMs */
/* Check if we have anything open. If so, return 0 (not inactive),
 * otherwise 1 (inactive).
 *
 * A volume counts as open when its pid member is non-zero. The scan
 * over the volumes in use is done between lock_config() and
 * unlock_config(). */
int
vinum_inactive(void)
{
    BROKEN_GDB;
    int i;
    int can_do = 1;				/* assume we can do it */

    lock_config();
    for (i = 0; i < vinum_conf.volumes_used; i++) {
	if (VOL[i].pid != 0) {			/* volume is open (pid is a pid_t, not a pointer) */
	    can_do = 0;
	    break;
	}
    }
    unlock_config();
    return can_do;
}
/* Free all structures.
 * If cleardrive is 0, save the configuration; otherwise
 * remove the configuration from the drive.
 *
 * Before coming here, ensure that no volumes are open.
 *
 * The drive table is released in both cases. Previously it was only
 * freed when the configuration was kept, so it was lost when
 * vinum_conf was zeroed at the end of the cleardrive path. */
void
free_vinum(int cleardrive)
{
    BROKEN_GDB;
    int i;

    if (!cleardrive)
	save_config();				/* keep the config: save it first */

    if (DRIVE != NULL) {
	for (i = 0; i < vinum_conf.drives_used; i++) {
	    if (cleardrive)
		remove_drive(i);		/* remove the drive */
	    else
		free_drive(&DRIVE[i]);		/* close files and things */
	}
	Free(DRIVE);
    }

    if (SD != NULL)
	Free(SD);

    if (PLEX != NULL) {
	for (i = 0; i < vinum_conf.plexes_used; i++) {
	    struct plex *plex = &vinum_conf.plex[i];

	    if (plex->state != plex_unallocated) {	/* we have real data there */
		if (plex->sdnos)
		    Free(plex->sdnos);
		if (plex->unmapped_regions)
		    Free(plex->unmapped_region);
		if (plex->defective_regions)
		    Free(plex->defective_region);
	    }
	}
	Free(PLEX);
    }

    if (VOL != NULL)
	Free(VOL);

    /* Forget everything, including the VF_LOADED flag and the table pointers */
    bzero(&vinum_conf, sizeof(vinum_conf));
}
MOD_MISC(vinum);
/*
 * Function called when loading the driver.
 * Performs the attach step by calling vinumattach and always reports
 * success. Neither parameter is used.
 */
STATIC int
vinum_load(struct lkm_table *lkmtp, int cmd)
{
BROKEN_GDB;
/* Debugger ("vinum_load"); */
vinumattach(NULL); /* the argument is a dummy */
return 0; /* OK */
}
/*
 * Unload hook, handed to MOD_DISPATCH in vinum_mod.
 *
 * Returns EBUSY while vinum_inactive() reports that something is still
 * open.  Otherwise announces the unload, flushes buffers with sync(),
 * tears the driver down with free_vinum(0) (which saves the
 * configuration first), clears the device switch entries and
 * returns 0.
 */
STATIC int
vinum_unload(struct lkm_table *lkmtp, int cmd)
{
    BROKEN_GDB;
    struct sync_args dummyarg = {0};
#if __FreeBSD__ < 3
    int retval;
#endif

    if (!vinum_inactive())                  /* something is still open */
        return EBUSY;

    printf("vinum: unloaded\n");
#if __FreeBSD__ < 3
    sync(curproc, &dummyarg, &retval);      /* write out buffers */
    free_vinum(0);                          /* clean up, keeping the config */
    bdevsw[BDEV_MAJOR] = NULL;              /* clear bdevsw */
#else
    sync(curproc, &dummyarg);               /* write out buffers */
    free_vinum(0);                          /* clean up, keeping the config */
#endif
    cdevsw[CDEV_MAJOR] = NULL;              /* and cdevsw */
    return 0;
}
/*
* Dispatcher function for the module (load/unload/stat).
*
* The whole body is the MOD_DISPATCH macro, which is defined outside
* this file.  It is given the module name, the arguments of this
* function, and the three handlers: vinum_load for load, vinum_unload
* for unload and lkm_nullcmd for the remaining slot (stat, going by
* the summary line above).
* NOTE(review): the macro presumably supplies this function's return
* statement, since none appears here -- confirm in the LKM headers.
*/
int
vinum_mod(struct lkm_table *lkmtp, int cmd, int ver)
{
BROKEN_GDB;
MOD_DISPATCH(vinum, /* module name */
lkmtp, /* LKM table */
cmd, /* command */
ver, /* third argument of this function, passed through */
vinum_load, /* load with this function */
vinum_unload, /* and unload with this */
lkm_nullcmd); /* third handler slot */
}
#else /* not LKM */
#error "This driver must be compiled as a loadable kernel module"
#endif /* LKM */
/* ARGSUSED */
/*
 * Open a vinum object.
 *
 * The object type is taken from the device number:
 *
 *  - volume: must be in state volume_up.  The first open records the
 *    open flags; every open bumps opencount and records the caller's
 *    pid.  A volume that is down yields EIO, one that is unallocated or
 *    uninitialized yields ENXIO, any other state EINVAL.
 *  - plex / subdisk: may be opened by one process at a time; a second
 *    process gets EBUSY.  Unallocated (and, for subdisks,
 *    uninitialized) objects yield EINVAL.
 *  - super device: root only (EPERM otherwise); counts openers.
 *  - drives and unknown types: ENODEV.
 *
 * An out-of-range object number yields ENXIO.  Returns 0 on success.
 */
int
vinumopen(dev_t dev,
    int flags,
    int fmt,
    struct proc *p)
{
    BROKEN_GDB;
    int s;                                  /* spl */
    unsigned int index;
    struct volume *vol;
    struct plex *plex;
    struct sd *sd;
    struct devcode *device;

    device = (struct devcode *) &dev;       /* view the device number as its bit fields */

    /* First, decide what we're looking at */
    switch (device->type) {
    case VINUM_VOLUME_TYPE:
        index = VOLNO(dev);
        if (index >= vinum_conf.volumes_used)
            return ENXIO;                   /* no such device */
        vol = &VOL[index];

        switch (vol->state) {
        case volume_unallocated:
        case volume_uninit:
            return ENXIO;

        case volume_up:
            /*
             * A leftover "if (error) return error;" used to sit right
             * after splhigh().  error was always 0 at that point, and
             * had it fired it would have returned with the priority
             * level still raised, so it has been removed.
             */
            s = splhigh();                  /* quick lock */
            if (vol->opencount == 0)
                vol->openflags = flags;     /* set our flags */
            vol->opencount++;
            vol->pid = p->p_pid;            /* and say who we are (do we need this? XXX) */
            splx(s);
            return 0;

        case volume_down:
            return EIO;

        default:
            return EINVAL;
        }

    case VINUM_PLEX_TYPE:
        if (VOLNO(dev) >= vinum_conf.volumes_used)
            return ENXIO;
        index = PLEXNO(dev);                /* get plex index in vinum_conf */
        if (index >= vinum_conf.plexes_used)
            return ENXIO;                   /* no such device */
        plex = &PLEX[index];

        switch (plex->state) {
        case plex_unallocated:
            return EINVAL;

        default:
            s = splhigh();
            if (plex->pid                   /* it's open already */
                && (plex->pid != p->p_pid)) {   /* and not by us, */
                splx(s);
                return EBUSY;               /* one at a time, please */
            }
            plex->pid = p->p_pid;           /* and say who we are (do we need this? XXX) */
            splx(s);
            return 0;
        }

    case VINUM_SD_TYPE:
        if ((VOLNO(dev) >= vinum_conf.volumes_used) ||  /* no such volume */
            (PLEXNO(dev) >= vinum_conf.plexes_used))    /* or no such plex */
            return ENXIO;                   /* no such device */
        index = SDNO(dev);                  /* get the subdisk number */
        if (index >= vinum_conf.subdisks_used)
            return ENXIO;                   /* no such device */
        sd = &SD[index];

        /*
         * Opening a subdisk is always a special operation, so we
         * ignore the state as long as it represents a real subdisk
         */
        switch (sd->state) {
        case sd_unallocated:
        case sd_uninit:
            return EINVAL;

        default:
            s = splhigh();
            if (sd->pid                     /* it's open already */
                && (sd->pid != p->p_pid)) { /* and not by us, */
                splx(s);
                return EBUSY;               /* one at a time, please */
            }
            sd->pid = p->p_pid;             /* and say who we are (do we need this? XXX) */
            splx(s);
            return 0;
        }

    case VINUM_DRIVE_TYPE:
    default:
        return ENODEV;                      /* don't know what to do with these */

    case VINUM_SUPERDEV_TYPE:
        if (p->p_ucred->cr_uid == 0) {      /* root calling, */
            vinum_conf.opencount++;         /* one more opener */
            return 0;                       /* no worries opening super dev */
        } else
            return EPERM;                   /* you can't do that! */
    }
}
/* ARGSUSED */
/*
 * Close a vinum object.
 *
 * The object type is taken from the device number:
 *
 *  - volume: the open count and the owner pid are reset.  Returns 0 for
 *    a volume that is up and EIO for one that is down; ENXIO if the
 *    volume is unallocated or uninitialized, EINVAL for any other
 *    state.
 *  - plex / subdisk: the owner pid is cleared; returns 0.
 *  - super device: if the caller is root, the opener count is
 *    decremented; always returns 0.
 *  - drives and unknown types: ENODEV.
 *
 * An out-of-range object number yields ENXIO.
 */
int
vinumclose(dev_t dev,
    int flags,
    int fmt,
    struct proc *p)
{
    BROKEN_GDB;
    unsigned int index;
    struct volume *vol;
    struct plex *plex;
    struct sd *sd;
    struct devcode *device = (struct devcode *) &dev;

    index = VOLNO(dev);
    /* First, decide what we're looking at */
    switch (device->type) {
    case VINUM_VOLUME_TYPE:
        if (index >= vinum_conf.volumes_used)
            return ENXIO;                   /* no such device */
        vol = &VOL[index];

        switch (vol->state) {
        case volume_unallocated:
        case volume_uninit:
            return ENXIO;

        case volume_up:
            vol->opencount = 0;             /* nobody has us open any more */
            vol->pid = 0;                   /* and forget who owned us */
            return 0;

        case volume_down:
            /*
             * The volume went down while it was open.  Still drop the
             * open state: otherwise pid stays set for ever and
             * vinum_inactive() can never report the driver idle.  The
             * caller is told about the failure as before.
             */
            vol->opencount = 0;
            vol->pid = 0;
            return EIO;

        default:
            return EINVAL;
        }

    case VINUM_PLEX_TYPE:
        if (VOLNO(dev) >= vinum_conf.volumes_used)
            return ENXIO;
        index = PLEXNO(dev);                /* get plex index in vinum_conf */
        if (index >= vinum_conf.plexes_used)
            return ENXIO;                   /* no such device */
        plex = &PLEX[index];
        plex->pid = 0;                      /* no owner any more */
        return 0;

    case VINUM_SD_TYPE:
        if ((VOLNO(dev) >= vinum_conf.volumes_used) ||  /* no such volume */
            (PLEXNO(dev) >= vinum_conf.plexes_used))    /* or no such plex */
            return ENXIO;                   /* no such device */
        index = SDNO(dev);                  /* get the subdisk number */
        if (index >= vinum_conf.subdisks_used)
            return ENXIO;                   /* no such device */
        sd = &SD[index];
        sd->pid = 0;                        /* no owner any more */
        return 0;

    case VINUM_SUPERDEV_TYPE:
        if (p->p_ucred->cr_uid == 0)        /* root calling, */
            vinum_conf.opencount--;         /* one less opener */
        return 0;                           /* no worries closing super dev */

    case VINUM_DRIVE_TYPE:
    default:
        return ENODEV;                      /* don't know what to do with these */
    }
}
/* size routine */
int
vinumsize(dev_t dev)
{
BROKEN_GDB;
struct volume *vol;
int size;
/* XXX This is bogus. We don't need to open
* a device to find its size */
vol = &VOL[VOLNO(dev)];
if (vol->state == volume_up)
size = vol->size;
else
return 0; /* err on the size of conservatism */
return size;
}
/*
 * Dump routine.  Not implemented: every call is refused with ENXIO,
 * whatever device is named.
 */
int
vinumdump(dev_t dev)
{
    int error = ENXIO;                      /* no dump support */

    return error;
}

1712
sys/dev/vinum/vinumconfig.c Normal file

File diff suppressed because it is too large Load Diff

214
sys/dev/vinum/vinumext.h Normal file
View File

@ -0,0 +1,214 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: vinumext.h,v 1.14 1998/08/11 00:03:57 grog Exp grog $
*/
/* vinumext.h: external definitions */
extern struct _vinum_conf vinum_conf;       /* configuration information */
#ifdef DEBUG
/* Spell out the type: the old "extern debug;" relied on implicit int,
 * which was removed from the language in C99 */
extern int debug;                           /* debug flags */
#endif
/*
 * CHECKALLOC(ptr, msg): give up on the current command if an allocation
 * failed.
 *
 * If ptr is NULL, msg is printed and control is transferred with
 * longjmp() to the context saved in command_fail (value -1); otherwise
 * nothing happens.  command_fail must be visible where the macro is
 * used.
 *
 * The body is wrapped in do { } while (0) so that the macro is a single
 * statement and can be used in an unbraced if/else; terminate it with a
 * semicolon at the call site.  msg is printed through "%s" so that a
 * '%' in the text is not treated as a conversion.
 */
#define CHECKALLOC(ptr, msg) \
    do { \
        if ((ptr) == NULL) { \
            printf ("%s", (msg)); \
            longjmp (command_fail, -1); \
        } \
    } while (0)
/* Outside the kernel, declare these two struct tags as incomplete
 * types (presumably because the headers that define them are not
 * available to user-level code -- TODO confirm) */
#ifndef KERNEL
struct vnode;
struct proc;
#endif
/* Kernel-only prototypes.  This conditional stays open until the
 * #endif that follows the "Auxiliary functions" declarations below. */
#ifdef KERNEL
int give_sd_to_plex(int plexno, int sdno);
int give_plex_to_volume(int volno, int plexno);
int check_drive(char *);
enum drive_label_info read_drive_label(struct drive *drive);
int parse_config(char *, struct keywordset *);
int parse_user_config(char *cptr, struct keywordset *keyset);
u_int64_t sizespec(char *spec);
int volume_index(struct volume *volume);
int plex_index(struct plex *plex);
int sd_index(struct sd *sd);
int drive_index(struct drive *drive);
int my_plex(int volno, int plexno);
int my_sd(int plexno, int sdno);
int get_empty_drive(void);
int find_drive(const char *name, int create);
int find_drive_by_dev(const char *devname, int create);
int get_empty_sd(void);
int find_subdisk(const char *name, int create);
void free_sd(int sdno);
void free_volume(int volno);
int get_empty_plex(void);
int find_plex(const char *name, int create);
void free_plex(int plexno);
int get_empty_volume(void);
int find_volume(const char *name, int create);
void config_subdisk(void);
void config_plex(void);
void config_volume(void);
void config_drive(void);
void updateconfig(int);
void update_sd_config(int sdno, int kernelstate);
void update_plex_config(int plexno, int kernelstate);
void update_volume_config(int volno, int kernelstate);
void update_config(void);
/* Installed as the b_iodone handler by driveio() in vinumio.c */
void drive_io_done(struct buf *);
int save_config(void);
void write_config(char *, int);
int start_config(void);
void finish_config(int);
void remove(struct vinum_ioctl_msg *msg);
void remove_drive_entry(int driveno, int force, int recurse);
void remove_sd_entry(int sdno, int force, int recurse);
void remove_plex_entry(int plexno, int force, int recurse);
void remove_volume_entry(int volno, int force, int recurse);
/* NOTE(review): "checkernel" looks like a misspelling of checkkernel,
 * which is declared further down -- confirm whether a function of
 * this name exists anywhere */
void checkernel(char *);
/* Drive access; the definitions are in vinumio.c */
int open_drive(struct drive *, struct proc *);
void close_drive(struct drive *drive);
int driveio(struct drive *, void *, size_t, off_t, int);
/* #define read_drive(a, b, c, d) driveio (a, b, c, d, B_READ)
#define write_drive(a, b, c, d) driveio (a, b, c, d, B_WRITE) */
int set_drive_parms(struct drive *drive);
int init_drive(struct drive *);
/* void throw_rude_remark (int, struct _ioctl_reply *, char *, ...); XXX */
void throw_rude_remark(int, char *,...);
/* read_drive and write_drive are declared as functions here; the macro
 * forms based on driveio are commented out above */
int read_drive(struct drive *drive, void *buf, size_t length, off_t offset);
int write_drive(struct drive *drive, void *buf, size_t length, off_t offset);
void format_config(char *config, int len);
void checkkernel(char *op);
void free_drive(struct drive *drive);
void down_drive(struct drive *drive);
void remove_drive(int driveno);
/* I/O: device switch entry points */
d_open_t vinumopen;
d_close_t vinumclose;
d_strategy_t vinumstrategy;
d_ioctl_t vinumioctl;
d_dump_t vinumdump;
d_psize_t vinumsize;
d_read_t vinumread;
d_write_t vinumwrite;
/*
 * Forward declarations.  They must precede the first prototype that
 * mentions the tag: a struct tag first seen inside a parameter list
 * has prototype scope only and is a different type from the real
 * structure declared later.  struct request used to be declared only
 * after launch_requests.
 */
struct request;
struct rqgroup;
int vinumstart(struct buf *bp, int reviveok);
int launch_requests(struct request *rq, int reviveok);
/* XXX Do we need this? */
int vinumpart(dev_t);
/* Memory allocation */
void vinum_meminfo(caddr_t data);
int vinum_mallocinfo(caddr_t data);
void expand_table(void **, int, int);
void add_defective_region(struct plex *plex, off_t offset, size_t length);
void add_unmapped_region(struct plex *plex, off_t offset, size_t length);
void rebuild_plex_unmappedlist(struct plex *plex);
struct rqgroup *allocrqg(struct request *rq, int elements);
void deallocrqg(struct rqgroup *rqg);
/* State transitions */
/* set_drive_state is called from vinumio.c as
 * set_drive_state(driveno, drive_up or drive_down, 1) */
int set_drive_state(int driveno, enum drivestate state, int force);
/* complete_rqe() in interrupt.c calls this with sd_crashed and
 * setstate_force after an I/O error */
int set_sd_state(int sdno, enum sdstate state, enum setstateflags flags);
enum requeststatus checksdstate(struct sd *sd, struct request *rq, daddr_t diskaddr, daddr_t diskend);
int set_plex_state(int plexno, enum plexstate state, enum setstateflags flags);
int set_volume_state(int volumeno, enum volumestate state, enum setstateflags flags);
void get_volume_label(struct volume *vol, struct disklabel *lp);
int write_volume_label(int);
void start_object(struct vinum_ioctl_msg *);
void stop_object(struct vinum_ioctl_msg *);
void setstate(struct vinum_ioctl_msg *msg);
void vinum_label(int);
int vinum_writedisklabel(struct volume *, struct disklabel *);
int initsd(int);
int restart_plex(int plexno);
int revive_block(int plexno);
/* Auxiliary functions */
enum sdstates sdstatemap(struct plex *plex, int *sddowncount);
enum volplexstate vpstate(struct plex *plex);
#endif /* KERNEL */
/* The declarations from here on are outside the KERNEL conditional,
 * so they are visible whether or not KERNEL is defined */
enum keyword get_keyword(char *, struct keywordset *);
void listconfig(void);
/* The lower-case *_state/plex_org functions take an enum value and
 * return a string; the capitalized ones take text and return the enum */
char *drive_state(enum drivestate);
char *volume_state(enum volumestate);
char *plex_state(enum plexstate);
char *plex_org(enum plexorg);
char *sd_state(enum sdstate);
enum drivestate DriveState(char *text);
enum sdstate SdState(char *text);
enum plexstate PlexState(char *text);
enum volumestate VolState(char *text);
/* Each valid* function takes an object number and a reply structure
 * and returns a pointer to the corresponding object type */
struct drive *validdrive(int driveno, struct _ioctl_reply *);
struct sd *validsd(int sdno, struct _ioctl_reply *);
struct plex *validplex(int plexno, struct _ioctl_reply *);
struct volume *validvol(int volno, struct _ioctl_reply *);
int tokenize(char *, char *[]);
void resetstats(struct vinum_ioctl_msg *msg);
/* Locking.  Each lock function has an unlock counterpart taking the
 * same arguments; the lock functions return int, the unlock functions
 * return nothing. */
int lockvol(struct volume *vol);
void unlockvol(struct volume *vol);
int lockplex(struct plex *plex);
void unlockplex(struct plex *plex);
int lockrange(struct plex *plex, off_t first, off_t last);
void unlockrange(struct plex *plex, off_t first, off_t last);
/* lock_config/unlock_config bracket the scan of the volume table in
 * vinum_inactive() */
int lock_config(void);
void unlock_config(void);
#ifdef DEBUG
/*
 * expandrq(prq): grow the rqe table of *prq by RQELTS elements.
 *
 * The table is resized with expand_table() from prq->requests to
 * prq->requests + RQELTS elements, the new elements are zeroed, and
 * prq->rqcount is advanced by RQELTS.
 *
 * The body is wrapped in do { } while (0) so that the macro is one
 * statement and can be used exactly like the function declared in the
 * non-DEBUG case; terminate it with a semicolon.  prq is evaluated
 * several times, so it must not have side effects.
 *
 * NOTE(review): the sizes are computed from prq->requests but the
 * counter that is advanced is prq->rqcount -- confirm that these are
 * meant to be different fields.
 */
#define expandrq(prq) \
    do { \
        expand_table ((void **) &(prq)->rqe, \
            (prq)->requests * sizeof (struct rqelement), \
            ((prq)->requests + RQELTS) * sizeof (struct rqelement) ); \
        bzero (&(prq)->rqe [(prq)->requests], RQELTS * sizeof (struct rqelement)); \
        (prq)->rqcount += RQELTS; \
    } while (0)
#else
void expandrq(struct plexrq *);
#endif

104
sys/dev/vinum/vinumhdr.h Normal file
View File

@ -0,0 +1,104 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
*/
/* Header files used by all modules */
/* $Id: vinumhdr.h,v 1.7 1998/08/07 04:41:18 grog Exp grog $ */
/* Remember whether this is a kernel build.  KERNEL itself is undefined
 * around the inclusion of <sys/disk.h> further down and is defined
 * again afterwards only if REALLYKERNEL is set.  Some source files
 * (interrupt.c, vinumio.c) define REALLYKERNEL themselves before
 * including this header. */
#ifdef KERNEL
#define REALLYKERNEL
#endif
#include <sys/param.h>
#ifdef REALLYKERNEL
#include <sys/systm.h>
#include <sys/kernel.h>
#endif
#ifdef DEVFS
#error "DEVFS code not complete yet"
#include <sys/devfsext.h>
#endif /*DEVFS */
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/dkstat.h>
#include <sys/buf.h>
#include <sys/malloc.h>
#include <sys/uio.h>
#include <sys/namei.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/disklabel.h>
#include <ufs/ffs/fs.h>
#include <sys/mount.h>
#include <sys/device.h>
#undef KERNEL /* XXX */
#include <sys/disk.h>
#ifdef REALLYKERNEL
#define KERNEL
#endif
#include <sys/syslog.h>
#include <sys/fcntl.h>
#include <sys/vnode.h>
#include <sys/dkbad.h>
#include <setjmp.h>
#include <stdarg.h>
#include <vm/vm.h>
#ifdef USES_VM
/* XXX Do we need this? */
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_prot.h>
/* #include <vm/vm_page.h> */
#include <sys/vmmeter.h>
/* #include <machine/pmap.h> */
#include <machine/cputypes.h>
#endif /* USES_VM */
#include <vinumvar.h>
#include <vinumio.h>
#include "vinumkw.h"
#include "vinumext.h"
#undef Free /* defined in some funny net stuff */
/*
 * Allocation wrappers used throughout the driver.
 *
 * In a kernel build (REALLYKERNEL) Malloc and Free expand to calls to
 * MMalloc and FFree, passing the source file and line of the call site
 * in addition to the size or address.  Otherwise they map straight
 * onto malloc() and free().
 */
#ifdef REALLYKERNEL
#define Malloc(x) MMalloc ((x), __FILE__, __LINE__) /* show where we came from */
#define Free(x) FFree ((x), __FILE__, __LINE__) /* show where we came from */
/* size, then file name and line number of the caller */
caddr_t MMalloc (int size, char *, int);
/* address to release, then file name and line number of the caller */
void FFree (void *mem, char *, int);
#else
#define Malloc(x) malloc ((x)) /* just the size */
#define Free(x) free ((x)) /* just the address */
#endif

View File

@ -0,0 +1,190 @@
/* interrupt.c: bottom half of the driver */
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: interrupt.c,v 1.1 1998/08/13 06:12:27 grog Exp grog $
*/
#define REALLYKERNEL
#include "vinumhdr.h"
#include "request.h"
#include <miscfs/specfs/specdev.h>
#include <sys/resourcevar.h>
/* Prototypes for the functions of this file.
 * NOTE(review): complete_raid5_write is declared here, but no
 * definition is visible in this file -- confirm where it lives. */
void complete_raid5_write(struct rqelement *);
void freerq(struct request *rq);
void free_rqg(struct rqgroup *rqg);
void complete_rqe(struct buf *bp);
void sdio_done(struct buf *bp);
/* Take a completed buffer, transfer the data back if
* it's a read, and complete the high-level request
* if this is the last subrequest.
*
* The bp parameter is in fact a struct rqelement, which
* includes a couple of extras at the end.
*/
void
complete_rqe(struct buf *bp)
{
BROKEN_GDB;
struct rqelement *rqe;
struct request *rq;
struct rqgroup *rqg;
struct buf *ubp; /* user buffer */
rqe = (struct rqelement *) bp; /* point to the element element that completed */
rqg = rqe->rqg; /* and the request group */
rq = rqg->rq; /* and the complete request */
if ((bp->b_flags & B_ERROR) != 0) { /* transfer in error */
if (bp->b_error != 0) /* did it return a number? */
rq->error = bp->b_error; /* yes, put it in. */
else if (rq->error == 0) /* no: do we have one already? */
rq->error = EIO; /* no: catchall "I/O error" */
if (rq->error == EIO) /* I/O error, */
set_sd_state(rqe->sdno, sd_crashed, setstate_force); /* take the subdisk down */
}
/* Now update the statistics */
if (bp->b_flags & B_READ) { /* read operation */
DRIVE[rqe->driveno].reads++;
DRIVE[rqe->driveno].bytes_read += bp->b_bcount;
SD[rqe->sdno].reads++;
SD[rqe->sdno].bytes_read += bp->b_bcount;
PLEX[rqe->rqg->plexno].reads++;
PLEX[rqe->rqg->plexno].bytes_read += bp->b_bcount;
} else { /* write operation */
DRIVE[rqe->driveno].writes++;
DRIVE[rqe->driveno].bytes_written += bp->b_bcount;
SD[rqe->sdno].writes++;
SD[rqe->sdno].bytes_written += bp->b_bcount;
PLEX[rqe->rqg->plexno].writes++;
PLEX[rqe->rqg->plexno].bytes_written += bp->b_bcount;
}
ubp = rq->bp; /* user buffer */
rqg->active--; /* one less request active */
if (rqg->active == 0) /* request group finished, */
rq->active--; /* one less */
if (rq->active == 0) { /* request finished, */
#if DEBUG
if (debug & 4) {
if (ubp->b_resid != 0) /* still something to transfer? */
Debugger("resid");
{
int i;
for (i = 0; i < ubp->b_bcount; i += 512) /* XXX debug */
if (((char *) ubp->b_data)[i] != '<') { /* and not what we expected */
printf("At 0x%x (offset 0x%x): '%c' (0x%x)\n",
(int) (&((char *) ubp->b_data)[i]),
i,
((char *) ubp->b_data)[i],
((char *) ubp->b_data)[i]);
Debugger("complete_request checksum");
}
}
}
#endif
if (rq->error) { /* did we have an error? */
ubp->b_flags |= B_ERROR; /* yes, propagate to user */
ubp->b_error = rq->error;
} else
ubp->b_resid = 0; /* completed our transfer */
if (rq->isplex == 0) /* volume request, */
VOL[rq->volplex.volno].active--; /* another request finished */
biodone(ubp); /* top level buffer completed */
freerq(rq); /* return the request storage */
}
}
/*
 * Release a request and everything chained to it.
 *
 * Walks the list of request groups starting at rq->rqg.  For every
 * element of a group whose flags include XFR_MALLOCED and whose data
 * pointer is set, the data buffer is freed; then the group itself is
 * freed.  Finally the request structure is freed.
 */
void
freerq(struct request *rq)
{
    BROKEN_GDB;
    struct rqgroup *rqg = rq->rqg;          /* current group */
    struct rqgroup *following;              /* its successor in the chain */
    int rqno;

    while (rqg != NULL) {                   /* through the whole request chain */
        following = rqg->next;              /* pick this up before the group goes away */
        for (rqno = 0; rqno < rqg->count; rqno++) {
            if (!(rqg->rqe[rqno].flags & XFR_MALLOCED)) /* buffer isn't ours */
                continue;
            if (rqg->rqe[rqno].b.b_data)    /* and the allocation succeeded */
                Free(rqg->rqe[rqno].b.b_data);
        }
        Free(rqg);
        rqg = following;
    }
    Free(rq);                               /* free the request itself */
}
/*
 * Release the data buffer hanging off a group request.
 *
 * Applies only to groups flagged XFR_GROUPOP (RAID 5 requests, per the
 * original comment) whose first element has a data buffer.  The
 * pointer is cleared after the buffer is freed: freerq() frees every
 * non-null b_data of an XFR_MALLOCED element, so a stale pointer left
 * here could be freed a second time.
 *
 * NOTE(review): freerq() indexes rqg->rqe as an array; if rqe is an
 * array member rather than a pointer, the (rqg->rqe) test is always
 * true -- confirm against request.h.
 */
void
free_rqg(struct rqgroup *rqg)
{
    if ((rqg->flags & XFR_GROUPOP)          /* RAID 5 request */
        && (rqg->rqe)                       /* got a buffer structure */
        && (rqg->rqe->b.b_data)) {          /* and it has a buffer allocated */
        Free(rqg->rqe->b.b_data);           /* free it */
        rqg->rqe->b.b_data = NULL;          /* and don't leave it dangling */
    }
}
/*
 * I/O on subdisk completed.
 *
 * bp is the buffer header embedded in a struct sdbuf (the cast below
 * relies on it being the first member), and sbp->bp is the caller's
 * buffer for which the transfer was issued.  The outcome (error flag,
 * error number, residual count) is handed to the caller's buffer,
 * which is then completed with biodone(); after that the drive and
 * subdisk statistics are updated and the sdbuf is freed.
 */
void
sdio_done(struct buf *bp)
{
    struct sdbuf *sbp;

    sbp = (struct sdbuf *) bp;
    /*
     * Propagate the result to the caller's buffer.  The old code
     * assigned through bp, which is the same header as sbp->b, so the
     * error and residual count never reached the caller.
     */
    if (sbp->b.b_flags & B_ERROR) {         /* had an error */
        sbp->bp->b_flags |= B_ERROR;
        sbp->bp->b_error = sbp->b.b_error;
    }
    sbp->bp->b_resid = sbp->b.b_resid;
    biodone(sbp->bp);                       /* complete the caller's I/O */

    /* Now update the statistics (bp is our own header, still valid) */
    if (bp->b_flags & B_READ) {             /* read operation */
        DRIVE[sbp->driveno].reads++;
        DRIVE[sbp->driveno].bytes_read += bp->b_bcount;
        SD[sbp->sdno].reads++;
        SD[sbp->sdno].bytes_read += bp->b_bcount;
    } else {                                /* write operation */
        DRIVE[sbp->driveno].writes++;
        DRIVE[sbp->driveno].bytes_written += bp->b_bcount;
        SD[sbp->sdno].writes++;
        SD[sbp->sdno].bytes_written += bp->b_bcount;
    }
    Free(sbp);
}

886
sys/dev/vinum/vinumio.c Normal file
View File

@ -0,0 +1,886 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: io.c,v 1.16 1998/08/10 23:47:21 grog Exp grog $
*/
#define STATIC /* nothing while we're testing XXX */
#if __FreeBSD__ < 3 /* this is in sys/disklabel.h in 3.0 and on */
#define DTYPE_VINUM 12 /* vinum volume */
#endif
#define REALLYKERNEL
#include "vinumhdr.h"
#include <miscfs/specfs/specdev.h>
/* Jump buffer that CHECKALLOC (vinumext.h) longjmp()s to; it is
 * defined in another file */
extern jmp_buf command_fail; /* return on a failed command */
/* This is a definition, not just a declaration: the pointer itself
 * lives in this file */
struct _ioctl_reply *ioctl_reply; /* data pointer, for returning error messages */
#if __FreeBSD__ >= 3
/* Why aren't these declared anywhere? XXX */
int setjmp(jmp_buf);
void longjmp(jmp_buf, int);
#endif
/* pointer to ioctl p parameter, to save passing it around */
/* init_drive() and driveio() use it as the process for their I/O */
extern struct proc *myproc;
/* Open the device associated with the drive, and set drive's vp */
int
open_drive(struct drive *drive, struct proc *p)
{
BROKEN_GDB;
struct nameidata nd;
struct vattr va;
int error;
if (drive->devicename[0] == '\0') /* no device name */
sprintf(drive->devicename, "/dev/%s", drive->label.name); /* get it from the drive name */
NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, drive->devicename, p);
error = vn_open(&nd, FREAD | FWRITE, 0); /* open the device */
if (error != 0) { /* can't open? */
set_drive_state(drive->driveno, drive_down, 1);
drive->lasterror = error;
printf("vinum open_drive %s: failed with error %d\n", drive->devicename, error); /* XXX */
return error;
}
drive->vp = nd.ni_vp;
drive->p = p;
if (drive->vp->v_usecount > 1) { /* already in use? */
#if __FreeBSD__ == 2 /* pre-4.4BSD Lite/2 parameters */
VOP_UNLOCK(drive->vp);
#else
VOP_UNLOCK(drive->vp, 0, p);
#endif
close_drive(drive);
set_drive_state(drive->driveno, drive_down, 1);
drive->lasterror = EBUSY;
printf("vinum open_drive %s: Drive in use\n", drive->devicename); /* XXX */
return EBUSY;
}
error = VOP_GETATTR(drive->vp, &va, NOCRED, p);
if (error) {
#if __FreeBSD__ == 2 /* pre-4.4BSD Lite/2 parameters */
VOP_UNLOCK(drive->vp);
#else
VOP_UNLOCK(drive->vp, 0, p);
#endif
close_drive(drive);
set_drive_state(drive->driveno, drive_down, 1);
drive->lasterror = error;
printf("vinum open_drive %s: GETAATTR returns error %d\n", drive->devicename, error); /* XXX */
return error;
}
drive->dev = va.va_rdev; /* device */
if (va.va_type != VBLK) { /* only consider block devices */
#if __FreeBSD__ == 2 /* pre-4.4BSD Lite/2 parameters */
VOP_UNLOCK(drive->vp);
#else
VOP_UNLOCK(drive->vp, 0, p);
#endif
close_drive(drive);
set_drive_state(drive->driveno, drive_down, 1); /* this also closes the drive */
drive->lasterror = ENOTBLK;
printf("vinum open_drive %s: Not a block device\n", drive->devicename); /* XXX */
return ENOTBLK;
}
drive->vp->v_numoutput = 0;
#if __FreeBSD__ == 2 /* pre-4.4BSD Lite/2 parameters */
VOP_UNLOCK(drive->vp);
#else
VOP_UNLOCK(drive->vp, 0, p);
#endif
return 0;
}
/*
 * Set some variables in the drive struct in more convenient form.
 *
 * Fills in the block size and sectors per block, stamps the label with
 * the host name and the current time, derives the drive size in bytes
 * and the number of sectors available for subdisks from the partition
 * information, and creates a free list with a single entry covering
 * the drive from DATASTART onwards.  On success the drive is set to
 * drive_up and 0 is returned.
 *
 * Returns ENOSPC if the drive is smaller than MINVINUMSLICE or if the
 * free list cannot be allocated.  In both cases the drive is set to
 * drive_down and the error is stored in drive->lasterror.
 */
int
set_drive_parms(struct drive *drive)
{
    drive->blocksize = BLKDEV_IOSIZE;       /* XXX do we need this? */
    drive->secsperblock = drive->blocksize  /* number of sectors per block */
        / drive->partinfo.disklab->d_secsize;

    /* Now update the label part */
    bcopy(hostname, drive->label.sysname, VINUMHOSTNAMELEN);    /* put in host name */
#if __FreeBSD__ >= 3
    getmicrotime(&drive->label.date_of_birth);  /* and current time */
#else
    drive->label.date_of_birth = time;      /* and current time */
#endif
    drive->label.drive_size = ((u_int64_t) drive->partinfo.part->p_size)    /* size of the drive in bytes */
        * ((u_int64_t) drive->partinfo.disklab->d_secsize);

    /* number of sectors available for subdisks */
    drive->sectors_available = drive->label.drive_size / DEV_BSIZE - DATASTART;

    /*
     * XXX Bug in 3.0 as of January 1998: you can open
     * non-existent slices.  They have a length of 0
     */
    if (drive->label.drive_size < MINVINUMSLICE) {  /* too small to worry about */
        set_drive_state(drive->driveno, drive_down, 1);
        printf("vinum open_drive %s: Drive too small\n", drive->devicename);    /* XXX */
        drive->lasterror = ENOSPC;
        return ENOSPC;
    }
    drive->freelist_size = INITIAL_DRIVE_FREELIST;  /* initial number of entries */
    drive->freelist = (struct drive_freelist *)
        Malloc(INITIAL_DRIVE_FREELIST * sizeof(struct drive_freelist));
    if (drive->freelist == NULL) {          /* can't malloc, dammit */
        /*
         * Handle this like the other failure above: take the drive
         * down and record the error instead of returning with the
         * drive state and lasterror untouched.
         */
        drive->freelist_size = 0;           /* there is no list */
        set_drive_state(drive->driveno, drive_down, 1);
        printf("vinum open_drive %s: Can't allocate free list\n", drive->devicename);   /* XXX */
        drive->lasterror = ENOSPC;
        return ENOSPC;
    }
    drive->freelist_entries = 1;            /* just (almost) the complete drive */
    drive->freelist[0].offset = DATASTART;  /* starts here */
    drive->freelist[0].sectors = (drive->label.drive_size >> DEV_BSHIFT) - DATASTART;   /* and it's this long */
    set_drive_state(drive->driveno, drive_up, 1);   /* our drive is accessible */
    return 0;
}
/* Initialize a drive: open the device and add device
* information */
int
init_drive(struct drive *drive)
{
BROKEN_GDB;
int error;
if (drive->devicename[0] == '\0') { /* no device name yet, default to drive name */
drive->lasterror = EINVAL;
printf("vinum: Can't open drive without drive name\n"); /* XXX */
return EINVAL;
}
error = open_drive(drive, myproc); /* open the drive */
if (error)
return error;
error = VOP_IOCTL(drive->vp, /* get the partition information */
DIOCGPART,
(caddr_t) & drive->partinfo,
FREAD,
NOCRED,
myproc);
if (error) {
printf("vinum open_drive %s: Can't get partition information, error %d\n",
drive->devicename,
error); /* XXX */
close_drive(drive);
drive->lasterror = error;
set_drive_state(drive->driveno, drive_down, 1);
return error;
}
if (drive->partinfo.part->p_fstype != 0) { /* not plain */
drive->lasterror = EFTYPE;
printf("vinum open_drive %s: Wrong partition type for vinum\n", drive->devicename); /* XXX */
close_drive(drive);
set_drive_state(drive->driveno, drive_down, 1);
return EFTYPE;
}
return set_drive_parms(drive); /* set various odds and ends */
}
/*
 * Close a drive if it's open.  No errors.
 *
 * A drive counts as open while drive->vp is set.  The vnode is closed
 * on behalf of the process recorded in drive->p and the pointer is
 * cleared, so a second call does nothing.
 */
void
close_drive(struct drive *drive)
{
    if (drive->vp == NULL)                  /* not open */
        return;
    vn_close(drive->vp, FREAD | FWRITE, NOCRED, drive->p);
    drive->vp = NULL;
}
/* Remove drive from the configuration.
* Caller must ensure that it isn't active
*/
void
remove_drive(int driveno)
{
BROKEN_GDB;
struct drive *drive = &vinum_conf.drive[driveno];
long long int nomagic = VINUM_NOMAGIC; /* no magic number */
write_drive(drive, /* obliterate the magic, but leave a hint */
(char *) &nomagic,
8,
VINUM_LABEL_OFFSET);
close_drive(drive); /* and close it */
drive->state = drive_unallocated; /* and forget everything we knew about it */
save_config(); /* and save the updated configuration */
}
/*
 * Transfer drive data.  Intended to be called through these defines
 * (currently commented out in vinumext.h):
 * #define read_drive(a, b, c, d) driveio (a, b, c, d, B_READ)
 * #define write_drive(a, b, c, d) driveio (a, b, c, d, B_WRITE)
 *
 * drive   drive to transfer to or from; it must be open
 * buf     data buffer
 * length  number of bytes to transfer
 * offset  byte offset on the drive; must be a multiple of the sector
 *         size
 * flag    B_READ or B_WRITE
 *
 * Builds a private buffer header, hands it to the strategy routine of
 * the drive's device and sleeps until the transfer is flagged done.
 *
 * Returns 0 on success, EINVAL if offset is not on a sector boundary,
 * or the error number reported in the buffer.  If the buffer header
 * cannot be allocated, CHECKALLOC bails out via longjmp.
 */
int
driveio(struct drive *drive, void *buf, size_t length, off_t offset, int flag)
{
    BROKEN_GDB;
    int error;
    struct buf *bp;
    int spl;

    error = 0;

    /*
     * The block number can only express whole sectors.  The old test
     * was inverted: it set b_blkno only for misaligned offsets and
     * left it unset for aligned ones.
     */
    if (offset & (drive->partinfo.disklab->d_secsize - 1))  /* not on a block boundary */
        return EINVAL;

    /* Get a buffer */
    bp = (struct buf *) Malloc(sizeof(struct buf));
    CHECKALLOC(bp, "Can't allocate memory\n");
    /*
     * Clear the new header.  This used to be bzero(&buf, sizeof(buf)),
     * which wiped the caller's data pointer and left the header
     * uninitialized.
     */
    bzero(bp, sizeof(*bp));
    bp->b_flags = B_BUSY | flag;            /* tell us when it's done */
    bp->b_iodone = drive_io_done;           /* here */
    bp->b_proc = myproc;                    /* process */
    bp->b_dev = drive->vp->v_un.vu_specinfo->si_rdev;   /* device */
    bp->b_blkno = offset / drive->partinfo.disklab->d_secsize;  /* block number */
    bp->b_data = buf;
    bp->b_vp = drive->vp;                   /* vnode */
    bp->b_bcount = length;
    bp->b_bufsize = length;

    (*bdevsw[major(bp->b_dev)]->d_strategy) (bp);   /* initiate the transfer */

    spl = splbio();
    while ((bp->b_flags & B_DONE) == 0) {
        bp->b_flags |= B_CALL;              /* wake me again */
        tsleep((caddr_t) bp, PRIBIO, "driveio", 0); /* and wait for it to complete */
    }
    splx(spl);
    if (bp->b_flags & B_ERROR)              /* didn't work */
        error = bp->b_error;                /* get the error return */
    Free(bp);                               /* then return the buffer */
    return error;
}
/* Read data from a drive through the buffer cache.
 *
 * drive:  drive to read from
 * buf:    kernel buffer to receive the data
 * length: number of bytes to read
 * offset: byte offset on the drive; need not be block aligned
 *
 * The transfer is performed one drive block (drive->blocksize) at a
 * time and the wanted part of each block is copied out with uiomove.
 *
 * Return error number: ENODEV if the drive has no open vnode,
 * otherwise 0 or the error from bread/breadn/uiomove
 */
int
read_drive(struct drive *drive, void *buf, size_t length, off_t offset)
{
    BROKEN_GDB;
    int error;
    struct buf *bp;
    daddr_t nextbn;
    long bscale;
    struct uio uio;
    struct iovec iov;
    daddr_t blocknum;                               /* block number */
    int blockoff;                                   /* offset in block */
    int count;                                      /* amount to transfer */

    if (drive->vp == NULL) {                        /* not open: same treatment as write_drive */
        drive->lasterror = ENODEV;
        return ENODEV;
    }
    iov.iov_base = buf;
    iov.iov_len = length;

    uio.uio_iov = &iov;
    uio.uio_iovcnt = 1;                             /* a single iovec, not the byte count */
    uio.uio_offset = offset;
    uio.uio_resid = length;
    uio.uio_segflg = UIO_SYSSPACE;
    uio.uio_rw = UIO_READ;
    uio.uio_procp = myproc;

    bscale = btodb(drive->blocksize);               /* mask off offset from block number */
    do {
        blocknum = btodb(uio.uio_offset) & ~(bscale - 1); /* get the block number */
        blockoff = uio.uio_offset % drive->blocksize; /* offset in block */
        count = min((unsigned) (drive->blocksize - blockoff), /* amount to transfer in this block */
            uio.uio_resid);

        /* XXX Check this.  I think the test is wrong */
        if (drive->vp->v_lastr + bscale == blocknum) { /* did our last read finish in this block? */
            nextbn = blocknum + bscale;             /* note the end of the transfer */
            error = breadn(drive->vp,               /* and read with read-ahead */
                blocknum,
                (int) drive->blocksize,
                &nextbn,
                (int *) &drive->blocksize,
                1,
                NOCRED,
                &bp);
        } else                                      /* random read: just read this block */
            error = bread(drive->vp, blocknum, (int) drive->blocksize, NOCRED, &bp);
        drive->vp->v_lastr = blocknum;              /* note the last block we read */
        count = min(count, drive->blocksize - bp->b_resid);
        if (error) {
            brelse(bp);
            return error;
        }
        error = uiomove((char *) bp->b_data + blockoff, count, &uio); /* move the data */
        brelse(bp);
    }
    while (error == 0 && uio.uio_resid > 0 && count != 0);
    return error;
}
/* Write data to a drive through the buffer cache.
 *
 * drive:  drive to write to
 * buf:    kernel buffer holding the data
 * length: number of bytes to write
 * offset: byte offset on the drive; need not be block aligned
 *
 * The transfer is performed one drive block (drive->blocksize) at a
 * time.  A partial block is read first so that the bytes around the
 * new data are preserved.
 *
 * Return error number.  A drive in state drive_down is silently
 * ignored (returns 0); a drive with no open vnode returns ENODEV.
 * Any error is also recorded in drive->lasterror, and EIO takes the
 * drive down.
 */
int
write_drive(struct drive *drive, void *buf, size_t length, off_t offset)
{
    BROKEN_GDB;
    int error;
    struct buf *bp;
    struct uio uio;
    struct iovec iov;
    daddr_t blocknum;                               /* block number */
    int blockoff;                                   /* offset in block */
    int count;                                      /* amount to transfer */
    int blockshift;

    if (drive->state == drive_down)                 /* currently down */
        return 0;                                   /* ignore */
    if (drive->vp == NULL) {
        drive->lasterror = ENODEV;
        return ENODEV;                              /* not configured yet */
    }
    iov.iov_base = buf;
    iov.iov_len = length;

    uio.uio_iov = &iov;
    uio.uio_iovcnt = 1;                             /* a single iovec, not the byte count */
    uio.uio_offset = offset;
    uio.uio_resid = length;
    uio.uio_segflg = UIO_SYSSPACE;
    uio.uio_rw = UIO_WRITE;
    uio.uio_procp = myproc;

    error = 0;
    blockshift = btodb(drive->blocksize) - 1;       /* mask for the sector bits
                                                     * within a drive block */
    do {
        blocknum = btodb(uio.uio_offset) & ~blockshift; /* get the block number */
        blockoff = uio.uio_offset % drive->blocksize; /* offset in block */
        count = min((unsigned) (drive->blocksize - blockoff), /* amount to transfer in this block */
            uio.uio_resid);
        if (count == drive->blocksize)              /* the whole block */
            bp = getblk(drive->vp, blocknum, drive->blocksize, 0, 0); /* just get it */
        else                                        /* partial block: */
            error = bread(drive->vp,                /* read it first */
                blocknum,
                drive->blocksize,
                NOCRED,
                &bp);
        count = min(count, drive->blocksize - bp->b_resid); /* how much will we transfer now? */
        if (error == 0)
            error = uiomove((char *) bp->b_data + blockoff, /* move the data to the block */
                count,
                &uio);
        if (error) {
            brelse(bp);
            drive->lasterror = error;
            switch (error) {
            case EIO:
                set_drive_state(drive->driveno, drive_down, 1);
                break;

                /* XXX Add other possibilities here */
            default:
                break;                              /* a label needs a statement after it */
            }
            return error;
        }
        if (count + blockoff == drive->blocksize)
            /* The transfer goes to the end of the block.  There's
             * no need to wait for any more data to arrive. */
            bawrite(bp);                            /* start the write now */
        else
            bdwrite(bp);                            /* do a delayed write */
    }
    while (error == 0 && uio.uio_resid > 0 && count != 0);
    if (error)
        drive->lasterror = error;
    return error;                                   /* OK */
}
/* Completion routine installed by driveio as bp->b_iodone: wake
 * whoever is sleeping on the buffer header, then clear B_CALL */
void
drive_io_done(struct buf *bp)
{
    BROKEN_GDB;

    wakeup((caddr_t) bp);                           /* Wachet auf! */
    bp->b_flags = bp->b_flags & ~B_CALL;            /* don't do this again */
}
/* Check a drive for a vinum header. If found,
* update the drive information. We come here
* with a partially populated drive structure
* which includes the device name.
*
* Return information on what we found
*/
enum drive_label_info
read_drive_label(struct drive *drive)
{
BROKEN_GDB;
int error;
int result; /* result of our search */
struct vinum_hdr *vhdr; /* and as header */
error = init_drive(drive); /* find the drive */
if (error) /* find the drive */
return DL_CANT_OPEN; /* not ours */
vhdr = (struct vinum_hdr *) Malloc(VINUMHEADERLEN); /* allocate buffers */
CHECKALLOC(vhdr, "Can't allocate memory");
error = read_drive(drive, (void *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET);
if (vhdr->magic == VINUM_MAGIC) { /* ours! */
if (drive->label.name[0] /* we have a name for this drive */
&&(strcmp(drive->label.name, vhdr->label.name))) { /* but it doesn't match the real name */
drive->lasterror = EINVAL;
result = DL_WRONG_DRIVE; /* it's the wrong drive */
} else {
set_drive_parms(drive); /* and set other parameters */
result = DL_OURS;
}
/* We copy the drive anyway so that we have
* the correct name in the drive info. This
* may not be the name specified */
drive->label = vhdr->label; /* put in the label information */
} else if (vhdr->magic == VINUM_NOMAGIC) /* was ours, but we gave it away */
result = DL_DELETED_LABEL;
else
result = DL_NOT_OURS; /* we could have it, but we don't yet */
Free(vhdr); /* that's all. */
return result;
}
/* Check a drive for a vinum header. If found,
* read configuration information from the drive and
* incorporate the data into the configuration.
*
* Return error number
*/
int
check_drive(char *drivename)
{
BROKEN_GDB;
int error;
struct nameidata nd; /* mount point credentials */
char *config_text; /* read the config info from disk into here */
volatile char *cptr; /* pointer into config information */
char *eptr; /* end pointer into config information */
int driveno;
struct drive *drive;
char *config_line; /* copy the config line to */
driveno = find_drive_by_dev(drivename, 1); /* doesn't exist, create it */
drive = &vinum_conf.drive[driveno]; /* and get a pointer */
strcpy(drive->devicename, drivename); /* put in device name */
if (read_drive_label(drive) == DL_OURS) { /* ours! */
config_text = (char *) Malloc(MAXCONFIG * 2); /* allocate buffers */
CHECKALLOC(config_text, "Can't allocate memory");
config_line = (char *) Malloc(MAXCONFIGLINE * 2); /* allocate buffers */
CHECKALLOC(config_line, "Can't allocate memory");
/* Read in both copies of the configuration information */
error = read_drive(drive, config_text, MAXCONFIG * 2, VINUM_CONFIG_OFFSET);
if (error != 0) {
printf("vinum: Can't read device %s, error %d\n", drive->devicename, error);
Free(config_text);
Free(config_line);
free_drive(drive); /* give it back */
return error;
}
/* XXX At this point, check that the two copies are the same, and do something useful if not.
* In particular, consider which is newer, and what this means for the integrity of the
* data on the drive */
/* Parse the configuration, and add it to the global configuration */
for (cptr = config_text; *cptr != '\0';) { /* love this style(9) */
volatile int parse_status; /* return value from parse_config */
for (eptr = config_line; (*cptr != '\n') && (*cptr != '\0');) /* until the end of the line */
*eptr++ = *cptr++;
*eptr = '\0'; /* and delimit */
if (setjmp(command_fail) == 0) { /* come back here on error and continue */
parse_status = parse_config(config_line, &keyword_set); /* parse the config line */
if (parse_status < 0) { /* error in config */
/* This config should have been parsed in user
* space. If we run into problems here, something
* serious is afoot. Complain and let the user
* snarf the config to see what's wrong */
printf("vinum: Config error on drive %s, aborting integration\n", nd.ni_dirp);
Free(config_text);
Free(config_line);
free_drive(drive); /* give it back */
return EINVAL;
}
}
while (*cptr == '\n')
cptr++; /* skip to next line */
}
Free(config_text);
if ((vinum_conf.flags & VF_READING_CONFIG) == 0) /* not reading config */
updateconfig(0); /* update object states */
printf("vinum: read configuration from %s\n", drivename);
return 0; /* it all worked */
} else { /* no vinum label found */
if (drive->lasterror) {
set_drive_state(drive->driveno, drive_down, 1);
return drive->lasterror;
} else
return ENODEV; /* not our device */
}
}
/* Kludge: kernel printf doesn't handle longs correctly XXX */
static char *lltoa(long long l, char *s);
static char *sappend(char *txt, char *s);
/* Write the decimal representation of l at s.
 *
 * The output is NOT NUL terminated: the return value points just
 * past the last character written, so that the caller can continue
 * appending there (see sappend).  At most 20 characters are written
 * (sign plus 19 digits for a 64 bit value).
 *
 * The conversion works on the unsigned magnitude, so the most
 * negative value, which can't be negated as a signed quantity, is
 * handled correctly.
 */
static char *
lltoa(long long l, char *s)
{
    char digits[24];                                /* digits, least significant first */
    int ndigits = 0;
    unsigned long long magnitude;

    if (l < 0) {
        *s++ = '-';
        magnitude = 0ULL - (unsigned long long) l;  /* well defined, even for the minimum */
    } else
        magnitude = (unsigned long long) l;
    do {                                            /* at least one digit, so 0 prints as "0" */
        digits[ndigits++] = (char) ('0' + (int) (magnitude % 10));
        magnitude /= 10;
    }
    while (magnitude != 0);
    while (ndigits > 0)                             /* emit most significant digit first */
        *s++ = digits[--ndigits];
    return s;
}
/* Copy the string txt, including its terminating NUL, to s.
 * Return a pointer to the NUL just written, which is where the
 * next piece of text should be appended */
static char *
sappend(char *txt, char *s)
{
    for (;;) {
        char c = *txt++;

        *s = c;
        if (c == '\0')                              /* copied the terminator: s points at it */
            return s;
        s++;
    }
}
/* Format the configuration in text form into the buffer
 * at config. Don't go beyond len bytes
 * XXX this stinks. Fix soon.
 *
 * The buffer is cleared first, then one text line is written for
 * every allocated drive, volume, plex and subdisk, in that order.
 * s always points at the place where the next text goes.
 *
 * The length check is made only AFTER each line has been written,
 * and it only tests whether fewer than 80 bytes remain: the sprintf
 * calls themselves are not bounded by len.  When the check fires, a
 * message is printed and the function returns with the text written
 * so far left in the buffer.
 */
void
format_config(char *config, int len)
{
BROKEN_GDB;
int i;
int j;
char *s = config;
bzero(config, len);
/* First write the drive configuration */
for (i = 0; i < vinum_conf.drives_used; i++) {
struct drive *drive;
drive = &vinum_conf.drive[i];
if (drive->state != drive_unallocated) {
sprintf(s,
"drive %s state %s device %s\n",
drive->label.name,
drive_state(drive->state),
drive->devicename);
while (*s)
s++; /* find the end */
if (s > &config[len - 80]) {
printf("vinum: configuration data overflow\n");
return;
}
}
}
/* Then the volume configuration */
for (i = 0; i < vinum_conf.volumes_used; i++) {
struct volume *vol;
vol = &vinum_conf.volume[i];
if (vol->state != volume_unallocated) {
if (vol->preferred_plex >= 0) /* preferences, */
sprintf(s,
"volume %s state %s readpol prefer %s",
vol->name,
volume_state(vol->state),
vinum_conf.plex[vol->preferred_plex].name);
else /* default round-robin */
sprintf(s,
"volume %s state %s",
vol->name,
volume_state(vol->state));
while (*s)
s++; /* find the end */
s = sappend("\n", s);
if (s > &config[len - 80]) {
printf("vinum: configuration data overflow\n");
return;
}
}
}
/* Then the plex configuration */
for (i = 0; i < vinum_conf.plexes_used; i++) {
struct plex *plex;
plex = &vinum_conf.plex[i];
if (plex->state != plex_unallocated) {
sprintf(s, "plex name %s state %s org %s ",
plex->name,
plex_state(plex->state),
plex_org(plex->organization));
while (*s)
s++; /* find the end */
/* Only striped plexes get a stripe size */
if ((plex->organization == plex_striped)
) {
sprintf(s, "%db ", (int) plex->stripesize);
while (*s)
s++; /* find the end */
}
if (plex->volno >= 0) /* we have a volume */
sprintf(s, "vol %s ", vinum_conf.volume[plex->volno].name);
while (*s)
s++; /* find the end */
/* NOTE(review): s is not advanced inside this loop, so every
 * " sd <name>" is written at the same place, and the sappend
 * below then puts the newline and terminator at that place too.
 * The subdisk names therefore never become part of the
 * terminated text; what is left of the last one stays in the
 * buffer beyond the terminator until later output overwrites it.
 * The subdisk lines written further down name their plex
 * themselves.  Confirm whether this is intended before changing
 * it: it alters what is stored on disk. */
for (j = 0; j < plex->subdisks; j++) {
sprintf(s, " sd %s", vinum_conf.sd[plex->sdnos[j]].name);
}
s = sappend("\n", s);
if (s > &config[len - 80]) {
printf("vinum: configuration data overflow\n");
return;
}
}
}
/* And finally the subdisk configuration */
for (i = 0; i < vinum_conf.subdisks_used; i++) {
struct sd *sd = &vinum_conf.sd[i]; /* XXX */
if (vinum_conf.sd[i].state != sd_unallocated) {
sprintf(s,
"sd name %s drive %s plex %s state %s len ",
sd->name,
vinum_conf.drive[sd->driveno].label.name,
vinum_conf.plex[sd->plexno].name,
sd_state(sd->state));
while (*s)
s++; /* find the end */
/* The numbers are formatted with lltoa instead of sprintf (see
 * the kludge comment at lltoa).  lltoa doesn't terminate its
 * output; the sappend which follows each call does. */
s = lltoa(sd->sectors, s);
s = sappend("b driveoffset ", s);
s = lltoa(sd->driveoffset, s);
s = sappend("b plexoffset ", s);
s = lltoa(sd->plexoffset, s);
s = sappend("b\n", s);
if (s > &config[len - 80]) {
printf("vinum: configuration data overflow\n");
return;
}
}
}
}
/* Write the configuration to all vinum slices */
int
save_config(void)
{
BROKEN_GDB;
int error;
int written_config; /* set when we firstnwrite the config to disk */
int driveno;
struct drive *drive; /* point to current drive info */
struct vinum_hdr *vhdr; /* and as header */
char *config; /* point to config data */
int wlabel_on; /* to set writing label on/off */
/* don't save the configuration while we're still working on it */
if (vinum_conf.flags & VF_CONFIGURING)
return 0;
written_config = 0; /* no config written yet */
/* Build a volume header */
vhdr = (struct vinum_hdr *) Malloc(VINUMHEADERLEN); /* get space for the config data */
CHECKALLOC(vhdr, "Can't allocate config data");
vhdr->magic = VINUM_MAGIC; /* magic number */
vhdr->config_length = MAXCONFIG; /* length of following config info */
config = Malloc(MAXCONFIG); /* get space for the config data */
CHECKALLOC(config, "Can't allocate config data");
format_config(config, MAXCONFIG);
error = 0; /* no errors yet */
for (driveno = 0; driveno < vinum_conf.drives_used; driveno++) {
drive = &vinum_conf.drive[driveno]; /* point to drive */
if (drive->state != drive_down) {
#if (__FreeBSD__ >= 3)
getmicrotime(&drive->label.last_update); /* time of last update is now */
#else
drive->label.last_update = time; /* time of last update is now */
#endif
bcopy((char *) &drive->label, /* and the label info from the drive structure */
(char *) &vhdr->label,
sizeof(vhdr->label));
if ((drive->state != drive_unallocated)
&& (drive->state != drive_uninit)) {
wlabel_on = 1; /* enable writing the label */
error = VOP_IOCTL(drive->vp, /* make the label writeable */
DIOCWLABEL,
(caddr_t) & wlabel_on,
FWRITE,
NOCRED,
myproc);
if (error == 0)
error = write_drive(drive, vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET);
if (error == 0)
error = write_drive(drive, config, MAXCONFIG, VINUM_CONFIG_OFFSET);
wlabel_on = 0; /* enable writing the label */
VOP_IOCTL(drive->vp, /* make the label non-writeable again */
DIOCWLABEL,
(caddr_t) & wlabel_on,
FWRITE,
NOCRED,
myproc);
if (error) {
printf("vinum: Can't write config to %s, error %d\n", drive->devicename, error);
set_drive_state(drive->driveno, drive_down, 1);
} else
written_config = 1; /* we've written it on at least one drive */
}
}
}
Free(vhdr);
Free(config);
return written_config == 0; /* return 1 if we failed to write config */
}
/* Disk labels are a mess. The correct way to access them
* is with the DIOC[GSW]DINFO ioctls, but some programs, such
* as newfs, access the disk directly, so we have to write
* things there. We do this only on request. If a user
* request tries to read it directly, we fake up one on the fly.
*/
/* Build a disk label describing a volume.
 *
 * vol: volume to describe
 * lp:  label to fill in; the storage is supplied by the caller and is
 *      completely overwritten
 */
void
get_volume_label(struct volume *vol, struct disklabel *lp)
{
    bzero(lp, sizeof(struct disklabel));

    /* Identification */
    lp->d_magic = DISKMAGIC;
    lp->d_magic2 = DISKMAGIC;
    lp->d_type = DTYPE_VINUM;
    strncpy(lp->d_typename, "vinum", sizeof(lp->d_typename));
    strncpy(lp->d_packname, vol->name, min(sizeof(lp->d_packname), sizeof(vol->name)));
    lp->d_flags = 0;

    /* Geometry.  Fitting unto the vine, a vinum has a single
     * track with all its sectors */
    lp->d_secsize = DEV_BSIZE;                      /* bytes per sector */
    lp->d_nsectors = vol->size;                     /* data sectors per track */
    lp->d_ntracks = 1;                              /* tracks per cylinder */
    lp->d_ncylinders = 1;                           /* data cylinders per unit */
    lp->d_secpercyl = vol->size;                    /* data sectors per cylinder */
    lp->d_secperunit = vol->size;                   /* data sectors per unit */
    lp->d_rpm = 14400 * vol->plexes;                /* to keep them guessing */
    lp->d_interleave = 1;
    lp->d_bbsize = BBSIZE;
    lp->d_sbsize = SBSIZE;

    /* Partitions a, b and c are identical and
     * the size of the volume.  a is UFS, b is
     * swap, c is nothing */
    lp->d_npartitions = LABEL_PART + 1;
    lp->d_partitions[0].p_size = vol->size;
    lp->d_partitions[0].p_fstype = FS_BSDFFS;       /* FreeBSD File System :-) */
    lp->d_partitions[0].p_fsize = 1024;             /* FS fragment size */
    lp->d_partitions[0].p_frag = 8;                 /* and fragments per block */
    lp->d_partitions[SWAP_PART].p_size = vol->size;
    lp->d_partitions[SWAP_PART].p_fstype = FS_SWAP; /* swap partition */
    lp->d_partitions[LABEL_PART].p_size = vol->size;

    /* The checksum covers everything above, so it comes last */
    lp->d_checksum = dkcksum(lp);
}
/* Write a disk label to the label sector of a volume.
 *
 * volno: index of the volume in the configuration
 *
 * Return error number:
 *   ENOENT  volno is out of range or the volume is unallocated
 *   ENOMEM  no memory for the temporary label
 *   other   result of the write (biowait)
 */
int
write_volume_label(int volno)
{
    struct disklabel *lp;
    struct buf *bp;
    struct disklabel *dlp;
    struct volume *vol;
    int error;

    /* Check the volume before allocating anything, so that the
     * error returns have nothing to clean up */
    if ((unsigned) (volno) >= (unsigned) vinum_conf.volumes_used) /* invalid volume */
        return ENOENT;
    vol = &VOL[volno];                              /* volume in question */
    if (vol->state == volume_unallocated)           /* nothing there */
        return ENOENT;

    /* Room for a label, rounded UP to a multiple of DEV_BSIZE */
    lp = (struct disklabel *) Malloc((sizeof(struct disklabel) + (DEV_BSIZE - 1)) & ~(DEV_BSIZE - 1));
    if (lp == 0)
        return ENOMEM;
    get_volume_label(vol, lp);                      /* get the label */

    /* Now write to disk.  This code is derived from the
     * system writedisklabel (), which does silly things
     * like reading the label and refusing to write
     * unless it's already there. */
    bp = geteblk((int) lp->d_secsize);              /* get a buffer */
    bp->b_dev = minor(vol->devno) | (CDEV_MAJOR << MAJORDEV_SHIFT); /* our own raw volume */
    bp->b_blkno = LABELSECTOR * ((int) lp->d_secsize / DEV_BSIZE);
    bp->b_bcount = lp->d_secsize;
    bzero(bp->b_data, lp->d_secsize);
    dlp = (struct disklabel *) bp->b_data;
    *dlp = *lp;                                     /* copy the label into the buffer */
    Free(lp);                                       /* and we're finished with our copy */
    bp->b_flags &= ~B_INVAL;
    bp->b_flags |= B_BUSY | B_WRITE;
    vinumstrategy(bp);                              /* write it out */
    error = biowait(bp);
    bp->b_flags |= B_INVAL | B_AGE;
    brelse(bp);
    return error;
}
/* Initialize a subdisk.
 * This is only a stub: the subdisk number is ignored
 * and success (0) is always returned */
int
initsd(int sdno)
{
    int status = 0;                                 /* nothing to do, so nothing can fail */

    return status;
}

132
sys/dev/vinum/vinumio.h Normal file
View File

@ -0,0 +1,132 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: vinumio.h,v 1.10 1998/08/10 05:46:19 grog Exp grog $
*/
#define MAX_IOCTL_REPLY 256 /* size of the message text in an ioctl reply */
#define L 'F' /* ID letter of our ioctls */
/* VINUM_CREATE returns a buffer of this kind.
 * The ioctl handler also uses it for several other requests: the
 * ioctl call itself then returns 0 and the real result is in here. */
struct _ioctl_reply {
int error; /* error number, 0 if the request succeeded */
char msg[MAX_IOCTL_REPLY]; /* message text; empty if there is none */
};
/* ioctl requests.
 * The *CONFIG requests which return an object take the index of the
 * object in the first int of the data area and return the object
 * itself in the same area. */
#define BUFSIZE 1024 /* size of buffer, including continuations */
#define VINUM_CREATE _IOC(IOC_IN | IOC_OUT, L, 64, BUFSIZE) /* configure vinum */
#define VINUM_GETCONFIG _IOR(L, 65, struct _vinum_conf) /* get global config */
#define VINUM_DRIVECONFIG _IOWR(L, 66, struct drive) /* get drive config */
#define VINUM_SDCONFIG _IOWR(L, 67, struct sd) /* get subdisk config */
#define VINUM_PLEXCONFIG _IOWR(L, 68, struct plex) /* get plex config */
#define VINUM_VOLCONFIG _IOWR(L, 69, struct volume) /* get volume config */
#define VINUM_PLEXSDCONFIG _IOWR(L, 70, struct sd) /* get sd config for plex (plex, sdno) */
#define VINUM_GETFREELIST _IOWR(L, 71, struct drive_freelist) /* get freelist element (drive, fe) */
#define VINUM_SAVECONFIG _IOC(0, L, 72, 0) /* release locks, update, write config to disk */
#define VINUM_RESETCONFIG _IOC(0, L, 73, 0) /* trash config on disk */
/* NOTE(review): the VINUM_INIT handler in vinumioctl.c only clears the
 * reply error and returns; confirm whether the description is still right */
#define VINUM_INIT _IOC(0, L, 74, 0) /* read config from disk */
#ifdef DEBUG
/* Parameter block for VINUM_DEBUG */
struct debuginfo {
int changeit; /* non-zero: set the debug flags to param instead of entering the debugger */
int param; /* new value for the debug flags */
};
/* NOTE(review): the handler in vinumioctl.c references VINUM_DEBUG
 * without testing DEBUG (its #ifdef is commented out) */
#define VINUM_DEBUG _IOWR(L, 75, struct debuginfo) /* call the debugger from ioctl () */
#endif
/* The kinds of object which an ioctl message can refer to */
enum objecttype {
drive_object,
sd_object, /* subdisk */
plex_object,
volume_object,
invalid_object
};
/* Set the state of an object (for example, start it).
 * The original description of the parameters was two integers:
 * msg [0] index in vinum_conf.<object>
 * msg [1] type of object (see enum objecttype)
 * NOTE(review): the handler in vinumioctl.c passes the data to
 * setstate () as a struct vinum_ioctl_msg, which starts with these
 * two fields and continues with the state to set; confirm which
 * description is current.
 *
 * Return ioctl_reply
 */
#define VINUM_SETSTATE _IOC(IOC_IN | IOC_OUT, L, 76, MAX_IOCTL_REPLY) /* set the state of an object */
/* The state to set with VINUM_SETSTATE. Since
 * each object has a different set of states, we
 * need to translate this generic state into the
 * state of the specific kind of object later */
enum objectstate {
object_down,
object_initializing,
object_up
};
/* This structure is used for modifying objects
 * (VINUM_SETSTATE, VINUM_REMOVE, VINUM_RESETSTATS, VINUM_ATTACH,
 * VINUM_DETACH, VINUM_REPLACE). Which fields are used
 * depends on the request, as noted for each field.
 */
struct vinum_ioctl_msg {
int index; /* index of the object */
enum objecttype type; /* and what kind of object it is */
enum objectstate state; /* state to set (VINUM_SETSTATE) */
int force; /* do it even if it doesn't make sense */
int recurse; /* recurse (VINUM_REMOVE) */
int otherobject; /* superordinate object (attach),
 * replacement object (replace) */
int rename; /* rename object (attach) */
int64_t offset; /* offset of subdisk (for attach) */
};
/* More ioctl requests. Those which are defined with a size of
 * MAX_IOCTL_REPLY carry their result in the data area */
#define VINUM_RELEASECONFIG _IOC(0, L, 77, 0) /* release locks and write config to disk */
#define VINUM_STARTCONFIG _IOC(0, L, 78, 0) /* start a configuration operation */
#define VINUM_MEMINFO _IOR(L, 79, struct meminfo) /* get memory usage summary */
#define VINUM_MALLOCINFO _IOWR(L, 80, struct mc) /* get specific malloc information [i] */
#define VINUM_LABEL _IOC(IOC_IN | IOC_OUT, L, 81, MAX_IOCTL_REPLY) /* label a volume */
#define VINUM_INITSD _IOW(L, 82, int) /* initialize a subdisk */
#define VINUM_REMOVE _IOC(IOC_IN | IOC_OUT, L, 83, MAX_IOCTL_REPLY) /* remove an object */
#define VINUM_GETUNMAPPED _IOWR(L, 84, struct plexregion) /* get unmapped element (plex, re) */
#define VINUM_GETDEFECTIVE _IOWR(L, 85, struct plexregion) /* get defective element (plex, re) */
#define VINUM_RESETSTATS _IOC(IOC_IN | IOC_OUT, L, 86, MAX_IOCTL_REPLY) /* reset object stats */
#define VINUM_ATTACH _IOC(IOC_IN | IOC_OUT, L, 87, MAX_IOCTL_REPLY) /* attach an object */
#define VINUM_DETACH _IOC(IOC_IN | IOC_OUT, L, 88, MAX_IOCTL_REPLY) /* detach an object */
/* This structure is used for renaming objects (VINUM_RENAME) */
struct vinum_rename_msg {
int index; /* index of the object */
int recurse; /* rename subordinate objects too */
enum objecttype type; /* what kind of object it is */
char newname[MAXNAME]; /* new name to give to object */
};
#define VINUM_RENAME _IOC(IOC_IN | IOC_OUT, L, 89, MAX_IOCTL_REPLY) /* rename an object */
#define VINUM_REPLACE _IOC(IOC_IN | IOC_OUT, L, 90, MAX_IOCTL_REPLY) /* replace an object */

787
sys/dev/vinum/vinumioctl.c Normal file
View File

@ -0,0 +1,787 @@
/* XXX replace all the checks on object validity with
* calls to valid<object> */
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: vinumioctl.c,v 1.1 1998/08/14 08:46:10 grog Exp grog $
*/
#define STATIC /* nothing while we're testing XXX */
#define REALLYKERNEL
#include "vinumhdr.h"
#include "sys/sysproto.h" /* for sync(2) */
#ifdef DEBUG
#include <sys/reboot.h>
#endif
/* Context to return to when a command fails. It is set with setjmp
 * before a configuration line is parsed, both in the ioctl handler
 * and when a configuration is read from a drive */
jmp_buf command_fail; /* return on a failed command */
#if __FreeBSD__ >= 3
/* Why aren't these declared anywhere? XXX */
int setjmp(jmp_buf);
void longjmp(jmp_buf, int);
#endif
/* pointer to ioctl p parameter, to save passing it around.
 * The drive I/O routines use it as the process performing the I/O */
struct proc *myproc;
/* Routines used by the ioctl handler */
int vinum_inactive(void);
void free_vinum(int);
void attachobject(struct vinum_ioctl_msg *);
void detachobject(struct vinum_ioctl_msg *);
void renameobject(struct vinum_rename_msg *);
void replaceobject(struct vinum_ioctl_msg *);
/* ioctl routine */
int
vinumioctl(dev_t dev,
#if __FreeBSD__ >= 3
u_long cmd,
#else
int cmd,
#endif
caddr_t data,
int flag,
struct proc *p)
{
BROKEN_GDB;
unsigned int objno;
int error = 0;
struct volume *vol;
unsigned int index; /* for transferring config info */
unsigned int sdno; /* for transferring config info */
int fe; /* free list element number */
struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* struct to return */
struct devcode *device = (struct devcode *) &dev;
/* First, decide what we're looking at */
switch (device->type) {
case VINUM_SUPERDEV_TYPE:
myproc = p; /* save pointer to process */
ioctl_reply = (struct _ioctl_reply *) data; /* save the address to reply to */
error = setjmp(command_fail); /* come back here on error */
if (error) /* bombed out */
return 0; /* the reply will contain meaningful info */
switch (cmd) {
/* XXX #ifdef DEBUG */
case VINUM_DEBUG:
boothowto |= RB_GDB; /* serial debug line */
if (((struct debuginfo *) data)->changeit) /* change debug settings */
debug = (((struct debuginfo *) data)->param);
else
Debugger("vinum debug");
ioctl_reply = (struct _ioctl_reply *) data; /* reinstate the address to reply to */
ioctl_reply->error = 0;
return 0;
/* XXX #endif */
case VINUM_CREATE: /* create a vinum object */
error = lock_config(); /* get the config for us alone */
if (error) /* can't do it, */
return error; /* give up */
error = setjmp(command_fail); /* come back here on error */
if (error == 0) { /* first time, */
parse_user_config((char *) data, &keyword_set); /* update the config */
ioctl_reply->error = 0; /* no error if we make it here */
} else if (ioctl_reply->error == 0) { /* longjmp, but no error status */
ioctl_reply->error = EINVAL; /* note that something's up */
ioctl_reply->msg[0] = '\0'; /* no message? */
}
unlock_config();
return 0; /* must be 0 to return the real error info */
case VINUM_GETCONFIG: /* get the configuration information */
bcopy(&vinum_conf, data, sizeof(vinum_conf));
return 0;
/* start configuring the subsystem */
case VINUM_STARTCONFIG:
return start_config(); /* just lock it */
/* Move the individual parts of the config to user space.
* Specify the index of the object in the first word of data,
* and return the object there
*/
case VINUM_DRIVECONFIG:
index = *(int *) data; /* get the index */
if (index >= (unsigned) vinum_conf.drives_used) /* can't do it */
return EFAULT; /* bang */
bcopy(&DRIVE[index], data, sizeof(struct drive)); /* copy the config item out */
return 0;
case VINUM_SDCONFIG:
index = *(int *) data; /* get the index */
if (index >= (unsigned) vinum_conf.subdisks_used) /* can't do it */
return EFAULT; /* bang */
bcopy(&SD[index], data, sizeof(struct sd)); /* copy the config item out */
return 0;
case VINUM_PLEXCONFIG:
index = *(int *) data; /* get the index */
if (index >= (unsigned) vinum_conf.plexes_used) /* can't do it */
return EFAULT; /* bang */
bcopy(&PLEX[index], data, sizeof(struct plex)); /* copy the config item out */
return 0;
case VINUM_VOLCONFIG:
index = *(int *) data; /* get the index */
if (index >= (unsigned) vinum_conf.volumes_used) /* can't do it */
return EFAULT; /* bang */
bcopy(&VOL[index], data, sizeof(struct volume)); /* copy the config item out */
return 0;
case VINUM_PLEXSDCONFIG:
index = *(int *) data; /* get the plex index */
sdno = ((int *) data)[1]; /* and the sd index */
if ((index >= (unsigned) vinum_conf.plexes_used) /* plex doesn't exist */
||(sdno >= PLEX[index].subdisks)) /* or it doesn't have this many subdisks */
return EFAULT; /* bang */
bcopy(&SD[PLEX[index].sdnos[sdno]], /* copy the config item out */
data,
sizeof(struct sd));
return 0;
case VINUM_SAVECONFIG:
if (VFLAGS & VF_CONFIGURING) { /* must be us, the others are asleep */
finish_config(1); /* finish the configuration and update it */
error = save_config(); /* save configuration to disk */
} else
error = EINVAL; /* queue up for this one, please */
return error;
case VINUM_RELEASECONFIG: /* release the config */
if (VFLAGS & VF_CONFIGURING) { /* must be us, the others are asleep */
finish_config(0); /* finish the configuration, don't change it */
error = save_config(); /* save configuration to disk */
} else
error = EINVAL; /* release what config? */
return error;
case VINUM_INIT:
ioctl_reply = (struct _ioctl_reply *) data; /* reinstate the address to reply to */
ioctl_reply->error = 0;
return 0;
case VINUM_RESETCONFIG:
if (vinum_inactive() && (vinum_conf.opencount < 2)) { /* if we're not active */
/* Note the open count. We may be called from v, so we'll be open.
* Keep the count so we don't underflow */
int oc = vinum_conf.opencount;
free_vinum(1); /* clean up everything */
printf("vinum: CONFIGURATION OBLITERATED\n");
vinum_conf.opencount = oc;
ioctl_reply = (struct _ioctl_reply *) data; /* reinstate the address to reply to */
ioctl_reply->error = 0;
return 0;
}
return EBUSY;
case VINUM_SETSTATE:
setstate((struct vinum_ioctl_msg *) data); /* set an object state */
return 0;
case VINUM_MEMINFO:
vinum_meminfo(data);
return 0;
case VINUM_MALLOCINFO:
return vinum_mallocinfo(data);
case VINUM_LABEL: /* label a volume */
ioctl_reply->error = write_volume_label(*(int *) data); /* index of the volume to label */
ioctl_reply->msg[0] = '\0'; /* no message */
return 0;
case VINUM_REMOVE:
remove((struct vinum_ioctl_msg *) data); /* remove an object */
return 0;
case VINUM_GETFREELIST: /* get a drive free list element */
index = *(int *) data; /* get the drive index */
fe = ((int *) data)[1]; /* and the free list element */
if ((index >= (unsigned) vinum_conf.drives_used) /* plex doesn't exist */
||(DRIVE[index].state == drive_unallocated))
return ENODEV;
if (fe >= DRIVE[index].freelist_entries) /* no such entry */
return ENOENT;
bcopy(&DRIVE[index].freelist[fe],
data,
sizeof(struct drive_freelist));
return 0;
case VINUM_GETDEFECTIVE: /* get a plex defective area element */
index = *(int *) data; /* get the plex index */
fe = ((int *) data)[1]; /* and the region number */
if ((index >= (unsigned) vinum_conf.plexes_used) /* plex doesn't exist */
||(PLEX[index].state == plex_unallocated))
return ENODEV;
if (fe >= PLEX[index].defective_regions) /* no such entry */
return ENOENT;
bcopy(&PLEX[index].defective_region[fe],
data,
sizeof(struct plexregion));
return 0;
case VINUM_GETUNMAPPED: /* get a plex unmapped area element */
index = *(int *) data; /* get the plex index */
fe = ((int *) data)[1]; /* and the region number */
if ((index >= (unsigned) vinum_conf.plexes_used) /* plex doesn't exist */
||(PLEX[index].state == plex_unallocated))
return ENODEV;
if (fe >= PLEX[index].unmapped_regions) /* no such entry */
return ENOENT;
bcopy(&PLEX[index].unmapped_region[fe],
data,
sizeof(struct plexregion));
return 0;
case VINUM_RESETSTATS:
resetstats((struct vinum_ioctl_msg *) data); /* reset object stats */
return 0;
/* attach an object to a superordinate object */
case VINUM_ATTACH:
attachobject((struct vinum_ioctl_msg *) data);
return 0;
/* detach an object from a superordinate object */
case VINUM_DETACH:
detachobject((struct vinum_ioctl_msg *) data);
return 0;
/* rename an object */
case VINUM_RENAME:
renameobject((struct vinum_rename_msg *) data);
return 0;
/* replace an object */
case VINUM_REPLACE:
replaceobject((struct vinum_ioctl_msg *) data);
return 0;
default:
/* FALLTHROUGH */
}
default:
#if __FreeBSD__>=3
printf("vinumioctl: type %d, sd %d, plex %d, major %x, volume %d, command %lx\n",
device->type,
device->sd,
device->plex,
device->major,
device->volume,
cmd); /* XXX */
#else
printf("vinumioctl: type %d, sd %d, plex %d, major %x, volume %d, command %x\n",
device->type,
device->sd,
device->plex,
device->major,
device->volume,
cmd); /* XXX */
#endif
return EINVAL;
case VINUM_DRIVE_TYPE:
case VINUM_PLEX_TYPE:
return EAGAIN; /* try again next week */
case VINUM_SD_TYPE:
objno = SDNO(dev);
switch (cmd) {
case VINUM_INITSD: /* initialize subdisk */
return initsd(objno);
default:
return EINVAL;
}
break;
case VINUM_VOLUME_TYPE:
objno = VOLNO(dev);
if ((unsigned) objno >= (unsigned) vinum_conf.volumes_used) /* not a valid volume */
return ENXIO;
vol = &VOL[objno];
if (vol->state != volume_up) /* not up, */
return EIO; /* I/O error */
switch (cmd) {
case DIOCGDINFO: /* get disk label */
get_volume_label(vol, (struct disklabel *) data);
break;
/* Care! DIOCGPART returns *pointers* to
* the caller, so we need to store this crap as well.
* And yes, we need it. */
case DIOCGPART: /* get partition information */
get_volume_label(vol, &vol->label);
((struct partinfo *) data)->disklab = &vol->label;
((struct partinfo *) data)->part = &vol->label.d_partitions[0];
break;
/* We don't have this stuff on hardware,
* so just pretend to do it so that
* utilities don't get upset. */
case DIOCWDINFO: /* write partition info */
case DIOCSDINFO: /* set partition info */
return 0; /* not a titty */
case DIOCWLABEL: /* set or reset label writeable */
if ((flag & FWRITE) == 0) /* not writeable? */
return EACCES; /* no, die */
if (*(int *) data != 0) /* set it? */
vol->flags |= VF_WLABEL; /* yes */
else
vol->flags &= ~VF_WLABEL; /* no, reset */
break;
default:
return ENOTTY; /* not my kind of ioctl */
}
break;
}
return 0; /* XXX */
}
/* The following four functions check the supplied
 * object index and return a pointer to the object
 * if it exists. Otherwise they store ENOENT and a
 * message in the reply and return NULL */
/*
 * Return a pointer to drive number driveno if the index lies inside
 * the drive table and the entry is allocated.  Otherwise store ENOENT
 * and a message in *reply and return NULL.
 */
struct drive *
validdrive(int driveno, struct _ioctl_reply *reply)
{
    if ((driveno >= 0)                                      /* a negative index would point before the table */
        && (driveno < vinum_conf.drives_used)
        && (DRIVE[driveno].state != drive_unallocated))
        return &DRIVE[driveno];
    strcpy(reply->msg, "No such drive");
    reply->error = ENOENT;
    return NULL;
}
/*
 * Return a pointer to subdisk number sdno if the index lies inside
 * the subdisk table and the entry is allocated.  Otherwise store
 * ENOENT and a message in *reply and return NULL.
 */
struct sd *
validsd(int sdno, struct _ioctl_reply *reply)
{
    if ((sdno >= 0)                                         /* a negative index would point before the table */
        && (sdno < vinum_conf.subdisks_used)
        && (SD[sdno].state != sd_unallocated))
        return &SD[sdno];
    strcpy(reply->msg, "No such subdisk");
    reply->error = ENOENT;
    return NULL;
}
/*
 * Return a pointer to plex number plexno if the index lies inside
 * the plex table and the entry is allocated.  Otherwise store ENOENT
 * and a message in *reply and return NULL.
 */
struct plex *
validplex(int plexno, struct _ioctl_reply *reply)
{
    if ((plexno >= 0)                                       /* a negative index would point before the table */
        && (plexno < vinum_conf.plexes_used)
        && (PLEX[plexno].state != plex_unallocated))
        return &PLEX[plexno];
    strcpy(reply->msg, "No such plex");
    reply->error = ENOENT;
    return NULL;
}
/*
 * Return a pointer to volume number volno if the index lies inside
 * the volume table and the entry is allocated.  Otherwise store
 * ENOENT and a message in *reply and return NULL.
 */
struct volume *
validvol(int volno, struct _ioctl_reply *reply)
{
    if ((volno >= 0)                                        /* a negative index would point before the table */
        && (volno < vinum_conf.volumes_used)
        && (VOL[volno].state != volume_unallocated))
        return &VOL[volno];
    strcpy(reply->msg, "No such volume");
    reply->error = ENOENT;
    return NULL;
}
/*
 * Reset the statistics counters of one object.
 *
 * msg->type selects the object table and msg->index the entry in it.
 * The result is stored in the error field of the reply, which is
 * overlaid on msg: 0 if the counters were reset, EINVAL if the type
 * is not a resettable object, the index is outside the table or the
 * entry is unallocated.
 */
void
resetstats(struct vinum_ioctl_msg *msg)
{
    struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
    int error = EINVAL;                                     /* failure until we have reset something */

    switch (msg->type) {
    case drive_object:
        if ((msg->index >= 0) && (msg->index < vinum_conf.drives_used)) {
            struct drive *drive = &DRIVE[msg->index];

            if (drive->state != drive_unallocated) {
                drive->reads = 0;                           /* number of reads on this drive */
                drive->writes = 0;                          /* number of writes on this drive */
                drive->bytes_read = 0;                      /* number of bytes read */
                drive->bytes_written = 0;                   /* number of bytes written */
                error = 0;
            }
        }
        break;                                              /* must not fall through and reset a subdisk too */

    case sd_object:
        if ((msg->index >= 0) && (msg->index < vinum_conf.subdisks_used)) {
            struct sd *sd = &SD[msg->index];

            if (sd->state != sd_unallocated) {
                sd->reads = 0;                              /* number of reads on this subdisk */
                sd->writes = 0;                             /* number of writes on this subdisk */
                sd->bytes_read = 0;                         /* number of bytes read */
                sd->bytes_written = 0;                      /* number of bytes written */
                error = 0;
            }
        }
        break;

    case plex_object:
        if ((msg->index >= 0) && (msg->index < vinum_conf.plexes_used)) {
            struct plex *plex = &PLEX[msg->index];

            if (plex->state != plex_unallocated) {
                plex->reads = 0;                            /* number of reads on this plex */
                plex->writes = 0;                           /* number of writes on this plex */
                plex->bytes_read = 0;                       /* number of bytes read */
                plex->bytes_written = 0;                    /* number of bytes written */
                plex->multiblock = 0;                       /* requests that needed more than one block */
                plex->multistripe = 0;                      /* requests that needed more than one stripe */
                error = 0;
            }
        }
        break;

    case volume_object:
        if ((msg->index >= 0) && (msg->index < vinum_conf.volumes_used)) {
            struct volume *vol = &VOL[msg->index];

            if (vol->state != volume_unallocated) {
                vol->bytes_read = 0;                        /* number of bytes read */
                vol->bytes_written = 0;                     /* number of bytes written */
                vol->reads = 0;                             /* number of reads on this volume */
                vol->writes = 0;                            /* number of writes on this volume */
                vol->recovered_reads = 0;                   /* reads recovered from another plex */
                error = 0;
            }
        }
        break;

    case invalid_object:                                    /* can't get this */
    default:
        break;
    }
    /* The reply shares storage with msg, so write it only after we
     * have finished reading the request. */
    reply->error = error;
}
/*
 * Attach an object to a superordinate object: a subdisk to a plex, or
 * a plex to a volume.  msg->index names the object to attach and
 * msg->otherobject the object to attach it to.  The result is stored
 * in the reply, which is overlaid on msg: error is 0 on success,
 * EAGAIN if an attached plex ends up in state plex_reviving, or an
 * error code describing why the request was refused.
 */
void
attachobject(struct vinum_ioctl_msg *msg)
{
struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
struct sd *sd;
struct plex *plex;
struct volume *vol;
switch (msg->type) {
case drive_object: /* you can't attach a drive to anything */
case volume_object: /* nor a volume */
case invalid_object: /* "this can't happen" */
reply->error = EINVAL;
reply->msg[0] = '\0'; /* vinum(8) doesn't do this */
return;
case sd_object:
sd = validsd(msg->index, reply);
if (sd == NULL) /* not a valid subdisk */
return;
plex = validplex(msg->otherobject, reply);
/* if the plex is invalid, validplex has already filled in the reply */
if (plex) {
if (sd->plexno >= 0) { /* already belong to a plex */
reply->error = EBUSY; /* no message, the user should check */
reply->msg[0] = '\0';
return;
}
sd->plexoffset = msg->offset; /* this is where we want it */
set_sd_state(sd->sdno, sd_stale, setstate_force); /* make sure it's stale */
give_sd_to_plex(plex->plexno, sd->sdno); /* and give it to the plex */
update_sd_config(sd->sdno, 0);
save_config();
reply->error = 0;
}
break;
case plex_object:
plex = validplex(msg->index, reply); /* get plex */
if (plex == NULL)
return;
/* NOTE(review): this tests the organization of the plex being
 * attached to a volume, but the comment talks about attaching *to*
 * striped and raid-5 objects; the check may have been intended for
 * the sd_object case above -- confirm. */
if (plex->organization != plex_concat) { /* can't attach to striped and raid-5 */
reply->error = EINVAL; /* no message, the user should check */
reply->msg[0] = '\0';
return;
}
vol = validvol(msg->otherobject, reply); /* and volume information */
/* if the volume is invalid, validvol has already filled in the reply */
if (vol) {
if ((vol->plexes == MAXPLEX) /* we have too many already */
||(plex->volno >= 0)) { /* or the plex has an owner */
reply->error = EINVAL; /* no message, the user should check */
reply->msg[0] = '\0';
return;
}
set_plex_state(plex->plexno, plex_down, setstate_force); /* make sure it's down */
give_plex_to_volume(msg->otherobject, msg->index); /* and give it to the volume */
update_plex_config(plex->plexno, 0);
save_config();
if (plex->state == plex_reviving)
reply->error = EAGAIN; /* need to revive it */
else
reply->error = 0;
}
}
}
/*
 * Prefix the name of a detached object with "ex-".  name is a buffer
 * of size bytes; the result is truncated to fit and is always
 * terminated.
 */
static void
mark_name_detached(char *name, int size)
{
    /* Shift the old name right by three characters, taking its
     * terminating NUL along where it fits.  The regions overlap;
     * bcopy copes with that. */
    bcopy(name, &name[3], min(strlen(name) + 1, size - 3));
    bcopy("ex-", name, 3);
    name[size - 1] = '\0';                                  /* in case we truncated */
}

/*
 * Detach an object from its superordinate object: a subdisk from its
 * plex, or a plex from its volume.  msg->index names the object to
 * detach.  Unless msg->force is set, the request is refused with
 * EBUSY if the owner is up and still needs the object.  Objects named
 * after their former owner are renamed with an "ex-" prefix; for a
 * plex, msg->recurse extends this to its subdisks.  The result is
 * stored in the reply, which is overlaid on msg.
 */
void
detachobject(struct vinum_ioctl_msg *msg)
{
    struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
    struct sd *sd;
    struct plex *plex;
    struct volume *vol;
    int sdno;
    int plexno;
    int volno;

    switch (msg->type) {
    case drive_object:                                      /* you can't detach a drive from anything */
    case volume_object:                                     /* nor a volume */
    case invalid_object:                                    /* "this can't happen" */
        reply->error = EINVAL;
        reply->msg[0] = '\0';                               /* vinum(8) doesn't do this */
        return;

    case sd_object:
        sd = validsd(msg->index, reply);
        if (sd == NULL)
            return;
        if (sd->plexno < 0) {                               /* doesn't belong to a plex */
            reply->error = ENOENT;
            strcpy(reply->msg, "Subdisk is not attached");
            return;
        }
        plex = &PLEX[sd->plexno];                           /* valid plex number */
        if ((!msg->force)                                   /* don't force things */
            && ((plex->state == plex_up)                    /* and the plex is up */
                || ((plex->state == plex_flaky) && sd->state == sd_up))) { /* or flaky with this sd up */
            reply->error = EBUSY;                           /* we need this sd */
            reply->msg[0] = '\0';
            return;
        }
        sd->plexno = -1;                                    /* anonymous sd */
        if (plex->subdisks == 1) {                          /* this was the only subdisk */
            Free(plex->sdnos);                              /* free the subdisk array */
            plex->sdnos = NULL;                             /* and note the fact */
            plex->subdisks_allocated = 0;                   /* no subdisk space */
        } else {
            for (sdno = 0; sdno < plex->subdisks; sdno++) {
                if (plex->sdnos[sdno] == msg->index)        /* found our subdisk */
                    break;
            }
            if (sdno < (plex->subdisks - 1))                /* not the last one, compact */
                bcopy(&plex->sdnos[sdno + 1],
                    &plex->sdnos[sdno],
                    (plex->subdisks - 1 - sdno) * sizeof(int));
        }
        plex->subdisks--;
        rebuild_plex_unmappedlist(plex);                    /* rebuild the unmapped list */
        if (!bcmp(plex->name, sd->name, strlen(plex->name))) /* this subdisk is named after the plex */
            mark_name_detached(sd->name, MAXSDNAME);
        update_plex_config(plex->plexno, 0);
        /* NOTE(review): the original tested plex_striped twice here; the
         * second test was presumably meant for RAID-5 plexes -- confirm
         * and add the appropriate organization. */
        if (plex->organization == plex_striped)             /* we've just mutilated our plex, the data no longer matches */
            set_plex_state(plex->plexno,
                plex_down,
                setstate_force | setstate_configuring);
        update_sd_config(sd->sdno, 0);
        save_config();
        reply->error = 0;
        return;

    case plex_object:
        plex = validplex(msg->index, reply);                /* get plex */
        if (plex == NULL)
            return;
        if (plex->volno < 0) {                              /* doesn't belong to a volume */
            reply->error = ENOENT;
            strcpy(reply->msg, "Plex is not attached");
            return;
        }
        volno = plex->volno;
        vol = &VOL[volno];
        if ((!msg->force)                                   /* don't force things */
            && ((vol->state == volume_up)                   /* and the volume is up */
                && (vol->plexes == 1))) {                   /* and this is the last plex */
            /* XXX As elsewhere, check whether we will lose
             * mapping by removing this plex */
            reply->error = EBUSY;                           /* we need this plex */
            reply->msg[0] = '\0';
            return;
        }
        plex->volno = -1;                                   /* anonymous plex */
        for (plexno = 0; plexno < vol->plexes; plexno++) {
            if (vol->plex[plexno] == msg->index)            /* found our plex */
                break;
        }
        if (plexno < (vol->plexes - 1))                     /* not the last one, compact */
            /* close the gap in the volume's plex index array (not in
             * the volume table itself) */
            bcopy(&vol->plex[plexno + 1],
                &vol->plex[plexno],
                (vol->plexes - 1 - plexno) * sizeof(vol->plex[0]));
        vol->plexes--;
        if (!bcmp(vol->name, plex->name, strlen(vol->name))) { /* this plex is named after the volume */
            /* Rename the subdisks first, while the plex still has the
             * name they are compared against */
            if (msg->recurse) {
                for (sdno = 0; sdno < plex->subdisks; sdno++) {
                    struct sd *plexsd = &SD[plex->sdnos[sdno]];

                    if (!bcmp(plex->name, plexsd->name, strlen(plex->name))) /* subdisk is named after the plex */
                        mark_name_detached(plexsd->name, MAXSDNAME);
                }
            }
            mark_name_detached(plex->name, MAXPLEXNAME);
        }
        update_plex_config(plex->plexno, 0);
        update_volume_config(volno, 0);
        save_config();
        reply->error = 0;
        return;
    }
}
/*
 * Rename an object.  msg->type and msg->index select the object and
 * msg->newname is the name to give it.  The request is refused with
 * EEXIST if an object of the same type already has that name.  The
 * result is stored in the reply, which is overlaid on msg.
 *
 * The name is copied with a fixed length, so the stored copy is
 * explicitly terminated in case the caller's name fills the field.
 */
void
renameobject(struct vinum_rename_msg *msg)
{
    struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
    struct drive *drive;
    struct sd *sd;
    struct plex *plex;
    struct volume *vol;

    switch (msg->type) {
    case drive_object:
        if (find_drive(msg->newname, 0) >= 0) {             /* we have that name already, */
            reply->error = EEXIST;
            reply->msg[0] = '\0';
            return;
        }
        drive = validdrive(msg->index, reply);
        if (drive) {
            bcopy(msg->newname, drive->label.name, MAXDRIVENAME);
            drive->label.name[MAXDRIVENAME - 1] = '\0';     /* make sure it's terminated */
            save_config();
            reply->error = 0;
        }
        return;

    case sd_object:
        if (find_subdisk(msg->newname, 0) >= 0) {           /* we have that name already, */
            reply->error = EEXIST;
            reply->msg[0] = '\0';
            return;
        }
        sd = validsd(msg->index, reply);
        if (sd) {
            bcopy(msg->newname, sd->name, MAXSDNAME);
            sd->name[MAXSDNAME - 1] = '\0';                 /* make sure it's terminated */
            update_sd_config(sd->sdno, 0);
            save_config();
            reply->error = 0;
        }
        return;

    case plex_object:
        if (find_plex(msg->newname, 0) >= 0) {              /* we have that name already, */
            reply->error = EEXIST;
            reply->msg[0] = '\0';
            return;
        }
        plex = validplex(msg->index, reply);
        if (plex) {
            bcopy(msg->newname, plex->name, MAXPLEXNAME);
            plex->name[MAXPLEXNAME - 1] = '\0';             /* make sure it's terminated */
            update_plex_config(plex->plexno, 0);
            save_config();
            reply->error = 0;
        }
        return;

    case volume_object:
        if (find_volume(msg->newname, 0) >= 0) {            /* we have that name already, */
            reply->error = EEXIST;
            reply->msg[0] = '\0';
            return;
        }
        vol = validvol(msg->index, reply);
        if (vol) {
            bcopy(msg->newname, vol->name, MAXVOLNAME);
            vol->name[MAXVOLNAME - 1] = '\0';               /* make sure it's terminated */
            update_volume_config(msg->index, 0);
            save_config();
            reply->error = 0;
        }
        return;

    case invalid_object:
        reply->error = EINVAL;
        reply->msg[0] = '\0';
    }
}
/* Replace one object with another */
void
replaceobject(struct vinum_ioctl_msg *msg)
{
struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
reply->error = ENODEV; /* until I know how to do this */
strcpy(reply->msg, "replace not implemented yet");
/* save_config (); */
}

120
sys/dev/vinum/vinumkw.h Normal file
View File

@ -0,0 +1,120 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: vinumkw.h,v 1.7 1998/08/07 02:35:51 grog Exp grog $
*/
/* Command keywords that vinum knows. These include both user-level
* and kernel-level stuff */
/* Our complete vocabulary. The names of the commands are
* the same as the identifier without the kw_ at the beginning
* (i.e. kw_create defines the "create" keyword). Preprocessor
* magic in parser.c does the rest. */
/*
 * Every keyword vinum knows.  Aliases (kw_l, kw_subdisk, kw_vol,
 * kw_length) share the value of the keyword they abbreviate or spell
 * out.  kw_invalid_keyword is -1.
 */
enum keyword {
    kw_create,
    kw_modify,
    kw_list,
    kw_l = kw_list,
    kw_ld,                                                  /* list drive */
    kw_ls,                                                  /* list subdisk */
    kw_lp,                                                  /* list plex */
    kw_lv,                                                  /* list volume */
    kw_set,
    kw_rm,
    kw_start,
    kw_stop,
    kw_drive,
    kw_sd,
    kw_subdisk = kw_sd,
    kw_plex,
    kw_volume,
    kw_vol = kw_volume,
    kw_read,
    kw_readpol,
    kw_org,
    kw_name,
    kw_concat,
    kw_striped,
    kw_raid5,
    kw_driveoffset,
    kw_plexoffset,
    kw_len,
    kw_length = kw_len,
    kw_state,
    kw_setupstate,
    kw_d,                                                   /* flag names */
    kw_f,
    kw_r,
    kw_s,
    kw_v,
    kw_round,                                               /* round robin */
    kw_prefer,                                              /* prefer plex */
    kw_device,
    kw_init,
    kw_label,
    kw_resetconfig,
    kw_writethrough,
    kw_writeback,
    kw_raw,
    kw_resetstats,
    kw_attach,
    kw_detach,
    kw_rename,
    kw_printconfig,
    kw_replace,
    kw_detached,
#ifdef DEBUG
    kw_debug,                                               /* go into debugger */
#endif
    /* kw_info must exist without DEBUG too: the keyword table in
     * vinumparser.c lists "info" unconditionally.  Keeping it after
     * kw_debug leaves the values of a DEBUG build unchanged. */
    kw_info,
    kw_invalid_keyword = -1
};
/* One entry of a keyword table: the text of a keyword and the
 * enum keyword value it stands for */
struct _keywords {
char *name; /* text of the keyword */
enum keyword keyword; /* corresponding kw_ value */
};
/* A keyword table together with its length */
struct keywordset {
int size; /* number of entries in k */
struct _keywords *k; /* the table itself */
};
/* The keyword tables and the sets describing them; the definitions
 * are in vinumparser.c */
extern struct _keywords keywords[]; /* normal keywords */
extern struct _keywords flag_keywords[]; /* flag keywords ("-f" and friends) */
extern struct keywordset keyword_set; /* describes keywords[] */
extern struct keywordset flag_set; /* describes flag_keywords[] */

137
sys/dev/vinum/vinumlock.c Normal file
View File

@ -0,0 +1,137 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: lock.c,v 1.6 1998/07/28 06:32:57 grog Exp grog $
*/
#define REALLYKERNEL
#include "vinumhdr.h"
/* Lock routines. Currently, we lock either an individual volume
* or the global configuration. I don't think tsleep and
* wakeup are SMP safe. FIXME XXX */
/*
 * Lock a volume, sleeping while somebody else holds the lock.
 * Returns 0 with the lock held, or the non-zero value returned by
 * tsleep if the sleep did not end normally, in which case the lock is
 * not taken.
 */
int
lockvol(struct volume *vol)
{
    for (;;) {
        int rc;

        if ((vol->flags & VF_LOCKED) == 0)                  /* it's free, */
            break;                                          /* go and take it */
        vol->flags |= VF_LOCKING;                           /* tell the holder we're waiting */
        /* Sleeping on the address 'vol' would seem more natural, but
         * that address can change when the table is expanded.  The
         * address used here won't change. */
        rc = tsleep(&vinum_conf.volume + vol->devno,
            PRIBIO | PCATCH,
            "volock",
            0);
        if (rc != 0)
            return rc;
    }
    vol->flags |= VF_LOCKED;
    return 0;
}
/* Release the lock on a volume and wake anybody who was waiting for it */
void
unlockvol(struct volume *vol)
{
    int waiting = (vol->flags & VF_LOCKING) != 0;           /* did somebody ask to be woken? */

    vol->flags &= ~VF_LOCKED;
    if (waiting) {
        vol->flags &= ~VF_LOCKING;
        wakeup(&vinum_conf.volume + vol->devno);            /* same address lockvol sleeps on */
    }
}
/* Lock a plex, wait if it's in use */
int
lockplex(struct plex *plex)
{
int error;
while ((plex->flags & VF_LOCKED) != 0) {
plex->flags |= VF_LOCKING;
/* It would seem to make more sense to sleep on
* the address 'plex'. Unfortuntaly we can't
* guarantee that this address won't change due to
* table expansion. The address we choose won't change. */
if ((error = tsleep(&vinum_conf.plex + plex->sdnos[0],
PRIBIO | PCATCH,
"plexlk",
0)) != 0)
return error;
}
plex->flags |= VF_LOCKED;
return 0;
}
/* Release the lock on a plex and wake anybody who was waiting for it */
void
unlockplex(struct plex *plex)
{
    int waiting = (plex->flags & VF_LOCKING) != 0;          /* did somebody ask to be woken? */

    plex->flags &= ~VF_LOCKED;
    if (waiting) {
        plex->flags &= ~VF_LOCKING;
        wakeup(&vinum_conf.plex + plex->plexno);
    }
}
/*
 * Take the lock on the global configuration, sleeping while somebody
 * else holds it.  Returns 0 with the lock held, or the non-zero value
 * returned by tsleep, in which case the lock is not taken.
 */
int
lock_config(void)
{
    for (;;) {
        int rc;

        if ((vinum_conf.flags & VF_LOCKED) == 0)            /* it's free, */
            break;                                          /* go and take it */
        vinum_conf.flags |= VF_LOCKING;                     /* tell the holder we're waiting */
        rc = tsleep(&vinum_conf, PRIBIO | PCATCH, "vincfg", 0);
        if (rc != 0)
            return rc;
    }
    vinum_conf.flags |= VF_LOCKED;
    return 0;
}
/* Release the global configuration lock and wake up any waiters */
void
unlock_config(void)
{
    int waiting = (vinum_conf.flags & VF_LOCKING) != 0;     /* did somebody ask to be woken? */

    vinum_conf.flags &= ~VF_LOCKED;
    if (waiting) {
        vinum_conf.flags &= ~VF_LOCKING;
        wakeup(&vinum_conf);
    }
}

186
sys/dev/vinum/vinummemory.c Normal file
View File

@ -0,0 +1,186 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: memory.c,v 1.16 1998/08/08 04:43:22 grog Exp grog $
*/
#define REALLYKERNEL
#define USES_VM
#include "vinumhdr.h"
extern jmp_buf command_fail; /* return on a failed command */
#if __FreeBSD__ >= 3
/* Why aren't these declared anywhere? XXX */
int setjmp(jmp_buf);
void longjmp(jmp_buf, int);
#endif
void freedatabuf(struct mc *me);
caddr_t allocdatabuf(struct mc *me);
/*
 * Grow the table that *table points to from oldsize to newsize bytes.
 * A new area is allocated, the old contents are copied into it, the
 * old area is freed and *table is updated.  Nothing happens unless
 * newsize is greater than oldsize.  The added part is not cleared.
 */
void
expand_table(void **table, int oldsize, int newsize)
{
    int *bigger;

    if (newsize <= oldsize)                                 /* not growing, nothing to do */
        return;
    bigger = (int *) Malloc(newsize);                       /* allocate a new table */
    CHECKALLOC(bigger, "vinum: Can't expand table\n");
    if (*table != NULL) {                                   /* already something there, */
        bcopy((char *) *table, (char *) bigger, oldsize);   /* copy it to the new table */
        Free(*table);
    }
    *table = bigger;
}
#ifndef DEBUG
/* Increase the size of a request block: make room for RQELTS more
 * request elements in prq->rqe and clear the new elements */
void
expandrq(struct plexrq *prq)
{
/* NOTE(review): the old and new sizes are computed from
 * prq->requests, but the count updated below is prq->rqcount --
 * confirm which field holds the allocated element count. */
expand_table((void **) &prq->rqe,
prq->requests * sizeof(struct rqelement),
(prq->requests + RQELTS) * sizeof(struct rqelement));
bzero(&prq->rqe[prq->requests], RQELTS * sizeof(struct rqelement)); /* clear the new part */
prq->rqcount += RQELTS;
}
#endif
#if DEBUG /* XXX debug */
#define MALLOCENTRIES 16384
int malloccount = 0;
int highwater = 0; /* highest index ever allocated */
static struct mc malloced[MALLOCENTRIES];
static total_malloced;
/*
 * Debug version of Malloc: allocate size bytes and record the
 * allocation in the malloced table, along with the source file and
 * line of the caller and a sequence number.
 *
 * Returns the address of the new area, or 0 if the trace table is
 * full or the allocation failed.
 */
caddr_t
MMalloc(int size, char *file, int line)
{
    caddr_t result;
    int i;
    static int seq = 0;
    int s;
    struct mc me;                                           /* information to pass to allocdatabuf */

    if (malloccount >= MALLOCENTRIES) {                     /* too many */
        printf("vinum: can't allocate table space to trace memory allocation\n");
        return 0;                                           /* can't continue */
    }
    result = malloc(size, M_DEVBUF, M_WAITOK);              /* use malloc for smaller and irregular stuff */
    if (result == NULL) {
        printf("vinum: can't allocate %d bytes from %s:%d\n", size, file, line);
        return result;
    }
    /* Clear the whole entry: flags 0 means allocation via malloc,
     * and databuf must not be copied into the table uninitialized. */
    bzero(&me, sizeof(me));
    s = splhigh();
    for (i = 0; i < malloccount; i++) {
        if (((result + size) > malloced[i].address)
            && (result < malloced[i].address + malloced[i].size)) /* overlap */
            Debugger("Malloc overlap");
    }
    i = malloccount++;
    total_malloced += size;
    malloced[i].address = result;
    malloced[i].size = size;
    malloced[i].line = line;
    malloced[i].seq = seq++;
    malloced[i].flags = me.flags;
    malloced[i].databuf = me.databuf;                       /* only used with kva alloc */
    bcopy(file, malloced[i].file, min(strlen(file) + 1, 16));
    malloced[i].file[15] = '\0';                            /* terminate if the name was truncated */
    if (malloccount > highwater)
        highwater = malloccount;
    splx(s);
    return result;
}
/*
 * Debug version of Free: look up mem in the malloced table, clear and
 * free the area and remove its entry from the table.  If mem is not
 * in the table, report the caller's file and line and enter the
 * debugger.
 */
void
FFree(void *mem, char *file, int line)
{
    int slot;
    int s;

    s = splhigh();
    for (slot = 0; slot < malloccount; slot++) {            /* look for the entry */
        if ((caddr_t) mem == malloced[slot].address)
            break;
    }
    if (slot >= malloccount) {                              /* we never allocated this */
        splx(s);
        printf("Freeing unallocated data at 0x%08x from %s, line %d\n", (int) mem, file, line);
        Debugger("Free");
        return;
    }
    bzero(mem, malloced[slot].size);                        /* XXX */
    free(mem, M_DEVBUF);
    malloccount--;
    total_malloced -= malloced[slot].size;
    if (slot < malloccount)                                 /* more coming after, close the gap */
        bcopy(&malloced[slot + 1], &malloced[slot], (malloccount - slot) * sizeof(struct mc));
    splx(s);
}
/*
 * Fill in the struct meminfo at data with the current state of the
 * allocation trace: number of entries, total bytes allocated, the
 * address of the trace table and the highest entry count seen.
 */
void
vinum_meminfo(caddr_t data)
{
    struct meminfo *info = (struct meminfo *) data;

    info->highwater = highwater;
    info->malloced = malloced;
    info->total_malloced = total_malloced;
    info->mallocs = malloccount;
}
/*
 * Return one entry of the allocation trace table.  On entry the
 * first word of data is the index of the entry wanted; on return data
 * holds the address, size, line, sequence number and file name of
 * that entry.  Returns ENOENT if there is no such entry, otherwise 0.
 */
int
vinum_mallocinfo(caddr_t data)
{
    struct mc *out = (struct mc *) data;
    unsigned int ent = *(int *) data;                       /* 1st word is index */
    struct mc *entry;

    if (ent >= malloccount)
        return ENOENT;
    entry = &malloced[ent];
    out->address = entry->address;
    out->size = entry->size;
    out->line = entry->line;
    out->seq = entry->seq;
    bcopy(entry->file, out->file, 16);
    return 0;
}
#endif

206
sys/dev/vinum/vinumparser.c Normal file
View File

@ -0,0 +1,206 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: parser.c,v 1.11 1998/08/10 08:50:42 grog Exp grog $
*/
/* This file contains the parser for the configuration routines. It's used
* both in the kernel and in the user interface program, thus the separate file. */
/* Go through a text and split up into text tokens. These are either non-blank
* sequences, or any sequence (except \0) enclosed in ' or ". Embedded ' or
* " characters may be escaped by \, which otherwise has no special meaning.
*
* Delimit by following with a \0, and return pointers to the starts at token [].
* Return the number of tokens found as the return value.
*
* This method has the restriction that a closing " or ' must be followed by
* grey space.
*
* Error conditions are end of line before end of quote, or no space after
* a closing quote. In this case, tokenize() returns -1. */
#include <sys/param.h>
#ifdef KERNEL
#undef KERNEL /* XXX */
#define REALLYKERNEL
#else
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#endif
/* All this mess for a single struct definition */
#include <sys/uio.h>
#include <sys/namei.h>
#include <sys/disklabel.h>
#include <sys/mount.h>
#include <sys/device.h>
#include <sys/disk.h>
#include "sys/buf.h"
#include <vinumvar.h>
#include "vinumkw.h"
#include "vinumio.h"
#include "vinumext.h"
#ifdef REALLYKERNEL
#define isspace(c) ((c == ' ') || (c == '\t')) /* check for white space */
#else /* get it from the headers */
#include <ctype.h>
#endif
/* enum keyword is defined in vinumkw.h */
#define keypair(x) { #x, kw_##x } /* create pair "foo", kw_foo */
#define flagkeypair(x) { "-"#x, kw_##x } /* create pair "-foo", kw_foo */
#define KEYWORDSET(x) {sizeof (x) / sizeof (struct _keywords), x}
/* Normal keywords. These are all the words that vinum knows.
 * Each keypair(x) entry pairs the text "x" with the value kw_x.
 * keyword_set describes the table and its number of entries. */
struct _keywords keywords[] =
{keypair(drive),
keypair(sd),
keypair(subdisk),
keypair(plex),
keypair(volume),
keypair(vol),
keypair(setupstate),
keypair(readpol),
keypair(org),
keypair(name),
keypair(writethrough),
keypair(writeback),
keypair(raw),
keypair(device),
keypair(concat),
keypair(raid5),
keypair(striped),
keypair(plexoffset),
keypair(driveoffset),
keypair(length),
keypair(len),
keypair(state),
keypair(round),
keypair(prefer),
keypair(rename),
keypair(detached),
/* NOTE(review): this file #undefs KERNEL near its top when it is
 * built for the kernel and defines REALLYKERNEL instead, so this test
 * looks as if it is always true -- confirm whether REALLYKERNEL was
 * intended. */
#ifndef KERNEL /* for vinum(8) only */
#ifdef DEBUG
keypair(debug),
#endif
keypair(attach),
keypair(detach),
keypair(printconfig),
keypair(replace),
keypair(create),
keypair(read),
keypair(modify),
keypair(list),
keypair(l),
keypair(ld),
keypair(ls),
keypair(lp),
keypair(lv),
keypair(info),
keypair(set),
keypair(rm),
keypair(init),
keypair(label),
keypair(resetconfig),
keypair(start),
keypair(stop),
keypair(resetstats)
#endif
};
struct keywordset keyword_set = KEYWORDSET(keywords);
/* Command flags ("-f", "-d", "-v", "-s", "-r"), each mapped to the kw_ value
 * of the same letter.
 * NOTE(review): as with the keyword table, KERNEL is #undef'd earlier in this
 * file for kernel builds, so this guard looks like it is always true --
 * confirm whether REALLYKERNEL was intended. */
#ifndef KERNEL
struct _keywords flag_keywords[] =
{flagkeypair(f),
flagkeypair(d),
flagkeypair(v),
flagkeypair(s),
flagkeypair(r)
};
/* The flag table together with its entry count */
struct keywordset flag_set = KEYWORDSET(flag_keywords);
#endif
/*
 * Split the text at cptr into tokens, modifying it in place.
 *
 * Each token is terminated by overwriting the white space character which
 * follows it with \0, and a pointer to its start is stored in token [].
 * Scanning stops at the end of the string, at a newline or at a '#' which
 * starts a new token (comment).  A token which starts with ' or " extends to
 * the matching unescaped quote; the quote characters themselves (and any
 * escaping \ characters) remain part of the token.
 *
 * The caller must supply a token [] array which is large enough: no bounds
 * checking is possible here.
 *
 * Return the number of tokens found, or -1 if a quoted token is not closed
 * before the end of the line or if the closing quote is not followed by
 * white space.
 */
int
tokenize(char *cptr, char *token[])
{
    char delim;						    /* delimiter for searching for the partner */
    int tokennr;					    /* index of this token */

    tokennr = 0;					    /* none found yet */
    for (;;) {
	/* The casts keep negative char values away from <ctype.h> */
	while (isspace((unsigned char) *cptr))
	    cptr++;					    /* skip initial white space */
	if ((*cptr == '\0') || (*cptr == '\n') || (*cptr == '#')) /* end of line */
	    return tokennr;				    /* return number of tokens found */
	delim = *cptr;
	token[tokennr] = cptr;				    /* point to it */
	tokennr++;					    /* one more */
	/* XXX this is broken.  It leaves superfluous \\ characters in the text */
	if ((delim == '\'') || (delim == '"')) {	    /* delimitered */
	    for (;;) {
		cptr++;
		if ((*cptr == delim) && (cptr[-1] != '\\')) { /* found the partner */
		    cptr++;				    /* move on past */
		    if (!isspace((unsigned char) *cptr))    /* error, no space after closing quote */
			return -1;
		    *cptr++ = '\0';			    /* delimit */
		    break;				    /* this token is complete, look for the next */
		} else if ((*cptr == '\0') || (*cptr == '\n')) /* end of line */
		    return -1;
	    }
	} else {					    /* not quoted */
	    while ((*cptr != '\0') && (!isspace((unsigned char) *cptr)) && (*cptr != '\n'))
		cptr++;
	    if (*cptr != '\0')				    /* not end of the line, */
		*cptr++ = '\0';				    /* delimit and move to the next */
	}
    }
}
/*
 * Look up name in the keyword set keywordset.  The comparison is exact
 * (strcmp).  Return the keyword value of the first matching entry, or
 * kw_invalid_keyword if no entry matches.
 */
enum keyword
get_keyword(char *name, struct keywordset *keywordset)
{
    struct _keywords *table = keywordset->k;		    /* start of the table */
    int idx = 0;

    while (idx < keywordset->size) {
	struct _keywords *entry = &table[idx];

	if (strcmp(name, entry->name) == 0)		    /* this is the one */
	    return (enum keyword) entry->keyword;
	idx++;
    }
    return kw_invalid_keyword;				    /* not in this set */
}

View File

@ -0,0 +1,882 @@
/* XXX to do:
* Decide where we need splbio ()
*/
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: request.c,v 1.17 1998/08/13 06:04:47 grog Exp grog $
*/
#define REALLYKERNEL
#include "vinumhdr.h"
#include "request.h"
#include <miscfs/specfs/specdev.h>
#include <sys/resourcevar.h>
/* pointer to ioctl p parameter, to save passing it around */
extern struct proc *myproc;
enum requeststatus bre(struct request *rq,
int plexno,
daddr_t * diskstart,
daddr_t diskend);
enum requeststatus bre5(struct request *rq,
int plexno,
daddr_t * diskstart,
daddr_t diskend);
enum requeststatus build_read_request(struct request *rq, int volplexno);
enum requeststatus build_write_request(struct request *rq);
enum requeststatus build_rq_buffer(struct rqelement *rqe, struct plex *plex);
void freerq(struct request *rq);
void free_rqg(struct rqgroup *rqg);
int find_alternate_sd(struct request *rq);
int check_range_covered(struct request *);
void complete_rqe(struct buf *bp);
void complete_raid5_write(struct rqelement *);
int abortrequest(struct request *rq, int error);
void sdio(struct buf *bp);
void sdio_done(struct buf *bp);
int vinum_bounds_check(struct buf *bp, struct volume *vol);
caddr_t allocdatabuf(struct rqelement *rqe);
void freedatabuf(struct rqelement *rqe);
void
vinumstrategy(struct buf *bp)
{
BROKEN_GDB;
int volno;
struct volume *vol = NULL;
int s;
struct devcode *device = (struct devcode *) &bp->b_dev; /* decode device number */
enum requeststatus status;
switch (device->type) {
case VINUM_SD_TYPE:
sdio(bp);
return;
/* In fact, vinum doesn't handle drives: they're
* handled directly by the disk drivers */
case VINUM_DRIVE_TYPE:
default:
bp->b_error = EIO; /* I/O error */
bp->b_flags |= B_ERROR;
biodone(bp);
return;
case VINUM_VOLUME_TYPE: /* volume I/O */
volno = VOLNO(bp->b_dev);
vol = &VOL[volno];
if (vol->state != volume_up) { /* can't access this volume */
bp->b_error = EIO; /* I/O error */
bp->b_flags |= B_ERROR;
biodone(bp);
return;
}
if (vinum_bounds_check(bp, vol) <= 0) { /* don't like them bounds */
biodone(bp); /* have nothing to do with this */
return;
}
/* FALLTHROUGH */
/* Plex I/O is pretty much the same as volume I/O
* for a single plex. Indicate this by passing a NULL
* pointer (set above) for the volume */
case VINUM_PLEX_TYPE:
bp->b_resid = bp->b_bcount; /* transfer everything */
vinumstart(bp, 0);
return;
}
}
/*
 * Start a transfer on a volume or a plex.
 *
 * Build a struct request for the user buffer bp and hand it to
 * launch_requests().  Return -1 if the request can't be built (in this
 * case bp has been completed), otherwise the return value of
 * launch_requests().
 *
 * Parameter reviveok is set when doing transfers for revives: it allows
 * transfers to be started immediately when a revive is in progress.
 * During revive, normal transfers are queued if they share address space
 * with a currently active revive operation.
 */
int
vinumstart(struct buf *bp, int reviveok)
{
    BROKEN_GDB;
    int plexno;
    struct volume *vol;
    struct request *rq;					    /* build up our request here */
    enum requeststatus status;

    /*
     * XXX In these routines, we're assuming that we will always be
     * called with bp->b_bcount which is a multiple of the sector size.
     * This is a reasonable assumption, since we are only called from
     * system routines.  Check anyway.
     */
    if ((bp->b_bcount % DEV_BSIZE) != 0) {		    /* bad length */
	bp->b_error = EINVAL;				    /* invalid size */
	bp->b_flags |= B_ERROR;
	biodone(bp);
	return -1;
    }
    rq = (struct request *) Malloc(sizeof(struct request)); /* allocate a request struct */
    if (rq == NULL) {					    /* can't do it */
	bp->b_error = ENOMEM;				    /* can't get memory */
	bp->b_flags |= B_ERROR;
	biodone(bp);
	return -1;
    }
    bzero(rq, sizeof(struct request));

    rq->bp = bp;					    /* note the user buffer struct */
    if (DEVTYPE(bp->b_dev) == VINUM_VOLUME_TYPE) {	    /* it's a volume, */
	rq->volplex.volno = VOLNO(bp->b_dev);		    /* get the volume number */
	vol = &VOL[rq->volplex.volno];			    /* and point to it */
	vol->active++;					    /* one more active request */
    } else {
	vol = NULL;					    /* no volume: single plex I/O */
	rq->volplex.plexno = PLEXNO(bp->b_dev);		    /* point to the plex */
	rq->isplex = 1;					    /* note that it's a plex */
    }

    if (bp->b_flags & B_READ) {
	/*
	 * This is a read request.  Decide which plex to read from.
	 *
	 * There's a potential race condition here, since we're not
	 * locked, and we could end up multiply incrementing the
	 * round-robin counter.  This doesn't have any serious effects,
	 * however.
	 */
	if (vol != NULL) {
	    vol->reads++;
	    vol->bytes_read += bp->b_bcount;
	    plexno = vol->preferred_plex;		    /* get the plex to use */
	    if (plexno < 0) {				    /* round robin */
		plexno = vol->last_plex_read;
		vol->last_plex_read++;
		if (vol->last_plex_read == vol->plexes)	    /* got to the end? */
		    vol->last_plex_read = 0;		    /* wrap around */
	    }
	    status = build_read_request(rq, plexno);	    /* build a request */
	} else {					    /* plex I/O */
	    daddr_t diskaddr = bp->b_blkno;		    /* start offset of transfer */

	    status = bre(rq,				    /* build a request list */
		rq->volplex.plexno,
		&diskaddr,
		diskaddr + (bp->b_bcount / DEV_BSIZE));
	}
    } else {
	/*
	 * This is a write operation.  We write to all plexes.  If this
	 * is a RAID 5 plex, we must also update the parity stripe.
	 */
	if (vol != NULL) {
	    vol->writes++;
	    vol->bytes_written += bp->b_bcount;
	    status = build_write_request(rq);		    /* build requests for all plexes */
	} else {					    /* plex I/O */
	    daddr_t diskstart = bp->b_blkno;		    /* start offset of transfer */

	    status = bre(rq,				    /* build requests for the plex */
		PLEXNO(bp->b_dev),
		&diskstart,
		bp->b_blkno + (bp->b_bcount / DEV_BSIZE));
	}
    }

    if ((status > REQUEST_RECOVERED)			    /* can't satisfy it */
	||(bp->b_flags & B_DONE)) {			    /* XXX shouldn't get this without bad status */
	if (status == REQUEST_DOWN) {			    /* not enough subdisks */
	    bp->b_error = EIO;				    /* I/O error */
	    bp->b_flags |= B_ERROR;
	}
	/*
	 * The request building functions complete bp themselves on
	 * some errors (bre() does so when it runs out of memory).
	 * Don't complete it a second time.
	 */
	if ((bp->b_flags & B_DONE) == 0)
	    biodone(bp);
	freerq(rq);
	return -1;
    }
    return launch_requests(rq, reviveok);		    /* now start the requests if we can */
}
/*
 * Call the low-level strategy routines to perform the requests in a
 * struct request.
 *
 * If the request may conflict with a revive (XFR_REVIVECONFLICT) and
 * reviveok is not set, the request is not started: it is appended to the
 * wait list of the first reviving plex of the volume instead.
 *
 * Return 0 if the requests were started or queued, -1 if the request
 * contains no request groups (in which case it is aborted with EINVAL).
 */
int
launch_requests(struct request *rq, int reviveok)
{
    struct rqgroup *rqg;
    int rqno;						    /* loop index */
    struct rqelement *rqe;				    /* current element */
    int s;

    /*
     * First find out whether we're reviving, and the request contains a
     * conflict.  If so, we hang the request off plex->waitlist of the
     * first plex we find which is reviving
     */
    if ((rq->flags & XFR_REVIVECONFLICT)		    /* possible revive conflict */
	&&(!reviveok)) {				    /* and we don't want to do it now, */
	struct volume *vol = &VOL[VOLNO(rq->bp->b_dev)];
	struct plex *plex;
	int plexno;

	for (plexno = 0; plexno < vol->plexes; plexno++) {  /* find the reviving plex */
	    plex = &PLEX[vol->plex[plexno]];
	    if (plex->state == plex_reviving)		    /* found it */
		break;
	}
	if (plexno < vol->plexes) {			    /* found it? */
	    if (plex->waitlist == NULL)			    /* nothing waiting yet, */
		plex->waitlist = rq;			    /* we're the first */
	    else {
		struct request *waitlist = plex->waitlist;  /* point to the waiting list */

		while (waitlist->next != NULL)		    /* find the end */
		    waitlist = waitlist->next;
		waitlist->next = rq;			    /* hook our request there */
	    }
	    return 0;					    /* and get out of here */
	} else						    /* bad vinum, bad */
	    printf("vinum: can't find reviving plex for volume %s\n", vol->name);
    }
    rq->active = 0;					    /* nothing yet */
    /* XXX This is probably due to a bug */
    if (rq->rqg == NULL) {				    /* no request */
	abortrequest(rq, EINVAL);
	return -1;
    }
#if DEBUG
    if (debug & DEBUG_ADDRESSES)
	printf("Request: %x\nWrite dev 0x%x, offset 0x%x, length %ld\n",
	    (u_int) rq,
	    rq->bp->b_dev,
	    rq->bp->b_blkno,
	    rq->bp->b_bcount);				    /* XXX */
    vinum_conf.lastrq = (int) rq;
    vinum_conf.lastbuf = rq->bp;
#endif
    for (rqg = rq->rqg; rqg != NULL; rqg = rqg->next) {	    /* through the whole request chain */
	rqg->active = rqg->count;			    /* they're all active */
	rq->active++;					    /* one more active request group */
	for (rqno = 0; rqno < rqg->count; rqno++) {
	    rqe = &rqg->rqe[rqno];
	    if (rqe->flags & XFR_BAD_SUBDISK)		    /* this subdisk is bad, */
		rqg->active--;				    /* one less active request */
	    else {
		if ((rqe->b.b_flags & B_READ) == 0)
		    rqe->b.b_vp->v_numoutput++;		    /* one more output going */
#if DEBUG
		if (debug & DEBUG_ADDRESSES)
		    printf("  %s dev 0x%x, sd %d, offset 0x%x, devoffset 0x%x, length %ld\n",
			rqe->b.b_flags & B_READ ? "Read" : "Write",
			rqe->b.b_dev,
			rqe->sdno,
			(u_int) (rqe->b.b_blkno - SD[rqe->sdno].driveoffset),
			rqe->b.b_blkno,
			rqe->b.b_bcount);		    /* XXX */
		if (debug & DEBUG_NUMOUTPUT)
		    printf("  vinumstart sd %d numoutput %ld\n",
			rqe->sdno,
			rqe->b.b_vp->v_numoutput);
#endif
		/* fire off the request */
		s = splbio();
		(*bdevsw[major(rqe->b.b_dev)]->d_strategy) (&rqe->b);
		splx(s);
	    }
	    /* XXX Do we need caching?  Think about this more */
	}
    }
    return 0;
}
/* Define the low-level requests needed to perform a
 * high-level I/O operation for a specific plex 'plexno'.
 *
 * The transfer covers the plex-relative sector addresses from *diskaddr
 * up to (but not including) diskend. For each part of the transfer, a
 * request group with a single element is allocated with allocrqg() and
 * its buffer header is filled in with build_rq_buffer().
 *
 * Return REQUEST_OK if all parts were built. If a subdisk or the plex
 * is not up, checksdstate() is consulted, and any non-zero status it
 * returns is passed back to the caller immediately. Return
 * REQUEST_ENOMEM if memory allocation fails; in this case the user
 * buffer has already been completed (biodone) with ENOMEM. For
 * striped plexes, return REQUEST_EOF if a part of the transfer starts
 * beyond the end of its subdisk.
 *
 * Modify *diskaddr to point to the end address of what has been built.
 * Since we return on the first bad subdisk, the caller
 * (build_read_request) can try alternatives for the remainder.
 *
 * Strictly speaking, the elements rqe->sdno of all entries should be
 * set to -1, since 0 (from bzero) is a valid subdisk number. We avoid
 * this problem by initializing the ones we use, and not looking at the
 * others (index >= rqg->count).
 */
enum requeststatus
bre(struct request *rq,
int plexno,
daddr_t * diskaddr,
daddr_t diskend)
{
BROKEN_GDB;
int sdno;
struct sd *sd;
struct rqgroup *rqg;
struct buf *bp; /* user's bp */
struct plex *plex;
enum requeststatus status; /* return value */
daddr_t plexoffset; /* offset of transfer in plex */
daddr_t stripebase; /* base address of stripe (1st subdisk) */
daddr_t stripeoffset; /* offset in stripe */
daddr_t blockoffset; /* offset in stripe on subdisk */
struct rqelement *rqe; /* point to this request information */
daddr_t diskstart = *diskaddr; /* remember where this transfer starts */
bp = rq->bp; /* buffer pointer */
status = REQUEST_OK; /* return value: OK until proven otherwise */
plex = &PLEX[plexno]; /* point to the plex */
switch (plex->organization) {
/* Concatenated plex: look at each subdisk in turn and build a request
 * for every one which overlaps the transfer */
case plex_concat:
for (sdno = 0; sdno < plex->subdisks; sdno++) {
sd = &SD[plex->sdnos[sdno]];
if ((*diskaddr < (sd->plexoffset + sd->sectors)) /* The request starts before the end of this */
&&(diskend > sd->plexoffset)) { /* subdisk and ends after the start of this sd */
if ((sd->state != sd_up) || (plex->state != plex_up)) {
enum requeststatus s;
s = checksdstate(sd, rq, *diskaddr, diskend); /* do we need to change state? */
if (s) /* give up? */
return s; /* yup */
}
rqg = allocrqg(rq, 1); /* space for the request */
if (rqg == NULL) { /* malloc failed */
bp->b_flags |= B_ERROR;
bp->b_error = ENOMEM;
biodone(bp);
return REQUEST_ENOMEM;
}
rqg->plexno = plexno;
rqe = &rqg->rqe[0]; /* point to the element */
rqe->rqg = rqg; /* group */
rqe->sdno = sd->sdno; /* put in the subdisk number */
plexoffset = max(sd->plexoffset, *diskaddr); /* start offset in plex */
rqe->sdoffset = plexoffset - sd->plexoffset; /* start offset in subdisk */
rqe->useroffset = plexoffset - diskstart; /* start offset in user buffer */
rqe->dataoffset = 0;
rqe->datalen = min(diskend - *diskaddr, /* number of sectors to transfer in this sd */
sd->sectors - rqe->sdoffset);
rqe->groupoffset = 0; /* no groups for concatenated plexes */
rqe->grouplen = 0;
rqe->buflen = rqe->datalen; /* buffer length is data buffer length */
rqe->flags = 0;
rqe->driveno = sd->driveno;
*diskaddr += rqe->datalen; /* bump the address */
if (build_rq_buffer(rqe, plex)) { /* build the buffer */
deallocrqg(rqg);
bp->b_flags |= B_ERROR;
bp->b_error = ENOMEM;
biodone(bp);
return REQUEST_ENOMEM; /* can't do it */
}
}
/* NOTE(review): this only stops the loop when *diskaddr has gone past
 * diskend. When they are equal, the loop continues, but the overlap
 * test above presumably rejects the remaining subdisks -- confirm
 * whether >= was intended. */
if (*diskaddr > diskend) /* we're finished, */
break; /* get out of here */
}
break;
/* Striped plex: build one request for each stripe block which the
 * transfer touches */
case plex_striped:
{
while (*diskaddr < diskend) { /* until we get it all sorted out */
/* The offset of the start address from
* the start of the stripe */
stripeoffset = *diskaddr % (plex->stripesize * plex->subdisks);
/* The plex-relative address of the
* start of the stripe */
stripebase = *diskaddr - stripeoffset;
/* The number of the subdisk in which
* the start is located */
sdno = stripeoffset / plex->stripesize;
/* The offset from the beginning of the stripe
* on this subdisk */
blockoffset = stripeoffset % plex->stripesize;
sd = &SD[plex->sdnos[sdno]]; /* the subdisk in question */
if ((sd->state != sd_up) || (plex->state != plex_up)) {
enum requeststatus s;
s = checksdstate(sd, rq, *diskaddr, diskend); /* do we need to change state? */
if (s) /* give up? */
return s; /* yup */
}
rqg = allocrqg(rq, 1); /* space for the request */
if (rqg == NULL) { /* malloc failed */
bp->b_flags |= B_ERROR;
bp->b_error = ENOMEM;
biodone(bp);
return REQUEST_ENOMEM;
}
rqg->plexno = plexno;
rqe = &rqg->rqe[0]; /* point to the element */
rqe->rqg = rqg;
rqe->sdoffset = stripebase / plex->subdisks + blockoffset; /* start offset in this subdisk */
rqe->useroffset = *diskaddr - diskstart; /* The offset of the start in the user buffer */
rqe->dataoffset = 0;
rqe->datalen = min(diskend - *diskaddr, /* the amount remaining to transfer */
plex->stripesize - blockoffset); /* and the amount left in this stripe */
rqe->groupoffset = 0; /* no groups for striped plexes */
rqe->grouplen = 0;
rqe->buflen = rqe->datalen; /* buffer length is data buffer length */
rqe->flags = 0;
rqe->sdno = sd->sdno; /* put in the subdisk number */
rqe->driveno = sd->driveno;
if (rqe->sdoffset >= sd->sectors) { /* starts beyond the end of the subdisk? */
deallocrqg(rqg);
return REQUEST_EOF;
} else if (rqe->sdoffset + rqe->datalen > sd->sectors) /* ends beyond the end of the subdisk? */
rqe->datalen = sd->sectors - rqe->sdoffset; /* yes, truncate */
/* NOTE(review): rqe->buflen was copied from datalen before the
 * truncation above and is not adjusted; build_rq_buffer() sizes the
 * transfer from buflen -- confirm this is intended. */
if (build_rq_buffer(rqe, plex)) { /* build the buffer */
deallocrqg(rqg);
bp->b_flags |= B_ERROR;
bp->b_error = ENOMEM;
biodone(bp);
return REQUEST_ENOMEM; /* can't do it */
}
*diskaddr += rqe->datalen; /* look at the remainder */
if (*diskaddr < diskend) { /* didn't finish the request on this stripe */
plex->multiblock++; /* count another one */
if (sdno == plex->subdisks - 1) /* last subdisk, */
plex->multistripe++; /* another stripe as well */
}
}
}
break;
/* Any other organization: complain, but still return REQUEST_OK without
 * having built any requests.
 * NOTE(review): the message has no trailing newline. */
default:
printf("vinum: invalid plex type in bre");
}
return status;
}
/* Build up a request structure for reading volumes.
 * This function is not needed for plex reads, since there's
 * no recovery if a plex read can't be satisfied.
 *
 * The request is built with bre() for the plex at index plexindex in
 * the volume's plex table. If bre() reports REQUEST_DOWN, each of the
 * other plexes of the volume is tried in turn for the part of the
 * transfer which is still missing.
 *
 * Return REQUEST_EOF or REQUEST_ENOMEM as reported by bre(),
 * REQUEST_DOWN if no plex can supply a part of the transfer, otherwise
 * the status of the last part built. */
enum requeststatus
build_read_request(struct request *rq, /* request */
int plexindex)
{ /* index in the volume's plex table */
BROKEN_GDB;
struct buf *bp;
daddr_t startaddr; /* offset of previous part of transfer */
daddr_t diskaddr; /* offset of current part of transfer */
daddr_t diskend; /* and end offset of transfer */
int plexno; /* plex index in vinum_conf */
struct rqgroup *rqg; /* point to the request we're working on */
struct volume *vol; /* volume in question */
off_t oldstart; /* note where we started */
int recovered = 0; /* set if we recover a read */
enum requeststatus status = REQUEST_OK;
bp = rq->bp; /* buffer pointer */
diskaddr = bp->b_blkno; /* start offset of transfer */
diskend = diskaddr + (bp->b_bcount / DEV_BSIZE); /* and end offset of transfer */
/* NOTE(review): rqg is assigned here but not used again in this
 * function; rq->rqg is the head of a linked list elsewhere in this
 * file, so indexing it by plexindex looks like a leftover -- confirm. */
rqg = &rq->rqg[plexindex]; /* plex request */
vol = &VOL[rq->volplex.volno]; /* point to volume */
while (diskaddr < diskend) { /* build up request components */
startaddr = diskaddr;
status = bre(rq, vol->plex[plexindex], &diskaddr, diskend); /* build up a request */
switch (status) {
case REQUEST_OK:
continue;
case REQUEST_RECOVERED:
recovered = 1;
break;
case REQUEST_EOF:
case REQUEST_ENOMEM:
return status;
/* if we get here, we have either had a failure or
* a RAID 5 recovery. We don't want to use the
* recovery, because it's expensive, so first we
* check if we have alternatives */
case REQUEST_DOWN: /* can't access the plex */
/* vol is the address of a VOL[] entry, so this test is always true
 * and the else branch below can't be reached */
if (vol != NULL) { /* and this is volume I/O */
/* Try to satisfy the request
* from another plex */
for (plexno = 0; plexno < vol->plexes; plexno++) {
diskaddr = startaddr; /* start at the beginning again */
oldstart = startaddr; /* and note where that was */
if (plexno != plexindex) { /* don't try this plex again */
bre(rq, vol->plex[plexno], &diskaddr, diskend); /* try a request */
if (diskaddr > oldstart) { /* we satisfied another part */
recovered = 1; /* we recovered from the problem */
status = REQUEST_OK; /* don't complain about it */
break; /* leave the for loop; the outer while loop carries on with plexindex */
}
}
if (plexno == (vol->plexes - 1)) /* couldn't satisfy the request */
return REQUEST_DOWN; /* failed */
}
} else
return REQUEST_DOWN; /* bad luck */
}
/* NOTE(review): recovered is never reset, so once it is set this
 * statement counts again on every later pass which doesn't take the
 * REQUEST_OK shortcut above -- confirm this is the intended count. */
if (recovered)
vol->recovered_reads += recovered; /* adjust our recovery count */
}
return status;
}
/* Build up a request structure for writes.
* Return 0 if all subdisks involved in the request are up, 1 if some
* subdisks are not up, and -1 if the request is at least partially
* outside the bounds of the subdisks. */
enum requeststatus
build_write_request(struct request *rq)
{ /* request */
BROKEN_GDB;
struct buf *bp;
daddr_t diskstart; /* offset of current part of transfer */
daddr_t diskend; /* and end offset of transfer */
int plexno; /* plex index in vinum_conf */
struct volume *vol; /* volume in question */
enum requeststatus status;
bp = rq->bp; /* buffer pointer */
vol = &VOL[rq->volplex.volno]; /* point to volume */
diskend = bp->b_blkno + (bp->b_bcount / DEV_BSIZE); /* end offset of transfer */
status = REQUEST_OK;
for (plexno = 0; plexno < vol->plexes; plexno++) {
diskstart = bp->b_blkno; /* start offset of transfer */
status = min(status, bre(rq, /* build requests for the plex */
vol->plex[plexno],
&diskstart,
diskend));
}
return status;
}
/* Fill in the struct buf part of a request element. */
enum requeststatus
build_rq_buffer(struct rqelement *rqe, struct plex *plex)
{
BROKEN_GDB;
struct sd *sd; /* point to subdisk */
struct volume *vol;
struct buf *bp;
struct buf *ubp; /* user (high level) buffer header */
vol = &VOL[rqe->rqg->rq->volplex.volno];
sd = &SD[rqe->sdno]; /* point to subdisk */
bp = &rqe->b;
ubp = rqe->rqg->rq->bp; /* pointer to user buffer header */
/* Initialize the buf struct */
bzero(&rqe->b, sizeof(struct buf));
bp->b_proc = ubp->b_proc; /* process pointer */
bp->b_flags = ubp->b_flags & (B_NOCACHE | B_READ | B_ASYNC); /* copy these flags from user bp */
bp->b_flags |= B_CALL | B_BUSY; /* inform us when it's done */
if (plex->state == plex_reviving)
bp->b_flags |= B_ORDERED; /* keep request order if we're reviving */
bp->b_iodone = complete_rqe; /* by calling us here */
bp->b_dev = DRIVE[rqe->driveno].dev; /* drive device */
bp->b_blkno = rqe->sdoffset + sd->driveoffset; /* start address */
bp->b_bcount = rqe->buflen << DEV_BSHIFT; /* number of bytes to transfer */
bp->b_resid = bp->b_bcount; /* and it's still all waiting */
bp->b_bufsize = bp->b_bcount; /* and buffer size */
bp->b_vp = DRIVE[rqe->driveno].vp; /* drive vnode */
bp->b_rcred = FSCRED; /* we have the file system credentials */
bp->b_wcred = FSCRED; /* we have the file system credentials */
if (rqe->flags & XFR_MALLOCED) { /* this operation requires a malloced buffer */
bp->b_data = Malloc(bp->b_bcount); /* get a buffer to put it in */
if (bp->b_data == NULL) { /* failed */
Debugger("XXX");
abortrequest(rqe->rqg->rq, ENOMEM);
return REQUEST_ENOMEM; /* no memory */
}
} else
/* Point directly to user buffer data. This means
* that we don't need to do anything when we have
* finished the transfer */
bp->b_data = ubp->b_data + rqe->useroffset * DEV_BSIZE;
return 0;
}
/* Abort a request: free resources and complete the
* user request with the specified error */
int
abortrequest(struct request *rq, int error)
{
struct buf *bp = rq->bp; /* user buffer */
bp->b_flags |= B_ERROR;
bp->b_error = error;
freerq(rq); /* free everything we're doing */
biodone(bp);
return error; /* and give up */
}
/*
 * Check that our transfer will cover the complete address space of the
 * user request.
 *
 * Return 1 if it can, otherwise 0.
 * XXX Not implemented yet: the request is not examined, and the answer
 * is always 1.
 */
int
check_range_covered(struct request *rq)
{
    const int covered = 1;				    /* XXX */

    (void) rq;						    /* not looked at yet */
    return covered;
}
/* Perform I/O on a subdisk */
void
sdio(struct buf *bp)
{
int s; /* spl */
struct sd *sd;
struct sdbuf *sbp;
daddr_t endoffset;
struct drive *drive;
sd = &SD[SDNO(bp->b_dev)]; /* point to the subdisk */
drive = &DRIVE[sd->driveno];
if (drive->state != drive_up) { /* XXX until we get the states fixed */
set_sd_state(SDNO(bp->b_dev), sd_obsolete, setstate_force);
bp->b_flags |= B_ERROR;
bp->b_error = EIO;
biodone(bp);
return;
}
/* XXX decide which states we will really accept here. up
* implies it could be involved with a plex, in which
* case we don't want to dick with it */
if ((sd->state != sd_up)
&& (sd->state != sd_initializing)
&& (sd->state != sd_reborn)) { /* we can't access it */
bp->b_flags |= B_ERROR;
bp->b_flags = EIO;
if (bp->b_flags & B_BUSY) /* XXX why isn't this always the case? */
biodone(bp);
return;
}
/* Get a buffer */
sbp = (struct sdbuf *) Malloc(sizeof(struct sdbuf));
if (sbp == NULL) {
bp->b_flags |= B_ERROR;
bp->b_error = ENOMEM;
biodone(bp);
return;
}
bcopy(bp, &sbp->b, sizeof(struct buf)); /* start with the user's buffer */
sbp->b.b_flags |= B_CALL; /* tell us when it's done */
sbp->b.b_iodone = sdio_done; /* here */
sbp->b.b_dev = DRIVE[sd->driveno].dev; /* device */
sbp->b.b_vp = DRIVE[sd->driveno].vp; /* vnode */
sbp->b.b_blkno += sd->driveoffset;
sbp->bp = bp; /* note the address of the original header */
sbp->sdno = sd->sdno; /* note for statistics */
sbp->driveno = sd->driveno;
endoffset = bp->b_blkno + sbp->b.b_bcount / DEV_BSIZE; /* final sector offset */
if (endoffset > sd->sectors) { /* beyond the end */
sbp->b.b_bcount -= (endoffset - sd->sectors) * DEV_BSIZE; /* trim */
if (sbp->b.b_bcount <= 0) { /* nothing to transfer */
bp->b_resid = bp->b_bcount; /* nothing transferred */
/* XXX Grrr. This doesn't seem to work. Return
* an error after all */
bp->b_flags |= B_ERROR;
bp->b_error = ENOSPC;
biodone(bp);
Free(sbp);
return;
}
}
if ((sbp->b.b_flags & B_READ) == 0) /* write */
sbp->b.b_vp->v_numoutput++; /* one more output going */
#if DEBUG
if (debug & DEBUG_ADDRESSES)
printf(" %s dev 0x%x, sd %d, offset 0x%x, devoffset 0x%x, length %ld\n",
sbp->b.b_flags & B_READ ? "Read" : "Write",
sbp->b.b_dev,
sbp->sdno,
(u_int) (sbp->b.b_blkno - SD[sbp->sdno].driveoffset),
(int) sbp->b.b_blkno,
sbp->b.b_bcount); /* XXX */
if (debug & DEBUG_NUMOUTPUT)
printf(" vinumstart sd %d numoutput %ld\n",
sbp->sdno,
sbp->b.b_vp->v_numoutput);
#endif
s = splbio();
(*bdevsw[major(sbp->b.b_dev)]->d_strategy) (&sbp->b);
splx(s);
}
/*
 * Simplified version of bounds_check_with_label
 * Determine the size of the transfer, and make sure it is
 * within the boundaries of the partition.  Adjust transfer
 * if needed, and signal errors or early completion.
 *
 * Volumes are simpler than disk slices: they only contain
 * one component (though we call them a, b and c to make
 * system utilities happy), and they always take up the
 * complete space of the "partition".
 *
 * I'm still not happy with this: why should the label be
 * protected?  If it weren't so damned difficult to write
 * one in the first place (because it's protected), it wouldn't
 * be a problem.
 *
 * Return 1 if the transfer (possibly truncated to fit the volume) should
 * go ahead, 0 if there is nothing to transfer (zero length, or a start
 * exactly at the end of the volume), and -1 on error.  On error, b_error
 * is set to EROFS (write to the protected label) or EINVAL (start outside
 * the volume) and B_ERROR is set in b_flags.  The caller completes the
 * buffer.
 */
int
vinum_bounds_check(struct buf *bp, struct volume *vol)
{
    int maxsize = vol->size;				    /* size of the partition (sectors) */
    int size = (bp->b_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT; /* size of this request (sectors) */

    /* Would this transfer overwrite the disk label? */
    if (bp->b_blkno <= LABELSECTOR			    /* starts before or at the label */
#if LABELSECTOR != 0
	&& bp->b_blkno + size > LABELSECTOR		    /* and finishes after */
#endif
	&& (!(vol->flags & VF_RAW))			    /* and it's not raw */
	&&major(bp->b_dev) == BDEV_MAJOR		    /* and it's the block device */
	&& (bp->b_flags & B_READ) == 0			    /* and it's a write */
	&& (!(vol->flags & (VF_WLABEL | VF_LABELLING)))) {  /* and we're not allowed to write the label */
	bp->b_error = EROFS;				    /* read-only */
	bp->b_flags |= B_ERROR;
	return -1;
    }
    if (size == 0)					    /* no transfer specified, */
	return 0;					    /* treat as EOF */
    /* beyond partition? */
    if (bp->b_blkno < 0					    /* negative start */
	|| bp->b_blkno + size > maxsize) {		    /* or goes beyond the end of the partition */
	/* if exactly at end of disk, return an EOF */
	if (bp->b_blkno == maxsize) {
	    bp->b_resid = bp->b_bcount;
	    return 0;
	}
	/* or truncate if part of it fits */
	size = maxsize - bp->b_blkno;
	if ((bp->b_blkno < 0)				    /* can't truncate a negative start */
	    ||(size <= 0)) {				    /* nothing to transfer */
	    bp->b_error = EINVAL;
	    bp->b_flags |= B_ERROR;
	    return -1;
	}
	bp->b_bcount = size << DEV_BSHIFT;
    }
    bp->b_pblkno = bp->b_blkno;
    return 1;
}
/*
 * Allocate a zeroed request group with room for the specified number of
 * request elements and append it to the chain of groups for rq.
 *
 * Return a pointer to the new group, or NULL if no memory is available
 * (after calling Debugger()).
 */
struct rqgroup *
allocrqg(struct request *rq, int elements)
{
    int bytes = sizeof(struct rqgroup) + elements * sizeof(struct rqelement);
    struct rqgroup *group = (struct rqgroup *) Malloc(bytes);

    if (group == NULL) {				    /* malloc failed */
	Debugger("XXX");
	return group;
    }
    bzero(group, bytes);				    /* no old junk */
    group->rq = rq;					    /* point back to the parent request */
    group->count = elements;				    /* number of requests in the group */

    if (rq->rqg)					    /* we already have requests */
	rq->lrqg->next = group;				    /* hang it off the end */
    else						    /* first request */
	rq->rqg = group;				    /* at the start */
    rq->lrqg = group;					    /* this one is the last in the list */
    return group;
}
/* Deallocate a request group out of a chain. We do
* this by linear search: the chain is short, this
* almost never happens, and currently it can only
* happen to the first member of the chain. */
void
deallocrqg(struct rqgroup *rqg)
{
struct rqgroup *rqgc = rqg->rq->rqg; /* point to the request chain */
if (rqg->rq->rqg == rqg) /* we're first in line */
rqg->rq->rqg = rqg->next; /* unhook ourselves */
else {
while (rqgc->next != rqg) /* find the group */
rqgc = rqgc->next;
rqgc->next = rqg->next;
}
Free(rqgc);
}
/* Character device interface */

/*
 * Read from the character device: pass the transfer described by uio to
 * physio(), with vinumstrategy as the strategy routine, and return
 * whatever physio() returns.  ioflag is not used.
 */
int
vinumread(dev_t dev, struct uio *uio, int ioflag)
{
    int error;

    error = physio(vinumstrategy, NULL, dev, 1, minphys, uio);
    return error;
}
/*
 * Write to the character device: pass the transfer described by uio to
 * physio(), with vinumstrategy as the strategy routine, and return
 * whatever physio() returns.  ioflag is not used.
 */
int
vinumwrite(dev_t dev, struct uio *uio, int ioflag)
{
    int error;

    error = physio(vinumstrategy, NULL, dev, 0, minphys, uio);
    return error;
}

128
sys/dev/vinum/vinumrevive.c Normal file
View File

@ -0,0 +1,128 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: revive.c,v 1.1 1998/08/14 06:16:59 grog Exp grog $
*/
#define REALLYKERNEL
#include "vinumhdr.h"
#include "request.h"
/* Revive one block of plex plexno: read the block from the plex's
* volume, then write it to the plex itself.
*
* Returns:
*   EAGAIN  the copy succeeded, but more blocks remain to be copied
*   0       the last block was copied; the plex is now plex_up
*   ENOMEM  geteblk() returned no buffer
*   other   the b_error value of the failed read or write
*
* XXX We should specify a block size here. At the moment,
* just take a default value. FIXME */
int
revive_block(int plexno)
{
struct plex *plex = &PLEX[plexno];
struct buf *bp;
int error = EAGAIN;
int size;
int s; /* priority level */
/* Choose the revive block size on first use */
if (plex->revive_blocksize == 0) {
if (plex->stripesize != 0) /* we're striped, don't revive more than */
plex->revive_blocksize = min(DEFAULT_REVIVE_BLOCKSIZE, plex->stripesize); /* one block at a time */
else
plex->revive_blocksize = DEFAULT_REVIVE_BLOCKSIZE;
}
/* Transfer size in bytes: one revive block, or whatever is left
* of the plex if that is less */
size = min(plex->revive_blocksize, plex->length - plex->revived) << DEV_BSHIFT;
s = splbio();
/* Get a buffer */
bp = geteblk(size);
if (bp == NULL) {
splx(s);
return ENOMEM;
}
if (bp->b_qindex != 0) /* on a queue, */
bremfree(bp); /* remove it */
splx(s);
/* Amount to transfer: block size, unless it
* would overlap the end */
bp->b_bufsize = size;
bp->b_bcount = bp->b_bufsize;
bp->b_resid = 0x0;
bp->b_blkno = plex->revived; /* we've got this far */
/* XXX what about reviving anonymous plexes? */
/* First, read the data from the volume. We don't
* care which plex, that's bre's job */
bp->b_dev = VINUMBDEV(plex->volno, 0, 0, VINUM_VOLUME_TYPE); /* create the device number */
bp->b_flags = B_BUSY | B_READ;
vinumstart(bp, 1);
biowait(bp);
if (bp->b_flags & B_ERROR)
error = bp->b_error;
else
/* Now write to the plex */
{
s = splbio();
if (bp->b_qindex != 0) /* on a queue, */
bremfree(bp); /* remove it */
splx(s);
bp->b_dev = VINUMBDEV(plex->volno, plex->volplexno, 0, VINUM_PLEX_TYPE); /* create the device number */
bp->b_flags = B_BUSY; /* make this a write */
bp->b_resid = 0x0;
vinumstart(bp, 1);
biowait(bp);
if (bp->b_flags & B_ERROR)
error = bp->b_error;
else {
plex->revived += bp->b_bcount >> DEV_BSHIFT; /* moved this much further down */
if (plex->revived >= plex->length) { /* finished */
plex->revived = 0;
plex->state = plex_up; /* do we need to do more? */
if (plex->volno >= 0) /* we have a volume, */
set_volume_state(plex->volno, volume_up, 0);
printf("vinum: plex %s is %s\n", plex->name, plex_state(plex->state));
save_config(); /* and save the updated configuration */
error = 0; /* we're done */
}
}
/* The waiting requests are launched whether or not the write
* succeeded, but not when the read above failed.
* NOTE(review): confirm that leaving them queued after a read
* error is intended, and that the list entry is still valid
* when ->next is read after launch_requests() */
while (plex->waitlist) { /* we have waiting requests */
launch_requests(plex->waitlist, 1); /* do them now */
plex->waitlist = plex->waitlist->next; /* and move on to the next */
}
}
if (bp->b_qindex == 0) /* not on a queue, */
brelse(bp); /* is this kosher? */
return error;
}

755
sys/dev/vinum/vinumstate.c Normal file
View File

@ -0,0 +1,755 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: state.c,v 2.6 1998/08/19 08:04:47 grog Exp grog $
*/
#define REALLYKERNEL
#include "vinumhdr.h"
#include "request.h"
/*
 * Change the state of drive driveno to state.
 *
 * Nothing is done for an unallocated drive, or if the drive is
 * already in the requested state.  A drive is only taken down when
 * flags is non-zero or its open count is zero.  A drive which should
 * be up or coming up but has no vnode is (re)initialized, which may
 * change the state again.  Unless setstate_norecurse is set, every
 * subdisk on the drive is then forced down and the configuration is
 * saved.
 *
 * Return 1 if the state changed and the subdisks were processed,
 * otherwise 0 (this includes the setstate_norecurse case).
 */
int
set_drive_state(int driveno, enum drivestate state, int flags)
{
    struct drive *drv = &DRIVE[driveno];
    int previous = drv->state;
    int i;

    if (drv->state == drive_unallocated)		    /* nothing to work on */
	return 0;
    if (state == previous)				    /* already there */
	return 0;

    if (state == drive_down) {				    /* going down */
	if (!flags && (drv->opencount != 0))		    /* still open and not forced */
	    return 0;
	close_drive(drv);
	drv->state = state;
	printf("vinum: drive %s is %s\n", drv->label.name, drive_state(drv->state));
    }
    drv->state = state;

    /* A drive that should be open but has no vnode must be initialized */
    if ((drv->vp == NULL)
	&& ((drv->state == drive_up) || (drv->state == drive_coming_up)))
	init_drive(drv);				    /* which changes the state again */

    if ((flags & setstate_norecurse) != 0)		    /* caller handles the subdisks */
	return 0;

    for (i = 0; i < vinum_conf.subdisks_used; i++) {
	if (SD[i].driveno == driveno)			    /* one of ours */
	    set_sd_state(i, sd_down, setstate_force | setstate_recursing);
    }
    save_config();
    return 1;
}
/*
 * Try to set the state of subdisk sdno to state.
 *
 * Return 1 if the state changed to what we wanted, -1 if it changed
 * to something else, and 0 if there was no change.
 *
 * This routine is called both from the user (up, down states only)
 * and internally.  flags is a combination of setstate_* bits:
 * setstate_force is needed to take down a subdisk attached to a plex
 * and to set any state other than up or down; setstate_norecurse
 * suppresses the plex update; setstate_configuring and
 * setstate_recursing suppress saving the configuration.
 */
int
set_sd_state(int sdno, enum sdstate state, enum setstateflags flags)
{
    struct sd *sd = &SD[sdno];
    int oldstate = sd->state;
    int status = 1;					    /* status to return */

    if (state == oldstate)
	return 0;					    /* no change */
    if (sd->state == sd_unallocated)			    /* no subdisk to do anything with, */
	return 0;

    if (sd->driveoffset < 0) {				    /* not allocated space */
	sd->state = sd_down;
	if (state != sd_down)
	    return -1;
    } else {						    /* space allocated */
	switch (state) {
	case sd_down:
	    /* (flags & x) must be parenthesized: "!flags & x" negates flags first */
	    if (((flags & setstate_force) == 0)		    /* but gently */
		&& (sd->plexno >= 0))			    /* and we're attached to a plex, */
		return 0;				    /* don't do it */
	    break;

	case sd_up:
	    if (DRIVE[sd->driveno].state != drive_up)	    /* can't bring the sd up if the drive isn't, */
		return 0;				    /* not even by force */
	    switch (sd->state) {
	    case sd_obsolete:
	    case sd_down:				    /* been down, no data lost */
		/*
		 * plexno is -1 when unattached and 0 is a valid plex,
		 * so test with >= 0 rather than for non-zero.
		 * NOTE(review): the original comment on the second
		 * test read "or it's the only one", which does not
		 * match "subdisks > 1"; the code is kept as it was.
		 */
		if ((sd->plexno >= 0)			    /* we're associated with a plex */
		    && ((PLEX[sd->plexno].state < plex_firstup) /* and it's not up */
			|| (PLEX[sd->plexno].subdisks > 1))) /* or it has more than one subdisk */
		    break;
		/* XXX Get this right: make sure that other plexes in
		 * the volume cover this address space, otherwise
		 * we make this one sd_up */
		sd->state = sd_reborn;			    /* here it is again */
		printf("vinum: subdisk %s is %s, not %s\n", sd->name, sd_state(sd->state), sd_state(state));
		status = -1;
		break;

	    case sd_init:				    /* brand new */
		if (flags & setstate_configuring)	    /* we're doing this while configuring */
		    break;
		sd->state = sd_empty;			    /* nothing in it */
		printf("vinum: subdisk %s is %s, not %s\n", sd->name, sd_state(sd->state), sd_state(state));
		status = -1;
		break;

	    case sd_initializing:
		break;					    /* go on and do it */

	    case sd_empty:
		if ((sd->plexno >= 0)			    /* we're associated with a plex */
		    && ((PLEX[sd->plexno].state < plex_firstup) /* and it's not up */
			|| (PLEX[sd->plexno].subdisks > 1))) /* or it has more than one subdisk */
		    break;
		return 0;				    /* can't do it */

	    default:					    /* can't do it */
		/* There's no way to bring subdisks up directly from
		 * other states.  First they need to be initialized
		 * or revived */
		return 0;
	    }
	    break;

	default:					    /* other ones, only internal with force */
	    /* == binds tighter than &, so the mask test needs its own parentheses */
	    if ((flags & setstate_force) == 0)		    /* no force?  What's this? */
		return 0;				    /* don't do it */
	}
    }

    /*
     * Only install the requested state if we did not substitute a
     * different one above (status -1); otherwise the substitute would
     * be overwritten and the -1 return would be a lie.
     */
    if (status == 1) {
	sd->state = state;
	printf("vinum: subdisk %s is %s\n", sd->name, sd_state(sd->state));
    }
    if (((flags & setstate_norecurse) == 0)
	&& (sd->plexno >= 0))				    /* only if we belong to a plex */
	set_plex_state(sd->plexno, plex_up, setstate_recursing); /* update plex state */
    if ((flags & (setstate_configuring | setstate_recursing)) == 0) /* save config now */
	save_config();
    return status;
}
/* Called from request routines when they find
* a subdisk which is not kosher. Decide whether
* it warrants changing the state. Return
* REQUEST_DOWN if we can't use the subdisk,
* REQUEST_OK if we can.
*
* sd is the subdisk, rq the request (its buffer's B_READ flag tells
* reads from writes). diskaddr and diskend are compared against the
* plex revive pointer; presumably they are the start and end of the
* transfer in the same units as plex->revived -- TODO confirm.
*
* Side effects: a write which cannot be performed marks the subdisk
* sd_stale (drive up) or sd_obsolete (drive not up), and a write
* which ends at or beyond the revive pointer of a reviving plex sets
* XFR_REVIVECONFLICT in rq->flags. */
enum requeststatus
checksdstate(struct sd *sd, struct request *rq, daddr_t diskaddr, daddr_t diskend)
{
struct plex *plex = &PLEX[sd->plexno];
int writeop = (rq->bp->b_flags & B_READ) == 0; /* note if we're writing */
/* first, see if the plex wants to be accessed */
switch (plex->state) {
case plex_reviving:
/* When writing, we'll write anything that starts
* up to the current revive pointer, but we'll
* only accept a read which finishes before the
* current revive pointer.
*/
if ((writeop && (diskaddr > plex->revived)) /* write starts after current revive pointer */
||((!writeop) && (diskend >= plex->revived))) { /* or read ends after current revive pointer */
if (writeop) { /* writing to a consistent down disk */
if (DRIVE[sd->driveno].state == drive_up)
set_sd_state(sd->sdno, sd_stale, setstate_force); /* it's not consistent now */
else
set_sd_state(sd->sdno, sd_obsolete, setstate_force); /* it's not consistent now */
}
return REQUEST_DOWN; /* that part of the plex is still down */
} else if (diskend >= plex->revived) /* write finishes beyond revive pointer */
rq->flags |= XFR_REVIVECONFLICT; /* note a potential conflict */
/* The request lies in the part already revived: treat the
* plex like one which is up */
/* FALLTHROUGH */
case plex_up:
case plex_degraded:
case plex_flaky:
/* We can access the plex: let's see
* how the subdisk feels */
switch (sd->state) {
case sd_up:
return REQUEST_OK;
case sd_reborn:
if (writeop)
return REQUEST_OK; /* always write to a reborn disk */
/* Handle the mapping. We don't want to reject
* a read request to a reborn subdisk if that's
* all we have. XXX */
return REQUEST_DOWN;
case sd_down:
case sd_crashed:
if (writeop) { /* writing to a consistent down disk */
if (DRIVE[sd->driveno].state == drive_up)
set_sd_state(sd->sdno, sd_stale, setstate_force); /* it's not consistent now */
else
set_sd_state(sd->sdno, sd_obsolete, setstate_force); /* it's not consistent now */
}
return REQUEST_DOWN; /* and it's down one way or another */
default: /* any other subdisk state is unusable */
return REQUEST_DOWN;
}
default: /* any other plex state: no access */
return REQUEST_DOWN;
}
}
/*
 * Append the region (offset, length) to the defective region table
 * of plex.  EXPAND() is invoked first when the table has no free
 * entry left (presumably growing it by PLEX_REGION_TABLE_SIZE
 * entries; see the macro definition).
 */
void
add_defective_region(struct plex *plex, off_t offset, size_t length)
{
    /* XXX get this ordered, and coalesce regions if necessary */
    if (plex->defective_regions >= plex->defective_region_count)
	EXPAND(plex->defective_region,
	    struct plexregion,
	    plex->defective_region_count,
	    PLEX_REGION_TABLE_SIZE);
    /* fill in the first unused entry, then account for it */
    plex->defective_region[plex->defective_regions].offset = offset;
    plex->defective_region[plex->defective_regions].length = length;
    plex->defective_regions++;
}
/*
 * Append the region (offset, length) to the unmapped region table of
 * plex.  EXPAND() is invoked first when the table has no free entry
 * left (presumably growing it by PLEX_REGION_TABLE_SIZE entries; see
 * the macro definition).
 */
void
add_unmapped_region(struct plex *plex, off_t offset, size_t length)
{
    if (plex->unmapped_regions >= plex->unmapped_region_count)
	EXPAND(plex->unmapped_region,
	    struct plexregion,
	    plex->unmapped_region_count,
	    PLEX_REGION_TABLE_SIZE);
    /* fill in the first unused entry, then account for it */
    plex->unmapped_region[plex->unmapped_regions].offset = offset;
    plex->unmapped_region[plex->unmapped_regions].length = length;
    plex->unmapped_regions++;
}
/*
 * Rebuild the unmapped and defective region tables of a plex from
 * its subdisks, and take the plex down if subdisks overlap.
 *
 * The subdisks are visited in plex->sdnos order:
 *  - a subdisk starting before the end of its predecessor is a
 *    configuration error: the plex is forced to plex_down;
 *  - a gap between the predecessor and the subdisk is recorded as an
 *    unmapped region;
 *  - a subdisk in a state below sd_reborn is recorded as a defective
 *    region.
 */
void
rebuild_plex_unmappedlist(struct plex *plex)
{
    int sdno;
    struct sd *sd;
    int lastsdend = 0;					    /* end offset of last subdisk */

    if (plex->unmapped_region != NULL) {		    /* we're going to rebuild it */
	Free(plex->unmapped_region);
	plex->unmapped_region = NULL;
	plex->unmapped_regions = 0;
	plex->unmapped_region_count = 0;
    }
    if (plex->defective_region != NULL) {
	Free(plex->defective_region);
	plex->defective_region = NULL;
	plex->defective_regions = 0;
	plex->defective_region_count = 0;
    }
    for (sdno = 0; sdno < plex->subdisks; sdno++) {
	sd = &SD[plex->sdnos[sdno]];
	if (sd->plexoffset < lastsdend) {		    /* overlap */
	    printf("vinum: Plex %s, subdisk %s overlaps previous\n", plex->name, sd->name);
	    set_plex_state(plex->plexno, plex_down, setstate_force); /* don't allow that */
	} else {
	    if (sd->plexoffset > lastsdend)		    /* gap */
		add_unmapped_region(plex, lastsdend, sd->plexoffset - lastsdend);
	    /*
	     * Whether the subdisk is defective does not depend on
	     * whether a gap precedes it, so this must not be chained
	     * to the gap test with "else": a down subdisk following a
	     * gap would otherwise never be recorded.
	     */
	    if (sd->state < sd_reborn)			    /* this part defective */
		add_defective_region(plex, sd->plexoffset, sd->sectors);
	}
	lastsdend = sd->plexoffset + sd->sectors;
    }
}
/* return a state map for the subdisks of a plex */
enum sdstates
sdstatemap(struct plex *plex, int *sddowncount)
{
int sdno;
enum sdstates statemap = 0; /* note the states we find */
*sddowncount = 0; /* no subdisks down yet */
for (sdno = 0; sdno < plex->subdisks; sdno++) {
struct sd *sd = &SD[plex->sdnos[sdno]]; /* point to the subdisk */
switch (sd->state) {
case sd_empty:
statemap |= sd_emptystate;
(*sddowncount)++; /* another unusable subdisk */
break;
case sd_init:
statemap |= sd_initstate;
(*sddowncount)++; /* another unusable subdisk */
break;
case sd_down:
statemap |= sd_downstate;
(*sddowncount)++; /* another unusable subdisk */
break;
case sd_crashed:
statemap |= sd_crashedstate;
(*sddowncount)++; /* another unusable subdisk */
break;
case sd_obsolete:
statemap |= sd_obsolete;
(*sddowncount)++; /* another unusable subdisk */
break;
case sd_stale:
statemap |= sd_stalestate;
(*sddowncount)++; /* another unusable subdisk */
break;
case sd_reborn:
statemap |= sd_rebornstate;
break;
case sd_up:
statemap |= sd_upstate;
break;
default:
statemap |= sd_otherstate;
break;
}
}
return statemap;
}
/* determine the state of the volume relative to this plex */
enum volplexstate
vpstate(struct plex *plex)
{
struct volume *vol;
enum volplexstate state = volplex_onlyusdown; /* state to return */
int plexno;
if (plex->volno < 0) /* not associated with a volume */
return volplex_onlyusdown; /* assume the worst */
vol = &VOL[plex->volno]; /* point to our volume */
for (plexno = 0; plexno < vol->plexes; plexno++) {
if (&PLEX[vol->plex[plexno]] == plex) { /* us */
if (PLEX[vol->plex[plexno]].state == plex_up) /* are we up? */
state |= volplex_onlyus; /* yes */
} else {
if (PLEX[vol->plex[plexno]].state == plex_up) /* not us */
state |= volplex_otherup; /* and when they were up, they were up */
else
state |= volplex_alldown; /* and when they were down, they were down */
}
}
return state; /* and when they were only halfway up */
} /* they were neither up nor down */
/* Return non-zero if every bit which is set in b is also set in a */
int allset(int a, int b);

int
allset(int a, int b)
{
    int missing = b & ~a;				    /* bits of b which a lacks */

    return missing == 0;
}
/* Update the state of a plex dependent on its subdisks.
* Also rebuild the unmapped_region and defective_region table */
int
set_plex_state(int plexno, enum plexstate state, enum setstateflags flags)
{
int sddowncount = 0; /* number of down subdisks */
struct plex *plex = &PLEX[plexno]; /* point to our plex */
enum plexstate oldstate = plex->state;
enum volplexstate vps = vpstate(plex); /* how do we compare with the other plexes? */
enum sdstates statemap = sdstatemap(plex, &sddowncount); /* get a map of the subdisk states */
if ((flags & setstate_force) && (oldstate == state)) /* we're there already, */
return 0; /* no change */
if (plex->state == plex_unallocated) /* no plex to do anything with, */
return 0;
switch (state) {
case plex_up:
if ((plex->state == plex_initializing) /* we're initializing */
&&(statemap != sd_upstate)) /* but SDs aren't up yet */
return 0; /* do nothing */
/* We don't really care what our state was before
* if we want to come up. We rely entirely on the
* state of our subdisks and our volume */
switch (vps) {
case volplex_onlyusdown:
case volplex_alldown: /* another plex is down, and so are we */
if (statemap == sd_upstate) { /* all subdisks ready for action */
if ((plex->state == plex_init) /* we're brand spanking new */
&&(VOL[plex->volno].flags & VF_CONFIG_SETUPSTATE)) { /* and we consider that up */
/* Conceptually, an empty plex does not contain valid data,
* but normally we'll see this state when we have just
* created a plex, and it's either consistent from earlier,
* or we don't care about the previous contents (we're going
* to create a file system or use it for swap).
*
* We need to do this in one swell foop: on the next call
* we will no longer be just empty.
*
* We'll still come back to this function for the remaining
* plexes in the volume. They'll be up already, so that
* doesn't change anything, but it's not worth the additional
* code to stop doing it. */
struct volume *vol = &VOL[plex->volno];
int plexno;
for (plexno = 0; plexno < vol->plexes; plexno++)
PLEX[vol->plex[plexno]].state = plex_up;
}
plex->state = plex_up; /* bring up up, anyway */
} else
plex->state = plex_down;
break;
case volplex_onlyusup: /* only we are up: others are down */
case volplex_onlyus: /* we're up and alone */
if ((statemap == sd_upstate) /* subdisks all up */
||(statemap == sd_emptystate)) /* or all empty */
plex->state = plex_up; /* go for it */
else if ((statemap & (sd_upstate | sd_reborn)) == statemap) /* all up or reborn, */
plex->state = plex_flaky;
else if (statemap & (sd_upstate | sd_reborn)) /* some up or reborn, */
plex->state = plex_degraded; /* so far no corruption */
else
plex->state = plex_faulty;
break;
case volplex_otherup: /* another plex is up */
case volplex_otherupdown: /* other plexes are up and down */
if ((statemap == sd_upstate) /* subdisks all up */
||(statemap == sd_emptystate) /* or all empty */
) {
/* Is the data in all subdisks valid? */
if (statemap == statemap & (sd_downstate | sd_rebornstate | sd_upstate))
break; /* yes, we can bring the plex up */
plex->state = plex_reviving; /* we need reviving */
return EAGAIN;
} else
plex->state = plex_faulty; /* still in error */
break;
case volplex_allup: /* all plexes are up */
case volplex_someup:
if ((statemap & (sd_upstate | sd_reborn)) == statemap) /* all up or reborn, */
break; /* no change */
else
plex->state = plex_degraded; /* we're not all there */
}
if (plex->state != oldstate)
break;
return 0; /* no change */
case plex_down: /* want to take it down */
if (((vps == volplex_onlyus) /* we're the only one up */
||(vps == volplex_onlyusup)) /* we're the only one up */
&&(!(flags & setstate_force))) /* and we don't want to use force */
return 0; /* can't do it */
plex->state = state; /* do it */
break;
/* This is only requested by the driver.
* Trust ourselves */
case plex_faulty:
plex->state = state; /* do it */
break;
case plex_initializing:
/* XXX consider what safeguards we need here */
if ((flags & setstate_force) == 0)
return 0;
plex->state = state; /* do it */
break;
/* What's this? */
default:
return 0;
}
printf("vinum: plex %s is %s\n", plex->name, plex_state(plex->state));
/* Now see what we have left, and whether
* we're taking the volume down */
if (plex->volno >= 0) { /* we have a volume */
struct volume *vol = &VOL[plex->volno];
vps = vpstate(plex); /* get our combined state again */
if ((flags & setstate_norecurse) == 0) { /* we can recurse */
if ((vol->state == volume_up)
&& (vps == volplex_alldown)) /* and we're all down */
set_volume_state(plex->volno, volume_down, setstate_recursing); /* take our volume down */
else if ((vol->state == volume_down)
&& (vps & (volplex_otherup | volplex_onlyusup))) /* and at least one is up */
set_volume_state(plex->volno, volume_up, setstate_recursing); /* bring our volume up */
}
}
if ((flags & (setstate_configuring | setstate_recursing)) == 0) /* save config now */
save_config();
return 1;
}
/*
 * Update the state of volume volno, depending on the requested state
 * and the states of its plexes.
 *
 * A volume can come up only if at least one of its plexes is
 * completely up.  It can go down only if it is not open or
 * setstate_force is set in flags.
 *
 * Return 1 if the state changed, otherwise 0.  The configuration is
 * saved on a change unless setstate_configuring or
 * setstate_recursing is set.
 */
int
set_volume_state(int volno, enum volumestate state, enum setstateflags flags)
{
    int plexno;
    enum plexstates {
	plex_downstate = 1,				    /* found a plex which is down */
	plex_degradedstate = 2,				    /* found a plex which is halfway up */
	plex_upstate = 4				    /* found a plex which is completely up */
    };
    int plexstatemap = 0;				    /* note the states we find */
    struct volume *vol = &VOL[volno];			    /* point to our volume */

    if (vol->state == state)				    /* we're there already */
	return 0;					    /* no change */
    if (vol->state == volume_unallocated)		    /* no volume to do anything with, */
	return 0;

    for (plexno = 0; plexno < vol->plexes; plexno++) {
	struct plex *plex = &PLEX[vol->plex[plexno]];	    /* point to the plex */

	switch (plex->state) {
	case plex_degraded:
	case plex_flaky:
	case plex_reviving:
	    plexstatemap |= plex_degradedstate;
	    break;

	case plex_up:
	    plexstatemap |= plex_upstate;
	    break;

	default:
	    plexstatemap |= plex_downstate;
	    break;
	}
    }

    if (state == volume_up) {				    /* want to come up */
	if (plexstatemap & plex_upstate) {		    /* we have a plex which is completely up */
	    vol->state = volume_up;			    /* did it */
	    printf("vinum: volume %s is %s\n", vol->name, volume_state(vol->state));
	    if ((flags & (setstate_configuring | setstate_recursing)) == 0) /* save config now */
		save_config();
	    return 1;
	}
	/* Here we should check whether we have enough
	 * coverage for the complete volume.  Writeme XXX */
    } else if (state == volume_down) {			    /* want to go down */
	/* != binds tighter than &: without the inner parentheses this
	 * tested flags & 1 instead of the force bit */
	if ((vol->opencount == 0)			    /* not open */
	    || ((flags & setstate_force) != 0)) {	    /* or we're forcing */
	    vol->state = volume_down;
	    printf("vinum: volume %s is %s\n", vol->name, volume_state(vol->state));
	    if ((flags & (setstate_configuring | setstate_recursing)) == 0) /* save config now */
		save_config();
	    return 1;
	}
    }
    return 0;						    /* no change */
}
/* Start an object, in other words do what we can to get it up.
* This is called from vinumioctl (VINUMSTART).
* Return error indications via ioctl_reply
*/
void
start_object(struct vinum_ioctl_msg *data)
{
int status;
int realstatus; /* what we really have */
int objindex = data->index; /* data gets overwritten */
struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* format for returning replies */
switch (data->type) {
case drive_object:
status = set_drive_state(objindex, drive_up, setstate_none);
realstatus = DRIVE[objindex].state == drive_up; /* set status on whether we really did it */
break;
case sd_object:
status = set_sd_state(objindex, sd_up, setstate_none); /* set state */
realstatus = SD[objindex].state == sd_up; /* set status on whether we really did it */
break;
case plex_object:
if (PLEX[objindex].state == plex_reviving) { /* reviving, */
ioctl_reply->error = revive_block(objindex); /* revive another block */
ioctl_reply->msg[0] = '\0'; /* no comment */
return;
}
status = set_plex_state(objindex, plex_up, setstate_none);
realstatus = PLEX[objindex].state == plex_up; /* set status on whether we really did it */
break;
case volume_object:
status = set_volume_state(objindex, volume_up, setstate_none);
realstatus = VOL[objindex].state == volume_up; /* set status on whether we really did it */
break;
default:
ioctl_reply->error = EINVAL;
strcpy(ioctl_reply->msg, "Invalid object type");
return;
}
/* There's no point in saying anything here:
* the userland program does it better */
ioctl_reply->msg[0] = '\0';
if (realstatus == 0) /* couldn't do it */
ioctl_reply->error = EINVAL;
else
ioctl_reply->error = 0;
}
/* Stop an object, in other words do what we can to get it down
* This is called from vinumioctl (VINUMSTOP).
* Return error indications via ioctl_reply.
*/
void
stop_object(struct vinum_ioctl_msg *data)
{
int status = 1;
int objindex = data->index; /* save the number from change */
struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* format for returning replies */
switch (data->type) {
case drive_object:
status = set_drive_state(objindex, drive_down, data->force);
break;
case sd_object:
status = set_sd_state(objindex, sd_down, data->force);
break;
case plex_object:
status = set_plex_state(objindex, plex_down, data->force);
break;
case volume_object:
status = set_volume_state(objindex, volume_down, data->force);
break;
default:
ioctl_reply->error = EINVAL;
strcpy(ioctl_reply->msg, "Invalid object type");
return;
}
ioctl_reply->msg[0] = '\0';
if (status == 0) /* couldn't do it */
ioctl_reply->error = EINVAL;
else
ioctl_reply->error = 0;
}
/* VINUM_SETSTATE ioctl: set an object state
* msg is the message passed by the user */
void
setstate(struct vinum_ioctl_msg *msg)
{
int sdno;
struct sd *sd;
struct plex *plex;
struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) msg; /* format for returning replies */
switch (msg->state) {
case object_down:
stop_object(msg);
break;
case object_initializing:
switch (msg->type) {
case sd_object:
sd = &SD[msg->index];
if ((msg->index >= vinum_conf.subdisks_used)
|| (sd->state == sd_unallocated)) {
sprintf(ioctl_reply->msg, "Invalid subdisk %d", msg->index);
ioctl_reply->error = EFAULT;
return;
}
set_sd_state(msg->index, sd_initializing, msg->force);
if (sd->state != sd_initializing) {
strcpy(ioctl_reply->msg, "Can't set state");
ioctl_reply->error = EINVAL;
} else
ioctl_reply->error = 0;
break;
case plex_object:
plex = &PLEX[msg->index];
if ((msg->index >= vinum_conf.plexes_used)
|| (plex->state == plex_unallocated)) {
sprintf(ioctl_reply->msg, "Invalid subdisk %d", msg->index);
ioctl_reply->error = EFAULT;
return;
}
set_plex_state(msg->index, plex_initializing, msg->force);
if (plex->state != plex_initializing) {
strcpy(ioctl_reply->msg, "Can't set state");
ioctl_reply->error = EINVAL;
} else {
ioctl_reply->error = 0;
for (sdno = 0; sdno < plex->subdisks; sdno++) {
sd = &SD[plex->sdnos[sdno]];
set_sd_state(plex->sdnos[sdno], sd_initializing, msg->force);
if (sd->state != sd_initializing) {
strcpy(ioctl_reply->msg, "Can't set state");
ioctl_reply->error = EINVAL;
break;
}
}
}
break;
default:
strcpy(ioctl_reply->msg, "Invalid object");
ioctl_reply->error = EINVAL;
}
break;
case object_up:
start_object(msg);
}
}

213
sys/dev/vinum/vinumstate.h Normal file
View File

@ -0,0 +1,213 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: vinumstate.h,v 1.11 1998/08/04 06:22:49 grog Exp grog $
*/
/* This file gets read by makestatetext to create text files
* with the names of the states, so don't change the file
* format */
enum volumestate {
volume_unallocated,
/* volume_unallocated: present but unused. Must be 0 */
volume_uninit,
/* volume_uninit: mentioned elsewhere but not defined */
volume_down,
/* volume_up: the volume is up and functional, but not all plexes may be available */
volume_up,
volume_laststate = volume_up /* last value, for table dimensions */
};
enum plexstate {
/* An empty entry, not a plex at all. */
plex_unallocated,
/* The plex has been allocated, but its configuration
* is not complete */
plex_init,
/* A plex which has gone completely down because of
* I/O errors. */
plex_faulty,
/* A plex which has been taken down by the
* administrator. */
plex_down,
/* A plex which is currently being brought up after
* having been down. This involves copying data from
* another plex */
plex_reviving,
/* A plex which is being initialized */
plex_initializing,
/* *** The remaining states represent plexes which are
* at least partially up. Keep these separate so that
* they can be checked more easily. */
/* A plex entry which is at least partially up. Not
* all subdisks are available, and an inconsistency
* has occurred. If no other plex is uncorrupted,
* the volume is no longer consistent. */
plex_corrupt,
plex_firstup = plex_corrupt, /* first "up" state */
/* A plex entry which is at least partially up. Not
* all subdisks are available, but so far no
* inconsistency has occurred (this will change with
* the first write to the address space occupied by
* a defective subdisk). A RAID 5 plex with one subdisk
* down will remain degraded even after a write */
plex_degraded,
/* A plex which is really up, but which has a reborn
* subdisk which we don't completely trust, and
* which we don't want to read if we can avoid it */
plex_flaky,
/* A plex entry which is completely up. All subdisks
* are up. */
plex_up,
plex_laststate = plex_up /* last value, for table dimensions */
};
/* subdisk states */
enum sdstate {
/* An empty entry, not a subdisk at all. */
sd_unallocated,
/* A subdisk entry which has not been created
* completely. Some fields may be empty.
*/
sd_uninit,
/* A subdisk entry which has been created completely.
* All fields are correct, but the disk hasn't
* been updated.
*/
sd_init,
/* A subdisk entry which has been created completely and
* which is currently being initialized */
sd_initializing,
/* A subdisk entry which has been created completely.
* All fields are correct, and the disk has been
* updated, but there is no data on the disk.
*/
sd_empty,
/* *** The following states represent invalid data */
/* A subdisk entry which has been created completely.
* All fields are correct, the disk has been updated,
* and the data was valid, but since then the drive
* has gone down, and as a result updates have been
* missed. The drive is still down.
*/
sd_obsolete,
/* A subdisk entry which has been created completely.
* All fields are correct, the disk has been updated,
* and the data was valid, but since then the drive
* has gone down, updates have been lost, and then
* the drive came up again.
*/
sd_stale,
/* *** The following states represent valid, inaccessible data */
/* A subdisk entry which has been created completely.
* All fields are correct, the disk has been updated,
* and the data was valid, but since then the drive
* has gone down. No attempt has been made to write
* to the subdisk since the crash.
*/
sd_crashed,
/* A subdisk entry which was up, which contained
* valid data, and which was taken down by the
* administrator. The data is valid. */
sd_down,
/* *** The following states represent accessible subdisks
* with valid data */
/* A subdisk entry which has been created completely.
* All fields are correct, the disk has been updated,
* and the data was valid, but since then the drive
* has gone down and up again. No updates were lost,
* but it is possible that the subdisk has been
* damaged. We won't read from this subdisk if we
* have a choice. If this is the only subdisk which
* covers this address space in the plex, we set its
* state to sd_up under these circumstances, so this
* status implies that there is another subdisk to
* fulfil the request.
*/
sd_reborn,
/* A subdisk entry which has been created completely.
* All fields are correct, the disk has been updated,
* and the data is valid.
*/
sd_up,
sd_laststate = sd_up /* last value, for table dimensions */
};
/* States of a drive.
 *
 * In this listing each enumerator is described by the comment on
 * the line that follows it.  The values are used directly as
 * indices into the drive state text table (see drive_state() and
 * DriveState() in vinumutil.c), so the order of the enumerators
 * must stay in step with the order of that table. */
enum drivestate {
drive_unallocated,
/* present but unused. Must be 0 */
drive_uninit,
/* just mentioned in some other config entry */
drive_down,
/* not accessible */
drive_coming_up,
/* in the process of being brought up */
drive_up,
/* up and running */
drive_laststate = drive_up /* last value, for table dimensions */
};

211
sys/dev/vinum/vinumutil.c Normal file
View File

@ -0,0 +1,211 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: util.c,v 1.7 1998/08/07 09:23:10 grog Exp grog $
*/
/* This file contains utility routines used both in kernel and user context */
#include "vinumhdr.h"
#include "statetexts.h"
#ifndef REALLYKERNEL
#include <stdio.h>
extern jmp_buf command_fail; /* return on a failed command */
#endif
/* Scratch buffer shared by the *_state() routines and plex_org()
 * below: they format their "Invalid ..." messages into it and return
 * a pointer to it, so the returned text is only valid until the next
 * such call.  The longest message written here ("Invalid state "
 * followed by a decimal int) fits in 32 bytes as long as int is no
 * wider than 32 bits. */
static char numeric_state[32]; /* temporary buffer for ASCII conversions */
/* Number of entries in the table <x>statetext.  The tables are not
 * defined in this file (presumably they come from statetexts.h --
 * TODO confirm); the division assumes that each entry is a char *. */
#define STATECOUNT(x) (sizeof (x##statetext) / sizeof (char *))
/*
 * Return the text for a drive state.
 *
 * Valid states are looked up in drivestatetext.  Anything outside the
 * table is formatted into the shared numeric_state buffer, so that
 * result is overwritten by the next out-of-range conversion.
 */
char *
drive_state(enum drivestate state)
{
    if (((unsigned) state) < STATECOUNT(drive))
        return drivestatetext[state];               /* known state */

    sprintf(numeric_state, "Invalid state %d", (int) state);
    return numeric_state;
}
/*
 * Return the text for a volume state.
 *
 * Valid states are looked up in volstatetext.  Anything outside the
 * table is formatted into the shared numeric_state buffer, so that
 * result is overwritten by the next out-of-range conversion.
 */
char *
volume_state(enum volumestate state)
{
    if (((unsigned) state) < STATECOUNT(vol))
        return volstatetext[state];                 /* known state */

    sprintf(numeric_state, "Invalid state %d", (int) state);
    return numeric_state;
}
/*
 * Return the text for a plex state.
 *
 * Valid states are looked up in plexstatetext.  Anything outside the
 * table is formatted into the shared numeric_state buffer, so that
 * result is overwritten by the next out-of-range conversion.
 */
char *
plex_state(enum plexstate state)
{
    if (((unsigned) state) < STATECOUNT(plex))
        return plexstatetext[state];                /* known state */

    sprintf(numeric_state, "Invalid state %d", (int) state);
    return numeric_state;
}
/* Return plex organization as a string */
char *
plex_org(enum plexorg org)
{
switch (org) {
case plex_disorg: /* disorganized */
return "disorg";
break;
case plex_concat: /* concatenated plex */
return "concat";
break;
case plex_striped: /* striped plex */
return "striped";
break;
default:
sprintf(numeric_state, "Invalid org %d", (int) org);
return numeric_state;
}
}
/*
 * Return the text for a subdisk state.
 *
 * Valid states are looked up in sdstatetext.  Anything outside the
 * table is formatted into the shared numeric_state buffer, so that
 * result is overwritten by the next out-of-range conversion.
 */
char *
sd_state(enum sdstate state)
{
    if (((unsigned) state) < STATECOUNT(sd))
        return sdstatetext[state];                  /* known state */

    sprintf(numeric_state, "Invalid state %d", (int) state);
    return numeric_state;
}
/* Now convert in the other direction */
/*
 * These are currently used only internally,
 * so we don't do too much error checking.
 *
 * DriveState: return the drive state whose text in drivestatetext
 * matches text exactly, or -1 if there is no such entry.
 */
enum drivestate
DriveState(char *text)
{
    int idx = 0;

    while (idx < STATECOUNT(drive)) {
        if (strcmp(text, drivestatetext[idx]) == 0) /* found it */
            return (enum drivestate) idx;
        idx++;
    }
    return -1;                                      /* no match */
}
/*
 * Return the subdisk state whose text in sdstatetext matches text
 * exactly, or -1 if there is no such entry.
 */
enum sdstate
SdState(char *text)
{
    int idx = 0;

    while (idx < STATECOUNT(sd)) {
        if (strcmp(text, sdstatetext[idx]) == 0)    /* found it */
            return (enum sdstate) idx;
        idx++;
    }
    return -1;                                      /* no match */
}
/*
 * Return the plex state whose text in plexstatetext matches text
 * exactly, or -1 if there is no such entry.
 */
enum plexstate
PlexState(char *text)
{
    int idx = 0;

    while (idx < STATECOUNT(plex)) {
        if (strcmp(text, plexstatetext[idx]) == 0)  /* found it */
            return (enum plexstate) idx;
        idx++;
    }
    return -1;                                      /* no match */
}
/*
 * Return the volume state whose text in volstatetext matches text
 * exactly, or -1 if there is no such entry.
 *
 * The result is cast to enum volumestate, the declared return type;
 * the previous cast named "enum volstate", a type that does not
 * exist.
 */
enum volumestate
VolState(char *text)
{
    int i;

    for (i = 0; i < STATECOUNT(vol); i++)
        if (strcmp(text, volstatetext[i]) == 0)     /* found it */
            return (enum volumestate) i;
    return -1;                                      /* no match */
}
/* Take a number with an optional scale factor and convert
 * it to a number of bytes.
 *
 * The scale factors are:
 *
 * b blocks (of 512 bytes)
 * k kilobytes (1024 bytes)
 * m megabytes (of 1024 * 1024 bytes)
 * g gigabytes (of 1024 * 1024 * 1024 bytes)
 *
 * The scale letter may be in either case.  Only the character
 * immediately after the digits is examined; anything after it is
 * ignored.
 *
 * A specification which doesn't start with a digit, which has an
 * unknown scale factor, or whose value doesn't fit in 64 bits is
 * treated as an error: in the kernel we call throw_rude_remark,
 * in user context we print a message and longjmp to command_fail.
 * The trailing return only keeps the compiler quiet.
 */
u_int64_t
sizespec(char *spec)
{
    const u_int64_t maxsize = ~(u_int64_t) 0;       /* largest value we can represent */
    u_int64_t size;
    u_int64_t scale;                                /* multiplier; 0 means "not recognized" */
    int overflow;                                   /* set if the digits don't fit */
    char *s;

    size = 0;
    scale = 0;
    overflow = 0;
    s = spec;
    if ((*s >= '0') && (*s <= '9')) {               /* it's numeric */
        while ((*s >= '0') && (*s <= '9')) {        /* it's numeric */
            unsigned digit = *s++ - '0';

            if (size > (maxsize - digit) / 10)      /* size * 10 + digit would wrap */
                overflow = 1;
            else
                size = size * 10 + digit;           /* convert it */
        }
        switch (*s) {
        case '\0':
            scale = 1;
            break;

        case 'B':
        case 'b':
            scale = 512;
            break;

        case 'K':
        case 'k':
            scale = 1024;
            break;

        case 'M':
        case 'm':
            scale = 1024 * 1024;
            break;

        case 'G':
        case 'g':
            scale = (u_int64_t) 1024 * 1024 * 1024;
            break;
        }
        if ((scale != 0)                            /* valid scale factor */
            && (!overflow)                          /* the digits fitted */
            && (size <= maxsize / scale))           /* and so does the result */
            return size * scale;
    }
#ifdef REALLYKERNEL
    throw_rude_remark(EINVAL, "Invalid length specification: %s", spec);
#else
    fprintf(stderr, "Invalid length specification: %s\n", spec);
    longjmp(command_fail, -1);
#endif
    /* NOTREACHED */
    return -1;
}

510
sys/dev/vinum/vinumvar.h Normal file
View File

@ -0,0 +1,510 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: vinumvar.h,v 1.15 1998/08/14 06:36:41 grog Exp grog $
*/
/* XXX gdb can't find our global pointers, so use this kludge to
* point to them locally. Remove after testing */
#define BROKEN_GDB struct _vinum_conf *VC = &vinum_conf
#include <sys/time.h>
#include "vinumstate.h"
/* Some configuration maxima. They're an enum because
 * we can't define global constants. Sorry about that.
 *
 * These aren't as bad as they look: most of them
 * are soft limits. Only the MAXCONFIG parameter is set in stone
 *
 * The device number macros are defined in the middle of the enum
 * because VINUM_SUPERDEV is built with VINUMBDEV.  They parenthesize
 * their arguments, so an argument which is itself an expression
 * (for example VOLNO (a | b)) is shifted or masked as a whole.
 * PLEXNO, SDNO and DRIVENO evaluate their argument more than once
 * and depend on the VOL, PLEX and SD macros defined further down.
 */
enum constants {
    VINUM_HEADER = 512,                             /* size of header on disk */
    MAXCONFIGLINE = 1024,                           /* maximum size of a single config line */
    /* XXX Do we still need this? */
    MINVINUMSLICE = 1048576,                        /* minimum size of a slice */
    CDEV_MAJOR = 91,                                /* major number for character device */
    BDEV_MAJOR = 25,                                /* and block device */
    ROUND_ROBIN_READPOL = -1,                       /* round robin read policy */
    /* type field in minor number */
    VINUM_VOLUME_TYPE = 0,
    VINUM_PLEX_TYPE = 1,
    VINUM_SD_TYPE = 2,
    VINUM_DRIVE_TYPE = 3,
    VINUM_SUPERDEV_TYPE = 4,                        /* super device. */
    /* Shifts for the individual fields in the device */
    VINUM_TYPE_SHIFT = 28,
    VINUM_VOL_SHIFT = 0,
    VINUM_PLEX_SHIFT = 16,
    VINUM_SD_SHIFT = 20,
    VINUM_VOL_WIDTH = 8,
    VINUM_PLEX_WIDTH = 3,
    VINUM_SD_WIDTH = 8,
    MAJORDEV_SHIFT = 8,
/* Create a block device number */
#define VINUMBDEV(v,p,s,t) ((BDEV_MAJOR << MAJORDEV_SHIFT) \
                            | ((v) << VINUM_VOL_SHIFT) \
                            | ((p) << VINUM_PLEX_SHIFT) \
                            | ((s) << VINUM_SD_SHIFT) \
                            | ((t) << VINUM_TYPE_SHIFT) )
/* And a character device number */
#define VINUMCDEV(v,p,s,t) ((CDEV_MAJOR << MAJORDEV_SHIFT) \
                            | ((v) << VINUM_VOL_SHIFT) \
                            | ((p) << VINUM_PLEX_SHIFT) \
                            | ((s) << VINUM_SD_SHIFT) \
                            | ((t) << VINUM_TYPE_SHIFT) )
/* extract device type */
#define DEVTYPE(x) (((x) >> VINUM_TYPE_SHIFT) & 7)
/* extract volume number */
#define VOLNO(x) ((x) & ((1 << VINUM_VOL_WIDTH) - 1))
/* extract plex number */
#define PLEXNO(x) (VOL [VOLNO (x)].plex [((x) >> VINUM_PLEX_SHIFT) & ((1 << VINUM_PLEX_WIDTH) - 1)])
/* extract subdisk number */
#define SDNO(x) (PLEX [PLEXNO (x)].sdnos [((x) >> VINUM_SD_SHIFT) & ((1 << VINUM_SD_WIDTH) - 1)])
/* extract drive number */
#define DRIVENO(x) (SD [SDNO (x)].driveno)
    VINUM_SUPERDEV = VINUMBDEV(0, 0, 0, VINUM_SUPERDEV_TYPE), /* superdevice number */
    /* the number of object entries to cater for initially, and also the
     * value by which they are incremented. It doesn't take long
     * to extend them, so theoretically we could start with 1 of each, but
     * it's untidy to allocate such small areas. These values are
     * probably too small.
     */
    INITIAL_DRIVES = 4,
    INITIAL_VOLUMES = 4,
    INITIAL_PLEXES = 8,
    INITIAL_SUBDISKS = 16,
    INITIAL_SUBDISKS_IN_PLEX = 4,                   /* number of subdisks to allocate to a plex */
    INITIAL_SUBDISKS_IN_DRIVE = 4,                  /* number of subdisks to allocate to a drive */
    INITIAL_DRIVE_FREELIST = 16,                    /* number of entries in drive freelist */
    PLEX_REGION_TABLE_SIZE = 8,                     /* number of entries in plex region tables */
    INITIAL_LOCKS = 8,                              /* number of locks to allocate to a volume */
    DEFAULT_REVIVE_BLOCKSIZE = 32768,               /* size of block to transfer in one op */
};
/* device numbers */
/*
* 31 30 28 27 20 19 18 16 15 8 7 0
* |-----------------------------------------------------------------------------------------------|
* |X | Type | Subdisk number | X| Plex | Major number | volume number |
* |-----------------------------------------------------------------------------------------------|
*
* 0x2 03 1 19 06
*/
/* Bit field view of a device number.  The widths match the
 * VINUM_*_SHIFT and VINUM_*_WIDTH values in enum constants:
 * volume in bits 0-7, major number in bits 8-15, plex in bits
 * 16-18, subdisk in bits 20-27 and type in bits 28-30. */
struct devcode {
/* CARE. These fields assume a big-endian word. On a
* little-endian system, they're the wrong way around */
/* NOTE(review): the fields are declared starting with the one the
 * diagram places in bits 0-7, so the declaration order matches the
 * diagram only if the compiler allocates bit fields from the least
 * significant bit upwards.  That seems to contradict the
 * "big-endian" wording above -- confirm which is intended. */
unsigned volume:8; /* up to 256 volumes */
unsigned major:8; /* this is where the major number fits */
unsigned plex:3; /* up to 8 plexes per volume */
unsigned unused:1; /* up for grabs */
unsigned sd:8; /* up to 256 subdisks per plex */
unsigned type:3; /* type of object */
/* type field
VINUM_VOLUME = 0,
VINUM_PLEX = 1,
VINUM_SUBDISK = 2,
VINUM_DRIVE = 3,
VINUM_SUPERDEV = 4, */
unsigned signbit:1; /* to make 32 bits */
};
/* Names of the device directories and of the control (super) device */
#define VINUM_DIR "/dev/vinum"
#define VINUM_RDIR "/dev/rvinum"
#define VINUM_SUPERDEV_NAME VINUM_DIR"/control"
/* Sizes of the name fields in the drive, sd, plex and volume
 * structures below.  NOTE(review): whether these counts include
 * the terminating NUL is not visible here -- confirm. */
#define MAXDRIVENAME 32 /* maximum length of a device name */
#define MAXSDNAME 64 /* maximum length of a subdisk name */
#define MAXPLEXNAME 64 /* maximum length of a plex name */
#define MAXVOLNAME 64 /* maximum length of a volume name */
#define MAXNAME 64 /* maximum length of any name */
/* 8 is also what the 3 bit plex field of the device number
 * (VINUM_PLEX_WIDTH) can address.  MAXPLEX, defined with the
 * volume structure further down, has the same value. */
#define MAXVOLPLEX 8 /* maximum number of plexes in a volume */
/* Flags for all objects. Most of them only apply to
* specific objects, but we have space for all in any
* 32 bit flags word.
*
* Each flag is a single bit, so flags can be ORed together.
* The value 4 is not assigned to any flag. */
enum objflags {
VF_LOCKED = 1, /* somebody has locked access to this object */
VF_LOCKING = 2, /* we want access to this object */
VF_WRITETHROUGH = 8, /* volume: write through */
VF_INITED = 0x10, /* unit has been initialized */
VF_WLABEL = 0x20, /* label area is writable */
VF_LABELLING = 0x40, /* unit is currently being labelled */
VF_WANTED = 0x80, /* someone is waiting to obtain a lock */
VF_RAW = 0x100, /* raw volume (no file system) */
VF_LOADED = 0x200, /* module is loaded */
VF_CONFIGURING = 0x400, /* somebody is changing the config */
VF_WILL_CONFIGURE = 0x800, /* somebody wants to change the config */
VF_CONFIG_INCOMPLETE = 0x1000, /* haven't finished changing the config */
VF_CONFIG_SETUPSTATE = 0x2000, /* set a volume up if all plexes are empty */
VF_READING_CONFIG = 0x4000, /* we're reading config database from disk */
VF_KERNELOP = 0x8000, /* we're performing ops from kernel space */
};
/* Global configuration information for the vinum subsystem.
 *
 * The four pointers are used as tables indexed by object number
 * (for example DRIVE[driveno] and SD[sdno] in interrupt.c). */
struct _vinum_conf {
/* Pointers to vinum structures */
struct drive *drive;
struct sd *sd;
struct plex *plex;
struct volume *volume;
/* the number allocated */
int drives_allocated;
int subdisks_allocated;
int plexes_allocated;
int volumes_allocated;
/* and the number currently in use */
int drives_used;
int subdisks_used;
int plexes_used;
int volumes_used;
int flags; /* presumably VF_* bits from enum objflags -- TODO confirm */
int opencount; /* number of times we've been opened */
/* NOTE: this is #if, not #ifdef, so DEBUG must be defined with a
 * non-zero value for these fields to exist.  The debug definitions
 * at the end of this file are guarded with #ifdef DEBUG instead. */
#if DEBUG
int lastrq;
struct buf *lastbuf;
#endif
};
/* Use these defines to simplify code.  They all refer to an
 * object called vinum_conf, which is not declared in this file. */
#define DRIVE vinum_conf.drive
#define SD vinum_conf.sd
#define PLEX vinum_conf.plex
#define VOL vinum_conf.volume
#define VFLAGS vinum_conf.flags
/* Slice header
* Vinum drives start with this structure:
*
* Sector
* |--------------------------------------|
* | PDP-11 memorial boot block | 0
* |--------------------------------------|
* | Disk label, maybe | 1
* |--------------------------------------|
* | Slice definition (vinum_hdr) | 2
* |--------------------------------------|
* | |
* | Configuration info, first copy | 3
* | |
* |--------------------------------------|
* | |
* | Configuration info, second copy | 3 + size of config
* | |
* |--------------------------------------|
*/
/* NOTE(review): the sector numbers in the diagram above do not
 * agree with the byte offsets below.  With 512 byte sectors,
 * VINUM_LABEL_OFFSET (4096) is sector 8 and VINUM_CONFIG_OFFSET
 * (4608) is sector 9, not 2 and 3.  Confirm which is current. */
/* Sizes and offsets of our information */
enum {
VINUM_LABEL_OFFSET = 4096, /* offset of vinum label */
VINUMHEADERLEN = 512, /* size of vinum label */
VINUM_CONFIG_OFFSET = 4608, /* offset of first config copy */
MAXCONFIG = 65536, /* and size of config copy */
/* DATASTART is in units of DEV_BSIZE (not defined in this file):
 * the first block after the two config copies. */
DATASTART = (MAXCONFIG * 2 + VINUM_CONFIG_OFFSET) / DEV_BSIZE /* this is where the data starts */
};
/* hostname is 256 bytes long, but we don't need to shlep
* multiple copies in vinum. We use the host name just
* to identify this system, and 32 bytes should be ample
* for that purpose */
#define VINUMHOSTNAMELEN 32
/* Label identifying a vinum drive.  It is embedded in the on-disk
 * header (struct vinum_hdr) and in the in-core struct drive. */
struct vinum_label {
char sysname[VINUMHOSTNAMELEN]; /* system name at time of creation */
char name[MAXDRIVENAME]; /* our name of the drive */
struct timeval date_of_birth; /* the time it was created */
struct timeval last_update; /* and the time of last update */
off_t drive_size; /* total size in bytes of the drive.
* This value includes the headers */
};
/* Header of a vinum drive (the "slice definition" in the layout
 * diagram above).  The magic number tells a live vinum drive
 * (VINUM_MAGIC) from one whose label has been obliterated
 * (VINUM_NOMAGIC). */
struct vinum_hdr {
long long magic; /* we're long on magic numbers */
/* XXX Get these right for big-endian */
#define VINUM_MAGIC 22322600044678729LL /* should be this */
#define VINUM_NOMAGIC 22322600044678990LL /* becomes this after obliteration */
int config_length; /* size in bytes of each copy of the
* configuration info.
* This must be a multiple of the sector size. */
struct vinum_label label; /* unique label */
};
/* Information returned from read_drive_label.
 * read_drive_label itself is not defined in this file; the
 * values describe what was found on the partition examined. */
enum drive_label_info {
DL_CANT_OPEN, /* invalid partition */
DL_NOT_OURS, /* valid partition, but no vinum label */
DL_DELETED_LABEL, /* valid partition, deleted label found */
DL_WRONG_DRIVE, /* drive name doesn't match */
DL_OURS /* valid partition and label found */
};
/*** Drive definitions ***/
/* A drive corresponds to a disk slice. We use a different term to show
* the difference in usage: it doesn't have to be a slice, and could
* theoretically be a complete, unpartitioned disk.
*
* The reads, writes, bytes_read and bytes_written counters are
* updated on I/O completion (complete_rqe and sdio_done in
* interrupt.c). */
struct drive {
enum drivestate state; /* current state */
int subdisks_allocated; /* number of entries in sd */
int subdisks_used; /* and the number used */
int blocksize; /* size of fs blocks */
u_int64_t sectors_available; /* number of sectors still available */
int secsperblock; /* presumably sectors per fs block -- TODO confirm */
int lasterror; /* last error on drive */
int driveno; /* index of drive in vinum_conf */
int opencount; /* number of up subdisks */
u_int64_t reads; /* number of reads on this drive */
u_int64_t writes; /* number of writes on this drive */
u_int64_t bytes_read; /* number of bytes read */
u_int64_t bytes_written; /* number of bytes written */
dev_t dev; /* and device number */
char devicename[MAXDRIVENAME]; /* name of the slice it's on */
struct vnode *vp; /* vnode pointer */
struct proc *p; /* NOTE(review): undocumented; presumably the process which opened the drive -- confirm */
struct vinum_label label; /* and the label information */
struct partinfo partinfo; /* partition information */
int freelist_size; /* number of entries alloced in free list */
int freelist_entries; /* number of entries used in free list */
struct drive_freelist { /* sorted list of free space on drive */
u_int64_t offset; /* start of the free area (units not stated here; presumably sectors -- TODO confirm) */
long sectors; /* length of the free area in sectors */
} *freelist;
};
/*** Subdisk definitions ***/
/* A subdisk is described by its position on a drive (driveno,
 * driveoffset), its position in a plex (plexno, plexoffset) and
 * its length.  The statistics counters are updated on I/O
 * completion (complete_rqe and sdio_done in interrupt.c). */
struct sd {
enum sdstate state; /* state */
/* offsets in blocks */
int64_t driveoffset; /* offset on drive */
int64_t plexoffset; /* offset in plex */
u_int64_t sectors; /* and length in sectors */
int plexno; /* index of plex, if it belongs */
int driveno; /* index of the drive on which it is located */
int sdno; /* our index in vinum_conf */
int pid; /* pid of process which opened us */
u_int64_t reads; /* number of reads on this subdisk */
u_int64_t writes; /* number of writes on this subdisk */
u_int64_t bytes_read; /* number of bytes read */
u_int64_t bytes_written; /* number of bytes written */
char name[MAXSDNAME]; /* name of subdisk */
};
/*** Plex definitions ***/
/* kinds of plex organization.
 * plex_org() in vinumutil.c converts these values to text. */
enum plexorg {
plex_disorg, /* disorganized */
plex_concat, /* concatenated plex */
plex_striped, /* striped plex */
plex_raid5 /* RAID5 plex */
};
/* Region in plex (either defective or unmapped).
 * Used for the defective_region and unmapped_region lists in
 * struct plex.  The units of offset and length are not stated
 * here -- presumably sectors, TODO confirm. */
struct plexregion {
u_int64_t offset; /* start of region */
u_int64_t length; /* length */
};
/* In-core description of a plex: its organization and state, the
 * subdisks which make it up (sdnos, indices of the subdisks), the
 * volume it belongs to, range locks, statistics, revive
 * parameters and the lists of defective and unmapped regions. */
struct plex {
enum plexorg organization; /* Plex organization */
enum plexstate state; /* and current state */
u_int64_t length; /* total length of plex (max offset) */
int flags; /* presumably VF_* bits from enum objflags -- TODO confirm */
int stripesize; /* size of stripe or raid band, in sectors */
int subdisks; /* number of associated subdisks */
int subdisks_allocated; /* number of subdisks allocated space for */
int *sdnos; /* list of component subdisks */
int plexno; /* index of plex in vinum_conf */
int volno; /* index of volume */
int volplexno; /* number of plex in volume */
int pid; /* pid of process which opened us */
/* Lock information */
int locks; /* number of locks used */
int alloclocks; /* number of locks allocated */
struct rangelock *lock; /* ranges of locked addresses */
/* Statistics */
u_int64_t reads; /* number of reads on this plex */
u_int64_t writes; /* number of writes on this plex */
u_int64_t bytes_read; /* number of bytes read */
u_int64_t bytes_written; /* number of bytes written */
u_int64_t multiblock; /* requests that needed more than one block */
u_int64_t multistripe; /* requests that needed more than one stripe */
/* revive parameters */
u_int64_t revived; /* block number of current revive request */
int revive_blocksize; /* revive block size (bytes) */
int revive_interval; /* and time to wait between transfers */
struct request *waitlist; /* list of requests waiting on revive op */
/* geometry control */
int defective_regions; /* number of regions which are defective */
int defective_region_count; /* number of entries in defective_region */
struct plexregion *defective_region; /* list of offset/length pairs: defective sds */
int unmapped_regions; /* number of regions which are missing */
int unmapped_region_count; /* number of entries in unmapped_region */
struct plexregion *unmapped_region; /* list of offset/length pairs: missing sds */
char name[MAXPLEXNAME]; /* name of plex */
};
/*** Volume definitions ***/
/* MAXPLEX has the same value as MAXVOLPLEX above; it dimensions
 * the plex table in struct volume. */
#define MAXPLEX 8 /* maximum number of plexes */
/* In-core description of a volume.  The active count is
 * decremented in complete_rqe (interrupt.c) when a request on the
 * volume completes. */
struct volume {
enum volumestate state; /* current state */
int plexes; /* number of plexes */
int preferred_plex; /* plex to read from, -1 for round-robin */
int last_plex_read; /* index of plex used for last read,
* for round-robin */
dev_t devno; /* device number */
int flags; /* status and configuration flags */
int opencount; /* number of opens (all the same process) */
int openflags; /* flags supplied to last open(2) */
u_int64_t size; /* size of volume */
int disk; /* disk index */
int blocksize; /* logical block size */
int active; /* number of outstanding requests active */
int subops; /* and the number of suboperations */
pid_t pid; /* pid of locker */
/* Statistics */
u_int64_t bytes_read; /* number of bytes read */
u_int64_t bytes_written; /* number of bytes written */
u_int64_t reads; /* number of reads on this volume */
u_int64_t writes; /* number of writes on this volume */
u_int64_t recovered_reads; /* reads recovered from another plex */
/* Unlike subdisks in the plex, space for the plex pointers is static */
int plex[MAXPLEX]; /* index of plexes */
char name[MAXVOLNAME]; /* name of volume */
struct disklabel label; /* for DIOCGPART */
};
/* Table expansion. Expand table, which contains oldcount
 * entries of type element, by increment entries, and change
 * oldcount accordingly.
 *
 * table and oldcount must be lvalues: the address of table is
 * passed to expand_table, and oldcount is updated in place.
 * The arguments are parenthesized, so an increment such as
 * (a + b) scales correctly.  oldcount and increment are
 * evaluated more than once, so don't pass expressions with side
 * effects.
 *
 * The expansion is still a bare { } block, so that existing uses
 * with or without a trailing semicolon continue to compile.  Be
 * careful when using it as the unbraced body of an if which has
 * an else. */
#define EXPAND(table, element, oldcount, increment) \
{ \
    expand_table ((void **) &(table), \
                  (oldcount) * sizeof (element), \
                  ((oldcount) + (increment)) * sizeof (element) ); \
    (oldcount) += (increment); \
}
/* Information on vinum's memory usage.
 * malloced points to a table of struct mc entries, one per
 * allocation (presumably mallocs entries long -- TODO confirm). */
struct meminfo {
int mallocs; /* number of malloced blocks */
int total_malloced; /* total amount malloced */
int highwater; /* maximum number of mallocs */
struct mc *malloced; /* pointer to kernel table */
};
/* Record of a single memory allocation; struct meminfo points to
 * a table of these.  The field meanings below are inferred from
 * the names only -- TODO confirm against the allocator. */
struct mc {
int seq; /* presumably a sequence number for the allocation */
int size; /* presumably the size of the allocation */
short line; /* presumably the source line of the caller */
short flags; /* ALLOC_KVA or 0 */
#define ALLOC_KVA 1 /* allocated via kva calls */
int *databuf; /* really vm_object_t */
caddr_t address; /* presumably the address of the allocated memory */
char file[16]; /* presumably the source file name of the caller */
};
/* These enums are used by the state transition
* routines. They're in bit map format:
*
* Bit 0: Other plexes in the volume are down
* Bit 1: Other plexes in the volume are up
* Bit 2: The current plex is up
* Maybe they should be local to
* state.c
*
* Only the first enumerator has an explicit value; the others
* follow on from it, so they take the values 1 to 7 in order and
* each is the OR of the bits described above. */
enum volplexstate {
volplex_onlyusdown = 0, /* we're the only plex, and we're down */
volplex_alldown, /* 1: another plex is down, and so are we */
volplex_otherup, /* 2: another plex is up */
volplex_otherupdown, /* 3: other plexes are up and down */
volplex_onlyus, /* 4: we're up and alone */
volplex_onlyusup, /* 5: only we are up, others are down */
volplex_allup, /* 6: all plexes are up */
volplex_someup /* 7: some plexes are up, including us */
};
/* state map for plex.
 * One bit per kind of subdisk state found, so that the values can
 * be ORed together into a summary of a plex's subdisks.  Don't
 * confuse these with the values of enum sdstate (sd_empty,
 * sd_down and so on), which are states of an individual
 * subdisk. */
enum sdstates {
sd_emptystate = 1, /* found an SD which is empty */
sd_downstate = 2, /* found an SD which is down */
sd_crashedstate = 4, /* found an SD which is crashed */
sd_obsoletestate = 8, /* found an SD which is obsolete */
sd_stalestate = 16, /* found an SD which is stale */
sd_rebornstate = 32, /* found an SD which is reborn */
sd_upstate = 64, /* found an SD which is up */
sd_initstate = 128, /* found an SD which is init */
sd_otherstate = 256 /* found an SD in some other state */
};
/* This is really just a parameter to pass to
* set_<foo>_state, but since it needs to be known
* in the external definitions, we need to define
* it here.
*
* The values are single bits and can be ORed together.  As an
* example, complete_rqe in interrupt.c passes setstate_force to
* set_sd_state when it takes a subdisk down after an I/O error. */
enum setstateflags {
setstate_none = 0, /* no flags */
setstate_force = 1, /* force the state change */
setstate_configuring = 2, /* we're currently configuring, don't save */
setstate_recursing = 4, /* we're called from another setstate function */
setstate_norecurse = 8 /* don't call other setstate functions */
};
#ifdef DEBUG
/* Debugging stuff */
#define DEBUG_ADDRESSES 1
#define DEBUG_NUMOUTPUT 2
#endif

View File

@ -0,0 +1,37 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: COPYRIGHT,v 1.1 1998/03/05 06:07:05 grog Exp grog $
*/

View File

@ -0,0 +1,26 @@
# $Id: Makefile.lkm.lite,v 1.2 1998/08/13 06:07:29 grog Exp grog $
# Makefile for the vinum kernel module; the build rules come from
# bsd.kmod.mk, which is included at the end.
#
# NOTE(review): .PATH points at the ccd driver directory, which looks
# like a leftover from the Makefile this one was derived from -- confirm.
.PATH: ${.CURDIR}/../../sys/dev/ccd
KMOD= vinum_mod
# NOTE(review): state.h has a rule below but is not listed in SRCS, and
# some of the names here (util.c, for example) differ from the vinum*
# file names used elsewhere in this commit -- confirm.
SRCS= vinum.c vinum.h vnode_if.h parser.c config.c io.c util.c vinumhdr.h request.h \
state.c memory.c request.c lock.c vinumext.h vinumio.h vinumkw.h \
vinumstate.h vinumvar.h revive.c vinumioctl.c interrupt.c
NOMAN=
PSEUDO_LKM=
# -DDEBUG defines DEBUG as 1, which enables the code guarded by
# "#if DEBUG" and "#ifdef DEBUG" in the sources.
CFLAGS = -I. -O -g -I/usr/include/machine -DDEBUG -Wall -Wno-unused -Wno-parentheses
CLEANFILES+= vinum.h vnode_if.h vnode_if.c
all:
# We don't need this, but the Makefile wants it
vinum.h:
touch $@
# state.h is generated by running maketabs, which is built from
# maketabs.c by the rule that follows.
state.h: maketabs vinumstate.h
./maketabs >state.h
maketabs: maketabs.c
${CC} -g -o maketabs maketabs.c
.include <bsd.kmod.mk>

1712
sys/modules/vinum/config.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,190 @@
/* interrupt.c: bottom half of the driver */
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: interrupt.c,v 1.1 1998/08/13 06:12:27 grog Exp grog $
*/
#define REALLYKERNEL
#include "vinumhdr.h"
#include "request.h"
#include <miscfs/specfs/specdev.h>
#include <sys/resourcevar.h>
void complete_raid5_write(struct rqelement *);
void freerq(struct request *rq);
void free_rqg(struct rqgroup *rqg);
void complete_rqe(struct buf *bp);
void sdio_done(struct buf *bp);
/* Take a completed buffer, transfer the data back if
* it's a read, and complete the high-level request
* if this is the last subrequest.
*
* The bp parameter is in fact a struct rqelement, which
* includes a couple of extras at the end.
*/
void
complete_rqe(struct buf *bp)
{
BROKEN_GDB;
struct rqelement *rqe;
struct request *rq;
struct rqgroup *rqg;
struct buf *ubp; /* user buffer */
rqe = (struct rqelement *) bp; /* point to the element element that completed */
rqg = rqe->rqg; /* and the request group */
rq = rqg->rq; /* and the complete request */
if ((bp->b_flags & B_ERROR) != 0) { /* transfer in error */
if (bp->b_error != 0) /* did it return a number? */
rq->error = bp->b_error; /* yes, put it in. */
else if (rq->error == 0) /* no: do we have one already? */
rq->error = EIO; /* no: catchall "I/O error" */
if (rq->error == EIO) /* I/O error, */
set_sd_state(rqe->sdno, sd_crashed, setstate_force); /* take the subdisk down */
}
/* Now update the statistics */
if (bp->b_flags & B_READ) { /* read operation */
DRIVE[rqe->driveno].reads++;
DRIVE[rqe->driveno].bytes_read += bp->b_bcount;
SD[rqe->sdno].reads++;
SD[rqe->sdno].bytes_read += bp->b_bcount;
PLEX[rqe->rqg->plexno].reads++;
PLEX[rqe->rqg->plexno].bytes_read += bp->b_bcount;
} else { /* write operation */
DRIVE[rqe->driveno].writes++;
DRIVE[rqe->driveno].bytes_written += bp->b_bcount;
SD[rqe->sdno].writes++;
SD[rqe->sdno].bytes_written += bp->b_bcount;
PLEX[rqe->rqg->plexno].writes++;
PLEX[rqe->rqg->plexno].bytes_written += bp->b_bcount;
}
ubp = rq->bp; /* user buffer */
rqg->active--; /* one less request active */
if (rqg->active == 0) /* request group finished, */
rq->active--; /* one less */
if (rq->active == 0) { /* request finished, */
#if DEBUG
if (debug & 4) {
if (ubp->b_resid != 0) /* still something to transfer? */
Debugger("resid");
{
int i;
for (i = 0; i < ubp->b_bcount; i += 512) /* XXX debug */
if (((char *) ubp->b_data)[i] != '<') { /* and not what we expected */
printf("At 0x%x (offset 0x%x): '%c' (0x%x)\n",
(int) (&((char *) ubp->b_data)[i]),
i,
((char *) ubp->b_data)[i],
((char *) ubp->b_data)[i]);
Debugger("complete_request checksum");
}
}
}
#endif
if (rq->error) { /* did we have an error? */
ubp->b_flags |= B_ERROR; /* yes, propagate to user */
ubp->b_error = rq->error;
} else
ubp->b_resid = 0; /* completed our transfer */
if (rq->isplex == 0) /* volume request, */
VOL[rq->volplex.volno].active--; /* another request finished */
biodone(ubp); /* top level buffer completed */
freerq(rq); /* return the request storage */
}
}
/*
 * Release a request and everything attached to it: the data
 * buffers of those elements which are flagged XFR_MALLOCED and
 * have a buffer, each request group in the chain, and finally
 * the request itself.
 */
void
freerq(struct request *rq)
{
    BROKEN_GDB;
    struct rqgroup *group;                          /* group we're working on */
    struct rqgroup *following;                      /* the one after it */
    int elemno;

    group = rq->rqg;
    while (group != NULL) {                         /* walk the chain of groups */
        for (elemno = 0; elemno < group->count; elemno++) {
            if (!(group->rqe[elemno].flags & XFR_MALLOCED)) /* buffer isn't ours to free */
                continue;
            if (group->rqe[elemno].b.b_data)        /* the allocation succeeded, */
                Free(group->rqe[elemno].b.b_data);  /* free it */
        }
        following = group->next;                    /* remember the link before the group goes away */
        Free(group);
        group = following;
    }
    Free(rq);                                       /* finally the request itself */
}
/*
 * Free the data buffer of the first element of a request group, but
 * only for groups flagged XFR_GROUPOP (RAID 5 requests) that have an
 * element array with a data buffer attached.
 */
void
free_rqg(struct rqgroup *rqg)
{
    if ((rqg->flags & XFR_GROUPOP) == 0)		    /* not a RAID 5 request */
	return;
    if (!(rqg->rqe))					    /* no buffer structure */
	return;
    if (rqg->rqe->b.b_data)				    /* a buffer was allocated, */
	Free(rqg->rqe->b.b_data);			    /* free it */
}
/*
 * I/O on subdisk completed.
 *
 * bp is the buffer header embedded in a struct sdbuf (it is cast
 * straight back to one), and sbp->bp is the caller's original buffer.
 * Copy the completion status (error and residual count) from our
 * private header to the caller's buffer, complete the caller's I/O,
 * update the drive and subdisk statistics and free the sdbuf.
 *
 * The status must go to sbp->bp: writing it to bp would only copy
 * the fields of our own header onto themselves, and the caller would
 * never see a failed transfer.
 */
void
sdio_done(struct buf *bp)
{
    struct sdbuf *sbp;
    struct buf *ubp;					    /* the caller's buffer */

    sbp = (struct sdbuf *) bp;
    ubp = sbp->bp;
    if (sbp->b.b_flags & B_ERROR) {			    /* had an error */
	ubp->b_flags |= B_ERROR;			    /* propagate it upwards */
	ubp->b_error = sbp->b.b_error;
    }
    ubp->b_resid = sbp->b.b_resid;			    /* and the residual count */
    biodone(ubp);					    /* complete the caller's I/O */

    /* Now update the statistics */
    if (bp->b_flags & B_READ) {				    /* read operation */
	DRIVE[sbp->driveno].reads++;
	DRIVE[sbp->driveno].bytes_read += bp->b_bcount;
	SD[sbp->sdno].reads++;
	SD[sbp->sdno].bytes_read += bp->b_bcount;
    } else {						    /* write operation */
	DRIVE[sbp->driveno].writes++;
	DRIVE[sbp->driveno].bytes_written += bp->b_bcount;
	SD[sbp->sdno].writes++;
	SD[sbp->sdno].bytes_written += bp->b_bcount;
    }
    Free(sbp);
}

886
sys/modules/vinum/io.c Normal file
View File

@ -0,0 +1,886 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: io.c,v 1.16 1998/08/10 23:47:21 grog Exp grog $
*/
/* STATIC expands to nothing, so anything declared with it keeps external linkage */
#define STATIC /* nothing while we're testing XXX */
#if __FreeBSD__ < 3 /* this is in sys/disklabel.h in 3.0 and on */
#define DTYPE_VINUM 12 /* vinum volume */
#endif
/* NOTE(review): REALLYKERNEL is presumably tested inside vinumhdr.h; confirm there */
#define REALLYKERNEL
#include "vinumhdr.h"
#include <miscfs/specfs/specdev.h>
/* jump buffer armed with setjmp() in check_drive() around each parse_config() call */
extern jmp_buf command_fail; /* return on a failed command */
struct _ioctl_reply *ioctl_reply; /* data pointer, for returning error messages */
#if __FreeBSD__ >= 3
/* Why aren't these declared anywhere? XXX */
int setjmp(jmp_buf);
void longjmp(jmp_buf, int);
#endif
/* pointer to ioctl p parameter, to save passing it around.
 * Used in this file as the process argument for open_drive(),
 * VOP_IOCTL() and the uio structures. */
extern struct proc *myproc;
/* Open the device associated with the drive, and set drive's vp */
int
open_drive(struct drive *drive, struct proc *p)
{
BROKEN_GDB;
struct nameidata nd;
struct vattr va;
int error;
if (drive->devicename[0] == '\0') /* no device name */
sprintf(drive->devicename, "/dev/%s", drive->label.name); /* get it from the drive name */
NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, drive->devicename, p);
error = vn_open(&nd, FREAD | FWRITE, 0); /* open the device */
if (error != 0) { /* can't open? */
set_drive_state(drive->driveno, drive_down, 1);
drive->lasterror = error;
printf("vinum open_drive %s: failed with error %d\n", drive->devicename, error); /* XXX */
return error;
}
drive->vp = nd.ni_vp;
drive->p = p;
if (drive->vp->v_usecount > 1) { /* already in use? */
#if __FreeBSD__ == 2 /* pre-4.4BSD Lite/2 parameters */
VOP_UNLOCK(drive->vp);
#else
VOP_UNLOCK(drive->vp, 0, p);
#endif
close_drive(drive);
set_drive_state(drive->driveno, drive_down, 1);
drive->lasterror = EBUSY;
printf("vinum open_drive %s: Drive in use\n", drive->devicename); /* XXX */
return EBUSY;
}
error = VOP_GETATTR(drive->vp, &va, NOCRED, p);
if (error) {
#if __FreeBSD__ == 2 /* pre-4.4BSD Lite/2 parameters */
VOP_UNLOCK(drive->vp);
#else
VOP_UNLOCK(drive->vp, 0, p);
#endif
close_drive(drive);
set_drive_state(drive->driveno, drive_down, 1);
drive->lasterror = error;
printf("vinum open_drive %s: GETAATTR returns error %d\n", drive->devicename, error); /* XXX */
return error;
}
drive->dev = va.va_rdev; /* device */
if (va.va_type != VBLK) { /* only consider block devices */
#if __FreeBSD__ == 2 /* pre-4.4BSD Lite/2 parameters */
VOP_UNLOCK(drive->vp);
#else
VOP_UNLOCK(drive->vp, 0, p);
#endif
close_drive(drive);
set_drive_state(drive->driveno, drive_down, 1); /* this also closes the drive */
drive->lasterror = ENOTBLK;
printf("vinum open_drive %s: Not a block device\n", drive->devicename); /* XXX */
return ENOTBLK;
}
drive->vp->v_numoutput = 0;
#if __FreeBSD__ == 2 /* pre-4.4BSD Lite/2 parameters */
VOP_UNLOCK(drive->vp);
#else
VOP_UNLOCK(drive->vp, 0, p);
#endif
return 0;
}
/*
 * Derive the working parameters of a drive from its partition
 * information: block geometry, the label's host name, creation time
 * and size, the number of sectors available for subdisks, and an
 * initial free list holding a single entry that covers everything
 * from DATASTART to the end of the drive.
 *
 * Returns 0 and marks the drive up on success.  Returns ENOSPC if
 * the drive is smaller than MINVINUMSLICE (the drive is then marked
 * down) or if the free list can't be allocated.
 */
int
set_drive_parms(struct drive *drive)
{
    /* Block geometry */
    drive->blocksize = BLKDEV_IOSIZE;			    /* XXX do we need this? */
    drive->secsperblock = drive->blocksize / drive->partinfo.disklab->d_secsize;

    /* Label: who created it, when, and how big it is (in bytes) */
    bcopy(hostname, drive->label.sysname, VINUMHOSTNAMELEN);
#if __FreeBSD__ >= 3
    getmicrotime(&drive->label.date_of_birth);
#else
    drive->label.date_of_birth = time;
#endif
    drive->label.drive_size = ((u_int64_t) drive->partinfo.part->p_size)
	*((u_int64_t) drive->partinfo.disklab->d_secsize);

    /* Sectors left over for subdisks once the header area is deducted */
    drive->sectors_available = drive->label.drive_size / DEV_BSIZE - DATASTART;

    /*
     * XXX Bug in 3.0 as of January 1998: non-existent slices can be
     * opened; they show up with a length of 0.  Reject anything too
     * small to be useful.
     */
    if (drive->label.drive_size < MINVINUMSLICE) {
	set_drive_state(drive->driveno, drive_down, 1);
	printf("vinum open_drive %s: Drive too small\n", drive->devicename); /* XXX */
	drive->lasterror = ENOSPC;
	return ENOSPC;
    }

    /* Free list: one entry describing (almost) the whole drive */
    drive->freelist = (struct drive_freelist *)
	Malloc(INITIAL_DRIVE_FREELIST * sizeof(struct drive_freelist));
    drive->freelist_size = INITIAL_DRIVE_FREELIST;
    if (drive->freelist == NULL)			    /* out of memory */
	return ENOSPC;
    drive->freelist[0].offset = DATASTART;
    drive->freelist[0].sectors = (drive->label.drive_size >> DEV_BSHIFT) - DATASTART;
    drive->freelist_entries = 1;

    set_drive_state(drive->driveno, drive_up, 1);	    /* the drive is now accessible */
    return 0;
}
/* Initialize a drive: open the device and add device
* information */
int
init_drive(struct drive *drive)
{
BROKEN_GDB;
int error;
if (drive->devicename[0] == '\0') { /* no device name yet, default to drive name */
drive->lasterror = EINVAL;
printf("vinum: Can't open drive without drive name\n"); /* XXX */
return EINVAL;
}
error = open_drive(drive, myproc); /* open the drive */
if (error)
return error;
error = VOP_IOCTL(drive->vp, /* get the partition information */
DIOCGPART,
(caddr_t) & drive->partinfo,
FREAD,
NOCRED,
myproc);
if (error) {
printf("vinum open_drive %s: Can't get partition information, error %d\n",
drive->devicename,
error); /* XXX */
close_drive(drive);
drive->lasterror = error;
set_drive_state(drive->driveno, drive_down, 1);
return error;
}
if (drive->partinfo.part->p_fstype != 0) { /* not plain */
drive->lasterror = EFTYPE;
printf("vinum open_drive %s: Wrong partition type for vinum\n", drive->devicename); /* XXX */
close_drive(drive);
set_drive_state(drive->driveno, drive_down, 1);
return EFTYPE;
}
return set_drive_parms(drive); /* set various odds and ends */
}
/*
 * Close the drive's vnode if there is one and forget it.
 * Does nothing for a drive that isn't open; reports no errors.
 */
void
close_drive(struct drive *drive)
{
    if (drive->vp == NULL)				    /* not open */
	return;
    vn_close(drive->vp, FREAD | FWRITE, NOCRED, drive->p);
    drive->vp = NULL;					    /* so we don't close it twice */
}
/*
 * Take a drive out of the configuration: overwrite the magic number
 * in its on-disk label with VINUM_NOMAGIC, close the device, mark
 * the slot unallocated and save the changed configuration.
 *
 * The caller must make sure the drive is not active.
 */
void
remove_drive(int driveno)
{
    BROKEN_GDB;
    long long int deleted_magic = VINUM_NOMAGIC;	    /* replaces the real magic number */
    struct drive *victim = vinum_conf.drive + driveno;

    /* Obliterate the magic number, but leave a hint that it was ours */
    write_drive(victim, (char *) &deleted_magic, 8, VINUM_LABEL_OFFSET);
    close_drive(victim);
    victim->state = drive_unallocated;			    /* forget what we knew about it */
    save_config();					    /* record the change on the other drives */
}
/*
 * Transfer drive data directly through the block device's strategy
 * routine, bypassing the buffer cache, and sleep until it completes.
 *
 * drive   drive to transfer to or from (must be open)
 * buf     kernel address of the data
 * length  number of bytes to transfer
 * offset  byte offset on the drive; must be a multiple of the sector
 *         size (the alignment test assumes a power-of-two sector size)
 * flag    B_READ or B_WRITE
 *
 * The original comment describes read_drive and write_drive as
 * defines wrapping this function:
 *   #define read_drive(a, b, c, d) driveio (a, b, c, d, B_READ)
 *   #define write_drive(a, b, c, d) driveio (a, b, c, d, B_WRITE)
 * NOTE(review): this file defines them as separate functions; confirm
 * whether anything still calls driveio.
 *
 * Return error number: 0 on success, EINVAL for a misaligned offset,
 * otherwise the b_error of the failed transfer.
 */
int
driveio(struct drive *drive, void *buf, size_t length, off_t offset, int flag)
{
    BROKEN_GDB;
    int error;
    struct buf *bp;
    int spl;

    error = 0;
    /*
     * The strategy routine works in whole sectors.  Refuse an offset
     * that isn't on a sector boundary rather than silently rounding
     * it down and transferring the wrong data.
     */
    if (offset & (drive->partinfo.disklab->d_secsize - 1))
	return EINVAL;

    /* Get a buffer header and start from a clean slate */
    bp = (struct buf *) Malloc(sizeof(struct buf));
    CHECKALLOC(bp, "Can't allocate memory");
    bzero(bp, sizeof(struct buf));			    /* clear the header, not the caller's pointer */
    bp->b_flags = B_BUSY | flag;			    /* tell us when it's done */
    bp->b_iodone = drive_io_done;			    /* here */
    bp->b_proc = myproc;				    /* process */
    bp->b_dev = drive->vp->v_un.vu_specinfo->si_rdev;	    /* device */
    bp->b_blkno = offset / drive->partinfo.disklab->d_secsize; /* block number */
    bp->b_data = buf;
    bp->b_vp = drive->vp;				    /* vnode */
    bp->b_bcount = length;
    bp->b_bufsize = length;
    (*bdevsw[major(bp->b_dev)]->d_strategy) (bp);	    /* initiate the transfer */

    spl = splbio();
    while ((bp->b_flags & B_DONE) == 0) {
	bp->b_flags |= B_CALL;				    /* wake me again */
	tsleep((caddr_t) bp, PRIBIO, "driveio", 0);	    /* and wait for it to complete */
    }
    splx(spl);
    if (bp->b_flags & B_ERROR)				    /* didn't work */
	error = bp->b_error;				    /* get the error return */
    Free(bp);						    /* then return the buffer */
    return error;
}
/*
 * Read data from a drive through the buffer cache.
 *
 * drive   drive to read from (must be open)
 * buf     kernel address to read into
 * length  number of bytes to read
 * offset  byte offset on the drive; need not be block aligned
 *
 * The transfer is done one drive block (drive->blocksize bytes) at a
 * time: the block is read with bread(), or with breadn() and one
 * block of read-ahead when the read looks sequential, and the wanted
 * part is copied out with uiomove().
 *
 * Return error number (0 on success).
 */
int
read_drive(struct drive *drive, void *buf, size_t length, off_t offset)
{
    BROKEN_GDB;
    int error;
    struct buf *bp;
    daddr_t nextbn;
    long bscale;
    struct uio uio;
    struct iovec iov;
    daddr_t blocknum;					    /* block number */
    int blockoff;					    /* offset in block */
    int count;						    /* amount to transfer */

    iov.iov_base = buf;
    iov.iov_len = length;

    uio.uio_iov = &iov;
    uio.uio_iovcnt = 1;					    /* one iovec, not one per byte */
    uio.uio_offset = offset;
    uio.uio_resid = length;
    uio.uio_segflg = UIO_SYSSPACE;
    uio.uio_rw = UIO_READ;
    uio.uio_procp = myproc;

    bscale = btodb(drive->blocksize);			    /* mask off offset from block number */
    do {
	blocknum = btodb(uio.uio_offset) & ~(bscale - 1);   /* get the block number */
	blockoff = uio.uio_offset % drive->blocksize;	    /* offset in block */
	count = min((unsigned) (drive->blocksize - blockoff), /* amount to transfer in this block */
	    uio.uio_resid);

	/* XXX Check this.  I think the test is wrong */
	if (drive->vp->v_lastr + bscale == blocknum) {	    /* did our last read finish in this block? */
	    nextbn = blocknum + bscale;			    /* note the end of the transfer */
	    error = breadn(drive->vp,			    /* and read with read-ahead */
		blocknum,
		(int) drive->blocksize,
		&nextbn,
		(int *) &drive->blocksize,
		1,
		NOCRED,
		&bp);
	} else						    /* random read: just read this block */
	    error = bread(drive->vp, blocknum, (int) drive->blocksize, NOCRED, &bp);
	drive->vp->v_lastr = blocknum;			    /* note the last block we read */
	if (error) {
	    brelse(bp);					    /* the buffer must be released even on error */
	    return error;
	}
	count = min(count, drive->blocksize - bp->b_resid); /* don't copy more than was read */
	error = uiomove((char *) bp->b_data + blockoff, count, &uio); /* move the data */
	brelse(bp);
    }
    while (error == 0 && uio.uio_resid > 0 && count != 0);
    return error;
}
/*
 * Write data to a drive through the buffer cache.
 *
 * drive   drive to write to
 * buf     kernel address of the data
 * length  number of bytes to write
 * offset  byte offset on the drive; need not be block aligned
 *
 * A drive in state drive_down is silently ignored (returns 0).  A
 * drive without a vnode returns ENODEV.
 *
 * The transfer is done one drive block at a time.  A whole block is
 * simply obtained with getblk(); a partial block is read first so
 * that the bytes outside the transfer are preserved.  A block that is
 * filled to its end is written asynchronously at once, otherwise the
 * write is delayed.
 *
 * Return error number (0 on success).  Errors are also recorded in
 * drive->lasterror, and EIO takes the drive down.
 */
int
write_drive(struct drive *drive, void *buf, size_t length, off_t offset)
{
    BROKEN_GDB;
    int error;
    struct buf *bp;
    struct uio uio;
    struct iovec iov;
    daddr_t blocknum;					    /* block number */
    int blockoff;					    /* offset in block */
    int count;						    /* amount to transfer */
    int blockmask;					    /* sectors per block - 1 */

    if (drive->state == drive_down)			    /* currently down */
	return 0;					    /* ignore */
    if (drive->vp == NULL) {
	drive->lasterror = ENODEV;
	return ENODEV;					    /* not configured yet */
    }
    iov.iov_base = buf;
    iov.iov_len = length;

    uio.uio_iov = &iov;
    uio.uio_iovcnt = 1;					    /* one iovec, not one per byte */
    uio.uio_offset = offset;
    uio.uio_resid = length;
    uio.uio_segflg = UIO_SYSSPACE;
    uio.uio_rw = UIO_WRITE;
    uio.uio_procp = myproc;

    error = 0;
    blockmask = btodb(drive->blocksize) - 1;		    /* low bits of a sector number
							     * that select a sector in a block */
    do {
	blocknum = btodb(uio.uio_offset) & ~blockmask;	    /* get the block number */
	blockoff = uio.uio_offset % drive->blocksize;	    /* offset in block */
	count = min((unsigned) (drive->blocksize - blockoff), /* amount to transfer in this block */
	    uio.uio_resid);
	if (count == drive->blocksize)			    /* the whole block */
	    bp = getblk(drive->vp, blocknum, drive->blocksize, 0, 0); /* just get it */
	else						    /* partial block: */
	    error = bread(drive->vp,			    /* read it first */
		blocknum,
		drive->blocksize,
		NOCRED,
		&bp);
	count = min(count, drive->blocksize - bp->b_resid); /* how much will we transfer now? */
	if (error == 0)
	    error = uiomove((char *) bp->b_data + blockoff, /* move the data to the block */
		count,
		&uio);
	if (error) {
	    brelse(bp);
	    drive->lasterror = error;
	    switch (error) {
	    case EIO:
		set_drive_state(drive->driveno, drive_down, 1);
		break;
		/* XXX Add other possibilities here */
	    default:
		break;					    /* a label must be followed by a statement */
	    }
	    return error;
	}
	if (count + blockoff == drive->blocksize)
	    /* The transfer goes to the end of the block.  There's
	     * no need to wait for any more data to arrive. */
	    bawrite(bp);				    /* start the write now */
	else
	    bdwrite(bp);				    /* do a delayed write */
    }
    while (error == 0 && uio.uio_resid > 0 && count != 0);
    if (error)
	drive->lasterror = error;
    return error;					    /* OK */
}
/* Wake up on completion */
void
drive_io_done(struct buf *bp)
{
BROKEN_GDB;
wakeup((caddr_t) bp); /* Wachet auf! */
bp->b_flags &= ~B_CALL; /* don't do this again */
}
/* Check a drive for a vinum header. If found,
* update the drive information. We come here
* with a partially populated drive structure
* which includes the device name.
*
* Return information on what we found
*/
enum drive_label_info
read_drive_label(struct drive *drive)
{
BROKEN_GDB;
int error;
int result; /* result of our search */
struct vinum_hdr *vhdr; /* and as header */
error = init_drive(drive); /* find the drive */
if (error) /* find the drive */
return DL_CANT_OPEN; /* not ours */
vhdr = (struct vinum_hdr *) Malloc(VINUMHEADERLEN); /* allocate buffers */
CHECKALLOC(vhdr, "Can't allocate memory");
error = read_drive(drive, (void *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET);
if (vhdr->magic == VINUM_MAGIC) { /* ours! */
if (drive->label.name[0] /* we have a name for this drive */
&&(strcmp(drive->label.name, vhdr->label.name))) { /* but it doesn't match the real name */
drive->lasterror = EINVAL;
result = DL_WRONG_DRIVE; /* it's the wrong drive */
} else {
set_drive_parms(drive); /* and set other parameters */
result = DL_OURS;
}
/* We copy the drive anyway so that we have
* the correct name in the drive info. This
* may not be the name specified */
drive->label = vhdr->label; /* put in the label information */
} else if (vhdr->magic == VINUM_NOMAGIC) /* was ours, but we gave it away */
result = DL_DELETED_LABEL;
else
result = DL_NOT_OURS; /* we could have it, but we don't yet */
Free(vhdr); /* that's all. */
return result;
}
/* Check a drive for a vinum header. If found,
* read configuration information from the drive and
* incorporate the data into the configuration.
*
* Return error number
*/
int
check_drive(char *drivename)
{
BROKEN_GDB;
int error;
struct nameidata nd; /* mount point credentials */
char *config_text; /* read the config info from disk into here */
volatile char *cptr; /* pointer into config information */
char *eptr; /* end pointer into config information */
int driveno;
struct drive *drive;
char *config_line; /* copy the config line to */
driveno = find_drive_by_dev(drivename, 1); /* doesn't exist, create it */
drive = &vinum_conf.drive[driveno]; /* and get a pointer */
strcpy(drive->devicename, drivename); /* put in device name */
if (read_drive_label(drive) == DL_OURS) { /* ours! */
config_text = (char *) Malloc(MAXCONFIG * 2); /* allocate buffers */
CHECKALLOC(config_text, "Can't allocate memory");
config_line = (char *) Malloc(MAXCONFIGLINE * 2); /* allocate buffers */
CHECKALLOC(config_line, "Can't allocate memory");
/* Read in both copies of the configuration information */
error = read_drive(drive, config_text, MAXCONFIG * 2, VINUM_CONFIG_OFFSET);
if (error != 0) {
printf("vinum: Can't read device %s, error %d\n", drive->devicename, error);
Free(config_text);
Free(config_line);
free_drive(drive); /* give it back */
return error;
}
/* XXX At this point, check that the two copies are the same, and do something useful if not.
* In particular, consider which is newer, and what this means for the integrity of the
* data on the drive */
/* Parse the configuration, and add it to the global configuration */
for (cptr = config_text; *cptr != '\0';) { /* love this style(9) */
volatile int parse_status; /* return value from parse_config */
for (eptr = config_line; (*cptr != '\n') && (*cptr != '\0');) /* until the end of the line */
*eptr++ = *cptr++;
*eptr = '\0'; /* and delimit */
if (setjmp(command_fail) == 0) { /* come back here on error and continue */
parse_status = parse_config(config_line, &keyword_set); /* parse the config line */
if (parse_status < 0) { /* error in config */
/* This config should have been parsed in user
* space. If we run into problems here, something
* serious is afoot. Complain and let the user
* snarf the config to see what's wrong */
printf("vinum: Config error on drive %s, aborting integration\n", nd.ni_dirp);
Free(config_text);
Free(config_line);
free_drive(drive); /* give it back */
return EINVAL;
}
}
while (*cptr == '\n')
cptr++; /* skip to next line */
}
Free(config_text);
if ((vinum_conf.flags & VF_READING_CONFIG) == 0) /* not reading config */
updateconfig(0); /* update object states */
printf("vinum: read configuration from %s\n", drivename);
return 0; /* it all worked */
} else { /* no vinum label found */
if (drive->lasterror) {
set_drive_state(drive->driveno, drive_down, 1);
return drive->lasterror;
} else
return ENODEV; /* not our device */
}
}
/* Kludge: kernel printf doesn't handle longs correctly XXX */
static char *lltoa(long long l, char *s);
static char *sappend(char *txt, char *s);

/*
 * Write the decimal representation of l at s, with a leading '-' for
 * negative values.  No terminating NUL is written.
 *
 * Returns a pointer to the character after the last digit, so that
 * calls can be chained.
 *
 * The magnitude is taken in unsigned arithmetic: negating the most
 * negative long long as a signed value would overflow.
 */
static char *
lltoa(long long l, char *s)
{
    unsigned long long magnitude;

    if (l < 0) {
	*s++ = '-';
	magnitude = 0ULL - (unsigned long long) l;
    } else
	magnitude = (unsigned long long) l;
    if (magnitude > 9) {
	s = lltoa((long long) (magnitude / 10), s);	    /* leading digits first */
	magnitude %= 10;
    }
    *s++ = (char) (magnitude + '0');
    return s;
}
/*
 * Copy the string txt, including its terminating NUL, to s.  Note
 * the argument order: source first, destination second.
 *
 * Returns a pointer to the NUL just written, which is where the
 * next piece of text should go.
 */
static char *
sappend(char *txt, char *s)
{
    for (;;) {
	char c = *txt++;

	*s = c;
	if (c == '\0')					    /* copied the terminator: done */
	    return s;
	s++;
    }
}
/* Format the configuration in text form into the buffer
 * at config. Don't go beyond len bytes
 * XXX this stinks. Fix soon.
 *
 * The buffer is cleared first. Then one line of text is written
 * for each drive, volume, plex and subdisk in vinum_conf that is
 * not in its "unallocated" state, in that order.
 *
 * The length check is made after each object has been written: if
 * the write position has passed config[len - 80], a message is
 * printed and the function returns with the text written so far. */
void
format_config(char *config, int len)
{
BROKEN_GDB;
int i;
int j;
/* s is the current write position in config */
char *s = config;
bzero(config, len);
/* First write the drive configuration */
for (i = 0; i < vinum_conf.drives_used; i++) {
struct drive *drive;
drive = &vinum_conf.drive[i];
if (drive->state != drive_unallocated) {
sprintf(s,
"drive %s state %s device %s\n",
drive->label.name,
drive_state(drive->state),
drive->devicename);
while (*s)
s++; /* find the end */
if (s > &config[len - 80]) {
printf("vinum: configuration data overflow\n");
return;
}
}
}
/* Then the volume configuration */
for (i = 0; i < vinum_conf.volumes_used; i++) {
struct volume *vol;
vol = &vinum_conf.volume[i];
if (vol->state != volume_unallocated) {
if (vol->preferred_plex >= 0) /* preferences, */
sprintf(s,
"volume %s state %s readpol prefer %s",
vol->name,
volume_state(vol->state),
vinum_conf.plex[vol->preferred_plex].name);
else /* default round-robin */
sprintf(s,
"volume %s state %s",
vol->name,
volume_state(vol->state));
while (*s)
s++; /* find the end */
s = sappend("\n", s);
if (s > &config[len - 80]) {
printf("vinum: configuration data overflow\n");
return;
}
}
}
/* Then the plex configuration */
for (i = 0; i < vinum_conf.plexes_used; i++) {
struct plex *plex;
plex = &vinum_conf.plex[i];
if (plex->state != plex_unallocated) {
sprintf(s, "plex name %s state %s org %s ",
plex->name,
plex_state(plex->state),
plex_org(plex->organization));
while (*s)
s++; /* find the end */
/* only striped plexes record a stripe size */
if ((plex->organization == plex_striped)
) {
sprintf(s, "%db ", (int) plex->stripesize);
while (*s)
s++; /* find the end */
}
if (plex->volno >= 0) /* we have a volume */
sprintf(s, "vol %s ", vinum_conf.volume[plex->volno].name);
while (*s)
s++; /* find the end */
/* s is not advanced inside this loop, so every sprintf starts at
 * the same position, and the sappend below then writes the newline
 * at that position as well.
 * NOTE(review): it looks as if the subdisk list never makes it into
 * the output. The subdisk lines below name their plex, so this may
 * be harmless, but confirm what the config parser expects before
 * changing the on-disk format. */
for (j = 0; j < plex->subdisks; j++) {
sprintf(s, " sd %s", vinum_conf.sd[plex->sdnos[j]].name);
}
s = sappend("\n", s);
if (s > &config[len - 80]) {
printf("vinum: configuration data overflow\n");
return;
}
}
}
/* And finally the subdisk configuration */
for (i = 0; i < vinum_conf.subdisks_used; i++) {
struct sd *sd = &vinum_conf.sd[i]; /* XXX */
if (vinum_conf.sd[i].state != sd_unallocated) {
sprintf(s,
"sd name %s drive %s plex %s state %s len ",
sd->name,
vinum_conf.drive[sd->driveno].label.name,
vinum_conf.plex[sd->plexno].name,
sd_state(sd->state));
while (*s)
s++; /* find the end */
/* the numbers are formatted with lltoa rather than printf formats;
 * each is followed by a 'b' */
s = lltoa(sd->sectors, s);
s = sappend("b driveoffset ", s);
s = lltoa(sd->driveoffset, s);
s = sappend("b plexoffset ", s);
s = lltoa(sd->plexoffset, s);
s = sappend("b\n", s);
if (s > &config[len - 80]) {
printf("vinum: configuration data overflow\n");
return;
}
}
}
}
/* Write the configuration to all vinum slices.
 *
 * Does nothing (and returns 0) while VF_CONFIGURING is set.
 * Otherwise the configuration is formatted once and then written,
 * together with a header carrying the drive's own label, to every
 * drive that is not down, unallocated or uninitialized. A drive
 * that can't be written is taken down.
 *
 * Returns 0 if the configuration was written to at least one drive,
 * 1 if it wasn't written anywhere. */
int
save_config(void)
{
BROKEN_GDB;
int error;
int written_config; /* set when we first write the config to disk */
int driveno;
struct drive *drive; /* point to current drive info */
struct vinum_hdr *vhdr; /* and as header */
char *config; /* point to config data */
int wlabel_on; /* to set writing label on/off */
/* don't save the configuration while we're still working on it */
if (vinum_conf.flags & VF_CONFIGURING)
return 0;
written_config = 0; /* no config written yet */
/* Build a volume header */
vhdr = (struct vinum_hdr *) Malloc(VINUMHEADERLEN); /* get space for the header */
CHECKALLOC(vhdr, "Can't allocate config data");
vhdr->magic = VINUM_MAGIC; /* magic number */
vhdr->config_length = MAXCONFIG; /* length of following config info */
config = Malloc(MAXCONFIG); /* get space for the config data */
CHECKALLOC(config, "Can't allocate config data");
format_config(config, MAXCONFIG);
error = 0; /* no errors yet */
for (driveno = 0; driveno < vinum_conf.drives_used; driveno++) {
drive = &vinum_conf.drive[driveno]; /* point to drive */
if (drive->state != drive_down) {
#if (__FreeBSD__ >= 3)
getmicrotime(&drive->label.last_update); /* time of last update is now */
#else
drive->label.last_update = time; /* time of last update is now */
#endif
bcopy((char *) &drive->label, /* and the label info from the drive structure */
(char *) &vhdr->label,
sizeof(vhdr->label));
if ((drive->state != drive_unallocated)
&& (drive->state != drive_uninit)) {
wlabel_on = 1; /* enable writing the label */
error = VOP_IOCTL(drive->vp, /* make the label writeable */
DIOCWLABEL,
(caddr_t) & wlabel_on,
FWRITE,
NOCRED,
myproc);
/* each step is attempted only if the previous one succeeded */
if (error == 0)
error = write_drive(drive, vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET);
if (error == 0)
error = write_drive(drive, config, MAXCONFIG, VINUM_CONFIG_OFFSET);
wlabel_on = 0; /* disable writing the label */
/* the label is write-protected again whether or not the writes
 * worked; the result of this ioctl is not checked */
VOP_IOCTL(drive->vp, /* make the label non-writeable again */
DIOCWLABEL,
(caddr_t) & wlabel_on,
FWRITE,
NOCRED,
myproc);
if (error) {
printf("vinum: Can't write config to %s, error %d\n", drive->devicename, error);
set_drive_state(drive->driveno, drive_down, 1);
} else
written_config = 1; /* we've written it on at least one drive */
}
}
}
Free(vhdr);
Free(config);
return written_config == 0; /* return 1 if we failed to write config */
}
/* Disk labels are a mess. The correct way to access them
* is with the DIOC[GSW]DINFO ioctls, but some programs, such
* as newfs, access the disk directly, so we have to write
* things there. We do this only on request. If a user
* request tries to read it directly, we fake up one on the fly.
*/
/* get_volume_label returns a label structure to lp, which
* is allocated by the caller */
void
get_volume_label(struct volume *vol, struct disklabel *lp)
{
bzero(lp, sizeof(struct disklabel));
strncpy(lp->d_typename, "vinum", sizeof(lp->d_typename));
lp->d_type = DTYPE_VINUM;
strncpy(lp->d_packname, vol->name, min(sizeof(lp->d_packname), sizeof(vol->name)));
lp->d_rpm = 14400 * vol->plexes; /* to keep them guessing */
lp->d_interleave = 1;
lp->d_flags = 0;
/* Fitting unto the vine, a vinum has a single
* track with all its sectors */
lp->d_secsize = DEV_BSIZE; /* bytes per sector */
lp->d_nsectors = vol->size; /* data sectors per track */
lp->d_ntracks = 1; /* tracks per cylinder */
lp->d_ncylinders = 1; /* data cylinders per unit */
lp->d_secpercyl = vol->size; /* data sectors per cylinder */
lp->d_secperunit = vol->size; /* data sectors per unit */
lp->d_bbsize = BBSIZE;
lp->d_sbsize = SBSIZE;
lp->d_magic = DISKMAGIC;
lp->d_magic2 = DISKMAGIC;
/* Set up partitions a, b and c to be identical
* and the size of the volume. a is UFS, b is
* swap, c is nothing */
lp->d_partitions[0].p_size = vol->size;
lp->d_partitions[0].p_fsize = 1024;
lp->d_partitions[0].p_fstype = FS_BSDFFS; /* FreeBSD File System :-) */
lp->d_partitions[0].p_fsize = 1024; /* FS fragment size */
lp->d_partitions[0].p_frag = 8; /* and fragments per block */
lp->d_partitions[SWAP_PART].p_size = vol->size;
lp->d_partitions[SWAP_PART].p_fstype = FS_SWAP; /* swap partition */
lp->d_partitions[LABEL_PART].p_size = vol->size;
lp->d_npartitions = LABEL_PART + 1;
strncpy(lp->d_packname, vol->name, min(sizeof(lp->d_packname), sizeof(vol->name)));
lp->d_checksum = dkcksum(lp);
}
/*
 * Write a synthetic disk label to the label sector of a volume.
 *
 * volno is the index of the volume.  The label is built by
 * get_volume_label() and written through vinumstrategy().
 *
 * Returns 0 on success, ENOENT if volno doesn't refer to an
 * allocated volume, ENOMEM if there is no memory for the label, or
 * the error from the write.
 */
int
write_volume_label(int volno)
{
    struct disklabel *lp;
    struct buf *bp;
    struct disklabel *dlp;
    struct volume *vol;
    int error;

    /* Check the volume first so that nothing is allocated on the error paths */
    if ((unsigned) (volno) >= (unsigned) vinum_conf.volumes_used) /* invalid volume */
	return ENOENT;
    vol = &VOL[volno];					    /* volume in question */
    if (vol->state == volume_unallocated)		    /* nothing there */
	return ENOENT;

    /*
     * Round the size up to a multiple of DEV_BSIZE.  The mask has to
     * be inverted: without the ~ the expression keeps only the low
     * bits and can yield less than sizeof(struct disklabel).
     */
    lp = (struct disklabel *) Malloc((sizeof(struct disklabel) + (DEV_BSIZE - 1)) & ~(DEV_BSIZE - 1));
    if (lp == 0)
	return ENOMEM;

    get_volume_label(vol, lp);				    /* get the label */

    /* Now write to disk.  This code is derived from the
     * system writedisklabel (), which does silly things
     * like reading the label and refusing to write
     * unless it's already there. */
    bp = geteblk((int) lp->d_secsize);			    /* get a buffer */
    bp->b_dev = minor(vol->devno) | (CDEV_MAJOR << MAJORDEV_SHIFT); /* our own raw volume */
    bp->b_blkno = LABELSECTOR * ((int) lp->d_secsize / DEV_BSIZE);
    bp->b_bcount = lp->d_secsize;
    bzero(bp->b_data, lp->d_secsize);
    dlp = (struct disklabel *) bp->b_data;
    *dlp = *lp;
    bp->b_flags &= ~B_INVAL;
    bp->b_flags |= B_BUSY | B_WRITE;
    vinumstrategy(bp);					    /* write it out */
    error = biowait(bp);
    bp->b_flags |= B_INVAL | B_AGE;
    brelse(bp);
    Free(lp);						    /* the label copy is no longer needed */
    return error;
}
/*
 * Initialize a subdisk.  Currently a placeholder: the subdisk
 * number is ignored and the function always reports success (0).
 */
int
initsd(int sdno)
{
    (void) sdno;					    /* not used yet */
    return 0;
}

137
sys/modules/vinum/lock.c Normal file
View File

@ -0,0 +1,137 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: lock.c,v 1.6 1998/07/28 06:32:57 grog Exp grog $
*/
#define REALLYKERNEL
#include "vinumhdr.h"
/* Lock routines. Currently, we lock either an individual volume
* or the global configuration. I don't think tsleep and
* wakeup are SMP safe. FIXME XXX */
/* Lock a volume, wait if it's in use */
int
lockvol(struct volume *vol)
{
int error;
while ((vol->flags & VF_LOCKED) != 0) {
vol->flags |= VF_LOCKING;
/* It would seem to make more sense to sleep on
* the address 'vol'. Unfortuntaly we can't
* guarantee that this address won't change due to
* table expansion. The address we choose won't change. */
if ((error = tsleep(&vinum_conf.volume + vol->devno,
PRIBIO | PCATCH,
"volock",
0)) != 0)
return error;
}
vol->flags |= VF_LOCKED;
return 0;
}
/*
 * Release the lock on a volume.  If anybody flagged that they are
 * waiting for it (VF_LOCKING), clear the flag and wake them.
 */
void
unlockvol(struct volume *vol)
{
    vol->flags &= ~VF_LOCKED;
    if ((vol->flags & VF_LOCKING) == 0)			    /* nobody waiting */
	return;
    vol->flags &= ~VF_LOCKING;
    wakeup(&vinum_conf.volume + vol->devno);		    /* same channel lockvol sleeps on */
}
/* Lock a plex, wait if it's in use */
int
lockplex(struct plex *plex)
{
int error;
while ((plex->flags & VF_LOCKED) != 0) {
plex->flags |= VF_LOCKING;
/* It would seem to make more sense to sleep on
* the address 'plex'. Unfortuntaly we can't
* guarantee that this address won't change due to
* table expansion. The address we choose won't change. */
if ((error = tsleep(&vinum_conf.plex + plex->sdnos[0],
PRIBIO | PCATCH,
"plexlk",
0)) != 0)
return error;
}
plex->flags |= VF_LOCKED;
return 0;
}
/*
 * Release the lock on a plex.  If anybody flagged that they are
 * waiting for it (VF_LOCKING), clear the flag and issue a wakeup on
 * the channel derived from the plex number.
 */
void
unlockplex(struct plex *plex)
{
    plex->flags &= ~VF_LOCKED;
    if ((plex->flags & VF_LOCKING) == 0)		    /* nobody waiting */
	return;
    plex->flags &= ~VF_LOCKING;
    wakeup(&vinum_conf.plex + plex->plexno);
}
/* Get a lock for the global config, wait if it's not available */
int
lock_config(void)
{
int error;
while ((vinum_conf.flags & VF_LOCKED) != 0) {
vinum_conf.flags |= VF_LOCKING;
if ((error = tsleep(&vinum_conf, PRIBIO | PCATCH, "vincfg", 0)) != 0)
return error;
}
vinum_conf.flags |= VF_LOCKED;
return 0;
}
/*
 * Release the lock on the global configuration.  If anybody flagged
 * that they are waiting for it (VF_LOCKING), clear the flag and wake
 * them.
 */
void
unlock_config(void)
{
    vinum_conf.flags &= ~VF_LOCKED;
    if ((vinum_conf.flags & VF_LOCKING) == 0)		    /* nobody waiting */
	return;
    vinum_conf.flags &= ~VF_LOCKING;
    wakeup(&vinum_conf);				    /* same channel lock_config sleeps on */
}

40
sys/modules/vinum/makestatetext Executable file
View File

@ -0,0 +1,40 @@
#!/bin/sh
# Make statetexts.h from vinumstate.h
# $Id: makestatetext,v 1.4 1998/03/13 05:36:16 grog Exp grog $
#
# The output starts with a copy of the COPYRIGHT file and a "created by"
# banner.  For each of the prefixes drive_, sd_, plex_ and volume_ it then
# emits a table of strings: every line of the input that contains
# "<prefix><name>," and no '=' sign is rewritten to the quoted <name>.
#
# The bracket expressions spell out A-Za-z0-9_ explicitly.  The range A-z
# depends on the collating sequence and, in the C locale, also matches
# the punctuation characters that lie between 'Z' and 'a'.
infile=vinumstate.h
ofile=statetexts.h
cat <COPYRIGHT > $ofile
echo >>$ofile "/* Created by $0 on" `date`. "Do not edit */"
echo >>$ofile
echo >>$ofile "/* Drive state texts */"
echo >>$ofile "char *drivestatetext [] =
{ "
egrep -e 'drive_[A-Za-z0-9_]*,' <$infile | grep -v = | sed 's: *drive_\([^,]*\).*: \"\1\",:' >>$ofile
cat <<FOO >> $ofile
};
/* Subdisk state texts */
char *sdstatetext [] =
{
FOO
egrep -e 'sd_[A-Za-z0-9_]*,' $infile | grep -v = | sed 's: *sd_\([^,]*\).*: \"\1\",:' >>$ofile
cat <<FOO >> $ofile
};
/* Plex state texts */
char *plexstatetext [] =
{
FOO
egrep -e 'plex_[A-Za-z0-9_]*,' $infile | grep -v = | sed 's: *plex_\([^,]*\).*: \"\1\",:' >>$ofile
cat <<FOO >> $ofile
};
/* Volume state texts */
char *volstatetext [] =
{
FOO
egrep -e 'volume_[A-Za-z0-9_]*,' $infile | grep -v = | sed 's: *volume_\([^,]*\).*: \"\1\",:' >>$ofile
cat <<FOO >> $ofile
};
FOO

186
sys/modules/vinum/memory.c Normal file
View File

@ -0,0 +1,186 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: memory.c,v 1.16 1998/08/08 04:43:22 grog Exp grog $
*/
#define REALLYKERNEL
#define USES_VM
#include "vinumhdr.h"
/* Jump buffer defined elsewhere; it is only declared here. */
extern jmp_buf command_fail; /* return on a failed command */
#if __FreeBSD__ >= 3
/* Why aren't these declared anywhere? XXX */
/* Local prototypes, supplied only when building for FreeBSD 3 or later. */
int setjmp(jmp_buf);
void longjmp(jmp_buf, int);
#endif
/* Prototypes for the data buffer helpers; they are not defined in the
 * part of this file that follows. */
void freedatabuf(struct mc *me);
caddr_t allocdatabuf(struct mc *me);
/* Grow the table that *table points to from oldsize to newsize.
 * Both sizes are in bytes.  Nothing happens unless newsize is larger
 * than oldsize.  Otherwise a new table is allocated, the first
 * oldsize bytes of an existing table are copied into it, the old
 * table is freed and *table is pointed at the new one. */
void
expand_table(void **table, int oldsize, int newsize)
{
    int *newtable;

    if (newsize <= oldsize)				    /* big enough already */
	return;
    newtable = (int *) Malloc(newsize);			    /* allocate a new table */
    CHECKALLOC(newtable, "vinum: Can't expand table\n");
    if (*table != NULL) {				    /* already something there, */
	bcopy((char *) *table, (char *) newtable, oldsize); /* copy it to the new table */
	Free(*table);					    /* and drop the old one */
    }
    *table = newtable;
}
#ifndef DEBUG
/* Make room for RQELTS more request elements in a request block and
 * zero the newly added elements.
 *
 * NOTE(review): the sizes are computed from prq->requests, but the
 * counter that gets bumped is prq->rqcount -- confirm that this is
 * what is intended. */
void
expandrq(struct plexrq *prq)
{
    struct rqelement *newpart;				    /* first of the added elements */

    expand_table((void **) &prq->rqe,
	prq->requests * sizeof(struct rqelement),
	(prq->requests + RQELTS) * sizeof(struct rqelement));
    newpart = &prq->rqe[prq->requests];
    bzero(newpart, RQELTS * sizeof(struct rqelement));	    /* clear the new part */
    prq->rqcount += RQELTS;
}
#endif
#if DEBUG /* XXX debug */
/* Bookkeeping for the allocation tracing done by MMalloc() and FFree() */
#define MALLOCENTRIES 16384				    /* size of the trace table */
int malloccount = 0;					    /* number of entries in use */
int highwater = 0; /* highest index ever allocated */
static struct mc malloced[MALLOCENTRIES];		    /* one entry per traced allocation */
static int total_malloced;				    /* sum of the sizes of the traced allocations */
/* Allocate size bytes with malloc (M_DEVBUF, M_WAITOK) and record the
 * allocation in the malloced[] trace table.
 *
 * file and line identify the caller and are stored in the table entry.
 * Returns the allocated memory, or 0 if the trace table is full or
 * malloc returned NULL.  Drops into the debugger if the new memory
 * overlaps an allocation that is already in the table.
 *
 * NOTE(review): at most 16 bytes of the file name are copied, so a
 * longer name is stored without a terminating NUL -- confirm that the
 * consumers of the table cope with that. */
caddr_t
MMalloc(int size, char *file, int line)
{
    caddr_t result;
    int i;
    static int seq = 0;					    /* sequence number of this allocation */
    int s;
    struct mc me; /* information to pass to allocdatabuf */

    if (malloccount >= MALLOCENTRIES) { /* too many */
	printf("vinum: can't allocate table space to trace memory allocation");
	return 0; /* can't continue */
    }
    result = malloc(size, M_DEVBUF, M_WAITOK); /* use malloc for smaller and irregular stuff */
    if (result == NULL) {
	printf("vinum: can't allocate %d bytes from %s:%d\n", size, file, line);
	return result;
    }
    /* Clear the whole descriptor first: me.databuf is copied into the
     * table below and would otherwise be read uninitialized. */
    bzero(&me, sizeof(me));
    me.flags = 0; /* allocation via malloc */
    s = splhigh();
    for (i = 0; i < malloccount; i++) {
	if (((result + size) > malloced[i].address)
	    && (result < malloced[i].address + malloced[i].size)) /* overlap */
	    Debugger("Malloc overlap");
    }
    i = malloccount++;					    /* take the next free entry */
    total_malloced += size;
    malloced[i].address = result;
    malloced[i].size = size;
    malloced[i].line = line;
    malloced[i].seq = seq++;
    malloced[i].flags = me.flags;
    malloced[i].databuf = me.databuf; /* only used with kva alloc */
    bcopy(file, malloced[i].file, min(strlen(file) + 1, 16));
    if (malloccount > highwater)
	highwater = malloccount;
    splx(s);
    return result;
}
/* Free memory that was allocated by MMalloc() and remove it from the
 * malloced[] trace table.  The memory is zeroed before it is freed.
 * If mem is not in the table, complain (naming the caller by file and
 * line) and drop into the debugger instead. */
void
FFree(void *mem, char *file, int line)
{
    int slot;						    /* table index of this allocation */
    int s;

    s = splhigh();
    for (slot = 0; slot < malloccount; slot++)		    /* look for the entry */
	if (malloced[slot].address == (caddr_t) mem)
	    break;
    if (slot == malloccount) {				    /* not one of ours */
	splx(s);
	printf("Freeing unallocated data at 0x%08x from %s, line %d\n", (int) mem, file, line);
	Debugger("Free");
	return;
    }
    bzero(mem, malloced[slot].size);			    /* XXX */
    free(mem, M_DEVBUF);
    malloccount--;
    total_malloced -= malloced[slot].size;
    if (slot < malloccount)				    /* entries follow: close the gap */
	bcopy(&malloced[slot + 1], &malloced[slot], (malloccount - slot) * sizeof(struct mc));
    splx(s);
}
/* Fill in the struct meminfo at data with the current state of the
 * allocation trace: entry count, total bytes, the table itself and
 * the high water mark. */
void
vinum_meminfo(caddr_t data)
{
    struct meminfo *info = (struct meminfo *) data;

    info->highwater = highwater;
    info->malloced = malloced;
    info->total_malloced = total_malloced;
    info->mallocs = malloccount;
}
/* Return information about one entry of the allocation trace table.
 * On entry the first word at data is the index of the wanted entry.
 * On return data holds the address, size, line, sequence number and
 * file name of that entry, laid out as a struct mc.
 * Returns 0, or ENOENT if there is no such entry. */
int
vinum_mallocinfo(caddr_t data)
{
    unsigned int ent = *(int *) data;			    /* 1st word is index */
    struct mc *out = (struct mc *) data;		    /* the result goes to the same place */
    struct mc *entry;

    if (ent >= malloccount)
	return ENOENT;
    entry = &malloced[ent];
    out->address = entry->address;
    out->size = entry->size;
    out->line = entry->line;
    out->seq = entry->seq;
    bcopy(entry->file, out->file, 16);
    return 0;
}
#endif

206
sys/modules/vinum/parser.c Normal file
View File

@ -0,0 +1,206 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: parser.c,v 1.11 1998/08/10 08:50:42 grog Exp grog $
*/
/* This file contains the parser for the configuration routines. It's used
* both in the kernel and in the user interface program, thus the separate file. */
/* Go through a text and split up into text tokens. These are either non-blank
* sequences, or any sequence (except \0) enclosed in ' or ". Embedded ' or
* " characters may be escaped by \, which otherwise has no special meaning.
*
* Delimit by following with a \0, and return pointers to the starts at token [].
* Return the number of tokens found as the return value.
*
* This method has the restriction that a closing " or ' must be followed by
* grey space.
*
* Error conditions are end of line before end of quote, or no space after
* a closing quote. In this case, tokenize() returns -1. */
#include <sys/param.h>
#ifdef KERNEL
#undef KERNEL /* XXX */
#define REALLYKERNEL
#else
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#endif
/* All this mess for a single struct definition */
#include <sys/uio.h>
#include <sys/namei.h>
#include <sys/disklabel.h>
#include <sys/mount.h>
#include <sys/device.h>
#include <sys/disk.h>
#include "sys/buf.h"
#include <vinumvar.h>
#include "vinumkw.h"
#include "vinumio.h"
#include "vinumext.h"
#ifdef REALLYKERNEL
/* Minimal replacement for the ctype version: only blank and tab count
 * as white space here.  The argument is evaluated twice, so it must
 * not have side effects. */
#define isspace(c) (((c) == ' ') || ((c) == '\t')) /* check for white space */
#else /* get it from the headers */
#include <ctype.h>
#endif
/* enum keyword is defined in vinumvar.h */
/* Helpers for building the keyword tables below.  keypair(foo) yields
 * the initializer { "foo", kw_foo } and flagkeypair(foo) yields
 * { "-foo", kw_foo }.  KEYWORDSET(x) yields the initializer for a
 * struct keywordset: the number of entries in table x, then x itself. */
#define keypair(x) { #x, kw_##x } /* create pair "foo", kw_foo */
#define flagkeypair(x) { "-"#x, kw_##x } /* create pair "-foo", kw_foo */
#define KEYWORDSET(x) {sizeof (x) / sizeof (struct _keywords), x}
/* Normal keywords. These are all the words that vinum knows. */
/* NOTE(review): KERNEL is #undef'ed near the top of this file when it
 * was defined, so the "#ifndef KERNEL" section below looks like it is
 * compiled in the kernel build as well -- confirm whether REALLYKERNEL
 * was meant here. */
struct _keywords keywords[] =
{keypair(drive),
keypair(sd),
keypair(subdisk),
keypair(plex),
keypair(volume),
keypair(vol),
keypair(setupstate),
keypair(readpol),
keypair(org),
keypair(name),
keypair(writethrough),
keypair(writeback),
keypair(raw),
keypair(device),
keypair(concat),
keypair(raid5),
keypair(striped),
keypair(plexoffset),
keypair(driveoffset),
keypair(length),
keypair(len),
keypair(state),
keypair(round),
keypair(prefer),
keypair(rename),
keypair(detached),
#ifndef KERNEL /* for vinum(8) only */
#ifdef DEBUG
keypair(debug),
#endif
keypair(attach),
keypair(detach),
keypair(printconfig),
keypair(replace),
keypair(create),
keypair(read),
keypair(modify),
keypair(list),
keypair(l),
keypair(ld),
keypair(ls),
keypair(lp),
keypair(lv),
keypair(info),
keypair(set),
keypair(rm),
keypair(init),
keypair(label),
keypair(resetconfig),
keypair(start),
keypair(stop),
keypair(resetstats)
#endif
};
/* The table above together with its entry count, in the form
 * get_keyword() expects */
struct keywordset keyword_set = KEYWORDSET(keywords);
#ifndef KERNEL
/* Flag keywords: the single-letter options, spelled with a leading
 * '-' ("-f", "-d", "-v", "-s", "-r"), mapped to kw_f, kw_d, kw_v,
 * kw_s and kw_r. */
struct _keywords flag_keywords[] =
{flagkeypair(f),
flagkeypair(d),
flagkeypair(v),
flagkeypair(s),
flagkeypair(r)
};
/* The flag table together with its entry count */
struct keywordset flag_set = KEYWORDSET(flag_keywords);
#endif
/* Split the text at cptr into tokens, in place.
 *
 * A token is either a run of non-blank characters or a sequence
 * enclosed in ' or ".  A quote character preceded by \ does not close
 * the sequence.  A quoted token keeps its quote characters.  Each
 * token is terminated by overwriting the character that follows it
 * with \0, and token [] receives a pointer to the start of each token.
 * Scanning stops at the end of the string, at a newline or at a '#'
 * that starts a token.
 *
 * cptr:  the text to split; it is modified.
 * token: receives the token pointers.  The caller must supply enough
 *        room, since no bound is checked here.
 *
 * Returns the number of tokens found, or -1 if the line ends inside a
 * quoted token or a closing quote is not followed by white space. */
int
tokenize(char *cptr, char *token[])
{
    char delim; /* delimiter for searching for the partner */
    int tokennr; /* index of this token */

    tokennr = 0; /* none found yet */
    for (;;) {
	/* The casts keep characters with the top bit set from being
	 * passed to the ctype version of isspace as negative values. */
	while (isspace((unsigned char) *cptr))
	    cptr++; /* skip initial white space */
	if ((*cptr == '\0') || (*cptr == '\n') || (*cptr == '#')) /* end of line */
	    return tokennr; /* return number of tokens found */
	delim = *cptr;
	token[tokennr] = cptr; /* point to it */
	tokennr++; /* one more */
	/* XXX this is broken. It leaves superfluous \\ characters in the text */
	if ((delim == '\'') || (delim == '"')) { /* delimitered */
	    for (;;) {
		cptr++;
		if ((*cptr == delim) && (cptr[-1] != '\\')) { /* found the partner */
		    cptr++; /* move on past */
		    if (!isspace((unsigned char) *cptr)) /* error, no space after closing quote */
			return -1;
		    *cptr++ = '\0'; /* delimit */
		    break; /* this token is complete, go and look for the next one */
		} else if ((*cptr == '\0') || (*cptr == '\n')) /* end of line */
		    return -1;
	    }
	} else { /* not quoted */
	    while ((*cptr != '\0') && (!isspace((unsigned char) *cptr)) && (*cptr != '\n'))
		cptr++;
	    if (*cptr != '\0') /* not end of the line, */
		*cptr++ = '\0'; /* delimit and move to the next */
	}
    }
}
/* Find a keyword and return an index */
enum keyword
get_keyword(char *name, struct keywordset *keywordset)
{
int i;
struct _keywords *keywords = keywordset->k; /* point to the keywords */
for (i = 0; i < keywordset->size; i++)
if (!strcmp(name, keywords[i].name))
return (enum keyword) keywords[i].keyword;
return kw_invalid_keyword;
}

882
sys/modules/vinum/request.c Normal file
View File

@ -0,0 +1,882 @@
/* XXX to do:
* Decide where we need splbio ()
*/
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: request.c,v 1.17 1998/08/13 06:04:47 grog Exp grog $
*/
#define REALLYKERNEL
#include "vinumhdr.h"
#include "request.h"
#include <miscfs/specfs/specdev.h>
#include <sys/resourcevar.h>
/* pointer to ioctl p parameter, to save passing it around */
extern struct proc *myproc;
/* Prototypes for the request handling functions.  bre, build_read_request
 * and build_write_request are defined further down in this file.
 * NOTE(review): the third parameter of bre is called diskstart here but
 * diskaddr in the definition. */
enum requeststatus bre(struct request *rq,
int plexno,
daddr_t * diskstart,
daddr_t diskend);
enum requeststatus bre5(struct request *rq,
int plexno,
daddr_t * diskstart,
daddr_t diskend);
enum requeststatus build_read_request(struct request *rq, int volplexno);
enum requeststatus build_write_request(struct request *rq);
enum requeststatus build_rq_buffer(struct rqelement *rqe, struct plex *plex);
void freerq(struct request *rq);
void free_rqg(struct rqgroup *rqg);
int find_alternate_sd(struct request *rq);
int check_range_covered(struct request *);
void complete_rqe(struct buf *bp);
void complete_raid5_write(struct rqelement *);
int abortrequest(struct request *rq, int error);
void sdio(struct buf *bp);
void sdio_done(struct buf *bp);
int vinum_bounds_check(struct buf *bp, struct volume *vol);
caddr_t allocdatabuf(struct rqelement *rqe);
void freedatabuf(struct rqelement *rqe);
/* Strategy routine: dispatch an I/O request according to the type of
 * vinum object encoded in bp->b_dev.
 *
 * Subdisk I/O is handed to sdio().  Volume I/O is rejected with EIO
 * unless the volume is up, and completed without further action if
 * vinum_bounds_check() does not return a positive value; otherwise it
 * is started with vinumstart(), as is plex I/O.  Everything else,
 * including drives, is completed with EIO. */
void
vinumstrategy(struct buf *bp)
{
    BROKEN_GDB;
    int volno;
    struct volume *vol;
    struct devcode *device = (struct devcode *) &bp->b_dev; /* decode device number */

    switch (device->type) {
    case VINUM_SD_TYPE:
	sdio(bp);
	return;

	/* In fact, vinum doesn't handle drives: they're
	 * handled directly by the disk drivers */
    case VINUM_DRIVE_TYPE:
    default:
	bp->b_error = EIO; /* I/O error */
	bp->b_flags |= B_ERROR;
	biodone(bp);
	return;

    case VINUM_VOLUME_TYPE: /* volume I/O */
	volno = VOLNO(bp->b_dev);
	vol = &VOL[volno];
	if (vol->state != volume_up) { /* can't access this volume */
	    bp->b_error = EIO; /* I/O error */
	    bp->b_flags |= B_ERROR;
	    biodone(bp);
	    return;
	}
	if (vinum_bounds_check(bp, vol) <= 0) { /* don't like them bounds */
	    biodone(bp); /* have nothing to do with this */
	    return;
	}
	/* FALLTHROUGH */

	/* Plex I/O is pretty much the same as volume I/O
	 * for a single plex.  vinumstart() tells the two
	 * apart by looking at the device number again. */
    case VINUM_PLEX_TYPE:
	bp->b_resid = bp->b_bcount; /* transfer everything */
	vinumstart(bp, 0);
	return;
    }
}
/* Start a transfer for the volume or plex addressed by bp->b_dev.
 *
 * Returns -1 on error, in which case bp has been completed with
 * biodone().  Otherwise returns whatever launch_requests() returns.
 *
 * Parameter reviveok is set when doing
 * transfers for revives: it allows transfers to
 * be started immediately when a revive is in
 * progress.  During revive, normal transfers
 * are queued if they share address space with
 * a currently active revive operation.
 *
 * NOTE(review): vol->active is incremented here and not decremented
 * on the failure paths in this function -- confirm that freerq() or
 * the completion code takes care of it. */
int
vinumstart(struct buf *bp, int reviveok)
{
    BROKEN_GDB;
    int plexno;
    struct volume *vol;
    struct request *rq; /* build up our request here */
    enum requeststatus status;

    /* XXX In these routines, we're assuming that
     * we will always be called with bp->b_bcount
     * which is a multiple of the sector size.  This
     * is a reasonable assumption, since we are only
     * called from system routines.  Should we check
     * anyway? */
    if ((bp->b_bcount % DEV_BSIZE) != 0) { /* bad length */
	bp->b_error = EINVAL; /* invalid size */
	bp->b_flags |= B_ERROR;
	biodone(bp);
	return -1;
    }
    rq = (struct request *) Malloc(sizeof(struct request)); /* allocate a request struct */
    if (rq == NULL) { /* can't do it */
	bp->b_error = ENOMEM; /* can't get memory */
	bp->b_flags |= B_ERROR;
	biodone(bp);
	return -1;
    }
    bzero(rq, sizeof(struct request));

    /* Note the volume or plex the request is for.  vol stays
     * NULL for plex I/O, which is what the code below uses to
     * tell the two apart. */
    rq->bp = bp; /* and the user buffer struct */
    if (DEVTYPE(bp->b_dev) == VINUM_VOLUME_TYPE) { /* it's a volume, */
	rq->volplex.volno = VOLNO(bp->b_dev); /* get the volume number */
	vol = &VOL[rq->volplex.volno]; /* and point to it */
	vol->active++; /* one more active request */
    } else {
	vol = NULL; /* no volume */
	rq->volplex.plexno = PLEXNO(bp->b_dev); /* point to the plex */
	rq->isplex = 1; /* note that it's a plex */
    }

    if (bp->b_flags & B_READ) {
	/* This is a read request.  Decide
	 * which plex to read from.
	 *
	 * There's a potential race condition here,
	 * since we're not locked, and we could end
	 * up multiply incrementing the round-robin
	 * counter.  This doesn't have any serious
	 * effects, however. */
	if (vol != NULL) {
	    vol->reads++;
	    vol->bytes_read += bp->b_bcount;
	    plexno = vol->preferred_plex; /* get the plex to use */
	    if (plexno < 0) { /* round robin */
		plexno = vol->last_plex_read;
		vol->last_plex_read++;
		if (vol->last_plex_read == vol->plexes) /* got to the end? */
		    vol->last_plex_read = 0; /* wrap around */
	    }
	    status = build_read_request(rq, plexno); /* build a request */
	} else {
	    daddr_t diskaddr = bp->b_blkno; /* start offset of transfer */

	    status = bre(rq, /* build a request list */
		rq->volplex.plexno,
		&diskaddr,
		diskaddr + (bp->b_bcount / DEV_BSIZE));
	}
    } else {
	/* This is a write operation.  We write to all
	 * plexes.  If this is a RAID 5 plex, we must also
	 * update the parity stripe. */
	if (vol != NULL) {
	    vol->writes++;
	    vol->bytes_written += bp->b_bcount;
	    status = build_write_request(rq); /* Not all the subdisks are up */
	} else { /* plex I/O */
	    daddr_t diskstart;

	    diskstart = bp->b_blkno; /* start offset of transfer */
	    status = bre(rq,
		PLEXNO(bp->b_dev),
		&diskstart,
		bp->b_blkno + (bp->b_bcount / DEV_BSIZE)); /* build requests for the plex */
	}
    }

    if ((status > REQUEST_RECOVERED) /* can't satisfy it */
	||(bp->b_flags & B_DONE)) { /* XXX shouldn't get this without bad status */
	if (status == REQUEST_DOWN) { /* not enough subdisks */
	    bp->b_error = EIO; /* I/O error */
	    bp->b_flags |= B_ERROR;
	}
	/* bre() completes the buffer itself when it runs out of
	 * memory, so for reads as well as for writes we must not
	 * call biodone() a second time. */
	if ((bp->b_flags & B_DONE) == 0)
	    biodone(bp);
	freerq(rq);
	return -1;
    }
    return launch_requests(rq, reviveok); /* now start the requests if we can */
}
/* Call the low-level strategy routines to
* perform the requests in a struct request */
int
launch_requests(struct request *rq, int reviveok)
{
struct rqgroup *rqg;
int rqno; /* loop index */
struct rqelement *rqe; /* current element */
int s;
/* First find out whether we're reviving, and the
* request contains a conflict. If so, we hang
* the request off plex->waitlist of the first
* plex we find which is reviving */
if ((rq->flags & XFR_REVIVECONFLICT) /* possible revive conflict */
&&(!reviveok)) { /* and we don't want to do it now, */
struct volume *vol = &VOL[VOLNO(rq->bp->b_dev)];
struct plex *plex;
int plexno;
for (plexno = 0; plexno < vol->plexes; plexno++) { /* find the reviving plex */
plex = &PLEX[vol->plex[plexno]];
if (plex->state == plex_reviving) /* found it */
break;
}
if (plexno < vol->plexes) { /* found it? */
struct request *waitlist = plex->waitlist; /* point to the waiting list */
while (waitlist->next != NULL) /* find the end */
waitlist = waitlist->next;
waitlist->next = rq; /* hook our request there */
return 0; /* and get out of here */
} else /* bad vinum, bad */
printf("vinum: can't find reviving plex for volume %s\n", vol->name);
}
rq->active = 0; /* nothing yet */
/* XXX This is probably due to a bug */
if (rq->rqg == NULL) { /* no request */
abortrequest(rq, EINVAL);
return -1;
}
#if DEBUG
if (debug & DEBUG_ADDRESSES)
printf("Request: %x\nWrite dev 0x%x, offset 0x%x, length %ld\n",
(u_int) rq,
rq->bp->b_dev,
rq->bp->b_blkno,
rq->bp->b_bcount); /* XXX */
vinum_conf.lastrq = (int) rq;
vinum_conf.lastbuf = rq->bp;
#endif
for (rqg = rq->rqg; rqg != NULL; rqg = rqg->next) { /* through the whole request chain */
rqg->active = rqg->count; /* they're all active */
rq->active++; /* one more active request group */
for (rqno = 0; rqno < rqg->count; rqno++) {
rqe = &rqg->rqe[rqno];
if (rqe->flags & XFR_BAD_SUBDISK) /* this subdisk is bad, */
rqg->active--; /* one less active request */
else {
struct drive *drive = &DRIVE[rqe->driveno]; /* drive to access */
if ((rqe->b.b_flags & B_READ) == 0)
rqe->b.b_vp->v_numoutput++; /* one more output going */
#if DEBUG
if (debug & DEBUG_ADDRESSES)
printf(" %s dev 0x%x, sd %d, offset 0x%x, devoffset 0x%x, length %ld\n",
rqe->b.b_flags & B_READ ? "Read" : "Write",
rqe->b.b_dev,
rqe->sdno,
(u_int) (rqe->b.b_blkno - SD[rqe->sdno].driveoffset),
rqe->b.b_blkno,
rqe->b.b_bcount); /* XXX */
if (debug & DEBUG_NUMOUTPUT)
printf(" vinumstart sd %d numoutput %ld\n",
rqe->sdno,
rqe->b.b_vp->v_numoutput);
#endif
/* fire off the request */
s = splbio();
(*bdevsw[major(rqe->b.b_dev)]->d_strategy) (&rqe->b);
splx(s);
}
/* XXX Do we need caching? Think about this more */
}
}
return 0;
}
/* Define the low-level requests needed to perform a
 * high-level I/O operation for a specific plex 'plexno'.
 *
 * rq:       the request to add request groups to
 * plexno:   index of the plex in the plex table (PLEX)
 * diskaddr: on entry, plex-relative start address of the transfer,
 *           in sectors; advanced as request elements are built
 * diskend:  plex-relative end address of the transfer
 *
 * Returns REQUEST_OK unless something goes wrong: a non-zero status
 * from checksdstate() for a subdisk or plex that is not up,
 * REQUEST_ENOMEM if a request group or buffer can't be set up (the
 * user buffer is then completed with ENOMEM), or REQUEST_EOF if a
 * striped transfer starts beyond the end of a subdisk.
 *
 * On read, return on the first bad subdisk, so that the caller
 * (build_read_request) can try alternatives.
 *
 * On entry to this routine, the rqg structures are not assigned. The
 * assignment is performed by expandrq(). Strictly speaking, the
 * elements rqe->sdno of all entries should be set to -1, since 0
 * (from bzero) is a valid subdisk number. We avoid this problem by
 * initializing the ones we use, and not looking at the others (index
 * >= rqg->requests).
 */
enum requeststatus
bre(struct request *rq,
int plexno,
daddr_t * diskaddr,
daddr_t diskend)
{
BROKEN_GDB;
int sdno;
struct sd *sd;
struct rqgroup *rqg;
struct buf *bp; /* user's bp */
struct plex *plex;
enum requeststatus status; /* return value */
daddr_t plexoffset; /* offset of transfer in plex */
daddr_t stripebase; /* base address of stripe (1st subdisk) */
daddr_t stripeoffset; /* offset in stripe */
daddr_t blockoffset; /* offset in stripe on subdisk */
struct rqelement *rqe; /* point to this request information */
daddr_t diskstart = *diskaddr; /* remember where this transfer starts */
bp = rq->bp; /* buffer pointer */
status = REQUEST_OK; /* return value: OK until proven otherwise */
plex = &PLEX[plexno]; /* point to the plex */
switch (plex->organization) {
/* Concatenated plex: one request group with a single element for
 * every subdisk that overlaps the transfer */
case plex_concat:
for (sdno = 0; sdno < plex->subdisks; sdno++) {
sd = &SD[plex->sdnos[sdno]];
if ((*diskaddr < (sd->plexoffset + sd->sectors)) /* The request starts before the end of this */
&&(diskend > sd->plexoffset)) { /* subdisk and ends after the start of this sd */
if ((sd->state != sd_up) || (plex->state != plex_up)) {
enum requeststatus s;
s = checksdstate(sd, rq, *diskaddr, diskend); /* do we need to change state? */
if (s) /* give up? */
return s; /* yup */
}
rqg = allocrqg(rq, 1); /* space for the request */
if (rqg == NULL) { /* malloc failed */
bp->b_flags |= B_ERROR;
bp->b_error = ENOMEM;
biodone(bp);
return REQUEST_ENOMEM;
}
rqg->plexno = plexno;
rqe = &rqg->rqe[0]; /* point to the element */
rqe->rqg = rqg; /* group */
rqe->sdno = sd->sdno; /* put in the subdisk number */
plexoffset = max(sd->plexoffset, *diskaddr); /* start offset in plex */
rqe->sdoffset = plexoffset - sd->plexoffset; /* start offset in subdisk */
rqe->useroffset = plexoffset - diskstart; /* start offset in user buffer */
rqe->dataoffset = 0;
rqe->datalen = min(diskend - *diskaddr, /* number of sectors to transfer in this sd */
sd->sectors - rqe->sdoffset);
rqe->groupoffset = 0; /* no groups for concatenated plexes */
rqe->grouplen = 0;
rqe->buflen = rqe->datalen; /* buffer length is data buffer length */
rqe->flags = 0;
rqe->driveno = sd->driveno;
*diskaddr += rqe->datalen; /* bump the address */
if (build_rq_buffer(rqe, plex)) { /* build the buffer */
deallocrqg(rqg);
bp->b_flags |= B_ERROR;
bp->b_error = ENOMEM;
biodone(bp);
return REQUEST_ENOMEM; /* can't do it */
}
}
/* NOTE(review): this tests '>', so the loop carries on after an
 * exact finish (*diskaddr == diskend); the overlap test above then
 * fails for the remaining subdisks -- confirm '>=' was not intended. */
if (*diskaddr > diskend) /* we're finished, */
break; /* get out of here */
}
break;
/* Striped plex: one request group with a single element for every
 * stripe block that the transfer touches */
case plex_striped:
{
while (*diskaddr < diskend) { /* until we get it all sorted out */
/* The offset of the start address from
* the start of the stripe */
stripeoffset = *diskaddr % (plex->stripesize * plex->subdisks);
/* The plex-relative address of the
* start of the stripe */
stripebase = *diskaddr - stripeoffset;
/* The number of the subdisk in which
* the start is located */
sdno = stripeoffset / plex->stripesize;
/* The offset from the beginning of the stripe
* on this subdisk */
blockoffset = stripeoffset % plex->stripesize;
sd = &SD[plex->sdnos[sdno]]; /* the subdisk in question */
if ((sd->state != sd_up) || (plex->state != plex_up)) {
enum requeststatus s;
s = checksdstate(sd, rq, *diskaddr, diskend); /* do we need to change state? */
if (s) /* give up? */
return s; /* yup */
}
rqg = allocrqg(rq, 1); /* space for the request */
if (rqg == NULL) { /* malloc failed */
bp->b_flags |= B_ERROR;
bp->b_error = ENOMEM;
biodone(bp);
return REQUEST_ENOMEM;
}
rqg->plexno = plexno;
rqe = &rqg->rqe[0]; /* point to the element */
rqe->rqg = rqg;
rqe->sdoffset = stripebase / plex->subdisks + blockoffset; /* start offset in this subdisk */
rqe->useroffset = *diskaddr - diskstart; /* The offset of the start in the user buffer */
rqe->dataoffset = 0;
rqe->datalen = min(diskend - *diskaddr, /* the amount remaining to transfer */
plex->stripesize - blockoffset); /* and the amount left in this stripe */
rqe->groupoffset = 0; /* no groups for striped plexes */
rqe->grouplen = 0;
rqe->buflen = rqe->datalen; /* buffer length is data buffer length */
rqe->flags = 0;
rqe->sdno = sd->sdno; /* put in the subdisk number */
rqe->driveno = sd->driveno;
if (rqe->sdoffset >= sd->sectors) { /* starts beyond the end of the subdisk? */
deallocrqg(rqg);
return REQUEST_EOF;
} else if (rqe->sdoffset + rqe->datalen > sd->sectors) /* ends beyond the end of the subdisk? */
rqe->datalen = sd->sectors - rqe->sdoffset; /* yes, truncate */
/* NOTE(review): rqe->buflen was set before the truncation above
 * and is not adjusted with datalen -- confirm. */
if (build_rq_buffer(rqe, plex)) { /* build the buffer */
deallocrqg(rqg);
bp->b_flags |= B_ERROR;
bp->b_error = ENOMEM;
biodone(bp);
return REQUEST_ENOMEM; /* can't do it */
}
*diskaddr += rqe->datalen; /* look at the remainder */
if (*diskaddr < diskend) { /* didn't finish the request on this stripe */
plex->multiblock++; /* count another one */
if (sdno == plex->subdisks - 1) /* last subdisk, */
plex->multistripe++; /* another stripe as well */
}
}
}
break;
/* Any other organization: complain, and return REQUEST_OK without
 * having built anything */
default:
printf("vinum: invalid plex type in bre");
}
return status;
}
/* Build up a request structure for reading volumes.
 * This function is not needed for plex reads, since there's
 * no recovery if a plex read can't be satisfied.
 *
 * rq:        the request to build; rq->bp describes the transfer
 * plexindex: index in the volume's plex table of the plex to try first
 *
 * The transfer is built piece by piece with bre().  When bre() reports
 * REQUEST_DOWN for the chosen plex, the other plexes of the volume are
 * tried for the same piece.
 *
 * Returns REQUEST_EOF or REQUEST_ENOMEM as reported by bre(),
 * REQUEST_DOWN if no plex can supply a piece, and otherwise the status
 * of the last piece. */
enum requeststatus
build_read_request(struct request *rq, /* request */
int plexindex)
{ /* index in the volume's plex table */
BROKEN_GDB;
struct buf *bp;
daddr_t startaddr; /* offset of previous part of transfer */
daddr_t diskaddr; /* offset of current part of transfer */
daddr_t diskend; /* and end offset of transfer */
int plexno; /* plex index in vinum_conf */
struct rqgroup *rqg; /* point to the request we're working on */
struct volume *vol; /* volume in question */
off_t oldstart; /* note where we started */
int recovered = 0; /* set if we recover a read */
enum requeststatus status = REQUEST_OK;
bp = rq->bp; /* buffer pointer */
diskaddr = bp->b_blkno; /* start offset of transfer */
diskend = diskaddr + (bp->b_bcount / DEV_BSIZE); /* and end offset of transfer */
/* NOTE(review): rqg is assigned here but not used again in this function */
rqg = &rq->rqg[plexindex]; /* plex request */
vol = &VOL[rq->volplex.volno]; /* point to volume */
while (diskaddr < diskend) { /* build up request components */
startaddr = diskaddr;
status = bre(rq, vol->plex[plexindex], &diskaddr, diskend); /* build up a request */
switch (status) {
case REQUEST_OK:
continue;
case REQUEST_RECOVERED:
recovered = 1;
break;
case REQUEST_EOF:
case REQUEST_ENOMEM:
return status;
/* if we get here, we have either had a failure or
* a RAID 5 recovery. We don't want to use the
* recovery, because it's expensive, so first we
* check if we have alternatives */
case REQUEST_DOWN: /* can't access the plex */
if (vol != NULL) { /* and this is volume I/O */
/* Try to satisfy the request
* from another plex */
for (plexno = 0; plexno < vol->plexes; plexno++) {
diskaddr = startaddr; /* start at the beginning again */
oldstart = startaddr; /* and note where that was */
if (plexno != plexindex) { /* don't try this plex again */
bre(rq, vol->plex[plexno], &diskaddr, diskend); /* try a request */
if (diskaddr > oldstart) { /* we satisfied another part */
recovered = 1; /* we recovered from the problem */
status = REQUEST_OK; /* don't complain about it */
break;
}
}
if (plexno == (vol->plexes - 1)) /* couldn't satisfy the request */
return REQUEST_DOWN; /* failed */
}
} else
return REQUEST_DOWN; /* bad luck */
}
/* NOTE(review): recovered is never reset, so once it is set this adds
 * to the count on every later pass that reaches this point -- confirm. */
if (recovered)
vol->recovered_reads += recovered; /* adjust our recovery count */
}
return status;
}
/* Build up a request structure for writes.
* Return 0 if all subdisks involved in the request are up, 1 if some
* subdisks are not up, and -1 if the request is at least partially
* outside the bounds of the subdisks. */
enum requeststatus
build_write_request(struct request *rq)
{ /* request */
BROKEN_GDB;
struct buf *bp;
daddr_t diskstart; /* offset of current part of transfer */
daddr_t diskend; /* and end offset of transfer */
int plexno; /* plex index in vinum_conf */
struct volume *vol; /* volume in question */
enum requeststatus status;
bp = rq->bp; /* buffer pointer */
vol = &VOL[rq->volplex.volno]; /* point to volume */
diskend = bp->b_blkno + (bp->b_bcount / DEV_BSIZE); /* end offset of transfer */
status = REQUEST_OK;
for (plexno = 0; plexno < vol->plexes; plexno++) {
diskstart = bp->b_blkno; /* start offset of transfer */
status = min(status, bre(rq, /* build requests for the plex */
vol->plex[plexno],
&diskstart,
diskend));
}
return status;
}
/* Fill in the struct buf part of a request element. */
enum requeststatus
build_rq_buffer(struct rqelement *rqe, struct plex *plex)
{
BROKEN_GDB;
struct sd *sd; /* point to subdisk */
struct volume *vol;
struct buf *bp;
struct buf *ubp; /* user (high level) buffer header */
vol = &VOL[rqe->rqg->rq->volplex.volno];
sd = &SD[rqe->sdno]; /* point to subdisk */
bp = &rqe->b;
ubp = rqe->rqg->rq->bp; /* pointer to user buffer header */
/* Initialize the buf struct */
bzero(&rqe->b, sizeof(struct buf));
bp->b_proc = ubp->b_proc; /* process pointer */
bp->b_flags = ubp->b_flags & (B_NOCACHE | B_READ | B_ASYNC); /* copy these flags from user bp */
bp->b_flags |= B_CALL | B_BUSY; /* inform us when it's done */
if (plex->state == plex_reviving)
bp->b_flags |= B_ORDERED; /* keep request order if we're reviving */
bp->b_iodone = complete_rqe; /* by calling us here */
bp->b_dev = DRIVE[rqe->driveno].dev; /* drive device */
bp->b_blkno = rqe->sdoffset + sd->driveoffset; /* start address */
bp->b_bcount = rqe->buflen << DEV_BSHIFT; /* number of bytes to transfer */
bp->b_resid = bp->b_bcount; /* and it's still all waiting */
bp->b_bufsize = bp->b_bcount; /* and buffer size */
bp->b_vp = DRIVE[rqe->driveno].vp; /* drive vnode */
bp->b_rcred = FSCRED; /* we have the file system credentials */
bp->b_wcred = FSCRED; /* we have the file system credentials */
if (rqe->flags & XFR_MALLOCED) { /* this operation requires a malloced buffer */
bp->b_data = Malloc(bp->b_bcount); /* get a buffer to put it in */
if (bp->b_data == NULL) { /* failed */
Debugger("XXX");
abortrequest(rqe->rqg->rq, ENOMEM);
return REQUEST_ENOMEM; /* no memory */
}
} else
/* Point directly to user buffer data. This means
* that we don't need to do anything when we have
* finished the transfer */
bp->b_data = ubp->b_data + rqe->useroffset * DEV_BSIZE;
return 0;
}
/* Abort a request: free resources and complete the
* user request with the specified error */
int
abortrequest(struct request *rq, int error)
{
struct buf *bp = rq->bp; /* user buffer */
bp->b_flags |= B_ERROR;
bp->b_error = error;
freerq(rq); /* free everything we're doing */
biodone(bp);
return error; /* and give up */
}
/*
 * Check that our transfer will cover the complete address space of
 * the user request.
 *
 * Returns 1 if it does, otherwise 0.  XXX No check is implemented yet:
 * the request is not examined and the answer is always 1.
 */
int
check_range_covered(struct request *rq)
{
    int covered = 1;					    /* XXX always claim full coverage */

    (void) rq;						    /* not looked at yet */
    return covered;
}
/*
 * Perform I/O on a subdisk.
 *
 * bp is the caller's buffer header; the subdisk is derived from its
 * device number.  The request fails immediately (B_ERROR set, error code
 * in b_error) if the underlying drive is not up (EIO; the subdisk is
 * also forced to sd_obsolete), if the subdisk is not in one of the
 * states up, initializing or reborn (EIO), if no memory is available for
 * the private buffer header (ENOMEM), or if trimming the transfer to
 * the end of the subdisk leaves nothing to transfer (ENOSPC).
 *
 * Otherwise a struct sdbuf is allocated, the caller's buffer header is
 * copied into it, redirected to the drive and offset by the subdisk's
 * position on the drive, and handed to the drive's strategy routine.
 * Completion is reported through sdio_done.
 */
void
sdio(struct buf *bp)
{
    int s;						    /* spl */
    struct sd *sd;
    struct sdbuf *sbp;
    daddr_t endoffset;
    struct drive *drive;

    sd = &SD[SDNO(bp->b_dev)];				    /* point to the subdisk */
    drive = &DRIVE[sd->driveno];

    if (drive->state != drive_up) {			    /* XXX until we get the states fixed */
	set_sd_state(SDNO(bp->b_dev), sd_obsolete, setstate_force);
	bp->b_flags |= B_ERROR;
	bp->b_error = EIO;
	biodone(bp);
	return;
    }
    /*
     * XXX decide which states we will really accept here.  up
     * implies it could be involved with a plex, in which
     * case we don't want to dick with it.
     */
    if ((sd->state != sd_up)
	&& (sd->state != sd_initializing)
	&& (sd->state != sd_reborn)) {			    /* we can't access it */
	bp->b_flags |= B_ERROR;
	/*
	 * The error code belongs in b_error.  Assigning it to b_flags
	 * would wipe out B_ERROR and every other flag, including the
	 * B_BUSY bit tested just below.
	 */
	bp->b_error = EIO;
	if (bp->b_flags & B_BUSY)			    /* XXX why isn't this always the case? */
	    biodone(bp);
	return;
    }
    /* Get a buffer */
    sbp = (struct sdbuf *) Malloc(sizeof(struct sdbuf));
    if (sbp == NULL) {
	bp->b_flags |= B_ERROR;
	bp->b_error = ENOMEM;
	biodone(bp);
	return;
    }
    bcopy(bp, &sbp->b, sizeof(struct buf));		    /* start with the user's buffer */
    sbp->b.b_flags |= B_CALL;				    /* tell us when it's done */
    sbp->b.b_iodone = sdio_done;			    /* here */
    sbp->b.b_dev = DRIVE[sd->driveno].dev;		    /* device */
    sbp->b.b_vp = DRIVE[sd->driveno].vp;		    /* vnode */
    sbp->b.b_blkno += sd->driveoffset;
    sbp->bp = bp;					    /* note the address of the original header */
    sbp->sdno = sd->sdno;				    /* note for statistics */
    sbp->driveno = sd->driveno;

    endoffset = bp->b_blkno + sbp->b.b_bcount / DEV_BSIZE;  /* final sector offset */
    if (endoffset > sd->sectors) {			    /* beyond the end */
	sbp->b.b_bcount -= (endoffset - sd->sectors) * DEV_BSIZE; /* trim */
	if (sbp->b.b_bcount <= 0) {			    /* nothing to transfer */
	    bp->b_resid = bp->b_bcount;			    /* nothing transferred */
	    /*
	     * XXX Grrr.  This doesn't seem to work.  Return
	     * an error after all.
	     */
	    bp->b_flags |= B_ERROR;
	    bp->b_error = ENOSPC;
	    biodone(bp);
	    Free(sbp);
	    return;
	}
    }
    if ((sbp->b.b_flags & B_READ) == 0)			    /* write */
	sbp->b.b_vp->v_numoutput++;			    /* one more output going */
#if DEBUG
    if (debug & DEBUG_ADDRESSES)
	printf(" %s dev 0x%x, sd %d, offset 0x%x, devoffset 0x%x, length %ld\n",
	    sbp->b.b_flags & B_READ ? "Read" : "Write",
	    sbp->b.b_dev,
	    sbp->sdno,
	    (u_int) (sbp->b.b_blkno - SD[sbp->sdno].driveoffset),
	    (int) sbp->b.b_blkno,
	    sbp->b.b_bcount);				    /* XXX */
    if (debug & DEBUG_NUMOUTPUT)
	printf(" vinumstart sd %d numoutput %ld\n",
	    sbp->sdno,
	    sbp->b.b_vp->v_numoutput);
#endif
    s = splbio();
    (*bdevsw[major(sbp->b.b_dev)]->d_strategy) (&sbp->b);
    splx(s);
}
/*
 * Simplified version of bounds_check_with_label.
 * Determine the size of the transfer, and make sure it is
 * within the boundaries of the partition.  Adjust transfer
 * if needed, and signal errors or early completion.
 *
 * Volumes are simpler than disk slices: they only contain
 * one component (though we call them a, b and c to make
 * system utilities happy), and they always take up the
 * complete space of the "partition".
 *
 * I'm still not happy with this: why should the label be
 * protected?  If it weren't so damned difficult to write
 * one in the first place (because it's protected), it wouldn't
 * be a problem.
 *
 * Returns:
 *   1  the transfer may proceed (b_bcount may have been truncated to
 *      fit the volume, and b_pblkno has been set from b_blkno);
 *   0  nothing to transfer: zero-length request, or a request starting
 *      exactly at the end of the volume (b_resid is set to b_bcount
 *      in that second case);
 *  -1  error: B_ERROR is set and b_error is EROFS (write would hit the
 *      protected label) or EINVAL (request lies outside the volume).
 */
int
vinum_bounds_check(struct buf *bp, struct volume *vol)
{
    int maxsize = vol->size;				    /* size of the partition (sectors) */
    int size = (bp->b_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT; /* size of this request (sectors) */

    /* Would this transfer overwrite the disk label? */
    if (bp->b_blkno <= LABELSECTOR			    /* starts before or at the label */
#if LABELSECTOR != 0
	&& bp->b_blkno + size > LABELSECTOR		    /* and finishes after */
#endif
	&& (!(vol->flags & VF_RAW))			    /* and it's not raw */
	&& major(bp->b_dev) == BDEV_MAJOR		    /* and it's the block device */
	&& (bp->b_flags & B_READ) == 0			    /* and it's a write */
	/*
	 * The negation must apply to the masked value: "!vol->flags & mask"
	 * would negate the whole flag word first and then mask the 0/1
	 * result, which does not test the label-write permission bits.
	 */
	&& (!(vol->flags & (VF_WLABEL | VF_LABELLING)))) {  /* and we're not allowed to write the label */
	bp->b_error = EROFS;				    /* read-only */
	bp->b_flags |= B_ERROR;
	return -1;
    }
    if (size == 0)					    /* no transfer specified, */
	return 0;					    /* treat as EOF */

    /* beyond partition? */
    if (bp->b_blkno < 0					    /* negative start */
	|| bp->b_blkno + size > maxsize) {		    /* or goes beyond the end of the partition */
	/* if exactly at end of disk, return an EOF */
	if (bp->b_blkno == maxsize) {
	    bp->b_resid = bp->b_bcount;
	    return 0;
	}
	/* or truncate if part of it fits */
	size = maxsize - bp->b_blkno;
	if (size <= 0) {				    /* nothing to transfer */
	    bp->b_error = EINVAL;
	    bp->b_flags |= B_ERROR;
	    return -1;
	}
	bp->b_bcount = size << DEV_BSHIFT;
    }
    bp->b_pblkno = bp->b_blkno;
    return 1;
}
/*
 * Allocate a request group with room for the given number of request
 * elements and append it to the chain of groups belonging to rq.
 *
 * The new group is zeroed, points back at rq, and records the element
 * count.  Returns the new group, or NULL if the allocation failed (in
 * which case Debugger() is entered first and rq is left untouched).
 */
struct rqgroup *
allocrqg(struct request *rq, int elements)
{
    int bytes = sizeof(struct rqgroup) + elements * sizeof(struct rqelement);
    struct rqgroup *grp = (struct rqgroup *) Malloc(bytes); /* the one we're going to allocate */

    if (grp == NULL) {					    /* malloc failed */
	Debugger("XXX");
	return grp;
    }
    bzero(grp, bytes);					    /* no old junk */
    grp->rq = rq;					    /* point back to the parent request */
    grp->count = elements;				    /* number of requests in the group */

    if (rq->rqg)					    /* we already have requests */
	rq->lrqg->next = grp;				    /* hang it off the end */
    else						    /* first request */
	rq->rqg = grp;					    /* at the start */
    rq->lrqg = grp;					    /* this one is the last in the list */
    return grp;
}
/* Deallocate a request group out of a chain. We do
* this by linear search: the chain is short, this
* almost never happens, and currently it can only
* happen to the first member of the chain. */
void
deallocrqg(struct rqgroup *rqg)
{
struct rqgroup *rqgc = rqg->rq->rqg; /* point to the request chain */
if (rqg->rq->rqg == rqg) /* we're first in line */
rqg->rq->rqg = rqg->next; /* unhook ourselves */
else {
while (rqgc->next != rqg) /* find the group */
rqgc = rqgc->next;
rqgc->next = rqg->next;
}
Free(rqgc);
}
/* Character device interface */

/*
 * Character device read entry point.  Hands the transfer to physio()
 * with vinumstrategy as the strategy routine, minphys as the size
 * limiter, and 1 as the direction argument.  ioflag is not used.
 * Returns whatever physio() returns.
 */
int
vinumread(dev_t dev, struct uio *uio, int ioflag)
{
    int error;

    error = physio(vinumstrategy, NULL, dev, 1, minphys, uio);
    return error;
}
/*
 * Character device write entry point.  Hands the transfer to physio()
 * with vinumstrategy as the strategy routine, minphys as the size
 * limiter, and 0 as the direction argument.  ioflag is not used.
 * Returns whatever physio() returns.
 */
int
vinumwrite(dev_t dev, struct uio *uio, int ioflag)
{
    int error;

    error = physio(vinumstrategy, NULL, dev, 0, minphys, uio);
    return error;
}

159
sys/modules/vinum/request.h Normal file
View File

@ -0,0 +1,159 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: request.h,v 1.10 1998/08/03 07:15:26 grog Exp grog $
*/
/* Information needed to set up a transfer */
/* RQELTS is the default number of request elements per
* transfer. struct buf is surprisingly big (about 300
* bytes), and every struct rqelement embeds one, so this
* value is really important. Most requests
* don't need more than 2 subrequests per
* plex. The table is automatically extended if
* this value is too small. */
#define RQELTS 2 /* default of 2 requests per transfer */
/*
 * Flags describing a transfer.  The individual flags each occupy one
 * bit; the names at the end are masks combining several of them.
 */
enum xferinfo {
    /* kinds of operation */
    XFR_NORMAL_READ = 1 << 0,
    XFR_NORMAL_WRITE = 1 << 1,				    /* write request in normal mode */
    XFR_RECOVERY_READ = 1 << 2,
    XFR_DEGRADED_WRITE = 1 << 3,
    XFR_PARITYLESS_WRITE = 1 << 4,

    /* properties of the request */
    XFR_NO_PARITY_STRIPE = 1 << 5,			    /* parity stripe is not available */
    XFR_DATA_BLOCK = 1 << 6,				    /* data block in request */
    XFR_PARITY_BLOCK = 1 << 7,				    /* parity block in request */
    XFR_BAD_SUBDISK = 1 << 8,				    /* this subdisk is dead */
    XFR_MALLOCED = 1 << 9,				    /* this buffer is malloced */
#if DEBUG
    XFR_PHASE2 = 1 << 11,				    /* documentation only: 2nd phase write */
#endif
    XFR_REVIVECONFLICT = 1 << 12,			    /* possible conflict with a revive operation */

    /* operations that need a parity block */
    XFR_PARITYOP = (XFR_NORMAL_WRITE | XFR_RECOVERY_READ | XFR_DEGRADED_WRITE),

    /* operations that use the group parameters */
    XFR_GROUPOP = (XFR_DEGRADED_WRITE | XFR_RECOVERY_READ),

    /* operations that use the data parameters */
    XFR_DATAOP = (XFR_NORMAL_READ | XFR_NORMAL_WRITE | XFR_PARITYLESS_WRITE),

    /* operations requiring read before write */
    XFR_RBW = (XFR_NORMAL_WRITE | XFR_DEGRADED_WRITE),

    /* operations that need a malloced buffer */
    XFR_NEEDS_MALLOC = (XFR_NORMAL_WRITE | XFR_RECOVERY_READ | XFR_DEGRADED_WRITE)
};
/* Describe one low-level request, part
* of a high-level request. This is an
* extended struct buf buffer, and the first
* element *must* be a struct buf. We pass this structure
* to the I/O routines instead of a struct buf in order
* to be able to locate the high-level request when it
* completes.
*
* All offsets and lengths are in "blocks", i.e. sectors */
struct rqelement {
struct buf b; /* buf structure; must stay the first member (see above) */
struct rqgroup *rqg; /* pointer to our group */
/* Information about the transfer */
daddr_t sdoffset; /* offset in subdisk */
int useroffset; /* offset in user buffer of normal data */
/* dataoffset and datalen refer to "individual"
* data transfers (normal read, parityless write)
* and also degraded write.
*
* groupoffset and grouplen refer to the other
* "group" operations (normal write, recovery read)
* Both the offsets are relative to the start of the
* local buffer */
int dataoffset; /* offset in buffer of the normal data */
int groupoffset; /* offset in buffer of group data */
short datalen; /* length of normal data (sectors) */
short grouplen; /* length of group data (sectors) */
short buflen; /* total buffer length to allocate (sectors) */
short flags; /* really enum xferinfo (see above) */
/* Ways to find other components */
short sdno; /* subdisk number (index into SD) */
short driveno; /* drive number (index into DRIVE) */
};
/* A group of requests built to satisfy a certain
* component of a user request. Groups belonging to
* the same request are chained through "next". The
* request elements follow the fixed part of the
* structure directly; allocrqg allocates the space
* for them together with the group. */
struct rqgroup {
struct rqgroup *next; /* pointer to next group */
struct request *rq; /* pointer to the request */
short count; /* number of requests in this group */
short active; /* and number active */
short plexno; /* index of plex */
int badsdno; /* index of bad subdisk or -1 */
enum xferinfo flags; /* description of transfer */
struct rqelement rqe[0]; /* and the elements of this request (count of them) */
};
/* Describe one high-level request and the
* work we have to do to satisfy it */
struct request {
struct buf *bp; /* pointer to the high-level request */
int flags;
union {
int volno; /* volume index */
int plexno; /* or plex index */
} volplex;
int error; /* current error indication */
short isplex; /* set if this is a plex request */
short active; /* number of subrequests still active */
struct rqgroup *rqg; /* pointer to the first group of requests */
struct rqgroup *lrqg; /* and to the last group of requests (allocrqg appends here) */
struct request *next; /* link of waiting requests */
};
/* Extended buffer header for subdisk I/O. Includes
* a pointer to the user I/O request. The struct buf
* comes first; it is the part that is handed to the
* drive's strategy routine. */
struct sdbuf {
struct buf b; /* our buffer */
struct buf *bp; /* and pointer to parent (the user's buffer header) */
short driveno; /* drive index */
short sdno; /* and subdisk index */
};
/*
 * Status values returned by bre() and the other request-building
 * routines.
 *
 * Be careful with these: the numeric order is significant.  The values
 * are in order of increasing seriousness, and some routines check for
 * > REQUEST_RECOVERED to indicate a completely failed request.
 */
enum requeststatus {
    REQUEST_OK = 0,					    /* request built OK */
    REQUEST_RECOVERED = 1,				    /* request OK, but involves RAID5 recovery */
    REQUEST_EOF = 2,					    /* request failed: outside plex */
    REQUEST_DOWN = 3,					    /* request failed: subdisk down */
    REQUEST_ENOMEM = 4					    /* ran out of memory */
};

128
sys/modules/vinum/revive.c Normal file
View File

@ -0,0 +1,128 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: revive.c,v 1.1 1998/08/14 06:16:59 grog Exp grog $
*/
#define REALLYKERNEL
#include "vinumhdr.h"
#include "request.h"
/* Revive a block of a plex: read the next block from
* the plex's volume and write it to the plex, starting
* at the plex's current revive pointer (plex->revived).
*
* Return an error indication:
*   EAGAIN  successful copy, but more blocks remain to be copied
*   0       successful copy, and the whole plex is now revived
*   ENOMEM  no buffer could be obtained
*   other   the b_error value of the failed read or write
*
* XXX We should specify a block size here. At the moment,
* just take a default value. FIXME */
int
revive_block(int plexno)
{
struct plex *plex = &PLEX[plexno];
struct buf *bp;
int error = EAGAIN;
int size;
int s; /* priority level */
/* First call for this plex: choose the revive block size */
if (plex->revive_blocksize == 0) {
if (plex->stripesize != 0) /* we're striped, don't revive more than */
plex->revive_blocksize = min(DEFAULT_REVIVE_BLOCKSIZE, plex->stripesize); /* one block at a time */
else
plex->revive_blocksize = DEFAULT_REVIVE_BLOCKSIZE;
}
/* Transfer size in bytes: one revive block, or whatever is left of the plex */
size = min(plex->revive_blocksize, plex->length - plex->revived) << DEV_BSHIFT;
s = splbio();
/* Get a buffer */
bp = geteblk(size);
if (bp == NULL) {
splx(s);
return ENOMEM;
}
if (bp->b_qindex != 0) /* on a queue, */
bremfree(bp); /* remove it */
splx(s);
/* Amount to transfer: block size, unless it
* would overlap the end */
bp->b_bufsize = size;
bp->b_bcount = bp->b_bufsize;
bp->b_resid = 0x0;
bp->b_blkno = plex->revived; /* we've got this far */
/* XXX what about reviving anonymous plexes? */
/* First, read the data from the volume. We don't
* care which plex, that's bre's job */
bp->b_dev = VINUMBDEV(plex->volno, 0, 0, VINUM_VOLUME_TYPE); /* create the device number */
bp->b_flags = B_BUSY | B_READ;
vinumstart(bp, 1);
biowait(bp);
if (bp->b_flags & B_ERROR)
error = bp->b_error;
else
/* Now write to the plex */
{
s = splbio();
if (bp->b_qindex != 0) /* on a queue, */
bremfree(bp); /* remove it */
splx(s);
bp->b_dev = VINUMBDEV(plex->volno, plex->volplexno, 0, VINUM_PLEX_TYPE); /* create the device number */
bp->b_flags = B_BUSY; /* make this a write */
bp->b_resid = 0x0;
vinumstart(bp, 1);
biowait(bp);
if (bp->b_flags & B_ERROR)
error = bp->b_error;
else {
plex->revived += bp->b_bcount >> DEV_BSHIFT; /* moved this much further down */
if (plex->revived >= plex->length) { /* finished */
plex->revived = 0;
plex->state = plex_up; /* do we need to do more? */
if (plex->volno >= 0) /* we have a volume, */
set_volume_state(plex->volno, volume_up, 0);
printf("vinum: plex %s is %s\n", plex->name, plex_state(plex->state));
save_config(); /* and save the updated configuration */
error = 0; /* we're done */
}
}
/* Release the requests that were queued on the plex's wait list.
* Note that this is only reached when the read succeeded.
* NOTE(review): the list is advanced by reading waitlist->next
* after launch_requests has been given that request; confirm that
* launch_requests cannot free or requeue it before the read. */
while (plex->waitlist) { /* we have waiting requests */
launch_requests(plex->waitlist, 1); /* do them now */
plex->waitlist = plex->waitlist->next; /* and move on to the next */
}
}
if (bp->b_qindex == 0) /* not on a queue, */
brelse(bp); /* is this kosher? */
return error;
}

755
sys/modules/vinum/state.c Normal file
View File

@ -0,0 +1,755 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: state.c,v 2.6 1998/08/19 08:04:47 grog Exp grog $
*/
#define REALLYKERNEL
#include "vinumhdr.h"
#include "request.h"
/*
 * Update drive state.
 *
 * Nothing happens (and 0 is returned) if the drive is unallocated, if
 * it is already in the requested state, or if it is to be taken down
 * while still open and no flags were given.
 *
 * Otherwise the new state is set.  A drive going down is closed first;
 * a drive going up or coming up that has no vnode yet is initialized
 * via init_drive(), which may change the state again.  Unless
 * setstate_norecurse is given, all subdisks on the drive are then
 * forced down, the configuration is saved, and 1 is returned.  With
 * setstate_norecurse the state is still changed, but 0 is returned.
 */
int
set_drive_state(int driveno, enum drivestate state, int flags)
{
    struct drive *drive = &DRIVE[driveno];
    int previous = drive->state;
    int i;

    if (previous == drive_unallocated)			    /* no drive to do anything with, */
	return 0;
    if (state == previous)				    /* don't change it if it's not different */
	return 0;

    if (state == drive_down) {				    /* the drive's going down */
	if ((flags == 0) && (drive->opencount != 0))	    /* still in use, and not forced: */
	    return 0;					    /* don't do it */
	close_drive(drive);
	drive->state = state;
	printf("vinum: drive %s is %s\n", drive->label.name, drive_state(drive->state));
    }
    drive->state = state;				    /* set the state */

    if (((drive->state == drive_up)
	    || (drive->state == drive_coming_up))
	&& (drive->vp == NULL))				    /* should be open, but we're not */
	init_drive(drive);				    /* which changes the state again */

    if ((flags & setstate_norecurse) != 0)		    /* we don't want to recurse */
	return 0;

    for (i = 0; i < vinum_conf.subdisks_used; i++) {	    /* find this drive's subdisks */
	if (SD[i].driveno == driveno)			    /* belongs to this drive */
	    set_sd_state(i, sd_down, setstate_force | setstate_recursing); /* take it down */
    }
    save_config();					    /* and save the updated configuration */
    return 1;
}
/*
 * Try to set the subdisk state.  Return 1 if state changed to
 * what we wanted, -1 if it changed to something else, and 0
 * if no change.
 *
 * This routine is called both from the user (up, down states
 * only) and internally.
 *
 * A subdisk counts as attached to a plex when sd->plexno >= 0.
 *
 * Rules applied:
 *  - a subdisk without allocated space (driveoffset < 0) can only be
 *    down;
 *  - taking an attached subdisk down requires setstate_force;
 *  - a subdisk can't come up unless its drive is up; depending on its
 *    current state it may end up reborn or empty instead of up (return
 *    value -1), or may be refused altogether;
 *  - any other target state requires setstate_force.
 *
 * Unless setstate_norecurse is given, the state of the owning plex (if
 * any) is updated afterwards.  The configuration is saved unless we are
 * configuring or were called recursively.
 */
int
set_sd_state(int sdno, enum sdstate state, enum setstateflags flags)
{
    struct sd *sd = &SD[sdno];
    int oldstate = sd->state;
    int status = 1;					    /* status to return */

    if (state == oldstate)
	return 0;					    /* no change */
    if (sd->state == sd_unallocated)			    /* no subdisk to do anything with, */
	return 0;

    if (sd->driveoffset < 0) {				    /* not allocated space */
	sd->state = sd_down;
	if (state != sd_down)
	    return -1;
    } else {						    /* space allocated */
	switch (state) {
	case sd_down:
	    /* The mask must be applied before the test: "!flags & x" tests the wrong thing */
	    if (((flags & setstate_force) == 0)		    /* but gently */
		&& (sd->plexno >= 0))			    /* and we're attached to a plex, */
		return 0;				    /* don't do it */
	    break;

	case sd_up:
	    if (DRIVE[sd->driveno].state != drive_up)	    /* can't bring the sd up if the drive isn't, */
		return 0;				    /* not even by force */
	    switch (sd->state) {
	    case sd_obsolete:
	    case sd_down:				    /* been down, no data lost */
		/*
		 * plexno 0 is a valid plex and a negative plexno means
		 * "not attached", so test for >= 0 rather than non-zero;
		 * otherwise PLEX[] would be indexed with a negative value.
		 */
		if ((sd->plexno >= 0)			    /* we're associated with a plex */
		    && ((PLEX[sd->plexno].state < plex_firstup) /* and it's not up */
			|| (PLEX[sd->plexno].subdisks > 1))) /* or it has more than one subdisk */
		    break;
		/*
		 * XXX Get this right: make sure that other plexes in
		 * the volume cover this address space, otherwise
		 * we make this one sd_up
		 */
		sd->state = sd_reborn;			    /* here it is again */
		printf("vinum: subdisk %s is %s, not %s\n", sd->name, sd_state(sd->state), sd_state(state));
		status = -1;
		break;

	    case sd_init:				    /* brand new */
		if (flags & setstate_configuring)	    /* we're doing this while configuring */
		    break;
		sd->state = sd_empty;			    /* nothing in it */
		printf("vinum: subdisk %s is %s, not %s\n", sd->name, sd_state(sd->state), sd_state(state));
		status = -1;
		break;

	    case sd_initializing:
		break;					    /* go on and do it */

	    case sd_empty:
		if ((sd->plexno >= 0)			    /* we're associated with a plex */
		    && ((PLEX[sd->plexno].state < plex_firstup) /* and it's not up */
			|| (PLEX[sd->plexno].subdisks > 1))) /* or it has more than one subdisk */
		    break;
		return 0;				    /* can't do it */

	    default:					    /* can't do it */
		/*
		 * There's no way to bring subdisks up directly from
		 * other states.  First they need to be initialized
		 * or revived
		 */
		return 0;
	    }
	    break;

	default:					    /* other ones, only internal with force */
	    /* "flags & x == 0" parses as "flags & (x == 0)", which is never true */
	    if ((flags & setstate_force) == 0)		    /* no force? What's this? */
		return 0;				    /* don't do it */
	}
    }

    /*
     * Only install the requested state if we haven't already settled on
     * a substitute above (status -1); otherwise the substitute would be
     * overwritten and the return value would not match the real state.
     */
    if (status == 1) {
	sd->state = state;
	printf("vinum: subdisk %s is %s\n", sd->name, sd_state(sd->state));
    }
    if (((flags & setstate_norecurse) == 0)
	&& (sd->plexno >= 0))				    /* only if there is a plex to update */
	set_plex_state(sd->plexno, plex_up, setstate_recursing); /* update plex state */
    if ((flags & (setstate_configuring | setstate_recursing)) == 0) /* save config now */
	save_config();
    return status;
}
/* Called from request routines when they find
* a subdisk which is not kosher. Decide whether
* it warrants changing the state. Return
* REQUEST_DOWN if we can't use the subdisk,
* REQUEST_OK if we can. */
enum requeststatus
checksdstate(struct sd *sd, struct request *rq, daddr_t diskaddr, daddr_t diskend)
{
struct plex *plex = &PLEX[sd->plexno];
int writeop = (rq->bp->b_flags & B_READ) == 0; /* note if we're writing */
/* first, see if the plex wants to be accessed */
switch (plex->state) {
case plex_reviving:
/* When writing, we'll write anything that starts
* up to the current revive pointer, but we'll
* only accept a read which finishes before the
* current revive pointer.
*/
if ((writeop && (diskaddr > plex->revived)) /* write starts after current revive pointer */
||((!writeop) && (diskend >= plex->revived))) { /* or read ends after current revive pointer */
if (writeop) { /* writing to a consistent down disk */
if (DRIVE[sd->driveno].state == drive_up)
set_sd_state(sd->sdno, sd_stale, setstate_force); /* it's not consistent now */
else
set_sd_state(sd->sdno, sd_obsolete, setstate_force); /* it's not consistent now */
}
return REQUEST_DOWN; /* that part of the plex is still down */
} else if (diskend >= plex->revived) /* write finishes beyond revive pointer */
rq->flags |= XFR_REVIVECONFLICT; /* note a potential conflict */
/* FALLTHROUGH */
case plex_up:
case plex_degraded:
case plex_flaky:
/* We can access the plex: let's see
* how the subdisk feels */
switch (sd->state) {
case sd_up:
return REQUEST_OK;
case sd_reborn:
if (writeop)
return REQUEST_OK; /* always write to a reborn disk */
/* Handle the mapping. We don't want to reject
* a read request to a reborn subdisk if that's
* all we have. XXX */
return REQUEST_DOWN;
case sd_down:
case sd_crashed:
if (writeop) { /* writing to a consistent down disk */
if (DRIVE[sd->driveno].state == drive_up)
set_sd_state(sd->sdno, sd_stale, setstate_force); /* it's not consistent now */
else
set_sd_state(sd->sdno, sd_obsolete, setstate_force); /* it's not consistent now */
}
return REQUEST_DOWN; /* and it's down one way or another */
default:
return REQUEST_DOWN;
}
default:
return REQUEST_DOWN;
}
}
/*
 * Append a region (offset, length) to the plex's table of defective
 * regions, growing the table by PLEX_REGION_TABLE_SIZE entries via
 * EXPAND when the new entry does not fit.
 */
void
add_defective_region(struct plex *plex, off_t offset, size_t length)
{
    struct plexregion *slot;

    /* XXX get this ordered, and coalesce regions if necessary */
    plex->defective_regions++;				    /* one more entry */
    if (plex->defective_regions > plex->defective_region_count)
	EXPAND(plex->defective_region,
	    struct plexregion,
	    plex->defective_region_count,
	    PLEX_REGION_TABLE_SIZE);

    /* Take the address only now: the table may have been replaced above */
    slot = &plex->defective_region[plex->defective_regions - 1];
    slot->offset = offset;
    slot->length = length;
}
/*
 * Append a region (offset, length) to the plex's table of unmapped
 * regions, growing the table by PLEX_REGION_TABLE_SIZE entries via
 * EXPAND when the new entry does not fit.
 */
void
add_unmapped_region(struct plex *plex, off_t offset, size_t length)
{
    struct plexregion *slot;

    plex->unmapped_regions++;				    /* one more entry */
    if (plex->unmapped_regions > plex->unmapped_region_count)
	EXPAND(plex->unmapped_region,
	    struct plexregion,
	    plex->unmapped_region_count,
	    PLEX_REGION_TABLE_SIZE);

    /* Take the address only now: the table may have been replaced above */
    slot = &plex->unmapped_region[plex->unmapped_regions - 1];
    slot->offset = offset;
    slot->length = length;
}
/*
 * Rebuild the unmapped and defective region tables of a plex, and
 * take the plex down if we find a configuration error.
 *
 * Both tables are discarded first.  The plex's subdisks are then
 * visited in table order, comparing each one's offset in the plex with
 * the end of the previous one:
 *  - a subdisk starting before that point overlaps: complain and force
 *    the plex down;
 *  - a subdisk starting after it leaves a gap, recorded as unmapped;
 *  - a subdisk starting exactly there, whose state is below sd_reborn,
 *    is recorded as defective.
 *
 * NOTE(review): because these are alternatives of one if/else chain, a
 * subdisk that follows a gap is never checked for being defective.
 * Confirm whether that is intended.
 */
void
rebuild_plex_unmappedlist(struct plex *plex)
{
    int i;
    struct sd *sd;
    int lastsdend = 0;					    /* end offset of last subdisk */

    /* Throw away the old tables: we're going to rebuild them */
    if (plex->unmapped_region != NULL) {
	Free(plex->unmapped_region);
	plex->unmapped_region = NULL;
	plex->unmapped_regions = 0;
	plex->unmapped_region_count = 0;
    }
    if (plex->defective_region != NULL) {
	Free(plex->defective_region);
	plex->defective_region = NULL;
	plex->defective_regions = 0;
	plex->defective_region_count = 0;
    }

    for (i = 0; i < plex->subdisks; i++) {
	sd = &SD[plex->sdnos[i]];
	if (sd->plexoffset < lastsdend) {		    /* overlap */
	    printf("vinum: Plex %s, subdisk %s overlaps previous\n", plex->name, sd->name);
	    set_plex_state(plex->plexno, plex_down, setstate_force); /* don't allow that */
	} else if (sd->plexoffset > lastsdend) {	    /* gap */
	    add_unmapped_region(plex, lastsdend, sd->plexoffset - lastsdend);
	} else if (sd->state < sd_reborn) {		    /* this part defective */
	    add_defective_region(plex, sd->plexoffset, sd->sectors);
	}
	lastsdend = sd->plexoffset + sd->sectors;
    }
}
/* return a state map for the subdisks of a plex */
enum sdstates
sdstatemap(struct plex *plex, int *sddowncount)
{
int sdno;
enum sdstates statemap = 0; /* note the states we find */
*sddowncount = 0; /* no subdisks down yet */
for (sdno = 0; sdno < plex->subdisks; sdno++) {
struct sd *sd = &SD[plex->sdnos[sdno]]; /* point to the subdisk */
switch (sd->state) {
case sd_empty:
statemap |= sd_emptystate;
(*sddowncount)++; /* another unusable subdisk */
break;
case sd_init:
statemap |= sd_initstate;
(*sddowncount)++; /* another unusable subdisk */
break;
case sd_down:
statemap |= sd_downstate;
(*sddowncount)++; /* another unusable subdisk */
break;
case sd_crashed:
statemap |= sd_crashedstate;
(*sddowncount)++; /* another unusable subdisk */
break;
case sd_obsolete:
statemap |= sd_obsolete;
(*sddowncount)++; /* another unusable subdisk */
break;
case sd_stale:
statemap |= sd_stalestate;
(*sddowncount)++; /* another unusable subdisk */
break;
case sd_reborn:
statemap |= sd_rebornstate;
break;
case sd_up:
statemap |= sd_upstate;
break;
default:
statemap |= sd_otherstate;
break;
}
}
return statemap;
}
/* determine the state of the volume relative to this plex */
enum volplexstate
vpstate(struct plex *plex)
{
struct volume *vol;
enum volplexstate state = volplex_onlyusdown; /* state to return */
int plexno;
if (plex->volno < 0) /* not associated with a volume */
return volplex_onlyusdown; /* assume the worst */
vol = &VOL[plex->volno]; /* point to our volume */
for (plexno = 0; plexno < vol->plexes; plexno++) {
if (&PLEX[vol->plex[plexno]] == plex) { /* us */
if (PLEX[vol->plex[plexno]].state == plex_up) /* are we up? */
state |= volplex_onlyus; /* yes */
} else {
if (PLEX[vol->plex[plexno]].state == plex_up) /* not us */
state |= volplex_otherup; /* and when they were up, they were up */
else
state |= volplex_alldown; /* and when they were down, they were down */
}
}
return state; /* and when they were only halfway up */
} /* they were neither up nor down */
/*
 * Check if all bits b are set in a.
 * Returns 1 if every bit set in b is also set in a, otherwise 0.
 */
int allset(int a, int b);

int
allset(int a, int b)
{
    int missing = b & ~a;				    /* bits wanted but not present */

    return missing == 0;
}
/* Update the state of a plex dependent on its subdisks.
* Also rebuild the unmapped_region and defective_region table */
int
set_plex_state(int plexno, enum plexstate state, enum setstateflags flags)
{
int sddowncount = 0; /* number of down subdisks */
struct plex *plex = &PLEX[plexno]; /* point to our plex */
enum plexstate oldstate = plex->state;
enum volplexstate vps = vpstate(plex); /* how do we compare with the other plexes? */
enum sdstates statemap = sdstatemap(plex, &sddowncount); /* get a map of the subdisk states */
if ((flags & setstate_force) && (oldstate == state)) /* we're there already, */
return 0; /* no change */
if (plex->state == plex_unallocated) /* no plex to do anything with, */
return 0;
switch (state) {
case plex_up:
if ((plex->state == plex_initializing) /* we're initializing */
&&(statemap != sd_upstate)) /* but SDs aren't up yet */
return 0; /* do nothing */
/* We don't really care what our state was before
* if we want to come up. We rely entirely on the
* state of our subdisks and our volume */
switch (vps) {
case volplex_onlyusdown:
case volplex_alldown: /* another plex is down, and so are we */
if (statemap == sd_upstate) { /* all subdisks ready for action */
if ((plex->state == plex_init) /* we're brand spanking new */
&&(VOL[plex->volno].flags & VF_CONFIG_SETUPSTATE)) { /* and we consider that up */
/* Conceptually, an empty plex does not contain valid data,
* but normally we'll see this state when we have just
* created a plex, and it's either consistent from earlier,
* or we don't care about the previous contents (we're going
* to create a file system or use it for swap).
*
* We need to do this in one swell foop: on the next call
* we will no longer be just empty.
*
* We'll still come back to this function for the remaining
* plexes in the volume. They'll be up already, so that
* doesn't change anything, but it's not worth the additional
* code to stop doing it. */
struct volume *vol = &VOL[plex->volno];
int plexno;
for (plexno = 0; plexno < vol->plexes; plexno++)
PLEX[vol->plex[plexno]].state = plex_up;
}
plex->state = plex_up; /* bring up up, anyway */
} else
plex->state = plex_down;
break;
case volplex_onlyusup: /* only we are up: others are down */
case volplex_onlyus: /* we're up and alone */
if ((statemap == sd_upstate) /* subdisks all up */
||(statemap == sd_emptystate)) /* or all empty */
plex->state = plex_up; /* go for it */
else if ((statemap & (sd_upstate | sd_reborn)) == statemap) /* all up or reborn, */
plex->state = plex_flaky;
else if (statemap & (sd_upstate | sd_reborn)) /* some up or reborn, */
plex->state = plex_degraded; /* so far no corruption */
else
plex->state = plex_faulty;
break;
case volplex_otherup: /* another plex is up */
case volplex_otherupdown: /* other plexes are up and down */
if ((statemap == sd_upstate) /* subdisks all up */
||(statemap == sd_emptystate) /* or all empty */
) {
/* Is the data in all subdisks valid? */
if (statemap == statemap & (sd_downstate | sd_rebornstate | sd_upstate))
break; /* yes, we can bring the plex up */
plex->state = plex_reviving; /* we need reviving */
return EAGAIN;
} else
plex->state = plex_faulty; /* still in error */
break;
case volplex_allup: /* all plexes are up */
case volplex_someup:
if ((statemap & (sd_upstate | sd_reborn)) == statemap) /* all up or reborn, */
break; /* no change */
else
plex->state = plex_degraded; /* we're not all there */
}
if (plex->state != oldstate)
break;
return 0; /* no change */
case plex_down: /* want to take it down */
if (((vps == volplex_onlyus) /* we're the only one up */
||(vps == volplex_onlyusup)) /* we're the only one up */
&&(!(flags & setstate_force))) /* and we don't want to use force */
return 0; /* can't do it */
plex->state = state; /* do it */
break;
/* This is only requested by the driver.
* Trust ourselves */
case plex_faulty:
plex->state = state; /* do it */
break;
case plex_initializing:
/* XXX consider what safeguards we need here */
if ((flags & setstate_force) == 0)
return 0;
plex->state = state; /* do it */
break;
/* What's this? */
default:
return 0;
}
printf("vinum: plex %s is %s\n", plex->name, plex_state(plex->state));
/* Now see what we have left, and whether
* we're taking the volume down */
if (plex->volno >= 0) { /* we have a volume */
struct volume *vol = &VOL[plex->volno];
vps = vpstate(plex); /* get our combined state again */
if ((flags & setstate_norecurse) == 0) { /* we can recurse */
if ((vol->state == volume_up)
&& (vps == volplex_alldown)) /* and we're all down */
set_volume_state(plex->volno, volume_down, setstate_recursing); /* take our volume down */
else if ((vol->state == volume_down)
&& (vps & (volplex_otherup | volplex_onlyusup))) /* and at least one is up */
set_volume_state(plex->volno, volume_up, setstate_recursing); /* bring our volume up */
}
}
if ((flags & (setstate_configuring | setstate_recursing)) == 0) /* save config now */
save_config();
return 1;
}
/*
 * Update the state of a volume dependent on its plexes.
 *
 * volno: index of the volume in VOL[]
 * state: requested state; only volume_up and volume_down are acted upon
 * flags: setstate_force allows an open volume to be taken down;
 *        setstate_configuring and setstate_recursing suppress the
 *        configuration save
 *
 * A volume comes up only if at least one of its plexes is completely up.
 * It goes down only if it is not open, or if force is specified.
 *
 * Returns 1 if the state was changed, otherwise 0.
 */
int
set_volume_state(int volno, enum volumestate state, enum setstateflags flags)
{
    int plexno;
    enum plexstates {
	plex_downstate = 1,				    /* found a plex which is down */
	plex_degradedstate = 2,				    /* found a plex which is halfway up */
	plex_upstate = 4				    /* found a plex which is completely up */
    };
    int plexstatemap = 0;				    /* note the states we find */
    int permitted = 0;					    /* set if the transition is allowed */
    struct volume *vol = &VOL[volno];			    /* point to our volume */

    if (vol->state == state)				    /* we're there already */
	return 0;					    /* no change */
    if (vol->state == volume_unallocated)		    /* no volume to do anything with, */
	return 0;

    for (plexno = 0; plexno < vol->plexes; plexno++) {
	struct plex *plex = &PLEX[vol->plex[plexno]];	    /* point to the plex */

	switch (plex->state) {
	case plex_degraded:
	case plex_flaky:
	case plex_reviving:
	    plexstatemap |= plex_degradedstate;
	    break;

	case plex_up:
	    plexstatemap |= plex_upstate;
	    break;

	default:
	    plexstatemap |= plex_downstate;
	    break;
	}
    }

    if (state == volume_up) {				    /* want to come up */
	/* We need a plex which is completely up.
	 * Here we should check whether we have enough
	 * coverage for the complete volume. Writeme XXX */
	permitted = (plexstatemap & plex_upstate) != 0;
    } else if (state == volume_down) {			    /* want to go down */
	/* The parentheses matter: `flags & setstate_force != 0' would
	 * parse as `flags & (setstate_force != 0)' */
	permitted = (vol->opencount == 0)		    /* not open */
	    || ((flags & setstate_force) != 0);		    /* or we're forcing */
    }
    if (!permitted)
	return 0;					    /* no change */

    vol->state = state;					    /* did it */
    printf("vinum: volume %s is %s\n", vol->name, volume_state(vol->state));
    if ((flags & (setstate_configuring | setstate_recursing)) == 0) /* save config now */
	save_config();
    return 1;
}
/*
 * Start an object, in other words do what we can to get it up.
 * This is called from vinumioctl (VINUMSTART).
 *
 * data: the ioctl message; data->type selects the kind of object and
 *       data->index its number. The same memory is reused for the reply,
 *       so the index is copied before anything is written.
 *
 * Error indications are returned via the reply: error is 0 if the object
 * is up afterwards and EINVAL if it is not, or if the object type is
 * unknown. For a plex that is currently reviving, one more block is
 * revived and the result of revive_block() is returned instead.
 */
void
start_object(struct vinum_ioctl_msg *data)
{
    int started;					    /* nonzero if the object really is up */
    int objindex = data->index;				    /* data gets overwritten */
    struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* format for returning replies */

    /* The return values of the set_*_state functions are not used:
     * we report on the state the object ends up in */
    switch (data->type) {
    case drive_object:
	(void) set_drive_state(objindex, drive_up, setstate_none);
	started = DRIVE[objindex].state == drive_up;
	break;

    case sd_object:
	(void) set_sd_state(objindex, sd_up, setstate_none);
	started = SD[objindex].state == sd_up;
	break;

    case plex_object:
	if (PLEX[objindex].state == plex_reviving) {	    /* reviving, */
	    ioctl_reply->error = revive_block(objindex);    /* revive another block */
	    ioctl_reply->msg[0] = '\0';			    /* no comment */
	    return;
	}
	(void) set_plex_state(objindex, plex_up, setstate_none);
	started = PLEX[objindex].state == plex_up;
	break;

    case volume_object:
	(void) set_volume_state(objindex, volume_up, setstate_none);
	started = VOL[objindex].state == volume_up;
	break;

    default:
	ioctl_reply->error = EINVAL;
	strcpy(ioctl_reply->msg, "Invalid object type");
	return;
    }

    /* There's no point in saying anything here:
     * the userland program does it better */
    ioctl_reply->msg[0] = '\0';
    if (started == 0)					    /* couldn't do it */
	ioctl_reply->error = EINVAL;
    else
	ioctl_reply->error = 0;
}
/* Stop an object, in other words do what we can to get it down
* This is called from vinumioctl (VINUMSTOP).
* Return error indications via ioctl_reply.
*/
void
stop_object(struct vinum_ioctl_msg *data)
{
int status = 1;
int objindex = data->index; /* save the number from change */
struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* format for returning replies */
switch (data->type) {
case drive_object:
status = set_drive_state(objindex, drive_down, data->force);
break;
case sd_object:
status = set_sd_state(objindex, sd_down, data->force);
break;
case plex_object:
status = set_plex_state(objindex, plex_down, data->force);
break;
case volume_object:
status = set_volume_state(objindex, volume_down, data->force);
break;
default:
ioctl_reply->error = EINVAL;
strcpy(ioctl_reply->msg, "Invalid object type");
return;
}
ioctl_reply->msg[0] = '\0';
if (status == 0) /* couldn't do it */
ioctl_reply->error = EINVAL;
else
ioctl_reply->error = 0;
}
/*
 * VINUM_SETSTATE ioctl: set an object state.
 *
 * msg is the message passed by the user; the reply is written back over
 * the same memory.
 *
 *   object_down:         handled by stop_object()
 *   object_up:           handled by start_object()
 *   object_initializing: only valid for subdisks and plexes. For a plex,
 *                        all its subdisks are set to initializing as
 *                        well; we stop at the first one that refuses.
 *
 * The reply error is EFAULT for an invalid object number and EINVAL if
 * the state could not be set or the object type is not supported. Other
 * values of msg->state leave the reply untouched.
 */
void
setstate(struct vinum_ioctl_msg *msg)
{
    int sdno;
    struct sd *sd;
    struct plex *plex;
    struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) msg; /* format for returning replies */

    switch (msg->state) {
    case object_down:
	stop_object(msg);
	break;

    case object_initializing:
	switch (msg->type) {
	case sd_object:
	    sd = &SD[msg->index];
	    if ((msg->index >= vinum_conf.subdisks_used)
		|| (sd->state == sd_unallocated)) {
		sprintf(ioctl_reply->msg, "Invalid subdisk %d", msg->index);
		ioctl_reply->error = EFAULT;
		return;
	    }
	    set_sd_state(msg->index, sd_initializing, msg->force);
	    if (sd->state != sd_initializing) {
		strcpy(ioctl_reply->msg, "Can't set state");
		ioctl_reply->error = EINVAL;
	    } else
		ioctl_reply->error = 0;
	    break;

	case plex_object:
	    plex = &PLEX[msg->index];
	    if ((msg->index >= vinum_conf.plexes_used)
		|| (plex->state == plex_unallocated)) {
		sprintf(ioctl_reply->msg, "Invalid plex %d", msg->index); /* it's a plex, not a subdisk */
		ioctl_reply->error = EFAULT;
		return;
	    }
	    set_plex_state(msg->index, plex_initializing, msg->force);
	    if (plex->state != plex_initializing) {
		strcpy(ioctl_reply->msg, "Can't set state");
		ioctl_reply->error = EINVAL;
	    } else {
		ioctl_reply->error = 0;
		for (sdno = 0; sdno < plex->subdisks; sdno++) {
		    sd = &SD[plex->sdnos[sdno]];
		    set_sd_state(plex->sdnos[sdno], sd_initializing, msg->force);
		    if (sd->state != sd_initializing) {
			strcpy(ioctl_reply->msg, "Can't set state");
			ioctl_reply->error = EINVAL;
			break;
		    }
		}
	    }
	    break;

	default:
	    strcpy(ioctl_reply->msg, "Invalid object");
	    ioctl_reply->error = EINVAL;
	}
	break;

    case object_up:
	start_object(msg);
	break;
    }
}

View File

@ -0,0 +1,88 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: COPYRIGHT,v 1.1 1998/03/05 06:07:05 grog Exp grog $
*/
/* Created by ./makestatetext on Tue 4 Aug 15:53:16 CST 1998. Do not edit */
/* Drive state texts.
 * drive_state() indexes this table with the numeric value of an
 * enum drivestate, and DriveState() returns the index of a matching
 * text as an enum drivestate, so the entries presumably have to stay in
 * the order of the enum declaration -- confirm against the header.
 * This file is generated by makestatetext; changes belong there. */
char *drivestatetext[] =
{
"unallocated",
"uninit",
"down",
"coming_up",
"up",
};
/* Subdisk state texts.
 * sd_state() indexes this table with the numeric value of an
 * enum sdstate, and SdState() returns the index of a matching text as
 * an enum sdstate, so the entries presumably have to stay in the order
 * of the enum declaration -- confirm against the header. */
char *sdstatetext[] =
{
"unallocated",
"uninit",
"init",
"initializing",
"empty",
"obsolete",
"stale",
"crashed",
"down",
"reborn",
"up",
};
/* Plex state texts.
 * plex_state() indexes this table with the numeric value of an
 * enum plexstate, and PlexState() returns the index of a matching text
 * as an enum plexstate, so the entries presumably have to stay in the
 * order of the enum declaration -- confirm against the header. */
char *plexstatetext[] =
{
"unallocated",
"init",
"faulty",
"down",
"reviving",
"initializing",
"corrupt",
"degraded",
"flaky",
"up",
};
/* Volume state texts.
 * volume_state() indexes this table with the numeric value of an
 * enum volumestate, and VolState() returns the index of a matching text,
 * so the entries presumably have to stay in the order of the enum
 * declaration -- confirm against the header. */
char *volstatetext[] =
{
"unallocated",
"uninit",
"down",
"up",
};

211
sys/modules/vinum/util.c Normal file
View File

@ -0,0 +1,211 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: util.c,v 1.7 1998/08/07 09:23:10 grog Exp grog $
*/
/* This file contains utility routines used both in kernel and user context */
#include "vinumhdr.h"
#include "statetexts.h"
#ifndef REALLYKERNEL
#include <stdio.h>
extern jmp_buf command_fail; /* return on a failed command */
#endif
/* Scratch buffer shared by the *_state() functions and plex_org(): they
 * format their "Invalid ..." messages into it and return a pointer to it,
 * so such a result is overwritten by the next call that fails. */
static char numeric_state[32]; /* temporary buffer for ASCII conversions */
/* Number of entries in the table <x>statetext; for example
 * STATECOUNT(drive) is the number of strings in drivestatetext[] */
#define STATECOUNT(x) (sizeof (x##statetext) / sizeof (char *))
/*
 * Return the text for a drive state. A value outside drivestatetext[]
 * is formatted as "Invalid state <n>" into the shared buffer
 * numeric_state, which the next failing conversion overwrites.
 */
char *
drive_state(enum drivestate state)
{
    if (((unsigned) state) < STATECOUNT(drive))		    /* we have a text for it */
	return drivestatetext[state];

    sprintf(numeric_state, "Invalid state %d", (int) state);
    return numeric_state;
}
/*
 * Return the text for a volume state. A value outside volstatetext[]
 * is formatted as "Invalid state <n>" into the shared buffer
 * numeric_state, which the next failing conversion overwrites.
 */
char *
volume_state(enum volumestate state)
{
    if (((unsigned) state) < STATECOUNT(vol))		    /* we have a text for it */
	return volstatetext[state];

    sprintf(numeric_state, "Invalid state %d", (int) state);
    return numeric_state;
}
/*
 * Return the text for a plex state. A value outside plexstatetext[]
 * is formatted as "Invalid state <n>" into the shared buffer
 * numeric_state, which the next failing conversion overwrites.
 */
char *
plex_state(enum plexstate state)
{
    if (((unsigned) state) < STATECOUNT(plex))		    /* we have a text for it */
	return plexstatetext[state];

    sprintf(numeric_state, "Invalid state %d", (int) state);
    return numeric_state;
}
/*
 * Return the text for a plex organization: "disorg", "concat" or
 * "striped". Any other value is formatted as "Invalid org <n>" into the
 * shared buffer numeric_state.
 */
char *
plex_org(enum plexorg org)
{
    if (org == plex_disorg)				    /* disorganized */
	return "disorg";
    if (org == plex_concat)				    /* concatenated plex */
	return "concat";
    if (org == plex_striped)				    /* striped plex */
	return "striped";

    sprintf(numeric_state, "Invalid org %d", (int) org);
    return numeric_state;
}
/*
 * Return the text for a subdisk state. A value outside sdstatetext[]
 * is formatted as "Invalid state <n>" into the shared buffer
 * numeric_state, which the next failing conversion overwrites.
 */
char *
sd_state(enum sdstate state)
{
    if (((unsigned) state) < STATECOUNT(sd))		    /* we have a text for it */
	return sdstatetext[state];

    sprintf(numeric_state, "Invalid state %d", (int) state);
    return numeric_state;
}
/* Now convert in the other direction */
/* These are currently used only internally,
* so we don't do too much error checking */
/* Convert a drive state text to its state: the result is the index of
 * the matching entry in drivestatetext[], or -1 if there is none */
enum drivestate
DriveState(char *text)
{
    int idx = 0;

    while (idx < STATECOUNT(drive)) {
	if (strcmp(text, drivestatetext[idx]) == 0)	    /* found it */
	    return (enum drivestate) idx;
	idx++;
    }
    return -1;
}
/* Convert a subdisk state text to its state: the result is the index of
 * the matching entry in sdstatetext[], or -1 if there is none */
enum sdstate
SdState(char *text)
{
    int idx = 0;

    while (idx < STATECOUNT(sd)) {
	if (strcmp(text, sdstatetext[idx]) == 0)	    /* found it */
	    return (enum sdstate) idx;
	idx++;
    }
    return -1;
}
/* Convert a plex state text to its state: the result is the index of
 * the matching entry in plexstatetext[], or -1 if there is none */
enum plexstate
PlexState(char *text)
{
    int idx = 0;

    while (idx < STATECOUNT(plex)) {
	if (strcmp(text, plexstatetext[idx]) == 0)	    /* found it */
	    return (enum plexstate) idx;
	idx++;
    }
    return -1;
}
/* Convert a volume state text to its state: the result is the index of
 * the matching entry in volstatetext[], or -1 if there is none */
enum volumestate
VolState(char *text)
{
    int i;

    for (i = 0; i < STATECOUNT(vol); i++)
	if (strcmp(text, volstatetext[i]) == 0)		    /* found it */
	    return (enum volumestate) i;		    /* the type we return, not `enum volstate' */
    return -1;
}
/*
 * Convert a decimal number with an optional scale factor to a number
 * of bytes.
 *
 * The scale factor is the character following the digits, in either
 * case:
 *
 *   (none)  bytes
 *   b       blocks (of 512 bytes)
 *   k       kilobytes (1024 bytes)
 *   m       megabytes (of 1024 * 1024 bytes)
 *   g       gigabytes (of 1024 * 1024 * 1024 bytes)
 *
 * Anything after the scale character is not looked at, and overflow is
 * not checked. If spec does not start with a digit, or the character
 * after the digits is not one of the above, the error is reported with
 * throw_rude_remark() in the kernel, or with a message on stderr and a
 * longjmp to command_fail in user context.
 */
u_int64_t
sizespec(char *spec)
{
    char *cursor = spec;
    u_int64_t value = 0;

    if ((*cursor >= '0') && (*cursor <= '9')) {		    /* it's numeric */
	do
	    value = value * 10 + *cursor++ - '0';	    /* convert it */
	while ((*cursor >= '0') && (*cursor <= '9'));

	switch (*cursor) {
	case '\0':
	    return value;

	case 'b':
	case 'B':
	    return value * 512;

	case 'k':
	case 'K':
	    return value * 1024;

	case 'm':
	case 'M':
	    return value * 1024 * 1024;

	case 'g':
	case 'G':
	    return value * 1024 * 1024 * 1024;
	}
    }
#ifdef REALLYKERNEL
    throw_rude_remark(EINVAL, "Invalid length specification: %s", spec);
#else
    fprintf(stderr, "Invalid length specification: %s", spec);
    longjmp(command_fail, -1);
#endif
    /* NOTREACHED */
    return -1;
}

512
sys/modules/vinum/vinum.c Normal file
View File

@ -0,0 +1,512 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: vinum.c,v 1.19 1998/08/13 05:24:02 grog Exp grog $
*/
#define STATIC /* nothing while we're testing XXX */
#define REALLYKERNEL
#include "vinumhdr.h"
#include "sys/sysproto.h" /* for sync(2) */
#ifdef DEBUG
#include <sys/reboot.h>
int debug = 0;
#endif
/* pointer to ioctl p parameter, to save passing it around.
 * vinumattach() initializes it from curproc. */
struct proc *myproc;
/* Device switch entries for the driver. Before FreeBSD 3 we supply a
 * block device switch which points at a separately declared character
 * device switch; from FreeBSD 3 on there is only the character device
 * switch. The initializers are positional, so their order has to match
 * the member order of struct bdevsw / struct cdevsw on the target
 * system -- NOTE(review): not checkable from this file. */
#if __FreeBSD__ < 3
STATIC struct cdevsw vinum_cdevsw;
STATIC struct bdevsw vinum_bdevsw =
{
vinumopen, vinumclose, vinumstrategy, vinumioctl,
vinumdump, vinumsize, 0,
"vinum", &vinum_cdevsw, -1
};
#else /* goodbye, bdevsw */
STATIC struct cdevsw vinum_cdevsw =
{
vinumopen, vinumclose, vinumread, vinumwrite,
vinumioctl, nostop, nullreset, nodevtotty,
seltrue, nommap, vinumstrategy, "vinum",
NULL, -1, vinumdump, vinumsize,
D_DISK, 0, -1
};
#endif
/* Forward declarations for functions of this file.
 * vinumattach is called by main() during pseudo-device attachment.
 * STATIC is defined as empty at the top of this file, so the functions
 * declared with it currently have external linkage. */
STATIC void vinumattach(void *);
STATIC void vinumgetdisklabel(dev_t);
void vinum_scandisk(void);
int vinum_inactive(void);
void free_vinum(int);
#if __FreeBSD__ >= 3
/* Why aren't these declared anywhere? XXX */
int setjmp(jmp_buf);
void longjmp(jmp_buf, int);
#endif
extern jmp_buf command_fail; /* return here if config fails */
struct _vinum_conf vinum_conf; /* configuration information */
/* Not referenced in the part of the file visible here */
STATIC int vinum_devsw_installed = 0;
/*
 * Called by main() during pseudo-device attachment, and by vinum_load()
 * when the module is loaded. All we need to do is allocate enough space
 * for devices to be configured later, and add devsw entries.
 *
 * Panics if the driver is already marked as loaded. The argument is not
 * used.
 *
 * Earlier code here set up a uio and allocated a 512 byte buffer which
 * was neither used nor freed; that has been removed.
 */
void
vinumattach(void *dummy)
{
    BROKEN_GDB;

    /* modload should prevent multiple loads, so this is worth a panic */
    if ((vinum_conf.flags & VF_LOADED) != 0)		    /* flags is a number, not a pointer */
	panic("vinum: already loaded");

    printf("vinum: loaded\n");
    vinum_conf.flags |= VF_LOADED;			    /* we're loaded now */

    /* We don't have a p pointer here, so take it from curproc */
    myproc = curproc;
#if __FreeBSD__ < 3
    bdevsw_add_generic(BDEV_MAJOR, CDEV_MAJOR, &vinum_bdevsw);
#else
    cdevsw_add_generic(BDEV_MAJOR, CDEV_MAJOR, &vinum_cdevsw);
#endif
#ifdef DEVFS
#error DEVFS not finished yet
#endif

    /* allocate space: drives... */
    DRIVE = (struct drive *) Malloc(sizeof(struct drive) * INITIAL_DRIVES);
    CHECKALLOC(DRIVE, "vinum: no memory\n");
    vinum_conf.drives_allocated = INITIAL_DRIVES;	    /* number of drive slots allocated */
    vinum_conf.drives_used = 0;				    /* and number in use */

    /* volumes, ... */
    VOL = (struct volume *) Malloc(sizeof(struct volume) * INITIAL_VOLUMES);
    CHECKALLOC(VOL, "vinum: no memory\n");
    vinum_conf.volumes_allocated = INITIAL_VOLUMES;	    /* number of volume slots allocated */
    vinum_conf.volumes_used = 0;			    /* and number in use */

    /* plexes, ... */
    PLEX = (struct plex *) Malloc(sizeof(struct plex) * INITIAL_PLEXES);
    CHECKALLOC(PLEX, "vinum: no memory\n");
    vinum_conf.plexes_allocated = INITIAL_PLEXES;	    /* number of plex slots allocated */
    vinum_conf.plexes_used = 0;				    /* and number in use */

    /* and subdisks */
    SD = (struct sd *) Malloc(sizeof(struct sd) * INITIAL_SUBDISKS);
    CHECKALLOC(SD, "vinum: no memory\n");
    vinum_conf.subdisks_allocated = INITIAL_SUBDISKS;	    /* number of sd slots allocated */
    vinum_conf.subdisks_used = 0;			    /* and number in use */
}
#ifdef ACTUALLY_LKM_NOT_KERNEL /* stuff for LKMs */
/*
 * Check if we have anything open. If so, return 0 (not inactive),
 * otherwise 1 (inactive).
 *
 * Only the volumes are examined: a volume counts as open if an owning
 * pid is recorded for it. The check is made with the configuration
 * locked.
 */
int
vinum_inactive(void)
{
    BROKEN_GDB;
    int i;
    int can_do = 1;					    /* assume we can do it */

    lock_config();
    for (i = 0; i < vinum_conf.volumes_used; i++) {
	if (VOL[i].pid != 0) {				    /* volume is open (pid is a number, not a pointer) */
	    can_do = 0;
	    break;
	}
    }
    unlock_config();
    return can_do;
}
/*
 * Free all structures.
 * If cleardrive is 0, save the configuration; otherwise
 * remove the configuration from the drive.
 *
 * The drive table itself is freed in both cases; previously it was only
 * freed when the configuration was kept. Finally vinum_conf is zeroed.
 *
 * Before coming here, ensure that no volumes are open.
 */
void
free_vinum(int cleardrive)
{
    BROKEN_GDB;
    int i;

    if (!cleardrive)					    /* keep the config */
	save_config();

    if (DRIVE != NULL) {
	for (i = 0; i < vinum_conf.drives_used; i++) {
	    if (cleardrive)
		remove_drive(i);			    /* remove the drive */
	    else
		free_drive(&DRIVE[i]);			    /* close files and things */
	}
	Free(DRIVE);
    }

    if (SD != NULL)
	Free(SD);

    if (PLEX != NULL) {
	for (i = 0; i < vinum_conf.plexes_used; i++) {
	    struct plex *plex = &vinum_conf.plex[i];

	    if (plex->state != plex_unallocated) {	    /* we have real data there */
		if (plex->sdnos)
		    Free(plex->sdnos);
		if (plex->unmapped_regions)		    /* count of entries in the table */
		    Free(plex->unmapped_region);
		if (plex->defective_regions)		    /* count of entries in the table */
		    Free(plex->defective_region);
	    }
	}
	Free(PLEX);
    }

    if (VOL != NULL)
	Free(VOL);

    bzero(&vinum_conf, sizeof(vinum_conf));
}
/* Declare the LKM data for a miscellaneous module called vinum */
MOD_MISC(vinum);
/*
 * Function called when loading the driver.
 * All the work is done by vinumattach(); neither argument is used here.
 * Always returns 0.
 */
STATIC int
vinum_load(struct lkm_table *lkmtp, int cmd)
{
BROKEN_GDB;
/* Debugger ("vinum_load"); */
vinumattach(NULL);
return 0; /* OK */
}
/*
 * Function called when unloading the driver.
 *
 * If vinum_inactive() reports that something is still open, we refuse
 * with EBUSY. Otherwise we sync the buffers, free our structures
 * (keeping the configuration on the drives), clear our device switch
 * entries and return 0. Neither argument is used.
 */
STATIC int
vinum_unload(struct lkm_table *lkmtp, int cmd)
{
    BROKEN_GDB;
    struct sync_args dummyarg =
    {0};
#if __FreeBSD__ < 3
    int retval;
#endif

    if (!vinum_inactive())				    /* somebody still has us open */
	return EBUSY;

    printf("vinum: unloaded\n");
#if __FreeBSD__ < 3
    sync(curproc, &dummyarg, &retval);			    /* write out buffers */
#else
    sync(curproc, &dummyarg);				    /* write out buffers */
#endif
    free_vinum(0);					    /* clean up */
#if __FreeBSD__ < 3
    bdevsw[BDEV_MAJOR] = NULL;				    /* clear bdevsw */
#endif
    cdevsw[CDEV_MAJOR] = NULL;				    /* and cdevsw */
    return 0;
}
/*
 * Dispatcher function for the module (load/unload/stat).
 * The whole body is the MOD_DISPATCH macro, which is handed vinum_load
 * and vinum_unload for loading and unloading, and lkm_nullcmd as the
 * last handler. There is no explicit return statement, so the macro
 * presumably supplies the return value -- see its definition.
 */
int
vinum_mod(struct lkm_table *lkmtp, int cmd, int ver)
{
BROKEN_GDB;
MOD_DISPATCH(vinum, /* module name */
lkmtp, /* LKM table */
cmd, /* command */
ver,
vinum_load, /* load with this function */
vinum_unload, /* and unload with this */
lkm_nullcmd);
}
#else /* not LKM */
#error "This driver must be compiled as a loadable kernel module"
#endif /* LKM */
/*
 * Open a vinum object.
 * At the moment, we only open volumes and the
 * super device. It's a nice concept to be
 * able to open drives, subdisks and plexes, but
 * I can't think what good it could be.
 *
 * The object type is taken from the device number:
 *
 *   volume:       must be up. Counts the open, records the flags of the
 *                 first open and the pid of the opener.
 *   plex, subdisk: may be opened by one process at a time; the pid of
 *                 the opener is recorded.
 *   super device: may only be opened by root.
 *   anything else: ENODEV.
 *
 * Returns 0 on success, otherwise an errno value (ENXIO, EIO, EINVAL,
 * EBUSY, EPERM or ENODEV).
 */
/* ARGSUSED */
int
vinumopen(dev_t dev,
    int flags,
    int fmt,
    struct proc *p)
{
    BROKEN_GDB;
    int s;						    /* spl */
    unsigned int index;
    struct volume *vol;
    struct plex *plex;
    struct sd *sd;
    struct devcode *device;

    device = (struct devcode *) &dev;

    /* First, decide what we're looking at */
    switch (device->type) {
    case VINUM_VOLUME_TYPE:
	index = VOLNO(dev);
	if (index >= vinum_conf.volumes_used)
	    return ENXIO;				    /* no such device */
	vol = &VOL[index];

	switch (vol->state) {
	case volume_unallocated:
	case volume_uninit:
	    return ENXIO;

	case volume_up:
	    s = splhigh();				    /* quick lock */
	    if (vol->opencount == 0)
		vol->openflags = flags;			    /* set our flags */
	    vol->opencount++;
	    vol->pid = p->p_pid;			    /* and say who we are (do we need this? XXX) */
	    splx(s);
	    return 0;

	case volume_down:
	    return EIO;

	default:
	    return EINVAL;
	}

    case VINUM_PLEX_TYPE:
	if (VOLNO(dev) >= vinum_conf.volumes_used)
	    return ENXIO;
	index = PLEXNO(dev);				    /* get plex index in vinum_conf */
	if (index >= vinum_conf.plexes_used)
	    return ENXIO;				    /* no such device */
	plex = &PLEX[index];

	switch (plex->state) {
	case plex_unallocated:
	    return EINVAL;

	default:
	    s = splhigh();
	    if (plex->pid				    /* it's open already */
		&& (plex->pid != p->p_pid)) {		    /* and not by us, */
		splx(s);
		return EBUSY;				    /* one at a time, please */
	    }
	    plex->pid = p->p_pid;			    /* and say who we are (do we need this? XXX) */
	    splx(s);
	    return 0;
	}

    case VINUM_SD_TYPE:
	if ((VOLNO(dev) >= vinum_conf.volumes_used) ||	    /* no such volume */
	    (PLEXNO(dev) >= vinum_conf.plexes_used))	    /* or no such plex */
	    return ENXIO;				    /* no such device */
	index = SDNO(dev);				    /* get the subdisk number */
	if (index >= vinum_conf.subdisks_used)
	    return ENXIO;				    /* no such device */
	sd = &SD[index];

	/* Opening a subdisk is always a special operation, so we
	 * ignore the state as long as it represents a real subdisk */
	switch (sd->state) {
	case sd_unallocated:
	case sd_uninit:
	    return EINVAL;

	default:
	    s = splhigh();
	    if (sd->pid					    /* it's open already */
		&& (sd->pid != p->p_pid)) {		    /* and not by us, */
		splx(s);
		return EBUSY;				    /* one at a time, please */
	    }
	    sd->pid = p->p_pid;				    /* and say who we are (do we need this? XXX) */
	    splx(s);
	    return 0;
	}

    case VINUM_SUPERDEV_TYPE:
	if (p->p_ucred->cr_uid == 0) {			    /* root calling, */
	    vinum_conf.opencount++;			    /* one more opener */
	    return 0;					    /* no worries opening super dev */
	} else
	    return EPERM;				    /* you can't do that! */

    case VINUM_DRIVE_TYPE:
    default:
	return ENODEV;					    /* don't know what to do with these */
    }
}
/*
 * Close a vinum object.
 *
 * The object type is taken from the device number:
 *
 *   volume:       if it is up, reset the open count and forget the
 *                 owning pid. A volume in any other state returns an
 *                 error and is left untouched.
 *   plex, subdisk: forget the owning pid.
 *   super device: count one opener less if root is calling.
 *   anything else: ENODEV.
 *
 * Returns 0 on success, otherwise ENXIO, EIO, EINVAL or ENODEV.
 */
/* ARGSUSED */
int
vinumclose(dev_t dev,
    int flags,
    int fmt,
    struct proc *p)
{
    BROKEN_GDB;
    unsigned int index;
    struct volume *vol;
    struct plex *plex;
    struct sd *sd;
    struct devcode *device = (struct devcode *) &dev;

    index = VOLNO(dev);

    /* First, decide what we're looking at */
    switch (device->type) {
    case VINUM_VOLUME_TYPE:
	if (index >= vinum_conf.volumes_used)
	    return ENXIO;				    /* no such device */
	vol = &VOL[index];

	switch (vol->state) {
	case volume_unallocated:
	case volume_uninit:
	    return ENXIO;

	case volume_up:
	    vol->opencount = 0;				    /* reset our flags */
	    vol->pid = 0;				    /* and forget who owned us */
	    return 0;

	case volume_down:
	    return EIO;

	default:
	    return EINVAL;
	}

    case VINUM_PLEX_TYPE:
	if (VOLNO(dev) >= vinum_conf.volumes_used)
	    return ENXIO;
	index = PLEXNO(dev);				    /* get plex index in vinum_conf */
	if (index >= vinum_conf.plexes_used)
	    return ENXIO;				    /* no such device */
	plex = &PLEX[index];
	plex->pid = 0;
	return 0;

    case VINUM_SD_TYPE:
	if ((VOLNO(dev) >= vinum_conf.volumes_used) ||	    /* no such volume */
	    (PLEXNO(dev) >= vinum_conf.plexes_used))	    /* or no such plex */
	    return ENXIO;				    /* no such device */
	index = SDNO(dev);				    /* get the subdisk number */
	if (index >= vinum_conf.subdisks_used)
	    return ENXIO;				    /* no such device */
	sd = &SD[index];
	sd->pid = 0;
	return 0;

    case VINUM_SUPERDEV_TYPE:
	if (p->p_ucred->cr_uid == 0)			    /* root calling, */
	    vinum_conf.opencount--;			    /* one less opener */
	return 0;					    /* no worries closing super dev */

    case VINUM_DRIVE_TYPE:
    default:
	return ENODEV;					    /* don't know what to do with these */
    }
}
/* size routine */
int
vinumsize(dev_t dev)
{
BROKEN_GDB;
struct volume *vol;
int size;
/* XXX This is bogus. We don't need to open
* a device to find its size */
vol = &VOL[VOLNO(dev)];
if (vol->state == volume_up)
size = vol->size;
else
return 0; /* err on the size of conservatism */
return size;
}
/* Dump routine. Dumping is not implemented, so every request is
 * refused with ENXIO, whatever the device. */
int
vinumdump(dev_t dev)
{
    (void) dev;						    /* not used */
    return ENXIO;
}

View File

@ -0,0 +1,214 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: vinumext.h,v 1.14 1998/08/11 00:03:57 grog Exp grog $
*/
/* vinumext.h: external definitions */
extern struct _vinum_conf vinum_conf; /* configuration information */
#ifdef DEBUG
/* Debug flags.  Declared with an explicit type: implicit int is not valid C99. */
extern int debug;
#endif
/*
 * CHECKALLOC(ptr, msg): bail out of the current command if ptr is NULL.
 *
 * Prints msg verbatim and longjmps to command_fail with value -1.
 * The body is wrapped in do { } while (0) so that the macro behaves
 * as a single statement (safe in an unbraced if/else), and msg is
 * printed through "%s" so that a '%' in the text is not interpreted
 * as a conversion.  Use with a trailing semicolon.
 */
#define CHECKALLOC(ptr, msg) \
    do { \
        if ((ptr) == NULL) { \
            printf("%s", (msg)); \
            longjmp(command_fail, -1); \
        } \
    } while (0)
#ifndef KERNEL
struct vnode;
struct proc;
#endif
#ifdef KERNEL
int give_sd_to_plex(int plexno, int sdno);
int give_plex_to_volume(int volno, int plexno);
int check_drive(char *);
enum drive_label_info read_drive_label(struct drive *drive);
int parse_config(char *, struct keywordset *);
int parse_user_config(char *cptr, struct keywordset *keyset);
u_int64_t sizespec(char *spec);
int volume_index(struct volume *volume);
int plex_index(struct plex *plex);
int sd_index(struct sd *sd);
int drive_index(struct drive *drive);
int my_plex(int volno, int plexno);
int my_sd(int plexno, int sdno);
int get_empty_drive(void);
int find_drive(const char *name, int create);
int find_drive_by_dev(const char *devname, int create);
int get_empty_sd(void);
int find_subdisk(const char *name, int create);
void free_sd(int sdno);
void free_volume(int volno);
int get_empty_plex(void);
int find_plex(const char *name, int create);
void free_plex(int plexno);
int get_empty_volume(void);
int find_volume(const char *name, int create);
void config_subdisk(void);
void config_plex(void);
void config_volume(void);
void config_drive(void);
void updateconfig(int);
void update_sd_config(int sdno, int kernelstate);
void update_plex_config(int plexno, int kernelstate);
void update_volume_config(int volno, int kernelstate);
void update_config(void);
void drive_io_done(struct buf *);
int save_config(void);
void write_config(char *, int);
int start_config(void);
void finish_config(int);
void remove(struct vinum_ioctl_msg *msg);
void remove_drive_entry(int driveno, int force, int recurse);
void remove_sd_entry(int sdno, int force, int recurse);
void remove_plex_entry(int plexno, int force, int recurse);
void remove_volume_entry(int volno, int force, int recurse);
void checkernel(char *);
int open_drive(struct drive *, struct proc *);
void close_drive(struct drive *drive);
int driveio(struct drive *, void *, size_t, off_t, int);
/* #define read_drive(a, b, c, d) driveio (a, b, c, d, B_READ)
#define write_drive(a, b, c, d) driveio (a, b, c, d, B_WRITE) */
int set_drive_parms(struct drive *drive);
int init_drive(struct drive *);
/* void throw_rude_remark (int, struct _ioctl_reply *, char *, ...); XXX */
void throw_rude_remark(int, char *,...);
int read_drive(struct drive *drive, void *buf, size_t length, off_t offset);
int write_drive(struct drive *drive, void *buf, size_t length, off_t offset);
void format_config(char *config, int len);
void checkkernel(char *op);
void free_drive(struct drive *drive);
void down_drive(struct drive *drive);
void remove_drive(int driveno);
/*
 * Forward declaration.  It must precede the first prototype that
 * mentions struct request: a struct tag first seen inside a parameter
 * list has prototype scope only and is a different type from the one
 * defined later.
 */
struct request;
/* I/O */
d_open_t vinumopen;
d_close_t vinumclose;
d_strategy_t vinumstrategy;
d_ioctl_t vinumioctl;
d_dump_t vinumdump;
d_psize_t vinumsize;
d_read_t vinumread;
d_write_t vinumwrite;
int vinumstart(struct buf *bp, int reviveok);
int launch_requests(struct request *rq, int reviveok);
/* XXX Do we need this? */
int vinumpart(dev_t);
/* Memory allocation */
void vinum_meminfo(caddr_t data);
int vinum_mallocinfo(caddr_t data);
void expand_table(void **, int, int);
void add_defective_region(struct plex *plex, off_t offset, size_t length);
void add_unmapped_region(struct plex *plex, off_t offset, size_t length);
void rebuild_plex_unmappedlist(struct plex *plex);
struct rqgroup *allocrqg(struct request *rq, int elements);
void deallocrqg(struct rqgroup *rqg);
/* State transitions */
int set_drive_state(int driveno, enum drivestate state, int force);
int set_sd_state(int sdno, enum sdstate state, enum setstateflags flags);
enum requeststatus checksdstate(struct sd *sd, struct request *rq, daddr_t diskaddr, daddr_t diskend);
int set_plex_state(int plexno, enum plexstate state, enum setstateflags flags);
int set_volume_state(int volumeno, enum volumestate state, enum setstateflags flags);
void get_volume_label(struct volume *vol, struct disklabel *lp);
int write_volume_label(int);
void start_object(struct vinum_ioctl_msg *);
void stop_object(struct vinum_ioctl_msg *);
void setstate(struct vinum_ioctl_msg *msg);
void vinum_label(int);
int vinum_writedisklabel(struct volume *, struct disklabel *);
int initsd(int);
int restart_plex(int plexno);
int revive_block(int plexno);
/* Auxiliary functions */
enum sdstates sdstatemap(struct plex *plex, int *sddowncount);
enum volplexstate vpstate(struct plex *plex);
#endif
enum keyword get_keyword(char *, struct keywordset *);
void listconfig(void);
char *drive_state(enum drivestate);
char *volume_state(enum volumestate);
char *plex_state(enum plexstate);
char *plex_org(enum plexorg);
char *sd_state(enum sdstate);
enum drivestate DriveState(char *text);
enum sdstate SdState(char *text);
enum plexstate PlexState(char *text);
enum volumestate VolState(char *text);
struct drive *validdrive(int driveno, struct _ioctl_reply *);
struct sd *validsd(int sdno, struct _ioctl_reply *);
struct plex *validplex(int plexno, struct _ioctl_reply *);
struct volume *validvol(int volno, struct _ioctl_reply *);
int tokenize(char *, char *[]);
void resetstats(struct vinum_ioctl_msg *msg);
/* Locking */
int lockvol(struct volume *vol);
void unlockvol(struct volume *vol);
int lockplex(struct plex *plex);
void unlockplex(struct plex *plex);
int lockrange(struct plex *plex, off_t first, off_t last);
void unlockrange(struct plex *plex, off_t first, off_t last);
int lock_config(void);
void unlock_config(void);
#ifdef DEBUG
/*
 * expandrq(prq): grow the rqe table of *prq by RQELTS elements, zero
 * the newly added elements and add RQELTS to prq->rqcount.
 *
 * Written as a single do { } while (0) statement with the argument
 * parenthesized; prq is evaluated several times, so it must not have
 * side effects.  Use with a trailing semicolon.
 * NOTE(review): the sizes are computed from prq->requests while the
 * counter bumped is prq->rqcount -- confirm this is intended.
 */
#define expandrq(prq) \
    do { \
        expand_table((void **) &(prq)->rqe, \
            (prq)->requests * sizeof (struct rqelement), \
            ((prq)->requests + RQELTS) * sizeof (struct rqelement)); \
        bzero(&(prq)->rqe[(prq)->requests], RQELTS * sizeof (struct rqelement)); \
        (prq)->rqcount += RQELTS; \
    } while (0)
#else
void expandrq(struct plexrq *);
#endif

View File

@ -0,0 +1,104 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
*/
/* Header files used by all modules */
/* $Id: vinumhdr.h,v 1.7 1998/08/07 04:41:18 grog Exp grog $ */
#ifdef KERNEL
#define REALLYKERNEL
#endif
#include <sys/param.h>
#ifdef REALLYKERNEL
#include <sys/systm.h>
#include <sys/kernel.h>
#endif
#ifdef DEVFS
#error "DEVFS code not complete yet"
#include <sys/devfsext.h>
#endif /*DEVFS */
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/dkstat.h>
#include <sys/buf.h>
#include <sys/malloc.h>
#include <sys/uio.h>
#include <sys/namei.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/disklabel.h>
#include <ufs/ffs/fs.h>
#include <sys/mount.h>
#include <sys/device.h>
#undef KERNEL /* XXX */
#include <sys/disk.h>
#ifdef REALLYKERNEL
#define KERNEL
#endif
#include <sys/syslog.h>
#include <sys/fcntl.h>
#include <sys/vnode.h>
#include <sys/dkbad.h>
#include <setjmp.h>
#include <stdarg.h>
#include <vm/vm.h>
#ifdef USES_VM
/* XXX Do we need this? */
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_prot.h>
/* #include <vm/vm_page.h> */
#include <sys/vmmeter.h>
/* #include <machine/pmap.h> */
#include <machine/cputypes.h>
#endif /* USES_VM */
#include <vinumvar.h>
#include <vinumio.h>
#include "vinumkw.h"
#include "vinumext.h"
/*
 * Allocation wrappers used throughout vinum.
 *
 * With REALLYKERNEL defined, Malloc() and Free() expand to MMalloc()
 * and FFree(), passing the file name and line number of the call
 * site.  Otherwise they expand to plain malloc() and free().
 */
#undef Free /* defined in some funny net stuff */
#ifdef REALLYKERNEL
#define Malloc(x) MMalloc ((x), __FILE__, __LINE__) /* show where we came from */
#define Free(x) FFree ((x), __FILE__, __LINE__) /* show where we came from */
caddr_t MMalloc (int size, char *, int);
void FFree (void *mem, char *, int);
#else
#define Malloc(x) malloc ((x)) /* just the size */
#define Free(x) free ((x)) /* just the address */
#endif

132
sys/modules/vinum/vinumio.h Normal file
View File

@ -0,0 +1,132 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: vinumio.h,v 1.10 1998/08/10 05:46:19 grog Exp grog $
*/
#define MAX_IOCTL_REPLY 256
#define L 'F' /* ID letter of our ioctls */
/* VINUM_CREATE returns a buffer of this kind */
struct _ioctl_reply {
int error;
char msg[MAX_IOCTL_REPLY];
};
/* ioctl requests */
#define BUFSIZE 1024 /* size of buffer, including continuations */
#define VINUM_CREATE _IOC(IOC_IN | IOC_OUT, L, 64, BUFSIZE) /* configure vinum */
#define VINUM_GETCONFIG _IOR(L, 65, struct _vinum_conf) /* get global config */
#define VINUM_DRIVECONFIG _IOWR(L, 66, struct drive) /* get drive config */
#define VINUM_SDCONFIG _IOWR(L, 67, struct sd) /* get subdisk config */
#define VINUM_PLEXCONFIG _IOWR(L, 68, struct plex) /* get plex config */
#define VINUM_VOLCONFIG _IOWR(L, 69, struct volume) /* get volume config */
#define VINUM_PLEXSDCONFIG _IOWR(L, 70, struct sd) /* get sd config for plex (plex, sdno) */
#define VINUM_GETFREELIST _IOWR(L, 71, struct drive_freelist) /* get freelist element (drive, fe) */
#define VINUM_SAVECONFIG _IOC(0, L, 72, 0) /* release locks, update, write config to disk */
#define VINUM_RESETCONFIG _IOC(0, L, 73, 0) /* trash config on disk */
#define VINUM_INIT _IOC(0, L, 74, 0) /* read config from disk */
#ifdef DEBUG
struct debuginfo {
int changeit;
int param;
};
#define VINUM_DEBUG _IOWR(L, 75, struct debuginfo) /* call the debugger from ioctl () */
#endif
enum objecttype {
drive_object,
sd_object,
plex_object,
volume_object,
invalid_object
};
/* Start an object. Pass two integers:
* msg [0] index in vinum_conf.<object>
* msg [1] type of object (see below)
*
* Return ioctl_reply
*/
#define VINUM_SETSTATE _IOC(IOC_IN | IOC_OUT, L, 76, MAX_IOCTL_REPLY) /* start an object */
/* The state to set with VINUM_SETSTATE. Since
* each object has a different set of states, we
* need to translate later */
enum objectstate {
object_down,
object_initializing,
object_up
};
/* This structure is used for modifying objects
* (VINUM_SETSTATE, VINUM_REMOVE, VINUM_RESETSTATS, VINUM_ATTACH,
* VINUM_DETACH, VINUM_REPLACE)
*/
struct vinum_ioctl_msg {
int index;
enum objecttype type;
enum objectstate state; /* state to set (VINUM_SETSTATE) */
int force; /* do it even if it doesn't make sense */
int recurse; /* recurse (VINUM_REMOVE) */
int otherobject; /* superordinate object (attach),
* replacement object (replace) */
int rename; /* rename object (attach) */
int64_t offset; /* offset of subdisk (for attach) */
};
#define VINUM_RELEASECONFIG _IOC(0, L, 77, 0) /* release locks and write config to disk */
#define VINUM_STARTCONFIG _IOC(0, L, 78, 0) /* start a configuration operation */
#define VINUM_MEMINFO _IOR(L, 79, struct meminfo) /* get memory usage summary */
#define VINUM_MALLOCINFO _IOWR(L, 80, struct mc) /* get specific malloc information [i] */
#define VINUM_LABEL _IOC(IOC_IN | IOC_OUT, L, 81, MAX_IOCTL_REPLY) /* label a volume */
#define VINUM_INITSD _IOW(L, 82, int) /* initialize a subdisk */
#define VINUM_REMOVE _IOC(IOC_IN | IOC_OUT, L, 83, MAX_IOCTL_REPLY) /* remove an object */
#define VINUM_GETUNMAPPED _IOWR(L, 84, struct plexregion) /* get unmapped element (plex, re) */
#define VINUM_GETDEFECTIVE _IOWR(L, 85, struct plexregion) /* get defective element (plex, re) */
#define VINUM_RESETSTATS _IOC(IOC_IN | IOC_OUT, L, 86, MAX_IOCTL_REPLY) /* reset object stats */
#define VINUM_ATTACH _IOC(IOC_IN | IOC_OUT, L, 87, MAX_IOCTL_REPLY) /* attach an object to a superordinate object */
#define VINUM_DETACH _IOC(IOC_IN | IOC_OUT, L, 88, MAX_IOCTL_REPLY) /* detach an object from a superordinate object */
/* Message passed with VINUM_RENAME: which object to rename, and its new name */
struct vinum_rename_msg {
int index;
int recurse; /* rename subordinate objects too */
enum objecttype type;
char newname[MAXNAME]; /* new name to give to object */
};
#define VINUM_RENAME _IOC(IOC_IN | IOC_OUT, L, 89, MAX_IOCTL_REPLY) /* rename an object */
#define VINUM_REPLACE _IOC(IOC_IN | IOC_OUT, L, 90, MAX_IOCTL_REPLY) /* replace an object */

View File

@ -0,0 +1,787 @@
/* XXX replace all the checks on object validity with
* calls to valid<object> */
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: vinumioctl.c,v 1.1 1998/08/14 08:46:10 grog Exp grog $
*/
#define STATIC /* nothing while we're testing XXX */
#define REALLYKERNEL
#include "vinumhdr.h"
#include "sys/sysproto.h" /* for sync(2) */
#ifdef DEBUG
#include <sys/reboot.h>
#endif
jmp_buf command_fail; /* return on a failed command */
#if __FreeBSD__ >= 3
/* Why aren't these declared anywhere? XXX */
int setjmp(jmp_buf);
void longjmp(jmp_buf, int);
#endif
/* pointer to ioctl p parameter, to save passing it around */
struct proc *myproc;
int vinum_inactive(void);
void free_vinum(int);
void attachobject(struct vinum_ioctl_msg *);
void detachobject(struct vinum_ioctl_msg *);
void renameobject(struct vinum_rename_msg *);
void replaceobject(struct vinum_ioctl_msg *);
/*
 * ioctl routine.
 *
 * The device type encoded in dev selects the set of commands:
 *
 *  - super device: configuration and management commands (VINUM_*).
 *    The calling process is remembered in myproc and a setjmp() on
 *    command_fail is armed; a longjmp() back here returns 0, the
 *    reply buffer being expected to carry the error.  Many of these
 *    commands return 0 and report their real status in the
 *    struct _ioctl_reply overlaid on data.
 *  - drive, plex: EAGAIN (nothing implemented).
 *  - subdisk: VINUM_INITSD only, anything else is EINVAL.
 *  - volume: the disk label ioctls (DIOCGDINFO, DIOCGPART,
 *    DIOCWDINFO, DIOCSDINFO, DIOCWLABEL); the volume must exist and
 *    be up.  Anything else is ENOTTY.
 *
 * An unknown device type, or an unknown command on the super device,
 * is logged with printf() and fails with EINVAL.
 *
 * Returns 0 or an errno value.
 */
int
vinumioctl(dev_t dev,
#if __FreeBSD__ >= 3
    u_long cmd,
#else
    int cmd,
#endif
    caddr_t data,
    int flag,
    struct proc *p)
{
    BROKEN_GDB;
    unsigned int objno;
    int error = 0;
    struct volume *vol;
    unsigned int index; /* for transferring config info */
    unsigned int sdno; /* for transferring config info */
    int fe; /* free list element number */
    struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* struct to return */
    struct devcode *device = (struct devcode *) &dev;

    /* First, decide what we're looking at */
    switch (device->type) {
    case VINUM_SUPERDEV_TYPE:
        myproc = p; /* save pointer to process */
        ioctl_reply = (struct _ioctl_reply *) data; /* save the address to reply to */
        error = setjmp(command_fail); /* come back here on error */
        if (error) /* bombed out */
            return 0; /* the reply will contain meaningful info */

        switch (cmd) {
#ifdef DEBUG
            /* VINUM_DEBUG, struct debuginfo and debug only exist with DEBUG */
        case VINUM_DEBUG:
            boothowto |= RB_GDB; /* serial debug line */
            if (((struct debuginfo *) data)->changeit) /* change debug settings */
                debug = (((struct debuginfo *) data)->param);
            else
                Debugger("vinum debug");
            ioctl_reply = (struct _ioctl_reply *) data; /* reinstate the address to reply to */
            ioctl_reply->error = 0;
            return 0;
#endif

        case VINUM_CREATE: /* create a vinum object */
            error = lock_config(); /* get the config for us alone */
            if (error) /* can't do it, */
                return error; /* give up */
            error = setjmp(command_fail); /* come back here on error */
            if (error == 0) { /* first time, */
                parse_user_config((char *) data, &keyword_set); /* update the config */
                ioctl_reply->error = 0; /* no error if we make it here */
            } else if (ioctl_reply->error == 0) { /* longjmp, but no error status */
                ioctl_reply->error = EINVAL; /* note that something's up */
                ioctl_reply->msg[0] = '\0'; /* no message? */
            }
            unlock_config();
            return 0; /* must be 0 to return the real error info */

        case VINUM_GETCONFIG: /* get the configuration information */
            bcopy(&vinum_conf, data, sizeof(vinum_conf));
            return 0;

            /* start configuring the subsystem */
        case VINUM_STARTCONFIG:
            return start_config(); /* just lock it */

            /*
             * Move the individual parts of the config to user space.
             * Specify the index of the object in the first word of data,
             * and return the object there
             */
        case VINUM_DRIVECONFIG:
            index = *(int *) data; /* get the index */
            if (index >= (unsigned) vinum_conf.drives_used) /* can't do it */
                return EFAULT; /* bang */
            bcopy(&DRIVE[index], data, sizeof(struct drive)); /* copy the config item out */
            return 0;

        case VINUM_SDCONFIG:
            index = *(int *) data; /* get the index */
            if (index >= (unsigned) vinum_conf.subdisks_used) /* can't do it */
                return EFAULT; /* bang */
            bcopy(&SD[index], data, sizeof(struct sd)); /* copy the config item out */
            return 0;

        case VINUM_PLEXCONFIG:
            index = *(int *) data; /* get the index */
            if (index >= (unsigned) vinum_conf.plexes_used) /* can't do it */
                return EFAULT; /* bang */
            bcopy(&PLEX[index], data, sizeof(struct plex)); /* copy the config item out */
            return 0;

        case VINUM_VOLCONFIG:
            index = *(int *) data; /* get the index */
            if (index >= (unsigned) vinum_conf.volumes_used) /* can't do it */
                return EFAULT; /* bang */
            bcopy(&VOL[index], data, sizeof(struct volume)); /* copy the config item out */
            return 0;

        case VINUM_PLEXSDCONFIG:
            index = *(int *) data; /* get the plex index */
            sdno = ((int *) data)[1]; /* and the sd index */
            if ((index >= (unsigned) vinum_conf.plexes_used) /* plex doesn't exist */
                ||(sdno >= PLEX[index].subdisks)) /* or it doesn't have this many subdisks */
                return EFAULT; /* bang */
            bcopy(&SD[PLEX[index].sdnos[sdno]], /* copy the config item out */
                data,
                sizeof(struct sd));
            return 0;

        case VINUM_SAVECONFIG:
            if (VFLAGS & VF_CONFIGURING) { /* must be us, the others are asleep */
                finish_config(1); /* finish the configuration and update it */
                error = save_config(); /* save configuration to disk */
            } else
                error = EINVAL; /* queue up for this one, please */
            return error;

        case VINUM_RELEASECONFIG: /* release the config */
            if (VFLAGS & VF_CONFIGURING) { /* must be us, the others are asleep */
                finish_config(0); /* finish the configuration, don't change it */
                error = save_config(); /* save configuration to disk */
            } else
                error = EINVAL; /* release what config? */
            return error;

        case VINUM_INIT:
            ioctl_reply = (struct _ioctl_reply *) data; /* reinstate the address to reply to */
            ioctl_reply->error = 0;
            return 0;

        case VINUM_RESETCONFIG:
            if (vinum_inactive() && (vinum_conf.opencount < 2)) { /* if we're not active */
                /*
                 * Note the open count.  We may be called from v, so we'll be open.
                 * Keep the count so we don't underflow
                 */
                int oc = vinum_conf.opencount;

                free_vinum(1); /* clean up everything */
                printf("vinum: CONFIGURATION OBLITERATED\n");
                vinum_conf.opencount = oc;
                ioctl_reply = (struct _ioctl_reply *) data; /* reinstate the address to reply to */
                ioctl_reply->error = 0;
                return 0;
            }
            return EBUSY;

        case VINUM_SETSTATE:
            setstate((struct vinum_ioctl_msg *) data); /* set an object state */
            return 0;

        case VINUM_MEMINFO:
            vinum_meminfo(data);
            return 0;

        case VINUM_MALLOCINFO:
            return vinum_mallocinfo(data);

        case VINUM_LABEL: /* label a volume */
            ioctl_reply->error = write_volume_label(*(int *) data); /* index of the volume to label */
            ioctl_reply->msg[0] = '\0'; /* no message */
            return 0;

        case VINUM_REMOVE:
            remove((struct vinum_ioctl_msg *) data); /* remove an object */
            return 0;

        case VINUM_GETFREELIST: /* get a drive free list element */
            index = *(int *) data; /* get the drive index */
            fe = ((int *) data)[1]; /* and the free list element */
            if ((index >= (unsigned) vinum_conf.drives_used) /* drive doesn't exist */
                ||(DRIVE[index].state == drive_unallocated))
                return ENODEV;
            if ((fe < 0) /* negative element numbers never exist */
                || (fe >= DRIVE[index].freelist_entries)) /* no such entry */
                return ENOENT;
            bcopy(&DRIVE[index].freelist[fe],
                data,
                sizeof(struct drive_freelist));
            return 0;

        case VINUM_GETDEFECTIVE: /* get a plex defective area element */
            index = *(int *) data; /* get the plex index */
            fe = ((int *) data)[1]; /* and the region number */
            if ((index >= (unsigned) vinum_conf.plexes_used) /* plex doesn't exist */
                ||(PLEX[index].state == plex_unallocated))
                return ENODEV;
            if ((fe < 0) /* negative region numbers never exist */
                || (fe >= PLEX[index].defective_regions)) /* no such entry */
                return ENOENT;
            bcopy(&PLEX[index].defective_region[fe],
                data,
                sizeof(struct plexregion));
            return 0;

        case VINUM_GETUNMAPPED: /* get a plex unmapped area element */
            index = *(int *) data; /* get the plex index */
            fe = ((int *) data)[1]; /* and the region number */
            if ((index >= (unsigned) vinum_conf.plexes_used) /* plex doesn't exist */
                ||(PLEX[index].state == plex_unallocated))
                return ENODEV;
            if ((fe < 0) /* negative region numbers never exist */
                || (fe >= PLEX[index].unmapped_regions)) /* no such entry */
                return ENOENT;
            bcopy(&PLEX[index].unmapped_region[fe],
                data,
                sizeof(struct plexregion));
            return 0;

        case VINUM_RESETSTATS:
            resetstats((struct vinum_ioctl_msg *) data); /* reset object stats */
            return 0;

            /* attach an object to a superordinate object */
        case VINUM_ATTACH:
            attachobject((struct vinum_ioctl_msg *) data);
            return 0;

            /* detach an object from a superordinate object */
        case VINUM_DETACH:
            detachobject((struct vinum_ioctl_msg *) data);
            return 0;

            /* rename an object */
        case VINUM_RENAME:
            renameobject((struct vinum_rename_msg *) data);
            return 0;

            /* replace an object */
        case VINUM_REPLACE:
            replaceobject((struct vinum_ioctl_msg *) data);
            return 0;

        default:
            break; /* unknown command: leave the inner switch and complain below */
        }
        /* FALLTHROUGH */

    default:
#if __FreeBSD__>=3
        printf("vinumioctl: type %d, sd %d, plex %d, major %x, volume %d, command %lx\n",
            device->type,
            device->sd,
            device->plex,
            device->major,
            device->volume,
            cmd); /* XXX */
#else
        printf("vinumioctl: type %d, sd %d, plex %d, major %x, volume %d, command %x\n",
            device->type,
            device->sd,
            device->plex,
            device->major,
            device->volume,
            cmd); /* XXX */
#endif
        return EINVAL;

    case VINUM_DRIVE_TYPE:
    case VINUM_PLEX_TYPE:
        return EAGAIN; /* try again next week */

    case VINUM_SD_TYPE:
        objno = SDNO(dev);

        switch (cmd) {
        case VINUM_INITSD: /* initialize subdisk */
            return initsd(objno);

        default:
            return EINVAL;
        }
        break;

    case VINUM_VOLUME_TYPE:
        objno = VOLNO(dev);

        if ((unsigned) objno >= (unsigned) vinum_conf.volumes_used) /* not a valid volume */
            return ENXIO;
        vol = &VOL[objno];
        if (vol->state != volume_up) /* not up, */
            return EIO; /* I/O error */

        switch (cmd) {
        case DIOCGDINFO: /* get disk label */
            get_volume_label(vol, (struct disklabel *) data);
            break;

            /*
             * Care!  DIOCGPART returns *pointers* to
             * the caller, so we need to store this crap as well.
             * And yes, we need it.
             */
        case DIOCGPART: /* get partition information */
            get_volume_label(vol, &vol->label);
            ((struct partinfo *) data)->disklab = &vol->label;
            ((struct partinfo *) data)->part = &vol->label.d_partitions[0];
            break;

            /*
             * We don't have this stuff on hardware,
             * so just pretend to do it so that
             * utilities don't get upset.
             */
        case DIOCWDINFO: /* write partition info */
        case DIOCSDINFO: /* set partition info */
            return 0; /* not a titty */

        case DIOCWLABEL: /* set or reset label writeable */
            if ((flag & FWRITE) == 0) /* not writeable? */
                return EACCES; /* no, die */
            if (*(int *) data != 0) /* set it? */
                vol->flags |= VF_WLABEL; /* yes */
            else
                vol->flags &= ~VF_WLABEL; /* no, reset */
            break;

        default:
            return ENOTTY; /* not my kind of ioctl */
        }
        break;
    }
    return 0; /* XXX */
}
/*
 * The following four functions check the supplied
 * object index and return a pointer to the object
 * if it exists.  Otherwise they store ENOENT and a
 * short message in the reply and return NULL.
 */

/*
 * Return the drive with index driveno, or NULL (with reply->error set
 * to ENOENT and reply->msg to "No such drive") if the index is
 * negative, beyond drives_used, or names an unallocated slot.
 */
struct drive *
validdrive(int driveno, struct _ioctl_reply *reply)
{
    if ((driveno >= 0) /* a negative index must not reach the table */
        && (driveno < vinum_conf.drives_used)
        && (DRIVE[driveno].state != drive_unallocated))
        return &DRIVE[driveno];
    strcpy(reply->msg, "No such drive");
    reply->error = ENOENT;
    return NULL;
}
/*
 * Return the subdisk with index sdno, or NULL (with reply->error set
 * to ENOENT and reply->msg to "No such subdisk") if the index is
 * negative, beyond subdisks_used, or names an unallocated slot.
 */
struct sd *
validsd(int sdno, struct _ioctl_reply *reply)
{
    if ((sdno >= 0) /* a negative index must not reach the table */
        && (sdno < vinum_conf.subdisks_used)
        && (SD[sdno].state != sd_unallocated))
        return &SD[sdno];
    strcpy(reply->msg, "No such subdisk");
    reply->error = ENOENT;
    return NULL;
}
/*
 * Return the plex with index plexno, or NULL (with reply->error set
 * to ENOENT and reply->msg to "No such plex") if the index is
 * negative, beyond plexes_used, or names an unallocated slot.
 */
struct plex *
validplex(int plexno, struct _ioctl_reply *reply)
{
    if ((plexno >= 0) /* a negative index must not reach the table */
        && (plexno < vinum_conf.plexes_used)
        && (PLEX[plexno].state != plex_unallocated))
        return &PLEX[plexno];
    strcpy(reply->msg, "No such plex");
    reply->error = ENOENT;
    return NULL;
}
/*
 * Return the volume with index volno, or NULL (with reply->error set
 * to ENOENT and reply->msg to "No such volume") if the index is
 * negative, beyond volumes_used, or names an unallocated slot.
 */
struct volume *
validvol(int volno, struct _ioctl_reply *reply)
{
    if ((volno >= 0) /* a negative index must not reach the table */
        && (volno < vinum_conf.volumes_used)
        && (VOL[volno].state != volume_unallocated))
        return &VOL[volno];
    strcpy(reply->msg, "No such volume");
    reply->error = ENOENT;
    return NULL;
}
/*
 * Reset an object's statistics.
 *
 * msg names the object by type and index.  The reply is written over
 * the same buffer: reply->error is 0 when the counters were cleared,
 * and EINVAL (with an empty message) when the type is unknown, the
 * index is out of range, or the slot is unallocated.
 *
 * Every failure path reports EINVAL; an out-of-range index never
 * drops through into the case for a different object type.
 */
void
resetstats(struct vinum_ioctl_msg *msg)
{
    struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
    int objno = msg->index; /* read it now: the reply shares msg's storage */

    switch (msg->type) {
    case drive_object:
        if ((objno >= 0) && (objno < vinum_conf.drives_used)) {
            struct drive *drive = &DRIVE[objno];

            if (drive->state != drive_unallocated) {
                drive->reads = 0; /* number of reads on this drive */
                drive->writes = 0; /* number of writes on this drive */
                drive->bytes_read = 0; /* number of bytes read */
                drive->bytes_written = 0; /* number of bytes written */
                reply->error = 0;
                return;
            }
        }
        break;

    case sd_object:
        if ((objno >= 0) && (objno < vinum_conf.subdisks_used)) {
            struct sd *sd = &SD[objno];

            if (sd->state != sd_unallocated) {
                sd->reads = 0; /* number of reads on this subdisk */
                sd->writes = 0; /* number of writes on this subdisk */
                sd->bytes_read = 0; /* number of bytes read */
                sd->bytes_written = 0; /* number of bytes written */
                reply->error = 0;
                return;
            }
        }
        break;

    case plex_object:
        if ((objno >= 0) && (objno < vinum_conf.plexes_used)) {
            struct plex *plex = &PLEX[objno];

            if (plex->state != plex_unallocated) {
                plex->reads = 0; /* number of reads on this plex */
                plex->writes = 0; /* number of writes on this plex */
                plex->bytes_read = 0; /* number of bytes read */
                plex->bytes_written = 0; /* number of bytes written */
                plex->multiblock = 0; /* requests that needed more than one block */
                plex->multistripe = 0; /* requests that needed more than one stripe */
                reply->error = 0;
                return;
            }
        }
        break;

    case volume_object:
        if ((objno >= 0) && (objno < vinum_conf.volumes_used)) {
            struct volume *vol = &VOL[objno];

            if (vol->state != volume_unallocated) {
                vol->bytes_read = 0; /* number of bytes read */
                vol->bytes_written = 0; /* number of bytes written */
                vol->reads = 0; /* number of reads on this volume */
                vol->writes = 0; /* number of writes on this volume */
                vol->recovered_reads = 0; /* reads recovered from another plex */
                reply->error = 0;
                return;
            }
        }
        break;

    case invalid_object: /* can't get this */
    default: /* nor a type value we have never heard of */
        break;
    }

    /* Anything that gets here named no valid object */
    reply->error = EINVAL;
    reply->msg[0] = '\0'; /* no message, the user should check */
}
/*
 * Attach an object to a superior object.
 *
 * msg->type and msg->index name the object to attach, and
 * msg->otherobject names the object to attach it to: a subdisk is
 * given to a plex (at plex offset msg->offset), a plex is given to a
 * volume.  The result is returned in the reply, which occupies the
 * same buffer as msg:
 *
 *  - EINVAL  for drives, volumes and invalid objects; for a plex
 *            whose organization is not plex_concat; for a plex that
 *            already has a volume; or for a volume that already has
 *            MAXPLEX plexes
 *  - EBUSY   for a subdisk that already belongs to a plex
 *  - EAGAIN  when the attached plex ends up in state plex_reviving
 *  - 0       on success
 *
 * If either object does not exist, the reply is whatever the
 * valid<object> lookup stored in it.
 */
void
attachobject(struct vinum_ioctl_msg *msg)
{
    struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
    struct volume *vol;
    struct plex *plex;
    struct sd *sd;

    switch (msg->type) {
    case sd_object:
        sd = validsd(msg->index, reply);
        if (sd == NULL) /* not a valid subdisk */
            return;
        plex = validplex(msg->otherobject, reply);
        if (plex == NULL) /* nothing to attach it to */
            return;
        if (sd->plexno >= 0) { /* already belong to a plex */
            reply->error = EBUSY; /* no message, the user should check */
            reply->msg[0] = '\0';
            return;
        }
        sd->plexoffset = msg->offset; /* this is where we want it */
        set_sd_state(sd->sdno, sd_stale, setstate_force); /* make sure it's stale */
        give_sd_to_plex(plex->plexno, sd->sdno); /* and give it to the plex */
        update_sd_config(sd->sdno, 0);
        save_config();
        reply->error = 0;
        return;

    case plex_object:
        plex = validplex(msg->index, reply); /* get plex */
        if (plex == NULL)
            return;
        if (plex->organization != plex_concat) { /* can't attach to striped and raid-5 */
            reply->error = EINVAL; /* no message, the user should check */
            reply->msg[0] = '\0';
            return;
        }
        vol = validvol(msg->otherobject, reply); /* and volume information */
        if (vol == NULL)
            return;
        if ((plex->volno < 0) && (vol->plexes != MAXPLEX)) {
            /* the plex has no owner and the volume has room for it */
            set_plex_state(plex->plexno, plex_down, setstate_force); /* make sure it's down */
            give_plex_to_volume(msg->otherobject, msg->index); /* and give it to the volume */
            update_plex_config(plex->plexno, 0);
            save_config();
            reply->error = (plex->state == plex_reviving) ? EAGAIN : 0; /* EAGAIN: need to revive it */
            return;
        }
        reply->error = EINVAL; /* no message, the user should check */
        reply->msg[0] = '\0';
        return;

    case drive_object: /* you can't attach a drive to anything */
    case volume_object: /* nor a volume */
    case invalid_object: /* "this can't happen" */
        reply->error = EINVAL;
        reply->msg[0] = '\0'; /* vinum(8) doesn't do this */
        return;
    }
}
/*
 * Detach an object from a superior object: a subdisk from its plex, or
 * a plex from its volume.
 *
 * msg->type selects the kind of object and msg->index its number.
 * Unless msg->force is set, the request is refused with EBUSY when
 *  - a subdisk's plex is up, or is flaky while this subdisk is up;
 *  - a plex is the only plex of a volume which is up.
 * An object which is not attached yields ENOENT.
 *
 * A detached object whose name starts with the name of its former
 * owner is renamed by prefixing "ex-" (truncated to the size of the
 * name buffer).  For a plex this also applies to its subdisks when
 * msg->recurse is set.
 *
 * The outcome is reported through the message buffer itself, viewed as
 * a struct _ioctl_reply, so request fields are read before the final
 * reply is written.
 */
void
detachobject(struct vinum_ioctl_msg *msg)
{
    struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
    struct sd *sd;
    struct plex *plex;
    struct volume *vol;
    int sdno;
    int plexno;
    int volno;

    switch (msg->type) {
    case drive_object:					    /* you can't detach a drive from anything */
    case volume_object:					    /* nor a volume */
    case invalid_object:				    /* "this can't happen" */
	reply->error = EINVAL;
	reply->msg[0] = '\0';				    /* vinum(8) doesn't do this */
	return;

    case sd_object:
	sd = validsd(msg->index, reply);
	if (sd == NULL)
	    return;
	if (sd->plexno < 0) {				    /* doesn't belong to a plex */
	    reply->error = ENOENT;
	    strcpy(reply->msg, "Subdisk is not attached");
	    return;
	}
	plex = &PLEX[sd->plexno];			    /* valid plex number */
	if ((!msg->force)				    /* don't force things */
	    && ((plex->state == plex_up)		    /* and the plex is up */
		|| ((plex->state == plex_flaky) && sd->state == sd_up))) { /* or flaky with this sd up */
	    reply->error = EBUSY;			    /* we need this sd */
	    reply->msg[0] = '\0';
	    return;
	}
	sd->plexno = -1;				    /* anonymous sd */
	if (plex->subdisks == 1) {			    /* this was the only subdisk */
	    Free(plex->sdnos);				    /* free the subdisk array */
	    plex->sdnos = NULL;				    /* and note the fact */
	    plex->subdisks_allocated = 0;		    /* no subdisk space */
	} else {
	    for (sdno = 0; sdno < plex->subdisks; sdno++) {
		if (plex->sdnos[sdno] == msg->index)	    /* found our subdisk */
		    break;
	    }
	    if (sdno < (plex->subdisks - 1))		    /* not the last one, compact */
		bcopy(&plex->sdnos[sdno + 1],
		    &plex->sdnos[sdno],
		    (plex->subdisks - 1 - sdno) * sizeof(int));
	}
	plex->subdisks--;
	rebuild_plex_unmappedlist(plex);		    /* rebuild the unmapped list */
	if (!bcmp(plex->name, sd->name, strlen(plex->name))) { /* this subdisk is named after the plex */
	    /*
	     * Shift the name up by three bytes to make room for the
	     * prefix.  The regions overlap, so this relies on bcopy
	     * handling overlap.  Copy the terminating NUL as well,
	     * otherwise the end of the new name depends on whatever
	     * happened to follow the old one in the buffer.
	     */
	    bcopy(sd->name,
		&sd->name[3],
		min(strlen(sd->name) + 1, MAXSDNAME - 3));
	    bcopy("ex-", sd->name, 3);
	    sd->name[MAXSDNAME - 1] = '\0';
	}
	update_plex_config(plex->plexno, 0);
	/*
	 * Removing a subdisk from a striped or RAID-5 plex changes where
	 * every block lives, so the data no longer matches: take the
	 * plex down.
	 */
	if ((plex->organization == plex_striped)
	    || (plex->organization == plex_raid5))
	    set_plex_state(plex->plexno,
		plex_down,
		setstate_force | setstate_configuring);
	update_sd_config(sd->sdno, 0);
	save_config();
	reply->error = 0;
	return;

    case plex_object:
	plex = validplex(msg->index, reply);		    /* get plex */
	if (plex == NULL)
	    return;
	if (plex->volno < 0) {				    /* doesn't belong to a volume */
	    reply->error = ENOENT;
	    strcpy(reply->msg, "Plex is not attached");
	    return;
	}
	volno = plex->volno;
	vol = &VOL[volno];
	if ((!msg->force)				    /* don't force things */
	    && ((vol->state == volume_up)		    /* and the volume is up */
		&& (vol->plexes == 1))) {		    /* and this is the last plex */
	    /* XXX As elsewhere, check whether we will lose
	     * mapping by removing this plex */
	    reply->error = EBUSY;			    /* we need this plex */
	    reply->msg[0] = '\0';
	    return;
	}
	plex->volno = -1;				    /* anonymous plex */
	for (plexno = 0; plexno < vol->plexes; plexno++) {
	    if (vol->plex[plexno] == msg->index)	    /* found our plex */
		break;
	}
	/*
	 * Close the gap in the volume's table of plex numbers.  This
	 * must move entries of vol->plex[], not struct volume entries
	 * starting at vol.
	 */
	if (plexno < (vol->plexes - 1))			    /* not the last one, compact */
	    bcopy(&vol->plex[plexno + 1],
		&vol->plex[plexno],
		(vol->plexes - 1 - plexno) * sizeof(int));
	vol->plexes--;
	if (!bcmp(vol->name, plex->name, strlen(vol->name))) { /* this plex is named after the volume */
	    /* First, rename the subdisks which are named after the plex */
	    if (msg->recurse) {
		for (sdno = 0; sdno < plex->subdisks; sdno++) {
		    sd = &SD[plex->sdnos[sdno]];
		    if (!bcmp(plex->name, sd->name, strlen(plex->name))) { /* subdisk is named after the plex */
			bcopy(sd->name,
			    &sd->name[3],
			    min(strlen(sd->name) + 1, MAXSDNAME - 3)); /* include the NUL */
			bcopy("ex-", sd->name, 3);
			sd->name[MAXSDNAME - 1] = '\0';
		    }
		}
	    }
	    bcopy(plex->name,
		&plex->name[3],
		min(strlen(plex->name) + 1, MAXPLEXNAME - 3)); /* include the NUL */
	    bcopy("ex-", plex->name, 3);
	    plex->name[MAXPLEXNAME - 1] = '\0';
	}
	update_plex_config(plex->plexno, 0);
	update_volume_config(volno, 0);
	save_config();
	reply->error = 0;
	return;
    }
}
/*
 * Rename a drive, subdisk, plex or volume.
 *
 * msg->type selects the kind of object, msg->index its number and
 * msg->newname the name to give it.  If an object of the same kind
 * already has that name, the request fails with EEXIST; this check is
 * made before the index is validated.  On success the configuration is
 * saved and the error is set to 0.
 *
 * The outcome is reported through the message buffer itself, viewed as
 * a struct _ioctl_reply, so msg->newname is copied before the final
 * reply is written.
 *
 * Each name is copied with a fixed length, so the last byte of the
 * destination is always set to NUL afterwards: a new name which fills
 * (or exceeds) the destination buffer would otherwise leave the stored
 * name unterminated.
 */
void
renameobject(struct vinum_rename_msg *msg)
{
    struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
    struct drive *drive;
    struct sd *sd;
    struct plex *plex;
    struct volume *vol;

    switch (msg->type) {
    case drive_object:
	if (find_drive(msg->newname, 0) >= 0) {		    /* we have that name already, */
	    reply->error = EEXIST;
	    reply->msg[0] = '\0';
	    return;
	}
	drive = validdrive(msg->index, reply);
	if (drive) {
	    bcopy(msg->newname, drive->label.name, MAXDRIVENAME);
	    drive->label.name[MAXDRIVENAME - 1] = '\0';	    /* always terminated */
	    save_config();
	    reply->error = 0;
	}
	return;

    case sd_object:
	if (find_subdisk(msg->newname, 0) >= 0) {	    /* we have that name already, */
	    reply->error = EEXIST;
	    reply->msg[0] = '\0';
	    return;
	}
	sd = validsd(msg->index, reply);
	if (sd) {
	    bcopy(msg->newname, sd->name, MAXSDNAME);
	    sd->name[MAXSDNAME - 1] = '\0';		    /* always terminated */
	    update_sd_config(sd->sdno, 0);
	    save_config();
	    reply->error = 0;
	}
	return;

    case plex_object:
	if (find_plex(msg->newname, 0) >= 0) {		    /* we have that name already, */
	    reply->error = EEXIST;
	    reply->msg[0] = '\0';
	    return;
	}
	plex = validplex(msg->index, reply);
	if (plex) {
	    bcopy(msg->newname, plex->name, MAXPLEXNAME);
	    plex->name[MAXPLEXNAME - 1] = '\0';		    /* always terminated */
	    update_plex_config(plex->plexno, 0);
	    save_config();
	    reply->error = 0;
	}
	return;

    case volume_object:
	if (find_volume(msg->newname, 0) >= 0) {	    /* we have that name already, */
	    reply->error = EEXIST;
	    reply->msg[0] = '\0';
	    return;
	}
	vol = validvol(msg->index, reply);
	if (vol) {
	    bcopy(msg->newname, vol->name, MAXVOLNAME);
	    vol->name[MAXVOLNAME - 1] = '\0';		    /* always terminated */
	    update_volume_config(msg->index, 0);
	    save_config();
	    reply->error = 0;
	}
	return;

    case invalid_object:				    /* nothing to rename */
	reply->error = EINVAL;
	reply->msg[0] = '\0';
	return;
    }
}
/* Replace one object with another */
void
replaceobject(struct vinum_ioctl_msg *msg)
{
struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
reply->error = ENODEV; /* until I know how to do this */
strcpy(reply->msg, "replace not implemented yet");
/* save_config (); */
}

120
sys/modules/vinum/vinumkw.h Normal file
View File

@ -0,0 +1,120 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: vinumkw.h,v 1.7 1998/08/07 02:35:51 grog Exp grog $
*/
/* Command keywords that vinum knows. These include both user-level
* and kernel-level stuff */
/* Our complete vocabulary. The names of the commands are
* the same as the identifier without the kw_ at the beginning
* (i.e. kw_create defines the "create" keyword). Preprocessor
* magic in parser.c does the rest.
*
* kw_l, kw_subdisk, kw_vol and kw_length are explicit aliases: each
* has the same value as the enumerator it is set to, and the
* enumerator after an alias continues from that value plus one.
* kw_debug and kw_info exist only when DEBUG is defined.
* kw_invalid_keyword is the only negative value. */
enum keyword {
kw_create,
kw_modify,
kw_list,
kw_l = kw_list, /* alias for kw_list */
kw_ld, /* list drive */
kw_ls, /* list subdisk */
kw_lp, /* list plex */
kw_lv, /* list volume */
kw_set,
kw_rm,
kw_start,
kw_stop,
kw_drive,
kw_sd,
kw_subdisk = kw_sd, /* alias for kw_sd */
kw_plex,
kw_volume,
kw_vol = kw_volume, /* alias for kw_volume */
kw_read,
kw_readpol,
kw_org,
kw_name,
kw_concat,
kw_striped,
kw_raid5,
kw_driveoffset,
kw_plexoffset,
kw_len,
kw_length = kw_len, /* alias for kw_len */
kw_state,
kw_setupstate,
kw_d, /* flag names */
kw_f,
kw_r,
kw_s,
kw_v,
kw_round, /* round robin */
kw_prefer, /* prefer plex */
kw_device,
kw_init,
kw_label,
kw_resetconfig,
kw_writethrough,
kw_writeback,
kw_raw,
kw_resetstats,
kw_attach,
kw_detach,
kw_rename,
kw_printconfig,
kw_replace,
kw_detached,
#ifdef DEBUG
kw_debug, /* go into debugger */
kw_info,
#endif
kw_invalid_keyword = -1 /* not a keyword at all */
};
/* One vocabulary entry: the text of a keyword paired with its
* enum keyword value. */
struct _keywords {
char *name; /* keyword text */
enum keyword keyword; /* and the corresponding enum value */
};
/* A table of keywords together with a size. */
struct keywordset {
int size; /* presumably the number of entries in k -- confirm against parser.c */
struct _keywords *k; /* the keyword entries */
};
/* Keyword tables and the sets which refer to them. They are only
* declared here; the definitions live elsewhere. Judging by the names,
* one pair holds the command keywords and the other the flag
* keywords -- confirm against the definitions. */
extern struct _keywords keywords[];
extern struct _keywords flag_keywords[];
extern struct keywordset keyword_set;
extern struct keywordset flag_set;

View File

@ -0,0 +1,213 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: vinumstate.h,v 1.11 1998/08/04 06:22:49 grog Exp grog $
*/
/* This file gets read by makestatetext to create text files
* with the names of the states, so don't change the file
* format */
/* Volume states. volume_unallocated must remain the first enumerator,
* because it is required to be 0. The first two comments below follow
* the enumerator they describe; the last one apparently describes
* volume_up, which comes after it. */
enum volumestate {
volume_unallocated,
/* present but unused. Must be 0 */
volume_uninit,
/* mentioned elsewhere but not defined */
volume_down,
/* The volume is up and functional, but not all plexes may be available */
volume_up,
volume_laststate = volume_up /* last value, for table dimensions */
};
/* Plex states. The order matters: every state from plex_firstup
* (an alias for plex_corrupt) onwards is at least partially up, and
* plex_laststate aliases the highest value. Each comment precedes
* the enumerator it describes. */
enum plexstate {
/* An empty entry, not a plex at all. */
plex_unallocated,
/* The plex has been allocated, but its configuration
* is not complete */
plex_init,
/* A plex which has gone completely down because of
* I/O errors. */
plex_faulty,
/* A plex which has been taken down by the
* administrator. */
plex_down,
/* A plex which is currently being brought up after
* being not up. This involves copying data from
* another plex */
plex_reviving,
/* A plex which is being initialized */
plex_initializing,
/* *** The remaining states represent plexes which are
* at least partially up. Keep these separate so that
* they can be checked more easily. */
/* A plex entry which is at least partially up. Not
* all subdisks are available, and an inconsistency
* has occurred. If no other plex is uncorrupted,
* the volume is no longer consistent. */
plex_corrupt,
plex_firstup = plex_corrupt, /* first "up" state */
/* A plex entry which is at least partially up. Not
* all subdisks are available, but so far no
* inconsistency has occurred (this will change with
* the first write to the address space occupied by
* a defective subdisk). A RAID 5 plex with one subdisk
* down will remain degraded even after a write */
plex_degraded,
/* A plex which is really up, but which has a reborn
* subdisk which we don't completely trust, and
* which we don't want to read if we can avoid it */
plex_flaky,
/* A plex entry which is completely up. All subdisks
* are up. */
plex_up,
plex_laststate = plex_up /* last value, for table dimensions */
};
/* subdisk states. The enumerators are grouped, in ascending order,
* into: incomplete or empty entries, invalid data, valid but
* inaccessible data, and accessible valid data. Each comment
* precedes the enumerator it describes; sd_laststate aliases the
* highest value. */
enum sdstate {
/* An empty entry, not a subdisk at all. */
sd_unallocated,
/* A subdisk entry which has not been created
* completely. Some fields may be empty.
*/
sd_uninit,
/* A subdisk entry which has been created completely.
* All fields are correct, but the disk hasn't
* been updated.
*/
sd_init,
/* A subdisk entry which has been created completely and
* which is currently being initialized */
sd_initializing,
/* A subdisk entry which has been created completely.
* All fields are correct, and the disk has been
* updated, but there is no data on the disk.
*/
sd_empty,
/* *** The following states represent invalid data */
/* A subdisk entry which has been created completely.
* All fields are correct, the disk has been updated,
* and the data was valid, but since then the drive
* has gone down, and as a result updates have been
* missed.
*/
sd_obsolete,
/* A subdisk entry which has been created completely.
* All fields are correct, the disk has been updated,
* and the data was valid, but since then the drive
* has gone down, updates have been lost, and then
* the drive came up again.
*/
sd_stale,
/* *** The following states represent valid, inaccessible data */
/* A subdisk entry which has been created completely.
* All fields are correct, the disk has been updated,
* and the data was valid, but since then the drive
* has gone down. No attempt has been made to write
* to the subdisk since the crash.
*/
sd_crashed,
/* A subdisk entry which was up, which contained
* valid data, and which was taken down by the
* administrator. The data is valid. */
sd_down,
/* *** The following states represent accessible subdisks
* with valid data */
/* A subdisk entry which has been created completely.
* All fields are correct, the disk has been updated,
* and the data was valid, but since then the drive
* has gone down and up again. No updates were lost,
* but it is possible that the subdisk has been
* damaged. We won't read from this subdisk if we
* have a choice. If this is the only subdisk which
* covers this address space in the plex, we set its
* state to sd_up under these circumstances, so this
* status implies that there is another subdisk to
* fulfil the request.
*/
sd_reborn,
/* A subdisk entry which has been created completely.
* All fields are correct, the disk has been updated,
* and the data is valid.
*/
sd_up,
sd_laststate = sd_up /* last value, for table dimensions */
};
/* Drive states. drive_unallocated must remain the first enumerator,
* because it is required to be 0. In this enum each comment follows
* the enumerator it describes. */
enum drivestate {
drive_unallocated,
/* present but unused. Must be 0 */
drive_uninit,
/* just mentioned in some other config entry */
drive_down,
/* not accessible */
drive_coming_up,
/* in the process of being brought up */
drive_up,
/* up and running */
drive_laststate = drive_up /* last value, for table dimensions */
};

View File

@ -0,0 +1,510 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: vinumvar.h,v 1.15 1998/08/14 06:36:41 grog Exp grog $
*/
/* XXX gdb can't find our global pointers, so use this kludge to
* point to them locally. Remove after testing.
*
* The macro expands to the declaration of a local pointer VC to the
* global vinum_conf. It has no trailing semicolon, so the user
* supplies one, and it must appear where a declaration is allowed. */
#define BROKEN_GDB struct _vinum_conf *VC = &vinum_conf
#include <sys/time.h>
#include "vinumstate.h"
/*
 * Some configuration maxima.  They're an enum because
 * we can't define global constants.  Sorry about that.
 *
 * These aren't as bad as they look: most of them
 * are soft limits.  Only the MAXCONFIG parameter is set in stone
 *
 * The macros defined in the middle of the enum build and take apart
 * vinum device numbers.  Every macro argument is parenthesized in the
 * expansion, so an argument may be an arbitrary expression.
 */
enum constants {
    VINUM_HEADER = 512,					    /* size of header on disk */
    MAXCONFIGLINE = 1024,				    /* maximum size of a single config line */
    /* XXX Do we still need this? */
    MINVINUMSLICE = 1048576,				    /* minimum size of a slice */

    CDEV_MAJOR = 91,					    /* major number for character device */
    BDEV_MAJOR = 25,					    /* and block device */

    ROUND_ROBIN_READPOL = -1,				    /* round robin read policy */

    /* type field in minor number */
    VINUM_VOLUME_TYPE = 0,
    VINUM_PLEX_TYPE = 1,
    VINUM_SD_TYPE = 2,
    VINUM_DRIVE_TYPE = 3,
    VINUM_SUPERDEV_TYPE = 4,				    /* super device. */

    /* Shifts for the individual fields in the device */
    VINUM_TYPE_SHIFT = 28,
    VINUM_VOL_SHIFT = 0,
    VINUM_PLEX_SHIFT = 16,
    VINUM_SD_SHIFT = 20,
    /* and the widths, in bits, of the fields we extract */
    VINUM_VOL_WIDTH = 8,
    VINUM_PLEX_WIDTH = 3,
    VINUM_SD_WIDTH = 8,
    MAJORDEV_SHIFT = 8,

/* Create a block device number from volume, plex, subdisk and type */
#define VINUMBDEV(v,p,s,t)  ((BDEV_MAJOR << MAJORDEV_SHIFT)	\
			     | ((v) << VINUM_VOL_SHIFT)		\
			     | ((p) << VINUM_PLEX_SHIFT)	\
			     | ((s) << VINUM_SD_SHIFT)		\
			     | ((t) << VINUM_TYPE_SHIFT) )

/* And a character device number */
#define VINUMCDEV(v,p,s,t)  ((CDEV_MAJOR << MAJORDEV_SHIFT)	\
			     | ((v) << VINUM_VOL_SHIFT)		\
			     | ((p) << VINUM_PLEX_SHIFT)	\
			     | ((s) << VINUM_SD_SHIFT)		\
			     | ((t) << VINUM_TYPE_SHIFT) )

/* extract device type (a three bit field) */
#define DEVTYPE(x) (((x) >> VINUM_TYPE_SHIFT) & 7)

/* extract volume number */
#define VOLNO(x) ((x) & ((1 << VINUM_VOL_WIDTH) - 1))

/* extract plex number: the device holds the index of the plex within
 * its volume, which is looked up in the volume's plex table */
#define PLEXNO(x) (VOL [VOLNO (x)].plex [((x) >> VINUM_PLEX_SHIFT) & ((1 << VINUM_PLEX_WIDTH) - 1)])

/* extract subdisk number: the device holds the index of the subdisk
 * within its plex, which is looked up in the plex's subdisk table */
#define SDNO(x) (PLEX [PLEXNO (x)].sdnos [((x) >> VINUM_SD_SHIFT) & ((1 << VINUM_SD_WIDTH) - 1)])

/* extract drive number: the drive of the subdisk found by SDNO */
#define DRIVENO(x) (SD [SDNO (x)].driveno)

    VINUM_SUPERDEV = VINUMBDEV(0, 0, 0, VINUM_SUPERDEV_TYPE), /* superdevice number */

    /* the number of object entries to cater for initially, and also the
     * value by which they are incremented.  It doesn't take long
     * to extend them, so theoretically we could start with 1 of each, but
     * it's untidy to allocate such small areas.  These values are
     * probably too small.
     */
    INITIAL_DRIVES = 4,
    INITIAL_VOLUMES = 4,
    INITIAL_PLEXES = 8,
    INITIAL_SUBDISKS = 16,
    INITIAL_SUBDISKS_IN_PLEX = 4,			    /* number of subdisks to allocate to a plex */
    INITIAL_SUBDISKS_IN_DRIVE = 4,			    /* number of subdisks to allocate to a drive */
    INITIAL_DRIVE_FREELIST = 16,			    /* number of entries in drive freelist */
    PLEX_REGION_TABLE_SIZE = 8,				    /* number of entries in plex region tables */
    INITIAL_LOCKS = 8,					    /* number of locks to allocate to a volume */
    DEFAULT_REVIVE_BLOCKSIZE = 32768,			    /* size of block to transfer in one op */
};
/* device numbers */
/*
* 31 30 28 27 20 19 18 16 15 8 7 0
* |-----------------------------------------------------------------------------------------------|
* |X | Type | Subdisk number | X| Plex | Major number | volume number |
* |-----------------------------------------------------------------------------------------------|
*
* 0x2 03 1 19 06
*/
/* The same layout expressed as bit fields, listed from the volume
* number (bits 7-0 in the diagram) to the sign bit. The widths add
* up to 32 bits (8 + 8 + 3 + 1 + 8 + 3 + 1).
* NOTE(review): the order in which a compiler allocates bit fields
* within a word is implementation-defined, so check on the target
* whether this struct really matches the diagram. */
struct devcode {
/* CARE. These fields assume a big-endian word. On a
* little-endian system, they're the wrong way around */
unsigned volume:8; /* up to 256 volumes */
unsigned major:8; /* this is where the major number fits */
unsigned plex:3; /* up to 8 plexes per volume */
unsigned unused:1; /* up for grabs */
unsigned sd:8; /* up to 256 subdisks per plex */
unsigned type:3; /* type of object */
/* type field
VINUM_VOLUME = 0,
VINUM_PLEX = 1,
VINUM_SUBDISK = 2,
VINUM_DRIVE = 3,
VINUM_SUPERDEV = 4, */
unsigned signbit:1; /* to make 32 bits */
};
/* Directories for the vinum device nodes, and the name of the
* control (super) device. VINUM_SUPERDEV_NAME relies on the
* concatenation of adjacent string literals, giving
* "/dev/vinum/control". */
#define VINUM_DIR "/dev/vinum"
#define VINUM_RDIR "/dev/rvinum"
#define VINUM_SUPERDEV_NAME VINUM_DIR"/control"
/* Sizes of the name buffers in the object structures. These are
* array sizes, so they presumably include the terminating NUL. Note
* that MAXDRIVENAME is smaller than the others. */
#define MAXDRIVENAME 32 /* maximum length of a device name */
#define MAXSDNAME 64 /* maximum length of a subdisk name */
#define MAXPLEXNAME 64 /* maximum length of a plex name */
#define MAXVOLNAME 64 /* maximum length of a volume name */
#define MAXNAME 64 /* maximum length of any name */
#define MAXVOLPLEX 8 /* maximum number of plexes in a volume */
/* Flags for all objects. Most of them only apply to
* specific objects, but we have space for all in any
* 32 bit flags word.
*
* Each flag is a distinct single bit, so flags can be combined with
* bitwise OR. The value 4 is not assigned. */
enum objflags {
VF_LOCKED = 1, /* somebody has locked access to this object */
VF_LOCKING = 2, /* we want access to this object */
VF_WRITETHROUGH = 8, /* volume: write through */
VF_INITED = 0x10, /* unit has been initialized */
VF_WLABEL = 0x20, /* label area is writable */
VF_LABELLING = 0x40, /* unit is currently being labelled */
VF_WANTED = 0x80, /* someone is waiting to obtain a lock */
VF_RAW = 0x100, /* raw volume (no file system) */
VF_LOADED = 0x200, /* module is loaded */
VF_CONFIGURING = 0x400, /* somebody is changing the config */
VF_WILL_CONFIGURE = 0x800, /* somebody wants to change the config */
VF_CONFIG_INCOMPLETE = 0x1000, /* haven't finished changing the config */
VF_CONFIG_SETUPSTATE = 0x2000, /* set a volume up if all plexes are empty */
VF_READING_CONFIG = 0x4000, /* we're reading config database from disk */
VF_KERNELOP = 0x8000, /* we're performing ops from kernel space */
};
/* Global configuration information for the vinum subsystem.
*
* The four pointers are the tables of objects which the DRIVE, SD,
* PLEX and VOL macros refer to. For each table there is a count of
* entries allocated and a count of entries in use.
* NOTE(review): the two debugging members are guarded by `#if DEBUG',
* so the size of this structure depends on the value of DEBUG; every
* file which includes this header must agree on it. */
struct _vinum_conf {
/* Pointers to vinum structures */
struct drive *drive;
struct sd *sd;
struct plex *plex;
struct volume *volume;
/* the number allocated */
int drives_allocated;
int subdisks_allocated;
int plexes_allocated;
int volumes_allocated;
/* and the number currently in use */
int drives_used;
int subdisks_used;
int plexes_used;
int volumes_used;
int flags; /* presumably enum objflags bits -- confirm */
int opencount; /* number of times we've been opened */
#if DEBUG
int lastrq;
struct buf *lastbuf;
#endif
};
/* Use these defines to simplify code. Each one names a member of the
* global vinum_conf, so DRIVE [n], SD [n], PLEX [n] and VOL [n] index
* the object tables and VFLAGS is the global flags word. They expand
* textually and require a vinum_conf object to be in scope. */
#define DRIVE vinum_conf.drive
#define SD vinum_conf.sd
#define PLEX vinum_conf.plex
#define VOL vinum_conf.volume
#define VFLAGS vinum_conf.flags
/* Slice header
* Vinum drives start with this structure:
*
* Sector
* |--------------------------------------|
* | PDP-11 memorial boot block | 0
* |--------------------------------------|
* | Disk label, maybe | 1
* |--------------------------------------|
* | Slice definition (vinum_hdr) | 2
* |--------------------------------------|
* | |
* | Configuration info, first copy | 3
* | |
* |--------------------------------------|
* | |
* | Configuration info, second copy | 3 + size of config
* | |
* |--------------------------------------|
*/
/* Sizes and offsets of our information. The offsets and sizes are in
* bytes; DATASTART is divided by DEV_BSIZE and so is in units of
* DEV_BSIZE. The first config copy starts directly after the label
* (4096 + 512 = 4608), and the data starts after two config copies
* of MAXCONFIG bytes each.
* NOTE(review): with 512 byte sectors these offsets are sectors 8 and
* 9, not sectors 2 and 3 as drawn in the slice header diagram above --
* confirm which is current. */
enum {
VINUM_LABEL_OFFSET = 4096, /* offset of vinum label */
VINUMHEADERLEN = 512, /* size of vinum label */
VINUM_CONFIG_OFFSET = 4608, /* offset of first config copy */
MAXCONFIG = 65536, /* and size of config copy */
DATASTART = (MAXCONFIG * 2 + VINUM_CONFIG_OFFSET) / DEV_BSIZE /* this is where the data starts */
};
/* hostname is 256 bytes long, but we don't need to shlep
* multiple copies in vinum. We use the host name just
* to identify this system, and 32 bytes should be ample
* for that purpose */
#define VINUMHOSTNAMELEN 32
/* The label which identifies a vinum drive. It is embedded in
* struct vinum_hdr and in struct drive. */
struct vinum_label {
char sysname[VINUMHOSTNAMELEN]; /* system name at time of creation */
char name[MAXDRIVENAME]; /* our name of the drive */
struct timeval date_of_birth; /* the time it was created */
struct timeval last_update; /* and the time of last update */
off_t drive_size; /* total size in bytes of the drive.
* This value includes the headers */
};
/* The vinum header: a magic number, the length of each copy of the
* configuration, and the drive label. VINUM_MAGIC and VINUM_NOMAGIC
* are the two values defined for the magic member. */
struct vinum_hdr {
long long magic; /* we're long on magic numbers */
/* XXX Get these right for big-endian */
#define VINUM_MAGIC 22322600044678729LL /* should be this */
#define VINUM_NOMAGIC 22322600044678990LL /* becomes this after obliteration */
int config_length; /* size in bytes of each copy of the
* configuration info.
* This must be a multiple of the sector size. */
struct vinum_label label; /* unique label */
};
/* Information returned from read_drive_label: what was found when
* looking for a vinum label on a partition. */
enum drive_label_info {
DL_CANT_OPEN, /* invalid partition */
DL_NOT_OURS, /* valid partition, but no vinum label */
DL_DELETED_LABEL, /* valid partition, deleted label found */
DL_WRONG_DRIVE, /* drive name doesn't match */
DL_OURS /* valid partition and label found */
};
/*** Drive definitions ***/
/* A drive corresponds to a disk slice. We use a different term to show
* the difference in usage: it doesn't have to be a slice, and could
* theoretically be a complete, unpartitioned disk.
*
* Besides its state and identity, a drive entry carries I/O
* statistics and a list of the free space on the drive. */
struct drive {
enum drivestate state; /* current state */
int subdisks_allocated; /* number of entries in sd */
int subdisks_used; /* and the number used */
int blocksize; /* size of fs blocks */
u_int64_t sectors_available; /* number of sectors still available */
int secsperblock; /* presumably sectors per block -- confirm */
int lasterror; /* last error on drive */
int driveno; /* index of drive in vinum_conf */
int opencount; /* number of up subdisks */
u_int64_t reads; /* number of reads on this drive */
u_int64_t writes; /* number of writes on this drive */
u_int64_t bytes_read; /* number of bytes read */
u_int64_t bytes_written; /* number of bytes written */
dev_t dev; /* and device number */
char devicename[MAXDRIVENAME]; /* name of the slice it's on */
struct vnode *vp; /* vnode pointer */
struct proc *p; /* NOTE(review): presumably the process used to open the drive -- confirm */
struct vinum_label label; /* and the label information */
struct partinfo partinfo; /* partition information */
int freelist_size; /* number of entries alloced in free list */
int freelist_entries; /* number of entries used in free list */
struct drive_freelist { /* sorted list of free space on drive */
u_int64_t offset; /* start of the free area */
long sectors; /* and its length in sectors */
} *freelist;
};
/*** Subdisk definitions ***/
/* A subdisk: an area of a drive, which may belong to a plex.
* The code which attaches and detaches subdisks treats a negative
* plexno as "not attached to any plex". */
struct sd {
enum sdstate state; /* state */
/* offsets in blocks */
int64_t driveoffset; /* offset on drive */
int64_t plexoffset; /* offset in plex */
u_int64_t sectors; /* and length in sectors */
int plexno; /* index of plex, if it belongs */
int driveno; /* index of the drive on which it is located */
int sdno; /* our index in vinum_conf */
int pid; /* pid of process which opened us */
u_int64_t reads; /* number of reads on this subdisk */
u_int64_t writes; /* number of writes on this subdisk */
u_int64_t bytes_read; /* number of bytes read */
u_int64_t bytes_written; /* number of bytes written */
char name[MAXSDNAME]; /* name of subdisk */
};
/*** Plex definitions ***/
/* kinds of plex organization. plex_disorg is first and therefore
* has the value 0. */
enum plexorg {
plex_disorg, /* disorganized */
plex_concat, /* concatenated plex */
plex_striped, /* striped plex */
plex_raid5 /* RAID5 plex */
};
/* Region in plex (either defective or unmapped). struct plex keeps
* one list of these for defective regions and one for unmapped
* regions. */
struct plexregion {
u_int64_t offset; /* start of region */
u_int64_t length; /* length */
};
/* A plex: an ordered set of subdisks with a given organization,
* which may belong to a volume. The members are grouped into
* identity and geometry, lock information, statistics, revive
* parameters and the lists of defective and unmapped regions.
* The code which attaches and detaches plexes treats a negative
* volno as "not attached to any volume". */
struct plex {
enum plexorg organization; /* Plex organization */
enum plexstate state; /* and current state */
u_int64_t length; /* total length of plex (max offset) */
int flags; /* presumably enum objflags bits -- confirm */
int stripesize; /* size of stripe or raid band, in sectors */
int subdisks; /* number of associated subdisks */
int subdisks_allocated; /* number of subdisks allocated space for */
int *sdnos; /* list of component subdisks */
int plexno; /* index of plex in vinum_conf */
int volno; /* index of volume */
int volplexno; /* number of plex in volume */
int pid; /* pid of process which opened us */
/* Lock information */
int locks; /* number of locks used */
int alloclocks; /* number of locks allocated */
struct rangelock *lock; /* ranges of locked addresses */
/* Statistics */
u_int64_t reads; /* number of reads on this plex */
u_int64_t writes; /* number of writes on this plex */
u_int64_t bytes_read; /* number of bytes read */
u_int64_t bytes_written; /* number of bytes written */
u_int64_t multiblock; /* requests that needed more than one block */
u_int64_t multistripe; /* requests that needed more than one stripe */
/* revive parameters */
u_int64_t revived; /* block number of current revive request */
int revive_blocksize; /* revive block size (bytes) */
int revive_interval; /* and time to wait between transfers */
struct request *waitlist; /* list of requests waiting on revive op */
/* geometry control */
int defective_regions; /* number of regions which are defective */
int defective_region_count; /* number of entries in defective_region */
struct plexregion *defective_region; /* list of offset/length pairs: defective sds */
int unmapped_regions; /* number of regions which are missing */
int unmapped_region_count; /* number of entries in unmapped_region */
struct plexregion *unmapped_region; /* list of offset/length pairs: missing sds */
char name[MAXPLEXNAME]; /* name of plex */
};
/*** Volume definitions ***/
#define MAXPLEX 8 /* maximum number of plexes */
/* Description of one volume: its state, the plexes attached to it, the
 * read policy, open/lock bookkeeping, I/O statistics and a disk label. */
struct volume {
enum volumestate state; /* current state */
int plexes; /* number of plexes */
int preferred_plex; /* plex to read from, -1 for round-robin */
int last_plex_read; /* index of plex used for last read,
 * for round-robin */
dev_t devno; /* device number */
int flags; /* status and configuration flags */
int opencount; /* number of opens (all the same process) */
int openflags; /* flags supplied to last open(2) */
u_int64_t size; /* size of volume */
int disk; /* disk index */
int blocksize; /* logical block size */
int active; /* number of outstanding requests active */
int subops; /* and the number of suboperations */
pid_t pid; /* pid of locker */
/* Statistics */
u_int64_t bytes_read; /* number of bytes read */
u_int64_t bytes_written; /* number of bytes written */
u_int64_t reads; /* number of reads on this volume */
u_int64_t writes; /* number of writes on this volume */
u_int64_t recovered_reads; /* reads recovered from another plex */
/* Unlike subdisks in the plex, space for the plex pointers is static:
 * the array below always has room for MAXPLEX entries. */
int plex[MAXPLEX]; /* index of plexes */
char name[MAXVOLNAME]; /* name of volume */
struct disklabel label; /* for DIOCGPART */
};
/* Table expansion.  Expand table, which contains oldcount entries of
 * type element, by increment entries, and change oldcount accordingly.
 *
 * table      lvalue holding the table pointer; its address is passed to
 *            expand_table () as a void **.
 * element    type of one table entry (used only with sizeof).
 * oldcount   lvalue holding the current entry count; increment is added
 *            to it after the call.
 * increment  number of entries to add.
 *
 * expand_table () receives the old and new table sizes in bytes.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement (safe in an unbraced if/else), and every argument is
 * parenthesized so that expressions such as "a ? b : c" expand correctly.
 * Invoke it with a trailing semicolon.  oldcount and increment are
 * evaluated more than once, so they must not have side effects. */
#define EXPAND(table, element, oldcount, increment)                 \
    do {                                                            \
        expand_table ((void **) &(table),                           \
            (oldcount) * sizeof (element),                          \
            ((oldcount) + (increment)) * sizeof (element));         \
        (oldcount) += (increment);                                  \
    } while (0)
/* Information on vinum's memory usage: summary counters plus a pointer
 * to a table of struct mc entries. */
struct meminfo {
int mallocs; /* number of malloced blocks */
int total_malloced; /* total amount malloced */
int highwater; /* maximum number of mallocs */
struct mc *malloced; /* pointer to kernel table */
};
/* Entry type of the table that meminfo.malloced points to.
 * NOTE(review): the fields carried no comments; the descriptions marked
 * "presumably" below are inferred from the names -- confirm against the
 * allocation-tracking code. */
struct mc {
int seq; /* presumably a sequence number for the allocation */
int size; /* presumably the size of the allocation */
short line; /* presumably the source line of the allocating call */
short flags; /* presumably takes the ALLOC_ values defined below */
#define ALLOC_KVA 1 /* allocated via kva calls */
int *databuf; /* really vm_object_t */
caddr_t address; /* presumably the address of the allocated block */
char file[16]; /* presumably the source file name of the allocating call */
};
/* Summary of plex states within a volume, used by the state transition
 * routines.  Each value is a three-bit map:
 *
 *   Bit 0 (value 1): other plexes in the volume are down
 *   Bit 1 (value 2): other plexes in the volume are up
 *   Bit 2 (value 4): the current plex is up
 *
 * The composite values below are built from the single-bit ones, so the
 * numeric values run from 0 to 7.
 * Maybe they should be local to state.c */
enum volplexstate {
    volplex_onlyusdown  = 0,                                    /* 0: we're the only plex, and we're down */
    volplex_alldown     = 1,                                    /* 1: another plex is down, and so are we */
    volplex_otherup     = 2,                                    /* 2: another plex is up */
    volplex_otherupdown = volplex_alldown | volplex_otherup,    /* 3: other plexes are up and down */
    volplex_onlyus      = 4,                                    /* 4: we're up and alone */
    volplex_onlyusup    = volplex_onlyus | volplex_alldown,     /* 5: only we are up, others are down */
    volplex_allup       = volplex_onlyus | volplex_otherup,     /* 6: all plexes are up */
    volplex_someup      = volplex_onlyus | volplex_otherupdown  /* 7: some plexes are up, including us */
};
/* State map for plex: one bit per kind of subdisk (SD) state found.
 * Every enumerator occupies its own bit position. */
enum sdstates {
    sd_emptystate    = 1 << 0,  /*   1: presumably, found an SD which is empty */
    sd_downstate     = 1 << 1,  /*   2: found an SD which is down */
    sd_crashedstate  = 1 << 2,  /*   4: found an SD which is crashed */
    sd_obsoletestate = 1 << 3,  /*   8: found an SD which is obsolete */
    sd_stalestate    = 1 << 4,  /*  16: found an SD which is stale */
    sd_rebornstate   = 1 << 5,  /*  32: found an SD which is reborn */
    sd_upstate       = 1 << 6,  /*  64: found an SD which is up */
    sd_initstate     = 1 << 7,  /* 128: found an SD which is init */
    sd_otherstate    = 1 << 8   /* 256: found an SD in some other state */
};
/* Flags passed as a parameter to the set_<foo>_state functions.
 * They are defined here, rather than next to those functions, because
 * the external definitions need to know them.  Apart from
 * setstate_none, each flag has its own bit. */
enum setstateflags {
    setstate_none        = 0,       /* no flags */
    setstate_force       = 1 << 0,  /* 1: force the state change */
    setstate_configuring = 1 << 1,  /* 2: we're currently configuring, don't save */
    setstate_recursing   = 1 << 2,  /* 4: we're called from another setstate function */
    setstate_norecurse   = 1 << 3   /* 8: don't call other setstate functions */
};
#ifdef DEBUG
/* Debugging stuff: constants that exist only when DEBUG is defined.
 * NOTE(review): the values 1 and 2 look like independent bits of a debug
 * mask, but the variable they are tested against is not visible here --
 * confirm before adding further values. */
#define DEBUG_ADDRESSES 1
#define DEBUG_NUMOUTPUT 2
#endif