Import base vinum lkm sources
This commit is contained in:
commit
633c70539f
37
lkm/vinum/COPYRIGHT
Normal file
37
lkm/vinum/COPYRIGHT
Normal file
@ -0,0 +1,37 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: COPYRIGHT,v 1.1 1998/03/05 06:07:05 grog Exp grog $
|
||||
*/
|
26
lkm/vinum/Makefile
Normal file
26
lkm/vinum/Makefile
Normal file
@ -0,0 +1,26 @@
|
||||
# $Id: Makefile.lkm.lite,v 1.2 1998/08/13 06:07:29 grog Exp grog $
#
# Build description for the vinum loadable kernel module (KMOD below).
# The generic module rules come from <bsd.kmod.mk>, included at the end.
|
||||
|
||||
# NOTE(review): this search path names the ccd driver directory, not a
# vinum one -- confirm whether it is still wanted.
.PATH: ${.CURDIR}/../../sys/dev/ccd
|
||||
KMOD= vinum_mod
|
||||
SRCS= vinum.c vinum.h vnode_if.h parser.c config.c io.c util.c vinumhdr.h request.h \
|
||||
state.c memory.c request.c lock.c vinumext.h vinumio.h vinumkw.h \
|
||||
vinumstate.h vinumvar.h revive.c vinumioctl.c interrupt.c
|
||||
NOMAN=
|
||||
PSEUDO_LKM=
|
||||
# DEBUG is defined here; the C sources test it with "#if DEBUG".
CFLAGS = -I. -O -g -I/usr/include/machine -DDEBUG -Wall -Wno-unused -Wno-parentheses
|
||||
|
||||
CLEANFILES+= vinum.h vnode_if.h vnode_if.c
|
||||
|
||||
all:
|
||||
|
||||
# We don't need this, but the Makefile wants it
|
||||
vinum.h:
|
||||
touch $@
|
||||
|
||||
# state.h is generated: it is the standard output of the maketabs helper.
state.h: maketabs vinumstate.h
|
||||
./maketabs >state.h
|
||||
|
||||
# maketabs is built directly with ${CC} from maketabs.c, without CFLAGS.
maketabs: maketabs.c
|
||||
${CC} -g -o maketabs maketabs.c
|
||||
|
||||
.include <bsd.kmod.mk>
|
1712
lkm/vinum/config.c
Normal file
1712
lkm/vinum/config.c
Normal file
File diff suppressed because it is too large
Load Diff
190
lkm/vinum/interrupt.c
Normal file
190
lkm/vinum/interrupt.c
Normal file
@ -0,0 +1,190 @@
|
||||
/* interrupt.c: bottom half of the driver */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: interrupt.c,v 1.1 1998/08/13 06:12:27 grog Exp grog $
|
||||
*/
|
||||
|
||||
#define REALLYKERNEL
|
||||
#include "vinumhdr.h"
|
||||
#include "request.h"
|
||||
#include <miscfs/specfs/specdev.h>
|
||||
#include <sys/resourcevar.h>
|
||||
|
||||
void complete_raid5_write(struct rqelement *);
|
||||
void freerq(struct request *rq);
|
||||
void free_rqg(struct rqgroup *rqg);
|
||||
void complete_rqe(struct buf *bp);
|
||||
void sdio_done(struct buf *bp);
|
||||
|
||||
/* Take a completed buffer, transfer the data back if
|
||||
* it's a read, and complete the high-level request
|
||||
* if this is the last subrequest.
|
||||
*
|
||||
* The bp parameter is in fact a struct rqelement, which
|
||||
* includes a couple of extras at the end.
|
||||
*/
|
||||
void
|
||||
complete_rqe(struct buf *bp)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
struct rqelement *rqe;
|
||||
struct request *rq;
|
||||
struct rqgroup *rqg;
|
||||
struct buf *ubp; /* user buffer */
|
||||
|
||||
rqe = (struct rqelement *) bp; /* point to the element element that completed */
|
||||
rqg = rqe->rqg; /* and the request group */
|
||||
rq = rqg->rq; /* and the complete request */
|
||||
|
||||
if ((bp->b_flags & B_ERROR) != 0) { /* transfer in error */
|
||||
if (bp->b_error != 0) /* did it return a number? */
|
||||
rq->error = bp->b_error; /* yes, put it in. */
|
||||
else if (rq->error == 0) /* no: do we have one already? */
|
||||
rq->error = EIO; /* no: catchall "I/O error" */
|
||||
if (rq->error == EIO) /* I/O error, */
|
||||
set_sd_state(rqe->sdno, sd_crashed, setstate_force); /* take the subdisk down */
|
||||
}
|
||||
/* Now update the statistics */
|
||||
if (bp->b_flags & B_READ) { /* read operation */
|
||||
DRIVE[rqe->driveno].reads++;
|
||||
DRIVE[rqe->driveno].bytes_read += bp->b_bcount;
|
||||
SD[rqe->sdno].reads++;
|
||||
SD[rqe->sdno].bytes_read += bp->b_bcount;
|
||||
PLEX[rqe->rqg->plexno].reads++;
|
||||
PLEX[rqe->rqg->plexno].bytes_read += bp->b_bcount;
|
||||
} else { /* write operation */
|
||||
DRIVE[rqe->driveno].writes++;
|
||||
DRIVE[rqe->driveno].bytes_written += bp->b_bcount;
|
||||
SD[rqe->sdno].writes++;
|
||||
SD[rqe->sdno].bytes_written += bp->b_bcount;
|
||||
PLEX[rqe->rqg->plexno].writes++;
|
||||
PLEX[rqe->rqg->plexno].bytes_written += bp->b_bcount;
|
||||
}
|
||||
ubp = rq->bp; /* user buffer */
|
||||
rqg->active--; /* one less request active */
|
||||
if (rqg->active == 0) /* request group finished, */
|
||||
rq->active--; /* one less */
|
||||
if (rq->active == 0) { /* request finished, */
|
||||
#if DEBUG
|
||||
if (debug & 4) {
|
||||
if (ubp->b_resid != 0) /* still something to transfer? */
|
||||
Debugger("resid");
|
||||
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < ubp->b_bcount; i += 512) /* XXX debug */
|
||||
if (((char *) ubp->b_data)[i] != '<') { /* and not what we expected */
|
||||
printf("At 0x%x (offset 0x%x): '%c' (0x%x)\n",
|
||||
(int) (&((char *) ubp->b_data)[i]),
|
||||
i,
|
||||
((char *) ubp->b_data)[i],
|
||||
((char *) ubp->b_data)[i]);
|
||||
Debugger("complete_request checksum");
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (rq->error) { /* did we have an error? */
|
||||
ubp->b_flags |= B_ERROR; /* yes, propagate to user */
|
||||
ubp->b_error = rq->error;
|
||||
} else
|
||||
ubp->b_resid = 0; /* completed our transfer */
|
||||
if (rq->isplex == 0) /* volume request, */
|
||||
VOL[rq->volplex.volno].active--; /* another request finished */
|
||||
biodone(ubp); /* top level buffer completed */
|
||||
freerq(rq); /* return the request storage */
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Free a request block and anything hanging off it */
|
||||
void
|
||||
freerq(struct request *rq)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
struct rqgroup *rqg;
|
||||
struct rqgroup *nrqg; /* next in chain */
|
||||
int rqno;
|
||||
|
||||
for (rqg = rq->rqg; rqg != NULL; rqg = nrqg) { /* through the whole request chain */
|
||||
for (rqno = 0; rqno < rqg->count; rqno++)
|
||||
if ((rqg->rqe[rqno].flags & XFR_MALLOCED) /* data buffer was malloced, */
|
||||
&&rqg->rqe[rqno].b.b_data) /* and the allocation succeeded */
|
||||
Free(rqg->rqe[rqno].b.b_data); /* free it */
|
||||
nrqg = rqg->next; /* note the next one */
|
||||
Free(rqg); /* and free this one */
|
||||
}
|
||||
Free(rq); /* free the request itself */
|
||||
}
|
||||
|
||||
void
|
||||
free_rqg(struct rqgroup *rqg)
|
||||
{
|
||||
if ((rqg->flags & XFR_GROUPOP) /* RAID 5 request */
|
||||
&&(rqg->rqe) /* got a buffer structure */
|
||||
&&(rqg->rqe->b.b_data)) /* and it has a buffer allocated */
|
||||
Free(rqg->rqe->b.b_data); /* free it */
|
||||
}
|
||||
|
||||
/* I/O on subdisk completed */
|
||||
void
|
||||
sdio_done(struct buf *bp)
|
||||
{
|
||||
struct sdbuf *sbp;
|
||||
|
||||
sbp = (struct sdbuf *) bp;
|
||||
if (sbp->b.b_flags & B_ERROR) { /* had an error */
|
||||
bp->b_flags |= B_ERROR;
|
||||
bp->b_error = sbp->b.b_error;
|
||||
}
|
||||
bp->b_resid = sbp->b.b_resid;
|
||||
biodone(sbp->bp); /* complete the caller's I/O */
|
||||
/* Now update the statistics */
|
||||
if (bp->b_flags & B_READ) { /* read operation */
|
||||
DRIVE[sbp->driveno].reads++;
|
||||
DRIVE[sbp->driveno].bytes_read += bp->b_bcount;
|
||||
SD[sbp->sdno].reads++;
|
||||
SD[sbp->sdno].bytes_read += bp->b_bcount;
|
||||
} else { /* write operation */
|
||||
DRIVE[sbp->driveno].writes++;
|
||||
DRIVE[sbp->driveno].bytes_written += bp->b_bcount;
|
||||
SD[sbp->sdno].writes++;
|
||||
SD[sbp->sdno].bytes_written += bp->b_bcount;
|
||||
}
|
||||
Free(sbp);
|
||||
}
|
886
lkm/vinum/io.c
Normal file
886
lkm/vinum/io.c
Normal file
@ -0,0 +1,886 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: io.c,v 1.16 1998/08/10 23:47:21 grog Exp grog $
|
||||
*/
|
||||
|
||||
#define STATIC /* nothing while we're testing XXX */
|
||||
|
||||
#if __FreeBSD__ < 3 /* this is in sys/disklabel.h in 3.0 and on */
|
||||
#define DTYPE_VINUM 12 /* vinum volume */
|
||||
#endif
|
||||
|
||||
#define REALLYKERNEL
|
||||
#include "vinumhdr.h"
|
||||
#include <miscfs/specfs/specdev.h>
|
||||
|
||||
extern jmp_buf command_fail; /* return on a failed command */
|
||||
struct _ioctl_reply *ioctl_reply; /* data pointer, for returning error messages */
|
||||
|
||||
#if __FreeBSD__ >= 3
|
||||
/* Why aren't these declared anywhere? XXX */
|
||||
int setjmp(jmp_buf);
|
||||
void longjmp(jmp_buf, int);
|
||||
#endif
|
||||
|
||||
/* pointer to ioctl p parameter, to save passing it around */
|
||||
extern struct proc *myproc;
|
||||
|
||||
/* Open the device associated with the drive, and set drive's vp */
|
||||
int
|
||||
open_drive(struct drive *drive, struct proc *p)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
struct nameidata nd;
|
||||
struct vattr va;
|
||||
int error;
|
||||
|
||||
if (drive->devicename[0] == '\0') /* no device name */
|
||||
sprintf(drive->devicename, "/dev/%s", drive->label.name); /* get it from the drive name */
|
||||
NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, drive->devicename, p);
|
||||
error = vn_open(&nd, FREAD | FWRITE, 0); /* open the device */
|
||||
if (error != 0) { /* can't open? */
|
||||
set_drive_state(drive->driveno, drive_down, 1);
|
||||
drive->lasterror = error;
|
||||
printf("vinum open_drive %s: failed with error %d\n", drive->devicename, error); /* XXX */
|
||||
return error;
|
||||
}
|
||||
drive->vp = nd.ni_vp;
|
||||
drive->p = p;
|
||||
|
||||
if (drive->vp->v_usecount > 1) { /* already in use? */
|
||||
#if __FreeBSD__ == 2 /* pre-4.4BSD Lite/2 parameters */
|
||||
VOP_UNLOCK(drive->vp);
|
||||
#else
|
||||
VOP_UNLOCK(drive->vp, 0, p);
|
||||
#endif
|
||||
close_drive(drive);
|
||||
set_drive_state(drive->driveno, drive_down, 1);
|
||||
drive->lasterror = EBUSY;
|
||||
printf("vinum open_drive %s: Drive in use\n", drive->devicename); /* XXX */
|
||||
return EBUSY;
|
||||
}
|
||||
error = VOP_GETATTR(drive->vp, &va, NOCRED, p);
|
||||
if (error) {
|
||||
#if __FreeBSD__ == 2 /* pre-4.4BSD Lite/2 parameters */
|
||||
VOP_UNLOCK(drive->vp);
|
||||
#else
|
||||
VOP_UNLOCK(drive->vp, 0, p);
|
||||
#endif
|
||||
close_drive(drive);
|
||||
set_drive_state(drive->driveno, drive_down, 1);
|
||||
drive->lasterror = error;
|
||||
printf("vinum open_drive %s: GETAATTR returns error %d\n", drive->devicename, error); /* XXX */
|
||||
return error;
|
||||
}
|
||||
drive->dev = va.va_rdev; /* device */
|
||||
|
||||
if (va.va_type != VBLK) { /* only consider block devices */
|
||||
#if __FreeBSD__ == 2 /* pre-4.4BSD Lite/2 parameters */
|
||||
VOP_UNLOCK(drive->vp);
|
||||
#else
|
||||
VOP_UNLOCK(drive->vp, 0, p);
|
||||
#endif
|
||||
close_drive(drive);
|
||||
set_drive_state(drive->driveno, drive_down, 1); /* this also closes the drive */
|
||||
drive->lasterror = ENOTBLK;
|
||||
printf("vinum open_drive %s: Not a block device\n", drive->devicename); /* XXX */
|
||||
return ENOTBLK;
|
||||
}
|
||||
drive->vp->v_numoutput = 0;
|
||||
#if __FreeBSD__ == 2 /* pre-4.4BSD Lite/2 parameters */
|
||||
VOP_UNLOCK(drive->vp);
|
||||
#else
|
||||
VOP_UNLOCK(drive->vp, 0, p);
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Set some variables in the drive struct
|
||||
* in more convenient form. Return error indication */
|
||||
int
|
||||
set_drive_parms(struct drive *drive)
|
||||
{
|
||||
drive->blocksize = BLKDEV_IOSIZE; /* XXX do we need this? */
|
||||
drive->secsperblock = drive->blocksize /* number of sectors per block */
|
||||
/ drive->partinfo.disklab->d_secsize;
|
||||
|
||||
/* Now update the label part */
|
||||
bcopy(hostname, drive->label.sysname, VINUMHOSTNAMELEN); /* put in host name */
|
||||
#if __FreeBSD__ >= 3
|
||||
getmicrotime(&drive->label.date_of_birth); /* and current time */
|
||||
#else
|
||||
drive->label.date_of_birth = time; /* and current time */
|
||||
#endif
|
||||
drive->label.drive_size = ((u_int64_t) drive->partinfo.part->p_size) /* size of the drive in bytes */
|
||||
*((u_int64_t) drive->partinfo.disklab->d_secsize);
|
||||
|
||||
/* number of sectors available for subdisks */
|
||||
drive->sectors_available = drive->label.drive_size / DEV_BSIZE - DATASTART;
|
||||
|
||||
/* XXX Bug in 3.0 as of January 1998: you can open
|
||||
* non-existent slices. They have a length of 0 */
|
||||
if (drive->label.drive_size < MINVINUMSLICE) { /* too small to worry about */
|
||||
set_drive_state(drive->driveno, drive_down, 1);
|
||||
printf("vinum open_drive %s: Drive too small\n", drive->devicename); /* XXX */
|
||||
drive->lasterror = ENOSPC;
|
||||
return ENOSPC;
|
||||
}
|
||||
drive->freelist_size = INITIAL_DRIVE_FREELIST; /* initial number of entries */
|
||||
drive->freelist = (struct drive_freelist *)
|
||||
Malloc(INITIAL_DRIVE_FREELIST * sizeof(struct drive_freelist));
|
||||
if (drive->freelist == NULL) /* can't malloc, dammit */
|
||||
return ENOSPC;
|
||||
drive->freelist_entries = 1; /* just (almost) the complete drive */
|
||||
drive->freelist[0].offset = DATASTART; /* starts here */
|
||||
drive->freelist[0].sectors = (drive->label.drive_size >> DEV_BSHIFT) - DATASTART; /* and it's this long */
|
||||
set_drive_state(drive->driveno, drive_up, 1); /* our drive is accessible */
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Initialize a drive: open the device and add device
|
||||
* information */
|
||||
int
|
||||
init_drive(struct drive *drive)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
int error;
|
||||
|
||||
if (drive->devicename[0] == '\0') { /* no device name yet, default to drive name */
|
||||
drive->lasterror = EINVAL;
|
||||
printf("vinum: Can't open drive without drive name\n"); /* XXX */
|
||||
return EINVAL;
|
||||
}
|
||||
error = open_drive(drive, myproc); /* open the drive */
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
error = VOP_IOCTL(drive->vp, /* get the partition information */
|
||||
DIOCGPART,
|
||||
(caddr_t) & drive->partinfo,
|
||||
FREAD,
|
||||
NOCRED,
|
||||
myproc);
|
||||
if (error) {
|
||||
printf("vinum open_drive %s: Can't get partition information, error %d\n",
|
||||
drive->devicename,
|
||||
error); /* XXX */
|
||||
close_drive(drive);
|
||||
drive->lasterror = error;
|
||||
set_drive_state(drive->driveno, drive_down, 1);
|
||||
return error;
|
||||
}
|
||||
if (drive->partinfo.part->p_fstype != 0) { /* not plain */
|
||||
drive->lasterror = EFTYPE;
|
||||
printf("vinum open_drive %s: Wrong partition type for vinum\n", drive->devicename); /* XXX */
|
||||
close_drive(drive);
|
||||
set_drive_state(drive->driveno, drive_down, 1);
|
||||
return EFTYPE;
|
||||
}
|
||||
return set_drive_parms(drive); /* set various odds and ends */
|
||||
}
|
||||
|
||||
/* Close a drive if it's open. No errors */
|
||||
void
|
||||
close_drive(struct drive *drive)
|
||||
{
|
||||
if (drive->vp) {
|
||||
vn_close(drive->vp, FREAD | FWRITE, NOCRED, drive->p);
|
||||
drive->vp = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* Remove drive from the configuration.
|
||||
* Caller must ensure that it isn't active
|
||||
*/
|
||||
void
|
||||
remove_drive(int driveno)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
struct drive *drive = &vinum_conf.drive[driveno];
|
||||
long long int nomagic = VINUM_NOMAGIC; /* no magic number */
|
||||
|
||||
write_drive(drive, /* obliterate the magic, but leave a hint */
|
||||
(char *) &nomagic,
|
||||
8,
|
||||
VINUM_LABEL_OFFSET);
|
||||
close_drive(drive); /* and close it */
|
||||
drive->state = drive_unallocated; /* and forget everything we knew about it */
|
||||
save_config(); /* and save the updated configuration */
|
||||
}
|
||||
|
||||
/* Transfer drive data. Usually called from one of these defines;
|
||||
|
||||
* #define read_drive(a, b, c, d) driveio (a, b, c, d, B_READ)
|
||||
* #define write_drive(a, b, c, d) driveio (a, b, c, d, B_WRITE)
|
||||
*
|
||||
* Return error number
|
||||
*/
|
||||
int
|
||||
driveio(struct drive *drive, void *buf, size_t length, off_t offset, int flag)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
int error;
|
||||
struct buf *bp;
|
||||
int spl;
|
||||
|
||||
error = 0;
|
||||
|
||||
/* Get a buffer */
|
||||
bp = (struct buf *) Malloc(sizeof(struct buf)); /* get a buffer */
|
||||
CHECKALLOC(bp, "Can't allocate memory");
|
||||
|
||||
bzero(&buf, sizeof(buf));
|
||||
bp->b_flags = B_BUSY | flag; /* tell us when it's done */
|
||||
bp->b_iodone = drive_io_done; /* here */
|
||||
bp->b_proc = myproc; /* process */
|
||||
bp->b_dev = drive->vp->v_un.vu_specinfo->si_rdev; /* device */
|
||||
if (offset & (drive->partinfo.disklab->d_secsize - 1)) /* not on a block boundary */
|
||||
bp->b_blkno = offset / drive->partinfo.disklab->d_secsize; /* block number */
|
||||
bp->b_data = buf;
|
||||
bp->b_vp = drive->vp; /* vnode */
|
||||
bp->b_bcount = length;
|
||||
bp->b_bufsize = length;
|
||||
|
||||
(*bdevsw[major(bp->b_dev)]->d_strategy) (bp); /* initiate the transfer */
|
||||
|
||||
spl = splbio();
|
||||
while ((bp->b_flags & B_DONE) == 0) {
|
||||
bp->b_flags |= B_CALL; /* wake me again */
|
||||
tsleep((caddr_t) bp, PRIBIO, "driveio", 0); /* and wait for it to complete */
|
||||
}
|
||||
splx(spl);
|
||||
if (bp->b_flags & B_ERROR) /* didn't work */
|
||||
error = bp->b_error; /* get the error return */
|
||||
Free(bp); /* then return the buffer */
|
||||
return error;
|
||||
}
|
||||
|
||||
/* Read data from a drive
|
||||
|
||||
* Return error number
|
||||
*/
|
||||
int
|
||||
read_drive(struct drive *drive, void *buf, size_t length, off_t offset)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
int error;
|
||||
struct buf *bp;
|
||||
daddr_t nextbn;
|
||||
long bscale;
|
||||
|
||||
struct uio uio;
|
||||
struct iovec iov;
|
||||
daddr_t blocknum; /* block number */
|
||||
int blockoff; /* offset in block */
|
||||
int count; /* amount to transfer */
|
||||
|
||||
iov.iov_base = buf;
|
||||
iov.iov_len = length;
|
||||
|
||||
uio.uio_iov = &iov;
|
||||
uio.uio_iovcnt = length;
|
||||
uio.uio_offset = offset;
|
||||
uio.uio_resid = length;
|
||||
uio.uio_segflg = UIO_SYSSPACE;
|
||||
uio.uio_rw = UIO_READ;
|
||||
uio.uio_procp = myproc;
|
||||
|
||||
bscale = btodb(drive->blocksize); /* mask off offset from block number */
|
||||
do {
|
||||
blocknum = btodb(uio.uio_offset) & ~(bscale - 1); /* get the block number */
|
||||
blockoff = uio.uio_offset % drive->blocksize; /* offset in block */
|
||||
count = min((unsigned) (drive->blocksize - blockoff), /* amount to transfer in this block */
|
||||
uio.uio_resid);
|
||||
|
||||
/* XXX Check this. I think the test is wrong */
|
||||
if (drive->vp->v_lastr + bscale == blocknum) { /* did our last read finish in this block? */
|
||||
nextbn = blocknum + bscale; /* note the end of the transfer */
|
||||
error = breadn(drive->vp, /* and read with read-ahead */
|
||||
blocknum,
|
||||
(int) drive->blocksize,
|
||||
&nextbn,
|
||||
(int *) &drive->blocksize,
|
||||
1,
|
||||
NOCRED,
|
||||
&bp);
|
||||
} else /* random read: just read this block */
|
||||
error = bread(drive->vp, blocknum, (int) drive->blocksize, NOCRED, &bp);
|
||||
drive->vp->v_lastr = blocknum; /* note the last block we read */
|
||||
count = min(count, drive->blocksize - bp->b_resid);
|
||||
if (error) {
|
||||
brelse(bp);
|
||||
return error;
|
||||
}
|
||||
error = uiomove((char *) bp->b_data + blockoff, count, &uio); /* move the data */
|
||||
brelse(bp);
|
||||
}
|
||||
while (error == 0 && uio.uio_resid > 0 && count != 0);
|
||||
return error;
|
||||
}
|
||||
|
||||
/* Write data to a drive
|
||||
|
||||
* Return error number
|
||||
*/
|
||||
int
|
||||
write_drive(struct drive *drive, void *buf, size_t length, off_t offset)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
int error;
|
||||
struct buf *bp;
|
||||
struct uio uio;
|
||||
struct iovec iov;
|
||||
daddr_t blocknum; /* block number */
|
||||
int blockoff; /* offset in block */
|
||||
int count; /* amount to transfer */
|
||||
int blockshift;
|
||||
|
||||
if (drive->state == drive_down) /* currently down */
|
||||
return 0; /* ignore */
|
||||
if (drive->vp == NULL) {
|
||||
drive->lasterror = ENODEV;
|
||||
return ENODEV; /* not configured yet */
|
||||
}
|
||||
iov.iov_base = buf;
|
||||
iov.iov_len = length;
|
||||
|
||||
uio.uio_iov = &iov;
|
||||
uio.uio_iovcnt = length;
|
||||
uio.uio_offset = offset;
|
||||
uio.uio_resid = length;
|
||||
uio.uio_segflg = UIO_SYSSPACE;
|
||||
uio.uio_rw = UIO_WRITE;
|
||||
uio.uio_procp = myproc;
|
||||
|
||||
error = 0;
|
||||
blockshift = btodb(drive->blocksize) - 1; /* amount to shift block number
|
||||
* to get sector number */
|
||||
do {
|
||||
blocknum = btodb(uio.uio_offset) & ~blockshift; /* get the block number */
|
||||
blockoff = uio.uio_offset % drive->blocksize; /* offset in block */
|
||||
count = min((unsigned) (drive->blocksize - blockoff), /* amount to transfer in this block */
|
||||
uio.uio_resid);
|
||||
if (count == drive->blocksize) /* the whole block */
|
||||
bp = getblk(drive->vp, blocknum, drive->blocksize, 0, 0); /* just get it */
|
||||
else /* partial block: */
|
||||
error = bread(drive->vp, /* read it first */
|
||||
blocknum,
|
||||
drive->blocksize,
|
||||
NOCRED,
|
||||
&bp);
|
||||
count = min(count, drive->blocksize - bp->b_resid); /* how much will we transfer now? */
|
||||
if (error == 0)
|
||||
error = uiomove((char *) bp->b_data + blockoff, /* move the data to the block */
|
||||
count,
|
||||
&uio);
|
||||
if (error) {
|
||||
brelse(bp);
|
||||
drive->lasterror = error;
|
||||
switch (error) {
|
||||
case EIO:
|
||||
set_drive_state(drive->driveno, drive_down, 1);
|
||||
break;
|
||||
|
||||
/* XXX Add other possibilities here */
|
||||
default:
|
||||
}
|
||||
return error;
|
||||
}
|
||||
if (count + blockoff == drive->blocksize)
|
||||
/* The transfer goes to the end of the block. There's
|
||||
* no need to wait for any more data to arrive. */
|
||||
bawrite(bp); /* start the write now */
|
||||
else
|
||||
bdwrite(bp); /* do a delayed write */
|
||||
}
|
||||
while (error == 0 && uio.uio_resid > 0 && count != 0);
|
||||
if (error)
|
||||
drive->lasterror = error;
|
||||
return error; /* OK */
|
||||
}
|
||||
|
||||
/* Wake up on completion */
|
||||
void
|
||||
drive_io_done(struct buf *bp)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
wakeup((caddr_t) bp); /* Wachet auf! */
|
||||
bp->b_flags &= ~B_CALL; /* don't do this again */
|
||||
}
|
||||
|
||||
/* Check a drive for a vinum header. If found,
|
||||
* update the drive information. We come here
|
||||
* with a partially populated drive structure
|
||||
* which includes the device name.
|
||||
*
|
||||
* Return information on what we found
|
||||
*/
|
||||
enum drive_label_info
|
||||
read_drive_label(struct drive *drive)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
int error;
|
||||
int result; /* result of our search */
|
||||
struct vinum_hdr *vhdr; /* and as header */
|
||||
|
||||
error = init_drive(drive); /* find the drive */
|
||||
if (error) /* find the drive */
|
||||
return DL_CANT_OPEN; /* not ours */
|
||||
|
||||
vhdr = (struct vinum_hdr *) Malloc(VINUMHEADERLEN); /* allocate buffers */
|
||||
CHECKALLOC(vhdr, "Can't allocate memory");
|
||||
|
||||
error = read_drive(drive, (void *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET);
|
||||
if (vhdr->magic == VINUM_MAGIC) { /* ours! */
|
||||
if (drive->label.name[0] /* we have a name for this drive */
|
||||
&&(strcmp(drive->label.name, vhdr->label.name))) { /* but it doesn't match the real name */
|
||||
drive->lasterror = EINVAL;
|
||||
result = DL_WRONG_DRIVE; /* it's the wrong drive */
|
||||
} else {
|
||||
set_drive_parms(drive); /* and set other parameters */
|
||||
result = DL_OURS;
|
||||
}
|
||||
/* We copy the drive anyway so that we have
|
||||
* the correct name in the drive info. This
|
||||
* may not be the name specified */
|
||||
drive->label = vhdr->label; /* put in the label information */
|
||||
} else if (vhdr->magic == VINUM_NOMAGIC) /* was ours, but we gave it away */
|
||||
result = DL_DELETED_LABEL;
|
||||
else
|
||||
result = DL_NOT_OURS; /* we could have it, but we don't yet */
|
||||
Free(vhdr); /* that's all. */
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Check a drive for a vinum header. If found,
|
||||
* read configuration information from the drive and
|
||||
* incorporate the data into the configuration.
|
||||
*
|
||||
* Return error number
|
||||
*/
|
||||
int
|
||||
check_drive(char *drivename)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
int error;
|
||||
struct nameidata nd; /* mount point credentials */
|
||||
char *config_text; /* read the config info from disk into here */
|
||||
volatile char *cptr; /* pointer into config information */
|
||||
char *eptr; /* end pointer into config information */
|
||||
int driveno;
|
||||
struct drive *drive;
|
||||
char *config_line; /* copy the config line to */
|
||||
|
||||
driveno = find_drive_by_dev(drivename, 1); /* doesn't exist, create it */
|
||||
drive = &vinum_conf.drive[driveno]; /* and get a pointer */
|
||||
strcpy(drive->devicename, drivename); /* put in device name */
|
||||
|
||||
if (read_drive_label(drive) == DL_OURS) { /* ours! */
|
||||
config_text = (char *) Malloc(MAXCONFIG * 2); /* allocate buffers */
|
||||
CHECKALLOC(config_text, "Can't allocate memory");
|
||||
config_line = (char *) Malloc(MAXCONFIGLINE * 2); /* allocate buffers */
|
||||
CHECKALLOC(config_line, "Can't allocate memory");
|
||||
|
||||
/* Read in both copies of the configuration information */
|
||||
error = read_drive(drive, config_text, MAXCONFIG * 2, VINUM_CONFIG_OFFSET);
|
||||
|
||||
if (error != 0) {
|
||||
printf("vinum: Can't read device %s, error %d\n", drive->devicename, error);
|
||||
Free(config_text);
|
||||
Free(config_line);
|
||||
free_drive(drive); /* give it back */
|
||||
return error;
|
||||
}
|
||||
/* XXX At this point, check that the two copies are the same, and do something useful if not.
|
||||
* In particular, consider which is newer, and what this means for the integrity of the
|
||||
* data on the drive */
|
||||
|
||||
/* Parse the configuration, and add it to the global configuration */
|
||||
for (cptr = config_text; *cptr != '\0';) { /* love this style(9) */
|
||||
volatile int parse_status; /* return value from parse_config */
|
||||
|
||||
for (eptr = config_line; (*cptr != '\n') && (*cptr != '\0');) /* until the end of the line */
|
||||
*eptr++ = *cptr++;
|
||||
*eptr = '\0'; /* and delimit */
|
||||
if (setjmp(command_fail) == 0) { /* come back here on error and continue */
|
||||
parse_status = parse_config(config_line, &keyword_set); /* parse the config line */
|
||||
if (parse_status < 0) { /* error in config */
|
||||
/* This config should have been parsed in user
|
||||
* space. If we run into problems here, something
|
||||
* serious is afoot. Complain and let the user
|
||||
* snarf the config to see what's wrong */
|
||||
printf("vinum: Config error on drive %s, aborting integration\n", nd.ni_dirp);
|
||||
Free(config_text);
|
||||
Free(config_line);
|
||||
free_drive(drive); /* give it back */
|
||||
return EINVAL;
|
||||
}
|
||||
}
|
||||
while (*cptr == '\n')
|
||||
cptr++; /* skip to next line */
|
||||
}
|
||||
Free(config_text);
|
||||
if ((vinum_conf.flags & VF_READING_CONFIG) == 0) /* not reading config */
|
||||
updateconfig(0); /* update object states */
|
||||
printf("vinum: read configuration from %s\n", drivename);
|
||||
return 0; /* it all worked */
|
||||
} else { /* no vinum label found */
|
||||
if (drive->lasterror) {
|
||||
set_drive_state(drive->driveno, drive_down, 1);
|
||||
return drive->lasterror;
|
||||
} else
|
||||
return ENODEV; /* not our device */
|
||||
}
|
||||
}
|
||||
|
||||
/* Kludge: kernel printf doesn't handle longs correctly XXX */
|
||||
static char *lltoa(long long l, char *s);
|
||||
static char *sappend(char *txt, char *s);
|
||||
|
||||
/*
 * Write the decimal representation of l into the buffer at s and return
 * a pointer to the position after the last character written.
 *
 * No terminating NUL is stored: callers either keep appending at the
 * returned position or rely on the buffer having been cleared beforehand.
 */
static char *
lltoa(long long l, char *s)
{
    char digits[sizeof(long long) * 3 + 1];         /* 3 decimal digits per byte is always enough */
    int count = 0;
    unsigned long long magnitude;

    if (l < 0) {
        *s++ = '-';
        /* Negate in unsigned arithmetic: -l overflows for the most negative value */
        magnitude = 0ULL - (unsigned long long) l;
    } else
        magnitude = (unsigned long long) l;

    do {                                            /* collect the digits, least significant first */
        digits[count++] = (char) ('0' + (int) (magnitude % 10));
        magnitude /= 10;
    } while (magnitude != 0);

    while (count > 0)                               /* and store them most significant first */
        *s++ = digits[--count];
    return s;
}
|
||||
|
||||
/*
 * Copy the NUL-terminated string txt to s, terminator included, and
 * return a pointer to the terminator just stored, so that the next
 * piece of text can be appended there.
 */
static char *
sappend(char *txt, char *s)
{
    char *src = txt;
    char *dst = s;

    for (;;) {
        char c = *src++;

        *dst = c;
        if (c == '\0')                              /* copied the terminator: done */
            return dst;
        dst++;
    }
}
|
||||
|
||||
/* Format the configuration in text form into the buffer
|
||||
* at config. Don't go beyond len bytes
|
||||
* XXX this stinks. Fix soon. */
|
||||
void
|
||||
format_config(char *config, int len)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
int i;
|
||||
int j;
|
||||
char *s = config;
|
||||
|
||||
bzero(config, len);
|
||||
|
||||
/* First write the drive configuration */
|
||||
for (i = 0; i < vinum_conf.drives_used; i++) {
|
||||
struct drive *drive;
|
||||
|
||||
drive = &vinum_conf.drive[i];
|
||||
if (drive->state != drive_unallocated) {
|
||||
sprintf(s,
|
||||
"drive %s state %s device %s\n",
|
||||
drive->label.name,
|
||||
drive_state(drive->state),
|
||||
drive->devicename);
|
||||
while (*s)
|
||||
s++; /* find the end */
|
||||
if (s > &config[len - 80]) {
|
||||
printf("vinum: configuration data overflow\n");
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Then the volume configuration */
|
||||
for (i = 0; i < vinum_conf.volumes_used; i++) {
|
||||
struct volume *vol;
|
||||
|
||||
vol = &vinum_conf.volume[i];
|
||||
if (vol->state != volume_unallocated) {
|
||||
if (vol->preferred_plex >= 0) /* preferences, */
|
||||
sprintf(s,
|
||||
"volume %s state %s readpol prefer %s",
|
||||
vol->name,
|
||||
volume_state(vol->state),
|
||||
vinum_conf.plex[vol->preferred_plex].name);
|
||||
else /* default round-robin */
|
||||
sprintf(s,
|
||||
"volume %s state %s",
|
||||
vol->name,
|
||||
volume_state(vol->state));
|
||||
while (*s)
|
||||
s++; /* find the end */
|
||||
s = sappend("\n", s);
|
||||
if (s > &config[len - 80]) {
|
||||
printf("vinum: configuration data overflow\n");
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Then the plex configuration */
|
||||
for (i = 0; i < vinum_conf.plexes_used; i++) {
|
||||
struct plex *plex;
|
||||
|
||||
plex = &vinum_conf.plex[i];
|
||||
if (plex->state != plex_unallocated) {
|
||||
sprintf(s, "plex name %s state %s org %s ",
|
||||
plex->name,
|
||||
plex_state(plex->state),
|
||||
plex_org(plex->organization));
|
||||
while (*s)
|
||||
s++; /* find the end */
|
||||
if ((plex->organization == plex_striped)
|
||||
) {
|
||||
sprintf(s, "%db ", (int) plex->stripesize);
|
||||
while (*s)
|
||||
s++; /* find the end */
|
||||
}
|
||||
if (plex->volno >= 0) /* we have a volume */
|
||||
sprintf(s, "vol %s ", vinum_conf.volume[plex->volno].name);
|
||||
while (*s)
|
||||
s++; /* find the end */
|
||||
for (j = 0; j < plex->subdisks; j++) {
|
||||
sprintf(s, " sd %s", vinum_conf.sd[plex->sdnos[j]].name);
|
||||
}
|
||||
s = sappend("\n", s);
|
||||
if (s > &config[len - 80]) {
|
||||
printf("vinum: configuration data overflow\n");
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* And finally the subdisk configuration */
|
||||
for (i = 0; i < vinum_conf.subdisks_used; i++) {
|
||||
struct sd *sd = &vinum_conf.sd[i]; /* XXX */
|
||||
if (vinum_conf.sd[i].state != sd_unallocated) {
|
||||
sprintf(s,
|
||||
"sd name %s drive %s plex %s state %s len ",
|
||||
sd->name,
|
||||
vinum_conf.drive[sd->driveno].label.name,
|
||||
vinum_conf.plex[sd->plexno].name,
|
||||
sd_state(sd->state));
|
||||
while (*s)
|
||||
s++; /* find the end */
|
||||
s = lltoa(sd->sectors, s);
|
||||
s = sappend("b driveoffset ", s);
|
||||
s = lltoa(sd->driveoffset, s);
|
||||
s = sappend("b plexoffset ", s);
|
||||
s = lltoa(sd->plexoffset, s);
|
||||
s = sappend("b\n", s);
|
||||
if (s > &config[len - 80]) {
|
||||
printf("vinum: configuration data overflow\n");
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Write the configuration to all vinum slices */
|
||||
int
|
||||
save_config(void)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
int error;
|
||||
int written_config; /* set when we firstnwrite the config to disk */
|
||||
int driveno;
|
||||
struct drive *drive; /* point to current drive info */
|
||||
struct vinum_hdr *vhdr; /* and as header */
|
||||
char *config; /* point to config data */
|
||||
int wlabel_on; /* to set writing label on/off */
|
||||
|
||||
/* don't save the configuration while we're still working on it */
|
||||
if (vinum_conf.flags & VF_CONFIGURING)
|
||||
return 0;
|
||||
written_config = 0; /* no config written yet */
|
||||
/* Build a volume header */
|
||||
vhdr = (struct vinum_hdr *) Malloc(VINUMHEADERLEN); /* get space for the config data */
|
||||
CHECKALLOC(vhdr, "Can't allocate config data");
|
||||
vhdr->magic = VINUM_MAGIC; /* magic number */
|
||||
vhdr->config_length = MAXCONFIG; /* length of following config info */
|
||||
|
||||
config = Malloc(MAXCONFIG); /* get space for the config data */
|
||||
CHECKALLOC(config, "Can't allocate config data");
|
||||
|
||||
format_config(config, MAXCONFIG);
|
||||
error = 0; /* no errors yet */
|
||||
for (driveno = 0; driveno < vinum_conf.drives_used; driveno++) {
|
||||
drive = &vinum_conf.drive[driveno]; /* point to drive */
|
||||
|
||||
if (drive->state != drive_down) {
|
||||
#if (__FreeBSD__ >= 3)
|
||||
getmicrotime(&drive->label.last_update); /* time of last update is now */
|
||||
#else
|
||||
drive->label.last_update = time; /* time of last update is now */
|
||||
#endif
|
||||
bcopy((char *) &drive->label, /* and the label info from the drive structure */
|
||||
(char *) &vhdr->label,
|
||||
sizeof(vhdr->label));
|
||||
if ((drive->state != drive_unallocated)
|
||||
&& (drive->state != drive_uninit)) {
|
||||
wlabel_on = 1; /* enable writing the label */
|
||||
error = VOP_IOCTL(drive->vp, /* make the label writeable */
|
||||
DIOCWLABEL,
|
||||
(caddr_t) & wlabel_on,
|
||||
FWRITE,
|
||||
NOCRED,
|
||||
myproc);
|
||||
if (error == 0)
|
||||
error = write_drive(drive, vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET);
|
||||
if (error == 0)
|
||||
error = write_drive(drive, config, MAXCONFIG, VINUM_CONFIG_OFFSET);
|
||||
wlabel_on = 0; /* enable writing the label */
|
||||
VOP_IOCTL(drive->vp, /* make the label non-writeable again */
|
||||
DIOCWLABEL,
|
||||
(caddr_t) & wlabel_on,
|
||||
FWRITE,
|
||||
NOCRED,
|
||||
myproc);
|
||||
if (error) {
|
||||
printf("vinum: Can't write config to %s, error %d\n", drive->devicename, error);
|
||||
set_drive_state(drive->driveno, drive_down, 1);
|
||||
} else
|
||||
written_config = 1; /* we've written it on at least one drive */
|
||||
}
|
||||
}
|
||||
}
|
||||
Free(vhdr);
|
||||
Free(config);
|
||||
return written_config == 0; /* return 1 if we failed to write config */
|
||||
}
|
||||
|
||||
/* Disk labels are a mess. The correct way to access them
|
||||
* is with the DIOC[GSW]DINFO ioctls, but some programs, such
|
||||
* as newfs, access the disk directly, so we have to write
|
||||
* things there. We do this only on request. If a user
|
||||
* request tries to read it directly, we fake up one on the fly.
|
||||
*/
|
||||
|
||||
/* get_volume_label returns a label structure to lp, which
|
||||
* is allocated by the caller */
|
||||
void
|
||||
get_volume_label(struct volume *vol, struct disklabel *lp)
|
||||
{
|
||||
bzero(lp, sizeof(struct disklabel));
|
||||
|
||||
strncpy(lp->d_typename, "vinum", sizeof(lp->d_typename));
|
||||
lp->d_type = DTYPE_VINUM;
|
||||
strncpy(lp->d_packname, vol->name, min(sizeof(lp->d_packname), sizeof(vol->name)));
|
||||
lp->d_rpm = 14400 * vol->plexes; /* to keep them guessing */
|
||||
lp->d_interleave = 1;
|
||||
lp->d_flags = 0;
|
||||
|
||||
/* Fitting unto the vine, a vinum has a single
|
||||
* track with all its sectors */
|
||||
lp->d_secsize = DEV_BSIZE; /* bytes per sector */
|
||||
lp->d_nsectors = vol->size; /* data sectors per track */
|
||||
lp->d_ntracks = 1; /* tracks per cylinder */
|
||||
lp->d_ncylinders = 1; /* data cylinders per unit */
|
||||
lp->d_secpercyl = vol->size; /* data sectors per cylinder */
|
||||
lp->d_secperunit = vol->size; /* data sectors per unit */
|
||||
|
||||
lp->d_bbsize = BBSIZE;
|
||||
lp->d_sbsize = SBSIZE;
|
||||
|
||||
lp->d_magic = DISKMAGIC;
|
||||
lp->d_magic2 = DISKMAGIC;
|
||||
|
||||
/* Set up partitions a, b and c to be identical
|
||||
* and the size of the volume. a is UFS, b is
|
||||
* swap, c is nothing */
|
||||
lp->d_partitions[0].p_size = vol->size;
|
||||
lp->d_partitions[0].p_fsize = 1024;
|
||||
lp->d_partitions[0].p_fstype = FS_BSDFFS; /* FreeBSD File System :-) */
|
||||
lp->d_partitions[0].p_fsize = 1024; /* FS fragment size */
|
||||
lp->d_partitions[0].p_frag = 8; /* and fragments per block */
|
||||
lp->d_partitions[SWAP_PART].p_size = vol->size;
|
||||
lp->d_partitions[SWAP_PART].p_fstype = FS_SWAP; /* swap partition */
|
||||
lp->d_partitions[LABEL_PART].p_size = vol->size;
|
||||
lp->d_npartitions = LABEL_PART + 1;
|
||||
strncpy(lp->d_packname, vol->name, min(sizeof(lp->d_packname), sizeof(vol->name)));
|
||||
lp->d_checksum = dkcksum(lp);
|
||||
}
|
||||
|
||||
int
|
||||
write_volume_label(int volno)
|
||||
{
|
||||
struct disklabel *lp;
|
||||
struct buf *bp;
|
||||
struct disklabel *dlp;
|
||||
struct volume *vol;
|
||||
int error;
|
||||
|
||||
lp = (struct disklabel *) Malloc((sizeof(struct disklabel) + (DEV_BSIZE - 1)) & (DEV_BSIZE - 1));
|
||||
if (lp == 0)
|
||||
return ENOMEM;
|
||||
|
||||
if ((unsigned) (volno) >= (unsigned) vinum_conf.volumes_used) /* invalid volume */
|
||||
return ENOENT;
|
||||
|
||||
vol = &VOL[volno]; /* volume in question */
|
||||
if (vol->state == volume_unallocated) /* nothing there */
|
||||
return ENOENT;
|
||||
|
||||
get_volume_label(vol, lp); /* get the label */
|
||||
|
||||
/* Now write to disk. This code is derived from the
|
||||
* system writedisklabel (), which does silly things
|
||||
* like reading the label and refusing to write
|
||||
* unless it's already there. */
|
||||
bp = geteblk((int) lp->d_secsize); /* get a buffer */
|
||||
bp->b_dev = minor(vol->devno) | (CDEV_MAJOR << MAJORDEV_SHIFT); /* our own raw volume */
|
||||
bp->b_blkno = LABELSECTOR * ((int) lp->d_secsize / DEV_BSIZE);
|
||||
bp->b_bcount = lp->d_secsize;
|
||||
bzero(bp->b_data, lp->d_secsize);
|
||||
dlp = (struct disklabel *) bp->b_data;
|
||||
*dlp = *lp;
|
||||
bp->b_flags &= ~B_INVAL;
|
||||
bp->b_flags |= B_BUSY | B_WRITE;
|
||||
vinumstrategy(bp); /* write it out */
|
||||
error = biowait(bp);
|
||||
bp->b_flags |= B_INVAL | B_AGE;
|
||||
brelse(bp);
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
 * Initialize a subdisk.
 * This is only a placeholder: sdno is ignored, nothing is done, and we
 * always report success.
 */
int
initsd(int sdno)
{
    int error = 0;                                  /* nothing to do, so nothing can fail */

    return error;
}
|
137
lkm/vinum/lock.c
Normal file
137
lkm/vinum/lock.c
Normal file
@ -0,0 +1,137 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: lock.c,v 1.6 1998/07/28 06:32:57 grog Exp grog $
|
||||
*/
|
||||
|
||||
#define REALLYKERNEL
|
||||
#include "vinumhdr.h"
|
||||
|
||||
/* Lock routines. Currently, we lock either an individual volume
|
||||
* or the global configuration. I don't think tsleep and
|
||||
* wakeup are SMP safe. FIXME XXX */
|
||||
|
||||
/* Lock a volume, wait if it's in use */
|
||||
int
|
||||
lockvol(struct volume *vol)
|
||||
{
|
||||
int error;
|
||||
|
||||
while ((vol->flags & VF_LOCKED) != 0) {
|
||||
vol->flags |= VF_LOCKING;
|
||||
/* It would seem to make more sense to sleep on
|
||||
* the address 'vol'. Unfortuntaly we can't
|
||||
* guarantee that this address won't change due to
|
||||
* table expansion. The address we choose won't change. */
|
||||
if ((error = tsleep(&vinum_conf.volume + vol->devno,
|
||||
PRIBIO | PCATCH,
|
||||
"volock",
|
||||
0)) != 0)
|
||||
return error;
|
||||
}
|
||||
vol->flags |= VF_LOCKED;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Unlock a volume and let the next one at it */
|
||||
void
|
||||
unlockvol(struct volume *vol)
|
||||
{
|
||||
vol->flags &= ~VF_LOCKED;
|
||||
if ((vol->flags & VF_LOCKING) != 0) {
|
||||
vol->flags &= ~VF_LOCKING;
|
||||
wakeup(&vinum_conf.volume + vol->devno);
|
||||
}
|
||||
}
|
||||
|
||||
/* Lock a plex, wait if it's in use */
|
||||
int
|
||||
lockplex(struct plex *plex)
|
||||
{
|
||||
int error;
|
||||
|
||||
while ((plex->flags & VF_LOCKED) != 0) {
|
||||
plex->flags |= VF_LOCKING;
|
||||
/* It would seem to make more sense to sleep on
|
||||
* the address 'plex'. Unfortuntaly we can't
|
||||
* guarantee that this address won't change due to
|
||||
* table expansion. The address we choose won't change. */
|
||||
if ((error = tsleep(&vinum_conf.plex + plex->sdnos[0],
|
||||
PRIBIO | PCATCH,
|
||||
"plexlk",
|
||||
0)) != 0)
|
||||
return error;
|
||||
}
|
||||
plex->flags |= VF_LOCKED;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Unlock a plex and let the next one at it */
|
||||
void
|
||||
unlockplex(struct plex *plex)
|
||||
{
|
||||
plex->flags &= ~VF_LOCKED;
|
||||
if ((plex->flags & VF_LOCKING) != 0) {
|
||||
plex->flags &= ~VF_LOCKING;
|
||||
wakeup(&vinum_conf.plex + plex->plexno);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Get a lock for the global config, wait if it's not available */
|
||||
int
|
||||
lock_config(void)
|
||||
{
|
||||
int error;
|
||||
|
||||
while ((vinum_conf.flags & VF_LOCKED) != 0) {
|
||||
vinum_conf.flags |= VF_LOCKING;
|
||||
if ((error = tsleep(&vinum_conf, PRIBIO | PCATCH, "vincfg", 0)) != 0)
|
||||
return error;
|
||||
}
|
||||
vinum_conf.flags |= VF_LOCKED;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Unlock and wake up any waiters */
|
||||
void
|
||||
unlock_config(void)
|
||||
{
|
||||
vinum_conf.flags &= ~VF_LOCKED;
|
||||
if ((vinum_conf.flags & VF_LOCKING) != 0) {
|
||||
vinum_conf.flags &= ~VF_LOCKING;
|
||||
wakeup(&vinum_conf);
|
||||
}
|
||||
}
|
40
lkm/vinum/makestatetext
Executable file
40
lkm/vinum/makestatetext
Executable file
@ -0,0 +1,40 @@
|
||||
#!/bin/sh
# Make statetexts.h from vinumstate.h
# $Id: makestatetext,v 1.4 1998/03/13 05:36:16 grog Exp grog $
#
# The output starts with the contents of COPYRIGHT.  Then, for each kind
# of object, the enumerators with the corresponding prefix are picked out
# of vinumstate.h (lines containing a "=" are skipped) and each name,
# without its prefix, is written as a quoted string into a table.
#
# The enumerator names are matched with an explicit character class: a
# range like A-z depends on the locale's collating order and also takes
# in the punctuation characters between Z and a.
infile=vinumstate.h
ofile=statetexts.h
cat <COPYRIGHT > "$ofile"

echo >>"$ofile" "/* Created by $0 on" `date`. "Do not edit */"
echo >>"$ofile"
echo >>"$ofile" "/* Drive state texts */"
echo >>"$ofile" "char *drivestatetext [] =
{ "
grep -E -e 'drive_[A-Za-z0-9_]*,' <"$infile" | grep -v = | sed 's: *drive_\([^,]*\).*: \"\1\",:' >>"$ofile"
cat <<FOO >> "$ofile"
};

/* Subdisk state texts */
char *sdstatetext [] =
{
FOO
grep -E -e 'sd_[A-Za-z0-9_]*,' <"$infile" | grep -v = | sed 's: *sd_\([^,]*\).*: \"\1\",:' >>"$ofile"
cat <<FOO >> "$ofile"
};

/* Plex state texts */
char *plexstatetext [] =
{
FOO
grep -E -e 'plex_[A-Za-z0-9_]*,' <"$infile" | grep -v = | sed 's: *plex_\([^,]*\).*: \"\1\",:' >>"$ofile"
cat <<FOO >> "$ofile"
};

/* Volume state texts */
char *volstatetext [] =
{
FOO
grep -E -e 'volume_[A-Za-z0-9_]*,' <"$infile" | grep -v = | sed 's: *volume_\([^,]*\).*: \"\1\",:' >>"$ofile"
cat <<FOO >> "$ofile"
};
FOO
|
186
lkm/vinum/memory.c
Normal file
186
lkm/vinum/memory.c
Normal file
@ -0,0 +1,186 @@
|
||||
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: memory.c,v 1.16 1998/08/08 04:43:22 grog Exp grog $
|
||||
*/
|
||||
|
||||
#define REALLYKERNEL
|
||||
#define USES_VM
|
||||
#include "vinumhdr.h"
|
||||
|
||||
extern jmp_buf command_fail; /* return on a failed command */
|
||||
|
||||
#if __FreeBSD__ >= 3
|
||||
/* Why aren't these declared anywhere? XXX */
|
||||
int setjmp(jmp_buf);
|
||||
void longjmp(jmp_buf, int);
|
||||
#endif
|
||||
|
||||
void freedatabuf(struct mc *me);
|
||||
caddr_t allocdatabuf(struct mc *me);
|
||||
|
||||
void
|
||||
expand_table(void **table, int oldsize, int newsize)
|
||||
{
|
||||
if (newsize > oldsize) {
|
||||
int *temp;
|
||||
|
||||
temp = (int *) Malloc(newsize); /* allocate a new table */
|
||||
CHECKALLOC(temp, "vinum: Can't expand table\n");
|
||||
if (*table != NULL) { /* already something there, */
|
||||
bcopy((char *) *table, (char *) temp, oldsize); /* copy it to the old table */
|
||||
Free(*table);
|
||||
}
|
||||
*table = temp;
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef DEBUG
|
||||
/* increase the size of a request block */
|
||||
void
|
||||
expandrq(struct plexrq *prq)
|
||||
{
|
||||
expand_table((void **) &prq->rqe,
|
||||
prq->requests * sizeof(struct rqelement),
|
||||
(prq->requests + RQELTS) * sizeof(struct rqelement));
|
||||
bzero(&prq->rqe[prq->requests], RQELTS * sizeof(struct rqelement)); /* clear the new part */
|
||||
prq->rqcount += RQELTS;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if DEBUG /* XXX debug */
|
||||
#define MALLOCENTRIES 16384
|
||||
int malloccount = 0;
|
||||
int highwater = 0; /* highest index ever allocated */
|
||||
static struct mc malloced[MALLOCENTRIES];
|
||||
|
||||
static total_malloced;
|
||||
|
||||
caddr_t
|
||||
MMalloc(int size, char *file, int line)
|
||||
{
|
||||
caddr_t result;
|
||||
int i;
|
||||
static int seq = 0;
|
||||
int s;
|
||||
struct mc me; /* information to pass to allocdatabuf */
|
||||
|
||||
if (malloccount >= MALLOCENTRIES) { /* too many */
|
||||
printf("vinum: can't allocate table space to trace memory allocation");
|
||||
return 0; /* can't continue */
|
||||
}
|
||||
result = malloc(size, M_DEVBUF, M_WAITOK); /* use malloc for smaller and irregular stuff */
|
||||
if (result == NULL)
|
||||
printf("vinum: can't allocate %d bytes from %s:%d\n", size, file, line);
|
||||
else {
|
||||
me.flags = 0; /* allocation via malloc */
|
||||
s = splhigh();
|
||||
for (i = 0; i < malloccount; i++) {
|
||||
if (((result + size) > malloced[i].address)
|
||||
&& (result < malloced[i].address + malloced[i].size)) /* overlap */
|
||||
Debugger("Malloc overlap");
|
||||
}
|
||||
if (result) {
|
||||
i = malloccount++;
|
||||
total_malloced += size;
|
||||
malloced[i].address = result;
|
||||
malloced[i].size = size;
|
||||
malloced[i].line = line;
|
||||
malloced[i].seq = seq++;
|
||||
malloced[i].flags = me.flags;
|
||||
malloced[i].databuf = me.databuf; /* only used with kva alloc */
|
||||
bcopy(file, malloced[i].file, min(strlen(file) + 1, 16));
|
||||
}
|
||||
if (malloccount > highwater)
|
||||
highwater = malloccount;
|
||||
splx(s);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
void
|
||||
FFree(void *mem, char *file, int line)
|
||||
{
|
||||
int i;
|
||||
int s;
|
||||
|
||||
s = splhigh();
|
||||
for (i = 0; i < malloccount; i++) {
|
||||
if ((caddr_t) mem == malloced[i].address) { /* found it */
|
||||
bzero(mem, malloced[i].size); /* XXX */
|
||||
free(mem, M_DEVBUF);
|
||||
malloccount--;
|
||||
total_malloced -= malloced[i].size;
|
||||
if (i < malloccount) /* more coming after */
|
||||
bcopy(&malloced[i + 1], &malloced[i], (malloccount - i) * sizeof(struct mc));
|
||||
splx(s);
|
||||
return;
|
||||
}
|
||||
}
|
||||
splx(s);
|
||||
printf("Freeing unallocated data at 0x%08x from %s, line %d\n", (int) mem, file, line);
|
||||
Debugger("Free");
|
||||
}
|
||||
|
||||
void
|
||||
vinum_meminfo(caddr_t data)
|
||||
{
|
||||
struct meminfo *m = (struct meminfo *) data;
|
||||
|
||||
m->mallocs = malloccount;
|
||||
m->total_malloced = total_malloced;
|
||||
m->malloced = malloced;
|
||||
m->highwater = highwater;
|
||||
}
|
||||
|
||||
int
|
||||
vinum_mallocinfo(caddr_t data)
|
||||
{
|
||||
struct mc *m = (struct mc *) data;
|
||||
unsigned int ent = *(int *) data; /* 1st word is index */
|
||||
|
||||
if (ent >= malloccount)
|
||||
return ENOENT;
|
||||
m->address = malloced[ent].address;
|
||||
m->size = malloced[ent].size;
|
||||
m->line = malloced[ent].line;
|
||||
m->seq = malloced[ent].seq;
|
||||
bcopy(malloced[ent].file, m->file, 16);
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
206
lkm/vinum/parser.c
Normal file
206
lkm/vinum/parser.c
Normal file
@ -0,0 +1,206 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: parser.c,v 1.11 1998/08/10 08:50:42 grog Exp grog $
|
||||
*/
|
||||
|
||||
/* This file contains the parser for the configuration routines. It's used
|
||||
* both in the kernel and in the user interface program, thus the separate file. */
|
||||
|
||||
/* Go through a text and split up into text tokens. These are either non-blank
|
||||
* sequences, or any sequence (except \0) enclosed in ' or ". Embedded ' or
|
||||
* " characters may be escaped by \, which otherwise has no special meaning.
|
||||
*
|
||||
* Delimit by following with a \0, and return pointers to the starts at token [].
|
||||
* Return the number of tokens found as the return value.
|
||||
*
|
||||
* This method has the restriction that a closing " or ' must be followed by
|
||||
* grey space.
|
||||
*
|
||||
* Error conditions are end of line before end of quote, or no space after
|
||||
* a closing quote. In this case, tokenize() returns -1. */
|
||||
|
||||
#include <sys/param.h>
|
||||
#ifdef KERNEL
|
||||
#undef KERNEL /* XXX */
|
||||
#define REALLYKERNEL
|
||||
#else
|
||||
#include <ctype.h>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#endif
|
||||
/* All this mess for a single struct definition */
|
||||
#include <sys/uio.h>
|
||||
#include <sys/namei.h>
|
||||
#include <sys/disklabel.h>
|
||||
#include <sys/mount.h>
|
||||
#include <sys/device.h>
|
||||
#include <sys/disk.h>
|
||||
#include "sys/buf.h"
|
||||
|
||||
#include <vinumvar.h>
|
||||
#include "vinumkw.h"
|
||||
#include "vinumio.h"
|
||||
#include "vinumext.h"
|
||||
|
||||
#ifdef REALLYKERNEL
|
||||
#define isspace(c) (((c) == ' ') || ((c) == '\t')) /* check for white space; c may be evaluated twice */
|
||||
#else /* get it from the headers */
|
||||
#include <ctype.h>
|
||||
#endif
|
||||
|
||||
/* enum keyword is defined in vinumvar.h */
|
||||
|
||||
#define keypair(x) { #x, kw_##x } /* create pair "foo", kw_foo */
|
||||
#define flagkeypair(x) { "-"#x, kw_##x } /* create pair "-foo", kw_foo */
|
||||
#define KEYWORDSET(x) {sizeof (x) / sizeof (struct _keywords), x}
|
||||
|
||||
/* Normal keywords. These are all the words that vinum knows. */
|
||||
struct _keywords keywords[] =
|
||||
{keypair(drive),
|
||||
keypair(sd),
|
||||
keypair(subdisk),
|
||||
keypair(plex),
|
||||
keypair(volume),
|
||||
keypair(vol),
|
||||
keypair(setupstate),
|
||||
keypair(readpol),
|
||||
keypair(org),
|
||||
keypair(name),
|
||||
keypair(writethrough),
|
||||
keypair(writeback),
|
||||
keypair(raw),
|
||||
keypair(device),
|
||||
keypair(concat),
|
||||
keypair(raid5),
|
||||
keypair(striped),
|
||||
keypair(plexoffset),
|
||||
keypair(driveoffset),
|
||||
keypair(length),
|
||||
keypair(len),
|
||||
keypair(state),
|
||||
keypair(round),
|
||||
keypair(prefer),
|
||||
keypair(rename),
|
||||
keypair(detached),
|
||||
#ifndef KERNEL /* for vinum(8) only */
|
||||
#ifdef DEBUG
|
||||
keypair(debug),
|
||||
#endif
|
||||
keypair(attach),
|
||||
keypair(detach),
|
||||
keypair(printconfig),
|
||||
keypair(replace),
|
||||
keypair(create),
|
||||
keypair(read),
|
||||
keypair(modify),
|
||||
keypair(list),
|
||||
keypair(l),
|
||||
keypair(ld),
|
||||
keypair(ls),
|
||||
keypair(lp),
|
||||
keypair(lv),
|
||||
keypair(info),
|
||||
keypair(set),
|
||||
keypair(rm),
|
||||
keypair(init),
|
||||
keypair(label),
|
||||
keypair(resetconfig),
|
||||
keypair(start),
|
||||
keypair(stop),
|
||||
keypair(resetstats)
|
||||
#endif
|
||||
};
|
||||
struct keywordset keyword_set = KEYWORDSET(keywords);
|
||||
|
||||
#ifndef KERNEL
|
||||
struct _keywords flag_keywords[] =
|
||||
{flagkeypair(f),
|
||||
flagkeypair(d),
|
||||
flagkeypair(v),
|
||||
flagkeypair(s),
|
||||
flagkeypair(r)
|
||||
};
|
||||
struct keywordset flag_set = KEYWORDSET(flag_keywords);
|
||||
|
||||
#endif
|
||||
|
||||
/*
 * Split the text at cptr into tokens.  The text is modified in place:
 * the character after each token is overwritten with \0, and token []
 * receives a pointer to the start of each token.  Scanning stops at the
 * end of the string, at a newline or at a # which starts a token.
 *
 * A token which starts with ' or " extends to the next occurrence of
 * the same character which is not preceded by \.  The quote characters
 * (and any \ characters) remain part of the token.  The closing quote
 * must be followed by a space or a tab.
 *
 * Returns the number of tokens found, or -1 if a quoted token is not
 * closed before the end of the line or the closing quote is not
 * followed by white space.
 *
 * token [] must be big enough for all the tokens in the line: there is
 * no way to tell this function how many entries it has.
 */
int
tokenize(char *cptr, char *token[])
{
    char delim;                                     /* delimiter for searching for the partner */
    int tokennr = 0;                                /* index of this token: none found yet */

    for (;;) {
        /* the casts keep characters with the top bit set out of trouble with the library isspace */
        while (isspace((unsigned char) *cptr))
            cptr++;                                 /* skip initial white space */
        if ((*cptr == '\0') || (*cptr == '\n') || (*cptr == '#')) /* end of line */
            return tokennr;                         /* return number of tokens found */
        delim = *cptr;
        token[tokennr] = cptr;                      /* point to it */
        tokennr++;                                  /* one more */
        /* XXX this is broken.  It leaves superfluous \\ characters in the text */
        if ((delim == '\'') || (delim == '"')) {    /* delimitered */
            for (;;) {
                cptr++;
                if ((*cptr == '\0') || (*cptr == '\n')) /* end of line before the partner */
                    return -1;
                if ((*cptr == delim) && (cptr[-1] != '\\')) { /* found the partner */
                    cptr++;                         /* move on past */
                    if (!isspace((unsigned char) *cptr)) /* error, no space after closing quote */
                        return -1;
                    *cptr++ = '\0';                 /* delimit */
                    /*
                     * This token is finished.  Without leaving the loop
                     * here we would carry on looking for another quote
                     * and run into the end of the line.
                     */
                    break;
                }
            }
        } else {                                    /* not quoted */
            while ((*cptr != '\0') && (!isspace((unsigned char) *cptr)) && (*cptr != '\n'))
                cptr++;
            if (*cptr != '\0')                      /* not end of the line, */
                *cptr++ = '\0';                     /* delimit and move to the next */
        }
    }
}
|
||||
|
||||
/* Find a keyword and return an index */
|
||||
enum keyword
|
||||
get_keyword(char *name, struct keywordset *keywordset)
|
||||
{
|
||||
int i;
|
||||
struct _keywords *keywords = keywordset->k; /* point to the keywords */
|
||||
for (i = 0; i < keywordset->size; i++)
|
||||
if (!strcmp(name, keywords[i].name))
|
||||
return (enum keyword) keywords[i].keyword;
|
||||
return kw_invalid_keyword;
|
||||
}
|
882
lkm/vinum/request.c
Normal file
882
lkm/vinum/request.c
Normal file
@ -0,0 +1,882 @@
|
||||
/* XXX to do:
|
||||
|
||||
* Decide where we need splbio ()
|
||||
*/
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: request.c,v 1.17 1998/08/13 06:04:47 grog Exp grog $
|
||||
*/
|
||||
|
||||
#define REALLYKERNEL
|
||||
#include "vinumhdr.h"
|
||||
#include "request.h"
|
||||
#include <miscfs/specfs/specdev.h>
|
||||
#include <sys/resourcevar.h>
|
||||
|
||||
/* pointer to ioctl p parameter, to save passing it around */
|
||||
extern struct proc *myproc;
|
||||
|
||||
enum requeststatus bre(struct request *rq,
|
||||
int plexno,
|
||||
daddr_t * diskstart,
|
||||
daddr_t diskend);
|
||||
enum requeststatus bre5(struct request *rq,
|
||||
int plexno,
|
||||
daddr_t * diskstart,
|
||||
daddr_t diskend);
|
||||
enum requeststatus build_read_request(struct request *rq, int volplexno);
|
||||
enum requeststatus build_write_request(struct request *rq);
|
||||
enum requeststatus build_rq_buffer(struct rqelement *rqe, struct plex *plex);
|
||||
void freerq(struct request *rq);
|
||||
void free_rqg(struct rqgroup *rqg);
|
||||
int find_alternate_sd(struct request *rq);
|
||||
int check_range_covered(struct request *);
|
||||
void complete_rqe(struct buf *bp);
|
||||
void complete_raid5_write(struct rqelement *);
|
||||
int abortrequest(struct request *rq, int error);
|
||||
void sdio(struct buf *bp);
|
||||
void sdio_done(struct buf *bp);
|
||||
int vinum_bounds_check(struct buf *bp, struct volume *vol);
|
||||
caddr_t allocdatabuf(struct rqelement *rqe);
|
||||
void freedatabuf(struct rqelement *rqe);
|
||||
|
||||
void
|
||||
vinumstrategy(struct buf *bp)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
int volno;
|
||||
struct volume *vol = NULL;
|
||||
int s;
|
||||
struct devcode *device = (struct devcode *) &bp->b_dev; /* decode device number */
|
||||
enum requeststatus status;
|
||||
|
||||
switch (device->type) {
|
||||
case VINUM_SD_TYPE:
|
||||
sdio(bp);
|
||||
return;
|
||||
|
||||
/* In fact, vinum doesn't handle drives: they're
|
||||
* handled directly by the disk drivers */
|
||||
case VINUM_DRIVE_TYPE:
|
||||
default:
|
||||
bp->b_error = EIO; /* I/O error */
|
||||
bp->b_flags |= B_ERROR;
|
||||
biodone(bp);
|
||||
return;
|
||||
|
||||
case VINUM_VOLUME_TYPE: /* volume I/O */
|
||||
volno = VOLNO(bp->b_dev);
|
||||
vol = &VOL[volno];
|
||||
if (vol->state != volume_up) { /* can't access this volume */
|
||||
bp->b_error = EIO; /* I/O error */
|
||||
bp->b_flags |= B_ERROR;
|
||||
biodone(bp);
|
||||
return;
|
||||
}
|
||||
if (vinum_bounds_check(bp, vol) <= 0) { /* don't like them bounds */
|
||||
biodone(bp); /* have nothing to do with this */
|
||||
return;
|
||||
}
|
||||
/* FALLTHROUGH */
|
||||
/* Plex I/O is pretty much the same as volume I/O
|
||||
* for a single plex. Indicate this by passing a NULL
|
||||
* pointer (set above) for the volume */
|
||||
case VINUM_PLEX_TYPE:
|
||||
bp->b_resid = bp->b_bcount; /* transfer everything */
|
||||
vinumstart(bp, 0);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* Start a transfer. Return -1 on error,
|
||||
* 0 if OK, 1 if we need to retry.
|
||||
* Parameter reviveok is set when doing
|
||||
* transfers for revives: it allows transfers to
|
||||
* be started immediately when a revive is in
|
||||
* progress. During revive, normal transfers
|
||||
* are queued if they share address space with
|
||||
* a currently active revive operation. */
|
||||
int
|
||||
vinumstart(struct buf *bp, int reviveok)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
int plexno;
|
||||
int maxplex; /* maximum number of plexes to handle */
|
||||
struct volume *vol;
|
||||
struct rqgroup *rqg; /* current plex's requests */
|
||||
struct rqelement *rqe; /* individual element */
|
||||
struct request *rq; /* build up our request here */
|
||||
int rqno; /* index in request list */
|
||||
enum requeststatus status;
|
||||
|
||||
/* XXX In these routines, we're assuming that
|
||||
* we will always be called with bp->b_bcount
|
||||
* which is a multiple of the sector size. This
|
||||
* is a reasonable assumption, since we are only
|
||||
* called from system routines. Should we check
|
||||
* anyway? */
|
||||
|
||||
if ((bp->b_bcount % DEV_BSIZE) != 0) { /* bad length */
|
||||
bp->b_error = EINVAL; /* invalid size */
|
||||
bp->b_flags |= B_ERROR;
|
||||
biodone(bp);
|
||||
return -1;
|
||||
}
|
||||
rq = (struct request *) Malloc(sizeof(struct request)); /* allocate a request struct */
|
||||
if (rq == NULL) { /* can't do it */
|
||||
bp->b_error = ENOMEM; /* can't get memory */
|
||||
bp->b_flags |= B_ERROR;
|
||||
biodone(bp);
|
||||
return -1;
|
||||
}
|
||||
bzero(rq, sizeof(struct request));
|
||||
|
||||
/* Note the volume ID. This can be NULL, which
|
||||
* the request building functions use as an
|
||||
* indication for single plex I/O */
|
||||
rq->bp = bp; /* and the user buffer struct */
|
||||
|
||||
if (DEVTYPE(bp->b_dev) == VINUM_VOLUME_TYPE) { /* it's a volume, */
|
||||
rq->volplex.volno = VOLNO(bp->b_dev); /* get the volume number */
|
||||
vol = &VOL[rq->volplex.volno]; /* and point to it */
|
||||
vol->active++; /* one more active request */
|
||||
maxplex = vol->plexes; /* consider all its plexes */
|
||||
} else {
|
||||
vol = NULL; /* no volume */
|
||||
rq->volplex.plexno = PLEXNO(bp->b_dev); /* point to the plex */
|
||||
rq->isplex = 1; /* note that it's a plex */
|
||||
maxplex = 1; /* just the one plex */
|
||||
}
|
||||
|
||||
if (bp->b_flags & B_READ) {
|
||||
/* This is a read request. Decide
|
||||
* which plex to read from.
|
||||
*
|
||||
* There's a potential race condition here,
|
||||
* since we're not locked, and we could end
|
||||
* up multiply incrementing the round-robin
|
||||
* counter. This doesn't have any serious
|
||||
* effects, however. */
|
||||
if (vol != NULL) {
|
||||
vol->reads++;
|
||||
vol->bytes_read += bp->b_bcount;
|
||||
plexno = vol->preferred_plex; /* get the plex to use */
|
||||
if (plexno < 0) { /* round robin */
|
||||
plexno = vol->last_plex_read;
|
||||
vol->last_plex_read++;
|
||||
if (vol->last_plex_read == vol->plexes) /* got the the end? */
|
||||
vol->last_plex_read = 0; /* wrap around */
|
||||
}
|
||||
status = build_read_request(rq, plexno); /* build a request */
|
||||
} else {
|
||||
daddr_t diskaddr = bp->b_blkno; /* start offset of transfer */
|
||||
status = bre(rq, /* build a request list */
|
||||
rq->volplex.plexno,
|
||||
&diskaddr,
|
||||
diskaddr + (bp->b_bcount / DEV_BSIZE));
|
||||
}
|
||||
|
||||
if ((status > REQUEST_RECOVERED) /* can't satisfy it */
|
||||
||(bp->b_flags & B_DONE)) { /* XXX shouldn't get this without bad status */
|
||||
if (status == REQUEST_DOWN) { /* not enough subdisks */
|
||||
bp->b_error = EIO; /* I/O error */
|
||||
bp->b_flags |= B_ERROR;
|
||||
}
|
||||
biodone(bp);
|
||||
freerq(rq);
|
||||
return -1;
|
||||
}
|
||||
return launch_requests(rq, reviveok); /* now start the requests if we can */
|
||||
} else
|
||||
/* This is a write operation. We write to all
|
||||
* plexes. If this is a RAID 5 plex, we must also
|
||||
* update the parity stripe. */
|
||||
{
|
||||
if (vol != NULL) {
|
||||
vol->writes++;
|
||||
vol->bytes_written += bp->b_bcount;
|
||||
status = build_write_request(rq); /* Not all the subdisks are up */
|
||||
} else { /* plex I/O */
|
||||
daddr_t diskstart;
|
||||
|
||||
diskstart = bp->b_blkno; /* start offset of transfer */
|
||||
status = bre(rq,
|
||||
PLEXNO(bp->b_dev),
|
||||
&diskstart,
|
||||
bp->b_blkno + (bp->b_bcount / DEV_BSIZE)); /* build requests for the plex */
|
||||
}
|
||||
if ((status > REQUEST_RECOVERED) /* can't satisfy it */
|
||||
||(bp->b_flags & B_DONE)) { /* XXX shouldn't get this without bad status */
|
||||
if (status == REQUEST_DOWN) { /* not enough subdisks */
|
||||
bp->b_error = EIO; /* I/O error */
|
||||
bp->b_flags |= B_ERROR;
|
||||
}
|
||||
if ((bp->b_flags & B_DONE) == 0)
|
||||
biodone(bp);
|
||||
freerq(rq);
|
||||
return -1;
|
||||
}
|
||||
return launch_requests(rq, reviveok); /* start the requests */
|
||||
}
|
||||
}
|
||||
|
||||
/* Call the low-level strategy routines to
|
||||
* perform the requests in a struct request */
|
||||
int
|
||||
launch_requests(struct request *rq, int reviveok)
|
||||
{
|
||||
struct rqgroup *rqg;
|
||||
int rqno; /* loop index */
|
||||
struct rqelement *rqe; /* current element */
|
||||
int s;
|
||||
|
||||
/* First find out whether we're reviving, and the
|
||||
* request contains a conflict. If so, we hang
|
||||
* the request off plex->waitlist of the first
|
||||
* plex we find which is reviving */
|
||||
if ((rq->flags & XFR_REVIVECONFLICT) /* possible revive conflict */
|
||||
&&(!reviveok)) { /* and we don't want to do it now, */
|
||||
struct volume *vol = &VOL[VOLNO(rq->bp->b_dev)];
|
||||
struct plex *plex;
|
||||
int plexno;
|
||||
|
||||
for (plexno = 0; plexno < vol->plexes; plexno++) { /* find the reviving plex */
|
||||
plex = &PLEX[vol->plex[plexno]];
|
||||
if (plex->state == plex_reviving) /* found it */
|
||||
break;
|
||||
}
|
||||
if (plexno < vol->plexes) { /* found it? */
|
||||
struct request *waitlist = plex->waitlist; /* point to the waiting list */
|
||||
|
||||
while (waitlist->next != NULL) /* find the end */
|
||||
waitlist = waitlist->next;
|
||||
waitlist->next = rq; /* hook our request there */
|
||||
return 0; /* and get out of here */
|
||||
} else /* bad vinum, bad */
|
||||
printf("vinum: can't find reviving plex for volume %s\n", vol->name);
|
||||
}
|
||||
rq->active = 0; /* nothing yet */
|
||||
/* XXX This is probably due to a bug */
|
||||
if (rq->rqg == NULL) { /* no request */
|
||||
abortrequest(rq, EINVAL);
|
||||
return -1;
|
||||
}
|
||||
#if DEBUG
|
||||
if (debug & DEBUG_ADDRESSES)
|
||||
printf("Request: %x\nWrite dev 0x%x, offset 0x%x, length %ld\n",
|
||||
(u_int) rq,
|
||||
rq->bp->b_dev,
|
||||
rq->bp->b_blkno,
|
||||
rq->bp->b_bcount); /* XXX */
|
||||
vinum_conf.lastrq = (int) rq;
|
||||
vinum_conf.lastbuf = rq->bp;
|
||||
#endif
|
||||
for (rqg = rq->rqg; rqg != NULL; rqg = rqg->next) { /* through the whole request chain */
|
||||
rqg->active = rqg->count; /* they're all active */
|
||||
rq->active++; /* one more active request group */
|
||||
for (rqno = 0; rqno < rqg->count; rqno++) {
|
||||
rqe = &rqg->rqe[rqno];
|
||||
if (rqe->flags & XFR_BAD_SUBDISK) /* this subdisk is bad, */
|
||||
rqg->active--; /* one less active request */
|
||||
else {
|
||||
struct drive *drive = &DRIVE[rqe->driveno]; /* drive to access */
|
||||
if ((rqe->b.b_flags & B_READ) == 0)
|
||||
rqe->b.b_vp->v_numoutput++; /* one more output going */
|
||||
#if DEBUG
|
||||
if (debug & DEBUG_ADDRESSES)
|
||||
printf(" %s dev 0x%x, sd %d, offset 0x%x, devoffset 0x%x, length %ld\n",
|
||||
rqe->b.b_flags & B_READ ? "Read" : "Write",
|
||||
rqe->b.b_dev,
|
||||
rqe->sdno,
|
||||
(u_int) (rqe->b.b_blkno - SD[rqe->sdno].driveoffset),
|
||||
rqe->b.b_blkno,
|
||||
rqe->b.b_bcount); /* XXX */
|
||||
if (debug & DEBUG_NUMOUTPUT)
|
||||
printf(" vinumstart sd %d numoutput %ld\n",
|
||||
rqe->sdno,
|
||||
rqe->b.b_vp->v_numoutput);
|
||||
#endif
|
||||
/* fire off the request */
|
||||
s = splbio();
|
||||
(*bdevsw[major(rqe->b.b_dev)]->d_strategy) (&rqe->b);
|
||||
splx(s);
|
||||
}
|
||||
/* XXX Do we need caching? Think about this more */
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* define the low-level requests needed to perform a
|
||||
* high-level I/O operation for a specific plex 'plexno'.
|
||||
*
|
||||
* Return 0 if all subdisks involved in the request are up, 1 if some
|
||||
* subdisks are not up, and -1 if the request is at least partially
|
||||
* outside the bounds of the subdisks.
|
||||
*
|
||||
* Modify the pointer *diskstart to point to the end address. On
|
||||
* read, return on the first bad subdisk, so that the caller
|
||||
* (build_read_request) can try alternatives.
|
||||
*
|
||||
* On entry to this routine, the rqg structures are not assigned. The
|
||||
* assignment is performed by expandrq(). Strictly speaking, the
|
||||
* elements rqe->sdno of all entries should be set to -1, since 0
|
||||
* (from bzero) is a valid subdisk number. We avoid this problem by
|
||||
* initializing the ones we use, and not looking at the others (index
|
||||
* >= rqg->requests).
|
||||
*/
|
||||
enum requeststatus
|
||||
bre(struct request *rq,
|
||||
int plexno,
|
||||
daddr_t * diskaddr,
|
||||
daddr_t diskend)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
int sdno;
|
||||
struct sd *sd;
|
||||
struct rqgroup *rqg;
|
||||
struct buf *bp; /* user's bp */
|
||||
struct plex *plex;
|
||||
enum requeststatus status; /* return value */
|
||||
daddr_t plexoffset; /* offset of transfer in plex */
|
||||
daddr_t stripebase; /* base address of stripe (1st subdisk) */
|
||||
daddr_t stripeoffset; /* offset in stripe */
|
||||
daddr_t blockoffset; /* offset in stripe on subdisk */
|
||||
struct rqelement *rqe; /* point to this request information */
|
||||
daddr_t diskstart = *diskaddr; /* remember where this transfer starts */
|
||||
|
||||
bp = rq->bp; /* buffer pointer */
|
||||
status = REQUEST_OK; /* return value: OK until proven otherwise */
|
||||
plex = &PLEX[plexno]; /* point to the plex */
|
||||
|
||||
switch (plex->organization) {
|
||||
case plex_concat:
|
||||
for (sdno = 0; sdno < plex->subdisks; sdno++) {
|
||||
sd = &SD[plex->sdnos[sdno]];
|
||||
if ((*diskaddr < (sd->plexoffset + sd->sectors)) /* The request starts before the end of this */
|
||||
&&(diskend > sd->plexoffset)) { /* subdisk and ends after the start of this sd */
|
||||
if ((sd->state != sd_up) || (plex->state != plex_up)) {
|
||||
enum requeststatus s;
|
||||
|
||||
s = checksdstate(sd, rq, *diskaddr, diskend); /* do we need to change state? */
|
||||
if (s) /* give up? */
|
||||
return s; /* yup */
|
||||
}
|
||||
rqg = allocrqg(rq, 1); /* space for the request */
|
||||
if (rqg == NULL) { /* malloc failed */
|
||||
bp->b_flags |= B_ERROR;
|
||||
bp->b_error = ENOMEM;
|
||||
biodone(bp);
|
||||
return REQUEST_ENOMEM;
|
||||
}
|
||||
rqg->plexno = plexno;
|
||||
|
||||
rqe = &rqg->rqe[0]; /* point to the element */
|
||||
rqe->rqg = rqg; /* group */
|
||||
rqe->sdno = sd->sdno; /* put in the subdisk number */
|
||||
plexoffset = max(sd->plexoffset, *diskaddr); /* start offset in plex */
|
||||
rqe->sdoffset = plexoffset - sd->plexoffset; /* start offset in subdisk */
|
||||
rqe->useroffset = plexoffset - diskstart; /* start offset in user buffer */
|
||||
rqe->dataoffset = 0;
|
||||
rqe->datalen = min(diskend - *diskaddr, /* number of sectors to transfer in this sd */
|
||||
sd->sectors - rqe->sdoffset);
|
||||
rqe->groupoffset = 0; /* no groups for concatenated plexes */
|
||||
rqe->grouplen = 0;
|
||||
rqe->buflen = rqe->datalen; /* buffer length is data buffer length */
|
||||
rqe->flags = 0;
|
||||
rqe->driveno = sd->driveno;
|
||||
*diskaddr += rqe->datalen; /* bump the address */
|
||||
if (build_rq_buffer(rqe, plex)) { /* build the buffer */
|
||||
deallocrqg(rqg);
|
||||
bp->b_flags |= B_ERROR;
|
||||
bp->b_error = ENOMEM;
|
||||
biodone(bp);
|
||||
return REQUEST_ENOMEM; /* can't do it */
|
||||
}
|
||||
}
|
||||
if (*diskaddr > diskend) /* we're finished, */
|
||||
break; /* get out of here */
|
||||
}
|
||||
break;
|
||||
|
||||
case plex_striped:
|
||||
{
|
||||
while (*diskaddr < diskend) { /* until we get it all sorted out */
|
||||
/* The offset of the start address from
|
||||
* the start of the stripe */
|
||||
stripeoffset = *diskaddr % (plex->stripesize * plex->subdisks);
|
||||
|
||||
/* The plex-relative address of the
|
||||
* start of the stripe */
|
||||
stripebase = *diskaddr - stripeoffset;
|
||||
|
||||
/* The number of the subdisk in which
|
||||
* the start is located */
|
||||
sdno = stripeoffset / plex->stripesize;
|
||||
|
||||
/* The offset from the beginning of the stripe
|
||||
* on this subdisk */
|
||||
blockoffset = stripeoffset % plex->stripesize;
|
||||
|
||||
sd = &SD[plex->sdnos[sdno]]; /* the subdisk in question */
|
||||
if ((sd->state != sd_up) || (plex->state != plex_up)) {
|
||||
enum requeststatus s;
|
||||
|
||||
s = checksdstate(sd, rq, *diskaddr, diskend); /* do we need to change state? */
|
||||
if (s) /* give up? */
|
||||
return s; /* yup */
|
||||
}
|
||||
rqg = allocrqg(rq, 1); /* space for the request */
|
||||
if (rqg == NULL) { /* malloc failed */
|
||||
bp->b_flags |= B_ERROR;
|
||||
bp->b_error = ENOMEM;
|
||||
biodone(bp);
|
||||
return REQUEST_ENOMEM;
|
||||
}
|
||||
rqg->plexno = plexno;
|
||||
|
||||
rqe = &rqg->rqe[0]; /* point to the element */
|
||||
rqe->rqg = rqg;
|
||||
rqe->sdoffset = stripebase / plex->subdisks + blockoffset; /* start offset in this subdisk */
|
||||
rqe->useroffset = *diskaddr - diskstart; /* The offset of the start in the user buffer */
|
||||
rqe->dataoffset = 0;
|
||||
rqe->datalen = min(diskend - *diskaddr, /* the amount remaining to transfer */
|
||||
plex->stripesize - blockoffset); /* and the amount left in this stripe */
|
||||
rqe->groupoffset = 0; /* no groups for striped plexes */
|
||||
rqe->grouplen = 0;
|
||||
rqe->buflen = rqe->datalen; /* buffer length is data buffer length */
|
||||
rqe->flags = 0;
|
||||
rqe->sdno = sd->sdno; /* put in the subdisk number */
|
||||
rqe->driveno = sd->driveno;
|
||||
|
||||
if (rqe->sdoffset >= sd->sectors) { /* starts beyond the end of the subdisk? */
|
||||
deallocrqg(rqg);
|
||||
return REQUEST_EOF;
|
||||
} else if (rqe->sdoffset + rqe->datalen > sd->sectors) /* ends beyond the end of the subdisk? */
|
||||
rqe->datalen = sd->sectors - rqe->sdoffset; /* yes, truncate */
|
||||
|
||||
if (build_rq_buffer(rqe, plex)) { /* build the buffer */
|
||||
deallocrqg(rqg);
|
||||
bp->b_flags |= B_ERROR;
|
||||
bp->b_error = ENOMEM;
|
||||
biodone(bp);
|
||||
return REQUEST_ENOMEM; /* can't do it */
|
||||
}
|
||||
*diskaddr += rqe->datalen; /* look at the remainder */
|
||||
if (*diskaddr < diskend) { /* didn't finish the request on this stripe */
|
||||
plex->multiblock++; /* count another one */
|
||||
if (sdno == plex->subdisks - 1) /* last subdisk, */
|
||||
plex->multistripe++; /* another stripe as well */
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
|
||||
default:
|
||||
printf("vinum: invalid plex type in bre");
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
/* Build up a request structure for reading volumes.
|
||||
* This function is not needed for plex reads, since there's
|
||||
* no recovery if a plex read can't be satisified. */
|
||||
enum requeststatus
|
||||
build_read_request(struct request *rq, /* request */
|
||||
int plexindex)
|
||||
{ /* index in the volume's plex table */
|
||||
BROKEN_GDB;
|
||||
struct buf *bp;
|
||||
daddr_t startaddr; /* offset of previous part of transfer */
|
||||
daddr_t diskaddr; /* offset of current part of transfer */
|
||||
daddr_t diskend; /* and end offset of transfer */
|
||||
int plexno; /* plex index in vinum_conf */
|
||||
struct rqgroup *rqg; /* point to the request we're working on */
|
||||
struct volume *vol; /* volume in question */
|
||||
off_t oldstart; /* note where we started */
|
||||
int recovered = 0; /* set if we recover a read */
|
||||
enum requeststatus status = REQUEST_OK;
|
||||
|
||||
bp = rq->bp; /* buffer pointer */
|
||||
diskaddr = bp->b_blkno; /* start offset of transfer */
|
||||
diskend = diskaddr + (bp->b_bcount / DEV_BSIZE); /* and end offset of transfer */
|
||||
rqg = &rq->rqg[plexindex]; /* plex request */
|
||||
vol = &VOL[rq->volplex.volno]; /* point to volume */
|
||||
|
||||
while (diskaddr < diskend) { /* build up request components */
|
||||
startaddr = diskaddr;
|
||||
status = bre(rq, vol->plex[plexindex], &diskaddr, diskend); /* build up a request */
|
||||
switch (status) {
|
||||
case REQUEST_OK:
|
||||
continue;
|
||||
|
||||
case REQUEST_RECOVERED:
|
||||
recovered = 1;
|
||||
break;
|
||||
|
||||
case REQUEST_EOF:
|
||||
case REQUEST_ENOMEM:
|
||||
return status;
|
||||
|
||||
/* if we get here, we have either had a failure or
|
||||
* a RAID 5 recovery. We don't want to use the
|
||||
* recovery, because it's expensive, so first we
|
||||
* check if we have alternatives */
|
||||
case REQUEST_DOWN: /* can't access the plex */
|
||||
if (vol != NULL) { /* and this is volume I/O */
|
||||
/* Try to satisfy the request
|
||||
* from another plex */
|
||||
for (plexno = 0; plexno < vol->plexes; plexno++) {
|
||||
diskaddr = startaddr; /* start at the beginning again */
|
||||
oldstart = startaddr; /* and note where that was */
|
||||
if (plexno != plexindex) { /* don't try this plex again */
|
||||
bre(rq, vol->plex[plexno], &diskaddr, diskend); /* try a request */
|
||||
if (diskaddr > oldstart) { /* we satisfied another part */
|
||||
recovered = 1; /* we recovered from the problem */
|
||||
status = REQUEST_OK; /* don't complain about it */
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (plexno == (vol->plexes - 1)) /* couldn't satisfy the request */
|
||||
return REQUEST_DOWN; /* failed */
|
||||
}
|
||||
} else
|
||||
return REQUEST_DOWN; /* bad luck */
|
||||
}
|
||||
if (recovered)
|
||||
vol->recovered_reads += recovered; /* adjust our recovery count */
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
/* Build up a request structure for writes.
|
||||
* Return 0 if all subdisks involved in the request are up, 1 if some
|
||||
* subdisks are not up, and -1 if the request is at least partially
|
||||
* outside the bounds of the subdisks. */
|
||||
enum requeststatus
|
||||
build_write_request(struct request *rq)
|
||||
{ /* request */
|
||||
BROKEN_GDB;
|
||||
struct buf *bp;
|
||||
daddr_t diskstart; /* offset of current part of transfer */
|
||||
daddr_t diskend; /* and end offset of transfer */
|
||||
int plexno; /* plex index in vinum_conf */
|
||||
struct volume *vol; /* volume in question */
|
||||
enum requeststatus status;
|
||||
|
||||
bp = rq->bp; /* buffer pointer */
|
||||
vol = &VOL[rq->volplex.volno]; /* point to volume */
|
||||
diskend = bp->b_blkno + (bp->b_bcount / DEV_BSIZE); /* end offset of transfer */
|
||||
status = REQUEST_OK;
|
||||
for (plexno = 0; plexno < vol->plexes; plexno++) {
|
||||
diskstart = bp->b_blkno; /* start offset of transfer */
|
||||
status = min(status, bre(rq, /* build requests for the plex */
|
||||
vol->plex[plexno],
|
||||
&diskstart,
|
||||
diskend));
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
/* Fill in the struct buf part of a request element. */
|
||||
enum requeststatus
|
||||
build_rq_buffer(struct rqelement *rqe, struct plex *plex)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
struct sd *sd; /* point to subdisk */
|
||||
struct volume *vol;
|
||||
struct buf *bp;
|
||||
struct buf *ubp; /* user (high level) buffer header */
|
||||
|
||||
vol = &VOL[rqe->rqg->rq->volplex.volno];
|
||||
sd = &SD[rqe->sdno]; /* point to subdisk */
|
||||
bp = &rqe->b;
|
||||
ubp = rqe->rqg->rq->bp; /* pointer to user buffer header */
|
||||
|
||||
/* Initialize the buf struct */
|
||||
bzero(&rqe->b, sizeof(struct buf));
|
||||
bp->b_proc = ubp->b_proc; /* process pointer */
|
||||
bp->b_flags = ubp->b_flags & (B_NOCACHE | B_READ | B_ASYNC); /* copy these flags from user bp */
|
||||
bp->b_flags |= B_CALL | B_BUSY; /* inform us when it's done */
|
||||
if (plex->state == plex_reviving)
|
||||
bp->b_flags |= B_ORDERED; /* keep request order if we're reviving */
|
||||
bp->b_iodone = complete_rqe; /* by calling us here */
|
||||
bp->b_dev = DRIVE[rqe->driveno].dev; /* drive device */
|
||||
bp->b_blkno = rqe->sdoffset + sd->driveoffset; /* start address */
|
||||
bp->b_bcount = rqe->buflen << DEV_BSHIFT; /* number of bytes to transfer */
|
||||
bp->b_resid = bp->b_bcount; /* and it's still all waiting */
|
||||
bp->b_bufsize = bp->b_bcount; /* and buffer size */
|
||||
bp->b_vp = DRIVE[rqe->driveno].vp; /* drive vnode */
|
||||
bp->b_rcred = FSCRED; /* we have the file system credentials */
|
||||
bp->b_wcred = FSCRED; /* we have the file system credentials */
|
||||
|
||||
if (rqe->flags & XFR_MALLOCED) { /* this operation requires a malloced buffer */
|
||||
bp->b_data = Malloc(bp->b_bcount); /* get a buffer to put it in */
|
||||
if (bp->b_data == NULL) { /* failed */
|
||||
Debugger("XXX");
|
||||
abortrequest(rqe->rqg->rq, ENOMEM);
|
||||
return REQUEST_ENOMEM; /* no memory */
|
||||
}
|
||||
} else
|
||||
/* Point directly to user buffer data. This means
|
||||
* that we don't need to do anything when we have
|
||||
* finished the transfer */
|
||||
bp->b_data = ubp->b_data + rqe->useroffset * DEV_BSIZE;
|
||||
return 0;
|
||||
}
|
||||
/* Abort a request: free resources and complete the
|
||||
* user request with the specified error */
|
||||
int
|
||||
abortrequest(struct request *rq, int error)
|
||||
{
|
||||
struct buf *bp = rq->bp; /* user buffer */
|
||||
|
||||
bp->b_flags |= B_ERROR;
|
||||
bp->b_error = error;
|
||||
freerq(rq); /* free everything we're doing */
|
||||
biodone(bp);
|
||||
return error; /* and give up */
|
||||
}
|
||||
|
||||
/*
 * Check that our transfer will cover the
 * complete address space of the user request.
 *
 * Return 1 if it can, otherwise 0.
 *
 * XXX Not implemented yet: the request is not looked at, and the
 * answer is always 1.
 */
int
check_range_covered(struct request *rq)
{
    (void) rq;                                  /* not examined yet */
    return 1;
}
|
||||
|
||||
/* Perform I/O on a subdisk */
|
||||
void
|
||||
sdio(struct buf *bp)
|
||||
{
|
||||
int s; /* spl */
|
||||
struct sd *sd;
|
||||
struct sdbuf *sbp;
|
||||
daddr_t endoffset;
|
||||
struct drive *drive;
|
||||
|
||||
sd = &SD[SDNO(bp->b_dev)]; /* point to the subdisk */
|
||||
drive = &DRIVE[sd->driveno];
|
||||
|
||||
if (drive->state != drive_up) { /* XXX until we get the states fixed */
|
||||
set_sd_state(SDNO(bp->b_dev), sd_obsolete, setstate_force);
|
||||
bp->b_flags |= B_ERROR;
|
||||
bp->b_error = EIO;
|
||||
biodone(bp);
|
||||
return;
|
||||
}
|
||||
/* XXX decide which states we will really accept here. up
|
||||
* implies it could be involved with a plex, in which
|
||||
* case we don't want to dick with it */
|
||||
if ((sd->state != sd_up)
|
||||
&& (sd->state != sd_initializing)
|
||||
&& (sd->state != sd_reborn)) { /* we can't access it */
|
||||
bp->b_flags |= B_ERROR;
|
||||
bp->b_flags = EIO;
|
||||
if (bp->b_flags & B_BUSY) /* XXX why isn't this always the case? */
|
||||
biodone(bp);
|
||||
return;
|
||||
}
|
||||
/* Get a buffer */
|
||||
sbp = (struct sdbuf *) Malloc(sizeof(struct sdbuf));
|
||||
if (sbp == NULL) {
|
||||
bp->b_flags |= B_ERROR;
|
||||
bp->b_error = ENOMEM;
|
||||
biodone(bp);
|
||||
return;
|
||||
}
|
||||
bcopy(bp, &sbp->b, sizeof(struct buf)); /* start with the user's buffer */
|
||||
sbp->b.b_flags |= B_CALL; /* tell us when it's done */
|
||||
sbp->b.b_iodone = sdio_done; /* here */
|
||||
sbp->b.b_dev = DRIVE[sd->driveno].dev; /* device */
|
||||
sbp->b.b_vp = DRIVE[sd->driveno].vp; /* vnode */
|
||||
sbp->b.b_blkno += sd->driveoffset;
|
||||
sbp->bp = bp; /* note the address of the original header */
|
||||
sbp->sdno = sd->sdno; /* note for statistics */
|
||||
sbp->driveno = sd->driveno;
|
||||
endoffset = bp->b_blkno + sbp->b.b_bcount / DEV_BSIZE; /* final sector offset */
|
||||
if (endoffset > sd->sectors) { /* beyond the end */
|
||||
sbp->b.b_bcount -= (endoffset - sd->sectors) * DEV_BSIZE; /* trim */
|
||||
if (sbp->b.b_bcount <= 0) { /* nothing to transfer */
|
||||
bp->b_resid = bp->b_bcount; /* nothing transferred */
|
||||
/* XXX Grrr. This doesn't seem to work. Return
|
||||
* an error after all */
|
||||
bp->b_flags |= B_ERROR;
|
||||
bp->b_error = ENOSPC;
|
||||
biodone(bp);
|
||||
Free(sbp);
|
||||
return;
|
||||
}
|
||||
}
|
||||
if ((sbp->b.b_flags & B_READ) == 0) /* write */
|
||||
sbp->b.b_vp->v_numoutput++; /* one more output going */
|
||||
#if DEBUG
|
||||
if (debug & DEBUG_ADDRESSES)
|
||||
printf(" %s dev 0x%x, sd %d, offset 0x%x, devoffset 0x%x, length %ld\n",
|
||||
sbp->b.b_flags & B_READ ? "Read" : "Write",
|
||||
sbp->b.b_dev,
|
||||
sbp->sdno,
|
||||
(u_int) (sbp->b.b_blkno - SD[sbp->sdno].driveoffset),
|
||||
(int) sbp->b.b_blkno,
|
||||
sbp->b.b_bcount); /* XXX */
|
||||
if (debug & DEBUG_NUMOUTPUT)
|
||||
printf(" vinumstart sd %d numoutput %ld\n",
|
||||
sbp->sdno,
|
||||
sbp->b.b_vp->v_numoutput);
|
||||
#endif
|
||||
s = splbio();
|
||||
(*bdevsw[major(sbp->b.b_dev)]->d_strategy) (&sbp->b);
|
||||
splx(s);
|
||||
}
|
||||
|
||||
/* Simplified version of bounds_check_with_label
|
||||
* Determine the size of the transfer, and make sure it is
|
||||
* within the boundaries of the partition. Adjust transfer
|
||||
* if needed, and signal errors or early completion.
|
||||
*
|
||||
* Volumes are simpler than disk slices: they only contain
|
||||
* one component (though we call them a, b and c to make
|
||||
* system utilities happy), and they always take up the
|
||||
* complete space of the "partition".
|
||||
*
|
||||
* I'm still not happy with this: why should the label be
|
||||
* protected? If it weren't so damned difficult to write
|
||||
* one in the first pleace (because it's protected), it wouldn't
|
||||
* be a problem.
|
||||
*/
|
||||
int
|
||||
vinum_bounds_check(struct buf *bp, struct volume *vol)
|
||||
{
|
||||
int maxsize = vol->size; /* size of the partition (sectors) */
|
||||
int size = (bp->b_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT; /* size of this request (sectors) */
|
||||
|
||||
/* Would this transfer overwrite the disk label? */
|
||||
if (bp->b_blkno <= LABELSECTOR /* starts before or at the label */
|
||||
#if LABELSECTOR != 0
|
||||
&& bp->b_blkno + size > LABELSECTOR /* and finishes after */
|
||||
#endif
|
||||
&& (!(vol->flags & VF_RAW)) /* and it's not raw */
|
||||
&&major(bp->b_dev) == BDEV_MAJOR /* and it's the block device */
|
||||
&& (bp->b_flags & B_READ) == 0 /* and it's a write */
|
||||
&& (!vol->flags & (VF_WLABEL | VF_LABELLING))) { /* and we're not allowed to write the label */
|
||||
bp->b_error = EROFS; /* read-only */
|
||||
bp->b_flags |= B_ERROR;
|
||||
return -1;
|
||||
}
|
||||
if (size == 0) /* no transfer specified, */
|
||||
return 0; /* treat as EOF */
|
||||
/* beyond partition? */
|
||||
if (bp->b_blkno < 0 /* negative start */
|
||||
|| bp->b_blkno + size > maxsize) { /* or goes beyond the end of the partition */
|
||||
/* if exactly at end of disk, return an EOF */
|
||||
if (bp->b_blkno == maxsize) {
|
||||
bp->b_resid = bp->b_bcount;
|
||||
return 0;
|
||||
}
|
||||
/* or truncate if part of it fits */
|
||||
size = maxsize - bp->b_blkno;
|
||||
if (size <= 0) { /* nothing to transfer */
|
||||
bp->b_error = EINVAL;
|
||||
bp->b_flags |= B_ERROR;
|
||||
return -1;
|
||||
}
|
||||
bp->b_bcount = size << DEV_BSHIFT;
|
||||
}
|
||||
bp->b_pblkno = bp->b_blkno;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Allocate a request group and hook
|
||||
* it in in the list for rq */
|
||||
struct rqgroup *
|
||||
allocrqg(struct request *rq, int elements)
|
||||
{
|
||||
struct rqgroup *rqg; /* the one we're going to allocate */
|
||||
int size = sizeof(struct rqgroup) + elements * sizeof(struct rqelement);
|
||||
|
||||
rqg = (struct rqgroup *) Malloc(size);
|
||||
if (rqg != NULL) { /* malloc OK, */
|
||||
if (rq->rqg) /* we already have requests */
|
||||
rq->lrqg->next = rqg; /* hang it off the end */
|
||||
else /* first request */
|
||||
rq->rqg = rqg; /* at the start */
|
||||
rq->lrqg = rqg; /* this one is the last in the list */
|
||||
|
||||
bzero(rqg, size); /* no old junk */
|
||||
rqg->rq = rq; /* point back to the parent request */
|
||||
rqg->count = elements; /* number of requests in the group */
|
||||
} else
|
||||
Debugger("XXX");
|
||||
return rqg;
|
||||
}
|
||||
|
||||
/* Deallocate a request group out of a chain. We do
|
||||
* this by linear search: the chain is short, this
|
||||
* almost never happens, and currently it can only
|
||||
* happen to the first member of the chain. */
|
||||
void
|
||||
deallocrqg(struct rqgroup *rqg)
|
||||
{
|
||||
struct rqgroup *rqgc = rqg->rq->rqg; /* point to the request chain */
|
||||
|
||||
if (rqg->rq->rqg == rqg) /* we're first in line */
|
||||
rqg->rq->rqg = rqg->next; /* unhook ourselves */
|
||||
else {
|
||||
while (rqgc->next != rqg) /* find the group */
|
||||
rqgc = rqgc->next;
|
||||
rqgc->next = rqg->next;
|
||||
}
|
||||
Free(rqgc);
|
||||
}
|
||||
|
||||
/* Character device interface */
|
||||
int
|
||||
vinumread(dev_t dev, struct uio *uio, int ioflag)
|
||||
{
|
||||
return (physio(vinumstrategy, NULL, dev, 1, minphys, uio));
|
||||
}
|
||||
|
||||
int
|
||||
vinumwrite(dev_t dev, struct uio *uio, int ioflag)
|
||||
{
|
||||
return (physio(vinumstrategy, NULL, dev, 0, minphys, uio));
|
||||
}
|
159
lkm/vinum/request.h
Normal file
159
lkm/vinum/request.h
Normal file
@ -0,0 +1,159 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: request.h,v 1.10 1998/08/03 07:15:26 grog Exp grog $
|
||||
*/
|
||||
|
||||
/* Information needed to set up a transfer */
|
||||
|
||||
/*
 * Default number of subrequests per transfer.
 *
 * struct buf is surprisingly big (about 300 bytes), and it's part
 * of every request, so this value is really important.  Most
 * requests don't need more than 2 subrequests per plex.  The table
 * is automatically extended if this value is too small.
 */
#define RQELTS 2
|
||||
|
||||
/*
 * Flag bits describing a transfer, followed by some useful
 * combinations of them.
 */
enum xferinfo {
    XFR_NORMAL_READ      = 0x0001,
    XFR_NORMAL_WRITE     = 0x0002,                           /* write request in normal mode */
    XFR_RECOVERY_READ    = 0x0004,
    XFR_DEGRADED_WRITE   = 0x0008,
    XFR_PARITYLESS_WRITE = 0x0010,
    XFR_NO_PARITY_STRIPE = 0x0020,                           /* parity stripe is not available */
    XFR_DATA_BLOCK       = 0x0040,                           /* data block in request */
    XFR_PARITY_BLOCK     = 0x0080,                           /* parity block in request */
    XFR_BAD_SUBDISK      = 0x0100,                           /* this subdisk is dead */
    XFR_MALLOCED         = 0x0200,                           /* this buffer is malloced */
#if DEBUG
    XFR_PHASE2           = 0x0800,                           /* documentation only: 2nd phase write */
#endif
    XFR_REVIVECONFLICT   = 0x1000,                           /* possible conflict with a revive operation */

    /* operations that need a parity block */
    XFR_PARITYOP = (XFR_NORMAL_WRITE | XFR_RECOVERY_READ | XFR_DEGRADED_WRITE),

    /* operations that use the group parameters */
    XFR_GROUPOP = (XFR_RECOVERY_READ | XFR_DEGRADED_WRITE),

    /* operations that use the data parameters */
    XFR_DATAOP = (XFR_NORMAL_READ | XFR_NORMAL_WRITE | XFR_PARITYLESS_WRITE),

    /* operations requiring read before write */
    XFR_RBW = (XFR_NORMAL_WRITE | XFR_DEGRADED_WRITE),

    /* operations that need a malloced buffer */
    XFR_NEEDS_MALLOC = (XFR_NORMAL_WRITE | XFR_RECOVERY_READ | XFR_DEGRADED_WRITE)
};
|
||||
|
||||
/* Describe one low-level request, part
|
||||
* of a high-level request. This is an
|
||||
* extended struct buf buffer, and the first
|
||||
* element *must* be a struct buf. We pass this structure
|
||||
* to the I/O routines instead of a struct buf in oder
|
||||
* to be able to locate the high-level request when it
|
||||
* completes.
|
||||
*
|
||||
* All offsets and lengths are in "blocks", i.e. sectors */
|
||||
struct rqelement {
|
||||
struct buf b; /* buf structure */
|
||||
struct rqgroup *rqg; /* pointer to our group */
|
||||
/* Information about the transfer */
|
||||
daddr_t sdoffset; /* offset in subdisk */
|
||||
int useroffset; /* offset in user buffer of normal data */
|
||||
/* dataoffset and datalen refer to "individual"
|
||||
* data transfers (normal read, parityless write)
|
||||
* and also degraded write.
|
||||
*
|
||||
* groupoffset and grouplen refer to the other
|
||||
* "group" operations (normal write, recovery read)
|
||||
* Both the offsets are relative to the start of the
|
||||
* local buffer */
|
||||
int dataoffset; /* offset in buffer of the normal data */
|
||||
int groupoffset; /* offset in buffer of group data */
|
||||
short datalen; /* length of normal data (sectors) */
|
||||
short grouplen; /* length of group data (sectors) */
|
||||
short buflen; /* total buffer length to allocate */
|
||||
short flags; /* really enum xferinfo (see above) */
|
||||
/* Ways to find other components */
|
||||
short sdno; /* subdisk number */
|
||||
short driveno; /* drive number */
|
||||
};
|
||||
|
||||
/* A group of requests built to satisfy a certain
|
||||
* component of a user request */
|
||||
struct rqgroup {
|
||||
struct rqgroup *next; /* pointer to next group */
|
||||
struct request *rq; /* pointer to the request */
|
||||
short count; /* number of requests in this group */
|
||||
short active; /* and number active */
|
||||
short plexno; /* index of plex */
|
||||
int badsdno; /* index of bad subdisk or -1 */
|
||||
enum xferinfo flags; /* description of transfer */
|
||||
struct rqelement rqe[0]; /* and the elements of this request */
|
||||
};
|
||||
|
||||
/*
 * One high-level request and the work we have to do to satisfy it.
 */
struct request {
    /* pointer to the high-level request */
    struct buf *bp;
    int flags;
    /* which object the request is addressed to: see isplex */
    union {
        /* volume index */
        int volno;
        /* or plex index */
        int plexno;
    } volplex;
    /* current error indication */
    int error;
    /* set if this is a plex request */
    short isplex;
    /* number of subrequests still active */
    short active;
    /* pointer to the first group of requests */
    struct rqgroup *rqg;
    /* and to the last group of requests (allocrqg appends here) */
    struct rqgroup *lrqg;
    /* link of waiting requests */
    struct request *next;
};
|
||||
|
||||
/* Extended buffer header for subdisk I/O. Includes
|
||||
* a pointer to the user I/O request. */
|
||||
struct sdbuf {
|
||||
struct buf b; /* our buffer */
|
||||
struct buf *bp; /* and pointer to parent */
|
||||
short driveno; /* drive index */
|
||||
short sdno; /* and subdisk index */
|
||||
};
|
||||
|
||||
/*
 * Values returned by rqe and friends.
 *
 * Be careful with these: they are in order of increasing
 * seriousness, and the numeric order matters.  Some routines check
 * for > REQUEST_RECOVERED to indicate a completely failed request.
 */
enum requeststatus {
    REQUEST_OK = 0,                                          /* request built OK */
    REQUEST_RECOVERED = 1,                                   /* request OK, but involves RAID5 recovery */
    REQUEST_EOF = 2,                                         /* request failed: outside plex */
    REQUEST_DOWN = 3,                                        /* request failed: subdisk down */
    REQUEST_ENOMEM = 4                                       /* ran out of memory */
};
|
128
lkm/vinum/revive.c
Normal file
128
lkm/vinum/revive.c
Normal file
@ -0,0 +1,128 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: revive.c,v 1.1 1998/08/14 06:16:59 grog Exp grog $
|
||||
*/
|
||||
|
||||
#define REALLYKERNEL
|
||||
#include "vinumhdr.h"
|
||||
#include "request.h"
|
||||
|
||||
/* revive a block of a plex. Return an error
|
||||
* indication. EAGAIN means successful copy, but
|
||||
* that more blocks remain to be copied.
|
||||
* XXX We should specify a block size here. At the moment,
|
||||
* just take a default value. FIXME */
|
||||
int
|
||||
revive_block(int plexno)
|
||||
{
|
||||
struct plex *plex = &PLEX[plexno];
|
||||
struct buf *bp;
|
||||
int error = EAGAIN;
|
||||
int size;
|
||||
int s; /* priority level */
|
||||
|
||||
if (plex->revive_blocksize == 0) {
|
||||
if (plex->stripesize != 0) /* we're striped, don't revive more than */
|
||||
plex->revive_blocksize = min(DEFAULT_REVIVE_BLOCKSIZE, plex->stripesize); /* one block at a time */
|
||||
else
|
||||
plex->revive_blocksize = DEFAULT_REVIVE_BLOCKSIZE;
|
||||
}
|
||||
size = min(plex->revive_blocksize, plex->length - plex->revived) << DEV_BSHIFT;
|
||||
|
||||
s = splbio();
|
||||
/* Get a buffer */
|
||||
bp = geteblk(size);
|
||||
if (bp == NULL) {
|
||||
splx(s);
|
||||
return ENOMEM;
|
||||
}
|
||||
if (bp->b_qindex != 0) /* on a queue, */
|
||||
bremfree(bp); /* remove it */
|
||||
splx(s);
|
||||
|
||||
/* Amount to transfer: block size, unless it
|
||||
* would overlap the end */
|
||||
bp->b_bufsize = size;
|
||||
bp->b_bcount = bp->b_bufsize;
|
||||
bp->b_resid = 0x0;
|
||||
bp->b_blkno = plex->revived; /* we've got this far */
|
||||
|
||||
/* XXX what about reviving anonymous plexes? */
|
||||
|
||||
/* First, read the data from the volume. We don't
|
||||
* care which plex, that's bre's job */
|
||||
bp->b_dev = VINUMBDEV(plex->volno, 0, 0, VINUM_VOLUME_TYPE); /* create the device number */
|
||||
bp->b_flags = B_BUSY | B_READ;
|
||||
vinumstart(bp, 1);
|
||||
biowait(bp);
|
||||
if (bp->b_flags & B_ERROR)
|
||||
error = bp->b_error;
|
||||
else
|
||||
/* Now write to the plex */
|
||||
{
|
||||
s = splbio();
|
||||
if (bp->b_qindex != 0) /* on a queue, */
|
||||
bremfree(bp); /* remove it */
|
||||
splx(s);
|
||||
bp->b_dev = VINUMBDEV(plex->volno, plex->volplexno, 0, VINUM_PLEX_TYPE); /* create the device number */
|
||||
|
||||
bp->b_flags = B_BUSY; /* make this a write */
|
||||
bp->b_resid = 0x0;
|
||||
vinumstart(bp, 1);
|
||||
biowait(bp);
|
||||
if (bp->b_flags & B_ERROR)
|
||||
error = bp->b_error;
|
||||
else {
|
||||
plex->revived += bp->b_bcount >> DEV_BSHIFT; /* moved this much further down */
|
||||
if (plex->revived >= plex->length) { /* finished */
|
||||
plex->revived = 0;
|
||||
plex->state = plex_up; /* do we need to do more? */
|
||||
if (plex->volno >= 0) /* we have a volume, */
|
||||
set_volume_state(plex->volno, volume_up, 0);
|
||||
printf("vinum: plex %s is %s\n", plex->name, plex_state(plex->state));
|
||||
save_config(); /* and save the updated configuration */
|
||||
error = 0; /* we're done */
|
||||
}
|
||||
}
|
||||
while (plex->waitlist) { /* we have waiting requests */
|
||||
launch_requests(plex->waitlist, 1); /* do them now */
|
||||
plex->waitlist = plex->waitlist->next; /* and move on to the next */
|
||||
}
|
||||
}
|
||||
if (bp->b_qindex == 0) /* not on a queue, */
|
||||
brelse(bp); /* is this kosher? */
|
||||
return error;
|
||||
}
|
755
lkm/vinum/state.c
Normal file
755
lkm/vinum/state.c
Normal file
@ -0,0 +1,755 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: state.c,v 2.6 1998/08/19 08:04:47 grog Exp grog $
|
||||
*/
|
||||
|
||||
#define REALLYKERNEL
|
||||
#include "vinumhdr.h"
|
||||
#include "request.h"
|
||||
|
||||
/* Update drive state */
|
||||
/* Return 1 if the state changes, otherwise 0 */
|
||||
int
|
||||
set_drive_state(int driveno, enum drivestate state, int flags)
|
||||
{
|
||||
struct drive *drive = &DRIVE[driveno];
|
||||
int oldstate = drive->state;
|
||||
int sdno;
|
||||
|
||||
if (drive->state == drive_unallocated) /* no drive to do anything with, */
|
||||
return 0;
|
||||
|
||||
if (state != oldstate) { /* don't change it if it's not different */
|
||||
if (state == drive_down) { /* the drive's going down */
|
||||
if (flags || (drive->opencount == 0)) { /* we can do it */
|
||||
close_drive(drive);
|
||||
drive->state = state;
|
||||
printf("vinum: drive %s is %s\n", drive->label.name, drive_state(drive->state));
|
||||
} else
|
||||
return 0; /* don't do it */
|
||||
}
|
||||
drive->state = state; /* set the state */
|
||||
if (((drive->state == drive_up)
|
||||
|| ((drive->state == drive_coming_up)))
|
||||
&& (drive->vp == NULL)) /* should be open, but we're not */
|
||||
init_drive(drive); /* which changes the state again */
|
||||
if ((state != oldstate) /* state has changed */
|
||||
&&((flags & setstate_norecurse) == 0)) { /* and we want to recurse, */
|
||||
for (sdno = 0; sdno < vinum_conf.subdisks_used; sdno++) { /* find this drive's subdisks */
|
||||
if (SD[sdno].driveno == driveno) /* belongs to this drive */
|
||||
set_sd_state(sdno, sd_down, setstate_force | setstate_recursing); /* take it down */
|
||||
}
|
||||
save_config(); /* and save the updated configuration */
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Try to set the subdisk state. Return 1 if state changed to
|
||||
* what we wanted, -1 if it changed to something else, and 0
|
||||
* if no change.
|
||||
*
|
||||
* This routine is called both from the user (up, down states
|
||||
* only) and internally.
|
||||
*/
|
||||
int
|
||||
set_sd_state(int sdno, enum sdstate state, enum setstateflags flags)
|
||||
{
|
||||
struct sd *sd = &SD[sdno];
|
||||
int oldstate = sd->state;
|
||||
int status = 1; /* status to return */
|
||||
|
||||
if (state == oldstate)
|
||||
return 0; /* no change */
|
||||
|
||||
if (sd->state == sd_unallocated) /* no subdisk to do anything with, */
|
||||
return 0;
|
||||
|
||||
if (sd->driveoffset < 0) { /* not allocated space */
|
||||
sd->state = sd_down;
|
||||
if (state != sd_down)
|
||||
return -1;
|
||||
} else { /* space allocated */
|
||||
switch (state) {
|
||||
case sd_down:
|
||||
if ((!flags & setstate_force) /* but gently */
|
||||
&&(sd->plexno >= 0)) /* and we're attached to a plex, */
|
||||
return 0; /* don't do it */
|
||||
break;
|
||||
|
||||
case sd_up:
|
||||
if (DRIVE[sd->driveno].state != drive_up) /* can't bring the sd up if the drive isn't, */
|
||||
return 0; /* not even by force */
|
||||
switch (sd->state) {
|
||||
case sd_obsolete:
|
||||
case sd_down: /* been down, no data lost */
|
||||
if ((sd->plexno) /* we're associated with a plex */
|
||||
&&(((PLEX[sd->plexno].state < plex_firstup) /* and it's not up */
|
||||
||(PLEX[sd->plexno].subdisks > 1)))) /* or it's the only one */
|
||||
break;
|
||||
/* XXX Get this right: make sure that other plexes in
|
||||
* the volume cover this address space, otherwise
|
||||
* we make this one sd_up */
|
||||
sd->state = sd_reborn; /* here it is again */
|
||||
printf("vinum: subdisk %s is %s, not %s\n", sd->name, sd_state(sd->state), sd_state(state));
|
||||
status = -1;
|
||||
break;
|
||||
|
||||
case sd_init: /* brand new */
|
||||
if (flags & setstate_configuring) /* we're doing this while configuring */
|
||||
break;
|
||||
sd->state = sd_empty; /* nothing in it */
|
||||
printf("vinum: subdisk %s is %s, not %s\n", sd->name, sd_state(sd->state), sd_state(state));
|
||||
status = -1;
|
||||
break;
|
||||
|
||||
case sd_initializing:
|
||||
break; /* go on and do it */
|
||||
|
||||
case sd_empty:
|
||||
if ((sd->plexno) /* we're associated with a plex */
|
||||
&&(((PLEX[sd->plexno].state < plex_firstup) /* and it's not up */
|
||||
||(PLEX[sd->plexno].subdisks > 1)))) /* or it's the only one */
|
||||
break;
|
||||
return 0; /* can't do it */
|
||||
|
||||
default: /* can't do it */
|
||||
/* There's no way to bring subdisks up directly from
|
||||
* other states. First they need to be initialized
|
||||
* or revived */
|
||||
return 0;
|
||||
}
|
||||
break;
|
||||
|
||||
default: /* other ones, only internal with force */
|
||||
if (flags & setstate_force == 0) /* no force? What's this? */
|
||||
return 0; /* don't do it */
|
||||
}
|
||||
}
|
||||
sd->state = state;
|
||||
printf("vinum: subdisk %s is %s\n", sd->name, sd_state(sd->state));
|
||||
if ((flags & setstate_norecurse) == 0)
|
||||
set_plex_state(sd->plexno, plex_up, setstate_recursing); /* update plex state */
|
||||
if ((flags & (setstate_configuring | setstate_recursing)) == 0) /* save config now */
|
||||
save_config();
|
||||
return status;
|
||||
}
|
||||
|
||||
/* Called from request routines when they find
|
||||
* a subdisk which is not kosher. Decide whether
|
||||
* it warrants changing the state. Return
|
||||
* REQUEST_DOWN if we can't use the subdisk,
|
||||
* REQUEST_OK if we can. */
|
||||
enum requeststatus
|
||||
checksdstate(struct sd *sd, struct request *rq, daddr_t diskaddr, daddr_t diskend)
|
||||
{
|
||||
struct plex *plex = &PLEX[sd->plexno];
|
||||
int writeop = (rq->bp->b_flags & B_READ) == 0; /* note if we're writing */
|
||||
|
||||
/* first, see if the plex wants to be accessed */
|
||||
switch (plex->state) {
|
||||
case plex_reviving:
|
||||
/* When writing, we'll write anything that starts
|
||||
* up to the current revive pointer, but we'll
|
||||
* only accept a read which finishes before the
|
||||
* current revive pointer.
|
||||
*/
|
||||
if ((writeop && (diskaddr > plex->revived)) /* write starts after current revive pointer */
|
||||
||((!writeop) && (diskend >= plex->revived))) { /* or read ends after current revive pointer */
|
||||
if (writeop) { /* writing to a consistent down disk */
|
||||
if (DRIVE[sd->driveno].state == drive_up)
|
||||
set_sd_state(sd->sdno, sd_stale, setstate_force); /* it's not consistent now */
|
||||
else
|
||||
set_sd_state(sd->sdno, sd_obsolete, setstate_force); /* it's not consistent now */
|
||||
}
|
||||
return REQUEST_DOWN; /* that part of the plex is still down */
|
||||
} else if (diskend >= plex->revived) /* write finishes beyond revive pointer */
|
||||
rq->flags |= XFR_REVIVECONFLICT; /* note a potential conflict */
|
||||
/* FALLTHROUGH */
|
||||
|
||||
case plex_up:
|
||||
case plex_degraded:
|
||||
case plex_flaky:
|
||||
/* We can access the plex: let's see
|
||||
* how the subdisk feels */
|
||||
switch (sd->state) {
|
||||
case sd_up:
|
||||
return REQUEST_OK;
|
||||
|
||||
case sd_reborn:
|
||||
if (writeop)
|
||||
return REQUEST_OK; /* always write to a reborn disk */
|
||||
/* Handle the mapping. We don't want to reject
|
||||
* a read request to a reborn subdisk if that's
|
||||
* all we have. XXX */
|
||||
return REQUEST_DOWN;
|
||||
|
||||
case sd_down:
|
||||
case sd_crashed:
|
||||
if (writeop) { /* writing to a consistent down disk */
|
||||
if (DRIVE[sd->driveno].state == drive_up)
|
||||
set_sd_state(sd->sdno, sd_stale, setstate_force); /* it's not consistent now */
|
||||
else
|
||||
set_sd_state(sd->sdno, sd_obsolete, setstate_force); /* it's not consistent now */
|
||||
}
|
||||
return REQUEST_DOWN; /* and it's down one way or another */
|
||||
|
||||
default:
|
||||
return REQUEST_DOWN;
|
||||
}
|
||||
|
||||
default:
|
||||
return REQUEST_DOWN;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
add_defective_region(struct plex *plex, off_t offset, size_t length)
|
||||
{
|
||||
/* XXX get this ordered, and coalesce regions if necessary */
|
||||
if (++plex->defective_regions > plex->defective_region_count)
|
||||
EXPAND(plex->defective_region,
|
||||
struct plexregion,
|
||||
plex->defective_region_count,
|
||||
PLEX_REGION_TABLE_SIZE);
|
||||
plex->defective_region[plex->defective_regions - 1].offset = offset;
|
||||
plex->defective_region[plex->defective_regions - 1].length = length;
|
||||
}
|
||||
|
||||
void
|
||||
add_unmapped_region(struct plex *plex, off_t offset, size_t length)
|
||||
{
|
||||
if (++plex->unmapped_regions > plex->unmapped_region_count)
|
||||
EXPAND(plex->unmapped_region,
|
||||
struct plexregion,
|
||||
plex->unmapped_region_count,
|
||||
PLEX_REGION_TABLE_SIZE);
|
||||
plex->unmapped_region[plex->unmapped_regions - 1].offset = offset;
|
||||
plex->unmapped_region[plex->unmapped_regions - 1].length = length;
|
||||
}
|
||||
|
||||
/* Rebuild a plex free list and set state if
|
||||
* we have a configuration error */
|
||||
void
|
||||
rebuild_plex_unmappedlist(struct plex *plex)
|
||||
{
|
||||
int sdno;
|
||||
struct sd *sd;
|
||||
int lastsdend = 0; /* end offset of last subdisk */
|
||||
|
||||
if (plex->unmapped_region != NULL) { /* we're going to rebuild it */
|
||||
Free(plex->unmapped_region);
|
||||
plex->unmapped_region = NULL;
|
||||
plex->unmapped_regions = 0;
|
||||
plex->unmapped_region_count = 0;
|
||||
}
|
||||
if (plex->defective_region != NULL) {
|
||||
Free(plex->defective_region);
|
||||
plex->defective_region = NULL;
|
||||
plex->defective_regions = 0;
|
||||
plex->defective_region_count = 0;
|
||||
}
|
||||
for (sdno = 0; sdno < plex->subdisks; sdno++) {
|
||||
sd = &SD[plex->sdnos[sdno]];
|
||||
if (sd->plexoffset < lastsdend) { /* overlap */
|
||||
printf("vinum: Plex %s, subdisk %s overlaps previous\n", plex->name, sd->name);
|
||||
set_plex_state(plex->plexno, plex_down, setstate_force); /* don't allow that */
|
||||
} else if (sd->plexoffset > lastsdend) /* gap */
|
||||
add_unmapped_region(plex, lastsdend, sd->plexoffset - lastsdend);
|
||||
else if (sd->state < sd_reborn) /* this part defective */
|
||||
add_defective_region(plex, sd->plexoffset, sd->sectors);
|
||||
lastsdend = sd->plexoffset + sd->sectors;
|
||||
}
|
||||
}
|
||||
|
||||
/* return a state map for the subdisks of a plex */
|
||||
enum sdstates
|
||||
sdstatemap(struct plex *plex, int *sddowncount)
|
||||
{
|
||||
int sdno;
|
||||
enum sdstates statemap = 0; /* note the states we find */
|
||||
|
||||
*sddowncount = 0; /* no subdisks down yet */
|
||||
for (sdno = 0; sdno < plex->subdisks; sdno++) {
|
||||
struct sd *sd = &SD[plex->sdnos[sdno]]; /* point to the subdisk */
|
||||
|
||||
switch (sd->state) {
|
||||
case sd_empty:
|
||||
statemap |= sd_emptystate;
|
||||
(*sddowncount)++; /* another unusable subdisk */
|
||||
break;
|
||||
|
||||
case sd_init:
|
||||
statemap |= sd_initstate;
|
||||
(*sddowncount)++; /* another unusable subdisk */
|
||||
break;
|
||||
|
||||
case sd_down:
|
||||
statemap |= sd_downstate;
|
||||
(*sddowncount)++; /* another unusable subdisk */
|
||||
break;
|
||||
|
||||
case sd_crashed:
|
||||
statemap |= sd_crashedstate;
|
||||
(*sddowncount)++; /* another unusable subdisk */
|
||||
break;
|
||||
|
||||
case sd_obsolete:
|
||||
statemap |= sd_obsolete;
|
||||
(*sddowncount)++; /* another unusable subdisk */
|
||||
break;
|
||||
|
||||
case sd_stale:
|
||||
statemap |= sd_stalestate;
|
||||
(*sddowncount)++; /* another unusable subdisk */
|
||||
break;
|
||||
|
||||
case sd_reborn:
|
||||
statemap |= sd_rebornstate;
|
||||
break;
|
||||
|
||||
case sd_up:
|
||||
statemap |= sd_upstate;
|
||||
break;
|
||||
|
||||
default:
|
||||
statemap |= sd_otherstate;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return statemap;
|
||||
}
|
||||
|
||||
/* determine the state of the volume relative to this plex */
|
||||
enum volplexstate
|
||||
vpstate(struct plex *plex)
|
||||
{
|
||||
struct volume *vol;
|
||||
enum volplexstate state = volplex_onlyusdown; /* state to return */
|
||||
int plexno;
|
||||
|
||||
if (plex->volno < 0) /* not associated with a volume */
|
||||
return volplex_onlyusdown; /* assume the worst */
|
||||
|
||||
vol = &VOL[plex->volno]; /* point to our volume */
|
||||
for (plexno = 0; plexno < vol->plexes; plexno++) {
|
||||
if (&PLEX[vol->plex[plexno]] == plex) { /* us */
|
||||
if (PLEX[vol->plex[plexno]].state == plex_up) /* are we up? */
|
||||
state |= volplex_onlyus; /* yes */
|
||||
} else {
|
||||
if (PLEX[vol->plex[plexno]].state == plex_up) /* not us */
|
||||
state |= volplex_otherup; /* and when they were up, they were up */
|
||||
else
|
||||
state |= volplex_alldown; /* and when they were down, they were down */
|
||||
}
|
||||
}
|
||||
return state; /* and when they were only halfway up */
|
||||
} /* they were neither up nor down */
|
||||
|
||||
/*
 * Check if all bits b are set in a.
 * Returns 1 if every bit set in b is also set in a, otherwise 0.
 */
int allset(int a, int b);

int
allset(int a, int b)
{
    int missing = b & ~a;                                    /* bits wanted but not present */

    return missing == 0;
}
|
||||
|
||||
/* Update the state of a plex dependent on its subdisks.
|
||||
* Also rebuild the unmapped_region and defective_region table */
|
||||
int
|
||||
set_plex_state(int plexno, enum plexstate state, enum setstateflags flags)
|
||||
{
|
||||
int sddowncount = 0; /* number of down subdisks */
|
||||
struct plex *plex = &PLEX[plexno]; /* point to our plex */
|
||||
enum plexstate oldstate = plex->state;
|
||||
enum volplexstate vps = vpstate(plex); /* how do we compare with the other plexes? */
|
||||
enum sdstates statemap = sdstatemap(plex, &sddowncount); /* get a map of the subdisk states */
|
||||
|
||||
if ((flags & setstate_force) && (oldstate == state)) /* we're there already, */
|
||||
return 0; /* no change */
|
||||
|
||||
if (plex->state == plex_unallocated) /* no plex to do anything with, */
|
||||
return 0;
|
||||
|
||||
switch (state) {
|
||||
case plex_up:
|
||||
if ((plex->state == plex_initializing) /* we're initializing */
|
||||
&&(statemap != sd_upstate)) /* but SDs aren't up yet */
|
||||
return 0; /* do nothing */
|
||||
|
||||
/* We don't really care what our state was before
|
||||
* if we want to come up. We rely entirely on the
|
||||
* state of our subdisks and our volume */
|
||||
switch (vps) {
|
||||
case volplex_onlyusdown:
|
||||
case volplex_alldown: /* another plex is down, and so are we */
|
||||
if (statemap == sd_upstate) { /* all subdisks ready for action */
|
||||
if ((plex->state == plex_init) /* we're brand spanking new */
|
||||
&&(VOL[plex->volno].flags & VF_CONFIG_SETUPSTATE)) { /* and we consider that up */
|
||||
/* Conceptually, an empty plex does not contain valid data,
|
||||
* but normally we'll see this state when we have just
|
||||
* created a plex, and it's either consistent from earlier,
|
||||
* or we don't care about the previous contents (we're going
|
||||
* to create a file system or use it for swap).
|
||||
*
|
||||
* We need to do this in one swell foop: on the next call
|
||||
* we will no longer be just empty.
|
||||
*
|
||||
* We'll still come back to this function for the remaining
|
||||
* plexes in the volume. They'll be up already, so that
|
||||
* doesn't change anything, but it's not worth the additional
|
||||
* code to stop doing it. */
|
||||
struct volume *vol = &VOL[plex->volno];
|
||||
int plexno;
|
||||
|
||||
for (plexno = 0; plexno < vol->plexes; plexno++)
|
||||
PLEX[vol->plex[plexno]].state = plex_up;
|
||||
}
|
||||
plex->state = plex_up; /* bring up up, anyway */
|
||||
} else
|
||||
plex->state = plex_down;
|
||||
break;
|
||||
|
||||
case volplex_onlyusup: /* only we are up: others are down */
|
||||
case volplex_onlyus: /* we're up and alone */
|
||||
if ((statemap == sd_upstate) /* subdisks all up */
|
||||
||(statemap == sd_emptystate)) /* or all empty */
|
||||
plex->state = plex_up; /* go for it */
|
||||
else if ((statemap & (sd_upstate | sd_reborn)) == statemap) /* all up or reborn, */
|
||||
plex->state = plex_flaky;
|
||||
else if (statemap & (sd_upstate | sd_reborn)) /* some up or reborn, */
|
||||
plex->state = plex_degraded; /* so far no corruption */
|
||||
else
|
||||
plex->state = plex_faulty;
|
||||
break;
|
||||
|
||||
case volplex_otherup: /* another plex is up */
|
||||
case volplex_otherupdown: /* other plexes are up and down */
|
||||
if ((statemap == sd_upstate) /* subdisks all up */
|
||||
||(statemap == sd_emptystate) /* or all empty */
|
||||
) {
|
||||
/* Is the data in all subdisks valid? */
|
||||
if (statemap == statemap & (sd_downstate | sd_rebornstate | sd_upstate))
|
||||
break; /* yes, we can bring the plex up */
|
||||
plex->state = plex_reviving; /* we need reviving */
|
||||
return EAGAIN;
|
||||
} else
|
||||
plex->state = plex_faulty; /* still in error */
|
||||
break;
|
||||
|
||||
case volplex_allup: /* all plexes are up */
|
||||
case volplex_someup:
|
||||
if ((statemap & (sd_upstate | sd_reborn)) == statemap) /* all up or reborn, */
|
||||
break; /* no change */
|
||||
else
|
||||
plex->state = plex_degraded; /* we're not all there */
|
||||
}
|
||||
|
||||
if (plex->state != oldstate)
|
||||
break;
|
||||
return 0; /* no change */
|
||||
|
||||
case plex_down: /* want to take it down */
|
||||
if (((vps == volplex_onlyus) /* we're the only one up */
|
||||
||(vps == volplex_onlyusup)) /* we're the only one up */
|
||||
&&(!(flags & setstate_force))) /* and we don't want to use force */
|
||||
return 0; /* can't do it */
|
||||
plex->state = state; /* do it */
|
||||
break;
|
||||
|
||||
/* This is only requested by the driver.
|
||||
* Trust ourselves */
|
||||
case plex_faulty:
|
||||
plex->state = state; /* do it */
|
||||
break;
|
||||
|
||||
case plex_initializing:
|
||||
/* XXX consider what safeguards we need here */
|
||||
if ((flags & setstate_force) == 0)
|
||||
return 0;
|
||||
plex->state = state; /* do it */
|
||||
break;
|
||||
|
||||
/* What's this? */
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
printf("vinum: plex %s is %s\n", plex->name, plex_state(plex->state));
|
||||
/* Now see what we have left, and whether
|
||||
* we're taking the volume down */
|
||||
if (plex->volno >= 0) { /* we have a volume */
|
||||
struct volume *vol = &VOL[plex->volno];
|
||||
|
||||
vps = vpstate(plex); /* get our combined state again */
|
||||
if ((flags & setstate_norecurse) == 0) { /* we can recurse */
|
||||
if ((vol->state == volume_up)
|
||||
&& (vps == volplex_alldown)) /* and we're all down */
|
||||
set_volume_state(plex->volno, volume_down, setstate_recursing); /* take our volume down */
|
||||
else if ((vol->state == volume_down)
|
||||
&& (vps & (volplex_otherup | volplex_onlyusup))) /* and at least one is up */
|
||||
set_volume_state(plex->volno, volume_up, setstate_recursing); /* bring our volume up */
|
||||
}
|
||||
}
|
||||
if ((flags & (setstate_configuring | setstate_recursing)) == 0) /* save config now */
|
||||
save_config();
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Update the state of a plex dependent on its plexes.
|
||||
* Also rebuild the unmapped_region and defective_region table */
|
||||
int
|
||||
set_volume_state(int volno, enum volumestate state, enum setstateflags flags)
|
||||
{
|
||||
int plexno;
|
||||
enum plexstates {
|
||||
plex_downstate = 1, /* found a plex which is down */
|
||||
plex_degradedstate = 2, /* found a plex which is halfway up */
|
||||
plex_upstate = 4 /* found a plex which is completely up */
|
||||
};
|
||||
|
||||
int plexstatemap = 0; /* note the states we find */
|
||||
struct volume *vol = &VOL[volno]; /* point to our volume */
|
||||
|
||||
if (vol->state == state) /* we're there already */
|
||||
return 0; /* no change */
|
||||
if (vol->state == volume_unallocated) /* no volume to do anything with, */
|
||||
return 0;
|
||||
|
||||
for (plexno = 0; plexno < vol->plexes; plexno++) {
|
||||
struct plex *plex = &PLEX[vol->plex[plexno]]; /* point to the plex */
|
||||
switch (plex->state) {
|
||||
case plex_degraded:
|
||||
case plex_flaky:
|
||||
case plex_reviving:
|
||||
plexstatemap |= plex_degradedstate;
|
||||
break;
|
||||
|
||||
case plex_up:
|
||||
plexstatemap |= plex_upstate;
|
||||
break;
|
||||
|
||||
default:
|
||||
plexstatemap |= plex_downstate;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (state == volume_up) { /* want to come up */
|
||||
if (plexstatemap & plex_upstate) { /* we have a plex which is completely up */
|
||||
vol->state = volume_up; /* did it */
|
||||
printf("vinum: volume %s is %s\n", vol->name, volume_state(vol->state));
|
||||
if ((flags & (setstate_configuring | setstate_recursing)) == 0) /* save config now */
|
||||
save_config();
|
||||
return 1;
|
||||
}
|
||||
/* Here we should check whether we have enough
|
||||
* coverage for the complete volume. Writeme XXX */
|
||||
} else if (state == volume_down) { /* want to go down */
|
||||
if ((vol->opencount == 0) /* not open */
|
||||
||(flags & setstate_force != 0)) { /* or we're forcing */
|
||||
vol->state = volume_down;
|
||||
printf("vinum: volume %s is %s\n", vol->name, volume_state(vol->state));
|
||||
if ((flags & (setstate_configuring | setstate_recursing)) == 0) /* save config now */
|
||||
save_config();
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
return 0; /* no change */
|
||||
}
|
||||
|
||||
/* Start an object, in other words do what we can to get it up.
|
||||
* This is called from vinumioctl (VINUMSTART).
|
||||
* Return error indications via ioctl_reply
|
||||
*/
|
||||
void
|
||||
start_object(struct vinum_ioctl_msg *data)
|
||||
{
|
||||
int status;
|
||||
int realstatus; /* what we really have */
|
||||
int objindex = data->index; /* data gets overwritten */
|
||||
struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* format for returning replies */
|
||||
|
||||
switch (data->type) {
|
||||
case drive_object:
|
||||
status = set_drive_state(objindex, drive_up, setstate_none);
|
||||
realstatus = DRIVE[objindex].state == drive_up; /* set status on whether we really did it */
|
||||
break;
|
||||
|
||||
case sd_object:
|
||||
status = set_sd_state(objindex, sd_up, setstate_none); /* set state */
|
||||
realstatus = SD[objindex].state == sd_up; /* set status on whether we really did it */
|
||||
break;
|
||||
|
||||
case plex_object:
|
||||
if (PLEX[objindex].state == plex_reviving) { /* reviving, */
|
||||
ioctl_reply->error = revive_block(objindex); /* revive another block */
|
||||
ioctl_reply->msg[0] = '\0'; /* no comment */
|
||||
return;
|
||||
}
|
||||
status = set_plex_state(objindex, plex_up, setstate_none);
|
||||
realstatus = PLEX[objindex].state == plex_up; /* set status on whether we really did it */
|
||||
break;
|
||||
|
||||
case volume_object:
|
||||
status = set_volume_state(objindex, volume_up, setstate_none);
|
||||
realstatus = VOL[objindex].state == volume_up; /* set status on whether we really did it */
|
||||
break;
|
||||
|
||||
default:
|
||||
ioctl_reply->error = EINVAL;
|
||||
strcpy(ioctl_reply->msg, "Invalid object type");
|
||||
return;
|
||||
}
|
||||
/* There's no point in saying anything here:
|
||||
* the userland program does it better */
|
||||
ioctl_reply->msg[0] = '\0';
|
||||
if (realstatus == 0) /* couldn't do it */
|
||||
ioctl_reply->error = EINVAL;
|
||||
else
|
||||
ioctl_reply->error = 0;
|
||||
}
|
||||
|
||||
/* Stop an object, in other words do what we can to get it down
|
||||
* This is called from vinumioctl (VINUMSTOP).
|
||||
* Return error indications via ioctl_reply.
|
||||
*/
|
||||
void
|
||||
stop_object(struct vinum_ioctl_msg *data)
|
||||
{
|
||||
int status = 1;
|
||||
int objindex = data->index; /* save the number from change */
|
||||
struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* format for returning replies */
|
||||
|
||||
switch (data->type) {
|
||||
case drive_object:
|
||||
status = set_drive_state(objindex, drive_down, data->force);
|
||||
break;
|
||||
|
||||
case sd_object:
|
||||
status = set_sd_state(objindex, sd_down, data->force);
|
||||
break;
|
||||
|
||||
case plex_object:
|
||||
status = set_plex_state(objindex, plex_down, data->force);
|
||||
break;
|
||||
|
||||
case volume_object:
|
||||
status = set_volume_state(objindex, volume_down, data->force);
|
||||
break;
|
||||
|
||||
default:
|
||||
ioctl_reply->error = EINVAL;
|
||||
strcpy(ioctl_reply->msg, "Invalid object type");
|
||||
return;
|
||||
}
|
||||
ioctl_reply->msg[0] = '\0';
|
||||
if (status == 0) /* couldn't do it */
|
||||
ioctl_reply->error = EINVAL;
|
||||
else
|
||||
ioctl_reply->error = 0;
|
||||
}
|
||||
|
||||
/* VINUM_SETSTATE ioctl: set an object state
|
||||
* msg is the message passed by the user */
|
||||
void
|
||||
setstate(struct vinum_ioctl_msg *msg)
|
||||
{
|
||||
int sdno;
|
||||
struct sd *sd;
|
||||
struct plex *plex;
|
||||
struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) msg; /* format for returning replies */
|
||||
|
||||
switch (msg->state) {
|
||||
case object_down:
|
||||
stop_object(msg);
|
||||
break;
|
||||
|
||||
case object_initializing:
|
||||
switch (msg->type) {
|
||||
case sd_object:
|
||||
sd = &SD[msg->index];
|
||||
if ((msg->index >= vinum_conf.subdisks_used)
|
||||
|| (sd->state == sd_unallocated)) {
|
||||
sprintf(ioctl_reply->msg, "Invalid subdisk %d", msg->index);
|
||||
ioctl_reply->error = EFAULT;
|
||||
return;
|
||||
}
|
||||
set_sd_state(msg->index, sd_initializing, msg->force);
|
||||
if (sd->state != sd_initializing) {
|
||||
strcpy(ioctl_reply->msg, "Can't set state");
|
||||
ioctl_reply->error = EINVAL;
|
||||
} else
|
||||
ioctl_reply->error = 0;
|
||||
break;
|
||||
|
||||
case plex_object:
|
||||
plex = &PLEX[msg->index];
|
||||
if ((msg->index >= vinum_conf.plexes_used)
|
||||
|| (plex->state == plex_unallocated)) {
|
||||
sprintf(ioctl_reply->msg, "Invalid subdisk %d", msg->index);
|
||||
ioctl_reply->error = EFAULT;
|
||||
return;
|
||||
}
|
||||
set_plex_state(msg->index, plex_initializing, msg->force);
|
||||
if (plex->state != plex_initializing) {
|
||||
strcpy(ioctl_reply->msg, "Can't set state");
|
||||
ioctl_reply->error = EINVAL;
|
||||
} else {
|
||||
ioctl_reply->error = 0;
|
||||
for (sdno = 0; sdno < plex->subdisks; sdno++) {
|
||||
sd = &SD[plex->sdnos[sdno]];
|
||||
set_sd_state(plex->sdnos[sdno], sd_initializing, msg->force);
|
||||
if (sd->state != sd_initializing) {
|
||||
strcpy(ioctl_reply->msg, "Can't set state");
|
||||
ioctl_reply->error = EINVAL;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
strcpy(ioctl_reply->msg, "Invalid object");
|
||||
ioctl_reply->error = EINVAL;
|
||||
}
|
||||
break;
|
||||
|
||||
case object_up:
|
||||
start_object(msg);
|
||||
}
|
||||
}
|
88
lkm/vinum/statetexts.h
Normal file
88
lkm/vinum/statetexts.h
Normal file
@ -0,0 +1,88 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: COPYRIGHT,v 1.1 1998/03/05 06:07:05 grog Exp grog $
|
||||
*/
|
||||
/* Created by ./makestatetext on Tue 4 Aug 15:53:16 CST 1998. Do not edit */
|
||||
|
||||
/* Drive state texts.  drive_state() in util.c indexes this table with
 * the numeric value of the state, and DriveState() searches it */
char *drivestatetext[] = {
    "unallocated",                                          /* 0 */
    "uninit",                                               /* 1 */
    "down",                                                 /* 2 */
    "coming_up",                                            /* 3 */
    "up"                                                    /* 4 */
};
|
||||
|
||||
/* Subdisk state texts.  sd_state() in util.c indexes this table with
 * the numeric value of the state, and SdState() searches it */
char *sdstatetext[] = {
    "unallocated",                                          /* 0 */
    "uninit",                                               /* 1 */
    "init",                                                 /* 2 */
    "initializing",                                         /* 3 */
    "empty",                                                /* 4 */
    "obsolete",                                             /* 5 */
    "stale",                                                /* 6 */
    "crashed",                                              /* 7 */
    "down",                                                 /* 8 */
    "reborn",                                               /* 9 */
    "up"                                                    /* 10 */
};
|
||||
|
||||
/* Plex state texts.  plex_state() in util.c indexes this table with
 * the numeric value of the state, and PlexState() searches it */
char *plexstatetext[] = {
    "unallocated",                                          /* 0 */
    "init",                                                 /* 1 */
    "faulty",                                               /* 2 */
    "down",                                                 /* 3 */
    "reviving",                                             /* 4 */
    "initializing",                                         /* 5 */
    "corrupt",                                              /* 6 */
    "degraded",                                             /* 7 */
    "flaky",                                                /* 8 */
    "up"                                                    /* 9 */
};
|
||||
|
||||
/* Volume state texts.  volume_state() in util.c indexes this table with
 * the numeric value of the state, and VolState() searches it */
char *volstatetext[] = {
    "unallocated",                                          /* 0 */
    "uninit",                                               /* 1 */
    "down",                                                 /* 2 */
    "up"                                                    /* 3 */
};
|
211
lkm/vinum/util.c
Normal file
211
lkm/vinum/util.c
Normal file
@ -0,0 +1,211 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: util.c,v 1.7 1998/08/07 09:23:10 grog Exp grog $
|
||||
*/
|
||||
|
||||
/* This file contains utility routines used both in kernel and user context */
|
||||
|
||||
#include "vinumhdr.h"
|
||||
#include "statetexts.h"
|
||||
#ifndef REALLYKERNEL
|
||||
#include <stdio.h>
|
||||
extern jmp_buf command_fail; /* return on a failed command */
|
||||
#endif
|
||||
|
||||
/* Scratch buffer shared by the *_state() routines and plex_org() below:
 * they format their "Invalid ..." replies into it and return a pointer
 * to it, so each such reply is overwritten by the next one */
static char numeric_state[32]; /* temporary buffer for ASCII conversions */
|
||||
/* Number of entries in the table <x>statetext from statetexts.h,
 * for example STATECOUNT(drive) for drivestatetext */
#define STATECOUNT(x) (sizeof (x##statetext) / sizeof (char *))
|
||||
/* Return drive state as a string */
|
||||
char *
|
||||
drive_state(enum drivestate state)
|
||||
{
|
||||
if (((unsigned) state) >= STATECOUNT(drive)) {
|
||||
sprintf(numeric_state, "Invalid state %d", (int) state);
|
||||
return numeric_state;
|
||||
} else
|
||||
return drivestatetext[state];
|
||||
}
|
||||
|
||||
/* Return volume state as a string */
|
||||
char *
|
||||
volume_state(enum volumestate state)
|
||||
{
|
||||
if (((unsigned) state) >= STATECOUNT(vol)) {
|
||||
sprintf(numeric_state, "Invalid state %d", (int) state);
|
||||
return numeric_state;
|
||||
} else
|
||||
return volstatetext[state];
|
||||
}
|
||||
|
||||
/* Return plex state as a string */
|
||||
char *
|
||||
plex_state(enum plexstate state)
|
||||
{
|
||||
if (((unsigned) state) >= STATECOUNT(plex)) {
|
||||
sprintf(numeric_state, "Invalid state %d", (int) state);
|
||||
return numeric_state;
|
||||
} else
|
||||
return plexstatetext[state];
|
||||
}
|
||||
|
||||
/* Return plex organization as a string */
|
||||
char *
|
||||
plex_org(enum plexorg org)
|
||||
{
|
||||
switch (org) {
|
||||
case plex_disorg: /* disorganized */
|
||||
return "disorg";
|
||||
break;
|
||||
|
||||
case plex_concat: /* concatenated plex */
|
||||
return "concat";
|
||||
break;
|
||||
|
||||
case plex_striped: /* striped plex */
|
||||
return "striped";
|
||||
break;
|
||||
|
||||
|
||||
default:
|
||||
sprintf(numeric_state, "Invalid org %d", (int) org);
|
||||
return numeric_state;
|
||||
}
|
||||
}
|
||||
|
||||
/* Return sd state as a string */
|
||||
char *
|
||||
sd_state(enum sdstate state)
|
||||
{
|
||||
if (((unsigned) state) >= STATECOUNT(sd)) {
|
||||
sprintf(numeric_state, "Invalid state %d", (int) state);
|
||||
return numeric_state;
|
||||
} else
|
||||
return sdstatetext[state];
|
||||
}
|
||||
|
||||
/* Now convert in the other direction */
|
||||
/* These are currently used only internally,
|
||||
* so we don't do too much error checking */
|
||||
enum drivestate
|
||||
DriveState(char *text)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < STATECOUNT(drive); i++)
|
||||
if (strcmp(text, drivestatetext[i]) == 0) /* found it */
|
||||
return (enum drivestate) i;
|
||||
return -1;
|
||||
}
|
||||
|
||||
enum sdstate
|
||||
SdState(char *text)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < STATECOUNT(sd); i++)
|
||||
if (strcmp(text, sdstatetext[i]) == 0) /* found it */
|
||||
return (enum sdstate) i;
|
||||
return -1;
|
||||
}
|
||||
|
||||
enum plexstate
|
||||
PlexState(char *text)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < STATECOUNT(plex); i++)
|
||||
if (strcmp(text, plexstatetext[i]) == 0) /* found it */
|
||||
return (enum plexstate) i;
|
||||
return -1;
|
||||
}
|
||||
|
||||
enum volumestate
|
||||
VolState(char *text)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < STATECOUNT(vol); i++)
|
||||
if (strcmp(text, volstatetext[i]) == 0) /* found it */
|
||||
return (enum volstate) i;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Take a number with an optional scale factor and convert
|
||||
* it to a number of bytes.
|
||||
*
|
||||
* The scale factors are:
|
||||
*
|
||||
* b blocks (of 512 bytes)
|
||||
* k kilobytes (1024 bytes)
|
||||
* m megabytes (of 1024 * 1024 bytes)
|
||||
* g gigabytes (of 1024 * 1024 * 1024 bytes)
|
||||
*/
|
||||
u_int64_t
|
||||
sizespec(char *spec)
|
||||
{
|
||||
u_int64_t size;
|
||||
char *s;
|
||||
|
||||
size = 0;
|
||||
s = spec;
|
||||
if ((*s >= '0') && (*s <= '9')) { /* it's numeric */
|
||||
while ((*s >= '0') && (*s <= '9')) /* it's numeric */
|
||||
size = size * 10 + *s++ - '0'; /* convert it */
|
||||
switch (*s) {
|
||||
case '\0':
|
||||
return size;
|
||||
|
||||
case 'B':
|
||||
case 'b':
|
||||
return size * 512;
|
||||
|
||||
case 'K':
|
||||
case 'k':
|
||||
return size * 1024;
|
||||
|
||||
case 'M':
|
||||
case 'm':
|
||||
return size * 1024 * 1024;
|
||||
|
||||
case 'G':
|
||||
case 'g':
|
||||
return size * 1024 * 1024 * 1024;
|
||||
}
|
||||
}
|
||||
#ifdef REALLYKERNEL
|
||||
throw_rude_remark(EINVAL, "Invalid length specification: %s", spec);
|
||||
#else
|
||||
fprintf(stderr, "Invalid length specification: %s", spec);
|
||||
longjmp(command_fail, -1);
|
||||
#endif
|
||||
/* NOTREACHED */
|
||||
return -1;
|
||||
}
|
512
lkm/vinum/vinum.c
Normal file
512
lkm/vinum/vinum.c
Normal file
@ -0,0 +1,512 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: vinum.c,v 1.19 1998/08/13 05:24:02 grog Exp grog $
|
||||
*/
|
||||
|
||||
#define STATIC /* nothing while we're testing XXX */
|
||||
|
||||
#define REALLYKERNEL
|
||||
#include "vinumhdr.h"
|
||||
#include "sys/sysproto.h" /* for sync(2) */
|
||||
#ifdef DEBUG
|
||||
#include <sys/reboot.h>
|
||||
int debug = 0;
|
||||
#endif
|
||||
|
||||
/* pointer to ioctl p parameter, to save passing it around */
|
||||
struct proc *myproc;
|
||||
|
||||
/* Device switch tables for vinum.  vinumattach() registers vinum_bdevsw
 * before FreeBSD 3 and vinum_cdevsw from FreeBSD 3 on.  The entries are
 * positional, so their order must match the struct declarations in the
 * system headers */
#if __FreeBSD__ < 3
|
||||
/* no initializer here: the bdevsw below only refers to it by address */
STATIC struct cdevsw vinum_cdevsw;
|
||||
STATIC struct bdevsw vinum_bdevsw =
|
||||
{
|
||||
vinumopen, vinumclose, vinumstrategy, vinumioctl,
|
||||
vinumdump, vinumsize, 0,
|
||||
"vinum", &vinum_cdevsw, -1
|
||||
};
|
||||
#else /* goodbye, bdevsw */
|
||||
STATIC struct cdevsw vinum_cdevsw =
|
||||
{
|
||||
vinumopen, vinumclose, vinumread, vinumwrite,
|
||||
vinumioctl, nostop, nullreset, nodevtotty,
|
||||
seltrue, nommap, vinumstrategy, "vinum",
|
||||
NULL, -1, vinumdump, vinumsize,
|
||||
D_DISK, 0, -1
|
||||
};
|
||||
#endif
|
||||
|
||||
/* Called by main() during pseudo-device attachment. */
|
||||
STATIC void vinumattach(void *);
|
||||
|
||||
STATIC void vinumgetdisklabel(dev_t);
|
||||
void vinum_scandisk(void);
|
||||
int vinum_inactive(void);
|
||||
void free_vinum(int);
|
||||
|
||||
#if __FreeBSD__ >= 3
|
||||
/* Why aren't these declared anywhere? XXX */
|
||||
int setjmp(jmp_buf);
|
||||
void longjmp(jmp_buf, int);
|
||||
#endif
|
||||
|
||||
extern jmp_buf command_fail; /* return here if config fails */
|
||||
|
||||
struct _vinum_conf vinum_conf; /* configuration information */
|
||||
|
||||
STATIC int vinum_devsw_installed = 0;
|
||||
|
||||
/*
|
||||
* Called by main() during pseudo-device attachment. All we need
|
||||
* to do is allocate enough space for devices to be configured later, and
|
||||
* add devsw entries.
|
||||
*/
|
||||
void
|
||||
vinumattach(void *dummy)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
char *buf; /* pointer to temporary buffer */
|
||||
struct _ioctl_reply *ioctl_reply; /* struct to return */
|
||||
struct uio uio;
|
||||
struct iovec iovec;
|
||||
|
||||
/* modload should prevent multiple loads, so this is worth a panic */
|
||||
if ((vinum_conf.flags & VF_LOADED) != NULL)
|
||||
panic("vinum: already loaded");
|
||||
|
||||
printf("vinum: loaded\n");
|
||||
vinum_conf.flags |= VF_LOADED; /* we're loaded now */
|
||||
|
||||
/* We don't have a p pointer here, so take it from curproc */
|
||||
myproc = curproc;
|
||||
#if __FreeBSD__ < 3
|
||||
bdevsw_add_generic(BDEV_MAJOR, CDEV_MAJOR, &vinum_bdevsw);
|
||||
#else
|
||||
cdevsw_add_generic(BDEV_MAJOR, CDEV_MAJOR, &vinum_cdevsw);
|
||||
#endif
|
||||
#ifdef DEVFS
|
||||
#error DEVFS not finished yet
|
||||
#endif
|
||||
|
||||
uio.uio_iov = &iovec;
|
||||
uio.uio_iovcnt = 1; /* just one buffer */
|
||||
uio.uio_offset = 0; /* start at the beginning */
|
||||
uio.uio_resid = 512; /* one sector */
|
||||
uio.uio_segflg = UIO_SYSSPACE; /* we're in system space */
|
||||
uio.uio_rw = UIO_READ; /* do we need this? */
|
||||
uio.uio_procp = curproc; /* do it for our own process */
|
||||
|
||||
iovec.iov_len = 512;
|
||||
buf = (char *) Malloc(iovec.iov_len); /* get a buffer */
|
||||
CHECKALLOC(buf, "vinum: no memory\n"); /* can't get 512 bytes? */
|
||||
iovec.iov_base = buf; /* read into buf */
|
||||
|
||||
/* allocate space: drives... */
|
||||
DRIVE = (struct drive *) Malloc(sizeof(struct drive) * INITIAL_DRIVES);
|
||||
CHECKALLOC(DRIVE, "vinum: no memory\n");
|
||||
vinum_conf.drives_allocated = INITIAL_DRIVES; /* number of drive slots allocated */
|
||||
vinum_conf.drives_used = 0; /* and number in use */
|
||||
|
||||
/* volumes, ... */
|
||||
VOL = (struct volume *) Malloc(sizeof(struct volume) * INITIAL_VOLUMES);
|
||||
CHECKALLOC(VOL, "vinum: no memory\n");
|
||||
vinum_conf.volumes_allocated = INITIAL_VOLUMES; /* number of volume slots allocated */
|
||||
vinum_conf.volumes_used = 0; /* and number in use */
|
||||
|
||||
/* plexes, ... */
|
||||
PLEX = (struct plex *) Malloc(sizeof(struct plex) * INITIAL_PLEXES);
|
||||
CHECKALLOC(PLEX, "vinum: no memory\n");
|
||||
vinum_conf.plexes_allocated = INITIAL_PLEXES; /* number of plex slots allocated */
|
||||
vinum_conf.plexes_used = 0; /* and number in use */
|
||||
|
||||
/* and subdisks */
|
||||
SD = (struct sd *) Malloc(sizeof(struct sd) * INITIAL_SUBDISKS);
|
||||
CHECKALLOC(SD, "vinum: no memory\n");
|
||||
vinum_conf.subdisks_allocated = INITIAL_SUBDISKS; /* number of sd slots allocated */
|
||||
vinum_conf.subdisks_used = 0; /* and number in use */
|
||||
|
||||
ioctl_reply = NULL; /* no reply on longjmp */
|
||||
}
|
||||
|
||||
|
||||
#ifdef ACTUALLY_LKM_NOT_KERNEL /* stuff for LKMs */
|
||||
|
||||
/* Check if we have anything open. If so, return 0 (not inactive),
|
||||
* otherwise 1 (inactive) */
|
||||
int
|
||||
vinum_inactive(void)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
int i;
|
||||
int can_do = 1; /* assume we can do it */
|
||||
|
||||
lock_config();
|
||||
for (i = 0; i < vinum_conf.volumes_used; i++) {
|
||||
if (VOL[i].pid != NULL) { /* volume is open */
|
||||
can_do = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
unlock_config();
|
||||
return can_do;
|
||||
}
|
||||
|
||||
/* Free all structures.
|
||||
* If cleardrive is 0, save the configuration; otherwise
|
||||
* remove the configuration from the drive.
|
||||
*
|
||||
* Before coming here, ensure that no volumes are open.
|
||||
*/
|
||||
void
|
||||
free_vinum(int cleardrive)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
int i;
|
||||
|
||||
if (cleardrive) {
|
||||
for (i = 0; i < vinum_conf.drives_used; i++)
|
||||
remove_drive(i); /* remove the drive */
|
||||
} else { /* keep the config */
|
||||
save_config();
|
||||
if (DRIVE != NULL) {
|
||||
for (i = 0; i < vinum_conf.drives_used; i++)
|
||||
free_drive(&DRIVE[i]); /* close files and things */
|
||||
Free(DRIVE);
|
||||
}
|
||||
}
|
||||
if (SD != NULL)
|
||||
Free(SD);
|
||||
if (PLEX != NULL) {
|
||||
for (i = 0; i < vinum_conf.plexes_used; i++) {
|
||||
struct plex *plex = &vinum_conf.plex[i];
|
||||
|
||||
if (plex->state != plex_unallocated) { /* we have real data there */
|
||||
if (plex->sdnos)
|
||||
Free(plex->sdnos);
|
||||
if (plex->unmapped_regions)
|
||||
Free(plex->unmapped_region);
|
||||
if (plex->defective_regions)
|
||||
Free(plex->defective_region);
|
||||
}
|
||||
}
|
||||
Free(PLEX);
|
||||
}
|
||||
if (VOL != NULL)
|
||||
Free(VOL);
|
||||
bzero(&vinum_conf, sizeof(vinum_conf));
|
||||
}
|
||||
|
||||
MOD_MISC(vinum);
|
||||
|
||||
/*
|
||||
* Function called when loading the driver.
|
||||
*/
|
||||
STATIC int
|
||||
vinum_load(struct lkm_table *lkmtp, int cmd)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
/* Debugger ("vinum_load"); */
|
||||
vinumattach(NULL);
|
||||
return 0; /* OK */
|
||||
}
|
||||
|
||||
/*
|
||||
* Function called when unloading the driver.
|
||||
*/
|
||||
STATIC int
|
||||
vinum_unload(struct lkm_table *lkmtp, int cmd)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
if (vinum_inactive()) { /* is anything open? */
|
||||
struct sync_args dummyarg =
|
||||
{0};
|
||||
#if __FreeBSD__ < 3
|
||||
int retval;
|
||||
#endif
|
||||
|
||||
printf("vinum: unloaded\n");
|
||||
#if __FreeBSD__ < 3
|
||||
sync(curproc, &dummyarg, &retval); /* write out buffers */
|
||||
#else
|
||||
sync(curproc, &dummyarg); /* write out buffers */
|
||||
#endif
|
||||
free_vinum(0); /* no: clean up */
|
||||
#if __FreeBSD__ < 3
|
||||
bdevsw[BDEV_MAJOR] = NULL; /* clear bdevsw */
|
||||
#endif
|
||||
cdevsw[CDEV_MAJOR] = NULL; /* and cdevsw */
|
||||
return 0;
|
||||
} else
|
||||
return EBUSY;
|
||||
}
|
||||
|
||||
/*
|
||||
* Dispatcher function for the module (load/unload/stat).
|
||||
*/
|
||||
int
|
||||
vinum_mod(struct lkm_table *lkmtp, int cmd, int ver)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
MOD_DISPATCH(vinum, /* module name */
|
||||
lkmtp, /* LKM table */
|
||||
cmd, /* command */
|
||||
ver,
|
||||
vinum_load, /* load with this function */
|
||||
vinum_unload, /* and unload with this */
|
||||
lkm_nullcmd);
|
||||
}
|
||||
|
||||
#else /* not LKM */
|
||||
#error "This driver must be compiled as a loadable kernel module"
|
||||
#endif /* LKM */
|
||||
|
||||
/* ARGSUSED */
|
||||
/* Open a vinum object
|
||||
* At the moment, we only open volumes and the
|
||||
* super device. It's a nice concept to be
|
||||
* able to open drives, subdisks and plexes, but
|
||||
* I can't think what good it could be */
|
||||
int
|
||||
vinumopen(dev_t dev,
|
||||
int flags,
|
||||
int fmt,
|
||||
struct proc *p)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
int s; /* spl */
|
||||
int error;
|
||||
unsigned int index;
|
||||
struct volume *vol;
|
||||
struct plex *plex;
|
||||
struct sd *sd;
|
||||
struct devcode *device;
|
||||
|
||||
device = (struct devcode *) &dev;
|
||||
|
||||
error = 0;
|
||||
/* First, decide what we're looking at */
|
||||
switch (device->type) {
|
||||
case VINUM_VOLUME_TYPE:
|
||||
index = VOLNO(dev);
|
||||
if (index >= vinum_conf.volumes_used)
|
||||
return ENXIO; /* no such device */
|
||||
vol = &VOL[index];
|
||||
|
||||
switch (vol->state) {
|
||||
case volume_unallocated:
|
||||
case volume_uninit:
|
||||
return ENXIO;
|
||||
|
||||
case volume_up:
|
||||
s = splhigh(); /* quick lock */
|
||||
if (error)
|
||||
return error;
|
||||
if (vol->opencount == 0)
|
||||
vol->openflags = flags; /* set our flags */
|
||||
vol->opencount++;
|
||||
vol->pid = p->p_pid; /* and say who we are (do we need this? XXX) */
|
||||
splx(s);
|
||||
return 0;
|
||||
|
||||
case volume_down:
|
||||
return EIO;
|
||||
|
||||
default:
|
||||
return EINVAL;
|
||||
}
|
||||
|
||||
case VINUM_PLEX_TYPE:
|
||||
if (VOLNO(dev) >= vinum_conf.volumes_used)
|
||||
return ENXIO;
|
||||
index = PLEXNO(dev); /* get plex index in vinum_conf */
|
||||
if (index >= vinum_conf.plexes_used)
|
||||
return ENXIO; /* no such device */
|
||||
plex = &PLEX[index];
|
||||
|
||||
switch (plex->state) {
|
||||
case plex_unallocated:
|
||||
return EINVAL;
|
||||
|
||||
default:
|
||||
s = splhigh();
|
||||
if (plex->pid /* it's open already */
|
||||
&& (plex->pid != p->p_pid)) { /* and not by us, */
|
||||
splx(s);
|
||||
return EBUSY; /* one at a time, please */
|
||||
}
|
||||
plex->pid = p->p_pid; /* and say who we are (do we need this? XXX) */
|
||||
splx(s);
|
||||
return 0;
|
||||
}
|
||||
|
||||
case VINUM_SD_TYPE:
|
||||
if ((VOLNO(dev) >= vinum_conf.volumes_used) || /* no such volume */
|
||||
(PLEXNO(dev) >= vinum_conf.plexes_used)) /* or no such plex */
|
||||
return ENXIO; /* no such device */
|
||||
index = SDNO(dev); /* get the subdisk number */
|
||||
if (index >= vinum_conf.subdisks_used)
|
||||
return ENXIO; /* no such device */
|
||||
sd = &SD[index];
|
||||
|
||||
/* Opening a subdisk is always a special operation, so we
|
||||
* ignore the state as long as it represents a real subdisk */
|
||||
switch (sd->state) {
|
||||
case sd_unallocated:
|
||||
case sd_uninit:
|
||||
return EINVAL;
|
||||
|
||||
default:
|
||||
s = splhigh();
|
||||
if (sd->pid /* it's open already */
|
||||
&& (sd->pid != p->p_pid)) { /* and not by us, */
|
||||
splx(s);
|
||||
return EBUSY; /* one at a time, please */
|
||||
}
|
||||
sd->pid = p->p_pid; /* and say who we are (do we need this? XXX) */
|
||||
splx(s);
|
||||
return 0;
|
||||
}
|
||||
|
||||
case VINUM_DRIVE_TYPE:
|
||||
default:
|
||||
return ENODEV; /* don't know what to do with these */
|
||||
|
||||
case VINUM_SUPERDEV_TYPE:
|
||||
if (p->p_ucred->cr_uid == 0) { /* root calling, */
|
||||
vinum_conf.opencount++; /* one more opener */
|
||||
return 0; /* no worries opening super dev */
|
||||
} else
|
||||
return EPERM; /* you can't do that! */
|
||||
}
|
||||
}
|
||||
|
||||
/* ARGSUSED */
|
||||
int
|
||||
vinumclose(dev_t dev,
|
||||
int flags,
|
||||
int fmt,
|
||||
struct proc *p)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
unsigned int index;
|
||||
struct volume *vol;
|
||||
struct plex *plex;
|
||||
struct sd *sd;
|
||||
struct devcode *device = (struct devcode *) &dev;
|
||||
|
||||
index = VOLNO(dev);
|
||||
/* First, decide what we're looking at */
|
||||
switch (device->type) {
|
||||
case VINUM_VOLUME_TYPE:
|
||||
if (index >= vinum_conf.volumes_used)
|
||||
return ENXIO; /* no such device */
|
||||
vol = &VOL[index];
|
||||
|
||||
switch (vol->state) {
|
||||
case volume_unallocated:
|
||||
case volume_uninit:
|
||||
return ENXIO;
|
||||
|
||||
case volume_up:
|
||||
vol->opencount = 0; /* reset our flags */
|
||||
vol->pid = NULL; /* and forget who owned us */
|
||||
return 0;
|
||||
|
||||
case volume_down:
|
||||
return EIO;
|
||||
|
||||
default:
|
||||
return EINVAL;
|
||||
}
|
||||
|
||||
case VINUM_PLEX_TYPE:
|
||||
if (VOLNO(dev) >= vinum_conf.volumes_used)
|
||||
return ENXIO;
|
||||
index = PLEXNO(dev); /* get plex index in vinum_conf */
|
||||
if (index >= vinum_conf.plexes_used)
|
||||
return ENXIO; /* no such device */
|
||||
plex = &PLEX[index];
|
||||
plex->pid = 0;
|
||||
return 0;
|
||||
|
||||
case VINUM_SD_TYPE:
|
||||
if ((VOLNO(dev) >= vinum_conf.volumes_used) || /* no such volume */
|
||||
(PLEXNO(dev) >= vinum_conf.plexes_used)) /* or no such plex */
|
||||
return ENXIO; /* no such device */
|
||||
index = SDNO(dev); /* get the subdisk number */
|
||||
if (index >= vinum_conf.subdisks_used)
|
||||
return ENXIO; /* no such device */
|
||||
sd = &SD[index];
|
||||
sd->pid = 0;
|
||||
return 0;
|
||||
|
||||
case VINUM_SUPERDEV_TYPE:
|
||||
if (p->p_ucred->cr_uid == 0) /* root calling, */
|
||||
vinum_conf.opencount--; /* one less opener */
|
||||
return 0; /* no worries closing super dev */
|
||||
|
||||
case VINUM_DRIVE_TYPE:
|
||||
default:
|
||||
return ENODEV; /* don't know what to do with these */
|
||||
}
|
||||
}
|
||||
|
||||
/* size routine */
|
||||
int
|
||||
vinumsize(dev_t dev)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
struct volume *vol;
|
||||
int size;
|
||||
|
||||
/* XXX This is bogus. We don't need to open
|
||||
* a device to find its size */
|
||||
vol = &VOL[VOLNO(dev)];
|
||||
|
||||
if (vol->state == volume_up)
|
||||
size = vol->size;
|
||||
else
|
||||
return 0; /* err on the size of conservatism */
|
||||
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
int
|
||||
vinumdump(dev_t dev)
|
||||
{
|
||||
/* Not implemented. */
|
||||
return ENXIO;
|
||||
}
|
214
lkm/vinum/vinumext.h
Normal file
214
lkm/vinum/vinumext.h
Normal file
@ -0,0 +1,214 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: vinumext.h,v 1.14 1998/08/11 00:03:57 grog Exp grog $
|
||||
*/
|
||||
|
||||
/* vinumext.h: external definitions */
|
||||
|
||||
extern struct _vinum_conf vinum_conf; /* configuration information */
|
||||
|
||||
#ifdef DEBUG
|
||||
extern int debug; /* debug flags; explicit type, since implicit int is not valid C99 */
|
||||
#endif
|
||||
|
||||
/*
 * Check the result of an allocation.  If ptr is NULL, print msg and
 * abandon the current command by jumping back to command_fail.
 *
 * The body is wrapped in do { } while (0) so the macro acts as a
 * single statement (safe inside an unbraced if/else); it must be
 * followed by a semicolon.  msg is printed through "%s" so that it
 * is never interpreted as a format string.
 */
#define CHECKALLOC(ptr, msg)                    \
    do {                                        \
        if ((ptr) == NULL) {                    \
            printf("%s", (msg));                \
            longjmp(command_fail, -1);          \
        }                                       \
    } while (0)
|
||||
#ifndef KERNEL
|
||||
struct vnode;
|
||||
struct proc;
|
||||
#endif
|
||||
|
||||
#ifdef KERNEL
|
||||
int give_sd_to_plex(int plexno, int sdno);
|
||||
int give_plex_to_volume(int volno, int plexno);
|
||||
int check_drive(char *);
|
||||
enum drive_label_info read_drive_label(struct drive *drive);
|
||||
int parse_config(char *, struct keywordset *);
|
||||
int parse_user_config(char *cptr, struct keywordset *keyset);
|
||||
u_int64_t sizespec(char *spec);
|
||||
int volume_index(struct volume *volume);
|
||||
int plex_index(struct plex *plex);
|
||||
int sd_index(struct sd *sd);
|
||||
int drive_index(struct drive *drive);
|
||||
int my_plex(int volno, int plexno);
|
||||
int my_sd(int plexno, int sdno);
|
||||
int get_empty_drive(void);
|
||||
int find_drive(const char *name, int create);
|
||||
int find_drive_by_dev(const char *devname, int create);
|
||||
int get_empty_sd(void);
|
||||
int find_subdisk(const char *name, int create);
|
||||
void free_sd(int sdno);
|
||||
void free_volume(int volno);
|
||||
int get_empty_plex(void);
|
||||
int find_plex(const char *name, int create);
|
||||
void free_plex(int plexno);
|
||||
int get_empty_volume(void);
|
||||
int find_volume(const char *name, int create);
|
||||
void config_subdisk(void);
|
||||
void config_plex(void);
|
||||
void config_volume(void);
|
||||
void config_drive(void);
|
||||
void updateconfig(int);
|
||||
void update_sd_config(int sdno, int kernelstate);
|
||||
void update_plex_config(int plexno, int kernelstate);
|
||||
void update_volume_config(int volno, int kernelstate);
|
||||
void update_config(void);
|
||||
void drive_io_done(struct buf *);
|
||||
int save_config(void);
|
||||
void write_config(char *, int);
|
||||
int start_config(void);
|
||||
void finish_config(int);
|
||||
void remove(struct vinum_ioctl_msg *msg);
|
||||
void remove_drive_entry(int driveno, int force, int recurse);
|
||||
void remove_sd_entry(int sdno, int force, int recurse);
|
||||
void remove_plex_entry(int plexno, int force, int recurse);
|
||||
void remove_volume_entry(int volno, int force, int recurse);
|
||||
|
||||
void checkernel(char *);
|
||||
int open_drive(struct drive *, struct proc *);
|
||||
void close_drive(struct drive *drive);
|
||||
int driveio(struct drive *, void *, size_t, off_t, int);
|
||||
/* #define read_drive(a, b, c, d) driveio (a, b, c, d, B_READ)
|
||||
#define write_drive(a, b, c, d) driveio (a, b, c, d, B_WRITE) */
|
||||
int set_drive_parms(struct drive *drive);
|
||||
int init_drive(struct drive *);
|
||||
/* void throw_rude_remark (int, struct _ioctl_reply *, char *, ...); XXX */
|
||||
void throw_rude_remark(int, char *,...);
|
||||
|
||||
int read_drive(struct drive *drive, void *buf, size_t length, off_t offset);
|
||||
int write_drive(struct drive *drive, void *buf, size_t length, off_t offset);
|
||||
void format_config(char *config, int len);
|
||||
void checkkernel(char *op);
|
||||
void free_drive(struct drive *drive);
|
||||
void down_drive(struct drive *drive);
|
||||
void remove_drive(int driveno);
|
||||
|
||||
/* I/O */
|
||||
d_open_t vinumopen;
|
||||
d_close_t vinumclose;
|
||||
d_strategy_t vinumstrategy;
|
||||
d_ioctl_t vinumioctl;
|
||||
d_dump_t vinumdump;
|
||||
d_psize_t vinumsize;
|
||||
d_read_t vinumread;
|
||||
d_write_t vinumwrite;
|
||||
|
||||
int vinumstart(struct buf *bp, int reviveok);
|
||||
int launch_requests(struct request *rq, int reviveok);
|
||||
|
||||
/* XXX Do we need this? */
|
||||
int vinumpart(dev_t);
|
||||
|
||||
/* Memory allocation */
|
||||
void vinum_meminfo(caddr_t data);
|
||||
int vinum_mallocinfo(caddr_t data);
|
||||
|
||||
void expand_table(void **, int, int);
|
||||
|
||||
void add_defective_region(struct plex *plex, off_t offset, size_t length);
|
||||
void add_unmapped_region(struct plex *plex, off_t offset, size_t length);
|
||||
void rebuild_plex_unmappedlist(struct plex *plex);
|
||||
struct request;
|
||||
struct rqgroup *allocrqg(struct request *rq, int elements);
|
||||
void deallocrqg(struct rqgroup *rqg);
|
||||
|
||||
/* State transitions */
|
||||
int set_drive_state(int driveno, enum drivestate state, int force);
|
||||
int set_sd_state(int sdno, enum sdstate state, enum setstateflags flags);
|
||||
enum requeststatus checksdstate(struct sd *sd, struct request *rq, daddr_t diskaddr, daddr_t diskend);
|
||||
int set_plex_state(int plexno, enum plexstate state, enum setstateflags flags);
|
||||
int set_volume_state(int volumeno, enum volumestate state, enum setstateflags flags);
|
||||
void get_volume_label(struct volume *vol, struct disklabel *lp);
|
||||
int write_volume_label(int);
|
||||
void start_object(struct vinum_ioctl_msg *);
|
||||
void stop_object(struct vinum_ioctl_msg *);
|
||||
void setstate(struct vinum_ioctl_msg *msg);
|
||||
void vinum_label(int);
|
||||
int vinum_writedisklabel(struct volume *, struct disklabel *);
|
||||
int initsd(int);
|
||||
|
||||
int restart_plex(int plexno);
|
||||
int revive_block(int plexno);
|
||||
|
||||
/* Auxiliary functions */
|
||||
enum sdstates sdstatemap(struct plex *plex, int *sddowncount);
|
||||
enum volplexstate vpstate(struct plex *plex);
|
||||
#endif
|
||||
|
||||
enum keyword get_keyword(char *, struct keywordset *);
|
||||
void listconfig(void);
|
||||
char *drive_state(enum drivestate);
|
||||
char *volume_state(enum volumestate);
|
||||
char *plex_state(enum plexstate);
|
||||
char *plex_org(enum plexorg);
|
||||
char *sd_state(enum sdstate);
|
||||
enum drivestate DriveState(char *text);
|
||||
enum sdstate SdState(char *text);
|
||||
enum plexstate PlexState(char *text);
|
||||
enum volumestate VolState(char *text);
|
||||
struct drive *validdrive(int driveno, struct _ioctl_reply *);
|
||||
struct sd *validsd(int sdno, struct _ioctl_reply *);
|
||||
struct plex *validplex(int plexno, struct _ioctl_reply *);
|
||||
struct volume *validvol(int volno, struct _ioctl_reply *);
|
||||
int tokenize(char *, char *[]);
|
||||
void resetstats(struct vinum_ioctl_msg *msg);
|
||||
|
||||
/* Locking */
|
||||
int lockvol(struct volume *vol);
|
||||
void unlockvol(struct volume *vol);
|
||||
int lockplex(struct plex *plex);
|
||||
void unlockplex(struct plex *plex);
|
||||
int lockrange(struct plex *plex, off_t first, off_t last);
|
||||
void unlockrange(struct plex *plex, off_t first, off_t last);
|
||||
int lock_config(void);
|
||||
void unlock_config(void);
|
||||
|
||||
#ifdef DEBUG
/*
 * Grow the rqe table of *prq by RQELTS elements: expand_table() is
 * given the old and new sizes in bytes, the elements starting at
 * index prq->requests are zeroed, and prq->rqcount is bumped.
 *
 * Wrapped in do { } while (0) so that it is a single statement,
 * usable exactly like the function declared for the non-DEBUG case;
 * it must be followed by a semicolon.  prq is evaluated more than
 * once, so do not pass an expression with side effects.
 */
#define expandrq(prq)                                                       \
    do {                                                                    \
        expand_table((void **) &(prq)->rqe,                                 \
            (prq)->requests * sizeof(struct rqelement),                     \
            ((prq)->requests + RQELTS) * sizeof(struct rqelement));         \
        bzero(&(prq)->rqe[(prq)->requests],                                 \
            RQELTS * sizeof(struct rqelement));                             \
        (prq)->rqcount += RQELTS;                                           \
    } while (0)
#else
void expandrq(struct plexrq *);
#endif
|
104
lkm/vinum/vinumhdr.h
Normal file
104
lkm/vinum/vinumhdr.h
Normal file
@ -0,0 +1,104 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
*/
|
||||
|
||||
/* Header files used by all modules */
|
||||
/* $Id: vinumhdr.h,v 1.7 1998/08/07 04:41:18 grog Exp grog $ */
|
||||
|
||||
#ifdef KERNEL
|
||||
#define REALLYKERNEL
|
||||
#endif
|
||||
#include <sys/param.h>
|
||||
#ifdef REALLYKERNEL
|
||||
#include <sys/systm.h>
|
||||
#include <sys/kernel.h>
|
||||
#endif
|
||||
#ifdef DEVFS
|
||||
#error "DEVFS code not complete yet"
|
||||
#include <sys/devfsext.h>
|
||||
#endif /*DEVFS */
|
||||
#include <sys/proc.h>
|
||||
#include <sys/errno.h>
|
||||
#include <sys/dkstat.h>
|
||||
#include <sys/buf.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/uio.h>
|
||||
#include <sys/namei.h>
|
||||
#include <sys/conf.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/disklabel.h>
|
||||
#include <ufs/ffs/fs.h>
|
||||
#include <sys/mount.h>
|
||||
#include <sys/device.h>
|
||||
#undef KERNEL /* XXX */
|
||||
#include <sys/disk.h>
|
||||
#ifdef REALLYKERNEL
|
||||
#define KERNEL
|
||||
#endif
|
||||
#include <sys/syslog.h>
|
||||
#include <sys/fcntl.h>
|
||||
#include <sys/vnode.h>
|
||||
#include <sys/dkbad.h>
|
||||
#include <setjmp.h>
|
||||
#include <stdarg.h>
|
||||
#include <vm/vm.h>
|
||||
#ifdef USES_VM
|
||||
/* XXX Do we need this? */
|
||||
#include <vm/vm_extern.h>
|
||||
#include <vm/vm_kern.h>
|
||||
#include <vm/vm_object.h>
|
||||
#include <vm/vm_prot.h>
|
||||
/* #include <vm/vm_page.h> */
|
||||
#include <sys/vmmeter.h>
|
||||
/* #include <machine/pmap.h> */
|
||||
#include <machine/cputypes.h>
|
||||
#endif /* USES_VM */
|
||||
#include <vinumvar.h>
|
||||
#include <vinumio.h>
|
||||
#include "vinumkw.h"
|
||||
#include "vinumext.h"
|
||||
|
||||
#undef Free /* defined in some funny net stuff */
#ifdef REALLYKERNEL
/* Kernel build: route through MMalloc/FFree, passing the caller's file and line */
#define Malloc(x) MMalloc((x), __FILE__, __LINE__)
#define Free(x) FFree((x), __FILE__, __LINE__)
caddr_t MMalloc(int size, char *, int);
void FFree(void *mem, char *, int);
#else
/* Non-kernel build: plain malloc (just the size) and free (just the address) */
#define Malloc(x) malloc((x))
#define Free(x) free((x))
#endif
|
||||
|
132
lkm/vinum/vinumio.h
Normal file
132
lkm/vinum/vinumio.h
Normal file
@ -0,0 +1,132 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: vinumio.h,v 1.10 1998/08/10 05:46:19 grog Exp grog $
|
||||
*/
|
||||
|
||||
#define MAX_IOCTL_REPLY 256 /* size of the message buffer in struct _ioctl_reply */
#define L 'F' /* ID letter of our ioctls */

/*
 * Reply buffer: VINUM_CREATE returns a buffer of this kind.
 */
struct _ioctl_reply {
    int error;                  /* error status of the request; 0 means no error */
    char msg[MAX_IOCTL_REPLY];  /* accompanying message text */
};
|
||||
|
||||
/* ioctl requests */
|
||||
#define BUFSIZE 1024 /* size of buffer, including continuations */
|
||||
#define VINUM_CREATE _IOC(IOC_IN | IOC_OUT, L, 64, BUFSIZE) /* configure vinum */
|
||||
#define VINUM_GETCONFIG _IOR(L, 65, struct _vinum_conf) /* get global config */
|
||||
#define VINUM_DRIVECONFIG _IOWR(L, 66, struct drive) /* get drive config */
|
||||
#define VINUM_SDCONFIG _IOWR(L, 67, struct sd) /* get subdisk config */
|
||||
#define VINUM_PLEXCONFIG _IOWR(L, 68, struct plex) /* get plex config */
|
||||
#define VINUM_VOLCONFIG _IOWR(L, 69, struct volume) /* get volume config */
|
||||
#define VINUM_PLEXSDCONFIG _IOWR(L, 70, struct sd) /* get sd config for plex (plex, sdno) */
|
||||
#define VINUM_GETFREELIST _IOWR(L, 71, struct drive_freelist) /* get freelist element (drive, fe) */
|
||||
#define VINUM_SAVECONFIG _IOC(0, L, 72, 0) /* release locks, update, write config to disk */
|
||||
#define VINUM_RESETCONFIG _IOC(0, L, 73, 0) /* trash config on disk */
|
||||
#define VINUM_INIT _IOC(0, L, 74, 0) /* read config from disk */
|
||||
#ifdef DEBUG
|
||||
|
||||
/* Argument block for the VINUM_DEBUG ioctl */
struct debuginfo {
    int changeit;   /* nonzero: set the debug flags to param; zero: enter the debugger */
    int param;      /* new value for the debug flags */
};
|
||||
|
||||
#define VINUM_DEBUG _IOWR(L, 75, struct debuginfo) /* call the debugger from ioctl () */
|
||||
#endif
|
||||
|
||||
/* Kind of vinum object an ioctl message refers to */
enum objecttype {
    drive_object = 0,
    sd_object,
    plex_object,
    volume_object,
    invalid_object
};
|
||||
|
||||
/* Start an object. Pass two integers:
|
||||
* msg [0] index in vinum_conf.<object>
|
||||
* msg [1] type of object (see below)
|
||||
*
|
||||
* Return ioctl_reply
|
||||
*/
|
||||
#define VINUM_SETSTATE _IOC(IOC_IN | IOC_OUT, L, 76, MAX_IOCTL_REPLY) /* start an object */
|
||||
|
||||
/*
 * The state to set with VINUM_SETSTATE.  Each kind of object has
 * its own set of states, so this generic value needs to be
 * translated later.
 */
enum objectstate {
    object_down = 0,
    object_initializing,
    object_up
};
|
||||
|
||||
/* This structure is used for modifying objects
|
||||
* (VINUM_SETSTATE, VINUM_REMOVE, VINUM_RESETSTATS, VINUM_ATTACH,
|
||||
* VINUM_DETACH, VINUM_REPLACE
|
||||
*/
|
||||
struct vinum_ioctl_msg {
|
||||
int index;
|
||||
enum objecttype type;
|
||||
enum objectstate state; /* state to set (VINUM_SETSTATE) */
|
||||
int force; /* do it even if it doesn't make sense */
|
||||
int recurse; /* recurse (VINUM_REMOVE) */
|
||||
int otherobject; /* superordinate object (attach),
|
||||
* replacement object (replace) */
|
||||
int rename; /* rename object (attach) */
|
||||
int64_t offset; /* offset of subdisk (for attach) */
|
||||
};
|
||||
|
||||
#define VINUM_RELEASECONFIG _IOC(0, L, 77, 0) /* release locks and write config to disk */
|
||||
#define VINUM_STARTCONFIG _IOC(0, L, 78, 0) /* start a configuration operation */
|
||||
#define VINUM_MEMINFO _IOR(L, 79, struct meminfo) /* get memory usage summary */
|
||||
#define VINUM_MALLOCINFO _IOWR(L, 80, struct mc) /* get specific malloc information [i] */
|
||||
#define VINUM_LABEL _IOC(IOC_IN | IOC_OUT, L, 81, MAX_IOCTL_REPLY) /* label a volume */
|
||||
#define VINUM_INITSD _IOW(L, 82, int) /* initialize a subdisk */
|
||||
#define VINUM_REMOVE _IOC(IOC_IN | IOC_OUT, L, 83, MAX_IOCTL_REPLY) /* remove an object */
|
||||
#define VINUM_GETUNMAPPED _IOWR(L, 84, struct plexregion) /* get unmapped element (plex, re) */
|
||||
#define VINUM_GETDEFECTIVE _IOWR(L, 85, struct plexregion) /* get defective element (plex, re) */
|
||||
#define VINUM_RESETSTATS _IOC(IOC_IN | IOC_OUT, L, 86, MAX_IOCTL_REPLY) /* reset object stats */
|
||||
#define VINUM_ATTACH _IOC(IOC_IN | IOC_OUT, L, 87, MAX_IOCTL_REPLY) /* attach an object */
|
||||
#define VINUM_DETACH _IOC(IOC_IN | IOC_OUT, L, 88, MAX_IOCTL_REPLY) /* detach an object */
|
||||
|
||||
struct vinum_rename_msg {
|
||||
int index;
|
||||
int recurse; /* rename subordinate objects too */
|
||||
enum objecttype type;
|
||||
char newname[MAXNAME]; /* new name to give to object */
|
||||
};
|
||||
|
||||
#define VINUM_RENAME _IOC(IOC_IN | IOC_OUT, L, 89, MAX_IOCTL_REPLY) /* rename an object */
|
||||
#define VINUM_REPLACE _IOC(IOC_IN | IOC_OUT, L, 90, MAX_IOCTL_REPLY) /* replace an object */
|
787
lkm/vinum/vinumioctl.c
Normal file
787
lkm/vinum/vinumioctl.c
Normal file
@ -0,0 +1,787 @@
|
||||
/* XXX replace all the checks on object validity with
|
||||
* calls to valid<object> */
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: vinumioctl.c,v 1.1 1998/08/14 08:46:10 grog Exp grog $
|
||||
*/
|
||||
|
||||
#define STATIC /* nothing while we're testing XXX */
|
||||
|
||||
#define REALLYKERNEL
|
||||
#include "vinumhdr.h"
|
||||
#include "sys/sysproto.h" /* for sync(2) */
|
||||
#ifdef DEBUG
|
||||
#include <sys/reboot.h>
|
||||
#endif
|
||||
|
||||
jmp_buf command_fail; /* return on a failed command */
|
||||
|
||||
#if __FreeBSD__ >= 3
|
||||
/* Why aren't these declared anywhere? XXX */
|
||||
int setjmp(jmp_buf);
|
||||
void longjmp(jmp_buf, int);
|
||||
#endif
|
||||
|
||||
/* pointer to ioctl p parameter, to save passing it around */
|
||||
struct proc *myproc;
|
||||
|
||||
int vinum_inactive(void);
|
||||
void free_vinum(int);
|
||||
void attachobject(struct vinum_ioctl_msg *);
|
||||
void detachobject(struct vinum_ioctl_msg *);
|
||||
void renameobject(struct vinum_rename_msg *);
|
||||
void replaceobject(struct vinum_ioctl_msg *);
|
||||
|
||||
/* ioctl routine */
|
||||
int
|
||||
vinumioctl(dev_t dev,
|
||||
#if __FreeBSD__ >= 3
|
||||
u_long cmd,
|
||||
#else
|
||||
int cmd,
|
||||
#endif
|
||||
caddr_t data,
|
||||
int flag,
|
||||
struct proc *p)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
unsigned int objno;
|
||||
int error = 0;
|
||||
struct volume *vol;
|
||||
unsigned int index; /* for transferring config info */
|
||||
unsigned int sdno; /* for transferring config info */
|
||||
int fe; /* free list element number */
|
||||
struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* struct to return */
|
||||
|
||||
struct devcode *device = (struct devcode *) &dev;
|
||||
|
||||
/* First, decide what we're looking at */
|
||||
switch (device->type) {
|
||||
case VINUM_SUPERDEV_TYPE:
|
||||
myproc = p; /* save pointer to process */
|
||||
ioctl_reply = (struct _ioctl_reply *) data; /* save the address to reply to */
|
||||
error = setjmp(command_fail); /* come back here on error */
|
||||
if (error) /* bombed out */
|
||||
return 0; /* the reply will contain meaningful info */
|
||||
switch (cmd) {
|
||||
/* XXX #ifdef DEBUG */
|
||||
case VINUM_DEBUG:
|
||||
boothowto |= RB_GDB; /* serial debug line */
|
||||
if (((struct debuginfo *) data)->changeit) /* change debug settings */
|
||||
debug = (((struct debuginfo *) data)->param);
|
||||
else
|
||||
Debugger("vinum debug");
|
||||
ioctl_reply = (struct _ioctl_reply *) data; /* reinstate the address to reply to */
|
||||
ioctl_reply->error = 0;
|
||||
return 0;
|
||||
/* XXX #endif */
|
||||
|
||||
case VINUM_CREATE: /* create a vinum object */
|
||||
error = lock_config(); /* get the config for us alone */
|
||||
if (error) /* can't do it, */
|
||||
return error; /* give up */
|
||||
error = setjmp(command_fail); /* come back here on error */
|
||||
if (error == 0) { /* first time, */
|
||||
parse_user_config((char *) data, &keyword_set); /* update the config */
|
||||
ioctl_reply->error = 0; /* no error if we make it here */
|
||||
} else if (ioctl_reply->error == 0) { /* longjmp, but no error status */
|
||||
ioctl_reply->error = EINVAL; /* note that something's up */
|
||||
ioctl_reply->msg[0] = '\0'; /* no message? */
|
||||
}
|
||||
unlock_config();
|
||||
return 0; /* must be 0 to return the real error info */
|
||||
|
||||
case VINUM_GETCONFIG: /* get the configuration information */
|
||||
bcopy(&vinum_conf, data, sizeof(vinum_conf));
|
||||
return 0;
|
||||
|
||||
/* start configuring the subsystem */
|
||||
case VINUM_STARTCONFIG:
|
||||
return start_config(); /* just lock it */
|
||||
|
||||
/* Move the individual parts of the config to user space.
|
||||
|
||||
* Specify the index of the object in the first word of data,
|
||||
* and return the object there
|
||||
*/
|
||||
case VINUM_DRIVECONFIG:
|
||||
index = *(int *) data; /* get the index */
|
||||
if (index >= (unsigned) vinum_conf.drives_used) /* can't do it */
|
||||
return EFAULT; /* bang */
|
||||
bcopy(&DRIVE[index], data, sizeof(struct drive)); /* copy the config item out */
|
||||
return 0;
|
||||
|
||||
case VINUM_SDCONFIG:
|
||||
index = *(int *) data; /* get the index */
|
||||
if (index >= (unsigned) vinum_conf.subdisks_used) /* can't do it */
|
||||
return EFAULT; /* bang */
|
||||
bcopy(&SD[index], data, sizeof(struct sd)); /* copy the config item out */
|
||||
return 0;
|
||||
|
||||
case VINUM_PLEXCONFIG:
|
||||
index = *(int *) data; /* get the index */
|
||||
if (index >= (unsigned) vinum_conf.plexes_used) /* can't do it */
|
||||
return EFAULT; /* bang */
|
||||
bcopy(&PLEX[index], data, sizeof(struct plex)); /* copy the config item out */
|
||||
return 0;
|
||||
|
||||
case VINUM_VOLCONFIG:
|
||||
index = *(int *) data; /* get the index */
|
||||
if (index >= (unsigned) vinum_conf.volumes_used) /* can't do it */
|
||||
return EFAULT; /* bang */
|
||||
bcopy(&VOL[index], data, sizeof(struct volume)); /* copy the config item out */
|
||||
return 0;
|
||||
|
||||
case VINUM_PLEXSDCONFIG:
|
||||
index = *(int *) data; /* get the plex index */
|
||||
sdno = ((int *) data)[1]; /* and the sd index */
|
||||
if ((index >= (unsigned) vinum_conf.plexes_used) /* plex doesn't exist */
|
||||
||(sdno >= PLEX[index].subdisks)) /* or it doesn't have this many subdisks */
|
||||
return EFAULT; /* bang */
|
||||
bcopy(&SD[PLEX[index].sdnos[sdno]], /* copy the config item out */
|
||||
data,
|
||||
sizeof(struct sd));
|
||||
return 0;
|
||||
|
||||
case VINUM_SAVECONFIG:
|
||||
if (VFLAGS & VF_CONFIGURING) { /* must be us, the others are asleep */
|
||||
finish_config(1); /* finish the configuration and update it */
|
||||
error = save_config(); /* save configuration to disk */
|
||||
} else
|
||||
error = EINVAL; /* queue up for this one, please */
|
||||
return error;
|
||||
|
||||
case VINUM_RELEASECONFIG: /* release the config */
|
||||
if (VFLAGS & VF_CONFIGURING) { /* must be us, the others are asleep */
|
||||
finish_config(0); /* finish the configuration, don't change it */
|
||||
error = save_config(); /* save configuration to disk */
|
||||
} else
|
||||
error = EINVAL; /* release what config? */
|
||||
return error;
|
||||
|
||||
case VINUM_INIT:
|
||||
ioctl_reply = (struct _ioctl_reply *) data; /* reinstate the address to reply to */
|
||||
ioctl_reply->error = 0;
|
||||
return 0;
|
||||
|
||||
case VINUM_RESETCONFIG:
|
||||
if (vinum_inactive() && (vinum_conf.opencount < 2)) { /* if we're not active */
|
||||
/* Note the open count. We may be called from v, so we'll be open.
|
||||
* Keep the count so we don't underflow */
|
||||
int oc = vinum_conf.opencount;
|
||||
free_vinum(1); /* clean up everything */
|
||||
printf("vinum: CONFIGURATION OBLITERATED\n");
|
||||
vinum_conf.opencount = oc;
|
||||
ioctl_reply = (struct _ioctl_reply *) data; /* reinstate the address to reply to */
|
||||
ioctl_reply->error = 0;
|
||||
return 0;
|
||||
}
|
||||
return EBUSY;
|
||||
|
||||
case VINUM_SETSTATE:
|
||||
setstate((struct vinum_ioctl_msg *) data); /* set an object state */
|
||||
return 0;
|
||||
|
||||
case VINUM_MEMINFO:
|
||||
vinum_meminfo(data);
|
||||
return 0;
|
||||
|
||||
case VINUM_MALLOCINFO:
|
||||
return vinum_mallocinfo(data);
|
||||
|
||||
case VINUM_LABEL: /* label a volume */
|
||||
ioctl_reply->error = write_volume_label(*(int *) data); /* index of the volume to label */
|
||||
ioctl_reply->msg[0] = '\0'; /* no message */
|
||||
return 0;
|
||||
|
||||
case VINUM_REMOVE:
|
||||
remove((struct vinum_ioctl_msg *) data); /* remove an object */
|
||||
return 0;
|
||||
|
||||
case VINUM_GETFREELIST: /* get a drive free list element */
|
||||
index = *(int *) data; /* get the drive index */
|
||||
fe = ((int *) data)[1]; /* and the free list element */
|
||||
if ((index >= (unsigned) vinum_conf.drives_used) /* plex doesn't exist */
|
||||
||(DRIVE[index].state == drive_unallocated))
|
||||
return ENODEV;
|
||||
if (fe >= DRIVE[index].freelist_entries) /* no such entry */
|
||||
return ENOENT;
|
||||
bcopy(&DRIVE[index].freelist[fe],
|
||||
data,
|
||||
sizeof(struct drive_freelist));
|
||||
return 0;
|
||||
|
||||
case VINUM_GETDEFECTIVE: /* get a plex defective area element */
|
||||
index = *(int *) data; /* get the plex index */
|
||||
fe = ((int *) data)[1]; /* and the region number */
|
||||
if ((index >= (unsigned) vinum_conf.plexes_used) /* plex doesn't exist */
|
||||
||(PLEX[index].state == plex_unallocated))
|
||||
return ENODEV;
|
||||
if (fe >= PLEX[index].defective_regions) /* no such entry */
|
||||
return ENOENT;
|
||||
bcopy(&PLEX[index].defective_region[fe],
|
||||
data,
|
||||
sizeof(struct plexregion));
|
||||
return 0;
|
||||
|
||||
case VINUM_GETUNMAPPED: /* get a plex unmapped area element */
|
||||
index = *(int *) data; /* get the plex index */
|
||||
fe = ((int *) data)[1]; /* and the region number */
|
||||
if ((index >= (unsigned) vinum_conf.plexes_used) /* plex doesn't exist */
|
||||
||(PLEX[index].state == plex_unallocated))
|
||||
return ENODEV;
|
||||
if (fe >= PLEX[index].unmapped_regions) /* no such entry */
|
||||
return ENOENT;
|
||||
bcopy(&PLEX[index].unmapped_region[fe],
|
||||
data,
|
||||
sizeof(struct plexregion));
|
||||
return 0;
|
||||
|
||||
case VINUM_RESETSTATS:
|
||||
resetstats((struct vinum_ioctl_msg *) data); /* reset object stats */
|
||||
return 0;
|
||||
|
||||
/* attach an object to a superordinate object */
|
||||
case VINUM_ATTACH:
|
||||
attachobject((struct vinum_ioctl_msg *) data);
|
||||
return 0;
|
||||
|
||||
/* detach an object from a superordinate object */
|
||||
case VINUM_DETACH:
|
||||
detachobject((struct vinum_ioctl_msg *) data);
|
||||
return 0;
|
||||
|
||||
/* rename an object */
|
||||
case VINUM_RENAME:
|
||||
renameobject((struct vinum_rename_msg *) data);
|
||||
return 0;
|
||||
|
||||
/* replace an object */
|
||||
case VINUM_REPLACE:
|
||||
replaceobject((struct vinum_ioctl_msg *) data);
|
||||
return 0;
|
||||
|
||||
default:
|
||||
/* FALLTHROUGH */
|
||||
}
|
||||
|
||||
default:
|
||||
#if __FreeBSD__>=3
|
||||
printf("vinumioctl: type %d, sd %d, plex %d, major %x, volume %d, command %lx\n",
|
||||
device->type,
|
||||
device->sd,
|
||||
device->plex,
|
||||
device->major,
|
||||
device->volume,
|
||||
cmd); /* XXX */
|
||||
|
||||
#else
|
||||
printf("vinumioctl: type %d, sd %d, plex %d, major %x, volume %d, command %x\n",
|
||||
device->type,
|
||||
device->sd,
|
||||
device->plex,
|
||||
device->major,
|
||||
device->volume,
|
||||
cmd); /* XXX */
|
||||
|
||||
#endif
|
||||
return EINVAL;
|
||||
|
||||
case VINUM_DRIVE_TYPE:
|
||||
case VINUM_PLEX_TYPE:
|
||||
return EAGAIN; /* try again next week */
|
||||
|
||||
case VINUM_SD_TYPE:
|
||||
objno = SDNO(dev);
|
||||
|
||||
switch (cmd) {
|
||||
case VINUM_INITSD: /* initialize subdisk */
|
||||
return initsd(objno);
|
||||
|
||||
default:
|
||||
return EINVAL;
|
||||
}
|
||||
break;
|
||||
|
||||
case VINUM_VOLUME_TYPE:
|
||||
objno = VOLNO(dev);
|
||||
|
||||
if ((unsigned) objno >= (unsigned) vinum_conf.volumes_used) /* not a valid volume */
|
||||
return ENXIO;
|
||||
vol = &VOL[objno];
|
||||
if (vol->state != volume_up) /* not up, */
|
||||
return EIO; /* I/O error */
|
||||
|
||||
switch (cmd) {
|
||||
case DIOCGDINFO: /* get disk label */
|
||||
get_volume_label(vol, (struct disklabel *) data);
|
||||
break;
|
||||
|
||||
/* Care! DIOCGPART returns *pointers* to
|
||||
* the caller, so we need to store this crap as well.
|
||||
* And yes, we need it. */
|
||||
case DIOCGPART: /* get partition information */
|
||||
get_volume_label(vol, &vol->label);
|
||||
((struct partinfo *) data)->disklab = &vol->label;
|
||||
((struct partinfo *) data)->part = &vol->label.d_partitions[0];
|
||||
break;
|
||||
|
||||
/* We don't have this stuff on hardware,
|
||||
* so just pretend to do it so that
|
||||
* utilities don't get upset. */
|
||||
case DIOCWDINFO: /* write partition info */
|
||||
case DIOCSDINFO: /* set partition info */
|
||||
return 0; /* not a titty */
|
||||
|
||||
case DIOCWLABEL: /* set or reset label writeable */
|
||||
if ((flag & FWRITE) == 0) /* not writeable? */
|
||||
return EACCES; /* no, die */
|
||||
if (*(int *) data != 0) /* set it? */
|
||||
vol->flags |= VF_WLABEL; /* yes */
|
||||
else
|
||||
vol->flags &= ~VF_WLABEL; /* no, reset */
|
||||
break;
|
||||
|
||||
default:
|
||||
return ENOTTY; /* not my kind of ioctl */
|
||||
}
|
||||
break;
|
||||
}
|
||||
return 0; /* XXX */
|
||||
}
|
||||
|
||||
/* The following four functions check the supplied
 * object index and return a pointer to the object
 * if it exists.  Otherwise they store ENOENT and an
 * explanatory message in the reply and return NULL */
|
||||
struct drive *
|
||||
validdrive(int driveno, struct _ioctl_reply *reply)
|
||||
{
|
||||
if ((driveno < vinum_conf.drives_used)
|
||||
&& (DRIVE[driveno].state != drive_unallocated))
|
||||
return &DRIVE[driveno];
|
||||
strcpy(reply->msg, "No such drive");
|
||||
reply->error = ENOENT;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct sd *
|
||||
validsd(int sdno, struct _ioctl_reply *reply)
|
||||
{
|
||||
if ((sdno < vinum_conf.subdisks_used)
|
||||
&& (SD[sdno].state != sd_unallocated))
|
||||
return &SD[sdno];
|
||||
strcpy(reply->msg, "No such subdisk");
|
||||
reply->error = ENOENT;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct plex *
|
||||
validplex(int plexno, struct _ioctl_reply *reply)
|
||||
{
|
||||
if ((plexno < vinum_conf.plexes_used)
|
||||
&& (PLEX[plexno].state != plex_unallocated))
|
||||
return &PLEX[plexno];
|
||||
strcpy(reply->msg, "No such plex");
|
||||
reply->error = ENOENT;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct volume *
|
||||
validvol(int volno, struct _ioctl_reply *reply)
|
||||
{
|
||||
if ((volno < vinum_conf.volumes_used)
|
||||
&& (VOL[volno].state != volume_unallocated))
|
||||
return &VOL[volno];
|
||||
strcpy(reply->msg, "No such volume");
|
||||
reply->error = ENOENT;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* reset an object's stats */
|
||||
void
|
||||
resetstats(struct vinum_ioctl_msg *msg)
|
||||
{
|
||||
struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
|
||||
|
||||
switch (msg->type) {
|
||||
case drive_object:
|
||||
if (msg->index < vinum_conf.drives_used) {
|
||||
struct drive *drive = &DRIVE[msg->index];
|
||||
if (drive->state != drive_unallocated) {
|
||||
drive->reads = 0; /* number of reads on this drive */
|
||||
drive->writes = 0; /* number of writes on this drive */
|
||||
drive->bytes_read = 0; /* number of bytes read */
|
||||
drive->bytes_written = 0; /* number of bytes written */
|
||||
reply->error = 0;
|
||||
return;
|
||||
}
|
||||
reply->error = EINVAL;
|
||||
return;
|
||||
}
|
||||
case sd_object:
|
||||
if (msg->index < vinum_conf.subdisks_used) {
|
||||
struct sd *sd = &SD[msg->index];
|
||||
if (sd->state != sd_unallocated) {
|
||||
sd->reads = 0; /* number of reads on this subdisk */
|
||||
sd->writes = 0; /* number of writes on this subdisk */
|
||||
sd->bytes_read = 0; /* number of bytes read */
|
||||
sd->bytes_written = 0; /* number of bytes written */
|
||||
reply->error = 0;
|
||||
return;
|
||||
}
|
||||
reply->error = EINVAL;
|
||||
return;
|
||||
}
|
||||
break;
|
||||
|
||||
case plex_object:
|
||||
if (msg->index < vinum_conf.plexes_used) {
|
||||
struct plex *plex = &PLEX[msg->index];
|
||||
if (plex->state != plex_unallocated) {
|
||||
plex->reads = 0;
|
||||
plex->writes = 0; /* number of writes on this plex */
|
||||
plex->bytes_read = 0; /* number of bytes read */
|
||||
plex->bytes_written = 0; /* number of bytes written */
|
||||
plex->multiblock = 0; /* requests that needed more than one block */
|
||||
plex->multistripe = 0; /* requests that needed more than one stripe */
|
||||
reply->error = 0;
|
||||
return;
|
||||
}
|
||||
reply->error = EINVAL;
|
||||
return;
|
||||
}
|
||||
break;
|
||||
|
||||
case volume_object:
|
||||
if (msg->index < vinum_conf.volumes_used) {
|
||||
struct volume *vol = &VOL[msg->index];
|
||||
if (vol->state != volume_unallocated) {
|
||||
vol->bytes_read = 0; /* number of bytes read */
|
||||
vol->bytes_written = 0; /* number of bytes written */
|
||||
vol->reads = 0; /* number of reads on this volume */
|
||||
vol->writes = 0; /* number of writes on this volume */
|
||||
vol->recovered_reads = 0; /* reads recovered from another plex */
|
||||
reply->error = 0;
|
||||
return;
|
||||
}
|
||||
reply->error = EINVAL;
|
||||
return;
|
||||
}
|
||||
case invalid_object: /* can't get this */
|
||||
reply->error = EINVAL;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* attach an object to a superior object */
|
||||
void
|
||||
attachobject(struct vinum_ioctl_msg *msg)
|
||||
{
|
||||
struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
|
||||
struct sd *sd;
|
||||
struct plex *plex;
|
||||
struct volume *vol;
|
||||
|
||||
switch (msg->type) {
|
||||
case drive_object: /* you can't attach a drive to anything */
|
||||
case volume_object: /* nor a volume */
|
||||
case invalid_object: /* "this can't happen" */
|
||||
reply->error = EINVAL;
|
||||
reply->msg[0] = '\0'; /* vinum(8) doesn't do this */
|
||||
return;
|
||||
|
||||
case sd_object:
|
||||
sd = validsd(msg->index, reply);
|
||||
if (sd == NULL) /* not a valid subdisk */
|
||||
return;
|
||||
plex = validplex(msg->otherobject, reply);
|
||||
if (plex) {
|
||||
if (sd->plexno >= 0) { /* already belong to a plex */
|
||||
reply->error = EBUSY; /* no message, the user should check */
|
||||
reply->msg[0] = '\0';
|
||||
return;
|
||||
}
|
||||
sd->plexoffset = msg->offset; /* this is where we want it */
|
||||
set_sd_state(sd->sdno, sd_stale, setstate_force); /* make sure it's stale */
|
||||
give_sd_to_plex(plex->plexno, sd->sdno); /* and give it to the plex */
|
||||
update_sd_config(sd->sdno, 0);
|
||||
save_config();
|
||||
reply->error = 0;
|
||||
}
|
||||
break;
|
||||
|
||||
case plex_object:
|
||||
plex = validplex(msg->index, reply); /* get plex */
|
||||
if (plex == NULL)
|
||||
return;
|
||||
if (plex->organization != plex_concat) { /* can't attach to striped and raid-5 */
|
||||
reply->error = EINVAL; /* no message, the user should check */
|
||||
reply->msg[0] = '\0';
|
||||
return;
|
||||
}
|
||||
vol = validvol(msg->otherobject, reply); /* and volume information */
|
||||
if (vol) {
|
||||
if ((vol->plexes == MAXPLEX) /* we have too many already */
|
||||
||(plex->volno >= 0)) { /* or the plex has an owner */
|
||||
reply->error = EINVAL; /* no message, the user should check */
|
||||
reply->msg[0] = '\0';
|
||||
return;
|
||||
}
|
||||
set_plex_state(plex->plexno, plex_down, setstate_force); /* make sure it's down */
|
||||
give_plex_to_volume(msg->otherobject, msg->index); /* and give it to the volume */
|
||||
update_plex_config(plex->plexno, 0);
|
||||
save_config();
|
||||
if (plex->state == plex_reviving)
|
||||
reply->error = EAGAIN; /* need to revive it */
|
||||
else
|
||||
reply->error = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* detach an object from a superior object */
|
||||
void
|
||||
detachobject(struct vinum_ioctl_msg *msg)
|
||||
{
|
||||
struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
|
||||
struct sd *sd;
|
||||
struct plex *plex;
|
||||
struct volume *vol;
|
||||
int sdno;
|
||||
int plexno;
|
||||
|
||||
switch (msg->type) {
|
||||
case drive_object: /* you can't attach a drive to anything */
|
||||
case volume_object: /* nor a volume */
|
||||
case invalid_object: /* "this can't happen" */
|
||||
reply->error = EINVAL;
|
||||
reply->msg[0] = '\0'; /* vinum(8) doesn't do this */
|
||||
return;
|
||||
|
||||
case sd_object:
|
||||
sd = validsd(msg->index, reply);
|
||||
if (sd == NULL)
|
||||
return;
|
||||
if (sd->plexno < 0) { /* doesn't belong to a plex */
|
||||
reply->error = ENOENT;
|
||||
strcpy(reply->msg, "Subdisk is not attached");
|
||||
return;
|
||||
} else { /* valid plex number */
|
||||
plex = &PLEX[sd->plexno];
|
||||
if ((!msg->force) /* don't force things */
|
||||
&&((plex->state == plex_up) /* and the plex is up */
|
||||
||((plex->state == plex_flaky) && sd->state == sd_up))) { /* or flaky with this sd up */
|
||||
reply->error = EBUSY; /* we need this sd */
|
||||
reply->msg[0] = '\0';
|
||||
return;
|
||||
}
|
||||
sd->plexno = -1; /* anonymous sd */
|
||||
if (plex->subdisks == 1) { /* this was the only subdisk */
|
||||
Free(plex->sdnos); /* free the subdisk array */
|
||||
plex->sdnos = NULL; /* and note the fact */
|
||||
plex->subdisks_allocated = 0; /* no subdisk space */
|
||||
} else {
|
||||
for (sdno = 0; sdno < plex->subdisks; sdno++) {
|
||||
if (plex->sdnos[sdno] == msg->index) /* found our subdisk */
|
||||
break;
|
||||
}
|
||||
if (sdno < (plex->subdisks - 1)) /* not the last one, compact */
|
||||
bcopy(&plex->sdnos[sdno + 1],
|
||||
&plex->sdnos[sdno],
|
||||
(plex->subdisks - 1 - sdno) * sizeof(int));
|
||||
}
|
||||
plex->subdisks--;
|
||||
rebuild_plex_unmappedlist(plex); /* rebuild the unmapped list */
|
||||
if (!bcmp(plex->name, sd->name, strlen(plex->name))) { /* this subdisk is named after the plex */
|
||||
bcopy(sd->name,
|
||||
&sd->name[3],
|
||||
min(strlen(sd->name), MAXSDNAME - 3));
|
||||
bcopy("ex-", sd->name, 3);
|
||||
sd->name[MAXSDNAME - 1] = '\0';
|
||||
}
|
||||
update_plex_config(plex->plexno, 0);
|
||||
if ((plex->organization == plex_striped) /* we've just mutilated our plex, */
|
||||
||(plex->organization == plex_striped)) /* the data no longer matches */
|
||||
set_plex_state(plex->plexno,
|
||||
plex_down,
|
||||
setstate_force | setstate_configuring);
|
||||
update_sd_config(sd->sdno, 0);
|
||||
save_config();
|
||||
reply->error = 0;
|
||||
}
|
||||
return;
|
||||
|
||||
case plex_object:
|
||||
plex = validplex(msg->index, reply); /* get plex */
|
||||
if (plex == NULL)
|
||||
return;
|
||||
if (plex->volno >= 0) {
|
||||
int volno = plex->volno;
|
||||
|
||||
vol = &VOL[volno];
|
||||
if ((!msg->force) /* don't force things */
|
||||
&&((vol->state == volume_up) /* and the volume is up */
|
||||
&&(vol->plexes == 1))) { /* and this is the last plex */
|
||||
/* XXX As elsewhere, check whether we will lose
|
||||
* mapping by removing this plex */
|
||||
reply->error = EBUSY; /* we need this plex */
|
||||
reply->msg[0] = '\0';
|
||||
return;
|
||||
}
|
||||
plex->volno = -1; /* anonymous plex */
|
||||
for (plexno = 0; plexno < vol->plexes; plexno++) {
|
||||
if (vol->plex[plexno] == msg->index) /* found our plex */
|
||||
break;
|
||||
}
|
||||
if (plexno < (vol->plexes - 1)) /* not the last one, compact */
|
||||
bcopy(&vol[plexno + 1], &vol[plexno], (vol->plexes - 1 - plexno) * sizeof(int));
|
||||
vol->plexes--;
|
||||
if (!bcmp(vol->name, plex->name, strlen(vol->name))) { /* this plex is named after the volume */
|
||||
/* First, check if the subdisks are the same */
|
||||
if (msg->recurse) {
|
||||
int sdno;
|
||||
|
||||
for (sdno = 0; sdno < plex->subdisks; sdno++) {
|
||||
struct sd *sd = &SD[plex->sdnos[sdno]];
|
||||
|
||||
if (!bcmp(plex->name, sd->name, strlen(plex->name))) { /* subdisk is named after the plex */
|
||||
bcopy(sd->name, &sd->name[3], min(strlen(sd->name), MAXSDNAME - 3));
|
||||
bcopy("ex-", sd->name, 3);
|
||||
sd->name[MAXSDNAME - 1] = '\0';
|
||||
}
|
||||
}
|
||||
}
|
||||
bcopy(plex->name, &plex->name[3], min(strlen(plex->name), MAXPLEXNAME - 3));
|
||||
bcopy("ex-", plex->name, 3);
|
||||
plex->name[MAXPLEXNAME - 1] = '\0';
|
||||
}
|
||||
update_plex_config(plex->plexno, 0);
|
||||
update_volume_config(volno, 0);
|
||||
save_config();
|
||||
reply->error = 0;
|
||||
} else {
|
||||
reply->error = ENOENT;
|
||||
strcpy(reply->msg, "Plex is not attached");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
renameobject(struct vinum_rename_msg *msg)
|
||||
{
|
||||
struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
|
||||
struct drive *drive;
|
||||
struct sd *sd;
|
||||
struct plex *plex;
|
||||
struct volume *vol;
|
||||
|
||||
switch (msg->type) {
|
||||
case drive_object: /* you can't attach a drive to anything */
|
||||
if (find_drive(msg->newname, 0) >= 0) { /* we have that name already, */
|
||||
reply->error = EEXIST;
|
||||
reply->msg[0] = '\0';
|
||||
return;
|
||||
}
|
||||
drive = validdrive(msg->index, reply);
|
||||
if (drive) {
|
||||
bcopy(msg->newname, drive->label.name, MAXDRIVENAME);
|
||||
save_config();
|
||||
reply->error = 0;
|
||||
}
|
||||
return;
|
||||
|
||||
case sd_object: /* you can't attach a subdisk to anything */
|
||||
if (find_subdisk(msg->newname, 0) >= 0) { /* we have that name already, */
|
||||
reply->error = EEXIST;
|
||||
reply->msg[0] = '\0';
|
||||
return;
|
||||
}
|
||||
sd = validsd(msg->index, reply);
|
||||
if (sd) {
|
||||
bcopy(msg->newname, sd->name, MAXSDNAME);
|
||||
update_sd_config(sd->sdno, 0);
|
||||
save_config();
|
||||
reply->error = 0;
|
||||
}
|
||||
return;
|
||||
|
||||
case plex_object: /* you can't attach a plex to anything */
|
||||
if (find_plex(msg->newname, 0) >= 0) { /* we have that name already, */
|
||||
reply->error = EEXIST;
|
||||
reply->msg[0] = '\0';
|
||||
return;
|
||||
}
|
||||
plex = validplex(msg->index, reply);
|
||||
if (plex) {
|
||||
bcopy(msg->newname, plex->name, MAXPLEXNAME);
|
||||
update_plex_config(plex->plexno, 0);
|
||||
save_config();
|
||||
reply->error = 0;
|
||||
}
|
||||
return;
|
||||
|
||||
case volume_object: /* you can't attach a volume to anything */
|
||||
if (find_volume(msg->newname, 0) >= 0) { /* we have that name already, */
|
||||
reply->error = EEXIST;
|
||||
reply->msg[0] = '\0';
|
||||
return;
|
||||
}
|
||||
vol = validvol(msg->index, reply);
|
||||
if (vol) {
|
||||
bcopy(msg->newname, vol->name, MAXVOLNAME);
|
||||
update_volume_config(msg->index, 0);
|
||||
save_config();
|
||||
reply->error = 0;
|
||||
}
|
||||
return;
|
||||
|
||||
case invalid_object:
|
||||
reply->error = EINVAL;
|
||||
reply->msg[0] = '\0';
|
||||
}
|
||||
}
|
||||
|
||||
/* Replace one object with another */
|
||||
void
|
||||
replaceobject(struct vinum_ioctl_msg *msg)
|
||||
{
|
||||
struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
|
||||
|
||||
reply->error = ENODEV; /* until I know how to do this */
|
||||
strcpy(reply->msg, "replace not implemented yet");
|
||||
/* save_config (); */
|
||||
}
|
120
lkm/vinum/vinumkw.h
Normal file
120
lkm/vinum/vinumkw.h
Normal file
@ -0,0 +1,120 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: vinumkw.h,v 1.7 1998/08/07 02:35:51 grog Exp grog $
|
||||
*/
|
||||
|
||||
/* Command keywords that vinum knows. These include both user-level
|
||||
* and kernel-level stuff */
|
||||
|
||||
/* Our complete vocabulary. The names of the commands are
|
||||
* the same as the identifier without the kw_ at the beginning
|
||||
* (i.e. kw_create defines the "create" keyword). Preprocessor
|
||||
* magic in parser.c does the rest. */
|
||||
/*
 * Every keyword vinum understands.  Enumerators are numbered from 0 in
 * the order given; an enumerator written as "kw_x = kw_y" is a second
 * spelling with the same value.  The order must not change, since the
 * values follow from it.
 */
enum keyword {
    /* basic commands */
    kw_create, kw_modify,
    kw_list, kw_l = kw_list,
    kw_ld,                                                  /* list drive */
    kw_ls,                                                  /* list subdisk */
    kw_lp,                                                  /* list plex */
    kw_lv,                                                  /* list volume */
    kw_set, kw_rm, kw_start, kw_stop,

    /* object types */
    kw_drive,
    kw_sd, kw_subdisk = kw_sd,
    kw_plex,
    kw_volume, kw_vol = kw_volume,

    /* object attributes */
    kw_read, kw_readpol, kw_org, kw_name,
    kw_concat, kw_striped, kw_raid5,
    kw_driveoffset, kw_plexoffset,
    kw_len, kw_length = kw_len,
    kw_state, kw_setupstate,

    /* flag names */
    kw_d, kw_f, kw_r, kw_s, kw_v,

    kw_round,                                               /* round robin */
    kw_prefer,                                              /* prefer plex */
    kw_device, kw_init, kw_label, kw_resetconfig,
    kw_writethrough, kw_writeback, kw_raw,
    kw_resetstats, kw_attach, kw_detach, kw_rename,
    kw_printconfig, kw_replace, kw_detached,
#ifdef DEBUG
    kw_debug,                                               /* go into debugger */
    kw_info,
#endif
    kw_invalid_keyword = -1
};
|
||||
|
||||
struct _keywords {
|
||||
char *name;
|
||||
enum keyword keyword;
|
||||
};
|
||||
|
||||
struct keywordset {
|
||||
int size;
|
||||
struct _keywords *k;
|
||||
};
|
||||
|
||||
extern struct _keywords keywords[];
|
||||
extern struct _keywords flag_keywords[];
|
||||
|
||||
extern struct keywordset keyword_set;
|
||||
extern struct keywordset flag_set;
|
213
lkm/vinum/vinumstate.h
Normal file
213
lkm/vinum/vinumstate.h
Normal file
@ -0,0 +1,213 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: vinumstate.h,v 1.11 1998/08/04 06:22:49 grog Exp grog $
|
||||
*/
|
||||
|
||||
/* This file gets read by makestatetext to create text files
|
||||
* with the names of the states, so don't change the file
|
||||
* format */
|
||||
|
||||
/* volume states */
enum volumestate {
|
||||
volume_unallocated,
|
||||
/* present but unused. Must be 0 */
|
||||
|
||||
volume_uninit,
|
||||
/* mentioned elsewhere but not defined */
|
||||
|
||||
volume_down,
|
||||
|
||||
/* The volume is up and functional, but not all plexes may be available */
|
||||
volume_up,
|
||||
volume_laststate = volume_up /* last value, for table dimensions */
|
||||
};
|
||||
|
||||
enum plexstate {
|
||||
/* An empty entry, not a plex at all. */
|
||||
plex_unallocated,
|
||||
|
||||
/* The plex has been allocated, but its configuration
|
||||
* is not complete */
|
||||
plex_init,
|
||||
|
||||
/* A plex which has gone completely down because of
|
||||
* I/O errors. */
|
||||
plex_faulty,
|
||||
|
||||
/* A plex which has been taken down by the
|
||||
* administrator. */
|
||||
plex_down,
|
||||
|
||||
/* A plex which is currently being brought up after
|
||||
* being not up. This involves copying data from
|
||||
* another plex */
|
||||
plex_reviving,
|
||||
|
||||
/* A plex which is being initialized */
|
||||
plex_initializing,
|
||||
|
||||
/* *** The remaining states represent plexes which are
|
||||
* at least partially up. Keep these separate so that
|
||||
* they can be checked more easily. */
|
||||
|
||||
/* A plex entry which is at least partially up. Not
|
||||
* all subdisks are available, and an inconsistency
|
||||
* has occurred. If no other plex is uncorrupted,
|
||||
* the volume is no longer consistent. */
|
||||
plex_corrupt,
|
||||
|
||||
plex_firstup = plex_corrupt, /* first "up" state */
|
||||
|
||||
/* A plex entry which is at least partially up. Not
|
||||
* all subdisks are available, but so far no
|
||||
* inconsistency has occurred (this will change with
|
||||
* the first write to the address space occupied by
|
||||
* a defective subdisk). A RAID 5 plex with one subdisk
|
||||
* down will remain degraded even after a write */
|
||||
plex_degraded,
|
||||
|
||||
/* A plex which is really up, but which has a reborn
|
||||
* subdisk which we don't completely trust, and
|
||||
* which we don't want to read if we can avoid it */
|
||||
plex_flaky,
|
||||
|
||||
/* A plex entry which is completely up. All subdisks
|
||||
* are up. */
|
||||
plex_up,
|
||||
|
||||
plex_laststate = plex_up /* last value, for table dimensions */
|
||||
};
|
||||
|
||||
/* subdisk states */
|
||||
enum sdstate {
|
||||
/* An empty entry, not a subdisk at all. */
|
||||
sd_unallocated,
|
||||
|
||||
/* A subdisk entry which has not been created
|
||||
* completely. Some fields may be empty.
|
||||
*/
|
||||
sd_uninit,
|
||||
|
||||
/* A subdisk entry which has been created completely.
|
||||
* All fields are correct, but the disk hasn't
|
||||
* been updated.
|
||||
*/
|
||||
sd_init,
|
||||
|
||||
/* A subdisk entry which has been created completely and
|
||||
* which is currently being initialized */
|
||||
sd_initializing,
|
||||
|
||||
/* A subdisk entry which has been created completely.
|
||||
* All fields are correct, and the disk has been
|
||||
* updated, but there is no data on the disk.
|
||||
*/
|
||||
sd_empty,
|
||||
|
||||
/* *** The following states represent invalid data */
|
||||
/* A subdisk entry which has been created completely.
|
||||
* All fields are correct, the disk has been updated,
|
||||
* and the data was valid, but since then the drive
|
||||
* has gone down, and as a result updates have been
|
||||
* missed.
|
||||
*/
|
||||
sd_obsolete,
|
||||
|
||||
/* A subdisk entry which has been created completely.
|
||||
* All fields are correct, the disk has been updated,
|
||||
* and the data was valid, but since then the drive
|
||||
* has gone down, updates have been lost, and then
|
||||
* the drive came up again.
|
||||
*/
|
||||
sd_stale,
|
||||
|
||||
/* *** The following states represent valid, inaccessible data */
|
||||
/* A subdisk entry which has been created completely.
|
||||
* All fields are correct, the disk has been updated,
|
||||
* and the data was valid, but since then the drive
|
||||
* has gone down. No attempt has been made to write
|
||||
* to the subdisk since the crash.
|
||||
*/
|
||||
sd_crashed,
|
||||
|
||||
/* A subdisk entry which was up, which contained
|
||||
* valid data, and which was taken down by the
|
||||
* administrator. The data is valid. */
|
||||
sd_down,
|
||||
|
||||
/* *** The following states represent accessible subdisks
|
||||
* with valid data */
|
||||
|
||||
/* A subdisk entry which has been created completely.
|
||||
* All fields are correct, the disk has been updated,
|
||||
* and the data was valid, but since then the drive
|
||||
* has gone down and up again. No updates were lost,
|
||||
* but it is possible that the subdisk has been
|
||||
* damaged. We won't read from this subdisk if we
|
||||
* have a choice. If this is the only subdisk which
|
||||
* covers this address space in the plex, we set its
|
||||
* state to sd_up under these circumstances, so this
|
||||
* status implies that there is another subdisk to
|
||||
* fulfil the request.
|
||||
*/
|
||||
sd_reborn,
|
||||
|
||||
/* A subdisk entry which has been created completely.
|
||||
* All fields are correct, the disk has been updated,
|
||||
* and the data is valid.
|
||||
*/
|
||||
sd_up,
|
||||
|
||||
sd_laststate = sd_up /* last value, for table dimensions */
|
||||
};
|
||||
|
||||
enum drivestate {
|
||||
drive_unallocated,
|
||||
/* present but unused. Must be 0 */
|
||||
|
||||
drive_uninit,
|
||||
/* just mentioned in some other config entry */
|
||||
|
||||
drive_down,
|
||||
/* not accessible */
|
||||
|
||||
drive_coming_up,
|
||||
/* in the process of being brought up */
|
||||
|
||||
drive_up,
|
||||
/* up and running */
|
||||
|
||||
drive_laststate = drive_up /* last value, for table dimensions */
|
||||
};
|
510
lkm/vinum/vinumvar.h
Normal file
510
lkm/vinum/vinumvar.h
Normal file
@ -0,0 +1,510 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: vinumvar.h,v 1.15 1998/08/14 06:36:41 grog Exp grog $
|
||||
*/
|
||||
|
||||
/* XXX gdb can't find our global pointers, so use this kludge to
|
||||
* point to them locally. Remove after testing */
|
||||
#define BROKEN_GDB struct _vinum_conf *VC = &vinum_conf
|
||||
|
||||
#include <sys/time.h>
|
||||
#include "vinumstate.h"
|
||||
/* Some configuration maxima. They're an enum because
|
||||
* we can't define global constants. Sorry about that.
|
||||
*
|
||||
* These aren't as bad as they look: most of them
|
||||
* are soft limits. Only the MAXCONFIG parameter is set in stone
|
||||
*/
|
||||
|
||||
enum constants {
|
||||
VINUM_HEADER = 512, /* size of header on disk */
|
||||
MAXCONFIGLINE = 1024, /* maximum size of a single config line */
|
||||
/* XXX Do we still need this? */
|
||||
MINVINUMSLICE = 1048576, /* minimum size of a slice */
|
||||
|
||||
CDEV_MAJOR = 91, /* major number for character device */
|
||||
BDEV_MAJOR = 25, /* and block device */
|
||||
|
||||
ROUND_ROBIN_READPOL = -1, /* round robin read policy */
|
||||
|
||||
/* type field in minor number */
|
||||
VINUM_VOLUME_TYPE = 0,
|
||||
VINUM_PLEX_TYPE = 1,
|
||||
VINUM_SD_TYPE = 2,
|
||||
VINUM_DRIVE_TYPE = 3,
|
||||
VINUM_SUPERDEV_TYPE = 4, /* super device. */
|
||||
|
||||
/* Shifts for the individual fields in the device */
|
||||
VINUM_TYPE_SHIFT = 28,
|
||||
VINUM_VOL_SHIFT = 0,
|
||||
VINUM_PLEX_SHIFT = 16,
|
||||
VINUM_SD_SHIFT = 20,
|
||||
VINUM_VOL_WIDTH = 8,
|
||||
VINUM_PLEX_WIDTH = 3,
|
||||
VINUM_SD_WIDTH = 8,
|
||||
MAJORDEV_SHIFT = 8,
|
||||
|
||||
|
||||
/* Create a block device number */
|
||||
#define VINUMBDEV(v,p,s,t) ((BDEV_MAJOR << MAJORDEV_SHIFT) \
|
||||
| (v << VINUM_VOL_SHIFT) \
|
||||
| (p << VINUM_PLEX_SHIFT) \
|
||||
| (s << VINUM_SD_SHIFT) \
|
||||
| (t << VINUM_TYPE_SHIFT) )
|
||||
|
||||
/* And a character device number */
|
||||
#define VINUMCDEV(v,p,s,t) ((CDEV_MAJOR << MAJORDEV_SHIFT) \
|
||||
| (v << VINUM_VOL_SHIFT) \
|
||||
| (p << VINUM_PLEX_SHIFT) \
|
||||
| (s << VINUM_SD_SHIFT) \
|
||||
| (t << VINUM_TYPE_SHIFT) )
|
||||
|
||||
/* extract device type */
|
||||
/* Yields the type field: (x) shifted right by VINUM_TYPE_SHIFT, masked to
 * 3 bits.  The argument is parenthesized so that expressions such as
 * DEVTYPE (a | b) are shifted as a whole. */
#define DEVTYPE(x) (((x) >> VINUM_TYPE_SHIFT) & 7)
|
||||
|
||||
/* extract volume number */
|
||||
/* Yields the low VINUM_VOL_WIDTH bits of (x).  The argument is
 * parenthesized so that expressions such as VOLNO (a | b) are masked
 * as a whole. */
#define VOLNO(x) ((x) & ((1 << VINUM_VOL_WIDTH) - 1))
|
||||
|
||||
/* extract plex number */
|
||||
/* Looks up VOL [VOLNO (x)].plex [] at the index held in the
 * VINUM_PLEX_WIDTH-bit field at VINUM_PLEX_SHIFT.  The argument is
 * parenthesized so that compound expressions are shifted and masked
 * as a whole. */
#define PLEXNO(x) (VOL [VOLNO ((x))].plex [((x) >> VINUM_PLEX_SHIFT) & ((1 << VINUM_PLEX_WIDTH) - 1)])
|
||||
|
||||
/* extract subdisk number */
|
||||
/* Looks up PLEX [PLEXNO (x)].sdnos [] at the index held in the
 * VINUM_SD_WIDTH-bit field at VINUM_SD_SHIFT.  The argument is
 * parenthesized so that compound expressions are shifted and masked
 * as a whole. */
#define SDNO(x) (PLEX [PLEXNO ((x))].sdnos [((x) >> VINUM_SD_SHIFT) & ((1 << VINUM_SD_WIDTH) - 1)])
|
||||
|
||||
/* extract drive number */
|
||||
/* Yields the driveno member of SD [SDNO (x)].  The argument is
 * parenthesized before being handed to SDNO so that compound
 * expressions are decoded as a whole. */
#define DRIVENO(x) (SD [SDNO ((x))].driveno)
|
||||
|
||||
VINUM_SUPERDEV = VINUMBDEV(0, 0, 0, VINUM_SUPERDEV_TYPE), /* superdevice number */
|
||||
|
||||
/* the number of object entries to cater for initially, and also the
|
||||
* value by which they are incremented. It doesn't take long
|
||||
* to extend them, so theoretically we could start with 1 of each, but
|
||||
* it's untidy to allocate such small areas. These values are
|
||||
* probably too small.
|
||||
*/
|
||||
|
||||
INITIAL_DRIVES = 4,
|
||||
INITIAL_VOLUMES = 4,
|
||||
INITIAL_PLEXES = 8,
|
||||
INITIAL_SUBDISKS = 16,
|
||||
INITIAL_SUBDISKS_IN_PLEX = 4, /* number of subdisks to allocate to a plex */
|
||||
INITIAL_SUBDISKS_IN_DRIVE = 4, /* number of subdisks to allocate to a drive */
|
||||
INITIAL_DRIVE_FREELIST = 16, /* number of entries in drive freelist */
|
||||
PLEX_REGION_TABLE_SIZE = 8, /* number of entries in plex region tables */
|
||||
INITIAL_LOCKS = 8, /* number of locks to allocate to a volume */
|
||||
DEFAULT_REVIVE_BLOCKSIZE = 32768, /* size of block to transfer in one op */
|
||||
};
|
||||
|
||||
/* device numbers */
|
||||
|
||||
/*
|
||||
* 31 30 28 27 20 19 18 16 15 8 7 0
|
||||
* |-----------------------------------------------------------------------------------------------|
|
||||
* |X | Type | Subdisk number | X| Plex | Major number | volume number |
|
||||
* |-----------------------------------------------------------------------------------------------|
|
||||
*
|
||||
* 0x2 03 1 19 06
|
||||
*/
|
||||
struct devcode {
|
||||
/* CARE. These fields assume a big-endian word. On a
|
||||
* little-endian system, they're the wrong way around */
|
||||
unsigned volume:8; /* up to 256 volumes */
|
||||
unsigned major:8; /* this is where the major number fits */
|
||||
unsigned plex:3; /* up to 8 plexes per volume */
|
||||
unsigned unused:1; /* up for grabs */
|
||||
unsigned sd:8; /* up to 256 subdisks per plex */
|
||||
unsigned type:3; /* type of object */
|
||||
/* type field
|
||||
VINUM_VOLUME = 0,
|
||||
VINUM_PLEX = 1,
|
||||
VINUM_SUBDISK = 2,
|
||||
VINUM_DRIVE = 3,
|
||||
VINUM_SUPERDEV = 4, */
|
||||
unsigned signbit:1; /* to make 32 bits */
|
||||
};
|
||||
|
||||
#define VINUM_DIR "/dev/vinum"
|
||||
#define VINUM_RDIR "/dev/rvinum"
|
||||
#define VINUM_SUPERDEV_NAME VINUM_DIR"/control"
|
||||
#define MAXDRIVENAME 32 /* maximum length of a device name */
|
||||
#define MAXSDNAME 64 /* maximum length of a subdisk name */
|
||||
#define MAXPLEXNAME 64 /* maximum length of a plex name */
|
||||
#define MAXVOLNAME 64 /* maximum length of a volume name */
|
||||
#define MAXNAME 64 /* maximum length of any name */
|
||||
#define MAXVOLPLEX 8 /* maximum number of plexes in a volume */
|
||||
|
||||
/* Flags for all objects. Most of them only apply to
|
||||
* specific objects, but we have space for all in any
|
||||
* 32 bit flags word. */
|
||||
enum objflags {
|
||||
VF_LOCKED = 1, /* somebody has locked access to this object */
|
||||
VF_LOCKING = 2, /* we want access to this object */
|
||||
VF_WRITETHROUGH = 8, /* volume: write through */
|
||||
VF_INITED = 0x10, /* unit has been initialized */
|
||||
VF_WLABEL = 0x20, /* label area is writable */
|
||||
VF_LABELLING = 0x40, /* unit is currently being labelled */
|
||||
VF_WANTED = 0x80, /* someone is waiting to obtain a lock */
|
||||
VF_RAW = 0x100, /* raw volume (no file system) */
|
||||
VF_LOADED = 0x200, /* module is loaded */
|
||||
VF_CONFIGURING = 0x400, /* somebody is changing the config */
|
||||
VF_WILL_CONFIGURE = 0x800, /* somebody wants to change the config */
|
||||
VF_CONFIG_INCOMPLETE = 0x1000, /* haven't finished changing the config */
|
||||
VF_CONFIG_SETUPSTATE = 0x2000, /* set a volume up if all plexes are empty */
|
||||
VF_READING_CONFIG = 0x4000, /* we're reading config database from disk */
|
||||
VF_KERNELOP = 0x8000, /* we're performing ops from kernel space */
|
||||
};
|
||||
|
||||
/* Global configuration information for the vinum subsystem */
|
||||
struct _vinum_conf {
|
||||
/* Pointers to vinum structures */
|
||||
struct drive *drive;
|
||||
struct sd *sd;
|
||||
struct plex *plex;
|
||||
struct volume *volume;
|
||||
|
||||
/* the number allocated */
|
||||
int drives_allocated;
|
||||
int subdisks_allocated;
|
||||
int plexes_allocated;
|
||||
int volumes_allocated;
|
||||
|
||||
/* and the number currently in use */
|
||||
int drives_used;
|
||||
int subdisks_used;
|
||||
int plexes_used;
|
||||
int volumes_used;
|
||||
|
||||
int flags;
|
||||
int opencount; /* number of times we've been opened */
|
||||
#if DEBUG
|
||||
int lastrq;
|
||||
struct buf *lastbuf;
|
||||
#endif
|
||||
};
|
||||
|
||||
/* Use these defines to simplify code */
|
||||
#define DRIVE vinum_conf.drive
|
||||
#define SD vinum_conf.sd
|
||||
#define PLEX vinum_conf.plex
|
||||
#define VOL vinum_conf.volume
|
||||
#define VFLAGS vinum_conf.flags
|
||||
|
||||
/* Slice header
|
||||
|
||||
* Vinum drives start with this structure:
|
||||
*
|
||||
* Sector
|
||||
* |--------------------------------------|
|
||||
* | PDP-11 memorial boot block | 0
|
||||
* |--------------------------------------|
|
||||
* | Disk label, maybe | 1
|
||||
* |--------------------------------------|
|
||||
* | Slice definition (vinum_hdr) | 2
|
||||
* |--------------------------------------|
|
||||
* | |
|
||||
* | Configuration info, first copy | 3
|
||||
* | |
|
||||
* |--------------------------------------|
|
||||
* | |
|
||||
* | Configuration info, second copy | 3 + size of config
|
||||
* | |
|
||||
* |--------------------------------------|
|
||||
*/
|
||||
|
||||
/* Sizes and offsets of our information */
|
||||
enum {
|
||||
VINUM_LABEL_OFFSET = 4096, /* offset of vinum label */
|
||||
VINUMHEADERLEN = 512, /* size of vinum label */
|
||||
VINUM_CONFIG_OFFSET = 4608, /* offset of first config copy */
|
||||
MAXCONFIG = 65536, /* and size of config copy */
|
||||
DATASTART = (MAXCONFIG * 2 + VINUM_CONFIG_OFFSET) / DEV_BSIZE /* this is where the data starts */
|
||||
};
|
||||
|
||||
/* hostname is 256 bytes long, but we don't need to shlep
|
||||
* multiple copies in vinum. We use the host name just
|
||||
* to identify this system, and 32 bytes should be ample
|
||||
* for that purpose */
|
||||
#define VINUMHOSTNAMELEN 32
|
||||
|
||||
struct vinum_label {
|
||||
char sysname[VINUMHOSTNAMELEN]; /* system name at time of creation */
|
||||
char name[MAXDRIVENAME]; /* our name of the drive */
|
||||
struct timeval date_of_birth; /* the time it was created */
|
||||
struct timeval last_update; /* and the time of last update */
|
||||
off_t drive_size; /* total size in bytes of the drive.
|
||||
* This value includes the headers */
|
||||
};
|
||||
|
||||
struct vinum_hdr {
|
||||
long long magic; /* we're long on magic numbers */
|
||||
/* XXX Get these right for big-endian */
|
||||
#define VINUM_MAGIC 22322600044678729LL /* should be this */
|
||||
#define VINUM_NOMAGIC 22322600044678990LL /* becomes this after obliteration */
|
||||
int config_length; /* size in bytes of each copy of the
|
||||
* configuration info.
|
||||
* This must be a multiple of the sector size. */
|
||||
|
||||
struct vinum_label label; /* unique label */
|
||||
};
|
||||
|
||||
/* Information returned from read_drive_label */
|
||||
enum drive_label_info {
|
||||
DL_CANT_OPEN, /* invalid partition */
|
||||
DL_NOT_OURS, /* valid partition, but no vinum label */
|
||||
DL_DELETED_LABEL, /* valid partition, deleted label found */
|
||||
DL_WRONG_DRIVE, /* drive name doesn't match */
|
||||
DL_OURS /* valid partition and label found */
|
||||
};
|
||||
|
||||
/*** Drive definitions ***/
|
||||
/* A drive corresponds to a disk slice. We use a different term to show
|
||||
* the difference in usage: it doesn't have to be a slice, and could
|
||||
* theoretically be a complete, unpartitioned disk */
|
||||
|
||||
struct drive {
|
||||
enum drivestate state; /* current state */
|
||||
int subdisks_allocated; /* number of entries in sd */
|
||||
int subdisks_used; /* and the number used */
|
||||
int blocksize; /* size of fs blocks */
|
||||
u_int64_t sectors_available; /* number of sectors still available */
|
||||
int secsperblock;
|
||||
int lasterror; /* last error on drive */
|
||||
int driveno; /* index of drive in vinum_conf */
|
||||
int opencount; /* number of up subdisks */
|
||||
u_int64_t reads; /* number of reads on this drive */
|
||||
u_int64_t writes; /* number of writes on this drive */
|
||||
u_int64_t bytes_read; /* number of bytes read */
|
||||
u_int64_t bytes_written; /* number of bytes written */
|
||||
dev_t dev; /* and device number */
|
||||
char devicename[MAXDRIVENAME]; /* name of the slice it's on */
|
||||
struct vnode *vp; /* vnode pointer */
|
||||
struct proc *p;
|
||||
struct vinum_label label; /* and the label information */
|
||||
struct partinfo partinfo; /* partition information */
|
||||
int freelist_size; /* number of entries alloced in free list */
|
||||
int freelist_entries; /* number of entries used in free list */
|
||||
struct drive_freelist { /* sorted list of free space on drive */
|
||||
u_int64_t offset;
|
||||
long sectors;
|
||||
} *freelist;
|
||||
};
|
||||
|
||||
/*** Subdisk definitions ***/
|
||||
|
||||
struct sd {
|
||||
enum sdstate state; /* state */
|
||||
/* offsets in blocks */
|
||||
int64_t driveoffset; /* offset on drive */
|
||||
int64_t plexoffset; /* offset in plex */
|
||||
u_int64_t sectors; /* and length in sectors */
|
||||
int plexno; /* index of plex, if it belongs */
|
||||
int driveno; /* index of the drive on which it is located */
|
||||
int sdno; /* our index in vinum_conf */
|
||||
int pid; /* pid of process which opened us */
|
||||
u_int64_t reads; /* number of reads on this subdisk */
|
||||
u_int64_t writes; /* number of writes on this subdisk */
|
||||
u_int64_t bytes_read; /* number of bytes read */
|
||||
u_int64_t bytes_written; /* number of bytes written */
|
||||
char name[MAXSDNAME]; /* name of subdisk */
|
||||
};
|
||||
|
||||
/*** Plex definitions ***/
|
||||
|
||||
/* kinds of plex organization */
|
||||
enum plexorg {
|
||||
plex_disorg, /* disorganized */
|
||||
plex_concat, /* concatenated plex */
|
||||
plex_striped, /* striped plex */
|
||||
plex_raid5 /* RAID5 plex */
|
||||
};
|
||||
|
||||
/* Region in plex (either defective or unmapped) */
|
||||
struct plexregion {
|
||||
u_int64_t offset; /* start of region */
|
||||
u_int64_t length; /* length */
|
||||
};
|
||||
|
||||
struct plex {
|
||||
enum plexorg organization; /* Plex organization */
|
||||
enum plexstate state; /* and current state */
|
||||
u_int64_t length; /* total length of plex (max offset) */
|
||||
int flags;
|
||||
int stripesize; /* size of stripe or raid band, in sectors */
|
||||
int subdisks; /* number of associated subdisks */
|
||||
int subdisks_allocated; /* number of subdisks allocated space for */
|
||||
int *sdnos; /* list of component subdisks */
|
||||
int plexno; /* index of plex in vinum_conf */
|
||||
int volno; /* index of volume */
|
||||
int volplexno; /* number of plex in volume */
|
||||
int pid; /* pid of process which opened us */
|
||||
/* Lock information */
|
||||
int locks; /* number of locks used */
|
||||
int alloclocks; /* number of locks allocated */
|
||||
struct rangelock *lock; /* ranges of locked addresses */
|
||||
/* Statistics */
|
||||
u_int64_t reads; /* number of reads on this plex */
|
||||
u_int64_t writes; /* number of writes on this plex */
|
||||
u_int64_t bytes_read; /* number of bytes read */
|
||||
u_int64_t bytes_written; /* number of bytes written */
|
||||
u_int64_t multiblock; /* requests that needed more than one block */
|
||||
u_int64_t multistripe; /* requests that needed more than one stripe */
|
||||
/* revive parameters */
|
||||
u_int64_t revived; /* block number of current revive request */
|
||||
int revive_blocksize; /* revive block size (bytes) */
|
||||
int revive_interval; /* and time to wait between transfers */
|
||||
struct request *waitlist; /* list of requests waiting on revive op */
|
||||
/* geometry control */
|
||||
int defective_regions; /* number of regions which are defective */
|
||||
int defective_region_count; /* number of entries in defective_region */
|
||||
struct plexregion *defective_region; /* list of offset/length pairs: defective sds */
|
||||
int unmapped_regions; /* number of regions which are missing */
|
||||
int unmapped_region_count; /* number of entries in unmapped_region */
|
||||
struct plexregion *unmapped_region; /* list of offset/length pairs: missing sds */
|
||||
char name[MAXPLEXNAME]; /* name of plex */
|
||||
};
|
||||
|
||||
/*** Volume definitions ***/
|
||||
|
||||
#define MAXPLEX 8 /* maximum number of plexes */
|
||||
|
||||
|
||||
struct volume {
|
||||
enum volumestate state; /* current state */
|
||||
int plexes; /* number of plexes */
|
||||
int preferred_plex; /* plex to read from, -1 for round-robin */
|
||||
int last_plex_read; /* index of plex used for last read,
|
||||
* for round-robin */
|
||||
dev_t devno; /* device number */
|
||||
int flags; /* status and configuration flags */
|
||||
int opencount; /* number of opens (all the same process) */
|
||||
int openflags; /* flags supplied to last open(2) */
|
||||
u_int64_t size; /* size of volume */
|
||||
int disk; /* disk index */
|
||||
int blocksize; /* logical block size */
|
||||
int active; /* number of outstanding requests active */
|
||||
int subops; /* and the number of suboperations */
|
||||
pid_t pid; /* pid of locker */
|
||||
/* Statistics */
|
||||
u_int64_t bytes_read; /* number of bytes read */
|
||||
u_int64_t bytes_written; /* number of bytes written */
|
||||
u_int64_t reads; /* number of reads on this volume */
|
||||
u_int64_t writes; /* number of writes on this volume */
|
||||
u_int64_t recovered_reads; /* reads recovered from another plex */
|
||||
/* Unlike subdisks in the plex, space for the plex pointers is static */
|
||||
int plex[MAXPLEX]; /* index of plexes */
|
||||
char name[MAXVOLNAME]; /* name of volume */
|
||||
struct disklabel label; /* for DIOCGPART */
|
||||
};
|
||||
|
||||
/* Table expansion. Expand table, which contains oldcount
|
||||
* entries of type element, by increment entries, and change
|
||||
* oldcount accordingly */
|
||||
#define EXPAND(table, element, oldcount, increment) \
|
||||
{ \
|
||||
expand_table ((void **) &table, \
|
||||
oldcount * sizeof (element), \
|
||||
(oldcount + increment) * sizeof (element) ); \
|
||||
oldcount += increment; \
|
||||
}
|
||||
|
||||
/* Information on vinum's memory usage */
|
||||
struct meminfo {
|
||||
int mallocs; /* number of malloced blocks */
|
||||
int total_malloced; /* total amount malloced */
|
||||
int highwater; /* maximum number of mallocs */
|
||||
struct mc *malloced; /* pointer to kernel table */
|
||||
};
|
||||
|
||||
struct mc {
|
||||
int seq;
|
||||
int size;
|
||||
short line;
|
||||
short flags;
|
||||
#define ALLOC_KVA 1 /* allocated via kva calls */
|
||||
int *databuf; /* really vm_object_t */
|
||||
caddr_t address;
|
||||
char file[16];
|
||||
};
|
||||
|
||||
/* These enums are used by the state transition
|
||||
* routines. They're in bit map format:
|
||||
*
|
||||
* Bit 0: Other plexes in the volume are down
|
||||
* Bit 1: Other plexes in the volume are up
|
||||
* Bit 2: The current plex is up
|
||||
* Maybe they should be local to
|
||||
* state.c */
|
||||
enum volplexstate {
|
||||
volplex_onlyusdown = 0, /* we're the only plex, and we're down */
|
||||
volplex_alldown, /* 1: another plex is down, and so are we */
|
||||
volplex_otherup, /* 2: another plex is up */
|
||||
volplex_otherupdown, /* other plexes are up and down */
|
||||
volplex_onlyus, /* 4: we're up and alone */
|
||||
volplex_onlyusup, /* only we are up, others are down */
|
||||
volplex_allup, /* all plexes are up */
|
||||
volplex_someup /* some plexes are up, including us */
|
||||
};
|
||||
|
||||
/* state map for plex */
|
||||
enum sdstates {
|
||||
sd_emptystate = 1,
|
||||
sd_downstate = 2, /* found an SD which is down */
|
||||
sd_crashedstate = 4, /* found an SD which is crashed */
|
||||
sd_obsoletestate = 8, /* found an SD which is obsolete */
|
||||
sd_stalestate = 16, /* found an SD which is stale */
|
||||
sd_rebornstate = 32, /* found an SD which is reborn */
|
||||
sd_upstate = 64, /* found an SD which is up */
|
||||
sd_initstate = 128, /* found an SD which is init */
|
||||
sd_otherstate = 256 /* found an SD in some other state */
|
||||
};
|
||||
|
||||
/* This is really just a parameter to pass to
|
||||
* set_<foo>_state, but since it needs to be known
|
||||
* in the external definitions, we need to define
|
||||
* it here */
|
||||
enum setstateflags {
|
||||
setstate_none = 0, /* no flags */
|
||||
setstate_force = 1, /* force the state change */
|
||||
setstate_configuring = 2, /* we're currently configuring, don't save */
|
||||
setstate_recursing = 4, /* we're called from another setstate function */
|
||||
setstate_norecurse = 8 /* don't call other setstate functions */
|
||||
};
|
||||
|
||||
#ifdef DEBUG
|
||||
/* Debugging stuff */
|
||||
#define DEBUG_ADDRESSES 1
|
||||
#define DEBUG_NUMOUTPUT 2
|
||||
#endif
|
37
sys/dev/vinum/COPYRIGHT
Normal file
37
sys/dev/vinum/COPYRIGHT
Normal file
@ -0,0 +1,37 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: COPYRIGHT,v 1.1 1998/03/05 06:07:05 grog Exp grog $
|
||||
*/
|
26
sys/dev/vinum/Makefile
Normal file
26
sys/dev/vinum/Makefile
Normal file
@ -0,0 +1,26 @@
|
||||
# $Id: Makefile.lkm.lite,v 1.2 1998/08/13 06:07:29 grog Exp grog $
|
||||
|
||||
.PATH: ${.CURDIR}/../../sys/dev/ccd
|
||||
KMOD= vinum_mod
|
||||
SRCS= vinum.c vinum.h vnode_if.h parser.c config.c io.c util.c vinumhdr.h request.h \
|
||||
state.c memory.c request.c lock.c vinumext.h vinumio.h vinumkw.h \
|
||||
vinumstate.h vinumvar.h revive.c vinumioctl.c interrupt.c
|
||||
NOMAN=
|
||||
PSEUDO_LKM=
|
||||
CFLAGS = -I. -O -g -I/usr/include/machine -DDEBUG -Wall -Wno-unused -Wno-parentheses
|
||||
|
||||
CLEANFILES+= vinum.h vnode_if.h vnode_if.c
|
||||
|
||||
all:
|
||||
|
||||
# We don't need this, but the Makefile wants it
|
||||
vinum.h:
|
||||
touch $@
|
||||
|
||||
state.h: maketabs vinumstate.h
|
||||
./maketabs >state.h
|
||||
|
||||
maketabs: maketabs.c
|
||||
${CC} -g -o maketabs maketabs.c
|
||||
|
||||
.include <bsd.kmod.mk>
|
40
sys/dev/vinum/makestatetext
Executable file
40
sys/dev/vinum/makestatetext
Executable file
@ -0,0 +1,40 @@
|
||||
#!/bin/sh
|
||||
# Make statetexts.h from vinumstate.h
|
||||
# $Id: makestatetext,v 1.4 1998/03/13 05:36:16 grog Exp grog $
|
||||
infile=vinumstate.h
|
||||
ofile=statetexts.h
|
||||
cat <COPYRIGHT > $ofile
|
||||
|
||||
echo >>$ofile "/* Created by $0 on" `date`. "Do not edit */"
|
||||
echo >>$ofile
|
||||
echo >>$ofile "/* Drive state texts */"
|
||||
echo >>$ofile "char *drivestatetext [] =
|
||||
{ "
|
||||
# Pick out the "drive_<name>," lines (skipping those containing '=') and turn
# each into a quoted string.  The class is spelled out, including '_', rather
# than relying on the locale-dependent range A-z.
grep -E -e 'drive_[A-Za-z0-9_]*,' <$infile | grep -v = | sed 's: *drive_\([^,]*\).*: \"\1\",:' >>$ofile
|
||||
cat <<FOO >> $ofile
|
||||
};
|
||||
|
||||
/* Subdisk state texts */
|
||||
char *sdstatetext [] =
|
||||
{
|
||||
FOO
|
||||
# Pick out the "sd_<name>," lines (skipping those containing '=') and turn
# each into a quoted string.  The class is spelled out, including '_', rather
# than relying on the locale-dependent range A-z.
grep -E -e 'sd_[A-Za-z0-9_]*,' $infile | grep -v = | sed 's: *sd_\([^,]*\).*: \"\1\",:' >>$ofile
|
||||
cat <<FOO >> $ofile
|
||||
};
|
||||
|
||||
/* Plex state texts */
|
||||
char *plexstatetext [] =
|
||||
{
|
||||
FOO
|
||||
# Pick out the "plex_<name>," lines (skipping those containing '=') and turn
# each into a quoted string.  The class is spelled out, including '_', rather
# than relying on the locale-dependent range A-z.
grep -E -e 'plex_[A-Za-z0-9_]*,' $infile | grep -v = | sed 's: *plex_\([^,]*\).*: \"\1\",:' >>$ofile
|
||||
cat <<FOO >> $ofile
|
||||
};
|
||||
|
||||
/* Volume state texts */
|
||||
char *volstatetext [] =
|
||||
{
|
||||
FOO
|
||||
# Pick out the "volume_<name>," lines (skipping those containing '=') and turn
# each into a quoted string.  The class is spelled out, including '_', rather
# than relying on the locale-dependent range A-z.
grep -E -e 'volume_[A-Za-z0-9_]*,' $infile | grep -v = | sed 's: *volume_\([^,]*\).*: \"\1\",:' >>$ofile
|
||||
cat <<FOO >> $ofile
|
||||
};
|
||||
FOO
|
159
sys/dev/vinum/request.h
Normal file
159
sys/dev/vinum/request.h
Normal file
@ -0,0 +1,159 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: request.h,v 1.10 1998/08/03 07:15:26 grog Exp grog $
|
||||
*/
|
||||
|
||||
/* Information needed to set up a transfer */
|
||||
|
||||
/* struct buf is surprisingly big (about 300
|
||||
* bytes), and it's part of the request, so this
|
||||
* value is really important. Most requests
|
||||
* don't need more than 2 subrequests per
|
||||
* plex. The table is automatically extended if
|
||||
* this value is too small. */
|
||||
#define RQELTS 2 /* default of 2 requests per transfer */
|
||||
|
||||
/*
 * Transfer description flags.  The single-bit values can be or'ed
 * together; the composite values at the end are masks built from them.
 */
enum xferinfo {
    XFR_NORMAL_READ = 1,
    XFR_NORMAL_WRITE = 2,                       /* write request in normal mode */
    XFR_RECOVERY_READ = 4,
    XFR_DEGRADED_WRITE = 8,
    XFR_PARITYLESS_WRITE = 0x10,
    XFR_NO_PARITY_STRIPE = 0x20,                /* parity stripe is not available */
    XFR_DATA_BLOCK = 0x40,                      /* data block in request */
    XFR_PARITY_BLOCK = 0x80,                    /* parity block in request */
    XFR_BAD_SUBDISK = 0x100,                    /* this subdisk is dead */
    XFR_MALLOCED = 0x200,                       /* this buffer is malloced */
#ifdef DEBUG                                    /* same test as the other DEBUG sections */
    XFR_PHASE2 = 0x800,                         /* documentation only: 2nd phase write */
#endif
    XFR_REVIVECONFLICT = 0x1000,                /* possible conflict with a revive operation */
    /* operations that need a parity block */
    XFR_PARITYOP = (XFR_NORMAL_WRITE | XFR_RECOVERY_READ | XFR_DEGRADED_WRITE),
    /* operations that use the group parameters */
    XFR_GROUPOP = (XFR_DEGRADED_WRITE | XFR_RECOVERY_READ),
    /* operations that use the data parameters */
    XFR_DATAOP = (XFR_NORMAL_READ | XFR_NORMAL_WRITE | XFR_PARITYLESS_WRITE),
    /* operations requiring read before write */
    XFR_RBW = (XFR_NORMAL_WRITE | XFR_DEGRADED_WRITE),
    /* operations that need a malloced buffer */
    XFR_NEEDS_MALLOC = (XFR_NORMAL_WRITE | XFR_RECOVERY_READ | XFR_DEGRADED_WRITE)
};
|
||||
|
||||
/*
 * Describe one low-level request, part of a high-level request.  This
 * is an extended struct buf buffer, and the first element *must* be a
 * struct buf.  We pass this structure to the I/O routines instead of a
 * struct buf in order to be able to locate the high-level request when
 * it completes.
 *
 * All offsets and lengths are in "blocks", i.e. sectors.
 */
|
||||
struct rqelement {
|
||||
struct buf b; /* buf structure; must remain the first member (see above) */
|
||||
struct rqgroup *rqg; /* pointer to our group */
|
||||
/* Information about the transfer */
|
||||
daddr_t sdoffset; /* offset in subdisk */
|
||||
int useroffset; /* offset in user buffer of normal data */
|
||||
/*
 * dataoffset and datalen refer to "individual" data transfers
 * (normal read, parityless write) and also degraded write.
 *
 * groupoffset and grouplen refer to the other "group" operations
 * (normal write, recovery read).
 *
 * Both the offsets are relative to the start of the local buffer.
 */
|
||||
int dataoffset; /* offset in buffer of the normal data */
|
||||
int groupoffset; /* offset in buffer of group data */
|
||||
short datalen; /* length of normal data (sectors) */
|
||||
short grouplen; /* length of group data (sectors) */
|
||||
short buflen; /* total buffer length to allocate */
|
||||
short flags; /* really enum xferinfo (see above), stored in a short */
|
||||
/* Ways to find other components */
|
||||
short sdno; /* subdisk number */
|
||||
short driveno; /* drive number */
|
||||
};
|
||||
|
||||
/*
 * A group of requests built to satisfy a certain component of a user
 * request.  Groups are chained through next and point back to the
 * owning struct request through rq.
 */
|
||||
struct rqgroup {
|
||||
struct rqgroup *next; /* pointer to next group */
|
||||
struct request *rq; /* pointer to the request */
|
||||
short count; /* number of requests in this group */
|
||||
short active; /* and number active */
|
||||
short plexno; /* index of plex */
|
||||
int badsdno; /* index of bad subdisk or -1 */
|
||||
enum xferinfo flags; /* description of transfer */
|
||||
struct rqelement rqe[0]; /* and the elements of this request: zero-length trailing array, so the storage must follow the struct (NOTE(review): presumably sized by allocrqg(); confirm) */
|
||||
};
|
||||
|
||||
/*
 * Describe one high-level request and the work we have to do to
 * satisfy it.  The work is held as a list of struct rqgroup.
 */
|
||||
struct request {
|
||||
struct buf *bp; /* pointer to the high-level request */
|
||||
int flags;
|
||||
union {
|
||||
int volno; /* volume index */
|
||||
int plexno; /* or plex index */
|
||||
} volplex; /* NOTE(review): presumably isplex says which member is valid; confirm */
|
||||
int error; /* current error indication */
|
||||
short isplex; /* set if this is a plex request */
|
||||
short active; /* number of subrequests still active */
|
||||
struct rqgroup *rqg; /* pointer to the first group of requests */
|
||||
struct rqgroup *lrqg; /* presumably the last group of requests (this comment used to repeat "first") - TODO confirm */
|
||||
struct request *next; /* link of waiting requests */
|
||||
};
|
||||
|
||||
/*
 * Extended buffer header for subdisk I/O.  Includes a pointer to the
 * user I/O request.
 */
|
||||
struct sdbuf {
|
||||
struct buf b; /* our buffer; placed first, as in struct rqelement */
|
||||
struct buf *bp; /* and pointer to parent */
|
||||
short driveno; /* drive index */
|
||||
short sdno; /* and subdisk index */
|
||||
};
|
||||
|
||||
/*
 * Status values returned by rqe and friends.
 *
 * The numeric order is significant: values increase with seriousness,
 * and some routines test for > REQUEST_RECOVERED to detect a request
 * that failed completely.  Keep the values in this order.
 */
enum requeststatus {
    REQUEST_OK = 0,                             /* request built OK */
    REQUEST_RECOVERED = 1,                      /* request OK, but involves RAID5 recovery */
    REQUEST_EOF = 2,                            /* request failed: outside plex */
    REQUEST_DOWN = 3,                           /* request failed: subdisk down */
    REQUEST_ENOMEM = 4                          /* ran out of memory */
};
|
88
sys/dev/vinum/statetexts.h
Normal file
88
sys/dev/vinum/statetexts.h
Normal file
@ -0,0 +1,88 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: COPYRIGHT,v 1.1 1998/03/05 06:07:05 grog Exp grog $
|
||||
*/
|
||||
/* Created by ./makestatetext on Tue 4 Aug 15:53:16 CST 1998. Do not edit */
|
||||
|
||||
/* Drive state texts */
|
||||
char *drivestatetext[] =
|
||||
{
|
||||
"unallocated",
|
||||
"uninit",
|
||||
"down",
|
||||
"coming_up",
|
||||
"up",
|
||||
};
|
||||
|
||||
/* Subdisk state texts */
|
||||
char *sdstatetext[] =
|
||||
{
|
||||
"unallocated",
|
||||
"uninit",
|
||||
"init",
|
||||
"initializing",
|
||||
"empty",
|
||||
"obsolete",
|
||||
"stale",
|
||||
"crashed",
|
||||
"down",
|
||||
"reborn",
|
||||
"up",
|
||||
};
|
||||
|
||||
/* Plex state texts */
|
||||
char *plexstatetext[] =
|
||||
{
|
||||
"unallocated",
|
||||
"init",
|
||||
"faulty",
|
||||
"down",
|
||||
"reviving",
|
||||
"initializing",
|
||||
"corrupt",
|
||||
"degraded",
|
||||
"flaky",
|
||||
"up",
|
||||
};
|
||||
|
||||
/* Volume state texts */
|
||||
char *volstatetext[] =
|
||||
{
|
||||
"unallocated",
|
||||
"uninit",
|
||||
"down",
|
||||
"up",
|
||||
};
|
512
sys/dev/vinum/vinum.c
Normal file
512
sys/dev/vinum/vinum.c
Normal file
@ -0,0 +1,512 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: vinum.c,v 1.19 1998/08/13 05:24:02 grog Exp grog $
|
||||
*/
|
||||
|
||||
#define STATIC /* nothing while we're testing XXX */
|
||||
|
||||
#define REALLYKERNEL
|
||||
#include "vinumhdr.h"
|
||||
#include "sys/sysproto.h" /* for sync(2) */
|
||||
#ifdef DEBUG
|
||||
#include <sys/reboot.h>
|
||||
int debug = 0;
|
||||
#endif
|
||||
|
||||
/* pointer to ioctl p parameter, to save passing it around */
|
||||
struct proc *myproc;
|
||||
|
||||
/*
 * Device switch entries for the vinum driver.  Before FreeBSD 3 a
 * bdevsw is initialized here and refers to a cdevsw that has no
 * initializer in this file; from FreeBSD 3 on there is only a cdevsw.
 * The initializers are positional, so their order has to match the
 * member order of the corresponding struct in the kernel headers.
 */
#if __FreeBSD__ < 3
|
||||
STATIC struct cdevsw vinum_cdevsw;
|
||||
STATIC struct bdevsw vinum_bdevsw =
|
||||
{
|
||||
vinumopen, vinumclose, vinumstrategy, vinumioctl,
|
||||
vinumdump, vinumsize, 0,
|
||||
"vinum", &vinum_cdevsw, -1
|
||||
};
|
||||
#else /* goodbye, bdevsw */
|
||||
STATIC struct cdevsw vinum_cdevsw =
|
||||
{
|
||||
vinumopen, vinumclose, vinumread, vinumwrite,
|
||||
vinumioctl, nostop, nullreset, nodevtotty,
|
||||
seltrue, nommap, vinumstrategy, "vinum",
|
||||
NULL, -1, vinumdump, vinumsize,
|
||||
D_DISK, 0, -1
|
||||
};
|
||||
#endif
|
||||
|
||||
/* Called by main() during pseudo-device attachment. */
|
||||
STATIC void vinumattach(void *);
|
||||
|
||||
STATIC void vinumgetdisklabel(dev_t);
|
||||
void vinum_scandisk(void);
|
||||
int vinum_inactive(void);
|
||||
void free_vinum(int);
|
||||
|
||||
#if __FreeBSD__ >= 3
|
||||
/* Why aren't these declared anywhere? XXX */
|
||||
int setjmp(jmp_buf);
|
||||
void longjmp(jmp_buf, int);
|
||||
#endif
|
||||
|
||||
extern jmp_buf command_fail; /* return here if config fails */
|
||||
|
||||
struct _vinum_conf vinum_conf; /* configuration information */
|
||||
|
||||
STATIC int vinum_devsw_installed = 0;
|
||||
|
||||
/*
|
||||
* Called by main() during pseudo-device attachment. All we need
|
||||
* to do is allocate enough space for devices to be configured later, and
|
||||
* add devsw entries.
|
||||
*/
|
||||
void
|
||||
vinumattach(void *dummy)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
char *buf; /* pointer to temporary buffer */
|
||||
struct _ioctl_reply *ioctl_reply; /* struct to return */
|
||||
struct uio uio;
|
||||
struct iovec iovec;
|
||||
|
||||
/* modload should prevent multiple loads, so this is worth a panic */
|
||||
if ((vinum_conf.flags & VF_LOADED) != NULL)
|
||||
panic("vinum: already loaded");
|
||||
|
||||
printf("vinum: loaded\n");
|
||||
vinum_conf.flags |= VF_LOADED; /* we're loaded now */
|
||||
|
||||
/* We don't have a p pointer here, so take it from curproc */
|
||||
myproc = curproc;
|
||||
#if __FreeBSD__ < 3
|
||||
bdevsw_add_generic(BDEV_MAJOR, CDEV_MAJOR, &vinum_bdevsw);
|
||||
#else
|
||||
cdevsw_add_generic(BDEV_MAJOR, CDEV_MAJOR, &vinum_cdevsw);
|
||||
#endif
|
||||
#ifdef DEVFS
|
||||
#error DEVFS not finished yet
|
||||
#endif
|
||||
|
||||
uio.uio_iov = &iovec;
|
||||
uio.uio_iovcnt = 1; /* just one buffer */
|
||||
uio.uio_offset = 0; /* start at the beginning */
|
||||
uio.uio_resid = 512; /* one sector */
|
||||
uio.uio_segflg = UIO_SYSSPACE; /* we're in system space */
|
||||
uio.uio_rw = UIO_READ; /* do we need this? */
|
||||
uio.uio_procp = curproc; /* do it for our own process */
|
||||
|
||||
iovec.iov_len = 512;
|
||||
buf = (char *) Malloc(iovec.iov_len); /* get a buffer */
|
||||
CHECKALLOC(buf, "vinum: no memory\n"); /* can't get 512 bytes? */
|
||||
iovec.iov_base = buf; /* read into buf */
|
||||
|
||||
/* allocate space: drives... */
|
||||
DRIVE = (struct drive *) Malloc(sizeof(struct drive) * INITIAL_DRIVES);
|
||||
CHECKALLOC(DRIVE, "vinum: no memory\n");
|
||||
vinum_conf.drives_allocated = INITIAL_DRIVES; /* number of drive slots allocated */
|
||||
vinum_conf.drives_used = 0; /* and number in use */
|
||||
|
||||
/* volumes, ... */
|
||||
VOL = (struct volume *) Malloc(sizeof(struct volume) * INITIAL_VOLUMES);
|
||||
CHECKALLOC(VOL, "vinum: no memory\n");
|
||||
vinum_conf.volumes_allocated = INITIAL_VOLUMES; /* number of volume slots allocated */
|
||||
vinum_conf.volumes_used = 0; /* and number in use */
|
||||
|
||||
/* plexes, ... */
|
||||
PLEX = (struct plex *) Malloc(sizeof(struct plex) * INITIAL_PLEXES);
|
||||
CHECKALLOC(PLEX, "vinum: no memory\n");
|
||||
vinum_conf.plexes_allocated = INITIAL_PLEXES; /* number of plex slots allocated */
|
||||
vinum_conf.plexes_used = 0; /* and number in use */
|
||||
|
||||
/* and subdisks */
|
||||
SD = (struct sd *) Malloc(sizeof(struct sd) * INITIAL_SUBDISKS);
|
||||
CHECKALLOC(SD, "vinum: no memory\n");
|
||||
vinum_conf.subdisks_allocated = INITIAL_SUBDISKS; /* number of sd slots allocated */
|
||||
vinum_conf.subdisks_used = 0; /* and number in use */
|
||||
|
||||
ioctl_reply = NULL; /* no reply on longjmp */
|
||||
}
|
||||
|
||||
|
||||
#ifdef ACTUALLY_LKM_NOT_KERNEL /* stuff for LKMs */
|
||||
|
||||
/* Check if we have anything open. If so, return 0 (not inactive),
|
||||
* otherwise 1 (inactive) */
|
||||
int
|
||||
vinum_inactive(void)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
int i;
|
||||
int can_do = 1; /* assume we can do it */
|
||||
|
||||
lock_config();
|
||||
for (i = 0; i < vinum_conf.volumes_used; i++) {
|
||||
if (VOL[i].pid != NULL) { /* volume is open */
|
||||
can_do = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
unlock_config();
|
||||
return can_do;
|
||||
}
|
||||
|
||||
/* Free all structures.
|
||||
* If cleardrive is 0, save the configuration; otherwise
|
||||
* remove the configuration from the drive.
|
||||
*
|
||||
* Before coming here, ensure that no volumes are open.
|
||||
*/
|
||||
void
|
||||
free_vinum(int cleardrive)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
int i;
|
||||
|
||||
if (cleardrive) {
|
||||
for (i = 0; i < vinum_conf.drives_used; i++)
|
||||
remove_drive(i); /* remove the drive */
|
||||
} else { /* keep the config */
|
||||
save_config();
|
||||
if (DRIVE != NULL) {
|
||||
for (i = 0; i < vinum_conf.drives_used; i++)
|
||||
free_drive(&DRIVE[i]); /* close files and things */
|
||||
Free(DRIVE);
|
||||
}
|
||||
}
|
||||
if (SD != NULL)
|
||||
Free(SD);
|
||||
if (PLEX != NULL) {
|
||||
for (i = 0; i < vinum_conf.plexes_used; i++) {
|
||||
struct plex *plex = &vinum_conf.plex[i];
|
||||
|
||||
if (plex->state != plex_unallocated) { /* we have real data there */
|
||||
if (plex->sdnos)
|
||||
Free(plex->sdnos);
|
||||
if (plex->unmapped_regions)
|
||||
Free(plex->unmapped_region);
|
||||
if (plex->defective_regions)
|
||||
Free(plex->defective_region);
|
||||
}
|
||||
}
|
||||
Free(PLEX);
|
||||
}
|
||||
if (VOL != NULL)
|
||||
Free(VOL);
|
||||
bzero(&vinum_conf, sizeof(vinum_conf));
|
||||
}
|
||||
|
||||
MOD_MISC(vinum);
|
||||
|
||||
/*
 * Function called when loading the driver.  It runs vinumattach() and
 * always returns 0; lkmtp and cmd are not used.
 */
|
||||
STATIC int
|
||||
vinum_load(struct lkm_table *lkmtp, int cmd)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
/* Debugger ("vinum_load"); */
|
||||
vinumattach(NULL);
|
||||
return 0; /* OK */
|
||||
}
|
||||
|
||||
/*
|
||||
* Function called when unloading the driver.
|
||||
*/
|
||||
STATIC int
|
||||
vinum_unload(struct lkm_table *lkmtp, int cmd)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
if (vinum_inactive()) { /* is anything open? */
|
||||
struct sync_args dummyarg =
|
||||
{0};
|
||||
#if __FreeBSD__ < 3
|
||||
int retval;
|
||||
#endif
|
||||
|
||||
printf("vinum: unloaded\n");
|
||||
#if __FreeBSD__ < 3
|
||||
sync(curproc, &dummyarg, &retval); /* write out buffers */
|
||||
#else
|
||||
sync(curproc, &dummyarg); /* write out buffers */
|
||||
#endif
|
||||
free_vinum(0); /* no: clean up */
|
||||
#if __FreeBSD__ < 3
|
||||
bdevsw[BDEV_MAJOR] = NULL; /* clear bdevsw */
|
||||
#endif
|
||||
cdevsw[CDEV_MAJOR] = NULL; /* and cdevsw */
|
||||
return 0;
|
||||
} else
|
||||
return EBUSY;
|
||||
}
|
||||
|
||||
/*
 * Dispatcher function for the module (load/unload/stat).  All the
 * work is done by the MOD_DISPATCH macro, which is handed vinum_load,
 * vinum_unload and lkm_nullcmd.
 */
|
||||
int
|
||||
vinum_mod(struct lkm_table *lkmtp, int cmd, int ver)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
MOD_DISPATCH(vinum, /* module name */
|
||||
lkmtp, /* LKM table */
|
||||
cmd, /* command */
|
||||
ver, /* passed through unchanged from our caller */
|
||||
vinum_load, /* load with this function */
|
||||
vinum_unload, /* and unload with this */
|
||||
lkm_nullcmd); /* NOTE(review): presumably the stat handler; confirm */
|
||||
}
|
||||
|
||||
#else /* not LKM */
|
||||
#error "This driver must be compiled as a loadable kernel module"
|
||||
#endif /* LKM */
|
||||
|
||||
/* ARGSUSED */
|
||||
/* Open a vinum object
|
||||
* At the moment, we only open volumes and the
|
||||
* super device. It's a nice concept to be
|
||||
* able to open drives, subdisks and plexes, but
|
||||
* I can't think what good it could be */
|
||||
int
|
||||
vinumopen(dev_t dev,
|
||||
int flags,
|
||||
int fmt,
|
||||
struct proc *p)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
int s; /* spl */
|
||||
int error;
|
||||
unsigned int index;
|
||||
struct volume *vol;
|
||||
struct plex *plex;
|
||||
struct sd *sd;
|
||||
struct devcode *device;
|
||||
|
||||
device = (struct devcode *) &dev;
|
||||
|
||||
error = 0;
|
||||
/* First, decide what we're looking at */
|
||||
switch (device->type) {
|
||||
case VINUM_VOLUME_TYPE:
|
||||
index = VOLNO(dev);
|
||||
if (index >= vinum_conf.volumes_used)
|
||||
return ENXIO; /* no such device */
|
||||
vol = &VOL[index];
|
||||
|
||||
switch (vol->state) {
|
||||
case volume_unallocated:
|
||||
case volume_uninit:
|
||||
return ENXIO;
|
||||
|
||||
case volume_up:
|
||||
s = splhigh(); /* quick lock */
|
||||
if (error)
|
||||
return error;
|
||||
if (vol->opencount == 0)
|
||||
vol->openflags = flags; /* set our flags */
|
||||
vol->opencount++;
|
||||
vol->pid = p->p_pid; /* and say who we are (do we need this? XXX) */
|
||||
splx(s);
|
||||
return 0;
|
||||
|
||||
case volume_down:
|
||||
return EIO;
|
||||
|
||||
default:
|
||||
return EINVAL;
|
||||
}
|
||||
|
||||
case VINUM_PLEX_TYPE:
|
||||
if (VOLNO(dev) >= vinum_conf.volumes_used)
|
||||
return ENXIO;
|
||||
index = PLEXNO(dev); /* get plex index in vinum_conf */
|
||||
if (index >= vinum_conf.plexes_used)
|
||||
return ENXIO; /* no such device */
|
||||
plex = &PLEX[index];
|
||||
|
||||
switch (plex->state) {
|
||||
case plex_unallocated:
|
||||
return EINVAL;
|
||||
|
||||
default:
|
||||
s = splhigh();
|
||||
if (plex->pid /* it's open already */
|
||||
&& (plex->pid != p->p_pid)) { /* and not by us, */
|
||||
splx(s);
|
||||
return EBUSY; /* one at a time, please */
|
||||
}
|
||||
plex->pid = p->p_pid; /* and say who we are (do we need this? XXX) */
|
||||
splx(s);
|
||||
return 0;
|
||||
}
|
||||
|
||||
case VINUM_SD_TYPE:
|
||||
if ((VOLNO(dev) >= vinum_conf.volumes_used) || /* no such volume */
|
||||
(PLEXNO(dev) >= vinum_conf.plexes_used)) /* or no such plex */
|
||||
return ENXIO; /* no such device */
|
||||
index = SDNO(dev); /* get the subdisk number */
|
||||
if (index >= vinum_conf.subdisks_used)
|
||||
return ENXIO; /* no such device */
|
||||
sd = &SD[index];
|
||||
|
||||
/* Opening a subdisk is always a special operation, so we
|
||||
* ignore the state as long as it represents a real subdisk */
|
||||
switch (sd->state) {
|
||||
case sd_unallocated:
|
||||
case sd_uninit:
|
||||
return EINVAL;
|
||||
|
||||
default:
|
||||
s = splhigh();
|
||||
if (sd->pid /* it's open already */
|
||||
&& (sd->pid != p->p_pid)) { /* and not by us, */
|
||||
splx(s);
|
||||
return EBUSY; /* one at a time, please */
|
||||
}
|
||||
sd->pid = p->p_pid; /* and say who we are (do we need this? XXX) */
|
||||
splx(s);
|
||||
return 0;
|
||||
}
|
||||
|
||||
case VINUM_DRIVE_TYPE:
|
||||
default:
|
||||
return ENODEV; /* don't know what to do with these */
|
||||
|
||||
case VINUM_SUPERDEV_TYPE:
|
||||
if (p->p_ucred->cr_uid == 0) { /* root calling, */
|
||||
vinum_conf.opencount++; /* one more opener */
|
||||
return 0; /* no worries opening super dev */
|
||||
} else
|
||||
return EPERM; /* you can't do that! */
|
||||
}
|
||||
}
|
||||
|
||||
/* ARGSUSED */
|
||||
int
|
||||
vinumclose(dev_t dev,
|
||||
int flags,
|
||||
int fmt,
|
||||
struct proc *p)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
unsigned int index;
|
||||
struct volume *vol;
|
||||
struct plex *plex;
|
||||
struct sd *sd;
|
||||
struct devcode *device = (struct devcode *) &dev;
|
||||
|
||||
index = VOLNO(dev);
|
||||
/* First, decide what we're looking at */
|
||||
switch (device->type) {
|
||||
case VINUM_VOLUME_TYPE:
|
||||
if (index >= vinum_conf.volumes_used)
|
||||
return ENXIO; /* no such device */
|
||||
vol = &VOL[index];
|
||||
|
||||
switch (vol->state) {
|
||||
case volume_unallocated:
|
||||
case volume_uninit:
|
||||
return ENXIO;
|
||||
|
||||
case volume_up:
|
||||
vol->opencount = 0; /* reset our flags */
|
||||
vol->pid = NULL; /* and forget who owned us */
|
||||
return 0;
|
||||
|
||||
case volume_down:
|
||||
return EIO;
|
||||
|
||||
default:
|
||||
return EINVAL;
|
||||
}
|
||||
|
||||
case VINUM_PLEX_TYPE:
|
||||
if (VOLNO(dev) >= vinum_conf.volumes_used)
|
||||
return ENXIO;
|
||||
index = PLEXNO(dev); /* get plex index in vinum_conf */
|
||||
if (index >= vinum_conf.plexes_used)
|
||||
return ENXIO; /* no such device */
|
||||
plex = &PLEX[index];
|
||||
plex->pid = 0;
|
||||
return 0;
|
||||
|
||||
case VINUM_SD_TYPE:
|
||||
if ((VOLNO(dev) >= vinum_conf.volumes_used) || /* no such volume */
|
||||
(PLEXNO(dev) >= vinum_conf.plexes_used)) /* or no such plex */
|
||||
return ENXIO; /* no such device */
|
||||
index = SDNO(dev); /* get the subdisk number */
|
||||
if (index >= vinum_conf.subdisks_used)
|
||||
return ENXIO; /* no such device */
|
||||
sd = &SD[index];
|
||||
sd->pid = 0;
|
||||
return 0;
|
||||
|
||||
case VINUM_SUPERDEV_TYPE:
|
||||
if (p->p_ucred->cr_uid == 0) /* root calling, */
|
||||
vinum_conf.opencount--; /* one less opener */
|
||||
return 0; /* no worries closing super dev */
|
||||
|
||||
case VINUM_DRIVE_TYPE:
|
||||
default:
|
||||
return ENODEV; /* don't know what to do with these */
|
||||
}
|
||||
}
|
||||
|
||||
/* size routine */
|
||||
int
|
||||
vinumsize(dev_t dev)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
struct volume *vol;
|
||||
int size;
|
||||
|
||||
/* XXX This is bogus. We don't need to open
|
||||
* a device to find its size */
|
||||
vol = &VOL[VOLNO(dev)];
|
||||
|
||||
if (vol->state == volume_up)
|
||||
size = vol->size;
|
||||
else
|
||||
return 0; /* err on the size of conservatism */
|
||||
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
/*
 * Dump routine.  Dumping to a vinum device is not implemented, so
 * this always returns ENXIO; dev is not used.
 */
int
|
||||
vinumdump(dev_t dev)
|
||||
{
|
||||
/* Not implemented. */
|
||||
return ENXIO;
|
||||
}
|
1712
sys/dev/vinum/vinumconfig.c
Normal file
1712
sys/dev/vinum/vinumconfig.c
Normal file
File diff suppressed because it is too large
Load Diff
214
sys/dev/vinum/vinumext.h
Normal file
214
sys/dev/vinum/vinumext.h
Normal file
@ -0,0 +1,214 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: vinumext.h,v 1.14 1998/08/11 00:03:57 grog Exp grog $
|
||||
*/
|
||||
|
||||
/* vinumext.h: external definitions */
|
||||
|
||||
extern struct _vinum_conf vinum_conf; /* configuration information */
|
||||
|
||||
#ifdef DEBUG
/* Spell out the type: implicit int is no longer valid C, and vinum.c defines this as int. */
extern int debug;                               /* debug flags */
#endif
|
||||
|
||||
/*
 * Check the result of an allocation.  If ptr is NULL, print msg and
 * longjmp() to command_fail with value -1; otherwise do nothing.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as
 * a single statement (it can be used unbraced in an if/else), ptr is
 * parenthesized, and msg is printed through "%s" so that it is never
 * interpreted as a format string.  Use it with a trailing semicolon.
 */
#define CHECKALLOC(ptr, msg)                            \
    do {                                                \
        if ((ptr) == NULL) {                            \
            printf("%s", (msg));                        \
            longjmp(command_fail, -1);                  \
        }                                               \
    } while (0)
|
||||
#ifndef KERNEL
|
||||
struct vnode;
|
||||
struct proc;
|
||||
#endif
|
||||
|
||||
#ifdef KERNEL
|
||||
int give_sd_to_plex(int plexno, int sdno);
|
||||
int give_plex_to_volume(int volno, int plexno);
|
||||
int check_drive(char *);
|
||||
enum drive_label_info read_drive_label(struct drive *drive);
|
||||
int parse_config(char *, struct keywordset *);
|
||||
int parse_user_config(char *cptr, struct keywordset *keyset);
|
||||
u_int64_t sizespec(char *spec);
|
||||
int volume_index(struct volume *volume);
|
||||
int plex_index(struct plex *plex);
|
||||
int sd_index(struct sd *sd);
|
||||
int drive_index(struct drive *drive);
|
||||
int my_plex(int volno, int plexno);
|
||||
int my_sd(int plexno, int sdno);
|
||||
int get_empty_drive(void);
|
||||
int find_drive(const char *name, int create);
|
||||
int find_drive_by_dev(const char *devname, int create);
|
||||
int get_empty_sd(void);
|
||||
int find_subdisk(const char *name, int create);
|
||||
void free_sd(int sdno);
|
||||
void free_volume(int volno);
|
||||
int get_empty_plex(void);
|
||||
int find_plex(const char *name, int create);
|
||||
void free_plex(int plexno);
|
||||
int get_empty_volume(void);
|
||||
int find_volume(const char *name, int create);
|
||||
void config_subdisk(void);
|
||||
void config_plex(void);
|
||||
void config_volume(void);
|
||||
void config_drive(void);
|
||||
void updateconfig(int);
|
||||
void update_sd_config(int sdno, int kernelstate);
|
||||
void update_plex_config(int plexno, int kernelstate);
|
||||
void update_volume_config(int volno, int kernelstate);
|
||||
void update_config(void);
|
||||
void drive_io_done(struct buf *);
|
||||
int save_config(void);
|
||||
void write_config(char *, int);
|
||||
int start_config(void);
|
||||
void finish_config(int);
|
||||
void remove(struct vinum_ioctl_msg *msg);
|
||||
void remove_drive_entry(int driveno, int force, int recurse);
|
||||
void remove_sd_entry(int sdno, int force, int recurse);
|
||||
void remove_plex_entry(int plexno, int force, int recurse);
|
||||
void remove_volume_entry(int volno, int force, int recurse);
|
||||
|
||||
void checkernel(char *);
|
||||
int open_drive(struct drive *, struct proc *);
|
||||
void close_drive(struct drive *drive);
|
||||
int driveio(struct drive *, void *, size_t, off_t, int);
|
||||
/* #define read_drive(a, b, c, d) driveio (a, b, c, d, B_READ)
|
||||
#define write_drive(a, b, c, d) driveio (a, b, c, d, B_WRITE) */
|
||||
int set_drive_parms(struct drive *drive);
|
||||
int init_drive(struct drive *);
|
||||
/* void throw_rude_remark (int, struct _ioctl_reply *, char *, ...); XXX */
|
||||
void throw_rude_remark(int, char *,...);
|
||||
|
||||
int read_drive(struct drive *drive, void *buf, size_t length, off_t offset);
|
||||
int write_drive(struct drive *drive, void *buf, size_t length, off_t offset);
|
||||
void format_config(char *config, int len);
|
||||
void checkkernel(char *op);
|
||||
void free_drive(struct drive *drive);
|
||||
void down_drive(struct drive *drive);
|
||||
void remove_drive(int driveno);
|
||||
|
||||
/* I/O */
|
||||
d_open_t vinumopen;
|
||||
d_close_t vinumclose;
|
||||
d_strategy_t vinumstrategy;
|
||||
d_ioctl_t vinumioctl;
|
||||
d_dump_t vinumdump;
|
||||
d_psize_t vinumsize;
|
||||
d_read_t vinumread;
|
||||
d_write_t vinumwrite;
|
||||
|
||||
int vinumstart(struct buf *bp, int reviveok);
|
||||
int launch_requests(struct request *rq, int reviveok);
|
||||
|
||||
/* XXX Do we need this? */
|
||||
int vinumpart(dev_t);
|
||||
|
||||
/* Memory allocation */
|
||||
void vinum_meminfo(caddr_t data);
|
||||
int vinum_mallocinfo(caddr_t data);
|
||||
|
||||
void expand_table(void **, int, int);
|
||||
|
||||
void add_defective_region(struct plex *plex, off_t offset, size_t length);
|
||||
void add_unmapped_region(struct plex *plex, off_t offset, size_t length);
|
||||
void rebuild_plex_unmappedlist(struct plex *plex);
|
||||
struct request;
|
||||
struct rqgroup *allocrqg(struct request *rq, int elements);
|
||||
void deallocrqg(struct rqgroup *rqg);
|
||||
|
||||
/* State transitions */
|
||||
int set_drive_state(int driveno, enum drivestate state, int force);
|
||||
int set_sd_state(int sdno, enum sdstate state, enum setstateflags flags);
|
||||
enum requeststatus checksdstate(struct sd *sd, struct request *rq, daddr_t diskaddr, daddr_t diskend);
|
||||
int set_plex_state(int plexno, enum plexstate state, enum setstateflags flags);
|
||||
int set_volume_state(int volumeno, enum volumestate state, enum setstateflags flags);
|
||||
void get_volume_label(struct volume *vol, struct disklabel *lp);
|
||||
int write_volume_label(int);
|
||||
void start_object(struct vinum_ioctl_msg *);
|
||||
void stop_object(struct vinum_ioctl_msg *);
|
||||
void setstate(struct vinum_ioctl_msg *msg);
|
||||
void vinum_label(int);
|
||||
int vinum_writedisklabel(struct volume *, struct disklabel *);
|
||||
int initsd(int);
|
||||
|
||||
int restart_plex(int plexno);
|
||||
int revive_block(int plexno);
|
||||
|
||||
/* Auxiliary functions */
|
||||
enum sdstates sdstatemap(struct plex *plex, int *sddowncount);
|
||||
enum volplexstate vpstate(struct plex *plex);
|
||||
#endif
|
||||
|
||||
enum keyword get_keyword(char *, struct keywordset *);
|
||||
void listconfig(void);
|
||||
char *drive_state(enum drivestate);
|
||||
char *volume_state(enum volumestate);
|
||||
char *plex_state(enum plexstate);
|
||||
char *plex_org(enum plexorg);
|
||||
char *sd_state(enum sdstate);
|
||||
enum drivestate DriveState(char *text);
|
||||
enum sdstate SdState(char *text);
|
||||
enum plexstate PlexState(char *text);
|
||||
enum volumestate VolState(char *text);
|
||||
struct drive *validdrive(int driveno, struct _ioctl_reply *);
|
||||
struct sd *validsd(int sdno, struct _ioctl_reply *);
|
||||
struct plex *validplex(int plexno, struct _ioctl_reply *);
|
||||
struct volume *validvol(int volno, struct _ioctl_reply *);
|
||||
int tokenize(char *, char *[]);
|
||||
void resetstats(struct vinum_ioctl_msg *msg);
|
||||
|
||||
/* Locking */
|
||||
int lockvol(struct volume *vol);
|
||||
void unlockvol(struct volume *vol);
|
||||
int lockplex(struct plex *plex);
|
||||
void unlockplex(struct plex *plex);
|
||||
int lockrange(struct plex *plex, off_t first, off_t last);
|
||||
void unlockrange(struct plex *plex, off_t first, off_t last);
|
||||
int lock_config(void);
|
||||
void unlock_config(void);
|
||||
|
||||
/*
 * expandrq: grow the rqe table of a plex request by RQELTS elements
 * and zero the newly added elements.
 *
 * With DEBUG defined this is a macro, otherwise it is a real function.
 * The macro body is wrapped in do { } while (0) so that
 * "expandrq(p);" is a single statement in every context (for example
 * as the body of an if with an else), exactly like the function form.
 * Note that the macro evaluates its argument several times, so the
 * argument must not have side effects.
 *
 * NOTE(review): the table is sized from prq->requests but the counter
 * that gets bumped is prq->rqcount -- confirm that this is intended.
 */
#ifdef DEBUG
#define expandrq(prq)							\
    do {								\
	expand_table((void **) &(prq)->rqe,				\
	    (prq)->requests * sizeof (struct rqelement),		\
	    ((prq)->requests + RQELTS) * sizeof (struct rqelement));	\
	bzero(&(prq)->rqe[(prq)->requests],				\
	    RQELTS * sizeof (struct rqelement));			\
	(prq)->rqcount += RQELTS;					\
    } while (0)
#else
void expandrq(struct plexrq *);
#endif
|
104
sys/dev/vinum/vinumhdr.h
Normal file
104
sys/dev/vinum/vinumhdr.h
Normal file
@ -0,0 +1,104 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
*/
|
||||
|
||||
/* Header files used by all modules */
|
||||
/* $Id: vinumhdr.h,v 1.7 1998/08/07 04:41:18 grog Exp grog $ */
|
||||
|
||||
#ifdef KERNEL
|
||||
#define REALLYKERNEL
|
||||
#endif
|
||||
#include <sys/param.h>
|
||||
#ifdef REALLYKERNEL
|
||||
#include <sys/systm.h>
|
||||
#include <sys/kernel.h>
|
||||
#endif
|
||||
#ifdef DEVFS
|
||||
#error "DEVFS code not complete yet"
|
||||
#include <sys/devfsext.h>
|
||||
#endif /*DEVFS */
|
||||
#include <sys/proc.h>
|
||||
#include <sys/errno.h>
|
||||
#include <sys/dkstat.h>
|
||||
#include <sys/buf.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/uio.h>
|
||||
#include <sys/namei.h>
|
||||
#include <sys/conf.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/disklabel.h>
|
||||
#include <ufs/ffs/fs.h>
|
||||
#include <sys/mount.h>
|
||||
#include <sys/device.h>
|
||||
#undef KERNEL /* XXX */
|
||||
#include <sys/disk.h>
|
||||
#ifdef REALLYKERNEL
|
||||
#define KERNEL
|
||||
#endif
|
||||
#include <sys/syslog.h>
|
||||
#include <sys/fcntl.h>
|
||||
#include <sys/vnode.h>
|
||||
#include <sys/dkbad.h>
|
||||
#include <setjmp.h>
|
||||
#include <stdarg.h>
|
||||
#include <vm/vm.h>
|
||||
#ifdef USES_VM
|
||||
/* XXX Do we need this? */
|
||||
#include <vm/vm_extern.h>
|
||||
#include <vm/vm_kern.h>
|
||||
#include <vm/vm_object.h>
|
||||
#include <vm/vm_prot.h>
|
||||
/* #include <vm/vm_page.h> */
|
||||
#include <sys/vmmeter.h>
|
||||
/* #include <machine/pmap.h> */
|
||||
#include <machine/cputypes.h>
|
||||
#endif /* USES_VM */
|
||||
#include <vinumvar.h>
|
||||
#include <vinumio.h>
|
||||
#include "vinumkw.h"
|
||||
#include "vinumext.h"
|
||||
|
||||
#undef Free /* defined in some funny net stuff */
|
||||
#ifdef REALLYKERNEL
|
||||
#define Malloc(x) MMalloc ((x), __FILE__, __LINE__) /* show where we came from */
|
||||
#define Free(x) FFree ((x), __FILE__, __LINE__) /* show where we came from */
|
||||
caddr_t MMalloc (int size, char *, int);
|
||||
void FFree (void *mem, char *, int);
|
||||
#else
|
||||
#define Malloc(x) malloc ((x)) /* just the size */
|
||||
#define Free(x) free ((x)) /* just the address */
|
||||
#endif
|
||||
|
190
sys/dev/vinum/vinuminterrupt.c
Normal file
190
sys/dev/vinum/vinuminterrupt.c
Normal file
@ -0,0 +1,190 @@
|
||||
/* interrupt.c: bottom half of the driver */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: interrupt.c,v 1.1 1998/08/13 06:12:27 grog Exp grog $
|
||||
*/
|
||||
|
||||
#define REALLYKERNEL
|
||||
#include "vinumhdr.h"
|
||||
#include "request.h"
|
||||
#include <miscfs/specfs/specdev.h>
|
||||
#include <sys/resourcevar.h>
|
||||
|
||||
void complete_raid5_write(struct rqelement *);
|
||||
void freerq(struct request *rq);
|
||||
void free_rqg(struct rqgroup *rqg);
|
||||
void complete_rqe(struct buf *bp);
|
||||
void sdio_done(struct buf *bp);
|
||||
|
||||
/* Take a completed buffer, transfer the data back if
|
||||
* it's a read, and complete the high-level request
|
||||
* if this is the last subrequest.
|
||||
*
|
||||
* The bp parameter is in fact a struct rqelement, which
|
||||
* includes a couple of extras at the end.
|
||||
*/
|
||||
void
|
||||
complete_rqe(struct buf *bp)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
struct rqelement *rqe;
|
||||
struct request *rq;
|
||||
struct rqgroup *rqg;
|
||||
struct buf *ubp; /* user buffer */
|
||||
|
||||
rqe = (struct rqelement *) bp; /* point to the element element that completed */
|
||||
rqg = rqe->rqg; /* and the request group */
|
||||
rq = rqg->rq; /* and the complete request */
|
||||
|
||||
if ((bp->b_flags & B_ERROR) != 0) { /* transfer in error */
|
||||
if (bp->b_error != 0) /* did it return a number? */
|
||||
rq->error = bp->b_error; /* yes, put it in. */
|
||||
else if (rq->error == 0) /* no: do we have one already? */
|
||||
rq->error = EIO; /* no: catchall "I/O error" */
|
||||
if (rq->error == EIO) /* I/O error, */
|
||||
set_sd_state(rqe->sdno, sd_crashed, setstate_force); /* take the subdisk down */
|
||||
}
|
||||
/* Now update the statistics */
|
||||
if (bp->b_flags & B_READ) { /* read operation */
|
||||
DRIVE[rqe->driveno].reads++;
|
||||
DRIVE[rqe->driveno].bytes_read += bp->b_bcount;
|
||||
SD[rqe->sdno].reads++;
|
||||
SD[rqe->sdno].bytes_read += bp->b_bcount;
|
||||
PLEX[rqe->rqg->plexno].reads++;
|
||||
PLEX[rqe->rqg->plexno].bytes_read += bp->b_bcount;
|
||||
} else { /* write operation */
|
||||
DRIVE[rqe->driveno].writes++;
|
||||
DRIVE[rqe->driveno].bytes_written += bp->b_bcount;
|
||||
SD[rqe->sdno].writes++;
|
||||
SD[rqe->sdno].bytes_written += bp->b_bcount;
|
||||
PLEX[rqe->rqg->plexno].writes++;
|
||||
PLEX[rqe->rqg->plexno].bytes_written += bp->b_bcount;
|
||||
}
|
||||
ubp = rq->bp; /* user buffer */
|
||||
rqg->active--; /* one less request active */
|
||||
if (rqg->active == 0) /* request group finished, */
|
||||
rq->active--; /* one less */
|
||||
if (rq->active == 0) { /* request finished, */
|
||||
#if DEBUG
|
||||
if (debug & 4) {
|
||||
if (ubp->b_resid != 0) /* still something to transfer? */
|
||||
Debugger("resid");
|
||||
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < ubp->b_bcount; i += 512) /* XXX debug */
|
||||
if (((char *) ubp->b_data)[i] != '<') { /* and not what we expected */
|
||||
printf("At 0x%x (offset 0x%x): '%c' (0x%x)\n",
|
||||
(int) (&((char *) ubp->b_data)[i]),
|
||||
i,
|
||||
((char *) ubp->b_data)[i],
|
||||
((char *) ubp->b_data)[i]);
|
||||
Debugger("complete_request checksum");
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (rq->error) { /* did we have an error? */
|
||||
ubp->b_flags |= B_ERROR; /* yes, propagate to user */
|
||||
ubp->b_error = rq->error;
|
||||
} else
|
||||
ubp->b_resid = 0; /* completed our transfer */
|
||||
if (rq->isplex == 0) /* volume request, */
|
||||
VOL[rq->volplex.volno].active--; /* another request finished */
|
||||
biodone(ubp); /* top level buffer completed */
|
||||
freerq(rq); /* return the request storage */
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Free a request block and anything hanging off it */
|
||||
void
|
||||
freerq(struct request *rq)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
struct rqgroup *rqg;
|
||||
struct rqgroup *nrqg; /* next in chain */
|
||||
int rqno;
|
||||
|
||||
for (rqg = rq->rqg; rqg != NULL; rqg = nrqg) { /* through the whole request chain */
|
||||
for (rqno = 0; rqno < rqg->count; rqno++)
|
||||
if ((rqg->rqe[rqno].flags & XFR_MALLOCED) /* data buffer was malloced, */
|
||||
&&rqg->rqe[rqno].b.b_data) /* and the allocation succeeded */
|
||||
Free(rqg->rqe[rqno].b.b_data); /* free it */
|
||||
nrqg = rqg->next; /* note the next one */
|
||||
Free(rqg); /* and free this one */
|
||||
}
|
||||
Free(rq); /* free the request itself */
|
||||
}
|
||||
|
||||
void
|
||||
free_rqg(struct rqgroup *rqg)
|
||||
{
|
||||
if ((rqg->flags & XFR_GROUPOP) /* RAID 5 request */
|
||||
&&(rqg->rqe) /* got a buffer structure */
|
||||
&&(rqg->rqe->b.b_data)) /* and it has a buffer allocated */
|
||||
Free(rqg->rqe->b.b_data); /* free it */
|
||||
}
|
||||
|
||||
/* I/O on subdisk completed */
|
||||
void
|
||||
sdio_done(struct buf *bp)
|
||||
{
|
||||
struct sdbuf *sbp;
|
||||
|
||||
sbp = (struct sdbuf *) bp;
|
||||
if (sbp->b.b_flags & B_ERROR) { /* had an error */
|
||||
bp->b_flags |= B_ERROR;
|
||||
bp->b_error = sbp->b.b_error;
|
||||
}
|
||||
bp->b_resid = sbp->b.b_resid;
|
||||
biodone(sbp->bp); /* complete the caller's I/O */
|
||||
/* Now update the statistics */
|
||||
if (bp->b_flags & B_READ) { /* read operation */
|
||||
DRIVE[sbp->driveno].reads++;
|
||||
DRIVE[sbp->driveno].bytes_read += bp->b_bcount;
|
||||
SD[sbp->sdno].reads++;
|
||||
SD[sbp->sdno].bytes_read += bp->b_bcount;
|
||||
} else { /* write operation */
|
||||
DRIVE[sbp->driveno].writes++;
|
||||
DRIVE[sbp->driveno].bytes_written += bp->b_bcount;
|
||||
SD[sbp->sdno].writes++;
|
||||
SD[sbp->sdno].bytes_written += bp->b_bcount;
|
||||
}
|
||||
Free(sbp);
|
||||
}
|
886
sys/dev/vinum/vinumio.c
Normal file
886
sys/dev/vinum/vinumio.c
Normal file
@ -0,0 +1,886 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: io.c,v 1.16 1998/08/10 23:47:21 grog Exp grog $
|
||||
*/
|
||||
|
||||
#define STATIC /* nothing while we're testing XXX */
|
||||
|
||||
#if __FreeBSD__ < 3 /* this is in sys/disklabel.h in 3.0 and on */
|
||||
#define DTYPE_VINUM 12 /* vinum volume */
|
||||
#endif
|
||||
|
||||
#define REALLYKERNEL
|
||||
#include "vinumhdr.h"
|
||||
#include <miscfs/specfs/specdev.h>
|
||||
|
||||
extern jmp_buf command_fail; /* return on a failed command */
|
||||
struct _ioctl_reply *ioctl_reply; /* data pointer, for returning error messages */
|
||||
|
||||
#if __FreeBSD__ >= 3
|
||||
/* Why aren't these declared anywhere? XXX */
|
||||
int setjmp(jmp_buf);
|
||||
void longjmp(jmp_buf, int);
|
||||
#endif
|
||||
|
||||
/* pointer to ioctl p parameter, to save passing it around */
|
||||
extern struct proc *myproc;
|
||||
|
||||
/* Open the device associated with the drive, and set drive's vp */
|
||||
int
|
||||
open_drive(struct drive *drive, struct proc *p)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
struct nameidata nd;
|
||||
struct vattr va;
|
||||
int error;
|
||||
|
||||
if (drive->devicename[0] == '\0') /* no device name */
|
||||
sprintf(drive->devicename, "/dev/%s", drive->label.name); /* get it from the drive name */
|
||||
NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, drive->devicename, p);
|
||||
error = vn_open(&nd, FREAD | FWRITE, 0); /* open the device */
|
||||
if (error != 0) { /* can't open? */
|
||||
set_drive_state(drive->driveno, drive_down, 1);
|
||||
drive->lasterror = error;
|
||||
printf("vinum open_drive %s: failed with error %d\n", drive->devicename, error); /* XXX */
|
||||
return error;
|
||||
}
|
||||
drive->vp = nd.ni_vp;
|
||||
drive->p = p;
|
||||
|
||||
if (drive->vp->v_usecount > 1) { /* already in use? */
|
||||
#if __FreeBSD__ == 2 /* pre-4.4BSD Lite/2 parameters */
|
||||
VOP_UNLOCK(drive->vp);
|
||||
#else
|
||||
VOP_UNLOCK(drive->vp, 0, p);
|
||||
#endif
|
||||
close_drive(drive);
|
||||
set_drive_state(drive->driveno, drive_down, 1);
|
||||
drive->lasterror = EBUSY;
|
||||
printf("vinum open_drive %s: Drive in use\n", drive->devicename); /* XXX */
|
||||
return EBUSY;
|
||||
}
|
||||
error = VOP_GETATTR(drive->vp, &va, NOCRED, p);
|
||||
if (error) {
|
||||
#if __FreeBSD__ == 2 /* pre-4.4BSD Lite/2 parameters */
|
||||
VOP_UNLOCK(drive->vp);
|
||||
#else
|
||||
VOP_UNLOCK(drive->vp, 0, p);
|
||||
#endif
|
||||
close_drive(drive);
|
||||
set_drive_state(drive->driveno, drive_down, 1);
|
||||
drive->lasterror = error;
|
||||
printf("vinum open_drive %s: GETAATTR returns error %d\n", drive->devicename, error); /* XXX */
|
||||
return error;
|
||||
}
|
||||
drive->dev = va.va_rdev; /* device */
|
||||
|
||||
if (va.va_type != VBLK) { /* only consider block devices */
|
||||
#if __FreeBSD__ == 2 /* pre-4.4BSD Lite/2 parameters */
|
||||
VOP_UNLOCK(drive->vp);
|
||||
#else
|
||||
VOP_UNLOCK(drive->vp, 0, p);
|
||||
#endif
|
||||
close_drive(drive);
|
||||
set_drive_state(drive->driveno, drive_down, 1); /* this also closes the drive */
|
||||
drive->lasterror = ENOTBLK;
|
||||
printf("vinum open_drive %s: Not a block device\n", drive->devicename); /* XXX */
|
||||
return ENOTBLK;
|
||||
}
|
||||
drive->vp->v_numoutput = 0;
|
||||
#if __FreeBSD__ == 2 /* pre-4.4BSD Lite/2 parameters */
|
||||
VOP_UNLOCK(drive->vp);
|
||||
#else
|
||||
VOP_UNLOCK(drive->vp, 0, p);
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Set some variables in the drive struct
|
||||
* in more convenient form. Return error indication */
|
||||
int
|
||||
set_drive_parms(struct drive *drive)
|
||||
{
|
||||
drive->blocksize = BLKDEV_IOSIZE; /* XXX do we need this? */
|
||||
drive->secsperblock = drive->blocksize /* number of sectors per block */
|
||||
/ drive->partinfo.disklab->d_secsize;
|
||||
|
||||
/* Now update the label part */
|
||||
bcopy(hostname, drive->label.sysname, VINUMHOSTNAMELEN); /* put in host name */
|
||||
#if __FreeBSD__ >= 3
|
||||
getmicrotime(&drive->label.date_of_birth); /* and current time */
|
||||
#else
|
||||
drive->label.date_of_birth = time; /* and current time */
|
||||
#endif
|
||||
drive->label.drive_size = ((u_int64_t) drive->partinfo.part->p_size) /* size of the drive in bytes */
|
||||
*((u_int64_t) drive->partinfo.disklab->d_secsize);
|
||||
|
||||
/* number of sectors available for subdisks */
|
||||
drive->sectors_available = drive->label.drive_size / DEV_BSIZE - DATASTART;
|
||||
|
||||
/* XXX Bug in 3.0 as of January 1998: you can open
|
||||
* non-existent slices. They have a length of 0 */
|
||||
if (drive->label.drive_size < MINVINUMSLICE) { /* too small to worry about */
|
||||
set_drive_state(drive->driveno, drive_down, 1);
|
||||
printf("vinum open_drive %s: Drive too small\n", drive->devicename); /* XXX */
|
||||
drive->lasterror = ENOSPC;
|
||||
return ENOSPC;
|
||||
}
|
||||
drive->freelist_size = INITIAL_DRIVE_FREELIST; /* initial number of entries */
|
||||
drive->freelist = (struct drive_freelist *)
|
||||
Malloc(INITIAL_DRIVE_FREELIST * sizeof(struct drive_freelist));
|
||||
if (drive->freelist == NULL) /* can't malloc, dammit */
|
||||
return ENOSPC;
|
||||
drive->freelist_entries = 1; /* just (almost) the complete drive */
|
||||
drive->freelist[0].offset = DATASTART; /* starts here */
|
||||
drive->freelist[0].sectors = (drive->label.drive_size >> DEV_BSHIFT) - DATASTART; /* and it's this long */
|
||||
set_drive_state(drive->driveno, drive_up, 1); /* our drive is accessible */
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Initialize a drive: open the device and add device
|
||||
* information */
|
||||
int
|
||||
init_drive(struct drive *drive)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
int error;
|
||||
|
||||
if (drive->devicename[0] == '\0') { /* no device name yet, default to drive name */
|
||||
drive->lasterror = EINVAL;
|
||||
printf("vinum: Can't open drive without drive name\n"); /* XXX */
|
||||
return EINVAL;
|
||||
}
|
||||
error = open_drive(drive, myproc); /* open the drive */
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
error = VOP_IOCTL(drive->vp, /* get the partition information */
|
||||
DIOCGPART,
|
||||
(caddr_t) & drive->partinfo,
|
||||
FREAD,
|
||||
NOCRED,
|
||||
myproc);
|
||||
if (error) {
|
||||
printf("vinum open_drive %s: Can't get partition information, error %d\n",
|
||||
drive->devicename,
|
||||
error); /* XXX */
|
||||
close_drive(drive);
|
||||
drive->lasterror = error;
|
||||
set_drive_state(drive->driveno, drive_down, 1);
|
||||
return error;
|
||||
}
|
||||
if (drive->partinfo.part->p_fstype != 0) { /* not plain */
|
||||
drive->lasterror = EFTYPE;
|
||||
printf("vinum open_drive %s: Wrong partition type for vinum\n", drive->devicename); /* XXX */
|
||||
close_drive(drive);
|
||||
set_drive_state(drive->driveno, drive_down, 1);
|
||||
return EFTYPE;
|
||||
}
|
||||
return set_drive_parms(drive); /* set various odds and ends */
|
||||
}
|
||||
|
||||
/* Close a drive if it's open. No errors */
|
||||
void
|
||||
close_drive(struct drive *drive)
|
||||
{
|
||||
if (drive->vp) {
|
||||
vn_close(drive->vp, FREAD | FWRITE, NOCRED, drive->p);
|
||||
drive->vp = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* Remove drive from the configuration.
|
||||
* Caller must ensure that it isn't active
|
||||
*/
|
||||
void
|
||||
remove_drive(int driveno)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
struct drive *drive = &vinum_conf.drive[driveno];
|
||||
long long int nomagic = VINUM_NOMAGIC; /* no magic number */
|
||||
|
||||
write_drive(drive, /* obliterate the magic, but leave a hint */
|
||||
(char *) &nomagic,
|
||||
8,
|
||||
VINUM_LABEL_OFFSET);
|
||||
close_drive(drive); /* and close it */
|
||||
drive->state = drive_unallocated; /* and forget everything we knew about it */
|
||||
save_config(); /* and save the updated configuration */
|
||||
}
|
||||
|
||||
/* Transfer drive data. Usually called from one of these defines;
|
||||
|
||||
* #define read_drive(a, b, c, d) driveio (a, b, c, d, B_READ)
|
||||
* #define write_drive(a, b, c, d) driveio (a, b, c, d, B_WRITE)
|
||||
*
|
||||
* Return error number
|
||||
*/
|
||||
int
|
||||
driveio(struct drive *drive, void *buf, size_t length, off_t offset, int flag)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
int error;
|
||||
struct buf *bp;
|
||||
int spl;
|
||||
|
||||
error = 0;
|
||||
|
||||
/* Get a buffer */
|
||||
bp = (struct buf *) Malloc(sizeof(struct buf)); /* get a buffer */
|
||||
CHECKALLOC(bp, "Can't allocate memory");
|
||||
|
||||
bzero(&buf, sizeof(buf));
|
||||
bp->b_flags = B_BUSY | flag; /* tell us when it's done */
|
||||
bp->b_iodone = drive_io_done; /* here */
|
||||
bp->b_proc = myproc; /* process */
|
||||
bp->b_dev = drive->vp->v_un.vu_specinfo->si_rdev; /* device */
|
||||
if (offset & (drive->partinfo.disklab->d_secsize - 1)) /* not on a block boundary */
|
||||
bp->b_blkno = offset / drive->partinfo.disklab->d_secsize; /* block number */
|
||||
bp->b_data = buf;
|
||||
bp->b_vp = drive->vp; /* vnode */
|
||||
bp->b_bcount = length;
|
||||
bp->b_bufsize = length;
|
||||
|
||||
(*bdevsw[major(bp->b_dev)]->d_strategy) (bp); /* initiate the transfer */
|
||||
|
||||
spl = splbio();
|
||||
while ((bp->b_flags & B_DONE) == 0) {
|
||||
bp->b_flags |= B_CALL; /* wake me again */
|
||||
tsleep((caddr_t) bp, PRIBIO, "driveio", 0); /* and wait for it to complete */
|
||||
}
|
||||
splx(spl);
|
||||
if (bp->b_flags & B_ERROR) /* didn't work */
|
||||
error = bp->b_error; /* get the error return */
|
||||
Free(bp); /* then return the buffer */
|
||||
return error;
|
||||
}
|
||||
|
||||
/* Read data from a drive
|
||||
|
||||
* Return error number
|
||||
*/
|
||||
int
|
||||
read_drive(struct drive *drive, void *buf, size_t length, off_t offset)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
int error;
|
||||
struct buf *bp;
|
||||
daddr_t nextbn;
|
||||
long bscale;
|
||||
|
||||
struct uio uio;
|
||||
struct iovec iov;
|
||||
daddr_t blocknum; /* block number */
|
||||
int blockoff; /* offset in block */
|
||||
int count; /* amount to transfer */
|
||||
|
||||
iov.iov_base = buf;
|
||||
iov.iov_len = length;
|
||||
|
||||
uio.uio_iov = &iov;
|
||||
uio.uio_iovcnt = length;
|
||||
uio.uio_offset = offset;
|
||||
uio.uio_resid = length;
|
||||
uio.uio_segflg = UIO_SYSSPACE;
|
||||
uio.uio_rw = UIO_READ;
|
||||
uio.uio_procp = myproc;
|
||||
|
||||
bscale = btodb(drive->blocksize); /* mask off offset from block number */
|
||||
do {
|
||||
blocknum = btodb(uio.uio_offset) & ~(bscale - 1); /* get the block number */
|
||||
blockoff = uio.uio_offset % drive->blocksize; /* offset in block */
|
||||
count = min((unsigned) (drive->blocksize - blockoff), /* amount to transfer in this block */
|
||||
uio.uio_resid);
|
||||
|
||||
/* XXX Check this. I think the test is wrong */
|
||||
if (drive->vp->v_lastr + bscale == blocknum) { /* did our last read finish in this block? */
|
||||
nextbn = blocknum + bscale; /* note the end of the transfer */
|
||||
error = breadn(drive->vp, /* and read with read-ahead */
|
||||
blocknum,
|
||||
(int) drive->blocksize,
|
||||
&nextbn,
|
||||
(int *) &drive->blocksize,
|
||||
1,
|
||||
NOCRED,
|
||||
&bp);
|
||||
} else /* random read: just read this block */
|
||||
error = bread(drive->vp, blocknum, (int) drive->blocksize, NOCRED, &bp);
|
||||
drive->vp->v_lastr = blocknum; /* note the last block we read */
|
||||
count = min(count, drive->blocksize - bp->b_resid);
|
||||
if (error) {
|
||||
brelse(bp);
|
||||
return error;
|
||||
}
|
||||
error = uiomove((char *) bp->b_data + blockoff, count, &uio); /* move the data */
|
||||
brelse(bp);
|
||||
}
|
||||
while (error == 0 && uio.uio_resid > 0 && count != 0);
|
||||
return error;
|
||||
}
|
||||
|
||||
/* Write data to a drive
|
||||
|
||||
* Return error number
|
||||
*/
|
||||
int
|
||||
write_drive(struct drive *drive, void *buf, size_t length, off_t offset)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
int error;
|
||||
struct buf *bp;
|
||||
struct uio uio;
|
||||
struct iovec iov;
|
||||
daddr_t blocknum; /* block number */
|
||||
int blockoff; /* offset in block */
|
||||
int count; /* amount to transfer */
|
||||
int blockshift;
|
||||
|
||||
if (drive->state == drive_down) /* currently down */
|
||||
return 0; /* ignore */
|
||||
if (drive->vp == NULL) {
|
||||
drive->lasterror = ENODEV;
|
||||
return ENODEV; /* not configured yet */
|
||||
}
|
||||
iov.iov_base = buf;
|
||||
iov.iov_len = length;
|
||||
|
||||
uio.uio_iov = &iov;
|
||||
uio.uio_iovcnt = length;
|
||||
uio.uio_offset = offset;
|
||||
uio.uio_resid = length;
|
||||
uio.uio_segflg = UIO_SYSSPACE;
|
||||
uio.uio_rw = UIO_WRITE;
|
||||
uio.uio_procp = myproc;
|
||||
|
||||
error = 0;
|
||||
blockshift = btodb(drive->blocksize) - 1; /* amount to shift block number
|
||||
* to get sector number */
|
||||
do {
|
||||
blocknum = btodb(uio.uio_offset) & ~blockshift; /* get the block number */
|
||||
blockoff = uio.uio_offset % drive->blocksize; /* offset in block */
|
||||
count = min((unsigned) (drive->blocksize - blockoff), /* amount to transfer in this block */
|
||||
uio.uio_resid);
|
||||
if (count == drive->blocksize) /* the whole block */
|
||||
bp = getblk(drive->vp, blocknum, drive->blocksize, 0, 0); /* just get it */
|
||||
else /* partial block: */
|
||||
error = bread(drive->vp, /* read it first */
|
||||
blocknum,
|
||||
drive->blocksize,
|
||||
NOCRED,
|
||||
&bp);
|
||||
count = min(count, drive->blocksize - bp->b_resid); /* how much will we transfer now? */
|
||||
if (error == 0)
|
||||
error = uiomove((char *) bp->b_data + blockoff, /* move the data to the block */
|
||||
count,
|
||||
&uio);
|
||||
if (error) {
|
||||
brelse(bp);
|
||||
drive->lasterror = error;
|
||||
switch (error) {
|
||||
case EIO:
|
||||
set_drive_state(drive->driveno, drive_down, 1);
|
||||
break;
|
||||
|
||||
/* XXX Add other possibilities here */
|
||||
default:
|
||||
}
|
||||
return error;
|
||||
}
|
||||
if (count + blockoff == drive->blocksize)
|
||||
/* The transfer goes to the end of the block. There's
|
||||
* no need to wait for any more data to arrive. */
|
||||
bawrite(bp); /* start the write now */
|
||||
else
|
||||
bdwrite(bp); /* do a delayed write */
|
||||
}
|
||||
while (error == 0 && uio.uio_resid > 0 && count != 0);
|
||||
if (error)
|
||||
drive->lasterror = error;
|
||||
return error; /* OK */
|
||||
}
|
||||
|
||||
/* Wake up on completion */
|
||||
void
|
||||
drive_io_done(struct buf *bp)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
wakeup((caddr_t) bp); /* Wachet auf! */
|
||||
bp->b_flags &= ~B_CALL; /* don't do this again */
|
||||
}
|
||||
|
||||
/* Check a drive for a vinum header. If found,
|
||||
* update the drive information. We come here
|
||||
* with a partially populated drive structure
|
||||
* which includes the device name.
|
||||
*
|
||||
* Return information on what we found
|
||||
*/
|
||||
enum drive_label_info
|
||||
read_drive_label(struct drive *drive)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
int error;
|
||||
int result; /* result of our search */
|
||||
struct vinum_hdr *vhdr; /* and as header */
|
||||
|
||||
error = init_drive(drive); /* find the drive */
|
||||
if (error) /* find the drive */
|
||||
return DL_CANT_OPEN; /* not ours */
|
||||
|
||||
vhdr = (struct vinum_hdr *) Malloc(VINUMHEADERLEN); /* allocate buffers */
|
||||
CHECKALLOC(vhdr, "Can't allocate memory");
|
||||
|
||||
error = read_drive(drive, (void *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET);
|
||||
if (vhdr->magic == VINUM_MAGIC) { /* ours! */
|
||||
if (drive->label.name[0] /* we have a name for this drive */
|
||||
&&(strcmp(drive->label.name, vhdr->label.name))) { /* but it doesn't match the real name */
|
||||
drive->lasterror = EINVAL;
|
||||
result = DL_WRONG_DRIVE; /* it's the wrong drive */
|
||||
} else {
|
||||
set_drive_parms(drive); /* and set other parameters */
|
||||
result = DL_OURS;
|
||||
}
|
||||
/* We copy the drive anyway so that we have
|
||||
* the correct name in the drive info. This
|
||||
* may not be the name specified */
|
||||
drive->label = vhdr->label; /* put in the label information */
|
||||
} else if (vhdr->magic == VINUM_NOMAGIC) /* was ours, but we gave it away */
|
||||
result = DL_DELETED_LABEL;
|
||||
else
|
||||
result = DL_NOT_OURS; /* we could have it, but we don't yet */
|
||||
Free(vhdr); /* that's all. */
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Check a drive for a vinum header. If found,
|
||||
* read configuration information from the drive and
|
||||
* incorporate the data into the configuration.
|
||||
*
|
||||
* Return error number
|
||||
*/
|
||||
int
|
||||
check_drive(char *drivename)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
int error;
|
||||
struct nameidata nd; /* mount point credentials */
|
||||
char *config_text; /* read the config info from disk into here */
|
||||
volatile char *cptr; /* pointer into config information */
|
||||
char *eptr; /* end pointer into config information */
|
||||
int driveno;
|
||||
struct drive *drive;
|
||||
char *config_line; /* copy the config line to */
|
||||
|
||||
driveno = find_drive_by_dev(drivename, 1); /* doesn't exist, create it */
|
||||
drive = &vinum_conf.drive[driveno]; /* and get a pointer */
|
||||
strcpy(drive->devicename, drivename); /* put in device name */
|
||||
|
||||
if (read_drive_label(drive) == DL_OURS) { /* ours! */
|
||||
config_text = (char *) Malloc(MAXCONFIG * 2); /* allocate buffers */
|
||||
CHECKALLOC(config_text, "Can't allocate memory");
|
||||
config_line = (char *) Malloc(MAXCONFIGLINE * 2); /* allocate buffers */
|
||||
CHECKALLOC(config_line, "Can't allocate memory");
|
||||
|
||||
/* Read in both copies of the configuration information */
|
||||
error = read_drive(drive, config_text, MAXCONFIG * 2, VINUM_CONFIG_OFFSET);
|
||||
|
||||
if (error != 0) {
|
||||
printf("vinum: Can't read device %s, error %d\n", drive->devicename, error);
|
||||
Free(config_text);
|
||||
Free(config_line);
|
||||
free_drive(drive); /* give it back */
|
||||
return error;
|
||||
}
|
||||
/* XXX At this point, check that the two copies are the same, and do something useful if not.
|
||||
* In particular, consider which is newer, and what this means for the integrity of the
|
||||
* data on the drive */
|
||||
|
||||
/* Parse the configuration, and add it to the global configuration */
|
||||
for (cptr = config_text; *cptr != '\0';) { /* love this style(9) */
|
||||
volatile int parse_status; /* return value from parse_config */
|
||||
|
||||
for (eptr = config_line; (*cptr != '\n') && (*cptr != '\0');) /* until the end of the line */
|
||||
*eptr++ = *cptr++;
|
||||
*eptr = '\0'; /* and delimit */
|
||||
if (setjmp(command_fail) == 0) { /* come back here on error and continue */
|
||||
parse_status = parse_config(config_line, &keyword_set); /* parse the config line */
|
||||
if (parse_status < 0) { /* error in config */
|
||||
/* This config should have been parsed in user
|
||||
* space. If we run into problems here, something
|
||||
* serious is afoot. Complain and let the user
|
||||
* snarf the config to see what's wrong */
|
||||
printf("vinum: Config error on drive %s, aborting integration\n", nd.ni_dirp);
|
||||
Free(config_text);
|
||||
Free(config_line);
|
||||
free_drive(drive); /* give it back */
|
||||
return EINVAL;
|
||||
}
|
||||
}
|
||||
while (*cptr == '\n')
|
||||
cptr++; /* skip to next line */
|
||||
}
|
||||
Free(config_text);
|
||||
if ((vinum_conf.flags & VF_READING_CONFIG) == 0) /* not reading config */
|
||||
updateconfig(0); /* update object states */
|
||||
printf("vinum: read configuration from %s\n", drivename);
|
||||
return 0; /* it all worked */
|
||||
} else { /* no vinum label found */
|
||||
if (drive->lasterror) {
|
||||
set_drive_state(drive->driveno, drive_down, 1);
|
||||
return drive->lasterror;
|
||||
} else
|
||||
return ENODEV; /* not our device */
|
||||
}
|
||||
}
|
||||
|
||||
/* Kludge: kernel printf doesn't handle longs correctly XXX */
|
||||
static char *lltoa(long long l, char *s);
|
||||
static char *sappend(char *txt, char *s);
|
||||
|
||||
/*
 * Convert l to decimal text at s.  The text is NOT delimited with a
 * NUL; the return value points to the position after the last
 * character written, so that the caller can carry on appending.
 *
 * The conversion is done on the unsigned magnitude, so the most
 * negative value, which can't be negated as a signed number, is
 * handled correctly.
 */
static char *
lltoa(long long l, char *s)
{
    char digits[sizeof(long long) * 3];			    /* digits, least significant first */
    int ndigits = 0;
    unsigned long long magnitude;

    if (l < 0) {
	*s++ = '-';
	magnitude = 0ULL - (unsigned long long) l;	    /* well defined, unlike -l */
    } else
	magnitude = (unsigned long long) l;
    do {
	digits[ndigits++] = (char) ('0' + (int) (magnitude % 10));
	magnitude /= 10;
    }
    while (magnitude != 0);
    while (ndigits > 0)					    /* copy out, most significant first */
	*s++ = digits[--ndigits];
    return s;
}
|
||||
|
||||
/*
 * Copy the string txt, including its delimiter, to s.  Note the
 * argument order: the source comes first.  Return a pointer to the
 * delimiter just written, so that the next call can append there.
 */
static char *
sappend(char *txt, char *s)
{
    for (;;) {
	char c = *txt++;

	*s = c;
	if (c == '\0')
	    return s;					    /* points to the delimiter */
	s++;
    }
}
|
||||
|
||||
/* Format the configuration in text form into the buffer
|
||||
* at config. Don't go beyond len bytes
|
||||
* XXX this stinks. Fix soon. */
|
||||
void
|
||||
format_config(char *config, int len)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
int i;
|
||||
int j;
|
||||
char *s = config;
|
||||
|
||||
bzero(config, len);
|
||||
|
||||
/* First write the drive configuration */
|
||||
for (i = 0; i < vinum_conf.drives_used; i++) {
|
||||
struct drive *drive;
|
||||
|
||||
drive = &vinum_conf.drive[i];
|
||||
if (drive->state != drive_unallocated) {
|
||||
sprintf(s,
|
||||
"drive %s state %s device %s\n",
|
||||
drive->label.name,
|
||||
drive_state(drive->state),
|
||||
drive->devicename);
|
||||
while (*s)
|
||||
s++; /* find the end */
|
||||
if (s > &config[len - 80]) {
|
||||
printf("vinum: configuration data overflow\n");
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Then the volume configuration */
|
||||
for (i = 0; i < vinum_conf.volumes_used; i++) {
|
||||
struct volume *vol;
|
||||
|
||||
vol = &vinum_conf.volume[i];
|
||||
if (vol->state != volume_unallocated) {
|
||||
if (vol->preferred_plex >= 0) /* preferences, */
|
||||
sprintf(s,
|
||||
"volume %s state %s readpol prefer %s",
|
||||
vol->name,
|
||||
volume_state(vol->state),
|
||||
vinum_conf.plex[vol->preferred_plex].name);
|
||||
else /* default round-robin */
|
||||
sprintf(s,
|
||||
"volume %s state %s",
|
||||
vol->name,
|
||||
volume_state(vol->state));
|
||||
while (*s)
|
||||
s++; /* find the end */
|
||||
s = sappend("\n", s);
|
||||
if (s > &config[len - 80]) {
|
||||
printf("vinum: configuration data overflow\n");
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Then the plex configuration */
|
||||
for (i = 0; i < vinum_conf.plexes_used; i++) {
|
||||
struct plex *plex;
|
||||
|
||||
plex = &vinum_conf.plex[i];
|
||||
if (plex->state != plex_unallocated) {
|
||||
sprintf(s, "plex name %s state %s org %s ",
|
||||
plex->name,
|
||||
plex_state(plex->state),
|
||||
plex_org(plex->organization));
|
||||
while (*s)
|
||||
s++; /* find the end */
|
||||
if ((plex->organization == plex_striped)
|
||||
) {
|
||||
sprintf(s, "%db ", (int) plex->stripesize);
|
||||
while (*s)
|
||||
s++; /* find the end */
|
||||
}
|
||||
if (plex->volno >= 0) /* we have a volume */
|
||||
sprintf(s, "vol %s ", vinum_conf.volume[plex->volno].name);
|
||||
while (*s)
|
||||
s++; /* find the end */
|
||||
for (j = 0; j < plex->subdisks; j++) {
|
||||
sprintf(s, " sd %s", vinum_conf.sd[plex->sdnos[j]].name);
|
||||
}
|
||||
s = sappend("\n", s);
|
||||
if (s > &config[len - 80]) {
|
||||
printf("vinum: configuration data overflow\n");
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* And finally the subdisk configuration */
|
||||
for (i = 0; i < vinum_conf.subdisks_used; i++) {
|
||||
struct sd *sd = &vinum_conf.sd[i]; /* XXX */
|
||||
if (vinum_conf.sd[i].state != sd_unallocated) {
|
||||
sprintf(s,
|
||||
"sd name %s drive %s plex %s state %s len ",
|
||||
sd->name,
|
||||
vinum_conf.drive[sd->driveno].label.name,
|
||||
vinum_conf.plex[sd->plexno].name,
|
||||
sd_state(sd->state));
|
||||
while (*s)
|
||||
s++; /* find the end */
|
||||
s = lltoa(sd->sectors, s);
|
||||
s = sappend("b driveoffset ", s);
|
||||
s = lltoa(sd->driveoffset, s);
|
||||
s = sappend("b plexoffset ", s);
|
||||
s = lltoa(sd->plexoffset, s);
|
||||
s = sappend("b\n", s);
|
||||
if (s > &config[len - 80]) {
|
||||
printf("vinum: configuration data overflow\n");
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Write the configuration to all vinum slices */
|
||||
int
|
||||
save_config(void)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
int error;
|
||||
int written_config; /* set when we firstnwrite the config to disk */
|
||||
int driveno;
|
||||
struct drive *drive; /* point to current drive info */
|
||||
struct vinum_hdr *vhdr; /* and as header */
|
||||
char *config; /* point to config data */
|
||||
int wlabel_on; /* to set writing label on/off */
|
||||
|
||||
/* don't save the configuration while we're still working on it */
|
||||
if (vinum_conf.flags & VF_CONFIGURING)
|
||||
return 0;
|
||||
written_config = 0; /* no config written yet */
|
||||
/* Build a volume header */
|
||||
vhdr = (struct vinum_hdr *) Malloc(VINUMHEADERLEN); /* get space for the config data */
|
||||
CHECKALLOC(vhdr, "Can't allocate config data");
|
||||
vhdr->magic = VINUM_MAGIC; /* magic number */
|
||||
vhdr->config_length = MAXCONFIG; /* length of following config info */
|
||||
|
||||
config = Malloc(MAXCONFIG); /* get space for the config data */
|
||||
CHECKALLOC(config, "Can't allocate config data");
|
||||
|
||||
format_config(config, MAXCONFIG);
|
||||
error = 0; /* no errors yet */
|
||||
for (driveno = 0; driveno < vinum_conf.drives_used; driveno++) {
|
||||
drive = &vinum_conf.drive[driveno]; /* point to drive */
|
||||
|
||||
if (drive->state != drive_down) {
|
||||
#if (__FreeBSD__ >= 3)
|
||||
getmicrotime(&drive->label.last_update); /* time of last update is now */
|
||||
#else
|
||||
drive->label.last_update = time; /* time of last update is now */
|
||||
#endif
|
||||
bcopy((char *) &drive->label, /* and the label info from the drive structure */
|
||||
(char *) &vhdr->label,
|
||||
sizeof(vhdr->label));
|
||||
if ((drive->state != drive_unallocated)
|
||||
&& (drive->state != drive_uninit)) {
|
||||
wlabel_on = 1; /* enable writing the label */
|
||||
error = VOP_IOCTL(drive->vp, /* make the label writeable */
|
||||
DIOCWLABEL,
|
||||
(caddr_t) & wlabel_on,
|
||||
FWRITE,
|
||||
NOCRED,
|
||||
myproc);
|
||||
if (error == 0)
|
||||
error = write_drive(drive, vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET);
|
||||
if (error == 0)
|
||||
error = write_drive(drive, config, MAXCONFIG, VINUM_CONFIG_OFFSET);
|
||||
wlabel_on = 0; /* enable writing the label */
|
||||
VOP_IOCTL(drive->vp, /* make the label non-writeable again */
|
||||
DIOCWLABEL,
|
||||
(caddr_t) & wlabel_on,
|
||||
FWRITE,
|
||||
NOCRED,
|
||||
myproc);
|
||||
if (error) {
|
||||
printf("vinum: Can't write config to %s, error %d\n", drive->devicename, error);
|
||||
set_drive_state(drive->driveno, drive_down, 1);
|
||||
} else
|
||||
written_config = 1; /* we've written it on at least one drive */
|
||||
}
|
||||
}
|
||||
}
|
||||
Free(vhdr);
|
||||
Free(config);
|
||||
return written_config == 0; /* return 1 if we failed to write config */
|
||||
}
|
||||
|
||||
/* Disk labels are a mess. The correct way to access them
|
||||
* is with the DIOC[GSW]DINFO ioctls, but some programs, such
|
||||
* as newfs, access the disk directly, so we have to write
|
||||
* things there. We do this only on request. If a user
|
||||
* request tries to read it directly, we fake up one on the fly.
|
||||
*/
|
||||
|
||||
/* get_volume_label returns a label structure to lp, which
|
||||
* is allocated by the caller */
|
||||
void
|
||||
get_volume_label(struct volume *vol, struct disklabel *lp)
|
||||
{
|
||||
bzero(lp, sizeof(struct disklabel));
|
||||
|
||||
strncpy(lp->d_typename, "vinum", sizeof(lp->d_typename));
|
||||
lp->d_type = DTYPE_VINUM;
|
||||
strncpy(lp->d_packname, vol->name, min(sizeof(lp->d_packname), sizeof(vol->name)));
|
||||
lp->d_rpm = 14400 * vol->plexes; /* to keep them guessing */
|
||||
lp->d_interleave = 1;
|
||||
lp->d_flags = 0;
|
||||
|
||||
/* Fitting unto the vine, a vinum has a single
|
||||
* track with all its sectors */
|
||||
lp->d_secsize = DEV_BSIZE; /* bytes per sector */
|
||||
lp->d_nsectors = vol->size; /* data sectors per track */
|
||||
lp->d_ntracks = 1; /* tracks per cylinder */
|
||||
lp->d_ncylinders = 1; /* data cylinders per unit */
|
||||
lp->d_secpercyl = vol->size; /* data sectors per cylinder */
|
||||
lp->d_secperunit = vol->size; /* data sectors per unit */
|
||||
|
||||
lp->d_bbsize = BBSIZE;
|
||||
lp->d_sbsize = SBSIZE;
|
||||
|
||||
lp->d_magic = DISKMAGIC;
|
||||
lp->d_magic2 = DISKMAGIC;
|
||||
|
||||
/* Set up partitions a, b and c to be identical
|
||||
* and the size of the volume. a is UFS, b is
|
||||
* swap, c is nothing */
|
||||
lp->d_partitions[0].p_size = vol->size;
|
||||
lp->d_partitions[0].p_fsize = 1024;
|
||||
lp->d_partitions[0].p_fstype = FS_BSDFFS; /* FreeBSD File System :-) */
|
||||
lp->d_partitions[0].p_fsize = 1024; /* FS fragment size */
|
||||
lp->d_partitions[0].p_frag = 8; /* and fragments per block */
|
||||
lp->d_partitions[SWAP_PART].p_size = vol->size;
|
||||
lp->d_partitions[SWAP_PART].p_fstype = FS_SWAP; /* swap partition */
|
||||
lp->d_partitions[LABEL_PART].p_size = vol->size;
|
||||
lp->d_npartitions = LABEL_PART + 1;
|
||||
strncpy(lp->d_packname, vol->name, min(sizeof(lp->d_packname), sizeof(vol->name)));
|
||||
lp->d_checksum = dkcksum(lp);
|
||||
}
|
||||
|
||||
int
|
||||
write_volume_label(int volno)
|
||||
{
|
||||
struct disklabel *lp;
|
||||
struct buf *bp;
|
||||
struct disklabel *dlp;
|
||||
struct volume *vol;
|
||||
int error;
|
||||
|
||||
lp = (struct disklabel *) Malloc((sizeof(struct disklabel) + (DEV_BSIZE - 1)) & (DEV_BSIZE - 1));
|
||||
if (lp == 0)
|
||||
return ENOMEM;
|
||||
|
||||
if ((unsigned) (volno) >= (unsigned) vinum_conf.volumes_used) /* invalid volume */
|
||||
return ENOENT;
|
||||
|
||||
vol = &VOL[volno]; /* volume in question */
|
||||
if (vol->state == volume_unallocated) /* nothing there */
|
||||
return ENOENT;
|
||||
|
||||
get_volume_label(vol, lp); /* get the label */
|
||||
|
||||
/* Now write to disk. This code is derived from the
|
||||
* system writedisklabel (), which does silly things
|
||||
* like reading the label and refusing to write
|
||||
* unless it's already there. */
|
||||
bp = geteblk((int) lp->d_secsize); /* get a buffer */
|
||||
bp->b_dev = minor(vol->devno) | (CDEV_MAJOR << MAJORDEV_SHIFT); /* our own raw volume */
|
||||
bp->b_blkno = LABELSECTOR * ((int) lp->d_secsize / DEV_BSIZE);
|
||||
bp->b_bcount = lp->d_secsize;
|
||||
bzero(bp->b_data, lp->d_secsize);
|
||||
dlp = (struct disklabel *) bp->b_data;
|
||||
*dlp = *lp;
|
||||
bp->b_flags &= ~B_INVAL;
|
||||
bp->b_flags |= B_BUSY | B_WRITE;
|
||||
vinumstrategy(bp); /* write it out */
|
||||
error = biowait(bp);
|
||||
bp->b_flags |= B_INVAL | B_AGE;
|
||||
brelse(bp);
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
 * Initialize a subdisk.  This is currently only a stub: sdno is
 * ignored, nothing is done, and the return value is always 0.
 */
int
initsd(int sdno)
{
    (void) sdno;					    /* not used yet */
    return 0;
}
|
132
sys/dev/vinum/vinumio.h
Normal file
132
sys/dev/vinum/vinumio.h
Normal file
@ -0,0 +1,132 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: vinumio.h,v 1.10 1998/08/10 05:46:19 grog Exp grog $
|
||||
*/
|
||||
|
||||
#define MAX_IOCTL_REPLY 256
|
||||
#define L 'F' /* ID letter of our ioctls */
|
||||
/* VINUM_CREATE returns a buffer of this kind */
|
||||
struct _ioctl_reply {
|
||||
int error;
|
||||
char msg[MAX_IOCTL_REPLY];
|
||||
};
|
||||
|
||||
/* ioctl requests */
|
||||
#define BUFSIZE 1024 /* size of buffer, including continuations */
|
||||
#define VINUM_CREATE _IOC(IOC_IN | IOC_OUT, L, 64, BUFSIZE) /* configure vinum */
|
||||
#define VINUM_GETCONFIG _IOR(L, 65, struct _vinum_conf) /* get global config */
|
||||
#define VINUM_DRIVECONFIG _IOWR(L, 66, struct drive) /* get drive config */
|
||||
#define VINUM_SDCONFIG _IOWR(L, 67, struct sd) /* get subdisk config */
|
||||
#define VINUM_PLEXCONFIG _IOWR(L, 68, struct plex) /* get plex config */
|
||||
#define VINUM_VOLCONFIG _IOWR(L, 69, struct volume) /* get volume config */
|
||||
#define VINUM_PLEXSDCONFIG _IOWR(L, 70, struct sd) /* get sd config for plex (plex, sdno) */
|
||||
#define VINUM_GETFREELIST _IOWR(L, 71, struct drive_freelist) /* get freelist element (drive, fe) */
|
||||
#define VINUM_SAVECONFIG _IOC(0, L, 72, 0) /* release locks, update, write config to disk */
|
||||
#define VINUM_RESETCONFIG _IOC(0, L, 73, 0) /* trash config on disk */
|
||||
#define VINUM_INIT _IOC(0, L, 74, 0) /* read config from disk */
|
||||
#ifdef DEBUG
|
||||
|
||||
/* Parameter block for the VINUM_DEBUG ioctl */
struct debuginfo {
    int changeit;					    /* set: change the debug setting to param */
    int param;						    /* new value of the debug setting */
};
|
||||
|
||||
#define VINUM_DEBUG _IOWR(L, 75, struct debuginfo) /* call the debugger from ioctl () */
|
||||
#endif
|
||||
|
||||
/* The kinds of object which an ioctl request can refer to */
enum objecttype {
    drive_object,					    /* a drive */
    sd_object,						    /* a subdisk */
    plex_object,					    /* a plex */
    volume_object,					    /* a volume */
    invalid_object					    /* none of the above */
};
|
||||
|
||||
/* Start an object. Pass two integers:
|
||||
* msg [0] index in vinum_conf.<object>
|
||||
* msg [1] type of object (see below)
|
||||
*
|
||||
* Return ioctl_reply
|
||||
*/
|
||||
#define VINUM_SETSTATE _IOC(IOC_IN | IOC_OUT, L, 76, MAX_IOCTL_REPLY) /* start an object */
|
||||
|
||||
/* The state to set with VINUM_SETSTATE. Since
|
||||
* each object has a different set of states, we
|
||||
* need to translate later */
|
||||
enum objectstate {
    object_down,					    /* take the object down */
    object_initializing,				    /* object is being initialized */
    object_up						    /* bring the object up */
};
|
||||
|
||||
/* This structure is used for modifying objects
|
||||
* (VINUM_SETSTATE, VINUM_REMOVE, VINUM_RESETSTATS, VINUM_ATTACH,
|
||||
* VINUM_DETACH, VINUM_REPLACE)
|
||||
*/
|
||||
struct vinum_ioctl_msg {
|
||||
int index;
|
||||
enum objecttype type;
|
||||
enum objectstate state; /* state to set (VINUM_SETSTATE) */
|
||||
int force; /* do it even if it doesn't make sense */
|
||||
int recurse; /* recurse (VINUM_REMOVE) */
|
||||
int otherobject; /* superordinate object (attach),
|
||||
* replacement object (replace) */
|
||||
int rename; /* rename object (attach) */
|
||||
int64_t offset; /* offset of subdisk (for attach) */
|
||||
};
|
||||
|
||||
#define VINUM_RELEASECONFIG _IOC(0, L, 77, 0) /* release locks and write config to disk */
|
||||
#define VINUM_STARTCONFIG _IOC(0, L, 78, 0) /* start a configuration operation */
|
||||
#define VINUM_MEMINFO _IOR(L, 79, struct meminfo) /* get memory usage summary */
|
||||
#define VINUM_MALLOCINFO _IOWR(L, 80, struct mc) /* get specific malloc information [i] */
|
||||
#define VINUM_LABEL _IOC(IOC_IN | IOC_OUT, L, 81, MAX_IOCTL_REPLY) /* label a volume */
|
||||
#define VINUM_INITSD _IOW(L, 82, int) /* initialize a subdisk */
|
||||
#define VINUM_REMOVE _IOC(IOC_IN | IOC_OUT, L, 83, MAX_IOCTL_REPLY) /* remove an object */
|
||||
#define VINUM_GETUNMAPPED _IOWR(L, 84, struct plexregion) /* get unmapped element (plex, re) */
|
||||
#define VINUM_GETDEFECTIVE _IOWR(L, 85, struct plexregion) /* get defective element (plex, re) */
|
||||
#define VINUM_RESETSTATS _IOC(IOC_IN | IOC_OUT, L, 86, MAX_IOCTL_REPLY) /* reset object stats */
|
||||
#define VINUM_ATTACH _IOC(IOC_IN | IOC_OUT, L, 87, MAX_IOCTL_REPLY) /* attach an object */
|
||||
#define VINUM_DETACH _IOC(IOC_IN | IOC_OUT, L, 88, MAX_IOCTL_REPLY) /* detach an object */
|
||||
|
||||
struct vinum_rename_msg {
|
||||
int index;
|
||||
int recurse; /* rename subordinate objects too */
|
||||
enum objecttype type;
|
||||
char newname[MAXNAME]; /* new name to give to object */
|
||||
};
|
||||
|
||||
#define VINUM_RENAME _IOC(IOC_IN | IOC_OUT, L, 89, MAX_IOCTL_REPLY) /* rename an object */
|
||||
#define VINUM_REPLACE _IOC(IOC_IN | IOC_OUT, L, 90, MAX_IOCTL_REPLY) /* replace an object */
|
787
sys/dev/vinum/vinumioctl.c
Normal file
787
sys/dev/vinum/vinumioctl.c
Normal file
@ -0,0 +1,787 @@
|
||||
/* XXX replace all the checks on object validity with
|
||||
* calls to valid<object> */
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: vinumioctl.c,v 1.1 1998/08/14 08:46:10 grog Exp grog $
|
||||
*/
|
||||
|
||||
#define STATIC /* nothing while we're testing XXX */
|
||||
|
||||
#define REALLYKERNEL
|
||||
#include "vinumhdr.h"
|
||||
#include "sys/sysproto.h" /* for sync(2) */
|
||||
#ifdef DEBUG
|
||||
#include <sys/reboot.h>
|
||||
#endif
|
||||
|
||||
jmp_buf command_fail; /* return on a failed command */
|
||||
|
||||
#if __FreeBSD__ >= 3
|
||||
/* Why aren't these declared anywhere? XXX */
|
||||
int setjmp(jmp_buf);
|
||||
void longjmp(jmp_buf, int);
|
||||
#endif
|
||||
|
||||
/* pointer to ioctl p parameter, to save passing it around */
|
||||
struct proc *myproc;
|
||||
|
||||
int vinum_inactive(void);
|
||||
void free_vinum(int);
|
||||
void attachobject(struct vinum_ioctl_msg *);
|
||||
void detachobject(struct vinum_ioctl_msg *);
|
||||
void renameobject(struct vinum_rename_msg *);
|
||||
void replaceobject(struct vinum_ioctl_msg *);
|
||||
|
||||
/* ioctl routine */
|
||||
int
|
||||
vinumioctl(dev_t dev,
|
||||
#if __FreeBSD__ >= 3
|
||||
u_long cmd,
|
||||
#else
|
||||
int cmd,
|
||||
#endif
|
||||
caddr_t data,
|
||||
int flag,
|
||||
struct proc *p)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
unsigned int objno;
|
||||
int error = 0;
|
||||
struct volume *vol;
|
||||
unsigned int index; /* for transferring config info */
|
||||
unsigned int sdno; /* for transferring config info */
|
||||
int fe; /* free list element number */
|
||||
struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* struct to return */
|
||||
|
||||
struct devcode *device = (struct devcode *) &dev;
|
||||
|
||||
/* First, decide what we're looking at */
|
||||
switch (device->type) {
|
||||
case VINUM_SUPERDEV_TYPE:
|
||||
myproc = p; /* save pointer to process */
|
||||
ioctl_reply = (struct _ioctl_reply *) data; /* save the address to reply to */
|
||||
error = setjmp(command_fail); /* come back here on error */
|
||||
if (error) /* bombed out */
|
||||
return 0; /* the reply will contain meaningful info */
|
||||
switch (cmd) {
|
||||
/* XXX #ifdef DEBUG */
|
||||
case VINUM_DEBUG:
|
||||
boothowto |= RB_GDB; /* serial debug line */
|
||||
if (((struct debuginfo *) data)->changeit) /* change debug settings */
|
||||
debug = (((struct debuginfo *) data)->param);
|
||||
else
|
||||
Debugger("vinum debug");
|
||||
ioctl_reply = (struct _ioctl_reply *) data; /* reinstate the address to reply to */
|
||||
ioctl_reply->error = 0;
|
||||
return 0;
|
||||
/* XXX #endif */
|
||||
|
||||
case VINUM_CREATE: /* create a vinum object */
|
||||
error = lock_config(); /* get the config for us alone */
|
||||
if (error) /* can't do it, */
|
||||
return error; /* give up */
|
||||
error = setjmp(command_fail); /* come back here on error */
|
||||
if (error == 0) { /* first time, */
|
||||
parse_user_config((char *) data, &keyword_set); /* update the config */
|
||||
ioctl_reply->error = 0; /* no error if we make it here */
|
||||
} else if (ioctl_reply->error == 0) { /* longjmp, but no error status */
|
||||
ioctl_reply->error = EINVAL; /* note that something's up */
|
||||
ioctl_reply->msg[0] = '\0'; /* no message? */
|
||||
}
|
||||
unlock_config();
|
||||
return 0; /* must be 0 to return the real error info */
|
||||
|
||||
case VINUM_GETCONFIG: /* get the configuration information */
|
||||
bcopy(&vinum_conf, data, sizeof(vinum_conf));
|
||||
return 0;
|
||||
|
||||
/* start configuring the subsystem */
|
||||
case VINUM_STARTCONFIG:
|
||||
return start_config(); /* just lock it */
|
||||
|
||||
/* Move the individual parts of the config to user space.
|
||||
|
||||
* Specify the index of the object in the first word of data,
|
||||
* and return the object there
|
||||
*/
|
||||
case VINUM_DRIVECONFIG:
|
||||
index = *(int *) data; /* get the index */
|
||||
if (index >= (unsigned) vinum_conf.drives_used) /* can't do it */
|
||||
return EFAULT; /* bang */
|
||||
bcopy(&DRIVE[index], data, sizeof(struct drive)); /* copy the config item out */
|
||||
return 0;
|
||||
|
||||
case VINUM_SDCONFIG:
|
||||
index = *(int *) data; /* get the index */
|
||||
if (index >= (unsigned) vinum_conf.subdisks_used) /* can't do it */
|
||||
return EFAULT; /* bang */
|
||||
bcopy(&SD[index], data, sizeof(struct sd)); /* copy the config item out */
|
||||
return 0;
|
||||
|
||||
case VINUM_PLEXCONFIG:
|
||||
index = *(int *) data; /* get the index */
|
||||
if (index >= (unsigned) vinum_conf.plexes_used) /* can't do it */
|
||||
return EFAULT; /* bang */
|
||||
bcopy(&PLEX[index], data, sizeof(struct plex)); /* copy the config item out */
|
||||
return 0;
|
||||
|
||||
case VINUM_VOLCONFIG:
|
||||
index = *(int *) data; /* get the index */
|
||||
if (index >= (unsigned) vinum_conf.volumes_used) /* can't do it */
|
||||
return EFAULT; /* bang */
|
||||
bcopy(&VOL[index], data, sizeof(struct volume)); /* copy the config item out */
|
||||
return 0;
|
||||
|
||||
case VINUM_PLEXSDCONFIG:
|
||||
index = *(int *) data; /* get the plex index */
|
||||
sdno = ((int *) data)[1]; /* and the sd index */
|
||||
if ((index >= (unsigned) vinum_conf.plexes_used) /* plex doesn't exist */
|
||||
||(sdno >= PLEX[index].subdisks)) /* or it doesn't have this many subdisks */
|
||||
return EFAULT; /* bang */
|
||||
bcopy(&SD[PLEX[index].sdnos[sdno]], /* copy the config item out */
|
||||
data,
|
||||
sizeof(struct sd));
|
||||
return 0;
|
||||
|
||||
case VINUM_SAVECONFIG:
|
||||
if (VFLAGS & VF_CONFIGURING) { /* must be us, the others are asleep */
|
||||
finish_config(1); /* finish the configuration and update it */
|
||||
error = save_config(); /* save configuration to disk */
|
||||
} else
|
||||
error = EINVAL; /* queue up for this one, please */
|
||||
return error;
|
||||
|
||||
case VINUM_RELEASECONFIG: /* release the config */
|
||||
if (VFLAGS & VF_CONFIGURING) { /* must be us, the others are asleep */
|
||||
finish_config(0); /* finish the configuration, don't change it */
|
||||
error = save_config(); /* save configuration to disk */
|
||||
} else
|
||||
error = EINVAL; /* release what config? */
|
||||
return error;
|
||||
|
||||
case VINUM_INIT:
|
||||
ioctl_reply = (struct _ioctl_reply *) data; /* reinstate the address to reply to */
|
||||
ioctl_reply->error = 0;
|
||||
return 0;
|
||||
|
||||
case VINUM_RESETCONFIG:
|
||||
if (vinum_inactive() && (vinum_conf.opencount < 2)) { /* if we're not active */
|
||||
/* Note the open count. We may be called from v, so we'll be open.
|
||||
* Keep the count so we don't underflow */
|
||||
int oc = vinum_conf.opencount;
|
||||
free_vinum(1); /* clean up everything */
|
||||
printf("vinum: CONFIGURATION OBLITERATED\n");
|
||||
vinum_conf.opencount = oc;
|
||||
ioctl_reply = (struct _ioctl_reply *) data; /* reinstate the address to reply to */
|
||||
ioctl_reply->error = 0;
|
||||
return 0;
|
||||
}
|
||||
return EBUSY;
|
||||
|
||||
case VINUM_SETSTATE:
|
||||
setstate((struct vinum_ioctl_msg *) data); /* set an object state */
|
||||
return 0;
|
||||
|
||||
case VINUM_MEMINFO:
|
||||
vinum_meminfo(data);
|
||||
return 0;
|
||||
|
||||
case VINUM_MALLOCINFO:
|
||||
return vinum_mallocinfo(data);
|
||||
|
||||
case VINUM_LABEL: /* label a volume */
|
||||
ioctl_reply->error = write_volume_label(*(int *) data); /* index of the volume to label */
|
||||
ioctl_reply->msg[0] = '\0'; /* no message */
|
||||
return 0;
|
||||
|
||||
case VINUM_REMOVE:
|
||||
remove((struct vinum_ioctl_msg *) data); /* remove an object */
|
||||
return 0;
|
||||
|
||||
case VINUM_GETFREELIST: /* get a drive free list element */
|
||||
index = *(int *) data; /* get the drive index */
|
||||
fe = ((int *) data)[1]; /* and the free list element */
|
||||
if ((index >= (unsigned) vinum_conf.drives_used) /* plex doesn't exist */
|
||||
||(DRIVE[index].state == drive_unallocated))
|
||||
return ENODEV;
|
||||
if (fe >= DRIVE[index].freelist_entries) /* no such entry */
|
||||
return ENOENT;
|
||||
bcopy(&DRIVE[index].freelist[fe],
|
||||
data,
|
||||
sizeof(struct drive_freelist));
|
||||
return 0;
|
||||
|
||||
case VINUM_GETDEFECTIVE: /* get a plex defective area element */
|
||||
index = *(int *) data; /* get the plex index */
|
||||
fe = ((int *) data)[1]; /* and the region number */
|
||||
if ((index >= (unsigned) vinum_conf.plexes_used) /* plex doesn't exist */
|
||||
||(PLEX[index].state == plex_unallocated))
|
||||
return ENODEV;
|
||||
if (fe >= PLEX[index].defective_regions) /* no such entry */
|
||||
return ENOENT;
|
||||
bcopy(&PLEX[index].defective_region[fe],
|
||||
data,
|
||||
sizeof(struct plexregion));
|
||||
return 0;
|
||||
|
||||
case VINUM_GETUNMAPPED: /* get a plex unmapped area element */
|
||||
index = *(int *) data; /* get the plex index */
|
||||
fe = ((int *) data)[1]; /* and the region number */
|
||||
if ((index >= (unsigned) vinum_conf.plexes_used) /* plex doesn't exist */
|
||||
||(PLEX[index].state == plex_unallocated))
|
||||
return ENODEV;
|
||||
if (fe >= PLEX[index].unmapped_regions) /* no such entry */
|
||||
return ENOENT;
|
||||
bcopy(&PLEX[index].unmapped_region[fe],
|
||||
data,
|
||||
sizeof(struct plexregion));
|
||||
return 0;
|
||||
|
||||
case VINUM_RESETSTATS:
|
||||
resetstats((struct vinum_ioctl_msg *) data); /* reset object stats */
|
||||
return 0;
|
||||
|
||||
/* attach an object to a superordinate object */
|
||||
case VINUM_ATTACH:
|
||||
attachobject((struct vinum_ioctl_msg *) data);
|
||||
return 0;
|
||||
|
||||
/* detach an object from a superordinate object */
|
||||
case VINUM_DETACH:
|
||||
detachobject((struct vinum_ioctl_msg *) data);
|
||||
return 0;
|
||||
|
||||
/* rename an object */
|
||||
case VINUM_RENAME:
|
||||
renameobject((struct vinum_rename_msg *) data);
|
||||
return 0;
|
||||
|
||||
/* replace an object */
|
||||
case VINUM_REPLACE:
|
||||
replaceobject((struct vinum_ioctl_msg *) data);
|
||||
return 0;
|
||||
|
||||
default:
|
||||
/* FALLTHROUGH */
|
||||
}
|
||||
|
||||
default:
|
||||
#if __FreeBSD__>=3
|
||||
printf("vinumioctl: type %d, sd %d, plex %d, major %x, volume %d, command %lx\n",
|
||||
device->type,
|
||||
device->sd,
|
||||
device->plex,
|
||||
device->major,
|
||||
device->volume,
|
||||
cmd); /* XXX */
|
||||
|
||||
#else
|
||||
printf("vinumioctl: type %d, sd %d, plex %d, major %x, volume %d, command %x\n",
|
||||
device->type,
|
||||
device->sd,
|
||||
device->plex,
|
||||
device->major,
|
||||
device->volume,
|
||||
cmd); /* XXX */
|
||||
|
||||
#endif
|
||||
return EINVAL;
|
||||
|
||||
case VINUM_DRIVE_TYPE:
|
||||
case VINUM_PLEX_TYPE:
|
||||
return EAGAIN; /* try again next week */
|
||||
|
||||
case VINUM_SD_TYPE:
|
||||
objno = SDNO(dev);
|
||||
|
||||
switch (cmd) {
|
||||
case VINUM_INITSD: /* initialize subdisk */
|
||||
return initsd(objno);
|
||||
|
||||
default:
|
||||
return EINVAL;
|
||||
}
|
||||
break;
|
||||
|
||||
case VINUM_VOLUME_TYPE:
|
||||
objno = VOLNO(dev);
|
||||
|
||||
if ((unsigned) objno >= (unsigned) vinum_conf.volumes_used) /* not a valid volume */
|
||||
return ENXIO;
|
||||
vol = &VOL[objno];
|
||||
if (vol->state != volume_up) /* not up, */
|
||||
return EIO; /* I/O error */
|
||||
|
||||
switch (cmd) {
|
||||
case DIOCGDINFO: /* get disk label */
|
||||
get_volume_label(vol, (struct disklabel *) data);
|
||||
break;
|
||||
|
||||
/* Care! DIOCGPART returns *pointers* to
|
||||
* the caller, so we need to store this crap as well.
|
||||
* And yes, we need it. */
|
||||
case DIOCGPART: /* get partition information */
|
||||
get_volume_label(vol, &vol->label);
|
||||
((struct partinfo *) data)->disklab = &vol->label;
|
||||
((struct partinfo *) data)->part = &vol->label.d_partitions[0];
|
||||
break;
|
||||
|
||||
/* We don't have this stuff on hardware,
|
||||
* so just pretend to do it so that
|
||||
* utilities don't get upset. */
|
||||
case DIOCWDINFO: /* write partition info */
|
||||
case DIOCSDINFO: /* set partition info */
|
||||
return 0; /* not a titty */
|
||||
|
||||
case DIOCWLABEL: /* set or reset label writeable */
|
||||
if ((flag & FWRITE) == 0) /* not writeable? */
|
||||
return EACCES; /* no, die */
|
||||
if (*(int *) data != 0) /* set it? */
|
||||
vol->flags |= VF_WLABEL; /* yes */
|
||||
else
|
||||
vol->flags &= ~VF_WLABEL; /* no, reset */
|
||||
break;
|
||||
|
||||
default:
|
||||
return ENOTTY; /* not my kind of ioctl */
|
||||
}
|
||||
break;
|
||||
}
|
||||
return 0; /* XXX */
|
||||
}
|
||||
|
||||
/* The following four functions check the supplied
 * object index and return a pointer to the object
 * if it exists.  Otherwise they store ENOENT and an
 * explanatory message in the reply and return NULL */
|
||||
struct drive *
|
||||
validdrive(int driveno, struct _ioctl_reply *reply)
|
||||
{
|
||||
if ((driveno < vinum_conf.drives_used)
|
||||
&& (DRIVE[driveno].state != drive_unallocated))
|
||||
return &DRIVE[driveno];
|
||||
strcpy(reply->msg, "No such drive");
|
||||
reply->error = ENOENT;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct sd *
|
||||
validsd(int sdno, struct _ioctl_reply *reply)
|
||||
{
|
||||
if ((sdno < vinum_conf.subdisks_used)
|
||||
&& (SD[sdno].state != sd_unallocated))
|
||||
return &SD[sdno];
|
||||
strcpy(reply->msg, "No such subdisk");
|
||||
reply->error = ENOENT;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct plex *
|
||||
validplex(int plexno, struct _ioctl_reply *reply)
|
||||
{
|
||||
if ((plexno < vinum_conf.plexes_used)
|
||||
&& (PLEX[plexno].state != plex_unallocated))
|
||||
return &PLEX[plexno];
|
||||
strcpy(reply->msg, "No such plex");
|
||||
reply->error = ENOENT;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct volume *
|
||||
validvol(int volno, struct _ioctl_reply *reply)
|
||||
{
|
||||
if ((volno < vinum_conf.volumes_used)
|
||||
&& (VOL[volno].state != volume_unallocated))
|
||||
return &VOL[volno];
|
||||
strcpy(reply->msg, "No such volume");
|
||||
reply->error = ENOENT;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* reset an object's stats */
|
||||
void
|
||||
resetstats(struct vinum_ioctl_msg *msg)
|
||||
{
|
||||
struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
|
||||
|
||||
switch (msg->type) {
|
||||
case drive_object:
|
||||
if (msg->index < vinum_conf.drives_used) {
|
||||
struct drive *drive = &DRIVE[msg->index];
|
||||
if (drive->state != drive_unallocated) {
|
||||
drive->reads = 0; /* number of reads on this drive */
|
||||
drive->writes = 0; /* number of writes on this drive */
|
||||
drive->bytes_read = 0; /* number of bytes read */
|
||||
drive->bytes_written = 0; /* number of bytes written */
|
||||
reply->error = 0;
|
||||
return;
|
||||
}
|
||||
reply->error = EINVAL;
|
||||
return;
|
||||
}
|
||||
case sd_object:
|
||||
if (msg->index < vinum_conf.subdisks_used) {
|
||||
struct sd *sd = &SD[msg->index];
|
||||
if (sd->state != sd_unallocated) {
|
||||
sd->reads = 0; /* number of reads on this subdisk */
|
||||
sd->writes = 0; /* number of writes on this subdisk */
|
||||
sd->bytes_read = 0; /* number of bytes read */
|
||||
sd->bytes_written = 0; /* number of bytes written */
|
||||
reply->error = 0;
|
||||
return;
|
||||
}
|
||||
reply->error = EINVAL;
|
||||
return;
|
||||
}
|
||||
break;
|
||||
|
||||
case plex_object:
|
||||
if (msg->index < vinum_conf.plexes_used) {
|
||||
struct plex *plex = &PLEX[msg->index];
|
||||
if (plex->state != plex_unallocated) {
|
||||
plex->reads = 0;
|
||||
plex->writes = 0; /* number of writes on this plex */
|
||||
plex->bytes_read = 0; /* number of bytes read */
|
||||
plex->bytes_written = 0; /* number of bytes written */
|
||||
plex->multiblock = 0; /* requests that needed more than one block */
|
||||
plex->multistripe = 0; /* requests that needed more than one stripe */
|
||||
reply->error = 0;
|
||||
return;
|
||||
}
|
||||
reply->error = EINVAL;
|
||||
return;
|
||||
}
|
||||
break;
|
||||
|
||||
case volume_object:
|
||||
if (msg->index < vinum_conf.volumes_used) {
|
||||
struct volume *vol = &VOL[msg->index];
|
||||
if (vol->state != volume_unallocated) {
|
||||
vol->bytes_read = 0; /* number of bytes read */
|
||||
vol->bytes_written = 0; /* number of bytes written */
|
||||
vol->reads = 0; /* number of reads on this volume */
|
||||
vol->writes = 0; /* number of writes on this volume */
|
||||
vol->recovered_reads = 0; /* reads recovered from another plex */
|
||||
reply->error = 0;
|
||||
return;
|
||||
}
|
||||
reply->error = EINVAL;
|
||||
return;
|
||||
}
|
||||
case invalid_object: /* can't get this */
|
||||
reply->error = EINVAL;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* attach an object to a superior object */
|
||||
void
|
||||
attachobject(struct vinum_ioctl_msg *msg)
|
||||
{
|
||||
struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
|
||||
struct sd *sd;
|
||||
struct plex *plex;
|
||||
struct volume *vol;
|
||||
|
||||
switch (msg->type) {
|
||||
case drive_object: /* you can't attach a drive to anything */
|
||||
case volume_object: /* nor a volume */
|
||||
case invalid_object: /* "this can't happen" */
|
||||
reply->error = EINVAL;
|
||||
reply->msg[0] = '\0'; /* vinum(8) doesn't do this */
|
||||
return;
|
||||
|
||||
case sd_object:
|
||||
sd = validsd(msg->index, reply);
|
||||
if (sd == NULL) /* not a valid subdisk */
|
||||
return;
|
||||
plex = validplex(msg->otherobject, reply);
|
||||
if (plex) {
|
||||
if (sd->plexno >= 0) { /* already belong to a plex */
|
||||
reply->error = EBUSY; /* no message, the user should check */
|
||||
reply->msg[0] = '\0';
|
||||
return;
|
||||
}
|
||||
sd->plexoffset = msg->offset; /* this is where we want it */
|
||||
set_sd_state(sd->sdno, sd_stale, setstate_force); /* make sure it's stale */
|
||||
give_sd_to_plex(plex->plexno, sd->sdno); /* and give it to the plex */
|
||||
update_sd_config(sd->sdno, 0);
|
||||
save_config();
|
||||
reply->error = 0;
|
||||
}
|
||||
break;
|
||||
|
||||
case plex_object:
|
||||
plex = validplex(msg->index, reply); /* get plex */
|
||||
if (plex == NULL)
|
||||
return;
|
||||
if (plex->organization != plex_concat) { /* can't attach to striped and raid-5 */
|
||||
reply->error = EINVAL; /* no message, the user should check */
|
||||
reply->msg[0] = '\0';
|
||||
return;
|
||||
}
|
||||
vol = validvol(msg->otherobject, reply); /* and volume information */
|
||||
if (vol) {
|
||||
if ((vol->plexes == MAXPLEX) /* we have too many already */
|
||||
||(plex->volno >= 0)) { /* or the plex has an owner */
|
||||
reply->error = EINVAL; /* no message, the user should check */
|
||||
reply->msg[0] = '\0';
|
||||
return;
|
||||
}
|
||||
set_plex_state(plex->plexno, plex_down, setstate_force); /* make sure it's down */
|
||||
give_plex_to_volume(msg->otherobject, msg->index); /* and give it to the volume */
|
||||
update_plex_config(plex->plexno, 0);
|
||||
save_config();
|
||||
if (plex->state == plex_reviving)
|
||||
reply->error = EAGAIN; /* need to revive it */
|
||||
else
|
||||
reply->error = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* detach an object from a superior object */
|
||||
void
|
||||
detachobject(struct vinum_ioctl_msg *msg)
|
||||
{
|
||||
struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
|
||||
struct sd *sd;
|
||||
struct plex *plex;
|
||||
struct volume *vol;
|
||||
int sdno;
|
||||
int plexno;
|
||||
|
||||
switch (msg->type) {
|
||||
case drive_object: /* you can't attach a drive to anything */
|
||||
case volume_object: /* nor a volume */
|
||||
case invalid_object: /* "this can't happen" */
|
||||
reply->error = EINVAL;
|
||||
reply->msg[0] = '\0'; /* vinum(8) doesn't do this */
|
||||
return;
|
||||
|
||||
case sd_object:
|
||||
sd = validsd(msg->index, reply);
|
||||
if (sd == NULL)
|
||||
return;
|
||||
if (sd->plexno < 0) { /* doesn't belong to a plex */
|
||||
reply->error = ENOENT;
|
||||
strcpy(reply->msg, "Subdisk is not attached");
|
||||
return;
|
||||
} else { /* valid plex number */
|
||||
plex = &PLEX[sd->plexno];
|
||||
if ((!msg->force) /* don't force things */
|
||||
&&((plex->state == plex_up) /* and the plex is up */
|
||||
||((plex->state == plex_flaky) && sd->state == sd_up))) { /* or flaky with this sd up */
|
||||
reply->error = EBUSY; /* we need this sd */
|
||||
reply->msg[0] = '\0';
|
||||
return;
|
||||
}
|
||||
sd->plexno = -1; /* anonymous sd */
|
||||
if (plex->subdisks == 1) { /* this was the only subdisk */
|
||||
Free(plex->sdnos); /* free the subdisk array */
|
||||
plex->sdnos = NULL; /* and note the fact */
|
||||
plex->subdisks_allocated = 0; /* no subdisk space */
|
||||
} else {
|
||||
for (sdno = 0; sdno < plex->subdisks; sdno++) {
|
||||
if (plex->sdnos[sdno] == msg->index) /* found our subdisk */
|
||||
break;
|
||||
}
|
||||
if (sdno < (plex->subdisks - 1)) /* not the last one, compact */
|
||||
bcopy(&plex->sdnos[sdno + 1],
|
||||
&plex->sdnos[sdno],
|
||||
(plex->subdisks - 1 - sdno) * sizeof(int));
|
||||
}
|
||||
plex->subdisks--;
|
||||
rebuild_plex_unmappedlist(plex); /* rebuild the unmapped list */
|
||||
if (!bcmp(plex->name, sd->name, strlen(plex->name))) { /* this subdisk is named after the plex */
|
||||
bcopy(sd->name,
|
||||
&sd->name[3],
|
||||
min(strlen(sd->name), MAXSDNAME - 3));
|
||||
bcopy("ex-", sd->name, 3);
|
||||
sd->name[MAXSDNAME - 1] = '\0';
|
||||
}
|
||||
update_plex_config(plex->plexno, 0);
|
||||
if ((plex->organization == plex_striped) /* we've just mutilated our plex, */
|
||||
||(plex->organization == plex_striped)) /* the data no longer matches */
|
||||
set_plex_state(plex->plexno,
|
||||
plex_down,
|
||||
setstate_force | setstate_configuring);
|
||||
update_sd_config(sd->sdno, 0);
|
||||
save_config();
|
||||
reply->error = 0;
|
||||
}
|
||||
return;
|
||||
|
||||
case plex_object:
|
||||
plex = validplex(msg->index, reply); /* get plex */
|
||||
if (plex == NULL)
|
||||
return;
|
||||
if (plex->volno >= 0) {
|
||||
int volno = plex->volno;
|
||||
|
||||
vol = &VOL[volno];
|
||||
if ((!msg->force) /* don't force things */
|
||||
&&((vol->state == volume_up) /* and the volume is up */
|
||||
&&(vol->plexes == 1))) { /* and this is the last plex */
|
||||
/* XXX As elsewhere, check whether we will lose
|
||||
* mapping by removing this plex */
|
||||
reply->error = EBUSY; /* we need this plex */
|
||||
reply->msg[0] = '\0';
|
||||
return;
|
||||
}
|
||||
plex->volno = -1; /* anonymous plex */
|
||||
for (plexno = 0; plexno < vol->plexes; plexno++) {
|
||||
if (vol->plex[plexno] == msg->index) /* found our plex */
|
||||
break;
|
||||
}
|
||||
if (plexno < (vol->plexes - 1)) /* not the last one, compact */
|
||||
bcopy(&vol[plexno + 1], &vol[plexno], (vol->plexes - 1 - plexno) * sizeof(int));
|
||||
vol->plexes--;
|
||||
if (!bcmp(vol->name, plex->name, strlen(vol->name))) { /* this plex is named after the volume */
|
||||
/* First, check if the subdisks are the same */
|
||||
if (msg->recurse) {
|
||||
int sdno;
|
||||
|
||||
for (sdno = 0; sdno < plex->subdisks; sdno++) {
|
||||
struct sd *sd = &SD[plex->sdnos[sdno]];
|
||||
|
||||
if (!bcmp(plex->name, sd->name, strlen(plex->name))) { /* subdisk is named after the plex */
|
||||
bcopy(sd->name, &sd->name[3], min(strlen(sd->name), MAXSDNAME - 3));
|
||||
bcopy("ex-", sd->name, 3);
|
||||
sd->name[MAXSDNAME - 1] = '\0';
|
||||
}
|
||||
}
|
||||
}
|
||||
bcopy(plex->name, &plex->name[3], min(strlen(plex->name), MAXPLEXNAME - 3));
|
||||
bcopy("ex-", plex->name, 3);
|
||||
plex->name[MAXPLEXNAME - 1] = '\0';
|
||||
}
|
||||
update_plex_config(plex->plexno, 0);
|
||||
update_volume_config(volno, 0);
|
||||
save_config();
|
||||
reply->error = 0;
|
||||
} else {
|
||||
reply->error = ENOENT;
|
||||
strcpy(reply->msg, "Plex is not attached");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
renameobject(struct vinum_rename_msg *msg)
|
||||
{
|
||||
struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
|
||||
struct drive *drive;
|
||||
struct sd *sd;
|
||||
struct plex *plex;
|
||||
struct volume *vol;
|
||||
|
||||
switch (msg->type) {
|
||||
case drive_object: /* you can't attach a drive to anything */
|
||||
if (find_drive(msg->newname, 0) >= 0) { /* we have that name already, */
|
||||
reply->error = EEXIST;
|
||||
reply->msg[0] = '\0';
|
||||
return;
|
||||
}
|
||||
drive = validdrive(msg->index, reply);
|
||||
if (drive) {
|
||||
bcopy(msg->newname, drive->label.name, MAXDRIVENAME);
|
||||
save_config();
|
||||
reply->error = 0;
|
||||
}
|
||||
return;
|
||||
|
||||
case sd_object: /* you can't attach a subdisk to anything */
|
||||
if (find_subdisk(msg->newname, 0) >= 0) { /* we have that name already, */
|
||||
reply->error = EEXIST;
|
||||
reply->msg[0] = '\0';
|
||||
return;
|
||||
}
|
||||
sd = validsd(msg->index, reply);
|
||||
if (sd) {
|
||||
bcopy(msg->newname, sd->name, MAXSDNAME);
|
||||
update_sd_config(sd->sdno, 0);
|
||||
save_config();
|
||||
reply->error = 0;
|
||||
}
|
||||
return;
|
||||
|
||||
case plex_object: /* you can't attach a plex to anything */
|
||||
if (find_plex(msg->newname, 0) >= 0) { /* we have that name already, */
|
||||
reply->error = EEXIST;
|
||||
reply->msg[0] = '\0';
|
||||
return;
|
||||
}
|
||||
plex = validplex(msg->index, reply);
|
||||
if (plex) {
|
||||
bcopy(msg->newname, plex->name, MAXPLEXNAME);
|
||||
update_plex_config(plex->plexno, 0);
|
||||
save_config();
|
||||
reply->error = 0;
|
||||
}
|
||||
return;
|
||||
|
||||
case volume_object: /* you can't attach a volume to anything */
|
||||
if (find_volume(msg->newname, 0) >= 0) { /* we have that name already, */
|
||||
reply->error = EEXIST;
|
||||
reply->msg[0] = '\0';
|
||||
return;
|
||||
}
|
||||
vol = validvol(msg->index, reply);
|
||||
if (vol) {
|
||||
bcopy(msg->newname, vol->name, MAXVOLNAME);
|
||||
update_volume_config(msg->index, 0);
|
||||
save_config();
|
||||
reply->error = 0;
|
||||
}
|
||||
return;
|
||||
|
||||
case invalid_object:
|
||||
reply->error = EINVAL;
|
||||
reply->msg[0] = '\0';
|
||||
}
|
||||
}
|
||||
|
||||
/* Replace one object with another */
|
||||
void
|
||||
replaceobject(struct vinum_ioctl_msg *msg)
|
||||
{
|
||||
struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
|
||||
|
||||
reply->error = ENODEV; /* until I know how to do this */
|
||||
strcpy(reply->msg, "replace not implemented yet");
|
||||
/* save_config (); */
|
||||
}
|
120
sys/dev/vinum/vinumkw.h
Normal file
120
sys/dev/vinum/vinumkw.h
Normal file
@ -0,0 +1,120 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: vinumkw.h,v 1.7 1998/08/07 02:35:51 grog Exp grog $
|
||||
*/
|
||||
|
||||
/* Command keywords that vinum knows. These include both user-level
|
||||
* and kernel-level stuff */
|
||||
|
||||
/* Our complete vocabulary. The names of the commands are
|
||||
* the same as the identifier without the kw_ at the beginning
|
||||
* (i.e. kw_create defines the "create" keyword). Preprocessor
|
||||
* magic in parser.c does the rest. */
|
||||
enum keyword {
    /* commands */
    kw_create = 0,
    kw_modify,
    kw_list, kw_l = kw_list,                                /* "l" is short for "list" */
    kw_ld,                                                  /* list drive */
    kw_ls,                                                  /* list subdisk */
    kw_lp,                                                  /* list plex */
    kw_lv,                                                  /* list volume */
    kw_set,
    kw_rm,
    kw_start,
    kw_stop,

    /* object types */
    kw_drive,
    kw_sd, kw_subdisk = kw_sd,
    kw_plex,
    kw_volume, kw_vol = kw_volume,

    /* attributes */
    kw_read,
    kw_readpol,
    kw_org,
    kw_name,
    kw_concat,
    kw_striped,
    kw_raid5,
    kw_driveoffset,
    kw_plexoffset,
    kw_len, kw_length = kw_len,
    kw_state,
    kw_setupstate,

    /* flag names */
    kw_d,
    kw_f,
    kw_r,
    kw_s,
    kw_v,

    kw_round,                                               /* round robin */
    kw_prefer,                                              /* prefer plex */
    kw_device,
    kw_init,
    kw_label,
    kw_resetconfig,
    kw_writethrough,
    kw_writeback,
    kw_raw,
    kw_resetstats,
    kw_attach,
    kw_detach,
    kw_rename,
    kw_printconfig,
    kw_replace,
    kw_detached,
#ifdef DEBUG
    kw_debug,                                               /* go into debugger */
    kw_info,
#endif
    kw_invalid_keyword = -1                                 /* not a keyword; kept last, so no enumerator follows it */
};
|
||||
|
||||
/* One entry of a keyword table: the text of a keyword
 * and the enum keyword value which stands for it */
struct _keywords {
|
||||
char *name; /* the keyword as text */
|
||||
enum keyword keyword; /* and its enum value */
|
||||
};
|
||||
|
||||
/* A set of keywords: a pointer to struct _keywords entries
 * together with a size (presumably the number of entries
 * at k - confirm against the code that searches the set) */
struct keywordset {
|
||||
int size; /* size of the set */
|
||||
struct _keywords *k; /* the entries */
|
||||
};
|
||||
|
||||
extern struct _keywords keywords[];
|
||||
extern struct _keywords flag_keywords[];
|
||||
|
||||
extern struct keywordset keyword_set;
|
||||
extern struct keywordset flag_set;
|
137
sys/dev/vinum/vinumlock.c
Normal file
137
sys/dev/vinum/vinumlock.c
Normal file
@ -0,0 +1,137 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: lock.c,v 1.6 1998/07/28 06:32:57 grog Exp grog $
|
||||
*/
|
||||
|
||||
#define REALLYKERNEL
|
||||
#include "vinumhdr.h"
|
||||
|
||||
/* Lock routines.  Currently, we lock an individual volume,
 * an individual plex or the global configuration.  I don't
 * think tsleep and wakeup are SMP safe. FIXME XXX */
|
||||
|
||||
/* Lock a volume, wait if it's in use */
|
||||
int
|
||||
lockvol(struct volume *vol)
|
||||
{
|
||||
int error;
|
||||
|
||||
while ((vol->flags & VF_LOCKED) != 0) {
|
||||
vol->flags |= VF_LOCKING;
|
||||
/* It would seem to make more sense to sleep on
|
||||
* the address 'vol'. Unfortuntaly we can't
|
||||
* guarantee that this address won't change due to
|
||||
* table expansion. The address we choose won't change. */
|
||||
if ((error = tsleep(&vinum_conf.volume + vol->devno,
|
||||
PRIBIO | PCATCH,
|
||||
"volock",
|
||||
0)) != 0)
|
||||
return error;
|
||||
}
|
||||
vol->flags |= VF_LOCKED;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Unlock a volume and let the next one at it */
|
||||
void
|
||||
unlockvol(struct volume *vol)
|
||||
{
|
||||
vol->flags &= ~VF_LOCKED;
|
||||
if ((vol->flags & VF_LOCKING) != 0) {
|
||||
vol->flags &= ~VF_LOCKING;
|
||||
wakeup(&vinum_conf.volume + vol->devno);
|
||||
}
|
||||
}
|
||||
|
||||
/* Lock a plex, wait if it's in use */
|
||||
int
|
||||
lockplex(struct plex *plex)
|
||||
{
|
||||
int error;
|
||||
|
||||
while ((plex->flags & VF_LOCKED) != 0) {
|
||||
plex->flags |= VF_LOCKING;
|
||||
/* It would seem to make more sense to sleep on
|
||||
* the address 'plex'. Unfortuntaly we can't
|
||||
* guarantee that this address won't change due to
|
||||
* table expansion. The address we choose won't change. */
|
||||
if ((error = tsleep(&vinum_conf.plex + plex->sdnos[0],
|
||||
PRIBIO | PCATCH,
|
||||
"plexlk",
|
||||
0)) != 0)
|
||||
return error;
|
||||
}
|
||||
plex->flags |= VF_LOCKED;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Unlock a plex and let the next one at it */
|
||||
void
|
||||
unlockplex(struct plex *plex)
|
||||
{
|
||||
plex->flags &= ~VF_LOCKED;
|
||||
if ((plex->flags & VF_LOCKING) != 0) {
|
||||
plex->flags &= ~VF_LOCKING;
|
||||
wakeup(&vinum_conf.plex + plex->plexno);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Get a lock for the global config, wait if it's not available */
|
||||
int
|
||||
lock_config(void)
|
||||
{
|
||||
int error;
|
||||
|
||||
while ((vinum_conf.flags & VF_LOCKED) != 0) {
|
||||
vinum_conf.flags |= VF_LOCKING;
|
||||
if ((error = tsleep(&vinum_conf, PRIBIO | PCATCH, "vincfg", 0)) != 0)
|
||||
return error;
|
||||
}
|
||||
vinum_conf.flags |= VF_LOCKED;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Unlock and wake up any waiters */
|
||||
void
|
||||
unlock_config(void)
|
||||
{
|
||||
vinum_conf.flags &= ~VF_LOCKED;
|
||||
if ((vinum_conf.flags & VF_LOCKING) != 0) {
|
||||
vinum_conf.flags &= ~VF_LOCKING;
|
||||
wakeup(&vinum_conf);
|
||||
}
|
||||
}
|
186
sys/dev/vinum/vinummemory.c
Normal file
186
sys/dev/vinum/vinummemory.c
Normal file
@ -0,0 +1,186 @@
|
||||
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: memory.c,v 1.16 1998/08/08 04:43:22 grog Exp grog $
|
||||
*/
|
||||
|
||||
#define REALLYKERNEL
|
||||
#define USES_VM
|
||||
#include "vinumhdr.h"
|
||||
|
||||
extern jmp_buf command_fail; /* return on a failed command */
|
||||
|
||||
#if __FreeBSD__ >= 3
|
||||
/* Why aren't these declared anywhere? XXX */
|
||||
int setjmp(jmp_buf);
|
||||
void longjmp(jmp_buf, int);
|
||||
#endif
|
||||
|
||||
void freedatabuf(struct mc *me);
|
||||
caddr_t allocdatabuf(struct mc *me);
|
||||
|
||||
/*
 * Make a table bigger.
 *
 * table points to the pointer to the table, oldsize and
 * newsize are sizes in bytes.  If newsize is larger than
 * oldsize, a table of newsize bytes is allocated, the
 * first oldsize bytes of any existing table are copied
 * to it, the old table is freed and *table is set to the
 * new one.  The part beyond oldsize is not initialized
 * here.  Otherwise nothing happens.
 */
void
expand_table(void **table, int oldsize, int newsize)
{
    int *bigger;

    if (newsize <= oldsize)                                 /* nothing to expand */
        return;
    bigger = (int *) Malloc(newsize);                       /* allocate a new table */
    CHECKALLOC(bigger, "vinum: Can't expand table\n");
    if (*table != NULL) {                                   /* already something there, */
        bcopy((char *) *table, (char *) bigger, oldsize);   /* copy it to the new table */
        Free(*table);                                       /* and get rid of the old one */
    }
    *table = bigger;
}
|
||||
|
||||
#ifndef DEBUG
|
||||
/* increase the size of a request block */
|
||||
void
|
||||
expandrq(struct plexrq *prq)
|
||||
{
|
||||
expand_table((void **) &prq->rqe,
|
||||
prq->requests * sizeof(struct rqelement),
|
||||
(prq->requests + RQELTS) * sizeof(struct rqelement));
|
||||
bzero(&prq->rqe[prq->requests], RQELTS * sizeof(struct rqelement)); /* clear the new part */
|
||||
prq->rqcount += RQELTS;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if DEBUG /* XXX debug */
|
||||
#define MALLOCENTRIES 16384
|
||||
int malloccount = 0;
|
||||
int highwater = 0; /* highest index ever allocated */
|
||||
static struct mc malloced[MALLOCENTRIES];
|
||||
|
||||
static total_malloced;
|
||||
|
||||
caddr_t
|
||||
MMalloc(int size, char *file, int line)
|
||||
{
|
||||
caddr_t result;
|
||||
int i;
|
||||
static int seq = 0;
|
||||
int s;
|
||||
struct mc me; /* information to pass to allocdatabuf */
|
||||
|
||||
if (malloccount >= MALLOCENTRIES) { /* too many */
|
||||
printf("vinum: can't allocate table space to trace memory allocation");
|
||||
return 0; /* can't continue */
|
||||
}
|
||||
result = malloc(size, M_DEVBUF, M_WAITOK); /* use malloc for smaller and irregular stuff */
|
||||
if (result == NULL)
|
||||
printf("vinum: can't allocate %d bytes from %s:%d\n", size, file, line);
|
||||
else {
|
||||
me.flags = 0; /* allocation via malloc */
|
||||
s = splhigh();
|
||||
for (i = 0; i < malloccount; i++) {
|
||||
if (((result + size) > malloced[i].address)
|
||||
&& (result < malloced[i].address + malloced[i].size)) /* overlap */
|
||||
Debugger("Malloc overlap");
|
||||
}
|
||||
if (result) {
|
||||
i = malloccount++;
|
||||
total_malloced += size;
|
||||
malloced[i].address = result;
|
||||
malloced[i].size = size;
|
||||
malloced[i].line = line;
|
||||
malloced[i].seq = seq++;
|
||||
malloced[i].flags = me.flags;
|
||||
malloced[i].databuf = me.databuf; /* only used with kva alloc */
|
||||
bcopy(file, malloced[i].file, min(strlen(file) + 1, 16));
|
||||
}
|
||||
if (malloccount > highwater)
|
||||
highwater = malloccount;
|
||||
splx(s);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
void
|
||||
FFree(void *mem, char *file, int line)
|
||||
{
|
||||
int i;
|
||||
int s;
|
||||
|
||||
s = splhigh();
|
||||
for (i = 0; i < malloccount; i++) {
|
||||
if ((caddr_t) mem == malloced[i].address) { /* found it */
|
||||
bzero(mem, malloced[i].size); /* XXX */
|
||||
free(mem, M_DEVBUF);
|
||||
malloccount--;
|
||||
total_malloced -= malloced[i].size;
|
||||
if (i < malloccount) /* more coming after */
|
||||
bcopy(&malloced[i + 1], &malloced[i], (malloccount - i) * sizeof(struct mc));
|
||||
splx(s);
|
||||
return;
|
||||
}
|
||||
}
|
||||
splx(s);
|
||||
printf("Freeing unallocated data at 0x%08x from %s, line %d\n", (int) mem, file, line);
|
||||
Debugger("Free");
|
||||
}
|
||||
|
||||
void
|
||||
vinum_meminfo(caddr_t data)
|
||||
{
|
||||
struct meminfo *m = (struct meminfo *) data;
|
||||
|
||||
m->mallocs = malloccount;
|
||||
m->total_malloced = total_malloced;
|
||||
m->malloced = malloced;
|
||||
m->highwater = highwater;
|
||||
}
|
||||
|
||||
int
|
||||
vinum_mallocinfo(caddr_t data)
|
||||
{
|
||||
struct mc *m = (struct mc *) data;
|
||||
unsigned int ent = *(int *) data; /* 1st word is index */
|
||||
|
||||
if (ent >= malloccount)
|
||||
return ENOENT;
|
||||
m->address = malloced[ent].address;
|
||||
m->size = malloced[ent].size;
|
||||
m->line = malloced[ent].line;
|
||||
m->seq = malloced[ent].seq;
|
||||
bcopy(malloced[ent].file, m->file, 16);
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
206
sys/dev/vinum/vinumparser.c
Normal file
206
sys/dev/vinum/vinumparser.c
Normal file
@ -0,0 +1,206 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: parser.c,v 1.11 1998/08/10 08:50:42 grog Exp grog $
|
||||
*/
|
||||
|
||||
/* This file contains the parser for the configuration routines. It's used
|
||||
* both in the kernel and in the user interface program, thus the separate file. */
|
||||
|
||||
/* Go through a text and split up into text tokens. These are either non-blank
|
||||
* sequences, or any sequence (except \0) enclosed in ' or ". Embedded ' or
|
||||
* " characters may be escaped by \, which otherwise has no special meaning.
|
||||
*
|
||||
* Delimit by following with a \0, and return pointers to the starts at token [].
|
||||
* Return the number of tokens found as the return value.
|
||||
*
|
||||
* This method has the restriction that a closing " or ' must be followed by
|
||||
* grey space.
|
||||
*
|
||||
* Error conditions are end of line before end of quote, or no space after
|
||||
* a closing quote. In this case, tokenize() returns -1. */
|
||||
|
||||
#include <sys/param.h>
|
||||
#ifdef KERNEL
|
||||
#undef KERNEL /* XXX */
|
||||
#define REALLYKERNEL
|
||||
#else
|
||||
#include <ctype.h>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#endif
|
||||
/* All this mess for a single struct definition */
|
||||
#include <sys/uio.h>
|
||||
#include <sys/namei.h>
|
||||
#include <sys/disklabel.h>
|
||||
#include <sys/mount.h>
|
||||
#include <sys/device.h>
|
||||
#include <sys/disk.h>
|
||||
#include "sys/buf.h"
|
||||
|
||||
#include <vinumvar.h>
|
||||
#include "vinumkw.h"
|
||||
#include "vinumio.h"
|
||||
#include "vinumext.h"
|
||||
|
||||
#ifdef REALLYKERNEL
|
||||
#define isspace(c) ((c == ' ') || (c == '\t')) /* check for white space */
|
||||
#else /* get it from the headers */
|
||||
#include <ctype.h>
|
||||
#endif
|
||||
|
||||
/* enum keyword is defined in vinumvar.h */
|
||||
|
||||
#define keypair(x) { #x, kw_##x } /* create pair "foo", kw_foo */
|
||||
#define flagkeypair(x) { "-"#x, kw_##x } /* create pair "-foo", kw_foo */
|
||||
#define KEYWORDSET(x) {sizeof (x) / sizeof (struct _keywords), x}
|
||||
|
||||
/* Normal keywords. These are all the words that vinum knows. */
|
||||
struct _keywords keywords[] =
|
||||
{keypair(drive),
|
||||
keypair(sd),
|
||||
keypair(subdisk),
|
||||
keypair(plex),
|
||||
keypair(volume),
|
||||
keypair(vol),
|
||||
keypair(setupstate),
|
||||
keypair(readpol),
|
||||
keypair(org),
|
||||
keypair(name),
|
||||
keypair(writethrough),
|
||||
keypair(writeback),
|
||||
keypair(raw),
|
||||
keypair(device),
|
||||
keypair(concat),
|
||||
keypair(raid5),
|
||||
keypair(striped),
|
||||
keypair(plexoffset),
|
||||
keypair(driveoffset),
|
||||
keypair(length),
|
||||
keypair(len),
|
||||
keypair(state),
|
||||
keypair(round),
|
||||
keypair(prefer),
|
||||
keypair(rename),
|
||||
keypair(detached),
|
||||
#ifndef KERNEL /* for vinum(8) only */
|
||||
#ifdef DEBUG
|
||||
keypair(debug),
|
||||
#endif
|
||||
keypair(attach),
|
||||
keypair(detach),
|
||||
keypair(printconfig),
|
||||
keypair(replace),
|
||||
keypair(create),
|
||||
keypair(read),
|
||||
keypair(modify),
|
||||
keypair(list),
|
||||
keypair(l),
|
||||
keypair(ld),
|
||||
keypair(ls),
|
||||
keypair(lp),
|
||||
keypair(lv),
|
||||
keypair(info),
|
||||
keypair(set),
|
||||
keypair(rm),
|
||||
keypair(init),
|
||||
keypair(label),
|
||||
keypair(resetconfig),
|
||||
keypair(start),
|
||||
keypair(stop),
|
||||
keypair(resetstats)
|
||||
#endif
|
||||
};
|
||||
struct keywordset keyword_set = KEYWORDSET(keywords);
|
||||
|
||||
#ifndef KERNEL
|
||||
struct _keywords flag_keywords[] =
|
||||
{flagkeypair(f),
|
||||
flagkeypair(d),
|
||||
flagkeypair(v),
|
||||
flagkeypair(s),
|
||||
flagkeypair(r)
|
||||
};
|
||||
struct keywordset flag_set = KEYWORDSET(flag_keywords);
|
||||
|
||||
#endif
|
||||
|
||||
/*
 * Split the text at cptr into tokens, in place.
 *
 * A token is either a sequence of non-blank characters, or a
 * sequence enclosed in ' or ".  Inside a quoted token, the quote
 * character may be escaped with a preceding \.  Each token is
 * terminated by overwriting the character after it with \0, and a
 * pointer to its start is stored in token[].  For a quoted token the
 * pointer addresses the opening quote, so both quotes remain part of
 * the token text.  Scanning stops at end of string, at a newline or
 * at a # which starts a token.
 *
 * Returns the number of tokens found, or -1 if a quoted token is not
 * closed before the end of the line or if its closing quote is not
 * followed by white space.
 *
 * The caller must supply a token[] array big enough for every token
 * on the line: no bound is checked here.
 */
int
tokenize(char *cptr, char *token[])
{
    char delim;						    /* delimiter for searching for the partner */
    int tokennr = 0;					    /* index of this token: none found yet */

    for (;;) {
	/* the casts keep isspace() well defined for chars >= 0x80 */
	while (isspace((unsigned char) *cptr))
	    cptr++;					    /* skip initial white space */
	if ((*cptr == '\0') || (*cptr == '\n') || (*cptr == '#')) /* end of line */
	    return tokennr;				    /* return number of tokens found */
	delim = *cptr;
	token[tokennr] = cptr;				    /* point to it */
	tokennr++;					    /* one more */
	/* XXX this is broken.  It leaves superfluous \\ characters in the text */
	if ((delim == '\'') || (delim == '"')) {	    /* delimitered */
	    for (;;) {
		cptr++;
		if ((*cptr == delim) && (cptr[-1] != '\\')) { /* found the partner */
		    cptr++;				    /* move on past */
		    if (!isspace((unsigned char) *cptr))    /* error, no space after closing quote */
			return -1;
		    *cptr++ = '\0';			    /* delimit */
		    /*
		     * This token is complete: go back to the outer
		     * loop for the next one.  Without this, the scan
		     * ran on into the rest of the line and always
		     * ended in an error return.
		     */
		    break;
		} else if ((*cptr == '\0') || (*cptr == '\n')) /* end of line */
		    return -1;
	    }
	} else {					    /* not quoted */
	    while ((*cptr != '\0')
		&& (!isspace((unsigned char) *cptr))
		&& (*cptr != '\n'))
		cptr++;
	    if (*cptr != '\0')				    /* not end of the line, */
		*cptr++ = '\0';				    /* delimit and move to the next */
	}
    }
}
|
||||
|
||||
/* Find a keyword and return an index */
|
||||
enum keyword
|
||||
get_keyword(char *name, struct keywordset *keywordset)
|
||||
{
|
||||
int i;
|
||||
struct _keywords *keywords = keywordset->k; /* point to the keywords */
|
||||
for (i = 0; i < keywordset->size; i++)
|
||||
if (!strcmp(name, keywords[i].name))
|
||||
return (enum keyword) keywords[i].keyword;
|
||||
return kw_invalid_keyword;
|
||||
}
|
882
sys/dev/vinum/vinumrequest.c
Normal file
882
sys/dev/vinum/vinumrequest.c
Normal file
@ -0,0 +1,882 @@
|
||||
/* XXX to do:
|
||||
|
||||
* Decide where we need splbio ()
|
||||
*/
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: request.c,v 1.17 1998/08/13 06:04:47 grog Exp grog $
|
||||
*/
|
||||
|
||||
#define REALLYKERNEL
|
||||
#include "vinumhdr.h"
|
||||
#include "request.h"
|
||||
#include <miscfs/specfs/specdev.h>
|
||||
#include <sys/resourcevar.h>
|
||||
|
||||
/* pointer to ioctl p parameter, to save passing it around */
|
||||
extern struct proc *myproc;
|
||||
|
||||
enum requeststatus bre(struct request *rq,
|
||||
int plexno,
|
||||
daddr_t * diskstart,
|
||||
daddr_t diskend);
|
||||
enum requeststatus bre5(struct request *rq,
|
||||
int plexno,
|
||||
daddr_t * diskstart,
|
||||
daddr_t diskend);
|
||||
enum requeststatus build_read_request(struct request *rq, int volplexno);
|
||||
enum requeststatus build_write_request(struct request *rq);
|
||||
enum requeststatus build_rq_buffer(struct rqelement *rqe, struct plex *plex);
|
||||
void freerq(struct request *rq);
|
||||
void free_rqg(struct rqgroup *rqg);
|
||||
int find_alternate_sd(struct request *rq);
|
||||
int check_range_covered(struct request *);
|
||||
void complete_rqe(struct buf *bp);
|
||||
void complete_raid5_write(struct rqelement *);
|
||||
int abortrequest(struct request *rq, int error);
|
||||
void sdio(struct buf *bp);
|
||||
void sdio_done(struct buf *bp);
|
||||
int vinum_bounds_check(struct buf *bp, struct volume *vol);
|
||||
caddr_t allocdatabuf(struct rqelement *rqe);
|
||||
void freedatabuf(struct rqelement *rqe);
|
||||
|
||||
void
|
||||
vinumstrategy(struct buf *bp)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
int volno;
|
||||
struct volume *vol = NULL;
|
||||
int s;
|
||||
struct devcode *device = (struct devcode *) &bp->b_dev; /* decode device number */
|
||||
enum requeststatus status;
|
||||
|
||||
switch (device->type) {
|
||||
case VINUM_SD_TYPE:
|
||||
sdio(bp);
|
||||
return;
|
||||
|
||||
/* In fact, vinum doesn't handle drives: they're
|
||||
* handled directly by the disk drivers */
|
||||
case VINUM_DRIVE_TYPE:
|
||||
default:
|
||||
bp->b_error = EIO; /* I/O error */
|
||||
bp->b_flags |= B_ERROR;
|
||||
biodone(bp);
|
||||
return;
|
||||
|
||||
case VINUM_VOLUME_TYPE: /* volume I/O */
|
||||
volno = VOLNO(bp->b_dev);
|
||||
vol = &VOL[volno];
|
||||
if (vol->state != volume_up) { /* can't access this volume */
|
||||
bp->b_error = EIO; /* I/O error */
|
||||
bp->b_flags |= B_ERROR;
|
||||
biodone(bp);
|
||||
return;
|
||||
}
|
||||
if (vinum_bounds_check(bp, vol) <= 0) { /* don't like them bounds */
|
||||
biodone(bp); /* have nothing to do with this */
|
||||
return;
|
||||
}
|
||||
/* FALLTHROUGH */
|
||||
/* Plex I/O is pretty much the same as volume I/O
|
||||
* for a single plex. Indicate this by passing a NULL
|
||||
* pointer (set above) for the volume */
|
||||
case VINUM_PLEX_TYPE:
|
||||
bp->b_resid = bp->b_bcount; /* transfer everything */
|
||||
vinumstart(bp, 0);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* Start a transfer. Return -1 on error,
|
||||
* 0 if OK, 1 if we need to retry.
|
||||
* Parameter reviveok is set when doing
|
||||
* transfers for revives: it allows transfers to
|
||||
* be started immediately when a revive is in
|
||||
* progress. During revive, normal transfers
|
||||
* are queued if they share address space with
|
||||
* a currently active revive operation. */
|
||||
int
|
||||
vinumstart(struct buf *bp, int reviveok)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
int plexno;
|
||||
int maxplex; /* maximum number of plexes to handle */
|
||||
struct volume *vol;
|
||||
struct rqgroup *rqg; /* current plex's requests */
|
||||
struct rqelement *rqe; /* individual element */
|
||||
struct request *rq; /* build up our request here */
|
||||
int rqno; /* index in request list */
|
||||
enum requeststatus status;
|
||||
|
||||
/* XXX In these routines, we're assuming that
|
||||
* we will always be called with bp->b_bcount
|
||||
* which is a multiple of the sector size. This
|
||||
* is a reasonable assumption, since we are only
|
||||
* called from system routines. Should we check
|
||||
* anyway? */
|
||||
|
||||
if ((bp->b_bcount % DEV_BSIZE) != 0) { /* bad length */
|
||||
bp->b_error = EINVAL; /* invalid size */
|
||||
bp->b_flags |= B_ERROR;
|
||||
biodone(bp);
|
||||
return -1;
|
||||
}
|
||||
rq = (struct request *) Malloc(sizeof(struct request)); /* allocate a request struct */
|
||||
if (rq == NULL) { /* can't do it */
|
||||
bp->b_error = ENOMEM; /* can't get memory */
|
||||
bp->b_flags |= B_ERROR;
|
||||
biodone(bp);
|
||||
return -1;
|
||||
}
|
||||
bzero(rq, sizeof(struct request));
|
||||
|
||||
/* Note the volume ID. This can be NULL, which
|
||||
* the request building functions use as an
|
||||
* indication for single plex I/O */
|
||||
rq->bp = bp; /* and the user buffer struct */
|
||||
|
||||
if (DEVTYPE(bp->b_dev) == VINUM_VOLUME_TYPE) { /* it's a volume, */
|
||||
rq->volplex.volno = VOLNO(bp->b_dev); /* get the volume number */
|
||||
vol = &VOL[rq->volplex.volno]; /* and point to it */
|
||||
vol->active++; /* one more active request */
|
||||
maxplex = vol->plexes; /* consider all its plexes */
|
||||
} else {
|
||||
vol = NULL; /* no volume */
|
||||
rq->volplex.plexno = PLEXNO(bp->b_dev); /* point to the plex */
|
||||
rq->isplex = 1; /* note that it's a plex */
|
||||
maxplex = 1; /* just the one plex */
|
||||
}
|
||||
|
||||
if (bp->b_flags & B_READ) {
|
||||
/* This is a read request. Decide
|
||||
* which plex to read from.
|
||||
*
|
||||
* There's a potential race condition here,
|
||||
* since we're not locked, and we could end
|
||||
* up multiply incrementing the round-robin
|
||||
* counter. This doesn't have any serious
|
||||
* effects, however. */
|
||||
if (vol != NULL) {
|
||||
vol->reads++;
|
||||
vol->bytes_read += bp->b_bcount;
|
||||
plexno = vol->preferred_plex; /* get the plex to use */
|
||||
if (plexno < 0) { /* round robin */
|
||||
plexno = vol->last_plex_read;
|
||||
vol->last_plex_read++;
|
||||
if (vol->last_plex_read == vol->plexes) /* got the the end? */
|
||||
vol->last_plex_read = 0; /* wrap around */
|
||||
}
|
||||
status = build_read_request(rq, plexno); /* build a request */
|
||||
} else {
|
||||
daddr_t diskaddr = bp->b_blkno; /* start offset of transfer */
|
||||
status = bre(rq, /* build a request list */
|
||||
rq->volplex.plexno,
|
||||
&diskaddr,
|
||||
diskaddr + (bp->b_bcount / DEV_BSIZE));
|
||||
}
|
||||
|
||||
if ((status > REQUEST_RECOVERED) /* can't satisfy it */
|
||||
||(bp->b_flags & B_DONE)) { /* XXX shouldn't get this without bad status */
|
||||
if (status == REQUEST_DOWN) { /* not enough subdisks */
|
||||
bp->b_error = EIO; /* I/O error */
|
||||
bp->b_flags |= B_ERROR;
|
||||
}
|
||||
biodone(bp);
|
||||
freerq(rq);
|
||||
return -1;
|
||||
}
|
||||
return launch_requests(rq, reviveok); /* now start the requests if we can */
|
||||
} else
|
||||
/* This is a write operation. We write to all
|
||||
* plexes. If this is a RAID 5 plex, we must also
|
||||
* update the parity stripe. */
|
||||
{
|
||||
if (vol != NULL) {
|
||||
vol->writes++;
|
||||
vol->bytes_written += bp->b_bcount;
|
||||
status = build_write_request(rq); /* Not all the subdisks are up */
|
||||
} else { /* plex I/O */
|
||||
daddr_t diskstart;
|
||||
|
||||
diskstart = bp->b_blkno; /* start offset of transfer */
|
||||
status = bre(rq,
|
||||
PLEXNO(bp->b_dev),
|
||||
&diskstart,
|
||||
bp->b_blkno + (bp->b_bcount / DEV_BSIZE)); /* build requests for the plex */
|
||||
}
|
||||
if ((status > REQUEST_RECOVERED) /* can't satisfy it */
|
||||
||(bp->b_flags & B_DONE)) { /* XXX shouldn't get this without bad status */
|
||||
if (status == REQUEST_DOWN) { /* not enough subdisks */
|
||||
bp->b_error = EIO; /* I/O error */
|
||||
bp->b_flags |= B_ERROR;
|
||||
}
|
||||
if ((bp->b_flags & B_DONE) == 0)
|
||||
biodone(bp);
|
||||
freerq(rq);
|
||||
return -1;
|
||||
}
|
||||
return launch_requests(rq, reviveok); /* start the requests */
|
||||
}
|
||||
}
|
||||
|
||||
/* Call the low-level strategy routines to
|
||||
* perform the requests in a struct request */
|
||||
int
|
||||
launch_requests(struct request *rq, int reviveok)
|
||||
{
|
||||
struct rqgroup *rqg;
|
||||
int rqno; /* loop index */
|
||||
struct rqelement *rqe; /* current element */
|
||||
int s;
|
||||
|
||||
/* First find out whether we're reviving, and the
|
||||
* request contains a conflict. If so, we hang
|
||||
* the request off plex->waitlist of the first
|
||||
* plex we find which is reviving */
|
||||
if ((rq->flags & XFR_REVIVECONFLICT) /* possible revive conflict */
|
||||
&&(!reviveok)) { /* and we don't want to do it now, */
|
||||
struct volume *vol = &VOL[VOLNO(rq->bp->b_dev)];
|
||||
struct plex *plex;
|
||||
int plexno;
|
||||
|
||||
for (plexno = 0; plexno < vol->plexes; plexno++) { /* find the reviving plex */
|
||||
plex = &PLEX[vol->plex[plexno]];
|
||||
if (plex->state == plex_reviving) /* found it */
|
||||
break;
|
||||
}
|
||||
if (plexno < vol->plexes) { /* found it? */
|
||||
struct request *waitlist = plex->waitlist; /* point to the waiting list */
|
||||
|
||||
while (waitlist->next != NULL) /* find the end */
|
||||
waitlist = waitlist->next;
|
||||
waitlist->next = rq; /* hook our request there */
|
||||
return 0; /* and get out of here */
|
||||
} else /* bad vinum, bad */
|
||||
printf("vinum: can't find reviving plex for volume %s\n", vol->name);
|
||||
}
|
||||
rq->active = 0; /* nothing yet */
|
||||
/* XXX This is probably due to a bug */
|
||||
if (rq->rqg == NULL) { /* no request */
|
||||
abortrequest(rq, EINVAL);
|
||||
return -1;
|
||||
}
|
||||
#if DEBUG
|
||||
if (debug & DEBUG_ADDRESSES)
|
||||
printf("Request: %x\nWrite dev 0x%x, offset 0x%x, length %ld\n",
|
||||
(u_int) rq,
|
||||
rq->bp->b_dev,
|
||||
rq->bp->b_blkno,
|
||||
rq->bp->b_bcount); /* XXX */
|
||||
vinum_conf.lastrq = (int) rq;
|
||||
vinum_conf.lastbuf = rq->bp;
|
||||
#endif
|
||||
for (rqg = rq->rqg; rqg != NULL; rqg = rqg->next) { /* through the whole request chain */
|
||||
rqg->active = rqg->count; /* they're all active */
|
||||
rq->active++; /* one more active request group */
|
||||
for (rqno = 0; rqno < rqg->count; rqno++) {
|
||||
rqe = &rqg->rqe[rqno];
|
||||
if (rqe->flags & XFR_BAD_SUBDISK) /* this subdisk is bad, */
|
||||
rqg->active--; /* one less active request */
|
||||
else {
|
||||
struct drive *drive = &DRIVE[rqe->driveno]; /* drive to access */
|
||||
if ((rqe->b.b_flags & B_READ) == 0)
|
||||
rqe->b.b_vp->v_numoutput++; /* one more output going */
|
||||
#if DEBUG
|
||||
if (debug & DEBUG_ADDRESSES)
|
||||
printf(" %s dev 0x%x, sd %d, offset 0x%x, devoffset 0x%x, length %ld\n",
|
||||
rqe->b.b_flags & B_READ ? "Read" : "Write",
|
||||
rqe->b.b_dev,
|
||||
rqe->sdno,
|
||||
(u_int) (rqe->b.b_blkno - SD[rqe->sdno].driveoffset),
|
||||
rqe->b.b_blkno,
|
||||
rqe->b.b_bcount); /* XXX */
|
||||
if (debug & DEBUG_NUMOUTPUT)
|
||||
printf(" vinumstart sd %d numoutput %ld\n",
|
||||
rqe->sdno,
|
||||
rqe->b.b_vp->v_numoutput);
|
||||
#endif
|
||||
/* fire off the request */
|
||||
s = splbio();
|
||||
(*bdevsw[major(rqe->b.b_dev)]->d_strategy) (&rqe->b);
|
||||
splx(s);
|
||||
}
|
||||
/* XXX Do we need caching? Think about this more */
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* define the low-level requests needed to perform a
|
||||
* high-level I/O operation for a specific plex 'plexno'.
|
||||
*
|
||||
* Return 0 if all subdisks involved in the request are up, 1 if some
|
||||
* subdisks are not up, and -1 if the request is at least partially
|
||||
* outside the bounds of the subdisks.
|
||||
*
|
||||
* Modify the pointer *diskstart to point to the end address. On
|
||||
* read, return on the first bad subdisk, so that the caller
|
||||
* (build_read_request) can try alternatives.
|
||||
*
|
||||
* On entry to this routine, the rqg structures are not assigned. The
|
||||
* assignment is performed by expandrq(). Strictly speaking, the
|
||||
* elements rqe->sdno of all entries should be set to -1, since 0
|
||||
* (from bzero) is a valid subdisk number. We avoid this problem by
|
||||
* initializing the ones we use, and not looking at the others (index
|
||||
* >= rqg->requests).
|
||||
*/
|
||||
enum requeststatus
|
||||
bre(struct request *rq,
|
||||
int plexno,
|
||||
daddr_t * diskaddr,
|
||||
daddr_t diskend)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
int sdno;
|
||||
struct sd *sd;
|
||||
struct rqgroup *rqg;
|
||||
struct buf *bp; /* user's bp */
|
||||
struct plex *plex;
|
||||
enum requeststatus status; /* return value */
|
||||
daddr_t plexoffset; /* offset of transfer in plex */
|
||||
daddr_t stripebase; /* base address of stripe (1st subdisk) */
|
||||
daddr_t stripeoffset; /* offset in stripe */
|
||||
daddr_t blockoffset; /* offset in stripe on subdisk */
|
||||
struct rqelement *rqe; /* point to this request information */
|
||||
daddr_t diskstart = *diskaddr; /* remember where this transfer starts */
|
||||
|
||||
bp = rq->bp; /* buffer pointer */
|
||||
status = REQUEST_OK; /* return value: OK until proven otherwise */
|
||||
plex = &PLEX[plexno]; /* point to the plex */
|
||||
|
||||
switch (plex->organization) {
|
||||
case plex_concat:
|
||||
for (sdno = 0; sdno < plex->subdisks; sdno++) {
|
||||
sd = &SD[plex->sdnos[sdno]];
|
||||
if ((*diskaddr < (sd->plexoffset + sd->sectors)) /* The request starts before the end of this */
|
||||
&&(diskend > sd->plexoffset)) { /* subdisk and ends after the start of this sd */
|
||||
if ((sd->state != sd_up) || (plex->state != plex_up)) {
|
||||
enum requeststatus s;
|
||||
|
||||
s = checksdstate(sd, rq, *diskaddr, diskend); /* do we need to change state? */
|
||||
if (s) /* give up? */
|
||||
return s; /* yup */
|
||||
}
|
||||
rqg = allocrqg(rq, 1); /* space for the request */
|
||||
if (rqg == NULL) { /* malloc failed */
|
||||
bp->b_flags |= B_ERROR;
|
||||
bp->b_error = ENOMEM;
|
||||
biodone(bp);
|
||||
return REQUEST_ENOMEM;
|
||||
}
|
||||
rqg->plexno = plexno;
|
||||
|
||||
rqe = &rqg->rqe[0]; /* point to the element */
|
||||
rqe->rqg = rqg; /* group */
|
||||
rqe->sdno = sd->sdno; /* put in the subdisk number */
|
||||
plexoffset = max(sd->plexoffset, *diskaddr); /* start offset in plex */
|
||||
rqe->sdoffset = plexoffset - sd->plexoffset; /* start offset in subdisk */
|
||||
rqe->useroffset = plexoffset - diskstart; /* start offset in user buffer */
|
||||
rqe->dataoffset = 0;
|
||||
rqe->datalen = min(diskend - *diskaddr, /* number of sectors to transfer in this sd */
|
||||
sd->sectors - rqe->sdoffset);
|
||||
rqe->groupoffset = 0; /* no groups for concatenated plexes */
|
||||
rqe->grouplen = 0;
|
||||
rqe->buflen = rqe->datalen; /* buffer length is data buffer length */
|
||||
rqe->flags = 0;
|
||||
rqe->driveno = sd->driveno;
|
||||
*diskaddr += rqe->datalen; /* bump the address */
|
||||
if (build_rq_buffer(rqe, plex)) { /* build the buffer */
|
||||
deallocrqg(rqg);
|
||||
bp->b_flags |= B_ERROR;
|
||||
bp->b_error = ENOMEM;
|
||||
biodone(bp);
|
||||
return REQUEST_ENOMEM; /* can't do it */
|
||||
}
|
||||
}
|
||||
if (*diskaddr > diskend) /* we're finished, */
|
||||
break; /* get out of here */
|
||||
}
|
||||
break;
|
||||
|
||||
case plex_striped:
|
||||
{
|
||||
while (*diskaddr < diskend) { /* until we get it all sorted out */
|
||||
/* The offset of the start address from
|
||||
* the start of the stripe */
|
||||
stripeoffset = *diskaddr % (plex->stripesize * plex->subdisks);
|
||||
|
||||
/* The plex-relative address of the
|
||||
* start of the stripe */
|
||||
stripebase = *diskaddr - stripeoffset;
|
||||
|
||||
/* The number of the subdisk in which
|
||||
* the start is located */
|
||||
sdno = stripeoffset / plex->stripesize;
|
||||
|
||||
/* The offset from the beginning of the stripe
|
||||
* on this subdisk */
|
||||
blockoffset = stripeoffset % plex->stripesize;
|
||||
|
||||
sd = &SD[plex->sdnos[sdno]]; /* the subdisk in question */
|
||||
if ((sd->state != sd_up) || (plex->state != plex_up)) {
|
||||
enum requeststatus s;
|
||||
|
||||
s = checksdstate(sd, rq, *diskaddr, diskend); /* do we need to change state? */
|
||||
if (s) /* give up? */
|
||||
return s; /* yup */
|
||||
}
|
||||
rqg = allocrqg(rq, 1); /* space for the request */
|
||||
if (rqg == NULL) { /* malloc failed */
|
||||
bp->b_flags |= B_ERROR;
|
||||
bp->b_error = ENOMEM;
|
||||
biodone(bp);
|
||||
return REQUEST_ENOMEM;
|
||||
}
|
||||
rqg->plexno = plexno;
|
||||
|
||||
rqe = &rqg->rqe[0]; /* point to the element */
|
||||
rqe->rqg = rqg;
|
||||
rqe->sdoffset = stripebase / plex->subdisks + blockoffset; /* start offset in this subdisk */
|
||||
rqe->useroffset = *diskaddr - diskstart; /* The offset of the start in the user buffer */
|
||||
rqe->dataoffset = 0;
|
||||
rqe->datalen = min(diskend - *diskaddr, /* the amount remaining to transfer */
|
||||
plex->stripesize - blockoffset); /* and the amount left in this stripe */
|
||||
rqe->groupoffset = 0; /* no groups for striped plexes */
|
||||
rqe->grouplen = 0;
|
||||
rqe->buflen = rqe->datalen; /* buffer length is data buffer length */
|
||||
rqe->flags = 0;
|
||||
rqe->sdno = sd->sdno; /* put in the subdisk number */
|
||||
rqe->driveno = sd->driveno;
|
||||
|
||||
if (rqe->sdoffset >= sd->sectors) { /* starts beyond the end of the subdisk? */
|
||||
deallocrqg(rqg);
|
||||
return REQUEST_EOF;
|
||||
} else if (rqe->sdoffset + rqe->datalen > sd->sectors) /* ends beyond the end of the subdisk? */
|
||||
rqe->datalen = sd->sectors - rqe->sdoffset; /* yes, truncate */
|
||||
|
||||
if (build_rq_buffer(rqe, plex)) { /* build the buffer */
|
||||
deallocrqg(rqg);
|
||||
bp->b_flags |= B_ERROR;
|
||||
bp->b_error = ENOMEM;
|
||||
biodone(bp);
|
||||
return REQUEST_ENOMEM; /* can't do it */
|
||||
}
|
||||
*diskaddr += rqe->datalen; /* look at the remainder */
|
||||
if (*diskaddr < diskend) { /* didn't finish the request on this stripe */
|
||||
plex->multiblock++; /* count another one */
|
||||
if (sdno == plex->subdisks - 1) /* last subdisk, */
|
||||
plex->multistripe++; /* another stripe as well */
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
|
||||
default:
|
||||
printf("vinum: invalid plex type in bre");
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
/* Build up a request structure for reading volumes.
|
||||
* This function is not needed for plex reads, since there's
|
||||
* no recovery if a plex read can't be satisfied. */
|
||||
enum requeststatus
|
||||
build_read_request(struct request *rq, /* request */
|
||||
int plexindex)
|
||||
{ /* index in the volume's plex table */
|
||||
BROKEN_GDB;
|
||||
struct buf *bp;
|
||||
daddr_t startaddr; /* offset of previous part of transfer */
|
||||
daddr_t diskaddr; /* offset of current part of transfer */
|
||||
daddr_t diskend; /* and end offset of transfer */
|
||||
int plexno; /* plex index in vinum_conf */
|
||||
struct rqgroup *rqg; /* point to the request we're working on */
|
||||
struct volume *vol; /* volume in question */
|
||||
off_t oldstart; /* note where we started */
|
||||
int recovered = 0; /* set if we recover a read */
|
||||
enum requeststatus status = REQUEST_OK;
|
||||
|
||||
bp = rq->bp; /* buffer pointer */
|
||||
diskaddr = bp->b_blkno; /* start offset of transfer */
|
||||
diskend = diskaddr + (bp->b_bcount / DEV_BSIZE); /* and end offset of transfer */
|
||||
rqg = &rq->rqg[plexindex]; /* plex request */
|
||||
vol = &VOL[rq->volplex.volno]; /* point to volume */
|
||||
|
||||
while (diskaddr < diskend) { /* build up request components */
|
||||
startaddr = diskaddr;
|
||||
status = bre(rq, vol->plex[plexindex], &diskaddr, diskend); /* build up a request */
|
||||
switch (status) {
|
||||
case REQUEST_OK:
|
||||
continue;
|
||||
|
||||
case REQUEST_RECOVERED:
|
||||
recovered = 1;
|
||||
break;
|
||||
|
||||
case REQUEST_EOF:
|
||||
case REQUEST_ENOMEM:
|
||||
return status;
|
||||
|
||||
/* if we get here, we have either had a failure or
|
||||
* a RAID 5 recovery. We don't want to use the
|
||||
* recovery, because it's expensive, so first we
|
||||
* check if we have alternatives */
|
||||
case REQUEST_DOWN: /* can't access the plex */
|
||||
if (vol != NULL) { /* and this is volume I/O */
|
||||
/* Try to satisfy the request
|
||||
* from another plex */
|
||||
for (plexno = 0; plexno < vol->plexes; plexno++) {
|
||||
diskaddr = startaddr; /* start at the beginning again */
|
||||
oldstart = startaddr; /* and note where that was */
|
||||
if (plexno != plexindex) { /* don't try this plex again */
|
||||
bre(rq, vol->plex[plexno], &diskaddr, diskend); /* try a request */
|
||||
if (diskaddr > oldstart) { /* we satisfied another part */
|
||||
recovered = 1; /* we recovered from the problem */
|
||||
status = REQUEST_OK; /* don't complain about it */
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (plexno == (vol->plexes - 1)) /* couldn't satisfy the request */
|
||||
return REQUEST_DOWN; /* failed */
|
||||
}
|
||||
} else
|
||||
return REQUEST_DOWN; /* bad luck */
|
||||
}
|
||||
if (recovered)
|
||||
vol->recovered_reads += recovered; /* adjust our recovery count */
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
/* Build up a request structure for writes.
|
||||
* Return 0 if all subdisks involved in the request are up, 1 if some
|
||||
* subdisks are not up, and -1 if the request is at least partially
|
||||
* outside the bounds of the subdisks. */
|
||||
enum requeststatus
|
||||
build_write_request(struct request *rq)
|
||||
{ /* request */
|
||||
BROKEN_GDB;
|
||||
struct buf *bp;
|
||||
daddr_t diskstart; /* offset of current part of transfer */
|
||||
daddr_t diskend; /* and end offset of transfer */
|
||||
int plexno; /* plex index in vinum_conf */
|
||||
struct volume *vol; /* volume in question */
|
||||
enum requeststatus status;
|
||||
|
||||
bp = rq->bp; /* buffer pointer */
|
||||
vol = &VOL[rq->volplex.volno]; /* point to volume */
|
||||
diskend = bp->b_blkno + (bp->b_bcount / DEV_BSIZE); /* end offset of transfer */
|
||||
status = REQUEST_OK;
|
||||
for (plexno = 0; plexno < vol->plexes; plexno++) {
|
||||
diskstart = bp->b_blkno; /* start offset of transfer */
|
||||
status = min(status, bre(rq, /* build requests for the plex */
|
||||
vol->plex[plexno],
|
||||
&diskstart,
|
||||
diskend));
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
/* Fill in the struct buf part of a request element. */
|
||||
enum requeststatus
|
||||
build_rq_buffer(struct rqelement *rqe, struct plex *plex)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
struct sd *sd; /* point to subdisk */
|
||||
struct volume *vol;
|
||||
struct buf *bp;
|
||||
struct buf *ubp; /* user (high level) buffer header */
|
||||
|
||||
vol = &VOL[rqe->rqg->rq->volplex.volno];
|
||||
sd = &SD[rqe->sdno]; /* point to subdisk */
|
||||
bp = &rqe->b;
|
||||
ubp = rqe->rqg->rq->bp; /* pointer to user buffer header */
|
||||
|
||||
/* Initialize the buf struct */
|
||||
bzero(&rqe->b, sizeof(struct buf));
|
||||
bp->b_proc = ubp->b_proc; /* process pointer */
|
||||
bp->b_flags = ubp->b_flags & (B_NOCACHE | B_READ | B_ASYNC); /* copy these flags from user bp */
|
||||
bp->b_flags |= B_CALL | B_BUSY; /* inform us when it's done */
|
||||
if (plex->state == plex_reviving)
|
||||
bp->b_flags |= B_ORDERED; /* keep request order if we're reviving */
|
||||
bp->b_iodone = complete_rqe; /* by calling us here */
|
||||
bp->b_dev = DRIVE[rqe->driveno].dev; /* drive device */
|
||||
bp->b_blkno = rqe->sdoffset + sd->driveoffset; /* start address */
|
||||
bp->b_bcount = rqe->buflen << DEV_BSHIFT; /* number of bytes to transfer */
|
||||
bp->b_resid = bp->b_bcount; /* and it's still all waiting */
|
||||
bp->b_bufsize = bp->b_bcount; /* and buffer size */
|
||||
bp->b_vp = DRIVE[rqe->driveno].vp; /* drive vnode */
|
||||
bp->b_rcred = FSCRED; /* we have the file system credentials */
|
||||
bp->b_wcred = FSCRED; /* we have the file system credentials */
|
||||
|
||||
if (rqe->flags & XFR_MALLOCED) { /* this operation requires a malloced buffer */
|
||||
bp->b_data = Malloc(bp->b_bcount); /* get a buffer to put it in */
|
||||
if (bp->b_data == NULL) { /* failed */
|
||||
Debugger("XXX");
|
||||
abortrequest(rqe->rqg->rq, ENOMEM);
|
||||
return REQUEST_ENOMEM; /* no memory */
|
||||
}
|
||||
} else
|
||||
/* Point directly to user buffer data. This means
|
||||
* that we don't need to do anything when we have
|
||||
* finished the transfer */
|
||||
bp->b_data = ubp->b_data + rqe->useroffset * DEV_BSIZE;
|
||||
return 0;
|
||||
}
|
||||
/* Abort a request: free resources and complete the
|
||||
* user request with the specified error */
|
||||
int
|
||||
abortrequest(struct request *rq, int error)
|
||||
{
|
||||
struct buf *bp = rq->bp; /* user buffer */
|
||||
|
||||
bp->b_flags |= B_ERROR;
|
||||
bp->b_error = error;
|
||||
freerq(rq); /* free everything we're doing */
|
||||
biodone(bp);
|
||||
return error; /* and give up */
|
||||
}
|
||||
|
||||
/*
 * Check that our transfer will cover the complete address space of
 * the user request.
 *
 * Return 1 if it can, otherwise 0.
 *
 * XXX Not implemented yet: rq is not examined and every request is
 * reported as covered.
 */
int
check_range_covered(struct request *rq)
{
    int covered = 1;					    /* XXX */

    return covered;
}
|
||||
|
||||
/* Perform I/O on a subdisk */
|
||||
void
|
||||
sdio(struct buf *bp)
|
||||
{
|
||||
int s; /* spl */
|
||||
struct sd *sd;
|
||||
struct sdbuf *sbp;
|
||||
daddr_t endoffset;
|
||||
struct drive *drive;
|
||||
|
||||
sd = &SD[SDNO(bp->b_dev)]; /* point to the subdisk */
|
||||
drive = &DRIVE[sd->driveno];
|
||||
|
||||
if (drive->state != drive_up) { /* XXX until we get the states fixed */
|
||||
set_sd_state(SDNO(bp->b_dev), sd_obsolete, setstate_force);
|
||||
bp->b_flags |= B_ERROR;
|
||||
bp->b_error = EIO;
|
||||
biodone(bp);
|
||||
return;
|
||||
}
|
||||
/* XXX decide which states we will really accept here. up
|
||||
* implies it could be involved with a plex, in which
|
||||
* case we don't want to dick with it */
|
||||
if ((sd->state != sd_up)
|
||||
&& (sd->state != sd_initializing)
|
||||
&& (sd->state != sd_reborn)) { /* we can't access it */
|
||||
bp->b_flags |= B_ERROR;
|
||||
bp->b_flags = EIO;
|
||||
if (bp->b_flags & B_BUSY) /* XXX why isn't this always the case? */
|
||||
biodone(bp);
|
||||
return;
|
||||
}
|
||||
/* Get a buffer */
|
||||
sbp = (struct sdbuf *) Malloc(sizeof(struct sdbuf));
|
||||
if (sbp == NULL) {
|
||||
bp->b_flags |= B_ERROR;
|
||||
bp->b_error = ENOMEM;
|
||||
biodone(bp);
|
||||
return;
|
||||
}
|
||||
bcopy(bp, &sbp->b, sizeof(struct buf)); /* start with the user's buffer */
|
||||
sbp->b.b_flags |= B_CALL; /* tell us when it's done */
|
||||
sbp->b.b_iodone = sdio_done; /* here */
|
||||
sbp->b.b_dev = DRIVE[sd->driveno].dev; /* device */
|
||||
sbp->b.b_vp = DRIVE[sd->driveno].vp; /* vnode */
|
||||
sbp->b.b_blkno += sd->driveoffset;
|
||||
sbp->bp = bp; /* note the address of the original header */
|
||||
sbp->sdno = sd->sdno; /* note for statistics */
|
||||
sbp->driveno = sd->driveno;
|
||||
endoffset = bp->b_blkno + sbp->b.b_bcount / DEV_BSIZE; /* final sector offset */
|
||||
if (endoffset > sd->sectors) { /* beyond the end */
|
||||
sbp->b.b_bcount -= (endoffset - sd->sectors) * DEV_BSIZE; /* trim */
|
||||
if (sbp->b.b_bcount <= 0) { /* nothing to transfer */
|
||||
bp->b_resid = bp->b_bcount; /* nothing transferred */
|
||||
/* XXX Grrr. This doesn't seem to work. Return
|
||||
* an error after all */
|
||||
bp->b_flags |= B_ERROR;
|
||||
bp->b_error = ENOSPC;
|
||||
biodone(bp);
|
||||
Free(sbp);
|
||||
return;
|
||||
}
|
||||
}
|
||||
if ((sbp->b.b_flags & B_READ) == 0) /* write */
|
||||
sbp->b.b_vp->v_numoutput++; /* one more output going */
|
||||
#if DEBUG
|
||||
if (debug & DEBUG_ADDRESSES)
|
||||
printf(" %s dev 0x%x, sd %d, offset 0x%x, devoffset 0x%x, length %ld\n",
|
||||
sbp->b.b_flags & B_READ ? "Read" : "Write",
|
||||
sbp->b.b_dev,
|
||||
sbp->sdno,
|
||||
(u_int) (sbp->b.b_blkno - SD[sbp->sdno].driveoffset),
|
||||
(int) sbp->b.b_blkno,
|
||||
sbp->b.b_bcount); /* XXX */
|
||||
if (debug & DEBUG_NUMOUTPUT)
|
||||
printf(" vinumstart sd %d numoutput %ld\n",
|
||||
sbp->sdno,
|
||||
sbp->b.b_vp->v_numoutput);
|
||||
#endif
|
||||
s = splbio();
|
||||
(*bdevsw[major(sbp->b.b_dev)]->d_strategy) (&sbp->b);
|
||||
splx(s);
|
||||
}
|
||||
|
||||
/* Simplified version of bounds_check_with_label
|
||||
* Determine the size of the transfer, and make sure it is
|
||||
* within the boundaries of the partition. Adjust transfer
|
||||
* if needed, and signal errors or early completion.
|
||||
*
|
||||
* Volumes are simpler than disk slices: they only contain
|
||||
* one component (though we call them a, b and c to make
|
||||
* system utilities happy), and they always take up the
|
||||
* complete space of the "partition".
|
||||
*
|
||||
* I'm still not happy with this: why should the label be
|
||||
* protected? If it weren't so damned difficult to write
|
||||
* one in the first pleace (because it's protected), it wouldn't
|
||||
* be a problem.
|
||||
*/
|
||||
int
|
||||
vinum_bounds_check(struct buf *bp, struct volume *vol)
|
||||
{
|
||||
int maxsize = vol->size; /* size of the partition (sectors) */
|
||||
int size = (bp->b_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT; /* size of this request (sectors) */
|
||||
|
||||
/* Would this transfer overwrite the disk label? */
|
||||
if (bp->b_blkno <= LABELSECTOR /* starts before or at the label */
|
||||
#if LABELSECTOR != 0
|
||||
&& bp->b_blkno + size > LABELSECTOR /* and finishes after */
|
||||
#endif
|
||||
&& (!(vol->flags & VF_RAW)) /* and it's not raw */
|
||||
&&major(bp->b_dev) == BDEV_MAJOR /* and it's the block device */
|
||||
&& (bp->b_flags & B_READ) == 0 /* and it's a write */
|
||||
&& (!vol->flags & (VF_WLABEL | VF_LABELLING))) { /* and we're not allowed to write the label */
|
||||
bp->b_error = EROFS; /* read-only */
|
||||
bp->b_flags |= B_ERROR;
|
||||
return -1;
|
||||
}
|
||||
if (size == 0) /* no transfer specified, */
|
||||
return 0; /* treat as EOF */
|
||||
/* beyond partition? */
|
||||
if (bp->b_blkno < 0 /* negative start */
|
||||
|| bp->b_blkno + size > maxsize) { /* or goes beyond the end of the partition */
|
||||
/* if exactly at end of disk, return an EOF */
|
||||
if (bp->b_blkno == maxsize) {
|
||||
bp->b_resid = bp->b_bcount;
|
||||
return 0;
|
||||
}
|
||||
/* or truncate if part of it fits */
|
||||
size = maxsize - bp->b_blkno;
|
||||
if (size <= 0) { /* nothing to transfer */
|
||||
bp->b_error = EINVAL;
|
||||
bp->b_flags |= B_ERROR;
|
||||
return -1;
|
||||
}
|
||||
bp->b_bcount = size << DEV_BSHIFT;
|
||||
}
|
||||
bp->b_pblkno = bp->b_blkno;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Allocate a request group and hook
|
||||
* it in in the list for rq */
|
||||
struct rqgroup *
|
||||
allocrqg(struct request *rq, int elements)
|
||||
{
|
||||
struct rqgroup *rqg; /* the one we're going to allocate */
|
||||
int size = sizeof(struct rqgroup) + elements * sizeof(struct rqelement);
|
||||
|
||||
rqg = (struct rqgroup *) Malloc(size);
|
||||
if (rqg != NULL) { /* malloc OK, */
|
||||
if (rq->rqg) /* we already have requests */
|
||||
rq->lrqg->next = rqg; /* hang it off the end */
|
||||
else /* first request */
|
||||
rq->rqg = rqg; /* at the start */
|
||||
rq->lrqg = rqg; /* this one is the last in the list */
|
||||
|
||||
bzero(rqg, size); /* no old junk */
|
||||
rqg->rq = rq; /* point back to the parent request */
|
||||
rqg->count = elements; /* number of requests in the group */
|
||||
} else
|
||||
Debugger("XXX");
|
||||
return rqg;
|
||||
}
|
||||
|
||||
/* Deallocate a request group out of a chain. We do
|
||||
* this by linear search: the chain is short, this
|
||||
* almost never happens, and currently it can only
|
||||
* happen to the first member of the chain. */
|
||||
void
|
||||
deallocrqg(struct rqgroup *rqg)
|
||||
{
|
||||
struct rqgroup *rqgc = rqg->rq->rqg; /* point to the request chain */
|
||||
|
||||
if (rqg->rq->rqg == rqg) /* we're first in line */
|
||||
rqg->rq->rqg = rqg->next; /* unhook ourselves */
|
||||
else {
|
||||
while (rqgc->next != rqg) /* find the group */
|
||||
rqgc = rqgc->next;
|
||||
rqgc->next = rqg->next;
|
||||
}
|
||||
Free(rqgc);
|
||||
}
|
||||
|
||||
/* Character device interface */
|
||||
int
|
||||
vinumread(dev_t dev, struct uio *uio, int ioflag)
|
||||
{
|
||||
return (physio(vinumstrategy, NULL, dev, 1, minphys, uio));
|
||||
}
|
||||
|
||||
int
|
||||
vinumwrite(dev_t dev, struct uio *uio, int ioflag)
|
||||
{
|
||||
return (physio(vinumstrategy, NULL, dev, 0, minphys, uio));
|
||||
}
|
128
sys/dev/vinum/vinumrevive.c
Normal file
128
sys/dev/vinum/vinumrevive.c
Normal file
@ -0,0 +1,128 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: revive.c,v 1.1 1998/08/14 06:16:59 grog Exp grog $
|
||||
*/
|
||||
|
||||
#define REALLYKERNEL
|
||||
#include "vinumhdr.h"
|
||||
#include "request.h"
|
||||
|
||||
/* revive a block of a plex. Return an error
|
||||
* indication. EAGAIN means successful copy, but
|
||||
* that more blocks remain to be copied.
|
||||
* XXX We should specify a block size here. At the moment,
|
||||
* just take a default value. FIXME */
|
||||
int
|
||||
revive_block(int plexno)
|
||||
{
|
||||
struct plex *plex = &PLEX[plexno];
|
||||
struct buf *bp;
|
||||
int error = EAGAIN;
|
||||
int size;
|
||||
int s; /* priority level */
|
||||
|
||||
if (plex->revive_blocksize == 0) {
|
||||
if (plex->stripesize != 0) /* we're striped, don't revive more than */
|
||||
plex->revive_blocksize = min(DEFAULT_REVIVE_BLOCKSIZE, plex->stripesize); /* one block at a time */
|
||||
else
|
||||
plex->revive_blocksize = DEFAULT_REVIVE_BLOCKSIZE;
|
||||
}
|
||||
size = min(plex->revive_blocksize, plex->length - plex->revived) << DEV_BSHIFT;
|
||||
|
||||
s = splbio();
|
||||
/* Get a buffer */
|
||||
bp = geteblk(size);
|
||||
if (bp == NULL) {
|
||||
splx(s);
|
||||
return ENOMEM;
|
||||
}
|
||||
if (bp->b_qindex != 0) /* on a queue, */
|
||||
bremfree(bp); /* remove it */
|
||||
splx(s);
|
||||
|
||||
/* Amount to transfer: block size, unless it
|
||||
* would overlap the end */
|
||||
bp->b_bufsize = size;
|
||||
bp->b_bcount = bp->b_bufsize;
|
||||
bp->b_resid = 0x0;
|
||||
bp->b_blkno = plex->revived; /* we've got this far */
|
||||
|
||||
/* XXX what about reviving anonymous plexes? */
|
||||
|
||||
/* First, read the data from the volume. We don't
|
||||
* care which plex, that's bre's job */
|
||||
bp->b_dev = VINUMBDEV(plex->volno, 0, 0, VINUM_VOLUME_TYPE); /* create the device number */
|
||||
bp->b_flags = B_BUSY | B_READ;
|
||||
vinumstart(bp, 1);
|
||||
biowait(bp);
|
||||
if (bp->b_flags & B_ERROR)
|
||||
error = bp->b_error;
|
||||
else
|
||||
/* Now write to the plex */
|
||||
{
|
||||
s = splbio();
|
||||
if (bp->b_qindex != 0) /* on a queue, */
|
||||
bremfree(bp); /* remove it */
|
||||
splx(s);
|
||||
bp->b_dev = VINUMBDEV(plex->volno, plex->volplexno, 0, VINUM_PLEX_TYPE); /* create the device number */
|
||||
|
||||
bp->b_flags = B_BUSY; /* make this a write */
|
||||
bp->b_resid = 0x0;
|
||||
vinumstart(bp, 1);
|
||||
biowait(bp);
|
||||
if (bp->b_flags & B_ERROR)
|
||||
error = bp->b_error;
|
||||
else {
|
||||
plex->revived += bp->b_bcount >> DEV_BSHIFT; /* moved this much further down */
|
||||
if (plex->revived >= plex->length) { /* finished */
|
||||
plex->revived = 0;
|
||||
plex->state = plex_up; /* do we need to do more? */
|
||||
if (plex->volno >= 0) /* we have a volume, */
|
||||
set_volume_state(plex->volno, volume_up, 0);
|
||||
printf("vinum: plex %s is %s\n", plex->name, plex_state(plex->state));
|
||||
save_config(); /* and save the updated configuration */
|
||||
error = 0; /* we're done */
|
||||
}
|
||||
}
|
||||
while (plex->waitlist) { /* we have waiting requests */
|
||||
launch_requests(plex->waitlist, 1); /* do them now */
|
||||
plex->waitlist = plex->waitlist->next; /* and move on to the next */
|
||||
}
|
||||
}
|
||||
if (bp->b_qindex == 0) /* not on a queue, */
|
||||
brelse(bp); /* is this kosher? */
|
||||
return error;
|
||||
}
|
755
sys/dev/vinum/vinumstate.c
Normal file
755
sys/dev/vinum/vinumstate.c
Normal file
@ -0,0 +1,755 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: state.c,v 2.6 1998/08/19 08:04:47 grog Exp grog $
|
||||
*/
|
||||
|
||||
#define REALLYKERNEL
|
||||
#include "vinumhdr.h"
|
||||
#include "request.h"
|
||||
|
||||
/* Update drive state */
|
||||
/* Return 1 if the state changes, otherwise 0 */
|
||||
int
|
||||
set_drive_state(int driveno, enum drivestate state, int flags)
|
||||
{
|
||||
struct drive *drive = &DRIVE[driveno];
|
||||
int oldstate = drive->state;
|
||||
int sdno;
|
||||
|
||||
if (drive->state == drive_unallocated) /* no drive to do anything with, */
|
||||
return 0;
|
||||
|
||||
if (state != oldstate) { /* don't change it if it's not different */
|
||||
if (state == drive_down) { /* the drive's going down */
|
||||
if (flags || (drive->opencount == 0)) { /* we can do it */
|
||||
close_drive(drive);
|
||||
drive->state = state;
|
||||
printf("vinum: drive %s is %s\n", drive->label.name, drive_state(drive->state));
|
||||
} else
|
||||
return 0; /* don't do it */
|
||||
}
|
||||
drive->state = state; /* set the state */
|
||||
if (((drive->state == drive_up)
|
||||
|| ((drive->state == drive_coming_up)))
|
||||
&& (drive->vp == NULL)) /* should be open, but we're not */
|
||||
init_drive(drive); /* which changes the state again */
|
||||
if ((state != oldstate) /* state has changed */
|
||||
&&((flags & setstate_norecurse) == 0)) { /* and we want to recurse, */
|
||||
for (sdno = 0; sdno < vinum_conf.subdisks_used; sdno++) { /* find this drive's subdisks */
|
||||
if (SD[sdno].driveno == driveno) /* belongs to this drive */
|
||||
set_sd_state(sdno, sd_down, setstate_force | setstate_recursing); /* take it down */
|
||||
}
|
||||
save_config(); /* and save the updated configuration */
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Try to set the subdisk state. Return 1 if state changed to
|
||||
* what we wanted, -1 if it changed to something else, and 0
|
||||
* if no change.
|
||||
*
|
||||
* This routine is called both from the user (up, down states
|
||||
* only) and internally.
|
||||
*/
|
||||
int
|
||||
set_sd_state(int sdno, enum sdstate state, enum setstateflags flags)
|
||||
{
|
||||
struct sd *sd = &SD[sdno];
|
||||
int oldstate = sd->state;
|
||||
int status = 1; /* status to return */
|
||||
|
||||
if (state == oldstate)
|
||||
return 0; /* no change */
|
||||
|
||||
if (sd->state == sd_unallocated) /* no subdisk to do anything with, */
|
||||
return 0;
|
||||
|
||||
if (sd->driveoffset < 0) { /* not allocated space */
|
||||
sd->state = sd_down;
|
||||
if (state != sd_down)
|
||||
return -1;
|
||||
} else { /* space allocated */
|
||||
switch (state) {
|
||||
case sd_down:
|
||||
if ((!flags & setstate_force) /* but gently */
|
||||
&&(sd->plexno >= 0)) /* and we're attached to a plex, */
|
||||
return 0; /* don't do it */
|
||||
break;
|
||||
|
||||
case sd_up:
|
||||
if (DRIVE[sd->driveno].state != drive_up) /* can't bring the sd up if the drive isn't, */
|
||||
return 0; /* not even by force */
|
||||
switch (sd->state) {
|
||||
case sd_obsolete:
|
||||
case sd_down: /* been down, no data lost */
|
||||
if ((sd->plexno) /* we're associated with a plex */
|
||||
&&(((PLEX[sd->plexno].state < plex_firstup) /* and it's not up */
|
||||
||(PLEX[sd->plexno].subdisks > 1)))) /* or it's the only one */
|
||||
break;
|
||||
/* XXX Get this right: make sure that other plexes in
|
||||
* the volume cover this address space, otherwise
|
||||
* we make this one sd_up */
|
||||
sd->state = sd_reborn; /* here it is again */
|
||||
printf("vinum: subdisk %s is %s, not %s\n", sd->name, sd_state(sd->state), sd_state(state));
|
||||
status = -1;
|
||||
break;
|
||||
|
||||
case sd_init: /* brand new */
|
||||
if (flags & setstate_configuring) /* we're doing this while configuring */
|
||||
break;
|
||||
sd->state = sd_empty; /* nothing in it */
|
||||
printf("vinum: subdisk %s is %s, not %s\n", sd->name, sd_state(sd->state), sd_state(state));
|
||||
status = -1;
|
||||
break;
|
||||
|
||||
case sd_initializing:
|
||||
break; /* go on and do it */
|
||||
|
||||
case sd_empty:
|
||||
if ((sd->plexno) /* we're associated with a plex */
|
||||
&&(((PLEX[sd->plexno].state < plex_firstup) /* and it's not up */
|
||||
||(PLEX[sd->plexno].subdisks > 1)))) /* or it's the only one */
|
||||
break;
|
||||
return 0; /* can't do it */
|
||||
|
||||
default: /* can't do it */
|
||||
/* There's no way to bring subdisks up directly from
|
||||
* other states. First they need to be initialized
|
||||
* or revived */
|
||||
return 0;
|
||||
}
|
||||
break;
|
||||
|
||||
default: /* other ones, only internal with force */
|
||||
if (flags & setstate_force == 0) /* no force? What's this? */
|
||||
return 0; /* don't do it */
|
||||
}
|
||||
}
|
||||
sd->state = state;
|
||||
printf("vinum: subdisk %s is %s\n", sd->name, sd_state(sd->state));
|
||||
if ((flags & setstate_norecurse) == 0)
|
||||
set_plex_state(sd->plexno, plex_up, setstate_recursing); /* update plex state */
|
||||
if ((flags & (setstate_configuring | setstate_recursing)) == 0) /* save config now */
|
||||
save_config();
|
||||
return status;
|
||||
}
|
||||
|
||||
/* Called from request routines when they find
|
||||
* a subdisk which is not kosher. Decide whether
|
||||
* it warrants changing the state. Return
|
||||
* REQUEST_DOWN if we can't use the subdisk,
|
||||
* REQUEST_OK if we can. */
|
||||
enum requeststatus
|
||||
checksdstate(struct sd *sd, struct request *rq, daddr_t diskaddr, daddr_t diskend)
|
||||
{
|
||||
struct plex *plex = &PLEX[sd->plexno];
|
||||
int writeop = (rq->bp->b_flags & B_READ) == 0; /* note if we're writing */
|
||||
|
||||
/* first, see if the plex wants to be accessed */
|
||||
switch (plex->state) {
|
||||
case plex_reviving:
|
||||
/* When writing, we'll write anything that starts
|
||||
* up to the current revive pointer, but we'll
|
||||
* only accept a read which finishes before the
|
||||
* current revive pointer.
|
||||
*/
|
||||
if ((writeop && (diskaddr > plex->revived)) /* write starts after current revive pointer */
|
||||
||((!writeop) && (diskend >= plex->revived))) { /* or read ends after current revive pointer */
|
||||
if (writeop) { /* writing to a consistent down disk */
|
||||
if (DRIVE[sd->driveno].state == drive_up)
|
||||
set_sd_state(sd->sdno, sd_stale, setstate_force); /* it's not consistent now */
|
||||
else
|
||||
set_sd_state(sd->sdno, sd_obsolete, setstate_force); /* it's not consistent now */
|
||||
}
|
||||
return REQUEST_DOWN; /* that part of the plex is still down */
|
||||
} else if (diskend >= plex->revived) /* write finishes beyond revive pointer */
|
||||
rq->flags |= XFR_REVIVECONFLICT; /* note a potential conflict */
|
||||
/* FALLTHROUGH */
|
||||
|
||||
case plex_up:
|
||||
case plex_degraded:
|
||||
case plex_flaky:
|
||||
/* We can access the plex: let's see
|
||||
* how the subdisk feels */
|
||||
switch (sd->state) {
|
||||
case sd_up:
|
||||
return REQUEST_OK;
|
||||
|
||||
case sd_reborn:
|
||||
if (writeop)
|
||||
return REQUEST_OK; /* always write to a reborn disk */
|
||||
/* Handle the mapping. We don't want to reject
|
||||
* a read request to a reborn subdisk if that's
|
||||
* all we have. XXX */
|
||||
return REQUEST_DOWN;
|
||||
|
||||
case sd_down:
|
||||
case sd_crashed:
|
||||
if (writeop) { /* writing to a consistent down disk */
|
||||
if (DRIVE[sd->driveno].state == drive_up)
|
||||
set_sd_state(sd->sdno, sd_stale, setstate_force); /* it's not consistent now */
|
||||
else
|
||||
set_sd_state(sd->sdno, sd_obsolete, setstate_force); /* it's not consistent now */
|
||||
}
|
||||
return REQUEST_DOWN; /* and it's down one way or another */
|
||||
|
||||
default:
|
||||
return REQUEST_DOWN;
|
||||
}
|
||||
|
||||
default:
|
||||
return REQUEST_DOWN;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
add_defective_region(struct plex *plex, off_t offset, size_t length)
|
||||
{
|
||||
/* XXX get this ordered, and coalesce regions if necessary */
|
||||
if (++plex->defective_regions > plex->defective_region_count)
|
||||
EXPAND(plex->defective_region,
|
||||
struct plexregion,
|
||||
plex->defective_region_count,
|
||||
PLEX_REGION_TABLE_SIZE);
|
||||
plex->defective_region[plex->defective_regions - 1].offset = offset;
|
||||
plex->defective_region[plex->defective_regions - 1].length = length;
|
||||
}
|
||||
|
||||
void
|
||||
add_unmapped_region(struct plex *plex, off_t offset, size_t length)
|
||||
{
|
||||
if (++plex->unmapped_regions > plex->unmapped_region_count)
|
||||
EXPAND(plex->unmapped_region,
|
||||
struct plexregion,
|
||||
plex->unmapped_region_count,
|
||||
PLEX_REGION_TABLE_SIZE);
|
||||
plex->unmapped_region[plex->unmapped_regions - 1].offset = offset;
|
||||
plex->unmapped_region[plex->unmapped_regions - 1].length = length;
|
||||
}
|
||||
|
||||
/* Rebuild a plex free list and set state if
|
||||
* we have a configuration error */
|
||||
void
|
||||
rebuild_plex_unmappedlist(struct plex *plex)
|
||||
{
|
||||
int sdno;
|
||||
struct sd *sd;
|
||||
int lastsdend = 0; /* end offset of last subdisk */
|
||||
|
||||
if (plex->unmapped_region != NULL) { /* we're going to rebuild it */
|
||||
Free(plex->unmapped_region);
|
||||
plex->unmapped_region = NULL;
|
||||
plex->unmapped_regions = 0;
|
||||
plex->unmapped_region_count = 0;
|
||||
}
|
||||
if (plex->defective_region != NULL) {
|
||||
Free(plex->defective_region);
|
||||
plex->defective_region = NULL;
|
||||
plex->defective_regions = 0;
|
||||
plex->defective_region_count = 0;
|
||||
}
|
||||
for (sdno = 0; sdno < plex->subdisks; sdno++) {
|
||||
sd = &SD[plex->sdnos[sdno]];
|
||||
if (sd->plexoffset < lastsdend) { /* overlap */
|
||||
printf("vinum: Plex %s, subdisk %s overlaps previous\n", plex->name, sd->name);
|
||||
set_plex_state(plex->plexno, plex_down, setstate_force); /* don't allow that */
|
||||
} else if (sd->plexoffset > lastsdend) /* gap */
|
||||
add_unmapped_region(plex, lastsdend, sd->plexoffset - lastsdend);
|
||||
else if (sd->state < sd_reborn) /* this part defective */
|
||||
add_defective_region(plex, sd->plexoffset, sd->sectors);
|
||||
lastsdend = sd->plexoffset + sd->sectors;
|
||||
}
|
||||
}
|
||||
|
||||
/* return a state map for the subdisks of a plex */
|
||||
enum sdstates
|
||||
sdstatemap(struct plex *plex, int *sddowncount)
|
||||
{
|
||||
int sdno;
|
||||
enum sdstates statemap = 0; /* note the states we find */
|
||||
|
||||
*sddowncount = 0; /* no subdisks down yet */
|
||||
for (sdno = 0; sdno < plex->subdisks; sdno++) {
|
||||
struct sd *sd = &SD[plex->sdnos[sdno]]; /* point to the subdisk */
|
||||
|
||||
switch (sd->state) {
|
||||
case sd_empty:
|
||||
statemap |= sd_emptystate;
|
||||
(*sddowncount)++; /* another unusable subdisk */
|
||||
break;
|
||||
|
||||
case sd_init:
|
||||
statemap |= sd_initstate;
|
||||
(*sddowncount)++; /* another unusable subdisk */
|
||||
break;
|
||||
|
||||
case sd_down:
|
||||
statemap |= sd_downstate;
|
||||
(*sddowncount)++; /* another unusable subdisk */
|
||||
break;
|
||||
|
||||
case sd_crashed:
|
||||
statemap |= sd_crashedstate;
|
||||
(*sddowncount)++; /* another unusable subdisk */
|
||||
break;
|
||||
|
||||
case sd_obsolete:
|
||||
statemap |= sd_obsolete;
|
||||
(*sddowncount)++; /* another unusable subdisk */
|
||||
break;
|
||||
|
||||
case sd_stale:
|
||||
statemap |= sd_stalestate;
|
||||
(*sddowncount)++; /* another unusable subdisk */
|
||||
break;
|
||||
|
||||
case sd_reborn:
|
||||
statemap |= sd_rebornstate;
|
||||
break;
|
||||
|
||||
case sd_up:
|
||||
statemap |= sd_upstate;
|
||||
break;
|
||||
|
||||
default:
|
||||
statemap |= sd_otherstate;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return statemap;
|
||||
}
|
||||
|
||||
/* determine the state of the volume relative to this plex */
|
||||
enum volplexstate
|
||||
vpstate(struct plex *plex)
|
||||
{
|
||||
struct volume *vol;
|
||||
enum volplexstate state = volplex_onlyusdown; /* state to return */
|
||||
int plexno;
|
||||
|
||||
if (plex->volno < 0) /* not associated with a volume */
|
||||
return volplex_onlyusdown; /* assume the worst */
|
||||
|
||||
vol = &VOL[plex->volno]; /* point to our volume */
|
||||
for (plexno = 0; plexno < vol->plexes; plexno++) {
|
||||
if (&PLEX[vol->plex[plexno]] == plex) { /* us */
|
||||
if (PLEX[vol->plex[plexno]].state == plex_up) /* are we up? */
|
||||
state |= volplex_onlyus; /* yes */
|
||||
} else {
|
||||
if (PLEX[vol->plex[plexno]].state == plex_up) /* not us */
|
||||
state |= volplex_otherup; /* and when they were up, they were up */
|
||||
else
|
||||
state |= volplex_alldown; /* and when they were down, they were down */
|
||||
}
|
||||
}
|
||||
return state; /* and when they were only halfway up */
|
||||
} /* they were neither up nor down */
|
||||
|
||||
/* Return non-zero if every bit which is set in b is also set in a */
int allset(int a, int b);

int
allset(int a, int b)
{
    /* a covers b exactly when b has no bit which a is missing */
    return (b & ~a) == 0;
}
|
||||
|
||||
/* Update the state of a plex dependent on its subdisks.
|
||||
* Also rebuild the unmapped_region and defective_region table */
|
||||
int
|
||||
set_plex_state(int plexno, enum plexstate state, enum setstateflags flags)
|
||||
{
|
||||
int sddowncount = 0; /* number of down subdisks */
|
||||
struct plex *plex = &PLEX[plexno]; /* point to our plex */
|
||||
enum plexstate oldstate = plex->state;
|
||||
enum volplexstate vps = vpstate(plex); /* how do we compare with the other plexes? */
|
||||
enum sdstates statemap = sdstatemap(plex, &sddowncount); /* get a map of the subdisk states */
|
||||
|
||||
if ((flags & setstate_force) && (oldstate == state)) /* we're there already, */
|
||||
return 0; /* no change */
|
||||
|
||||
if (plex->state == plex_unallocated) /* no plex to do anything with, */
|
||||
return 0;
|
||||
|
||||
switch (state) {
|
||||
case plex_up:
|
||||
if ((plex->state == plex_initializing) /* we're initializing */
|
||||
&&(statemap != sd_upstate)) /* but SDs aren't up yet */
|
||||
return 0; /* do nothing */
|
||||
|
||||
/* We don't really care what our state was before
|
||||
* if we want to come up. We rely entirely on the
|
||||
* state of our subdisks and our volume */
|
||||
switch (vps) {
|
||||
case volplex_onlyusdown:
|
||||
case volplex_alldown: /* another plex is down, and so are we */
|
||||
if (statemap == sd_upstate) { /* all subdisks ready for action */
|
||||
if ((plex->state == plex_init) /* we're brand spanking new */
|
||||
&&(VOL[plex->volno].flags & VF_CONFIG_SETUPSTATE)) { /* and we consider that up */
|
||||
/* Conceptually, an empty plex does not contain valid data,
|
||||
* but normally we'll see this state when we have just
|
||||
* created a plex, and it's either consistent from earlier,
|
||||
* or we don't care about the previous contents (we're going
|
||||
* to create a file system or use it for swap).
|
||||
*
|
||||
* We need to do this in one swell foop: on the next call
|
||||
* we will no longer be just empty.
|
||||
*
|
||||
* We'll still come back to this function for the remaining
|
||||
* plexes in the volume. They'll be up already, so that
|
||||
* doesn't change anything, but it's not worth the additional
|
||||
* code to stop doing it. */
|
||||
struct volume *vol = &VOL[plex->volno];
|
||||
int plexno;
|
||||
|
||||
for (plexno = 0; plexno < vol->plexes; plexno++)
|
||||
PLEX[vol->plex[plexno]].state = plex_up;
|
||||
}
|
||||
plex->state = plex_up; /* bring up up, anyway */
|
||||
} else
|
||||
plex->state = plex_down;
|
||||
break;
|
||||
|
||||
case volplex_onlyusup: /* only we are up: others are down */
|
||||
case volplex_onlyus: /* we're up and alone */
|
||||
if ((statemap == sd_upstate) /* subdisks all up */
|
||||
||(statemap == sd_emptystate)) /* or all empty */
|
||||
plex->state = plex_up; /* go for it */
|
||||
else if ((statemap & (sd_upstate | sd_reborn)) == statemap) /* all up or reborn, */
|
||||
plex->state = plex_flaky;
|
||||
else if (statemap & (sd_upstate | sd_reborn)) /* some up or reborn, */
|
||||
plex->state = plex_degraded; /* so far no corruption */
|
||||
else
|
||||
plex->state = plex_faulty;
|
||||
break;
|
||||
|
||||
case volplex_otherup: /* another plex is up */
|
||||
case volplex_otherupdown: /* other plexes are up and down */
|
||||
if ((statemap == sd_upstate) /* subdisks all up */
|
||||
||(statemap == sd_emptystate) /* or all empty */
|
||||
) {
|
||||
/* Is the data in all subdisks valid? */
|
||||
if (statemap == statemap & (sd_downstate | sd_rebornstate | sd_upstate))
|
||||
break; /* yes, we can bring the plex up */
|
||||
plex->state = plex_reviving; /* we need reviving */
|
||||
return EAGAIN;
|
||||
} else
|
||||
plex->state = plex_faulty; /* still in error */
|
||||
break;
|
||||
|
||||
case volplex_allup: /* all plexes are up */
|
||||
case volplex_someup:
|
||||
if ((statemap & (sd_upstate | sd_reborn)) == statemap) /* all up or reborn, */
|
||||
break; /* no change */
|
||||
else
|
||||
plex->state = plex_degraded; /* we're not all there */
|
||||
}
|
||||
|
||||
if (plex->state != oldstate)
|
||||
break;
|
||||
return 0; /* no change */
|
||||
|
||||
case plex_down: /* want to take it down */
|
||||
if (((vps == volplex_onlyus) /* we're the only one up */
|
||||
||(vps == volplex_onlyusup)) /* we're the only one up */
|
||||
&&(!(flags & setstate_force))) /* and we don't want to use force */
|
||||
return 0; /* can't do it */
|
||||
plex->state = state; /* do it */
|
||||
break;
|
||||
|
||||
/* This is only requested by the driver.
|
||||
* Trust ourselves */
|
||||
case plex_faulty:
|
||||
plex->state = state; /* do it */
|
||||
break;
|
||||
|
||||
case plex_initializing:
|
||||
/* XXX consider what safeguards we need here */
|
||||
if ((flags & setstate_force) == 0)
|
||||
return 0;
|
||||
plex->state = state; /* do it */
|
||||
break;
|
||||
|
||||
/* What's this? */
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
printf("vinum: plex %s is %s\n", plex->name, plex_state(plex->state));
|
||||
/* Now see what we have left, and whether
|
||||
* we're taking the volume down */
|
||||
if (plex->volno >= 0) { /* we have a volume */
|
||||
struct volume *vol = &VOL[plex->volno];
|
||||
|
||||
vps = vpstate(plex); /* get our combined state again */
|
||||
if ((flags & setstate_norecurse) == 0) { /* we can recurse */
|
||||
if ((vol->state == volume_up)
|
||||
&& (vps == volplex_alldown)) /* and we're all down */
|
||||
set_volume_state(plex->volno, volume_down, setstate_recursing); /* take our volume down */
|
||||
else if ((vol->state == volume_down)
|
||||
&& (vps & (volplex_otherup | volplex_onlyusup))) /* and at least one is up */
|
||||
set_volume_state(plex->volno, volume_up, setstate_recursing); /* bring our volume up */
|
||||
}
|
||||
}
|
||||
if ((flags & (setstate_configuring | setstate_recursing)) == 0) /* save config now */
|
||||
save_config();
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Update the state of a plex dependent on its plexes.
|
||||
* Also rebuild the unmapped_region and defective_region table */
|
||||
int
|
||||
set_volume_state(int volno, enum volumestate state, enum setstateflags flags)
|
||||
{
|
||||
int plexno;
|
||||
enum plexstates {
|
||||
plex_downstate = 1, /* found a plex which is down */
|
||||
plex_degradedstate = 2, /* found a plex which is halfway up */
|
||||
plex_upstate = 4 /* found a plex which is completely up */
|
||||
};
|
||||
|
||||
int plexstatemap = 0; /* note the states we find */
|
||||
struct volume *vol = &VOL[volno]; /* point to our volume */
|
||||
|
||||
if (vol->state == state) /* we're there already */
|
||||
return 0; /* no change */
|
||||
if (vol->state == volume_unallocated) /* no volume to do anything with, */
|
||||
return 0;
|
||||
|
||||
for (plexno = 0; plexno < vol->plexes; plexno++) {
|
||||
struct plex *plex = &PLEX[vol->plex[plexno]]; /* point to the plex */
|
||||
switch (plex->state) {
|
||||
case plex_degraded:
|
||||
case plex_flaky:
|
||||
case plex_reviving:
|
||||
plexstatemap |= plex_degradedstate;
|
||||
break;
|
||||
|
||||
case plex_up:
|
||||
plexstatemap |= plex_upstate;
|
||||
break;
|
||||
|
||||
default:
|
||||
plexstatemap |= plex_downstate;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (state == volume_up) { /* want to come up */
|
||||
if (plexstatemap & plex_upstate) { /* we have a plex which is completely up */
|
||||
vol->state = volume_up; /* did it */
|
||||
printf("vinum: volume %s is %s\n", vol->name, volume_state(vol->state));
|
||||
if ((flags & (setstate_configuring | setstate_recursing)) == 0) /* save config now */
|
||||
save_config();
|
||||
return 1;
|
||||
}
|
||||
/* Here we should check whether we have enough
|
||||
* coverage for the complete volume. Writeme XXX */
|
||||
} else if (state == volume_down) { /* want to go down */
|
||||
if ((vol->opencount == 0) /* not open */
|
||||
||(flags & setstate_force != 0)) { /* or we're forcing */
|
||||
vol->state = volume_down;
|
||||
printf("vinum: volume %s is %s\n", vol->name, volume_state(vol->state));
|
||||
if ((flags & (setstate_configuring | setstate_recursing)) == 0) /* save config now */
|
||||
save_config();
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
return 0; /* no change */
|
||||
}
|
||||
|
||||
/* Start an object, in other words do what we can to get it up.
|
||||
* This is called from vinumioctl (VINUMSTART).
|
||||
* Return error indications via ioctl_reply
|
||||
*/
|
||||
void
|
||||
start_object(struct vinum_ioctl_msg *data)
|
||||
{
|
||||
int status;
|
||||
int realstatus; /* what we really have */
|
||||
int objindex = data->index; /* data gets overwritten */
|
||||
struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* format for returning replies */
|
||||
|
||||
switch (data->type) {
|
||||
case drive_object:
|
||||
status = set_drive_state(objindex, drive_up, setstate_none);
|
||||
realstatus = DRIVE[objindex].state == drive_up; /* set status on whether we really did it */
|
||||
break;
|
||||
|
||||
case sd_object:
|
||||
status = set_sd_state(objindex, sd_up, setstate_none); /* set state */
|
||||
realstatus = SD[objindex].state == sd_up; /* set status on whether we really did it */
|
||||
break;
|
||||
|
||||
case plex_object:
|
||||
if (PLEX[objindex].state == plex_reviving) { /* reviving, */
|
||||
ioctl_reply->error = revive_block(objindex); /* revive another block */
|
||||
ioctl_reply->msg[0] = '\0'; /* no comment */
|
||||
return;
|
||||
}
|
||||
status = set_plex_state(objindex, plex_up, setstate_none);
|
||||
realstatus = PLEX[objindex].state == plex_up; /* set status on whether we really did it */
|
||||
break;
|
||||
|
||||
case volume_object:
|
||||
status = set_volume_state(objindex, volume_up, setstate_none);
|
||||
realstatus = VOL[objindex].state == volume_up; /* set status on whether we really did it */
|
||||
break;
|
||||
|
||||
default:
|
||||
ioctl_reply->error = EINVAL;
|
||||
strcpy(ioctl_reply->msg, "Invalid object type");
|
||||
return;
|
||||
}
|
||||
/* There's no point in saying anything here:
|
||||
* the userland program does it better */
|
||||
ioctl_reply->msg[0] = '\0';
|
||||
if (realstatus == 0) /* couldn't do it */
|
||||
ioctl_reply->error = EINVAL;
|
||||
else
|
||||
ioctl_reply->error = 0;
|
||||
}
|
||||
|
||||
/* Stop an object, in other words do what we can to get it down
|
||||
* This is called from vinumioctl (VINUMSTOP).
|
||||
* Return error indications via ioctl_reply.
|
||||
*/
|
||||
void
|
||||
stop_object(struct vinum_ioctl_msg *data)
|
||||
{
|
||||
int status = 1;
|
||||
int objindex = data->index; /* save the number from change */
|
||||
struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* format for returning replies */
|
||||
|
||||
switch (data->type) {
|
||||
case drive_object:
|
||||
status = set_drive_state(objindex, drive_down, data->force);
|
||||
break;
|
||||
|
||||
case sd_object:
|
||||
status = set_sd_state(objindex, sd_down, data->force);
|
||||
break;
|
||||
|
||||
case plex_object:
|
||||
status = set_plex_state(objindex, plex_down, data->force);
|
||||
break;
|
||||
|
||||
case volume_object:
|
||||
status = set_volume_state(objindex, volume_down, data->force);
|
||||
break;
|
||||
|
||||
default:
|
||||
ioctl_reply->error = EINVAL;
|
||||
strcpy(ioctl_reply->msg, "Invalid object type");
|
||||
return;
|
||||
}
|
||||
ioctl_reply->msg[0] = '\0';
|
||||
if (status == 0) /* couldn't do it */
|
||||
ioctl_reply->error = EINVAL;
|
||||
else
|
||||
ioctl_reply->error = 0;
|
||||
}
|
||||
|
||||
/* VINUM_SETSTATE ioctl: set an object state
|
||||
* msg is the message passed by the user */
|
||||
void
|
||||
setstate(struct vinum_ioctl_msg *msg)
|
||||
{
|
||||
int sdno;
|
||||
struct sd *sd;
|
||||
struct plex *plex;
|
||||
struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) msg; /* format for returning replies */
|
||||
|
||||
switch (msg->state) {
|
||||
case object_down:
|
||||
stop_object(msg);
|
||||
break;
|
||||
|
||||
case object_initializing:
|
||||
switch (msg->type) {
|
||||
case sd_object:
|
||||
sd = &SD[msg->index];
|
||||
if ((msg->index >= vinum_conf.subdisks_used)
|
||||
|| (sd->state == sd_unallocated)) {
|
||||
sprintf(ioctl_reply->msg, "Invalid subdisk %d", msg->index);
|
||||
ioctl_reply->error = EFAULT;
|
||||
return;
|
||||
}
|
||||
set_sd_state(msg->index, sd_initializing, msg->force);
|
||||
if (sd->state != sd_initializing) {
|
||||
strcpy(ioctl_reply->msg, "Can't set state");
|
||||
ioctl_reply->error = EINVAL;
|
||||
} else
|
||||
ioctl_reply->error = 0;
|
||||
break;
|
||||
|
||||
case plex_object:
|
||||
plex = &PLEX[msg->index];
|
||||
if ((msg->index >= vinum_conf.plexes_used)
|
||||
|| (plex->state == plex_unallocated)) {
|
||||
sprintf(ioctl_reply->msg, "Invalid subdisk %d", msg->index);
|
||||
ioctl_reply->error = EFAULT;
|
||||
return;
|
||||
}
|
||||
set_plex_state(msg->index, plex_initializing, msg->force);
|
||||
if (plex->state != plex_initializing) {
|
||||
strcpy(ioctl_reply->msg, "Can't set state");
|
||||
ioctl_reply->error = EINVAL;
|
||||
} else {
|
||||
ioctl_reply->error = 0;
|
||||
for (sdno = 0; sdno < plex->subdisks; sdno++) {
|
||||
sd = &SD[plex->sdnos[sdno]];
|
||||
set_sd_state(plex->sdnos[sdno], sd_initializing, msg->force);
|
||||
if (sd->state != sd_initializing) {
|
||||
strcpy(ioctl_reply->msg, "Can't set state");
|
||||
ioctl_reply->error = EINVAL;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
strcpy(ioctl_reply->msg, "Invalid object");
|
||||
ioctl_reply->error = EINVAL;
|
||||
}
|
||||
break;
|
||||
|
||||
case object_up:
|
||||
start_object(msg);
|
||||
}
|
||||
}
|
213
sys/dev/vinum/vinumstate.h
Normal file
213
sys/dev/vinum/vinumstate.h
Normal file
@ -0,0 +1,213 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: vinumstate.h,v 1.11 1998/08/04 06:22:49 grog Exp grog $
|
||||
*/
|
||||
|
||||
/* This file gets read by makestatetext to create text files
|
||||
* with the names of the states, so don't change the file
|
||||
* format */
|
||||
|
||||
/* volume states.  Except for volume_up, whose comment precedes it,
 * each comment below describes the name on the line above it. */
enum volumestate {
    volume_unallocated,
    /* present but unused.  Must be 0 */

    volume_uninit,
    /* mentioned elsewhere but not defined */

    volume_down,

    /* The volume is up and functional, but not all plexes may be available */
    volume_up,
    volume_laststate = volume_up			    /* last value, for table dimensions */
};
|
||||
|
||||
/* plex states.  Each comment below describes the name which
 * follows it. */
enum plexstate {
    /* An empty entry, not a plex at all.   */
    plex_unallocated,

    /* The plex has been allocated, but its configuration
     * is not complete */
    plex_init,

    /* A plex which has gone completely down because of
     * I/O errors. */
    plex_faulty,

    /* A plex which has been taken down by the
     * administrator. */
    plex_down,

    /* A plex which is currently being brought up after
     * being not up.  This involves copying data from
     * another plex */
    plex_reviving,

    /* A plex which is being initialized */
    plex_initializing,

    /* *** The remaining states represent plexes which are
     * at least partially up.  Keep these separate so that
     * they can be checked more easily. */

    /* A plex entry which is at least partially up.  Not
     * all subdisks are available, and an inconsistency
     * has occurred.  If no other plex is uncorrupted,
     * the volume is no longer consistent. */
    plex_corrupt,

    plex_firstup = plex_corrupt,			    /* first "up" state */

    /* A plex entry which is at least partially up.  Not
     * all subdisks are available, but so far no
     * inconsistency has occurred (this will change with
     * the first write to the address space occupied by
     * a defective subdisk).  A RAID 5 plex with one subdisk
     * down will remain degraded even after a write */
    plex_degraded,

    /* A plex which is really up, but which has a reborn
     * subdisk which we don't completely trust, and
     * which we don't want to read if we can avoid it */
    plex_flaky,

    /* A plex entry which is completely up.  All subdisks
     * are up. */
    plex_up,

    plex_laststate = plex_up				    /* last value, for table dimensions */
};
|
||||
|
||||
/* subdisk states.  The order of the values is significant:
 * rebuild_plex_unmappedlist treats a subdisk whose state is
 * below sd_reborn as defective. */
enum sdstate {
    /* An empty entry, not a subdisk at all. */
    sd_unallocated,

    /* A subdisk entry which has not been created
     * completely.  Some fields may be empty.
     */
    sd_uninit,

    /* A subdisk entry which has been created completely.
     * All fields are correct, but the disk hasn't
     * been updated.
     */
    sd_init,

    /* A subdisk entry which has been created completely and
     * which is currently being initialized */
    sd_initializing,

    /* A subdisk entry which has been created completely.
     * All fields are correct, and the disk has been
     * updated, but there is no data on the disk.
     */
    sd_empty,

    /* *** The following states represent invalid data */
    /* A subdisk entry which has been created completely.
     * All fields are correct, the disk has been updated,
     * and the data was valid, but since then the drive
     * has gone down, and as a result updates have been
     * missed.
     */
    sd_obsolete,

    /* A subdisk entry which has been created completely.
     * All fields are correct, the disk has been updated,
     * and the data was valid, but since then the drive
     * has gone down, updates have been lost, and then
     * the drive came up again.
     */
    sd_stale,

    /* *** The following states represent valid, inaccessible data */
    /* A subdisk entry which has been created completely.
     * All fields are correct, the disk has been updated,
     * and the data was valid, but since then the drive
     * has gone down.  No attempt has been made to write
     * to the subdisk since the crash.
     */
    sd_crashed,

    /* A subdisk entry which was up, which contained
     * valid data, and which was taken down by the
     * administrator.  The data is valid. */
    sd_down,

    /* *** The following states represent accessible subdisks
     * with valid data */

    /* A subdisk entry which has been created completely.
     * All fields are correct, the disk has been updated,
     * and the data was valid, but since then the drive
     * has gone down and up again.  No updates were lost,
     * but it is possible that the subdisk has been
     * damaged.  We won't read from this subdisk if we
     * have a choice.  If this is the only subdisk which
     * covers this address space in the plex, we set its
     * state to sd_up under these circumstances, so this
     * status implies that there is another subdisk to
     * fulfil the request.
     */
    sd_reborn,

    /* A subdisk entry which has been created completely.
     * All fields are correct, the disk has been updated,
     * and the data is valid.
     */
    sd_up,

    sd_laststate = sd_up				    /* last value, for table dimensions */
};
|
||||
|
||||
/* drive states.  Each comment below describes the name on the
 * line above it. */
enum drivestate {
    drive_unallocated,
    /* present but unused.  Must be 0 */

    drive_uninit,
    /* just mentioned in some other config entry */

    drive_down,
    /* not accessible */

    drive_coming_up,
    /* in the process of being brought up */

    drive_up,
    /* up and running */

    drive_laststate = drive_up				    /* last value, for table dimensions */
};
|
211
sys/dev/vinum/vinumutil.c
Normal file
211
sys/dev/vinum/vinumutil.c
Normal file
@ -0,0 +1,211 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: util.c,v 1.7 1998/08/07 09:23:10 grog Exp grog $
|
||||
*/
|
||||
|
||||
/* This file contains utility routines used both in kernel and user context */
|
||||
|
||||
#include "vinumhdr.h"
|
||||
#include "statetexts.h"
|
||||
#ifndef REALLYKERNEL
|
||||
#include <stdio.h>
|
||||
extern jmp_buf command_fail; /* return on a failed command */
|
||||
#endif
|
||||
|
||||
/* Scratch buffer in which the state-to-text routines in this file
 * format the "Invalid ..." text for a value they don't know.  They
 * return a pointer to it, so the text is overwritten by the next such
 * call. */
static char numeric_state[32];				    /* temporary buffer for ASCII conversions */
/* Number of entries in the table <x>statetext, computed as the size of
 * the table divided by the size of a char pointer.
 * NOTE(review): presumably the tables are arrays of char * declared in
 * statetexts.h -- confirm. */
#define STATECOUNT(x) (sizeof (x##statetext) / sizeof (char *))
|
||||
/* Return drive state as a string */
|
||||
char *
|
||||
drive_state(enum drivestate state)
|
||||
{
|
||||
if (((unsigned) state) >= STATECOUNT(drive)) {
|
||||
sprintf(numeric_state, "Invalid state %d", (int) state);
|
||||
return numeric_state;
|
||||
} else
|
||||
return drivestatetext[state];
|
||||
}
|
||||
|
||||
/* Return volume state as a string */
|
||||
char *
|
||||
volume_state(enum volumestate state)
|
||||
{
|
||||
if (((unsigned) state) >= STATECOUNT(vol)) {
|
||||
sprintf(numeric_state, "Invalid state %d", (int) state);
|
||||
return numeric_state;
|
||||
} else
|
||||
return volstatetext[state];
|
||||
}
|
||||
|
||||
/* Return plex state as a string */
|
||||
char *
|
||||
plex_state(enum plexstate state)
|
||||
{
|
||||
if (((unsigned) state) >= STATECOUNT(plex)) {
|
||||
sprintf(numeric_state, "Invalid state %d", (int) state);
|
||||
return numeric_state;
|
||||
} else
|
||||
return plexstatetext[state];
|
||||
}
|
||||
|
||||
/* Return plex organization as a string */
|
||||
char *
|
||||
plex_org(enum plexorg org)
|
||||
{
|
||||
switch (org) {
|
||||
case plex_disorg: /* disorganized */
|
||||
return "disorg";
|
||||
break;
|
||||
|
||||
case plex_concat: /* concatenated plex */
|
||||
return "concat";
|
||||
break;
|
||||
|
||||
case plex_striped: /* striped plex */
|
||||
return "striped";
|
||||
break;
|
||||
|
||||
|
||||
default:
|
||||
sprintf(numeric_state, "Invalid org %d", (int) org);
|
||||
return numeric_state;
|
||||
}
|
||||
}
|
||||
|
||||
/* Return sd state as a string */
|
||||
char *
|
||||
sd_state(enum sdstate state)
|
||||
{
|
||||
if (((unsigned) state) >= STATECOUNT(sd)) {
|
||||
sprintf(numeric_state, "Invalid state %d", (int) state);
|
||||
return numeric_state;
|
||||
} else
|
||||
return sdstatetext[state];
|
||||
}
|
||||
|
||||
/* Now convert in the other direction */
|
||||
/* These are currently used only internally,
|
||||
* so we don't do too much error checking */
|
||||
enum drivestate
|
||||
DriveState(char *text)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < STATECOUNT(drive); i++)
|
||||
if (strcmp(text, drivestatetext[i]) == 0) /* found it */
|
||||
return (enum drivestate) i;
|
||||
return -1;
|
||||
}
|
||||
|
||||
enum sdstate
|
||||
SdState(char *text)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < STATECOUNT(sd); i++)
|
||||
if (strcmp(text, sdstatetext[i]) == 0) /* found it */
|
||||
return (enum sdstate) i;
|
||||
return -1;
|
||||
}
|
||||
|
||||
enum plexstate
|
||||
PlexState(char *text)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < STATECOUNT(plex); i++)
|
||||
if (strcmp(text, plexstatetext[i]) == 0) /* found it */
|
||||
return (enum plexstate) i;
|
||||
return -1;
|
||||
}
|
||||
|
||||
enum volumestate
|
||||
VolState(char *text)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < STATECOUNT(vol); i++)
|
||||
if (strcmp(text, volstatetext[i]) == 0) /* found it */
|
||||
return (enum volstate) i;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Take a number with an optional scale factor and convert
|
||||
* it to a number of bytes.
|
||||
*
|
||||
* The scale factors are:
|
||||
*
|
||||
* b blocks (of 512 bytes)
|
||||
* k kilobytes (1024 bytes)
|
||||
* m megabytes (of 1024 * 1024 bytes)
|
||||
* g gigabytes (of 1024 * 1024 * 1024 bytes)
|
||||
*/
|
||||
u_int64_t
|
||||
sizespec(char *spec)
|
||||
{
|
||||
u_int64_t size;
|
||||
char *s;
|
||||
|
||||
size = 0;
|
||||
s = spec;
|
||||
if ((*s >= '0') && (*s <= '9')) { /* it's numeric */
|
||||
while ((*s >= '0') && (*s <= '9')) /* it's numeric */
|
||||
size = size * 10 + *s++ - '0'; /* convert it */
|
||||
switch (*s) {
|
||||
case '\0':
|
||||
return size;
|
||||
|
||||
case 'B':
|
||||
case 'b':
|
||||
return size * 512;
|
||||
|
||||
case 'K':
|
||||
case 'k':
|
||||
return size * 1024;
|
||||
|
||||
case 'M':
|
||||
case 'm':
|
||||
return size * 1024 * 1024;
|
||||
|
||||
case 'G':
|
||||
case 'g':
|
||||
return size * 1024 * 1024 * 1024;
|
||||
}
|
||||
}
|
||||
#ifdef REALLYKERNEL
|
||||
throw_rude_remark(EINVAL, "Invalid length specification: %s", spec);
|
||||
#else
|
||||
fprintf(stderr, "Invalid length specification: %s", spec);
|
||||
longjmp(command_fail, -1);
|
||||
#endif
|
||||
/* NOTREACHED */
|
||||
return -1;
|
||||
}
|
510
sys/dev/vinum/vinumvar.h
Normal file
510
sys/dev/vinum/vinumvar.h
Normal file
@ -0,0 +1,510 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: vinumvar.h,v 1.15 1998/08/14 06:36:41 grog Exp grog $
|
||||
*/
|
||||
|
||||
/* XXX gdb can't find our global pointers, so use this kludge to
|
||||
* point to them locally. Remove after testing */
|
||||
#define BROKEN_GDB struct _vinum_conf *VC = &vinum_conf
|
||||
|
||||
#include <sys/time.h>
|
||||
#include "vinumstate.h"
|
||||
/* Some configuration maxima.  They're an enum because
 * we can't define global constants.  Sorry about that.
 *
 * These aren't as bad as they look: most of them
 * are soft limits.  Only the MAXCONFIG parameter is set in stone.
 *
 * The device number macros are defined in the middle of the enum
 * because VINUM_SUPERDEV is computed with VINUMBDEV.  Every macro
 * argument is parenthesized, so compound expressions (for example
 * `a | b' or `n + 1') may be passed safely.
 */

enum constants {
    VINUM_HEADER = 512,                                     /* size of header on disk */
    MAXCONFIGLINE = 1024,                                   /* maximum size of a single config line */
    /* XXX Do we still need this? */
    MINVINUMSLICE = 1048576,                                /* minimum size of a slice */

    CDEV_MAJOR = 91,                                        /* major number for character device */
    BDEV_MAJOR = 25,                                        /* and block device */

    ROUND_ROBIN_READPOL = -1,                               /* round robin read policy */

    /* type field in minor number */
    VINUM_VOLUME_TYPE = 0,
    VINUM_PLEX_TYPE = 1,
    VINUM_SD_TYPE = 2,
    VINUM_DRIVE_TYPE = 3,
    VINUM_SUPERDEV_TYPE = 4,                                /* super device. */

    /* Shifts for the individual fields in the device */
    VINUM_TYPE_SHIFT = 28,
    VINUM_VOL_SHIFT = 0,
    VINUM_PLEX_SHIFT = 16,
    VINUM_SD_SHIFT = 20,
    VINUM_VOL_WIDTH = 8,
    VINUM_PLEX_WIDTH = 3,
    VINUM_SD_WIDTH = 8,
    MAJORDEV_SHIFT = 8,


/* Create a block device number from volume, plex, subdisk and type fields */
#define VINUMBDEV(v,p,s,t)  ((BDEV_MAJOR << MAJORDEV_SHIFT)     \
                             | ((v) << VINUM_VOL_SHIFT)         \
                             | ((p) << VINUM_PLEX_SHIFT)        \
                             | ((s) << VINUM_SD_SHIFT)          \
                             | ((t) << VINUM_TYPE_SHIFT) )

/* And a character device number */
#define VINUMCDEV(v,p,s,t)  ((CDEV_MAJOR << MAJORDEV_SHIFT)     \
                             | ((v) << VINUM_VOL_SHIFT)         \
                             | ((p) << VINUM_PLEX_SHIFT)        \
                             | ((s) << VINUM_SD_SHIFT)          \
                             | ((t) << VINUM_TYPE_SHIFT) )

/* extract device type */
#define DEVTYPE(x) (((x) >> VINUM_TYPE_SHIFT) & 7)

/* extract volume number */
#define VOLNO(x) ((x) & ((1 << VINUM_VOL_WIDTH) - 1))

/* extract plex number */
#define PLEXNO(x) (VOL [VOLNO (x)].plex [((x) >> VINUM_PLEX_SHIFT) & ((1 << VINUM_PLEX_WIDTH) - 1)])

/* extract subdisk number */
#define SDNO(x) (PLEX [PLEXNO (x)].sdnos [((x) >> VINUM_SD_SHIFT) & ((1 << VINUM_SD_WIDTH) - 1)])

/* extract drive number */
#define DRIVENO(x) (SD [SDNO (x)].driveno)

    VINUM_SUPERDEV = VINUMBDEV(0, 0, 0, VINUM_SUPERDEV_TYPE), /* superdevice number */

/* the number of object entries to cater for initially, and also the
 * value by which they are incremented.  It doesn't take long
 * to extend them, so theoretically we could start with 1 of each, but
 * it's untidy to allocate such small areas.  These values are
 * probably too small.
 */

    INITIAL_DRIVES = 4,
    INITIAL_VOLUMES = 4,
    INITIAL_PLEXES = 8,
    INITIAL_SUBDISKS = 16,
    INITIAL_SUBDISKS_IN_PLEX = 4,                           /* number of subdisks to allocate to a plex */
    INITIAL_SUBDISKS_IN_DRIVE = 4,                          /* number of subdisks to allocate to a drive */
    INITIAL_DRIVE_FREELIST = 16,                            /* number of entries in drive freelist */
    PLEX_REGION_TABLE_SIZE = 8,                             /* number of entries in plex region tables */
    INITIAL_LOCKS = 8,                                      /* number of locks to allocate to a volume */
    DEFAULT_REVIVE_BLOCKSIZE = 32768,                       /* size of block to transfer in one op */
};
|
||||
|
||||
/* device numbers */
|
||||
|
||||
/*
|
||||
* 31 30 28 27 20 19 18 16 15 8 7 0
|
||||
* |-----------------------------------------------------------------------------------------------|
|
||||
* |X | Type | Subdisk number | X| Plex | Major number | volume number |
|
||||
* |-----------------------------------------------------------------------------------------------|
|
||||
*
|
||||
* 0x2 03 1 19 06
|
||||
*/
|
||||
struct devcode {
|
||||
/* CARE. These fields assume a big-endian word. On a
|
||||
* little-endian system, they're the wrong way around */
|
||||
unsigned volume:8; /* up to 256 volumes */
|
||||
unsigned major:8; /* this is where the major number fits */
|
||||
unsigned plex:3; /* up to 8 plexes per volume */
|
||||
unsigned unused:1; /* up for grabs */
|
||||
unsigned sd:8; /* up to 256 subdisks per plex */
|
||||
unsigned type:3; /* type of object */
|
||||
/* type field
|
||||
VINUM_VOLUME = 0,
|
||||
VINUM_PLEX = 1,
|
||||
VINUM_SUBDISK = 2,
|
||||
VINUM_DRIVE = 3,
|
||||
VINUM_SUPERDEV = 4, */
|
||||
unsigned signbit:1; /* to make 32 bits */
|
||||
};
|
||||
|
||||
#define VINUM_DIR "/dev/vinum"
|
||||
#define VINUM_RDIR "/dev/rvinum"
|
||||
#define VINUM_SUPERDEV_NAME VINUM_DIR"/control"
|
||||
#define MAXDRIVENAME 32 /* maximum length of a device name */
|
||||
#define MAXSDNAME 64 /* maximum length of a subdisk name */
|
||||
#define MAXPLEXNAME 64 /* maximum length of a plex name */
|
||||
#define MAXVOLNAME 64 /* maximum length of a volume name */
|
||||
#define MAXNAME 64 /* maximum length of any name */
|
||||
#define MAXVOLPLEX 8 /* maximum number of plexes in a volume */
|
||||
|
||||
/* Flags for all objects. Most of them only apply to
|
||||
* specific objects, but we have space for all in any
|
||||
* 32 bit flags word. */
|
||||
enum objflags {
|
||||
VF_LOCKED = 1, /* somebody has locked access to this object */
|
||||
VF_LOCKING = 2, /* we want access to this object */
|
||||
VF_WRITETHROUGH = 8, /* volume: write through */
|
||||
VF_INITED = 0x10, /* unit has been initialized */
|
||||
VF_WLABEL = 0x20, /* label area is writable */
|
||||
VF_LABELLING = 0x40, /* unit is currently being labelled */
|
||||
VF_WANTED = 0x80, /* someone is waiting to obtain a lock */
|
||||
VF_RAW = 0x100, /* raw volume (no file system) */
|
||||
VF_LOADED = 0x200, /* module is loaded */
|
||||
VF_CONFIGURING = 0x400, /* somebody is changing the config */
|
||||
VF_WILL_CONFIGURE = 0x800, /* somebody wants to change the config */
|
||||
VF_CONFIG_INCOMPLETE = 0x1000, /* haven't finished changing the config */
|
||||
VF_CONFIG_SETUPSTATE = 0x2000, /* set a volume up if all plexes are empty */
|
||||
VF_READING_CONFIG = 0x4000, /* we're reading config database from disk */
|
||||
VF_KERNELOP = 0x8000, /* we're performing ops from kernel space */
|
||||
};
|
||||
|
||||
/* Global configuration information for the vinum subsystem */
|
||||
struct _vinum_conf {
|
||||
/* Pointers to vinum structures */
|
||||
struct drive *drive;
|
||||
struct sd *sd;
|
||||
struct plex *plex;
|
||||
struct volume *volume;
|
||||
|
||||
/* the number allocated */
|
||||
int drives_allocated;
|
||||
int subdisks_allocated;
|
||||
int plexes_allocated;
|
||||
int volumes_allocated;
|
||||
|
||||
/* and the number currently in use */
|
||||
int drives_used;
|
||||
int subdisks_used;
|
||||
int plexes_used;
|
||||
int volumes_used;
|
||||
|
||||
int flags;
|
||||
int opencount; /* number of times we've been opened */
|
||||
#if DEBUG
|
||||
int lastrq;
|
||||
struct buf *lastbuf;
|
||||
#endif
|
||||
};
|
||||
|
||||
/* Use these defines to simplify code */
|
||||
#define DRIVE vinum_conf.drive
|
||||
#define SD vinum_conf.sd
|
||||
#define PLEX vinum_conf.plex
|
||||
#define VOL vinum_conf.volume
|
||||
#define VFLAGS vinum_conf.flags
|
||||
|
||||
/* Slice header
|
||||
|
||||
* Vinum drives start with this structure:
|
||||
*
|
||||
* Sector
|
||||
* |--------------------------------------|
|
||||
* | PDP-11 memorial boot block | 0
|
||||
* |--------------------------------------|
|
||||
* | Disk label, maybe | 1
|
||||
* |--------------------------------------|
|
||||
* |   Slice definition (vinum_hdr)       |      8
* |--------------------------------------|
* |                                      |
* |   Configuration info, first copy     |      9
* |                                      |
* |--------------------------------------|
* |                                      |
* |   Configuration info, second copy    |      9 + size of config
|
||||
* | |
|
||||
* |--------------------------------------|
|
||||
*/
|
||||
|
||||
/* Sizes and offsets of our information */
|
||||
enum {
|
||||
VINUM_LABEL_OFFSET = 4096, /* offset of vinum label */
|
||||
VINUMHEADERLEN = 512, /* size of vinum label */
|
||||
VINUM_CONFIG_OFFSET = 4608, /* offset of first config copy */
|
||||
MAXCONFIG = 65536, /* and size of config copy */
|
||||
DATASTART = (MAXCONFIG * 2 + VINUM_CONFIG_OFFSET) / DEV_BSIZE /* this is where the data starts */
|
||||
};
|
||||
|
||||
/* hostname is 256 bytes long, but we don't need to shlep
|
||||
* multiple copies in vinum. We use the host name just
|
||||
* to identify this system, and 32 bytes should be ample
|
||||
* for that purpose */
|
||||
#define VINUMHOSTNAMELEN 32
|
||||
|
||||
struct vinum_label {
|
||||
char sysname[VINUMHOSTNAMELEN]; /* system name at time of creation */
|
||||
char name[MAXDRIVENAME]; /* our name of the drive */
|
||||
struct timeval date_of_birth; /* the time it was created */
|
||||
struct timeval last_update; /* and the time of last update */
|
||||
off_t drive_size; /* total size in bytes of the drive.
|
||||
* This value includes the headers */
|
||||
};
|
||||
|
||||
struct vinum_hdr {
|
||||
long long magic; /* we're long on magic numbers */
|
||||
/* XXX Get these right for big-endian */
|
||||
#define VINUM_MAGIC 22322600044678729LL /* should be this */
|
||||
#define VINUM_NOMAGIC 22322600044678990LL /* becomes this after obliteration */
|
||||
int config_length; /* size in bytes of each copy of the
|
||||
* configuration info.
|
||||
* This must be a multiple of the sector size. */
|
||||
|
||||
struct vinum_label label; /* unique label */
|
||||
};
|
||||
|
||||
/* Information returned from read_drive_label */
|
||||
enum drive_label_info {
|
||||
DL_CANT_OPEN, /* invalid partition */
|
||||
DL_NOT_OURS, /* valid partition, but no vinum label */
|
||||
DL_DELETED_LABEL, /* valid partition, deleted label found */
|
||||
DL_WRONG_DRIVE, /* drive name doesn't match */
|
||||
DL_OURS /* valid partition and label found */
|
||||
};
|
||||
|
||||
/*** Drive definitions ***/
|
||||
/* A drive corresponds to a disk slice. We use a different term to show
|
||||
* the difference in usage: it doesn't have to be a slice, and could
|
||||
* theoretically be a complete, unpartitioned disk */
|
||||
|
||||
struct drive {
|
||||
enum drivestate state; /* current state */
|
||||
int subdisks_allocated; /* number of entries in sd */
|
||||
int subdisks_used; /* and the number used */
|
||||
int blocksize; /* size of fs blocks */
|
||||
u_int64_t sectors_available; /* number of sectors still available */
|
||||
int secsperblock;
|
||||
int lasterror; /* last error on drive */
|
||||
int driveno; /* index of drive in vinum_conf */
|
||||
int opencount; /* number of up subdisks */
|
||||
u_int64_t reads; /* number of reads on this drive */
|
||||
u_int64_t writes; /* number of writes on this drive */
|
||||
u_int64_t bytes_read; /* number of bytes read */
|
||||
u_int64_t bytes_written; /* number of bytes written */
|
||||
dev_t dev; /* and device number */
|
||||
char devicename[MAXDRIVENAME]; /* name of the slice it's on */
|
||||
struct vnode *vp; /* vnode pointer */
|
||||
struct proc *p;
|
||||
struct vinum_label label; /* and the label information */
|
||||
struct partinfo partinfo; /* partition information */
|
||||
int freelist_size; /* number of entries alloced in free list */
|
||||
int freelist_entries; /* number of entries used in free list */
|
||||
struct drive_freelist { /* sorted list of free space on drive */
|
||||
u_int64_t offset;
|
||||
long sectors;
|
||||
} *freelist;
|
||||
};
|
||||
|
||||
/*** Subdisk definitions ***/
|
||||
|
||||
struct sd {
|
||||
enum sdstate state; /* state */
|
||||
/* offsets in blocks */
|
||||
int64_t driveoffset; /* offset on drive */
|
||||
int64_t plexoffset; /* offset in plex */
|
||||
u_int64_t sectors; /* and length in sectors */
|
||||
int plexno; /* index of plex, if it belongs */
|
||||
int driveno; /* index of the drive on which it is located */
|
||||
int sdno; /* our index in vinum_conf */
|
||||
int pid; /* pid of process which opened us */
|
||||
u_int64_t reads; /* number of reads on this subdisk */
|
||||
u_int64_t writes; /* number of writes on this subdisk */
|
||||
u_int64_t bytes_read; /* number of bytes read */
|
||||
u_int64_t bytes_written; /* number of bytes written */
|
||||
char name[MAXSDNAME]; /* name of subdisk */
|
||||
};
|
||||
|
||||
/*** Plex definitions ***/
|
||||
|
||||
/* kinds of plex organization */
|
||||
enum plexorg {
|
||||
plex_disorg, /* disorganized */
|
||||
plex_concat, /* concatenated plex */
|
||||
plex_striped, /* striped plex */
|
||||
plex_raid5 /* RAID5 plex */
|
||||
};
|
||||
|
||||
/* Region in plex (either defective or unmapped) */
|
||||
struct plexregion {
|
||||
u_int64_t offset; /* start of region */
|
||||
u_int64_t length; /* length */
|
||||
};
|
||||
|
||||
struct plex {
|
||||
enum plexorg organization; /* Plex organization */
|
||||
enum plexstate state; /* and current state */
|
||||
u_int64_t length; /* total length of plex (max offset) */
|
||||
int flags;
|
||||
int stripesize; /* size of stripe or raid band, in sectors */
|
||||
int subdisks; /* number of associated subdisks */
|
||||
int subdisks_allocated; /* number of subdisks allocated space for */
|
||||
int *sdnos; /* list of component subdisks */
|
||||
int plexno; /* index of plex in vinum_conf */
|
||||
int volno; /* index of volume */
|
||||
int volplexno; /* number of plex in volume */
|
||||
int pid; /* pid of process which opened us */
|
||||
/* Lock information */
|
||||
int locks; /* number of locks used */
|
||||
int alloclocks; /* number of locks allocated */
|
||||
struct rangelock *lock; /* ranges of locked addresses */
|
||||
/* Statistics */
|
||||
u_int64_t reads; /* number of reads on this plex */
|
||||
u_int64_t writes; /* number of writes on this plex */
|
||||
u_int64_t bytes_read; /* number of bytes read */
|
||||
u_int64_t bytes_written; /* number of bytes written */
|
||||
u_int64_t multiblock; /* requests that needed more than one block */
|
||||
u_int64_t multistripe; /* requests that needed more than one stripe */
|
||||
/* revive parameters */
|
||||
u_int64_t revived; /* block number of current revive request */
|
||||
int revive_blocksize; /* revive block size (bytes) */
|
||||
int revive_interval; /* and time to wait between transfers */
|
||||
struct request *waitlist; /* list of requests waiting on revive op */
|
||||
/* geometry control */
|
||||
int defective_regions; /* number of regions which are defective */
|
||||
int defective_region_count; /* number of entries in defective_region */
|
||||
struct plexregion *defective_region; /* list of offset/length pairs: defective sds */
|
||||
int unmapped_regions; /* number of regions which are missing */
|
||||
int unmapped_region_count; /* number of entries in unmapped_region */
|
||||
struct plexregion *unmapped_region; /* list of offset/length pairs: missing sds */
|
||||
char name[MAXPLEXNAME]; /* name of plex */
|
||||
};
|
||||
|
||||
/*** Volume definitions ***/
|
||||
|
||||
#define MAXPLEX 8 /* maximum number of plexes */
|
||||
|
||||
|
||||
struct volume {
|
||||
enum volumestate state; /* current state */
|
||||
int plexes; /* number of plexes */
|
||||
int preferred_plex; /* plex to read from, -1 for round-robin */
|
||||
int last_plex_read; /* index of plex used for last read,
|
||||
* for round-robin */
|
||||
dev_t devno; /* device number */
|
||||
int flags; /* status and configuration flags */
|
||||
int opencount; /* number of opens (all the same process) */
|
||||
int openflags; /* flags supplied to last open(2) */
|
||||
u_int64_t size; /* size of volume */
|
||||
int disk; /* disk index */
|
||||
int blocksize; /* logical block size */
|
||||
int active; /* number of outstanding requests active */
|
||||
int subops; /* and the number of suboperations */
|
||||
pid_t pid; /* pid of locker */
|
||||
/* Statistics */
|
||||
u_int64_t bytes_read; /* number of bytes read */
|
||||
u_int64_t bytes_written; /* number of bytes written */
|
||||
u_int64_t reads; /* number of reads on this volume */
|
||||
u_int64_t writes; /* number of writes on this volume */
|
||||
u_int64_t recovered_reads; /* reads recovered from another plex */
|
||||
/* Unlike subdisks in the plex, space for the plex pointers is static */
|
||||
int plex[MAXPLEX]; /* index of plexes */
|
||||
char name[MAXVOLNAME]; /* name of volume */
|
||||
struct disklabel label; /* for DIOCGPART */
|
||||
};
|
||||
|
||||
/* Table expansion.  Expand table, which contains oldcount
 * entries of type element, by increment entries, and change
 * oldcount accordingly.
 *
 * table and oldcount must be lvalues.  oldcount and increment are
 * evaluated more than once, so they must not have side effects.
 * All arguments are parenthesized in the expansion, so an expression
 * such as `n << 1' may be passed as increment.
 *
 * The expansion is a brace-enclosed compound statement, not an
 * expression. */
#define EXPAND(table, element, oldcount, increment)                     \
{                                                                       \
    expand_table ((void **) &(table),                                   \
                  (oldcount) * sizeof (element),                        \
                  ((oldcount) + (increment)) * sizeof (element) );      \
    (oldcount) += (increment);                                          \
}
|
||||
|
||||
/* Information on vinum's memory usage */
|
||||
struct meminfo {
|
||||
int mallocs; /* number of malloced blocks */
|
||||
int total_malloced; /* total amount malloced */
|
||||
int highwater; /* maximum number of mallocs */
|
||||
struct mc *malloced; /* pointer to kernel table */
|
||||
};
|
||||
|
||||
struct mc {
|
||||
int seq;
|
||||
int size;
|
||||
short line;
|
||||
short flags;
|
||||
#define ALLOC_KVA 1 /* allocated via kva calls */
|
||||
int *databuf; /* really vm_object_t */
|
||||
caddr_t address;
|
||||
char file[16];
|
||||
};
|
||||
|
||||
/* These enums are used by the state transition
|
||||
* routines. They're in bit map format:
|
||||
*
|
||||
* Bit 0: Other plexes in the volume are down
|
||||
* Bit 1: Other plexes in the volume are up
|
||||
* Bit 2: The current plex is up
|
||||
* Maybe they should be local to
|
||||
* state.c */
|
||||
enum volplexstate {
|
||||
volplex_onlyusdown = 0, /* we're the only plex, and we're down */
|
||||
volplex_alldown, /* 1: another plex is down, and so are we */
|
||||
volplex_otherup, /* 2: another plex is up */
|
||||
volplex_otherupdown, /* other plexes are up and down */
|
||||
volplex_onlyus, /* 4: we're up and alone */
|
||||
volplex_onlyusup, /* only we are up, others are down */
|
||||
volplex_allup, /* all plexes are up */
|
||||
volplex_someup /* some plexes are up, including us */
|
||||
};
|
||||
|
||||
/* state map for plex */
|
||||
enum sdstates {
|
||||
sd_emptystate = 1,
|
||||
sd_downstate = 2, /* found an SD which is down */
|
||||
sd_crashedstate = 4, /* found an SD which is crashed */
|
||||
sd_obsoletestate = 8, /* found an SD which is obsolete */
|
||||
sd_stalestate = 16, /* found an SD which is stale */
|
||||
sd_rebornstate = 32, /* found an SD which is reborn */
|
||||
sd_upstate = 64, /* found an SD which is up */
|
||||
sd_initstate = 128, /* found an SD which is init */
|
||||
sd_otherstate = 256 /* found an SD in some other state */
|
||||
};
|
||||
|
||||
/* This is really just a parameter to pass to
|
||||
* set_<foo>_state, but since it needs to be known
|
||||
* in the external definitions, we need to define
|
||||
* it here */
|
||||
enum setstateflags {
|
||||
setstate_none = 0, /* no flags */
|
||||
setstate_force = 1, /* force the state change */
|
||||
setstate_configuring = 2, /* we're currently configuring, don't save */
|
||||
setstate_recursing = 4, /* we're called from another setstate function */
|
||||
setstate_norecurse = 8 /* don't call other setstate functions */
|
||||
};
|
||||
|
||||
#ifdef DEBUG
|
||||
/* Debugging stuff */
|
||||
#define DEBUG_ADDRESSES 1
|
||||
#define DEBUG_NUMOUTPUT 2
|
||||
#endif
|
37
sys/modules/vinum/COPYRIGHT
Normal file
37
sys/modules/vinum/COPYRIGHT
Normal file
@ -0,0 +1,37 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: COPYRIGHT,v 1.1 1998/03/05 06:07:05 grog Exp grog $
|
||||
*/
|
26
sys/modules/vinum/Makefile
Normal file
26
sys/modules/vinum/Makefile
Normal file
@ -0,0 +1,26 @@
|
||||
# $Id: Makefile.lkm.lite,v 1.2 1998/08/13 06:07:29 grog Exp grog $
|
||||
|
||||
.PATH: ${.CURDIR}/../../sys/dev/ccd
|
||||
KMOD= vinum_mod
|
||||
SRCS= vinum.c vinum.h vnode_if.h parser.c config.c io.c util.c vinumhdr.h request.h \
|
||||
state.c memory.c request.c lock.c vinumext.h vinumio.h vinumkw.h \
|
||||
vinumstate.h vinumvar.h revive.c vinumioctl.c interrupt.c
|
||||
NOMAN=
|
||||
PSEUDO_LKM=
|
||||
CFLAGS = -I. -O -g -I/usr/include/machine -DDEBUG -Wall -Wno-unused -Wno-parentheses
|
||||
|
||||
CLEANFILES+= vinum.h vnode_if.h vnode_if.c
|
||||
|
||||
all:
|
||||
|
||||
# We don't need this, but the Makefile wants it
|
||||
vinum.h:
|
||||
touch $@
|
||||
|
||||
state.h: maketabs vinumstate.h
|
||||
./maketabs >state.h
|
||||
|
||||
maketabs: maketabs.c
|
||||
${CC} -g -o maketabs maketabs.c
|
||||
|
||||
.include <bsd.kmod.mk>
|
1712
sys/modules/vinum/config.c
Normal file
1712
sys/modules/vinum/config.c
Normal file
File diff suppressed because it is too large
Load Diff
190
sys/modules/vinum/interrupt.c
Normal file
190
sys/modules/vinum/interrupt.c
Normal file
@ -0,0 +1,190 @@
|
||||
/* interrupt.c: bottom half of the driver */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: interrupt.c,v 1.1 1998/08/13 06:12:27 grog Exp grog $
|
||||
*/
|
||||
|
||||
#define REALLYKERNEL
|
||||
#include "vinumhdr.h"
|
||||
#include "request.h"
|
||||
#include <miscfs/specfs/specdev.h>
|
||||
#include <sys/resourcevar.h>
|
||||
|
||||
void complete_raid5_write(struct rqelement *);
|
||||
void freerq(struct request *rq);
|
||||
void free_rqg(struct rqgroup *rqg);
|
||||
void complete_rqe(struct buf *bp);
|
||||
void sdio_done(struct buf *bp);
|
||||
|
||||
/* Take a completed buffer, transfer the data back if
|
||||
* it's a read, and complete the high-level request
|
||||
* if this is the last subrequest.
|
||||
*
|
||||
* The bp parameter is in fact a struct rqelement, which
|
||||
* includes a couple of extras at the end.
|
||||
*/
|
||||
void
|
||||
complete_rqe(struct buf *bp)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
struct rqelement *rqe;
|
||||
struct request *rq;
|
||||
struct rqgroup *rqg;
|
||||
struct buf *ubp; /* user buffer */
|
||||
|
||||
rqe = (struct rqelement *) bp; /* point to the element element that completed */
|
||||
rqg = rqe->rqg; /* and the request group */
|
||||
rq = rqg->rq; /* and the complete request */
|
||||
|
||||
if ((bp->b_flags & B_ERROR) != 0) { /* transfer in error */
|
||||
if (bp->b_error != 0) /* did it return a number? */
|
||||
rq->error = bp->b_error; /* yes, put it in. */
|
||||
else if (rq->error == 0) /* no: do we have one already? */
|
||||
rq->error = EIO; /* no: catchall "I/O error" */
|
||||
if (rq->error == EIO) /* I/O error, */
|
||||
set_sd_state(rqe->sdno, sd_crashed, setstate_force); /* take the subdisk down */
|
||||
}
|
||||
/* Now update the statistics */
|
||||
if (bp->b_flags & B_READ) { /* read operation */
|
||||
DRIVE[rqe->driveno].reads++;
|
||||
DRIVE[rqe->driveno].bytes_read += bp->b_bcount;
|
||||
SD[rqe->sdno].reads++;
|
||||
SD[rqe->sdno].bytes_read += bp->b_bcount;
|
||||
PLEX[rqe->rqg->plexno].reads++;
|
||||
PLEX[rqe->rqg->plexno].bytes_read += bp->b_bcount;
|
||||
} else { /* write operation */
|
||||
DRIVE[rqe->driveno].writes++;
|
||||
DRIVE[rqe->driveno].bytes_written += bp->b_bcount;
|
||||
SD[rqe->sdno].writes++;
|
||||
SD[rqe->sdno].bytes_written += bp->b_bcount;
|
||||
PLEX[rqe->rqg->plexno].writes++;
|
||||
PLEX[rqe->rqg->plexno].bytes_written += bp->b_bcount;
|
||||
}
|
||||
ubp = rq->bp; /* user buffer */
|
||||
rqg->active--; /* one less request active */
|
||||
if (rqg->active == 0) /* request group finished, */
|
||||
rq->active--; /* one less */
|
||||
if (rq->active == 0) { /* request finished, */
|
||||
#if DEBUG
|
||||
if (debug & 4) {
|
||||
if (ubp->b_resid != 0) /* still something to transfer? */
|
||||
Debugger("resid");
|
||||
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < ubp->b_bcount; i += 512) /* XXX debug */
|
||||
if (((char *) ubp->b_data)[i] != '<') { /* and not what we expected */
|
||||
printf("At 0x%x (offset 0x%x): '%c' (0x%x)\n",
|
||||
(int) (&((char *) ubp->b_data)[i]),
|
||||
i,
|
||||
((char *) ubp->b_data)[i],
|
||||
((char *) ubp->b_data)[i]);
|
||||
Debugger("complete_request checksum");
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (rq->error) { /* did we have an error? */
|
||||
ubp->b_flags |= B_ERROR; /* yes, propagate to user */
|
||||
ubp->b_error = rq->error;
|
||||
} else
|
||||
ubp->b_resid = 0; /* completed our transfer */
|
||||
if (rq->isplex == 0) /* volume request, */
|
||||
VOL[rq->volplex.volno].active--; /* another request finished */
|
||||
biodone(ubp); /* top level buffer completed */
|
||||
freerq(rq); /* return the request storage */
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Free a request block and anything hanging off it */
|
||||
void
|
||||
freerq(struct request *rq)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
struct rqgroup *rqg;
|
||||
struct rqgroup *nrqg; /* next in chain */
|
||||
int rqno;
|
||||
|
||||
for (rqg = rq->rqg; rqg != NULL; rqg = nrqg) { /* through the whole request chain */
|
||||
for (rqno = 0; rqno < rqg->count; rqno++)
|
||||
if ((rqg->rqe[rqno].flags & XFR_MALLOCED) /* data buffer was malloced, */
|
||||
&&rqg->rqe[rqno].b.b_data) /* and the allocation succeeded */
|
||||
Free(rqg->rqe[rqno].b.b_data); /* free it */
|
||||
nrqg = rqg->next; /* note the next one */
|
||||
Free(rqg); /* and free this one */
|
||||
}
|
||||
Free(rq); /* free the request itself */
|
||||
}
|
||||
|
||||
void
|
||||
free_rqg(struct rqgroup *rqg)
|
||||
{
|
||||
if ((rqg->flags & XFR_GROUPOP) /* RAID 5 request */
|
||||
&&(rqg->rqe) /* got a buffer structure */
|
||||
&&(rqg->rqe->b.b_data)) /* and it has a buffer allocated */
|
||||
Free(rqg->rqe->b.b_data); /* free it */
|
||||
}
|
||||
|
||||
/* I/O on subdisk completed */
|
||||
void
|
||||
sdio_done(struct buf *bp)
|
||||
{
|
||||
struct sdbuf *sbp;
|
||||
|
||||
sbp = (struct sdbuf *) bp;
|
||||
if (sbp->b.b_flags & B_ERROR) { /* had an error */
|
||||
bp->b_flags |= B_ERROR;
|
||||
bp->b_error = sbp->b.b_error;
|
||||
}
|
||||
bp->b_resid = sbp->b.b_resid;
|
||||
biodone(sbp->bp); /* complete the caller's I/O */
|
||||
/* Now update the statistics */
|
||||
if (bp->b_flags & B_READ) { /* read operation */
|
||||
DRIVE[sbp->driveno].reads++;
|
||||
DRIVE[sbp->driveno].bytes_read += bp->b_bcount;
|
||||
SD[sbp->sdno].reads++;
|
||||
SD[sbp->sdno].bytes_read += bp->b_bcount;
|
||||
} else { /* write operation */
|
||||
DRIVE[sbp->driveno].writes++;
|
||||
DRIVE[sbp->driveno].bytes_written += bp->b_bcount;
|
||||
SD[sbp->sdno].writes++;
|
||||
SD[sbp->sdno].bytes_written += bp->b_bcount;
|
||||
}
|
||||
Free(sbp);
|
||||
}
|
886
sys/modules/vinum/io.c
Normal file
886
sys/modules/vinum/io.c
Normal file
@ -0,0 +1,886 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: io.c,v 1.16 1998/08/10 23:47:21 grog Exp grog $
|
||||
*/
|
||||
|
||||
#define STATIC /* nothing while we're testing XXX */
|
||||
|
||||
#if __FreeBSD__ < 3 /* this is in sys/disklabel.h in 3.0 and on */
|
||||
#define DTYPE_VINUM 12 /* vinum volume */
|
||||
#endif
|
||||
|
||||
#define REALLYKERNEL
|
||||
#include "vinumhdr.h"
|
||||
#include <miscfs/specfs/specdev.h>
|
||||
|
||||
extern jmp_buf command_fail; /* return on a failed command */
|
||||
struct _ioctl_reply *ioctl_reply; /* data pointer, for returning error messages */
|
||||
|
||||
#if __FreeBSD__ >= 3
|
||||
/* Why aren't these declared anywhere? XXX */
|
||||
int setjmp(jmp_buf);
|
||||
void longjmp(jmp_buf, int);
|
||||
#endif
|
||||
|
||||
/* pointer to ioctl p parameter, to save passing it around */
|
||||
extern struct proc *myproc;
|
||||
|
||||
/* Open the device associated with the drive, and set drive's vp */
|
||||
int
|
||||
open_drive(struct drive *drive, struct proc *p)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
struct nameidata nd;
|
||||
struct vattr va;
|
||||
int error;
|
||||
|
||||
if (drive->devicename[0] == '\0') /* no device name */
|
||||
sprintf(drive->devicename, "/dev/%s", drive->label.name); /* get it from the drive name */
|
||||
NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, drive->devicename, p);
|
||||
error = vn_open(&nd, FREAD | FWRITE, 0); /* open the device */
|
||||
if (error != 0) { /* can't open? */
|
||||
set_drive_state(drive->driveno, drive_down, 1);
|
||||
drive->lasterror = error;
|
||||
printf("vinum open_drive %s: failed with error %d\n", drive->devicename, error); /* XXX */
|
||||
return error;
|
||||
}
|
||||
drive->vp = nd.ni_vp;
|
||||
drive->p = p;
|
||||
|
||||
if (drive->vp->v_usecount > 1) { /* already in use? */
|
||||
#if __FreeBSD__ == 2 /* pre-4.4BSD Lite/2 parameters */
|
||||
VOP_UNLOCK(drive->vp);
|
||||
#else
|
||||
VOP_UNLOCK(drive->vp, 0, p);
|
||||
#endif
|
||||
close_drive(drive);
|
||||
set_drive_state(drive->driveno, drive_down, 1);
|
||||
drive->lasterror = EBUSY;
|
||||
printf("vinum open_drive %s: Drive in use\n", drive->devicename); /* XXX */
|
||||
return EBUSY;
|
||||
}
|
||||
error = VOP_GETATTR(drive->vp, &va, NOCRED, p);
|
||||
if (error) {
|
||||
#if __FreeBSD__ == 2 /* pre-4.4BSD Lite/2 parameters */
|
||||
VOP_UNLOCK(drive->vp);
|
||||
#else
|
||||
VOP_UNLOCK(drive->vp, 0, p);
|
||||
#endif
|
||||
close_drive(drive);
|
||||
set_drive_state(drive->driveno, drive_down, 1);
|
||||
drive->lasterror = error;
|
||||
printf("vinum open_drive %s: GETAATTR returns error %d\n", drive->devicename, error); /* XXX */
|
||||
return error;
|
||||
}
|
||||
drive->dev = va.va_rdev; /* device */
|
||||
|
||||
if (va.va_type != VBLK) { /* only consider block devices */
|
||||
#if __FreeBSD__ == 2 /* pre-4.4BSD Lite/2 parameters */
|
||||
VOP_UNLOCK(drive->vp);
|
||||
#else
|
||||
VOP_UNLOCK(drive->vp, 0, p);
|
||||
#endif
|
||||
close_drive(drive);
|
||||
set_drive_state(drive->driveno, drive_down, 1); /* this also closes the drive */
|
||||
drive->lasterror = ENOTBLK;
|
||||
printf("vinum open_drive %s: Not a block device\n", drive->devicename); /* XXX */
|
||||
return ENOTBLK;
|
||||
}
|
||||
drive->vp->v_numoutput = 0;
|
||||
#if __FreeBSD__ == 2 /* pre-4.4BSD Lite/2 parameters */
|
||||
VOP_UNLOCK(drive->vp);
|
||||
#else
|
||||
VOP_UNLOCK(drive->vp, 0, p);
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Set some variables in the drive struct
|
||||
* in more convenient form. Return error indication */
|
||||
int
|
||||
set_drive_parms(struct drive *drive)
|
||||
{
|
||||
drive->blocksize = BLKDEV_IOSIZE; /* XXX do we need this? */
|
||||
drive->secsperblock = drive->blocksize /* number of sectors per block */
|
||||
/ drive->partinfo.disklab->d_secsize;
|
||||
|
||||
/* Now update the label part */
|
||||
bcopy(hostname, drive->label.sysname, VINUMHOSTNAMELEN); /* put in host name */
|
||||
#if __FreeBSD__ >= 3
|
||||
getmicrotime(&drive->label.date_of_birth); /* and current time */
|
||||
#else
|
||||
drive->label.date_of_birth = time; /* and current time */
|
||||
#endif
|
||||
drive->label.drive_size = ((u_int64_t) drive->partinfo.part->p_size) /* size of the drive in bytes */
|
||||
*((u_int64_t) drive->partinfo.disklab->d_secsize);
|
||||
|
||||
/* number of sectors available for subdisks */
|
||||
drive->sectors_available = drive->label.drive_size / DEV_BSIZE - DATASTART;
|
||||
|
||||
/* XXX Bug in 3.0 as of January 1998: you can open
|
||||
* non-existent slices. They have a length of 0 */
|
||||
if (drive->label.drive_size < MINVINUMSLICE) { /* too small to worry about */
|
||||
set_drive_state(drive->driveno, drive_down, 1);
|
||||
printf("vinum open_drive %s: Drive too small\n", drive->devicename); /* XXX */
|
||||
drive->lasterror = ENOSPC;
|
||||
return ENOSPC;
|
||||
}
|
||||
drive->freelist_size = INITIAL_DRIVE_FREELIST; /* initial number of entries */
|
||||
drive->freelist = (struct drive_freelist *)
|
||||
Malloc(INITIAL_DRIVE_FREELIST * sizeof(struct drive_freelist));
|
||||
if (drive->freelist == NULL) /* can't malloc, dammit */
|
||||
return ENOSPC;
|
||||
drive->freelist_entries = 1; /* just (almost) the complete drive */
|
||||
drive->freelist[0].offset = DATASTART; /* starts here */
|
||||
drive->freelist[0].sectors = (drive->label.drive_size >> DEV_BSHIFT) - DATASTART; /* and it's this long */
|
||||
set_drive_state(drive->driveno, drive_up, 1); /* our drive is accessible */
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Initialize a drive: open the device and add device
|
||||
* information */
|
||||
int
|
||||
init_drive(struct drive *drive)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
int error;
|
||||
|
||||
if (drive->devicename[0] == '\0') { /* no device name yet, default to drive name */
|
||||
drive->lasterror = EINVAL;
|
||||
printf("vinum: Can't open drive without drive name\n"); /* XXX */
|
||||
return EINVAL;
|
||||
}
|
||||
error = open_drive(drive, myproc); /* open the drive */
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
error = VOP_IOCTL(drive->vp, /* get the partition information */
|
||||
DIOCGPART,
|
||||
(caddr_t) & drive->partinfo,
|
||||
FREAD,
|
||||
NOCRED,
|
||||
myproc);
|
||||
if (error) {
|
||||
printf("vinum open_drive %s: Can't get partition information, error %d\n",
|
||||
drive->devicename,
|
||||
error); /* XXX */
|
||||
close_drive(drive);
|
||||
drive->lasterror = error;
|
||||
set_drive_state(drive->driveno, drive_down, 1);
|
||||
return error;
|
||||
}
|
||||
if (drive->partinfo.part->p_fstype != 0) { /* not plain */
|
||||
drive->lasterror = EFTYPE;
|
||||
printf("vinum open_drive %s: Wrong partition type for vinum\n", drive->devicename); /* XXX */
|
||||
close_drive(drive);
|
||||
set_drive_state(drive->driveno, drive_down, 1);
|
||||
return EFTYPE;
|
||||
}
|
||||
return set_drive_parms(drive); /* set various odds and ends */
|
||||
}
|
||||
|
||||
/* Close a drive if it's open. No errors */
|
||||
void
|
||||
close_drive(struct drive *drive)
|
||||
{
|
||||
if (drive->vp) {
|
||||
vn_close(drive->vp, FREAD | FWRITE, NOCRED, drive->p);
|
||||
drive->vp = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* Remove drive from the configuration.
|
||||
* Caller must ensure that it isn't active
|
||||
*/
|
||||
void
|
||||
remove_drive(int driveno)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
struct drive *drive = &vinum_conf.drive[driveno];
|
||||
long long int nomagic = VINUM_NOMAGIC; /* no magic number */
|
||||
|
||||
write_drive(drive, /* obliterate the magic, but leave a hint */
|
||||
(char *) &nomagic,
|
||||
8,
|
||||
VINUM_LABEL_OFFSET);
|
||||
close_drive(drive); /* and close it */
|
||||
drive->state = drive_unallocated; /* and forget everything we knew about it */
|
||||
save_config(); /* and save the updated configuration */
|
||||
}
|
||||
|
||||
/* Transfer drive data. Usually called from one of these defines;
|
||||
|
||||
* #define read_drive(a, b, c, d) driveio (a, b, c, d, B_READ)
|
||||
* #define write_drive(a, b, c, d) driveio (a, b, c, d, B_WRITE)
|
||||
*
|
||||
* Return error number
|
||||
*/
|
||||
int
|
||||
driveio(struct drive *drive, void *buf, size_t length, off_t offset, int flag)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
int error;
|
||||
struct buf *bp;
|
||||
int spl;
|
||||
|
||||
error = 0;
|
||||
|
||||
/* Get a buffer */
|
||||
bp = (struct buf *) Malloc(sizeof(struct buf)); /* get a buffer */
|
||||
CHECKALLOC(bp, "Can't allocate memory");
|
||||
|
||||
bzero(&buf, sizeof(buf));
|
||||
bp->b_flags = B_BUSY | flag; /* tell us when it's done */
|
||||
bp->b_iodone = drive_io_done; /* here */
|
||||
bp->b_proc = myproc; /* process */
|
||||
bp->b_dev = drive->vp->v_un.vu_specinfo->si_rdev; /* device */
|
||||
if (offset & (drive->partinfo.disklab->d_secsize - 1)) /* not on a block boundary */
|
||||
bp->b_blkno = offset / drive->partinfo.disklab->d_secsize; /* block number */
|
||||
bp->b_data = buf;
|
||||
bp->b_vp = drive->vp; /* vnode */
|
||||
bp->b_bcount = length;
|
||||
bp->b_bufsize = length;
|
||||
|
||||
(*bdevsw[major(bp->b_dev)]->d_strategy) (bp); /* initiate the transfer */
|
||||
|
||||
spl = splbio();
|
||||
while ((bp->b_flags & B_DONE) == 0) {
|
||||
bp->b_flags |= B_CALL; /* wake me again */
|
||||
tsleep((caddr_t) bp, PRIBIO, "driveio", 0); /* and wait for it to complete */
|
||||
}
|
||||
splx(spl);
|
||||
if (bp->b_flags & B_ERROR) /* didn't work */
|
||||
error = bp->b_error; /* get the error return */
|
||||
Free(bp); /* then return the buffer */
|
||||
return error;
|
||||
}
|
||||
|
||||
/* Read data from a drive
|
||||
|
||||
* Return error number
|
||||
*/
|
||||
int
|
||||
read_drive(struct drive *drive, void *buf, size_t length, off_t offset)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
int error;
|
||||
struct buf *bp;
|
||||
daddr_t nextbn;
|
||||
long bscale;
|
||||
|
||||
struct uio uio;
|
||||
struct iovec iov;
|
||||
daddr_t blocknum; /* block number */
|
||||
int blockoff; /* offset in block */
|
||||
int count; /* amount to transfer */
|
||||
|
||||
iov.iov_base = buf;
|
||||
iov.iov_len = length;
|
||||
|
||||
uio.uio_iov = &iov;
|
||||
uio.uio_iovcnt = length;
|
||||
uio.uio_offset = offset;
|
||||
uio.uio_resid = length;
|
||||
uio.uio_segflg = UIO_SYSSPACE;
|
||||
uio.uio_rw = UIO_READ;
|
||||
uio.uio_procp = myproc;
|
||||
|
||||
bscale = btodb(drive->blocksize); /* mask off offset from block number */
|
||||
do {
|
||||
blocknum = btodb(uio.uio_offset) & ~(bscale - 1); /* get the block number */
|
||||
blockoff = uio.uio_offset % drive->blocksize; /* offset in block */
|
||||
count = min((unsigned) (drive->blocksize - blockoff), /* amount to transfer in this block */
|
||||
uio.uio_resid);
|
||||
|
||||
/* XXX Check this. I think the test is wrong */
|
||||
if (drive->vp->v_lastr + bscale == blocknum) { /* did our last read finish in this block? */
|
||||
nextbn = blocknum + bscale; /* note the end of the transfer */
|
||||
error = breadn(drive->vp, /* and read with read-ahead */
|
||||
blocknum,
|
||||
(int) drive->blocksize,
|
||||
&nextbn,
|
||||
(int *) &drive->blocksize,
|
||||
1,
|
||||
NOCRED,
|
||||
&bp);
|
||||
} else /* random read: just read this block */
|
||||
error = bread(drive->vp, blocknum, (int) drive->blocksize, NOCRED, &bp);
|
||||
drive->vp->v_lastr = blocknum; /* note the last block we read */
|
||||
count = min(count, drive->blocksize - bp->b_resid);
|
||||
if (error) {
|
||||
brelse(bp);
|
||||
return error;
|
||||
}
|
||||
error = uiomove((char *) bp->b_data + blockoff, count, &uio); /* move the data */
|
||||
brelse(bp);
|
||||
}
|
||||
while (error == 0 && uio.uio_resid > 0 && count != 0);
|
||||
return error;
|
||||
}
|
||||
|
||||
/* Write data to a drive
|
||||
|
||||
* Return error number
|
||||
*/
|
||||
int
|
||||
write_drive(struct drive *drive, void *buf, size_t length, off_t offset)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
int error;
|
||||
struct buf *bp;
|
||||
struct uio uio;
|
||||
struct iovec iov;
|
||||
daddr_t blocknum; /* block number */
|
||||
int blockoff; /* offset in block */
|
||||
int count; /* amount to transfer */
|
||||
int blockshift;
|
||||
|
||||
if (drive->state == drive_down) /* currently down */
|
||||
return 0; /* ignore */
|
||||
if (drive->vp == NULL) {
|
||||
drive->lasterror = ENODEV;
|
||||
return ENODEV; /* not configured yet */
|
||||
}
|
||||
iov.iov_base = buf;
|
||||
iov.iov_len = length;
|
||||
|
||||
uio.uio_iov = &iov;
|
||||
uio.uio_iovcnt = length;
|
||||
uio.uio_offset = offset;
|
||||
uio.uio_resid = length;
|
||||
uio.uio_segflg = UIO_SYSSPACE;
|
||||
uio.uio_rw = UIO_WRITE;
|
||||
uio.uio_procp = myproc;
|
||||
|
||||
error = 0;
|
||||
blockshift = btodb(drive->blocksize) - 1; /* amount to shift block number
|
||||
* to get sector number */
|
||||
do {
|
||||
blocknum = btodb(uio.uio_offset) & ~blockshift; /* get the block number */
|
||||
blockoff = uio.uio_offset % drive->blocksize; /* offset in block */
|
||||
count = min((unsigned) (drive->blocksize - blockoff), /* amount to transfer in this block */
|
||||
uio.uio_resid);
|
||||
if (count == drive->blocksize) /* the whole block */
|
||||
bp = getblk(drive->vp, blocknum, drive->blocksize, 0, 0); /* just get it */
|
||||
else /* partial block: */
|
||||
error = bread(drive->vp, /* read it first */
|
||||
blocknum,
|
||||
drive->blocksize,
|
||||
NOCRED,
|
||||
&bp);
|
||||
count = min(count, drive->blocksize - bp->b_resid); /* how much will we transfer now? */
|
||||
if (error == 0)
|
||||
error = uiomove((char *) bp->b_data + blockoff, /* move the data to the block */
|
||||
count,
|
||||
&uio);
|
||||
if (error) {
|
||||
brelse(bp);
|
||||
drive->lasterror = error;
|
||||
switch (error) {
|
||||
case EIO:
|
||||
set_drive_state(drive->driveno, drive_down, 1);
|
||||
break;
|
||||
|
||||
/* XXX Add other possibilities here */
|
||||
default:
|
||||
}
|
||||
return error;
|
||||
}
|
||||
if (count + blockoff == drive->blocksize)
|
||||
/* The transfer goes to the end of the block. There's
|
||||
* no need to wait for any more data to arrive. */
|
||||
bawrite(bp); /* start the write now */
|
||||
else
|
||||
bdwrite(bp); /* do a delayed write */
|
||||
}
|
||||
while (error == 0 && uio.uio_resid > 0 && count != 0);
|
||||
if (error)
|
||||
drive->lasterror = error;
|
||||
return error; /* OK */
|
||||
}
|
||||
|
||||
/* Wake up on completion */
|
||||
void
|
||||
drive_io_done(struct buf *bp)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
wakeup((caddr_t) bp); /* Wachet auf! */
|
||||
bp->b_flags &= ~B_CALL; /* don't do this again */
|
||||
}
|
||||
|
||||
/* Check a drive for a vinum header. If found,
|
||||
* update the drive information. We come here
|
||||
* with a partially populated drive structure
|
||||
* which includes the device name.
|
||||
*
|
||||
* Return information on what we found
|
||||
*/
|
||||
enum drive_label_info
|
||||
read_drive_label(struct drive *drive)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
int error;
|
||||
int result; /* result of our search */
|
||||
struct vinum_hdr *vhdr; /* and as header */
|
||||
|
||||
error = init_drive(drive); /* find the drive */
|
||||
if (error) /* find the drive */
|
||||
return DL_CANT_OPEN; /* not ours */
|
||||
|
||||
vhdr = (struct vinum_hdr *) Malloc(VINUMHEADERLEN); /* allocate buffers */
|
||||
CHECKALLOC(vhdr, "Can't allocate memory");
|
||||
|
||||
error = read_drive(drive, (void *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET);
|
||||
if (vhdr->magic == VINUM_MAGIC) { /* ours! */
|
||||
if (drive->label.name[0] /* we have a name for this drive */
|
||||
&&(strcmp(drive->label.name, vhdr->label.name))) { /* but it doesn't match the real name */
|
||||
drive->lasterror = EINVAL;
|
||||
result = DL_WRONG_DRIVE; /* it's the wrong drive */
|
||||
} else {
|
||||
set_drive_parms(drive); /* and set other parameters */
|
||||
result = DL_OURS;
|
||||
}
|
||||
/* We copy the drive anyway so that we have
|
||||
* the correct name in the drive info. This
|
||||
* may not be the name specified */
|
||||
drive->label = vhdr->label; /* put in the label information */
|
||||
} else if (vhdr->magic == VINUM_NOMAGIC) /* was ours, but we gave it away */
|
||||
result = DL_DELETED_LABEL;
|
||||
else
|
||||
result = DL_NOT_OURS; /* we could have it, but we don't yet */
|
||||
Free(vhdr); /* that's all. */
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Check a drive for a vinum header. If found,
|
||||
* read configuration information from the drive and
|
||||
* incorporate the data into the configuration.
|
||||
*
|
||||
* Return error number
|
||||
*/
|
||||
int
|
||||
check_drive(char *drivename)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
int error;
|
||||
struct nameidata nd; /* mount point credentials */
|
||||
char *config_text; /* read the config info from disk into here */
|
||||
volatile char *cptr; /* pointer into config information */
|
||||
char *eptr; /* end pointer into config information */
|
||||
int driveno;
|
||||
struct drive *drive;
|
||||
char *config_line; /* copy the config line to */
|
||||
|
||||
driveno = find_drive_by_dev(drivename, 1); /* doesn't exist, create it */
|
||||
drive = &vinum_conf.drive[driveno]; /* and get a pointer */
|
||||
strcpy(drive->devicename, drivename); /* put in device name */
|
||||
|
||||
if (read_drive_label(drive) == DL_OURS) { /* ours! */
|
||||
config_text = (char *) Malloc(MAXCONFIG * 2); /* allocate buffers */
|
||||
CHECKALLOC(config_text, "Can't allocate memory");
|
||||
config_line = (char *) Malloc(MAXCONFIGLINE * 2); /* allocate buffers */
|
||||
CHECKALLOC(config_line, "Can't allocate memory");
|
||||
|
||||
/* Read in both copies of the configuration information */
|
||||
error = read_drive(drive, config_text, MAXCONFIG * 2, VINUM_CONFIG_OFFSET);
|
||||
|
||||
if (error != 0) {
|
||||
printf("vinum: Can't read device %s, error %d\n", drive->devicename, error);
|
||||
Free(config_text);
|
||||
Free(config_line);
|
||||
free_drive(drive); /* give it back */
|
||||
return error;
|
||||
}
|
||||
/* XXX At this point, check that the two copies are the same, and do something useful if not.
|
||||
* In particular, consider which is newer, and what this means for the integrity of the
|
||||
* data on the drive */
|
||||
|
||||
/* Parse the configuration, and add it to the global configuration */
|
||||
for (cptr = config_text; *cptr != '\0';) { /* love this style(9) */
|
||||
volatile int parse_status; /* return value from parse_config */
|
||||
|
||||
for (eptr = config_line; (*cptr != '\n') && (*cptr != '\0');) /* until the end of the line */
|
||||
*eptr++ = *cptr++;
|
||||
*eptr = '\0'; /* and delimit */
|
||||
if (setjmp(command_fail) == 0) { /* come back here on error and continue */
|
||||
parse_status = parse_config(config_line, &keyword_set); /* parse the config line */
|
||||
if (parse_status < 0) { /* error in config */
|
||||
/* This config should have been parsed in user
|
||||
* space. If we run into problems here, something
|
||||
* serious is afoot. Complain and let the user
|
||||
* snarf the config to see what's wrong */
|
||||
printf("vinum: Config error on drive %s, aborting integration\n", nd.ni_dirp);
|
||||
Free(config_text);
|
||||
Free(config_line);
|
||||
free_drive(drive); /* give it back */
|
||||
return EINVAL;
|
||||
}
|
||||
}
|
||||
while (*cptr == '\n')
|
||||
cptr++; /* skip to next line */
|
||||
}
|
||||
Free(config_text);
|
||||
if ((vinum_conf.flags & VF_READING_CONFIG) == 0) /* not reading config */
|
||||
updateconfig(0); /* update object states */
|
||||
printf("vinum: read configuration from %s\n", drivename);
|
||||
return 0; /* it all worked */
|
||||
} else { /* no vinum label found */
|
||||
if (drive->lasterror) {
|
||||
set_drive_state(drive->driveno, drive_down, 1);
|
||||
return drive->lasterror;
|
||||
} else
|
||||
return ENODEV; /* not our device */
|
||||
}
|
||||
}
|
||||
|
||||
/* Kludge: kernel printf doesn't handle longs correctly XXX */
|
||||
static char *lltoa(long long l, char *s);
|
||||
static char *sappend(char *txt, char *s);
|
||||
|
||||
/*
 * Write the decimal representation of l at s and return a pointer to
 * the byte after the last digit.  The output is NOT NUL-terminated;
 * the caller either appends more text or relies on a pre-zeroed
 * buffer.  A negative value gets a leading '-'.
 *
 * The magnitude is kept in an unsigned variable so that the most
 * negative value, which cannot be negated as a long long, is
 * converted correctly.
 */
static char *
lltoa(long long l, char *s)
{
    char digits[24];					    /* more than enough for 64 bits, filled in reverse */
    int ndigits = 0;
    unsigned long long magnitude;

    if (l < 0) {
	*s++ = '-';
	magnitude = 0ULL - (unsigned long long) l;	    /* well-defined even for the minimum value */
    } else
	magnitude = (unsigned long long) l;
    do {						    /* least significant digit first */
	digits[ndigits++] = (char) ('0' + (int) (magnitude % 10));
	magnitude /= 10;
    } while (magnitude != 0);
    while (ndigits > 0)					    /* emit most significant digit first */
	*s++ = digits[--ndigits];
    return s;
}
|
||||
|
||||
/*
 * Copy the string txt, including its terminating NUL, to s.
 * Returns a pointer to the NUL just written, so that further text
 * can be appended there.
 */
static char *
sappend(char *txt, char *s)
{
    for (;;) {
	char c = *txt++;

	*s = c;
	if (c == '\0')					    /* terminator copied: this is the new end */
	    return s;
	s++;
    }
}
|
||||
|
||||
/* Format the configuration in text form into the buffer
|
||||
* at config. Don't go beyond len bytes
|
||||
* XXX this stinks. Fix soon. */
|
||||
void
|
||||
format_config(char *config, int len)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
int i;
|
||||
int j;
|
||||
char *s = config;
|
||||
|
||||
bzero(config, len);
|
||||
|
||||
/* First write the drive configuration */
|
||||
for (i = 0; i < vinum_conf.drives_used; i++) {
|
||||
struct drive *drive;
|
||||
|
||||
drive = &vinum_conf.drive[i];
|
||||
if (drive->state != drive_unallocated) {
|
||||
sprintf(s,
|
||||
"drive %s state %s device %s\n",
|
||||
drive->label.name,
|
||||
drive_state(drive->state),
|
||||
drive->devicename);
|
||||
while (*s)
|
||||
s++; /* find the end */
|
||||
if (s > &config[len - 80]) {
|
||||
printf("vinum: configuration data overflow\n");
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Then the volume configuration */
|
||||
for (i = 0; i < vinum_conf.volumes_used; i++) {
|
||||
struct volume *vol;
|
||||
|
||||
vol = &vinum_conf.volume[i];
|
||||
if (vol->state != volume_unallocated) {
|
||||
if (vol->preferred_plex >= 0) /* preferences, */
|
||||
sprintf(s,
|
||||
"volume %s state %s readpol prefer %s",
|
||||
vol->name,
|
||||
volume_state(vol->state),
|
||||
vinum_conf.plex[vol->preferred_plex].name);
|
||||
else /* default round-robin */
|
||||
sprintf(s,
|
||||
"volume %s state %s",
|
||||
vol->name,
|
||||
volume_state(vol->state));
|
||||
while (*s)
|
||||
s++; /* find the end */
|
||||
s = sappend("\n", s);
|
||||
if (s > &config[len - 80]) {
|
||||
printf("vinum: configuration data overflow\n");
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Then the plex configuration */
|
||||
for (i = 0; i < vinum_conf.plexes_used; i++) {
|
||||
struct plex *plex;
|
||||
|
||||
plex = &vinum_conf.plex[i];
|
||||
if (plex->state != plex_unallocated) {
|
||||
sprintf(s, "plex name %s state %s org %s ",
|
||||
plex->name,
|
||||
plex_state(plex->state),
|
||||
plex_org(plex->organization));
|
||||
while (*s)
|
||||
s++; /* find the end */
|
||||
if ((plex->organization == plex_striped)
|
||||
) {
|
||||
sprintf(s, "%db ", (int) plex->stripesize);
|
||||
while (*s)
|
||||
s++; /* find the end */
|
||||
}
|
||||
if (plex->volno >= 0) /* we have a volume */
|
||||
sprintf(s, "vol %s ", vinum_conf.volume[plex->volno].name);
|
||||
while (*s)
|
||||
s++; /* find the end */
|
||||
for (j = 0; j < plex->subdisks; j++) {
|
||||
sprintf(s, " sd %s", vinum_conf.sd[plex->sdnos[j]].name);
|
||||
}
|
||||
s = sappend("\n", s);
|
||||
if (s > &config[len - 80]) {
|
||||
printf("vinum: configuration data overflow\n");
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* And finally the subdisk configuration */
|
||||
for (i = 0; i < vinum_conf.subdisks_used; i++) {
|
||||
struct sd *sd = &vinum_conf.sd[i]; /* XXX */
|
||||
if (vinum_conf.sd[i].state != sd_unallocated) {
|
||||
sprintf(s,
|
||||
"sd name %s drive %s plex %s state %s len ",
|
||||
sd->name,
|
||||
vinum_conf.drive[sd->driveno].label.name,
|
||||
vinum_conf.plex[sd->plexno].name,
|
||||
sd_state(sd->state));
|
||||
while (*s)
|
||||
s++; /* find the end */
|
||||
s = lltoa(sd->sectors, s);
|
||||
s = sappend("b driveoffset ", s);
|
||||
s = lltoa(sd->driveoffset, s);
|
||||
s = sappend("b plexoffset ", s);
|
||||
s = lltoa(sd->plexoffset, s);
|
||||
s = sappend("b\n", s);
|
||||
if (s > &config[len - 80]) {
|
||||
printf("vinum: configuration data overflow\n");
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Write the configuration to all vinum slices */
|
||||
int
|
||||
save_config(void)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
int error;
|
||||
int written_config; /* set when we firstnwrite the config to disk */
|
||||
int driveno;
|
||||
struct drive *drive; /* point to current drive info */
|
||||
struct vinum_hdr *vhdr; /* and as header */
|
||||
char *config; /* point to config data */
|
||||
int wlabel_on; /* to set writing label on/off */
|
||||
|
||||
/* don't save the configuration while we're still working on it */
|
||||
if (vinum_conf.flags & VF_CONFIGURING)
|
||||
return 0;
|
||||
written_config = 0; /* no config written yet */
|
||||
/* Build a volume header */
|
||||
vhdr = (struct vinum_hdr *) Malloc(VINUMHEADERLEN); /* get space for the config data */
|
||||
CHECKALLOC(vhdr, "Can't allocate config data");
|
||||
vhdr->magic = VINUM_MAGIC; /* magic number */
|
||||
vhdr->config_length = MAXCONFIG; /* length of following config info */
|
||||
|
||||
config = Malloc(MAXCONFIG); /* get space for the config data */
|
||||
CHECKALLOC(config, "Can't allocate config data");
|
||||
|
||||
format_config(config, MAXCONFIG);
|
||||
error = 0; /* no errors yet */
|
||||
for (driveno = 0; driveno < vinum_conf.drives_used; driveno++) {
|
||||
drive = &vinum_conf.drive[driveno]; /* point to drive */
|
||||
|
||||
if (drive->state != drive_down) {
|
||||
#if (__FreeBSD__ >= 3)
|
||||
getmicrotime(&drive->label.last_update); /* time of last update is now */
|
||||
#else
|
||||
drive->label.last_update = time; /* time of last update is now */
|
||||
#endif
|
||||
bcopy((char *) &drive->label, /* and the label info from the drive structure */
|
||||
(char *) &vhdr->label,
|
||||
sizeof(vhdr->label));
|
||||
if ((drive->state != drive_unallocated)
|
||||
&& (drive->state != drive_uninit)) {
|
||||
wlabel_on = 1; /* enable writing the label */
|
||||
error = VOP_IOCTL(drive->vp, /* make the label writeable */
|
||||
DIOCWLABEL,
|
||||
(caddr_t) & wlabel_on,
|
||||
FWRITE,
|
||||
NOCRED,
|
||||
myproc);
|
||||
if (error == 0)
|
||||
error = write_drive(drive, vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET);
|
||||
if (error == 0)
|
||||
error = write_drive(drive, config, MAXCONFIG, VINUM_CONFIG_OFFSET);
|
||||
wlabel_on = 0; /* enable writing the label */
|
||||
VOP_IOCTL(drive->vp, /* make the label non-writeable again */
|
||||
DIOCWLABEL,
|
||||
(caddr_t) & wlabel_on,
|
||||
FWRITE,
|
||||
NOCRED,
|
||||
myproc);
|
||||
if (error) {
|
||||
printf("vinum: Can't write config to %s, error %d\n", drive->devicename, error);
|
||||
set_drive_state(drive->driveno, drive_down, 1);
|
||||
} else
|
||||
written_config = 1; /* we've written it on at least one drive */
|
||||
}
|
||||
}
|
||||
}
|
||||
Free(vhdr);
|
||||
Free(config);
|
||||
return written_config == 0; /* return 1 if we failed to write config */
|
||||
}
|
||||
|
||||
/* Disk labels are a mess. The correct way to access them
|
||||
* is with the DIOC[GSW]DINFO ioctls, but some programs, such
|
||||
* as newfs, access the disk directly, so we have to write
|
||||
* things there. We do this only on request. If a user
|
||||
* request tries to read it directly, we fake up one on the fly.
|
||||
*/
|
||||
|
||||
/* get_volume_label returns a label structure to lp, which
|
||||
* is allocated by the caller */
|
||||
void
|
||||
get_volume_label(struct volume *vol, struct disklabel *lp)
|
||||
{
|
||||
bzero(lp, sizeof(struct disklabel));
|
||||
|
||||
strncpy(lp->d_typename, "vinum", sizeof(lp->d_typename));
|
||||
lp->d_type = DTYPE_VINUM;
|
||||
strncpy(lp->d_packname, vol->name, min(sizeof(lp->d_packname), sizeof(vol->name)));
|
||||
lp->d_rpm = 14400 * vol->plexes; /* to keep them guessing */
|
||||
lp->d_interleave = 1;
|
||||
lp->d_flags = 0;
|
||||
|
||||
/* Fitting unto the vine, a vinum has a single
|
||||
* track with all its sectors */
|
||||
lp->d_secsize = DEV_BSIZE; /* bytes per sector */
|
||||
lp->d_nsectors = vol->size; /* data sectors per track */
|
||||
lp->d_ntracks = 1; /* tracks per cylinder */
|
||||
lp->d_ncylinders = 1; /* data cylinders per unit */
|
||||
lp->d_secpercyl = vol->size; /* data sectors per cylinder */
|
||||
lp->d_secperunit = vol->size; /* data sectors per unit */
|
||||
|
||||
lp->d_bbsize = BBSIZE;
|
||||
lp->d_sbsize = SBSIZE;
|
||||
|
||||
lp->d_magic = DISKMAGIC;
|
||||
lp->d_magic2 = DISKMAGIC;
|
||||
|
||||
/* Set up partitions a, b and c to be identical
|
||||
* and the size of the volume. a is UFS, b is
|
||||
* swap, c is nothing */
|
||||
lp->d_partitions[0].p_size = vol->size;
|
||||
lp->d_partitions[0].p_fsize = 1024;
|
||||
lp->d_partitions[0].p_fstype = FS_BSDFFS; /* FreeBSD File System :-) */
|
||||
lp->d_partitions[0].p_fsize = 1024; /* FS fragment size */
|
||||
lp->d_partitions[0].p_frag = 8; /* and fragments per block */
|
||||
lp->d_partitions[SWAP_PART].p_size = vol->size;
|
||||
lp->d_partitions[SWAP_PART].p_fstype = FS_SWAP; /* swap partition */
|
||||
lp->d_partitions[LABEL_PART].p_size = vol->size;
|
||||
lp->d_npartitions = LABEL_PART + 1;
|
||||
strncpy(lp->d_packname, vol->name, min(sizeof(lp->d_packname), sizeof(vol->name)));
|
||||
lp->d_checksum = dkcksum(lp);
|
||||
}
|
||||
|
||||
int
|
||||
write_volume_label(int volno)
|
||||
{
|
||||
struct disklabel *lp;
|
||||
struct buf *bp;
|
||||
struct disklabel *dlp;
|
||||
struct volume *vol;
|
||||
int error;
|
||||
|
||||
lp = (struct disklabel *) Malloc((sizeof(struct disklabel) + (DEV_BSIZE - 1)) & (DEV_BSIZE - 1));
|
||||
if (lp == 0)
|
||||
return ENOMEM;
|
||||
|
||||
if ((unsigned) (volno) >= (unsigned) vinum_conf.volumes_used) /* invalid volume */
|
||||
return ENOENT;
|
||||
|
||||
vol = &VOL[volno]; /* volume in question */
|
||||
if (vol->state == volume_unallocated) /* nothing there */
|
||||
return ENOENT;
|
||||
|
||||
get_volume_label(vol, lp); /* get the label */
|
||||
|
||||
/* Now write to disk. This code is derived from the
|
||||
* system writedisklabel (), which does silly things
|
||||
* like reading the label and refusing to write
|
||||
* unless it's already there. */
|
||||
bp = geteblk((int) lp->d_secsize); /* get a buffer */
|
||||
bp->b_dev = minor(vol->devno) | (CDEV_MAJOR << MAJORDEV_SHIFT); /* our own raw volume */
|
||||
bp->b_blkno = LABELSECTOR * ((int) lp->d_secsize / DEV_BSIZE);
|
||||
bp->b_bcount = lp->d_secsize;
|
||||
bzero(bp->b_data, lp->d_secsize);
|
||||
dlp = (struct disklabel *) bp->b_data;
|
||||
*dlp = *lp;
|
||||
bp->b_flags &= ~B_INVAL;
|
||||
bp->b_flags |= B_BUSY | B_WRITE;
|
||||
vinumstrategy(bp); /* write it out */
|
||||
error = biowait(bp);
|
||||
bp->b_flags |= B_INVAL | B_AGE;
|
||||
brelse(bp);
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
 * Initialize a subdisk.
 *
 * Placeholder: nothing is done with sdno yet and the result is
 * always 0.
 */
int
initsd(int sdno)
{
    (void) sdno;                                            /* not used yet */
    return 0;
}
|
137
sys/modules/vinum/lock.c
Normal file
137
sys/modules/vinum/lock.c
Normal file
@ -0,0 +1,137 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: lock.c,v 1.6 1998/07/28 06:32:57 grog Exp grog $
|
||||
*/
|
||||
|
||||
#define REALLYKERNEL
|
||||
#include "vinumhdr.h"
|
||||
|
||||
/* Lock routines. Currently, we lock either an individual volume
|
||||
* or the global configuration. I don't think tsleep and
|
||||
* wakeup are SMP safe. FIXME XXX */
|
||||
|
||||
/* Lock a volume, wait if it's in use */
|
||||
int
|
||||
lockvol(struct volume *vol)
|
||||
{
|
||||
int error;
|
||||
|
||||
while ((vol->flags & VF_LOCKED) != 0) {
|
||||
vol->flags |= VF_LOCKING;
|
||||
/* It would seem to make more sense to sleep on
|
||||
* the address 'vol'. Unfortuntaly we can't
|
||||
* guarantee that this address won't change due to
|
||||
* table expansion. The address we choose won't change. */
|
||||
if ((error = tsleep(&vinum_conf.volume + vol->devno,
|
||||
PRIBIO | PCATCH,
|
||||
"volock",
|
||||
0)) != 0)
|
||||
return error;
|
||||
}
|
||||
vol->flags |= VF_LOCKED;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Unlock a volume and let the next one at it */
|
||||
void
|
||||
unlockvol(struct volume *vol)
|
||||
{
|
||||
vol->flags &= ~VF_LOCKED;
|
||||
if ((vol->flags & VF_LOCKING) != 0) {
|
||||
vol->flags &= ~VF_LOCKING;
|
||||
wakeup(&vinum_conf.volume + vol->devno);
|
||||
}
|
||||
}
|
||||
|
||||
/* Lock a plex, wait if it's in use */
|
||||
int
|
||||
lockplex(struct plex *plex)
|
||||
{
|
||||
int error;
|
||||
|
||||
while ((plex->flags & VF_LOCKED) != 0) {
|
||||
plex->flags |= VF_LOCKING;
|
||||
/* It would seem to make more sense to sleep on
|
||||
* the address 'plex'. Unfortuntaly we can't
|
||||
* guarantee that this address won't change due to
|
||||
* table expansion. The address we choose won't change. */
|
||||
if ((error = tsleep(&vinum_conf.plex + plex->sdnos[0],
|
||||
PRIBIO | PCATCH,
|
||||
"plexlk",
|
||||
0)) != 0)
|
||||
return error;
|
||||
}
|
||||
plex->flags |= VF_LOCKED;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Unlock a plex and let the next one at it */
|
||||
void
|
||||
unlockplex(struct plex *plex)
|
||||
{
|
||||
plex->flags &= ~VF_LOCKED;
|
||||
if ((plex->flags & VF_LOCKING) != 0) {
|
||||
plex->flags &= ~VF_LOCKING;
|
||||
wakeup(&vinum_conf.plex + plex->plexno);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Get a lock for the global config, wait if it's not available */
|
||||
int
|
||||
lock_config(void)
|
||||
{
|
||||
int error;
|
||||
|
||||
while ((vinum_conf.flags & VF_LOCKED) != 0) {
|
||||
vinum_conf.flags |= VF_LOCKING;
|
||||
if ((error = tsleep(&vinum_conf, PRIBIO | PCATCH, "vincfg", 0)) != 0)
|
||||
return error;
|
||||
}
|
||||
vinum_conf.flags |= VF_LOCKED;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Unlock and wake up any waiters */
|
||||
void
|
||||
unlock_config(void)
|
||||
{
|
||||
vinum_conf.flags &= ~VF_LOCKED;
|
||||
if ((vinum_conf.flags & VF_LOCKING) != 0) {
|
||||
vinum_conf.flags &= ~VF_LOCKING;
|
||||
wakeup(&vinum_conf);
|
||||
}
|
||||
}
|
40
sys/modules/vinum/makestatetext
Executable file
40
sys/modules/vinum/makestatetext
Executable file
@ -0,0 +1,40 @@
|
||||
#!/bin/sh
# Make statetexts.h from vinumstate.h
# $Id: makestatetext,v 1.4 1998/03/13 05:36:16 grog Exp grog $
#
# The output starts with a copy of the file COPYRIGHT, followed by one
# table of strings per object kind (drive, sd, plex, volume).  Each
# table holds the names found in $infile as <kind>_<name>, with the
# <kind>_ prefix removed.  Lines of $infile containing '=' are ignored.
infile=vinumstate.h
ofile=statetexts.h

# emit_states PREFIX
# Append a line of the form "name", to $ofile for every PREFIX_name,
# found in $infile.  The bracket expression spells out the permitted
# characters instead of using the range A-z, which is not portable
# across locales and only matched '_' by accident.
emit_states()
{
    grep -E -e "$1"'_[A-Za-z0-9_]*,' "$infile" | grep -v = |
        sed 's: *'"$1"'_\([^,]*\).*:    "\1",:' >>"$ofile"
}

cat <COPYRIGHT >"$ofile"

echo >>"$ofile" "/* Created by $0 on" `date`. "Do not edit */"
echo >>"$ofile"
echo >>"$ofile" "/* Drive state texts */"
echo >>"$ofile" "char *drivestatetext [] =
  {  "
emit_states drive
cat <<FOO >>"$ofile"
  };

/* Subdisk state texts */
char *sdstatetext [] =
  {
FOO
emit_states sd
cat <<FOO >>"$ofile"
  };

/* Plex state texts */
char *plexstatetext [] =
  {
FOO
emit_states plex
cat <<FOO >>"$ofile"
  };

/* Volume state texts */
char *volstatetext [] =
  {
FOO
emit_states volume
cat <<FOO >>"$ofile"
  };
FOO
|
186
sys/modules/vinum/memory.c
Normal file
186
sys/modules/vinum/memory.c
Normal file
@ -0,0 +1,186 @@
|
||||
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: memory.c,v 1.16 1998/08/08 04:43:22 grog Exp grog $
|
||||
*/
|
||||
|
||||
#define REALLYKERNEL
|
||||
#define USES_VM
|
||||
#include "vinumhdr.h"
|
||||
|
||||
extern jmp_buf command_fail; /* return on a failed command */
|
||||
|
||||
#if __FreeBSD__ >= 3
|
||||
/* Why aren't these declared anywhere? XXX */
|
||||
int setjmp(jmp_buf);
|
||||
void longjmp(jmp_buf, int);
|
||||
#endif
|
||||
|
||||
void freedatabuf(struct mc *me);
|
||||
caddr_t allocdatabuf(struct mc *me);
|
||||
|
||||
void
|
||||
expand_table(void **table, int oldsize, int newsize)
|
||||
{
|
||||
if (newsize > oldsize) {
|
||||
int *temp;
|
||||
|
||||
temp = (int *) Malloc(newsize); /* allocate a new table */
|
||||
CHECKALLOC(temp, "vinum: Can't expand table\n");
|
||||
if (*table != NULL) { /* already something there, */
|
||||
bcopy((char *) *table, (char *) temp, oldsize); /* copy it to the old table */
|
||||
Free(*table);
|
||||
}
|
||||
*table = temp;
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef DEBUG
|
||||
/* increase the size of a request block */
|
||||
void
|
||||
expandrq(struct plexrq *prq)
|
||||
{
|
||||
expand_table((void **) &prq->rqe,
|
||||
prq->requests * sizeof(struct rqelement),
|
||||
(prq->requests + RQELTS) * sizeof(struct rqelement));
|
||||
bzero(&prq->rqe[prq->requests], RQELTS * sizeof(struct rqelement)); /* clear the new part */
|
||||
prq->rqcount += RQELTS;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if DEBUG /* XXX debug */
|
||||
#define MALLOCENTRIES 16384
|
||||
int malloccount = 0;
|
||||
int highwater = 0; /* highest index ever allocated */
|
||||
static struct mc malloced[MALLOCENTRIES];
|
||||
|
||||
static int total_malloced;                                  /* sum of the sizes of all tracked allocations */
|
||||
|
||||
caddr_t
|
||||
MMalloc(int size, char *file, int line)
|
||||
{
|
||||
caddr_t result;
|
||||
int i;
|
||||
static int seq = 0;
|
||||
int s;
|
||||
struct mc me; /* information to pass to allocdatabuf */
|
||||
|
||||
if (malloccount >= MALLOCENTRIES) { /* too many */
|
||||
printf("vinum: can't allocate table space to trace memory allocation");
|
||||
return 0; /* can't continue */
|
||||
}
|
||||
result = malloc(size, M_DEVBUF, M_WAITOK); /* use malloc for smaller and irregular stuff */
|
||||
if (result == NULL)
|
||||
printf("vinum: can't allocate %d bytes from %s:%d\n", size, file, line);
|
||||
else {
|
||||
me.flags = 0; /* allocation via malloc */
|
||||
s = splhigh();
|
||||
for (i = 0; i < malloccount; i++) {
|
||||
if (((result + size) > malloced[i].address)
|
||||
&& (result < malloced[i].address + malloced[i].size)) /* overlap */
|
||||
Debugger("Malloc overlap");
|
||||
}
|
||||
if (result) {
|
||||
i = malloccount++;
|
||||
total_malloced += size;
|
||||
malloced[i].address = result;
|
||||
malloced[i].size = size;
|
||||
malloced[i].line = line;
|
||||
malloced[i].seq = seq++;
|
||||
malloced[i].flags = me.flags;
|
||||
malloced[i].databuf = me.databuf; /* only used with kva alloc */
|
||||
bcopy(file, malloced[i].file, min(strlen(file) + 1, 16));
|
||||
}
|
||||
if (malloccount > highwater)
|
||||
highwater = malloccount;
|
||||
splx(s);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
void
|
||||
FFree(void *mem, char *file, int line)
|
||||
{
|
||||
int i;
|
||||
int s;
|
||||
|
||||
s = splhigh();
|
||||
for (i = 0; i < malloccount; i++) {
|
||||
if ((caddr_t) mem == malloced[i].address) { /* found it */
|
||||
bzero(mem, malloced[i].size); /* XXX */
|
||||
free(mem, M_DEVBUF);
|
||||
malloccount--;
|
||||
total_malloced -= malloced[i].size;
|
||||
if (i < malloccount) /* more coming after */
|
||||
bcopy(&malloced[i + 1], &malloced[i], (malloccount - i) * sizeof(struct mc));
|
||||
splx(s);
|
||||
return;
|
||||
}
|
||||
}
|
||||
splx(s);
|
||||
printf("Freeing unallocated data at 0x%08x from %s, line %d\n", (int) mem, file, line);
|
||||
Debugger("Free");
|
||||
}
|
||||
|
||||
void
|
||||
vinum_meminfo(caddr_t data)
|
||||
{
|
||||
struct meminfo *m = (struct meminfo *) data;
|
||||
|
||||
m->mallocs = malloccount;
|
||||
m->total_malloced = total_malloced;
|
||||
m->malloced = malloced;
|
||||
m->highwater = highwater;
|
||||
}
|
||||
|
||||
int
|
||||
vinum_mallocinfo(caddr_t data)
|
||||
{
|
||||
struct mc *m = (struct mc *) data;
|
||||
unsigned int ent = *(int *) data; /* 1st word is index */
|
||||
|
||||
if (ent >= malloccount)
|
||||
return ENOENT;
|
||||
m->address = malloced[ent].address;
|
||||
m->size = malloced[ent].size;
|
||||
m->line = malloced[ent].line;
|
||||
m->seq = malloced[ent].seq;
|
||||
bcopy(malloced[ent].file, m->file, 16);
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
206
sys/modules/vinum/parser.c
Normal file
206
sys/modules/vinum/parser.c
Normal file
@ -0,0 +1,206 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: parser.c,v 1.11 1998/08/10 08:50:42 grog Exp grog $
|
||||
*/
|
||||
|
||||
/* This file contains the parser for the configuration routines. It's used
|
||||
* both in the kernel and in the user interface program, thus the separate file. */
|
||||
|
||||
/* Go through a text and split up into text tokens. These are either non-blank
|
||||
* sequences, or any sequence (except \0) enclosed in ' or ". Embedded ' or
|
||||
* " characters may be escaped by \, which otherwise has no special meaning.
|
||||
*
|
||||
* Delimit by following with a \0, and return pointers to the starts at token [].
|
||||
* Return the number of tokens found as the return value.
|
||||
*
|
||||
* This method has the restriction that a closing " or ' must be followed by
|
||||
* grey space.
|
||||
*
|
||||
* Error conditions are end of line before end of quote, or no space after
|
||||
* a closing quote. In this case, tokenize() returns -1. */
|
||||
|
||||
#include <sys/param.h>
|
||||
#ifdef KERNEL
|
||||
#undef KERNEL /* XXX */
|
||||
#define REALLYKERNEL
|
||||
#else
|
||||
#include <ctype.h>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#endif
|
||||
/* All this mess for a single struct definition */
|
||||
#include <sys/uio.h>
|
||||
#include <sys/namei.h>
|
||||
#include <sys/disklabel.h>
|
||||
#include <sys/mount.h>
|
||||
#include <sys/device.h>
|
||||
#include <sys/disk.h>
|
||||
#include "sys/buf.h"
|
||||
|
||||
#include <vinumvar.h>
|
||||
#include "vinumkw.h"
|
||||
#include "vinumio.h"
|
||||
#include "vinumext.h"
|
||||
|
||||
#ifdef REALLYKERNEL
|
||||
#define isspace(c) (((c) == ' ') || ((c) == '\t'))          /* check for white space; evaluates c twice */
|
||||
#else /* get it from the headers */
|
||||
#include <ctype.h>
|
||||
#endif
|
||||
|
||||
/* enum keyword is defined in vinumvar.h */
|
||||
|
||||
#define keypair(x) { #x, kw_##x } /* create pair "foo", kw_foo */
|
||||
#define flagkeypair(x) { "-"#x, kw_##x } /* create pair "-foo", kw_foo */
|
||||
#define KEYWORDSET(x) {sizeof (x) / sizeof (struct _keywords), x}
|
||||
|
||||
/* Normal keywords. These are all the words that vinum knows. */
|
||||
struct _keywords keywords[] =
|
||||
{keypair(drive),
|
||||
keypair(sd),
|
||||
keypair(subdisk),
|
||||
keypair(plex),
|
||||
keypair(volume),
|
||||
keypair(vol),
|
||||
keypair(setupstate),
|
||||
keypair(readpol),
|
||||
keypair(org),
|
||||
keypair(name),
|
||||
keypair(writethrough),
|
||||
keypair(writeback),
|
||||
keypair(raw),
|
||||
keypair(device),
|
||||
keypair(concat),
|
||||
keypair(raid5),
|
||||
keypair(striped),
|
||||
keypair(plexoffset),
|
||||
keypair(driveoffset),
|
||||
keypair(length),
|
||||
keypair(len),
|
||||
keypair(state),
|
||||
keypair(round),
|
||||
keypair(prefer),
|
||||
keypair(rename),
|
||||
keypair(detached),
|
||||
#ifndef KERNEL /* for vinum(8) only */
|
||||
#ifdef DEBUG
|
||||
keypair(debug),
|
||||
#endif
|
||||
keypair(attach),
|
||||
keypair(detach),
|
||||
keypair(printconfig),
|
||||
keypair(replace),
|
||||
keypair(create),
|
||||
keypair(read),
|
||||
keypair(modify),
|
||||
keypair(list),
|
||||
keypair(l),
|
||||
keypair(ld),
|
||||
keypair(ls),
|
||||
keypair(lp),
|
||||
keypair(lv),
|
||||
keypair(info),
|
||||
keypair(set),
|
||||
keypair(rm),
|
||||
keypair(init),
|
||||
keypair(label),
|
||||
keypair(resetconfig),
|
||||
keypair(start),
|
||||
keypair(stop),
|
||||
keypair(resetstats)
|
||||
#endif
|
||||
};
|
||||
struct keywordset keyword_set = KEYWORDSET(keywords);
|
||||
|
||||
#ifndef KERNEL
|
||||
struct _keywords flag_keywords[] =
|
||||
{flagkeypair(f),
|
||||
flagkeypair(d),
|
||||
flagkeypair(v),
|
||||
flagkeypair(s),
|
||||
flagkeypair(r)
|
||||
};
|
||||
struct keywordset flag_set = KEYWORDSET(flag_keywords);
|
||||
|
||||
#endif
|
||||
|
||||
/*
 * Split the text at cptr into tokens, in place.
 *
 * A token is either a sequence of non-blank characters, or a
 * sequence enclosed in ' or ".  A quote character inside a quoted
 * token may be escaped with \.  Each token is terminated by
 * overwriting the character after it with \0, and its start is
 * stored in token [].  A quoted token keeps its enclosing quotes
 * and any escaping \ characters.  Scanning stops at the end of the
 * string, at a newline, or at a # which starts a token.
 *
 * Returns the number of tokens found, or -1 if a quoted token is not
 * closed before the end of the line or its closing quote is not
 * followed by white space.
 *
 * token [] must be large enough for all the tokens on the line: its
 * size is not known here and can't be checked.
 */
int
tokenize(char *cptr, char *token[])
{
    char delim;                                             /* delimiter for searching for the partner */
    int tokennr;                                            /* index of this token */

    tokennr = 0;                                            /* none found yet */

    for (;;) {
        /* the casts keep negative char values out of isspace () */
        while (isspace((unsigned char) *cptr))
            cptr++;                                         /* skip initial white space */
        if ((*cptr == '\0') || (*cptr == '\n') || (*cptr == '#')) /* end of line */
            return tokennr;                                 /* return number of tokens found */
        delim = *cptr;
        token[tokennr] = cptr;                              /* point to it */
        tokennr++;                                          /* one more */

        /* XXX this is broken.  It leaves superfluous \\ characters in the text */
        if ((delim == '\'') || (delim == '"')) {            /* delimitered */
            for (;;) {
                cptr++;
                if ((*cptr == delim) && (cptr[-1] != '\\')) { /* found the partner */
                    cptr++;                                 /* move on past */
                    if (!isspace((unsigned char) *cptr))    /* error, no space after closing quote */
                        return -1;
                    *cptr++ = '\0';                         /* delimit */
                    break;                                  /* this token is complete, look for the next */
                } else if ((*cptr == '\0') || (*cptr == '\n')) /* end of line */
                    return -1;
            }
        } else {                                            /* not quoted */
            while ((*cptr != '\0') && (!isspace((unsigned char) *cptr)) && (*cptr != '\n'))
                cptr++;
            if (*cptr != '\0')                              /* not end of the line, */
                *cptr++ = '\0';                             /* delimit and move to the next */
        }
    }
}
|
||||
|
||||
/* Find a keyword and return an index */
|
||||
enum keyword
|
||||
get_keyword(char *name, struct keywordset *keywordset)
|
||||
{
|
||||
int i;
|
||||
struct _keywords *keywords = keywordset->k; /* point to the keywords */
|
||||
for (i = 0; i < keywordset->size; i++)
|
||||
if (!strcmp(name, keywords[i].name))
|
||||
return (enum keyword) keywords[i].keyword;
|
||||
return kw_invalid_keyword;
|
||||
}
|
882
sys/modules/vinum/request.c
Normal file
882
sys/modules/vinum/request.c
Normal file
@ -0,0 +1,882 @@
|
||||
/* XXX to do:
|
||||
|
||||
* Decide where we need splbio ()
|
||||
*/
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: request.c,v 1.17 1998/08/13 06:04:47 grog Exp grog $
|
||||
*/
|
||||
|
||||
#define REALLYKERNEL
|
||||
#include "vinumhdr.h"
|
||||
#include "request.h"
|
||||
#include <miscfs/specfs/specdev.h>
|
||||
#include <sys/resourcevar.h>
|
||||
|
||||
/* pointer to ioctl p parameter, to save passing it around */
|
||||
extern struct proc *myproc;
|
||||
|
||||
enum requeststatus bre(struct request *rq,
|
||||
int plexno,
|
||||
daddr_t * diskstart,
|
||||
daddr_t diskend);
|
||||
enum requeststatus bre5(struct request *rq,
|
||||
int plexno,
|
||||
daddr_t * diskstart,
|
||||
daddr_t diskend);
|
||||
enum requeststatus build_read_request(struct request *rq, int volplexno);
|
||||
enum requeststatus build_write_request(struct request *rq);
|
||||
enum requeststatus build_rq_buffer(struct rqelement *rqe, struct plex *plex);
|
||||
void freerq(struct request *rq);
|
||||
void free_rqg(struct rqgroup *rqg);
|
||||
int find_alternate_sd(struct request *rq);
|
||||
int check_range_covered(struct request *);
|
||||
void complete_rqe(struct buf *bp);
|
||||
void complete_raid5_write(struct rqelement *);
|
||||
int abortrequest(struct request *rq, int error);
|
||||
void sdio(struct buf *bp);
|
||||
void sdio_done(struct buf *bp);
|
||||
int vinum_bounds_check(struct buf *bp, struct volume *vol);
|
||||
caddr_t allocdatabuf(struct rqelement *rqe);
|
||||
void freedatabuf(struct rqelement *rqe);
|
||||
|
||||
void
|
||||
vinumstrategy(struct buf *bp)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
int volno;
|
||||
struct volume *vol = NULL;
|
||||
int s;
|
||||
struct devcode *device = (struct devcode *) &bp->b_dev; /* decode device number */
|
||||
enum requeststatus status;
|
||||
|
||||
switch (device->type) {
|
||||
case VINUM_SD_TYPE:
|
||||
sdio(bp);
|
||||
return;
|
||||
|
||||
/* In fact, vinum doesn't handle drives: they're
|
||||
* handled directly by the disk drivers */
|
||||
case VINUM_DRIVE_TYPE:
|
||||
default:
|
||||
bp->b_error = EIO; /* I/O error */
|
||||
bp->b_flags |= B_ERROR;
|
||||
biodone(bp);
|
||||
return;
|
||||
|
||||
case VINUM_VOLUME_TYPE: /* volume I/O */
|
||||
volno = VOLNO(bp->b_dev);
|
||||
vol = &VOL[volno];
|
||||
if (vol->state != volume_up) { /* can't access this volume */
|
||||
bp->b_error = EIO; /* I/O error */
|
||||
bp->b_flags |= B_ERROR;
|
||||
biodone(bp);
|
||||
return;
|
||||
}
|
||||
if (vinum_bounds_check(bp, vol) <= 0) { /* don't like them bounds */
|
||||
biodone(bp); /* have nothing to do with this */
|
||||
return;
|
||||
}
|
||||
/* FALLTHROUGH */
|
||||
/* Plex I/O is pretty much the same as volume I/O
|
||||
* for a single plex. Indicate this by passing a NULL
|
||||
* pointer (set above) for the volume */
|
||||
case VINUM_PLEX_TYPE:
|
||||
bp->b_resid = bp->b_bcount; /* transfer everything */
|
||||
vinumstart(bp, 0);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* Start a transfer. Return -1 on error,
|
||||
* 0 if OK, 1 if we need to retry.
|
||||
* Parameter reviveok is set when doing
|
||||
* transfers for revives: it allows transfers to
|
||||
* be started immediately when a revive is in
|
||||
* progress. During revive, normal transfers
|
||||
* are queued if they share address space with
|
||||
* a currently active revive operation. */
|
||||
int
|
||||
vinumstart(struct buf *bp, int reviveok)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
int plexno;
|
||||
int maxplex; /* maximum number of plexes to handle */
|
||||
struct volume *vol;
|
||||
struct rqgroup *rqg; /* current plex's requests */
|
||||
struct rqelement *rqe; /* individual element */
|
||||
struct request *rq; /* build up our request here */
|
||||
int rqno; /* index in request list */
|
||||
enum requeststatus status;
|
||||
|
||||
/* XXX In these routines, we're assuming that
|
||||
* we will always be called with bp->b_bcount
|
||||
* which is a multiple of the sector size. This
|
||||
* is a reasonable assumption, since we are only
|
||||
* called from system routines. Should we check
|
||||
* anyway? */
|
||||
|
||||
if ((bp->b_bcount % DEV_BSIZE) != 0) { /* bad length */
|
||||
bp->b_error = EINVAL; /* invalid size */
|
||||
bp->b_flags |= B_ERROR;
|
||||
biodone(bp);
|
||||
return -1;
|
||||
}
|
||||
rq = (struct request *) Malloc(sizeof(struct request)); /* allocate a request struct */
|
||||
if (rq == NULL) { /* can't do it */
|
||||
bp->b_error = ENOMEM; /* can't get memory */
|
||||
bp->b_flags |= B_ERROR;
|
||||
biodone(bp);
|
||||
return -1;
|
||||
}
|
||||
bzero(rq, sizeof(struct request));
|
||||
|
||||
/* Note the volume ID. This can be NULL, which
|
||||
* the request building functions use as an
|
||||
* indication for single plex I/O */
|
||||
rq->bp = bp; /* and the user buffer struct */
|
||||
|
||||
if (DEVTYPE(bp->b_dev) == VINUM_VOLUME_TYPE) { /* it's a volume, */
|
||||
rq->volplex.volno = VOLNO(bp->b_dev); /* get the volume number */
|
||||
vol = &VOL[rq->volplex.volno]; /* and point to it */
|
||||
vol->active++; /* one more active request */
|
||||
maxplex = vol->plexes; /* consider all its plexes */
|
||||
} else {
|
||||
vol = NULL; /* no volume */
|
||||
rq->volplex.plexno = PLEXNO(bp->b_dev); /* point to the plex */
|
||||
rq->isplex = 1; /* note that it's a plex */
|
||||
maxplex = 1; /* just the one plex */
|
||||
}
|
||||
|
||||
if (bp->b_flags & B_READ) {
|
||||
/* This is a read request. Decide
|
||||
* which plex to read from.
|
||||
*
|
||||
* There's a potential race condition here,
|
||||
* since we're not locked, and we could end
|
||||
* up multiply incrementing the round-robin
|
||||
* counter. This doesn't have any serious
|
||||
* effects, however. */
|
||||
if (vol != NULL) {
|
||||
vol->reads++;
|
||||
vol->bytes_read += bp->b_bcount;
|
||||
plexno = vol->preferred_plex; /* get the plex to use */
|
||||
if (plexno < 0) { /* round robin */
|
||||
plexno = vol->last_plex_read;
|
||||
vol->last_plex_read++;
|
||||
if (vol->last_plex_read == vol->plexes) /* got the the end? */
|
||||
vol->last_plex_read = 0; /* wrap around */
|
||||
}
|
||||
status = build_read_request(rq, plexno); /* build a request */
|
||||
} else {
|
||||
daddr_t diskaddr = bp->b_blkno; /* start offset of transfer */
|
||||
status = bre(rq, /* build a request list */
|
||||
rq->volplex.plexno,
|
||||
&diskaddr,
|
||||
diskaddr + (bp->b_bcount / DEV_BSIZE));
|
||||
}
|
||||
|
||||
if ((status > REQUEST_RECOVERED) /* can't satisfy it */
|
||||
||(bp->b_flags & B_DONE)) { /* XXX shouldn't get this without bad status */
|
||||
if (status == REQUEST_DOWN) { /* not enough subdisks */
|
||||
bp->b_error = EIO; /* I/O error */
|
||||
bp->b_flags |= B_ERROR;
|
||||
}
|
||||
biodone(bp);
|
||||
freerq(rq);
|
||||
return -1;
|
||||
}
|
||||
return launch_requests(rq, reviveok); /* now start the requests if we can */
|
||||
} else
|
||||
/* This is a write operation. We write to all
|
||||
* plexes. If this is a RAID 5 plex, we must also
|
||||
* update the parity stripe. */
|
||||
{
|
||||
if (vol != NULL) {
|
||||
vol->writes++;
|
||||
vol->bytes_written += bp->b_bcount;
|
||||
status = build_write_request(rq); /* Not all the subdisks are up */
|
||||
} else { /* plex I/O */
|
||||
daddr_t diskstart;
|
||||
|
||||
diskstart = bp->b_blkno; /* start offset of transfer */
|
||||
status = bre(rq,
|
||||
PLEXNO(bp->b_dev),
|
||||
&diskstart,
|
||||
bp->b_blkno + (bp->b_bcount / DEV_BSIZE)); /* build requests for the plex */
|
||||
}
|
||||
if ((status > REQUEST_RECOVERED) /* can't satisfy it */
|
||||
||(bp->b_flags & B_DONE)) { /* XXX shouldn't get this without bad status */
|
||||
if (status == REQUEST_DOWN) { /* not enough subdisks */
|
||||
bp->b_error = EIO; /* I/O error */
|
||||
bp->b_flags |= B_ERROR;
|
||||
}
|
||||
if ((bp->b_flags & B_DONE) == 0)
|
||||
biodone(bp);
|
||||
freerq(rq);
|
||||
return -1;
|
||||
}
|
||||
return launch_requests(rq, reviveok); /* start the requests */
|
||||
}
|
||||
}
|
||||
|
||||
/* Call the low-level strategy routines to
|
||||
* perform the requests in a struct request */
|
||||
int
|
||||
launch_requests(struct request *rq, int reviveok)
|
||||
{
|
||||
struct rqgroup *rqg;
|
||||
int rqno; /* loop index */
|
||||
struct rqelement *rqe; /* current element */
|
||||
int s;
|
||||
|
||||
/* First find out whether we're reviving, and the
|
||||
* request contains a conflict. If so, we hang
|
||||
* the request off plex->waitlist of the first
|
||||
* plex we find which is reviving */
|
||||
if ((rq->flags & XFR_REVIVECONFLICT) /* possible revive conflict */
|
||||
&&(!reviveok)) { /* and we don't want to do it now, */
|
||||
struct volume *vol = &VOL[VOLNO(rq->bp->b_dev)];
|
||||
struct plex *plex;
|
||||
int plexno;
|
||||
|
||||
for (plexno = 0; plexno < vol->plexes; plexno++) { /* find the reviving plex */
|
||||
plex = &PLEX[vol->plex[plexno]];
|
||||
if (plex->state == plex_reviving) /* found it */
|
||||
break;
|
||||
}
|
||||
if (plexno < vol->plexes) { /* found it? */
|
||||
struct request *waitlist = plex->waitlist; /* point to the waiting list */
|
||||
|
||||
while (waitlist->next != NULL) /* find the end */
|
||||
waitlist = waitlist->next;
|
||||
waitlist->next = rq; /* hook our request there */
|
||||
return 0; /* and get out of here */
|
||||
} else /* bad vinum, bad */
|
||||
printf("vinum: can't find reviving plex for volume %s\n", vol->name);
|
||||
}
|
||||
rq->active = 0; /* nothing yet */
|
||||
/* XXX This is probably due to a bug */
|
||||
if (rq->rqg == NULL) { /* no request */
|
||||
abortrequest(rq, EINVAL);
|
||||
return -1;
|
||||
}
|
||||
#if DEBUG
|
||||
if (debug & DEBUG_ADDRESSES)
|
||||
printf("Request: %x\nWrite dev 0x%x, offset 0x%x, length %ld\n",
|
||||
(u_int) rq,
|
||||
rq->bp->b_dev,
|
||||
rq->bp->b_blkno,
|
||||
rq->bp->b_bcount); /* XXX */
|
||||
vinum_conf.lastrq = (int) rq;
|
||||
vinum_conf.lastbuf = rq->bp;
|
||||
#endif
|
||||
for (rqg = rq->rqg; rqg != NULL; rqg = rqg->next) { /* through the whole request chain */
|
||||
rqg->active = rqg->count; /* they're all active */
|
||||
rq->active++; /* one more active request group */
|
||||
for (rqno = 0; rqno < rqg->count; rqno++) {
|
||||
rqe = &rqg->rqe[rqno];
|
||||
if (rqe->flags & XFR_BAD_SUBDISK) /* this subdisk is bad, */
|
||||
rqg->active--; /* one less active request */
|
||||
else {
|
||||
struct drive *drive = &DRIVE[rqe->driveno]; /* drive to access */
|
||||
if ((rqe->b.b_flags & B_READ) == 0)
|
||||
rqe->b.b_vp->v_numoutput++; /* one more output going */
|
||||
#if DEBUG
|
||||
if (debug & DEBUG_ADDRESSES)
|
||||
printf(" %s dev 0x%x, sd %d, offset 0x%x, devoffset 0x%x, length %ld\n",
|
||||
rqe->b.b_flags & B_READ ? "Read" : "Write",
|
||||
rqe->b.b_dev,
|
||||
rqe->sdno,
|
||||
(u_int) (rqe->b.b_blkno - SD[rqe->sdno].driveoffset),
|
||||
rqe->b.b_blkno,
|
||||
rqe->b.b_bcount); /* XXX */
|
||||
if (debug & DEBUG_NUMOUTPUT)
|
||||
printf(" vinumstart sd %d numoutput %ld\n",
|
||||
rqe->sdno,
|
||||
rqe->b.b_vp->v_numoutput);
|
||||
#endif
|
||||
/* fire off the request */
|
||||
s = splbio();
|
||||
(*bdevsw[major(rqe->b.b_dev)]->d_strategy) (&rqe->b);
|
||||
splx(s);
|
||||
}
|
||||
/* XXX Do we need caching? Think about this more */
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* define the low-level requests needed to perform a
|
||||
* high-level I/O operation for a specific plex 'plexno'.
|
||||
*
|
||||
* Return 0 if all subdisks involved in the request are up, 1 if some
|
||||
* subdisks are not up, and -1 if the request is at least partially
|
||||
* outside the bounds of the subdisks.
|
||||
*
|
||||
* Modify the pointer *diskstart to point to the end address. On
|
||||
* read, return on the first bad subdisk, so that the caller
|
||||
* (build_read_request) can try alternatives.
|
||||
*
|
||||
* On entry to this routine, the rqg structures are not assigned. The
|
||||
* assignment is performed by expandrq(). Strictly speaking, the
|
||||
* elements rqe->sdno of all entries should be set to -1, since 0
|
||||
* (from bzero) is a valid subdisk number. We avoid this problem by
|
||||
* initializing the ones we use, and not looking at the others (index
|
||||
* >= rqg->requests).
|
||||
*/
|
||||
enum requeststatus
|
||||
bre(struct request *rq,
|
||||
int plexno,
|
||||
daddr_t * diskaddr,
|
||||
daddr_t diskend)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
int sdno;
|
||||
struct sd *sd;
|
||||
struct rqgroup *rqg;
|
||||
struct buf *bp; /* user's bp */
|
||||
struct plex *plex;
|
||||
enum requeststatus status; /* return value */
|
||||
daddr_t plexoffset; /* offset of transfer in plex */
|
||||
daddr_t stripebase; /* base address of stripe (1st subdisk) */
|
||||
daddr_t stripeoffset; /* offset in stripe */
|
||||
daddr_t blockoffset; /* offset in stripe on subdisk */
|
||||
struct rqelement *rqe; /* point to this request information */
|
||||
daddr_t diskstart = *diskaddr; /* remember where this transfer starts */
|
||||
|
||||
bp = rq->bp; /* buffer pointer */
|
||||
status = REQUEST_OK; /* return value: OK until proven otherwise */
|
||||
plex = &PLEX[plexno]; /* point to the plex */
|
||||
|
||||
switch (plex->organization) {
|
||||
case plex_concat:
|
||||
for (sdno = 0; sdno < plex->subdisks; sdno++) {
|
||||
sd = &SD[plex->sdnos[sdno]];
|
||||
if ((*diskaddr < (sd->plexoffset + sd->sectors)) /* The request starts before the end of this */
|
||||
&&(diskend > sd->plexoffset)) { /* subdisk and ends after the start of this sd */
|
||||
if ((sd->state != sd_up) || (plex->state != plex_up)) {
|
||||
enum requeststatus s;
|
||||
|
||||
s = checksdstate(sd, rq, *diskaddr, diskend); /* do we need to change state? */
|
||||
if (s) /* give up? */
|
||||
return s; /* yup */
|
||||
}
|
||||
rqg = allocrqg(rq, 1); /* space for the request */
|
||||
if (rqg == NULL) { /* malloc failed */
|
||||
bp->b_flags |= B_ERROR;
|
||||
bp->b_error = ENOMEM;
|
||||
biodone(bp);
|
||||
return REQUEST_ENOMEM;
|
||||
}
|
||||
rqg->plexno = plexno;
|
||||
|
||||
rqe = &rqg->rqe[0]; /* point to the element */
|
||||
rqe->rqg = rqg; /* group */
|
||||
rqe->sdno = sd->sdno; /* put in the subdisk number */
|
||||
plexoffset = max(sd->plexoffset, *diskaddr); /* start offset in plex */
|
||||
rqe->sdoffset = plexoffset - sd->plexoffset; /* start offset in subdisk */
|
||||
rqe->useroffset = plexoffset - diskstart; /* start offset in user buffer */
|
||||
rqe->dataoffset = 0;
|
||||
rqe->datalen = min(diskend - *diskaddr, /* number of sectors to transfer in this sd */
|
||||
sd->sectors - rqe->sdoffset);
|
||||
rqe->groupoffset = 0; /* no groups for concatenated plexes */
|
||||
rqe->grouplen = 0;
|
||||
rqe->buflen = rqe->datalen; /* buffer length is data buffer length */
|
||||
rqe->flags = 0;
|
||||
rqe->driveno = sd->driveno;
|
||||
*diskaddr += rqe->datalen; /* bump the address */
|
||||
if (build_rq_buffer(rqe, plex)) { /* build the buffer */
|
||||
deallocrqg(rqg);
|
||||
bp->b_flags |= B_ERROR;
|
||||
bp->b_error = ENOMEM;
|
||||
biodone(bp);
|
||||
return REQUEST_ENOMEM; /* can't do it */
|
||||
}
|
||||
}
|
||||
if (*diskaddr > diskend) /* we're finished, */
|
||||
break; /* get out of here */
|
||||
}
|
||||
break;
|
||||
|
||||
case plex_striped:
|
||||
{
|
||||
while (*diskaddr < diskend) { /* until we get it all sorted out */
|
||||
/* The offset of the start address from
|
||||
* the start of the stripe */
|
||||
stripeoffset = *diskaddr % (plex->stripesize * plex->subdisks);
|
||||
|
||||
/* The plex-relative address of the
|
||||
* start of the stripe */
|
||||
stripebase = *diskaddr - stripeoffset;
|
||||
|
||||
/* The number of the subdisk in which
|
||||
* the start is located */
|
||||
sdno = stripeoffset / plex->stripesize;
|
||||
|
||||
/* The offset from the beginning of the stripe
|
||||
* on this subdisk */
|
||||
blockoffset = stripeoffset % plex->stripesize;
|
||||
|
||||
sd = &SD[plex->sdnos[sdno]]; /* the subdisk in question */
|
||||
if ((sd->state != sd_up) || (plex->state != plex_up)) {
|
||||
enum requeststatus s;
|
||||
|
||||
s = checksdstate(sd, rq, *diskaddr, diskend); /* do we need to change state? */
|
||||
if (s) /* give up? */
|
||||
return s; /* yup */
|
||||
}
|
||||
rqg = allocrqg(rq, 1); /* space for the request */
|
||||
if (rqg == NULL) { /* malloc failed */
|
||||
bp->b_flags |= B_ERROR;
|
||||
bp->b_error = ENOMEM;
|
||||
biodone(bp);
|
||||
return REQUEST_ENOMEM;
|
||||
}
|
||||
rqg->plexno = plexno;
|
||||
|
||||
rqe = &rqg->rqe[0]; /* point to the element */
|
||||
rqe->rqg = rqg;
|
||||
rqe->sdoffset = stripebase / plex->subdisks + blockoffset; /* start offset in this subdisk */
|
||||
rqe->useroffset = *diskaddr - diskstart; /* The offset of the start in the user buffer */
|
||||
rqe->dataoffset = 0;
|
||||
rqe->datalen = min(diskend - *diskaddr, /* the amount remaining to transfer */
|
||||
plex->stripesize - blockoffset); /* and the amount left in this stripe */
|
||||
rqe->groupoffset = 0; /* no groups for striped plexes */
|
||||
rqe->grouplen = 0;
|
||||
rqe->buflen = rqe->datalen; /* buffer length is data buffer length */
|
||||
rqe->flags = 0;
|
||||
rqe->sdno = sd->sdno; /* put in the subdisk number */
|
||||
rqe->driveno = sd->driveno;
|
||||
|
||||
if (rqe->sdoffset >= sd->sectors) { /* starts beyond the end of the subdisk? */
|
||||
deallocrqg(rqg);
|
||||
return REQUEST_EOF;
|
||||
} else if (rqe->sdoffset + rqe->datalen > sd->sectors) /* ends beyond the end of the subdisk? */
|
||||
rqe->datalen = sd->sectors - rqe->sdoffset; /* yes, truncate */
|
||||
|
||||
if (build_rq_buffer(rqe, plex)) { /* build the buffer */
|
||||
deallocrqg(rqg);
|
||||
bp->b_flags |= B_ERROR;
|
||||
bp->b_error = ENOMEM;
|
||||
biodone(bp);
|
||||
return REQUEST_ENOMEM; /* can't do it */
|
||||
}
|
||||
*diskaddr += rqe->datalen; /* look at the remainder */
|
||||
if (*diskaddr < diskend) { /* didn't finish the request on this stripe */
|
||||
plex->multiblock++; /* count another one */
|
||||
if (sdno == plex->subdisks - 1) /* last subdisk, */
|
||||
plex->multistripe++; /* another stripe as well */
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
|
||||
default:
|
||||
printf("vinum: invalid plex type in bre");
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
/* Build up a request structure for reading volumes.
|
||||
* This function is not needed for plex reads, since there's
|
||||
* no recovery if a plex read can't be satisified. */
|
||||
enum requeststatus
|
||||
build_read_request(struct request *rq, /* request */
|
||||
int plexindex)
|
||||
{ /* index in the volume's plex table */
|
||||
BROKEN_GDB;
|
||||
struct buf *bp;
|
||||
daddr_t startaddr; /* offset of previous part of transfer */
|
||||
daddr_t diskaddr; /* offset of current part of transfer */
|
||||
daddr_t diskend; /* and end offset of transfer */
|
||||
int plexno; /* plex index in vinum_conf */
|
||||
struct rqgroup *rqg; /* point to the request we're working on */
|
||||
struct volume *vol; /* volume in question */
|
||||
off_t oldstart; /* note where we started */
|
||||
int recovered = 0; /* set if we recover a read */
|
||||
enum requeststatus status = REQUEST_OK;
|
||||
|
||||
bp = rq->bp; /* buffer pointer */
|
||||
diskaddr = bp->b_blkno; /* start offset of transfer */
|
||||
diskend = diskaddr + (bp->b_bcount / DEV_BSIZE); /* and end offset of transfer */
|
||||
rqg = &rq->rqg[plexindex]; /* plex request */
|
||||
vol = &VOL[rq->volplex.volno]; /* point to volume */
|
||||
|
||||
while (diskaddr < diskend) { /* build up request components */
|
||||
startaddr = diskaddr;
|
||||
status = bre(rq, vol->plex[plexindex], &diskaddr, diskend); /* build up a request */
|
||||
switch (status) {
|
||||
case REQUEST_OK:
|
||||
continue;
|
||||
|
||||
case REQUEST_RECOVERED:
|
||||
recovered = 1;
|
||||
break;
|
||||
|
||||
case REQUEST_EOF:
|
||||
case REQUEST_ENOMEM:
|
||||
return status;
|
||||
|
||||
/* if we get here, we have either had a failure or
|
||||
* a RAID 5 recovery. We don't want to use the
|
||||
* recovery, because it's expensive, so first we
|
||||
* check if we have alternatives */
|
||||
case REQUEST_DOWN: /* can't access the plex */
|
||||
if (vol != NULL) { /* and this is volume I/O */
|
||||
/* Try to satisfy the request
|
||||
* from another plex */
|
||||
for (plexno = 0; plexno < vol->plexes; plexno++) {
|
||||
diskaddr = startaddr; /* start at the beginning again */
|
||||
oldstart = startaddr; /* and note where that was */
|
||||
if (plexno != plexindex) { /* don't try this plex again */
|
||||
bre(rq, vol->plex[plexno], &diskaddr, diskend); /* try a request */
|
||||
if (diskaddr > oldstart) { /* we satisfied another part */
|
||||
recovered = 1; /* we recovered from the problem */
|
||||
status = REQUEST_OK; /* don't complain about it */
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (plexno == (vol->plexes - 1)) /* couldn't satisfy the request */
|
||||
return REQUEST_DOWN; /* failed */
|
||||
}
|
||||
} else
|
||||
return REQUEST_DOWN; /* bad luck */
|
||||
}
|
||||
if (recovered)
|
||||
vol->recovered_reads += recovered; /* adjust our recovery count */
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
/* Build up a request structure for writes.
|
||||
* Return 0 if all subdisks involved in the request are up, 1 if some
|
||||
* subdisks are not up, and -1 if the request is at least partially
|
||||
* outside the bounds of the subdisks. */
|
||||
enum requeststatus
|
||||
build_write_request(struct request *rq)
|
||||
{ /* request */
|
||||
BROKEN_GDB;
|
||||
struct buf *bp;
|
||||
daddr_t diskstart; /* offset of current part of transfer */
|
||||
daddr_t diskend; /* and end offset of transfer */
|
||||
int plexno; /* plex index in vinum_conf */
|
||||
struct volume *vol; /* volume in question */
|
||||
enum requeststatus status;
|
||||
|
||||
bp = rq->bp; /* buffer pointer */
|
||||
vol = &VOL[rq->volplex.volno]; /* point to volume */
|
||||
diskend = bp->b_blkno + (bp->b_bcount / DEV_BSIZE); /* end offset of transfer */
|
||||
status = REQUEST_OK;
|
||||
for (plexno = 0; plexno < vol->plexes; plexno++) {
|
||||
diskstart = bp->b_blkno; /* start offset of transfer */
|
||||
status = min(status, bre(rq, /* build requests for the plex */
|
||||
vol->plex[plexno],
|
||||
&diskstart,
|
||||
diskend));
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
/* Fill in the struct buf part of a request element. */
|
||||
enum requeststatus
|
||||
build_rq_buffer(struct rqelement *rqe, struct plex *plex)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
struct sd *sd; /* point to subdisk */
|
||||
struct volume *vol;
|
||||
struct buf *bp;
|
||||
struct buf *ubp; /* user (high level) buffer header */
|
||||
|
||||
vol = &VOL[rqe->rqg->rq->volplex.volno];
|
||||
sd = &SD[rqe->sdno]; /* point to subdisk */
|
||||
bp = &rqe->b;
|
||||
ubp = rqe->rqg->rq->bp; /* pointer to user buffer header */
|
||||
|
||||
/* Initialize the buf struct */
|
||||
bzero(&rqe->b, sizeof(struct buf));
|
||||
bp->b_proc = ubp->b_proc; /* process pointer */
|
||||
bp->b_flags = ubp->b_flags & (B_NOCACHE | B_READ | B_ASYNC); /* copy these flags from user bp */
|
||||
bp->b_flags |= B_CALL | B_BUSY; /* inform us when it's done */
|
||||
if (plex->state == plex_reviving)
|
||||
bp->b_flags |= B_ORDERED; /* keep request order if we're reviving */
|
||||
bp->b_iodone = complete_rqe; /* by calling us here */
|
||||
bp->b_dev = DRIVE[rqe->driveno].dev; /* drive device */
|
||||
bp->b_blkno = rqe->sdoffset + sd->driveoffset; /* start address */
|
||||
bp->b_bcount = rqe->buflen << DEV_BSHIFT; /* number of bytes to transfer */
|
||||
bp->b_resid = bp->b_bcount; /* and it's still all waiting */
|
||||
bp->b_bufsize = bp->b_bcount; /* and buffer size */
|
||||
bp->b_vp = DRIVE[rqe->driveno].vp; /* drive vnode */
|
||||
bp->b_rcred = FSCRED; /* we have the file system credentials */
|
||||
bp->b_wcred = FSCRED; /* we have the file system credentials */
|
||||
|
||||
if (rqe->flags & XFR_MALLOCED) { /* this operation requires a malloced buffer */
|
||||
bp->b_data = Malloc(bp->b_bcount); /* get a buffer to put it in */
|
||||
if (bp->b_data == NULL) { /* failed */
|
||||
Debugger("XXX");
|
||||
abortrequest(rqe->rqg->rq, ENOMEM);
|
||||
return REQUEST_ENOMEM; /* no memory */
|
||||
}
|
||||
} else
|
||||
/* Point directly to user buffer data. This means
|
||||
* that we don't need to do anything when we have
|
||||
* finished the transfer */
|
||||
bp->b_data = ubp->b_data + rqe->useroffset * DEV_BSIZE;
|
||||
return 0;
|
||||
}
|
||||
/* Abort a request: free resources and complete the
|
||||
* user request with the specified error */
|
||||
int
|
||||
abortrequest(struct request *rq, int error)
|
||||
{
|
||||
struct buf *bp = rq->bp; /* user buffer */
|
||||
|
||||
bp->b_flags |= B_ERROR;
|
||||
bp->b_error = error;
|
||||
freerq(rq); /* free everything we're doing */
|
||||
biodone(bp);
|
||||
return error; /* and give up */
|
||||
}
|
||||
|
||||
/* Check that our transfer will cover the
 * complete address space of the user request.
 *
 * Return 1 if it can, otherwise 0.
 *
 * XXX This is still a stub: rq is not examined,
 * and we always return 1. */
int
check_range_covered(struct request *rq)
{
    (void) rq;                          /* not looked at yet */
    return 1;
}
|
||||
|
||||
/* Perform I/O on a subdisk */
|
||||
void
|
||||
sdio(struct buf *bp)
|
||||
{
|
||||
int s; /* spl */
|
||||
struct sd *sd;
|
||||
struct sdbuf *sbp;
|
||||
daddr_t endoffset;
|
||||
struct drive *drive;
|
||||
|
||||
sd = &SD[SDNO(bp->b_dev)]; /* point to the subdisk */
|
||||
drive = &DRIVE[sd->driveno];
|
||||
|
||||
if (drive->state != drive_up) { /* XXX until we get the states fixed */
|
||||
set_sd_state(SDNO(bp->b_dev), sd_obsolete, setstate_force);
|
||||
bp->b_flags |= B_ERROR;
|
||||
bp->b_error = EIO;
|
||||
biodone(bp);
|
||||
return;
|
||||
}
|
||||
/* XXX decide which states we will really accept here. up
|
||||
* implies it could be involved with a plex, in which
|
||||
* case we don't want to dick with it */
|
||||
if ((sd->state != sd_up)
|
||||
&& (sd->state != sd_initializing)
|
||||
&& (sd->state != sd_reborn)) { /* we can't access it */
|
||||
bp->b_flags |= B_ERROR;
|
||||
bp->b_flags = EIO;
|
||||
if (bp->b_flags & B_BUSY) /* XXX why isn't this always the case? */
|
||||
biodone(bp);
|
||||
return;
|
||||
}
|
||||
/* Get a buffer */
|
||||
sbp = (struct sdbuf *) Malloc(sizeof(struct sdbuf));
|
||||
if (sbp == NULL) {
|
||||
bp->b_flags |= B_ERROR;
|
||||
bp->b_error = ENOMEM;
|
||||
biodone(bp);
|
||||
return;
|
||||
}
|
||||
bcopy(bp, &sbp->b, sizeof(struct buf)); /* start with the user's buffer */
|
||||
sbp->b.b_flags |= B_CALL; /* tell us when it's done */
|
||||
sbp->b.b_iodone = sdio_done; /* here */
|
||||
sbp->b.b_dev = DRIVE[sd->driveno].dev; /* device */
|
||||
sbp->b.b_vp = DRIVE[sd->driveno].vp; /* vnode */
|
||||
sbp->b.b_blkno += sd->driveoffset;
|
||||
sbp->bp = bp; /* note the address of the original header */
|
||||
sbp->sdno = sd->sdno; /* note for statistics */
|
||||
sbp->driveno = sd->driveno;
|
||||
endoffset = bp->b_blkno + sbp->b.b_bcount / DEV_BSIZE; /* final sector offset */
|
||||
if (endoffset > sd->sectors) { /* beyond the end */
|
||||
sbp->b.b_bcount -= (endoffset - sd->sectors) * DEV_BSIZE; /* trim */
|
||||
if (sbp->b.b_bcount <= 0) { /* nothing to transfer */
|
||||
bp->b_resid = bp->b_bcount; /* nothing transferred */
|
||||
/* XXX Grrr. This doesn't seem to work. Return
|
||||
* an error after all */
|
||||
bp->b_flags |= B_ERROR;
|
||||
bp->b_error = ENOSPC;
|
||||
biodone(bp);
|
||||
Free(sbp);
|
||||
return;
|
||||
}
|
||||
}
|
||||
if ((sbp->b.b_flags & B_READ) == 0) /* write */
|
||||
sbp->b.b_vp->v_numoutput++; /* one more output going */
|
||||
#if DEBUG
|
||||
if (debug & DEBUG_ADDRESSES)
|
||||
printf(" %s dev 0x%x, sd %d, offset 0x%x, devoffset 0x%x, length %ld\n",
|
||||
sbp->b.b_flags & B_READ ? "Read" : "Write",
|
||||
sbp->b.b_dev,
|
||||
sbp->sdno,
|
||||
(u_int) (sbp->b.b_blkno - SD[sbp->sdno].driveoffset),
|
||||
(int) sbp->b.b_blkno,
|
||||
sbp->b.b_bcount); /* XXX */
|
||||
if (debug & DEBUG_NUMOUTPUT)
|
||||
printf(" vinumstart sd %d numoutput %ld\n",
|
||||
sbp->sdno,
|
||||
sbp->b.b_vp->v_numoutput);
|
||||
#endif
|
||||
s = splbio();
|
||||
(*bdevsw[major(sbp->b.b_dev)]->d_strategy) (&sbp->b);
|
||||
splx(s);
|
||||
}
|
||||
|
||||
/* Simplified version of bounds_check_with_label
|
||||
* Determine the size of the transfer, and make sure it is
|
||||
* within the boundaries of the partition. Adjust transfer
|
||||
* if needed, and signal errors or early completion.
|
||||
*
|
||||
* Volumes are simpler than disk slices: they only contain
|
||||
* one component (though we call them a, b and c to make
|
||||
* system utilities happy), and they always take up the
|
||||
* complete space of the "partition".
|
||||
*
|
||||
* I'm still not happy with this: why should the label be
|
||||
* protected? If it weren't so damned difficult to write
|
||||
* one in the first pleace (because it's protected), it wouldn't
|
||||
* be a problem.
|
||||
*/
|
||||
int
|
||||
vinum_bounds_check(struct buf *bp, struct volume *vol)
|
||||
{
|
||||
int maxsize = vol->size; /* size of the partition (sectors) */
|
||||
int size = (bp->b_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT; /* size of this request (sectors) */
|
||||
|
||||
/* Would this transfer overwrite the disk label? */
|
||||
if (bp->b_blkno <= LABELSECTOR /* starts before or at the label */
|
||||
#if LABELSECTOR != 0
|
||||
&& bp->b_blkno + size > LABELSECTOR /* and finishes after */
|
||||
#endif
|
||||
&& (!(vol->flags & VF_RAW)) /* and it's not raw */
|
||||
&&major(bp->b_dev) == BDEV_MAJOR /* and it's the block device */
|
||||
&& (bp->b_flags & B_READ) == 0 /* and it's a write */
|
||||
&& (!vol->flags & (VF_WLABEL | VF_LABELLING))) { /* and we're not allowed to write the label */
|
||||
bp->b_error = EROFS; /* read-only */
|
||||
bp->b_flags |= B_ERROR;
|
||||
return -1;
|
||||
}
|
||||
if (size == 0) /* no transfer specified, */
|
||||
return 0; /* treat as EOF */
|
||||
/* beyond partition? */
|
||||
if (bp->b_blkno < 0 /* negative start */
|
||||
|| bp->b_blkno + size > maxsize) { /* or goes beyond the end of the partition */
|
||||
/* if exactly at end of disk, return an EOF */
|
||||
if (bp->b_blkno == maxsize) {
|
||||
bp->b_resid = bp->b_bcount;
|
||||
return 0;
|
||||
}
|
||||
/* or truncate if part of it fits */
|
||||
size = maxsize - bp->b_blkno;
|
||||
if (size <= 0) { /* nothing to transfer */
|
||||
bp->b_error = EINVAL;
|
||||
bp->b_flags |= B_ERROR;
|
||||
return -1;
|
||||
}
|
||||
bp->b_bcount = size << DEV_BSHIFT;
|
||||
}
|
||||
bp->b_pblkno = bp->b_blkno;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Allocate a request group and hook
|
||||
* it in in the list for rq */
|
||||
struct rqgroup *
|
||||
allocrqg(struct request *rq, int elements)
|
||||
{
|
||||
struct rqgroup *rqg; /* the one we're going to allocate */
|
||||
int size = sizeof(struct rqgroup) + elements * sizeof(struct rqelement);
|
||||
|
||||
rqg = (struct rqgroup *) Malloc(size);
|
||||
if (rqg != NULL) { /* malloc OK, */
|
||||
if (rq->rqg) /* we already have requests */
|
||||
rq->lrqg->next = rqg; /* hang it off the end */
|
||||
else /* first request */
|
||||
rq->rqg = rqg; /* at the start */
|
||||
rq->lrqg = rqg; /* this one is the last in the list */
|
||||
|
||||
bzero(rqg, size); /* no old junk */
|
||||
rqg->rq = rq; /* point back to the parent request */
|
||||
rqg->count = elements; /* number of requests in the group */
|
||||
} else
|
||||
Debugger("XXX");
|
||||
return rqg;
|
||||
}
|
||||
|
||||
/* Deallocate a request group out of a chain. We do
|
||||
* this by linear search: the chain is short, this
|
||||
* almost never happens, and currently it can only
|
||||
* happen to the first member of the chain. */
|
||||
void
|
||||
deallocrqg(struct rqgroup *rqg)
|
||||
{
|
||||
struct rqgroup *rqgc = rqg->rq->rqg; /* point to the request chain */
|
||||
|
||||
if (rqg->rq->rqg == rqg) /* we're first in line */
|
||||
rqg->rq->rqg = rqg->next; /* unhook ourselves */
|
||||
else {
|
||||
while (rqgc->next != rqg) /* find the group */
|
||||
rqgc = rqgc->next;
|
||||
rqgc->next = rqg->next;
|
||||
}
|
||||
Free(rqgc);
|
||||
}
|
||||
|
||||
/* Character device interface */
|
||||
int
|
||||
vinumread(dev_t dev, struct uio *uio, int ioflag)
|
||||
{
|
||||
return (physio(vinumstrategy, NULL, dev, 1, minphys, uio));
|
||||
}
|
||||
|
||||
int
|
||||
vinumwrite(dev_t dev, struct uio *uio, int ioflag)
|
||||
{
|
||||
return (physio(vinumstrategy, NULL, dev, 0, minphys, uio));
|
||||
}
|
159
sys/modules/vinum/request.h
Normal file
159
sys/modules/vinum/request.h
Normal file
@ -0,0 +1,159 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: request.h,v 1.10 1998/08/03 07:15:26 grog Exp grog $
|
||||
*/
|
||||
|
||||
/* Information needed to set up a transfer */
|
||||
|
||||
/* Default number of request elements per transfer.
 *
 * struct buf is surprisingly big (about 300 bytes),
 * and it's part of every request element, so this
 * value is really important.  Most requests don't
 * need more than 2 subrequests per plex.  The table
 * is automatically extended if this value is too
 * small. */
#define RQELTS 2                        /* default of 2 requests per transfer */
|
||||
|
||||
/* Flag bits describing a transfer.  The first group
 * consists of individual bits, the second of masks
 * built from them. */
enum xferinfo {
    XFR_NORMAL_READ = 0x0001,
    XFR_NORMAL_WRITE = 0x0002,          /* write request in normal mode */
    XFR_RECOVERY_READ = 0x0004,
    XFR_DEGRADED_WRITE = 0x0008,
    XFR_PARITYLESS_WRITE = 0x0010,
    XFR_NO_PARITY_STRIPE = 0x0020,      /* parity stripe is not available */
    XFR_DATA_BLOCK = 0x0040,            /* data block in request */
    XFR_PARITY_BLOCK = 0x0080,          /* parity block in request */
    XFR_BAD_SUBDISK = 0x0100,           /* this subdisk is dead */
    XFR_MALLOCED = 0x0200,              /* this buffer is malloced */
#if DEBUG
    XFR_PHASE2 = 0x0800,                /* documentation only: 2nd phase write */
#endif
    XFR_REVIVECONFLICT = 0x1000,        /* possible conflict with a revive operation */

    /* operations that need a parity block */
    XFR_PARITYOP = (XFR_NORMAL_WRITE | XFR_RECOVERY_READ | XFR_DEGRADED_WRITE),

    /* operations that use the group parameters */
    XFR_GROUPOP = (XFR_DEGRADED_WRITE | XFR_RECOVERY_READ),

    /* operations that use the data parameters */
    XFR_DATAOP = (XFR_NORMAL_READ | XFR_NORMAL_WRITE | XFR_PARITYLESS_WRITE),

    /* operations requiring read before write */
    XFR_RBW = (XFR_NORMAL_WRITE | XFR_DEGRADED_WRITE),

    /* operations that need a malloced buffer */
    XFR_NEEDS_MALLOC = (XFR_NORMAL_WRITE | XFR_RECOVERY_READ | XFR_DEGRADED_WRITE)
};
|
||||
|
||||
/* Describe one low-level request, part
|
||||
* of a high-level request. This is an
|
||||
* extended struct buf buffer, and the first
|
||||
* element *must* be a struct buf. We pass this structure
|
||||
* to the I/O routines instead of a struct buf in oder
|
||||
* to be able to locate the high-level request when it
|
||||
* completes.
|
||||
*
|
||||
* All offsets and lengths are in "blocks", i.e. sectors */
|
||||
struct rqelement {
|
||||
struct buf b; /* buf structure */
|
||||
struct rqgroup *rqg; /* pointer to our group */
|
||||
/* Information about the transfer */
|
||||
daddr_t sdoffset; /* offset in subdisk */
|
||||
int useroffset; /* offset in user buffer of normal data */
|
||||
/* dataoffset and datalen refer to "individual"
|
||||
* data transfers (normal read, parityless write)
|
||||
* and also degraded write.
|
||||
*
|
||||
* groupoffset and grouplen refer to the other
|
||||
* "group" operations (normal write, recovery read)
|
||||
* Both the offsets are relative to the start of the
|
||||
* local buffer */
|
||||
int dataoffset; /* offset in buffer of the normal data */
|
||||
int groupoffset; /* offset in buffer of group data */
|
||||
short datalen; /* length of normal data (sectors) */
|
||||
short grouplen; /* length of group data (sectors) */
|
||||
short buflen; /* total buffer length to allocate */
|
||||
short flags; /* really enum xferinfo (see above) */
|
||||
/* Ways to find other components */
|
||||
short sdno; /* subdisk number */
|
||||
short driveno; /* drive number */
|
||||
};
|
||||
|
||||
/* A group of requests built to satisfy a certain
|
||||
* component of a user request */
|
||||
struct rqgroup {
|
||||
struct rqgroup *next; /* pointer to next group */
|
||||
struct request *rq; /* pointer to the request */
|
||||
short count; /* number of requests in this group */
|
||||
short active; /* and number active */
|
||||
short plexno; /* index of plex */
|
||||
int badsdno; /* index of bad subdisk or -1 */
|
||||
enum xferinfo flags; /* description of transfer */
|
||||
struct rqelement rqe[0]; /* and the elements of this request */
|
||||
};
|
||||
|
||||
/* Describe one high-level request and the
 * work we have to do to satisfy it */
struct request {
    struct buf *bp;                     /* pointer to the high-level request */
    int flags;
    union {
        int volno;                      /* volume index */
        int plexno;                     /* or plex index */
    } volplex;
    int error;                          /* current error indication */
    short isplex;                       /* set if this is a plex request */
    short active;                       /* number of subrequests still active */
    struct rqgroup *rqg;                /* pointer to the first group of requests */
    struct rqgroup *lrqg;               /* and to the last group of requests */
    struct request *next;               /* link of waiting requests */
};
|
||||
|
||||
/* Extended buffer header for subdisk I/O. Includes
|
||||
* a pointer to the user I/O request. */
|
||||
struct sdbuf {
|
||||
struct buf b; /* our buffer */
|
||||
struct buf *bp; /* and pointer to parent */
|
||||
short driveno; /* drive index */
|
||||
short sdno; /* and subdisk index */
|
||||
};
|
||||
|
||||
/* Values returned by bre and friends.
 * Be careful with these: they are in order of increasing
 * seriousness.  Some routines check for > REQUEST_RECOVERED
 * to indicate a completely failed request. */
enum requeststatus {
    REQUEST_OK = 0,                     /* request built OK */
    REQUEST_RECOVERED = 1,              /* request OK, but involves RAID5 recovery */
    REQUEST_EOF = 2,                    /* request failed: outside plex */
    REQUEST_DOWN = 3,                   /* request failed: subdisk down */
    REQUEST_ENOMEM = 4                  /* ran out of memory */
};
|
128
sys/modules/vinum/revive.c
Normal file
128
sys/modules/vinum/revive.c
Normal file
@ -0,0 +1,128 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: revive.c,v 1.1 1998/08/14 06:16:59 grog Exp grog $
|
||||
*/
|
||||
|
||||
#define REALLYKERNEL
|
||||
#include "vinumhdr.h"
|
||||
#include "request.h"
|
||||
|
||||
/* revive a block of a plex. Return an error
|
||||
* indication. EAGAIN means successful copy, but
|
||||
* that more blocks remain to be copied.
|
||||
* XXX We should specify a block size here. At the moment,
|
||||
* just take a default value. FIXME */
|
||||
int
|
||||
revive_block(int plexno)
|
||||
{
|
||||
struct plex *plex = &PLEX[plexno];
|
||||
struct buf *bp;
|
||||
int error = EAGAIN;
|
||||
int size;
|
||||
int s; /* priority level */
|
||||
|
||||
if (plex->revive_blocksize == 0) {
|
||||
if (plex->stripesize != 0) /* we're striped, don't revive more than */
|
||||
plex->revive_blocksize = min(DEFAULT_REVIVE_BLOCKSIZE, plex->stripesize); /* one block at a time */
|
||||
else
|
||||
plex->revive_blocksize = DEFAULT_REVIVE_BLOCKSIZE;
|
||||
}
|
||||
size = min(plex->revive_blocksize, plex->length - plex->revived) << DEV_BSHIFT;
|
||||
|
||||
s = splbio();
|
||||
/* Get a buffer */
|
||||
bp = geteblk(size);
|
||||
if (bp == NULL) {
|
||||
splx(s);
|
||||
return ENOMEM;
|
||||
}
|
||||
if (bp->b_qindex != 0) /* on a queue, */
|
||||
bremfree(bp); /* remove it */
|
||||
splx(s);
|
||||
|
||||
/* Amount to transfer: block size, unless it
|
||||
* would overlap the end */
|
||||
bp->b_bufsize = size;
|
||||
bp->b_bcount = bp->b_bufsize;
|
||||
bp->b_resid = 0x0;
|
||||
bp->b_blkno = plex->revived; /* we've got this far */
|
||||
|
||||
/* XXX what about reviving anonymous plexes? */
|
||||
|
||||
/* First, read the data from the volume. We don't
|
||||
* care which plex, that's bre's job */
|
||||
bp->b_dev = VINUMBDEV(plex->volno, 0, 0, VINUM_VOLUME_TYPE); /* create the device number */
|
||||
bp->b_flags = B_BUSY | B_READ;
|
||||
vinumstart(bp, 1);
|
||||
biowait(bp);
|
||||
if (bp->b_flags & B_ERROR)
|
||||
error = bp->b_error;
|
||||
else
|
||||
/* Now write to the plex */
|
||||
{
|
||||
s = splbio();
|
||||
if (bp->b_qindex != 0) /* on a queue, */
|
||||
bremfree(bp); /* remove it */
|
||||
splx(s);
|
||||
bp->b_dev = VINUMBDEV(plex->volno, plex->volplexno, 0, VINUM_PLEX_TYPE); /* create the device number */
|
||||
|
||||
bp->b_flags = B_BUSY; /* make this a write */
|
||||
bp->b_resid = 0x0;
|
||||
vinumstart(bp, 1);
|
||||
biowait(bp);
|
||||
if (bp->b_flags & B_ERROR)
|
||||
error = bp->b_error;
|
||||
else {
|
||||
plex->revived += bp->b_bcount >> DEV_BSHIFT; /* moved this much further down */
|
||||
if (plex->revived >= plex->length) { /* finished */
|
||||
plex->revived = 0;
|
||||
plex->state = plex_up; /* do we need to do more? */
|
||||
if (plex->volno >= 0) /* we have a volume, */
|
||||
set_volume_state(plex->volno, volume_up, 0);
|
||||
printf("vinum: plex %s is %s\n", plex->name, plex_state(plex->state));
|
||||
save_config(); /* and save the updated configuration */
|
||||
error = 0; /* we're done */
|
||||
}
|
||||
}
|
||||
while (plex->waitlist) { /* we have waiting requests */
|
||||
launch_requests(plex->waitlist, 1); /* do them now */
|
||||
plex->waitlist = plex->waitlist->next; /* and move on to the next */
|
||||
}
|
||||
}
|
||||
if (bp->b_qindex == 0) /* not on a queue, */
|
||||
brelse(bp); /* is this kosher? */
|
||||
return error;
|
||||
}
|
755
sys/modules/vinum/state.c
Normal file
755
sys/modules/vinum/state.c
Normal file
@ -0,0 +1,755 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: state.c,v 2.6 1998/08/19 08:04:47 grog Exp grog $
|
||||
*/
|
||||
|
||||
#define REALLYKERNEL
|
||||
#include "vinumhdr.h"
|
||||
#include "request.h"
|
||||
|
||||
/* Update drive state */
|
||||
/* Return 1 if the state changes, otherwise 0 */
|
||||
int
|
||||
set_drive_state(int driveno, enum drivestate state, int flags)
|
||||
{
|
||||
struct drive *drive = &DRIVE[driveno];
|
||||
int oldstate = drive->state;
|
||||
int sdno;
|
||||
|
||||
if (drive->state == drive_unallocated) /* no drive to do anything with, */
|
||||
return 0;
|
||||
|
||||
if (state != oldstate) { /* don't change it if it's not different */
|
||||
if (state == drive_down) { /* the drive's going down */
|
||||
if (flags || (drive->opencount == 0)) { /* we can do it */
|
||||
close_drive(drive);
|
||||
drive->state = state;
|
||||
printf("vinum: drive %s is %s\n", drive->label.name, drive_state(drive->state));
|
||||
} else
|
||||
return 0; /* don't do it */
|
||||
}
|
||||
drive->state = state; /* set the state */
|
||||
if (((drive->state == drive_up)
|
||||
|| ((drive->state == drive_coming_up)))
|
||||
&& (drive->vp == NULL)) /* should be open, but we're not */
|
||||
init_drive(drive); /* which changes the state again */
|
||||
if ((state != oldstate) /* state has changed */
|
||||
&&((flags & setstate_norecurse) == 0)) { /* and we want to recurse, */
|
||||
for (sdno = 0; sdno < vinum_conf.subdisks_used; sdno++) { /* find this drive's subdisks */
|
||||
if (SD[sdno].driveno == driveno) /* belongs to this drive */
|
||||
set_sd_state(sdno, sd_down, setstate_force | setstate_recursing); /* take it down */
|
||||
}
|
||||
save_config(); /* and save the updated configuration */
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Try to set the subdisk state. Return 1 if state changed to
|
||||
* what we wanted, -1 if it changed to something else, and 0
|
||||
* if no change.
|
||||
*
|
||||
* This routine is called both from the user (up, down states
|
||||
* only) and internally.
|
||||
*/
|
||||
int
|
||||
set_sd_state(int sdno, enum sdstate state, enum setstateflags flags)
|
||||
{
|
||||
struct sd *sd = &SD[sdno];
|
||||
int oldstate = sd->state;
|
||||
int status = 1; /* status to return */
|
||||
|
||||
if (state == oldstate)
|
||||
return 0; /* no change */
|
||||
|
||||
if (sd->state == sd_unallocated) /* no subdisk to do anything with, */
|
||||
return 0;
|
||||
|
||||
if (sd->driveoffset < 0) { /* not allocated space */
|
||||
sd->state = sd_down;
|
||||
if (state != sd_down)
|
||||
return -1;
|
||||
} else { /* space allocated */
|
||||
switch (state) {
|
||||
case sd_down:
|
||||
if ((!flags & setstate_force) /* but gently */
|
||||
&&(sd->plexno >= 0)) /* and we're attached to a plex, */
|
||||
return 0; /* don't do it */
|
||||
break;
|
||||
|
||||
case sd_up:
|
||||
if (DRIVE[sd->driveno].state != drive_up) /* can't bring the sd up if the drive isn't, */
|
||||
return 0; /* not even by force */
|
||||
switch (sd->state) {
|
||||
case sd_obsolete:
|
||||
case sd_down: /* been down, no data lost */
|
||||
if ((sd->plexno) /* we're associated with a plex */
|
||||
&&(((PLEX[sd->plexno].state < plex_firstup) /* and it's not up */
|
||||
||(PLEX[sd->plexno].subdisks > 1)))) /* or it's the only one */
|
||||
break;
|
||||
/* XXX Get this right: make sure that other plexes in
|
||||
* the volume cover this address space, otherwise
|
||||
* we make this one sd_up */
|
||||
sd->state = sd_reborn; /* here it is again */
|
||||
printf("vinum: subdisk %s is %s, not %s\n", sd->name, sd_state(sd->state), sd_state(state));
|
||||
status = -1;
|
||||
break;
|
||||
|
||||
case sd_init: /* brand new */
|
||||
if (flags & setstate_configuring) /* we're doing this while configuring */
|
||||
break;
|
||||
sd->state = sd_empty; /* nothing in it */
|
||||
printf("vinum: subdisk %s is %s, not %s\n", sd->name, sd_state(sd->state), sd_state(state));
|
||||
status = -1;
|
||||
break;
|
||||
|
||||
case sd_initializing:
|
||||
break; /* go on and do it */
|
||||
|
||||
case sd_empty:
|
||||
if ((sd->plexno) /* we're associated with a plex */
|
||||
&&(((PLEX[sd->plexno].state < plex_firstup) /* and it's not up */
|
||||
||(PLEX[sd->plexno].subdisks > 1)))) /* or it's the only one */
|
||||
break;
|
||||
return 0; /* can't do it */
|
||||
|
||||
default: /* can't do it */
|
||||
/* There's no way to bring subdisks up directly from
|
||||
* other states. First they need to be initialized
|
||||
* or revived */
|
||||
return 0;
|
||||
}
|
||||
break;
|
||||
|
||||
default: /* other ones, only internal with force */
|
||||
if (flags & setstate_force == 0) /* no force? What's this? */
|
||||
return 0; /* don't do it */
|
||||
}
|
||||
}
|
||||
sd->state = state;
|
||||
printf("vinum: subdisk %s is %s\n", sd->name, sd_state(sd->state));
|
||||
if ((flags & setstate_norecurse) == 0)
|
||||
set_plex_state(sd->plexno, plex_up, setstate_recursing); /* update plex state */
|
||||
if ((flags & (setstate_configuring | setstate_recursing)) == 0) /* save config now */
|
||||
save_config();
|
||||
return status;
|
||||
}
|
||||
|
||||
/* Called from request routines when they find
|
||||
* a subdisk which is not kosher. Decide whether
|
||||
* it warrants changing the state. Return
|
||||
* REQUEST_DOWN if we can't use the subdisk,
|
||||
* REQUEST_OK if we can. */
|
||||
enum requeststatus
|
||||
checksdstate(struct sd *sd, struct request *rq, daddr_t diskaddr, daddr_t diskend)
|
||||
{
|
||||
struct plex *plex = &PLEX[sd->plexno];
|
||||
int writeop = (rq->bp->b_flags & B_READ) == 0; /* note if we're writing */
|
||||
|
||||
/* first, see if the plex wants to be accessed */
|
||||
switch (plex->state) {
|
||||
case plex_reviving:
|
||||
/* When writing, we'll write anything that starts
|
||||
* up to the current revive pointer, but we'll
|
||||
* only accept a read which finishes before the
|
||||
* current revive pointer.
|
||||
*/
|
||||
if ((writeop && (diskaddr > plex->revived)) /* write starts after current revive pointer */
|
||||
||((!writeop) && (diskend >= plex->revived))) { /* or read ends after current revive pointer */
|
||||
if (writeop) { /* writing to a consistent down disk */
|
||||
if (DRIVE[sd->driveno].state == drive_up)
|
||||
set_sd_state(sd->sdno, sd_stale, setstate_force); /* it's not consistent now */
|
||||
else
|
||||
set_sd_state(sd->sdno, sd_obsolete, setstate_force); /* it's not consistent now */
|
||||
}
|
||||
return REQUEST_DOWN; /* that part of the plex is still down */
|
||||
} else if (diskend >= plex->revived) /* write finishes beyond revive pointer */
|
||||
rq->flags |= XFR_REVIVECONFLICT; /* note a potential conflict */
|
||||
/* FALLTHROUGH */
|
||||
|
||||
case plex_up:
|
||||
case plex_degraded:
|
||||
case plex_flaky:
|
||||
/* We can access the plex: let's see
|
||||
* how the subdisk feels */
|
||||
switch (sd->state) {
|
||||
case sd_up:
|
||||
return REQUEST_OK;
|
||||
|
||||
case sd_reborn:
|
||||
if (writeop)
|
||||
return REQUEST_OK; /* always write to a reborn disk */
|
||||
/* Handle the mapping. We don't want to reject
|
||||
* a read request to a reborn subdisk if that's
|
||||
* all we have. XXX */
|
||||
return REQUEST_DOWN;
|
||||
|
||||
case sd_down:
|
||||
case sd_crashed:
|
||||
if (writeop) { /* writing to a consistent down disk */
|
||||
if (DRIVE[sd->driveno].state == drive_up)
|
||||
set_sd_state(sd->sdno, sd_stale, setstate_force); /* it's not consistent now */
|
||||
else
|
||||
set_sd_state(sd->sdno, sd_obsolete, setstate_force); /* it's not consistent now */
|
||||
}
|
||||
return REQUEST_DOWN; /* and it's down one way or another */
|
||||
|
||||
default:
|
||||
return REQUEST_DOWN;
|
||||
}
|
||||
|
||||
default:
|
||||
return REQUEST_DOWN;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
add_defective_region(struct plex *plex, off_t offset, size_t length)
|
||||
{
|
||||
/* XXX get this ordered, and coalesce regions if necessary */
|
||||
if (++plex->defective_regions > plex->defective_region_count)
|
||||
EXPAND(plex->defective_region,
|
||||
struct plexregion,
|
||||
plex->defective_region_count,
|
||||
PLEX_REGION_TABLE_SIZE);
|
||||
plex->defective_region[plex->defective_regions - 1].offset = offset;
|
||||
plex->defective_region[plex->defective_regions - 1].length = length;
|
||||
}
|
||||
|
||||
void
|
||||
add_unmapped_region(struct plex *plex, off_t offset, size_t length)
|
||||
{
|
||||
if (++plex->unmapped_regions > plex->unmapped_region_count)
|
||||
EXPAND(plex->unmapped_region,
|
||||
struct plexregion,
|
||||
plex->unmapped_region_count,
|
||||
PLEX_REGION_TABLE_SIZE);
|
||||
plex->unmapped_region[plex->unmapped_regions - 1].offset = offset;
|
||||
plex->unmapped_region[plex->unmapped_regions - 1].length = length;
|
||||
}
|
||||
|
||||
/* Rebuild a plex free list and set state if
|
||||
* we have a configuration error */
|
||||
void
|
||||
rebuild_plex_unmappedlist(struct plex *plex)
|
||||
{
|
||||
int sdno;
|
||||
struct sd *sd;
|
||||
int lastsdend = 0; /* end offset of last subdisk */
|
||||
|
||||
if (plex->unmapped_region != NULL) { /* we're going to rebuild it */
|
||||
Free(plex->unmapped_region);
|
||||
plex->unmapped_region = NULL;
|
||||
plex->unmapped_regions = 0;
|
||||
plex->unmapped_region_count = 0;
|
||||
}
|
||||
if (plex->defective_region != NULL) {
|
||||
Free(plex->defective_region);
|
||||
plex->defective_region = NULL;
|
||||
plex->defective_regions = 0;
|
||||
plex->defective_region_count = 0;
|
||||
}
|
||||
for (sdno = 0; sdno < plex->subdisks; sdno++) {
|
||||
sd = &SD[plex->sdnos[sdno]];
|
||||
if (sd->plexoffset < lastsdend) { /* overlap */
|
||||
printf("vinum: Plex %s, subdisk %s overlaps previous\n", plex->name, sd->name);
|
||||
set_plex_state(plex->plexno, plex_down, setstate_force); /* don't allow that */
|
||||
} else if (sd->plexoffset > lastsdend) /* gap */
|
||||
add_unmapped_region(plex, lastsdend, sd->plexoffset - lastsdend);
|
||||
else if (sd->state < sd_reborn) /* this part defective */
|
||||
add_defective_region(plex, sd->plexoffset, sd->sectors);
|
||||
lastsdend = sd->plexoffset + sd->sectors;
|
||||
}
|
||||
}
|
||||
|
||||
/* return a state map for the subdisks of a plex */
|
||||
enum sdstates
|
||||
sdstatemap(struct plex *plex, int *sddowncount)
|
||||
{
|
||||
int sdno;
|
||||
enum sdstates statemap = 0; /* note the states we find */
|
||||
|
||||
*sddowncount = 0; /* no subdisks down yet */
|
||||
for (sdno = 0; sdno < plex->subdisks; sdno++) {
|
||||
struct sd *sd = &SD[plex->sdnos[sdno]]; /* point to the subdisk */
|
||||
|
||||
switch (sd->state) {
|
||||
case sd_empty:
|
||||
statemap |= sd_emptystate;
|
||||
(*sddowncount)++; /* another unusable subdisk */
|
||||
break;
|
||||
|
||||
case sd_init:
|
||||
statemap |= sd_initstate;
|
||||
(*sddowncount)++; /* another unusable subdisk */
|
||||
break;
|
||||
|
||||
case sd_down:
|
||||
statemap |= sd_downstate;
|
||||
(*sddowncount)++; /* another unusable subdisk */
|
||||
break;
|
||||
|
||||
case sd_crashed:
|
||||
statemap |= sd_crashedstate;
|
||||
(*sddowncount)++; /* another unusable subdisk */
|
||||
break;
|
||||
|
||||
case sd_obsolete:
|
||||
statemap |= sd_obsolete;
|
||||
(*sddowncount)++; /* another unusable subdisk */
|
||||
break;
|
||||
|
||||
case sd_stale:
|
||||
statemap |= sd_stalestate;
|
||||
(*sddowncount)++; /* another unusable subdisk */
|
||||
break;
|
||||
|
||||
case sd_reborn:
|
||||
statemap |= sd_rebornstate;
|
||||
break;
|
||||
|
||||
case sd_up:
|
||||
statemap |= sd_upstate;
|
||||
break;
|
||||
|
||||
default:
|
||||
statemap |= sd_otherstate;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return statemap;
|
||||
}
|
||||
|
||||
/* determine the state of the volume relative to this plex */
|
||||
enum volplexstate
|
||||
vpstate(struct plex *plex)
|
||||
{
|
||||
struct volume *vol;
|
||||
enum volplexstate state = volplex_onlyusdown; /* state to return */
|
||||
int plexno;
|
||||
|
||||
if (plex->volno < 0) /* not associated with a volume */
|
||||
return volplex_onlyusdown; /* assume the worst */
|
||||
|
||||
vol = &VOL[plex->volno]; /* point to our volume */
|
||||
for (plexno = 0; plexno < vol->plexes; plexno++) {
|
||||
if (&PLEX[vol->plex[plexno]] == plex) { /* us */
|
||||
if (PLEX[vol->plex[plexno]].state == plex_up) /* are we up? */
|
||||
state |= volplex_onlyus; /* yes */
|
||||
} else {
|
||||
if (PLEX[vol->plex[plexno]].state == plex_up) /* not us */
|
||||
state |= volplex_otherup; /* and when they were up, they were up */
|
||||
else
|
||||
state |= volplex_alldown; /* and when they were down, they were down */
|
||||
}
|
||||
}
|
||||
return state; /* and when they were only halfway up */
|
||||
} /* they were neither up nor down */
|
||||
|
||||
/* Return 1 if every bit which is set in b is also set in a, otherwise 0 */
int allset(int a, int b);

int
allset(int a, int b)
{
    /* no bit of b may be missing from a */
    return (b & ~a) == 0;
}
|
||||
|
||||
/* Update the state of a plex dependent on its subdisks.
|
||||
* Also rebuild the unmapped_region and defective_region table */
|
||||
int
|
||||
set_plex_state(int plexno, enum plexstate state, enum setstateflags flags)
|
||||
{
|
||||
int sddowncount = 0; /* number of down subdisks */
|
||||
struct plex *plex = &PLEX[plexno]; /* point to our plex */
|
||||
enum plexstate oldstate = plex->state;
|
||||
enum volplexstate vps = vpstate(plex); /* how do we compare with the other plexes? */
|
||||
enum sdstates statemap = sdstatemap(plex, &sddowncount); /* get a map of the subdisk states */
|
||||
|
||||
if ((flags & setstate_force) && (oldstate == state)) /* we're there already, */
|
||||
return 0; /* no change */
|
||||
|
||||
if (plex->state == plex_unallocated) /* no plex to do anything with, */
|
||||
return 0;
|
||||
|
||||
switch (state) {
|
||||
case plex_up:
|
||||
if ((plex->state == plex_initializing) /* we're initializing */
|
||||
&&(statemap != sd_upstate)) /* but SDs aren't up yet */
|
||||
return 0; /* do nothing */
|
||||
|
||||
/* We don't really care what our state was before
|
||||
* if we want to come up. We rely entirely on the
|
||||
* state of our subdisks and our volume */
|
||||
switch (vps) {
|
||||
case volplex_onlyusdown:
|
||||
case volplex_alldown: /* another plex is down, and so are we */
|
||||
if (statemap == sd_upstate) { /* all subdisks ready for action */
|
||||
if ((plex->state == plex_init) /* we're brand spanking new */
|
||||
&&(VOL[plex->volno].flags & VF_CONFIG_SETUPSTATE)) { /* and we consider that up */
|
||||
/* Conceptually, an empty plex does not contain valid data,
|
||||
* but normally we'll see this state when we have just
|
||||
* created a plex, and it's either consistent from earlier,
|
||||
* or we don't care about the previous contents (we're going
|
||||
* to create a file system or use it for swap).
|
||||
*
|
||||
* We need to do this in one swell foop: on the next call
|
||||
* we will no longer be just empty.
|
||||
*
|
||||
* We'll still come back to this function for the remaining
|
||||
* plexes in the volume. They'll be up already, so that
|
||||
* doesn't change anything, but it's not worth the additional
|
||||
* code to stop doing it. */
|
||||
struct volume *vol = &VOL[plex->volno];
|
||||
int plexno;
|
||||
|
||||
for (plexno = 0; plexno < vol->plexes; plexno++)
|
||||
PLEX[vol->plex[plexno]].state = plex_up;
|
||||
}
|
||||
plex->state = plex_up; /* bring up up, anyway */
|
||||
} else
|
||||
plex->state = plex_down;
|
||||
break;
|
||||
|
||||
case volplex_onlyusup: /* only we are up: others are down */
|
||||
case volplex_onlyus: /* we're up and alone */
|
||||
if ((statemap == sd_upstate) /* subdisks all up */
|
||||
||(statemap == sd_emptystate)) /* or all empty */
|
||||
plex->state = plex_up; /* go for it */
|
||||
else if ((statemap & (sd_upstate | sd_reborn)) == statemap) /* all up or reborn, */
|
||||
plex->state = plex_flaky;
|
||||
else if (statemap & (sd_upstate | sd_reborn)) /* some up or reborn, */
|
||||
plex->state = plex_degraded; /* so far no corruption */
|
||||
else
|
||||
plex->state = plex_faulty;
|
||||
break;
|
||||
|
||||
case volplex_otherup: /* another plex is up */
|
||||
case volplex_otherupdown: /* other plexes are up and down */
|
||||
if ((statemap == sd_upstate) /* subdisks all up */
|
||||
||(statemap == sd_emptystate) /* or all empty */
|
||||
) {
|
||||
/* Is the data in all subdisks valid? */
|
||||
if (statemap == statemap & (sd_downstate | sd_rebornstate | sd_upstate))
|
||||
break; /* yes, we can bring the plex up */
|
||||
plex->state = plex_reviving; /* we need reviving */
|
||||
return EAGAIN;
|
||||
} else
|
||||
plex->state = plex_faulty; /* still in error */
|
||||
break;
|
||||
|
||||
case volplex_allup: /* all plexes are up */
|
||||
case volplex_someup:
|
||||
if ((statemap & (sd_upstate | sd_reborn)) == statemap) /* all up or reborn, */
|
||||
break; /* no change */
|
||||
else
|
||||
plex->state = plex_degraded; /* we're not all there */
|
||||
}
|
||||
|
||||
if (plex->state != oldstate)
|
||||
break;
|
||||
return 0; /* no change */
|
||||
|
||||
case plex_down: /* want to take it down */
|
||||
if (((vps == volplex_onlyus) /* we're the only one up */
|
||||
||(vps == volplex_onlyusup)) /* we're the only one up */
|
||||
&&(!(flags & setstate_force))) /* and we don't want to use force */
|
||||
return 0; /* can't do it */
|
||||
plex->state = state; /* do it */
|
||||
break;
|
||||
|
||||
/* This is only requested by the driver.
|
||||
* Trust ourselves */
|
||||
case plex_faulty:
|
||||
plex->state = state; /* do it */
|
||||
break;
|
||||
|
||||
case plex_initializing:
|
||||
/* XXX consider what safeguards we need here */
|
||||
if ((flags & setstate_force) == 0)
|
||||
return 0;
|
||||
plex->state = state; /* do it */
|
||||
break;
|
||||
|
||||
/* What's this? */
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
printf("vinum: plex %s is %s\n", plex->name, plex_state(plex->state));
|
||||
/* Now see what we have left, and whether
|
||||
* we're taking the volume down */
|
||||
if (plex->volno >= 0) { /* we have a volume */
|
||||
struct volume *vol = &VOL[plex->volno];
|
||||
|
||||
vps = vpstate(plex); /* get our combined state again */
|
||||
if ((flags & setstate_norecurse) == 0) { /* we can recurse */
|
||||
if ((vol->state == volume_up)
|
||||
&& (vps == volplex_alldown)) /* and we're all down */
|
||||
set_volume_state(plex->volno, volume_down, setstate_recursing); /* take our volume down */
|
||||
else if ((vol->state == volume_down)
|
||||
&& (vps & (volplex_otherup | volplex_onlyusup))) /* and at least one is up */
|
||||
set_volume_state(plex->volno, volume_up, setstate_recursing); /* bring our volume up */
|
||||
}
|
||||
}
|
||||
if ((flags & (setstate_configuring | setstate_recursing)) == 0) /* save config now */
|
||||
save_config();
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Update the state of a plex dependent on its plexes.
|
||||
* Also rebuild the unmapped_region and defective_region table */
|
||||
int
|
||||
set_volume_state(int volno, enum volumestate state, enum setstateflags flags)
|
||||
{
|
||||
int plexno;
|
||||
enum plexstates {
|
||||
plex_downstate = 1, /* found a plex which is down */
|
||||
plex_degradedstate = 2, /* found a plex which is halfway up */
|
||||
plex_upstate = 4 /* found a plex which is completely up */
|
||||
};
|
||||
|
||||
int plexstatemap = 0; /* note the states we find */
|
||||
struct volume *vol = &VOL[volno]; /* point to our volume */
|
||||
|
||||
if (vol->state == state) /* we're there already */
|
||||
return 0; /* no change */
|
||||
if (vol->state == volume_unallocated) /* no volume to do anything with, */
|
||||
return 0;
|
||||
|
||||
for (plexno = 0; plexno < vol->plexes; plexno++) {
|
||||
struct plex *plex = &PLEX[vol->plex[plexno]]; /* point to the plex */
|
||||
switch (plex->state) {
|
||||
case plex_degraded:
|
||||
case plex_flaky:
|
||||
case plex_reviving:
|
||||
plexstatemap |= plex_degradedstate;
|
||||
break;
|
||||
|
||||
case plex_up:
|
||||
plexstatemap |= plex_upstate;
|
||||
break;
|
||||
|
||||
default:
|
||||
plexstatemap |= plex_downstate;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (state == volume_up) { /* want to come up */
|
||||
if (plexstatemap & plex_upstate) { /* we have a plex which is completely up */
|
||||
vol->state = volume_up; /* did it */
|
||||
printf("vinum: volume %s is %s\n", vol->name, volume_state(vol->state));
|
||||
if ((flags & (setstate_configuring | setstate_recursing)) == 0) /* save config now */
|
||||
save_config();
|
||||
return 1;
|
||||
}
|
||||
/* Here we should check whether we have enough
|
||||
* coverage for the complete volume. Writeme XXX */
|
||||
} else if (state == volume_down) { /* want to go down */
|
||||
if ((vol->opencount == 0) /* not open */
|
||||
||(flags & setstate_force != 0)) { /* or we're forcing */
|
||||
vol->state = volume_down;
|
||||
printf("vinum: volume %s is %s\n", vol->name, volume_state(vol->state));
|
||||
if ((flags & (setstate_configuring | setstate_recursing)) == 0) /* save config now */
|
||||
save_config();
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
return 0; /* no change */
|
||||
}
|
||||
|
||||
/* Start an object, in other words do what we can to get it up.
|
||||
* This is called from vinumioctl (VINUMSTART).
|
||||
* Return error indications via ioctl_reply
|
||||
*/
|
||||
void
|
||||
start_object(struct vinum_ioctl_msg *data)
|
||||
{
|
||||
int status;
|
||||
int realstatus; /* what we really have */
|
||||
int objindex = data->index; /* data gets overwritten */
|
||||
struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* format for returning replies */
|
||||
|
||||
switch (data->type) {
|
||||
case drive_object:
|
||||
status = set_drive_state(objindex, drive_up, setstate_none);
|
||||
realstatus = DRIVE[objindex].state == drive_up; /* set status on whether we really did it */
|
||||
break;
|
||||
|
||||
case sd_object:
|
||||
status = set_sd_state(objindex, sd_up, setstate_none); /* set state */
|
||||
realstatus = SD[objindex].state == sd_up; /* set status on whether we really did it */
|
||||
break;
|
||||
|
||||
case plex_object:
|
||||
if (PLEX[objindex].state == plex_reviving) { /* reviving, */
|
||||
ioctl_reply->error = revive_block(objindex); /* revive another block */
|
||||
ioctl_reply->msg[0] = '\0'; /* no comment */
|
||||
return;
|
||||
}
|
||||
status = set_plex_state(objindex, plex_up, setstate_none);
|
||||
realstatus = PLEX[objindex].state == plex_up; /* set status on whether we really did it */
|
||||
break;
|
||||
|
||||
case volume_object:
|
||||
status = set_volume_state(objindex, volume_up, setstate_none);
|
||||
realstatus = VOL[objindex].state == volume_up; /* set status on whether we really did it */
|
||||
break;
|
||||
|
||||
default:
|
||||
ioctl_reply->error = EINVAL;
|
||||
strcpy(ioctl_reply->msg, "Invalid object type");
|
||||
return;
|
||||
}
|
||||
/* There's no point in saying anything here:
|
||||
* the userland program does it better */
|
||||
ioctl_reply->msg[0] = '\0';
|
||||
if (realstatus == 0) /* couldn't do it */
|
||||
ioctl_reply->error = EINVAL;
|
||||
else
|
||||
ioctl_reply->error = 0;
|
||||
}
|
||||
|
||||
/* Stop an object, in other words do what we can to get it down
|
||||
* This is called from vinumioctl (VINUMSTOP).
|
||||
* Return error indications via ioctl_reply.
|
||||
*/
|
||||
void
|
||||
stop_object(struct vinum_ioctl_msg *data)
|
||||
{
|
||||
int status = 1;
|
||||
int objindex = data->index; /* save the number from change */
|
||||
struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* format for returning replies */
|
||||
|
||||
switch (data->type) {
|
||||
case drive_object:
|
||||
status = set_drive_state(objindex, drive_down, data->force);
|
||||
break;
|
||||
|
||||
case sd_object:
|
||||
status = set_sd_state(objindex, sd_down, data->force);
|
||||
break;
|
||||
|
||||
case plex_object:
|
||||
status = set_plex_state(objindex, plex_down, data->force);
|
||||
break;
|
||||
|
||||
case volume_object:
|
||||
status = set_volume_state(objindex, volume_down, data->force);
|
||||
break;
|
||||
|
||||
default:
|
||||
ioctl_reply->error = EINVAL;
|
||||
strcpy(ioctl_reply->msg, "Invalid object type");
|
||||
return;
|
||||
}
|
||||
ioctl_reply->msg[0] = '\0';
|
||||
if (status == 0) /* couldn't do it */
|
||||
ioctl_reply->error = EINVAL;
|
||||
else
|
||||
ioctl_reply->error = 0;
|
||||
}
|
||||
|
||||
/* VINUM_SETSTATE ioctl: set an object state
|
||||
* msg is the message passed by the user */
|
||||
void
|
||||
setstate(struct vinum_ioctl_msg *msg)
|
||||
{
|
||||
int sdno;
|
||||
struct sd *sd;
|
||||
struct plex *plex;
|
||||
struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) msg; /* format for returning replies */
|
||||
|
||||
switch (msg->state) {
|
||||
case object_down:
|
||||
stop_object(msg);
|
||||
break;
|
||||
|
||||
case object_initializing:
|
||||
switch (msg->type) {
|
||||
case sd_object:
|
||||
sd = &SD[msg->index];
|
||||
if ((msg->index >= vinum_conf.subdisks_used)
|
||||
|| (sd->state == sd_unallocated)) {
|
||||
sprintf(ioctl_reply->msg, "Invalid subdisk %d", msg->index);
|
||||
ioctl_reply->error = EFAULT;
|
||||
return;
|
||||
}
|
||||
set_sd_state(msg->index, sd_initializing, msg->force);
|
||||
if (sd->state != sd_initializing) {
|
||||
strcpy(ioctl_reply->msg, "Can't set state");
|
||||
ioctl_reply->error = EINVAL;
|
||||
} else
|
||||
ioctl_reply->error = 0;
|
||||
break;
|
||||
|
||||
case plex_object:
|
||||
plex = &PLEX[msg->index];
|
||||
if ((msg->index >= vinum_conf.plexes_used)
|
||||
|| (plex->state == plex_unallocated)) {
|
||||
sprintf(ioctl_reply->msg, "Invalid subdisk %d", msg->index);
|
||||
ioctl_reply->error = EFAULT;
|
||||
return;
|
||||
}
|
||||
set_plex_state(msg->index, plex_initializing, msg->force);
|
||||
if (plex->state != plex_initializing) {
|
||||
strcpy(ioctl_reply->msg, "Can't set state");
|
||||
ioctl_reply->error = EINVAL;
|
||||
} else {
|
||||
ioctl_reply->error = 0;
|
||||
for (sdno = 0; sdno < plex->subdisks; sdno++) {
|
||||
sd = &SD[plex->sdnos[sdno]];
|
||||
set_sd_state(plex->sdnos[sdno], sd_initializing, msg->force);
|
||||
if (sd->state != sd_initializing) {
|
||||
strcpy(ioctl_reply->msg, "Can't set state");
|
||||
ioctl_reply->error = EINVAL;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
strcpy(ioctl_reply->msg, "Invalid object");
|
||||
ioctl_reply->error = EINVAL;
|
||||
}
|
||||
break;
|
||||
|
||||
case object_up:
|
||||
start_object(msg);
|
||||
}
|
||||
}
|
88
sys/modules/vinum/statetexts.h
Normal file
88
sys/modules/vinum/statetexts.h
Normal file
@ -0,0 +1,88 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: COPYRIGHT,v 1.1 1998/03/05 06:07:05 grog Exp grog $
|
||||
*/
|
||||
/* Created by ./makestatetext on Tue 4 Aug 15:53:16 CST 1998. Do not edit */
|
||||
|
||||
/* Drive state texts, looked up by numeric state value */
char *drivestatetext[] = {
    "unallocated",			/* 0 */
    "uninit",				/* 1 */
    "down",				/* 2 */
    "coming_up",			/* 3 */
    "up",				/* 4 */
};
|
||||
|
||||
/* Subdisk state texts, looked up by numeric state value */
char *sdstatetext[] = {
    "unallocated",			/* 0 */
    "uninit",				/* 1 */
    "init",				/* 2 */
    "initializing",			/* 3 */
    "empty",				/* 4 */
    "obsolete",				/* 5 */
    "stale",				/* 6 */
    "crashed",				/* 7 */
    "down",				/* 8 */
    "reborn",				/* 9 */
    "up",				/* 10 */
};
|
||||
|
||||
/* Plex state texts, looked up by numeric state value */
char *plexstatetext[] = {
    "unallocated",			/* 0 */
    "init",				/* 1 */
    "faulty",				/* 2 */
    "down",				/* 3 */
    "reviving",				/* 4 */
    "initializing",			/* 5 */
    "corrupt",				/* 6 */
    "degraded",				/* 7 */
    "flaky",				/* 8 */
    "up",				/* 9 */
};
|
||||
|
||||
/* Volume state texts, looked up by numeric state value */
char *volstatetext[] = {
    "unallocated",			/* 0 */
    "uninit",				/* 1 */
    "down",				/* 2 */
    "up",				/* 3 */
};
|
211
sys/modules/vinum/util.c
Normal file
211
sys/modules/vinum/util.c
Normal file
@ -0,0 +1,211 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: util.c,v 1.7 1998/08/07 09:23:10 grog Exp grog $
|
||||
*/
|
||||
|
||||
/* This file contains utility routines used both in kernel and user context */
|
||||
|
||||
#include "vinumhdr.h"
|
||||
#include "statetexts.h"
|
||||
#ifndef REALLYKERNEL
|
||||
#include <stdio.h>
|
||||
extern jmp_buf command_fail; /* return on a failed command */
|
||||
#endif
|
||||
|
||||
/*
 * Scratch buffer for the "Invalid ..." texts built by the conversion
 * routines in this file; each use overwrites the previous contents.
 */
static char numeric_state[32];

/* Number of entries in the table <x>statetext */
#define STATECOUNT(x) (sizeof (x##statetext) / sizeof ((x##statetext)[0]))
|
||||
/* Return drive state as a string */
|
||||
char *
|
||||
drive_state(enum drivestate state)
|
||||
{
|
||||
if (((unsigned) state) >= STATECOUNT(drive)) {
|
||||
sprintf(numeric_state, "Invalid state %d", (int) state);
|
||||
return numeric_state;
|
||||
} else
|
||||
return drivestatetext[state];
|
||||
}
|
||||
|
||||
/* Return volume state as a string */
|
||||
char *
|
||||
volume_state(enum volumestate state)
|
||||
{
|
||||
if (((unsigned) state) >= STATECOUNT(vol)) {
|
||||
sprintf(numeric_state, "Invalid state %d", (int) state);
|
||||
return numeric_state;
|
||||
} else
|
||||
return volstatetext[state];
|
||||
}
|
||||
|
||||
/* Return plex state as a string */
|
||||
char *
|
||||
plex_state(enum plexstate state)
|
||||
{
|
||||
if (((unsigned) state) >= STATECOUNT(plex)) {
|
||||
sprintf(numeric_state, "Invalid state %d", (int) state);
|
||||
return numeric_state;
|
||||
} else
|
||||
return plexstatetext[state];
|
||||
}
|
||||
|
||||
/* Return plex organization as a string */
|
||||
char *
|
||||
plex_org(enum plexorg org)
|
||||
{
|
||||
switch (org) {
|
||||
case plex_disorg: /* disorganized */
|
||||
return "disorg";
|
||||
break;
|
||||
|
||||
case plex_concat: /* concatenated plex */
|
||||
return "concat";
|
||||
break;
|
||||
|
||||
case plex_striped: /* striped plex */
|
||||
return "striped";
|
||||
break;
|
||||
|
||||
|
||||
default:
|
||||
sprintf(numeric_state, "Invalid org %d", (int) org);
|
||||
return numeric_state;
|
||||
}
|
||||
}
|
||||
|
||||
/* Return sd state as a string */
|
||||
char *
|
||||
sd_state(enum sdstate state)
|
||||
{
|
||||
if (((unsigned) state) >= STATECOUNT(sd)) {
|
||||
sprintf(numeric_state, "Invalid state %d", (int) state);
|
||||
return numeric_state;
|
||||
} else
|
||||
return sdstatetext[state];
|
||||
}
|
||||
|
||||
/* Now convert in the other direction */
|
||||
/* These are currently used only internally,
|
||||
* so we don't do too much error checking */
|
||||
enum drivestate
|
||||
DriveState(char *text)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < STATECOUNT(drive); i++)
|
||||
if (strcmp(text, drivestatetext[i]) == 0) /* found it */
|
||||
return (enum drivestate) i;
|
||||
return -1;
|
||||
}
|
||||
|
||||
enum sdstate
|
||||
SdState(char *text)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < STATECOUNT(sd); i++)
|
||||
if (strcmp(text, sdstatetext[i]) == 0) /* found it */
|
||||
return (enum sdstate) i;
|
||||
return -1;
|
||||
}
|
||||
|
||||
enum plexstate
|
||||
PlexState(char *text)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < STATECOUNT(plex); i++)
|
||||
if (strcmp(text, plexstatetext[i]) == 0) /* found it */
|
||||
return (enum plexstate) i;
|
||||
return -1;
|
||||
}
|
||||
|
||||
enum volumestate
|
||||
VolState(char *text)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < STATECOUNT(vol); i++)
|
||||
if (strcmp(text, volstatetext[i]) == 0) /* found it */
|
||||
return (enum volstate) i;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Take a number with an optional scale factor and convert
|
||||
* it to a number of bytes.
|
||||
*
|
||||
* The scale factors are:
|
||||
*
|
||||
* b blocks (of 512 bytes)
|
||||
* k kilobytes (1024 bytes)
|
||||
* m megabytes (of 1024 * 1024 bytes)
|
||||
* g gigabytes (of 1024 * 1024 * 1024 bytes)
|
||||
*/
|
||||
u_int64_t
|
||||
sizespec(char *spec)
|
||||
{
|
||||
u_int64_t size;
|
||||
char *s;
|
||||
|
||||
size = 0;
|
||||
s = spec;
|
||||
if ((*s >= '0') && (*s <= '9')) { /* it's numeric */
|
||||
while ((*s >= '0') && (*s <= '9')) /* it's numeric */
|
||||
size = size * 10 + *s++ - '0'; /* convert it */
|
||||
switch (*s) {
|
||||
case '\0':
|
||||
return size;
|
||||
|
||||
case 'B':
|
||||
case 'b':
|
||||
return size * 512;
|
||||
|
||||
case 'K':
|
||||
case 'k':
|
||||
return size * 1024;
|
||||
|
||||
case 'M':
|
||||
case 'm':
|
||||
return size * 1024 * 1024;
|
||||
|
||||
case 'G':
|
||||
case 'g':
|
||||
return size * 1024 * 1024 * 1024;
|
||||
}
|
||||
}
|
||||
#ifdef REALLYKERNEL
|
||||
throw_rude_remark(EINVAL, "Invalid length specification: %s", spec);
|
||||
#else
|
||||
fprintf(stderr, "Invalid length specification: %s", spec);
|
||||
longjmp(command_fail, -1);
|
||||
#endif
|
||||
/* NOTREACHED */
|
||||
return -1;
|
||||
}
|
512
sys/modules/vinum/vinum.c
Normal file
512
sys/modules/vinum/vinum.c
Normal file
@ -0,0 +1,512 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: vinum.c,v 1.19 1998/08/13 05:24:02 grog Exp grog $
|
||||
*/
|
||||
|
||||
#define STATIC /* nothing while we're testing XXX */
|
||||
|
||||
#define REALLYKERNEL
|
||||
#include "vinumhdr.h"
|
||||
#include "sys/sysproto.h" /* for sync(2) */
|
||||
#ifdef DEBUG
|
||||
#include <sys/reboot.h>
|
||||
int debug = 0;
|
||||
#endif
|
||||
|
||||
/* pointer to ioctl p parameter, to save passing it around */
|
||||
struct proc *myproc;
|
||||
|
||||
#if __FreeBSD__ < 3
|
||||
STATIC struct cdevsw vinum_cdevsw;
|
||||
STATIC struct bdevsw vinum_bdevsw =
|
||||
{
|
||||
vinumopen, vinumclose, vinumstrategy, vinumioctl,
|
||||
vinumdump, vinumsize, 0,
|
||||
"vinum", &vinum_cdevsw, -1
|
||||
};
|
||||
#else /* goodbye, bdevsw */
|
||||
STATIC struct cdevsw vinum_cdevsw =
|
||||
{
|
||||
vinumopen, vinumclose, vinumread, vinumwrite,
|
||||
vinumioctl, nostop, nullreset, nodevtotty,
|
||||
seltrue, nommap, vinumstrategy, "vinum",
|
||||
NULL, -1, vinumdump, vinumsize,
|
||||
D_DISK, 0, -1
|
||||
};
|
||||
#endif
|
||||
|
||||
/* Called by main() during pseudo-device attachment. */
|
||||
STATIC void vinumattach(void *);
|
||||
|
||||
STATIC void vinumgetdisklabel(dev_t);
|
||||
void vinum_scandisk(void);
|
||||
int vinum_inactive(void);
|
||||
void free_vinum(int);
|
||||
|
||||
#if __FreeBSD__ >= 3
|
||||
/* Why aren't these declared anywhere? XXX */
|
||||
int setjmp(jmp_buf);
|
||||
void longjmp(jmp_buf, int);
|
||||
#endif
|
||||
|
||||
extern jmp_buf command_fail; /* return here if config fails */
|
||||
|
||||
struct _vinum_conf vinum_conf; /* configuration information */
|
||||
|
||||
STATIC int vinum_devsw_installed = 0;
|
||||
|
||||
/*
|
||||
* Called by main() during pseudo-device attachment. All we need
|
||||
* to do is allocate enough space for devices to be configured later, and
|
||||
* add devsw entries.
|
||||
*/
|
||||
void
|
||||
vinumattach(void *dummy)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
char *buf; /* pointer to temporary buffer */
|
||||
struct _ioctl_reply *ioctl_reply; /* struct to return */
|
||||
struct uio uio;
|
||||
struct iovec iovec;
|
||||
|
||||
/* modload should prevent multiple loads, so this is worth a panic */
|
||||
if ((vinum_conf.flags & VF_LOADED) != NULL)
|
||||
panic("vinum: already loaded");
|
||||
|
||||
printf("vinum: loaded\n");
|
||||
vinum_conf.flags |= VF_LOADED; /* we're loaded now */
|
||||
|
||||
/* We don't have a p pointer here, so take it from curproc */
|
||||
myproc = curproc;
|
||||
#if __FreeBSD__ < 3
|
||||
bdevsw_add_generic(BDEV_MAJOR, CDEV_MAJOR, &vinum_bdevsw);
|
||||
#else
|
||||
cdevsw_add_generic(BDEV_MAJOR, CDEV_MAJOR, &vinum_cdevsw);
|
||||
#endif
|
||||
#ifdef DEVFS
|
||||
#error DEVFS not finished yet
|
||||
#endif
|
||||
|
||||
uio.uio_iov = &iovec;
|
||||
uio.uio_iovcnt = 1; /* just one buffer */
|
||||
uio.uio_offset = 0; /* start at the beginning */
|
||||
uio.uio_resid = 512; /* one sector */
|
||||
uio.uio_segflg = UIO_SYSSPACE; /* we're in system space */
|
||||
uio.uio_rw = UIO_READ; /* do we need this? */
|
||||
uio.uio_procp = curproc; /* do it for our own process */
|
||||
|
||||
iovec.iov_len = 512;
|
||||
buf = (char *) Malloc(iovec.iov_len); /* get a buffer */
|
||||
CHECKALLOC(buf, "vinum: no memory\n"); /* can't get 512 bytes? */
|
||||
iovec.iov_base = buf; /* read into buf */
|
||||
|
||||
/* allocate space: drives... */
|
||||
DRIVE = (struct drive *) Malloc(sizeof(struct drive) * INITIAL_DRIVES);
|
||||
CHECKALLOC(DRIVE, "vinum: no memory\n");
|
||||
vinum_conf.drives_allocated = INITIAL_DRIVES; /* number of drive slots allocated */
|
||||
vinum_conf.drives_used = 0; /* and number in use */
|
||||
|
||||
/* volumes, ... */
|
||||
VOL = (struct volume *) Malloc(sizeof(struct volume) * INITIAL_VOLUMES);
|
||||
CHECKALLOC(VOL, "vinum: no memory\n");
|
||||
vinum_conf.volumes_allocated = INITIAL_VOLUMES; /* number of volume slots allocated */
|
||||
vinum_conf.volumes_used = 0; /* and number in use */
|
||||
|
||||
/* plexes, ... */
|
||||
PLEX = (struct plex *) Malloc(sizeof(struct plex) * INITIAL_PLEXES);
|
||||
CHECKALLOC(PLEX, "vinum: no memory\n");
|
||||
vinum_conf.plexes_allocated = INITIAL_PLEXES; /* number of plex slots allocated */
|
||||
vinum_conf.plexes_used = 0; /* and number in use */
|
||||
|
||||
/* and subdisks */
|
||||
SD = (struct sd *) Malloc(sizeof(struct sd) * INITIAL_SUBDISKS);
|
||||
CHECKALLOC(SD, "vinum: no memory\n");
|
||||
vinum_conf.subdisks_allocated = INITIAL_SUBDISKS; /* number of sd slots allocated */
|
||||
vinum_conf.subdisks_used = 0; /* and number in use */
|
||||
|
||||
ioctl_reply = NULL; /* no reply on longjmp */
|
||||
}
|
||||
|
||||
|
||||
#ifdef ACTUALLY_LKM_NOT_KERNEL /* stuff for LKMs */
|
||||
|
||||
/* Check if we have anything open. If so, return 0 (not inactive),
|
||||
* otherwise 1 (inactive) */
|
||||
int
|
||||
vinum_inactive(void)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
int i;
|
||||
int can_do = 1; /* assume we can do it */
|
||||
|
||||
lock_config();
|
||||
for (i = 0; i < vinum_conf.volumes_used; i++) {
|
||||
if (VOL[i].pid != NULL) { /* volume is open */
|
||||
can_do = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
unlock_config();
|
||||
return can_do;
|
||||
}
|
||||
|
||||
/* Free all structures.
|
||||
* If cleardrive is 0, save the configuration; otherwise
|
||||
* remove the configuration from the drive.
|
||||
*
|
||||
* Before coming here, ensure that no volumes are open.
|
||||
*/
|
||||
void
|
||||
free_vinum(int cleardrive)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
int i;
|
||||
|
||||
if (cleardrive) {
|
||||
for (i = 0; i < vinum_conf.drives_used; i++)
|
||||
remove_drive(i); /* remove the drive */
|
||||
} else { /* keep the config */
|
||||
save_config();
|
||||
if (DRIVE != NULL) {
|
||||
for (i = 0; i < vinum_conf.drives_used; i++)
|
||||
free_drive(&DRIVE[i]); /* close files and things */
|
||||
Free(DRIVE);
|
||||
}
|
||||
}
|
||||
if (SD != NULL)
|
||||
Free(SD);
|
||||
if (PLEX != NULL) {
|
||||
for (i = 0; i < vinum_conf.plexes_used; i++) {
|
||||
struct plex *plex = &vinum_conf.plex[i];
|
||||
|
||||
if (plex->state != plex_unallocated) { /* we have real data there */
|
||||
if (plex->sdnos)
|
||||
Free(plex->sdnos);
|
||||
if (plex->unmapped_regions)
|
||||
Free(plex->unmapped_region);
|
||||
if (plex->defective_regions)
|
||||
Free(plex->defective_region);
|
||||
}
|
||||
}
|
||||
Free(PLEX);
|
||||
}
|
||||
if (VOL != NULL)
|
||||
Free(VOL);
|
||||
bzero(&vinum_conf, sizeof(vinum_conf));
|
||||
}
|
||||
|
||||
MOD_MISC(vinum);
|
||||
|
||||
/*
|
||||
* Function called when loading the driver.
|
||||
*/
|
||||
STATIC int
|
||||
vinum_load(struct lkm_table *lkmtp, int cmd)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
/* Debugger ("vinum_load"); */
|
||||
vinumattach(NULL);
|
||||
return 0; /* OK */
|
||||
}
|
||||
|
||||
/*
|
||||
* Function called when unloading the driver.
|
||||
*/
|
||||
STATIC int
|
||||
vinum_unload(struct lkm_table *lkmtp, int cmd)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
if (vinum_inactive()) { /* is anything open? */
|
||||
struct sync_args dummyarg =
|
||||
{0};
|
||||
#if __FreeBSD__ < 3
|
||||
int retval;
|
||||
#endif
|
||||
|
||||
printf("vinum: unloaded\n");
|
||||
#if __FreeBSD__ < 3
|
||||
sync(curproc, &dummyarg, &retval); /* write out buffers */
|
||||
#else
|
||||
sync(curproc, &dummyarg); /* write out buffers */
|
||||
#endif
|
||||
free_vinum(0); /* no: clean up */
|
||||
#if __FreeBSD__ < 3
|
||||
bdevsw[BDEV_MAJOR] = NULL; /* clear bdevsw */
|
||||
#endif
|
||||
cdevsw[CDEV_MAJOR] = NULL; /* and cdevsw */
|
||||
return 0;
|
||||
} else
|
||||
return EBUSY;
|
||||
}
|
||||
|
||||
/*
|
||||
* Dispatcher function for the module (load/unload/stat).
|
||||
*/
|
||||
int
|
||||
vinum_mod(struct lkm_table *lkmtp, int cmd, int ver)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
MOD_DISPATCH(vinum, /* module name */
|
||||
lkmtp, /* LKM table */
|
||||
cmd, /* command */
|
||||
ver,
|
||||
vinum_load, /* load with this function */
|
||||
vinum_unload, /* and unload with this */
|
||||
lkm_nullcmd);
|
||||
}
|
||||
|
||||
#else /* not LKM */
|
||||
#error "This driver must be compiled as a loadable kernel module"
|
||||
#endif /* LKM */
|
||||
|
||||
/* ARGSUSED */
|
||||
/* Open a vinum object
|
||||
* At the moment, we only open volumes and the
|
||||
* super device. It's a nice concept to be
|
||||
* able to open drives, subdisks and plexes, but
|
||||
* I can't think what good it could be */
|
||||
int
|
||||
vinumopen(dev_t dev,
|
||||
int flags,
|
||||
int fmt,
|
||||
struct proc *p)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
int s; /* spl */
|
||||
int error;
|
||||
unsigned int index;
|
||||
struct volume *vol;
|
||||
struct plex *plex;
|
||||
struct sd *sd;
|
||||
struct devcode *device;
|
||||
|
||||
device = (struct devcode *) &dev;
|
||||
|
||||
error = 0;
|
||||
/* First, decide what we're looking at */
|
||||
switch (device->type) {
|
||||
case VINUM_VOLUME_TYPE:
|
||||
index = VOLNO(dev);
|
||||
if (index >= vinum_conf.volumes_used)
|
||||
return ENXIO; /* no such device */
|
||||
vol = &VOL[index];
|
||||
|
||||
switch (vol->state) {
|
||||
case volume_unallocated:
|
||||
case volume_uninit:
|
||||
return ENXIO;
|
||||
|
||||
case volume_up:
|
||||
s = splhigh(); /* quick lock */
|
||||
if (error)
|
||||
return error;
|
||||
if (vol->opencount == 0)
|
||||
vol->openflags = flags; /* set our flags */
|
||||
vol->opencount++;
|
||||
vol->pid = p->p_pid; /* and say who we are (do we need this? XXX) */
|
||||
splx(s);
|
||||
return 0;
|
||||
|
||||
case volume_down:
|
||||
return EIO;
|
||||
|
||||
default:
|
||||
return EINVAL;
|
||||
}
|
||||
|
||||
case VINUM_PLEX_TYPE:
|
||||
if (VOLNO(dev) >= vinum_conf.volumes_used)
|
||||
return ENXIO;
|
||||
index = PLEXNO(dev); /* get plex index in vinum_conf */
|
||||
if (index >= vinum_conf.plexes_used)
|
||||
return ENXIO; /* no such device */
|
||||
plex = &PLEX[index];
|
||||
|
||||
switch (plex->state) {
|
||||
case plex_unallocated:
|
||||
return EINVAL;
|
||||
|
||||
default:
|
||||
s = splhigh();
|
||||
if (plex->pid /* it's open already */
|
||||
&& (plex->pid != p->p_pid)) { /* and not by us, */
|
||||
splx(s);
|
||||
return EBUSY; /* one at a time, please */
|
||||
}
|
||||
plex->pid = p->p_pid; /* and say who we are (do we need this? XXX) */
|
||||
splx(s);
|
||||
return 0;
|
||||
}
|
||||
|
||||
case VINUM_SD_TYPE:
|
||||
if ((VOLNO(dev) >= vinum_conf.volumes_used) || /* no such volume */
|
||||
(PLEXNO(dev) >= vinum_conf.plexes_used)) /* or no such plex */
|
||||
return ENXIO; /* no such device */
|
||||
index = SDNO(dev); /* get the subdisk number */
|
||||
if (index >= vinum_conf.subdisks_used)
|
||||
return ENXIO; /* no such device */
|
||||
sd = &SD[index];
|
||||
|
||||
/* Opening a subdisk is always a special operation, so we
|
||||
* ignore the state as long as it represents a real subdisk */
|
||||
switch (sd->state) {
|
||||
case sd_unallocated:
|
||||
case sd_uninit:
|
||||
return EINVAL;
|
||||
|
||||
default:
|
||||
s = splhigh();
|
||||
if (sd->pid /* it's open already */
|
||||
&& (sd->pid != p->p_pid)) { /* and not by us, */
|
||||
splx(s);
|
||||
return EBUSY; /* one at a time, please */
|
||||
}
|
||||
sd->pid = p->p_pid; /* and say who we are (do we need this? XXX) */
|
||||
splx(s);
|
||||
return 0;
|
||||
}
|
||||
|
||||
case VINUM_DRIVE_TYPE:
|
||||
default:
|
||||
return ENODEV; /* don't know what to do with these */
|
||||
|
||||
case VINUM_SUPERDEV_TYPE:
|
||||
if (p->p_ucred->cr_uid == 0) { /* root calling, */
|
||||
vinum_conf.opencount++; /* one more opener */
|
||||
return 0; /* no worries opening super dev */
|
||||
} else
|
||||
return EPERM; /* you can't do that! */
|
||||
}
|
||||
}
|
||||
|
||||
/* ARGSUSED */
|
||||
int
|
||||
vinumclose(dev_t dev,
|
||||
int flags,
|
||||
int fmt,
|
||||
struct proc *p)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
unsigned int index;
|
||||
struct volume *vol;
|
||||
struct plex *plex;
|
||||
struct sd *sd;
|
||||
struct devcode *device = (struct devcode *) &dev;
|
||||
|
||||
index = VOLNO(dev);
|
||||
/* First, decide what we're looking at */
|
||||
switch (device->type) {
|
||||
case VINUM_VOLUME_TYPE:
|
||||
if (index >= vinum_conf.volumes_used)
|
||||
return ENXIO; /* no such device */
|
||||
vol = &VOL[index];
|
||||
|
||||
switch (vol->state) {
|
||||
case volume_unallocated:
|
||||
case volume_uninit:
|
||||
return ENXIO;
|
||||
|
||||
case volume_up:
|
||||
vol->opencount = 0; /* reset our flags */
|
||||
vol->pid = NULL; /* and forget who owned us */
|
||||
return 0;
|
||||
|
||||
case volume_down:
|
||||
return EIO;
|
||||
|
||||
default:
|
||||
return EINVAL;
|
||||
}
|
||||
|
||||
case VINUM_PLEX_TYPE:
|
||||
if (VOLNO(dev) >= vinum_conf.volumes_used)
|
||||
return ENXIO;
|
||||
index = PLEXNO(dev); /* get plex index in vinum_conf */
|
||||
if (index >= vinum_conf.plexes_used)
|
||||
return ENXIO; /* no such device */
|
||||
plex = &PLEX[index];
|
||||
plex->pid = 0;
|
||||
return 0;
|
||||
|
||||
case VINUM_SD_TYPE:
|
||||
if ((VOLNO(dev) >= vinum_conf.volumes_used) || /* no such volume */
|
||||
(PLEXNO(dev) >= vinum_conf.plexes_used)) /* or no such plex */
|
||||
return ENXIO; /* no such device */
|
||||
index = SDNO(dev); /* get the subdisk number */
|
||||
if (index >= vinum_conf.subdisks_used)
|
||||
return ENXIO; /* no such device */
|
||||
sd = &SD[index];
|
||||
sd->pid = 0;
|
||||
return 0;
|
||||
|
||||
case VINUM_SUPERDEV_TYPE:
|
||||
if (p->p_ucred->cr_uid == 0) /* root calling, */
|
||||
vinum_conf.opencount--; /* one less opener */
|
||||
return 0; /* no worries closing super dev */
|
||||
|
||||
case VINUM_DRIVE_TYPE:
|
||||
default:
|
||||
return ENODEV; /* don't know what to do with these */
|
||||
}
|
||||
}
|
||||
|
||||
/* size routine */
|
||||
int
|
||||
vinumsize(dev_t dev)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
struct volume *vol;
|
||||
int size;
|
||||
|
||||
/* XXX This is bogus. We don't need to open
|
||||
* a device to find its size */
|
||||
vol = &VOL[VOLNO(dev)];
|
||||
|
||||
if (vol->state == volume_up)
|
||||
size = vol->size;
|
||||
else
|
||||
return 0; /* err on the size of conservatism */
|
||||
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
/*
 * Dump entry point.  Dumping is not implemented, so every call
 * fails with ENXIO regardless of the device.
 */
int
vinumdump(dev_t dev)
{
    int result = ENXIO;					    /* not implemented */

    return result;
}
|
214
sys/modules/vinum/vinumext.h
Normal file
214
sys/modules/vinum/vinumext.h
Normal file
@ -0,0 +1,214 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: vinumext.h,v 1.14 1998/08/11 00:03:57 grog Exp grog $
|
||||
*/
|
||||
|
||||
/* vinumext.h: external definitions */
|
||||
|
||||
extern struct _vinum_conf vinum_conf; /* configuration information */
|
||||
|
||||
#ifdef DEBUG
extern int debug;					    /* debug flags (type stated explicitly; was implicit int) */
#endif
|
||||
|
||||
/*
 * Give up on the current command if an allocation failed: print msg
 * and longjmp to command_fail.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * a single statement and can be used in an unbraced if/else; ptr is
 * parenthesized so that any pointer expression may be passed.
 * msg is handed to printf as the format string, so it must be a plain
 * text without conversion specifications.
 */
#define CHECKALLOC(ptr, msg)		\
    do {				\
	if ((ptr) == NULL) {		\
	    printf (msg);		\
	    longjmp (command_fail, -1);	\
	}				\
    } while (0)
|
||||
#ifndef KERNEL
|
||||
struct vnode;
|
||||
struct proc;
|
||||
#endif
|
||||
|
||||
#ifdef KERNEL
|
||||
int give_sd_to_plex(int plexno, int sdno);
|
||||
int give_plex_to_volume(int volno, int plexno);
|
||||
int check_drive(char *);
|
||||
enum drive_label_info read_drive_label(struct drive *drive);
|
||||
int parse_config(char *, struct keywordset *);
|
||||
int parse_user_config(char *cptr, struct keywordset *keyset);
|
||||
u_int64_t sizespec(char *spec);
|
||||
int volume_index(struct volume *volume);
|
||||
int plex_index(struct plex *plex);
|
||||
int sd_index(struct sd *sd);
|
||||
int drive_index(struct drive *drive);
|
||||
int my_plex(int volno, int plexno);
|
||||
int my_sd(int plexno, int sdno);
|
||||
int get_empty_drive(void);
|
||||
int find_drive(const char *name, int create);
|
||||
int find_drive_by_dev(const char *devname, int create);
|
||||
int get_empty_sd(void);
|
||||
int find_subdisk(const char *name, int create);
|
||||
void free_sd(int sdno);
|
||||
void free_volume(int volno);
|
||||
int get_empty_plex(void);
|
||||
int find_plex(const char *name, int create);
|
||||
void free_plex(int plexno);
|
||||
int get_empty_volume(void);
|
||||
int find_volume(const char *name, int create);
|
||||
void config_subdisk(void);
|
||||
void config_plex(void);
|
||||
void config_volume(void);
|
||||
void config_drive(void);
|
||||
void updateconfig(int);
|
||||
void update_sd_config(int sdno, int kernelstate);
|
||||
void update_plex_config(int plexno, int kernelstate);
|
||||
void update_volume_config(int volno, int kernelstate);
|
||||
void update_config(void);
|
||||
void drive_io_done(struct buf *);
|
||||
int save_config(void);
|
||||
void write_config(char *, int);
|
||||
int start_config(void);
|
||||
void finish_config(int);
|
||||
void remove(struct vinum_ioctl_msg *msg);
|
||||
void remove_drive_entry(int driveno, int force, int recurse);
|
||||
void remove_sd_entry(int sdno, int force, int recurse);
|
||||
void remove_plex_entry(int plexno, int force, int recurse);
|
||||
void remove_volume_entry(int volno, int force, int recurse);
|
||||
|
||||
void checkernel(char *);
|
||||
int open_drive(struct drive *, struct proc *);
|
||||
void close_drive(struct drive *drive);
|
||||
int driveio(struct drive *, void *, size_t, off_t, int);
|
||||
/* #define read_drive(a, b, c, d) driveio (a, b, c, d, B_READ)
|
||||
#define write_drive(a, b, c, d) driveio (a, b, c, d, B_WRITE) */
|
||||
int set_drive_parms(struct drive *drive);
|
||||
int init_drive(struct drive *);
|
||||
/* void throw_rude_remark (int, struct _ioctl_reply *, char *, ...); XXX */
|
||||
void throw_rude_remark(int, char *,...);
|
||||
|
||||
int read_drive(struct drive *drive, void *buf, size_t length, off_t offset);
|
||||
int write_drive(struct drive *drive, void *buf, size_t length, off_t offset);
|
||||
void format_config(char *config, int len);
|
||||
void checkkernel(char *op);
|
||||
void free_drive(struct drive *drive);
|
||||
void down_drive(struct drive *drive);
|
||||
void remove_drive(int driveno);
|
||||
|
||||
/* I/O */
|
||||
d_open_t vinumopen;
|
||||
d_close_t vinumclose;
|
||||
d_strategy_t vinumstrategy;
|
||||
d_ioctl_t vinumioctl;
|
||||
d_dump_t vinumdump;
|
||||
d_psize_t vinumsize;
|
||||
d_read_t vinumread;
|
||||
d_write_t vinumwrite;
|
||||
|
||||
int vinumstart(struct buf *bp, int reviveok);
|
||||
int launch_requests(struct request *rq, int reviveok);
|
||||
|
||||
/* XXX Do we need this? */
|
||||
int vinumpart(dev_t);
|
||||
|
||||
/* Memory allocation */
|
||||
void vinum_meminfo(caddr_t data);
|
||||
int vinum_mallocinfo(caddr_t data);
|
||||
|
||||
void expand_table(void **, int, int);
|
||||
|
||||
void add_defective_region(struct plex *plex, off_t offset, size_t length);
|
||||
void add_unmapped_region(struct plex *plex, off_t offset, size_t length);
|
||||
void rebuild_plex_unmappedlist(struct plex *plex);
|
||||
struct request;
|
||||
struct rqgroup *allocrqg(struct request *rq, int elements);
|
||||
void deallocrqg(struct rqgroup *rqg);
|
||||
|
||||
/* State transitions */
|
||||
int set_drive_state(int driveno, enum drivestate state, int force);
|
||||
int set_sd_state(int sdno, enum sdstate state, enum setstateflags flags);
|
||||
enum requeststatus checksdstate(struct sd *sd, struct request *rq, daddr_t diskaddr, daddr_t diskend);
|
||||
int set_plex_state(int plexno, enum plexstate state, enum setstateflags flags);
|
||||
int set_volume_state(int volumeno, enum volumestate state, enum setstateflags flags);
|
||||
void get_volume_label(struct volume *vol, struct disklabel *lp);
|
||||
int write_volume_label(int);
|
||||
void start_object(struct vinum_ioctl_msg *);
|
||||
void stop_object(struct vinum_ioctl_msg *);
|
||||
void setstate(struct vinum_ioctl_msg *msg);
|
||||
void vinum_label(int);
|
||||
int vinum_writedisklabel(struct volume *, struct disklabel *);
|
||||
int initsd(int);
|
||||
|
||||
int restart_plex(int plexno);
|
||||
int revive_block(int plexno);
|
||||
|
||||
/* Auxiliary functions */
|
||||
enum sdstates sdstatemap(struct plex *plex, int *sddowncount);
|
||||
enum volplexstate vpstate(struct plex *plex);
|
||||
#endif
|
||||
|
||||
enum keyword get_keyword(char *, struct keywordset *);
|
||||
void listconfig(void);
|
||||
char *drive_state(enum drivestate);
|
||||
char *volume_state(enum volumestate);
|
||||
char *plex_state(enum plexstate);
|
||||
char *plex_org(enum plexorg);
|
||||
char *sd_state(enum sdstate);
|
||||
enum drivestate DriveState(char *text);
|
||||
enum sdstate SdState(char *text);
|
||||
enum plexstate PlexState(char *text);
|
||||
enum volumestate VolState(char *text);
|
||||
struct drive *validdrive(int driveno, struct _ioctl_reply *);
|
||||
struct sd *validsd(int sdno, struct _ioctl_reply *);
|
||||
struct plex *validplex(int plexno, struct _ioctl_reply *);
|
||||
struct volume *validvol(int volno, struct _ioctl_reply *);
|
||||
int tokenize(char *, char *[]);
|
||||
void resetstats(struct vinum_ioctl_msg *msg);
|
||||
|
||||
/* Locking */
|
||||
int lockvol(struct volume *vol);
|
||||
void unlockvol(struct volume *vol);
|
||||
int lockplex(struct plex *plex);
|
||||
void unlockplex(struct plex *plex);
|
||||
int lockrange(struct plex *plex, off_t first, off_t last);
|
||||
void unlockrange(struct plex *plex, off_t first, off_t last);
|
||||
int lock_config(void);
|
||||
void unlock_config(void);
|
||||
|
||||
#ifdef DEBUG
|
||||
/*
 * expandrq (DEBUG build only; otherwise expandrq is a function):
 * enlarge the rqe table of *prq by RQELTS elements.
 *
 * Calls expand_table with the current size and the enlarged size, both
 * computed from prq->requests and expressed in bytes, zeroes the RQELTS
 * elements starting at index prq->requests, then adds RQELTS to
 * prq->rqcount.
 *
 * NOTE(review): sizes are derived from ->requests but the counter that is
 * bumped is ->rqcount; confirm against the non-DEBUG function.
 *
 * prq is evaluated several times, so it must have no side effects.
 */
#define expandrq(prq)							\
    do {								\
	expand_table((void **) &(prq)->rqe,				\
	    (prq)->requests * sizeof(struct rqelement),			\
	    ((prq)->requests + RQELTS) * sizeof(struct rqelement));	\
	bzero(&(prq)->rqe[(prq)->requests],				\
	    RQELTS * sizeof(struct rqelement));				\
	(prq)->rqcount += RQELTS;					\
    } while (0)
|
||||
#else
|
||||
void expandrq(struct plexrq *);
|
||||
#endif
|
104
sys/modules/vinum/vinumhdr.h
Normal file
104
sys/modules/vinum/vinumhdr.h
Normal file
@ -0,0 +1,104 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
*/
|
||||
|
||||
/* Header files used by all modules */
|
||||
/* $Id: vinumhdr.h,v 1.7 1998/08/07 04:41:18 grog Exp grog $ */
|
||||
|
||||
#ifdef KERNEL
|
||||
#define REALLYKERNEL
|
||||
#endif
|
||||
#include <sys/param.h>
|
||||
#ifdef REALLYKERNEL
|
||||
#include <sys/systm.h>
|
||||
#include <sys/kernel.h>
|
||||
#endif
|
||||
#ifdef DEVFS
|
||||
#error "DEVFS code not complete yet"
|
||||
#include <sys/devfsext.h>
|
||||
#endif /*DEVFS */
|
||||
#include <sys/proc.h>
|
||||
#include <sys/errno.h>
|
||||
#include <sys/dkstat.h>
|
||||
#include <sys/buf.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/uio.h>
|
||||
#include <sys/namei.h>
|
||||
#include <sys/conf.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/disklabel.h>
|
||||
#include <ufs/ffs/fs.h>
|
||||
#include <sys/mount.h>
|
||||
#include <sys/device.h>
|
||||
#undef KERNEL /* XXX */
|
||||
#include <sys/disk.h>
|
||||
#ifdef REALLYKERNEL
|
||||
#define KERNEL
|
||||
#endif
|
||||
#include <sys/syslog.h>
|
||||
#include <sys/fcntl.h>
|
||||
#include <sys/vnode.h>
|
||||
#include <sys/dkbad.h>
|
||||
#include <setjmp.h>
|
||||
#include <stdarg.h>
|
||||
#include <vm/vm.h>
|
||||
#ifdef USES_VM
|
||||
/* XXX Do we need this? */
|
||||
#include <vm/vm_extern.h>
|
||||
#include <vm/vm_kern.h>
|
||||
#include <vm/vm_object.h>
|
||||
#include <vm/vm_prot.h>
|
||||
/* #include <vm/vm_page.h> */
|
||||
#include <sys/vmmeter.h>
|
||||
/* #include <machine/pmap.h> */
|
||||
#include <machine/cputypes.h>
|
||||
#endif /* USES_VM */
|
||||
#include <vinumvar.h>
|
||||
#include <vinumio.h>
|
||||
#include "vinumkw.h"
|
||||
#include "vinumext.h"
|
||||
|
||||
#undef Free /* defined in some funny net stuff */
|
||||
#ifdef REALLYKERNEL
|
||||
#define Malloc(x) MMalloc ((x), __FILE__, __LINE__) /* show where we came from */
|
||||
#define Free(x) FFree ((x), __FILE__, __LINE__) /* show where we came from */
|
||||
caddr_t MMalloc (int size, char *, int);
|
||||
void FFree (void *mem, char *, int);
|
||||
#else
|
||||
#define Malloc(x) malloc ((x)) /* just the size */
|
||||
#define Free(x) free ((x)) /* just the address */
|
||||
#endif
|
||||
|
132
sys/modules/vinum/vinumio.h
Normal file
132
sys/modules/vinum/vinumio.h
Normal file
@ -0,0 +1,132 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: vinumio.h,v 1.10 1998/08/10 05:46:19 grog Exp grog $
|
||||
*/
|
||||
|
||||
#define MAX_IOCTL_REPLY 256
|
||||
#define L 'F' /* ID letter of our ioctls */
|
||||
/* VINUM_CREATE returns a buffer of this kind */
|
||||
struct _ioctl_reply {
|
||||
int error;
|
||||
char msg[MAX_IOCTL_REPLY];
|
||||
};
|
||||
|
||||
/* ioctl requests */
|
||||
#define BUFSIZE 1024 /* size of buffer, including continuations */
|
||||
#define VINUM_CREATE _IOC(IOC_IN | IOC_OUT, L, 64, BUFSIZE) /* configure vinum */
|
||||
#define VINUM_GETCONFIG _IOR(L, 65, struct _vinum_conf) /* get global config */
|
||||
#define VINUM_DRIVECONFIG _IOWR(L, 66, struct drive) /* get drive config */
|
||||
#define VINUM_SDCONFIG _IOWR(L, 67, struct sd) /* get subdisk config */
|
||||
#define VINUM_PLEXCONFIG _IOWR(L, 68, struct plex) /* get plex config */
|
||||
#define VINUM_VOLCONFIG _IOWR(L, 69, struct volume) /* get volume config */
|
||||
#define VINUM_PLEXSDCONFIG _IOWR(L, 70, struct sd) /* get sd config for plex (plex, sdno) */
|
||||
#define VINUM_GETFREELIST _IOWR(L, 71, struct drive_freelist) /* get freelist element (drive, fe) */
|
||||
#define VINUM_SAVECONFIG _IOC(0, L, 72, 0) /* release locks, update, write config to disk */
|
||||
#define VINUM_RESETCONFIG _IOC(0, L, 73, 0) /* trash config on disk */
|
||||
#define VINUM_INIT _IOC(0, L, 74, 0) /* read config from disk */
|
||||
#ifdef DEBUG
|
||||
|
||||
/* Argument block of the VINUM_DEBUG ioctl */
struct debuginfo {
    int changeit;					    /* non-zero: store param in the debug flags instead of entering the debugger */
    int param;						    /* new value for the debug flags */
};
|
||||
|
||||
#define VINUM_DEBUG _IOWR(L, 75, struct debuginfo) /* call the debugger from ioctl () */
|
||||
#endif
|
||||
|
||||
/* Kind of vinum object an ioctl message refers to */
enum objecttype {
    drive_object = 0,
    sd_object = 1,
    plex_object = 2,
    volume_object = 3,
    invalid_object = 4
};
|
||||
|
||||
/* Start an object. Pass two integers:
|
||||
* msg [0] index in vinum_conf.<object>
|
||||
* msg [1] type of object (enum objecttype, above)
|
||||
*
|
||||
* Return ioctl_reply
|
||||
*/
|
||||
#define VINUM_SETSTATE _IOC(IOC_IN | IOC_OUT, L, 76, MAX_IOCTL_REPLY) /* start an object */
|
||||
|
||||
/*
 * Generic state requested via VINUM_SETSTATE.  Every object type has
 * its own set of states, so this value is translated into the
 * object-specific state later.
 */
enum objectstate {
    object_down = 0,
    object_initializing = 1,
    object_up = 2
};
|
||||
|
||||
/* This structure is used for modifying objects
|
||||
* (VINUM_SETSTATE, VINUM_REMOVE, VINUM_RESETSTATS, VINUM_ATTACH,
|
||||
* VINUM_DETACH, VINUM_REPLACE
|
||||
*/
|
||||
struct vinum_ioctl_msg {
|
||||
int index;
|
||||
enum objecttype type;
|
||||
enum objectstate state; /* state to set (VINUM_SETSTATE) */
|
||||
int force; /* do it even if it doesn't make sense */
|
||||
int recurse; /* recurse (VINUM_REMOVE) */
|
||||
int otherobject; /* superordinate object (attach),
|
||||
* replacement object (replace) */
|
||||
int rename; /* rename object (attach) */
|
||||
int64_t offset; /* offset of subdisk (for attach) */
|
||||
};
|
||||
|
||||
#define VINUM_RELEASECONFIG _IOC(0, L, 77, 0) /* release locks and write config to disk */
|
||||
#define VINUM_STARTCONFIG _IOC(0, L, 78, 0) /* start a configuration operation */
|
||||
#define VINUM_MEMINFO _IOR(L, 79, struct meminfo) /* get memory usage summary */
|
||||
#define VINUM_MALLOCINFO _IOWR(L, 80, struct mc) /* get specific malloc information [i] */
|
||||
#define VINUM_LABEL _IOC(IOC_IN | IOC_OUT, L, 81, MAX_IOCTL_REPLY) /* label a volume */
|
||||
#define VINUM_INITSD _IOW(L, 82, int) /* initialize a subdisk */
|
||||
#define VINUM_REMOVE _IOC(IOC_IN | IOC_OUT, L, 83, MAX_IOCTL_REPLY) /* remove an object */
|
||||
#define VINUM_GETUNMAPPED _IOWR(L, 84, struct plexregion) /* get unmapped element (plex, re) */
|
||||
#define VINUM_GETDEFECTIVE _IOWR(L, 85, struct plexregion) /* get defective element (plex, re) */
|
||||
#define VINUM_RESETSTATS _IOC(IOC_IN | IOC_OUT, L, 86, MAX_IOCTL_REPLY) /* reset object stats */
|
||||
#define VINUM_ATTACH _IOC(IOC_IN | IOC_OUT, L, 87, MAX_IOCTL_REPLY) /* attach an object to a superordinate object */
|
||||
#define VINUM_DETACH _IOC(IOC_IN | IOC_OUT, L, 88, MAX_IOCTL_REPLY) /* detach an object from a superordinate object */
|
||||
|
||||
struct vinum_rename_msg {
|
||||
int index;
|
||||
int recurse; /* rename subordinate objects too */
|
||||
enum objecttype type;
|
||||
char newname[MAXNAME]; /* new name to give to object */
|
||||
};
|
||||
|
||||
#define VINUM_RENAME _IOC(IOC_IN | IOC_OUT, L, 89, MAX_IOCTL_REPLY) /* rename an object */
|
||||
#define VINUM_REPLACE _IOC(IOC_IN | IOC_OUT, L, 90, MAX_IOCTL_REPLY) /* replace an object */
|
787
sys/modules/vinum/vinumioctl.c
Normal file
787
sys/modules/vinum/vinumioctl.c
Normal file
@ -0,0 +1,787 @@
|
||||
/* XXX replace all the checks on object validity with
|
||||
* calls to valid<object> */
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: vinumioctl.c,v 1.1 1998/08/14 08:46:10 grog Exp grog $
|
||||
*/
|
||||
|
||||
#define STATIC /* nothing while we're testing XXX */
|
||||
|
||||
#define REALLYKERNEL
|
||||
#include "vinumhdr.h"
|
||||
#include "sys/sysproto.h" /* for sync(2) */
|
||||
#ifdef DEBUG
|
||||
#include <sys/reboot.h>
|
||||
#endif
|
||||
|
||||
jmp_buf command_fail; /* return on a failed command */
|
||||
|
||||
#if __FreeBSD__ >= 3
|
||||
/* Why aren't these declared anywhere? XXX */
|
||||
int setjmp(jmp_buf);
|
||||
void longjmp(jmp_buf, int);
|
||||
#endif
|
||||
|
||||
/* pointer to ioctl p parameter, to save passing it around */
|
||||
struct proc *myproc;
|
||||
|
||||
int vinum_inactive(void);
|
||||
void free_vinum(int);
|
||||
void attachobject(struct vinum_ioctl_msg *);
|
||||
void detachobject(struct vinum_ioctl_msg *);
|
||||
void renameobject(struct vinum_rename_msg *);
|
||||
void replaceobject(struct vinum_ioctl_msg *);
|
||||
|
||||
/* ioctl routine */
|
||||
int
|
||||
vinumioctl(dev_t dev,
|
||||
#if __FreeBSD__ >= 3
|
||||
u_long cmd,
|
||||
#else
|
||||
int cmd,
|
||||
#endif
|
||||
caddr_t data,
|
||||
int flag,
|
||||
struct proc *p)
|
||||
{
|
||||
BROKEN_GDB;
|
||||
unsigned int objno;
|
||||
int error = 0;
|
||||
struct volume *vol;
|
||||
unsigned int index; /* for transferring config info */
|
||||
unsigned int sdno; /* for transferring config info */
|
||||
int fe; /* free list element number */
|
||||
struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* struct to return */
|
||||
|
||||
struct devcode *device = (struct devcode *) &dev;
|
||||
|
||||
/* First, decide what we're looking at */
|
||||
switch (device->type) {
|
||||
case VINUM_SUPERDEV_TYPE:
|
||||
myproc = p; /* save pointer to process */
|
||||
ioctl_reply = (struct _ioctl_reply *) data; /* save the address to reply to */
|
||||
error = setjmp(command_fail); /* come back here on error */
|
||||
if (error) /* bombed out */
|
||||
return 0; /* the reply will contain meaningful info */
|
||||
switch (cmd) {
|
||||
/* XXX #ifdef DEBUG */
|
||||
case VINUM_DEBUG:
|
||||
boothowto |= RB_GDB; /* serial debug line */
|
||||
if (((struct debuginfo *) data)->changeit) /* change debug settings */
|
||||
debug = (((struct debuginfo *) data)->param);
|
||||
else
|
||||
Debugger("vinum debug");
|
||||
ioctl_reply = (struct _ioctl_reply *) data; /* reinstate the address to reply to */
|
||||
ioctl_reply->error = 0;
|
||||
return 0;
|
||||
/* XXX #endif */
|
||||
|
||||
case VINUM_CREATE: /* create a vinum object */
|
||||
error = lock_config(); /* get the config for us alone */
|
||||
if (error) /* can't do it, */
|
||||
return error; /* give up */
|
||||
error = setjmp(command_fail); /* come back here on error */
|
||||
if (error == 0) { /* first time, */
|
||||
parse_user_config((char *) data, &keyword_set); /* update the config */
|
||||
ioctl_reply->error = 0; /* no error if we make it here */
|
||||
} else if (ioctl_reply->error == 0) { /* longjmp, but no error status */
|
||||
ioctl_reply->error = EINVAL; /* note that something's up */
|
||||
ioctl_reply->msg[0] = '\0'; /* no message? */
|
||||
}
|
||||
unlock_config();
|
||||
return 0; /* must be 0 to return the real error info */
|
||||
|
||||
case VINUM_GETCONFIG: /* get the configuration information */
|
||||
bcopy(&vinum_conf, data, sizeof(vinum_conf));
|
||||
return 0;
|
||||
|
||||
/* start configuring the subsystem */
|
||||
case VINUM_STARTCONFIG:
|
||||
return start_config(); /* just lock it */
|
||||
|
||||
/* Move the individual parts of the config to user space.
|
||||
|
||||
* Specify the index of the object in the first word of data,
|
||||
* and return the object there
|
||||
*/
|
||||
case VINUM_DRIVECONFIG:
|
||||
index = *(int *) data; /* get the index */
|
||||
if (index >= (unsigned) vinum_conf.drives_used) /* can't do it */
|
||||
return EFAULT; /* bang */
|
||||
bcopy(&DRIVE[index], data, sizeof(struct drive)); /* copy the config item out */
|
||||
return 0;
|
||||
|
||||
case VINUM_SDCONFIG:
|
||||
index = *(int *) data; /* get the index */
|
||||
if (index >= (unsigned) vinum_conf.subdisks_used) /* can't do it */
|
||||
return EFAULT; /* bang */
|
||||
bcopy(&SD[index], data, sizeof(struct sd)); /* copy the config item out */
|
||||
return 0;
|
||||
|
||||
case VINUM_PLEXCONFIG:
|
||||
index = *(int *) data; /* get the index */
|
||||
if (index >= (unsigned) vinum_conf.plexes_used) /* can't do it */
|
||||
return EFAULT; /* bang */
|
||||
bcopy(&PLEX[index], data, sizeof(struct plex)); /* copy the config item out */
|
||||
return 0;
|
||||
|
||||
case VINUM_VOLCONFIG:
|
||||
index = *(int *) data; /* get the index */
|
||||
if (index >= (unsigned) vinum_conf.volumes_used) /* can't do it */
|
||||
return EFAULT; /* bang */
|
||||
bcopy(&VOL[index], data, sizeof(struct volume)); /* copy the config item out */
|
||||
return 0;
|
||||
|
||||
case VINUM_PLEXSDCONFIG:
|
||||
index = *(int *) data; /* get the plex index */
|
||||
sdno = ((int *) data)[1]; /* and the sd index */
|
||||
if ((index >= (unsigned) vinum_conf.plexes_used) /* plex doesn't exist */
|
||||
||(sdno >= PLEX[index].subdisks)) /* or it doesn't have this many subdisks */
|
||||
return EFAULT; /* bang */
|
||||
bcopy(&SD[PLEX[index].sdnos[sdno]], /* copy the config item out */
|
||||
data,
|
||||
sizeof(struct sd));
|
||||
return 0;
|
||||
|
||||
case VINUM_SAVECONFIG:
|
||||
if (VFLAGS & VF_CONFIGURING) { /* must be us, the others are asleep */
|
||||
finish_config(1); /* finish the configuration and update it */
|
||||
error = save_config(); /* save configuration to disk */
|
||||
} else
|
||||
error = EINVAL; /* queue up for this one, please */
|
||||
return error;
|
||||
|
||||
case VINUM_RELEASECONFIG: /* release the config */
|
||||
if (VFLAGS & VF_CONFIGURING) { /* must be us, the others are asleep */
|
||||
finish_config(0); /* finish the configuration, don't change it */
|
||||
error = save_config(); /* save configuration to disk */
|
||||
} else
|
||||
error = EINVAL; /* release what config? */
|
||||
return error;
|
||||
|
||||
case VINUM_INIT:
|
||||
ioctl_reply = (struct _ioctl_reply *) data; /* reinstate the address to reply to */
|
||||
ioctl_reply->error = 0;
|
||||
return 0;
|
||||
|
||||
case VINUM_RESETCONFIG:
|
||||
if (vinum_inactive() && (vinum_conf.opencount < 2)) { /* if we're not active */
|
||||
/* Note the open count. We may be called from v, so we'll be open.
|
||||
* Keep the count so we don't underflow */
|
||||
int oc = vinum_conf.opencount;
|
||||
free_vinum(1); /* clean up everything */
|
||||
printf("vinum: CONFIGURATION OBLITERATED\n");
|
||||
vinum_conf.opencount = oc;
|
||||
ioctl_reply = (struct _ioctl_reply *) data; /* reinstate the address to reply to */
|
||||
ioctl_reply->error = 0;
|
||||
return 0;
|
||||
}
|
||||
return EBUSY;
|
||||
|
||||
case VINUM_SETSTATE:
|
||||
setstate((struct vinum_ioctl_msg *) data); /* set an object state */
|
||||
return 0;
|
||||
|
||||
case VINUM_MEMINFO:
|
||||
vinum_meminfo(data);
|
||||
return 0;
|
||||
|
||||
case VINUM_MALLOCINFO:
|
||||
return vinum_mallocinfo(data);
|
||||
|
||||
case VINUM_LABEL: /* label a volume */
|
||||
ioctl_reply->error = write_volume_label(*(int *) data); /* index of the volume to label */
|
||||
ioctl_reply->msg[0] = '\0'; /* no message */
|
||||
return 0;
|
||||
|
||||
case VINUM_REMOVE:
|
||||
remove((struct vinum_ioctl_msg *) data); /* remove an object */
|
||||
return 0;
|
||||
|
||||
case VINUM_GETFREELIST: /* get a drive free list element */
|
||||
index = *(int *) data; /* get the drive index */
|
||||
fe = ((int *) data)[1]; /* and the free list element */
|
||||
if ((index >= (unsigned) vinum_conf.drives_used) /* plex doesn't exist */
|
||||
||(DRIVE[index].state == drive_unallocated))
|
||||
return ENODEV;
|
||||
if (fe >= DRIVE[index].freelist_entries) /* no such entry */
|
||||
return ENOENT;
|
||||
bcopy(&DRIVE[index].freelist[fe],
|
||||
data,
|
||||
sizeof(struct drive_freelist));
|
||||
return 0;
|
||||
|
||||
case VINUM_GETDEFECTIVE: /* get a plex defective area element */
|
||||
index = *(int *) data; /* get the plex index */
|
||||
fe = ((int *) data)[1]; /* and the region number */
|
||||
if ((index >= (unsigned) vinum_conf.plexes_used) /* plex doesn't exist */
|
||||
||(PLEX[index].state == plex_unallocated))
|
||||
return ENODEV;
|
||||
if (fe >= PLEX[index].defective_regions) /* no such entry */
|
||||
return ENOENT;
|
||||
bcopy(&PLEX[index].defective_region[fe],
|
||||
data,
|
||||
sizeof(struct plexregion));
|
||||
return 0;
|
||||
|
||||
case VINUM_GETUNMAPPED: /* get a plex unmapped area element */
|
||||
index = *(int *) data; /* get the plex index */
|
||||
fe = ((int *) data)[1]; /* and the region number */
|
||||
if ((index >= (unsigned) vinum_conf.plexes_used) /* plex doesn't exist */
|
||||
||(PLEX[index].state == plex_unallocated))
|
||||
return ENODEV;
|
||||
if (fe >= PLEX[index].unmapped_regions) /* no such entry */
|
||||
return ENOENT;
|
||||
bcopy(&PLEX[index].unmapped_region[fe],
|
||||
data,
|
||||
sizeof(struct plexregion));
|
||||
return 0;
|
||||
|
||||
case VINUM_RESETSTATS:
|
||||
resetstats((struct vinum_ioctl_msg *) data); /* reset object stats */
|
||||
return 0;
|
||||
|
||||
/* attach an object to a superordinate object */
|
||||
case VINUM_ATTACH:
|
||||
attachobject((struct vinum_ioctl_msg *) data);
|
||||
return 0;
|
||||
|
||||
/* detach an object from a superordinate object */
|
||||
case VINUM_DETACH:
|
||||
detachobject((struct vinum_ioctl_msg *) data);
|
||||
return 0;
|
||||
|
||||
/* rename an object */
|
||||
case VINUM_RENAME:
|
||||
renameobject((struct vinum_rename_msg *) data);
|
||||
return 0;
|
||||
|
||||
/* replace an object */
|
||||
case VINUM_REPLACE:
|
||||
replaceobject((struct vinum_ioctl_msg *) data);
|
||||
return 0;
|
||||
|
||||
default:
|
||||
/* FALLTHROUGH */
|
||||
}
|
||||
|
||||
default:
|
||||
#if __FreeBSD__>=3
|
||||
printf("vinumioctl: type %d, sd %d, plex %d, major %x, volume %d, command %lx\n",
|
||||
device->type,
|
||||
device->sd,
|
||||
device->plex,
|
||||
device->major,
|
||||
device->volume,
|
||||
cmd); /* XXX */
|
||||
|
||||
#else
|
||||
printf("vinumioctl: type %d, sd %d, plex %d, major %x, volume %d, command %x\n",
|
||||
device->type,
|
||||
device->sd,
|
||||
device->plex,
|
||||
device->major,
|
||||
device->volume,
|
||||
cmd); /* XXX */
|
||||
|
||||
#endif
|
||||
return EINVAL;
|
||||
|
||||
case VINUM_DRIVE_TYPE:
|
||||
case VINUM_PLEX_TYPE:
|
||||
return EAGAIN; /* try again next week */
|
||||
|
||||
case VINUM_SD_TYPE:
|
||||
objno = SDNO(dev);
|
||||
|
||||
switch (cmd) {
|
||||
case VINUM_INITSD: /* initialize subdisk */
|
||||
return initsd(objno);
|
||||
|
||||
default:
|
||||
return EINVAL;
|
||||
}
|
||||
break;
|
||||
|
||||
case VINUM_VOLUME_TYPE:
|
||||
objno = VOLNO(dev);
|
||||
|
||||
if ((unsigned) objno >= (unsigned) vinum_conf.volumes_used) /* not a valid volume */
|
||||
return ENXIO;
|
||||
vol = &VOL[objno];
|
||||
if (vol->state != volume_up) /* not up, */
|
||||
return EIO; /* I/O error */
|
||||
|
||||
switch (cmd) {
|
||||
case DIOCGDINFO: /* get disk label */
|
||||
get_volume_label(vol, (struct disklabel *) data);
|
||||
break;
|
||||
|
||||
/* Care! DIOCGPART returns *pointers* to
|
||||
* the caller, so we need to store this crap as well.
|
||||
* And yes, we need it. */
|
||||
case DIOCGPART: /* get partition information */
|
||||
get_volume_label(vol, &vol->label);
|
||||
((struct partinfo *) data)->disklab = &vol->label;
|
||||
((struct partinfo *) data)->part = &vol->label.d_partitions[0];
|
||||
break;
|
||||
|
||||
/* We don't have this stuff on hardware,
|
||||
* so just pretend to do it so that
|
||||
* utilities don't get upset. */
|
||||
case DIOCWDINFO: /* write partition info */
|
||||
case DIOCSDINFO: /* set partition info */
|
||||
return 0; /* not a titty */
|
||||
|
||||
case DIOCWLABEL: /* set or reset label writeable */
|
||||
if ((flag & FWRITE) == 0) /* not writeable? */
|
||||
return EACCES; /* no, die */
|
||||
if (*(int *) data != 0) /* set it? */
|
||||
vol->flags |= VF_WLABEL; /* yes */
|
||||
else
|
||||
vol->flags &= ~VF_WLABEL; /* no, reset */
|
||||
break;
|
||||
|
||||
default:
|
||||
return ENOTTY; /* not my kind of ioctl */
|
||||
}
|
||||
break;
|
||||
}
|
||||
return 0; /* XXX */
|
||||
}
|
||||
|
||||
/* The following four functions check the supplied
|
||||
* object index and return a pointer to the object
|
||||
* if it exists. Otherwise they longjump out via
|
||||
* throw_rude_remark */
|
||||
struct drive *
|
||||
validdrive(int driveno, struct _ioctl_reply *reply)
|
||||
{
|
||||
if ((driveno < vinum_conf.drives_used)
|
||||
&& (DRIVE[driveno].state != drive_unallocated))
|
||||
return &DRIVE[driveno];
|
||||
strcpy(reply->msg, "No such drive");
|
||||
reply->error = ENOENT;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct sd *
|
||||
validsd(int sdno, struct _ioctl_reply *reply)
|
||||
{
|
||||
if ((sdno < vinum_conf.subdisks_used)
|
||||
&& (SD[sdno].state != sd_unallocated))
|
||||
return &SD[sdno];
|
||||
strcpy(reply->msg, "No such subdisk");
|
||||
reply->error = ENOENT;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct plex *
|
||||
validplex(int plexno, struct _ioctl_reply *reply)
|
||||
{
|
||||
if ((plexno < vinum_conf.plexes_used)
|
||||
&& (PLEX[plexno].state != plex_unallocated))
|
||||
return &PLEX[plexno];
|
||||
strcpy(reply->msg, "No such plex");
|
||||
reply->error = ENOENT;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct volume *
|
||||
validvol(int volno, struct _ioctl_reply *reply)
|
||||
{
|
||||
if ((volno < vinum_conf.volumes_used)
|
||||
&& (VOL[volno].state != volume_unallocated))
|
||||
return &VOL[volno];
|
||||
strcpy(reply->msg, "No such volume");
|
||||
reply->error = ENOENT;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* reset an object's stats */
|
||||
void
|
||||
resetstats(struct vinum_ioctl_msg *msg)
|
||||
{
|
||||
struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
|
||||
|
||||
switch (msg->type) {
|
||||
case drive_object:
|
||||
if (msg->index < vinum_conf.drives_used) {
|
||||
struct drive *drive = &DRIVE[msg->index];
|
||||
if (drive->state != drive_unallocated) {
|
||||
drive->reads = 0; /* number of reads on this drive */
|
||||
drive->writes = 0; /* number of writes on this drive */
|
||||
drive->bytes_read = 0; /* number of bytes read */
|
||||
drive->bytes_written = 0; /* number of bytes written */
|
||||
reply->error = 0;
|
||||
return;
|
||||
}
|
||||
reply->error = EINVAL;
|
||||
return;
|
||||
}
|
||||
case sd_object:
|
||||
if (msg->index < vinum_conf.subdisks_used) {
|
||||
struct sd *sd = &SD[msg->index];
|
||||
if (sd->state != sd_unallocated) {
|
||||
sd->reads = 0; /* number of reads on this subdisk */
|
||||
sd->writes = 0; /* number of writes on this subdisk */
|
||||
sd->bytes_read = 0; /* number of bytes read */
|
||||
sd->bytes_written = 0; /* number of bytes written */
|
||||
reply->error = 0;
|
||||
return;
|
||||
}
|
||||
reply->error = EINVAL;
|
||||
return;
|
||||
}
|
||||
break;
|
||||
|
||||
case plex_object:
|
||||
if (msg->index < vinum_conf.plexes_used) {
|
||||
struct plex *plex = &PLEX[msg->index];
|
||||
if (plex->state != plex_unallocated) {
|
||||
plex->reads = 0;
|
||||
plex->writes = 0; /* number of writes on this plex */
|
||||
plex->bytes_read = 0; /* number of bytes read */
|
||||
plex->bytes_written = 0; /* number of bytes written */
|
||||
plex->multiblock = 0; /* requests that needed more than one block */
|
||||
plex->multistripe = 0; /* requests that needed more than one stripe */
|
||||
reply->error = 0;
|
||||
return;
|
||||
}
|
||||
reply->error = EINVAL;
|
||||
return;
|
||||
}
|
||||
break;
|
||||
|
||||
case volume_object:
|
||||
if (msg->index < vinum_conf.volumes_used) {
|
||||
struct volume *vol = &VOL[msg->index];
|
||||
if (vol->state != volume_unallocated) {
|
||||
vol->bytes_read = 0; /* number of bytes read */
|
||||
vol->bytes_written = 0; /* number of bytes written */
|
||||
vol->reads = 0; /* number of reads on this volume */
|
||||
vol->writes = 0; /* number of writes on this volume */
|
||||
vol->recovered_reads = 0; /* reads recovered from another plex */
|
||||
reply->error = 0;
|
||||
return;
|
||||
}
|
||||
reply->error = EINVAL;
|
||||
return;
|
||||
}
|
||||
case invalid_object: /* can't get this */
|
||||
reply->error = EINVAL;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* attach an object to a superior object */
|
||||
void
|
||||
attachobject(struct vinum_ioctl_msg *msg)
|
||||
{
|
||||
struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
|
||||
struct sd *sd;
|
||||
struct plex *plex;
|
||||
struct volume *vol;
|
||||
|
||||
switch (msg->type) {
|
||||
case drive_object: /* you can't attach a drive to anything */
|
||||
case volume_object: /* nor a volume */
|
||||
case invalid_object: /* "this can't happen" */
|
||||
reply->error = EINVAL;
|
||||
reply->msg[0] = '\0'; /* vinum(8) doesn't do this */
|
||||
return;
|
||||
|
||||
case sd_object:
|
||||
sd = validsd(msg->index, reply);
|
||||
if (sd == NULL) /* not a valid subdisk */
|
||||
return;
|
||||
plex = validplex(msg->otherobject, reply);
|
||||
if (plex) {
|
||||
if (sd->plexno >= 0) { /* already belong to a plex */
|
||||
reply->error = EBUSY; /* no message, the user should check */
|
||||
reply->msg[0] = '\0';
|
||||
return;
|
||||
}
|
||||
sd->plexoffset = msg->offset; /* this is where we want it */
|
||||
set_sd_state(sd->sdno, sd_stale, setstate_force); /* make sure it's stale */
|
||||
give_sd_to_plex(plex->plexno, sd->sdno); /* and give it to the plex */
|
||||
update_sd_config(sd->sdno, 0);
|
||||
save_config();
|
||||
reply->error = 0;
|
||||
}
|
||||
break;
|
||||
|
||||
case plex_object:
|
||||
plex = validplex(msg->index, reply); /* get plex */
|
||||
if (plex == NULL)
|
||||
return;
|
||||
if (plex->organization != plex_concat) { /* can't attach to striped and raid-5 */
|
||||
reply->error = EINVAL; /* no message, the user should check */
|
||||
reply->msg[0] = '\0';
|
||||
return;
|
||||
}
|
||||
vol = validvol(msg->otherobject, reply); /* and volume information */
|
||||
if (vol) {
|
||||
if ((vol->plexes == MAXPLEX) /* we have too many already */
|
||||
||(plex->volno >= 0)) { /* or the plex has an owner */
|
||||
reply->error = EINVAL; /* no message, the user should check */
|
||||
reply->msg[0] = '\0';
|
||||
return;
|
||||
}
|
||||
set_plex_state(plex->plexno, plex_down, setstate_force); /* make sure it's down */
|
||||
give_plex_to_volume(msg->otherobject, msg->index); /* and give it to the volume */
|
||||
update_plex_config(plex->plexno, 0);
|
||||
save_config();
|
||||
if (plex->state == plex_reviving)
|
||||
reply->error = EAGAIN; /* need to revive it */
|
||||
else
|
||||
reply->error = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* detach an object from a superior object */
|
||||
void
|
||||
detachobject(struct vinum_ioctl_msg *msg)
|
||||
{
|
||||
struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
|
||||
struct sd *sd;
|
||||
struct plex *plex;
|
||||
struct volume *vol;
|
||||
int sdno;
|
||||
int plexno;
|
||||
|
||||
switch (msg->type) {
|
||||
case drive_object: /* you can't attach a drive to anything */
|
||||
case volume_object: /* nor a volume */
|
||||
case invalid_object: /* "this can't happen" */
|
||||
reply->error = EINVAL;
|
||||
reply->msg[0] = '\0'; /* vinum(8) doesn't do this */
|
||||
return;
|
||||
|
||||
case sd_object:
|
||||
sd = validsd(msg->index, reply);
|
||||
if (sd == NULL)
|
||||
return;
|
||||
if (sd->plexno < 0) { /* doesn't belong to a plex */
|
||||
reply->error = ENOENT;
|
||||
strcpy(reply->msg, "Subdisk is not attached");
|
||||
return;
|
||||
} else { /* valid plex number */
|
||||
plex = &PLEX[sd->plexno];
|
||||
if ((!msg->force) /* don't force things */
|
||||
&&((plex->state == plex_up) /* and the plex is up */
|
||||
||((plex->state == plex_flaky) && sd->state == sd_up))) { /* or flaky with this sd up */
|
||||
reply->error = EBUSY; /* we need this sd */
|
||||
reply->msg[0] = '\0';
|
||||
return;
|
||||
}
|
||||
sd->plexno = -1; /* anonymous sd */
|
||||
if (plex->subdisks == 1) { /* this was the only subdisk */
|
||||
Free(plex->sdnos); /* free the subdisk array */
|
||||
plex->sdnos = NULL; /* and note the fact */
|
||||
plex->subdisks_allocated = 0; /* no subdisk space */
|
||||
} else {
|
||||
for (sdno = 0; sdno < plex->subdisks; sdno++) {
|
||||
if (plex->sdnos[sdno] == msg->index) /* found our subdisk */
|
||||
break;
|
||||
}
|
||||
if (sdno < (plex->subdisks - 1)) /* not the last one, compact */
|
||||
bcopy(&plex->sdnos[sdno + 1],
|
||||
&plex->sdnos[sdno],
|
||||
(plex->subdisks - 1 - sdno) * sizeof(int));
|
||||
}
|
||||
plex->subdisks--;
|
||||
rebuild_plex_unmappedlist(plex); /* rebuild the unmapped list */
|
||||
if (!bcmp(plex->name, sd->name, strlen(plex->name))) { /* this subdisk is named after the plex */
|
||||
bcopy(sd->name,
|
||||
&sd->name[3],
|
||||
min(strlen(sd->name), MAXSDNAME - 3));
|
||||
bcopy("ex-", sd->name, 3);
|
||||
sd->name[MAXSDNAME - 1] = '\0';
|
||||
}
|
||||
update_plex_config(plex->plexno, 0);
|
||||
if ((plex->organization == plex_striped) /* we've just mutilated our plex, */
|
||||
||(plex->organization == plex_striped)) /* the data no longer matches */
|
||||
set_plex_state(plex->plexno,
|
||||
plex_down,
|
||||
setstate_force | setstate_configuring);
|
||||
update_sd_config(sd->sdno, 0);
|
||||
save_config();
|
||||
reply->error = 0;
|
||||
}
|
||||
return;
|
||||
|
||||
case plex_object:
|
||||
plex = validplex(msg->index, reply); /* get plex */
|
||||
if (plex == NULL)
|
||||
return;
|
||||
if (plex->volno >= 0) {
|
||||
int volno = plex->volno;
|
||||
|
||||
vol = &VOL[volno];
|
||||
if ((!msg->force) /* don't force things */
|
||||
&&((vol->state == volume_up) /* and the volume is up */
|
||||
&&(vol->plexes == 1))) { /* and this is the last plex */
|
||||
/* XXX As elsewhere, check whether we will lose
|
||||
* mapping by removing this plex */
|
||||
reply->error = EBUSY; /* we need this plex */
|
||||
reply->msg[0] = '\0';
|
||||
return;
|
||||
}
|
||||
plex->volno = -1; /* anonymous plex */
|
||||
for (plexno = 0; plexno < vol->plexes; plexno++) {
|
||||
if (vol->plex[plexno] == msg->index) /* found our plex */
|
||||
break;
|
||||
}
|
||||
if (plexno < (vol->plexes - 1)) /* not the last one, compact */
|
||||
bcopy(&vol[plexno + 1], &vol[plexno], (vol->plexes - 1 - plexno) * sizeof(int));
|
||||
vol->plexes--;
|
||||
if (!bcmp(vol->name, plex->name, strlen(vol->name))) { /* this plex is named after the volume */
|
||||
/* First, check if the subdisks are the same */
|
||||
if (msg->recurse) {
|
||||
int sdno;
|
||||
|
||||
for (sdno = 0; sdno < plex->subdisks; sdno++) {
|
||||
struct sd *sd = &SD[plex->sdnos[sdno]];
|
||||
|
||||
if (!bcmp(plex->name, sd->name, strlen(plex->name))) { /* subdisk is named after the plex */
|
||||
bcopy(sd->name, &sd->name[3], min(strlen(sd->name), MAXSDNAME - 3));
|
||||
bcopy("ex-", sd->name, 3);
|
||||
sd->name[MAXSDNAME - 1] = '\0';
|
||||
}
|
||||
}
|
||||
}
|
||||
bcopy(plex->name, &plex->name[3], min(strlen(plex->name), MAXPLEXNAME - 3));
|
||||
bcopy("ex-", plex->name, 3);
|
||||
plex->name[MAXPLEXNAME - 1] = '\0';
|
||||
}
|
||||
update_plex_config(plex->plexno, 0);
|
||||
update_volume_config(volno, 0);
|
||||
save_config();
|
||||
reply->error = 0;
|
||||
} else {
|
||||
reply->error = ENOENT;
|
||||
strcpy(reply->msg, "Plex is not attached");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
renameobject(struct vinum_rename_msg *msg)
|
||||
{
|
||||
struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
|
||||
struct drive *drive;
|
||||
struct sd *sd;
|
||||
struct plex *plex;
|
||||
struct volume *vol;
|
||||
|
||||
switch (msg->type) {
|
||||
case drive_object: /* you can't attach a drive to anything */
|
||||
if (find_drive(msg->newname, 0) >= 0) { /* we have that name already, */
|
||||
reply->error = EEXIST;
|
||||
reply->msg[0] = '\0';
|
||||
return;
|
||||
}
|
||||
drive = validdrive(msg->index, reply);
|
||||
if (drive) {
|
||||
bcopy(msg->newname, drive->label.name, MAXDRIVENAME);
|
||||
save_config();
|
||||
reply->error = 0;
|
||||
}
|
||||
return;
|
||||
|
||||
case sd_object: /* you can't attach a subdisk to anything */
|
||||
if (find_subdisk(msg->newname, 0) >= 0) { /* we have that name already, */
|
||||
reply->error = EEXIST;
|
||||
reply->msg[0] = '\0';
|
||||
return;
|
||||
}
|
||||
sd = validsd(msg->index, reply);
|
||||
if (sd) {
|
||||
bcopy(msg->newname, sd->name, MAXSDNAME);
|
||||
update_sd_config(sd->sdno, 0);
|
||||
save_config();
|
||||
reply->error = 0;
|
||||
}
|
||||
return;
|
||||
|
||||
case plex_object: /* you can't attach a plex to anything */
|
||||
if (find_plex(msg->newname, 0) >= 0) { /* we have that name already, */
|
||||
reply->error = EEXIST;
|
||||
reply->msg[0] = '\0';
|
||||
return;
|
||||
}
|
||||
plex = validplex(msg->index, reply);
|
||||
if (plex) {
|
||||
bcopy(msg->newname, plex->name, MAXPLEXNAME);
|
||||
update_plex_config(plex->plexno, 0);
|
||||
save_config();
|
||||
reply->error = 0;
|
||||
}
|
||||
return;
|
||||
|
||||
case volume_object: /* you can't attach a volume to anything */
|
||||
if (find_volume(msg->newname, 0) >= 0) { /* we have that name already, */
|
||||
reply->error = EEXIST;
|
||||
reply->msg[0] = '\0';
|
||||
return;
|
||||
}
|
||||
vol = validvol(msg->index, reply);
|
||||
if (vol) {
|
||||
bcopy(msg->newname, vol->name, MAXVOLNAME);
|
||||
update_volume_config(msg->index, 0);
|
||||
save_config();
|
||||
reply->error = 0;
|
||||
}
|
||||
return;
|
||||
|
||||
case invalid_object:
|
||||
reply->error = EINVAL;
|
||||
reply->msg[0] = '\0';
|
||||
}
|
||||
}
|
||||
|
||||
/* Replace one object with another */
|
||||
void
|
||||
replaceobject(struct vinum_ioctl_msg *msg)
|
||||
{
|
||||
struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
|
||||
|
||||
reply->error = ENODEV; /* until I know how to do this */
|
||||
strcpy(reply->msg, "replace not implemented yet");
|
||||
/* save_config (); */
|
||||
}
|
120
sys/modules/vinum/vinumkw.h
Normal file
120
sys/modules/vinum/vinumkw.h
Normal file
@ -0,0 +1,120 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: vinumkw.h,v 1.7 1998/08/07 02:35:51 grog Exp grog $
|
||||
*/
|
||||
|
||||
/* Command keywords that vinum knows. These include both user-level
|
||||
* and kernel-level stuff */
|
||||
|
||||
/* Our complete vocabulary. The names of the commands are
|
||||
* the same as the identifier without the kw_ at the beginning
|
||||
* (i.e. kw_create defines the "create" keyword). Preprocessor
|
||||
* magic in parser.c does the rest. */
|
||||
/*
 * The keywords, numbered consecutively from 0 in the order listed.
 * Enumerators written with "=" are alternative spellings that share
 * the value of the keyword they name.
 */
enum keyword {
    kw_invalid_keyword = -1,				    /* not a keyword; numbering restarts at 0 below */

    kw_create,
    kw_modify,
    kw_list,
    kw_l = kw_list,
    kw_ld,						    /* list drive */
    kw_ls,						    /* list subdisk */
    kw_lp,						    /* list plex */
    kw_lv,						    /* list volume */
    kw_set,
    kw_rm,
    kw_start,
    kw_stop,
    kw_drive,
    kw_sd,
    kw_subdisk = kw_sd,
    kw_plex,
    kw_volume,
    kw_vol = kw_volume,
    kw_read,
    kw_readpol,
    kw_org,
    kw_name,
    kw_concat,
    kw_striped,
    kw_raid5,
    kw_driveoffset,
    kw_plexoffset,
    kw_len,
    kw_length = kw_len,
    kw_state,
    kw_setupstate,

    /* flag names */
    kw_d,
    kw_f,
    kw_r,
    kw_s,
    kw_v,

    kw_round,						    /* round robin */
    kw_prefer,						    /* prefer plex */
    kw_device,
    kw_init,
    kw_label,
    kw_resetconfig,
    kw_writethrough,
    kw_writeback,
    kw_raw,
    kw_resetstats,
    kw_attach,
    kw_detach,
    kw_rename,
    kw_printconfig,
    kw_replace,
    kw_detached
#ifdef DEBUG
    ,
    kw_debug,						    /* go into debugger */
    kw_info
#endif
};
|
||||
|
||||
struct _keywords {
|
||||
char *name;
|
||||
enum keyword keyword;
|
||||
};
|
||||
|
||||
/* A keyword table together with a size. */
struct keywordset {
    int size;						    /* presumably the number of entries in k -- confirm */
    struct _keywords *k;				    /* the table itself */
};
|
||||
|
||||
/*
 * Keyword tables and keyword sets, defined elsewhere.  Going by the
 * names, keyword_set presumably describes keywords[] and flag_set
 * describes flag_keywords[] -- confirm against the definitions.
 */
extern struct _keywords keywords[];
extern struct keywordset keyword_set;

extern struct _keywords flag_keywords[];
extern struct keywordset flag_set;
|
213
sys/modules/vinum/vinumstate.h
Normal file
213
sys/modules/vinum/vinumstate.h
Normal file
@ -0,0 +1,213 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: vinumstate.h,v 1.11 1998/08/04 06:22:49 grog Exp grog $
|
||||
*/
|
||||
|
||||
/* This file gets read by makestatetext to create text files
|
||||
* with the names of the states, so don't change the file
|
||||
* format */
|
||||
|
||||
/*
 * States of a volume, numbered from 0 in the order listed.  The last
 * enumerator is an alias for the highest state, for dimensioning tables.
 * The comments for the first two states follow the enumerator they
 * describe.
 */
enum volumestate {
|
||||
volume_unallocated,
|
||||
/* present but unused. Must be 0 */
|
||||
|
||||
volume_uninit,
|
||||
/* mentioned elsewhere but not defined */
|
||||
|
||||
volume_down,
|
||||
|
||||
/* The volume is up and functional, but not all plexes may be available */
|
||||
volume_up,
|
||||
volume_laststate = volume_up /* last value, for table dimensions */
|
||||
};
|
||||
|
||||
/*
 * States of a plex, numbered from 0 in the order listed.  The states
 * from plex_firstup onwards are the "up" states; keeping them together
 * at the end makes them easier to check for.  The last enumerator is an
 * alias for the highest state, for dimensioning tables.
 */
enum plexstate {
|
||||
/* An empty entry, not a plex at all. */
|
||||
plex_unallocated,
|
||||
|
||||
/* The plex has been allocated, but its configuration
 * is not complete */
|
||||
plex_init,
|
||||
|
||||
/* A plex which has gone completely down because of
 * I/O errors. */
|
||||
plex_faulty,
|
||||
|
||||
/* A plex which has been taken down by the
 * administrator. */
|
||||
plex_down,
|
||||
|
||||
/* A plex which is currently being brought up after
 * having been down.  This involves copying data from
 * another plex */
|
||||
plex_reviving,
|
||||
|
||||
/* A plex which is being initialized */
|
||||
plex_initializing,
|
||||
|
||||
/* *** The remaining states represent plexes which are
 * at least partially up.  Keep these separate so that
 * they can be checked more easily. */
|
||||
|
||||
/* A plex entry which is at least partially up.  Not
 * all subdisks are available, and an inconsistency
 * has occurred.  If no other plex is uncorrupted,
 * the volume is no longer consistent. */
|
||||
plex_corrupt,
|
||||
|
||||
plex_firstup = plex_corrupt, /* first "up" state */
|
||||
|
||||
/* A plex entry which is at least partially up.  Not
 * all subdisks are available, but so far no
 * inconsistency has occurred (this will change with
 * the first write to the address space occupied by
 * a defective subdisk).  A RAID 5 plex with one subdisk
 * down will remain degraded even after a write */
|
||||
plex_degraded,
|
||||
|
||||
/* A plex which is really up, but which has a reborn
 * subdisk which we don't completely trust, and
 * which we don't want to read if we can avoid it */
|
||||
plex_flaky,
|
||||
|
||||
/* A plex entry which is completely up.  All subdisks
 * are up. */
|
||||
plex_up,
|
||||
|
||||
plex_laststate = plex_up /* last value, for table dimensions */
|
||||
};
|
||||
|
||||
/*
 * Subdisk states, numbered from 0 in the order listed.  The "***"
 * comments divide the states into groups: invalid data; valid but
 * inaccessible data; accessible subdisks with valid data.  The last
 * enumerator is an alias for the highest state, for dimensioning tables.
 */
|
||||
enum sdstate {
|
||||
/* An empty entry, not a subdisk at all. */
|
||||
sd_unallocated,
|
||||
|
||||
/* A subdisk entry which has not been created
 * completely.  Some fields may be empty.
 */
|
||||
sd_uninit,
|
||||
|
||||
/* A subdisk entry which has been created completely.
 * All fields are correct, but the disk hasn't
 * been updated.
 */
|
||||
sd_init,
|
||||
|
||||
/* A subdisk entry which has been created completely and
 * which is currently being initialized */
|
||||
sd_initializing,
|
||||
|
||||
/* A subdisk entry which has been created completely.
 * All fields are correct, and the disk has been
 * updated, but there is no data on the disk.
 */
|
||||
sd_empty,
|
||||
|
||||
/* *** The following states represent invalid data */
|
||||
/* A subdisk entry which has been created completely.
 * All fields are correct, the disk has been updated,
 * and the data was valid, but since then the drive
 * has gone down, and as a result updates have been
 * missed.
 */
|
||||
sd_obsolete,
|
||||
|
||||
/* A subdisk entry which has been created completely.
 * All fields are correct, the disk has been updated,
 * and the data was valid, but since then the drive
 * has gone down, updates have been lost, and then
 * the drive came up again.
 */
|
||||
sd_stale,
|
||||
|
||||
/* *** The following states represent valid, inaccessible data */
|
||||
/* A subdisk entry which has been created completely.
 * All fields are correct, the disk has been updated,
 * and the data was valid, but since then the drive
 * has gone down.  No attempt has been made to write
 * to the subdisk since the crash.
 */
|
||||
sd_crashed,
|
||||
|
||||
/* A subdisk entry which was up, which contained
 * valid data, and which was taken down by the
 * administrator.  The data is valid. */
|
||||
sd_down,
|
||||
|
||||
/* *** The following states represent accessible subdisks
 * with valid data */
|
||||
|
||||
/* A subdisk entry which has been created completely.
 * All fields are correct, the disk has been updated,
 * and the data was valid, but since then the drive
 * has gone down and up again.  No updates were lost,
 * but it is possible that the subdisk has been
 * damaged.  We won't read from this subdisk if we
 * have a choice.  If this is the only subdisk which
 * covers this address space in the plex, we set its
 * state to sd_up under these circumstances, so this
 * status implies that there is another subdisk to
 * fulfil the request.
 */
|
||||
sd_reborn,
|
||||
|
||||
/* A subdisk entry which has been created completely.
 * All fields are correct, the disk has been updated,
 * and the data is valid.
 */
|
||||
sd_up,
|
||||
|
||||
sd_laststate = sd_up /* last value, for table dimensions */
|
||||
};
|
||||
|
||||
/*
 * States of a drive, numbered from 0 in the order listed.  Each comment
 * follows the enumerator it describes.  The last enumerator is an alias
 * for the highest state, for dimensioning tables.
 */
enum drivestate {
|
||||
drive_unallocated,
|
||||
/* present but unused. Must be 0 */
|
||||
|
||||
drive_uninit,
|
||||
/* just mentioned in some other config entry */
|
||||
|
||||
drive_down,
|
||||
/* not accessible */
|
||||
|
||||
drive_coming_up,
|
||||
/* in the process of being brought up */
|
||||
|
||||
drive_up,
|
||||
/* up and running */
|
||||
|
||||
drive_laststate = drive_up /* last value, for table dimensions */
|
||||
};
|
510
sys/modules/vinum/vinumvar.h
Normal file
510
sys/modules/vinum/vinumvar.h
Normal file
@ -0,0 +1,510 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: vinumvar.h,v 1.15 1998/08/14 06:36:41 grog Exp grog $
|
||||
*/
|
||||
|
||||
/* XXX gdb can't find our global pointers, so use this kludge to
 * point to them locally.  Remove after testing.
 *
 * The macro expands to the declaration of a local pointer VC that
 * points to vinum_conf.  It has no trailing semicolon, so the user
 * supplies one. */
|
||||
#define BROKEN_GDB struct _vinum_conf *VC = &vinum_conf
|
||||
|
||||
#include <sys/time.h>
|
||||
#include "vinumstate.h"
|
||||
/* Some configuration maxima.  They're an enum because
 * we can't define global constants.  Sorry about that.
 *
 * These aren't as bad as they look: most of them
 * are soft limits.  Only the MAXCONFIG parameter is set in stone
 *
 * The macros defined inside the enum build and take apart device
 * numbers.  Their arguments are parenthesized so that expressions such
 * as "a | b" can be passed safely; several of them evaluate an argument
 * more than once, so don't pass expressions with side effects.
 */

enum constants {
    VINUM_HEADER = 512,					    /* size of header on disk */
    MAXCONFIGLINE = 1024,				    /* maximum size of a single config line */
    /* XXX Do we still need this? */
    MINVINUMSLICE = 1048576,				    /* minimum size of a slice */

    CDEV_MAJOR = 91,					    /* major number for character device */
    BDEV_MAJOR = 25,					    /* and block device */

    ROUND_ROBIN_READPOL = -1,				    /* round robin read policy */

    /* type field in minor number */
    VINUM_VOLUME_TYPE = 0,
    VINUM_PLEX_TYPE = 1,
    VINUM_SD_TYPE = 2,
    VINUM_DRIVE_TYPE = 3,
    VINUM_SUPERDEV_TYPE = 4,				    /* super device. */

    /* Shifts for the individual fields in the device */
    VINUM_TYPE_SHIFT = 28,
    VINUM_VOL_SHIFT = 0,
    VINUM_PLEX_SHIFT = 16,
    VINUM_SD_SHIFT = 20,
    VINUM_VOL_WIDTH = 8,
    VINUM_PLEX_WIDTH = 3,
    VINUM_SD_WIDTH = 8,
    MAJORDEV_SHIFT = 8,


/* Create a block device number from volume, plex, subdisk and type */
#define VINUMBDEV(v,p,s,t)  ((BDEV_MAJOR << MAJORDEV_SHIFT)	\
			     | ((v) << VINUM_VOL_SHIFT)		\
			     | ((p) << VINUM_PLEX_SHIFT)	\
			     | ((s) << VINUM_SD_SHIFT)		\
			     | ((t) << VINUM_TYPE_SHIFT))

/* And a character device number */
#define VINUMCDEV(v,p,s,t)  ((CDEV_MAJOR << MAJORDEV_SHIFT)	\
			     | ((v) << VINUM_VOL_SHIFT)		\
			     | ((p) << VINUM_PLEX_SHIFT)	\
			     | ((s) << VINUM_SD_SHIFT)		\
			     | ((t) << VINUM_TYPE_SHIFT))

/* extract device type */
#define DEVTYPE(x) (((x) >> VINUM_TYPE_SHIFT) & 7)

/* extract volume number */
#define VOLNO(x) ((x) & ((1 << VINUM_VOL_WIDTH) - 1))

/* extract plex number: the plex field indexes the volume's plex list */
#define PLEXNO(x) (VOL [VOLNO (x)].plex [((x) >> VINUM_PLEX_SHIFT) & ((1 << VINUM_PLEX_WIDTH) - 1)])

/* extract subdisk number: the subdisk field indexes the plex's subdisk list */
#define SDNO(x) (PLEX [PLEXNO (x)].sdnos [((x) >> VINUM_SD_SHIFT) & ((1 << VINUM_SD_WIDTH) - 1)])

/* extract drive number */
#define DRIVENO(x) (SD [SDNO (x)].driveno)

    VINUM_SUPERDEV = VINUMBDEV(0, 0, 0, VINUM_SUPERDEV_TYPE), /* superdevice number */

/* the number of object entries to cater for initially, and also the
 * value by which they are incremented.  It doesn't take long
 * to extend them, so theoretically we could start with 1 of each, but
 * it's untidy to allocate such small areas.  These values are
 * probably too small.
 */

    INITIAL_DRIVES = 4,
    INITIAL_VOLUMES = 4,
    INITIAL_PLEXES = 8,
    INITIAL_SUBDISKS = 16,
    INITIAL_SUBDISKS_IN_PLEX = 4,			    /* number of subdisks to allocate to a plex */
    INITIAL_SUBDISKS_IN_DRIVE = 4,			    /* number of subdisks to allocate to a drive */
    INITIAL_DRIVE_FREELIST = 16,			    /* number of entries in drive freelist */
    PLEX_REGION_TABLE_SIZE = 8,				    /* number of entries in plex region tables */
    INITIAL_LOCKS = 8,					    /* number of locks to allocate to a volume */
    DEFAULT_REVIVE_BLOCKSIZE = 32768,			    /* size of block to transfer in one op */
};
|
||||
|
||||
/* device numbers */
|
||||
|
||||
/*
|
||||
* 31 30 28 27 20 19 18 16 15 8 7 0
|
||||
* |-----------------------------------------------------------------------------------------------|
|
||||
* |X | Type | Subdisk number | X| Plex | Major number | volume number |
|
||||
* |-----------------------------------------------------------------------------------------------|
|
||||
*
|
||||
* 0x2 03 1 19 06
|
||||
*/
|
||||
/*
 * A device number split into its fields.  The widths add up to 32 bits
 * and the fields are listed from the low-order end of the layout drawn
 * above (volume number) to the high-order end (sign bit).
 */
struct devcode {
|
||||
/* CARE.  These fields assume a big-endian word.  On a
 * little-endian system, they're the wrong way around.
 * NOTE(review): how bit-fields are allocated within a word is up
 * to the compiler, so check this layout on every platform before
 * relying on it. */
|
||||
unsigned volume:8; /* up to 256 volumes */
|
||||
unsigned major:8; /* this is where the major number fits */
|
||||
unsigned plex:3; /* up to 8 plexes per volume */
|
||||
unsigned unused:1; /* up for grabs */
|
||||
unsigned sd:8; /* up to 256 subdisks per plex */
|
||||
unsigned type:3; /* type of object */
|
||||
/* values of the type field, as defined in enum constants:
   VINUM_VOLUME_TYPE = 0,
   VINUM_PLEX_TYPE = 1,
   VINUM_SD_TYPE = 2,
   VINUM_DRIVE_TYPE = 3,
   VINUM_SUPERDEV_TYPE = 4, */
|
||||
unsigned signbit:1; /* to make 32 bits */
|
||||
};
|
||||
|
||||
#define VINUM_DIR "/dev/vinum"
|
||||
#define VINUM_RDIR "/dev/rvinum"
|
||||
#define VINUM_SUPERDEV_NAME VINUM_DIR"/control"
|
||||
#define MAXDRIVENAME 32 /* maximum length of a device name */
|
||||
#define MAXSDNAME 64 /* maximum length of a subdisk name */
|
||||
#define MAXPLEXNAME 64 /* maximum length of a plex name */
|
||||
#define MAXVOLNAME 64 /* maximum length of a volume name */
|
||||
#define MAXNAME 64 /* maximum length of any name */
|
||||
#define MAXVOLPLEX 8 /* maximum number of plexes in a volume */
|
||||
|
||||
/* Flags for all objects. Most of them only apply to
|
||||
* specific objects, but we have space for all in any
|
||||
* 32 bit flags word. */
|
||||
enum objflags {
|
||||
VF_LOCKED = 1, /* somebody has locked access to this object */
|
||||
VF_LOCKING = 2, /* we want access to this object */
|
||||
VF_WRITETHROUGH = 8, /* volume: write through */
|
||||
VF_INITED = 0x10, /* unit has been initialized */
|
||||
VF_WLABEL = 0x20, /* label area is writable */
|
||||
VF_LABELLING = 0x40, /* unit is currently being labelled */
|
||||
VF_WANTED = 0x80, /* someone is waiting to obtain a lock */
|
||||
VF_RAW = 0x100, /* raw volume (no file system) */
|
||||
VF_LOADED = 0x200, /* module is loaded */
|
||||
VF_CONFIGURING = 0x400, /* somebody is changing the config */
|
||||
VF_WILL_CONFIGURE = 0x800, /* somebody wants to change the config */
|
||||
VF_CONFIG_INCOMPLETE = 0x1000, /* haven't finished changing the config */
|
||||
VF_CONFIG_SETUPSTATE = 0x2000, /* set a volume up if all plexes are empty */
|
||||
VF_READING_CONFIG = 0x4000, /* we're reading config database from disk */
|
||||
VF_KERNELOP = 0x8000, /* we're performing ops from kernel space */
|
||||
};
|
||||
|
||||
/* Global configuration information for the vinum subsystem */
|
||||
/*
 * Global vinum configuration: one table pointer per object type
 * (drive, subdisk, plex, volume), the number of table entries allocated
 * and in use for each, plus subsystem-wide flags and an open count.
 */
struct _vinum_conf {
    /* Pointers to vinum structures */
    struct drive *drive;
    struct sd *sd;
    struct plex *plex;
    struct volume *volume;

    /* the number allocated */
    int drives_allocated;
    int subdisks_allocated;
    int plexes_allocated;
    int volumes_allocated;

    /* and the number currently in use */
    int drives_used;
    int subdisks_used;
    int plexes_used;
    int volumes_used;

    int flags;						    /* NOTE(review): presumably enum objflags bits -- confirm */
    int opencount;					    /* number of times we've been opened */
    /*
     * Debug-only members.  Tested with #ifdef, like the debug definitions
     * at the end of this file, so that both are enabled by the same
     * condition (and an empty "#define DEBUG" does not break the build).
     */
#ifdef DEBUG
    int lastrq;
    struct buf *lastbuf;
#endif
};
|
||||
|
||||
/* Use these defines to simplify code */
|
||||
#define DRIVE vinum_conf.drive
|
||||
#define SD vinum_conf.sd
|
||||
#define PLEX vinum_conf.plex
|
||||
#define VOL vinum_conf.volume
|
||||
#define VFLAGS vinum_conf.flags
|
||||
|
||||
/* Slice header
|
||||
|
||||
* Vinum drives start with this structure:
|
||||
*
|
||||
* Sector
|
||||
* |--------------------------------------|
|
||||
* | PDP-11 memorial boot block | 0
|
||||
* |--------------------------------------|
|
||||
* | Disk label, maybe | 1
|
||||
* |--------------------------------------|
|
||||
* | Slice definition (vinum_hdr) | 8
|
||||
* |--------------------------------------|
|
||||
* | |
|
||||
* | Configuration info, first copy | 9
|
||||
* | |
|
||||
* |--------------------------------------|
|
||||
* | |
|
||||
* | Configuration info, second copy | 9 + size of config
|
||||
* | |
|
||||
* |--------------------------------------|
|
||||
*/
|
||||
|
||||
/* Sizes and offsets of our information */
|
||||
enum {
|
||||
VINUM_LABEL_OFFSET = 4096, /* offset of vinum label */
|
||||
VINUMHEADERLEN = 512, /* size of vinum label */
|
||||
VINUM_CONFIG_OFFSET = 4608, /* offset of first config copy */
|
||||
MAXCONFIG = 65536, /* and size of config copy */
|
||||
DATASTART = (MAXCONFIG * 2 + VINUM_CONFIG_OFFSET) / DEV_BSIZE /* this is where the data starts */
|
||||
};
|
||||
|
||||
/* hostname is 256 bytes long, but we don't need to shlep
|
||||
* multiple copies in vinum. We use the host name just
|
||||
* to identify this system, and 32 bytes should be ample
|
||||
* for that purpose */
|
||||
#define VINUMHOSTNAMELEN 32
|
||||
|
||||
struct vinum_label {
|
||||
char sysname[VINUMHOSTNAMELEN]; /* system name at time of creation */
|
||||
char name[MAXDRIVENAME]; /* our name of the drive */
|
||||
struct timeval date_of_birth; /* the time it was created */
|
||||
struct timeval last_update; /* and the time of last update */
|
||||
off_t drive_size; /* total size in bytes of the drive.
|
||||
* This value includes the headers */
|
||||
};
|
||||
|
||||
struct vinum_hdr {
|
||||
long long magic; /* we're long on magic numbers */
|
||||
/* XXX Get these right for big-endian */
|
||||
#define VINUM_MAGIC 22322600044678729LL /* should be this */
|
||||
#define VINUM_NOMAGIC 22322600044678990LL /* becomes this after obliteration */
|
||||
int config_length; /* size in bytes of each copy of the
|
||||
* configuration info.
|
||||
* This must be a multiple of the sector size. */
|
||||
|
||||
struct vinum_label label; /* unique label */
|
||||
};
|
||||
|
||||
/* Information returned from read_drive_label */
|
||||
enum drive_label_info {
|
||||
DL_CANT_OPEN, /* invalid partition */
|
||||
DL_NOT_OURS, /* valid partition, but no vinum label */
|
||||
DL_DELETED_LABEL, /* valid partition, deleted label found */
|
||||
DL_WRONG_DRIVE, /* drive name doesn't match */
|
||||
DL_OURS /* valid partition and label found */
|
||||
};
|
||||
|
||||
/*** Drive definitions ***/
|
||||
/* A drive corresponds to a disk slice. We use a different term to show
|
||||
* the difference in usage: it doesn't have to be a slice, and could
|
||||
* theoretically be a complete, unpartitioned disk */
|
||||
|
||||
struct drive {
|
||||
enum drivestate state; /* current state */
|
||||
int subdisks_allocated; /* number of entries in sd */
|
||||
int subdisks_used; /* and the number used */
|
||||
int blocksize; /* size of fs blocks */
|
||||
u_int64_t sectors_available; /* number of sectors still available */
|
||||
int secsperblock;
|
||||
int lasterror; /* last error on drive */
|
||||
int driveno; /* index of drive in vinum_conf */
|
||||
int opencount; /* number of up subdisks */
|
||||
u_int64_t reads; /* number of reads on this drive */
|
||||
u_int64_t writes; /* number of writes on this drive */
|
||||
u_int64_t bytes_read; /* number of bytes read */
|
||||
u_int64_t bytes_written; /* number of bytes written */
|
||||
dev_t dev; /* and device number */
|
||||
char devicename[MAXDRIVENAME]; /* name of the slice it's on */
|
||||
struct vnode *vp; /* vnode pointer */
|
||||
struct proc *p;
|
||||
struct vinum_label label; /* and the label information */
|
||||
struct partinfo partinfo; /* partition information */
|
||||
int freelist_size; /* number of entries alloced in free list */
|
||||
int freelist_entries; /* number of entries used in free list */
|
||||
struct drive_freelist { /* sorted list of free space on drive */
|
||||
u_int64_t offset;
|
||||
long sectors;
|
||||
} *freelist;
|
||||
};
|
||||
|
||||
/*** Subdisk definitions ***/
|
||||
|
||||
struct sd {
|
||||
enum sdstate state; /* state */
|
||||
/* offsets in blocks */
|
||||
int64_t driveoffset; /* offset on drive */
|
||||
int64_t plexoffset; /* offset in plex */
|
||||
u_int64_t sectors; /* and length in sectors */
|
||||
int plexno; /* index of plex, if it belongs */
|
||||
int driveno; /* index of the drive on which it is located */
|
||||
int sdno; /* our index in vinum_conf */
|
||||
int pid; /* pid of process which opened us */
|
||||
u_int64_t reads; /* number of reads on this subdisk */
|
||||
u_int64_t writes; /* number of writes on this subdisk */
|
||||
u_int64_t bytes_read; /* number of bytes read */
|
||||
u_int64_t bytes_written; /* number of bytes written */
|
||||
char name[MAXSDNAME]; /* name of subdisk */
|
||||
};
|
||||
|
||||
/*** Plex definitions ***/
|
||||
|
||||
/* kinds of plex organization */
|
||||
enum plexorg {
|
||||
plex_disorg, /* disorganized */
|
||||
plex_concat, /* concatenated plex */
|
||||
plex_striped, /* striped plex */
|
||||
plex_raid5 /* RAID5 plex */
|
||||
};
|
||||
|
||||
/* Region in plex (either defective or unmapped) */
|
||||
struct plexregion {
|
||||
u_int64_t offset; /* start of region */
|
||||
u_int64_t length; /* length */
|
||||
};
|
||||
|
||||
struct plex {
|
||||
enum plexorg organization; /* Plex organization */
|
||||
enum plexstate state; /* and current state */
|
||||
u_int64_t length; /* total length of plex (max offset) */
|
||||
int flags;
|
||||
int stripesize; /* size of stripe or raid band, in sectors */
|
||||
int subdisks; /* number of associated subdisks */
|
||||
int subdisks_allocated; /* number of subdisks allocated space for */
|
||||
int *sdnos; /* list of component subdisks */
|
||||
int plexno; /* index of plex in vinum_conf */
|
||||
int volno; /* index of volume */
|
||||
int volplexno; /* number of plex in volume */
|
||||
int pid; /* pid of process which opened us */
|
||||
/* Lock information */
|
||||
int locks; /* number of locks used */
|
||||
int alloclocks; /* number of locks allocated */
|
||||
struct rangelock *lock; /* ranges of locked addresses */
|
||||
/* Statistics */
|
||||
u_int64_t reads; /* number of reads on this plex */
|
||||
u_int64_t writes; /* number of writes on this plex */
|
||||
u_int64_t bytes_read; /* number of bytes read */
|
||||
u_int64_t bytes_written; /* number of bytes written */
|
||||
u_int64_t multiblock; /* requests that needed more than one block */
|
||||
u_int64_t multistripe; /* requests that needed more than one stripe */
|
||||
/* revive parameters */
|
||||
u_int64_t revived; /* block number of current revive request */
|
||||
int revive_blocksize; /* revive block size (bytes) */
|
||||
int revive_interval; /* and time to wait between transfers */
|
||||
struct request *waitlist; /* list of requests waiting on revive op */
|
||||
/* geometry control */
|
||||
int defective_regions; /* number of regions which are defective */
|
||||
int defective_region_count; /* number of entries in defective_region */
|
||||
struct plexregion *defective_region; /* list of offset/length pairs: defective sds */
|
||||
int unmapped_regions; /* number of regions which are missing */
|
||||
int unmapped_region_count; /* number of entries in unmapped_region */
|
||||
struct plexregion *unmapped_region; /* list of offset/length pairs: missing sds */
|
||||
char name[MAXPLEXNAME]; /* name of plex */
|
||||
};
|
||||
|
||||
/*** Volume definitions ***/
|
||||
|
||||
#define MAXPLEX 8 /* maximum number of plexes */
|
||||
|
||||
|
||||
struct volume {
|
||||
enum volumestate state; /* current state */
|
||||
int plexes; /* number of plexes */
|
||||
int preferred_plex; /* plex to read from, -1 for round-robin */
|
||||
int last_plex_read; /* index of plex used for last read,
|
||||
* for round-robin */
|
||||
dev_t devno; /* device number */
|
||||
int flags; /* status and configuration flags */
|
||||
int opencount; /* number of opens (all the same process) */
|
||||
int openflags; /* flags supplied to last open(2) */
|
||||
u_int64_t size; /* size of volume */
|
||||
int disk; /* disk index */
|
||||
int blocksize; /* logical block size */
|
||||
int active; /* number of outstanding requests active */
|
||||
int subops; /* and the number of suboperations */
|
||||
pid_t pid; /* pid of locker */
|
||||
/* Statistics */
|
||||
u_int64_t bytes_read; /* number of bytes read */
|
||||
u_int64_t bytes_written; /* number of bytes written */
|
||||
u_int64_t reads; /* number of reads on this volume */
|
||||
u_int64_t writes; /* number of writes on this volume */
|
||||
u_int64_t recovered_reads; /* reads recovered from another plex */
|
||||
/* Unlike subdisks in the plex, space for the plex pointers is static */
|
||||
int plex[MAXPLEX]; /* index of plexes */
|
||||
char name[MAXVOLNAME]; /* name of volume */
|
||||
struct disklabel label; /* for DIOCGPART */
|
||||
};
|
||||
|
||||
/* Table expansion. Expand table, which contains oldcount
|
||||
* entries of type element, by increment entries, and change
|
||||
* oldcount accordingly */
|
||||
/*
 * EXPAND (table, element, oldcount, increment)
 *
 * Calls expand_table () on the address of table, passing the current size
 * (oldcount elements) and the new size (oldcount + increment elements) in
 * bytes, then adds increment to oldcount.
 *
 *   table      lvalue holding the table pointer; its address is passed on
 *   element    type of one table entry (used with sizeof)
 *   oldcount   lvalue holding the current entry count; updated in place
 *   increment  number of entries to add
 *
 * Every argument is parenthesized, so expression arguments such as
 * "1 << n" are sized correctly.  oldcount and increment are evaluated more
 * than once, so they must not have side effects.  The expansion is a plain
 * brace block (kept for compatibility with existing call sites): do not
 * use it as the unbraced body of an if that has an else.
 */
#define EXPAND(table, element, oldcount, increment)			\
{									\
    expand_table ((void **) &(table),					\
		  (oldcount) * sizeof (element),			\
		  ((oldcount) + (increment)) * sizeof (element));	\
    (oldcount) += (increment);						\
}
|
||||
|
||||
/* Information on vinum's memory usage */
|
||||
struct meminfo {
|
||||
int mallocs; /* number of malloced blocks */
|
||||
int total_malloced; /* total amount malloced */
|
||||
int highwater; /* maximum number of mallocs */
|
||||
struct mc *malloced; /* pointer to kernel table */
|
||||
};
|
||||
|
||||
struct mc {
|
||||
int seq;
|
||||
int size;
|
||||
short line;
|
||||
short flags;
|
||||
#define ALLOC_KVA 1 /* allocated via kva calls */
|
||||
int *databuf; /* really vm_object_t */
|
||||
caddr_t address;
|
||||
char file[16];
|
||||
};
|
||||
|
||||
/* These enums are used by the state transition
|
||||
* routines. They're in bit map format:
|
||||
*
|
||||
* Bit 0: Other plexes in the volume are down
|
||||
* Bit 1: Other plexes in the volume are up
|
||||
* Bit 2: The current plex is up
|
||||
* Maybe they should be local to
|
||||
* state.c */
|
||||
enum volplexstate {
|
||||
volplex_onlyusdown = 0, /* we're the only plex, and we're down */
|
||||
volplex_alldown, /* 1: another plex is down, and so are we */
|
||||
volplex_otherup, /* 2: another plex is up */
|
||||
volplex_otherupdown, /* other plexes are up and down */
|
||||
volplex_onlyus, /* 4: we're up and alone */
|
||||
volplex_onlyusup, /* only we are up, others are down */
|
||||
volplex_allup, /* all plexes are up */
|
||||
volplex_someup /* some plexes are up, including us */
|
||||
};
|
||||
|
||||
/* state map for plex */
|
||||
enum sdstates {
|
||||
sd_emptystate = 1,
|
||||
sd_downstate = 2, /* found an SD which is down */
|
||||
sd_crashedstate = 4, /* found an SD which is crashed */
|
||||
sd_obsoletestate = 8, /* found an SD which is obsolete */
|
||||
sd_stalestate = 16, /* found an SD which is stale */
|
||||
sd_rebornstate = 32, /* found an SD which is reborn */
|
||||
sd_upstate = 64, /* found an SD which is up */
|
||||
sd_initstate = 128, /* found an SD which is init */
|
||||
sd_otherstate = 256 /* found an SD in some other state */
|
||||
};
|
||||
|
||||
/* This is really just a parameter to pass to
|
||||
* set_<foo>_state, but since it needs to be known
|
||||
* in the external definitions, we need to define
|
||||
* it here */
|
||||
enum setstateflags {
|
||||
setstate_none = 0, /* no flags */
|
||||
setstate_force = 1, /* force the state change */
|
||||
setstate_configuring = 2, /* we're currently configuring, don't save */
|
||||
setstate_recursing = 4, /* we're called from another setstate function */
|
||||
setstate_norecurse = 8 /* don't call other setstate functions */
|
||||
};
|
||||
|
||||
#ifdef DEBUG
|
||||
/* Debugging stuff */
|
||||
#define DEBUG_ADDRESSES 1
|
||||
#define DEBUG_NUMOUTPUT 2
|
||||
#endif
|
Loading…
Reference in New Issue
Block a user