602 lines
14 KiB
C
602 lines
14 KiB
C
/*
|
|
* ----------------------------------------------------------------------------
|
|
* "THE BEER-WARE LICENSE" (Revision 42):
|
|
* <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you
|
|
* can do whatever you want with this stuff. If we meet some day, and you think
|
|
* this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
|
|
* ----------------------------------------------------------------------------
|
|
*
|
|
* $FreeBSD$
|
|
*
|
|
*/
|
|
|
|
#include "opt_geom.h"
|
|
|
|
#include <sys/param.h>
|
|
#include <sys/systm.h>
|
|
#include <sys/stdint.h>
|
|
#include <sys/bio.h>
|
|
#include <sys/conf.h>
|
|
#include <sys/disk.h>
|
|
#include <sys/disklabel.h>
|
|
#ifdef NO_GEOM
|
|
#include <sys/diskslice.h>
|
|
#include <sys/kernel.h>
|
|
#include <sys/malloc.h>
|
|
#include <sys/sysctl.h>
|
|
#include <machine/md_var.h>
|
|
#include <sys/ctype.h>
|
|
|
|
static MALLOC_DEFINE(M_DISK, "disk", "disk data");
|
|
|
|
static d_strategy_t diskstrategy;
|
|
static d_open_t diskopen;
|
|
static d_close_t diskclose;
|
|
static d_ioctl_t diskioctl;
|
|
static d_psize_t diskpsize;
|
|
|
|
static LIST_HEAD(, disk) disklist = LIST_HEAD_INITIALIZER(&disklist);
|
|
|
|
void disk_dev_synth(dev_t dev);
|
|
|
|
void
|
|
disk_dev_synth(dev_t dev)
|
|
{
|
|
struct disk *dp;
|
|
int u, s, p;
|
|
dev_t pdev;
|
|
|
|
if (dksparebits(dev))
|
|
return;
|
|
LIST_FOREACH(dp, &disklist, d_list) {
|
|
if (major(dev) != dp->d_devsw->d_maj)
|
|
continue;
|
|
u = dkunit(dev);
|
|
p = RAW_PART;
|
|
s = WHOLE_DISK_SLICE;
|
|
pdev = makedev(dp->d_devsw->d_maj, dkmakeminor(u, s, p));
|
|
if (pdev->si_devsw == NULL)
|
|
return; /* Probably a unit we don't have */
|
|
s = dkslice(dev);
|
|
p = dkpart(dev);
|
|
if (s == WHOLE_DISK_SLICE && p == RAW_PART) {
|
|
/* XXX: actually should not happen */
|
|
dev = make_dev(pdev->si_devsw, dkmakeminor(u, s, p),
|
|
UID_ROOT, GID_OPERATOR, 0640, "%s%d",
|
|
dp->d_devsw->d_name, u);
|
|
dev_depends(pdev, dev);
|
|
return;
|
|
}
|
|
if (s == COMPATIBILITY_SLICE) {
|
|
dev = make_dev(pdev->si_devsw, dkmakeminor(u, s, p),
|
|
UID_ROOT, GID_OPERATOR, 0640, "%s%d%c",
|
|
dp->d_devsw->d_name, u, 'a' + p);
|
|
dev_depends(pdev, dev);
|
|
return;
|
|
}
|
|
if (p != RAW_PART) {
|
|
dev = make_dev(pdev->si_devsw, dkmakeminor(u, s, p),
|
|
UID_ROOT, GID_OPERATOR, 0640, "%s%ds%d%c",
|
|
dp->d_devsw->d_name, u, s - BASE_SLICE + 1,
|
|
'a' + p);
|
|
} else {
|
|
dev = make_dev(pdev->si_devsw, dkmakeminor(u, s, p),
|
|
UID_ROOT, GID_OPERATOR, 0640, "%s%ds%d",
|
|
dp->d_devsw->d_name, u, s - BASE_SLICE + 1);
|
|
make_dev_alias(dev, "%s%ds%dc",
|
|
dp->d_devsw->d_name, u, s - BASE_SLICE + 1);
|
|
}
|
|
dev_depends(pdev, dev);
|
|
return;
|
|
}
|
|
}
|
|
|
|
static void
|
|
disk_clone(void *arg, char *name, int namelen, dev_t *dev)
|
|
{
|
|
struct disk *dp;
|
|
char const *d;
|
|
char *e;
|
|
int j, u, s, p;
|
|
dev_t pdev;
|
|
|
|
if (*dev != NODEV)
|
|
return;
|
|
|
|
LIST_FOREACH(dp, &disklist, d_list) {
|
|
d = dp->d_devsw->d_name;
|
|
j = dev_stdclone(name, &e, d, &u);
|
|
if (j == 0)
|
|
continue;
|
|
if (u > DKMAXUNIT)
|
|
continue;
|
|
p = RAW_PART;
|
|
s = WHOLE_DISK_SLICE;
|
|
pdev = makedev(dp->d_devsw->d_maj, dkmakeminor(u, s, p));
|
|
if (pdev->si_disk == NULL)
|
|
continue;
|
|
if (*e != '\0') {
|
|
j = dev_stdclone(e, &e, "s", &s);
|
|
if (j == 0)
|
|
s = COMPATIBILITY_SLICE;
|
|
else if (j == 1 || j == 2)
|
|
s += BASE_SLICE - 1;
|
|
if (!*e)
|
|
; /* ad0s1 case */
|
|
else if (e[1] != '\0')
|
|
return; /* can never be a disk name */
|
|
else if (*e < 'a' || *e > 'h')
|
|
return; /* can never be a disk name */
|
|
else
|
|
p = *e - 'a';
|
|
}
|
|
if (s == WHOLE_DISK_SLICE && p == RAW_PART) {
|
|
return;
|
|
} else if (s >= BASE_SLICE && p != RAW_PART) {
|
|
*dev = make_dev(pdev->si_devsw, dkmakeminor(u, s, p),
|
|
UID_ROOT, GID_OPERATOR, 0640, "%s%ds%d%c",
|
|
pdev->si_devsw->d_name, u, s - BASE_SLICE + 1,
|
|
p + 'a');
|
|
} else if (s >= BASE_SLICE) {
|
|
*dev = make_dev(pdev->si_devsw, dkmakeminor(u, s, p),
|
|
UID_ROOT, GID_OPERATOR, 0640, "%s%ds%d",
|
|
pdev->si_devsw->d_name, u, s - BASE_SLICE + 1);
|
|
make_dev_alias(*dev, "%s%ds%dc",
|
|
pdev->si_devsw->d_name, u, s - BASE_SLICE + 1);
|
|
} else {
|
|
*dev = make_dev(pdev->si_devsw, dkmakeminor(u, s, p),
|
|
UID_ROOT, GID_OPERATOR, 0640, "%s%d%c",
|
|
pdev->si_devsw->d_name, u, p + 'a');
|
|
}
|
|
dev_depends(pdev, *dev);
|
|
return;
|
|
}
|
|
}
|
|
|
|
static void
|
|
inherit_raw(dev_t pdev, dev_t dev)
|
|
{
|
|
dev->si_disk = pdev->si_disk;
|
|
dev->si_drv1 = pdev->si_drv1;
|
|
dev->si_drv2 = pdev->si_drv2;
|
|
dev->si_iosize_max = pdev->si_iosize_max;
|
|
dev->si_bsize_phys = pdev->si_bsize_phys;
|
|
dev->si_bsize_best = pdev->si_bsize_best;
|
|
}
|
|
|
|
dev_t
|
|
disk_create(int unit, struct disk *dp, int flags, struct cdevsw *cdevsw, struct cdevsw *proto)
|
|
{
|
|
static int once;
|
|
dev_t dev;
|
|
|
|
if (!once) {
|
|
EVENTHANDLER_REGISTER(dev_clone, disk_clone, 0, 1000);
|
|
once++;
|
|
}
|
|
|
|
bzero(dp, sizeof(*dp));
|
|
dp->d_label = malloc(sizeof *dp->d_label, M_DEVBUF, M_WAITOK|M_ZERO);
|
|
|
|
if (proto->d_open != diskopen) {
|
|
*proto = *cdevsw;
|
|
proto->d_open = diskopen;
|
|
proto->d_close = diskclose;
|
|
proto->d_ioctl = diskioctl;
|
|
proto->d_strategy = diskstrategy;
|
|
proto->d_psize = diskpsize;
|
|
}
|
|
|
|
if (bootverbose)
|
|
printf("Creating DISK %s%d\n", cdevsw->d_name, unit);
|
|
dev = make_dev(proto, dkmakeminor(unit, WHOLE_DISK_SLICE, RAW_PART),
|
|
UID_ROOT, GID_OPERATOR, 0640, "%s%d", cdevsw->d_name, unit);
|
|
|
|
dev->si_disk = dp;
|
|
dp->d_dev = dev;
|
|
dp->d_dsflags = flags;
|
|
dp->d_devsw = cdevsw;
|
|
LIST_INSERT_HEAD(&disklist, dp, d_list);
|
|
|
|
return (dev);
|
|
}
|
|
|
|
static int
|
|
diskdumpconf(u_int onoff, dev_t dev, struct disk *dp)
|
|
{
|
|
struct dumperinfo di;
|
|
struct disklabel *dl;
|
|
|
|
if (!onoff)
|
|
return(set_dumper(NULL));
|
|
dl = dsgetlabel(dev, dp->d_slice);
|
|
if (!dl)
|
|
return (ENXIO);
|
|
bzero(&di, sizeof di);
|
|
di.dumper = (dumper_t *)dp->d_devsw->d_dump;
|
|
di.priv = dp->d_dev;
|
|
di.blocksize = dl->d_secsize;
|
|
di.mediaoffset = (off_t)(dl->d_partitions[dkpart(dev)].p_offset +
|
|
dp->d_slice->dss_slices[dkslice(dev)].ds_offset) * DEV_BSIZE;
|
|
di.mediasize =
|
|
(off_t)(dl->d_partitions[dkpart(dev)].p_size) * DEV_BSIZE;
|
|
if (di.mediasize == 0)
|
|
return (EINVAL);
|
|
return(set_dumper(&di));
|
|
}
|
|
|
|
void
|
|
disk_invalidate (struct disk *disk)
|
|
{
|
|
if (disk->d_slice)
|
|
dsgone(&disk->d_slice);
|
|
}
|
|
|
|
void
|
|
disk_destroy(dev_t dev)
|
|
{
|
|
LIST_REMOVE(dev->si_disk, d_list);
|
|
free(dev->si_disk->d_label, M_DEVBUF);
|
|
bzero(dev->si_disk, sizeof(*dev->si_disk));
|
|
dev->si_disk = NULL;
|
|
destroy_dev(dev);
|
|
return;
|
|
}
|
|
|
|
struct disk *
|
|
disk_enumerate(struct disk *disk)
|
|
{
|
|
if (!disk)
|
|
return (LIST_FIRST(&disklist));
|
|
else
|
|
return (LIST_NEXT(disk, d_list));
|
|
}
|
|
|
|
static int
|
|
sysctl_disks(SYSCTL_HANDLER_ARGS)
|
|
{
|
|
struct disk *disk;
|
|
int error, first;
|
|
|
|
disk = NULL;
|
|
first = 1;
|
|
|
|
while ((disk = disk_enumerate(disk))) {
|
|
if (!first) {
|
|
error = SYSCTL_OUT(req, " ", 1);
|
|
if (error)
|
|
return error;
|
|
} else {
|
|
first = 0;
|
|
}
|
|
error = SYSCTL_OUT(req, disk->d_dev->si_name, strlen(disk->d_dev->si_name));
|
|
if (error)
|
|
return error;
|
|
}
|
|
error = SYSCTL_OUT(req, "", 1);
|
|
return error;
|
|
}
|
|
|
|
SYSCTL_PROC(_kern, OID_AUTO, disks, CTLTYPE_STRING | CTLFLAG_RD, 0, 0,
|
|
sysctl_disks, "A", "names of available disks");
|
|
|
|
/*
 * The cdevsw functions
 */
|
|
|
|
static int
|
|
diskopen(dev_t dev, int oflags, int devtype, struct thread *td)
|
|
{
|
|
dev_t pdev;
|
|
struct disk *dp;
|
|
int error;
|
|
|
|
error = 0;
|
|
pdev = dkmodpart(dkmodslice(dev, WHOLE_DISK_SLICE), RAW_PART);
|
|
|
|
dp = pdev->si_disk;
|
|
if (!dp)
|
|
return (ENXIO);
|
|
|
|
while (dp->d_flags & DISKFLAG_LOCK) {
|
|
dp->d_flags |= DISKFLAG_WANTED;
|
|
error = tsleep(dp, PRIBIO | PCATCH, "diskopen", hz);
|
|
if (error)
|
|
return (error);
|
|
}
|
|
dp->d_flags |= DISKFLAG_LOCK;
|
|
|
|
if (!dsisopen(dp->d_slice)) {
|
|
if (!pdev->si_iosize_max)
|
|
pdev->si_iosize_max = dev->si_iosize_max;
|
|
error = dp->d_devsw->d_open(pdev, oflags, devtype, td);
|
|
dp->d_label->d_secsize = dp->d_sectorsize;
|
|
dp->d_label->d_secperunit = dp->d_mediasize / dp->d_sectorsize;
|
|
dp->d_label->d_nsectors = dp->d_fwsectors;
|
|
dp->d_label->d_ntracks = dp->d_fwheads;
|
|
}
|
|
|
|
/* Inherit properties from the whole/raw dev_t */
|
|
inherit_raw(pdev, dev);
|
|
|
|
if (error)
|
|
goto out;
|
|
|
|
error = dsopen(dev, devtype, dp->d_dsflags, &dp->d_slice, dp->d_label);
|
|
|
|
if (!dsisopen(dp->d_slice))
|
|
dp->d_devsw->d_close(pdev, oflags, devtype, td);
|
|
out:
|
|
dp->d_flags &= ~DISKFLAG_LOCK;
|
|
if (dp->d_flags & DISKFLAG_WANTED) {
|
|
dp->d_flags &= ~DISKFLAG_WANTED;
|
|
wakeup(dp);
|
|
}
|
|
|
|
return(error);
|
|
}
|
|
|
|
static int
|
|
diskclose(dev_t dev, int fflag, int devtype, struct thread *td)
|
|
{
|
|
struct disk *dp;
|
|
int error;
|
|
dev_t pdev;
|
|
|
|
error = 0;
|
|
pdev = dkmodpart(dkmodslice(dev, WHOLE_DISK_SLICE), RAW_PART);
|
|
dp = pdev->si_disk;
|
|
if (!dp)
|
|
return (ENXIO);
|
|
dsclose(dev, devtype, dp->d_slice);
|
|
if (!dsisopen(dp->d_slice))
|
|
error = dp->d_devsw->d_close(dp->d_dev, fflag, devtype, td);
|
|
return (error);
|
|
}
|
|
|
|
static void
|
|
diskstrategy(struct bio *bp)
|
|
{
|
|
dev_t pdev;
|
|
struct disk *dp;
|
|
|
|
pdev = dkmodpart(dkmodslice(bp->bio_dev, WHOLE_DISK_SLICE), RAW_PART);
|
|
dp = pdev->si_disk;
|
|
bp->bio_resid = bp->bio_bcount;
|
|
if (dp != bp->bio_dev->si_disk)
|
|
inherit_raw(pdev, bp->bio_dev);
|
|
|
|
if (!dp) {
|
|
biofinish(bp, NULL, ENXIO);
|
|
return;
|
|
}
|
|
|
|
if (dscheck(bp, dp->d_slice) <= 0) {
|
|
biodone(bp);
|
|
return;
|
|
}
|
|
|
|
if (bp->bio_bcount == 0) {
|
|
biodone(bp);
|
|
return;
|
|
}
|
|
|
|
KASSERT(dp->d_devsw != NULL, ("NULL devsw"));
|
|
KASSERT(dp->d_devsw->d_strategy != NULL, ("NULL d_strategy"));
|
|
dp->d_devsw->d_strategy(bp);
|
|
return;
|
|
|
|
}
|
|
|
|
static int
|
|
diskioctl(dev_t dev, u_long cmd, caddr_t data, int fflag, struct thread *td)
|
|
{
|
|
struct disk *dp;
|
|
int error;
|
|
u_int u;
|
|
dev_t pdev;
|
|
|
|
pdev = dkmodpart(dkmodslice(dev, WHOLE_DISK_SLICE), RAW_PART);
|
|
dp = pdev->si_disk;
|
|
if (!dp)
|
|
return (ENXIO);
|
|
if (cmd == DIOCSKERNELDUMP) {
|
|
u = *(u_int *)data;
|
|
return (diskdumpconf(u, dev, dp));
|
|
}
|
|
if (cmd == DIOCGFRONTSTUFF) {
|
|
*(off_t *)data = 8192; /* XXX: crude but enough) */
|
|
return (0);
|
|
}
|
|
error = dsioctl(dev, cmd, data, fflag, &dp->d_slice);
|
|
if (error == ENOIOCTL)
|
|
error = dp->d_devsw->d_ioctl(dev, cmd, data, fflag, td);
|
|
return (error);
|
|
}
|
|
|
|
static int
|
|
diskpsize(dev_t dev)
|
|
{
|
|
struct disk *dp;
|
|
dev_t pdev;
|
|
|
|
pdev = dkmodpart(dkmodslice(dev, WHOLE_DISK_SLICE), RAW_PART);
|
|
dp = pdev->si_disk;
|
|
if (!dp)
|
|
return (-1);
|
|
if (dp != dev->si_disk) {
|
|
dev->si_drv1 = pdev->si_drv1;
|
|
dev->si_drv2 = pdev->si_drv2;
|
|
/* XXX: don't set bp->b_dev->si_disk (?) */
|
|
}
|
|
return (dssize(dev, &dp->d_slice));
|
|
}
|
|
|
|
SYSCTL_INT(_debug_sizeof, OID_AUTO, disklabel, CTLFLAG_RD,
|
|
0, sizeof(struct disklabel), "sizeof(struct disklabel)");
|
|
|
|
SYSCTL_INT(_debug_sizeof, OID_AUTO, diskslices, CTLFLAG_RD,
|
|
0, sizeof(struct diskslices), "sizeof(struct diskslices)");
|
|
|
|
SYSCTL_INT(_debug_sizeof, OID_AUTO, disk, CTLFLAG_RD,
|
|
0, sizeof(struct disk), "sizeof(struct disk)");
|
|
|
|
#endif /* NO_GEOM */
|
|
|
|
/*-
|
|
* Disk error is the preface to plaintive error messages
|
|
* about failing disk transfers. It prints messages of the form
|
|
* "hp0g: BLABLABLA cmd=read fsbn 12345 of 12344-12347"
|
|
* blkdone should be -1 if the position of the error is unknown.
|
|
* The message is printed with printf.
|
|
*/
|
|
void
|
|
disk_err(struct bio *bp, const char *what, int blkdone, int nl)
|
|
{
|
|
daddr_t sn;
|
|
|
|
printf("%s: %s ", devtoname(bp->bio_dev), what);
|
|
switch(bp->bio_cmd) {
|
|
case BIO_READ: printf("cmd=read "); break;
|
|
case BIO_WRITE: printf("cmd=write "); break;
|
|
case BIO_DELETE: printf("cmd=delete "); break;
|
|
case BIO_GETATTR: printf("cmd=getattr "); break;
|
|
case BIO_SETATTR: printf("cmd=setattr "); break;
|
|
default: printf("cmd=%x ", bp->bio_cmd); break;
|
|
}
|
|
sn = bp->bio_blkno;
|
|
if (bp->bio_bcount <= DEV_BSIZE) {
|
|
printf("fsbn %jd%s", (intmax_t)sn, nl ? "\n" : "");
|
|
return;
|
|
}
|
|
if (blkdone >= 0) {
|
|
sn += blkdone;
|
|
printf("fsbn %jd of ", (intmax_t)sn);
|
|
}
|
|
printf("%jd-%jd", (intmax_t)bp->bio_blkno,
|
|
(intmax_t)(bp->bio_blkno + (bp->bio_bcount - 1) / DEV_BSIZE));
|
|
if (nl)
|
|
printf("\n");
|
|
}
|
|
|
|
/*
|
|
* Seek sort for disks.
|
|
*
|
|
* The buf_queue keep two queues, sorted in ascending block order. The first
|
|
* queue holds those requests which are positioned after the current block
|
|
* (in the first request); the second, which starts at queue->switch_point,
|
|
* holds requests which came in after their block number was passed. Thus
|
|
* we implement a one way scan, retracting after reaching the end of the drive
|
|
* to the first request on the second queue, at which time it becomes the
|
|
* first queue.
|
|
*
|
|
* A one-way scan is natural because of the way UNIX read-ahead blocks are
|
|
* allocated.
|
|
*/
|
|
|
|
void
|
|
bioq_disksort(bioq, bp)
|
|
struct bio_queue_head *bioq;
|
|
struct bio *bp;
|
|
{
|
|
struct bio *bq;
|
|
struct bio *bn;
|
|
struct bio *be;
|
|
|
|
if (!atomic_cmpset_int(&bioq->busy, 0, 1))
|
|
panic("Recursing in bioq_disksort()");
|
|
be = TAILQ_LAST(&bioq->queue, bio_queue);
|
|
/*
|
|
* If the queue is empty or we are an
|
|
* ordered transaction, then it's easy.
|
|
*/
|
|
if ((bq = bioq_first(bioq)) == NULL) {
|
|
bioq_insert_tail(bioq, bp);
|
|
bioq->busy = 0;
|
|
return;
|
|
} else if (bioq->insert_point != NULL) {
|
|
|
|
/*
|
|
* A certain portion of the list is
|
|
* "locked" to preserve ordering, so
|
|
* we can only insert after the insert
|
|
* point.
|
|
*/
|
|
bq = bioq->insert_point;
|
|
} else {
|
|
|
|
/*
|
|
* If we lie before the last removed (currently active)
|
|
* request, and are not inserting ourselves into the
|
|
* "locked" portion of the list, then we must add ourselves
|
|
* to the second request list.
|
|
*/
|
|
if (bp->bio_pblkno < bioq->last_pblkno) {
|
|
|
|
bq = bioq->switch_point;
|
|
/*
|
|
* If we are starting a new secondary list,
|
|
* then it's easy.
|
|
*/
|
|
if (bq == NULL) {
|
|
bioq->switch_point = bp;
|
|
bioq_insert_tail(bioq, bp);
|
|
bioq->busy = 0;
|
|
return;
|
|
}
|
|
/*
|
|
* If we lie ahead of the current switch point,
|
|
* insert us before the switch point and move
|
|
* the switch point.
|
|
*/
|
|
if (bp->bio_pblkno < bq->bio_pblkno) {
|
|
bioq->switch_point = bp;
|
|
TAILQ_INSERT_BEFORE(bq, bp, bio_queue);
|
|
bioq->busy = 0;
|
|
return;
|
|
}
|
|
} else {
|
|
if (bioq->switch_point != NULL)
|
|
be = TAILQ_PREV(bioq->switch_point,
|
|
bio_queue, bio_queue);
|
|
/*
|
|
* If we lie between last_pblkno and bq,
|
|
* insert before bq.
|
|
*/
|
|
if (bp->bio_pblkno < bq->bio_pblkno) {
|
|
TAILQ_INSERT_BEFORE(bq, bp, bio_queue);
|
|
bioq->busy = 0;
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Request is at/after our current position in the list.
|
|
* Optimize for sequential I/O by seeing if we go at the tail.
|
|
*/
|
|
if (bp->bio_pblkno > be->bio_pblkno) {
|
|
TAILQ_INSERT_AFTER(&bioq->queue, be, bp, bio_queue);
|
|
bioq->busy = 0;
|
|
return;
|
|
}
|
|
|
|
/* Otherwise, insertion sort */
|
|
while ((bn = TAILQ_NEXT(bq, bio_queue)) != NULL) {
|
|
|
|
/*
|
|
* We want to go after the current request if it is the end
|
|
* of the first request list, or if the next request is a
|
|
* larger cylinder than our request.
|
|
*/
|
|
if (bn == bioq->switch_point
|
|
|| bp->bio_pblkno < bn->bio_pblkno)
|
|
break;
|
|
bq = bn;
|
|
}
|
|
TAILQ_INSERT_AFTER(&bioq->queue, bq, bp, bio_queue);
|
|
bioq->busy = 0;
|
|
}
|
|
|
|
|