From 733a1f3f52039ead3d4f2cc4647b62e526a62e63 Mon Sep 17 00:00:00 2001
From: Alexander Motin
Date: Wed, 26 Oct 2011 21:50:10 +0000
Subject: [PATCH] Clarify support for disks/volumes above 2TiB in geom_raid:
 - add support for volumes above 2TiB with Promise metadata format;
 - enforce and document other limitations:
   - Intel and Promise metadata formats do not support disks above 2TiB;
   - NVIDIA metadata format does not support volumes above 2TiB.

Sponsored by:	iXsystems, Inc.
MFC after:	2 weeks
---
 sbin/geom/class/raid/graid.8 |  5 ++++-
 sys/geom/raid/md_intel.c     | 34 ++++++++++++++++++++++++++--------
 sys/geom/raid/md_nvidia.c    | 30 ++++++++++++++++--------------
 sys/geom/raid/md_promise.c   | 25 ++++++++++++++++++++++++-
 4 files changed, 70 insertions(+), 24 deletions(-)

diff --git a/sbin/geom/class/raid/graid.8 b/sbin/geom/class/raid/graid.8
index d1c92a2148af..4166371b98cb 100644
--- a/sbin/geom/class/raid/graid.8
+++ b/sbin/geom/class/raid/graid.8
@@ -24,7 +24,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd March 22, 2011
+.Dd October 26, 2011
 .Dt GRAID 8
 .Os
 .Sh NAME
@@ -250,6 +250,9 @@ If you started migration using BIOS or in some other way,
 make sure to complete it there.
 Do not run GEOM RAID class on migrating volumes under pain of
 possible data corruption!
+.Sh 2TiB BARRIERS
+Intel and Promise metadata formats do not support disks above 2TiB.
+NVIDIA metadata format does not support volumes above 2TiB.
 .Sh EXIT STATUS
 Exit status is 0 on success, and non-zero if the command fails.
 .Sh SEE ALSO
diff --git a/sys/geom/raid/md_intel.c b/sys/geom/raid/md_intel.c
index 59d88358a911..2b11d3beba6b 100644
--- a/sys/geom/raid/md_intel.c
+++ b/sys/geom/raid/md_intel.c
@@ -1172,15 +1172,18 @@ g_raid_md_taste_intel(struct g_raid_md_object *md, struct g_class *mp,
 	g_access(cp, -1, 0, 0);
 	if (meta == NULL) {
 		if (g_raid_aggressive_spare) {
-			if (vendor == 0x8086) {
+			if (vendor != 0x8086) {
+				G_RAID_DEBUG(1,
+				    "Intel vendor mismatch 0x%04x != 0x8086",
+				    vendor);
+			} else if (pp->mediasize / pp->sectorsize > UINT32_MAX) {
+				G_RAID_DEBUG(1,
+				    "Intel disk '%s' is too big.", pp->name);
+			} else {
 				G_RAID_DEBUG(1,
 				    "No Intel metadata, forcing spare.");
 				spare = 2;
 				goto search;
-			} else {
-				G_RAID_DEBUG(1,
-				    "Intel vendor mismatch 0x%04x != 0x8086",
-				    vendor);
 			}
 		}
 		return (G_RAID_MD_TASTE_FAIL);
@@ -1194,9 +1197,9 @@ g_raid_md_taste_intel(struct g_raid_md_object *md, struct g_class *mp,
 	}
 	if (meta->disk[disk_pos].sectors !=
 	    (pp->mediasize / pp->sectorsize)) {
-		G_RAID_DEBUG(1, "Intel size mismatch %u != %u",
-		    meta->disk[disk_pos].sectors,
-		    (u_int)(pp->mediasize / pp->sectorsize));
+		G_RAID_DEBUG(1, "Intel size mismatch %ju != %ju",
+		    (off_t)meta->disk[disk_pos].sectors,
+		    (off_t)(pp->mediasize / pp->sectorsize));
 		goto fail1;
 	}
 
@@ -1449,6 +1452,13 @@ g_raid_md_ctl_intel(struct g_raid_md_object *md,
 			cp->private = disk;
 			g_topology_unlock();
 
+			if (pp->mediasize / pp->sectorsize > UINT32_MAX) {
+				gctl_error(req,
+				    "Disk '%s' is too big.", diskname);
+				error = -8;
+				break;
+			}
+
 			error = g_raid_md_get_label(cp,
 			    &pd->pd_disk_meta.serial[0], INTEL_SERIAL_LEN);
 			if (error != 0) {
@@ -1940,6 +1950,14 @@ g_raid_md_ctl_intel(struct g_raid_md_object *md,
 			pp = cp->provider;
 			g_topology_unlock();
 
+			if (pp->mediasize / pp->sectorsize > UINT32_MAX) {
+				gctl_error(req,
+				    "Disk '%s' is too big.", diskname);
+				g_raid_kill_consumer(sc, cp);
+				error = -8;
+				break;
+			}
+
 			/* Read disk serial. */
 			error = g_raid_md_get_label(cp, &serial[0],
 			    INTEL_SERIAL_LEN);
diff --git a/sys/geom/raid/md_nvidia.c b/sys/geom/raid/md_nvidia.c
index c2b300bf598a..0cbab4b4fa32 100644
--- a/sys/geom/raid/md_nvidia.c
+++ b/sys/geom/raid/md_nvidia.c
@@ -1033,7 +1033,7 @@ g_raid_md_ctl_nvidia(struct g_raid_md_object *md,
 	char arg[16];
 	const char *verb, *volname, *levelname, *diskname;
 	int *nargs, *force;
-	off_t size, sectorsize, strip;
+	off_t size, sectorsize, strip, volsize;
 	intmax_t *sizearg, *striparg;
 	int numdisks, i, len, level, qual, update;
 	int error;
@@ -1182,7 +1182,20 @@ g_raid_md_ctl_nvidia(struct g_raid_md_object *md,
 			gctl_error(req, "Size too small.");
 			return (-13);
 		}
-		if (size > 0xffffffffffffllu * sectorsize) {
+
+		if (level == G_RAID_VOLUME_RL_RAID0 ||
+		    level == G_RAID_VOLUME_RL_CONCAT ||
+		    level == G_RAID_VOLUME_RL_SINGLE)
+			volsize = size * numdisks;
+		else if (level == G_RAID_VOLUME_RL_RAID1)
+			volsize = size;
+		else if (level == G_RAID_VOLUME_RL_RAID5)
+			volsize = size * (numdisks - 1);
+		else { /* RAID1E */
+			volsize = ((size * numdisks) / strip / 2) *
+			    strip;
+		}
+		if (volsize > 0xffffffffllu * sectorsize) {
 			gctl_error(req, "Size too big.");
 			return (-14);
 		}
@@ -1196,18 +1209,7 @@ g_raid_md_ctl_nvidia(struct g_raid_md_object *md,
 		vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_NONE;
 		vol->v_strip_size = strip;
 		vol->v_disks_count = numdisks;
-		if (level == G_RAID_VOLUME_RL_RAID0 ||
-		    level == G_RAID_VOLUME_RL_CONCAT ||
-		    level == G_RAID_VOLUME_RL_SINGLE)
-			vol->v_mediasize = size * numdisks;
-		else if (level == G_RAID_VOLUME_RL_RAID1)
-			vol->v_mediasize = size;
-		else if (level == G_RAID_VOLUME_RL_RAID5)
-			vol->v_mediasize = size * (numdisks - 1);
-		else { /* RAID1E */
-			vol->v_mediasize = ((size * numdisks) / strip / 2) *
-			    strip;
-		}
+		vol->v_mediasize = volsize;
 		vol->v_sectorsize = sectorsize;
 		g_raid_start_volume(vol);
 
diff --git a/sys/geom/raid/md_promise.c b/sys/geom/raid/md_promise.c
index c47517451bd8..55b4b213f979 100644
--- a/sys/geom/raid/md_promise.c
+++ b/sys/geom/raid/md_promise.c
@@ -121,7 +121,8 @@ struct promise_raid_conf {
 	uint64_t	rebuild_lba64;	/* Per-volume rebuild position. */
 	uint32_t	magic_4;
 	uint32_t	magic_5;
-	uint32_t	filler3[325];
+	uint32_t	total_sectors_high;
+	uint32_t	filler3[324];
 	uint32_t	checksum;
 } __packed;
 
@@ -213,6 +214,7 @@ g_raid_md_promise_print(struct promise_raid_conf *meta)
 	printf("rebuild_lba64        %ju\n", meta->rebuild_lba64);
 	printf("magic_4              0x%08x\n", meta->magic_4);
 	printf("magic_5              0x%08x\n", meta->magic_5);
+	printf("total_sectors_high   0x%08x\n", meta->total_sectors_high);
 	printf("=================================================\n");
 }
 
@@ -867,6 +869,9 @@ g_raid_md_promise_start(struct g_raid_volume *vol)
 	vol->v_strip_size = 512 << meta->stripe_shift; //ZZZ
 	vol->v_disks_count = meta->total_disks;
 	vol->v_mediasize = (off_t)meta->total_sectors * 512; //ZZZ
+	if (meta->total_sectors_high < 256) /* If value looks sane. */
+		vol->v_mediasize |=
+		    ((off_t)meta->total_sectors_high << 32) * 512; //ZZZ
 	vol->v_sectorsize = 512; //ZZZ
 	for (i = 0; i < vol->v_disks_count; i++) {
 		sd = &vol->v_subdisks[i];
@@ -1318,6 +1323,13 @@ g_raid_md_ctl_promise(struct g_raid_md_object *md,
 			cp->private = disk;
 			g_topology_unlock();
 
+			if (pp->mediasize / pp->sectorsize > UINT32_MAX) {
+				gctl_error(req,
+				    "Disk '%s' is too big.", diskname);
+				error = -8;
+				break;
+			}
+
 			/* Read kernel dumping information. */
 			disk->d_kd.offset = 0;
 			disk->d_kd.length = OFF_MAX;
@@ -1609,8 +1621,17 @@ g_raid_md_ctl_promise(struct g_raid_md_object *md,
 				error = -4;
 				break;
 			}
+			pp = cp->provider;
 			g_topology_unlock();
 
+			if (pp->mediasize / pp->sectorsize > UINT32_MAX) {
+				gctl_error(req,
+				    "Disk '%s' is too big.", diskname);
+				g_raid_kill_consumer(sc, cp);
+				error = -8;
+				break;
+			}
+
 			pd = malloc(sizeof(*pd), M_MD_PROMISE,
 			    M_WAITOK | M_ZERO);
 			disk = g_raid_create_disk(sc);
@@ -1716,6 +1737,8 @@ g_raid_md_write_promise(struct g_raid_md_object *md, struct g_raid_volume *tvol,
 		meta->array_width /= 2;
 	meta->array_number = vol->v_global_id;
 	meta->total_sectors = vol->v_mediasize / vol->v_sectorsize;
+	meta->total_sectors_high =
+	    (vol->v_mediasize / vol->v_sectorsize) >> 32;
 	meta->cylinders = meta->total_sectors / (255 * 63) - 1;
 	meta->heads = 254;
 	meta->sectors = 63;
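
The arithmetic behind the patch is simple but worth a sketch. With 512-byte
sectors, a 32-bit sector count tops out just under 2^41 bytes (2TiB), which
is why the patch rejects larger disks for the Intel and Promise formats;
Promise regains larger volumes by carrying the upper bits of the sector
count in a previously unused filler word (total_sectors_high) and
recombining them on read. The standalone program below illustrates the
overflow test and the split/recombine step; the struct and variable names
here are illustrative stand-ins, not the driver's own.

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-in for the low/high split in the Promise metadata. */
struct meta_sketch {
	uint32_t total_sectors;		/* low 32 bits of the sector count */
	uint32_t total_sectors_high;	/* high bits, formerly filler */
};

int
main(void)
{
	uint64_t mediasize, sectorsize, sectors, restored;
	struct meta_sketch m;

	mediasize = 3ULL << 40;		/* a hypothetical 3TiB volume */
	sectorsize = 512;
	sectors = mediasize / sectorsize;

	/*
	 * UINT32_MAX sectors at 512 bytes each is just under 2^41 bytes
	 * (2TiB), so a bigger disk cannot be described by a 32-bit
	 * sector count; this is the test added before accepting a disk.
	 */
	if (sectors > UINT32_MAX)
		printf("%ju sectors will not fit in 32 bits\n",
		    (uintmax_t)sectors);

	/* Split the count across the low and high words on write... */
	m.total_sectors = (uint32_t)sectors;
	m.total_sectors_high = (uint32_t)(sectors >> 32);

	/* ...and recombine them when the metadata is read back. */
	restored = (uint64_t)m.total_sectors |
	    ((uint64_t)m.total_sectors_high << 32);
	printf("restored %ju sectors (%ju bytes)\n",
	    (uintmax_t)restored, (uintmax_t)(restored * sectorsize));
	return (0);
}

The "< 256" sanity check in g_raid_md_promise_start() follows from the
same layout: with 512-byte sectors, a high word of 256 would already mean
a volume of 2^49 bytes, so larger values are taken as garbage left in the
old filler field rather than as a real size.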