From 1b4509372f3d474dd7b05789ab5eb225f85fd27c Mon Sep 17 00:00:00 2001 From: Pawel Jakub Dawidek Date: Tue, 23 Feb 2010 16:46:34 +0000 Subject: [PATCH] MFC r201684. Teach the (gpt)zfsboot and zfsloader raidz code to use its buffers more efficiently. Before this patch, in the worst case memory use would increase exponentially on the number of drives in the raidz vdev. Submitted by: Matt Reimer Sponsored by: VPOP Technologies, Inc. Silence from: dfr --- sys/cddl/boot/zfs/zfssubr.c | 50 +++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/sys/cddl/boot/zfs/zfssubr.c b/sys/cddl/boot/zfs/zfssubr.c index 4013986c18c3..25d349b1ce42 100644 --- a/sys/cddl/boot/zfs/zfssubr.c +++ b/sys/cddl/boot/zfs/zfssubr.c @@ -454,7 +454,7 @@ vdev_raidz_reconstruct_q(raidz_col_t *cols, int nparity, int acols, int x) static void vdev_raidz_reconstruct_pq(raidz_col_t *cols, int nparity, int acols, - int x, int y) + int x, int y, void *temp_p, void *temp_q) { uint8_t *p, *q, *pxy, *qxy, *xd, *yd, tmp, a, b, aexp, bexp; void *pdata, *qdata; @@ -478,10 +478,8 @@ vdev_raidz_reconstruct_pq(raidz_col_t *cols, int nparity, int acols, xsize = cols[x].rc_size; ysize = cols[y].rc_size; - cols[VDEV_RAIDZ_P].rc_data = - zfs_alloc_temp(cols[VDEV_RAIDZ_P].rc_size); - cols[VDEV_RAIDZ_Q].rc_data = - zfs_alloc_temp(cols[VDEV_RAIDZ_Q].rc_size); + cols[VDEV_RAIDZ_P].rc_data = temp_p; + cols[VDEV_RAIDZ_Q].rc_data = temp_q; cols[x].rc_size = 0; cols[y].rc_size = 0; @@ -551,9 +549,12 @@ vdev_raidz_read(vdev_t *vdev, const blkptr_t *bp, void *buf, uint64_t f = b % dcols; uint64_t o = (b / dcols) << unit_shift; uint64_t q, r, coff; - int c, c1, bc, col, acols, devidx, asize, n; + int c, c1, bc, col, acols, devidx, asize, n, max_rc_size; static raidz_col_t cols[16]; raidz_col_t *rc, *rc1; + void *orig, *orig1, *temp_p, *temp_q; + + orig = orig1 = temp_p = temp_q = NULL; q = s / (dcols - nparity); r = s - q * (dcols - nparity); @@ -561,6 +562,7 @@ vdev_raidz_read(vdev_t *vdev, const blkptr_t *bp, void *buf, acols = (q == 0 ? bc : dcols); asize = 0; + max_rc_size = 0; for (c = 0; c < acols; c++) { col = f + c; @@ -577,6 +579,8 @@ vdev_raidz_read(vdev_t *vdev, const blkptr_t *bp, void *buf, cols[c].rc_tried = 0; cols[c].rc_skipped = 0; asize += cols[c].rc_size; + if (cols[c].rc_size > max_rc_size) + max_rc_size = cols[c].rc_size; } asize = roundup(asize, (nparity + 1) << unit_shift); @@ -777,8 +781,13 @@ reconstruct: //ASSERT(c != acols); //ASSERT(!rc->rc_skipped || rc->rc_error == ENXIO || rc->rc_error == ESTALE); + if (temp_p == NULL) + temp_p = zfs_alloc_temp(max_rc_size); + if (temp_q == NULL) + temp_q = zfs_alloc_temp(max_rc_size); + vdev_raidz_reconstruct_pq(cols, nparity, acols, - c1, c); + c1, c, temp_p, temp_q); if (zio_checksum_error(bp, buf) == 0) return (0); @@ -845,18 +854,12 @@ reconstruct: return (EIO); } - asize = 0; - for (c = 0; c < acols; c++) { - rc = &cols[c]; - if (rc->rc_size > asize) - asize = rc->rc_size; - } if (cols[VDEV_RAIDZ_P].rc_error == 0) { /* * Attempt to reconstruct the data from parity P. */ - void *orig; - orig = zfs_alloc_temp(asize); + if (orig == NULL) + orig = zfs_alloc_temp(max_rc_size); for (c = nparity; c < acols; c++) { rc = &cols[c]; @@ -874,8 +877,8 @@ reconstruct: /* * Attempt to reconstruct the data from parity Q. */ - void *orig; - orig = zfs_alloc_temp(asize); + if (orig == NULL) + orig = zfs_alloc_temp(max_rc_size); for (c = nparity; c < acols; c++) { rc = &cols[c]; @@ -895,9 +898,14 @@ reconstruct: /* * Attempt to reconstruct the data from both P and Q. */ - void *orig, *orig1; - orig = zfs_alloc_temp(asize); - orig1 = zfs_alloc_temp(asize); + if (orig == NULL) + orig = zfs_alloc_temp(max_rc_size); + if (orig1 == NULL) + orig1 = zfs_alloc_temp(max_rc_size); + if (temp_p == NULL) + temp_p = zfs_alloc_temp(max_rc_size); + if (temp_q == NULL) + temp_q = zfs_alloc_temp(max_rc_size); for (c = nparity; c < acols - 1; c++) { rc = &cols[c]; @@ -909,7 +917,7 @@ reconstruct: memcpy(orig1, rc1->rc_data, rc1->rc_size); vdev_raidz_reconstruct_pq(cols, nparity, - acols, c, c1); + acols, c, c1, temp_p, temp_q); if (zio_checksum_error(bp, buf) == 0) return (0);