From d5f998ba7065549829c0d83ba35d1862167115e6 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Sat, 12 Dec 2015 14:08:29 +0000 Subject: [PATCH] In md(4) over vnode, correct handling of unaligned unmapped io requests whose page alignment + size is greater than MAXPHYS. Right now md(4) over vnode uses a physical buffer of size MAXPHYS to map data of size MAXPHYS + page offset of the user buffer. This typically corrupts the next pbuf, or, if the pbuf used was the last pbuf in the map, the next page after the pbuf's map. Split the request into pieces of at most the io size that fits into the pbuf KVA with alignment, and retry if a part of the bio is left unprocessed. Reported by: Fabian Keil Tested by: Fabian Keil, pho Sponsored by: The FreeBSD Foundation MFC after: 2 weeks --- sys/dev/md/md.c | 40 +++++++++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/sys/dev/md/md.c b/sys/dev/md/md.c index a47066ef24fb..6405ad5f02b5 100644 --- a/sys/dev/md/md.c +++ b/sys/dev/md/md.c @@ -836,8 +836,8 @@ mdstart_vnode(struct md_s *sc, struct bio *bp) struct buf *pb; bus_dma_segment_t *vlist; struct thread *td; - off_t len, zerosize; - int ma_offs; + off_t iolen, len, zerosize; + int ma_offs, npages; switch (bp->bio_cmd) { case BIO_READ: @@ -858,6 +858,7 @@ mdstart_vnode(struct md_s *sc, struct bio *bp) pb = NULL; piov = NULL; ma_offs = bp->bio_ma_offset; + len = bp->bio_length; /* * VNODE I/O @@ -890,7 +891,6 @@ mdstart_vnode(struct md_s *sc, struct bio *bp) auio.uio_iovcnt = howmany(bp->bio_length, zerosize); piov = malloc(sizeof(*piov) * auio.uio_iovcnt, M_MD, M_WAITOK); auio.uio_iov = piov; - len = bp->bio_length; while (len > 0) { piov->iov_base = __DECONST(void *, zero_region); piov->iov_len = len; @@ -904,7 +904,6 @@ mdstart_vnode(struct md_s *sc, struct bio *bp) piov = malloc(sizeof(*piov) * bp->bio_ma_n, M_MD, M_WAITOK); auio.uio_iov = piov; vlist = (bus_dma_segment_t *)bp->bio_data; - len = bp->bio_length; while (len > 0) {
piov->iov_base = (void *)(uintptr_t)(vlist->ds_addr + ma_offs); @@ -920,11 +919,20 @@ mdstart_vnode(struct md_s *sc, struct bio *bp) piov = auio.uio_iov; } else if ((bp->bio_flags & BIO_UNMAPPED) != 0) { pb = getpbuf(&md_vnode_pbuf_freecnt); - pmap_qenter((vm_offset_t)pb->b_data, bp->bio_ma, bp->bio_ma_n); - aiov.iov_base = (void *)((vm_offset_t)pb->b_data + ma_offs); - aiov.iov_len = bp->bio_length; + bp->bio_resid = len; +unmapped_step: + npages = atop(min(MAXPHYS, round_page(len + (ma_offs & + PAGE_MASK)))); + iolen = min(ptoa(npages) - (ma_offs & PAGE_MASK), len); + KASSERT(iolen > 0, ("zero iolen")); + pmap_qenter((vm_offset_t)pb->b_data, + &bp->bio_ma[atop(ma_offs)], npages); + aiov.iov_base = (void *)((vm_offset_t)pb->b_data + + (ma_offs & PAGE_MASK)); + aiov.iov_len = iolen; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; + auio.uio_resid = iolen; } else { aiov.iov_base = bp->bio_data; aiov.iov_len = bp->bio_length; @@ -948,15 +956,21 @@ mdstart_vnode(struct md_s *sc, struct bio *bp) vn_finished_write(mp); } - if (pb) { - pmap_qremove((vm_offset_t)pb->b_data, bp->bio_ma_n); + if (pb != NULL) { + pmap_qremove((vm_offset_t)pb->b_data, npages); + if (error == 0) { + len -= iolen; + bp->bio_resid -= iolen; + ma_offs += iolen; + if (len > 0) + goto unmapped_step; + } relpbuf(pb, &md_vnode_pbuf_freecnt); } - if (piov != NULL) - free(piov, M_MD); - - bp->bio_resid = auio.uio_resid; + free(piov, M_MD); + if (pb == NULL) + bp->bio_resid = auio.uio_resid; return (error); }