diff --git a/sys/amd64/amd64/mem.c b/sys/amd64/amd64/mem.c index abbbb2106be2..5a4d8a9b754d 100644 --- a/sys/amd64/amd64/mem.c +++ b/sys/amd64/amd64/mem.c @@ -76,14 +76,16 @@ MALLOC_DEFINE(M_MEMDESC, "memdesc", "memory range descriptors"); int memrw(struct cdev *dev, struct uio *uio, int flags) { - int o; - u_long c = 0, v; struct iovec *iov; - int error = 0; + u_long c, v; + int error, o, sflags; vm_offset_t addr, eaddr; GIANT_REQUIRED; + error = 0; + c = 0; + sflags = curthread_pflags_set(TDP_DEVMEMIO); while (uio->uio_resid > 0 && error == 0) { iov = uio->uio_iov; if (iov->iov_len == 0) { @@ -98,7 +100,15 @@ memrw(struct cdev *dev, struct uio *uio, int flags) kmemphys: o = v & PAGE_MASK; c = min(uio->uio_resid, (u_int)(PAGE_SIZE - o)); - error = uiomove((void *)PHYS_TO_DMAP(v), (int)c, uio); + v = PHYS_TO_DMAP(v); + if (v < DMAP_MIN_ADDRESS || + (v > DMAP_MIN_ADDRESS + dmaplimit && + v <= DMAP_MAX_ADDRESS) || + pmap_kextract(v) == 0) { + error = EFAULT; + goto ret; + } + error = uiomove((void *)v, (int)c, uio); continue; } else if (dev2unit(dev) == CDEV_MINOR_KMEM) { @@ -119,22 +129,30 @@ memrw(struct cdev *dev, struct uio *uio, int flags) addr = trunc_page(v); eaddr = round_page(v + c); - if (addr < VM_MIN_KERNEL_ADDRESS) - return (EFAULT); - for (; addr < eaddr; addr += PAGE_SIZE) - if (pmap_extract(kernel_pmap, addr) == 0) - return (EFAULT); - + if (addr < VM_MIN_KERNEL_ADDRESS) { + error = EFAULT; + goto ret; + } + for (; addr < eaddr; addr += PAGE_SIZE) { + if (pmap_extract(kernel_pmap, addr) == 0) { + error = EFAULT; + goto ret; + } + } if (!kernacc((caddr_t)(long)v, c, uio->uio_rw == UIO_READ ? - VM_PROT_READ : VM_PROT_WRITE)) - return (EFAULT); + VM_PROT_READ : VM_PROT_WRITE)) { + error = EFAULT; + goto ret; + } error = uiomove((caddr_t)(long)v, (int)c, uio); continue; } /* else panic! */ } +ret: + curthread_pflags_restore(sflags); return (error); } diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c index f9fa8f2492b2..9f3ac4f8c8bd 100644 --- a/sys/amd64/amd64/trap.c +++ b/sys/amd64/amd64/trap.c @@ -789,6 +789,12 @@ trap_pfault(frame, usermode) frame->tf_rip = (long)curpcb->pcb_onfault; return (0); } + if ((td->td_pflags & TDP_DEVMEMIO) != 0) { + KASSERT(curpcb->pcb_onfault != NULL, + ("/dev/mem without pcb_onfault")); + frame->tf_rip = (long)curpcb->pcb_onfault; + return (0); + } trap_fatal(frame, eva); return (-1); } diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c index cfc3ed70f6df..5aecb5b8823a 100644 --- a/sys/kern/subr_trap.c +++ b/sys/kern/subr_trap.c @@ -157,6 +157,8 @@ userret(struct thread *td, struct trapframe *frame) td->td_rw_rlocks)); KASSERT((td->td_pflags & TDP_NOFAULTING) == 0, ("userret: Returning with pagefaults disabled")); + KASSERT((td->td_pflags & TDP_DEVMEMIO) == 0, + ("userret: Returning with /dev/mem i/o leaked")); KASSERT(td->td_no_sleeping == 0, ("userret: Returning with sleep disabled")); KASSERT(td->td_pinned == 0 || (td->td_pflags & TDP_CALLCHAIN) != 0, diff --git a/sys/sys/proc.h b/sys/sys/proc.h index bd2e10af60eb..a0fd11c780e1 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -428,6 +428,7 @@ do { \ #define TDP_RESETSPUR 0x04000000 /* Reset spurious page fault history. */ #define TDP_NERRNO 0x08000000 /* Last errno is already in td_errno */ #define TDP_UIOHELD 0x10000000 /* Current uio has pages held in td_ma */ +#define TDP_DEVMEMIO 0x20000000 /* Accessing memory for /dev/mem */ /* * Reasons that the current thread can not be run yet. diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c index 4a6495f58ad4..ba7692c2af51 100644 --- a/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -269,6 +269,10 @@ RetryFault:; map_generation = fs.map->timestamp; if (fs.entry->eflags & MAP_ENTRY_NOFAULT) { + if ((curthread->td_pflags & TDP_DEVMEMIO) != 0) { + vm_map_unlock_read(fs.map); + return (KERN_FAILURE); + } panic("vm_fault: fault on nofault entry, addr: %lx", (u_long)vaddr); }