Consistently handle negative or wrapping offsets in the mmap(2) syscalls.

For regular files and posix shared memory, POSIX requires that [offset, offset + size) range is legitimate. At the maping time, check that offset is not negative. Allowing negative offsets might expose the data that filesystem put into vm_object for internal use, esp. due to OFF_TO_IDX() signess treatment. Fault handler verifies that the mapped range is valid, assuming that mmap(2) checked that arithmetic gives no undefined results. For device mappings, leave the semantic of negative offsets to the driver. Correct object page index calculation to not erronously propagate sign. In either case, disallow overflow of offset + size. Update mmap(2) man page to explain the requirement of the range validity, and behaviour when the range becomes invalid after mapping. Reported and tested by: royger (previous version) Reviewed by: alc Sponsored by: The FreeBSD Foundation MFC after: 2 weeks
svn path=/head/; revision=313690
2017-02-12 21:05:44 +00:00 · 2017-02-12 21:05:44 +00:00 · 987ff18184 · 2020-12-20 02:59:44 +00:00
commit 987ff18184
parent f2277b64ec
6 changed files with 90 additions and 14 deletions
--- a/lib/libc/sys/mmap.2
+++ b/lib/libc/sys/mmap.2
@ -28,7 +28,7 @@
 .\"	@(#)mmap.2	8.4 (Berkeley) 5/11/95
 .\" $FreeBSD$
 .\"
-.Dd November 25, 2016
+.Dd February 4, 2017
 .Dt MMAP 2
 .Os
 .Sh NAME
@ -53,11 +53,37 @@ starting at byte offset
 .Fa offset .
 If
 .Fa len
-is not a multiple of the pagesize, the mapped region may extend past the
+is not a multiple of the page size, the mapped region may extend past the
 specified range.
 Any such extension beyond the end of the mapped object will be zero-filled.
 .Pp
 If
+.Fa fd
+references a regular file or a shared memory object, the range of
+bytes starting at
+.Fa offset
+and continuing for
+.Fa len
+bytes must be legitimate for the possible (not necessarily
+current) offsets in the object.
+In particular, the
+.Fa offset
+value cannot be negative.
+If the object is truncated and the process later accesses a pages that
+is wholly within the truncated region, the access is aborted and a
+.Dv SIGBUS
+signal is delivered to the process.
+.Pp
+If
+.Fa fd
+references a device file, the interpretation of the
+.Fa offset
+value is device specific and defined by the device driver.
+The virtual memory subsystem does not impose any restrictitions on the
+.Fa offset
+value in this case, passing it unchanged to the driver.
+.Pp
+If
 .Fa addr
 is non-zero, it is used as a hint to the system.
 (As a convenience to the system, the actual address of the region may differ
@ -157,7 +183,7 @@ If
 .Dv MAP_FIXED
 is specified,
 .Fa addr
-must be a multiple of the pagesize.
+must be a multiple of the page size.
 If
 .Dv MAP_EXCL
 is not specified, a successful
@ -361,6 +387,12 @@ The
 argument
 is not a valid open file descriptor.
 .It Bq Er EINVAL
+An invalid (negative) value was passed in the
+.Fa offset
+argument, when
+.Fa fd
+referenced a regular file or shared memory.
+.It Bq Er EINVAL
 An invalid value was passed in the
 .Fa prot
 argument.
--- a/sys/kern/uipc_shm.c
+++ b/sys/kern/uipc_shm.c
@ -891,20 +891,20 @@ shm_mmap(struct file *fp, vm_map_t map, vm_offset_t *addr, vm_size_t objsize,
 		return (EACCES);
 	maxprot &= cap_maxprot;

+	/* See comment in vn_mmap(). */
+	if (
+#ifdef _LP64
+	    objsize > OFF_MAX ||
+#endif
+	    foff < 0 || foff > OFF_MAX - objsize)
+		return (EINVAL);
+
 #ifdef MAC
 	error = mac_posixshm_check_mmap(td->td_ucred, shmfd, prot, flags);
 	if (error != 0)
 		return (error);
 #endif
 	
-	/*
-	 * XXXRW: This validation is probably insufficient, and subject to
-	 * sign errors.  It should be fixed.
-	 */
-	if (foff >= shmfd->shm_size ||
-	    foff + objsize > round_page(shmfd->shm_size))
-		return (EINVAL);
-
 	mtx_lock(&shm_timestamp_lock);
 	vfs_timestamp(&shmfd->shm_atime);
 	mtx_unlock(&shm_timestamp_lock);
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@ -2455,6 +2455,24 @@ vn_mmap(struct file *fp, vm_map_t map, vm_offset_t *addr, vm_size_t size,
 	}
 	maxprot &= cap_maxprot;

+	/*
+	 * For regular files and shared memory, POSIX requires that
+	 * the value of foff be a legitimate offset within the data
+	 * object.  In particular, negative offsets are invalid.
+	 * Blocking negative offsets and overflows here avoids
+	 * possible wraparound or user-level access into reserved
+	 * ranges of the data object later.  In contrast, POSIX does
+	 * not dictate how offsets are used by device drivers, so in
+	 * the case of a device mapping a negative offset is passed
+	 * on.
+	 */
+	if (
+#ifdef _LP64
+	    size > OFF_MAX ||
+#endif
+	    foff < 0 || foff > OFF_MAX - size)
+		return (EINVAL);
+
 	writecounted = FALSE;
 	error = vm_mmap_vnode(td, size, prot, &maxprot, &flags, vp,
 	    &foff, &object, &writecounted);
--- a/sys/vm/device_pager.c
+++ b/sys/vm/device_pager.c
@ -139,8 +139,18 @@ cdev_pager_allocate(void *handle, enum obj_type tp, struct cdev_pager_ops *ops,
 	if (foff & PAGE_MASK)
 		return (NULL);

+	/*
+	 * Treat the mmap(2) file offset as an unsigned value for a
+	 * device mapping.  This, in effect, allows a user to pass all
+	 * possible off_t values as the mapping cookie to the driver.  At
+	 * this point, we know that both foff and size are a multiple
+	 * of the page size.  Do a check to avoid wrap.
+	 */
 	size = round_page(size);
-	pindex = OFF_TO_IDX(foff + size);
+	pindex = UOFF_TO_IDX(foff) + UOFF_TO_IDX(size);
+	if (pindex > OBJ_MAX_SIZE || pindex < UOFF_TO_IDX(foff) ||
+	    pindex < UOFF_TO_IDX(size))
+		return (NULL);

 	if (ops->cdev_pg_ctor(handle, size, prot, foff, cred, &color) != 0)
 		return (NULL);
--- a/sys/vm/sg_pager.c
+++ b/sys/vm/sg_pager.c
@ -96,8 +96,9 @@ sg_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
 	 * to map beyond that.
 	 */
 	size = round_page(size);
-	pindex = OFF_TO_IDX(foff + size);
-	if (pindex > npages)
+	pindex = UOFF_TO_IDX(foff) + UOFF_TO_IDX(size);
+	if (pindex > npages || pindex < UOFF_TO_IDX(foff) ||
+	    pindex < UOFF_TO_IDX(size))
 		return (NULL);

 	/*
--- a/sys/vm/vm_object.h
+++ b/sys/vm/vm_object.h
@ -183,8 +183,23 @@ struct vm_object {
 #define	OBJ_DISCONNECTWNT 0x4000	/* disconnect from vnode wanted */
 #define	OBJ_TMPFS	0x8000		/* has tmpfs vnode allocated */

+/*
+ * Helpers to perform conversion between vm_object page indexes and offsets.
+ * IDX_TO_OFF() converts an index into an offset.
+ * OFF_TO_IDX() converts an offset into an index.  Since offsets are signed
+ *   by default, the sign propagation in OFF_TO_IDX(), when applied to
+ *   negative offsets, is intentional and returns a vm_object page index
+ *   that cannot be created by a userspace mapping.
+ * UOFF_TO_IDX() treats the offset as an unsigned value and converts it
+ *   into an index accordingly.  Use it only when the full range of offset
+ *   values are allowed.  Currently, this only applies to device mappings.
+ * OBJ_MAX_SIZE specifies the maximum page index corresponding to the
+ *   maximum unsigned offset.
+ */
 #define	IDX_TO_OFF(idx) (((vm_ooffset_t)(idx)) << PAGE_SHIFT)
 #define	OFF_TO_IDX(off) ((vm_pindex_t)(((vm_ooffset_t)(off)) >> PAGE_SHIFT))
+#define	UOFF_TO_IDX(off) (((vm_pindex_t)(off)) >> PAGE_SHIFT)
+#define	OBJ_MAX_SIZE	(UOFF_TO_IDX(UINT64_MAX) + 1)

 #ifdef	_KERNEL