From 1eaa36523cb921e90d61b20531ed525aba0cfe7e Mon Sep 17 00:00:00 2001 From: Ka Ho Ng Date: Tue, 24 Aug 2021 17:04:02 +0800 Subject: [PATCH] fspacectl(2): Clarifies the return values rmacklem@ spotted two things in the system call: - Upon returning from a successful operation, vop_stddeallocate can update rmsr.r_offset to a value greater than file size. This behavior, although being harmless, can be confusing. - The EINVAL return value for rqsr.r_offset + rqsr.r_len > OFF_MAX is undocumented. This commit has the following changes: - vop_stddeallocate and shm_deallocate to bound the the affected area further by the file size. - The EINVAL case for rqsr.r_offset + rqsr.r_len > OFF_MAX is documented. - The fspacectl(2), vn_deallocate(9) and VOP_DEALLOCATE(9)'s return len is explicitly documented the be the value 0, and the return offset is restricted to be the smallest of off + len and current file size suggested by kib@. This semantic allows callers to interact better with potential file size growth after the call. Sponsored by: The FreeBSD Foundation Reviewed by: imp, kib Differential Revision: https://reviews.freebsd.org/D31604 --- lib/libc/sys/fspacectl.2 | 27 ++++++++++++++++++++++----- share/man/man9/VOP_DEALLOCATE.9 | 8 ++++++++ share/man/man9/vn_deallocate.9 | 8 ++++++++ sys/kern/uipc_shm.c | 11 +++++++++-- sys/kern/vfs_default.c | 8 ++++++-- 5 files changed, 53 insertions(+), 9 deletions(-) diff --git a/lib/libc/sys/fspacectl.2 b/lib/libc/sys/fspacectl.2 index 2f581d1c1fb8..0e369785b883 100644 --- a/lib/libc/sys/fspacectl.2 +++ b/lib/libc/sys/fspacectl.2 @@ -27,7 +27,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd August 4, 2021 +.Dd August 18, 2021 .Dt FSPACECTL 2 .Os .Sh NAME @@ -67,6 +67,17 @@ argument is non-NULL, the .Fa spacectl_range structure it points to is updated to contain the unprocessed operation range after the system call returns. +.Pp +For a successful completion without an unprocessed part in the requested +operation range, +.Fa "rmsr->r_len" +is updated to be the value 0, and +.Fa "rmsr->r_offset" +is updated to be the smallest of +.Fa "rqsr->r_offset" + +.Fa "rqsr->r_len" ; +and the end-of-file offset. +The file descriptor's file offset is not used or modified by the system call. Both .Fa rqsr and @@ -92,9 +103,9 @@ Zero a region in the file specified by the .Fa rqsr argument. The -.Va "rqsr->r_offset" +.Fa "rqsr->r_offset" has to be a value greater than or equal to 0, and the -.Va "rqsr->r_len" +.Fa "rqsr->r_len" has to be a value greater than 0. .Pp If the file system supports hole-punching, @@ -132,11 +143,17 @@ If the argument is .Dv SPACECTL_DEALLOC , either the -.Fa "range->r_offset" +.Fa "rqsr->r_offset" argument was less than zero, or the -.Fa "range->r_len" +.Fa "rqsr->r_len" argument was less than or equal to zero. .It Bq Er EINVAL +The value of +.Fa "rqsr->r_offset" + +.Fa "rqsr->r_len" +is greater than +.Dv OFF_MAX . +.It Bq Er EINVAL An invalid or unsupported flag is included in .Fa flags . .It Bq Er EINVAL diff --git a/share/man/man9/VOP_DEALLOCATE.9 b/share/man/man9/VOP_DEALLOCATE.9 index dbfe048f2235..2ec915c6fef3 100644 --- a/share/man/man9/VOP_DEALLOCATE.9 +++ b/share/man/man9/VOP_DEALLOCATE.9 @@ -74,6 +74,14 @@ and are updated to reflect the portion of the range that still needs to be zeroed/deallocated on return. Partial result is considered a successful operation. +For a successful completion without an unprocessed portion of the range, +.Fa *len +is updated to be the value 0, and +.Fa *offset +is updated to be the smallest of +.Fa *offset + +.Fa *len +passed to the call and the end-of-file offset. .Sh LOCKS The vnode should be locked on entry and will still be locked on exit. .Sh RETURN VALUES diff --git a/share/man/man9/vn_deallocate.9 b/share/man/man9/vn_deallocate.9 index 29edcd57ff5d..08f4e92ec597 100644 --- a/share/man/man9/vn_deallocate.9 +++ b/share/man/man9/vn_deallocate.9 @@ -95,6 +95,14 @@ Attempt to bypass buffer cache. and .Fa *length are updated to reflect the unprocessed operation range of the call. +For a successful completion, +.Fa *length +is updated to be the value 0, and +.Fa *offset +is updated to be the smallest of +.Fa *offset + +.Fa *length +passed to the call and the end-of-file offset. .Sh RETURN VALUES Upon successful completion, the value 0 is returned; otherwise the appropriate error is returned. diff --git a/sys/kern/uipc_shm.c b/sys/kern/uipc_shm.c index 16d1e22a898b..60815ef24c26 100644 --- a/sys/kern/uipc_shm.c +++ b/sys/kern/uipc_shm.c @@ -1905,6 +1905,8 @@ shm_deallocate(struct shmfd *shmfd, off_t *offset, off_t *length, int flags) off = *offset; len = *length; KASSERT(off + len <= (vm_ooffset_t)OFF_MAX, ("off + len overflows")); + if (off + len > shmfd->shm_size) + len = shmfd->shm_size - off; object = shmfd->shm_object; startofs = off & PAGE_MASK; endofs = (off + len) & PAGE_MASK; @@ -1913,6 +1915,13 @@ shm_deallocate(struct shmfd *shmfd, off_t *offset, off_t *length, int flags) pi = OFF_TO_IDX(off + PAGE_MASK); error = 0; + /* Handle the case when offset is beyond shm size */ + if ((off_t)len < 0) { + *offset = shmfd->shm_size; + *length = 0; + return (0); + } + VM_OBJECT_WLOCK(object); if (startofs != 0) { @@ -1974,8 +1983,6 @@ shm_fspacectl(struct file *fp, int cmd, off_t *offset, off_t *length, int flags, break; } error = shm_deallocate(shmfd, &off, &len, flags); - if (error != 0) - break; *offset = off; *length = len; break; diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c index d9328cd39b00..d5df9cd8bf7b 100644 --- a/sys/kern/vfs_default.c +++ b/sys/kern/vfs_default.c @@ -1138,14 +1138,13 @@ vop_stddeallocate(struct vop_deallocate_args *ap) vp = ap->a_vp; offset = *ap->a_offset; - len = *ap->a_len; cred = ap->a_cred; error = VOP_GETATTR(vp, &va, cred); if (error) return (error); - len = omin(OFF_MAX - offset, *ap->a_len); + len = omin((off_t)va.va_size - offset, *ap->a_len); while (len > 0) { noff = offset; error = vn_bmap_seekhole_locked(vp, FIOSEEKDATA, &noff, cred); @@ -1185,6 +1184,11 @@ vop_stddeallocate(struct vop_deallocate_args *ap) if (should_yield()) break; } + /* Handle the case when offset is beyond EOF */ + if (len < 0) { + offset += len; + len = 0; + } out: *ap->a_offset = offset; *ap->a_len = len;