posix_fallocate: push vnop implementation into the fileop layer

This opens the door for other descriptor types to implement
posix_fallocate(2) as needed.

Reviewed by:	kib, bcr (manpages)
Differential Revision:	https://reviews.freebsd.org/D23042
This commit is contained in:
Kyle Evans 2020-01-08 19:05:32 +00:00
parent f57d4d4641
commit 2856d85ecb
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=356510
5 changed files with 115 additions and 95 deletions

View File

@ -28,7 +28,7 @@
.\" @(#)open.2 8.2 (Berkeley) 11/16/93
.\" $FreeBSD$
.\"
.Dd November 4, 2017
.Dd January 5, 2020
.Dt POSIX_FALLOCATE 2
.Os
.Sh NAME
@ -115,7 +115,8 @@ An I/O error occurred while reading from or writing to a file system.
.It Bq Er ENODEV
The
.Fa fd
argument does not refer to a regular file.
argument does not refer to a file that supports
.Nm .
.It Bq Er ENOSPC
There is insufficient free space remaining on the file system storage
media.

View File

@ -818,6 +818,47 @@ kern_ioctl(struct thread *td, int fd, u_long com, caddr_t data)
return (error);
}
/*
 * System call entry point for posix_fallocate(2).
 *
 * Unpacks the descriptor, offset and length from the syscall argument
 * structure, forwards them to kern_posix_fallocate(), and passes the
 * resulting error code through kern_posix_error() before returning it.
 */
int
sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap)
{

	return (kern_posix_error(td,
	    kern_posix_fallocate(td, uap->fd, uap->offset, uap->len)));
}
/*
 * Descriptor-type independent part of posix_fallocate(2).
 *
 * Validates the requested range, resolves fd to a file with
 * cap_pwrite_rights, checks that the descriptor is seekable and open for
 * writing, and then hands the request to the descriptor's fo_fallocate
 * handler through fo_fallocate().
 *
 * Returns 0 on success, otherwise an errno value:
 *   EINVAL  offset is negative or len is not positive
 *   EFBIG   offset + len would exceed OFF_MAX
 *   ESPIPE  the descriptor's fileops lack DFLAG_SEEKABLE
 *   EBADF   the descriptor was not opened with FWRITE
 * or whatever fget() or fo_fallocate() report.
 */
int
kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len)
{
	struct file *fp;
	int error;

	/* Record the descriptor argument once, ahead of any early return. */
	AUDIT_ARG_FD(fd);
	if (offset < 0 || len <= 0)
		return (EINVAL);
	/* Check for wrap. */
	if (offset > OFF_MAX - len)
		return (EFBIG);
	error = fget(td, fd, &cap_pwrite_rights, &fp);
	if (error != 0)
		return (error);
	AUDIT_ARG_FILE(td->td_proc, fp);
	if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) {
		error = ESPIPE;
		goto out;
	}
	if ((fp->f_flag & FWRITE) == 0) {
		error = EBADF;
		goto out;
	}
	error = fo_fallocate(fp, offset, len, td);
out:
	/* Every path that obtained fp via fget() releases it here. */
	fdrop(fp, td);
	return (error);
}
int
poll_no_poll(int events)
{

View File

@ -4565,99 +4565,6 @@ kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
return (error);
}
/*
 * posix_fallocate(2) implementation that handles only vnode-backed
 * descriptors.
 *
 * Validates the range, resolves fd with cap_pwrite_rights, requires a
 * seekable descriptor opened for writing that refers to a regular-file
 * vnode, and then allocates the range by calling VOP_ALLOCATE() in a loop.
 *
 * Returns 0 on success, otherwise an errno value:
 *   EINVAL  offset is negative or len is not positive
 *   EFBIG   offset + len would exceed OFF_MAX
 *   ESPIPE  the descriptor's fileops lack DFLAG_SEEKABLE
 *   EBADF   the descriptor was not opened with FWRITE
 *   ENODEV  the descriptor is not DTYPE_VNODE or the vnode is not VREG
 * or an error from fget(), vn_start_write(), vn_lock(), the MAC write
 * check (when MAC is compiled in), or VOP_ALLOCATE().
 */
int
kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len)
{
struct file *fp;
struct mount *mp;
struct vnode *vp;
off_t olen, ooffset;
int error;
#ifdef AUDIT
/* Ensures the vnode is handed to the audit record only on the first pass. */
int audited_vnode1 = 0;
#endif
AUDIT_ARG_FD(fd);
if (offset < 0 || len <= 0)
return (EINVAL);
/* Check for wrap. */
if (offset > OFF_MAX - len)
return (EFBIG);
/* NOTE(review): fd was already passed to AUDIT_ARG_FD above; this repeat looks redundant. */
AUDIT_ARG_FD(fd);
error = fget(td, fd, &cap_pwrite_rights, &fp);
if (error != 0)
return (error);
AUDIT_ARG_FILE(td->td_proc, fp);
if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) {
error = ESPIPE;
goto out;
}
if ((fp->f_flag & FWRITE) == 0) {
error = EBADF;
goto out;
}
/* Only vnode-backed descriptors are handled here. */
if (fp->f_type != DTYPE_VNODE) {
error = ENODEV;
goto out;
}
vp = fp->f_vnode;
/* Of those, only regular files are accepted. */
if (vp->v_type != VREG) {
error = ENODEV;
goto out;
}
/* Allocating blocks may take a long time, so iterate. */
for (;;) {
/* Snapshot the range so progress can be checked after the VOP. */
olen = len;
ooffset = offset;
bwillwrite();
mp = NULL;
error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
if (error != 0)
break;
error = vn_lock(vp, LK_EXCLUSIVE);
if (error != 0) {
/* Undo vn_start_write() before bailing out. */
vn_finished_write(mp);
break;
}
#ifdef AUDIT
if (!audited_vnode1) {
AUDIT_ARG_VNODE1(vp);
audited_vnode1 = 1;
}
#endif
#ifdef MAC
/* With MAC, VOP_ALLOCATE() below runs only if this check succeeds. */
error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp);
if (error == 0)
#endif
error = VOP_ALLOCATE(vp, &offset, &len);
VOP_UNLOCK(vp);
vn_finished_write(mp);
/* The end of the range (offset + len) must not move between passes. */
if (olen + ooffset != offset + len) {
panic("offset + len changed from %jx/%jx to %jx/%jx",
ooffset, olen, offset, len);
}
/* Stop on error or once nothing remains to allocate. */
if (error != 0 || len == 0)
break;
KASSERT(olen > len, ("Iteration did not make progress?"));
maybe_yield();
}
out:
/* Every path that obtained fp via fget() releases it here. */
fdrop(fp, td);
return (error);
}
/*
 * System call entry point for posix_fallocate(2): forwards the arguments in
 * uap to kern_posix_fallocate() and returns its error code after passing it
 * through kern_posix_error().
 */
int
sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap)
{
int error;
error = kern_posix_fallocate(td, uap->fd, uap->offset, uap->len);
return (kern_posix_error(td, error));
}
/*
* Unlike madvise(2), we do not make a best effort to remember every
* possible caching hint. Instead, we remember the last setting with

View File

@ -103,6 +103,7 @@ static fo_kqfilter_t vn_kqfilter;
static fo_stat_t vn_statfile;
static fo_close_t vn_closefile;
static fo_mmap_t vn_mmap;
static fo_fallocate_t vn_fallocate;
struct fileops vnops = {
.fo_read = vn_io_fault,
@ -119,6 +120,7 @@ struct fileops vnops = {
.fo_seek = vn_seek,
.fo_fill_kinfo = vn_fill_kinfo,
.fo_mmap = vn_mmap,
.fo_fallocate = vn_fallocate,
.fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE
};
@ -3150,3 +3152,60 @@ vn_generic_copy_file_range(struct vnode *invp, off_t *inoffp,
free(dat, M_TEMP);
return (error);
}
/*
 * fo_fallocate handler for vnode-backed descriptors.
 *
 * Only regular files (VREG) are supported; any other vnode type yields
 * ENODEV.  The range is allocated by calling VOP_ALLOCATE() repeatedly.
 * Each call receives pointers to offset and len and is expected to shrink
 * len while keeping offset + len constant; the loop ends when len reaches
 * zero or an error is reported.  Every pass is bracketed by
 * vn_start_write()/vn_finished_write() and an exclusive vnode lock, and
 * maybe_yield() is called between passes.
 *
 * Returns 0 on success, ENODEV for non-regular vnodes, or an error from
 * vn_start_write(), vn_lock(), the MAC write check (when MAC is compiled
 * in), or VOP_ALLOCATE().
 */
static int
vn_fallocate(struct file *fp, off_t offset, off_t len, struct thread *td)
{
	struct mount *mp;
	struct vnode *vp;
	off_t olen, ooffset;
	int error;
#ifdef AUDIT
	/* Ensures the vnode is handed to the audit record only once. */
	int audited_vnode1 = 0;
#endif

	vp = fp->f_vnode;
	if (vp->v_type != VREG)
		return (ENODEV);

	/* Allocating blocks may take a long time, so iterate. */
	for (;;) {
		/* Snapshot the range so progress can be checked afterwards. */
		olen = len;
		ooffset = offset;

		bwillwrite();
		mp = NULL;
		error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
		if (error != 0)
			break;
		error = vn_lock(vp, LK_EXCLUSIVE);
		if (error != 0) {
			/* Undo vn_start_write() before bailing out. */
			vn_finished_write(mp);
			break;
		}
#ifdef AUDIT
		if (!audited_vnode1) {
			AUDIT_ARG_VNODE1(vp);
			audited_vnode1 = 1;
		}
#endif
#ifdef MAC
		/* With MAC, VOP_ALLOCATE() runs only if this check succeeds. */
		error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp);
		if (error == 0)
#endif
			error = VOP_ALLOCATE(vp, &offset, &len);
		VOP_UNLOCK(vp);
		vn_finished_write(mp);

		/* The end of the range must not move between passes. */
		if (olen + ooffset != offset + len) {
			/* %jx takes uintmax_t; off_t is signed, so cast. */
			panic("offset + len changed from %jx/%jx to %jx/%jx",
			    (uintmax_t)ooffset, (uintmax_t)olen,
			    (uintmax_t)offset, (uintmax_t)len);
		}
		/* Stop on error or once nothing remains to allocate. */
		if (error != 0 || len == 0)
			break;
		KASSERT(olen > len, ("Iteration did not make progress?"));
		maybe_yield();
	}

	return (error);
}

View File

@ -125,6 +125,8 @@ typedef int fo_mmap_t(struct file *fp, vm_map_t map, vm_offset_t *addr,
typedef int fo_aio_queue_t(struct file *fp, struct kaiocb *job);
typedef int fo_add_seals_t(struct file *fp, int flags);
typedef int fo_get_seals_t(struct file *fp, int *flags);
/*
 * Per-descriptor-type handler for space allocation requests; stored in
 * struct fileops as fo_fallocate and invoked through fo_fallocate().
 * Takes the file, the byte offset and length of the range, and the calling
 * thread; returns 0 or an errno value.
 */
typedef int fo_fallocate_t(struct file *fp, off_t offset, off_t len,
struct thread *td);
typedef int fo_flags_t;
struct fileops {
@ -145,6 +147,7 @@ struct fileops {
fo_aio_queue_t *fo_aio_queue;
fo_add_seals_t *fo_add_seals;
fo_get_seals_t *fo_get_seals;
fo_fallocate_t *fo_fallocate;
fo_flags_t fo_flags; /* DFLAG_* below */
};
@ -446,6 +449,15 @@ fo_get_seals(struct file *fp, int *seals)
return ((*fp->f_ops->fo_get_seals)(fp, seals));
}
/*
 * Dispatch a space allocation request to the fo_fallocate handler in the
 * file's fileops.  Descriptor types that leave the handler NULL get ENODEV;
 * otherwise the handler's return value is passed back unchanged.
 */
static __inline int
fo_fallocate(struct file *fp, off_t offset, off_t len, struct thread *td)
{
	int (*handler)(struct file *, off_t, off_t, struct thread *);

	handler = fp->f_ops->fo_fallocate;
	if (handler != NULL)
		return ((*handler)(fp, offset, len, td));
	return (ENODEV);
}
#endif /* _KERNEL */
#endif /* !SYS_FILE_H */