Add the posix_fallocate(2) syscall. The default implementation in
vop_stdallocate() is filesystem agnostic and will run as slow as a read/write loop in userspace; however, it serves to correctly implement the functionality for filesystems that do not implement a VOP_ALLOCATE. Note that __FreeBSD_version was already bumped today to 900036 for any ports which would like to use this function. Also reserve space in the syscall table for posix_fadvise(2). Reviewed by: -arch (previous version)
This commit is contained in:
parent
fe51d6c1d1
commit
d91f88f7f3
@ -96,7 +96,7 @@ MAN+= abort2.2 accept.2 access.2 acct.2 adjtime.2 \
|
||||
mq_setattr.2 \
|
||||
msgctl.2 msgget.2 msgrcv.2 msgsnd.2 \
|
||||
msync.2 munmap.2 nanosleep.2 nfssvc.2 ntp_adjtime.2 open.2 \
|
||||
pathconf.2 pipe.2 poll.2 posix_openpt.2 profil.2 \
|
||||
pathconf.2 pipe.2 poll.2 posix_fallocate.2 posix_openpt.2 profil.2 \
|
||||
pselect.2 ptrace.2 quotactl.2 \
|
||||
read.2 readlink.2 reboot.2 recv.2 rename.2 revoke.2 rfork.2 rmdir.2 \
|
||||
rtprio.2
|
||||
|
@ -364,6 +364,7 @@ FBSD_1.2 {
|
||||
cap_enter;
|
||||
cap_getmode;
|
||||
getloginclass;
|
||||
posix_fallocate;
|
||||
rctl_get_racct;
|
||||
rctl_get_rules;
|
||||
rctl_get_limits;
|
||||
|
146
lib/libc/sys/posix_fallocate.2
Normal file
146
lib/libc/sys/posix_fallocate.2
Normal file
@ -0,0 +1,146 @@
|
||||
.\" Copyright (c) 1980, 1991, 1993
|
||||
.\" The Regents of the University of California. All rights reserved.
|
||||
.\"
|
||||
.\" Redistribution and use in source and binary forms, with or without
|
||||
.\" modification, are permitted provided that the following conditions
|
||||
.\" are met:
|
||||
.\" 1. Redistributions of source code must retain the above copyright
|
||||
.\" notice, this list of conditions and the following disclaimer.
|
||||
.\" 2. Redistributions in binary form must reproduce the above copyright
|
||||
.\" notice, this list of conditions and the following disclaimer in the
|
||||
.\" documentation and/or other materials provided with the distribution.
|
||||
.\" 4. Neither the name of the University nor the names of its contributors
|
||||
.\" may be used to endorse or promote products derived from this software
|
||||
.\" without specific prior written permission.
|
||||
.\"
|
||||
.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
.\" SUCH DAMAGE.
|
||||
.\"
|
||||
.\" @(#)open.2 8.2 (Berkeley) 11/16/93
|
||||
.\" $FreeBSD$
|
||||
.\"
|
||||
.Dd April 13, 2011
|
||||
.Dt POSIX_FALLOCATE 2
|
||||
.Os
|
||||
.Sh NAME
|
||||
.Nm posix_fallocate
|
||||
.Nd pre-allocate storage for a range in a file
|
||||
.Sh LIBRARY
|
||||
.Lb libc
|
||||
.Sh SYNOPSIS
|
||||
.In fcntl.h
|
||||
.Ft int
|
||||
.Fn posix_fallocate "int fd" "off_t offset" "off_t len"
|
||||
.Sh DESCRIPTION
|
||||
Required storage for the range
|
||||
.Fa offset
|
||||
to
|
||||
.Fa offset +
|
||||
.Fa len
|
||||
in the file referenced by
|
||||
.Fa fd
|
||||
is guaranteed to be allocated upon successful return.
|
||||
That is, if
|
||||
.Fn posix_fallocate
|
||||
returns successfully, subsequent writes to the specified file data
|
||||
will not fail due to lack of free space on the file system storage
|
||||
media.
|
||||
Any existing file data in the specified range is unmodified.
|
||||
If
|
||||
.Fa offset +
|
||||
.Fa len
|
||||
is beyond the current file size, then
|
||||
.Fn posix_fallocate
|
||||
will adjust the file size to
|
||||
.Fa offset +
|
||||
.Fa len .
|
||||
Otherwise, the file size will not be changed.
|
||||
.Pp
|
||||
Space allocated by
|
||||
.Fn posix_fallocate
|
||||
will be freed by a successful call to
|
||||
.Xr creat 2
|
||||
or
|
||||
.Xr open 2
|
||||
that truncates the size of the file.
|
||||
Space allocated via
|
||||
.Fn posix_fallocate
|
||||
may be freed by a successful call to
|
||||
.Xr ftruncate 2
|
||||
that reduces the file size to a size smaller than
|
||||
.Fa offset +
|
||||
.Fa len .
|
||||
.Pp
|
||||
.Sh RETURN VALUES
|
||||
If successful,
|
||||
.Fn posix_fallocate
|
||||
returns zero.
|
||||
It returns -1 on failure, and sets
|
||||
.Va errno
|
||||
to indicate the error.
|
||||
.Sh ERRORS
|
||||
Possible failure conditions:
|
||||
.Bl -tag -width Er
|
||||
.It Bq Er EBADF
|
||||
The
|
||||
.Fa fd
|
||||
argument is not a valid file descriptor.
|
||||
.It Bq Er EBADF
|
||||
The
|
||||
.Fa fd
|
||||
argument references a file that was opened without write permission.
|
||||
.It Bq Er EFBIG
|
||||
The value of
|
||||
.Fa offset +
|
||||
.Fa len
|
||||
is greater than the maximum file size.
|
||||
.It Bq Er EINTR
|
||||
A signal was caught during execution.
|
||||
.It Bq Er EINVAL
|
||||
The
|
||||
.Fa len
|
||||
argument was less than or equal to zero or the
|
||||
.Fa offset
|
||||
argument was less than zero.
|
||||
.It Bq Er EIO
|
||||
An I/O error occurred while reading from or writing to a file system.
|
||||
.It Bq Er ENODEV
|
||||
The
|
||||
.Fa fd
|
||||
argument does not refer to a regular file.
|
||||
.It Bq Er ENOSPC
|
||||
There is insufficient free space remaining on the file system storage
|
||||
media.
|
||||
.It Bq Er ESPIPE
|
||||
The
|
||||
.Fa fd
|
||||
argument is associated with a pipe or FIFO.
|
||||
.El
|
||||
.Sh SEE ALSO
|
||||
.Xr creat 2 ,
|
||||
.Xr ftruncate 2 ,
|
||||
.Xr open 2 ,
|
||||
.Xr unlink 2
|
||||
.Sh STANDARDS
|
||||
The
|
||||
.Fn posix_fallocate
|
||||
system call conforms to
|
||||
.St -p1003.1-2004 .
|
||||
.Sh HISTORY
|
||||
The
|
||||
.Fn posix_fallocate
|
||||
function appeared in
|
||||
.Fx 9.0 .
|
||||
.Sh AUTHORS
|
||||
.Fn posix_fallocate
|
||||
and this manual page were initially written by
|
||||
.An Matthew Fleming Aq mdf@FreeBSD.org .
|
@ -2790,3 +2790,15 @@ freebsd32_kldstat(struct thread *td, struct freebsd32_kldstat_args *uap)
|
||||
bcopy(&stat.pathname[0], &stat32.pathname[0], sizeof(stat.pathname));
|
||||
return (copyout(&stat32, uap->stat, version));
|
||||
}
|
||||
|
||||
int
|
||||
freebsd32_posix_fallocate(struct thread *td,
|
||||
struct freebsd32_posix_fallocate_args *uap)
|
||||
{
|
||||
struct posix_fallocate_args ap;
|
||||
|
||||
ap.fd = uap->fd;
|
||||
ap.offset = (uap->offsetlo | ((off_t)uap->offsethi << 32));
|
||||
ap.len = (uap->lenlo | ((off_t)uap->lenhi << 32));
|
||||
return (posix_fallocate(td, &ap));
|
||||
}
|
||||
|
@ -986,3 +986,7 @@
|
||||
529 AUE_NULL NOPROTO { int rctl_remove_rule(const void *inbufp, \
|
||||
size_t inbuflen, void *outbufp, \
|
||||
size_t outbuflen); }
|
||||
530 AUE_NULL STD { int freebsd32_posix_fallocate(int fd,\
|
||||
uint32_t offsetlo, uint32_t offsethi,\
|
||||
uint32_t lenlo, uint32_t lenhi); }
|
||||
531 AUE_NULL UNIMPL posix_fadvise
|
||||
|
@ -944,5 +944,8 @@
|
||||
529 AUE_NULL STD { int rctl_remove_rule(const void *inbufp, \
|
||||
size_t inbuflen, void *outbufp, \
|
||||
size_t outbuflen); }
|
||||
530 AUE_NULL STD { int posix_fallocate(int fd, \
|
||||
off_t offset, off_t len); }
|
||||
531 AUE_NULL UNIMPL posix_fadvise
|
||||
; Please copy any additions and changes to the following compatibility tables:
|
||||
; sys/compat/freebsd32/syscalls.master
|
||||
|
@ -99,6 +99,7 @@ struct vop_vector default_vnodeops = {
|
||||
.vop_advlock = vop_stdadvlock,
|
||||
.vop_advlockasync = vop_stdadvlockasync,
|
||||
.vop_advlockpurge = vop_stdadvlockpurge,
|
||||
.vop_allocate = vop_stdallocate,
|
||||
.vop_bmap = vop_stdbmap,
|
||||
.vop_close = VOP_NULL,
|
||||
.vop_fsync = VOP_NULL,
|
||||
@ -855,6 +856,136 @@ out:
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
vop_stdallocate(struct vop_allocate_args *ap)
|
||||
{
|
||||
#ifdef __notyet__
|
||||
struct statfs sfs;
|
||||
#endif
|
||||
struct iovec aiov;
|
||||
struct vattr vattr, *vap;
|
||||
struct uio auio;
|
||||
off_t len, cur, offset;
|
||||
uint8_t *buf;
|
||||
struct thread *td;
|
||||
struct vnode *vp;
|
||||
size_t iosize;
|
||||
int error, locked;
|
||||
|
||||
buf = NULL;
|
||||
error = 0;
|
||||
locked = 1;
|
||||
td = curthread;
|
||||
vap = &vattr;
|
||||
vp = ap->a_vp;
|
||||
len = ap->a_len;
|
||||
offset = ap->a_offset;
|
||||
|
||||
error = VOP_GETATTR(vp, vap, td->td_ucred);
|
||||
if (error != 0)
|
||||
goto out;
|
||||
iosize = vap->va_blocksize;
|
||||
if (iosize == 0)
|
||||
iosize = BLKDEV_IOSIZE;
|
||||
if (iosize > MAXPHYS)
|
||||
iosize = MAXPHYS;
|
||||
buf = malloc(iosize, M_TEMP, M_WAITOK);
|
||||
|
||||
#ifdef __notyet__
|
||||
/*
|
||||
* Check if the filesystem sets f_maxfilesize; if not use
|
||||
* VOP_SETATTR to perform the check.
|
||||
*/
|
||||
error = VFS_STATFS(vp->v_mount, &sfs, td);
|
||||
if (error != 0)
|
||||
goto out;
|
||||
if (sfs.f_maxfilesize) {
|
||||
if (offset > sfs.f_maxfilesize || len > sfs.f_maxfilesize ||
|
||||
offset + len > sfs.f_maxfilesize) {
|
||||
error = EFBIG;
|
||||
goto out;
|
||||
}
|
||||
} else
|
||||
#endif
|
||||
if (offset + len > vap->va_size) {
|
||||
VATTR_NULL(vap);
|
||||
vap->va_size = offset + len;
|
||||
error = VOP_SETATTR(vp, vap, td->td_ucred);
|
||||
if (error != 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
while (len > 0) {
|
||||
if (should_yield()) {
|
||||
VOP_UNLOCK(vp, 0);
|
||||
locked = 0;
|
||||
kern_yield(-1);
|
||||
error = vn_lock(vp, LK_EXCLUSIVE);
|
||||
if (error != 0)
|
||||
break;
|
||||
locked = 1;
|
||||
error = VOP_GETATTR(vp, vap, td->td_ucred);
|
||||
if (error != 0)
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* Read and write back anything below the nominal file
|
||||
* size. There's currently no way outside the filesystem
|
||||
* to know whether this area is sparse or not.
|
||||
*/
|
||||
cur = iosize;
|
||||
if ((offset % iosize) != 0)
|
||||
cur -= (offset % iosize);
|
||||
if (cur > len)
|
||||
cur = len;
|
||||
if (offset < vap->va_size) {
|
||||
aiov.iov_base = buf;
|
||||
aiov.iov_len = cur;
|
||||
auio.uio_iov = &aiov;
|
||||
auio.uio_iovcnt = 1;
|
||||
auio.uio_offset = offset;
|
||||
auio.uio_resid = cur;
|
||||
auio.uio_segflg = UIO_SYSSPACE;
|
||||
auio.uio_rw = UIO_READ;
|
||||
auio.uio_td = td;
|
||||
error = VOP_READ(vp, &auio, 0, td->td_ucred);
|
||||
if (error != 0)
|
||||
break;
|
||||
if (auio.uio_resid > 0) {
|
||||
bzero(buf + cur - auio.uio_resid,
|
||||
auio.uio_resid);
|
||||
}
|
||||
} else {
|
||||
bzero(buf, cur);
|
||||
}
|
||||
|
||||
aiov.iov_base = buf;
|
||||
aiov.iov_len = cur;
|
||||
auio.uio_iov = &aiov;
|
||||
auio.uio_iovcnt = 1;
|
||||
auio.uio_offset = offset;
|
||||
auio.uio_resid = cur;
|
||||
auio.uio_segflg = UIO_SYSSPACE;
|
||||
auio.uio_rw = UIO_WRITE;
|
||||
auio.uio_td = td;
|
||||
|
||||
error = VOP_WRITE(vp, &auio, 0, td->td_ucred);
|
||||
if (error != 0)
|
||||
break;
|
||||
|
||||
len -= cur;
|
||||
offset += cur;
|
||||
}
|
||||
|
||||
out:
|
||||
KASSERT(locked || error != 0, ("How'd I get unlocked with no error?"));
|
||||
if (locked && error != 0)
|
||||
VOP_UNLOCK(vp, 0);
|
||||
free(buf, M_TEMP);
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* vfs default ops
|
||||
* used to fill the vfs function table to get reasonable default return values.
|
||||
|
@ -4671,3 +4671,83 @@ out:
|
||||
VFS_UNLOCK_GIANT(vfslocked);
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len)
|
||||
{
|
||||
struct file *fp;
|
||||
struct mount *mp;
|
||||
struct vnode *vp;
|
||||
int error, vfslocked, vnlocked;
|
||||
|
||||
fp = NULL;
|
||||
mp = NULL;
|
||||
vfslocked = 0;
|
||||
vnlocked = 0;
|
||||
error = fget(td, fd, &fp);
|
||||
if (error != 0)
|
||||
goto out;
|
||||
|
||||
switch (fp->f_type) {
|
||||
case DTYPE_VNODE:
|
||||
break;
|
||||
case DTYPE_PIPE:
|
||||
case DTYPE_FIFO:
|
||||
error = ESPIPE;
|
||||
goto out;
|
||||
default:
|
||||
error = ENODEV;
|
||||
goto out;
|
||||
}
|
||||
if ((fp->f_flag & FWRITE) == 0) {
|
||||
error = EBADF;
|
||||
goto out;
|
||||
}
|
||||
vp = fp->f_vnode;
|
||||
if (vp->v_type != VREG) {
|
||||
error = ENODEV;
|
||||
goto out;
|
||||
}
|
||||
if (offset < 0 || len <= 0) {
|
||||
error = EINVAL;
|
||||
goto out;
|
||||
}
|
||||
/* Check for wrap. */
|
||||
if (offset > OFF_MAX - len) {
|
||||
error = EFBIG;
|
||||
goto out;
|
||||
}
|
||||
|
||||
bwillwrite();
|
||||
vfslocked = VFS_LOCK_GIANT(vp->v_mount);
|
||||
error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
|
||||
if (error != 0)
|
||||
goto out;
|
||||
error = vn_lock(vp, LK_EXCLUSIVE);
|
||||
if (error != 0)
|
||||
goto out;
|
||||
vnlocked = 1;
|
||||
#ifdef MAC
|
||||
error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp);
|
||||
if (error != 0)
|
||||
goto out;
|
||||
#endif
|
||||
error = VOP_ALLOCATE(vp, offset, len);
|
||||
if (error != 0)
|
||||
vnlocked = 0;
|
||||
out:
|
||||
if (vnlocked)
|
||||
VOP_UNLOCK(vp, 0);
|
||||
vn_finished_write(mp);
|
||||
VFS_UNLOCK_GIANT(vfslocked);
|
||||
if (fp != NULL)
|
||||
fdrop(fp, td);
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
posix_fallocate(struct thread *td, struct posix_fallocate_args *uap)
|
||||
{
|
||||
|
||||
return (kern_posix_fallocate(td, uap->fd, uap->offset, uap->len));
|
||||
}
|
||||
|
@ -608,6 +608,7 @@ vop_vptofh {
|
||||
IN struct fid *fhp;
|
||||
};
|
||||
|
||||
|
||||
%% vptocnp vp L L L
|
||||
%% vptocnp vpp - U -
|
||||
|
||||
@ -618,3 +619,12 @@ vop_vptocnp {
|
||||
INOUT char *buf;
|
||||
INOUT int *buflen;
|
||||
};
|
||||
|
||||
|
||||
%% allocate vp E E U
|
||||
|
||||
vop_allocate {
|
||||
IN struct vnode *vp;
|
||||
IN off_t offset;
|
||||
IN off_t len;
|
||||
};
|
||||
|
@ -278,7 +278,7 @@ struct oflock {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* XXX missing posix_fadvise() and posix_fallocate(), and POSIX_FADV_* macros.
|
||||
* XXX missing posix_fadvise() and POSIX_FADV_* macros.
|
||||
*/
|
||||
|
||||
#ifndef _KERNEL
|
||||
@ -289,6 +289,9 @@ int fcntl(int, int, ...);
|
||||
#if __BSD_VISIBLE || __POSIX_VISIBLE >= 200809
|
||||
int openat(int, const char *, int, ...);
|
||||
#endif
|
||||
#if __BSD_VISIBLE || __POSIX_VISIBLE >= 200112
|
||||
int posix_fallocate(int, off_t, off_t);
|
||||
#endif
|
||||
#if __BSD_VISIBLE
|
||||
int flock(int, int);
|
||||
#endif
|
||||
|
@ -689,6 +689,7 @@ int vop_stdaccessx(struct vop_accessx_args *ap);
|
||||
int vop_stdadvlock(struct vop_advlock_args *ap);
|
||||
int vop_stdadvlockasync(struct vop_advlockasync_args *ap);
|
||||
int vop_stdadvlockpurge(struct vop_advlockpurge_args *ap);
|
||||
int vop_stdallocate(struct vop_allocate_args *ap);
|
||||
int vop_stdpathconf(struct vop_pathconf_args *);
|
||||
int vop_stdpoll(struct vop_poll_args *);
|
||||
int vop_stdvptocnp(struct vop_vptocnp_args *ap);
|
||||
|
Loading…
x
Reference in New Issue
Block a user