Implement proper Linux /dev/fd and /proc/self/fd behavior by adding

Linux specific things to the native fdescfs file system.

Unlike FreeBSD, the Linux fdescfs is a directory containing symbolic
links to the actual files that the process has open.
A readlink(2) call on such a file returns a full path for a regular file,
or a string in a special format (type:[inode], anon_inode:<file-type>, etc.).
As in FreeBSD, opening the file in the Linux fdescfs directory is
equivalent to duplicating the corresponding file descriptor.

Here we have mutually exclusive requirements:
- in case of readlink(2) call fdescfs lookup() method should return VLNK
vnode, otherwise our kern_readlinkat() fails with an EINVAL error;
- in the other calls fdescfs lookup() method should return non VLNK vnode.

To resolve this, a new vnode v_vflag, VV_READLINK, was added; it is set on fdescfs descriptor vnodes when the file system has been
mounted with the linrdlnk option, and kern_readlinkat() was modified to handle it properly.

For now, for Linux ABI compatibility, mount the fdescfs volume with the linrdlnk option:

    mount -t fdescfs -o linrdlnk null /compat/linux/dev/fd

Reviewed by:	kib@
MFC after:	1 week
Relnotes:	yes
This commit is contained in:
Dmitry Chagin 2017-08-01 03:40:19 +00:00
parent d2ffc7af30
commit 77d3337c9f
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=321839
7 changed files with 85 additions and 7 deletions

View File

@ -24,7 +24,7 @@
.\" .\"
.\" $FreeBSD$ .\" $FreeBSD$
.\" .\"
.Dd February 8, 2010 .Dd August 1, 2017
.Dt LINUX 4 .Dt LINUX 4
.Os .Os
.Sh NAME .Sh NAME
@ -127,9 +127,11 @@ regardless of whether the
module is statically linked into the kernel module is statically linked into the kernel
or loaded as a module. or loaded as a module.
.Sh FILES .Sh FILES
.Bl -tag -width /compat/linux/proc -compact .Bl -tag -width /compat/linux/dev/fd -compact
.It Pa /compat/linux .It Pa /compat/linux
minimal Linux run-time environment minimal Linux run-time environment
.It Pa /compat/linux/dev/fd
limited Linux file-descriptor file system
.It Pa /compat/linux/proc .It Pa /compat/linux/proc
limited Linux process file system limited Linux process file system
.It Pa /compat/linux/sys .It Pa /compat/linux/sys
@ -138,6 +140,7 @@ limited Linux system file system
.Sh SEE ALSO .Sh SEE ALSO
.Xr brandelf 1 , .Xr brandelf 1 ,
.Xr elf 5 , .Xr elf 5 ,
.Xr fdescfs 5 ,
.Xr linprocfs 5 , .Xr linprocfs 5 ,
.Xr linsysfs 5 .Xr linsysfs 5
.Sh HISTORY .Sh HISTORY

View File

@ -34,7 +34,7 @@
.\" .\"
.\" $FreeBSD$ .\" $FreeBSD$
.\" .\"
.Dd September 18, 2010 .Dd August 1, 2017
.Dt FDESCFS 5 .Dt FDESCFS 5
.Os .Os
.Sh NAME .Sh NAME
@ -92,6 +92,14 @@ and
files are created by default when devfs alone is mounted. files are created by default when devfs alone is mounted.
.Nm .Nm
creates entries for all file descriptors opened by the process. creates entries for all file descriptors opened by the process.
.Pp
For
.Xr linux 4
ABI compatibility mount
.Nm
volume with
.Cm linrdlnk
option.
.Sh FILES .Sh FILES
.Bl -tag -width /dev/stderr -compact .Bl -tag -width /dev/stderr -compact
.It Pa /dev/fd/# .It Pa /dev/fd/#
@ -103,6 +111,12 @@ volume located on
.Pa /dev/fd : .Pa /dev/fd :
.Pp .Pp
.Dl "mount -t fdescfs null /dev/fd" .Dl "mount -t fdescfs null /dev/fd"
.Pp
For
.Xr linux 4
ABI compatibility:
.Pp
.Dl "mount -t fdescfs -o linrdlnk null /compat/linux/dev/fd"
.Sh SEE ALSO .Sh SEE ALSO
.Xr devfs 5 , .Xr devfs 5 ,
.Xr mount 8 .Xr mount 8

View File

@ -38,7 +38,9 @@
#define _FS_FDESC_H_ #define _FS_FDESC_H_
/* Private mount flags for fdescfs. */ /* Private mount flags for fdescfs. */
#define FMNT_UNMOUNTF 0x01 #define FMNT_UNMOUNTF 0x01
#define FMNT_LINRDLNKF 0x02
struct fdescmount { struct fdescmount {
struct vnode *f_root; /* Root node */ struct vnode *f_root; /* Root node */
int flags; int flags;

View File

@ -101,6 +101,8 @@ fdesc_mount(struct mount *mp)
*/ */
mp->mnt_data = fmp; mp->mnt_data = fmp;
fmp->flags = 0; fmp->flags = 0;
if (vfs_getopt(mp->mnt_optnew, "linrdlnk", NULL, NULL) == 0)
fmp->flags |= FMNT_LINRDLNKF;
error = fdesc_allocvp(Froot, -1, FD_ROOT, mp, &rvp); error = fdesc_allocvp(Froot, -1, FD_ROOT, mp, &rvp);
if (error) { if (error) {
free(fmp, M_FDESCMNT); free(fmp, M_FDESCMNT);

View File

@ -69,6 +69,7 @@ static vop_getattr_t fdesc_getattr;
static vop_lookup_t fdesc_lookup; static vop_lookup_t fdesc_lookup;
static vop_open_t fdesc_open; static vop_open_t fdesc_open;
static vop_readdir_t fdesc_readdir; static vop_readdir_t fdesc_readdir;
static vop_readlink_t fdesc_readlink;
static vop_reclaim_t fdesc_reclaim; static vop_reclaim_t fdesc_reclaim;
static vop_setattr_t fdesc_setattr; static vop_setattr_t fdesc_setattr;
@ -81,6 +82,7 @@ static struct vop_vector fdesc_vnodeops = {
.vop_open = fdesc_open, .vop_open = fdesc_open,
.vop_pathconf = vop_stdpathconf, .vop_pathconf = vop_stdpathconf,
.vop_readdir = fdesc_readdir, .vop_readdir = fdesc_readdir,
.vop_readlink = fdesc_readlink,
.vop_reclaim = fdesc_reclaim, .vop_reclaim = fdesc_reclaim,
.vop_setattr = fdesc_setattr, .vop_setattr = fdesc_setattr,
}; };
@ -195,6 +197,8 @@ fdesc_allocvp(fdntype ftype, unsigned fd_fd, int ix, struct mount *mp,
fd->fd_type = ftype; fd->fd_type = ftype;
fd->fd_fd = fd_fd; fd->fd_fd = fd_fd;
fd->fd_ix = ix; fd->fd_ix = ix;
if (ftype == Fdesc && fmp->flags & FMNT_LINRDLNKF)
vp->v_vflag |= VV_READLINK;
error = insmntque1(vp, mp, fdesc_insmntque_dtr, NULL); error = insmntque1(vp, mp, fdesc_insmntque_dtr, NULL);
if (error != 0) { if (error != 0) {
*vpp = NULLVP; *vpp = NULLVP;
@ -420,7 +424,7 @@ fdesc_getattr(struct vop_getattr_args *ap)
break; break;
case Fdesc: case Fdesc:
vap->va_type = VCHR; vap->va_type = (vp->v_vflag & VV_READLINK) == 0 ? VCHR : VLNK;
vap->va_nlink = 1; vap->va_nlink = 1;
vap->va_size = 0; vap->va_size = 0;
vap->va_rdev = makedev(0, vap->va_fileid); vap->va_rdev = makedev(0, vap->va_fileid);
@ -490,6 +494,7 @@ fdesc_setattr(struct vop_setattr_args *ap)
static int static int
fdesc_readdir(struct vop_readdir_args *ap) fdesc_readdir(struct vop_readdir_args *ap)
{ {
struct fdescmount *fmp;
struct uio *uio = ap->a_uio; struct uio *uio = ap->a_uio;
struct filedesc *fdp; struct filedesc *fdp;
struct dirent d; struct dirent d;
@ -499,6 +504,7 @@ fdesc_readdir(struct vop_readdir_args *ap)
if (VTOFDESC(ap->a_vp)->fd_type != Froot) if (VTOFDESC(ap->a_vp)->fd_type != Froot)
panic("fdesc_readdir: not dir"); panic("fdesc_readdir: not dir");
fmp = VFSTOFDESC(ap->a_vp->v_mount);
if (ap->a_ncookies != NULL) if (ap->a_ncookies != NULL)
*ap->a_ncookies = 0; *ap->a_ncookies = 0;
@ -530,7 +536,8 @@ fdesc_readdir(struct vop_readdir_args *ap)
break; break;
dp->d_namlen = sprintf(dp->d_name, "%d", fcnt); dp->d_namlen = sprintf(dp->d_name, "%d", fcnt);
dp->d_reclen = UIO_MX; dp->d_reclen = UIO_MX;
dp->d_type = DT_CHR; dp->d_type = (fmp->flags & FMNT_LINRDLNKF) == 0 ?
DT_CHR : DT_LNK;
dp->d_fileno = i + FD_DESC; dp->d_fileno = i + FD_DESC;
break; break;
} }
@ -567,3 +574,52 @@ fdesc_reclaim(struct vop_reclaim_args *ap)
vp->v_data = NULL; vp->v_data = NULL;
return (0); return (0);
} }
static int
fdesc_readlink(struct vop_readlink_args *va)
{
struct vnode *vp, *vn;
cap_rights_t rights;
struct thread *td;
struct uio *uio;
struct file *fp;
char *freepath, *fullpath;
size_t pathlen;
int lockflags, fd_fd;
int error;
freepath = NULL;
vn = va->a_vp;
if (VTOFDESC(vn)->fd_type != Fdesc)
panic("fdesc_readlink: not fdescfs link");
fd_fd = ((struct fdescnode *)vn->v_data)->fd_fd;
lockflags = VOP_ISLOCKED(vn);
VOP_UNLOCK(vn, 0);
td = curthread;
error = fget_cap(td, fd_fd, cap_rights_init(&rights), &fp, NULL);
if (error != 0)
goto out;
switch (fp->f_type) {
case DTYPE_VNODE:
vp = fp->f_vnode;
error = vn_fullpath(td, vp, &fullpath, &freepath);
break;
default:
fullpath = "anon_inode:[unknown]";
break;
}
if (error == 0) {
uio = va->a_uio;
pathlen = strlen(fullpath);
error = uiomove(fullpath, pathlen, uio);
}
if (freepath != NULL)
free(freepath, M_TEMP);
fdrop(fp, td);
out:
vn_lock(vn, lockflags | LK_RETRY);
return (error);
}

View File

@ -2484,7 +2484,7 @@ kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
return (error); return (error);
} }
#endif #endif
if (vp->v_type != VLNK) if (vp->v_type != VLNK && (vp->v_vflag & VV_READLINK) == 0)
error = EINVAL; error = EINVAL;
else { else {
aiov.iov_base = buf; aiov.iov_base = buf;

View File

@ -250,6 +250,7 @@ struct xvnode {
#define VV_DELETED 0x0400 /* should be removed */ #define VV_DELETED 0x0400 /* should be removed */
#define VV_MD 0x0800 /* vnode backs the md device */ #define VV_MD 0x0800 /* vnode backs the md device */
#define VV_FORCEINSMQ 0x1000 /* force the insmntque to succeed */ #define VV_FORCEINSMQ 0x1000 /* force the insmntque to succeed */
#define VV_READLINK 0x2000 /* fdescfs linux vnode */
#define VMP_TMPMNTFREELIST 0x0001 /* Vnode is on mnt's tmp free list */ #define VMP_TMPMNTFREELIST 0x0001 /* Vnode is on mnt's tmp free list */