diff --git a/sys/compat/linuxkpi/common/src/linux_compat.c b/sys/compat/linuxkpi/common/src/linux_compat.c index 57a6774464ed..85d7f2a97196 100644 --- a/sys/compat/linuxkpi/common/src/linux_compat.c +++ b/sys/compat/linuxkpi/common/src/linux_compat.c @@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -390,18 +391,6 @@ kobject_init_and_add(struct kobject *kobj, const struct kobj_type *ktype, return kobject_add_complete(kobj, parent); } -static void -linux_file_dtor(void *cdp) -{ - struct linux_file *filp; - - linux_set_current(curthread); - filp = cdp; - filp->f_op->release(filp->f_vnode, filp); - vdrop(filp->f_vnode); - kfree(filp); -} - static void linux_kq_lock(void *arg) { @@ -438,7 +427,7 @@ linux_kq_lock_unowned(void *arg) } static void -linux_dev_kqfilter_poll(struct linux_file *, int); +linux_file_kqfilter_poll(struct linux_file *, int); struct linux_file * linux_file_alloc(void) @@ -678,24 +667,31 @@ static struct cdev_pager_ops linux_cdev_pager_ops[2] = { }, }; +#define OPW(fp,td,code) ({ \ + struct file *__fpop; \ + int __retval; \ + \ + __fpop = (td)->td_fpop; \ + (td)->td_fpop = (fp); \ + __retval = (code); \ + (td)->td_fpop = __fpop; \ + __retval; \ +}) + static int -linux_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td) +linux_dev_fdopen(struct cdev *dev, int fflags, struct thread *td, struct file *file) { struct linux_cdev *ldev; struct linux_file *filp; - struct file *file; int error; - file = td->td_fpop; ldev = dev->si_drv1; - if (ldev == NULL) - return (ENODEV); filp = linux_file_alloc(); filp->f_dentry = &filp->f_dentry_store; filp->f_op = ldev->ops; + filp->f_mode = file->f_flag; filp->f_flags = file->f_flag; - vhold(file->f_vnode); filp->f_vnode = file->f_vnode; filp->_file = file; @@ -704,35 +700,17 @@ linux_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td) if (filp->f_op->open) { error = -filp->f_op->open(file->f_vnode, filp); if (error) { - vdrop(filp->f_vnode); kfree(filp); - goto done; + return (error); } } - error = devfs_set_cdevpriv(filp, linux_file_dtor); - if (error) { - filp->f_op->release(file->f_vnode, filp); - vdrop(filp->f_vnode); - kfree(filp); - } -done: - return (error); -} -static int -linux_dev_close(struct cdev *dev, int fflag, int devtype, struct thread *td) -{ - struct linux_file *filp; - struct file *file; - int error; + /* hold on to the vnode - used for fstat() */ + vhold(filp->f_vnode); - file = td->td_fpop; - if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) - return (error); - filp->f_flags = file->f_flag; - devfs_clear_cdevpriv(); - - return (0); + /* release the file from devfs */ + finit(file, filp->f_mode, DTYPE_DEV, filp, &linuxfileops); + return (ENXIO); } #define LINUX_IOCTL_MIN_PTR 0x10000UL @@ -850,24 +828,12 @@ linux_access_ok(int rw, const void *uaddr, size_t len) } static int -linux_dev_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, - struct thread *td) +linux_file_ioctl_sub(struct file *fp, struct linux_file *filp, + u_long cmd, caddr_t data, struct thread *td) { - struct linux_file *filp; - struct file *file; unsigned size; int error; - file = td->td_fpop; - if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) - return (error); - filp->f_flags = file->f_flag; - - /* the LinuxKPI supports blocking and non-blocking I/O */ - if (cmd == FIONBIO || cmd == FIOASYNC) - return (0); - - linux_set_current(td); size = IOCPARM_LEN(cmd); /* refer to logic in sys_ioctl() */ if (size > 0) { @@ -888,17 +854,17 @@ linux_dev_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, if (td->td_proc->p_elf_machine == EM_386) { /* try the compat IOCTL handler first */ if (filp->f_op->compat_ioctl != NULL) - error = -filp->f_op->compat_ioctl(filp, cmd, (u_long)data); + error = -OPW(fp, td, filp->f_op->compat_ioctl(filp, cmd, (u_long)data)); else error = ENOTTY; /* fallback to the regular IOCTL handler, if any */ if (error == ENOTTY && filp->f_op->unlocked_ioctl != NULL) - error = -filp->f_op->unlocked_ioctl(filp, cmd, (u_long)data); + error = -OPW(fp, td, filp->f_op->unlocked_ioctl(filp, cmd, (u_long)data)); } else #endif if (filp->f_op->unlocked_ioctl != NULL) - error = -filp->f_op->unlocked_ioctl(filp, cmd, (u_long)data); + error = -OPW(fp, td, filp->f_op->unlocked_ioctl(filp, cmd, (u_long)data)); else error = ENOTTY; if (size > 0) { @@ -908,118 +874,15 @@ linux_dev_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, if (error == EWOULDBLOCK) { /* update kqfilter status, if any */ - linux_dev_kqfilter_poll(filp, + linux_file_kqfilter_poll(filp, LINUX_KQ_FLAG_HAS_READ | LINUX_KQ_FLAG_HAS_WRITE); } else if (error == ERESTARTSYS) error = ERESTART; return (error); } -static int -linux_dev_read(struct cdev *dev, struct uio *uio, int ioflag) -{ - struct linux_file *filp; - struct thread *td; - struct file *file; - ssize_t bytes; - int error; - - td = curthread; - file = td->td_fpop; - if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) - return (error); - filp->f_flags = file->f_flag; - /* XXX no support for I/O vectors currently */ - if (uio->uio_iovcnt != 1) - return (EOPNOTSUPP); - linux_set_current(td); - if (filp->f_op->read) { - bytes = filp->f_op->read(filp, uio->uio_iov->iov_base, - uio->uio_iov->iov_len, &uio->uio_offset); - if (bytes >= 0) { - uio->uio_iov->iov_base = - ((uint8_t *)uio->uio_iov->iov_base) + bytes; - uio->uio_iov->iov_len -= bytes; - uio->uio_resid -= bytes; - } else { - error = -bytes; - if (error == ERESTARTSYS) - error = ERESTART; - } - } else - error = ENXIO; - - /* update kqfilter status, if any */ - linux_dev_kqfilter_poll(filp, LINUX_KQ_FLAG_HAS_READ); - - return (error); -} - -static int -linux_dev_write(struct cdev *dev, struct uio *uio, int ioflag) -{ - struct linux_file *filp; - struct thread *td; - struct file *file; - ssize_t bytes; - int error; - - td = curthread; - file = td->td_fpop; - if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) - return (error); - filp->f_flags = file->f_flag; - /* XXX no support for I/O vectors currently */ - if (uio->uio_iovcnt != 1) - return (EOPNOTSUPP); - linux_set_current(td); - if (filp->f_op->write) { - bytes = filp->f_op->write(filp, uio->uio_iov->iov_base, - uio->uio_iov->iov_len, &uio->uio_offset); - if (bytes >= 0) { - uio->uio_iov->iov_base = - ((uint8_t *)uio->uio_iov->iov_base) + bytes; - uio->uio_iov->iov_len -= bytes; - uio->uio_resid -= bytes; - } else { - error = -bytes; - if (error == ERESTARTSYS) - error = ERESTART; - } - } else - error = ENXIO; - - /* update kqfilter status, if any */ - linux_dev_kqfilter_poll(filp, LINUX_KQ_FLAG_HAS_WRITE); - - return (error); -} - #define LINUX_POLL_TABLE_NORMAL ((poll_table *)1) -static int -linux_dev_poll(struct cdev *dev, int events, struct thread *td) -{ - struct linux_file *filp; - struct file *file; - int revents; - - if (devfs_get_cdevpriv((void **)&filp) != 0) - goto error; - - file = td->td_fpop; - filp->f_flags = file->f_flag; - linux_set_current(td); - if (filp->f_op->poll != NULL) - revents = filp->f_op->poll(filp, LINUX_POLL_TABLE_NORMAL) & events; - else - revents = 0; - - return (revents); -error: - return (events & (POLLHUP|POLLIN|POLLRDNORM|POLLOUT|POLLWRNORM)); -} - /* * This function atomically updates the poll wakeup state and returns * the previous state at the time of update. @@ -1127,7 +990,7 @@ linux_poll_wakeup(struct linux_file *filp) } static void -linux_dev_kqfilter_detach(struct knote *kn) +linux_file_kqfilter_detach(struct knote *kn) { struct linux_file *filp = kn->kn_hook; @@ -1137,7 +1000,7 @@ linux_dev_kqfilter_detach(struct knote *kn) } static int -linux_dev_kqfilter_read_event(struct knote *kn, long hint) +linux_file_kqfilter_read_event(struct knote *kn, long hint) { struct linux_file *filp = kn->kn_hook; @@ -1147,7 +1010,7 @@ linux_dev_kqfilter_read_event(struct knote *kn, long hint) } static int -linux_dev_kqfilter_write_event(struct knote *kn, long hint) +linux_file_kqfilter_write_event(struct knote *kn, long hint) { struct linux_file *filp = kn->kn_hook; @@ -1158,24 +1021,26 @@ linux_dev_kqfilter_write_event(struct knote *kn, long hint) static struct filterops linux_dev_kqfiltops_read = { .f_isfd = 1, - .f_detach = linux_dev_kqfilter_detach, - .f_event = linux_dev_kqfilter_read_event, + .f_detach = linux_file_kqfilter_detach, + .f_event = linux_file_kqfilter_read_event, }; static struct filterops linux_dev_kqfiltops_write = { .f_isfd = 1, - .f_detach = linux_dev_kqfilter_detach, - .f_event = linux_dev_kqfilter_write_event, + .f_detach = linux_file_kqfilter_detach, + .f_event = linux_file_kqfilter_write_event, }; static void -linux_dev_kqfilter_poll(struct linux_file *filp, int kqflags) +linux_file_kqfilter_poll(struct linux_file *filp, int kqflags) { int temp; if (filp->f_kqflags & kqflags) { + struct thread *td = curthread; + /* get the latest polling state */ - temp = filp->f_op->poll(filp, NULL); + temp = OPW(filp->_file, td, filp->f_op->poll(filp, NULL)); spin_lock(&filp->f_kqlock); /* clear kqflags */ @@ -1196,17 +1061,14 @@ linux_dev_kqfilter_poll(struct linux_file *filp, int kqflags) } static int -linux_dev_kqfilter(struct cdev *dev, struct knote *kn) +linux_file_kqfilter(struct file *file, struct knote *kn) { struct linux_file *filp; - struct file *file; struct thread *td; int error; td = curthread; - file = td->td_fpop; - if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) - return (error); + filp = (struct linux_file *)file->f_data; filp->f_flags = file->f_flag; if (filp->f_op->poll == NULL) return (EINVAL); @@ -1218,12 +1080,14 @@ linux_dev_kqfilter(struct cdev *dev, struct knote *kn) kn->kn_fop = &linux_dev_kqfiltops_read; kn->kn_hook = filp; knlist_add(&filp->f_selinfo.si_note, kn, 1); + error = 0; break; case EVFILT_WRITE: filp->f_kqflags |= LINUX_KQ_FLAG_HAS_WRITE; kn->kn_fop = &linux_dev_kqfiltops_write; kn->kn_hook = filp; knlist_add(&filp->f_selinfo.si_note, kn, 1); + error = 0; break; default: error = EINVAL; @@ -1235,32 +1099,28 @@ linux_dev_kqfilter(struct cdev *dev, struct knote *kn) linux_set_current(td); /* update kqfilter status, if any */ - linux_dev_kqfilter_poll(filp, + linux_file_kqfilter_poll(filp, LINUX_KQ_FLAG_HAS_READ | LINUX_KQ_FLAG_HAS_WRITE); } return (error); } static int -linux_dev_mmap_single(struct cdev *dev, vm_ooffset_t *offset, - vm_size_t size, struct vm_object **object, int nprot) +linux_file_mmap_single(struct file *fp, vm_ooffset_t *offset, + vm_size_t size, struct vm_object **object, int nprot, + struct thread *td) { struct vm_area_struct *vmap; struct mm_struct *mm; struct linux_file *filp; - struct thread *td; - struct file *file; vm_memattr_t attr; int error; - td = curthread; - file = td->td_fpop; - if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) - return (error); - filp->f_flags = file->f_flag; + filp = (struct linux_file *)fp->f_data; + filp->f_flags = fp->f_flag; if (filp->f_op->mmap == NULL) - return (ENODEV); + return (EOPNOTSUPP); linux_set_current(td); @@ -1288,7 +1148,7 @@ linux_dev_mmap_single(struct cdev *dev, vm_ooffset_t *offset, if (unlikely(down_write_killable(&vmap->vm_mm->mmap_sem))) { error = EINTR; } else { - error = -filp->f_op->mmap(filp, vmap); + error = -OPW(fp, td, filp->f_op->mmap(filp, vmap)); up_write(&vmap->vm_mm->mmap_sem); } @@ -1351,11 +1211,11 @@ linux_dev_mmap_single(struct cdev *dev, vm_ooffset_t *offset, if (vm_no_fault) { *object = cdev_pager_allocate(vm_private_data, OBJT_DEVICE, &linux_cdev_pager_ops[1], size, nprot, *offset, - curthread->td_ucred); + td->td_ucred); } else { *object = cdev_pager_allocate(vm_private_data, OBJT_MGTDEVICE, &linux_cdev_pager_ops[0], size, nprot, *offset, - curthread->td_ucred); + td->td_ucred); } /* check if allocating the VM object failed */ @@ -1376,7 +1236,7 @@ linux_dev_mmap_single(struct cdev *dev, vm_ooffset_t *offset, (vm_paddr_t)vmap->vm_pfn << PAGE_SHIFT, vmap->vm_len); *object = vm_pager_allocate(OBJT_SG, sg, vmap->vm_len, - nprot, 0, curthread->td_ucred); + nprot, 0, td->td_ucred); linux_cdev_handle_free(vmap); @@ -1397,15 +1257,7 @@ linux_dev_mmap_single(struct cdev *dev, vm_ooffset_t *offset, struct cdevsw linuxcdevsw = { .d_version = D_VERSION, - .d_flags = D_TRACKCLOSE, - .d_open = linux_dev_open, - .d_close = linux_dev_close, - .d_read = linux_dev_read, - .d_write = linux_dev_write, - .d_ioctl = linux_dev_ioctl, - .d_mmap_single = linux_dev_mmap_single, - .d_poll = linux_dev_poll, - .d_kqfilter = linux_dev_kqfilter, + .d_fdopen = linux_dev_fdopen, .d_name = "lkpidev", }; @@ -1425,18 +1277,61 @@ linux_file_read(struct file *file, struct uio *uio, struct ucred *active_cred, return (EOPNOTSUPP); linux_set_current(td); if (filp->f_op->read) { - bytes = filp->f_op->read(filp, uio->uio_iov->iov_base, - uio->uio_iov->iov_len, &uio->uio_offset); + bytes = OPW(file, td, filp->f_op->read(filp, uio->uio_iov->iov_base, + uio->uio_iov->iov_len, &uio->uio_offset)); if (bytes >= 0) { uio->uio_iov->iov_base = ((uint8_t *)uio->uio_iov->iov_base) + bytes; uio->uio_iov->iov_len -= bytes; uio->uio_resid -= bytes; - } else + } else { error = -bytes; + if (error == ERESTARTSYS) + error = ERESTART; + } } else error = ENXIO; + /* update kqfilter status, if any */ + linux_file_kqfilter_poll(filp, LINUX_KQ_FLAG_HAS_READ); + + return (error); +} + +static int +linux_file_write(struct file *file, struct uio *uio, struct ucred *active_cred, + int flags, struct thread *td) +{ + struct linux_file *filp; + ssize_t bytes; + int error; + + error = 0; + filp = (struct linux_file *)file->f_data; + filp->f_flags = file->f_flag; + /* XXX no support for I/O vectors currently */ + if (uio->uio_iovcnt != 1) + return (EOPNOTSUPP); + linux_set_current(td); + if (filp->f_op->write) { + bytes = OPW(file, td, filp->f_op->write(filp, uio->uio_iov->iov_base, + uio->uio_iov->iov_len, &uio->uio_offset)); + if (bytes >= 0) { + uio->uio_iov->iov_base = + ((uint8_t *)uio->uio_iov->iov_base) + bytes; + uio->uio_iov->iov_len -= bytes; + uio->uio_resid -= bytes; + } else { + error = -bytes; + if (error == ERESTARTSYS) + error = ERESTART; + } + } else + error = ENXIO; + + /* update kqfilter status, if any */ + linux_file_kqfilter_poll(filp, LINUX_KQ_FLAG_HAS_WRITE); + return (error); } @@ -1451,7 +1346,7 @@ linux_file_poll(struct file *file, int events, struct ucred *active_cred, filp->f_flags = file->f_flag; linux_set_current(td); if (filp->f_op->poll != NULL) - revents = filp->f_op->poll(filp, LINUX_POLL_TABLE_NORMAL) & events; + revents = OPW(file, td, filp->f_op->poll(filp, LINUX_POLL_TABLE_NORMAL)) & events; else revents = 0; @@ -1465,11 +1360,16 @@ linux_file_close(struct file *file, struct thread *td) int error; filp = (struct linux_file *)file->f_data; + + KASSERT(file_count(filp) == 0, ("File refcount(%d) is not zero", file_count(filp))); + filp->f_flags = file->f_flag; linux_set_current(td); linux_poll_wait_dequeue(filp); - error = -filp->f_op->release(NULL, filp); + error = -OPW(file, td, filp->f_op->release(filp->f_vnode, filp)); funsetown(&filp->f_sigio); + if (filp->f_vnode != NULL) + vdrop(filp->f_vnode); kfree(filp); return (error); @@ -1493,30 +1393,128 @@ linux_file_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *cred, case FIOASYNC: if (filp->f_op->fasync == NULL) break; - error = filp->f_op->fasync(0, filp, fp->f_flag & FASYNC); + error = -OPW(fp, td, filp->f_op->fasync(0, filp, fp->f_flag & FASYNC)); break; case FIOSETOWN: error = fsetown(*(int *)data, &filp->f_sigio); - if (error == 0) - error = filp->f_op->fasync(0, filp, - fp->f_flag & FASYNC); + if (error == 0) { + if (filp->f_op->fasync == NULL) + break; + error = -OPW(fp, td, filp->f_op->fasync(0, filp, + fp->f_flag & FASYNC)); + } break; case FIOGETOWN: *(int *)data = fgetown(&filp->f_sigio); break; default: - error = ENOTTY; + error = linux_file_ioctl_sub(fp, filp, cmd, data, td); break; } return (error); } +static int +linux_file_mmap_sub(struct thread *td, vm_size_t objsize, vm_prot_t prot, + vm_prot_t *maxprotp, int *flagsp, struct file *fp, + vm_ooffset_t *foff, vm_object_t *objp) +{ + /* + * Character devices do not provide private mappings + * of any kind: + */ + if ((*maxprotp & VM_PROT_WRITE) == 0 && + (prot & VM_PROT_WRITE) != 0) + return (EACCES); + if ((*flagsp & (MAP_PRIVATE | MAP_COPY)) != 0) + return (EINVAL); + + return (linux_file_mmap_single(fp, foff, objsize, objp, (int)prot, td)); +} + +static int +linux_file_mmap(struct file *fp, vm_map_t map, vm_offset_t *addr, vm_size_t size, + vm_prot_t prot, vm_prot_t cap_maxprot, int flags, vm_ooffset_t foff, + struct thread *td) +{ + struct linux_file *filp; + struct mount *mp; + struct vnode *vp; + vm_object_t object; + vm_prot_t maxprot; + int error; + + filp = (struct linux_file *)fp->f_data; + + vp = filp->f_vnode; + if (vp == NULL) + return (EOPNOTSUPP); + + /* + * Ensure that file and memory protections are + * compatible. + */ + mp = vp->v_mount; + if (mp != NULL && (mp->mnt_flag & MNT_NOEXEC) != 0) { + maxprot = VM_PROT_NONE; + if ((prot & VM_PROT_EXECUTE) != 0) + return (EACCES); + } else + maxprot = VM_PROT_EXECUTE; + if ((fp->f_flag & FREAD) != 0) + maxprot |= VM_PROT_READ; + else if ((prot & VM_PROT_READ) != 0) + return (EACCES); + + /* + * If we are sharing potential changes via MAP_SHARED and we + * are trying to get write permission although we opened it + * without asking for it, bail out. + * + * Note that most character devices always share mappings. + * + * Rely on linux_file_mmap_sub() to fail invalid MAP_PRIVATE + * requests rather than doing it here. + */ + if ((flags & MAP_SHARED) != 0) { + if ((fp->f_flag & FWRITE) != 0) + maxprot |= VM_PROT_WRITE; + else if ((prot & VM_PROT_WRITE) != 0) + return (EACCES); + } + maxprot &= cap_maxprot; + + error = linux_file_mmap_sub(td, size, prot, &maxprot, &flags, fp, &foff, + &object); + if (error != 0) + return (error); + + error = vm_mmap_object(map, addr, size, prot, maxprot, flags, object, + foff, FALSE, td); + if (error != 0) + vm_object_deallocate(object); + return (error); +} + static int linux_file_stat(struct file *fp, struct stat *sb, struct ucred *active_cred, struct thread *td) { + struct linux_file *filp; + struct vnode *vp; + int error; - return (EOPNOTSUPP); + filp = (struct linux_file *)fp->f_data; + if (filp->f_vnode == NULL) + return (EOPNOTSUPP); + + vp = filp->f_vnode; + + vn_lock(vp, LK_SHARED | LK_RETRY); + error = vn_stat(vp, sb, td->td_ucred, NOCRED, td); + VOP_UNLOCK(vp, 0); + + return (error); } static int @@ -1544,14 +1542,15 @@ linux_iminor(struct inode *inode) struct fileops linuxfileops = { .fo_read = linux_file_read, - .fo_write = invfo_rdwr, + .fo_write = linux_file_write, .fo_truncate = invfo_truncate, - .fo_kqfilter = invfo_kqfilter, + .fo_kqfilter = linux_file_kqfilter, .fo_stat = linux_file_stat, .fo_fill_kinfo = linux_file_fill_kinfo, .fo_poll = linux_file_poll, .fo_close = linux_file_close, .fo_ioctl = linux_file_ioctl, + .fo_mmap = linux_file_mmap, .fo_chmod = invfo_chmod, .fo_chown = invfo_chown, .fo_sendfile = invfo_sendfile,