freebsd-skq/sys/ufs/lfs/lfs_vfsops.c
Garrett Wollman 57bf258e3d Fix all areas of the system (or at least all those in LINT) to avoid storing
socket addresses in mbufs.  (Socket buffers are the one exception.)  A number
of kernel APIs needed to get fixed in order to make this happen.  Also,
fix three protocol families which kept PCBs in mbufs to not malloc them
instead.  Delete some old compatibility cruft while we're at it, and add
some new routines in the in_cksum family.
1997-08-16 19:16:27 +00:00

732 lines
19 KiB
C

/*
* Copyright (c) 1989, 1991, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)lfs_vfsops.c 8.20 (Berkeley) 6/10/95
* $Id: lfs_vfsops.c,v 1.20 1997/08/02 14:33:21 bde Exp $
*/
#include "opt_quota.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/buf.h>
#include <sys/fcntl.h>
#include <sys/disklabel.h>
#include <sys/malloc.h>
#include <miscfs/specfs/specdev.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/lfs/lfs.h>
#include <ufs/lfs/lfs_extern.h>
static int lfs_fhtovp __P((struct mount *, struct fid *, struct sockaddr *,
struct vnode **, int *, struct ucred **));
static int lfs_mount __P((struct mount *, char *, caddr_t,
struct nameidata *, struct proc *));
static int lfs_mountfs __P((struct vnode *, struct mount *, struct proc *));
static int lfs_statfs __P((struct mount *, struct statfs *, struct proc *));
static int lfs_sync __P((struct mount *, int, struct ucred *, struct proc *));
static int lfs_unmount __P((struct mount *, int, struct proc *));
static int lfs_vget __P((struct mount *, ino_t, struct vnode **));
static int lfs_vptofh __P((struct vnode *, struct fid *));
struct vfsops lfs_vfsops = {
lfs_mount,
ufs_start,
lfs_unmount,
ufs_root,
ufs_quotactl,
lfs_statfs,
lfs_sync,
lfs_vget,
lfs_fhtovp,
lfs_vptofh,
lfs_init,
#ifdef notyet
lfs_sysctl,
#endif
};
VFS_SET(lfs_vfsops, lfs, MOUNT_LFS, 0);
/*
* Called by main() when ufs is going to be mounted as root.
*/
int
lfs_mountroot()
{
struct fs *fs;
struct mount *mp;
struct proc *p = curproc; /* XXX */
int error;
/*
* Get vnodes for swapdev and rootdev.
*/
if ((error = bdevvp(swapdev, &swapdev_vp)) ||
(error = bdevvp(rootdev, &rootvp))) {
printf("lfs_mountroot: can't setup bdevvp's");
return (error);
}
if (error = vfs_rootmountalloc("lfs", "root_device", &mp))
return (error);
if (error = lfs_mountfs(rootvp, mp, p)) {
mp->mnt_vfc->vfc_refcount--;
vfs_unbusy(mp, p);
free(mp, M_MOUNT);
return (error);
}
simple_lock(&mountlist_slock);
CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
simple_unlock(&mountlist_slock);
(void)lfs_statfs(mp, &mp->mnt_stat, p);
vfs_unbusy(mp, p);
return (0);
}
/*
* lfs_mount
*
* Called when mounting local physical media
*
* PARAMETERS:
* mountroot
* mp mount point structure
* path NULL (flag for root mount!!!)
* data <unused>
* ndp <unused>
* p process (user credentials check [statfs])
*
* mount
* mp mount point structure
* path path to mount point
* data pointer to argument struct in user space
* ndp mount point namei() return (used for
* credentials on reload), reused to look
* up block device.
* p process (user credentials check)
*
* RETURNS: 0 Success
* !0 error number (errno.h)
*
* LOCK STATE:
*
* ENTRY
* mount point is locked
* EXIT
* mount point is locked
*
* NOTES:
* A NULL path can be used for a flag since the mount
* system call will fail with EFAULT in copyinstr in
* namei() if it is a genuine NULL from the user.
*
* Root mounts are not currently supported.
*/
static int
lfs_mount(mp, path, data, ndp, p)
register struct mount *mp;
char *path;
caddr_t data;
struct nameidata *ndp;
struct proc *p;
{
struct vnode *devvp;
struct ufs_args args;
struct ufsmount *ump = 0;
register struct lfs *fs; /* LFS */
u_int size;
int err;
mode_t accessmode;
/*
* Use NULL path to flag a root mount
*/
if( path == NULL) {
/*
***
* Mounting root file system
***
*/
/* XXX -- implement*/
panic("lfs_mountroot: can't setup bdevvp for root");
}
/*
***
* Mounting non-root file system or updating a file system
***
*/
/* copy in user arguments*/
if (err = copyin(data, (caddr_t)&args, sizeof (struct ufs_args)))
goto error_1;
/* Until LFS can do NFS right. XXX */
if (args.export.ex_flags & MNT_EXPORTED) {
err = EINVAL;
goto error_1;
}
/*
* If updating, check whether changing from read-only to
* read/write; if there is no device name, that's all we do.
*/
if (mp->mnt_flag & MNT_UPDATE) {
ump = VFSTOUFS(mp);
if (fs->lfs_ronly && (mp->mnt_flag & MNT_WANTRDWR)) {
/*
* If upgrade to read-write by non-root, then verify
* that user has necessary permissions on the device.
*/
if (p->p_ucred->cr_uid != 0) {
vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY,
p);
if (err = VOP_ACCESS(ump->um_devvp,
VREAD | VWRITE, p->p_ucred, p)) {
VOP_UNLOCK(ump->um_devvp, 0, p);
return (err);
}
VOP_UNLOCK(ump->um_devvp, 0, p);
}
fs->lfs_ronly = 0;
}
if (args.fspec == 0) {
/*
* Process export requests. Jumping to "success"
* will return the vfs_export() error code.
*/
err = vfs_export(mp, &ump->um_export, &args.export);
goto success;
}
}
/*
* Not an update, or updating the name: look up the name
* and verify that it refers to a sensible block device.
*/
NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p);
if (err = namei(ndp))
goto error_1;
devvp = ndp->ni_vp;
if (devvp->v_type != VBLK) {
err = ENOTBLK;
goto error_2;
}
if (major(devvp->v_rdev) >= nblkdev) {
err = ENXIO;
goto error_2;
}
/*
* If mount by non-root, then verify that user has necessary
* permissions on the device.
*/
if (p->p_ucred->cr_uid != 0) {
accessmode = VREAD;
if ((mp->mnt_flag & MNT_RDONLY) == 0)
accessmode |= VWRITE;
vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
if (err = VOP_ACCESS(devvp, accessmode, p->p_ucred, p)) {
vput(devvp);
return (err);
}
VOP_UNLOCK(devvp, 0, p);
}
if ((mp->mnt_flag & MNT_UPDATE) == 0)
err = lfs_mountfs(devvp, mp, p); /* LFS */
else {
if (devvp != ump->um_devvp)
err = EINVAL; /* needs translation */
else
vrele(devvp);
}
if (err) {
goto error_2;
}
ump = VFSTOUFS(mp);
fs = ump->um_lfs; /* LFS */
#ifdef NOTLFS /* LFS */
(void) copyinstr(path, fs->fs_fsmnt, sizeof(fs->fs_fsmnt) - 1, &size);
bzero(fs->fs_fsmnt + size, sizeof(fs->fs_fsmnt) - size);
bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname,
MNAMELEN);
(void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
&size);
bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
(void) ufs_statfs(mp, &mp->mnt_stat, p);
#else
(void)copyinstr(path, fs->lfs_fsmnt, sizeof(fs->lfs_fsmnt) - 1, &size);
bzero(fs->lfs_fsmnt + size, sizeof(fs->lfs_fsmnt) - size);
bcopy((caddr_t)fs->lfs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname,
MNAMELEN);
(void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
&size);
bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
(void) lfs_statfs(mp, &mp->mnt_stat, p);
#endif
error_2: /* error with devvp held*/
/* release devvp before failing*/
vrele(devvp);
error_1: /* no state to back out*/
success:
return( err);
}
/*
* Common code for mount and mountroot
* LFS specific
*/
static int
lfs_mountfs(devvp, mp, p)
register struct vnode *devvp;
struct mount *mp;
struct proc *p;
{
register struct lfs *fs;
register struct ufsmount *ump;
struct vnode *vp;
struct buf *bp;
struct partinfo dpart;
dev_t dev;
int error, i, ronly, size;
struct ucred *cred;
cred = p ? p->p_ucred : NOCRED;
/*
* Disallow multiple mounts of the same device.
* Disallow mounting of a device that is currently in use
* (except for root, which might share swap device for miniroot).
* Flush out any old buffers remaining from a previous use.
*/
if (error = vfs_mountedon(devvp))
return (error);
if (vcount(devvp) > 1 && devvp != rootvp)
return (EBUSY);
if (error = vinvalbuf(devvp, V_SAVE, cred, p, 0, 0))
return (error);
ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
if (error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p))
return (error);
if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, cred, p) != 0)
size = DEV_BSIZE;
else {
size = dpart.disklab->d_secsize;
#ifdef NEVER_USED
dpart.part->p_fstype = FS_LFS;
dpart.part->p_fsize = fs->lfs_fsize; /* frag size */
dpart.part->p_frag = fs->lfs_frag; /* frags per block */
dpart.part->p_cpg = fs->lfs_segshift; /* segment shift */
#endif
}
/* Don't free random space on error. */
bp = NULL;
ump = NULL;
/* Read in the superblock. */
if (error = bread(devvp, LFS_LABELPAD / size, LFS_SBPAD, cred, &bp))
goto out;
fs = (struct lfs *)bp->b_data;
/* Check the basics. */
if (fs->lfs_magic != LFS_MAGIC || fs->lfs_bsize > MAXBSIZE ||
fs->lfs_bsize < sizeof(struct lfs)) {
error = EINVAL; /* XXX needs translation */
goto out;
}
/* Allocate the mount structure, copy the superblock into it. */
ump = (struct ufsmount *)malloc(sizeof *ump, M_UFSMNT, M_WAITOK);
fs = ump->um_lfs = malloc(sizeof(struct lfs), M_UFSMNT, M_WAITOK);
bcopy(bp->b_data, fs, sizeof(struct lfs));
if (sizeof(struct lfs) < LFS_SBPAD) /* XXX why? */
bp->b_flags |= B_INVAL;
brelse(bp);
bp = NULL;
/* Set up the I/O information */
fs->lfs_iocount = 0;
/* Set up the ifile and lock aflags */
fs->lfs_doifile = 0;
fs->lfs_writer = 0;
fs->lfs_dirops = 0;
fs->lfs_seglock = 0;
/* Set the file system readonly/modify bits. */
fs->lfs_ronly = ronly;
if (ronly == 0)
fs->lfs_fmod = 1;
/* Initialize the mount structure. */
dev = devvp->v_rdev;
mp->mnt_data = (qaddr_t)ump;
mp->mnt_stat.f_fsid.val[0] = (long)dev;
mp->mnt_stat.f_fsid.val[1] = lfs_mount_type;
mp->mnt_maxsymlinklen = fs->lfs_maxsymlinklen;
mp->mnt_flag |= MNT_LOCAL;
ump->um_mountp = mp;
ump->um_dev = dev;
ump->um_devvp = devvp;
ump->um_bptrtodb = 0;
ump->um_seqinc = 1 << fs->lfs_fsbtodb;
ump->um_nindir = fs->lfs_nindir;
for (i = 0; i < MAXQUOTAS; i++)
ump->um_quotas[i] = NULLVP;
devvp->v_specflags |= SI_MOUNTEDON;
/*
* We use the ifile vnode for almost every operation. Instead of
* retrieving it from the hash table each time we retrieve it here,
* artificially increment the reference count and keep a pointer
* to it in the incore copy of the superblock.
*/
if (error = VFS_VGET(mp, LFS_IFILE_INUM, &vp))
goto out;
fs->lfs_ivnode = vp;
VREF(vp);
vput(vp);
return (0);
out:
if (bp)
brelse(bp);
(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, cred, p);
if (ump) {
free(ump->um_lfs, M_UFSMNT);
free(ump, M_UFSMNT);
mp->mnt_data = (qaddr_t)0;
}
return (error);
}
/*
* unmount system call
*/
static int
lfs_unmount(mp, mntflags, p)
struct mount *mp;
int mntflags;
struct proc *p;
{
register struct ufsmount *ump;
register struct lfs *fs;
int i, error, flags, ronly;
flags = 0;
if (mntflags & MNT_FORCE)
flags |= FORCECLOSE;
ump = VFSTOUFS(mp);
fs = ump->um_lfs;
#ifdef QUOTA
if (mp->mnt_flag & MNT_QUOTA) {
if (error = vflush(mp, fs->lfs_ivnode, SKIPSYSTEM|flags))
return (error);
for (i = 0; i < MAXQUOTAS; i++) {
if (ump->um_quotas[i] == NULLVP)
continue;
quotaoff(p, mp, i);
}
/*
* Here we fall through to vflush again to ensure
* that we have gotten rid of all the system vnodes.
*/
}
#endif
if (error = vflush(mp, fs->lfs_ivnode, flags))
return (error);
fs->lfs_clean = 1;
if (error = VFS_SYNC(mp, 1, p->p_ucred, p))
return (error);
if (fs->lfs_ivnode->v_dirtyblkhd.lh_first)
panic("lfs_unmount: still dirty blocks on ifile vnode");
vrele(fs->lfs_ivnode);
vgone(fs->lfs_ivnode);
ronly = fs->lfs_ronly;
ump->um_devvp->v_specflags &= ~SI_MOUNTEDON;
error = VOP_CLOSE(ump->um_devvp,
ronly ? FREAD : FREAD|FWRITE, NOCRED, p);
vrele(ump->um_devvp);
free(fs, M_UFSMNT);
free(ump, M_UFSMNT);
mp->mnt_data = (qaddr_t)0;
mp->mnt_flag &= ~MNT_LOCAL;
return (error);
}
/*
* Get file system statistics.
*/
static int
lfs_statfs(mp, sbp, p)
struct mount *mp;
register struct statfs *sbp;
struct proc *p;
{
register struct lfs *fs;
register struct ufsmount *ump;
ump = VFSTOUFS(mp);
fs = ump->um_lfs;
if (fs->lfs_magic != LFS_MAGIC)
panic("lfs_statfs: magic");
sbp->f_bsize = fs->lfs_fsize;
sbp->f_iosize = fs->lfs_bsize;
sbp->f_blocks = dbtofrags(fs,fs->lfs_dsize);
sbp->f_bfree = dbtofrags(fs, fs->lfs_bfree);
/*
* To compute the available space. Subtract the minimum free
* from the total number of blocks in the file system. Set avail
* to the smaller of this number and fs->lfs_bfree.
*/
sbp->f_bavail = fs->lfs_dsize * (100 - fs->lfs_minfree) / 100;
sbp->f_bavail =
sbp->f_bavail > fs->lfs_bfree ? fs->lfs_bfree : sbp->f_bavail;
sbp->f_bavail = dbtofrags(fs, sbp->f_bavail);
sbp->f_files = fs->lfs_nfiles;
sbp->f_ffree = sbp->f_bfree * INOPB(fs);
if (sbp != &mp->mnt_stat) {
sbp->f_type = mp->mnt_vfc->vfc_typenum;
bcopy((caddr_t)mp->mnt_stat.f_mntonname,
(caddr_t)&sbp->f_mntonname[0], MNAMELEN);
bcopy((caddr_t)mp->mnt_stat.f_mntfromname,
(caddr_t)&sbp->f_mntfromname[0], MNAMELEN);
}
return (0);
}
/*
* Go through the disk queues to initiate sandbagged IO;
* go through the inodes to write those that have been modified;
* initiate the writing of the super block if it has been modified.
*
* Note: we are always called with the filesystem marked `MPBUSY'.
*/
static int
lfs_sync(mp, waitfor, cred, p)
struct mount *mp;
int waitfor;
struct ucred *cred;
struct proc *p;
{
int error;
/* All syncs must be checkpoints until roll-forward is implemented. */
error = lfs_segwrite(mp, SEGM_CKP | (waitfor ? SEGM_SYNC : 0));
#ifdef QUOTA
qsync(mp);
#endif
return (error);
}
/*
* Look up an LFS dinode number to find its incore vnode. If not already
* in core, read it in from the specified device. Return the inode locked.
* Detection and handling of mount points must be done by the calling routine.
*/
static int
lfs_vget(mp, ino, vpp)
struct mount *mp;
ino_t ino;
struct vnode **vpp;
{
register struct lfs *fs;
register struct inode *ip;
struct buf *bp;
struct ifile *ifp;
struct vnode *vp;
struct ufsmount *ump;
ufs_daddr_t daddr;
dev_t dev;
int error;
ump = VFSTOUFS(mp);
dev = ump->um_dev;
if ((*vpp = ufs_ihashget(dev, ino)) != NULL)
return (0);
/* Translate the inode number to a disk address. */
fs = ump->um_lfs;
if (ino == LFS_IFILE_INUM)
daddr = fs->lfs_idaddr;
else {
LFS_IENTRY(ifp, fs, ino, bp);
daddr = ifp->if_daddr;
brelse(bp);
if (daddr == LFS_UNUSED_DADDR)
return (ENOENT);
}
/* Allocate new vnode/inode. */
if (error = lfs_vcreate(mp, ino, &vp)) {
*vpp = NULL;
return (error);
}
/*
* Put it onto its hash chain and lock it so that other requests for
* this inode will block if they arrive while we are sleeping waiting
* for old data structures to be purged or for the contents of the
* disk portion of this inode to be read.
*/
ip = VTOI(vp);
ufs_ihashins(ip);
/*
* XXX
* This may not need to be here, logically it should go down with
* the i_devvp initialization.
* Ask Kirk.
*/
ip->i_lfs = ump->um_lfs;
/* Read in the disk contents for the inode, copy into the inode. */
if (error =
bread(ump->um_devvp, daddr, (int)fs->lfs_bsize, NOCRED, &bp)) {
/*
* The inode does not contain anything useful, so it would
* be misleading to leave it on its hash chain. With mode
* still zero, it will be unlinked and returned to the free
* list by vput().
*/
vput(vp);
brelse(bp);
*vpp = NULL;
return (error);
}
ip->i_din = *lfs_ifind(fs, ino, (struct dinode *)bp->b_data);
brelse(bp);
/*
* Initialize the vnode from the inode, check for aliases. In all
* cases re-init ip, the underlying vnode/inode may have changed.
*/
if (error = ufs_vinit(mp, lfs_specop_p, LFS_FIFOOPS, &vp)) {
vput(vp);
*vpp = NULL;
return (error);
}
/*
* Finish inode initialization now that aliasing has been resolved.
*/
ip->i_devvp = ump->um_devvp;
VREF(ip->i_devvp);
*vpp = vp;
return (0);
}
/*
* File handle to vnode
*
* Have to be really careful about stale file handles:
* - check that the inode number is valid
* - call lfs_vget() to get the locked inode
* - check for an unallocated inode (i_mode == 0)
* - check that the given client host has export rights and return
* those rights via. exflagsp and credanonp
*
* XXX
* use ifile to see if inode is allocated instead of reading off disk
* what is the relationship between my generational number and the NFS
* generational number.
*/
static int
lfs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp)
register struct mount *mp;
struct fid *fhp;
struct sockaddr *nam;
struct vnode **vpp;
int *exflagsp;
struct ucred **credanonp;
{
register struct ufid *ufhp;
ufhp = (struct ufid *)fhp;
if (ufhp->ufid_ino < ROOTINO)
return (ESTALE);
return (ufs_check_export(mp, ufhp, nam, vpp, exflagsp, credanonp));
}
/*
* Vnode pointer to File handle
*/
/* ARGSUSED */
static int
lfs_vptofh(vp, fhp)
struct vnode *vp;
struct fid *fhp;
{
register struct inode *ip;
register struct ufid *ufhp;
ip = VTOI(vp);
ufhp = (struct ufid *)fhp;
ufhp->ufid_len = sizeof(struct ufid);
ufhp->ufid_ino = ip->i_number;
ufhp->ufid_gen = ip->i_gen;
return (0);
}
/*
* Initialize the filesystem, most work done by ufs_init.
*/
int lfs_mount_type;
int
lfs_init(vfsp)
struct vfsconf *vfsp;
{
lfs_mount_type = vfsp->vfc_typenum;
return (ufs_init(vfsp));
}