343 lines
13 KiB
Groff
343 lines
13 KiB
Groff
|
.\" Copyright (c) 1983, 1991, 1993
|
||
|
.\" The Regents of the University of California. All rights reserved.
|
||
|
.\"
|
||
|
.\" Redistribution and use in source and binary forms, with or without
|
||
|
.\" modification, are permitted provided that the following conditions
|
||
|
.\" are met:
|
||
|
.\" 1. Redistributions of source code must retain the above copyright
|
||
|
.\" notice, this list of conditions and the following disclaimer.
|
||
|
.\" 2. Redistributions in binary form must reproduce the above copyright
|
||
|
.\" notice, this list of conditions and the following disclaimer in the
|
||
|
.\" documentation and/or other materials provided with the distribution.
|
||
|
.\" 3. All advertising materials mentioning features or use of this software
|
||
|
.\" must display the following acknowledgement:
|
||
|
.\" This product includes software developed by the University of
|
||
|
.\" California, Berkeley and its contributors.
|
||
|
.\" 4. Neither the name of the University nor the names of its contributors
|
||
|
.\" may be used to endorse or promote products derived from this software
|
||
|
.\" without specific prior written permission.
|
||
|
.\"
|
||
|
.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||
|
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||
|
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||
|
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||
|
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||
|
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||
|
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||
|
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||
|
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||
|
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||
|
.\" SUCH DAMAGE.
|
||
|
.\"
|
||
|
.\" @(#)fs.5 8.2 (Berkeley) 4/19/94
|
||
|
.\"
|
||
|
.Dd April 19, 1994
|
||
|
.Dt FS 5
|
||
|
.Os BSD 4.2
|
||
|
.Sh NAME
|
||
|
.Nm fs ,
|
||
|
.Nm inode
|
||
|
.Nd format of file system volume
|
||
|
.Sh SYNOPSIS
|
||
|
.Fd #include <sys/types.h>
|
||
|
.Fd #include <ufs/fs.h>
|
||
|
.Fd #include <ufs/inode.h>
|
||
|
.Sh DESCRIPTION
|
||
|
The files
|
||
|
.Aq Pa fs.h
|
||
|
and
|
||
|
.Aq Pa inode.h
|
||
|
declare several structures, defined variables and macros
|
||
|
which are used to create and manage the underlying format of
|
||
|
file system objects on random access devices (disks).
|
||
|
.Pp
|
||
|
The block size and number of blocks which
|
||
|
comprise a file system are parameters of the file system.
|
||
|
Sectors beginning at
|
||
|
.Dv BBLOCK
|
||
|
and continuing for
|
||
|
.Dv BBSIZE
|
||
|
are used
|
||
|
for a disklabel and for some hardware primary
|
||
|
and secondary bootstrapping programs.
|
||
|
.Pp
|
||
|
The actual file system begins at sector
|
||
|
.Dv SBLOCK
|
||
|
with the
|
||
|
.Em super-block
|
||
|
that is of size
|
||
|
.Dv SBSIZE .
|
||
|
The following structure describes the super-block and is
|
||
|
from the file
|
||
|
.Aq Pa ufs/fs.h :
|
||
|
.Bd -literal
|
||
|
#define FS_MAGIC 0x011954
|
||
|
struct fs {
|
||
|
struct fs *fs_link; /* linked list of file systems */
|
||
|
struct fs *fs_rlink; /* used for incore super blocks */
|
||
|
daddr_t fs_sblkno; /* addr of super-block in filesys */
|
||
|
daddr_t fs_cblkno; /* offset of cyl-block in filesys */
|
||
|
daddr_t fs_iblkno; /* offset of inode-blocks in filesys */
|
||
|
daddr_t fs_dblkno; /* offset of first data after cg */
|
||
|
long fs_cgoffset; /* cylinder group offset in cylinder */
|
||
|
long fs_cgmask; /* used to calc mod fs_ntrak */
|
||
|
time_t fs_time; /* last time written */
|
||
|
long fs_size; /* number of blocks in fs */
|
||
|
long fs_dsize; /* number of data blocks in fs */
|
||
|
long fs_ncg; /* number of cylinder groups */
|
||
|
long fs_bsize; /* size of basic blocks in fs */
|
||
|
long fs_fsize; /* size of frag blocks in fs */
|
||
|
long fs_frag; /* number of frags in a block in fs */
|
||
|
/* these are configuration parameters */
|
||
|
long fs_minfree; /* minimum percentage of free blocks */
|
||
|
long fs_rotdelay; /* num of ms for optimal next block */
|
||
|
long fs_rps; /* disk revolutions per second */
|
||
|
/* these fields can be computed from the others */
|
||
|
long fs_bmask; /* ``blkoff'' calc of blk offsets */
|
||
|
long fs_fmask; /* ``fragoff'' calc of frag offsets */
|
||
|
long fs_bshift; /* ``lblkno'' calc of logical blkno */
|
||
|
long fs_fshift; /* ``numfrags'' calc number of frags */
|
||
|
/* these are configuration parameters */
|
||
|
long fs_maxcontig; /* max number of contiguous blks */
|
||
|
long fs_maxbpg; /* max number of blks per cyl group */
|
||
|
/* these fields can be computed from the others */
|
||
|
long fs_fragshift; /* block to frag shift */
|
||
|
long fs_fsbtodb; /* fsbtodb and dbtofsb shift constant */
|
||
|
long fs_sbsize; /* actual size of super block */
|
||
|
long fs_csmask; /* csum block offset */
|
||
|
long fs_csshift; /* csum block number */
|
||
|
long fs_nindir; /* value of NINDIR */
|
||
|
long fs_inopb; /* value of INOPB */
|
||
|
long fs_nspf; /* value of NSPF */
|
||
|
/* yet another configuration parameter */
|
||
|
long fs_optim; /* optimization preference, see below */
|
||
|
/* these fields are derived from the hardware */
|
||
|
long fs_npsect; /* # sectors/track including spares */
|
||
|
long fs_interleave; /* hardware sector interleave */
|
||
|
long fs_trackskew; /* sector 0 skew, per track */
|
||
|
long fs_headswitch; /* head switch time, usec */
|
||
|
long fs_trkseek; /* track-to-track seek, usec */
|
||
|
/* sizes determined by number of cylinder groups and their sizes */
|
||
|
daddr_t fs_csaddr; /* blk addr of cyl grp summary area */
|
||
|
long fs_cssize; /* size of cyl grp summary area */
|
||
|
long fs_cgsize; /* cylinder group size */
|
||
|
/* these fields are derived from the hardware */
|
||
|
long fs_ntrak; /* tracks per cylinder */
|
||
|
long fs_nsect; /* sectors per track */
|
||
|
long fs_spc; /* sectors per cylinder */
|
||
|
/* this comes from the disk driver partitioning */
|
||
|
long fs_ncyl; /* cylinders in file system */
|
||
|
/* these fields can be computed from the others */
|
||
|
long fs_cpg; /* cylinders per group */
|
||
|
long fs_ipg; /* inodes per group */
|
||
|
long fs_fpg; /* blocks per group * fs_frag */
|
||
|
/* this data must be re-computed after crashes */
|
||
|
struct csum fs_cstotal; /* cylinder summary information */
|
||
|
/* these fields are cleared at mount time */
|
||
|
char fs_fmod; /* super block modified flag */
|
||
|
char fs_clean; /* file system is clean flag */
|
||
|
char fs_ronly; /* mounted read-only flag */
|
||
|
char fs_flags; /* currently unused flag */
|
||
|
char fs_fsmnt[MAXMNTLEN]; /* name mounted on */
|
||
|
/* these fields retain the current block allocation info */
|
||
|
long fs_cgrotor; /* last cg searched */
|
||
|
struct csum *fs_csp[MAXCSBUFS]; /* list of fs_cs info buffers */
|
||
|
long fs_cpc; /* cyl per cycle in postbl */
|
||
|
short fs_opostbl[16][8]; /* old rotation block list head */
|
||
|
long fs_sparecon[56]; /* reserved for future constants */
|
||
|
quad fs_qbmask; /* ~fs_bmask - for use with quad size */
|
||
|
quad fs_qfmask; /* ~fs_fmask - for use with quad size */
|
||
|
long fs_postblformat; /* format of positional layout tables */
|
||
|
long fs_nrpos; /* number of rotational positions */
|
||
|
long fs_postbloff; /* (short) rotation block list head */
|
||
|
long fs_rotbloff; /* (u_char) blocks for each rotation */
|
||
|
long fs_magic; /* magic number */
|
||
|
u_char fs_space[1]; /* list of blocks for each rotation */
|
||
|
/* actually longer */
|
||
|
};
|
||
|
.Ed
|
||
|
.Pp
|
||
|
Each disk drive contains some number of file systems.
|
||
|
A file system consists of a number of cylinder groups.
|
||
|
Each cylinder group has inodes and data.
|
||
|
.Pp
|
||
|
A file system is described by its super-block, which in turn
|
||
|
describes the cylinder groups. The super-block is critical
|
||
|
data and is replicated in each cylinder group to protect against
|
||
|
catastrophic loss. This is done at file system creation
|
||
|
time and the critical
|
||
|
super-block data does not change, so the copies need not be
|
||
|
referenced further unless disaster strikes.
|
||
|
.Pp
|
||
|
Addresses stored in inodes are capable of addressing fragments
|
||
|
of `blocks'. File system blocks of at most size
|
||
|
.Dv MAXBSIZE
|
||
|
can
|
||
|
be optionally broken into 2, 4, or 8 pieces, each of which is
|
||
|
addressable; these pieces may be
|
||
|
.Dv DEV_BSIZE ,
|
||
|
or some multiple of
|
||
|
a
|
||
|
.Dv DEV_BSIZE
|
||
|
unit.
|
||
|
.Pp
|
||
|
Large files consist of exclusively large data blocks. To avoid
|
||
|
undue wasted disk space, the last data block of a small file is
|
||
|
allocated as only as many fragments of a large block as are
|
||
|
necessary. The file system format retains only a single pointer
|
||
|
to such a fragment, which is a piece of a single large block that
|
||
|
has been divided. The size of such a fragment is determinable from
|
||
|
information in the inode, using the
|
||
|
.Fn blksize fs ip lbn
|
||
|
macro.
|
||
|
.Pp
|
||
|
The file system records space availability at the fragment level;
|
||
|
to determine block availability, aligned fragments are examined.
|
||
|
.Pp
|
||
|
The root inode is the root of the file system.
|
||
|
Inode 0 can't be used for normal purposes and
|
||
|
historically bad blocks were linked to inode 1,
|
||
|
thus the root inode is 2 (inode 1 is no longer used for
|
||
|
this purpose; however, numerous dump tapes make this
|
||
|
assumption, so we are stuck with it).
|
||
|
.Pp
|
||
|
The
|
||
|
.Fa fs_minfree
|
||
|
element gives the minimum acceptable percentage of file system
|
||
|
blocks that may be free. If the freelist drops below this level
|
||
|
only the super-user may continue to allocate blocks.
|
||
|
The
|
||
|
.Fa fs_minfree
|
||
|
element
|
||
|
may be set to 0 if no reserve of free blocks is deemed necessary,
|
||
|
however severe performance degradations will be observed if the
|
||
|
file system is run at greater than 90% full; thus the default
|
||
|
value of
|
||
|
.Fa fs_minfree
|
||
|
is 10%.
|
||
|
.Pp
|
||
|
Empirically the best trade-off between block fragmentation and
|
||
|
overall disk utilization at a loading of 90% comes with a
|
||
|
fragmentation of 8, thus the default fragment size is an eighth
|
||
|
of the block size.
|
||
|
.Pp
|
||
|
The element
|
||
|
.Fa fs_optim
|
||
|
specifies whether the file system should try to minimize the time spent
|
||
|
allocating blocks, or if it should attempt to minimize the space
|
||
|
fragmentation on the disk.
|
||
|
If the value of fs_minfree (see above) is less than 10%,
|
||
|
then the file system defaults to optimizing for space to avoid
|
||
|
running out of full sized blocks.
|
||
|
If the value of fs_minfree is greater than or equal to 10%,
|
||
|
fragmentation is unlikely to be problematical, and
|
||
|
the file system defaults to optimizing for time.
|
||
|
.Pp
|
||
|
.Em Cylinder group related limits :
|
||
|
Each cylinder keeps track of the availability of blocks at different
|
||
|
rotational positions, so that sequential blocks can be laid out
|
||
|
with minimum rotational latency. With the default of 8 distinguished
|
||
|
rotational positions, the resolution of the
|
||
|
summary information is 2ms for a typical 3600 rpm drive.
|
||
|
.Pp
|
||
|
The element
|
||
|
.Fa fs_rotdelay
|
||
|
gives the minimum number of milliseconds to initiate
|
||
|
another disk transfer on the same cylinder.
|
||
|
It is used in determining the rotationally optimal
|
||
|
layout for disk blocks within a file;
|
||
|
the default value for
|
||
|
.Fa fs_rotdelay
|
||
|
is 2ms.
|
||
|
.Pp
|
||
|
Each file system has a statically allocated number of inodes.
|
||
|
An inode is allocated for each
|
||
|
.Dv NBPI
|
||
|
bytes of disk space.
|
||
|
The inode allocation strategy is extremely conservative.
|
||
|
.Pp
|
||
|
.Dv MINBSIZE
|
||
|
is the smallest allowable block size.
|
||
|
With a
|
||
|
.Dv MINBSIZE
|
||
|
of 4096
|
||
|
it is possible to create files of size
|
||
|
2^32 with only two levels of indirection.
|
||
|
.Dv MINBSIZE
|
||
|
must be big enough to hold a cylinder group block,
|
||
|
thus changes to
|
||
|
.Pq Fa struct cg
|
||
|
must keep its size within
|
||
|
.Dv MINBSIZE .
|
||
|
Note that super-blocks are never more than size
|
||
|
.Dv SBSIZE .
|
||
|
.Pp
|
||
|
The path name on which the file system is mounted is maintained in
|
||
|
.Fa fs_fsmnt .
|
||
|
.Dv MAXMNTLEN
|
||
|
defines the amount of space allocated in
|
||
|
the super-block for this name.
|
||
|
The limit on the amount of summary information per file system
|
||
|
is defined by
|
||
|
.Dv MAXCSBUFS .
|
||
|
For a 4096 byte block size, it is currently parameterized for a
|
||
|
maximum of two million cylinders.
|
||
|
.Pp
|
||
|
Per cylinder group information is summarized in blocks allocated
|
||
|
from the first cylinder group's data blocks.
|
||
|
These blocks are read in from
|
||
|
.Fa fs_csaddr
|
||
|
(size
|
||
|
.Fa fs_cssize )
|
||
|
in addition to the super-block.
|
||
|
.Pp
|
||
|
.Sy N.B.:
|
||
|
.Fn sizeof "struct csum"
|
||
|
must be a power of two in order for
|
||
|
the
|
||
|
.Fn fs_cs
|
||
|
macro to work.
|
||
|
.Pp
|
||
|
The
|
||
|
.Em "Super-block for a file system" :
|
||
|
The size of the rotational layout tables
|
||
|
is limited by the fact that the super-block is of size
|
||
|
.Dv SBSIZE .
|
||
|
The size of these tables is
|
||
|
.Em inversely
|
||
|
proportional to the block
|
||
|
size of the file system. The size of the tables is
|
||
|
increased when sector sizes are not powers of two,
|
||
|
as this increases the number of cylinders
|
||
|
included before the rotational pattern repeats
|
||
|
.Pq Fa fs_cpc .
|
||
|
The size of the rotational layout
|
||
|
tables is derived from the number of bytes remaining in
|
||
|
.Pq Fa struct fs .
|
||
|
.Pp
|
||
|
The number of blocks of data per cylinder group
|
||
|
is limited because cylinder groups are at most one block.
|
||
|
The inode and free block tables
|
||
|
must fit into a single block after deducting space for
|
||
|
the cylinder group structure
|
||
|
.Pq Fa struct cg .
|
||
|
.Pp
|
||
|
The
|
||
|
.Em Inode :
|
||
|
The inode is the focus of all file activity in the
|
||
|
.Tn UNIX
|
||
|
file system.
|
||
|
There is a unique inode allocated
|
||
|
for each active file,
|
||
|
each current directory, each mounted-on file,
|
||
|
text file, and the root.
|
||
|
An inode is `named' by its device/i-number pair.
|
||
|
For further information, see the include file
|
||
|
.Aq Pa ufs/inode.h .
|
||
|
.Sh HISTORY
|
||
|
A super-block structure named filsys appeared in
|
||
|
.At v6 .
|
||
|
The file system described in this manual appeared
|
||
|
in
|
||
|
.Bx 4.2 .
|