371 lines
14 KiB
Groff
371 lines
14 KiB
Groff
.\" Copyright (c) 1983, 1991, 1993
|
|
.\" The Regents of the University of California. All rights reserved.
|
|
.\"
|
|
.\" Redistribution and use in source and binary forms, with or without
|
|
.\" modification, are permitted provided that the following conditions
|
|
.\" are met:
|
|
.\" 1. Redistributions of source code must retain the above copyright
|
|
.\" notice, this list of conditions and the following disclaimer.
|
|
.\" 2. Redistributions in binary form must reproduce the above copyright
|
|
.\" notice, this list of conditions and the following disclaimer in the
|
|
.\" documentation and/or other materials provided with the distribution.
|
|
.\" 3. All advertising materials mentioning features or use of this software
|
|
.\" must display the following acknowledgement:
|
|
.\" This product includes software developed by the University of
|
|
.\" California, Berkeley and its contributors.
|
|
.\" 4. Neither the name of the University nor the names of its contributors
|
|
.\" may be used to endorse or promote products derived from this software
|
|
.\" without specific prior written permission.
|
|
.\"
|
|
.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
.\" SUCH DAMAGE.
|
|
.\"
|
|
.\" @(#)fs.5 8.2 (Berkeley) 4/19/94
|
|
.\" $Id$
|
|
.\"
|
|
.Dd April 19, 1994
|
|
.Dt FS 5
|
|
.Os BSD 4.2
|
|
.Sh NAME
|
|
.Nm fs ,
|
|
.Nm inode
|
|
.Nd format of file system volume
|
|
.Sh SYNOPSIS
|
|
.Fd #include <sys/types.h>
|
|
.Fd #include <ufs/ffs/fs.h>
|
|
.Fd #include <ufs/ufs/inode.h>
|
|
.Sh DESCRIPTION
|
|
The files
|
|
.Aq Pa fs.h
|
|
and
|
|
.Aq Pa inode.h
|
|
declare several structures, defined variables and macros
|
|
which are used to create and manage the underlying format of
|
|
file system objects on random access devices (disks).
|
|
.Pp
|
|
The block size and number of blocks which
|
|
comprise a file system are parameters of the file system.
|
|
Sectors beginning at
|
|
.Dv BBLOCK
|
|
and continuing for
|
|
.Dv BBSIZE
|
|
are used
|
|
for a disklabel and for some hardware primary
|
|
and secondary bootstrapping programs.
|
|
.Pp
|
|
The actual file system begins at sector
|
|
.Dv SBLOCK
|
|
with the
|
|
.Em super-block
|
|
that is of size
|
|
.Dv SBSIZE .
|
|
The following structure describes the super-block and is
|
|
from the file
|
|
.Aq Pa ufs/ffs/fs.h :
|
|
.Bd -literal
|
|
/*
|
|
* Super block for an FFS file system.
|
|
*/
|
|
struct fs {
|
|
int32_t fs_firstfield; /* historic file system linked list, */
|
|
int32_t fs_unused_1; /* used for incore super blocks */
|
|
ufs_daddr_t fs_sblkno; /* addr of super-block in filesys */
|
|
ufs_daddr_t fs_cblkno; /* offset of cyl-block in filesys */
|
|
ufs_daddr_t fs_iblkno; /* offset of inode-blocks in filesys */
|
|
ufs_daddr_t fs_dblkno; /* offset of first data after cg */
|
|
int32_t fs_cgoffset; /* cylinder group offset in cylinder */
|
|
int32_t fs_cgmask; /* used to calc mod fs_ntrak */
|
|
time_t fs_time; /* last time written */
|
|
int32_t fs_size; /* number of blocks in fs */
|
|
int32_t fs_dsize; /* number of data blocks in fs */
|
|
int32_t fs_ncg; /* number of cylinder groups */
|
|
int32_t fs_bsize; /* size of basic blocks in fs */
|
|
int32_t fs_fsize; /* size of frag blocks in fs */
|
|
int32_t fs_frag; /* number of frags in a block in fs */
|
|
/* these are configuration parameters */
|
|
int32_t fs_minfree; /* minimum percentage of free blocks */
|
|
int32_t fs_rotdelay; /* num of ms for optimal next block */
|
|
int32_t fs_rps; /* disk revolutions per second */
|
|
/* these fields can be computed from the others */
|
|
int32_t fs_bmask; /* ``blkoff'' calc of blk offsets */
|
|
int32_t fs_fmask; /* ``fragoff'' calc of frag offsets */
|
|
int32_t fs_bshift; /* ``lblkno'' calc of logical blkno */
|
|
int32_t fs_fshift; /* ``numfrags'' calc number of frags */
|
|
/* these are configuration parameters */
|
|
int32_t fs_maxcontig; /* max number of contiguous blks */
|
|
int32_t fs_maxbpg; /* max number of blks per cyl group */
|
|
/* these fields can be computed from the others */
|
|
int32_t fs_fragshift; /* block to frag shift */
|
|
int32_t fs_fsbtodb; /* fsbtodb and dbtofsb shift constant */
|
|
int32_t fs_sbsize; /* actual size of super block */
|
|
int32_t fs_csmask; /* csum block offset */
|
|
int32_t fs_csshift; /* csum block number */
|
|
int32_t fs_nindir; /* value of NINDIR */
|
|
int32_t fs_inopb; /* value of INOPB */
|
|
int32_t fs_nspf; /* value of NSPF */
|
|
/* yet another configuration parameter */
|
|
int32_t fs_optim; /* optimization preference, see below */
|
|
/* these fields are derived from the hardware */
|
|
int32_t fs_npsect; /* # sectors/track including spares */
|
|
int32_t fs_interleave; /* hardware sector interleave */
|
|
int32_t fs_trackskew; /* sector 0 skew, per track */
|
|
int32_t fs_headswitch; /* head switch time, usec */
|
|
int32_t fs_trkseek; /* track-to-track seek, usec */
|
|
/* sizes determined by number of cylinder groups and their sizes */
|
|
ufs_daddr_t fs_csaddr; /* blk addr of cyl grp summary area */
|
|
int32_t fs_cssize; /* size of cyl grp summary area */
|
|
int32_t fs_cgsize; /* cylinder group size */
|
|
/* these fields are derived from the hardware */
|
|
int32_t fs_ntrak; /* tracks per cylinder */
|
|
int32_t fs_nsect; /* sectors per track */
|
|
int32_t fs_spc; /* sectors per cylinder */
|
|
/* this comes from the disk driver partitioning */
|
|
int32_t fs_ncyl; /* cylinders in file system */
|
|
/* these fields can be computed from the others */
|
|
int32_t fs_cpg; /* cylinders per group */
|
|
int32_t fs_ipg; /* inodes per group */
|
|
int32_t fs_fpg; /* blocks per group * fs_frag */
|
|
/* this data must be re-computed after crashes */
|
|
struct csum fs_cstotal;/* cylinder summary information */
|
|
/* these fields are cleared at mount time */
|
|
int8_t fs_fmod; /* super block modified flag */
|
|
int8_t fs_clean; /* file system is clean flag */
|
|
int8_t fs_ronly; /* mounted read-only flag */
|
|
int8_t fs_flags; /* currently unused flag */
|
|
u_char fs_fsmnt[MAXMNTLEN]; /* name mounted on */
|
|
/* these fields retain the current block allocation info */
|
|
int32_t fs_cgrotor; /* last cg searched */
|
|
struct csum *fs_csp[MAXCSBUFS];/* list of fs_cs info buffers */
|
|
int32_t *fs_maxcluster;/* max cluster in each cyl group */
|
|
int32_t fs_cpc; /* cyl per cycle in postbl */
|
|
int16_t fs_opostbl[16][8]; /* old rotation block list head */
|
|
int32_t fs_sparecon[50]; /* reserved for future constants */
|
|
int32_t fs_contigsumsize; /* size of cluster summary array */
|
|
int32_t fs_maxsymlinklen;/* max length of an internal symlink */
|
|
int32_t fs_inodefmt; /* format of on-disk inodes */
|
|
u_int64_t fs_maxfilesize;/* maximum representable file size */
|
|
int64_t fs_qbmask; /* ~fs_bmask for use with 64-bit size */
|
|
int64_t fs_qfmask; /* ~fs_fmask for use with 64-bit size */
|
|
int32_t fs_state; /* validate fs_clean field */
|
|
int32_t fs_postblformat;/* format of positional layout tables */
|
|
int32_t fs_nrpos; /* number of rotational positions */
|
|
int32_t fs_postbloff; /* (u_int16) rotation block list head */
|
|
int32_t fs_rotbloff; /* (u_int8) blocks for each rotation */
|
|
int32_t fs_magic; /* magic number */
|
|
u_int8_t fs_space[1]; /* list of blocks for each rotation */
|
|
/* actually longer */
|
|
};
|
|
|
|
/*
|
|
* Filesystem identification
|
|
*/
|
|
#define FS_MAGIC 0x011954 /* the fast filesystem magic number */
|
|
#define FS_OKAY 0x7c269d38 /* superblock checksum */
|
|
#define FS_42INODEFMT -1 /* 4.2BSD inode format */
|
|
#define FS_44INODEFMT 2 /* 4.4BSD inode format */
|
|
/*
|
|
* Preference for optimization.
|
|
*/
|
|
#define FS_OPTTIME 0 /* minimize allocation time */
|
|
#define FS_OPTSPACE 1 /* minimize disk fragmentation */
|
|
|
|
/*
|
|
* Rotational layout table format types
|
|
*/
|
|
#define FS_42POSTBLFMT -1 /* 4.2BSD rotational table format */
|
|
#define FS_DYNAMICPOSTBLFMT 1 /* dynamic rotational table format */
|
|
.Ed
|
|
.Pp
|
|
Each disk drive contains some number of file systems.
|
|
A file system consists of a number of cylinder groups.
|
|
Each cylinder group has inodes and data.
|
|
.Pp
|
|
A file system is described by its super-block, which in turn
|
|
describes the cylinder groups. The super-block is critical
|
|
data and is replicated in each cylinder group to protect against
|
|
catastrophic loss. This is done at file system creation
|
|
time and the critical
|
|
super-block data does not change, so the copies need not be
|
|
referenced further unless disaster strikes.
|
|
.Pp
|
|
Addresses stored in inodes are capable of addressing fragments
|
|
of `blocks'. File system blocks of at most size
|
|
.Dv MAXBSIZE
|
|
can
|
|
be optionally broken into 2, 4, or 8 pieces, each of which is
|
|
addressable; these pieces may be
|
|
.Dv DEV_BSIZE ,
|
|
or some multiple of
|
|
a
|
|
.Dv DEV_BSIZE
|
|
unit.
|
|
.Pp
|
|
Large files consist of exclusively large data blocks. To avoid
|
|
undue wasted disk space, the last data block of a small file is
|
|
allocated as only as many fragments of a large block as are
|
|
necessary. The file system format retains only a single pointer
|
|
to such a fragment, which is a piece of a single large block that
|
|
has been divided. The size of such a fragment is determinable from
|
|
information in the inode, using the
|
|
.Fn blksize fs ip lbn
|
|
macro.
|
|
.Pp
|
|
The file system records space availability at the fragment level;
|
|
to determine block availability, aligned fragments are examined.
|
|
.Pp
|
|
The root inode is the root of the file system.
|
|
Inode 0 can't be used for normal purposes and
|
|
historically bad blocks were linked to inode 1,
|
|
thus the root inode is 2 (inode 1 is no longer used for
|
|
this purpose; however, numerous dump tapes make this
|
|
assumption, so we are stuck with it).
|
|
.Pp
|
|
The
|
|
.Fa fs_minfree
|
|
element gives the minimum acceptable percentage of file system
|
|
blocks that may be free. If the freelist drops below this level
|
|
only the super-user may continue to allocate blocks.
|
|
The
|
|
.Fa fs_minfree
|
|
element
|
|
may be set to 0 if no reserve of free blocks is deemed necessary,
|
|
however severe performance degradations will be observed if the
|
|
file system is run at greater than 90% full; thus the default
|
|
value of
|
|
.Fa fs_minfree
|
|
is 10%.
|
|
.Pp
|
|
Empirically the best trade-off between block fragmentation and
|
|
overall disk utilization at a loading of 90% comes with a
|
|
fragmentation of 8, thus the default fragment size is an eighth
|
|
of the block size.
|
|
.Pp
|
|
The element
|
|
.Fa fs_optim
|
|
specifies whether the file system should try to minimize the time spent
|
|
allocating blocks, or if it should attempt to minimize the space
|
|
fragmentation on the disk.
|
|
If the value of fs_minfree (see above) is less than 10%,
|
|
then the file system defaults to optimizing for space to avoid
|
|
running out of full sized blocks.
|
|
If the value of fs_minfree is greater than or equal to 10%,
|
|
fragmentation is unlikely to be problematical, and
|
|
the file system defaults to optimizing for time.
|
|
.Pp
|
|
.Em Cylinder group related limits :
|
|
Each cylinder keeps track of the availability of blocks at different
|
|
rotational positions, so that sequential blocks can be laid out
|
|
with minimum rotational latency. With the default of 8 distinguished
|
|
rotational positions, the resolution of the
|
|
summary information is 2ms for a typical 3600 rpm drive.
|
|
.Pp
|
|
The element
|
|
.Fa fs_rotdelay
|
|
gives the minimum number of milliseconds to initiate
|
|
another disk transfer on the same cylinder.
|
|
It is used in determining the rotationally optimal
|
|
layout for disk blocks within a file;
|
|
the default value for
|
|
.Fa fs_rotdelay
|
|
is 2ms.
|
|
.Pp
|
|
Each file system has a statically allocated number of inodes.
|
|
An inode is allocated for each
|
|
.Dv NBPI
|
|
bytes of disk space.
|
|
The inode allocation strategy is extremely conservative.
|
|
.Pp
|
|
.Dv MINBSIZE
|
|
is the smallest allowable block size.
|
|
With a
|
|
.Dv MINBSIZE
|
|
of 4096
|
|
it is possible to create files of size
|
|
2^32 with only two levels of indirection.
|
|
.Dv MINBSIZE
|
|
must be big enough to hold a cylinder group block,
|
|
thus changes to
|
|
.Pq Fa struct cg
|
|
must keep its size within
|
|
.Dv MINBSIZE .
|
|
Note that super-blocks are never more than size
|
|
.Dv SBSIZE .
|
|
.Pp
|
|
The path name on which the file system is mounted is maintained in
|
|
.Fa fs_fsmnt .
|
|
.Dv MAXMNTLEN
|
|
defines the amount of space allocated in
|
|
the super-block for this name.
|
|
The limit on the amount of summary information per file system
|
|
is defined by
|
|
.Dv MAXCSBUFS .
|
|
For a 4096 byte block size, it is currently parameterized for a
|
|
maximum of two million cylinders.
|
|
.Pp
|
|
Per cylinder group information is summarized in blocks allocated
|
|
from the first cylinder group's data blocks.
|
|
These blocks are read in from
|
|
.Fa fs_csaddr
|
|
(size
|
|
.Fa fs_cssize )
|
|
in addition to the super-block.
|
|
.Pp
|
|
.Sy N.B.:
|
|
.Fn sizeof "struct csum"
|
|
must be a power of two in order for
|
|
the
|
|
.Fn fs_cs
|
|
macro to work.
|
|
.Pp
|
|
The
|
|
.Em "Super-block for a file system" :
|
|
The size of the rotational layout tables
|
|
is limited by the fact that the super-block is of size
|
|
.Dv SBSIZE .
|
|
The size of these tables is
|
|
.Em inversely
|
|
proportional to the block
|
|
size of the file system. The size of the tables is
|
|
increased when sector sizes are not powers of two,
|
|
as this increases the number of cylinders
|
|
included before the rotational pattern repeats
|
|
.Pq Fa fs_cpc .
|
|
The size of the rotational layout
|
|
tables is derived from the number of bytes remaining in
|
|
.Pq Fa struct fs .
|
|
.Pp
|
|
The number of blocks of data per cylinder group
|
|
is limited because cylinder groups are at most one block.
|
|
The inode and free block tables
|
|
must fit into a single block after deducting space for
|
|
the cylinder group structure
|
|
.Pq Fa struct cg .
|
|
.Pp
|
|
The
|
|
.Em Inode :
|
|
The inode is the focus of all file activity in the
|
|
.Tn UNIX
|
|
file system.
|
|
There is a unique inode allocated
|
|
for each active file,
|
|
each current directory, each mounted-on file,
|
|
text file, and the root.
|
|
An inode is `named' by its device/i-number pair.
|
|
For further information, see the include file
|
|
.Aq Pa ufs/ufs/inode.h .
|
|
.Sh HISTORY
|
|
A super-block structure named filsys appeared in
|
|
.At v6 .
|
|
The file system described in this manual appeared
|
|
in
|
|
.Bx 4.2 .
|