freebsd-dev/usr.bin/catman/catman.c
Mark Murray bf7bec3366 Add C rewrites of catman and makewhatis. These aren't quite as
the submitter supplied them, as I did some WARNS=n fixups (mostly
const-ification).

Submitted by:	John Rochester <john@jrochester.org>
2002-05-18 09:19:08 +00:00

785 lines
18 KiB
C

/*-
* Copyright (c) 2002 John Rochester
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer,
* in this position and unchanged.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/param.h>

#include <ctype.h>
#include <dirent.h>
#include <err.h>
#include <fcntl.h>
#include <limits.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
/* Man path searched when no directories are given and MANPATH is unset. */
#define DEFAULT_MANPATH "/usr/share/man"

/* Classification of a directory name, as returned by directory_type(). */
enum {
	TOP_LEVEL_DIR = 0,	/* signifies a top-level man directory */
	MAN_SECTION_DIR = 1,	/* signifies a man section directory */
	UNKNOWN = 2		/* signifies an unclassifiable directory */
};

/* Bit flags OR-ed together in the value returned by test_path(). */
enum {
	TEST_EXISTS = 0x01,
	TEST_DIR = 0x02,
	TEST_FILE = 0x04,
	TEST_READABLE = 0x08,
	TEST_WRITABLE = 0x10,
	TEST_EXECUTABLE = 0x20
};
static int verbose; /* -v flag: be verbose with warnings */
static int pretend; /* -n flag: print out what would be done
instead of actually doing it (the option string accepts no -p) */
static int force; /* -f flag: force overwriting all cat pages */
static int rm_junk; /* -r flag: remove garbage pages */
static char *locale; /* user's locale if -L is used */
static char *lang_locale; /* short form of locale: the locale with its
"_XX" territory part removed */
static int exit_code; /* exit code to use when finished */
/*
* -T argument for nroff
*/
static const char *nroff_device = "ascii";
/*
* Mapping from locale to nroff device.
* Laid out as (codeset name, nroff device) pairs, terminated by a
* single NULL; determine_locale() walks it two entries at a time.
*/
static const char *locale_device[] = {
"KOI8-R", "koi8-r",
"ISO8859-1", "latin1",
"ISO_8859-1", "latin1",
"ISO8859-15", "latin1",
"ISO_8859-15", "latin1",
NULL
};
static uid_t uid; /* result of getuid(), set in main() */
static gid_t gids[NGROUPS_MAX]; /* group list filled by getgroups() */
static int ngids; /* return value of getgroups() */
static int starting_dir; /* descriptor for "." opened at startup;
process_mandir() fchdir()s back to it */
static char tmp_file[MAXPATHLEN]; /* temporary cat page being written, or
"" if none; removed by trap_signal() */
struct stat test_st; /* stat result of the last test_path() call */
/*
* A hashtable is an array of chains composed of this entry structure.
* An entry is identified by the (inode number, device number) pair.
*/
struct hash_entry {
ino_t inode_number; /* key: inode number */
dev_t device_number; /* key: device number */
const char *data; /* string stored for this key */
struct hash_entry *next; /* next entry in the same chain */
};
#define HASHTABLE_ALLOC 16384 /* allocation for hashtable (power of 2) */
#define HASH_MASK (HASHTABLE_ALLOC - 1) /* reduces an inode number to a chain index */
/* Directories seen so far; consulted and filled by already_visited(). */
static struct hash_entry *visited[HASHTABLE_ALLOC];
/* Source pages seen so far, each mapped to the cat page path recorded
* for it by process_page(). */
static struct hash_entry *links[HASHTABLE_ALLOC];
/*
 * Adds a string to a hashtable under the key (inode_number, device_number).
 * The new entry is pushed onto the front of its chain; the data pointer is
 * stored as given, not copied.  Terminates the program if the entry cannot
 * be allocated.
 */
static void
insert_hashtable(struct hash_entry **table,
    ino_t inode_number,
    dev_t device_number,
    const char *data)
{
	struct hash_entry **bucket = &table[inode_number & HASH_MASK];
	struct hash_entry *node = malloc(sizeof(*node));

	if (node == NULL)
		err(1, "can't insert into hashtable");

	node->inode_number = inode_number;
	node->device_number = device_number;
	node->data = data;

	/* Link in at the head of the chain. */
	node->next = *bucket;
	*bucket = node;
}
/*
 * Looks up the string stored in a hashtable under the key
 * (inode_number, device_number).  Returns the stored data pointer, or
 * NULL when no entry matches.
 */
static const char *
find_hashtable(struct hash_entry **table,
    ino_t inode_number,
    dev_t device_number)
{
	struct hash_entry *node;

	for (node = table[inode_number & HASH_MASK]; node != NULL;
	    node = node->next) {
		if (node->inode_number != inode_number)
			continue;
		if (node->device_number != device_number)
			continue;
		return node->data;
	}
	return NULL;
}
/*
 * Signal handler: removes the temporary cat page currently being written
 * (if tmp_file is non-empty) and terminates with status 1.
 *
 * Only async-signal-safe calls are made here: unlink() and _exit().
 * exit() is deliberately avoided because it runs atexit handlers and
 * flushes stdio, neither of which is safe from within a signal handler.
 */
static void
trap_signal(int sig __unused)
{
	if (tmp_file[0] != '\0')
		unlink(tmp_file);
	_exit(1);
}
/*
 * Handles a junk file found in a man or cat section directory.
 * With -v the reason is reported; with -r an "rm" line is printed and,
 * unless -n is in effect, the file is unlinked.  Note that name is
 * the path actually unlinked; mandir is only used in messages.
 */
static void
junk(const char *mandir, const char *name, const char *reason)
{
	if (verbose)
		fprintf(stderr, "%s/%s: %s\n", mandir, name, reason);

	if (!rm_junk)
		return;

	fprintf(stderr, "rm %s/%s\n", mandir, name);
	if (pretend)
		return;
	if (unlink(name) < 0)
		warn("%s/%s", mandir, name);
}
/*
 * Returns TOP_LEVEL_DIR for .../man, MAN_SECTION_DIR for .../manXXX,
 * and UNKNOWN for everything else.
 *
 * Only the last path component is examined.  Trailing slashes are removed
 * from dir IN PLACE before classification, so the caller's buffer must be
 * writable.  A section suffix (the XXX) may contain only alphanumerics
 * and '_'.
 */
static int
directory_type(char *dir)
{
	char *p;

	/* Strip trailing slashes, then locate the final '/'. */
	for (;;) {
		p = strrchr(dir, '/');
		if (p == NULL || p[1] != '\0')
			break;
		*p = '\0';
	}
	p = (p == NULL) ? dir : p + 1;

	if (strncmp(p, "man", 3) != 0)
		return UNKNOWN;
	p += 3;
	if (*p == '\0')
		return TOP_LEVEL_DIR;

	/*
	 * <ctype.h> functions require a value representable as
	 * unsigned char; a plain (possibly negative) char is undefined.
	 */
	while (isalnum((unsigned char)*p) || *p == '_') {
		if (*++p == '\0')
			return MAN_SECTION_DIR;
	}
	return UNKNOWN;
}
/*
 * Tests whether the given file name (without a preceding path)
 * is a proper man page name (like "mk-amd-map.8.gz").
 *
 * Allowed characters are alphanumerics, '_', '-', '+', '[', ':' and '.';
 * any other character makes the name invalid.  Each of '-', '+', '['
 * and ':' discards the dot seen so far, so only alphanumerics and '_'
 * may follow the last '.', and that '.' can be neither the first nor the
 * last character of the name.
 *
 * Returns non-zero for a valid name, 0 otherwise.
 */
static int
is_manpage_name(char *name)
{
	char *lastdot = NULL;
	char *n;

	for (n = name; *n != '\0'; n++) {
		/* Cast: <ctype.h> is undefined for negative char values. */
		if (isalnum((unsigned char)*n) || *n == '_')
			continue;
		switch (*n) {
		case '-':
		case '+':
		case '[':
		case ':':
			lastdot = NULL;
			break;
		case '.':
			lastdot = n;
			break;
		default:
			return 0;
		}
	}
	/* Test for NULL first: ordering a null pointer is undefined. */
	return lastdot != NULL && lastdot > name && lastdot + 1 < n;
}
/*
 * Returns non-zero if name ends in ".gz" and has at least one character
 * before that suffix.
 */
static int
is_gzipped(char *name)
{
	/* size_t avoids narrowing strlen()'s result to int. */
	size_t len = strlen(name);

	return len >= 4 && strcmp(name + len - 3, ".gz") == 0;
}
/*
 * Converts manXXX to catXXX.
 *
 * Returns a newly allocated string in which the first three characters
 * of section are replaced by "cat"; the caller owns it and should free()
 * it.  A section shorter than three characters yields just "cat" rather
 * than writing past the end of the copy.  Terminates the program if
 * memory cannot be allocated.
 */
static char *
get_cat_section(char *section)
{
	size_t len = strlen(section);
	size_t tail = (len > 3) ? len - 3 : 0;
	char *cat_section;

	cat_section = malloc(3 + tail + 1);
	if (cat_section == NULL)
		err(1, "out of memory");
	memcpy(cat_section, "cat", 3);
	/* Copy whatever follows the "man" prefix, including the NUL. */
	memcpy(cat_section + 3, section + (len - tail), tail + 1);
	return cat_section;
}
/*
 * Converts .../man/manXXX to .../man.
 *
 * Returns a newly allocated copy of everything before the last '/' in
 * section; the caller owns it and should free() it.  Two edge cases are
 * handled explicitly: a path with no '/' at all yields ".", and a path
 * whose only '/' is the leading one (e.g. "/man1") yields "/".
 * Terminates the program if memory cannot be allocated.
 */
static char *
get_mandir(char *section)
{
	const char *base = section;
	char *slash = strrchr(section, '/');
	char *mandir;
	size_t len;

	if (slash == NULL) {
		base = ".";
		len = 1;
	} else if (slash == section) {
		base = "/";
		len = 1;
	} else {
		len = (size_t)(slash - section);
	}

	mandir = malloc(len + 1);
	if (mandir == NULL)
		err(1, "out of memory");
	memcpy(mandir, base, len);
	mandir[len] = '\0';
	return mandir;
}
/*
 * Tests to see if the given directory has already been visited.
 *
 * dir is stat()ed and its (inode, device) pair looked up in the visited
 * table.  mandir, when not NULL, is only used as a prefix in messages.
 * If count_visit is non-zero, a directory not seen before is recorded as
 * visited.
 *
 * Returns 1 if the directory should be skipped (stat failure, which also
 * sets exit_code, or already visited), 0 otherwise.
 */
static int
already_visited(char *mandir, char *dir, int count_visit)
{
	struct stat sb;

	if (stat(dir, &sb) < 0) {
		if (mandir == NULL)
			warn("%s", dir);
		else
			warn("%s/%s", mandir, dir);
		exit_code = 1;
		return 1;
	}

	if (find_hashtable(visited, sb.st_ino, sb.st_dev) == NULL) {
		/* First sighting: optionally remember it. */
		if (count_visit)
			insert_hashtable(visited, sb.st_ino, sb.st_dev, "");
		return 0;
	}

	if (mandir == NULL)
		warnx("already visited %s", dir);
	else
		warnx("already visited %s/%s", mandir, dir);
	return 1;
}
/*
 * Returns a set of TEST_* bits describing a file's type and permissions,
 * or 0 if the file cannot be stat()ed.
 * If mod_time isn't NULL, it will contain the file's modification time.
 *
 * The permission class is selected from the file's ownership: owner bits
 * when st_uid equals uid, otherwise group bits when st_gid appears in
 * gids[], otherwise the "other" bits.  The stat result is left in the
 * global test_st, with st_mode shifted so that the selected class sits in
 * the "other" position.
 */
static int
test_path(char *name, time_t *mod_time)
{
	int flags = TEST_EXISTS;
	int shift = 0;
	int g;

	if (stat(name, &test_st) < 0)
		return 0;
	if (mod_time != NULL)
		*mod_time = test_st.st_mtime;

	if (S_ISDIR(test_st.st_mode))
		flags |= TEST_DIR;
	else if (S_ISREG(test_st.st_mode))
		flags |= TEST_FILE;

	/* Pick how far to shift to bring the right class into place. */
	if (test_st.st_uid == uid) {
		shift = 6;
	} else {
		for (g = 0; g < ngids; g++) {
			if (test_st.st_gid == gids[g]) {
				shift = 3;
				break;
			}
		}
	}
	test_st.st_mode >>= shift;

	if (test_st.st_mode & S_IROTH)
		flags |= TEST_READABLE;
	if (test_st.st_mode & S_IWOTH)
		flags |= TEST_WRITABLE;
	if (test_st.st_mode & S_IXOTH)
		flags |= TEST_EXECUTABLE;
	return flags;
}
/*
 * Checks whether a file is a symbolic link.
 * Returns 1 if lstat() succeeds and reports a symlink, 0 otherwise.
 */
static int
is_symlink(char *path)
{
	struct stat sb;

	if (lstat(path, &sb) < 0)
		return 0;
	return S_ISLNK(sb.st_mode) ? 1 : 0;
}
/*
 * Tests to see if the given directory can be written to.
 * Only does anything in verbose mode, where a notice is printed when
 * test_path() does not report the directory as writable.
 */
static void
check_writable(char *mandir)
{
	if (!verbose)
		return;
	if (test_path(mandir, NULL) & TEST_WRITABLE)
		return;
	fprintf(stderr, "%s: not writable - will only be able to write to existing cat directories\n", mandir);
}
/*
 * If the directory exists, attempt to make it writable, otherwise
 * attempt to create it.
 *
 * mandir is used only as a prefix in warnings.  Returns 1 on success
 * (including the pretend case, where nothing is created) and 0 on
 * failure, in which case exit_code is set.
 *
 * NOTE(review): the chmod() of an existing directory is performed even
 * when -n is in effect -- confirm whether that is intended.
 */
static int
make_writable_dir(char *mandir, char *dir)
{
	int flags = test_path(dir, NULL);

	if (flags != 0) {
		/* Something already exists under this name. */
		if ((flags & TEST_WRITABLE) || chmod(dir, 0755) >= 0)
			return 1;
		warn("%s/%s: chmod", mandir, dir);
		exit_code = 1;
		return 0;
	}

	if (verbose || pretend)
		fprintf(stderr, "mkdir %s\n", dir);
	if (pretend)
		return 1;

	/* Clear away anything stat() could not follow before creating. */
	unlink(dir);
	if (mkdir(dir, 0755) < 0) {
		warn("%s/%s: mkdir", mandir, dir);
		exit_code = 1;
		return 0;
	}
	return 1;
}
/*
 * Processes a single man page source by using nroff to create
 * the preformatted cat page.
 *
 *   mandir       top-level man directory, used only in messages
 *   src          path of the source page
 *   cat          path of the gzipped cat page to produce
 *   src_gzipped  non-zero if src must be read with zcat rather than cat
 *
 * A source that is not a readable regular file is skipped (with a warning
 * unless it is a directory).  An existing readable cat page that is at
 * least as new as the source is left alone unless -f was given.  A source
 * whose (inode, device) has been seen before is hard-linked to the cat
 * page generated the first time instead of being formatted again.
 * Output is written to "<cat>.tmp" and renamed into place; tmp_file holds
 * that name while the pipeline runs so trap_signal() can remove it.
 *
 * A failing pipeline terminates the program.
 */
static void
process_page(char *mandir, char *src, char *cat, int src_gzipped)
{
	const int usable = TEST_FILE | TEST_READABLE;
	int src_test, cat_test;
	int n, status;
	time_t src_mtime, cat_mtime;
	char cmd[2 * MAXPATHLEN + 128];
	char *cat_copy;
	dev_t src_dev;
	ino_t src_ino;
	const char *link_name;

	src_test = test_path(src, &src_mtime);
	/* Both bits are required: a readable regular file. */
	if ((src_test & usable) != usable) {
		if (!(src_test & TEST_DIR)) {
			warnx("%s/%s: unreadable", mandir, src);
			exit_code = 1;
			if (rm_junk && is_symlink(src))
				junk(mandir, src, "bogus symlink");
		}
		return;
	}
	/* test_path() left the source's stat data in test_st. */
	src_dev = test_st.st_dev;
	src_ino = test_st.st_ino;

	cat_test = test_path(cat, &cat_mtime);
	if ((cat_test & usable) == usable) {
		if (!force && cat_mtime >= src_mtime) {
			if (verbose) {
				fprintf(stderr, "\t%s/%s: up to date\n",
				    mandir, src);
			}
			return;
		}
	}

	/*
	 * Is the man page a link to one we've already processed?
	 */
	if ((link_name = find_hashtable(links, src_ino, src_dev)) != NULL) {
		if (verbose || pretend) {
			fprintf(stderr, "%slink %s -> %s\n",
			    verbose ? "\t" : "", cat, link_name);
		}
		if (!pretend) {
			/* link() fails if the target name already exists. */
			(void)unlink(cat);
			if (link(link_name, cat) < 0)
				warn("%s %s: link", link_name, cat);
		}
		return;
	}

	cat_copy = strdup(cat);
	if (cat_copy == NULL)
		err(1, "out of memory");
	insert_hashtable(links, src_ino, src_dev, cat_copy);

	if (verbose || pretend) {
		fprintf(stderr, "%sformat %s -> %s\n",
		    verbose ? "\t" : "", src, cat);
		if (pretend)
			return;
	}

	n = snprintf(tmp_file, sizeof tmp_file, "%s.tmp", cat);
	if (n < 0 || (size_t)n >= sizeof tmp_file) {
		warnx("%s.tmp: path too long", cat);
		tmp_file[0] = '\0';
		exit_code = 1;
		return;
	}
	n = snprintf(cmd, sizeof cmd,
	    "%scat %s | tbl | nroff -T%s -man | col | gzip -cn > %s.tmp",
	    src_gzipped ? "z" : "", src, nroff_device, cat);
	if (n < 0 || (size_t)n >= sizeof cmd) {
		/* Never hand a truncated command line to the shell. */
		warnx("%s/%s: formatting command too long", mandir, src);
		tmp_file[0] = '\0';
		exit_code = 1;
		return;
	}

	status = system(cmd);
	if (status == -1)
		err(1, "formatting pipeline");
	if (status != 0) {
		/* errno is meaningless here, so use errx(), not err(). */
		unlink(tmp_file);
		errx(1, "formatting pipeline");
	}
	if (rename(tmp_file, cat) < 0) {
		warn("%s", cat);
		unlink(tmp_file);
	}
	tmp_file[0] = '\0';
}
/*
* Scan the man section directory for pages and process each one,
* then check for junk in the corresponding cat section.
*/
static void
scan_section(char *mandir, char *section, char *cat_section)
{
struct dirent **entries;
char **expected = NULL;
int npages;
int nexpected = 0;
int i, e;
char *page_name;
char page_path[MAXPATHLEN];
char cat_path[MAXPATHLEN];
char gzip_path[MAXPATHLEN];
/*
* scan the man section directory for pages
*/
npages = scandir(section, &entries, NULL, alphasort);
if (npages < 0) {
warn("%s/%s", mandir, section);
exit_code = 1;
return;
}
if (verbose || rm_junk) {
/*
* Maintain a list of all cat pages that should exist,
* corresponding to existing man pages.
*/
expected = (char **) calloc(npages, sizeof(char *));
}
for (i = 0; i < npages; free(entries[i++])) {
page_name = entries[i]->d_name;
snprintf(page_path, sizeof page_path, "%s/%s", section,
page_name);
if (!is_manpage_name(page_name)) {
if (!(test_path(page_path, NULL) & TEST_DIR)) {
junk(mandir, page_path,
"invalid man page name");
}
continue;
}
if (is_gzipped(page_name)) {
snprintf(cat_path, sizeof cat_path, "%s/%s",
cat_section, page_name);
if (expected != NULL)
expected[nexpected++] = strdup(page_name);
process_page(mandir, page_path, cat_path, 1);
} else {
/*
* We've got an uncompressed man page,
* check to see if there's a (preferred)
* compressed one.
*/
snprintf(gzip_path, sizeof gzip_path, "%s.gz",
page_path);
if (test_path(gzip_path, NULL) != 0) {
junk(mandir, page_path,
"man page unused due to existing .gz");
} else {
if (verbose) {
fprintf(stderr,
"warning, %s is uncompressed\n",
page_path);
}
snprintf(cat_path, sizeof cat_path, "%s/%s.gz",
cat_section, page_name);
if (expected != NULL) {
asprintf(&expected[nexpected++],
"%s.gz", page_name);
}
process_page(mandir, page_path, cat_path, 0);
}
}
}
free(entries);
if (expected == NULL)
return;
/*
* scan cat sections for junk
*/
npages = scandir(cat_section, &entries, NULL, alphasort);
e = 0;
for (i = 0; i < npages; free(entries[i++])) {
const char *junk_reason;
int cmp = 1;
page_name = entries[i]->d_name;
if (strcmp(page_name, ".") == 0 || strcmp(page_name, "..") == 0)
continue;
/*
* Keep the index into the expected cat page list
* ahead of the name we've found.
*/
while (e < nexpected &&
(cmp = strcmp(page_name, expected[e])) > 0)
free(expected[e++]);
if (cmp == 0)
continue;
/* we have an unexpected page */
if (!is_manpage_name(page_name)) {
junk_reason = "invalid cat page name";
} else if (!is_gzipped(page_name) && e + 1 < nexpected &&
strncmp(page_name, expected[e + 1], strlen(page_name)) == 0 &&
strlen(expected[e + 1]) == strlen(page_name) + 3) {
junk_reason = "cat page unused due to existing .gz";
} else
junk_reason = "cat page without man page";
snprintf(cat_path, sizeof cat_path, "%s/%s", cat_section,
page_name);
junk(mandir, cat_path, junk_reason);
}
free(entries);
while (e < nexpected)
free(expected[e++]);
free(expected);
}
/*
 * Processes a single man section.
 *
 * Skips the section if it has already been visited (recording the visit
 * otherwise), makes sure the matching cat section directory exists and is
 * writable, and then scans the section.  The cat section name obtained
 * from get_cat_section() is released before returning.
 */
static void
process_section(char *mandir, char *section)
{
	char *cat_section;

	if (already_visited(mandir, section, 1))
		return;
	if (verbose)
		fprintf(stderr, " section %s\n", section);
	cat_section = get_cat_section(section);
	if (make_writable_dir(mandir, cat_section))
		scan_section(mandir, section, cat_section);
	free(cat_section);
}
/*
 * scandir() filter: selects directory entries whose names look like man
 * section directories (manXXX).
 *
 * The parameter is const-qualified to match the filter prototype that
 * POSIX specifies for scandir().  directory_type() may edit the string
 * it is given, so it is handed a private copy of the entry name.
 */
static int
select_sections(const struct dirent *entry)
{
	char *name;
	int is_section;

	name = strdup(entry->d_name);
	if (name == NULL)
		err(1, "out of memory");
	is_section = directory_type(name) == MAN_SECTION_DIR;
	free(name);
	return is_section;
}
/*
 * Processes a single top-level man directory. If section isn't NULL,
 * it will only process that section sub-directory, otherwise it will
 * process all of them.
 *
 * The working directory is first reset to the one the program started
 * in, so that a relative dir_name is resolved from there, and is then
 * changed to dir_name; section names are therefore interpreted relative
 * to dir_name.  Failures are reported, recorded in exit_code, and cause
 * the directory to be skipped.
 */
static void
process_mandir(char *dir_name, char *section)
{
	struct dirent **entries;
	int nsections;
	int i;

	/* Without this, relative paths below would resolve wrongly. */
	if (fchdir(starting_dir) < 0) {
		warn("fchdir");
		exit_code = 1;
		return;
	}
	if (already_visited(NULL, dir_name, section == NULL))
		return;
	check_writable(dir_name);
	if (verbose)
		fprintf(stderr, "man directory %s\n", dir_name);
	if (pretend)
		fprintf(stderr, "cd %s\n", dir_name);
	if (chdir(dir_name) < 0) {
		warn("%s: chdir", dir_name);
		exit_code = 1;
		return;
	}
	if (section != NULL) {
		process_section(dir_name, section);
		return;
	}

	nsections = scandir(".", &entries, select_sections, alphasort);
	if (nsections < 0) {
		warn("%s", dir_name);
		exit_code = 1;
		return;
	}
	for (i = 0; i < nsections; i++) {
		process_section(dir_name, entries[i]->d_name);
		free(entries[i]);
	}
	free(entries);
}
/*
 * Processes one argument, which may be a colon-separated list of
 * directories.
 *
 * Each element is classified by directory_type():
 *  - a top-level man directory is processed in full; when a locale is
 *    set, its locale (and short-locale) sub-directories are processed
 *    instead;
 *  - a man section directory is split into its parent directory and its
 *    final component, and only that section is processed;
 *  - anything else is reported and recorded in exit_code.
 */
static void
process_argument(const char *arg)
{
	char *dir;
	char *mandir;
	char *parg;
	char *rest;
	char *slash;
	char here[] = ".";

	parg = strdup(arg);
	if (parg == NULL)
		err(1, "out of memory");
	/*
	 * strsep() advances (and finally NULLs) the pointer it is given,
	 * so iterate over a second pointer and keep parg for free().
	 */
	rest = parg;
	while ((dir = strsep(&rest, ":")) != NULL) {
		switch (directory_type(dir)) {
		case TOP_LEVEL_DIR:
			if (locale != NULL) {
				if (asprintf(&mandir, "%s/%s", dir,
				    locale) < 0)
					err(1, "out of memory");
				process_mandir(mandir, NULL);
				free(mandir);
				if (lang_locale != NULL) {
					if (asprintf(&mandir, "%s/%s", dir,
					    lang_locale) < 0)
						err(1, "out of memory");
					process_mandir(mandir, NULL);
					free(mandir);
				}
			} else {
				process_mandir(dir, NULL);
			}
			break;
		case MAN_SECTION_DIR:
			/*
			 * process_mandir() changes into the man directory
			 * before using the section name, so it must be
			 * given the last path component, not the whole
			 * path.  directory_type() has already removed any
			 * trailing slashes from dir.
			 */
			slash = strrchr(dir, '/');
			if (slash == NULL) {
				/* Bare "manXXX": its parent is ".". */
				process_mandir(here, dir);
			} else {
				mandir = get_mandir(dir);
				process_mandir(mandir, slash + 1);
				free(mandir);
			}
			break;
		default:
			warnx("%s: directory name not in proper man form", dir);
			exit_code = 1;
		}
	}
	free(parg);
}
/*
 * Handles the -L option: works out the user's locale and the nroff
 * device that goes with it.
 *
 * locale is taken from the first of LC_ALL, LC_CTYPE and LANG that is
 * set.  If the locale contains "_XX" (an underscore followed by two
 * upper-case letters), lang_locale is set to the locale with those three
 * characters removed.  If the part after '.' matches an entry in
 * locale_device[], nroff_device is switched to the paired device.
 */
static void
determine_locale(void)
{
	char *sep;
	int i;

	locale = getenv("LC_ALL");
	if (locale == NULL)
		locale = getenv("LC_CTYPE");
	if (locale == NULL)
		locale = getenv("LANG");
	if (locale == NULL) {
		/* warnx() appends the newline itself. */
		warnx("-L option used, but no locale in environment");
		return;
	}

	sep = strchr(locale, '_');
	/* <ctype.h> needs unsigned char values; plain char may be negative. */
	if (sep != NULL && isupper((unsigned char)sep[1]) &&
	    isupper((unsigned char)sep[2])) {
		/* The "*" precision takes an int, not a ptrdiff_t. */
		if (asprintf(&lang_locale, "%.*s%s", (int)(sep - locale),
		    locale, &sep[3]) < 0)
			err(1, "out of memory");
	}

	sep = strchr(locale, '.');
	if (sep != NULL) {
		sep++;
		for (i = 0; locale_device[i] != NULL; i += 2) {
			if (strcmp(sep, locale_device[i]) == 0) {
				nroff_device = locale_device[i + 1];
				break;
			}
		}
	}
	if (verbose)
		fprintf(stderr, "nroff device is %s\n", nroff_device);
}
/*
 * Prints the command synopsis to stderr and exits with status 1.
 */
static void
usage(void)
{
	const char *prog = getprogname();

	fprintf(stderr, "usage: %s [-fLnrv] [directories...]\n", prog);
	exit(1);
}
int
main(int argc, char **argv)
{
int opt;
extern int optind;
if ((uid = getuid()) == 0) {
fprintf(stderr, "don't run %s as root, use:\n echo", argv[0]);
for (optind = 0; optind < argc; optind++) {
fprintf(stderr, " %s", argv[optind]);
}
fprintf(stderr, " | nice -5 su -m man\n");
exit(1);
}
while ((opt = getopt(argc, argv, "vnfLrh")) != -1) {
switch (opt) {
case 'f':
force++;
break;
case 'L':
determine_locale();
break;
case 'n':
pretend++;
break;
case 'r':
rm_junk++;
break;
case 'v':
verbose++;
break;
default:
usage();
/* NOTREACHED */
}
}
ngids = getgroups(NGROUPS_MAX, gids);
if ((starting_dir = open(".", 0)) < 0) {
err(1, ".");
}
umask(022);
signal(SIGINT, trap_signal);
signal(SIGHUP, trap_signal);
signal(SIGQUIT, trap_signal);
signal(SIGTERM, trap_signal);
if (optind == argc) {
const char *manpath = getenv("MANPATH");
if (manpath == NULL)
manpath = DEFAULT_MANPATH;
process_argument(manpath);
} else {
while (optind < argc)
process_argument(argv[optind++]);
}
exit(exit_code);
}