optimized search algorithm

faster I/O due to mmap(2) [-m | -s]
better error check for damaged databases
support for databases in network byte order (SunOS/sparc)
optional case-insensitive search [-i]
optional multiple databases
optional multiple pattern
new environment variable LOCATE_PATH for database(s)
[-S] print some statistics about the database
[-l number] limit output to number file names
[-c] suppress normal output; instead print a count of matching file names
This commit is contained in:
Wolfram Schneider 1996-08-31 23:14:54 +00:00
parent 7ae26f1450
commit dbaf42882f
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=17980
5 changed files with 957 additions and 121 deletions

View File

@ -1,12 +1,15 @@
# @(#)Makefile 8.1 (Berkeley) 6/6/93
# $Id: Makefile,v 1.3 1996/04/25 15:54:22 wosch Exp wosch $
# $Id: Makefile,v 1.1 1996/08/29 22:39:41 wosch Exp wosch $
PROG= locate
SRCS= util.c locate.c
CFLAGS+= -I. -DMMAP -O2 # -DDEBUG
MAN1= locate.1
MAN8= locate.updatedb.8
SCRIPTS= updatedb mklocatedb concatdb
MLINKS+= locate.updatedb.8 updatedb.8
beforeinstall:
.for script in ${SCRIPTS}
${INSTALL} -c -o ${BINOWN} -g ${BINGRP} -m ${BINMODE} \

View File

@ -0,0 +1,280 @@
/*
* Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin.
* Copyright (c) 1989, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* James A. Woods.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $Id: fastfind.c,v 1.2 1996/08/29 22:39:41 wosch Exp wosch $
*/
#ifndef _LOCATE_STATISTIC_
#define _LOCATE_STATISTIC_
/*
 * Print statistics about the locate database open on `fp' to stdout:
 * compression ratios, number of file names, number of path characters,
 * database size in bytes and number of bigram codes.
 *
 * fp          - open database, positioned at the start of the bigram header
 * path_fcodes - database name, only used for the "Database:" line
 *
 * The header bytes are validated with check_bigram_char(), which exits
 * the program on a corrupt header.
 */
void
statistic (fp, path_fcodes)
	FILE *fp; /* open database */
	char *path_fcodes; /* for error message */
{
	int lines, chars, size, big;
	int c, count, pathlen;
	double dsize, dbig, dchars;

	/*
	 * The header holds NBG bigram pairs.  Only validation is needed
	 * here; the bigram values themselves are not used for counting.
	 */
	for (c = 0; c < NBG; c++) {
		(void)check_bigram_char(getc(fp));
		(void)check_bigram_char(getc(fp));
	}

	lines = chars = big = 0;
	size = NBG + NBG;

	/* one iteration per database entry; size counts the bytes consumed */
	for (c = getc(fp), count = 0; c != EOF; size++) {
		if (c == SWITCH) {
			count += getwf(fp) - OFFSET;
			size += sizeof(int);
		} else
			count += c - OFFSET;

		/*
		 * Track the length of the expanded path as a plain integer
		 * instead of a pointer into a scratch buffer: nothing is
		 * ever stored, and a corrupt count can no longer form an
		 * out-of-range pointer.
		 */
		for (pathlen = count; (c = getc(fp)) > SWITCH; size++)
			if (c < PARITY)
				pathlen++;
			else {
				/* a bigram code expands to two characters */
				big++;
				pathlen += 2;
			}
		pathlen++;
		lines++;
		chars += pathlen;
	}

	/*
	 * Compute the ratios in floating point: `100 * size' overflows an
	 * int for databases larger than about 21 MB.  A database without
	 * any entries reports 0% instead of dividing by zero.
	 */
	dsize = size;
	dbig = big;
	dchars = chars;

	(void)printf("\nDatabase: %s\n", path_fcodes);
	(void)printf("Compression: Front: %2.2f%%, ",
	    dchars > 0 ? 100.0 * (dsize + dbig) / dchars : 0.0);
	(void)printf("Bigram: %2.2f%%, ", 100.0 * (dsize - dbig) / dsize);
	(void)printf("Total: %2.2f%%\n",
	    dchars > 0 ? 100.0 * dsize / dchars : 0.0);
	(void)printf("Filenames: %d, ", lines);
	(void)printf("Chars: %d\n", chars);
	(void)printf("Database size: %d, ", size);
	(void)printf("Bigram chars: %d\n", big);
}
#endif /* _LOCATE_STATISTIC_ */
/*
 * Search one locate database for the pattern `pathpart' and print (or
 * count) every matching path.
 *
 * This file is a template: it is meant to be #included several times
 * with different combinations of FF_MMAP (database is a memory mapping
 * instead of a stdio stream) and FF_ICASE (case insensitive search)
 * defined, producing fastfind, fastfind_icase, fastfind_mmap and
 * fastfind_mmap_icase.
 *
 * Each entry is front-coded against the previous path: a count tells
 * how many leading characters are kept, followed by the new tail made of
 * literal characters and parity-marked bigram codes.  While a tail is
 * expanded the position of the last character equal to the final
 * pattern character is remembered, so that most entries can be rejected
 * without running the backwards comparison.
 *
 * Results honour the globals f_silent, f_limit and counter.  The
 * function exits the program on a corrupt or too small database and
 * once the output limit is exceeded.
 */
void
#ifdef FF_MMAP
#ifdef FF_ICASE
fastfind_mmap_icase
#else
fastfind_mmap
#endif
(pathpart, paddr, len, database)
	char *pathpart; /* search string */
	caddr_t paddr; /* mmap pointer */
	int len; /* length of database */
	char *database; /* for error message */
#else /* MMAP */
#ifdef FF_ICASE
fastfind_icase
#else /* !FF_ICASE */
fastfind
#endif /* FF_ICASE */
(fp, pathpart, database)
	FILE *fp; /* open database */
	char *pathpart; /* search string */
	char *database; /* for error message */
#endif /* MMAP */
{
	register u_char *p, *s, *patend, *q, *foundchar;
	register int c, cc;
	int count, found, globflag;
	u_char *cutoff;
	u_char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN];
#ifdef FF_ICASE
	/*
	 * use a lookup table for case insensitive search; one slot for
	 * every possible u_char value, including UCHAR_MAX itself
	 */
	u_char table[UCHAR_MAX + 1];

	tolower_word(pathpart);
#endif

	/* init bigram table */
#ifdef FF_MMAP
	if (len < (2*NBG)) {
		(void)fprintf(stderr, "database to small: %s\n", database);
		exit(1);
	}
	for (c = 0, p = bigram1, s = bigram2; c < NBG; c++, len-= 2) {
		p[c] = check_bigram_char(*paddr++);
		s[c] = check_bigram_char(*paddr++);
	}
#else
	for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) {
		p[c] = check_bigram_char(getc(fp));
		s[c] = check_bigram_char(getc(fp));
	}
#endif

	/* find optimal (last) char for searching */
	p = pathpart;
	globflag = index(p, '*') || index(p, '?') || index(p, '[');
	patend = patprep(p);
	cc = *patend;

#ifdef FF_ICASE
	/*
	 * The table lives on the stack, so clear it before use; then set
	 * patend char (both cases) to true.
	 */
	for (c = 0; c <= UCHAR_MAX; c++)
		table[c] = 0;
	table[TOLOWER(*patend)] = 1;
	table[toupper(*patend)] = 1;
#endif

	/* main loop */
	found = count = 0;
	foundchar = 0;

	/*
	 * NOTE(review): the mmap variant reads *paddr before testing len,
	 * here and in the tail loop below; a database that ends right
	 * after the header or in the middle of an entry is read one byte
	 * past `len' -- confirm whether callers can hit this.
	 */
#ifdef FF_MMAP
	for (c = (u_char)*paddr++; len-- > 0; ) {
#else
	for (c = getc(fp); c != EOF; ) {
#endif
		/* go forward or backward */
		if (c == SWITCH) { /* big step, an integer */
#ifdef FF_MMAP
			count += getwm(paddr) - OFFSET;
			len -= INTSIZE; paddr += INTSIZE;
#else
			count += getwf(fp) - OFFSET;
#endif
		} else { /* slow step, =< 14 chars */
			count += c - OFFSET;
		}

		/*
		 * count indexes path[]; a damaged database must not move
		 * the write position outside the buffer.
		 * NOTE(review): the tail copy below is still not bounded
		 * by MAXPATHLEN.
		 */
		if (count < 0 || count >= MAXPATHLEN) {
			(void)fprintf(stderr, "corrupted database: %s\n",
			    database);
			exit(1);
		}

		/* overlay old path */
		p = path + count;
		foundchar = p - 1;

#ifdef FF_MMAP
		for (; (c = (u_char)*paddr++) > SWITCH; len--)
#else
		for (; (c = getc(fp)) > SWITCH; )
#endif
			if (c < PARITY) {
				/* literal character */
#ifdef FF_ICASE
				if (table[c])
#else
				if (c == cc)
#endif
					foundchar = p;
				*p++ = c;
			}
			else {
				/* bigrams are parity-marked */
				TO7BIT(c);
#ifndef FF_ICASE
				if (bigram1[c] == cc ||
				    bigram2[c] == cc)
#else
				if (table[bigram1[c]] ||
				    table[bigram2[c]])
#endif
					foundchar = p + 1;
				*p++ = bigram1[c];
				*p++ = bigram2[c];
			}

		if (found) { /* previous line matched */
			/* the kept prefix may contain the match: scan it all */
			cutoff = path;
			*p-- = '\0';
			foundchar = p;
		} else if (foundchar >= path + count) { /* a char matched */
			*p-- = '\0';
			cutoff = path + count;
		} else /* nothing to do */
			continue;

		found = 0;
		/* walk backwards over the candidate positions */
		for (s = foundchar; s >= cutoff; s--) {
			if (*s == cc
#ifdef FF_ICASE
			    || TOLOWER(*s) == cc
#endif
			    ) { /* fast first char check */
				/*
				 * compare right to left; the prepared
				 * pattern starts with '\0', which ends
				 * the loop on success
				 */
				for (p = patend - 1, q = s - 1; *p != '\0';
				    p--, q--)
					if (*q != *p
#ifdef FF_ICASE
					    && TOLOWER(*q) != *p
#endif
					    )
						break;
				if (*p == '\0') { /* fast match success */
					found = 1;
					if (!globflag || !fnmatch(pathpart, path, 0)) {
						if (f_silent)
							counter++;
						else if (f_limit) {
							counter++;
							if (f_limit >= counter)
								(void)puts(path);
							else {
								(void)fprintf(stderr, "[show only %d lines]\n", counter - 1);
								exit(0);
							}
						} else
							(void)puts(path);
					}
					break;
				}
			}
		}
	}
}

View File

@ -1,3 +1,4 @@
.\" Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin.
.\" Copyright (c) 1990, 1993
.\" The Regents of the University of California. All rights reserved.
.\"
@ -30,21 +31,26 @@
.\" SUCH DAMAGE.
.\"
.\" @(#)locate.1 8.1 (Berkeley) 6/6/93
.\" $Id$
.\"
.Dd June 6, 1993
.Dt LOCATE 1
.Os BSD 4.4
.Sh NAME
.Nm locate
.Nd find files
.Nd find filenames quickly
.Sh SYNOPSIS
.Ar locate
pattern
.Nm
.Op Fl Scims
.Op Fl l Ar limit
.Op Fl d Ar database
pattern ...
.Sh DESCRIPTION
.Nm Locate
searches a database for all pathnames which match the specified
.Ar pattern .
The database is recomputed periodically, and contains the pathnames
The database is recomputed periodically (usually weekly or daily),
and contains the pathnames
of all files which are publicly accessible.
.Pp
Shell globbing and quoting characters (``*'', ``?'', ``\e'', ``[''
@ -59,12 +65,95 @@ including slashes (``/'').
.Pp
As a special case, a pattern containing no globbing characters (``foo'')
is matched as though it were ``*foo*''.
The following options are available:
.Bl -tag -width 10n indent
.It Fl S
Print some statistics about the database and exit.
.It Fl c
Suppress normal output; instead print a count of matching file names.
.It Fl d Ar database
Search in
.Ar database
instead of the default file name database.
Multiple
.Fl d
options are allowed. Each additional
.Fl d
option adds the specified database to the list
of databases to be searched.
.Ar database
may be a colon-separated list of databases. A single colon is a reference
to the default database.
$ locate -d $HOME/lib/mydb: foo
will first search string ``foo'' in
.Pa $HOME/lib/mydb
and then in
.Pa /var/db/locate.database .
$ locate -d $HOME/lib/mydb::/cdrom/locate.database foo
will first search string ``foo'' in
.Pa $HOME/lib/mydb
and then in
.Pa /var/db/locate.database
and then in
.Pa /cdrom/locate.database .
``$ locate -d db1 -d db2 -d db3 pattern'' is the same as
``$ locate -d db1:db2:db3 pattern'' or
``$ locate -d db1:db2 -d db3 pattern''.
If
.Ar -
is given as the database name, standard input will be read instead.
For example, you can compress your database
and use:
$ zcat database.gz | locate -d - pattern
This might be useful on machines with a fast CPU and little RAM and slow
I/O. Note: you can only use
.Ar one
pattern for stdin.
.It Fl i
Ignore case distinctions in both the pattern and the database.
.It Fl l Ar number
Limit output to
.Ar number
of file names and exit.
.It Fl m
Use
.Xr mmap 2
instead of the
.Xr stdio 3
library. This is the default behavior. Usually faster in most cases.
.It Fl s
Use the
.Xr stdio 3
library instead of
.Xr mmap 2 .
.Sh FILES
.Bl -tag -width /usr/libexec/locate.updatedb -compact
.It Pa /var/db/locate.database
The actual database
locate database
.It Pa /usr/libexec/locate.updatedb
Script to update the locate database
.It Pa /etc/weekly
Script that usually starts the database rebuild
.El
.Sh ENVIRONMENT
.Bl -tag -width LOCATE_PATH -compact
.It Pa LOCATE_PATH
path to the locate database if set and not empty, ignored if the
.Fl d
option was specified.
.El
.Sh SEE ALSO
.Xr find 1 ,
@ -79,17 +168,48 @@ Script to update the locate database
.%P pp. 8-10
.Re
.Sh BUGS
.Nm Locate
.Nm
may fail to list some files that are present, or may
list files that have been removed from the system. This is because
locate only reports files that are present in the database, which is
typically only regenerated once a week by the
.Nm /etc/weekly
.Pa /etc/weekly
script. Use
.Xr find 1
to locate files that are of a more transitory nature.
.Nm
database was built by user
.Dq nobody .
.Xr find 1
skip directories,
which are not readable for user
.Dq nobody ,
group
.Dq nobody ,
or
world. E.g. if your HOME directory is not world-readable, all your
files are
.Ar not
in the database.
The
.Nm
database is not byte order independent. It is not possible
to share the databases between machines with different byte order.
The current
.Nm
implementation understands databases in host byte order or
network byte order. So you can read on a FreeBSD/i386 machine
(little endian)
a locate database which was built on SunOS/sparc machine
(big endian, net).
.Sh HISTORY
The
.Nm locate
command appears in
.Bx 4.4 .
Many new features were
added in
.Fx 2.2 .

View File

@ -1,6 +1,7 @@
/*
* Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin.
* Copyright (c) 1989, 1993
* The Regents of the University of California. All rights reserved.
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* James A. Woods.
@ -15,8 +16,8 @@
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
@ -32,16 +33,19 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $Id: locate.c,v 1.3 1996/08/29 22:39:41 wosch Exp wosch $
*/
#ifndef lint
static char copyright[] =
"@(#) Copyright (c) 1989, 1993\n\
The Regents of the University of California. All rights reserved.\n";
"@(#) Copyright (c) 1995-1996 Wolfram Schneider, Berlin.\n\
@(#) Copyright (c) 1989, 1993\n\
The Regents of the University of California. All rights reserved.\n";
#endif /* not lint */
#ifndef lint
static char sccsid[] = "@(#)locate.c 8.1 (Berkeley) 6/6/93";
static char sccsid[] = "@(#)locate.c 8.1 (Berkeley) 6/6/93";
#endif /* not lint */
/*
@ -54,10 +58,10 @@ static char sccsid[] = "@(#)locate.c 8.1 (Berkeley) 6/6/93";
*
* The codes are:
*
* 0-28 likeliest differential counts + offset to make nonnegative
* 30 switch code for out-of-range count to follow in next word
* 128-255 bigram codes (128 most common, as determined by 'updatedb')
* 32-127 single character (printable) ascii residue (ie, literal)
* 0-28 likeliest differential counts + offset to make nonnegative
* 30 switch code for out-of-range count to follow in next word
* 128-255 bigram codes (128 most common, as determined by 'updatedb')
* 32-127 single character (printable) ascii residue (ie, literal)
*
* A novel two-tiered string search technique is employed:
*
@ -72,129 +76,291 @@ static char sccsid[] = "@(#)locate.c 8.1 (Berkeley) 6/6/93";
*/
#include <sys/param.h>
#include <fnmatch.h>
#include <unistd.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#ifdef MMAP
# include <sys/types.h>
# include <sys/stat.h>
# include <sys/mman.h>
# include <fcntl.h>
#endif
#include <err.h>
#ifdef sun
#include <netinet/in.h> /* SunOS byteorder(3) htohl(3) */
#ifndef __P
#define __P(x) x
#endif
#endif
#include "locate.h"
#include "pathnames.h"
FILE *fp;
#ifdef DEBUG
# include <sys/time.h>
# include <sys/types.h>
# include <sys/resource.h>
#endif
char *path_fcodes; /* locate database */
int f_mmap; /* use mmap */
int f_icase; /* ignore case */
int f_stdin; /* read database from stdin */
int f_statistic; /* print statistic */
int f_silent; /* suppress output, show only count of matches */
int f_limit; /* limit number of output lines, 0 == infinite */
u_int counter; /* counter for matches [-c] */
void usage __P((void));
void statistic __P((FILE *, char *));
void fastfind __P((FILE *, char *, char *));
void fastfind_icase __P((FILE *, char *, char *));
void fastfind_mmap __P((char *, caddr_t, int, char *));
void fastfind_mmap_icase __P((char *, caddr_t, int, char *));
void search_mmap __P((char *, char **));
void search_fopen __P((char *, char **));
unsigned long cputime __P((void));
extern char **colon __P((char **, char*, char*));
extern void print_matches __P((u_int));
extern int getwm __P((caddr_t));
extern int getwf __P((FILE *));
extern u_char *tolower_word __P((u_char *));
extern int check_bigram_char __P((int));
extern char *patprep __P((char *));
extern char *optarg;
extern int optind;
void fastfind __P((char *pathpart));
char *patprep __P((char *name));
int
main(argc, argv)
int argc;
char *argv[];
int argc;
char **argv;
{
if (argc != 2) {
(void)fprintf(stderr, "usage: locate pattern\n");
exit(1);
}
if (!(fp = fopen(_PATH_FCODES, "r"))) {
(void)fprintf(stderr, "locate: no database file %s.\n",
_PATH_FCODES);
exit(1);
}
while (*++argv)
fastfind(*argv);
exit(0);
register int ch;
char **dbv = NULL;
#ifdef MMAP
f_mmap = 1; /* mmap is default */
#endif
while ((ch = getopt(argc, argv, "Scd:il:ms")) != EOF)
switch(ch) {
case 'S': /* statistic lines */
f_statistic = 1;
break;
case 'l': /* limit number of output lines, 0 == infinite */
f_limit = atoi(optarg);
break;
case 'd': /* database */
dbv = colon(dbv, optarg, _PATH_FCODES);
break;
case 'i': /* ignore case */
f_icase = 1;
break;
case 'm': /* mmap */
#ifdef MMAP
f_mmap = 1;
#else
(void)fprintf(stderr, "mmap(2) not implemented\n");
#endif
break;
case 's': /* stdio lib */
f_mmap = 0;
break;
case 'c': /* suppress output, show only count of matches */
f_silent = 1;
break;
default:
usage();
}
argv += optind;
argc -= optind;
/* to few arguments */
if (argc < 1 && !(f_statistic))
usage();
/* no (valid) database as argument */
if (dbv == NULL || *dbv == NULL) {
/* try to read database from enviroment */
if ((path_fcodes = getenv("LOCATE_PATH")) == NULL ||
*path_fcodes == '\0')
/* use default database */
dbv = colon(dbv, _PATH_FCODES, _PATH_FCODES);
else /* $LOCATE_PATH */
dbv = colon(dbv, path_fcodes, _PATH_FCODES);
}
if (f_icase && UCHAR_MAX < 4096) /* init tolower lookup table */
for (ch = 0; ch <= UCHAR_MAX; ch++)
myctype[ch] = tolower(ch);
/* foreach database ... */
while((path_fcodes = *dbv) != NULL) {
dbv++;
if (!strcmp(path_fcodes, "-"))
f_stdin = 1;
else
f_stdin = 0;
#ifndef MMAP
f_mmap = 0; /* be paranoid */
#endif
if (!f_mmap || f_stdin || f_statistic)
search_fopen(path_fcodes, argv);
else
search_mmap(path_fcodes, argv);
}
if (f_silent)
print_matches(counter);
exit(0);
}
void
fastfind(pathpart)
char *pathpart;
search_fopen(db, s)
char *db; /* database */
char **s; /* search strings */
{
register char *p, *s;
register int c;
int count, found, globflag;
char *cutoff, *patend, *q;
char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN];
FILE *fp;
#ifdef DEBUG
long t0;
#endif
/* can only read stdin once */
if (f_stdin) {
fp = stdin;
if (*(s+1) != NULL) {
(void)fprintf(stderr,
"read database from stdin, use only");
(void)fprintf(stderr, " `%s' as pattern\n", *s);
*(s+1) = NULL;
}
}
else if ((fp = fopen(path_fcodes, "r")) == NULL)
err(1, "`%s'", path_fcodes);
for (c = 0, p = bigram1, s = bigram2; c < NBG; c++)
p[c] = getc(fp), s[c] = getc(fp);
p = pathpart;
globflag = index(p, '*') || index(p, '?') || index(p, '[');
patend = patprep(p);
found = 0;
for (c = getc(fp), count = 0; c != EOF;) {
count += ((c == SWITCH) ? getw(fp) : c) - OFFSET;
/* overlay old path */
for (p = path + count; (c = getc(fp)) > SWITCH;)
if (c < PARITY)
*p++ = c;
else { /* bigrams are parity-marked */
c &= PARITY - 1;
*p++ = bigram1[c], *p++ = bigram2[c];
}
*p-- = NULL;
cutoff = (found ? path : path + count);
for (found = 0, s = p; s >= cutoff; s--)
if (*s == *patend) { /* fast first char check */
for (p = patend - 1, q = s - 1; *p != NULL;
p--, q--)
if (*q != *p)
break;
if (*p == NULL) { /* fast match success */
found = 1;
if (!globflag ||
!fnmatch(pathpart, path, 0))
(void)printf("%s\n", path);
break;
}
}
/* count only chars or lines */
if (f_statistic) {
statistic(fp, path_fcodes);
(void)fclose(fp);
return;
}
/* foreach search string ... */
while(*s != NULL) {
#ifdef DEBUG
t0 = cputime();
#endif
if (!f_stdin &&
fseek(fp, (long)0, SEEK_SET) == -1)
err(1, "fseek to begin of ``%s''\n", path_fcodes);
if (f_icase)
fastfind_icase(fp, *s, path_fcodes);
else
fastfind(fp, *s, path_fcodes);
#ifdef DEBUG
(void)fprintf(stderr, "fastfind %ld ms\n", cputime () - t0);
#endif
s++;
}
(void)fclose(fp);
}
#ifdef MMAP
/*
 * Search database `db' for every pattern in the NULL-terminated vector
 * `s'.  The whole file is mapped read-only with mmap(2) and handed to
 * fastfind_mmap() or, if f_icase is set, fastfind_mmap_icase().
 *
 * Failure to open, stat or map the database terminates the program via
 * err(3); a failing munmap(2) is only reported.
 *
 * NOTE(review): the mapping length is passed on as an int, so a
 * database larger than INT_MAX bytes is not handled.
 */
void
search_mmap(db, s)
	char *db; /* database */
	char **s; /* search strings */
{
	struct stat sb;
	int fd;
	caddr_t p;
	off_t len;
#ifdef DEBUG
	long t0;
#endif

	/* use the database we were asked to search, not the global name */
	if ((fd = open(db, O_RDONLY)) == -1 ||
	    fstat(fd, &sb) == -1)
		err(1, "`%s'", db);
	len = sb.st_size;

	if ((p = mmap((caddr_t)0, (size_t)len,
	    PROT_READ, MAP_SHARED,
	    fd, (off_t)0)) == (caddr_t)-1)
		err(1, "mmap ``%s''", db);

	/* foreach search string ... */
	while (*s != NULL) {
#ifdef DEBUG
		t0 = cputime();
#endif
		if (f_icase)
			fastfind_mmap_icase(*s, p, (int)len, db);
		else
			fastfind_mmap(*s, p, (int)len, db);
#ifdef DEBUG
		(void)fprintf(stderr, "fastfind %ld ms\n", cputime () - t0);
#endif
		s++;
	}

	/* warn(3) appends ": <strerror>\n" itself, so no newline here */
	if (munmap(p, (size_t)len) == -1)
		warn("munmap %s", db);
	(void)close(fd);
}
#endif /* MMAP */
#ifdef DEBUG
/*
 * Return the user CPU time consumed by this process so far, in
 * milliseconds.  Used for the DEBUG timing output.
 */
unsigned long
cputime ()
{
	struct rusage self;
	unsigned long msec;

	getrusage(RUSAGE_SELF, &self);

	/* seconds and microseconds are both scaled to milliseconds */
	msec = self.ru_utime.tv_sec * 1000;
	msec += self.ru_utime.tv_usec / 1000;
	return (msec);
}
#endif /* DEBUG */
/*
 * Write the command synopsis and the default database location to
 * stderr, then terminate with exit status 1.
 */
void
usage ()
{
	FILE *out = stderr;

	/* the synopsis is emitted in two pieces on a single output line */
	(void)fputs("usage: locate [-Scims] [-l limit] ", out);
	(void)fputs("[-d database] pattern ...\n\n", out);
	(void)fprintf(out, "default database: `%s' or $LOCATE_PATH\n",
	    _PATH_FCODES);
	exit(1);
}
/*
* extract last glob-free subpattern in name for fast pre-match; prepend
* '\0' for backwards match; return end of new pattern
*/
static char globfree[100];
char *
patprep(name)
char *name;
{
register char *endmark, *p, *subp;
/* load fastfind functions */
subp = globfree;
*subp++ = '\0';
p = name + strlen(name) - 1;
/* skip trailing metacharacters (and [] ranges) */
for (; p >= name; p--)
if (index("*?", *p) == 0)
break;
if (p < name)
p = name;
if (*p == ']')
for (p--; p >= name; p--)
if (*p == '[') {
p--;
break;
}
if (p < name)
p = name;
/*
* if pattern has only metacharacters, check every path (force '/'
* search)
*/
if ((p == name) && index("?*[]", *p) != 0)
*subp++ = '/';
else {
for (endmark = p; p >= name; p--)
if (index("]*?", *p) != 0)
break;
for (++p;
(p <= endmark) && subp < (globfree + sizeof(globfree));)
*subp++ = *p++;
}
*subp = '\0';
return(--subp);
}
/* statistic */
/* fastfind_mmap, fastfind_mmap_icase */
#ifdef MMAP
#undef FF_MMAP
#undef FF_ICASE
#define FF_MMAP
#include <fastfind.c>
#define FF_ICASE
#include <fastfind.c>
#endif /* MMAP */
/* fopen */
/* fastfind, fastfind_icase */
#undef FF_MMAP
#undef FF_ICASE
#include <fastfind.c>
#define FF_ICASE
#include <fastfind.c>

View File

@ -0,0 +1,267 @@
/*
* Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin.
* Copyright (c) 1989, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* James A. Woods.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $Id: util.c,v 1.2 1996/08/29 22:39:41 wosch Exp wosch $
*/
#include <stdlib.h>
#include <string.h>
#include <err.h>
#include <sys/param.h>
#include <stdio.h>
#include "locate.h"
char **colon __P((char **, char*, char*));
char *patprep __P((char *));
void print_matches __P((u_int));
u_char *tolower_word __P((u_char *));
int getwm __P((caddr_t));
int getwf __P((FILE *));
int check_bigram_char __P((int));
/*
 * Validate one byte of the bigram header.  Legal values are 0 and the
 * range ASCII_MIN ... ASCII_MAX; such a value is handed back unchanged.
 * Anything else means the file is corrupt or not a locate database at
 * all: a diagnostic is written to stderr and the program exits with
 * status 1.
 */
int
check_bigram_char(ch)
	int ch;
{
	int in_ascii_range;

	in_ascii_range = !(ch < ASCII_MIN || ch > ASCII_MAX);
	if (ch != 0 && !in_ascii_range) {
		(void)fprintf(stderr, "locate database header corrupt, bigram ");
		(void)fprintf(stderr, "char outside 0, %d-%d: %d\n",
		    ASCII_MIN, ASCII_MAX, ch);
		exit(1);
	}

	/* legal bigram: 0, ASCII_MIN ... ASCII_MAX */
	return (ch);
}
/*
 * Split a colon separated string into its components and append them,
 * in order, to the NULL-terminated string vector dbv.  An empty
 * component (a single ':') stands for `dot'.
 *
 * "bla:foo" -> {"bla", "foo"}
 * "bla:"    -> {"bla", dot}
 * ":"       -> {dot}
 * "bla"     -> {"bla"}
 * ""        -> do nothing (a warning is printed)
 *
 * dbv  - existing vector, or NULL to start a new one
 * path - colon separated list
 * dot  - default for single ':'; stored by reference, not copied
 *
 * Returns the (possibly reallocated) vector.  Allocation failure
 * terminates the program via err(3).
 */
char **
colon(dbv, path, dot)
	char **dbv;
	char *path;
	char *dot; /* default for single ':' */
{
	int vlen, slen;
	char *c, *ch, *p;
	char **pv;

	if (dbv == NULL) {
		/* room for the terminating NULL only */
		if ((dbv = malloc(sizeof(*dbv))) == NULL)
			err(1, "malloc");
		*dbv = NULL;
	}

	/* empty string */
	if (*path == '\0') {
		(void)fprintf(stderr, "empty database name, ignored\n");
		return (dbv);
	}

	/* length of string vector */
	for (vlen = 0, pv = dbv; *pv != NULL; pv++)
		vlen++;

	/* c marks the start of the current component, ch scans ahead */
	for (ch = c = path; ; ch++) {
		/*
		 * A component ends at every ':' and at the end of the
		 * string -- except for the end of the one-character
		 * string ":", which must yield a single entry only.
		 * path is not empty here, so ch - 1 is valid whenever
		 * *ch is the terminator.
		 */
		if (*ch == ':' ||
		    (!*ch && !(*(ch - 1) == ':' && ch == 1 + path))) {
			/* single colon -> dot */
			if (ch == c)
				p = dot;
			else {
				/* a string */
				slen = ch - c;
				if ((p = malloc(sizeof(char) * (slen + 1)))
				    == NULL)
					err(1, "malloc");
				memcpy(p, c, slen);
				*(p + slen) = '\0';
			}

			/* increase dbv with element p */
			if ((dbv = realloc(dbv, sizeof(*dbv) * (vlen + 2)))
			    == NULL)
				err(1, "realloc");
			*(dbv + vlen) = p;
			*(dbv + ++vlen) = NULL;
			c = ch + 1;
		}
		if (*ch == '\0')
			break;
	}
	return (dbv);
}
/*
 * Print the number of matches, followed by a newline, to stdout.
 */
void
print_matches(counter)
	u_int counter;
{
	/* counter is unsigned: %u, not %d */
	(void)printf("%u\n", counter);
}
/*
 * Extract the last glob-free subpattern in name for the fast pre-match.
 * The result is stored in the static buffer globfree with a '\0'
 * prepended, so that it can be compared backwards; the return value
 * points at the last character of the stored subpattern.
 *
 * A pattern that consists of metacharacters only (or is empty) yields
 * "/", which forces a check of every path.  A subpattern that does not
 * fit into globfree is truncated.
 *
 * The buffer is shared: every call overwrites the previous result.
 *
 * NOTE(review): when a metacharacter directly precedes a trailing
 * [] range (e.g. "a*[b]") the stored subpattern is empty and the
 * returned pointer addresses the leading '\0' -- confirm whether that
 * is intended.
 */
static char globfree[100];

char *
patprep(name)
	char *name;
{
	register char *subp;
	long last, first, i;

	subp = globfree;
	*subp++ = '\0';

	/*
	 * Work with signed indices rather than pointers so that walking
	 * off the front of name never forms the address name - 1.
	 */
	last = (long)strlen(name) - 1;

	/* skip trailing metacharacters (and [] ranges) */
	while (last >= 0 && strchr("*?", name[last]) != NULL)
		last--;
	if (last < 0)
		last = 0;

	if (name[last] == ']')
		for (last--; last >= 0; last--)
			if (name[last] == '[') {
				last--;
				break;
			}
	if (last < 0)
		last = 0;

	/*
	 * if pattern has only metacharacters, check every path (force '/'
	 * search); strchr() also matches the terminating '\0', which
	 * covers the empty pattern
	 */
	if (last == 0 && strchr("?*[]", name[0]) != NULL)
		*subp++ = '/';
	else {
		/* find the metacharacter in front of the subpattern */
		first = last;
		while (first >= 0 && strchr("]*?", name[first]) == NULL)
			first--;

		/* copy, always leaving room for the terminating '\0' */
		for (i = first + 1;
		    i <= last && subp < globfree + sizeof(globfree) - 1; i++)
			*subp++ = name[i];
	}
	*subp = '\0';
	return (--subp);
}
/*
 * Convert the NUL-terminated string word to lower case in place,
 * using the TOLOWER() mapping, and return word.
 */
u_char *
tolower_word(word)
	u_char *word;
{
	register u_char *cursor;

	cursor = word;
	while (*cursor != '\0') {
		*cursor = TOLOWER(*cursor);
		cursor++;
	}
	return (word);
}
/*
 * Read integer from mmap pointer.
 * Essentially a simple ``return *(int *)p'' but avoids SIGBUS
 * for integer alignment (SunOS 4.x, 5.x).
 *
 * Convert network byte order to host byte order if necessary.
 * So we can read on FreeBSD/i386 (little endian) a locate database
 * which was built on SunOS/sparc (big endian).
 *
 * A value outside +-MAXPATHLEN in both byte orders is reported on
 * stderr and terminates the program with status 1.
 */
int
getwm(p)
	caddr_t p;
{
	/*
	 * Collect the bytes in a union: unlike a plain char array it is
	 * guaranteed to be suitably aligned for an int, and reading the
	 * int member does not break the aliasing rules.
	 */
	union {
		char buf[INTSIZE];
		int i;
	} u;
	register int i;

	u.i = 0;
	for (i = 0; i < INTSIZE; i++)
		u.buf[i] = *p++;

	i = u.i;

	/* not plausible in host byte order: try network byte order */
	if (i > MAXPATHLEN || i < -(MAXPATHLEN)) {
		i = ntohl(i);
		if (i > MAXPATHLEN || i < -(MAXPATHLEN)) {
			(void)fprintf(stderr,
			    "integer out of +-MAXPATHLEN (%d): %d\n",
			    MAXPATHLEN, i);
			exit(1);
		}
	}
	return (i);
}
/*
 * Read integer from stream.
 *
 * The word is first taken as it comes from getw(); if it does not lie
 * within +-MAXPATHLEN it is converted from network byte order to host
 * byte order and checked again.  So we can read on FreeBSD/i386
 * (little endian) a locate database which was built on SunOS/sparc
 * (big endian).
 *
 * A value that is out of range either way is reported on stderr and
 * terminates the program with status 1.
 */
int
getwf(fp)
	FILE *fp;
{
	register int value;

	value = getw(fp);
	if (value >= -(MAXPATHLEN) && value <= MAXPATHLEN)
		return (value);

	/* retry, assuming the database is in network byte order */
	value = ntohl(value);
	if (value >= -(MAXPATHLEN) && value <= MAXPATHLEN)
		return (value);

	(void)fprintf(stderr,
	    "integer out of +-MAXPATHLEN (%d): %d\n",
	    MAXPATHLEN, value);
	exit(1);
}