optimized search algorithm
faster IO due mmap(2) [-m | -s] better error check for damaged databases support for databases in network byte order (SunOS/sparc) optional case insensitve search [-i] optional multiple databases optional multiple pattern new enviroment variable LOCATE_PATH for database(s) [-S] print some statistic about the database [-l number] limit output to number file names [-c] suppress normal output; instead print a count of matching file names
This commit is contained in:
parent
7ae26f1450
commit
dbaf42882f
Notes:
svn2git
2020-12-20 02:59:44 +00:00
svn path=/head/; revision=17980
@ -1,12 +1,15 @@
|
||||
# @(#)Makefile 8.1 (Berkeley) 6/6/93
|
||||
# $Id: Makefile,v 1.3 1996/04/25 15:54:22 wosch Exp wosch $
|
||||
# $Id: Makefile,v 1.1 1996/08/29 22:39:41 wosch Exp wosch $
|
||||
|
||||
PROG= locate
|
||||
SRCS= util.c locate.c
|
||||
CFLAGS+= -I. -DMMAP -O2 # -DDEBUG
|
||||
MAN1= locate.1
|
||||
MAN8= locate.updatedb.8
|
||||
SCRIPTS= updatedb mklocatedb concatdb
|
||||
MLINKS+= locate.updatedb.8 updatedb.8
|
||||
|
||||
|
||||
beforeinstall:
|
||||
.for script in ${SCRIPTS}
|
||||
${INSTALL} -c -o ${BINOWN} -g ${BINGRP} -m ${BINMODE} \
|
||||
|
280
usr.bin/locate/locate/fastfind.c
Normal file
280
usr.bin/locate/locate/fastfind.c
Normal file
@ -0,0 +1,280 @@
|
||||
/*
|
||||
* Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin.
|
||||
* Copyright (c) 1989, 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* James A. Woods.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by the University of
|
||||
* California, Berkeley and its contributors.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $Id: fastfind.c,v 1.2 1996/08/29 22:39:41 wosch Exp wosch $
|
||||
*/
|
||||
|
||||
|
||||
#ifndef _LOCATE_STATISTIC_
|
||||
#define _LOCATE_STATISTIC_
|
||||
|
||||
void
|
||||
statistic (fp, path_fcodes)
|
||||
FILE *fp; /* open database */
|
||||
char *path_fcodes; /* for error message */
|
||||
{
|
||||
register int lines, chars, size, big;
|
||||
register u_char *p, *s;
|
||||
register int c;
|
||||
int count;
|
||||
u_char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN];
|
||||
|
||||
for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) {
|
||||
p[c] = check_bigram_char(getc(fp));
|
||||
s[c] = check_bigram_char(getc(fp));
|
||||
}
|
||||
|
||||
lines = chars = big = 0;
|
||||
size = NBG + NBG;
|
||||
|
||||
for (c = getc(fp), count = 0; c != EOF; size++) {
|
||||
if (c == SWITCH) {
|
||||
count += getwf(fp) - OFFSET;
|
||||
size += sizeof(int);
|
||||
} else
|
||||
count += c - OFFSET;
|
||||
|
||||
for (p = path + count; (c = getc(fp)) > SWITCH; size++)
|
||||
if (c < PARITY)
|
||||
p++;
|
||||
else {
|
||||
big++;
|
||||
p += 2;
|
||||
}
|
||||
|
||||
p++;
|
||||
lines++;
|
||||
chars += (p - path);
|
||||
}
|
||||
|
||||
(void)printf("\nDatabase: %s\n", path_fcodes);
|
||||
(void)printf("Compression: Front: %2.2f%%, ",
|
||||
(float)(100 * (size + big)) / chars);
|
||||
(void)printf("Bigram: %2.2f%%, ", (float)(100 * (size - big)) / size);
|
||||
(void)printf("Total: %2.2f%%\n", (float)(100 * size) / chars);
|
||||
(void)printf("Filenames: %d, ", lines);
|
||||
(void)printf("Chars: %d\n", chars);
|
||||
(void)printf("Database size: %d, ", size);
|
||||
(void)printf("Bigram chars: %d\n", big);
|
||||
|
||||
}
|
||||
#endif /* _LOCATE_STATISTIC_ */
|
||||
|
||||
|
||||
void
|
||||
#ifdef FF_MMAP
|
||||
|
||||
|
||||
#ifdef FF_ICASE
|
||||
fastfind_mmap_icase
|
||||
#else
|
||||
fastfind_mmap
|
||||
#endif
|
||||
(pathpart, paddr, len, database)
|
||||
char *pathpart; /* search string */
|
||||
caddr_t paddr; /* mmap pointer */
|
||||
int len; /* length of database */
|
||||
char *database; /* for error message */
|
||||
|
||||
|
||||
#else /* MMAP */
|
||||
|
||||
|
||||
#ifdef FF_ICASE
|
||||
fastfind_icase
|
||||
#else /* !FF_ICASE */
|
||||
fastfind
|
||||
#endif /* FF_ICASE */
|
||||
|
||||
(fp, pathpart, database)
|
||||
FILE *fp; /* open database */
|
||||
char *pathpart; /* search string */
|
||||
char *database; /* for error message */
|
||||
|
||||
|
||||
#endif /* MMAP */
|
||||
|
||||
{
|
||||
register u_char *p, *s, *patend, *q, *foundchar;
|
||||
register int c, cc;
|
||||
int count, found, globflag;
|
||||
u_char *cutoff;
|
||||
u_char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN];
|
||||
|
||||
#ifdef FF_ICASE
|
||||
/* use a lookup table for case insensitive search */
|
||||
u_char table[UCHAR_MAX];
|
||||
|
||||
tolower_word(pathpart);
|
||||
#endif
|
||||
|
||||
/* init bigram table */
|
||||
#ifdef FF_MMAP
|
||||
if (len < (2*NBG)) {
|
||||
(void)fprintf(stderr, "database to small: %s\n", database);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
for (c = 0, p = bigram1, s = bigram2; c < NBG; c++, len-= 2) {
|
||||
p[c] = check_bigram_char(*paddr++);
|
||||
s[c] = check_bigram_char(*paddr++);
|
||||
}
|
||||
#else
|
||||
for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) {
|
||||
p[c] = check_bigram_char(getc(fp));
|
||||
s[c] = check_bigram_char(getc(fp));
|
||||
}
|
||||
#endif
|
||||
|
||||
/* find optimal (last) char for searching */
|
||||
p = pathpart;
|
||||
globflag = index(p, '*') || index(p, '?') || index(p, '[');
|
||||
patend = patprep(p);
|
||||
cc = *patend;
|
||||
|
||||
#ifdef FF_ICASE
|
||||
/* set patend char to true */
|
||||
table[TOLOWER(*patend)] = 1;
|
||||
table[toupper(*patend)] = 1;
|
||||
#endif
|
||||
|
||||
|
||||
/* main loop */
|
||||
found = count = 0;
|
||||
foundchar = 0;
|
||||
|
||||
#ifdef FF_MMAP
|
||||
for (c = (u_char)*paddr++; len-- > 0; ) {
|
||||
#else
|
||||
for (c = getc(fp); c != EOF; ) {
|
||||
#endif
|
||||
|
||||
/* go forward or backward */
|
||||
if (c == SWITCH) { /* big step, an integer */
|
||||
#ifdef FF_MMAP
|
||||
count += getwm(paddr) - OFFSET;
|
||||
len -= INTSIZE; paddr += INTSIZE;
|
||||
#else
|
||||
count += getwf(fp) - OFFSET;
|
||||
#endif
|
||||
} else { /* slow step, =< 14 chars */
|
||||
count += c - OFFSET;
|
||||
}
|
||||
|
||||
/* overlay old path */
|
||||
p = path + count;
|
||||
foundchar = p - 1;
|
||||
#ifdef FF_MMAP
|
||||
for (; (c = (u_char)*paddr++) > SWITCH; len--)
|
||||
#else
|
||||
for (; (c = getc(fp)) > SWITCH; )
|
||||
#endif
|
||||
|
||||
if (c < PARITY) {
|
||||
#ifdef FF_ICASE
|
||||
if (table[c])
|
||||
#else
|
||||
if (c == cc)
|
||||
#endif
|
||||
foundchar = p;
|
||||
*p++ = c;
|
||||
}
|
||||
else {
|
||||
/* bigrams are parity-marked */
|
||||
TO7BIT(c);
|
||||
|
||||
#ifndef FF_ICASE
|
||||
if (bigram1[c] == cc ||
|
||||
bigram2[c] == cc)
|
||||
#else
|
||||
|
||||
if (table[bigram1[c]] ||
|
||||
table[bigram2[c]])
|
||||
#endif
|
||||
foundchar = p + 1;
|
||||
|
||||
*p++ = bigram1[c];
|
||||
*p++ = bigram2[c];
|
||||
}
|
||||
|
||||
|
||||
if (found) { /* previous line matched */
|
||||
cutoff = path;
|
||||
*p-- = '\0';
|
||||
foundchar = p;
|
||||
} else if (foundchar >= path + count) { /* a char matched */
|
||||
*p-- = '\0';
|
||||
cutoff = path + count;
|
||||
} else /* nothing to do */
|
||||
continue;
|
||||
|
||||
found = 0;
|
||||
for (s = foundchar; s >= cutoff; s--) {
|
||||
if (*s == cc
|
||||
#ifdef FF_ICASE
|
||||
|| TOLOWER(*s) == cc
|
||||
#endif
|
||||
) { /* fast first char check */
|
||||
for (p = patend - 1, q = s - 1; *p != '\0';
|
||||
p--, q--)
|
||||
if (*q != *p
|
||||
#ifdef FF_ICASE
|
||||
&& TOLOWER(*q) != *p
|
||||
#endif
|
||||
)
|
||||
break;
|
||||
if (*p == '\0') { /* fast match success */
|
||||
found = 1;
|
||||
if (!globflag || !fnmatch(pathpart, path, 0)) {
|
||||
if (f_silent)
|
||||
counter++;
|
||||
else if (f_limit) {
|
||||
counter++;
|
||||
if (f_limit >= counter)
|
||||
(void)puts(path);
|
||||
else {
|
||||
(void)fprintf(stderr, "[show only %d lines]\n", counter - 1);
|
||||
exit(0);
|
||||
}
|
||||
} else
|
||||
(void)puts(path);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -1,3 +1,4 @@
|
||||
.\" Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin.
|
||||
.\" Copyright (c) 1990, 1993
|
||||
.\" The Regents of the University of California. All rights reserved.
|
||||
.\"
|
||||
@ -30,21 +31,26 @@
|
||||
.\" SUCH DAMAGE.
|
||||
.\"
|
||||
.\" @(#)locate.1 8.1 (Berkeley) 6/6/93
|
||||
.\" $Id$
|
||||
.\"
|
||||
.Dd June 6, 1993
|
||||
.Dt LOCATE 1
|
||||
.Os BSD 4.4
|
||||
.Sh NAME
|
||||
.Nm locate
|
||||
.Nd find files
|
||||
.Nd find filenames quickly
|
||||
.Sh SYNOPSIS
|
||||
.Ar locate
|
||||
pattern
|
||||
.Nm
|
||||
.Op Fl Scims
|
||||
.Op Fl l Ar limit
|
||||
.Op Fl d Ar database
|
||||
pattern ...
|
||||
.Sh DESCRIPTION
|
||||
.Nm Locate
|
||||
searches a database for all pathnames which match the specified
|
||||
.Ar pattern .
|
||||
The database is recomputed periodically, and contains the pathnames
|
||||
The database is recomputed periodically (usually weekly or daily),
|
||||
and contains the pathnames
|
||||
of all files which are publicly accessible.
|
||||
.Pp
|
||||
Shell globbing and quoting characters (``*'', ``?'', ``\e'', ``[''
|
||||
@ -59,12 +65,95 @@ including slashes (``/'').
|
||||
.Pp
|
||||
As a special case, a pattern containing no globbing characters (``foo'')
|
||||
is matched as though it were ``*foo*''.
|
||||
|
||||
The following options are available:
|
||||
.Bl -tag -width 10n indent
|
||||
.It Fl S
|
||||
Print some statistic about the database and exit.
|
||||
.It Fl c
|
||||
Suppress normal output; instead print a count of matching file names.
|
||||
.It Fl d Ar database
|
||||
Search in
|
||||
.Ar database
|
||||
instead the default file name database.
|
||||
Multiple
|
||||
.Fl d
|
||||
options are allowed. Each additional
|
||||
.Fl d
|
||||
option adds the specified database to the list
|
||||
of databases to be searched.
|
||||
|
||||
.Ar database
|
||||
may be a colon-separated list of databases. A single colon is a reference
|
||||
to the default database.
|
||||
|
||||
$ locate -d $HOME/lib/mydb: foo
|
||||
|
||||
will first search string ``foo'' in
|
||||
.Pa $HOME/lib/mydb
|
||||
and then in
|
||||
.Pa /var/db/locate.database .
|
||||
|
||||
$ locate -d $HOME/lib/mydb::/cdrom/locate.database foo
|
||||
|
||||
will first search string ``foo'' in
|
||||
.Pa $HOME/lib/mydb
|
||||
and then in
|
||||
.Pa /var/db/locate.database
|
||||
and then in
|
||||
.Pa /cdrom/locate.database .
|
||||
|
||||
|
||||
``$ locate -d db1 -d db2 -d db3 pattern'' is the same as
|
||||
|
||||
``$ locate -d db1:db2:db3 pattern'' or
|
||||
|
||||
``$ locate -d db1:db2 -d db3 pattern''.
|
||||
|
||||
If
|
||||
.Ar -
|
||||
is given as the database name, standard input will be read instead.
|
||||
For example, you can compress your database
|
||||
and use:
|
||||
|
||||
$ zcat database.gz | locate -d - pattern
|
||||
|
||||
This might be useful on machines with a fast CPU and little RAM and slow
|
||||
I/O. Note: you can only use
|
||||
.Ar one
|
||||
pattern for stdin.
|
||||
.It Fl i
|
||||
Ignore case distinctions in both the pattern and the database.
|
||||
.It Fl l Ar number
|
||||
Limit output to
|
||||
.Ar number
|
||||
of file names and exit.
|
||||
.It Fl m
|
||||
Use
|
||||
.Xr mmap 2
|
||||
instead of the
|
||||
.Xr stdio 3
|
||||
library. This is the default behavior. Usually faster in most cases.
|
||||
.It Fl s
|
||||
Use the
|
||||
.Xr stdio 3
|
||||
library instead of
|
||||
.Xr mmap 2 .
|
||||
.Sh FILES
|
||||
.Bl -tag -width /usr/libexec/locate.updatedb -compact
|
||||
.It Pa /var/db/locate.database
|
||||
The actual database
|
||||
locate database
|
||||
.It Pa /usr/libexec/locate.updatedb
|
||||
Script to update the locate database
|
||||
.It Pa /etc/weekly
|
||||
Script that usually starts the database rebuild
|
||||
.El
|
||||
.Sh ENVIRONMENT
|
||||
.Bl -tag -width LOCATE_PATH -compact
|
||||
.It Pa LOCATE_PATH
|
||||
path to the locate database if set and not empty, ignored if the
|
||||
.Fl d
|
||||
option was specified.
|
||||
.El
|
||||
.Sh SEE ALSO
|
||||
.Xr find 1 ,
|
||||
@ -79,17 +168,48 @@ Script to update the locate database
|
||||
.%P pp. 8-10
|
||||
.Re
|
||||
.Sh BUGS
|
||||
.Nm Locate
|
||||
.Nm
|
||||
may fail to list some files that are present, or may
|
||||
to list files that have been removed from the system. This is because
|
||||
locate only reports files that are present in the database, which is
|
||||
typically only regenerated once a week by the
|
||||
.Nm /etc/weekly
|
||||
.Pa /etc/weekly
|
||||
script. Use
|
||||
.Xr find 1
|
||||
to locate files that are of a more transitory nature.
|
||||
|
||||
.Nm
|
||||
database was built by user
|
||||
.Dq nobody .
|
||||
.Xr find 1
|
||||
skip directories,
|
||||
which are not readable for user
|
||||
.Dq nobody ,
|
||||
group
|
||||
.Dq nobody ,
|
||||
or
|
||||
world. E.g. if your HOME directory ist not world-readable, all your
|
||||
files are
|
||||
.Ar not
|
||||
in the database.
|
||||
|
||||
The
|
||||
.Nm
|
||||
database is not byte order independ. It is not possible
|
||||
to share the databases between machines with different byte order.
|
||||
The current
|
||||
.Nm
|
||||
implementation understand databases in host byte order or
|
||||
network byte order. So you can read on a FreeBSD/i386 machine
|
||||
(little endian)
|
||||
a locate database which was built on SunOS/sparc machine
|
||||
(big endian, net).
|
||||
|
||||
.Sh HISTORY
|
||||
The
|
||||
.Nm locate
|
||||
command appears in
|
||||
.Bx 4.4 .
|
||||
Many new features were
|
||||
added in
|
||||
.Fx 2.2 .
|
||||
|
@ -1,6 +1,7 @@
|
||||
/*
|
||||
* Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin.
|
||||
* Copyright (c) 1989, 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* James A. Woods.
|
||||
@ -15,8 +16,8 @@
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by the University of
|
||||
* California, Berkeley and its contributors.
|
||||
* This product includes software developed by the University of
|
||||
* California, Berkeley and its contributors.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
@ -32,16 +33,19 @@
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $Id: locate.c,v 1.3 1996/08/29 22:39:41 wosch Exp wosch $
|
||||
*/
|
||||
|
||||
#ifndef lint
|
||||
static char copyright[] =
|
||||
"@(#) Copyright (c) 1989, 1993\n\
|
||||
The Regents of the University of California. All rights reserved.\n";
|
||||
"@(#) Copyright (c) 1995-1996 Wolfram Schneider, Berlin.\n\
|
||||
@(#) Copyright (c) 1989, 1993\n\
|
||||
The Regents of the University of California. All rights reserved.\n";
|
||||
#endif /* not lint */
|
||||
|
||||
#ifndef lint
|
||||
static char sccsid[] = "@(#)locate.c 8.1 (Berkeley) 6/6/93";
|
||||
static char sccsid[] = "@(#)locate.c 8.1 (Berkeley) 6/6/93";
|
||||
#endif /* not lint */
|
||||
|
||||
/*
|
||||
@ -54,10 +58,10 @@ static char sccsid[] = "@(#)locate.c 8.1 (Berkeley) 6/6/93";
|
||||
*
|
||||
* The codes are:
|
||||
*
|
||||
* 0-28 likeliest differential counts + offset to make nonnegative
|
||||
* 30 switch code for out-of-range count to follow in next word
|
||||
* 128-255 bigram codes (128 most common, as determined by 'updatedb')
|
||||
* 32-127 single character (printable) ascii residue (ie, literal)
|
||||
* 0-28 likeliest differential counts + offset to make nonnegative
|
||||
* 30 switch code for out-of-range count to follow in next word
|
||||
* 128-255 bigram codes (128 most common, as determined by 'updatedb')
|
||||
* 32-127 single character (printable) ascii residue (ie, literal)
|
||||
*
|
||||
* A novel two-tiered string search technique is employed:
|
||||
*
|
||||
@ -72,129 +76,291 @@ static char sccsid[] = "@(#)locate.c 8.1 (Berkeley) 6/6/93";
|
||||
*/
|
||||
|
||||
#include <sys/param.h>
|
||||
|
||||
#include <fnmatch.h>
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <ctype.h>
|
||||
#ifdef MMAP
|
||||
# include <sys/types.h>
|
||||
# include <sys/stat.h>
|
||||
# include <sys/mman.h>
|
||||
# include <fcntl.h>
|
||||
#endif
|
||||
#include <err.h>
|
||||
|
||||
#ifdef sun
|
||||
#include <netinet/in.h> /* SunOS byteorder(3) htohl(3) */
|
||||
#ifndef __P
|
||||
#define __P(x) x
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include "locate.h"
|
||||
#include "pathnames.h"
|
||||
|
||||
FILE *fp;
|
||||
#ifdef DEBUG
|
||||
# include <sys/time.h>
|
||||
# include <sys/types.h>
|
||||
# include <sys/resource.h>
|
||||
#endif
|
||||
|
||||
char *path_fcodes; /* locate database */
|
||||
int f_mmap; /* use mmap */
|
||||
int f_icase; /* ignore case */
|
||||
int f_stdin; /* read database from stdin */
|
||||
int f_statistic; /* print statistic */
|
||||
int f_silent; /* suppress output, show only count of matches */
|
||||
int f_limit; /* limit number of output lines, 0 == infinite */
|
||||
u_int counter; /* counter for matches [-c] */
|
||||
|
||||
|
||||
void usage __P((void));
|
||||
void statistic __P((FILE *, char *));
|
||||
void fastfind __P((FILE *, char *, char *));
|
||||
void fastfind_icase __P((FILE *, char *, char *));
|
||||
void fastfind_mmap __P((char *, caddr_t, int, char *));
|
||||
void fastfind_mmap_icase __P((char *, caddr_t, int, char *));
|
||||
void search_mmap __P((char *, char **));
|
||||
void search_fopen __P((char *, char **));
|
||||
unsigned long cputime __P((void));
|
||||
|
||||
extern char **colon __P((char **, char*, char*));
|
||||
extern void print_matches __P((u_int));
|
||||
extern int getwm __P((caddr_t));
|
||||
extern int getwf __P((FILE *));
|
||||
extern u_char *tolower_word __P((u_char *));
|
||||
extern int check_bigram_char __P((int));
|
||||
extern char *patprep __P((char *));
|
||||
|
||||
extern char *optarg;
|
||||
extern int optind;
|
||||
|
||||
void fastfind __P((char *pathpart));
|
||||
char *patprep __P((char *name));
|
||||
|
||||
int
|
||||
main(argc, argv)
|
||||
int argc;
|
||||
char *argv[];
|
||||
int argc;
|
||||
char **argv;
|
||||
{
|
||||
if (argc != 2) {
|
||||
(void)fprintf(stderr, "usage: locate pattern\n");
|
||||
exit(1);
|
||||
}
|
||||
if (!(fp = fopen(_PATH_FCODES, "r"))) {
|
||||
(void)fprintf(stderr, "locate: no database file %s.\n",
|
||||
_PATH_FCODES);
|
||||
exit(1);
|
||||
}
|
||||
while (*++argv)
|
||||
fastfind(*argv);
|
||||
exit(0);
|
||||
register int ch;
|
||||
char **dbv = NULL;
|
||||
#ifdef MMAP
|
||||
f_mmap = 1; /* mmap is default */
|
||||
#endif
|
||||
|
||||
while ((ch = getopt(argc, argv, "Scd:il:ms")) != EOF)
|
||||
switch(ch) {
|
||||
case 'S': /* statistic lines */
|
||||
f_statistic = 1;
|
||||
break;
|
||||
case 'l': /* limit number of output lines, 0 == infinite */
|
||||
f_limit = atoi(optarg);
|
||||
break;
|
||||
case 'd': /* database */
|
||||
dbv = colon(dbv, optarg, _PATH_FCODES);
|
||||
break;
|
||||
case 'i': /* ignore case */
|
||||
f_icase = 1;
|
||||
break;
|
||||
case 'm': /* mmap */
|
||||
#ifdef MMAP
|
||||
f_mmap = 1;
|
||||
#else
|
||||
(void)fprintf(stderr, "mmap(2) not implemented\n");
|
||||
#endif
|
||||
break;
|
||||
case 's': /* stdio lib */
|
||||
f_mmap = 0;
|
||||
break;
|
||||
case 'c': /* suppress output, show only count of matches */
|
||||
f_silent = 1;
|
||||
break;
|
||||
default:
|
||||
usage();
|
||||
}
|
||||
argv += optind;
|
||||
argc -= optind;
|
||||
|
||||
/* to few arguments */
|
||||
if (argc < 1 && !(f_statistic))
|
||||
usage();
|
||||
|
||||
/* no (valid) database as argument */
|
||||
if (dbv == NULL || *dbv == NULL) {
|
||||
/* try to read database from enviroment */
|
||||
if ((path_fcodes = getenv("LOCATE_PATH")) == NULL ||
|
||||
*path_fcodes == '\0')
|
||||
/* use default database */
|
||||
dbv = colon(dbv, _PATH_FCODES, _PATH_FCODES);
|
||||
else /* $LOCATE_PATH */
|
||||
dbv = colon(dbv, path_fcodes, _PATH_FCODES);
|
||||
}
|
||||
|
||||
if (f_icase && UCHAR_MAX < 4096) /* init tolower lookup table */
|
||||
for (ch = 0; ch <= UCHAR_MAX; ch++)
|
||||
myctype[ch] = tolower(ch);
|
||||
|
||||
/* foreach database ... */
|
||||
while((path_fcodes = *dbv) != NULL) {
|
||||
dbv++;
|
||||
|
||||
if (!strcmp(path_fcodes, "-"))
|
||||
f_stdin = 1;
|
||||
else
|
||||
f_stdin = 0;
|
||||
|
||||
#ifndef MMAP
|
||||
f_mmap = 0; /* be paranoid */
|
||||
#endif
|
||||
if (!f_mmap || f_stdin || f_statistic)
|
||||
search_fopen(path_fcodes, argv);
|
||||
else
|
||||
search_mmap(path_fcodes, argv);
|
||||
}
|
||||
|
||||
if (f_silent)
|
||||
print_matches(counter);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
fastfind(pathpart)
|
||||
char *pathpart;
|
||||
search_fopen(db, s)
|
||||
char *db; /* database */
|
||||
char **s; /* search strings */
|
||||
{
|
||||
register char *p, *s;
|
||||
register int c;
|
||||
int count, found, globflag;
|
||||
char *cutoff, *patend, *q;
|
||||
char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN];
|
||||
FILE *fp;
|
||||
#ifdef DEBUG
|
||||
long t0;
|
||||
#endif
|
||||
|
||||
/* can only read stdin once */
|
||||
if (f_stdin) {
|
||||
fp = stdin;
|
||||
if (*(s+1) != NULL) {
|
||||
(void)fprintf(stderr,
|
||||
"read database from stdin, use only");
|
||||
(void)fprintf(stderr, " `%s' as pattern\n", *s);
|
||||
*(s+1) = NULL;
|
||||
}
|
||||
}
|
||||
else if ((fp = fopen(path_fcodes, "r")) == NULL)
|
||||
err(1, "`%s'", path_fcodes);
|
||||
|
||||
for (c = 0, p = bigram1, s = bigram2; c < NBG; c++)
|
||||
p[c] = getc(fp), s[c] = getc(fp);
|
||||
|
||||
p = pathpart;
|
||||
globflag = index(p, '*') || index(p, '?') || index(p, '[');
|
||||
patend = patprep(p);
|
||||
|
||||
found = 0;
|
||||
for (c = getc(fp), count = 0; c != EOF;) {
|
||||
count += ((c == SWITCH) ? getw(fp) : c) - OFFSET;
|
||||
/* overlay old path */
|
||||
for (p = path + count; (c = getc(fp)) > SWITCH;)
|
||||
if (c < PARITY)
|
||||
*p++ = c;
|
||||
else { /* bigrams are parity-marked */
|
||||
c &= PARITY - 1;
|
||||
*p++ = bigram1[c], *p++ = bigram2[c];
|
||||
}
|
||||
*p-- = NULL;
|
||||
cutoff = (found ? path : path + count);
|
||||
for (found = 0, s = p; s >= cutoff; s--)
|
||||
if (*s == *patend) { /* fast first char check */
|
||||
for (p = patend - 1, q = s - 1; *p != NULL;
|
||||
p--, q--)
|
||||
if (*q != *p)
|
||||
break;
|
||||
if (*p == NULL) { /* fast match success */
|
||||
found = 1;
|
||||
if (!globflag ||
|
||||
!fnmatch(pathpart, path, 0))
|
||||
(void)printf("%s\n", path);
|
||||
break;
|
||||
}
|
||||
}
|
||||
/* count only chars or lines */
|
||||
if (f_statistic) {
|
||||
statistic(fp, path_fcodes);
|
||||
(void)fclose(fp);
|
||||
return;
|
||||
}
|
||||
|
||||
/* foreach search string ... */
|
||||
while(*s != NULL) {
|
||||
#ifdef DEBUG
|
||||
t0 = cputime();
|
||||
#endif
|
||||
if (!f_stdin &&
|
||||
fseek(fp, (long)0, SEEK_SET) == -1)
|
||||
err(1, "fseek to begin of ``%s''\n", path_fcodes);
|
||||
|
||||
if (f_icase)
|
||||
fastfind_icase(fp, *s, path_fcodes);
|
||||
else
|
||||
fastfind(fp, *s, path_fcodes);
|
||||
#ifdef DEBUG
|
||||
(void)fprintf(stderr, "fastfind %ld ms\n", cputime () - t0);
|
||||
#endif
|
||||
s++;
|
||||
}
|
||||
(void)fclose(fp);
|
||||
}
|
||||
|
||||
#ifdef MMAP
|
||||
void
|
||||
search_mmap(db, s)
|
||||
char *db; /* database */
|
||||
char **s; /* search strings */
|
||||
{
|
||||
struct stat sb;
|
||||
int fd;
|
||||
caddr_t p;
|
||||
off_t len;
|
||||
#ifdef DEBUG
|
||||
long t0;
|
||||
#endif
|
||||
if ((fd = open(path_fcodes, O_RDONLY)) == -1 ||
|
||||
fstat(fd, &sb) == -1)
|
||||
err(1, "`%s'", path_fcodes);
|
||||
len = sb.st_size;
|
||||
|
||||
if ((p = mmap((caddr_t)0, (size_t)len,
|
||||
PROT_READ, MAP_SHARED,
|
||||
fd, (off_t)0)) == (caddr_t)-1)
|
||||
err(1, "mmap ``%s''", path_fcodes);
|
||||
|
||||
/* foreach search string ... */
|
||||
while (*s != NULL) {
|
||||
#ifdef DEBUG
|
||||
t0 = cputime();
|
||||
#endif
|
||||
if (f_icase)
|
||||
fastfind_mmap_icase(*s, p, (int)len, path_fcodes);
|
||||
else
|
||||
fastfind_mmap(*s, p, (int)len, path_fcodes);
|
||||
#ifdef DEBUG
|
||||
(void)fprintf(stderr, "fastfind %ld ms\n", cputime () - t0);
|
||||
#endif
|
||||
s++;
|
||||
}
|
||||
|
||||
if (munmap(p, (size_t)len) == -1)
|
||||
warn("munmap %s\n", path_fcodes);
|
||||
|
||||
(void)close(fd);
|
||||
}
|
||||
#endif /* MMAP */
|
||||
|
||||
#ifdef DEBUG
|
||||
unsigned long
|
||||
cputime ()
|
||||
{
|
||||
struct rusage rus;
|
||||
|
||||
getrusage(0, &rus);
|
||||
return(rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000);
|
||||
}
|
||||
#endif /* DEBUG */
|
||||
|
||||
void
|
||||
usage ()
|
||||
{
|
||||
(void)fprintf(stderr, "usage: locate [-Scims] [-l limit] ");
|
||||
(void)fprintf(stderr, "[-d database] pattern ...\n\n");
|
||||
(void)fprintf(stderr, "default database: `%s' or $LOCATE_PATH\n",
|
||||
_PATH_FCODES);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/*
|
||||
* extract last glob-free subpattern in name for fast pre-match; prepend
|
||||
* '\0' for backwards match; return end of new pattern
|
||||
*/
|
||||
static char globfree[100];
|
||||
|
||||
char *
|
||||
patprep(name)
|
||||
char *name;
|
||||
{
|
||||
register char *endmark, *p, *subp;
|
||||
/* load fastfind functions */
|
||||
|
||||
subp = globfree;
|
||||
*subp++ = '\0';
|
||||
p = name + strlen(name) - 1;
|
||||
/* skip trailing metacharacters (and [] ranges) */
|
||||
for (; p >= name; p--)
|
||||
if (index("*?", *p) == 0)
|
||||
break;
|
||||
if (p < name)
|
||||
p = name;
|
||||
if (*p == ']')
|
||||
for (p--; p >= name; p--)
|
||||
if (*p == '[') {
|
||||
p--;
|
||||
break;
|
||||
}
|
||||
if (p < name)
|
||||
p = name;
|
||||
/*
|
||||
* if pattern has only metacharacters, check every path (force '/'
|
||||
* search)
|
||||
*/
|
||||
if ((p == name) && index("?*[]", *p) != 0)
|
||||
*subp++ = '/';
|
||||
else {
|
||||
for (endmark = p; p >= name; p--)
|
||||
if (index("]*?", *p) != 0)
|
||||
break;
|
||||
for (++p;
|
||||
(p <= endmark) && subp < (globfree + sizeof(globfree));)
|
||||
*subp++ = *p++;
|
||||
}
|
||||
*subp = '\0';
|
||||
return(--subp);
|
||||
}
|
||||
/* statistic */
|
||||
/* fastfind_mmap, fastfind_mmap_icase */
|
||||
#ifdef MMAP
|
||||
#undef FF_MMAP
|
||||
#undef FF_ICASE
|
||||
|
||||
#define FF_MMAP
|
||||
#include <fastfind.c>
|
||||
#define FF_ICASE
|
||||
#include <fastfind.c>
|
||||
#endif /* MMAP */
|
||||
|
||||
/* fopen */
|
||||
/* fastfind, fastfind_icase */
|
||||
#undef FF_MMAP
|
||||
#undef FF_ICASE
|
||||
#include <fastfind.c>
|
||||
#define FF_ICASE
|
||||
#include <fastfind.c>
|
||||
|
267
usr.bin/locate/locate/util.c
Normal file
267
usr.bin/locate/locate/util.c
Normal file
@ -0,0 +1,267 @@
|
||||
/*
|
||||
* Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin.
|
||||
* Copyright (c) 1989, 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* James A. Woods.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by the University of
|
||||
* California, Berkeley and its contributors.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $Id: util.c,v 1.2 1996/08/29 22:39:41 wosch Exp wosch $
|
||||
*/
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <err.h>
|
||||
#include <sys/param.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "locate.h"
|
||||
|
||||
char **colon __P((char **, char*, char*));
|
||||
char *patprep __P((char *));
|
||||
void print_matches __P((u_int));
|
||||
u_char *tolower_word __P((u_char *));
|
||||
int getwm __P((caddr_t));
|
||||
int getwf __P((FILE *));
|
||||
int check_bigram_char __P((int));
|
||||
|
||||
/*
|
||||
* Validate bigram chars. If the test failed the database is corrupt
|
||||
* or the database is obviously not a locate database.
|
||||
*/
|
||||
int
|
||||
check_bigram_char(ch)
|
||||
int ch;
|
||||
{
|
||||
/* legal bigram: 0, ASCII_MIN ... ASCII_MAX */
|
||||
if (ch == 0 ||
|
||||
(ch >= ASCII_MIN && ch <= ASCII_MAX))
|
||||
return(ch);
|
||||
|
||||
(void)fprintf(stderr, "locate database header corrupt, bigram ");
|
||||
(void)fprintf(stderr, "char outside 0, %d-%d: %d\n",
|
||||
ASCII_MIN, ASCII_MAX, ch);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* split a colon separated string into a char vector
|
||||
*
|
||||
* "bla:foo" -> {"foo", "bla"}
|
||||
* "bla:" -> {"foo", dot}
|
||||
* "bla" -> {"bla"}
|
||||
* "" -> do nothing
|
||||
*
|
||||
*/
|
||||
char **
|
||||
colon(dbv, path, dot)
|
||||
char **dbv;
|
||||
char *path;
|
||||
char *dot; /* default for single ':' */
|
||||
{
|
||||
int vlen, slen;
|
||||
char *c, *ch, *p;
|
||||
char **pv;
|
||||
|
||||
if (dbv == NULL) {
|
||||
if ((dbv = malloc(sizeof(char **))) == NULL)
|
||||
err(1, "malloc");
|
||||
*dbv = NULL;
|
||||
}
|
||||
|
||||
/* empty string */
|
||||
if (*path == '\0') {
|
||||
(void)fprintf(stderr, "empty database name, ignored\n");
|
||||
return(dbv);
|
||||
}
|
||||
|
||||
/* length of string vector */
|
||||
for(vlen = 0, pv = dbv; *pv != NULL; pv++, vlen++);
|
||||
|
||||
for (ch = c = path; ; ch++) {
|
||||
if (*ch == ':' ||
|
||||
(!*ch && !(*(ch - 1) == ':' && ch == 1+ path))) {
|
||||
/* single colon -> dot */
|
||||
if (ch == c)
|
||||
p = dot;
|
||||
else {
|
||||
/* a string */
|
||||
slen = ch - c;
|
||||
if ((p = malloc(sizeof(char) * (slen + 1)))
|
||||
== NULL)
|
||||
err(1, "malloc");
|
||||
bcopy(c, p, slen);
|
||||
*(p + slen) = '\0';
|
||||
}
|
||||
/* increase dbv with element p */
|
||||
if ((dbv = realloc(dbv, sizeof(char **) * (vlen + 2)))
|
||||
== NULL)
|
||||
err(1, "realloc");
|
||||
*(dbv + vlen) = p;
|
||||
*(dbv + ++vlen) = NULL;
|
||||
c = ch + 1;
|
||||
}
|
||||
if (*ch == '\0')
|
||||
break;
|
||||
}
|
||||
return (dbv);
|
||||
}
|
||||
|
||||
void
|
||||
print_matches(counter)
|
||||
u_int counter;
|
||||
{
|
||||
(void)printf("%d\n", counter);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* extract last glob-free subpattern in name for fast pre-match; prepend
|
||||
* '\0' for backwards match; return end of new pattern
|
||||
*/
|
||||
static char globfree[100];
|
||||
|
||||
char *
|
||||
patprep(name)
|
||||
char *name;
|
||||
{
|
||||
register char *endmark, *p, *subp;
|
||||
|
||||
subp = globfree;
|
||||
*subp++ = '\0';
|
||||
p = name + strlen(name) - 1;
|
||||
/* skip trailing metacharacters (and [] ranges) */
|
||||
for (; p >= name; p--)
|
||||
if (index("*?", *p) == 0)
|
||||
break;
|
||||
if (p < name)
|
||||
p = name;
|
||||
if (*p == ']')
|
||||
for (p--; p >= name; p--)
|
||||
if (*p == '[') {
|
||||
p--;
|
||||
break;
|
||||
}
|
||||
if (p < name)
|
||||
p = name;
|
||||
/*
|
||||
* if pattern has only metacharacters, check every path (force '/'
|
||||
* search)
|
||||
*/
|
||||
if ((p == name) && index("?*[]", *p) != 0)
|
||||
*subp++ = '/';
|
||||
else {
|
||||
for (endmark = p; p >= name; p--)
|
||||
if (index("]*?", *p) != 0)
|
||||
break;
|
||||
for (++p;
|
||||
(p <= endmark) && subp < (globfree + sizeof(globfree));)
|
||||
*subp++ = *p++;
|
||||
}
|
||||
*subp = '\0';
|
||||
return(--subp);
|
||||
}
|
||||
|
||||
/* tolower word */
|
||||
u_char *
|
||||
tolower_word(word)
|
||||
u_char *word;
|
||||
{
|
||||
register u_char *p;
|
||||
|
||||
for(p = word; *p != '\0'; p++)
|
||||
*p = TOLOWER(*p);
|
||||
|
||||
return(word);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Read integer from mmap pointer.
|
||||
* Essential a simple ``return *(int *)p'' but avoid sigbus
|
||||
* for integer alignment (SunOS 4.x, 5.x).
|
||||
*
|
||||
* Convert network byte order to host byte order if neccessary.
|
||||
* So we can read on FreeBSD/i386 (little endian) a locate database
|
||||
* which was built on SunOS/sparc (big endian).
|
||||
*/
|
||||
|
||||
int
|
||||
getwm(p)
|
||||
caddr_t p;
|
||||
{
|
||||
static char buf[INTSIZE];
|
||||
register int i;
|
||||
|
||||
for (i = 0; i < INTSIZE; i++)
|
||||
buf[i] = *p++;
|
||||
|
||||
i = *(int *)buf;
|
||||
|
||||
if (i > MAXPATHLEN || i < -(MAXPATHLEN)) {
|
||||
i = ntohl(i);
|
||||
if (i > MAXPATHLEN || i < -(MAXPATHLEN)) {
|
||||
(void)fprintf(stderr,
|
||||
"integer out of +-MAXPATHLEN (%d): %d\n",
|
||||
MAXPATHLEN, i);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
return(i);
|
||||
}
|
||||
|
||||
/*
|
||||
* Read integer from stream.
|
||||
*
|
||||
* Convert network byte order to host byte order if neccessary.
|
||||
* So we can read on FreeBSD/i386 (little endian) a locate database
|
||||
* which was built on SunOS/sparc (big endian).
|
||||
*/
|
||||
|
||||
int
|
||||
getwf(fp)
|
||||
FILE *fp;
|
||||
{
|
||||
register int word;
|
||||
|
||||
word = getw(fp);
|
||||
|
||||
if (word > MAXPATHLEN || word < -(MAXPATHLEN)) {
|
||||
word = ntohl(word);
|
||||
if (word > MAXPATHLEN || word < -(MAXPATHLEN)) {
|
||||
(void)fprintf(stderr,
|
||||
"integer out of +-MAXPATHLEN (%d): %d\n",
|
||||
MAXPATHLEN, word);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
return(word);
|
||||
}
|
Loading…
Reference in New Issue
Block a user