sync improvements from concatdb.sh

- stop on first error
- improve awk script: print the last two characters for bigram -  not the second word
- remove unnecessary checks
- use mktemp
- refactor
This commit is contained in:
Wolfram Schneider 2022-01-29 07:10:18 +00:00
parent 9dac026822
commit 21cc4bee69

View File

@ -2,7 +2,7 @@
#
# SPDX-License-Identifier: BSD-2-Clause-FreeBSD
#
# Copyright (c) September 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin.
# Copyright (c) September 1995-2022 Wolfram Schneider <wosch@FreeBSD.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
@ -32,43 +32,32 @@
#
# $FreeBSD$
# stop on first error
set -e
set -o pipefail
# The directory containing locate subprograms
: ${LIBEXECDIR:=/usr/libexec}; export LIBEXECDIR
: ${TMPDIR:=/var/tmp}; export TMPDIR
PATH=$LIBEXECDIR:/bin:/usr/bin:$PATH; export PATH
umask 077 # protect temp files
: ${TMPDIR:=/tmp}; export TMPDIR
test -d "$TMPDIR" || TMPDIR=/tmp
if ! TMPDIR=`mktemp -d $TMPDIR/mklocateXXXXXXXXXX`; then
exit 1
fi
# utilities to built locate database
: ${bigram:=locate.bigram}
: ${code:=locate.code}
: ${sort:=sort}
sort_opt="-u -T $TMPDIR -S 20%"
sortopt="-u -T $TMPDIR -S 20%"
sortcmd=$sort
bigrams=$TMPDIR/_mklocatedb$$.bigrams
filelist=$TMPDIR/_mklocatedb$$.list
trap 'rm -f $bigrams $filelist; rmdir $TMPDIR' 0 1 2 3 5 10 15
bigrams=$(mktemp -t mklocatedb.bigrams)
filelist=$(mktemp -t mklocatedb.filelist)
trap 'rm -f $bigrams $filelist' 0 1 2 3 5 10 15
# Input already sorted
if [ X"$1" = "X-presort" ]; then
shift;
# create an empty file
true > $bigrams
# Locate database bootstrapping
# 1. first build a temp database without bigram compression
# 2. create the bigram from the temp database
@ -76,19 +65,15 @@ if [ X"$1" = "X-presort" ]; then
#
# This scheme avoid large temporary files in /tmp
$code $bigrams > $filelist || exit 1
locate -d $filelist / | $bigram | $sort -nr | head -128 |
awk '{if (/^[ ]*[0-9]+[ ]+..$/) {printf("%s",$2)} else {exit 1}}' > $bigrams || exit 1
locate -d $filelist / | $code $bigrams || exit 1
exit
$code $bigrams > $filelist
locate -d $filelist / | $bigram | $sort -nr | \
awk 'NR <= 128 && /^[ \t]*[1-9][0-9]*[ \t]+..$/ { printf("%s", substr($0, length($0)-1, 2)) }' > $bigrams
locate -d $filelist / | $code $bigrams
else
if $sortcmd $sortopt > $filelist; then
$bigram < $filelist | $sort -nr |
awk '{if (/^[ ]*[0-9]+[ ]+..$/) {printf("%s",$2)} else {exit 1}}' > $bigrams || exit 1
$code $bigrams < $filelist || exit 1
else
echo "`basename $0`: cannot build locate database" >&2
exit 1
fi
$sort $sort_opt > $filelist
$bigram < $filelist | $sort -nr | \
awk 'NR <= 128 && /^[ \t]*[1-9][0-9]*[ \t]+..$/ { printf("%s", substr($0, length($0)-1, 2)) }' > $bigrams
$code $bigrams < $filelist
fi
#EOF