awk: print the last two characters of each bigram line - not the second word
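A bigram may contain a space character, and we always need two characters. Because awk splits fields on whitespace, $2 would drop or truncate such a bigram, so we take the last two characters of the line instead.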
This commit is contained in:
parent
41c539bdd1
commit
0a88bd81b7
@ -68,7 +68,8 @@ trap 'rm -f $bigrams' 0 1 2 3 5 10 15
|
||||
for db
|
||||
do
|
||||
$locate -d $db /
|
||||
done | $bigram | $sort -nr | awk 'NR <= 128 { printf $2 }' > $bigrams
|
||||
done | $bigram | $sort -nr | \
|
||||
awk 'NR <= 128 && /^[ \t]*[1-9][0-9]*[ \t]+..$/ { printf("%s", substr($0, length($0)-1, 2)) }' > $bigrams
|
||||
|
||||
for db
|
||||
do
|
||||
|
Loading…
Reference in New Issue
Block a user