Eitan Adler 62e8d5e1b0 fortune/tools/do_uniq.py
slight imporvements to do_uniq.py:
	- use with open()
	- don't depend on sys.argv directly
	- fix style
2015-04-02 22:42:23 +00:00

69 lines
1.7 KiB
Python

#!/usr/local/bin/python
#
# $FreeBSD$
#
# an aggressive little script for trimming duplicate cookies
import argparse
import re
wordlist = [
'hadnot',
'donot', 'hadnt',
'dont', 'have', 'more', 'will', 'your',
'and', 'are', 'had', 'the', 'you',
'am', 'an', 'is', 'll', 've', 'we',
'a', 'd', 'i', 'm', 's',
]
def hash(fortune):
f = fortune
f = f.lower()
f = re.sub('[\W_]', '', f)
for word in wordlist:
f = re.sub(word, '', f)
# f = re.sub('[aeiouy]', '', f)
# f = re.sub('[^aeiouy]', '', f)
f = f[:30]
# f = f[-30:]
return f
def edit(datfile):
dups = {}
fortunes = []
fortune = ""
with open(datfile, "r") as datfiledf:
for line in datfiledf:
if line == "%\n":
key = hash(fortune)
if key not in dups:
dups[key] = []
dups[key].append(fortune)
fortunes.append(fortune)
fortune = ""
else:
fortune += line
for key in list(dups.keys()):
if len(dups[key]) == 1:
del dups[key]
with open(datfile + "~", "w") as o:
for fortune in fortunes:
key = hash(fortune)
if key in dups:
print('\n' * 50)
for f in dups[key]:
if f != fortune:
print(f, '%')
print(fortune, '%')
if input("Remove last fortune? ") == 'y':
del dups[key]
continue
o.write(fortune + "%\n")
parser = argparse.ArgumentParser(description="trimming duplicate cookies")
parser.add_argument("filename", type=str, nargs=1)
args = parser.parse_args()
edit(args.filename[0])