499b3d00ca
Changes: https://github.com/eggert/tz/blob/2018c/NEWS MFC after: 3 days
158 lines
4.1 KiB
Awk
158 lines
4.1 KiB
Awk
# Convert tzdata source into a smaller version of itself.
|
|
|
|
# Contributed by Paul Eggert. This file is in the public domain.
|
|
|
|
# This is not a general-purpose converter; it is designed for current tzdata.
|
|
# 'zic' should treat this script's output as if it were identical to
|
|
# this script's input.
|
|
|
|
|
|
# Return a new rule name.
|
|
# N_RULE_NAMES keeps track of how many rule names have been generated.
|
|
|
|
function gen_rule_name(alphabet, base, rule_name, n, digit)
|
|
{
|
|
alphabet = ""
|
|
alphabet = alphabet "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
|
alphabet = alphabet "abcdefghijklmnopqrstuvwxyz"
|
|
alphabet = alphabet "!$%&'()*+,./:;<=>?@[\\]^_`{|}~"
|
|
base = length(alphabet)
|
|
rule_name = ""
|
|
n = n_rule_names++
|
|
|
|
do {
|
|
n -= rule_name && n <= base
|
|
digit = n % base
|
|
rule_name = substr(alphabet, digit + 1, 1) rule_name
|
|
n = (n - digit) / base
|
|
} while (n);
|
|
|
|
return rule_name
|
|
}
|
|
|
|
# Process an input line and save it for later output.
|
|
|
|
function process_input_line(line, field, end, i, n, startdef)
|
|
{
|
|
# Remove comments, normalize spaces, and append a space to each line.
|
|
sub(/#.*/, "", line)
|
|
line = line " "
|
|
gsub(/[\f\r\t\v ]+/, " ", line)
|
|
|
|
# Abbreviate keywords. Do not abbreviate "Link" to just "L",
|
|
# as pre-2017c zic erroneously diagnoses "Li" as ambiguous.
|
|
sub(/^Link /, "Li ", line)
|
|
sub(/^Rule /, "R ", line)
|
|
sub(/^Zone /, "Z ", line)
|
|
|
|
# SystemV rules are not needed.
|
|
if (line ~ /^R SystemV /) return
|
|
|
|
# Replace FooAsia rules with the same rules without "Asia", as they
|
|
# are duplicates.
|
|
if (match(line, /[^ ]Asia /)) {
|
|
if (line ~ /^R /) return
|
|
line = substr(line, 1, RSTART) substr(line, RSTART + 5)
|
|
}
|
|
|
|
# Abbreviate times.
|
|
while (match(line, /[: ]0+[0-9]/))
|
|
line = substr(line, 1, RSTART) substr(line, RSTART + RLENGTH - 1)
|
|
while (match(line, /:0[^:]/))
|
|
line = substr(line, 1, RSTART - 1) substr(line, RSTART + 2)
|
|
|
|
# Abbreviate weekday names. Do not abbreviate "Sun" and "Sat", as
|
|
# pre-2017c zic erroneously diagnoses "Su" and "Sa" as ambiguous.
|
|
while (match(line, / (last)?(Mon|Wed|Fri)[ <>]/)) {
|
|
end = RSTART + RLENGTH
|
|
line = substr(line, 1, end - 4) substr(line, end - 1)
|
|
}
|
|
while (match(line, / (last)?(Tue|Thu)[ <>]/)) {
|
|
end = RSTART + RLENGTH
|
|
line = substr(line, 1, end - 3) substr(line, end - 1)
|
|
}
|
|
|
|
# Abbreviate "max", "only" and month names.
|
|
# Do not abbreviate "min", as pre-2017c zic erroneously diagnoses "mi"
|
|
# as ambiguous.
|
|
gsub(/ max /, " ma ", line)
|
|
gsub(/ only /, " o ", line)
|
|
gsub(/ Jan /, " Ja ", line)
|
|
gsub(/ Feb /, " F ", line)
|
|
gsub(/ Apr /, " Ap ", line)
|
|
gsub(/ Aug /, " Au ", line)
|
|
gsub(/ Sep /, " S ", line)
|
|
gsub(/ Oct /, " O ", line)
|
|
gsub(/ Nov /, " N ", line)
|
|
gsub(/ Dec /, " D ", line)
|
|
|
|
# Strip leading and trailing space.
|
|
sub(/^ /, "", line)
|
|
sub(/ $/, "", line)
|
|
|
|
# Remove unnecessary trailing zero fields.
|
|
sub(/ 0+$/, "", line)
|
|
|
|
# Remove unnecessary trailing days-of-month "1".
|
|
if (match(line, /[A-Za-z] 1$/))
|
|
line = substr(line, 1, RSTART)
|
|
|
|
# Remove unnecessary trailing " Ja" (for January).
|
|
sub(/ Ja$/, "", line)
|
|
|
|
n = split(line, field)
|
|
|
|
# Abbreviate rule names.
|
|
i = field[1] == "Z" ? 4 : field[1] == "Li" ? 0 : 2
|
|
if (i && field[i] ~ /^[^-+0-9]/) {
|
|
if (!rule[field[i]])
|
|
rule[field[i]] = gen_rule_name()
|
|
field[i] = rule[field[i]]
|
|
}
|
|
|
|
# If this zone supersedes an earlier one, delete the earlier one
|
|
# from the saved output lines.
|
|
startdef = ""
|
|
if (field[1] == "Z")
|
|
zonename = startdef = field[2]
|
|
else if (field[1] == "Li")
|
|
zonename = startdef = field[3]
|
|
else if (field[1] == "R")
|
|
zonename = ""
|
|
if (startdef) {
|
|
i = zonedef[startdef]
|
|
if (i) {
|
|
do
|
|
output_line[i - 1] = ""
|
|
while (output_line[i++] ~ /^[-+0-9]/);
|
|
}
|
|
}
|
|
zonedef[zonename] = nout + 1
|
|
|
|
# Save the line for later output.
|
|
line = field[1]
|
|
for (i = 2; i <= n; i++)
|
|
line = line " " field[i]
|
|
output_line[nout++] = line
|
|
}
|
|
|
|
function output_saved_lines(i)
|
|
{
|
|
for (i = 0; i < nout; i++)
|
|
if (output_line[i])
|
|
print output_line[i]
|
|
}
|
|
|
|
BEGIN {
|
|
print "# version", version
|
|
print "# This zic input file is in the public domain."
|
|
}
|
|
|
|
/^[\f\r\t\v ]*[^#\f\r\t\v ]/ {
|
|
process_input_line($0)
|
|
}
|
|
|
|
END {
|
|
output_saved_lines()
|
|
}
|