freebsd-dev/ziguard.awk

317 lines
9.2 KiB
Awk
Raw Normal View History

2018-03-24 04:41:49 +00:00
# Convert tzdata source into vanguard or rearguard form.
# Contributed by Paul Eggert. This file is in the public domain.
# This is not a general-purpose converter; it is designed for current tzdata.
2020-10-17 00:41:14 +00:00
# It just converts from current source to main, vanguard, and rearguard forms.
# Although it might be nice for it to be idempotent, or to be useful
# for converting back and forth between vanguard and rearguard formats,
# it does not do these nonessential tasks now.
#
# Although main and vanguard forms are currently equivalent,
2022-08-11 02:23:48 +00:00
# this need not always be the case. When the two forms differ,
# this script can convert either from main to vanguard form (needed then),
# or from vanguard to main form (this conversion would be needed later,
# after main became rearguard and vanguard became main).
# There is no need to convert rearguard to other forms.
2018-03-24 04:41:49 +00:00
#
# When converting to vanguard form, the output can use negative SAVE
# values.
#
# When converting to rearguard form, the output uses only nonnegative
# SAVE values. The idea is for the output data to simulate the behavior
# of the input data as best it can within the constraints of the
# rearguard format.
2022-08-11 02:23:48 +00:00
# Convert FIELD, an [-]h[:mm[...]] string such as "-0:30", to a signed
# count of minutes such as -30.
#   field - the text to convert.
# Returns 60 * hours plus the minutes, where the minutes take the sign
# of a leading "-" in FIELD (so "-0:30" is negative even though its hour
# part is zero).  Anything after a second colon is ignored by the
# numeric conversion.  The names after the backslash are locals.
function get_minutes(field, \
                     negative, whole_hours, tail)
{
  negative = (field ~ /^-/)
  # Numeric conversion stops at the first ":", leaving the signed hours.
  whole_hours = field + 0
  tail = 0
  if (field ~ /:/) {
    # Keep only what follows the first colon; it converts to the minutes.
    tail = field
    sub(/[^:]*:/, "", tail)
  }
  if (negative)
    return 60 * whole_hours - tail
  return 60 * whole_hours + tail
}
# Format OFFSET, a count of minutes such as 300 or 330, as a %z-style
# abbreviation such as "+05" or "+0530".
#   offset - signed number of minutes.
# Returns a signed two-digit hour string when OFFSET is a whole number
# of hours, otherwise a signed four-digit hhmm string.
# The names after the backslash are locals.
function offset_abbr(offset, \
                     hh, mm)
{
  mm = offset % 60
  hh = int(offset / 60)
  if (mm == 0)
    return sprintf("%+.2d", hh)
  # hh and mm share the sign of offset, so hh * 100 + mm is the signed hhmm.
  return sprintf("%+.4d", hh * 100 + mm)
}
# Round TIMESTAMP (a +-hh:mm:ss.dddd string) to the nearest second.
#   timestamp - the text to round.
# Returns TIMESTAMP unchanged when it has no hh:mm:ss.dddd fractional
# part; otherwise returns "[-]h:mm:ss" with a "-" prefix exactly when
# TIMESTAMP starts with "-" (a leading "+" is not reproduced).
# The names after the backslash are locals.
# NOTE(review): the tie test "total % 2" is applied to a total that
# still carries the fractional seconds, so it is nonzero for every
# input whose fraction is exactly .5 and such ties round up in
# magnitude.  That behavior is kept as is.
function round_to_second(timestamp, \
                         fraction, hrs, mins, secs, total, bump, prefix)
{
  # Reduce a copy to its ".dddd" tail; no match means no fraction.
  fraction = timestamp
  if (sub(/^[+-]?[0-9]+:[0-9]+:[0-9]+\./, ".", fraction) == 0)
    return timestamp

  # Strip the sign and leading components so that numeric conversion
  # of each copy yields hours, minutes, and (fractional) seconds.
  hrs = timestamp
  sub(/^[-+]?/, "", hrs)
  mins = timestamp
  sub(/^[-+]?[0-9]+:/, "", mins)
  secs = timestamp
  sub(/^[-+]?[0-9]+:[0-9]+:/, "", secs)

  total = 3600 * hrs + 60 * mins + secs
  fraction += 0
  if (0.5 < fraction)
    bump = 1
  else if (fraction == 0.5 && total % 2)
    bump = 1
  else
    bump = 0
  total += bump

  prefix = (timestamp ~ /^-/) ? "-" : ""
  # The %d conversions discard whatever fraction remains in total.
  return sprintf("%s%d:%.2d:%.2d", prefix, \
                 total / 3600, total / 60 % 60, total % 60)
}
2018-03-24 04:41:49 +00:00
# Start-up: register the recognized DATAFORM values, load the optional
# PACKRATLIST file, and exit with status 1 if DATAFORM is not recognized.
BEGIN {
  dataform_type["vanguard"] = 1
  dataform_type["main"] = 1
  dataform_type["rearguard"] = 1

  # If PACKRATLIST names a file, record the third field of each of its
  # non-comment lines as a key of packratlist.
  if (PACKRATLIST) {
    # Test for "> 0": getline returns -1 on a read error (for example an
    # unreadable file), and a bare "while (getline <file)" would treat
    # that as true and loop forever.
    while ((getline <PACKRATLIST) > 0) {
      if ($0 ~ /^#/) continue
      packratlist[$3] = 1
    }
    close(PACKRATLIST)
  }

  # The command line should set DATAFORM.
  if (!dataform_type[DATAFORM]) exit 1
}
# A line whose first field is "#PACKRATLIST" and whose second field
# equals PACKRATLIST has that two-field prefix (and the blanks after it)
# stripped, so the remainder of the line is processed by the rules below.
($1 == "#PACKRATLIST") && ($2 == PACKRATLIST) {
  sub(/^#PACKRATLIST[\t ]+[^\t ]+[\t ]+/, "")
}

# Remember the name given on the most recent Zone line; later rules
# consult it when handling the lines that follow.
$0 ~ /^Zone/ {
  zone = $2
}
2018-05-04 10:10:21 +00:00
# Rewrite the current line for vanguard or rearguard output.  This rule
# runs for every input line unless DATAFORM is "main".  In order it:
#   1. decides whether the line must be uncommented or commented out
#      (Czechoslovakia, Ireland, Namibia, Portugal alternatives);
#   2. converts between %z and explicit abbreviations;
#   3. substitutes STDOFF values as directed by "#STDOFF" lines;
#   4. adjusts the Japan and Morocco lines.
# It reads the global "zone" and calls get_minutes, offset_abbr and
# round_to_second.  All edits are made to $0 in place.
DATAFORM != "main" {
  # 1 if the line starts with "#", else 0.  Used below as a field-index
  # shift; presumably the commented-out alternatives begin with a lone
  # "#" field -- verify against the data files.
  in_comment = $0 ~ /^#/
  uncomment = comment_out = 0

  # If this line should differ due to Czechoslovakia using negative SAVE values,
  # uncomment the desired version and comment out the undesired one.
  if (zone == "Europe/Prague" && $0 ~ /^#?[\t ]+[01]:00[\t ]/ \
      && $0 ~ /1947 Feb 23/) {
    if (($(in_comment + 2) != "-") == (DATAFORM != "rearguard")) {
      uncomment = in_comment
    } else {
      comment_out = !in_comment
    }
  }

  # If this line should differ due to Ireland using negative SAVE values,
  # uncomment the desired version and comment out the undesired one.
  Rule_Eire = $0 ~ /^#?Rule[\t ]+Eire[\t ]/
  Zone_Dublin_post_1968 \
    = (zone == "Europe/Dublin" && $0 ~ /^#?[\t ]+[01]:00[\t ]/ \
       && (!$(in_comment + 4) || 1968 < $(in_comment + 4)))
  if (Rule_Eire || Zone_Dublin_post_1968) {
    if ((Rule_Eire \
         || (Zone_Dublin_post_1968 && $(in_comment + 3) == "IST/GMT")) \
        == (DATAFORM != "rearguard")) {
      uncomment = in_comment
    } else {
      comment_out = !in_comment
    }
  }

  # If this line should differ due to Namibia using negative SAVE values,
  # uncomment the desired version and comment out the undesired one.
  Rule_Namibia = $0 ~ /^#?Rule[\t ]+Namibia[\t ]/
  Zone_using_Namibia_rule \
    = (zone == "Africa/Windhoek" && $0 ~ /^#?[\t ]+[12]:00[\t ]/ \
       && ($(in_comment + 2) == "Namibia" \
           || ($(in_comment + 2) == "-" && $(in_comment + 3) == "CAT" \
               && ((1994 <= $(in_comment + 4) && $(in_comment + 4) <= 2017) \
                   || in_comment + 3 == NF))))
  if (Rule_Namibia || Zone_using_Namibia_rule) {
    if ((Rule_Namibia \
         ? ($9 ~ /^-/ || ($9 == 0 && $10 == "CAT")) \
         : $(in_comment + 1) == "2:00" && $(in_comment + 2) == "Namibia") \
        == (DATAFORM != "rearguard")) {
      uncomment = in_comment
    } else {
      comment_out = !in_comment
    }
  }

  # If this line should differ due to Portugal benefiting from %z if supported,
  # uncomment the desired version and comment out the undesired one.
  if ($0 ~ /^#?[\t ]+-[12]:00[\t ]+Port[\t ]+[%+-]/) {
    if (($0 ~ /%z/) == (DATAFORM == "vanguard")) {
      uncomment = in_comment
    } else {
      comment_out = !in_comment
    }
  }

  # Apply whichever decision the checks above left in place.
  if (uncomment) {
    sub(/^#/, "")
  }
  if (comment_out) {
    sub(/^/, "#")
  }

  # Prefer %z in vanguard form, explicit abbreviations otherwise.
  if (DATAFORM == "vanguard") {
    # Tag a signed abbreviation in the FORMAT column, undo the tag for
    # "-00", then replace what is still tagged with "%z".
    sub(/^(Zone[\t ]+[^\t ]+)?[\t ]+[^\t ]+[\t ]+[^\t ]+[\t ]+[-+][^\t ]+/, \
        "&CHANGE-TO-%z")
    sub(/-00CHANGE-TO-%z/, "-00")
    sub(/[-+][^\t ]+CHANGE-TO-/, "")
  } else {
    if ($0 ~ /^[^#]*%z/) {
      # STDOFF is field 3 on a Zone line and field 1 otherwise.
      stdoff_column = 2 * ($0 ~ /^Zone/) + 1
      rules_column = stdoff_column + 1
      stdoff = get_minutes($stdoff_column)
      rules = $rules_column
      stdabbr = offset_abbr(stdoff)
      if (rules == "-") {
        abbr = stdabbr
      } else {
        # A RULES field that looks like an offset is a fixed amount of
        # saving, so only the resulting abbreviation is emitted.
        dstabbr_only = rules ~ /^[+0-9-]/
        if (dstabbr_only) {
          dstoff = get_minutes(rules)
        } else {
          # The DST offset is normally an hour, but there are special cases.
          if (rules == "Morocco" && NF == 3) {
            dstoff = -60
          } else if (rules == "NBorneo") {
            dstoff = 20
          } else if (((rules == "Cook" || rules == "LH") && NF == 3) \
                     || (rules == "Uruguay" \
                         && $0 ~ /[\t ](1942 Dec 14|1960|1970|1974 Dec 22)$/)) {
            dstoff = 30
          } else if (rules == "Uruguay" && $0 ~ /[\t ]1974 Mar 10$/) {
            dstoff = 90
          } else {
            dstoff = 60
          }
        }
        dstabbr = offset_abbr(stdoff + dstoff)
        if (dstabbr_only) {
          abbr = dstabbr
        } else {
          abbr = stdabbr "/" dstabbr
        }
      }
      sub(/%z/, abbr)
    }
  }

  # Normally, prefer whole seconds.  However, prefer subseconds
  # if generating vanguard form and the otherwise-undocumented
  # VANGUARD_SUBSECONDS environment variable is set.
  # This relies on #STDOFF comment lines in the data.
  # It is for hypothetical clients that support UT offsets that are
  # not integer multiples of one second (e.g., Europe/Lisbon, 1884 to 1912).
  # No known clients need this currently, and this experimental
  # feature may be changed or withdrawn in future releases.
  if ($1 == "#STDOFF") {
    # stdoff_subst[0] is the text to look for on following lines and
    # stdoff_subst[1] is its replacement.
    stdoff = $2
    rounded_stdoff = round_to_second(stdoff)
    if (DATAFORM == "vanguard" && ENVIRON["VANGUARD_SUBSECONDS"]) {
      stdoff_subst[0] = rounded_stdoff
      stdoff_subst[1] = stdoff
    } else {
      stdoff_subst[0] = stdoff
      stdoff_subst[1] = rounded_stdoff
    }
  } else if (stdoff_subst[0]) {
    stdoff_column = 2 * ($0 ~ /^Zone/) + 1
    stdoff_column_val = $stdoff_column
    if (stdoff_column_val == stdoff_subst[0]) {
      sub(stdoff_subst[0], stdoff_subst[1])
    } else if (stdoff_column_val != stdoff_subst[1]) {
      # A line with some other STDOFF ends the substitution.
      stdoff_subst[0] = 0
    }
  }

  # In rearguard form, change the Japan rule line with "Sat>=8 25:00"
  # to "Sun>=9 1:00", to cater to zic before 2007 and to older Java.
  if ($0 ~ /^Rule/ && $2 == "Japan") {
    if (DATAFORM == "rearguard") {
      if ($7 == "Sat>=8" && $8 == "25:00") {
        sub(/Sat>=8/, "Sun>=9")
        sub(/25:00/, " 1:00")
      }
    } else {
      if ($7 == "Sun>=9" && $8 == "1:00") {
        sub(/Sun>=9/, "Sat>=8")
        sub(/ 1:00/, "25:00")
      }
    }
  }

  # In rearguard form, change the Morocco lines with negative SAVE values
  # to use positive SAVE values.
  if ($2 == "Morocco") {
    if ($0 ~ /^Rule/) {
      if ($4 ~ /^201[78]$/ && $6 == "Oct") {
        if (DATAFORM == "rearguard") {
          sub(/\t2018\t/, "\t2017\t")
        } else {
          sub(/\t2017\t/, "\t2018\t")
        }
      }

      if (2019 <= $3) {
        if ($8 == "2:00") {
          if (DATAFORM == "rearguard") {
            sub(/\t0\t/, "\t1:00\t")
          } else {
            sub(/\t1:00\t/, "\t0\t")
          }
        } else {
          if (DATAFORM == "rearguard") {
            sub(/\t-1:00\t/, "\t0\t")
          } else {
            sub(/\t0\t/, "\t-1:00\t")
          }
        }
      }
    }
    if ($1 ~ /^[+0-9-]/ && NF == 3) {
      if (DATAFORM == "rearguard") {
        sub(/1:00\tMorocco/, "0:00\tMorocco")
        sub(/\t\+01\/\+00$/, "\t+00/+01")
      } else {
        sub(/0:00\tMorocco/, "1:00\tMorocco")
        # Both "+" signs must be escaped: an unescaped "+" after the
        # slash is a quantifier on the slash, and the pattern could then
        # never match the literal text "+00/+01".
        sub(/\t\+00\/\+01$/, "\t+01/+00")
      }
    }
  }
}
2022-08-11 02:23:48 +00:00
# On every Zone line, recompute whether the zone is suppressed: it is
# when the line comes from the PACKRATDATA file, a PACKRATLIST was
# given, and the zone name is not a key of packratlist.
/^Zone/ {
  packrat_ignored = 0
  if (FILENAME == PACKRATDATA && PACKRATLIST && !packratlist[$2])
    packrat_ignored = 1
}

# While suppression is in effect, comment out every line that does not
# start with "Rule".
packrat_ignored && $0 !~ /^Rule/ {
  sub(/^/, "#")
}
2021-09-26 06:35:07 +00:00
# If a Link line is followed by a Link or Zone line for the same data, comment
# out the Link line.  This can happen if backzone overrides a Link
# with a Zone or a different Link.
# linkline maps a name to the record number of the latest Link line
# that defined it; line holds every output line, keyed by record number.

# A Zone line supersedes an earlier Link whose third field was this name.
/^Zone/ {
  superseded_nr = linkline[$2]
  sub(/^Link/, "#Link", line[superseded_nr])
}

# A Link line supersedes an earlier Link for the same third field, and
# then becomes the one on record for it.
/^Link/ {
  superseded_nr = linkline[$3]
  sub(/^Link/, "#Link", line[superseded_nr])
  linkline[$3] = NR
}

# Buffer the (possibly edited) line; output happens in END so that
# earlier lines can still be commented out.
{
  line[NR] = $0
}
# Emit the buffered lines in input order, one per record read.
END {
  i = 1
  while (i <= NR) {
    print line[i]
    i++
  }
}