diff --git a/tools/tools/locale/Makefile b/tools/tools/locale/Makefile index 2b5aa55d189a..bac5c3e7f63b 100644 --- a/tools/tools/locale/Makefile +++ b/tools/tools/locale/Makefile @@ -22,6 +22,23 @@ KNOWN= monetdef numericdef msgdef timedef colldef ctypedef TYPES?= ${KNOWN} LOCALE_DESTDIR?= /tmp/generated-locales/ +COLLATION_SPECIAL?= \ + cs_CZ ISO8859-2 \ + da_DK ISO8859-1 \ + da_DK ISO8859-15 \ + hr_HR ISO8859-2 \ + hu_HU ISO8859-2 \ + nb_NO ISO8859-1 \ + nb_NO ISO8859-15 \ + sk_SK ISO8859-2 \ + zh_Hans_CN GB2312 \ + zh_Hans_CN eucCN \ + +.for area enc in ${COLLATION_SPECIAL} +COLLATIONS_SPECIAL_ENV+= ${area}.${enc} +.endfor +PASSON+= COLLATIONS_SPECIAL="${COLLATIONS_SPECIAL_ENV}" + .if defined(LC) LC:= --lc=${LC} .endif @@ -55,17 +72,26 @@ post-install: .endfor .for t in ${TYPES} -build-${t}: +gen-${t}: mkdir -p ${t} ${t}.draft perl -I tools tools/cldr2def.pl \ --cldr=$$(realpath ${CLDRDIR}) \ --unidata=$$(realpath ${UNIDATADIR}) \ --etc=$$(realpath ${ETCDIR}) \ --type=${t} ${LC} + +build-${t}: gen-${t} env ${PASSON} tools/finalize ${t} .endfor -build-ctypedef: transfer-rollup +gen-ctypedef: transfer-rollup +static-colldef: gen-colldef +build-colldef: static-colldef + +static-colldef: +.for area enc in ${COLLATION_SPECIAL} + awk -f tools/extract-colldef.awk ${CLDRDIR}/posix/${area}.${enc}.src > colldef/${area}.${enc}.src +.endfor transfer-rollup: cp ${ETCDIR}/common.UTF-8.src ${CLDRDIR}/posix/xx_Comm_US.UTF-8.src @@ -93,12 +119,34 @@ BASE_LOCALES_OF_INTEREST?= \ uk_UA \ kk_Cyrl_KZ mn_Cyrl_MN sr_Cyrl_RS sr_Latn_RS \ zh_Hans_CN zh_Hant_HK zh_Hant_TW \ - \ - \ bn_IN gu_IN or_IN ta_IN te_IN kn_IN ml_IN si_LK \ th_TH lo_LA bo_IN my_MM pa_Guru_IN ka_GE chr_US \ km_KH shi_Tfng_MA ii_CN vai_Vaii_LR vi_VN +ENCODINGS= Big5 \ + CP1251 \ + CP866 \ + CP949 \ + eucCN \ + eucJP \ + eucKR \ + GB18030 \ + GB2312 \ + GBK \ + ISO8859-1 \ + ISO8859-13 \ + ISO8859-15 \ + ISO8859-2 \ + ISO8859-5 \ + ISO8859-7 \ + ISO8859-9 \ + KOI8-R \ + KOI8-U \ + SJIS \ + US-ASCII \ + UTF-8 \ + + POSIX: .if exists (${CLDRDIR}/tools/java/cldr.jar) mkdir -p ${CLDRDIR}/posix @@ -109,11 +157,20 @@ POSIX: -d ${CLDRDIR}/posix -m ${area} -c UTF-8 . endif . endfor -. if !exists(${CLDRDIR}/posix/UTF-8.cm) +. for area encoding in ${COLLATION_SPECIAL} +. if !exists(${CLDRDIR}/posix/${area}.${encoding}.src) + java -DCLDR_DIR=${CLDRDIR:Q} -jar ${CLDRDIR}/tools/java/cldr.jar \ + org.unicode.cldr.posix.GeneratePOSIX \ + -d ${CLDRDIR}/posix -m ${area} -c ${encoding} +. endif +. endfor +. for enc in ${ENCODINGS} +. if !exists(${CLDRDIR}/posix/${enc}.cm) java -DCLDR_DIR=${CLDRDIR:Q} -jar ${CLDRDIR}/tools/java/cldr.jar \ org.unicode.cldr.posix.GenerateCharmap \ - -d ${CLDRDIR}/posix + -d ${CLDRDIR}/posix -c ${enc} . endif +. endfor .else @echo "Please install CLDR toolset for the desired release" @echo "It should go at ${CLDRDIR}/tools" diff --git a/tools/tools/locale/etc/charmaps.xml b/tools/tools/locale/etc/charmaps.xml index 0b6551aee55f..e0d39b25d576 100644 --- a/tools/tools/locale/etc/charmaps.xml +++ b/tools/tools/locale/etc/charmaps.xml @@ -185,10 +185,6 @@ family="Hans" encoding="GB18030 GB2312 GBK eucCN" countries="CN" /> - - - - - - - - - - - - - - - - - - - - - @@ -516,11 +512,11 @@ cldr="CJK UNIFIED IDEOGRAPH-706B" ucc="706B" /> - - - diff --git a/tools/tools/locale/etc/charmaps/charmaps.txt b/tools/tools/locale/etc/charmaps/charmaps.txt index a0791f7f9567..d8f8bb8190b3 100644 --- a/tools/tools/locale/etc/charmaps/charmaps.txt +++ b/tools/tools/locale/etc/charmaps/charmaps.txt @@ -8,7 +8,6 @@ haible.de: http://haible.de/bruno/charsets/conversion-tables/ ARMSCII-8 haible.de: Armenian.html Big5 unicodeorg: OBSOLETE/EASTASIA/OTHER - Big5HKSCS haible.de: BIG5-HKSCS.html / CP1131 haible.de: CP1131.html / aix-4.3.2/IBM-1131.TXT CP1251 unicode.org: VENDORS/MICSFT/WINDOWS CP866 unicode.org: VENDORS/MICSFT/PC diff --git a/tools/tools/locale/tools/cldr2def.pl b/tools/tools/locale/tools/cldr2def.pl index fae7c91b4273..3f61bb127402 100755 --- a/tools/tools/locale/tools/cldr2def.pl +++ b/tools/tools/locale/tools/cldr2def.pl @@ -808,14 +808,24 @@ sub make_makefile { my $SRCOUT; my $SRCOUT2; my $SRCOUT3 = ""; + my $SRCOUT4 = ""; my $MAPLOC; if ($TYPE eq "colldef") { $SRCOUT = "localedef -D -U -i \${.IMPSRC} \\\n" . - "\t-f \${MAPLOC}/map.UTF-8 " . + "\t-f \${MAPLOC}/map.\${.TARGET:T:R:E} " . "\${.OBJDIR}/\${.IMPSRC:T:R}"; $MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" . "locale/etc/final-maps\n"; $SRCOUT2 = "LC_COLLATE"; + $SRCOUT3 = "" . + ".for f t in \${LOCALES_MAPPED}\n" . + "FILES+=\t\$t.LC_COLLATE\n" . + "\$t.LC_COLLATE: \${.CURDIR}/\$f.src\n" . + "\tlocaledef -D -U -i \${.ALLSRC} \\\n" . + "\t\t-f \${MAPLOC}/map.\${.TARGET:T:R:E} \\\n" . + "\t\t\${.OBJDIR}/\${.TARGET:T:R}\n" . + ".endfor\n\n"; + $SRCOUT4 = "## LOCALES_MAPPED\n"; } elsif ($TYPE eq "ctypedef") { $SRCOUT = "localedef -D -U -c -w \${MAPLOC}/widths.txt \\\n" . @@ -855,6 +865,8 @@ ${MAPLOC} ## PLACEHOLDER +${SRCOUT4} + EOF foreach my $hash (keys(%hashtable)) { diff --git a/tools/tools/locale/tools/convert_map.pl b/tools/tools/locale/tools/convert_map.pl index e5381f3f3dcc..88222531d064 100755 --- a/tools/tools/locale/tools/convert_map.pl +++ b/tools/tools/locale/tools/convert_map.pl @@ -1,5 +1,7 @@ #! /usr/local/bin/perl # +# $FreeBSD$ +# # This file and its contents are supplied under the terms of the # Common Development and Distribution License ("CDDL"), version 1.0. # You may only use this file in accordance with the terms of version @@ -167,7 +169,6 @@ elsif ($codeset eq "eucKR") { $max_mb = 2 } elsif ($codeset eq "GBK") { $max_mb = 2 } elsif ($codeset eq "GB2312") { $max_mb = 2 } elsif ($codeset eq "Big5") { $max_mb = 2 } -elsif ($codeset eq "Big5HKSCS") { $max_mb = 2 } else { $max_mb = 1 }; print(" \"$codeset\"\n"); print(" 1\n"); diff --git a/tools/tools/locale/tools/extract-colldef.awk b/tools/tools/locale/tools/extract-colldef.awk new file mode 100644 index 000000000000..3f2924922fc1 --- /dev/null +++ b/tools/tools/locale/tools/extract-colldef.awk @@ -0,0 +1,18 @@ +# $FreeBSD$ + +BEGIN { + print "# Warning: Do not edit. This is automatically extracted" + print "# from CLDR project data, obtained from http://cldr.unicode.org/" + print "# -----------------------------------------------------------------------------" +} +$1 == "comment_char" { print $0 } +$1 == "escape_char" { print $0 } +$1 == "LC_COLLATE" { + print $0 + while (getline line) { + print line + if (line == "END LC_COLLATE") { + break + } + } +} diff --git a/tools/tools/locale/tools/finalize b/tools/tools/locale/tools/finalize index 7ce3e74bb6ec..b32c52c21d6d 100755 --- a/tools/tools/locale/tools/finalize +++ b/tools/tools/locale/tools/finalize @@ -26,12 +26,15 @@ new=${base}/../${1} TEMP=/tmp/${1}.locales TEMP2=/tmp/${1}.hashes TEMP3=/tmp/${1}.symlinks +TEMP4=/tmp/${1}.mapped FULLMAP=/tmp/utf8-map FULLEXTRACT=/tmp/extracted-names AWKCMD="/## PLACEHOLDER/ { \ while ( getline line < \"${TEMP}\" ) {print line} } \ /## SYMPAIRS/ { \ while ( getline line < \"${TEMP3}\" ) {print line} } \ + /## LOCALES_MAPPED/ { \ + while ( getline line < \"${TEMP4}\" ) {print line} } \ !/## / { print \$0 }" grep '^LOCALES+' ${old}/Makefile > ${TEMP} @@ -51,21 +54,23 @@ then /usr/bin/sed -E -e 's/[ ]+/ /g' \ ${CLDRDIR}/posix/UTF-8.cm \ > ${base}/../etc/final-maps/map.UTF-8 - CHARMAPS="ARMSCII-8 Big5 Big5HKSCS CP1131 CP1251 \ + /usr/bin/sed -E -e 's/[ ]+/ /g' \ + ${CLDRDIR}/posix/eucCN.cm \ + > ${base}/../etc/final-maps/map.eucCN + /usr/bin/sed -E -e 's/[ ]+/ /g' \ + ${CLDRDIR}/posix/eucCN.cm \ + > ${base}/../etc/final-maps/map.GB2312 + CHARMAPS="ARMSCII-8 Big5 CP1131 CP1251 \ CP866 GB2312 GBK ISCII-DEV ISO8859-1 \ ISO8859-13 ISO8859-15 ISO8859-2 ISO8859-4 \ ISO8859-5 ISO8859-7 ISO8859-9 KOI8-R KOI8-U \ - PT154 SJIS US-ASCII eucCN eucJP eucKR" + PT154 SJIS US-ASCII eucJP eucKR" # GB18030 blows up, use pre-generate Illumos version for map in ${CHARMAPS} do encoding=${map} - if [ ${map} = "Big5HKSCS" ] - then - encoding="Big5" - fi /usr/local/bin/perl ${base}/convert_map.pl \ ${base}/../etc/charmaps/${map}.TXT ${encoding} \ | /usr/bin/sed -E -e 's/ +/ /g' \ @@ -73,6 +78,31 @@ then echo map ${map} converted. done +elif [ $1 = "colldef" ] +then + awk -v tmp4=${TEMP4} '$1 == "SAME+=" && $0 !~ /legacy/ { + orig=$2 + dest=$3 + gsub(/.*\./, "", orig) + gsub(/.*\./, "", dest) + if (orig != dest ) + print "LOCALES_MAPPED+=\t"$2 " "$3 > tmp4 + }' ${old}/Makefile + + for line in $(awk '{ print $3 }' ${TEMP4}); do + sed -i '' "/^SAME.*$line$/d" ${old}/Makefile + done + echo "" >> ${TEMP4} + for enc in ${COLLATIONS_SPECIAL}; do + sed -i '' "/^.*${enc}$/d" ${TEMP4} + echo "LOCALES+= ${enc}" >> ${TEMP4} + done + + keep=$(cat ${TEMP} | awk '{ print $2 }') + for original in ${keep} + do + cp ${old}/${original}.src ${new}/ + done else # below is everything but ctypedef keep=$(cat ${TEMP} | awk '{ print $2 }') @@ -85,4 +115,4 @@ fi grep -v '^LOCALES+' ${old}/Makefile | awk "${AWKCMD}" > ${new}/Makefile -rm -f ${TEMP} ${TEMP3} +rm -f ${TEMP} ${TEMP3} ${TEMP4}