Synchronize locale generation tools with dragonfly

generates the makefiles and the sources
This commit is contained in:
Baptiste Daroussin 2015-10-13 20:21:52 +00:00
parent becbad1f6e
commit 4a707b2112
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/projects/collation/; revision=289260
7 changed files with 7644 additions and 81 deletions

View File

@ -27,6 +27,7 @@ LC:= --lc=${LC}
.endif
all:
cp ${ETCDIR}/common.UTF-8.src ${CLDRDIR}/posix/xx_Comm_US.UTF-8.src
.for t in ${TYPES}
. if ${KNOWN:M${t}}
test -d ${t} || mkdir ${t}
@ -64,13 +65,24 @@ build-${t}:
env ${PASSON} tools/finalize ${t}
.endfor
build-ctypedef: transfer-rollup
transfer-rollup:
cp ${ETCDIR}/common.UTF-8.src ${CLDRDIR}/posix/xx_Comm_US.UTF-8.src
rollup:
perl -I tools tools/utf8-rollup.pl \
--cldr=$$(realpath ${CLDRDIR}) \
--etc=$$(realpath ${ETCDIR})
clean:
.for t in ${TYPES}
rm -rf ${t} ${t}.draft
.endfor
BASE_LOCALES_OF_INTEREST?= \
af_ZA am_ET be_BY bg_BG ca_AD ca_ES ca_FR ca_IT \
af_ZA am_ET ar_AE ar_EG ar_JO ar_MA ar_QA ar_SA \
be_BY bg_BG ca_AD ca_ES ca_FR ca_IT \
cs_CZ da_DK de_AT de_CH de_DE el_GR en_AU en_CA \
en_GB en_HK en_IE en_NZ en_PH en_SG en_US en_ZA \
es_AR es_CR es_ES es_MX et_EE eu_ES fi_FI fr_BE \
@ -80,7 +92,12 @@ BASE_LOCALES_OF_INTEREST?= \
ru_RU se_FI se_NO sk_SK sl_SI sv_FI sv_SE tr_TR \
uk_UA \
kk_Cyrl_KZ mn_Cyrl_MN sr_Cyrl_RS sr_Latn_RS \
zh_Hans_CN zh_Hant_HK zh_Hant_TW
zh_Hans_CN zh_Hant_HK zh_Hant_TW \
\
\
bn_IN gu_IN or_IN ta_IN te_IN kn_IN ml_IN si_LK \
th_TH lo_LA bo_IN my_MM pa_Guru_IN ka_GE chr_US \
km_KH shi_Tfng_MA ii_CN vai_Vaii_LR vi_VN
POSIX:
.if exists (${CLDRDIR}/tools/java/cldr.jar)

View File

@ -28,10 +28,12 @@
-->
<language name="af"
encoding="ISO8859-1 ISO8859-15"
encoding="ISO8859-1"
countries="ZA" />
<language name="am"
countries="ET" /> <!-- UTF-8 only -->
<language name="ar"
countries="AE EG JO MA QA SA" />
<language name="be"
encoding="CP1131 CP1251 ISO8859-5"
countries="BY" />
@ -40,46 +42,57 @@
countries="BG" />
<language name="ca"
fallback="ca_ES"
encoding="ISO8859-1 ISO8859-15"
encoding="ISO8859-15"
countries="AD ES FR IT" /> <!-- only ca_ES defined -->
<language name="cs"
encoding="ISO8859-2"
countries="CZ" />
<language name="da"
encoding="ISO8859-1 ISO8859-15"
encoding="ISO8859-15"
countries="DK" />
<language name="de"
encoding="ISO8859-1 ISO8859-15"
encoding="ISO8859-15"
countries="AT CH DE" />
<language name="el"
encoding="ISO8859-7"
countries="GR" />
<language name="en"
encoding="ISO8859-1 ISO8859-15 US-ASCII"
countries="AU CA GB NZ US ZA" />
encoding="ISO8859-15 US-ASCII"
countries="GB" />
<language name="en"
encoding="ISO8859-1 US-ASCII"
countries="AU CA NZ US ZA" />
<language name="en"
encoding="ISO8859-15"
countries="IE" />
<language name="en"
encoding="ISO8859-1"
countries="HK PH SG" />
countries="HK SG" />
<language name="en"
countries="PH" /> <!-- UTF-8 only -->
<language name="es"
encoding="ISO8859-1 ISO8859-15"
countries="CR" /> <!-- UTF-8 only -->
<language name="es"
encoding="ISO8859-15"
countries="ES" />
<language name="es"
encoding="ISO8859-1"
countries="AR CR MX" />
countries="AR MX" />
<language name="et"
encoding="ISO8859-15"
countries="EE" />
<language name="eu"
encoding="ISO8859-1 ISO8859-15"
encoding="ISO8859-15"
countries="ES" />
<language name="fi"
encoding="ISO8859-1 ISO8859-15"
encoding="ISO8859-15"
countries="FI" />
<language name="fr"
encoding="ISO8859-1 ISO8859-15"
countries="BE CA CH FR" />
encoding="ISO8859-15"
countries="BE CH FR" />
<language name="fr"
encoding="ISO8859-1"
countries="CA" />
<language name="he"
countries="IL" />
<language name="hi"
@ -95,10 +108,10 @@
encoding="ARMSCII-8"
countries="AM" />
<language name="is"
encoding="ISO8859-1 ISO8859-15"
encoding="ISO8859-15"
countries="IS" />
<language name="it"
encoding="ISO8859-1 ISO8859-15"
encoding="ISO8859-15"
countries="CH IT" />
<language name="ja"
encoding="SJIS eucJP"
@ -111,7 +124,7 @@
encoding_link="eucKR:CP949"
countries="KR" />
<language name="lt"
encoding="ISO8859-4 ISO8859-13"
encoding="ISO8859-13"
countries="LT" />
<language name="lv"
encoding="ISO8859-13"
@ -120,20 +133,23 @@
family="Cyrl"
countries="MN" />
<language name="nb"
encoding="ISO8859-1 ISO8859-15"
encoding="ISO8859-15"
countries="NO" />
<language name="nl"
encoding="ISO8859-1 ISO8859-15"
encoding="ISO8859-15"
countries="BE NL" />
<language name="nn"
encoding="ISO8859-1 ISO8859-15"
encoding="ISO8859-15"
countries="NO" />
<language name="pl"
encoding="ISO8859-2"
countries="PL" />
<language name="pt"
encoding="ISO8859-1 ISO8859-15"
countries="PT BR" />
encoding="ISO8859-15"
countries="PT" />
<language name="pt"
encoding="ISO8859-1"
countries="BR" />
<language name="ro"
encoding="ISO8859-2"
countries="RO" />
@ -157,7 +173,7 @@
encoding="ISO8859-5"
countries="RS" />
<language name="sv"
encoding="ISO8859-1 ISO8859-15"
encoding="ISO8859-15"
countries="SE FI" />
<language name="tr"
encoding="ISO8859-9"

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,989 @@
******* REMAINING DEFINITIONS ARE MANUALLY ASSEMBLED *******
blank <NO-BREAK_SPACE>
digit <SUPERSCRIPT_TWO>;<SUPERSCRIPT_THREE>;<SUPERSCRIPT_ONE>
punct <INVERTED_EXCLAMATION_MARK>;...;<COPYRIGHT_SIGN>;/
<LEFT-POINTING_DOUBLE_ANGLE_QUOTATION_MARK>;...;<PLUS-MINUS_SIGN>;/
<ACUTE_ACCENT>;/
<PILCROW_SIGN>;...;<CEDILLA>;/
<RIGHT-POINTING_DOUBLE_ANGLE_QUOTATION_MARK>;...;<INVERTED_QUESTION_MARK>
number <VULGAR_FRACTION_ONE_QUARTER>;...;<VULGAR_FRACTION_THREE_QUARTERS>
cntrl <CONTROL-0080>;...;<APPLICATION_PROGRAM_COMMAND>
graph <INVERTED_EXCLAMATION_MARK>;...;<INVERTED_QUESTION_MARK>
punct <DIVISION_SIGN>;<MULTIPLICATION_SIGN>
graph <DIVISION_SIGN>;<MULTIPLICATION_SIGN>
**********************************************************************
* Complete set of "special" characters
**********************************************************************
special <EXCLAMATION_MARK>;...;<SOLIDUS>;/
<COLON>;...;<COMMERCIAL_AT>;/
<LEFT_SQUARE_BRACKET>;...;<GRAVE_ACCENT>;/
<LEFT_CURLY_BRACKET>;...;<TILDE>;/
<INVERTED_EXCLAMATION_MARK>;...;<ACUTE_ACCENT>;/
<MIDDLE_DOT>;...;<INVERTED_QUESTION_MARK>;/
<MULTIPLICATION_SIGN>;/
<DIVISION_SIGN>;/
<HYPHEN>;...;<HYPHENATION_POINT>;/
<PER_MILLE_SIGN>;...;<VERTICAL_FOUR_DOTS>
**********************************************************************
* Supplement generated sections with "number" classification
**********************************************************************
digit <ARABIC-INDIC_DIGIT_ZERO>;...;<ARABIC-INDIC_DIGIT_NINE>
digit <EXTENDED_ARABIC-INDIC_DIGIT_ZERO>;...;<EXTENDED_ARABIC-INDIC_DIGIT_NINE>
digit <DEVANAGARI_DIGIT_ZERO>;...;<DEVANAGARI_DIGIT_NINE>
digit <BENGALI_DIGIT_ZERO>;...;<BENGALI_DIGIT_NINE>
number <BENGALI_CURRENCY_NUMERATOR_ONE>;...;<BENGALI_CURRENCY_DENOMINATOR_SIXTEEN>
digit <GURMUKHI_DIGIT_ZERO>;...;<GURMUKHI_DIGIT_NINE>
digit <GUJARATI_DIGIT_ZERO>;...;<GUJARATI_DIGIT_NINE>
digit <ORIYA_DIGIT_ZERO>;...;<ORIYA_DIGIT_NINE>
digit <TAMIL_DIGIT_ZERO>;...;<TAMIL_DIGIT_NINE>
number <TAMIL_NUMBER_TEN>;...;<TAMIL_NUMBER_ONE_THOUSAND>
digit <TELUGU_DIGIT_ZERO>;...;<TELUGU_DIGIT_NINE>
number <TELUGU_FRACTION_DIGIT_ZERO_FOR_ODD_POWERS_OF_FOUR>;...;<TELUGU_FRACTION_DIGIT_THREE_FOR_EVEN_POWERS_OF_FOUR>
digit <KANNADA_DIGIT_ZERO>;...;<KANNADA_DIGIT_NINE>
digit <MALAYALAM_DIGIT_ZERO>;...;<MALAYALAM_DIGIT_NINE>
number <MALAYALAM_NUMBER_TEN>;...;<MALAYALAM_FRACTION_THREE_QUARTERS>
digit <THAI_DIGIT_ZERO>;...;<THAI_DIGIT_NINE>
digit <LAO_DIGIT_ZERO>;...;<LAO_DIGIT_NINE>
digit <TIBETAN_DIGIT_ZERO>;...;<TIBETAN_DIGIT_NINE>
number <TIBETAN_DIGIT_HALF_ONE>;...;<TIBETAN_DIGIT_HALF_ZERO>
digit <MYANMAR_DIGIT_ZERO>;...;<MYANMAR_DIGIT_NINE>
digit <MYANMAR_SHAN_DIGIT_ZERO>;...;<MYANMAR_SHAN_DIGIT_NINE>
digit <ETHIOPIC_DIGIT_ONE>;...;<ETHIOPIC_DIGIT_NINE>
number <ETHIOPIC_NUMBER_TEN>;...;<ETHIOPIC_NUMBER_TEN_THOUSAND>
digit <KHMER_DIGIT_ZERO>;...;<KHMER_DIGIT_NINE>
number <KHMER_SYMBOL_LEK_ATTAK_SON>;...;<KHMER_SYMBOL_LEK_ATTAK_PRAM-BUON>
digit <VAI_DIGIT_ZERO>;...;<VAI_DIGIT_NINE>
number <ROMAN_NUMERAL_ONE>;...;<ROMAN_NUMERAL_TEN_THOUSAND>
number <ROMAN_NUMERAL_SIX_LATE_FORM>;...;<ROMAN_NUMERAL_ONE_HUNDRED_THOUSAND>
number <PARENTHESIZED_IDEOGRAPH_ONE>;...;<PARENTHESIZED_IDEOGRAPH_TEN>
number <CIRCLED_NUMBER_TEN_ON_BLACK_SQUARE>;...;<CIRCLED_NUMBER_EIGHTY_ON_BLACK_SQUARE>
number <CIRCLED_NUMBER_TWENTY_ONE>;...;<CIRCLED_NUMBER_THIRTY_FIVE>
number <CIRCLED_IDEOGRAPH_ONE>;...;<CIRCLED_IDEOGRAPH_TEN>
number <CIRCLED_NUMBER_THIRTY_SIX>;...;<CIRCLED_NUMBER_FIFTY>
number <CJK_UNIFIED_IDEOGRAPH-3405>;/
<CJK_UNIFIED_IDEOGRAPH-3483>;/
<CJK_UNIFIED_IDEOGRAPH-382A>;/
<CJK_UNIFIED_IDEOGRAPH-3B4D>;/
<CJK_UNIFIED_IDEOGRAPH-4E00>;/
<CJK_UNIFIED_IDEOGRAPH-4E03>;/
<CJK_UNIFIED_IDEOGRAPH-4E07>;/
<CJK_UNIFIED_IDEOGRAPH-4E09>;/
<CJK_UNIFIED_IDEOGRAPH-4E5D>;/
<CJK_UNIFIED_IDEOGRAPH-4E8C>;/
<CJK_UNIFIED_IDEOGRAPH-4E94>;/
<CJK_UNIFIED_IDEOGRAPH-4E96>;/
<CJK_UNIFIED_IDEOGRAPH-4EBF>;/
<CJK_UNIFIED_IDEOGRAPH-4EC0>;/
<CJK_UNIFIED_IDEOGRAPH-4EDF>;/
<CJK_UNIFIED_IDEOGRAPH-4EE8>;/
<CJK_UNIFIED_IDEOGRAPH-4F0D>;/
<CJK_UNIFIED_IDEOGRAPH-4F70>;/
<CJK_UNIFIED_IDEOGRAPH-5104>;/
<CJK_UNIFIED_IDEOGRAPH-5146>;/
<CJK_UNIFIED_IDEOGRAPH-5169>;/
<CJK_UNIFIED_IDEOGRAPH-516B>;/
<CJK_UNIFIED_IDEOGRAPH-516D>;/
<CJK_UNIFIED_IDEOGRAPH-5341>;/
<CJK_UNIFIED_IDEOGRAPH-5343>;...;<CJK_UNIFIED_IDEOGRAPH-5345>;/
<CJK_UNIFIED_IDEOGRAPH-534C>;/
<CJK_UNIFIED_IDEOGRAPH-53C1>;...;<CJK_UNIFIED_IDEOGRAPH-53C4>;/
<CJK_UNIFIED_IDEOGRAPH-56DB>;/
<CJK_UNIFIED_IDEOGRAPH-58F1>;/
<CJK_UNIFIED_IDEOGRAPH-58F9>;/
<CJK_UNIFIED_IDEOGRAPH-5E7A>;/
<CJK_UNIFIED_IDEOGRAPH-5EFE>;/
<CJK_UNIFIED_IDEOGRAPH-5EFF>;/
<CJK_UNIFIED_IDEOGRAPH-5F0C>;...;<CJK_UNIFIED_IDEOGRAPH-5F0E>;/
<CJK_UNIFIED_IDEOGRAPH-5F10>;/
<CJK_UNIFIED_IDEOGRAPH-62FE>;/
<CJK_UNIFIED_IDEOGRAPH-634C>;/
<CJK_UNIFIED_IDEOGRAPH-67D2>;/
<CJK_UNIFIED_IDEOGRAPH-6F06>;/
<CJK_UNIFIED_IDEOGRAPH-7396>;/
<CJK_UNIFIED_IDEOGRAPH-767E>;/
<CJK_UNIFIED_IDEOGRAPH-8086>;/
<CJK_UNIFIED_IDEOGRAPH-842C>;/
<CJK_UNIFIED_IDEOGRAPH-8CAE>;/
<CJK_UNIFIED_IDEOGRAPH-8CB3>;/
<CJK_UNIFIED_IDEOGRAPH-8D30>;/
<CJK_UNIFIED_IDEOGRAPH-9621>;/
<CJK_UNIFIED_IDEOGRAPH-9646>;/
<CJK_UNIFIED_IDEOGRAPH-964C>;/
<CJK_UNIFIED_IDEOGRAPH-9678>;/
<CJK_UNIFIED_IDEOGRAPH-96F6>
number <CJK_COMPATIBILITY_IDEOGRAPH-F96B>;/
<CJK_COMPATIBILITY_IDEOGRAPH-F973>;/
<CJK_COMPATIBILITY_IDEOGRAPH-F978>;/
<CJK_COMPATIBILITY_IDEOGRAPH-F9B2>;/
<CJK_COMPATIBILITY_IDEOGRAPH-F9D1>;/
<CJK_COMPATIBILITY_IDEOGRAPH-F9D3>;/
<CJK_COMPATIBILITY_IDEOGRAPH-F9FD>
digit <FULLWIDTH_DIGIT_ZERO>;...;<FULLWIDTH_DIGIT_NINE>
**********************************************************************
* 0x02B0 - 0x02FF Spacing Modifier Letters
**********************************************************************
graph <MODIFIER_LETTER_SMALL_H>;...;<MODIFIER_LETTER_LOW_LEFT_ARROW>
punct <MODIFIER_LETTER_PRIME>;...;<MODIFIER_LETTER_LEFT_HALF_RING>;/
<MODIFIER_LETTER_LEFT_ARROWHEAD>;...;<MODIFIER_LETTER_CROSS_ACCENT>;/
<MODIFIER_LETTER_EXTRA-HIGH_TONE_BAR>;...;/
<MODIFIER_LETTER_LOW_LEFT_ARROW>
lower <MODIFIER_LETTER_SMALL_H>;...;<MODIFIER_LETTER_SMALL_Y>;/
<MODIFIER_LETTER_GLOTTAL_STOP>;/
<MODIFIER_LETTER_REVERSED_GLOTTAL_STOP>;/
<MODIFIER_LETTER_SMALL_GAMMA>;...;/
<MODIFIER_LETTER_SMALL_REVERSED_GLOTTAL_STOP>
**********************************************************************
* 0x0300 - 0x036F Combining Diacritical Marks
**********************************************************************
graph <COMBINING_GRAVE_ACCENT>;...;<COMBINING_LATIN_SMALL_LETTER_X>
**********************************************************************
* 0x0370 - 0x03FF Coptic (Automatic section skips it)
**********************************************************************
graph <COPTIC_CAPITAL_LETTER_SHEI>;/
<COPTIC_CAPITAL_LETTER_FEI>;/
<COPTIC_CAPITAL_LETTER_KHEI>;/
<COPTIC_CAPITAL_LETTER_HORI>;/
<COPTIC_CAPITAL_LETTER_GANGIA>;/
<COPTIC_CAPITAL_LETTER_SHIMA>;/
<COPTIC_CAPITAL_LETTER_DEI>;/
<COPTIC_SMALL_LETTER_SHEI>;/
<COPTIC_SMALL_LETTER_FEI>;/
<COPTIC_SMALL_LETTER_KHEI>;/
<COPTIC_SMALL_LETTER_HORI>;/
<COPTIC_SMALL_LETTER_GANGIA>;/
<COPTIC_SMALL_LETTER_SHIMA>;/
<COPTIC_SMALL_LETTER_DEI>
upper <COPTIC_CAPITAL_LETTER_SHEI>;/
<COPTIC_CAPITAL_LETTER_FEI>;/
<COPTIC_CAPITAL_LETTER_KHEI>;/
<COPTIC_CAPITAL_LETTER_HORI>;/
<COPTIC_CAPITAL_LETTER_GANGIA>;/
<COPTIC_CAPITAL_LETTER_SHIMA>;/
<COPTIC_CAPITAL_LETTER_DEI>
lower <COPTIC_SMALL_LETTER_SHEI>;/
<COPTIC_SMALL_LETTER_FEI>;/
<COPTIC_SMALL_LETTER_KHEI>;/
<COPTIC_SMALL_LETTER_HORI>;/
<COPTIC_SMALL_LETTER_GANGIA>;/
<COPTIC_SMALL_LETTER_SHIMA>;/
<COPTIC_SMALL_LETTER_DEI>
toupper (<COPTIC_SMALL_LETTER_SHEI>,<COPTIC_CAPITAL_LETTER_SHEI>);/
(<COPTIC_SMALL_LETTER_FEI>,<COPTIC_CAPITAL_LETTER_FEI>);/
(<COPTIC_SMALL_LETTER_KHEI>,<COPTIC_CAPITAL_LETTER_KHEI>);/
(<COPTIC_SMALL_LETTER_HORI>,<COPTIC_CAPITAL_LETTER_HORI>);/
(<COPTIC_SMALL_LETTER_GANGIA>,<COPTIC_CAPITAL_LETTER_GANGIA>);/
(<COPTIC_SMALL_LETTER_SHIMA>,<COPTIC_CAPITAL_LETTER_SHIMA>);/
(<COPTIC_SMALL_LETTER_DEI>,<COPTIC_CAPITAL_LETTER_DEI>)
tolower (<COPTIC_CAPITAL_LETTER_SHEI>,<COPTIC_SMALL_LETTER_SHEI>);/
(<COPTIC_CAPITAL_LETTER_FEI>,<COPTIC_SMALL_LETTER_FEI>);/
(<COPTIC_CAPITAL_LETTER_KHEI>,<COPTIC_SMALL_LETTER_KHEI>);/
(<COPTIC_CAPITAL_LETTER_HORI>,<COPTIC_SMALL_LETTER_HORI>);/
(<COPTIC_CAPITAL_LETTER_GANGIA>,<COPTIC_SMALL_LETTER_GANGIA>);/
(<COPTIC_CAPITAL_LETTER_SHIMA>,<COPTIC_SMALL_LETTER_SHIMA>);/
(<COPTIC_CAPITAL_LETTER_DEI>,<COPTIC_SMALL_LETTER_DEI>)
**********************************************************************
* 0x0700 - 0x074F Syriac
**********************************************************************
graph <SYRIAC_END_OF_PARAGRAPH>;...;<SYRIAC_LETTER_SOGDIAN_FE>
**********************************************************************
* 0x0780 - 0x07BF Thaana
**********************************************************************
graph <THAANA_LETTER_HAA>;...;<THAANA_LETTER_NAA>
**********************************************************************
* 0x07C0 - 0x07FF Nko
**********************************************************************
digit <NKO_DIGIT_ZERO>;...;<NKO_DIGIT_NINE>
graph <NKO_LETTER_A>;...;<NKO_LAJANYALAN>
**********************************************************************
* 0x0800 - 0x083F Samaritan
**********************************************************************
graph <SAMARITAN_LETTER_ALAF>;...;<SAMARITAN_PUNCTUATION_ANNAAU>
**********************************************************************
* 0x0840 - 0x085F Mandaic
**********************************************************************
graph <MANDAIC_LETTER_HALQA>;...;<MANDAIC_GEMINATION_MARK>;/
<MANDAIC_PUNCTUATION>
**********************************************************************
* 0x1400 - 0x167F Unified Canadian Aboriginal Syllabics
**********************************************************************
graph <CANADIAN_SYLLABICS_HYPHEN>;...;<CANADIAN_SYLLABICS_BLACKFOOT_W>
**********************************************************************
* 0x1680 - 0x169F Ogham
**********************************************************************
graph <OGHAM_SPACE_MARK>;...;<OGHAM_REVERSED_FEATHER_MARK>
**********************************************************************
* 0x16A0 - 0x16FF Runic
**********************************************************************
graph <RUNIC_LETTER_FEHU_FEOH_FE_F>;...;<RUNIC_LETTER_FRANKS_CASKET_AESC>
number <RUNIC_ARLAUG_SYMBOL>;...;<RUNIC_BELGTHOR_SYMBOL>
**********************************************************************
* 0x1700 - 0x171F Tagalog
**********************************************************************
graph <TAGALOG_LETTER_A>;...;<TAGALOG_SIGN_VIRAMA>
**********************************************************************
* 0x1720 - 0x173F Hanunoo
**********************************************************************
graph <HANUNOO_LETTER_A>;...;<PHILIPPINE_DOUBLE_PUNCTUATION>
**********************************************************************
* 0x1740 - 0x175F Buhid
**********************************************************************
graph <BUHID_LETTER_A>;...;<BUHID_VOWEL_SIGN_U>
**********************************************************************
* 0x1760 - 0x177F Tagbanwa
**********************************************************************
graph <TAGBANWA_LETTER_A>;...;<TAGBANWA_VOWEL_SIGN_U>
**********************************************************************
* 0x1800 - 0x18AF Mongolian
**********************************************************************
graph <MONGOLIAN_BIRGA>;...;<MONGOLIAN_VOWEL_SEPARATOR>;/
<MONGOLIAN_LETTER_A>;...;<MONGOLIAN_LETTER_MANCHU_ZHA>;/
<MONGOLIAN_LETTER_ALI_GALI_ANUSVARA_ONE>;...;/
<MONGOLIAN_LETTER_MANCHU_ALI_GALI_LHA>
digit <MONGOLIAN_DIGIT_ZERO>;...;<MONGOLIAN_DIGIT_NINE>
**********************************************************************
* 0x18B0 - 0x18FF Unified CA Aboriginal Syllabics Extended
**********************************************************************
graph <CANADIAN_SYLLABICS_OY>;...;<CANADIAN_SYLLABICS_CARRIER_DENTAL_S>
**********************************************************************
* 0x1900 - 0x194F Limbu
**********************************************************************
graph <LIMBU_VOWEL-CARRIER_LETTER>;...;<LIMBU_LETTER_TRA>;/
<LIMBU_VOWEL_SIGN_A>;...;<LIMBU_SUBJOINED_LETTER_WA>;/
<LIMBU_SMALL_LETTER_KA>;...;<LIMBU_SIGN_SA-I>;/
<LIMBU_SIGN_LOO>;/
<LIMBU_EXCLAMATION_MARK>;/
<LIMBU_QUESTION_MARK>
digit <LIMBU_DIGIT_ZERO>;...;<LIMBU_DIGIT_NINE>
**********************************************************************
* 0x1950 - 0x197F Tai Le
**********************************************************************
graph <TAI_LE_LETTER_KA>;...;<TAI_LE_LETTER_AI>;/
<TAI_LE_LETTER_TONE-2>;...;<TAI_LE_LETTER_TONE-6>
**********************************************************************
* 0x1980 - 0x19DF New Tai Lue
**********************************************************************
graph <NEW_TAI_LUE_LETTER_HIGH_QA>;...;<NEW_TAI_LUE_LETTER_LOW_SUA>;/
<NEW_TAI_LUE_VOWEL_SIGN_VOWEL_SHORTENER>;...;/
<NEW_TAI_LUE_TONE_MARK-2>;/
<NEW_TAI_LUE_SIGN_LAE>;/
<NEW_TAI_LUE_SIGN_LAEV>
digit <NEW_TAI_LUE_DIGIT_ZERO>;...;<NEW_TAI_LUE_THAM_DIGIT_ONE>
**********************************************************************
* 0x1A00 - 0x1A1F Buginese
**********************************************************************
graph <BUGINESE_LETTER_KA>;...;<BUGINESE_VOWEL_SIGN_AE>;/
<BUGINESE_PALLAWA>;/
<BUGINESE_END_OF_SECTION>
**********************************************************************
* 0x1A20 - 0x1AAF Tai Tham
**********************************************************************
graph <TAI_THAM_LETTER_HIGH_KA>;...;<TAI_THAM_CONSONANT_SIGN_SA>;/
<TAI_THAM_SIGN_SAKOT>;...;<TAI_THAM_SIGN_KHUEN-LUE_KARAN>;/
<TAI_THAM_COMBINING_CRYPTOGRAMMIC_DOT>;/
<TAI_THAM_SIGN_WIANG>;...;<TAI_THAM_SIGN_CAANG>
digit <TAI_THAM_HORA_DIGIT_ZERO>;...;<TAI_THAM_HORA_DIGIT_NINE>;/
<TAI_THAM_THAM_DIGIT_ZERO>;...;<TAI_THAM_THAM_DIGIT_NINE>
**********************************************************************
* 0x1AB0 - 0x1AFF Combining Diacritical Marks Extended
**********************************************************************
graph <COMBINING_DOUBLED_CIRCUMFLEX_ACCENT>;...;<COMBINING_PARENTHESES_OVERLAY>
**********************************************************************
* 0x1B00 - 0x1B7F Balinese
**********************************************************************
graph <BALINESE_SIGN_ULU_RICEM>;...;<BALINESE_LETTER_ASYURA_SASAK>;/
<BALINESE_PANTI>;...;<BALINESE_MUSICAL_SYMBOL_LEFT-HAND_OPEN_PING>
digit <BALINESE_DIGIT_ZERO>;...;<BALINESE_DIGIT_NINE>
**********************************************************************
* 0x1B80 - 0x1BBF Sundanese
**********************************************************************
graph <SUNDANESE_SIGN_PANYECEK>;...;<SUNDANESE_LETTER_FINAL_M>
digit <SUNDANESE_DIGIT_ZERO>;...;<SUNDANESE_DIGIT_NINE>
**********************************************************************
* 0x1BC0 - 0x1BFF Batak
**********************************************************************
graph <BATAK_LETTER_A>;...;<BATAK_PANONGONAN>;/
<BATAK_SYMBOL_BINDU_NA_METEK>;...;<BATAK_SYMBOL_BINDU_PANGOLAT>
**********************************************************************
* 0x1C00 - 0x1C4F Lepcha
**********************************************************************
graph <LEPCHA_LETTER_KA>;...;<LEPCHA_SIGN_NUKTA>;/
<LEPCHA_PUNCTUATION_TA-ROL>;...;<LEPCHA_PUNCTUATION_TSHOOK>;/
<LEPCHA_LETTER_TTA>;...;<LEPCHA_LETTER_DDA>
digit <LEPCHA_DIGIT_ZERO>;...;<LEPCHA_DIGIT_NINE>
**********************************************************************
* 0x1C50 - 0x1C7F Ol Chiki
**********************************************************************
graph <OL_CHIKI_LETTER_LA>;...;<OL_CHIKI_PUNCTUATION_DOUBLE_MUCAAD>
digit <OL_CHIKI_DIGIT_ZERO>;...;<OL_CHIKI_DIGIT_NINE>
**********************************************************************
* 0x1CC0 - 0x1CCF Sundanese Supplement
**********************************************************************
graph <SUNDANESE_PUNCTUATION_BINDU_SURYA>;...;/
<SUNDANESE_PUNCTUATION_BINDU_BA_SATANGA>
**********************************************************************
* 0x1CD0 - 0x1CFF Vedic Extensions
**********************************************************************
graph <VEDIC_TONE_KARSHANA>;...;<VEDIC_TONE_DOUBLE_RING_ABOVE>
**********************************************************************
* 0x1DC0 - 0x1DFF Combining Diacritical Marks Supplement
**********************************************************************
graph <COMBINING_DOTTED_GRAVE_ACCENT>;...;<COMBINING_UP_TACK_ABOVE>;/
<COMBINING_DOUBLE_INVERTED_BREVE_BELOW>;...;/
<COMBINING_RIGHT_ARROWHEAD_AND_DOWN_ARROWHEAD_BELOW>
**********************************************************************
* 0x2000 - 0x206F General Punctuation
**********************************************************************
space <EN_QUAD>;...;<RIGHT-TO-LEFT_MARK>;/
<LINE_SEPARATOR>;...;<NARROW_NO-BREAK_SPACE>
punct <HYPHEN>;...;<HYPHENATION_POINT>;/
<PER_MILLE_SIGN>;...;<VERTICAL_FOUR_DOTS>
**********************************************************************
* 0x2070 - 0x209F Superscripts and Subscripts
**********************************************************************
graph <SUPERSCRIPT_ZERO>;...;<LATIN_SUBSCRIPT_SMALL_LETTER_T>
digit <SUPERSCRIPT_ZERO>
digit <SUPERSCRIPT_FOUR>;...;<SUPERSCRIPT_NINE>
digit <SUBSCRIPT_ZERO>;...;<SUBSCRIPT_NINE>
punct <SUPERSCRIPT_MINUS>;...;<SUPERSCRIPT_RIGHT_PARENTHESIS>
punct <SUBSCRIPT_PLUS_SIGN>;...;<SUBSCRIPT_RIGHT_PARENTHESIS>
lower <SUPERSCRIPT_LATIN_SMALL_LETTER_I>;/
<SUPERSCRIPT_LATIN_SMALL_LETTER_N>;/
<LATIN_SUBSCRIPT_SMALL_LETTER_A>;...;<LATIN_SUBSCRIPT_SMALL_LETTER_T>
**********************************************************************
* 0x20A0 - 0x20CF Currency Symbols
**********************************************************************
punct <EURO-CURRENCY_SIGN>;...;<RUBLE_SIGN>
**********************************************************************
* 0x20D0 - 0x20FF Combining Diacritical Marks for Symbols
**********************************************************************
graph <COMBINING_LEFT_HARPOON_ABOVE>;...;<COMBINING_ASTERISK_ABOVE>
**********************************************************************
* 0x2100 - 0x214F Letterlike Symbols
**********************************************************************
graph <ACCOUNT_OF>;...;<SYMBOL_FOR_SAMARITAN_SOURCE>
punct <ACCOUNT_OF>;/
<ADDRESSED_TO_THE_SUBJECT>;/
<DEGREE_CELSIUS>;...;<CADA_UNA>;/
<SCRUPLE>;/
<DEGREE_FAHRENHEIT>;/
<L_B_BAR_SYMBOL>;/
<NUMERO_SIGN>;...;<SCRIPT_CAPITAL_P>;/
<PRESCRIPTION_TAKE>;...;<VERSICLE>;/
<OUNCE_SIGN>;/
<INVERTED_OHM_SIGN>;/
<TURNED_GREEK_SMALL_LETTER_IOTA>;/
<ESTIMATED_SYMBOL>;/
<ROTATED_CAPITAL_Q>;/
<DOUBLE-STRUCK_N-ARY_SUMMATION>;...;<TURNED_SANS-SERIF_CAPITAL_Y>;/
<PROPERTY_LINE>;...;<AKTIESELSKAB>;/
<SYMBOL_FOR_SAMARITAN_SOURCE>
upper <KELVIN_SIGN>;<ANGSTROM_SIGN>;<TURNED_CAPITAL_F>
lower <TURNED_SMALL_F>
alpha <DOUBLE-STRUCK_CAPITAL_C>;/
<EULER_CONSTANT>;/
<SCRIPT_SMALL_G>;...;<SCRIPT_SMALL_L>;/
<DOUBLE-STRUCK_CAPITAL_N>;/
<DOUBLE-STRUCK_CAPITAL_P>;...;<DOUBLE-STRUCK_CAPITAL_R>;/
<DOUBLE-STRUCK_CAPITAL_Z>;/
<OHM_SIGN>;/
<BLACK-LETTER_CAPITAL_Z>;/
<KELVIN_SIGN>;...;<BLACK-LETTER_CAPITAL_C>;/
<SCRIPT_SMALL_E>;...;<SCRIPT_SMALL_O>;/
<INFORMATION_SOURCE>;/
<DOUBLE-STRUCK_SMALL_PI>;...;<DOUBLE-STRUCK_CAPITAL_PI>;/
<DOUBLE-STRUCK_ITALIC_CAPITAL_D>;...;<DOUBLE-STRUCK_ITALIC_SMALL_J>
tolower (<KELVIN_SIGN>,<k>);/
(<ANGSTROM_SIGN>,<LATIN_SMALL_LETTER_A_WITH_RING_ABOVE>);/
(<TURNED_CAPITAL_F>,<TURNED_SMALL_F>)
toupper (<TURNED_SMALL_F>,<TURNED_CAPITAL_F>)
**********************************************************************
* 0x2150 - 0x218F Number Forms (differential)
**********************************************************************
number <VULGAR_FRACTION_ONE_SEVENTH>;...;<FRACTION_NUMERATOR_ONE>;/
<VULGAR_FRACTION_ZERO_THIRDS>
**********************************************************************
* 0x2190 - 0x21FF Arrows
**********************************************************************
punct <LEFTWARDS_ARROW>;...;<LEFT_RIGHT_OPEN-HEADED_ARROW>
**********************************************************************
* 0x2200 - 0x22FF Mathematical Operators
**********************************************************************
punct <FOR_ALL>;...;<Z_NOTATION_BAG_MEMBERSHIP>
**********************************************************************
* 0x2300 - 0x23FF Miscellaneous Technical
**********************************************************************
punct <DIAMETER_SIGN>;...;<BLACK_CIRCLE_FOR_RECORD>
**********************************************************************
* 0x2400 - 0x243F Control Pictures
**********************************************************************
punct <SYMBOL_FOR_NULL>;...;<SYMBOL_FOR_SUBSTITUTE_FORM_TWO>
**********************************************************************
* 0x2440 - 0x245F Optical Character Recognition
**********************************************************************
punct <OCR_HOOK>;...;<OCR_DOUBLE_BACKSLASH>
**********************************************************************
* 0x2460 - 0x24FF Enclosed Alphanumerics
**********************************************************************
graph <CIRCLED_DIGIT_ONE>;...;<NEGATIVE_CIRCLED_DIGIT_ZERO>
digit <CIRCLED_DIGIT_ONE>;...;<CIRCLED_DIGIT_NINE>
digit <PARENTHESIZED_DIGIT_ONE>;...;<PARENTHESIZED_DIGIT_NINE>
digit <DIGIT_ONE_FULL_STOP>;...;<DIGIT_NINE_FULL_STOP>
digit <CIRCLED_DIGIT_ZERO>
digit <DOUBLE_CIRCLED_DIGIT_ONE>;...;<DOUBLE_CIRCLED_DIGIT_NINE>
digit <NEGATIVE_CIRCLED_DIGIT_ZERO>
xdigit <CIRCLED_LATIN_CAPITAL_LETTER_A>;...;<CIRCLED_LATIN_CAPITAL_LETTER_F>
xdigit <CIRCLED_LATIN_SMALL_LETTER_A>;...;<CIRCLED_LATIN_SMALL_LETTER_F>
number <CIRCLED_NUMBER_TEN>;...;<CIRCLED_NUMBER_TWENTY>
number <PARENTHESIZED_NUMBER_TEN>;...;<PARENTHESIZED_NUMBER_TWENTY>
number <NUMBER_TEN_FULL_STOP>;...;<NUMBER_TWENTY_FULL_STOP>
number <NEGATIVE_CIRCLED_NUMBER_ELEVEN>;...;<NEGATIVE_CIRCLED_NUMBER_TWENTY>
number <DOUBLE_CIRCLED_NUMBER_TEN>
lower <PARENTHESIZED_LATIN_SMALL_LETTER_A>;...;<PARENTHESIZED_LATIN_SMALL_LETTER_Z>
upper <CIRCLED_LATIN_CAPITAL_LETTER_A>;...;<CIRCLED_LATIN_CAPITAL_LETTER_Z>
lower <CIRCLED_LATIN_SMALL_LETTER_A>;...;<CIRCLED_LATIN_SMALL_LETTER_Z>
toupper (<CIRCLED_LATIN_SMALL_LETTER_A>,<CIRCLED_LATIN_CAPITAL_LETTER_A>);/
(<CIRCLED_LATIN_SMALL_LETTER_B>,<CIRCLED_LATIN_CAPITAL_LETTER_B>);/
(<CIRCLED_LATIN_SMALL_LETTER_C>,<CIRCLED_LATIN_CAPITAL_LETTER_C>);/
(<CIRCLED_LATIN_SMALL_LETTER_D>,<CIRCLED_LATIN_CAPITAL_LETTER_D>);/
(<CIRCLED_LATIN_SMALL_LETTER_E>,<CIRCLED_LATIN_CAPITAL_LETTER_E>);/
(<CIRCLED_LATIN_SMALL_LETTER_F>,<CIRCLED_LATIN_CAPITAL_LETTER_F>);/
(<CIRCLED_LATIN_SMALL_LETTER_G>,<CIRCLED_LATIN_CAPITAL_LETTER_G>);/
(<CIRCLED_LATIN_SMALL_LETTER_H>,<CIRCLED_LATIN_CAPITAL_LETTER_H>);/
(<CIRCLED_LATIN_SMALL_LETTER_I>,<CIRCLED_LATIN_CAPITAL_LETTER_I>);/
(<CIRCLED_LATIN_SMALL_LETTER_J>,<CIRCLED_LATIN_CAPITAL_LETTER_J>);/
(<CIRCLED_LATIN_SMALL_LETTER_K>,<CIRCLED_LATIN_CAPITAL_LETTER_K>);/
(<CIRCLED_LATIN_SMALL_LETTER_L>,<CIRCLED_LATIN_CAPITAL_LETTER_L>);/
(<CIRCLED_LATIN_SMALL_LETTER_M>,<CIRCLED_LATIN_CAPITAL_LETTER_M>);/
(<CIRCLED_LATIN_SMALL_LETTER_N>,<CIRCLED_LATIN_CAPITAL_LETTER_N>);/
(<CIRCLED_LATIN_SMALL_LETTER_O>,<CIRCLED_LATIN_CAPITAL_LETTER_O>);/
(<CIRCLED_LATIN_SMALL_LETTER_P>,<CIRCLED_LATIN_CAPITAL_LETTER_P>);/
(<CIRCLED_LATIN_SMALL_LETTER_Q>,<CIRCLED_LATIN_CAPITAL_LETTER_Q>);/
(<CIRCLED_LATIN_SMALL_LETTER_R>,<CIRCLED_LATIN_CAPITAL_LETTER_R>);/
(<CIRCLED_LATIN_SMALL_LETTER_S>,<CIRCLED_LATIN_CAPITAL_LETTER_S>);/
(<CIRCLED_LATIN_SMALL_LETTER_T>,<CIRCLED_LATIN_CAPITAL_LETTER_T>);/
(<CIRCLED_LATIN_SMALL_LETTER_U>,<CIRCLED_LATIN_CAPITAL_LETTER_U>);/
(<CIRCLED_LATIN_SMALL_LETTER_V>,<CIRCLED_LATIN_CAPITAL_LETTER_V>);/
(<CIRCLED_LATIN_SMALL_LETTER_W>,<CIRCLED_LATIN_CAPITAL_LETTER_W>);/
(<CIRCLED_LATIN_SMALL_LETTER_X>,<CIRCLED_LATIN_CAPITAL_LETTER_X>);/
(<CIRCLED_LATIN_SMALL_LETTER_Y>,<CIRCLED_LATIN_CAPITAL_LETTER_Y>);/
(<CIRCLED_LATIN_SMALL_LETTER_Z>,<CIRCLED_LATIN_CAPITAL_LETTER_Z>)
tolower (<CIRCLED_LATIN_CAPITAL_LETTER_A>,<CIRCLED_LATIN_SMALL_LETTER_A>);/
(<CIRCLED_LATIN_CAPITAL_LETTER_B>,<CIRCLED_LATIN_SMALL_LETTER_B>);/
(<CIRCLED_LATIN_CAPITAL_LETTER_C>,<CIRCLED_LATIN_SMALL_LETTER_C>);/
(<CIRCLED_LATIN_CAPITAL_LETTER_D>,<CIRCLED_LATIN_SMALL_LETTER_D>);/
(<CIRCLED_LATIN_CAPITAL_LETTER_E>,<CIRCLED_LATIN_SMALL_LETTER_E>);/
(<CIRCLED_LATIN_CAPITAL_LETTER_F>,<CIRCLED_LATIN_SMALL_LETTER_F>);/
(<CIRCLED_LATIN_CAPITAL_LETTER_G>,<CIRCLED_LATIN_SMALL_LETTER_G>);/
(<CIRCLED_LATIN_CAPITAL_LETTER_H>,<CIRCLED_LATIN_SMALL_LETTER_H>);/
(<CIRCLED_LATIN_CAPITAL_LETTER_I>,<CIRCLED_LATIN_SMALL_LETTER_I>);/
(<CIRCLED_LATIN_CAPITAL_LETTER_J>,<CIRCLED_LATIN_SMALL_LETTER_J>);/
(<CIRCLED_LATIN_CAPITAL_LETTER_K>,<CIRCLED_LATIN_SMALL_LETTER_K>);/
(<CIRCLED_LATIN_CAPITAL_LETTER_L>,<CIRCLED_LATIN_SMALL_LETTER_L>);/
(<CIRCLED_LATIN_CAPITAL_LETTER_M>,<CIRCLED_LATIN_SMALL_LETTER_M>);/
(<CIRCLED_LATIN_CAPITAL_LETTER_N>,<CIRCLED_LATIN_SMALL_LETTER_N>);/
(<CIRCLED_LATIN_CAPITAL_LETTER_O>,<CIRCLED_LATIN_SMALL_LETTER_O>);/
(<CIRCLED_LATIN_CAPITAL_LETTER_P>,<CIRCLED_LATIN_SMALL_LETTER_P>);/
(<CIRCLED_LATIN_CAPITAL_LETTER_Q>,<CIRCLED_LATIN_SMALL_LETTER_Q>);/
(<CIRCLED_LATIN_CAPITAL_LETTER_R>,<CIRCLED_LATIN_SMALL_LETTER_R>);/
(<CIRCLED_LATIN_CAPITAL_LETTER_S>,<CIRCLED_LATIN_SMALL_LETTER_S>);/
(<CIRCLED_LATIN_CAPITAL_LETTER_T>,<CIRCLED_LATIN_SMALL_LETTER_T>);/
(<CIRCLED_LATIN_CAPITAL_LETTER_U>,<CIRCLED_LATIN_SMALL_LETTER_U>);/
(<CIRCLED_LATIN_CAPITAL_LETTER_V>,<CIRCLED_LATIN_SMALL_LETTER_V>);/
(<CIRCLED_LATIN_CAPITAL_LETTER_W>,<CIRCLED_LATIN_SMALL_LETTER_W>);/
(<CIRCLED_LATIN_CAPITAL_LETTER_X>,<CIRCLED_LATIN_SMALL_LETTER_X>);/
(<CIRCLED_LATIN_CAPITAL_LETTER_Y>,<CIRCLED_LATIN_SMALL_LETTER_Y>);/
(<CIRCLED_LATIN_CAPITAL_LETTER_Z>,<CIRCLED_LATIN_SMALL_LETTER_Z>)
**********************************************************************
* 0x2500 - 0x257F Box Drawing
**********************************************************************
punct <BOX_DRAWINGS_LIGHT_HORIZONTAL>;...;<BOX_DRAWINGS_HEAVY_UP_AND_LIGHT_DOWN>
**********************************************************************
* 0x2580 - 0x259F Block Elements
**********************************************************************
punct <UPPER_HALF_BLOCK>;...;<QUADRANT_UPPER_RIGHT_AND_LOWER_LEFT_AND_LOWER_RIGHT>
**********************************************************************
* 0x25A0 - 0x25FF Geometric Shapes
**********************************************************************
punct <BLACK_SQUARE>;...;<LOWER_RIGHT_TRIANGLE>
**********************************************************************
* 0x2600 - 0x26FF Miscellaneous symbols
**********************************************************************
punct <BLACK_SUN_WITH_RAYS>;...;<WHITE_FLAG_WITH_HORIZONTAL_MIDDLE_BLACK_STRIPE>
**********************************************************************
* 0x2700 - 0x27BF Dingbats
**********************************************************************
graph <BLACK_SAFETY_SCISSORS>;...;<DOUBLE_CURLY_LOOP>
digit <DINGBAT_NEGATIVE_CIRCLED_DIGIT_ONE>;...;<DINGBAT_NEGATIVE_CIRCLED_DIGIT_NINE>
number <DINGBAT_NEGATIVE_CIRCLED_NUMBER_TEN>
digit <DINGBAT_CIRCLED_SANS-SERIF_DIGIT_ONE>;...;<DINGBAT_CIRCLED_SANS-SERIF_DIGIT_NINE>
number <DINGBAT_CIRCLED_SANS-SERIF_NUMBER_TEN>
digit <DINGBAT_NEGATIVE_CIRCLED_SANS-SERIF_DIGIT_ONE>;...;<DINGBAT_NEGATIVE_CIRCLED_SANS-SERIF_DIGIT_NINE>
number <DINGBAT_NEGATIVE_CIRCLED_SANS-SERIF_NUMBER_TEN>
punct <BLACK_SAFETY_SCISSORS>;...;<MEDIUM_RIGHT_CURLY_BRACKET_ORNAMENT>;/
<HEAVY_WIDE-HEADED_RIGHTWARDS_ARROW>;...;<DOUBLE_CURLY_LOOP>
**********************************************************************
* 0x27C0 - 0x27EF Miscellaneous Mathematical Symbols-A
**********************************************************************
punct <THREE_DIMENSIONAL_ANGLE>;...;<MATHEMATICAL_RIGHT_FLATTENED_PARENTHESIS>
**********************************************************************
* 0x27F0 - 0x27FF Supplemental Arrows-A
**********************************************************************
punct <UPWARDS_QUADRUPLE_ARROW>;...;<LONG_RIGHTWARDS_SQUIGGLE_ARROW>
**********************************************************************
* 0x2800 - 0x28FF Braille Patterns
**********************************************************************
punct <BRAILLE_PATTERN_BLANK>;...;<BRAILLE_PATTERN_DOTS-12345678>
**********************************************************************
* 0x2900 - 0x297F Supplemental Arrows-B
**********************************************************************
punct <RIGHTWARDS_TWO-HEADED_ARROW_WITH_VERTICAL_STROKE>;...;<DOWN_FISH_TAIL>
**********************************************************************
* 0x2980 - 0x29FF Miscellaneous Mathematical Symbols-B
**********************************************************************
punct <TRIPLE_VERTICAL_BAR_DELIMITER>;...;<MINY>
**********************************************************************
* 0x2A00 - 0x2AFF Supplemental Mathematical Operators
**********************************************************************
punct <N-ARY_CIRCLED_DOT_OPERATOR>;...;<N-ARY_WHITE_VERTICAL_BAR>
**********************************************************************
* 0x2B00 - 0x2BFF Miscellaneous Symbols and Arrows
**********************************************************************
graph <NORTH_EAST_WHITE_ARROW>;...;<DOWNWARDS_TRIANGLE-HEADED_ARROW_TO_BAR>;/
<NORTH_WEST_TRIANGLE-HEADED_ARROW_TO_BAR>;...;/
<RIGHTWARDS_BLACK_ARROW>;/
<THREE-D_TOP-LIGHTED_LEFTWARDS_EQUILATERAL_ARROWHEAD>;...;/
<UP_ARROWHEAD_IN_A_RECTANGLE_BOX>;/
<BALLOT_BOX_WITH_LIGHT_X>;...;<UNCERTAINTY_SIGN>
punct <NORTH_EAST_WHITE_ARROW>;...;<DOWNWARDS_TRIANGLE-HEADED_ARROW_TO_BAR>;/
<NORTH_WEST_TRIANGLE-HEADED_ARROW_TO_BAR>;...;/
<RIGHTWARDS_BLACK_ARROW>;/
<THREE-D_TOP-LIGHTED_LEFTWARDS_EQUILATERAL_ARROWHEAD>;...;/
<UP_ARROWHEAD_IN_A_RECTANGLE_BOX>;/
<BALLOT_BOX_WITH_LIGHT_X>;...;<UNCERTAINTY_SIGN>
**********************************************************************
* 0x2C00 - 0x2C5F Glagolitic
**********************************************************************
graph <GLAGOLITIC_CAPITAL_LETTER_AZU>;...;/
<GLAGOLITIC_CAPITAL_LETTER_LATINATE_MYSLITE>;/
<GLAGOLITIC_SMALL_LETTER_AZU>;...;/
<GLAGOLITIC_SMALL_LETTER_LATINATE_MYSLITE>
**********************************************************************
* 0x2C80 - 0x2CFF Coptic
**********************************************************************
graph <COPTIC_CAPITAL_LETTER_ALFA>;...;<COPTIC_SMALL_LETTER_BOHAIRIC_KHEI>;/
<COPTIC_OLD_NUBIAN_FULL_STOP>;...;<COPTIC_MORPHOLOGICAL_DIVIDER>
number <COPTIC_FRACTION_ONE_HALF>
**********************************************************************
* 0x2E00 - 0x2E7F Supplemental Punctuation
**********************************************************************
punct <RIGHT_ANGLE_SUBSTITUTION_MARKER>;...;<DOUBLE_LOW-REVERSED-9_QUOTATION_MARK>
**********************************************************************
* 0x2E80 - 0x2EFF CJK Radicals Supplement
**********************************************************************
punct <CJK_RADICAL_REPEAT>;...;<CJK_RADICAL_C-SIMPLIFIED_TURTLE>
**********************************************************************
* 0x2F00 - 0x2FDF Kangxi Radicals
**********************************************************************
punct <KANGXI_RADICAL_ONE>;...;<KANGXI_RADICAL_FLUTE>
**********************************************************************
* 0x2FF0 - 0x2FFF Ideographic Description Characters
**********************************************************************
punct <IDEOGRAPHIC_DESCRIPTION_CHARACTER_LEFT_TO_RIGHT>;...;/
<IDEOGRAPHIC_DESCRIPTION_CHARACTER_OVERLAID>
**********************************************************************
* 0x3000 - 0x30FF CJK Symbols and Punctuation
**********************************************************************
space <IDEOGRAPHIC_SPACE>
graph <IDEOGRAPHIC_COMMA>;...;<IDEOGRAPHIC_HALF_FILL_SPACE>
number <IDEOGRAPHIC_NUMBER_ZERO>;/
<HANGZHOU_NUMERAL_ONE>;...;<HANGZHOU_NUMERAL_NINE>;/
<HANGZHOU_NUMERAL_TEN>;...;<HANGZHOU_NUMERAL_THIRTY>
alpha <IDEOGRAPHIC_ITERATION_MARK>;/
<IDEOGRAPHIC_CLOSING_MARK>;/
<VERTICAL_IDEOGRAPHIC_ITERATION_MARK>
punct <IDEOGRAPHIC_COMMA>;...;<JAPANESE_INDUSTRIAL_STANDARD_SYMBOL>;/
<LEFT_ANGLE_BRACKET>;...;<POSTAL_MARK_FACE>;/
<IDEOGRAPHIC_LEVEL_TONE_MARK>;...;/
<IDEOGRAPHIC_TELEGRAPH_LINE_FEED_SEPARATOR_SYMBOL>;/
<MASU_MARK>;...;<IDEOGRAPHIC_HALF_FILL_SPACE>
**********************************************************************
* 0x3100 - 0x312F Bopomofo
**********************************************************************
graph <BOPOMOFO_LETTER_B>;...;<BOPOMOFO_LETTER_IH>
**********************************************************************
* 0x3190 - 0x319F Kanbun
**********************************************************************
graph <IDEOGRAPHIC_ANNOTATION_LINKING_MARK>;...;/
<IDEOGRAPHIC_ANNOTATION_MAN_MARK>
number <IDEOGRAPHIC_ANNOTATION_ONE_MARK>;...;/
<IDEOGRAPHIC_ANNOTATION_FOUR_MARK>
punct <IDEOGRAPHIC_ANNOTATION_LINKING_MARK>;/
<IDEOGRAPHIC_ANNOTATION_REVERSE_MARK>;/
<IDEOGRAPHIC_ANNOTATION_MIDDLE_MARK>;...;/
<IDEOGRAPHIC_ANNOTATION_MAN_MARK>
**********************************************************************
* 0x31A0 - 0x31BF : Bopomofo Extended
**********************************************************************
graph <BOPOMOFO_LETTER_BU>;...;<BOPOMOFO_LETTER_ZY>
**********************************************************************
* 0x31C0 - 0x31EF : CJK Strokes
**********************************************************************
graph <CJK_STROKE_T>;...;<CJK_STROKE_Q>
**********************************************************************
* 0x4DC0 - 0x4DFF Yijing Hexagram Symbols
**********************************************************************
graph <HEXAGRAM_FOR_THE_CREATIVE_HEAVEN>;...;<HEXAGRAM_FOR_BEFORE_COMPLETION>
**********************************************************************
* 0xA4D0 - 0xA4FF Lisu
**********************************************************************
graph <LISU_LETTER_BA>;...;<LISU_PUNCTUATION_FULL_STOP>
**********************************************************************
* 0xA6A0 - 0xA6FF Bamum
**********************************************************************
graph <BAMUM_LETTER_A>;...;<BAMUM_QUESTION_MARK>
number <BAMUM_LETTER_MO>;...;<BAMUM_LETTER_KOGHOM>
**********************************************************************
* 0xA700 - 0xA71F Modifier Tone Letters
**********************************************************************
graph <MODIFIER_LETTER_CHINESE_TONE_YIN_PING>;...;/
<MODIFIER_LETTER_LOW_INVERTED_EXCLAMATION_MARK>
**********************************************************************
* 0xA800 - 0xA82F Syloti Nagri
**********************************************************************
graph <SYLOTI_NAGRI_LETTER_A>;...;<SYLOTI_NAGRI_POETRY_MARK-4>
**********************************************************************
* 0xA830 - 0xA83F Common Indic Number Forms
**********************************************************************
number <NORTH_INDIC_FRACTION_ONE_QUARTER>;...;<NORTH_INDIC_FRACTION_THREE_SIXTEENTHS>
graph <NORTH_INDIC_QUARTER_MARK>;...;<NORTH_INDIC_QUANTITY_MARK>
**********************************************************************
* 0xA840 - 0xA87F Phags-pa
**********************************************************************
graph <PHAGS-PA_LETTER_KA>;...;<PHAGS-PA_MARK_DOUBLE_SHAD>
**********************************************************************
* 0xA880 - 0xA8DF Saurashtra
**********************************************************************
graph <SAURASHTRA_SIGN_ANUSVARA>;...;<SAURASHTRA_SIGN_VIRAMA>;/
<SAURASHTRA_DANDA>;/
<SAURASHTRA_DOUBLE_DANDA>
digit <SAURASHTRA_DIGIT_ZERO>;...;<SAURASHTRA_DIGIT_NINE>
**********************************************************************
* 0xA900 - 0xA92F Kayah Li
**********************************************************************
digit <KAYAH_LI_DIGIT_ZERO>;...;<KAYAH_LI_DIGIT_NINE>
graph <KAYAH_LI_LETTER_KA>;...;<KAYAH_LI_SIGN_SHYA>
**********************************************************************
* 0xA930 - 0xA95F Rejang
**********************************************************************
graph <REJANG_LETTER_KA>;...;<REJANG_VIRAMA>;/
<REJANG_SECTION_MARK>
**********************************************************************
* 0xA980 - 0xA9DF Javanese
**********************************************************************
graph <JAVANESE_SIGN_PANYANGGA>;...;<JAVANESE_TURNED_PADA_PISELEH>;/
<JAVANESE_PANGRANGKEP>;/
<JAVANESE_PADA_TIRTA_TUMETES>;/
<JAVANESE_PADA_ISEN-ISEN>
digit <JAVANESE_DIGIT_ZERO>;...;<JAVANESE_DIGIT_NINE>
**********************************************************************
* 0xAA00 - 0xAA5F Cham
**********************************************************************
graph <CHAM_LETTER_A>;...;<CHAM_CONSONANT_SIGN_WA>;/
<CHAM_LETTER_FINAL_K>;...;<CHAM_CONSONANT_SIGN_FINAL_H>;/
<CHAM_PUNCTUATION_SPIRAL>;...;<CHAM_PUNCTUATION_TRIPLE_DANDA>
digit <CHAM_DIGIT_ZERO>;...;<CHAM_DIGIT_NINE>
**********************************************************************
* 0xAA80 - 0xAADF Tai Viet
**********************************************************************
graph <TAI_VIET_LETTER_LOW_KO>;...;<TAI_VIET_TONE_MAI_SONG>;/
<TAI_VIET_SYMBOL_KON>;...;<TAI_VIET_SYMBOL_KOI_KOI>
**********************************************************************
* 0xAAE0 - 0xAAFF Meetei Mayek Extensions
**********************************************************************
graph <MEETEI_MAYEK_LETTER_E>;...;<MEETEI_MAYEK_VIRAMA>
**********************************************************************
* 0xABC0 - 0xABFF Meetei Mayek
**********************************************************************
graph <MEETEI_MAYEK_LETTER_KOK>;...;<MEETEI_MAYEK_APUN_IYEK>
digit <MEETEI_MAYEK_DIGIT_ZERO>;...;<MEETEI_MAYEK_DIGIT_NINE>
**********************************************************************
* 0xFB50 - 0xFDFF Arabic Presentation Forms (differential)
**********************************************************************
punct <ORNATE_LEFT_PARENTHESIS>;/
<ORNATE_RIGHT_PARENTHESIS>
**********************************************************************
* 0xFE10 - 0xFE1F Vertical Forms
**********************************************************************
graph <PRESENTATION_FORM_FOR_VERTICAL_COMMA>;...;/
<PRESENTATION_FORM_FOR_VERTICAL_HORIZONTAL_ELLIPSIS>
**********************************************************************
* 0xFE20 - 0xFE2F Combining Half Marks
**********************************************************************
graph <COMBINING_LIGATURE_LEFT_HALF>;...;<COMBINING_CONJOINING_MACRON_BELOW>
**********************************************************************
* 0xFE30 - 0xFE4F CJK Compatibility Forms
**********************************************************************
punct <PRESENTATION_FORM_FOR_VERTICAL_TWO_DOT_LEADER>;...;<WAVY_LOW_LINE>
**********************************************************************
* 0xFE50 - 0xFE6F Small Form Variants
**********************************************************************
punct <SMALL_COMMA>;...;<SMALL_COMMERCIAL_AT>
**********************************************************************
* 0xFE70 - 0xFEFF Arabic Presentation Forms-B (differential)
**********************************************************************
blank <ZERO_WIDTH_NO-BREAK_SPACE>
**********************************************************************
* 0x10300 - 0x1032F Old Italic
**********************************************************************
graph <OLD_ITALIC_LETTER_A>;...;<OLD_ITALIC_NUMERAL_FIFTY>
number <OLD_ITALIC_NUMERAL_ONE>;...;<OLD_ITALIC_NUMERAL_FIFTY>
**********************************************************************
* 0x10330 - 0x1034F Gothic
**********************************************************************
graph <GOTHIC_LETTER_AHSA>;...;<GOTHIC_LETTER_NINE_HUNDRED>
number <GOTHIC_LETTER_NINE_HUNDRED>
**********************************************************************
* 0x1D100 - 0x1D1FF Musical Symbols
**********************************************************************
punct <MUSICAL_SYMBOL_SINGLE_BARLINE>;...;<MUSICAL_SYMBOL_DRUM_CLEF-2>;/
<MUSICAL_SYMBOL_MULTIPLE_MEASURE_REST>;...;<MUSICAL_SYMBOL_COMBINING_FLAG-5>;/
<MUSICAL_SYMBOL_COMBINING_ACCENT>
cntrl <MUSICAL_SYMBOL_BEGIN_BEAM>;...;<MUSICAL_SYMBOL_END_PHRASE>
graph <MUSICAL_SYMBOL_COMBINING_STACCATO>;...;<MUSICAL_SYMBOL_PES_SUBPUNCTIS>
**********************************************************************
* 0x1D400 - 0x1D7FF Mathematical Alphanumeric Symbols
**********************************************************************
graph <MATHEMATICAL_BOLD_CAPITAL_A>;...;<MATHEMATICAL_MONOSPACE_DIGIT_NINE>
**********************************************************************
* 0x1F600 - 0x1F64F Emoticons (Emoji)
**********************************************************************
graph <GRINNING_FACE>;...;<PERSON_WITH_FOLDED_HANDS>
**********************************************************************
* 0x1F680 - 0x1F6FF Transport and Map Symbols
**********************************************************************
graph <ROCKET>;...;<LEFT_LUGGAGE>
**********************************************************************
* 0x1F700 - 0x1F77F Alchemical Symbols
**********************************************************************
graph <ALCHEMICAL_SYMBOL_FOR_QUINTESSENCE>;...;/
<ALCHEMICAL_SYMBOL_FOR_HALF_OUNCE>
**********************************************************************
* 0x1F800 - 0x1F8FF Supplemental Arrows-C
**********************************************************************
graph <LEFTWARDS_ARROW_WITH_SMALL_TRIANGLE_ARROWHEAD>;...;/
<WHITE_ARROW_SHAFT_WIDTH_TWO_THIRDS>
**********************************************************************
* 0x20000 - 0x2A6D6 CJK Unified Ideographs Extension B
**********************************************************************
alpha <CJK_UNIFIED_IDEOGRAPH-20000>;...;<CJK_UNIFIED_IDEOGRAPH-2B81D>
**********************************************************************
* 0x2A700 - 0x2B734 CJK Unified Ideographs Extension C
**********************************************************************
alpha <CJK_UNIFIED_IDEOGRAPH-2A700>;...;<CJK_UNIFIED_IDEOGRAPH-2B734>
**********************************************************************
* 0x2B740 - 0x2B81D CJK Unified Ideographs Extension D
**********************************************************************
alpha <CJK_UNIFIED_IDEOGRAPH-2B740>;...;<CJK_UNIFIED_IDEOGRAPH-2B81D>

View File

@ -58,7 +58,7 @@ my %FILESNAMES = (
"timedef" => "LC_TIME",
"msgdef" => "LC_MESSAGES",
"numericdef" => "LC_NUMERIC",
"colldef" => "LC_COLLATE",
"colldef" => "LC_COLLATE",
"ctypedef" => "LC_CTYPE"
);
@ -348,19 +348,9 @@ sub transform_ctypes {
$file .= $c;
my $actfile = $file;
my $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
$filename = "$ETCDIR/$file.$DEFENCODING.src"
if (! -f $filename);
if (! -f $filename
&& defined $languages{$l}{$f}{fallback}) {
$file = $languages{$l}{$f}{fallback};
$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
}
$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src"
if (! -f $filename);
my $filename = "$CLDRDIR/posix/xx_Comm_US.UTF-8.src";
if (! -f $filename) {
print STDERR
"Cannot open $file.$DEFENCODING.src or fallback\n";
print STDERR "Cannot open $filename\n";
next;
}
open(FIN, "$filename");
@ -370,34 +360,45 @@ sub transform_ctypes {
my $shex;
my $uhex;
while (<FIN>) {
if ((/^comment_char\s/) || (/^escape_char\s/)){
push @lines, $_;
}
if (/^LC_CTYPE/../^END LC_CTYPE/) {
push @lines, $_;
}
push @lines, $_;
}
close(FIN);
$shex = sha1_hex(join("\n", @lines));
$languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
$hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;
open(FOUT, ">$TYPE.draft/$actfile.$DEFENCODING.src");
print FOUT <<EOF;
print FOUT @lines;
close(FOUT);
foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
next if ($enc eq $DEFENCODING);
$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
if (! -f $filename) {
print STDERR "Cannot open $filename\n";
next;
}
@lines = ();
open(FIN, "$filename");
while (<FIN>) {
if ((/^comment_char\s/) || (/^escape_char\s/)){
push @lines, $_;
}
if (/^LC_CTYPE/../^END LC_CTYPE/) {
push @lines, $_;
}
}
close(FIN);
$uhex = sha1_hex(join("\n", @lines) . $enc);
$languages{$l}{$f}{data}{$c}{$enc} = $uhex;
$hashtable{$uhex}{"${l}_${f}_${c}.$enc"} = 1;
open(FOUT, ">$TYPE.draft/$actfile.$enc.src");
print FOUT <<EOF;
# Warning: Do not edit. This file is automatically extracted from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
print FOUT @lines;
close(FOUT);
foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
next if ($enc eq $DEFENCODING);
copy ("$TYPE.draft/$actfile.$DEFENCODING.src",
"$TYPE.draft/$actfile.$enc.src");
$uhex = sha1_hex(join("\n", @lines) . $enc);
$languages{$l}{$f}{data}{$c}{$enc} = $uhex;
$hashtable{$uhex}{"${l}_${f}_${c}.$enc"} = 1;
print FOUT @lines;
close(FOUT);
}
}
}
@ -769,6 +770,7 @@ sub make_makefile {
print "Creating Makefile for $TYPE\n";
my $SRCOUT;
my $SRCOUT2;
my $SRCOUT3;
my $MAPLOC;
if ($TYPE eq "colldef") {
$SRCOUT = "localedef -D -U -i \${.IMPSRC} \\\n" .
@ -786,6 +788,15 @@ sub make_makefile {
$SRCOUT2 = "LC_CTYPE";
$MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
"locale/etc/final-maps\n";
$SRCOUT3 = "## SYMPAIRS\n\n" .
".for PAIR in \${SYMPAIRS}\n" .
"\${PAIR:C/^.*://:S/src\$/LC_CTYPE/}: " .
"\${PAIR:C/:.*//}\n" .
"\tlocaledef -D -U -c -w \${MAPLOC}/widths.txt \\\n" .
"\t-f \${MAPLOC}/map.\${.TARGET:T:R:C/^.*\\.//} " .
"\\\n\t-i \${.ALLSRC} \${.OBJDIR}/\${.TARGET:T:R} " .
" || true\n" .
".endfor\n\n";
}
else {
$SRCOUT = "grep -v -E '^(\#\$\$|\#[ ])' < \${.IMPSRC} > \${.TARGET}";
@ -794,7 +805,6 @@ sub make_makefile {
}
open(FOUT, ">$TYPE.draft/Makefile");
print FOUT <<EOF;
# \$FreeBSD\$
# Warning: Do not edit. This file is automatically generated from the
# tools in /usr/src/tools/tools/locale.
@ -826,18 +836,26 @@ EOF
} keys(%{$hashtable{$hash}});
} elsif ($TYPE eq "ctypedef") {
@files = sort {
if ($a =~ /^en_x_US/ ||
if ($a eq 'en_x_US.UTF-8') { return -1; }
elsif ($b eq 'en_x_US.UTF-8') { return 1; }
if ($a =~ /^en_x_US/) { return -1; }
elsif ($b =~ /^en_x_US/) { return 1; }
if ($a =~ /^en_x_GB.ISO8859-15/ ||
$a =~ /^ru_x_RU/) { return -1; }
elsif ($b =~ /^en_x_US/ ||
elsif ($b =~ /^en_x_GB.ISO8859-15/ ||
$b =~ /ru_x_RU/) { return 1; }
else { return uc($b) cmp uc($a); }
if ($a eq 'en_x_US.UTF-8') { return -1; }
elsif ($b eq 'en_x_US.UTF-8') { return 1; }
else { return uc($b) cmp uc($a); }
} keys(%{$hashtable{$hash}});
} else {
@files = sort(keys(%{$hashtable{$hash}}));
@files = sort {
if ($a =~ /_Comm_/ ||
$b eq 'en_x_US.UTF-8') { return 1; }
elsif ($b =~ /_Comm_/ ||
$a eq 'en_x_US.UTF-8') { return -1; }
else { return uc($b) cmp uc($a); }
} keys(%{$hashtable{$hash}});
}
if ($#files > 0) {
my $link = shift(@files);
@ -909,7 +927,7 @@ SYMLINKS+= ../\${f:C/:.*\$//}/\${FILESNAME} \${LOCALEDIR}/\${f:C/^.*://}
FILESDIR_\${f}.${SRCOUT2}= \${LOCALEDIR}/\${f}
.endfor
.include <bsd.prog.mk>
${SRCOUT3}.include <bsd.prog.mk>
EOF
close(FOUT);

View File

@ -23,37 +23,28 @@ old=${base}/../${1}.draft
new=${base}/../${1}
TEMP=/tmp/${1}.locales
TEMP2=/tmp/${1}.hashes
TEMP3=/tmp/${1}.symlinks
FULLMAP=/tmp/utf8-map
FULLEXTRACT=/tmp/extracted-names
AWKCMD="/## PLACEHOLDER/ { \
while ( getline line < \"${TEMP}\" ) {print line} } \
!/## PLACEHOLDER/ { print \$0 }"
/## SYMPAIRS/ { \
while ( getline line < \"${TEMP3}\" ) {print line} } \
!/## / { print \$0 }"
grep '^LOCALES+' ${old}/Makefile > ${TEMP}
if [ $1 = "ctypedef" ]
then
keep=$(cat ${TEMP} | awk '/UTF-8/ { print $2 }')
else
keep=$(cat ${TEMP} | awk '{ print $2 }')
fi
for original in ${keep}
do
cp ${old}/${original}.src ${new}/
done
if [ $1 = "ctypedef" ]
then
keep=$(cat ${TEMP} | awk '{ print $2 ".src" }')
(cd ${old} && md5 -r ${keep} | sort) > ${TEMP2}
linx=$(cat ${TEMP2} | awk '!/UTF-8/ { print $2 }')
for original in ${linx}
keep=$(awk '{ if ($1 != last1) print $2; last1 = $1; }' ${TEMP2})
for original in ${keep}
do
linkhash=$(fgrep "${original}" ${TEMP2} | awk '{ print $1 }')
utf8file=$(fgrep "${linkhash}" ${TEMP2} | fgrep 'UTF-8' | awk '{ print $2 }')
ln -s ${utf8file} ${new}/${original}
cp ${old}/${original} ${new}/
done
awk '{ if ($1 == last1) { print "SYMPAIRS+=\t" last2 ":" $2 } \
else {last1 = $1; last2 = $2}}' ${TEMP2} > ${TEMP3}
rm -f ${TEMP2}
/usr/bin/sed -E -e 's/[ ]+/ /g' \
${CLDRDIR}/posix/UTF-8.cm \
@ -80,8 +71,16 @@ fi
echo map ${map} converted.
done
else # below is everything but ctypedef
keep=$(cat ${TEMP} | awk '{ print $2 }')
for original in ${keep}
do
cp ${old}/${original}.src ${new}/
done
fi
grep -v '^LOCALES+' ${old}/Makefile | awk "${AWKCMD}" > ${new}/Makefile
rm -f ${TEMP}
rm -f ${TEMP} ${TEMP3}

View File

@ -0,0 +1,373 @@
#!/usr/local/bin/perl -wC
use strict;
#use File::Copy;
#use XML::Parser;
use Tie::IxHash;
#use Data::Dumper;
use Getopt::Long;
#use Digest::SHA qw(sha1_hex);
#require "charmaps.pm";
# Command line handling: the script takes exactly two arguments,
# --cldr=<cldrdir> and --etc=<etcdir>.
if ($#ARGV != 1) {
    print "Usage: $0 --cldr=<cldrdir> --etc=<etcdir>\n";
    exit(1);
}
my $CLDRDIR = undef;
my $ETCDIR = undef;
my $result = GetOptions (
    "cldr=s" => \$CLDRDIR,
    "etc=s" => \$ETCDIR,
);
# Two arbitrary arguments satisfy the count check above, so also make sure
# the options parsed cleanly and both directories were really supplied;
# otherwise every path built from them below would contain undef.
if (!$result || !defined($CLDRDIR) || !defined($ETCDIR)) {
    print "Usage: $0 --cldr=<cldrdir> --etc=<etcdir>\n";
    exit(1);
}
# Ordered list of [locale, banner] pairs consumed by generate_sections ().
#
#   locale - name of the CLDR POSIX source to fold in; compress_ctype ()
#            reads "$CLDRDIR/posix/<locale>.UTF-8.src".
#   banner - comment lines (comment_char is '*') printed between two rows
#            of stars ahead of that locale's data.
#
# Order matters: characters emitted for one section are recorded as seen
# and are skipped when a later section lists them again.
my @SECTIONS = (
    ["en_US", "* 0x0000 - 0x007F Basic Latin\n" .
        "* 0x0080 - 0x00FF Latin-1 Supplement\n" .
        "* 0x0100 - 0x017F Latin Extended-A\n" .
        "* 0x0180 - 0x024F Latin Extended-B\n" .
        "* 0x0250 - 0x02AF IPA Extensions\n" .
        "* 0x1D00 - 0x1D7F Phonetic Extensions\n" .
        "* 0x1D80 - 0x1DBF Phonetic Extensions Supplement\n" .
        "* 0x1E00 - 0x1EFF Latin Extended Additional\n" .
        "* 0x2150 - 0x218F Number Forms (partial - Roman Numerals)\n".
        "* 0x2C60 - 0x2C7F Latin Extended-C\n" .
        "* 0xA720 - 0xA7FF Latin Extended-D\n" .
        "* 0xAB30 - 0xAB6F Latin Extended-E\n" .
        "* 0xFB00 - 0xFF4F Alphabetic Presentation Forms (partial)\n".
        "* 0xFF00 - 0xFFEF Halfwidth and Fullwidth Forms (partial)\n"],
    ["el_GR", "* 0x0370 - 0x03FF Greek (No Coptic!)\n" .
        "* 0x1F00 - 0x1FFF Greek Extended\n"],
    ["ru_RU", "* 0x0400 - 0x04FF Cyrillic\n" .
        "* 0x0500 - 0x052F Cyrillic Supplementary\n" .
        "* 0x2DE0 - 0x2DFF Cyrillic Extended-A\n" .
        "* 0xA640 - 0xA69F Cyrillic Extended-B\n"],
    ["hy_AM", "* 0x0530 - 0x058F Armenian\n" .
        "* 0xFB00 - 0xFF4F Alphabetic Presentation Forms (partial)\n"],
    ["he_IL", "* 0x0590 - 0x05FF Hebrew\n" .
        "* 0xFB00 - 0xFF4F Alphabetic Presentation Forms (partial)\n"],
    # Arabic Supplement ends at 0x077F (the banner used to read 0x074F,
    # i.e. a range that ended before it started).
    ["ar_SA", "* 0x0600 - 0x06FF Arabic\n" .
        "* 0x0750 - 0x077F Arabic Supplement\n" .
        "* 0x08A0 - 0x08FF Arabic Extended-A\n" .
        "* 0xFB50 - 0xFDFF Arabic Presentation Forms (partial)\n" .
        "* 0xFE70 - 0xFEFF Arabic Presentation Forms-B (partial)\n"],
    ["hi_IN", "* 0x0900 - 0x097F Devanagari\n" .
        "* 0xA8E0 - 0xA8FF Devanagari Extended\n"],
    # Bengali is 0x0980 - 0x09FF; the banner used to repeat the
    # Devanagari range.
    ["bn_IN", "* 0x0980 - 0x09FF Bengali\n"],
    ["pa_Guru_IN", "* 0x0A00 - 0x0A7F Gurmukhi\n"],
    ["gu_IN", "* 0x0A80 - 0x0AFF Gujarati\n"],
    ["or_IN", "* 0x0B00 - 0x0B7F Oriya\n"],
    ["ta_IN", "* 0x0B80 - 0x0BFF Tamil\n"],
    ["te_IN", "* 0x0C00 - 0x0C7F Telugu\n"],
    ["kn_IN", "* 0x0C80 - 0x0CFF Kannada\n"],
    ["ml_IN", "* 0x0D00 - 0x0D7F Malayalam\n"],
    ["si_LK", "* 0x0D80 - 0x0DFF Sinhala\n"],
    ["th_TH", "* 0x0E00 - 0x0E7F Thai\n"],
    ["lo_LA", "* 0x0E80 - 0x0EFF Lao\n"],
    ["bo_IN", "* 0x0F00 - 0x0FFF Tibetan\n"],
    ["my_MM", "* 0x1000 - 0x109F Myanmar\n" .
        "* 0xA9E0 - 0xA9FF Myanmar Extended-B\n" .
        "* 0xAA60 - 0xAA7F Myanmar Extended-A\n"],
    ["ka_GE", "* 0x10A0 - 0x10FF Georgian\n" .
        "* 0x2D00 - 0x2D2F Georgian Supplement\n"],
    # Hangul Syllables end at 0xD7A3 (was 0xA7A3, an inverted range).
    ["ja_JP", "* 0x1100 - 0x11FF Hangul Jamo\n" .
        "* 0x3000 - 0x30FF CJK Symbols and Punctuation (partial)\n" .
        "* 0x3040 - 0x309F Hiragana\n" .
        "* 0x30A0 - 0x30FF Katakana\n" .
        "* 0x31F0 - 0x31FF Katakana Phonetic Extensions\n" .
        "* 0x3130 - 0x318F Hangul Compatibility Jamo (partial)\n" .
        "* 0x3200 - 0x32FF Enclosed CJK Letters and Months (partial)\n" .
        "* 0x3300 - 0x33FF CJK Compatibility\n" .
        "* 0x3400 - 0x4DB5 CJK Unified Ideographs Extension-A (added)\n" .
        "* 0x4E00 - 0x9FCC CJK Unified Ideographs (overridden)\n" .
        "* 0xAC00 - 0xD7A3 Hangul Syllables (partial)\n" .
        "* 0xD7B0 - 0xD7FF Hangul Jamo Extended-B\n" .
        "* 0xF900 - 0xFAFF CJK Compatibility Ideographs (partial)\n" .
        "* 0xFF00 - 0xFFEF Halfwidth and Fullwidth Forms (partial)\n"],
    ["am_ET", "* 0x1200 - 0x137F Ethiopic\n" .
        "* 0x1380 - 0x139F Ethiopic Supplement\n" .
        "* 0x2D80 - 0x2DDF Ethiopic Extended\n" .
        "* 0xAB00 - 0xAB2F Ethiopic Extended-A\n"],
    ["chr_US", "* 0x13A0 - 0x13FF Cherokee\n"],
    ["km_KH", "* 0x1780 - 0x17FF Khmer\n" .
        "* 0x19E0 - 0x19FF Khmer Symbols\n"],
    # Tifinagh ends at 0x2D7F (was 0x2D2F, an inverted range).
    ["shi_Tfng_MA", "* 0x2D30 - 0x2D7F Tifinagh\n"],
    ["ii_CN", "* 0xA000 - 0xA48F Yi Syllables\n" .
        "* 0xA490 - 0xA4CF Yi Radicals\n"],
    ["vai_Vaii_LR", "* 0xA500 - 0xA63F Vai\n"],
    ["ko_KR", "* 0x3130 - 0x318F Hangul Compatibility Jamo (partial)\n" .
        "* 0xA960 - 0xA97F Hangul Jamo Extended-A\n" .
        "* 0xAC00 - 0xD7A3 Hangul Syllables (partial)\n" .
        "* 0xFF00 - 0xFFEF Halfwidth and Fullwidth Forms (partial)\n"],
);
# ["zh_Hans_CN", "* 0x2E80 - 0x2EFF CJK Radicals Supplement\n" .
# "* 0x2F00 - 0x2FDF Rangxi Radicales\n" .
# "* 0x3000 - 0x30FF CJK Symbols and Punctuation (partial)\n" .
# "* 0x3200 - 0x32FF Enclosed CJK Letters and Months (partial)\n" .
# "* 0x3400 - 0x4DB5 CJK Unified Ideographs Extension A\n" .
# "* 0xF900 - 0xFAFF CJK Compatibility Ideographs (partial)\n"],
# Shared state used by the subs below.
#
# %seen         - character IDs already committed; entries in here are
#                 skipped when a later section lists them again.
# %pending_seen - IDs recorded by already_seen () that merge_seen () has
#                 not yet folded into %seen.
# %utf8map      - character name (underscores replaced by spaces) to the
#                 value listed in the charmap with every "\x" removed;
#                 filled by get_utf8map ().
# %utf8aliases  - name to the preceding charmap name that has the same
#                 value; filled by get_utf8map ().
my %seen = ();
my %pending_seen = ();
my %utf8map = ();
my %utf8aliases = ();
# File written by generate_header () / generate_footer ().
my $outfilename = "$ETCDIR/common.UTF-8.src";
# File copied to the end of the output by generate_sections ().
my $manual_file = "$ETCDIR/manual-input.UTF-8";
# Separator row printed above and below each section banner.
my $stars = "**********************************************************************\n";
# Driver: load the charmap, then write header, sections and footer in order.
get_utf8map("$CLDRDIR/posix/UTF-8.cm");
generate_header ();
generate_sections ();
generate_footer ();
############################
# Load the POSIX charmap at $file into the file-level %utf8map and
# %utf8aliases tables.
#
# Only the lines between the "CHARMAP" and "END CHARMAP" markers are used;
# lines starting with '#' and empty lines are skipped.  For each
# "<NAME> value" entry the underscores in NAME are turned into spaces and
# every "\x" is removed from the value before it is stored as
# $utf8map{NAME}.  When an entry has the same value as the entry directly
# before it, that earlier name is stored in $utf8aliases{NAME}.
#
# Dies when the charmap cannot be opened, since every later lookup in
# this script depends on it.
sub get_utf8map {
    my $file = shift;
    open(my $fin, '<', $file)
        or die ("can't read $file: $!\n");
    my @lines = <$fin>;
    close($fin);
    chomp(@lines);
    my $prev_k = undef;
    my $prev_v = "";
    my $incharmap = 0;
    foreach my $l (@lines) {
        $l =~ s/\r//;
        next if ($l =~ /^\#/);
        next if ($l eq "");
        if ($l eq "CHARMAP") {
            $incharmap = 1;
            next;
        }
        next if (!$incharmap);
        last if ($l eq "END CHARMAP");
        # Skip lines that are not "<NAME> value"; reading $1/$2 after a
        # failed match would silently reuse the previous line's captures.
        next unless ($l =~ /^<([^\s]+)>\s+(.*)/);
        my $k = $1;
        my $v = $2;
        $k =~ s/_/ /g;      # unicode char string
        $v =~ s/\\x//g;     # UTF-8 char code
        $utf8map{$k} = $v;
        $utf8aliases{$k} = $prev_k if ($prev_v eq $v);
        $prev_v = $v;
        $prev_k = $k;
    }
}
# Open $outfilename for writing on the file-level FOUT handle (dies when
# that fails) and print the fixed preamble: the "do not edit" notice, the
# comment_char / escape_char declarations and the opening LC_CTYPE line.
# FOUT is left open; generate_footer () closes it.
sub generate_header {
open(FOUT, ">", "$outfilename")
or die ("can't write to $outfilename\n");
print FOUT <<EOF;
# Warning: Do not edit. This file is automatically generated from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
comment_char *
escape_char /
LC_CTYPE
EOF
}
# Print the closing "END LC_CTYPE" line and close the FOUT handle that
# generate_header () opened.
#
# Dies when close fails: buffered write errors (a full disk, for example)
# only surface at close time and would otherwise leave a truncated file
# behind without any diagnostic.
sub generate_footer {
    print FOUT "\nEND LC_CTYPE\n";
    close (FOUT)
        or die ("can't finish writing $outfilename: $!\n");
}
# Return 1 when the given character ID is already in %seen.  Otherwise
# note it in %pending_seen (merge_seen () moves it into %seen later) and
# return 0.
sub already_seen {
    my ($id) = @_;
    return 1 if (defined $seen{$id});
    $pending_seen{$id} = 1;
    return 0;
}
# Read-only variant of already_seen (): return 1 when the given character
# ID is in %seen and 0 otherwise, without recording anything.
sub already_seen_RO {
    my ($id) = @_;
    return (defined $seen{$id}) ? 1 : 0;
}
# Commit every ID collected in %pending_seen into %seen, then empty
# %pending_seen.
sub merge_seen {
    my @pending = keys %pending_seen;
    @seen{@pending} = (1) x scalar(@pending);
    %pending_seen = ();
}
# Build the lines that are placed ahead of a territory's own LC_CTYPE data.
#
# For "ja_JP" this is, for each of the classes "graph" and "alpha", one
# entry per character of the two ranges CJK_UNIFIED_IDEOGRAPH-3400..4DB5
# and CJK_UNIFIED_IDEOGRAPH-4E00..9FCC (class keyword on the first line of
# a range, ";/" continuation on every line but the last), followed by a
# final "merge\tnow" line.  Every other territory gets an empty list.
sub initialize_lines {
    my ($terr) = @_;
    my @out = ();
    if ($terr eq "ja_JP") {
        my @ranges = ([0x3400, 0x4DB5], [0x4E00, 0x9FCC]);
        foreach my $class ("graph", "alpha") {
            foreach my $range (@ranges) {
                my ($first, $last) = @$range;
                my @names = map {
                    sprintf("<CJK_UNIFIED_IDEOGRAPH-%X>", $_)
                } ($first .. $last);
                my $head = shift(@names);
                my $tail = pop(@names);
                push @out, $class . "\t" . $head . ";/\n";
                foreach my $name (@names) {
                    push @out, "\t" . $name . ";/\n";
                }
                push @out, "\t" . $tail . "\n";
            }
        }
        push @out, "merge\tnow\n";
    }
    return @out;
}
# Read the LC_CTYPE section of "$CLDRDIR/posix/$territory.UTF-8.src" and
# write a condensed version of it to FOUT.
#
# Characters whose ID is already in %seen are dropped, and runs of
# consecutive IDs are collapsed into "<FIRST>;...;<LAST>" ranges.  The ID
# of a character is hex () of its %utf8map entry.  Prints a message to
# STDERR and returns without writing anything when the source file does
# not exist.
sub compress_ctype {
my $territory = shift;
# 1 when the current line ends its character list (no ";/" continuation).
my $term;
# 1 while a run of characters has been started but not yet written out.
my $active = 0;
# 1 while the current category keyword still has to be printed.
my $cat_loaded = 0;
# lock_* is the first character of the open run, prev_* the last one
# accepted into it, curr_* the character on the line being processed.
my $lock_ID;
my $prev_ID;
my $curr_ID;
my $lock_name;
my $prev_name;
my $curr_name;
my $key_name;
my $category = '';
# Synthetic lines (only non-empty for ja_JP) go ahead of the file's own.
my @lines = initialize_lines ($territory);
my $filename = "$CLDRDIR/posix/$territory.UTF-8.src";
if (! -f $filename) {
print STDERR "Cannot open $filename\n";
return;
}
open(FIN, "$filename");
print "Reading from $filename\n";
while (<FIN>) {
# Keep the body of the LC_CTYPE section only: the two marker lines, a
# line of exactly thirteen stars and empty lines are left out.
if (/^LC_CTYPE/../^END LC_CTYPE/) {
if ($_ ne "LC_CTYPE\n" && $_ ne "END LC_CTYPE\n" &&
$_ ne "*************\n" && $_ ne "\n") {
push @lines, $_;
}
}
}
close(FIN);
foreach my $line (@lines) {
# A line starting with three or more lowercase letters and a tab opens
# a new category.
if ($line =~ m/^([a-z]{3,})\t/) {
$category = $1;
# "merge" is the pseudo category produced by initialize_lines ():
# commit the pending IDs right away and emit nothing.
if ($category eq 'merge') {
merge_seen;
next;
}
if ($category ne 'print') {
$cat_loaded = 1;
}
}
# Everything belonging to the "print" category is dropped.
next if ($category eq 'print');
if ($category eq 'toupper' || $category eq 'tolower') {
# Case mapping pairs: the name directly followed by "," decides whether
# the pair is kept.  The check is read-only, so pairs never add to
# %pending_seen.  Lines without such a name are dropped.
if ($line =~ m/<([-_A-Za-z0-9]+)>,/) {
$key_name = $1;
$key_name =~ s/_/ /g;
if (already_seen_RO (hex($utf8map{$key_name}))) {
next;
}
# Print the category keyword once, ahead of the first kept pair, and
# strip it from the line itself.
if ($cat_loaded) { print FOUT $category; }
$cat_loaded = 0;
$line =~ s/^[a-z]{3,}\t/\t/;
print FOUT $line;
}
next;
}
# Character list entry: a name at the end of the line, optionally
# followed by ";" and one more character (the continuation marker).
if ($line =~ m/<([-_A-Za-z0-9]+)>(;.|)$/) {
$term = ($2 eq '') ? 1 : 0;
$curr_name = $1;
$key_name = $1;
$key_name =~ s/_/ /g;
$curr_ID = hex($utf8map{$key_name});
# NOTE(review): when the skipped line is the terminating one, the run
# that is still open does not appear to be flushed and $active stays
# set - confirm whether the input can trigger this.
if (already_seen ($curr_ID)) {
next;
}
if ($active) {
if ($curr_ID == $prev_ID + 1) {
# Consecutive ID: extend the open run.
$prev_ID = $curr_ID;
$prev_name = $curr_name;
} else {
# Gap: write the open run (single name, two names, or a
# "first;...;last" range) with a continuation marker, then start a
# new run at the current character.
if ($cat_loaded) { print FOUT $category; }
$cat_loaded = 0;
if ($prev_ID == $lock_ID) {
print FOUT "\t<" . $prev_name . ">;/\n";
} elsif ($prev_ID - 1 == $lock_ID) {
print FOUT "\t<" . $lock_name . ">;/\n";
print FOUT "\t<" . $prev_name . ">;/\n";
} else {
print FOUT "\t<" . $lock_name .
">;...;<" . $prev_name . ">;/\n";
}
$lock_ID = $curr_ID;
$prev_ID = $curr_ID;
$lock_name = $curr_name;
$prev_name = $curr_name;
}
} else {
# No run open yet: start one at the current character.
$active = 1;
$lock_ID = $curr_ID;
$prev_ID = $curr_ID;
$lock_name = $curr_name;
$prev_name = $curr_name;
}
if ($term) {
# Last entry of the list: write the open run without a continuation
# marker and close it.
if ($cat_loaded) { print FOUT $category; }
$cat_loaded = 0;
if ($curr_ID == $lock_ID) {
print FOUT "\t<" . $curr_name . ">\n";
} elsif ($curr_ID == $lock_ID + 1) {
print FOUT "\t<" . $lock_name . ">;/\n";
print FOUT "\t<" . $curr_name . ">\n";
} else {
print FOUT "\t<" . $lock_name .
">;...;<" . $curr_name . ">\n";
}
$active = 0;
}
} else {
# Anything else is copied through unchanged.
print FOUT $line;
}
}
}
# Write the body of the output file to FOUT.
#
# For each [territory, banner] entry of @SECTIONS: print an empty line,
# the banner framed by two rows of stars, then the territory's condensed
# LC_CTYPE data (compress_ctype ()), and commit the characters it emitted
# with merge_seen ().  Afterwards the contents of $manual_file are copied
# to the output unchanged.
#
# Dies when $manual_file cannot be opened; carrying on would silently
# produce an output file without the manually maintained part.
sub generate_sections {
    foreach my $section (@SECTIONS) {
        my ($territory, $banner) = @$section;
        print FOUT "\n";
        print FOUT $stars;
        print FOUT $banner;
        print FOUT $stars;
        compress_ctype ($territory);
        merge_seen ();
    }
    open(my $fin, '<', $manual_file)
        or die ("can't read $manual_file: $!\n");
    print "Reading from $manual_file\n";
    while (my $line = <$fin>) {
        print FOUT $line;
    }
    close($fin);
}