From 84fa0ef97db9dd72693f10a1fa72c766f2b0838f Mon Sep 17 00:00:00 2001 From: Hiroki Sato Date: Sat, 15 Aug 2020 07:19:37 +0000 Subject: [PATCH] - Update to Unicode CLDR v35 (Unicode 12.0). - Update tools/tools/locale to add make targets to automatically generate locale source files. With this change, just typing "make obj && make -j4" will rebuild them. Check README for more details. - Fix issues in ja_JP ctypedef and range specification support in utf8-rollup.pl. - Add a temporary patch for UnicodeData.txt to fix code ranges of CJK Ideograph Extension A and Extension B. - tools/cldr2def.pl: Use eucJP for ja_JP ctypedef because eucJP is not compatible with UTF-8. - tools/convert_map.pl: Add a verbose error message. - tools/utf8-rollup.pl: Normalize entries to use Unicode, not UTF-8. Reviewed by: bapt Differential Revision: https://reviews.freebsd.org/D25503 --- tools/tools/locale/Makefile | 193 ++++-- tools/tools/locale/README | 80 ++- tools/tools/locale/etc/final-maps/map.UTF-8 | 554 ++++++++++++++++++ .../tools/locale/patch/patch-UnicodeData.txt | 29 + tools/tools/locale/tools/cldr2def.pl | 5 + tools/tools/locale/tools/convert_map.pl | 5 +- tools/tools/locale/tools/finalize | 38 +- tools/tools/locale/tools/utf8-rollup.pl | 31 +- 8 files changed, 821 insertions(+), 114 deletions(-) create mode 100644 tools/tools/locale/patch/patch-UnicodeData.txt diff --git a/tools/tools/locale/Makefile b/tools/tools/locale/Makefile index 508034d19695..aad2c2160bb5 100644 --- a/tools/tools/locale/Makefile +++ b/tools/tools/locale/Makefile @@ -7,19 +7,30 @@ # # Modified by John Marino to suit DragonFly needs # - -.OBJDIR: . - -.if !defined(UNIDIR) -.error UNIDIR is not set +.if ${.CURDIR} == ${.OBJDIR} +.error Do make obj first. 
.endif -PASSON= UNIDIR="${UNIDIR}" -ETCDIR= ${.CURDIR}/etc +LOCALESRCDIR?= ${DESTDIR}/usr/src/share +TMPDIR?= /tmp + +BASEDIR= ${.CURDIR} +ETCDIR= ${BASEDIR}/etc +TOOLSDIR= ${BASEDIR}/tools +PATCHDIR= ${BASEDIR}/patch +UNIDIR= ${.OBJDIR:tA}/unicode + +PKGS= openjdk8 \ + apache-ant \ + p5-XML-Parser \ + p5-Tie-IxHash \ + p5-Text-Iconv +tools-test: + pkg info -e ${PKGS} + @echo tools ok. KNOWN= monetdef numericdef msgdef colldef ctypedef # timedef TYPES?= ${KNOWN} -LOCALE_DESTDIR?= /tmp/generated-locales/ COLLATION_SPECIAL?= \ cs_CZ ISO8859-2 \ @@ -44,65 +55,79 @@ COLLATION_SPECIAL?= \ .for area enc in ${COLLATION_SPECIAL} COLLATIONS_SPECIAL_ENV+= ${area}.${enc} .endfor -PASSON+= COLLATIONS_SPECIAL="${COLLATIONS_SPECIAL_ENV}" +SETENV= env -i \ + PATH="${PATH}" \ + TMPDIR="${TMPDIR}" \ + COLLATIONS_SPECIAL="${COLLATIONS_SPECIAL_ENV}" \ + UNIDIR="${UNIDIR}" \ + BASEDIR="${BASEDIR}" \ + TOOLSDIR="${TOOLSDIR}" \ + ETCDIR="${ETCDIR}" -all: -.for t in ${TYPES} -. if ${KNOWN:M${t}} - test -d ${t} || mkdir ${t} - make build-${t} -. endif -.endfor +all: posix build afterbuild +.ORDER: posix build afterbuild + +afterbuild: build @echo "" @find . -name *failed .for t in ${TYPES} +. if ${KNOWN:M${t}} +build: build-${t} +.ORDER: build-${t} afterbuild +. endif +.endfor + +diff: +.for t in ${TYPES} +. if ${KNOWN:M${t}} +diff: diff-${t} +diff-${t}: + -/usr/bin/diff -ruN -x Makefile -x Makefile.depend \ + ${LOCALESRCDIR}/${t} ${t} +. endif +.endfor + +install: +.for t in ${TYPES} +. if ${KNOWN:M${t}} install: install-${t} install-${t}: -. if ${KNOWN:M${t}} - rm -rf ${.CURDIR}/${t}.draft - rm -f ${.CURDIR}/../../../share/${t}/Makefile - rm -f ${.CURDIR}/../../../share/${t}/*.src - mv ${.CURDIR}/${t}/* ${.CURDIR}/../../../share/${t}/ + cd ${LOCALESRCDIR}/${t} && \ + rm -f Makefile *.src && \ + install -c ${.OBJDIR}/${t}/* ${LOCALESRCDIR}/${t} . endif .endfor post-install: .for t in ${TYPES} . 
if ${KNOWN:M${t}} - (cd ${.CURDIR}/../../../share/${t} && \ - make && make install && make clean) + cd ${LOCALESRCDIR}/${t} && \ + make && make install && make clean . endif .endfor .for t in ${TYPES} -gen-${t}: - mkdir -p ${t} ${t}.draft - perl -I tools tools/cldr2def.pl \ - --unidir=$$(realpath ${UNIDIR}) \ - --etc=$$(realpath ${ETCDIR}) \ +CLEANDIRS+= ${t} ${t}.draft +${t}: + mkdir -p ${t} ${t}.draft && \ + perl -I ${TOOLSDIR} ${TOOLSDIR}/cldr2def.pl \ + --unidir=${UNIDIR:tA} \ + --etc=${ETCDIR:tA} \ --type=${t} -build-${t}: gen-${t} - env ${PASSON} tools/finalize ${t} +build-${t}: ${t} + ${SETENV} OUTBASEDIR="${.OBJDIR}/${t}" ${TOOLSDIR}/finalize ${t} .endfor -gen-ctypedef: ctype-rollup -static-colldef: gen-colldef +static-colldef: colldef build-colldef: static-colldef static-colldef: .for area enc in ${COLLATION_SPECIAL} - awk -f tools/extract-colldef.awk ${UNIDIR}/posix/${area}.${enc}.src > \ - colldef.draft/${area}.${enc}.src -.endfor - -ctype-rollup: - perl -I tools tools/utf8-rollup.pl --unidir=$$(realpath ${UNIDIR}) - -clean: -.for t in ${TYPES} - rm -rf ${t} ${t}.draft +colldef.draft/${area}.${enc}.src: posix/${area}.${enc}.src + awk -f ${TOOLSDIR}/extract-colldef.awk \ + ${.ALLSRC} > ${.TARGET} || (rm -f ${.TARGET} && false) .endfor BASE_LOCALES_OF_INTEREST?= \ @@ -145,31 +170,71 @@ ENCODINGS= Big5 \ US-ASCII \ UTF-8 -POSIX: posixsrc posixcol posixcm -.if !exists(${UNIDIR}/tools/java/cldr.jar) -.error check README about building cldr.jar +# CLDR files +CLDRFILES_CORE= https://unicode.org/Public/cldr/35/core.zip +CLDRFILES_KEY= https://unicode.org/Public/cldr/35/keyboards.zip +CLDRFILES_TOOLS=https://unicode.org/Public/cldr/35/tools.zip +CLDRFILES_UCD= http://www.unicode.org/Public/zipped/latest/UCD.zip + +# fetch and extract targets +${UNIDIR}: + mkdir -p ${UNIDIR} +.for N in CORE KEY TOOLS UCD +${CLDRFILES_${N}:T}: + fetch ${CLDRFILES_${N}} +fetch: ${CLDRFILES_${N}:T} +extract-${CLDRFILES_${N}:T}:: ${CLDRFILES_${N}:T} ${UNIDIR} + cd ${UNIDIR} && unzip 
-o ../${CLDRFILES_${N}:T} +extract: extract-${CLDRFILES_${N}:T} +.endfor +patch:: +.if exists(${PATCHDIR}) + cd ${UNIDIR} && cat ${PATCHDIR}/patch-* | patch .endif + +.if !exists(${UNIDIR}/tools/java/cldr.jar) +.ORDER: extract patch +build-tools: extract patch tools-test ${UNIDIR} + cd ${UNIDIR}/tools/java && ${SETENV} ant all jar +.else +build-tools: + @echo cldr.jar is ready. +.endif + +JAVA_CLDR= java -DCLDR_DIR=${UNIDIR:Q} -jar ${UNIDIR}/tools/java/cldr.jar + +posix: posixcm post-posixcm posixsrc posixcol +.ORDER: posixcm post-posixcm posixsrc posixcol +${UNIDIR}/posix: + ln -s -f ../posix ${.TARGET} +clean-posix: + rm -rf posix ${UNIDIR}/posix +post-posixcm: ${UNIDIR}/posix + perl -I ${TOOLSDIR} ${TOOLSDIR}/utf8-rollup.pl \ + --unidir=${UNIDIR} +.for enc in ${ENCODINGS} +posixcm: build-tools posix/${enc}.cm +.ORDER: build-tools posix/${enc}.cm +posix/${enc}.cm: + mkdir -p posix && \ + ${JAVA_CLDR} org.unicode.cldr.posix.GenerateCharmap \ + -d posix -c ${enc} +.endfor .for area in ${BASE_LOCALES_OF_INTEREST} -posixsrc: ${UNIDIR}/posix/${area}.UTF-8.src -${UNIDIR}/posix/${area}.UTF-8.src: - java -DCLDR_DIR=${UNIDIR:Q} -jar ${UNIDIR}/tools/java/cldr.jar \ - org.unicode.cldr.posix.GeneratePOSIX \ - -d ${UNIDIR}/posix -m ${area} -c UTF-8 +posixsrc: build-tools posix/${area}.UTF-8.src +.ORDER: build-tools posix/${area}.UTF-8.src +posix/${area}.UTF-8.src: + mkdir -p posix && \ + ${JAVA_CLDR} org.unicode.cldr.posix.GeneratePOSIX \ + -d posix -m ${area} -c UTF-8 .endfor .for area encoding in ${COLLATION_SPECIAL} -posixcol: ${UNIDIR}/posix/${area}.${encoding}.src -${UNIDIR}/posix/${area}.${encoding}.src: - java -DCLDR_DIR=${UNIDIR:Q} -jar ${UNIDIR}/tools/java/cldr.jar \ - org.unicode.cldr.posix.GeneratePOSIX \ - -d ${UNIDIR}/posix -m ${area} -c ${encoding} -.endfor -.for enc in ${ENCODINGS} -posixcm: ${UNIDIR}/posix/${enc}.cm -${UNIDIR}/posix/${enc}.cm: - java -DCLDR_DIR=${UNIDIR:Q} -jar ${UNIDIR}/tools/java/cldr.jar \ - org.unicode.cldr.posix.GenerateCharmap \ - -d 
${UNIDIR}/posix -c ${enc} +posixcol: build-tools posix/${area}.${encoding}.src +.ORDER: build-tools posix/${area}.${encoding}.src +posix/${area}.${encoding}.src: + mkdir -p posix && \ + ${JAVA_CLDR} org.unicode.cldr.posix.GeneratePOSIX \ + -d posix -m ${area} -c ${encoding} .endfor -clean-POSIX: - rm -f ${UNIDIR}/posix/* +.include diff --git a/tools/tools/locale/README b/tools/tools/locale/README index 60357d4d8dfe..8dc8c2a25fc1 100644 --- a/tools/tools/locale/README +++ b/tools/tools/locale/README @@ -1,32 +1,58 @@ # $FreeBSD$ -To generate the locales: +Files in this directory are used to generate locale source files +from files in CLDR (Unicode Common Locale Data Repository). -Tools needed: - java (openjdk >= 8) - perl - converters/p5-Text-Iconv - devel/apache-ant - devel/p5-Tie-IxHash - textproc/p5-XML-Parser +To generate the files, do the following: -1. Fetch CLDR data from: http://unicode.org/Public/cldr/. You need all of the -core.zip, keyboards.zip, and tools.zip. -2. Fetch unidata (UCD.zip) from http://www.unicode.org/Public/zipped/latest. -3. Extract: - mkdir -p ~/unicode - cd ~/unicode - unzip ~/core.zip - unzip ~/keyboards.zip - unzip ~/tools.zip - unzip ~/UCD.zip -4. Export variable: - UNIDIR=~/unicode; export UNIDIR -5. Build the CLDR tools: - cd $UNIDIR/tools/java - ant jar -6. Build POSIX data files from CLDR data: - make POSIX -7. Build and install new locale data: - make + cd /usr/src/tools/tools/locale + make obj (mandatory) + make -j16 (-jN recommended) + make diff (check if the changes are reasonable) make install + +"make" downloads the necessary files and builds them; "make install" installs the +results into /usr/src/share/* as source files for locales. + +More details are as follows: + +Variables: + LOCALESRCDIR + Destination path for the generated locale files. + Default: $DESTDIR/usr/src/share. + TMPDIR + Temporary directory. + Default: /tmp + +Targets: + make obj + Create a temporary directory for building. + + make clean + Clean up the obj directories. 
+ + make cleandir + Remove the obj directories completely. + + make tools-test + Check if necessary tools are installed or not. + If something is missing, install them. + + make fetch + Download necessary files from CLDR. + + make build-tools + Build a tool to generate locale source files. + + make posix + Build POSIX locale source files. + + make build + Build locale files. + + make diff + Run diff(1) the build results against $LOCALESRCDIR. + + make install + Install the build results into $LOCALESRCDIR. +[EOF] diff --git a/tools/tools/locale/etc/final-maps/map.UTF-8 b/tools/tools/locale/etc/final-maps/map.UTF-8 index 031b8545982d..ad0dfd2c7de6 100644 --- a/tools/tools/locale/etc/final-maps/map.UTF-8 +++ b/tools/tools/locale/etc/final-maps/map.UTF-8 @@ -2969,6 +2969,7 @@ CHARMAP \xE0\xB1\xAD \xE0\xB1\xAE \xE0\xB1\xAF + \xE0\xB1\xB7 \xE0\xB1\xB8 \xE0\xB1\xB9 \xE0\xB1\xBA @@ -3363,14 +3364,24 @@ CHARMAP \xE0\xBA\x81 \xE0\xBA\x82 \xE0\xBA\x84 + \xE0\xBA\x86 \xE0\xBA\x87 \xE0\xBA\x88 + \xE0\xBA\x89 \xE0\xBA\x8A + \xE0\xBA\x8C \xE0\xBA\x8D + \xE0\xBA\x8E + \xE0\xBA\x8F + \xE0\xBA\x90 + \xE0\xBA\x91 + \xE0\xBA\x92 + \xE0\xBA\x93 \xE0\xBA\x94 \xE0\xBA\x95 \xE0\xBA\x96 \xE0\xBA\x97 + \xE0\xBA\x98 \xE0\xBA\x99 \xE0\xBA\x9A \xE0\xBA\x9B @@ -3378,13 +3389,17 @@ CHARMAP \xE0\xBA\x9D \xE0\xBA\x9E \xE0\xBA\x9F + \xE0\xBA\xA0 \xE0\xBA\xA1 \xE0\xBA\xA2 \xE0\xBA\xA3 \xE0\xBA\xA5 \xE0\xBA\xA7 + \xE0\xBA\xA8 + \xE0\xBA\xA9 \xE0\xBA\xAA \xE0\xBA\xAB + \xE0\xBA\xAC \xE0\xBA\xAD \xE0\xBA\xAE \xE0\xBA\xAF @@ -3398,6 +3413,7 @@ CHARMAP \xE0\xBA\xB7 \xE0\xBA\xB8 \xE0\xBA\xB9 + \xE0\xBA\xBA \xE0\xBA\xBB \xE0\xBA\xBC \xE0\xBA\xBD @@ -6656,6 +6672,7 @@ CHARMAP \xE1\xB3\xB7 \xE1\xB3\xB8 \xE1\xB3\xB9 + \xE1\xB3\xBA \xE1\xB4\x80 \xE1\xB4\x81 \xE1\xB4\x82 @@ -10325,6 +10342,7 @@ CHARMAP \xE2\xAF\x86 \xE2\xAF\x87 \xE2\xAF\x88 + \xE2\xAF\x89 \xE2\xAF\x8A \xE2\xAF\x8B \xE2\xAF\x8C @@ -10378,6 +10396,7 @@ CHARMAP \xE2\xAF\xBC \xE2\xAF\xBD \xE2\xAF\xBE + \xE2\xAF\xBF \xE2\xB0\x80 \xE2\xB0\x81 
\xE2\xB0\x82 @@ -10916,6 +10935,7 @@ CHARMAP \xE2\xB9\x8C \xE2\xB9\x8D \xE2\xB9\x8E + \xE2\xB9\x8F \xE2\xBA\x80 \xE2\xBA\x81 \xE2\xBA\x82 @@ -41812,6 +41832,17 @@ CHARMAP \xEA\x9E\xB7 \xEA\x9E\xB8 \xEA\x9E\xB9 + \xEA\x9E\xBA + \xEA\x9E\xBB + \xEA\x9E\xBC + \xEA\x9E\xBD + \xEA\x9E\xBE + \xEA\x9E\xBF + \xEA\x9F\x82 + \xEA\x9F\x83 + \xEA\x9F\x84 + \xEA\x9F\x85 + \xEA\x9F\x86 \xEA\x9F\xB7 \xEA\x9F\xB8 \xEA\x9F\xB9 @@ -42577,6 +42608,8 @@ CHARMAP \xEA\xAD\xA3 \xEA\xAD\xA4 \xEA\xAD\xA5 + \xEA\xAD\xA6 + \xEA\xAD\xA7 \xEA\xAD\xB0 \xEA\xAD\xB1 \xEA\xAD\xB2 @@ -64381,6 +64414,29 @@ CHARMAP \xF0\x90\xBD\x97 \xF0\x90\xBD\x98 \xF0\x90\xBD\x99 + \xF0\x90\xBF\xA0 + \xF0\x90\xBF\xA1 + \xF0\x90\xBF\xA2 + \xF0\x90\xBF\xA3 + \xF0\x90\xBF\xA4 + \xF0\x90\xBF\xA5 + \xF0\x90\xBF\xA6 + \xF0\x90\xBF\xA7 + \xF0\x90\xBF\xA8 + \xF0\x90\xBF\xA9 + \xF0\x90\xBF\xAA + \xF0\x90\xBF\xAB + \xF0\x90\xBF\xAC + \xF0\x90\xBF\xAD + \xF0\x90\xBF\xAE + \xF0\x90\xBF\xAF + \xF0\x90\xBF\xB0 + \xF0\x90\xBF\xB1 + \xF0\x90\xBF\xB2 + \xF0\x90\xBF\xB3 + \xF0\x90\xBF\xB4 + \xF0\x90\xBF\xB5 + \xF0\x90\xBF\xB6 \xF0\x91\x80\x80 \xF0\x91\x80\x81 \xF0\x91\x80\x82 @@ -65163,6 +65219,7 @@ CHARMAP \xF0\x91\x91\x9B \xF0\x91\x91\x9D \xF0\x91\x91\x9E + \xF0\x91\x91\x9F \xF0\x91\x92\x80 \xF0\x91\x92\x81 \xF0\x91\x92\x82 @@ -65485,6 +65542,7 @@ CHARMAP \xF0\x91\x9A\xB5 \xF0\x91\x9A\xB6 \xF0\x91\x9A\xB7 + \xF0\x91\x9A\xB8 \xF0\x91\x9B\x80 \xF0\x91\x9B\x81 \xF0\x91\x9B\x82 @@ -65697,6 +65755,71 @@ CHARMAP \xF0\x91\xA3\xB1 \xF0\x91\xA3\xB2 \xF0\x91\xA3\xBF + \xF0\x91\xA6\xA0 + \xF0\x91\xA6\xA1 + \xF0\x91\xA6\xA2 + \xF0\x91\xA6\xA3 + \xF0\x91\xA6\xA4 + \xF0\x91\xA6\xA5 + \xF0\x91\xA6\xA6 + \xF0\x91\xA6\xA7 + \xF0\x91\xA6\xAA + \xF0\x91\xA6\xAB + \xF0\x91\xA6\xAC + \xF0\x91\xA6\xAD + \xF0\x91\xA6\xAE + \xF0\x91\xA6\xAF + \xF0\x91\xA6\xB0 + \xF0\x91\xA6\xB1 + \xF0\x91\xA6\xB2 + \xF0\x91\xA6\xB3 + \xF0\x91\xA6\xB4 + \xF0\x91\xA6\xB5 + \xF0\x91\xA6\xB6 + \xF0\x91\xA6\xB7 + \xF0\x91\xA6\xB8 + \xF0\x91\xA6\xB9 + \xF0\x91\xA6\xBA + 
\xF0\x91\xA6\xBB + \xF0\x91\xA6\xBC + \xF0\x91\xA6\xBD + \xF0\x91\xA6\xBE + \xF0\x91\xA6\xBF + \xF0\x91\xA7\x80 + \xF0\x91\xA7\x81 + \xF0\x91\xA7\x82 + \xF0\x91\xA7\x83 + \xF0\x91\xA7\x84 + \xF0\x91\xA7\x85 + \xF0\x91\xA7\x86 + \xF0\x91\xA7\x87 + \xF0\x91\xA7\x88 + \xF0\x91\xA7\x89 + \xF0\x91\xA7\x8A + \xF0\x91\xA7\x8B + \xF0\x91\xA7\x8C + \xF0\x91\xA7\x8D + \xF0\x91\xA7\x8E + \xF0\x91\xA7\x8F + \xF0\x91\xA7\x90 + \xF0\x91\xA7\x91 + \xF0\x91\xA7\x92 + \xF0\x91\xA7\x93 + \xF0\x91\xA7\x94 + \xF0\x91\xA7\x95 + \xF0\x91\xA7\x96 + \xF0\x91\xA7\x97 + \xF0\x91\xA7\x9A + \xF0\x91\xA7\x9B + \xF0\x91\xA7\x9C + \xF0\x91\xA7\x9D + \xF0\x91\xA7\x9E + \xF0\x91\xA7\x9F + \xF0\x91\xA7\xA0 + \xF0\x91\xA7\xA1 + \xF0\x91\xA7\xA2 + \xF0\x91\xA7\xA3 + \xF0\x91\xA7\xA4 \xF0\x91\xA8\x80 \xF0\x91\xA8\x81 \xF0\x91\xA8\x82 @@ -65821,6 +65944,8 @@ CHARMAP \xF0\x91\xAA\x81 \xF0\x91\xAA\x82 \xF0\x91\xAA\x83 + \xF0\x91\xAA\x84 + \xF0\x91\xAA\x85 \xF0\x91\xAA\x86 \xF0\x91\xAA\x87 \xF0\x91\xAA\x88 @@ -66235,6 +66360,57 @@ CHARMAP \xF0\x91\xBB\xB6 \xF0\x91\xBB\xB7 \xF0\x91\xBB\xB8 + \xF0\x91\xBF\x80 + \xF0\x91\xBF\x81 + \xF0\x91\xBF\x82 + \xF0\x91\xBF\x83 + \xF0\x91\xBF\x84 + \xF0\x91\xBF\x85 + \xF0\x91\xBF\x86 + \xF0\x91\xBF\x87 + \xF0\x91\xBF\x88 + \xF0\x91\xBF\x89 + \xF0\x91\xBF\x8A + \xF0\x91\xBF\x8B + \xF0\x91\xBF\x8C + \xF0\x91\xBF\x8D + \xF0\x91\xBF\x8E + \xF0\x91\xBF\x8F + \xF0\x91\xBF\x90 + \xF0\x91\xBF\x91 + \xF0\x91\xBF\x92 + \xF0\x91\xBF\x93 + \xF0\x91\xBF\x94 + \xF0\x91\xBF\x95 + \xF0\x91\xBF\x96 + \xF0\x91\xBF\x97 + \xF0\x91\xBF\x98 + \xF0\x91\xBF\x99 + \xF0\x91\xBF\x9A + \xF0\x91\xBF\x9B + \xF0\x91\xBF\x9C + \xF0\x91\xBF\x9D + \xF0\x91\xBF\x9E + \xF0\x91\xBF\x9F + \xF0\x91\xBF\xA0 + \xF0\x91\xBF\xA1 + \xF0\x91\xBF\xA2 + \xF0\x91\xBF\xA3 + \xF0\x91\xBF\xA4 + \xF0\x91\xBF\xA5 + \xF0\x91\xBF\xA6 + \xF0\x91\xBF\xA7 + \xF0\x91\xBF\xA8 + \xF0\x91\xBF\xA9 + \xF0\x91\xBF\xAA + \xF0\x91\xBF\xAB + \xF0\x91\xBF\xAC + \xF0\x91\xBF\xAD + \xF0\x91\xBF\xAE + \xF0\x91\xBF\xAF + \xF0\x91\xBF\xB0 + 
\xF0\x91\xBF\xB1 + \xF0\x91\xBF\xBF \xF0\x92\x80\x80 \xF0\x92\x80\x81 \xF0\x92\x80\x82 @@ -68540,6 +68716,15 @@ CHARMAP \xF0\x93\x90\xAC \xF0\x93\x90\xAD \xF0\x93\x90\xAE + \xF0\x93\x90\xB0 + \xF0\x93\x90\xB1 + \xF0\x93\x90\xB2 + \xF0\x93\x90\xB3 + \xF0\x93\x90\xB4 + \xF0\x93\x90\xB5 + \xF0\x93\x90\xB6 + \xF0\x93\x90\xB7 + \xF0\x93\x90\xB8 \xF0\x94\x90\x80 \xF0\x94\x90\x81 \xF0\x94\x90\x82 @@ -70058,6 +70243,13 @@ CHARMAP \xF0\x96\xBD\x82 \xF0\x96\xBD\x83 \xF0\x96\xBD\x84 + \xF0\x96\xBD\x85 + \xF0\x96\xBD\x86 + \xF0\x96\xBD\x87 + \xF0\x96\xBD\x88 + \xF0\x96\xBD\x89 + \xF0\x96\xBD\x8A + \xF0\x96\xBD\x8F \xF0\x96\xBD\x90 \xF0\x96\xBD\x91 \xF0\x96\xBD\x92 @@ -70105,6 +70297,15 @@ CHARMAP \xF0\x96\xBD\xBC \xF0\x96\xBD\xBD \xF0\x96\xBD\xBE + \xF0\x96\xBD\xBF + \xF0\x96\xBE\x80 + \xF0\x96\xBE\x81 + \xF0\x96\xBE\x82 + \xF0\x96\xBE\x83 + \xF0\x96\xBE\x84 + \xF0\x96\xBE\x85 + \xF0\x96\xBE\x86 + \xF0\x96\xBE\x87 \xF0\x96\xBE\x8F \xF0\x96\xBE\x90 \xF0\x96\xBE\x91 @@ -70124,6 +70325,8 @@ CHARMAP \xF0\x96\xBE\x9F \xF0\x96\xBF\xA0 \xF0\x96\xBF\xA1 + \xF0\x96\xBF\xA2 + \xF0\x96\xBF\xA3 \xF0\x97\x80\x80 \xF0\x97\x80\x81 \xF0\x97\x80\x82 @@ -76254,6 +76457,12 @@ CHARMAP \xF0\x98\x9F\xAF \xF0\x98\x9F\xB0 \xF0\x98\x9F\xB1 + \xF0\x98\x9F\xB2 + \xF0\x98\x9F\xB3 + \xF0\x98\x9F\xB4 + \xF0\x98\x9F\xB5 + \xF0\x98\x9F\xB6 + \xF0\x98\x9F\xB7 \xF0\x98\xA0\x80 \xF0\x98\xA0\x81 \xF0\x98\xA0\x82 @@ -77296,6 +77505,13 @@ CHARMAP \xF0\x9B\x84\x9C \xF0\x9B\x84\x9D \xF0\x9B\x84\x9E + \xF0\x9B\x85\x90 + \xF0\x9B\x85\x91 + \xF0\x9B\x85\x92 + \xF0\x9B\x85\xA4 + \xF0\x9B\x85\xA5 + \xF0\x9B\x85\xA6 + \xF0\x9B\x85\xA7 \xF0\x9B\x85\xB0 \xF0\x9B\x85\xB1 \xF0\x9B\x85\xB2 @@ -80224,6 +80440,136 @@ CHARMAP \xF0\x9E\x80\xA8 \xF0\x9E\x80\xA9 \xF0\x9E\x80\xAA + \xF0\x9E\x84\x80 + \xF0\x9E\x84\x81 + \xF0\x9E\x84\x82 + \xF0\x9E\x84\x83 + \xF0\x9E\x84\x84 + \xF0\x9E\x84\x85 + \xF0\x9E\x84\x86 + \xF0\x9E\x84\x87 + \xF0\x9E\x84\x88 + \xF0\x9E\x84\x89 + \xF0\x9E\x84\x8A + \xF0\x9E\x84\x8B + \xF0\x9E\x84\x8C + 
\xF0\x9E\x84\x8D + \xF0\x9E\x84\x8E + \xF0\x9E\x84\x8F + \xF0\x9E\x84\x90 + \xF0\x9E\x84\x91 + \xF0\x9E\x84\x92 + \xF0\x9E\x84\x93 + \xF0\x9E\x84\x94 + \xF0\x9E\x84\x95 + \xF0\x9E\x84\x96 + \xF0\x9E\x84\x97 + \xF0\x9E\x84\x98 + \xF0\x9E\x84\x99 + \xF0\x9E\x84\x9A + \xF0\x9E\x84\x9B + \xF0\x9E\x84\x9C + \xF0\x9E\x84\x9D + \xF0\x9E\x84\x9E + \xF0\x9E\x84\x9F + \xF0\x9E\x84\xA0 + \xF0\x9E\x84\xA1 + \xF0\x9E\x84\xA2 + \xF0\x9E\x84\xA3 + \xF0\x9E\x84\xA4 + \xF0\x9E\x84\xA5 + \xF0\x9E\x84\xA6 + \xF0\x9E\x84\xA7 + \xF0\x9E\x84\xA8 + \xF0\x9E\x84\xA9 + \xF0\x9E\x84\xAA + \xF0\x9E\x84\xAB + \xF0\x9E\x84\xAC + \xF0\x9E\x84\xB0 + \xF0\x9E\x84\xB1 + \xF0\x9E\x84\xB2 + \xF0\x9E\x84\xB3 + \xF0\x9E\x84\xB4 + \xF0\x9E\x84\xB5 + \xF0\x9E\x84\xB6 + \xF0\x9E\x84\xB7 + \xF0\x9E\x84\xB8 + \xF0\x9E\x84\xB9 + \xF0\x9E\x84\xBA + \xF0\x9E\x84\xBB + \xF0\x9E\x84\xBC + \xF0\x9E\x84\xBD + \xF0\x9E\x85\x80 + \xF0\x9E\x85\x81 + \xF0\x9E\x85\x82 + \xF0\x9E\x85\x83 + \xF0\x9E\x85\x84 + \xF0\x9E\x85\x85 + \xF0\x9E\x85\x86 + \xF0\x9E\x85\x87 + \xF0\x9E\x85\x88 + \xF0\x9E\x85\x89 + \xF0\x9E\x85\x8E + \xF0\x9E\x85\x8F + \xF0\x9E\x8B\x80 + \xF0\x9E\x8B\x81 + \xF0\x9E\x8B\x82 + \xF0\x9E\x8B\x83 + \xF0\x9E\x8B\x84 + \xF0\x9E\x8B\x85 + \xF0\x9E\x8B\x86 + \xF0\x9E\x8B\x87 + \xF0\x9E\x8B\x88 + \xF0\x9E\x8B\x89 + \xF0\x9E\x8B\x8A + \xF0\x9E\x8B\x8B + \xF0\x9E\x8B\x8C + \xF0\x9E\x8B\x8D + \xF0\x9E\x8B\x8E + \xF0\x9E\x8B\x8F + \xF0\x9E\x8B\x90 + \xF0\x9E\x8B\x91 + \xF0\x9E\x8B\x92 + \xF0\x9E\x8B\x93 + \xF0\x9E\x8B\x94 + \xF0\x9E\x8B\x95 + \xF0\x9E\x8B\x96 + \xF0\x9E\x8B\x97 + \xF0\x9E\x8B\x98 + \xF0\x9E\x8B\x99 + \xF0\x9E\x8B\x9A + \xF0\x9E\x8B\x9B + \xF0\x9E\x8B\x9C + \xF0\x9E\x8B\x9D + \xF0\x9E\x8B\x9E + \xF0\x9E\x8B\x9F + \xF0\x9E\x8B\xA0 + \xF0\x9E\x8B\xA1 + \xF0\x9E\x8B\xA2 + \xF0\x9E\x8B\xA3 + \xF0\x9E\x8B\xA4 + \xF0\x9E\x8B\xA5 + \xF0\x9E\x8B\xA6 + \xF0\x9E\x8B\xA7 + \xF0\x9E\x8B\xA8 + \xF0\x9E\x8B\xA9 + \xF0\x9E\x8B\xAA + \xF0\x9E\x8B\xAB + \xF0\x9E\x8B\xAC + \xF0\x9E\x8B\xAD + \xF0\x9E\x8B\xAE + 
\xF0\x9E\x8B\xAF + \xF0\x9E\x8B\xB0 + \xF0\x9E\x8B\xB1 + \xF0\x9E\x8B\xB2 + \xF0\x9E\x8B\xB3 + \xF0\x9E\x8B\xB4 + \xF0\x9E\x8B\xB5 + \xF0\x9E\x8B\xB6 + \xF0\x9E\x8B\xB7 + \xF0\x9E\x8B\xB8 + \xF0\x9E\x8B\xB9 + \xF0\x9E\x8B\xBF \xF0\x9E\xA0\x80 \xF0\x9E\xA0\x81 \xF0\x9E\xA0\x82 @@ -80512,6 +80858,7 @@ CHARMAP \xF0\x9E\xA5\x88 \xF0\x9E\xA5\x89 \xF0\x9E\xA5\x8A + \xF0\x9E\xA5\x8B \xF0\x9E\xA5\x90 \xF0\x9E\xA5\x91 \xF0\x9E\xA5\x92 @@ -80592,6 +80939,67 @@ CHARMAP \xF0\x9E\xB2\xB2 \xF0\x9E\xB2\xB3 \xF0\x9E\xB2\xB4 + \xF0\x9E\xB4\x81 + \xF0\x9E\xB4\x82 + \xF0\x9E\xB4\x83 + \xF0\x9E\xB4\x84 + \xF0\x9E\xB4\x85 + \xF0\x9E\xB4\x86 + \xF0\x9E\xB4\x87 + \xF0\x9E\xB4\x88 + \xF0\x9E\xB4\x89 + \xF0\x9E\xB4\x8A + \xF0\x9E\xB4\x8B + \xF0\x9E\xB4\x8C + \xF0\x9E\xB4\x8D + \xF0\x9E\xB4\x8E + \xF0\x9E\xB4\x8F + \xF0\x9E\xB4\x90 + \xF0\x9E\xB4\x91 + \xF0\x9E\xB4\x92 + \xF0\x9E\xB4\x93 + \xF0\x9E\xB4\x94 + \xF0\x9E\xB4\x95 + \xF0\x9E\xB4\x96 + \xF0\x9E\xB4\x97 + \xF0\x9E\xB4\x98 + \xF0\x9E\xB4\x99 + \xF0\x9E\xB4\x9A + \xF0\x9E\xB4\x9B + \xF0\x9E\xB4\x9C + \xF0\x9E\xB4\x9D + \xF0\x9E\xB4\x9E + \xF0\x9E\xB4\x9F + \xF0\x9E\xB4\xA0 + \xF0\x9E\xB4\xA1 + \xF0\x9E\xB4\xA2 + \xF0\x9E\xB4\xA3 + \xF0\x9E\xB4\xA4 + \xF0\x9E\xB4\xA5 + \xF0\x9E\xB4\xA6 + \xF0\x9E\xB4\xA7 + \xF0\x9E\xB4\xA8 + \xF0\x9E\xB4\xA9 + \xF0\x9E\xB4\xAA + \xF0\x9E\xB4\xAB + \xF0\x9E\xB4\xAC + \xF0\x9E\xB4\xAD + \xF0\x9E\xB4\xAE + \xF0\x9E\xB4\xAF + \xF0\x9E\xB4\xB0 + \xF0\x9E\xB4\xB1 + \xF0\x9E\xB4\xB2 + \xF0\x9E\xB4\xB3 + \xF0\x9E\xB4\xB4 + \xF0\x9E\xB4\xB5 + \xF0\x9E\xB4\xB6 + \xF0\x9E\xB4\xB7 + \xF0\x9E\xB4\xB8 + \xF0\x9E\xB4\xB9 + \xF0\x9E\xB4\xBA + \xF0\x9E\xB4\xBB + \xF0\x9E\xB4\xBC + \xF0\x9E\xB4\xBD \xF0\x9E\xB8\x80 \xF0\x9E\xB8\x81 \xF0\x9E\xB8\x82 @@ -81066,6 +81474,7 @@ CHARMAP \xF0\x9F\x85\xA9 \xF0\x9F\x85\xAA \xF0\x9F\x85\xAB + \xF0\x9F\x85\xAC \xF0\x9F\x85\xB0 \xF0\x9F\x85\xB1 \xF0\x9F\x85\xB2 @@ -82198,6 +82607,7 @@ CHARMAP \xF0\x9F\x9B\x92 \xF0\x9F\x9B\x93 \xF0\x9F\x9B\x94 + \xF0\x9F\x9B\x95 \xF0\x9F\x9B\xA0 
\xF0\x9F\x9B\xA1 \xF0\x9F\x9B\xA2 @@ -82221,6 +82631,7 @@ CHARMAP \xF0\x9F\x9B\xB7 \xF0\x9F\x9B\xB8 \xF0\x9F\x9B\xB9 + \xF0\x9F\x9B\xBA \xF0\x9F\x9C\x80 \xF0\x9F\x9C\x81 \xF0\x9F\x9C\x82 @@ -82426,6 +82837,18 @@ CHARMAP \xF0\x9F\x9F\x96 \xF0\x9F\x9F\x97 \xF0\x9F\x9F\x98 + \xF0\x9F\x9F\xA0 + \xF0\x9F\x9F\xA1 + \xF0\x9F\x9F\xA2 + \xF0\x9F\x9F\xA3 + \xF0\x9F\x9F\xA4 + \xF0\x9F\x9F\xA5 + \xF0\x9F\x9F\xA6 + \xF0\x9F\x9F\xA7 + \xF0\x9F\x9F\xA8 + \xF0\x9F\x9F\xA9 + \xF0\x9F\x9F\xAA + \xF0\x9F\x9F\xAB \xF0\x9F\xA0\x80 \xF0\x9F\xA0\x81 \xF0\x9F\xA0\x82 @@ -82586,6 +83009,9 @@ CHARMAP \xF0\x9F\xA4\x89 \xF0\x9F\xA4\x8A \xF0\x9F\xA4\x8B + \xF0\x9F\xA4\x8D + \xF0\x9F\xA4\x8E + \xF0\x9F\xA4\x8F \xF0\x9F\xA4\x90 \xF0\x9F\xA4\x91 \xF0\x9F\xA4\x92 @@ -82633,6 +83059,7 @@ CHARMAP \xF0\x9F\xA4\xBC \xF0\x9F\xA4\xBD \xF0\x9F\xA4\xBE + \xF0\x9F\xA4\xBF \xF0\x9F\xA5\x80 \xF0\x9F\xA5\x81 \xF0\x9F\xA5\x82 @@ -82682,11 +83109,13 @@ CHARMAP \xF0\x9F\xA5\xAE \xF0\x9F\xA5\xAF \xF0\x9F\xA5\xB0 + \xF0\x9F\xA5\xB1 \xF0\x9F\xA5\xB3 \xF0\x9F\xA5\xB4 \xF0\x9F\xA5\xB5 \xF0\x9F\xA5\xB6 \xF0\x9F\xA5\xBA + \xF0\x9F\xA5\xBB \xF0\x9F\xA5\xBC \xF0\x9F\xA5\xBD \xF0\x9F\xA5\xBE @@ -82726,6 +83155,14 @@ CHARMAP \xF0\x9F\xA6\xA0 \xF0\x9F\xA6\xA1 \xF0\x9F\xA6\xA2 + \xF0\x9F\xA6\xA5 + \xF0\x9F\xA6\xA6 + \xF0\x9F\xA6\xA7 + \xF0\x9F\xA6\xA8 + \xF0\x9F\xA6\xA9 + \xF0\x9F\xA6\xAA + \xF0\x9F\xA6\xAE + \xF0\x9F\xA6\xAF \xF0\x9F\xA6\xB0 \xF0\x9F\xA6\xB1 \xF0\x9F\xA6\xB2 @@ -82736,9 +83173,26 @@ CHARMAP \xF0\x9F\xA6\xB7 \xF0\x9F\xA6\xB8 \xF0\x9F\xA6\xB9 + \xF0\x9F\xA6\xBA + \xF0\x9F\xA6\xBB + \xF0\x9F\xA6\xBC + \xF0\x9F\xA6\xBD + \xF0\x9F\xA6\xBE + \xF0\x9F\xA6\xBF \xF0\x9F\xA7\x80 \xF0\x9F\xA7\x81 \xF0\x9F\xA7\x82 + \xF0\x9F\xA7\x83 + \xF0\x9F\xA7\x84 + \xF0\x9F\xA7\x85 + \xF0\x9F\xA7\x86 + \xF0\x9F\xA7\x87 + \xF0\x9F\xA7\x88 + \xF0\x9F\xA7\x89 + \xF0\x9F\xA7\x8A + \xF0\x9F\xA7\x8D + \xF0\x9F\xA7\x8E + \xF0\x9F\xA7\x8F \xF0\x9F\xA7\x90 \xF0\x9F\xA7\x91 \xF0\x9F\xA7\x92 @@ -82787,6 +83241,90 @@ CHARMAP \xF0\x9F\xA7\xBD 
\xF0\x9F\xA7\xBE \xF0\x9F\xA7\xBF + \xF0\x9F\xA8\x80 + \xF0\x9F\xA8\x81 + \xF0\x9F\xA8\x82 + \xF0\x9F\xA8\x83 + \xF0\x9F\xA8\x84 + \xF0\x9F\xA8\x85 + \xF0\x9F\xA8\x86 + \xF0\x9F\xA8\x87 + \xF0\x9F\xA8\x88 + \xF0\x9F\xA8\x89 + \xF0\x9F\xA8\x8A + \xF0\x9F\xA8\x8B + \xF0\x9F\xA8\x8C + \xF0\x9F\xA8\x8D + \xF0\x9F\xA8\x8E + \xF0\x9F\xA8\x8F + \xF0\x9F\xA8\x90 + \xF0\x9F\xA8\x91 + \xF0\x9F\xA8\x92 + \xF0\x9F\xA8\x93 + \xF0\x9F\xA8\x94 + \xF0\x9F\xA8\x95 + \xF0\x9F\xA8\x96 + \xF0\x9F\xA8\x97 + \xF0\x9F\xA8\x98 + \xF0\x9F\xA8\x99 + \xF0\x9F\xA8\x9A + \xF0\x9F\xA8\x9B + \xF0\x9F\xA8\x9C + \xF0\x9F\xA8\x9D + \xF0\x9F\xA8\x9E + \xF0\x9F\xA8\x9F + \xF0\x9F\xA8\xA0 + \xF0\x9F\xA8\xA1 + \xF0\x9F\xA8\xA2 + \xF0\x9F\xA8\xA3 + \xF0\x9F\xA8\xA4 + \xF0\x9F\xA8\xA5 + \xF0\x9F\xA8\xA6 + \xF0\x9F\xA8\xA7 + \xF0\x9F\xA8\xA8 + \xF0\x9F\xA8\xA9 + \xF0\x9F\xA8\xAA + \xF0\x9F\xA8\xAB + \xF0\x9F\xA8\xAC + \xF0\x9F\xA8\xAD + \xF0\x9F\xA8\xAE + \xF0\x9F\xA8\xAF + \xF0\x9F\xA8\xB0 + \xF0\x9F\xA8\xB1 + \xF0\x9F\xA8\xB2 + \xF0\x9F\xA8\xB3 + \xF0\x9F\xA8\xB4 + \xF0\x9F\xA8\xB5 + \xF0\x9F\xA8\xB6 + \xF0\x9F\xA8\xB7 + \xF0\x9F\xA8\xB8 + \xF0\x9F\xA8\xB9 + \xF0\x9F\xA8\xBA + \xF0\x9F\xA8\xBB + \xF0\x9F\xA8\xBC + \xF0\x9F\xA8\xBD + \xF0\x9F\xA8\xBE + \xF0\x9F\xA8\xBF + \xF0\x9F\xA9\x80 + \xF0\x9F\xA9\x81 + \xF0\x9F\xA9\x82 + \xF0\x9F\xA9\x83 + \xF0\x9F\xA9\x84 + \xF0\x9F\xA9\x85 + \xF0\x9F\xA9\x86 + \xF0\x9F\xA9\x87 + \xF0\x9F\xA9\x88 + \xF0\x9F\xA9\x89 + \xF0\x9F\xA9\x8A + \xF0\x9F\xA9\x8B + \xF0\x9F\xA9\x8C + \xF0\x9F\xA9\x8D + \xF0\x9F\xA9\x8E + \xF0\x9F\xA9\x8F + \xF0\x9F\xA9\x90 + \xF0\x9F\xA9\x91 + \xF0\x9F\xA9\x92 + \xF0\x9F\xA9\x93 \xF0\x9F\xA9\xA0 \xF0\x9F\xA9\xA1 \xF0\x9F\xA9\xA2 @@ -82801,6 +83339,22 @@ CHARMAP \xF0\x9F\xA9\xAB \xF0\x9F\xA9\xAC \xF0\x9F\xA9\xAD + \xF0\x9F\xA9\xB0 + \xF0\x9F\xA9\xB1 + \xF0\x9F\xA9\xB2 + \xF0\x9F\xA9\xB3 + \xF0\x9F\xA9\xB8 + \xF0\x9F\xA9\xB9 + \xF0\x9F\xA9\xBA + \xF0\x9F\xAA\x80 + \xF0\x9F\xAA\x81 + \xF0\x9F\xAA\x82 + \xF0\x9F\xAA\x90 + \xF0\x9F\xAA\x91 + 
\xF0\x9F\xAA\x92 + \xF0\x9F\xAA\x93 + \xF0\x9F\xAA\x94 + \xF0\x9F\xAA\x95 \xF0\xA0\x80\x80 \xF0\xA0\x80\x81 \xF0\xA0\x80\x82 diff --git a/tools/tools/locale/patch/patch-UnicodeData.txt b/tools/tools/locale/patch/patch-UnicodeData.txt new file mode 100644 index 000000000000..fe65ebacd16a --- /dev/null +++ b/tools/tools/locale/patch/patch-UnicodeData.txt @@ -0,0 +1,29 @@ +--- UnicodeData.txt.orig 2020-06-29 14:05:49.483379000 +0900 ++++ UnicodeData.txt 2020-06-29 14:12:09.808622000 +0900 +@@ -12138,7 +12138,7 @@ + 33FE;IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY THIRTY-ONE;So;0;L; 0033 0031 65E5;;;;N;;;;; + 33FF;SQUARE GAL;So;0;ON; 0067 0061 006C;;;;N;;;;; + 3400;;Lo;0;L;;;;;N;;;;; +-4DBF;;Lo;0;L;;;;;N;;;;; ++4DB5;;Lo;0;L;;;;;N;;;;; + 4DC0;HEXAGRAM FOR THE CREATIVE HEAVEN;So;0;ON;;;;;N;;;;; + 4DC1;HEXAGRAM FOR THE RECEPTIVE EARTH;So;0;ON;;;;;N;;;;; + 4DC2;HEXAGRAM FOR DIFFICULTY AT THE BEGINNING;So;0;ON;;;;;N;;;;; +@@ -12204,7 +12204,7 @@ + 4DFE;HEXAGRAM FOR AFTER COMPLETION;So;0;ON;;;;;N;;;;; + 4DFF;HEXAGRAM FOR BEFORE COMPLETION;So;0;ON;;;;;N;;;;; + 4E00;;Lo;0;L;;;;;N;;;;; +-9FFC;;Lo;0;L;;;;;N;;;;; ++9FEF;;Lo;0;L;;;;;N;;;;; + A000;YI SYLLABLE IT;Lo;0;L;;;;;N;;;;; + A001;YI SYLLABLE IX;Lo;0;L;;;;;N;;;;; + A002;YI SYLLABLE I;Lo;0;L;;;;;N;;;;; +@@ -32901,7 +32901,7 @@ + 1FBF8;SEGMENTED DIGIT EIGHT;Nd;0;EN; 0038;8;8;8;N;;;;; + 1FBF9;SEGMENTED DIGIT NINE;Nd;0;EN; 0039;9;9;9;N;;;;; + 20000;;Lo;0;L;;;;;N;;;;; +-2A6DD;;Lo;0;L;;;;;N;;;;; ++2A6D6;;Lo;0;L;;;;;N;;;;; + 2A700;;Lo;0;L;;;;;N;;;;; + 2B734;;Lo;0;L;;;;;N;;;;; + 2B740;;Lo;0;L;;;;;N;;;;; diff --git a/tools/tools/locale/tools/cldr2def.pl b/tools/tools/locale/tools/cldr2def.pl index 3a6f8ac79d18..5f756cc3895a 100755 --- a/tools/tools/locale/tools/cldr2def.pl +++ b/tools/tools/locale/tools/cldr2def.pl @@ -460,6 +460,11 @@ sub transform_ctypes { foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) { next if ($enc eq $DEFENCODING); $filename = "$UNIDIR/posix/$file.$DEFENCODING.src"; + if ($file eq 'ja_JP') { + # Override 
$filename for ja_JP because + # its CTYPE is not compatible with UTF-8. + $filename = "$UNIDIR/posix/$file.eucJP.src"; + } if (! -f $filename) { print STDERR "Cannot open $filename\n"; next; diff --git a/tools/tools/locale/tools/convert_map.pl b/tools/tools/locale/tools/convert_map.pl index 88222531d064..8b54ff33381b 100755 --- a/tools/tools/locale/tools/convert_map.pl +++ b/tools/tools/locale/tools/convert_map.pl @@ -87,7 +87,7 @@ sub load_utf8_cm { my $file = shift; - open(UTF8, "$file") || die "open"; + open(UTF8, "$file") || die "$!: open: $file"; while () { next if (/^#/); @@ -158,7 +158,8 @@ $mf = shift(@ARGV); $codeset = shift(@ARGV); my $max_mb; -load_utf8_cm("etc/final-maps/map.UTF-8"); +my $etcdir = (exists $ENV{'ETCDIR'}) ? $ENV{'ETCDIR'} : "etc"; +load_utf8_cm("${etcdir}/final-maps/map.UTF-8"); load_map($mf); diff --git a/tools/tools/locale/tools/finalize b/tools/tools/locale/tools/finalize index f4dfd7d0892f..88dfcad0cb24 100755 --- a/tools/tools/locale/tools/finalize +++ b/tools/tools/locale/tools/finalize @@ -47,15 +47,21 @@ usage () $1 = "numericdef" -o $1 = "timedef" -o $1 = "ctypedef" ] || usage self=$(realpath $0) -base=$(dirname ${self}) -old=${base}/../${1}.draft -new=${base}/../${1} -TEMP=/tmp/${1}.locales -TEMP2=/tmp/${1}.hashes -TEMP3=/tmp/${1}.symlinks -TEMP4=/tmp/${1}.mapped -FULLMAP=/tmp/utf8-map -FULLEXTRACT=/tmp/extracted-names +base=${BASEDIR:-$(dirname ${self})} +: ${ETCDIR:=${base}/../etc} +: ${TOOLSDIR:=${base}} +: ${OUTBASEDIR:=${base}/../${1}} +: ${OLD_DIR:=${OUTBASEDIR}.draft} +: ${NEW_DIR:=${OUTBASEDIR}} +old=${OLD_DIR} +new=${NEW_DIR} +: ${TMPDIR:=/tmp} +TEMP=${TMPDIR}/${1}.locales +TEMP2=${TMPDIR}/${1}.hashes +TEMP3=${TMPDIR}/${1}.symlinks +TEMP4=${TMPDIR}/${1}.mapped +FULLMAP=${TMPDIR}/utf8-map +FULLEXTRACT=${TMPDIR}/extracted-names AWKCMD="/## PLACEHOLDER/ { \ while ( getline line < \"${TEMP}\" ) {print line} } \ /## SYMPAIRS/ { \ @@ -65,6 +71,7 @@ AWKCMD="/## PLACEHOLDER/ { \ !/## / { print \$0 }" # Rename the sources with 
3 components name into the POSIX version of the name using @modifier +mkdir -p $old $new cd $old pwd for i in *_*_*.*.src; do @@ -142,13 +149,13 @@ then rm -f ${TEMP2} /usr/bin/sed -E -e 's/[ ]+/ /g' \ ${UNIDIR}/posix/UTF-8.cm \ - > ${base}/../etc/final-maps/map.UTF-8 + > ${ETCDIR}/final-maps/map.UTF-8 /usr/bin/sed -E -e 's/[ ]+/ /g' \ ${UNIDIR}/posix/eucCN.cm \ - > ${base}/../etc/final-maps/map.eucCN + > ${ETCDIR}/final-maps/map.eucCN /usr/bin/sed -E -e 's/[ ]+/ /g' \ ${UNIDIR}/posix/eucCN.cm \ - > ${base}/../etc/final-maps/map.GB2312 + > ${ETCDIR}/final-maps/map.GB2312 # GB18030 and Big5 are pre-generated from CLDR data CHARMAPS="ARMSCII-8 CP1131 CP1251 \ @@ -160,10 +167,11 @@ then for map in ${CHARMAPS} do encoding=${map} - /usr/local/bin/perl ${base}/convert_map.pl \ - ${base}/../etc/charmaps/${map}.TXT ${encoding} \ + env ETCDIR="${ETCDIR}" \ + /usr/local/bin/perl ${TOOLSDIR}/convert_map.pl \ + ${ETCDIR}/charmaps/${map}.TXT ${encoding} \ | /usr/bin/sed -E -e 's/ +/ /g' \ - > ${base}/../etc/final-maps/map.${map} + > ${ETCDIR}/final-maps/map.${map} echo map ${map} converted. 
done diff --git a/tools/tools/locale/tools/utf8-rollup.pl b/tools/tools/locale/tools/utf8-rollup.pl index da93d2f4398a..b275828d52c9 100755 --- a/tools/tools/locale/tools/utf8-rollup.pl +++ b/tools/tools/locale/tools/utf8-rollup.pl @@ -30,6 +30,7 @@ use strict; use Getopt::Long; +use Encode qw(encode decode); if ($#ARGV != 0) { print "Usage: $0 --unidir=\n"; @@ -52,6 +53,23 @@ generate_footer (); ############################ +sub utf8to32 { + my @kl = split /\\x/, $_[0]; + + shift @kl if ($kl[0] eq ''); + my $k = pack('H2' x scalar @kl, @kl); + my $ux = encode('UTF-32BE', decode('UTF-8', $k)); + my $u = uc(unpack('H*', $ux)); + # Remove BOM + $u =~ s/^0000FEFF//; + # Remove heading bytes of 0 + while ($u =~ m/^0/ and length($u) > 4) { + $u =~ s/^0//; + } + + return $u; +} + sub get_utf8map { my $file = shift; @@ -75,9 +93,10 @@ sub get_utf8map { last if ($l eq "END CHARMAP"); $l =~ /^(<[^\s]+>)\s+(.*)/; - my $k = $2; + my $k = utf8to32($2); # UTF-8 char code my $v = $1; - $k =~ s/\\x//g; # UTF-8 char code + +# print STDERR "register: $k - $v\n"; $utf8map{$k} = $v; } } @@ -143,7 +162,7 @@ sub parse_unidata { foreach my $l (@lines) { my @d = split(/;/, $l, -1); - my $mb = wctomb($d[0]); + my $mb = $d[0]; my $cat; # XXX There are code points present in UnicodeData.txt @@ -180,9 +199,9 @@ sub parse_unidata { # Check if there's upper/lower mapping if ($d[12] ne "") { - $data{'toupper'}{$mb} = wctomb($d[12]); + $data{'toupper'}{$mb} = $d[12]; } elsif ($d[13] ne "") { - $data{'tolower'}{$mb} = wctomb($d[13]); + $data{'tolower'}{$mb} = $d[13]; } } @@ -193,7 +212,7 @@ sub parse_unidata { foreach my $cat (sort keys (%data)) { print FOUT "$cat\t"; $first = 1; - foreach my $mb (sort keys (%{$data{$cat}})) { + foreach my $mb (sort {hex($a) <=> hex($b)} keys (%{$data{$cat}})) { if ($first == 1) { $first = 0; } elsif ($inrange == 1) {