- Update to Unicode CLDR v35 (Unicode 12.0).

- Update tools/tools/locale to add make targets to automatically
  generate locale source files.  With this change, just typing
  "make obj && make -j4" will rebuild them.  Check README for more details.

- Fix issues in ja_JP ctypedef and range specification support
  in utf8-rollup.pl.

- Add a temporary patch for UnicodeData.txt to fix code ranges of
  CJK Ideograph Extension A and Extension B.

- tools/cldr2def.pl:
    Use eucJP for ja_JP ctypedef because eucJP is not compatible with UTF-8.

- tools/convert_map.pl:
    Add a verbose error message.

- tools/utf8-rollup.pl:
    Normalize entries to use Unicode, not UTF-8.

Reviewed by:		bapt
Differential Revision:	https://reviews.freebsd.org/D25503
This commit is contained in:
Hiroki Sato 2020-08-15 07:19:37 +00:00
parent fc4c42c9e3
commit 84fa0ef97d
8 changed files with 821 additions and 114 deletions

View File

@ -7,19 +7,30 @@
#
# Modified by John Marino to suit DragonFly needs
#
# Use the current directory as the object directory.
.OBJDIR: .
# Abort unless the caller supplied UNIDIR.
.if !defined(UNIDIR)
.error UNIDIR is not set
# Abort when the object directory is the source directory itself, i.e. when
# "make obj" has not been run yet.
.if ${.CURDIR} == ${.OBJDIR}
.error Do make obj first.
.endif
# Variable assignments handed to the finalize script through env(1).
PASSON= UNIDIR="${UNIDIR}"
ETCDIR= ${.CURDIR}/etc
# Tree that the generated locale sources are compared against and installed to.
LOCALESRCDIR?= ${DESTDIR}/usr/src/share
# Scratch directory; passed on to child commands through SETENV.
TMPDIR?= /tmp
# Layout of this tool's own source tree, all relative to BASEDIR.
BASEDIR= ${.CURDIR}
ETCDIR= ${BASEDIR}/etc
TOOLSDIR= ${BASEDIR}/tools
PATCHDIR= ${BASEDIR}/patch
# Directory under the (absolute) object directory that holds the unpacked
# CLDR/UCD archives.
UNIDIR= ${.OBJDIR:tA}/unicode
# Packages checked by the tools-test target.
PKGS= openjdk8 \
apache-ant \
p5-XML-Parser \
p5-Tie-IxHash \
p5-Text-Iconv
# Ask pkg(8) whether the packages in PKGS are installed and report success.
tools-test:
pkg info -e ${PKGS}
@echo tools ok.
# Locale categories this Makefile can generate (timedef is commented out).
KNOWN= monetdef numericdef msgdef colldef ctypedef # timedef
# Categories to process; defaults to every known category.
TYPES?= ${KNOWN}
LOCALE_DESTDIR?= /tmp/generated-locales/
COLLATION_SPECIAL?= \
cs_CZ ISO8859-2 \
@ -44,65 +55,79 @@ COLLATION_SPECIAL?= \
# Collapse each (area, encoding) pair of COLLATION_SPECIAL into "area.encoding".
.for area enc in ${COLLATION_SPECIAL}
COLLATIONS_SPECIAL_ENV+= ${area}.${enc}
.endfor
PASSON+= COLLATIONS_SPECIAL="${COLLATIONS_SPECIAL_ENV}"
# Command prefix that runs a tool in an emptied environment (env -i) holding
# only the variables listed here.
SETENV= env -i \
PATH="${PATH}" \
TMPDIR="${TMPDIR}" \
COLLATIONS_SPECIAL="${COLLATIONS_SPECIAL_ENV}" \
UNIDIR="${UNIDIR}" \
BASEDIR="${BASEDIR}" \
TOOLSDIR="${TOOLSDIR}" \
ETCDIR="${ETCDIR}"
# For every requested category that is also listed in KNOWN, make sure its
# output directory exists and run its build-<type> target in a sub-make.
all:
.for t in ${TYPES}
. if ${KNOWN:M${t}}
test -d ${t} || mkdir ${t}
make build-${t}
. endif
.endfor
# Default goal: generate the POSIX sources, build every category, then report.
all: posix build afterbuild
.ORDER: posix build afterbuild
# After the build, list any files whose names end in "failed" below the
# current directory.  The pattern is quoted so that find(1), not the shell,
# performs the matching; unquoted, a matching file in the current directory
# would be substituted into the command line before find runs.
afterbuild: build
@echo ""
@find . -name '*failed'
# "build" depends on one build-<type> target per requested known category,
# each ordered before afterbuild.
.for t in ${TYPES}
. if ${KNOWN:M${t}}
build: build-${t}
.ORDER: build-${t} afterbuild
. endif
.endfor
# "make diff": compare each generated category directory with its counterpart
# under LOCALESRCDIR, ignoring Makefile and Makefile.depend.
diff:
.for t in ${TYPES}
. if ${KNOWN:M${t}}
diff: diff-${t}
# The leading "-" keeps make going when diff(1) exits non-zero.
diff-${t}:
-/usr/bin/diff -ruN -x Makefile -x Makefile.depend \
${LOCALESRCDIR}/${t} ${t}
. endif
.endfor
# "make install": replace the Makefile and *.src files of each category in
# the locale source tree with the newly generated ones.
#
# The install(1) command runs after the shell has changed into
# ${LOCALESRCDIR}/${t}, so the generated files are named by their
# object-directory path (${.OBJDIR}/${t}) instead of a path relative to the
# new working directory.
install:
.for t in ${TYPES}
. if ${KNOWN:M${t}}
install: install-${t}
install-${t}:
. if ${KNOWN:M${t}}
rm -rf ${.CURDIR}/${t}.draft
rm -f ${.CURDIR}/../../../share/${t}/Makefile
rm -f ${.CURDIR}/../../../share/${t}/*.src
mv ${.CURDIR}/${t}/* ${.CURDIR}/../../../share/${t}/
cd ${LOCALESRCDIR}/${t} && \
rm -f Makefile *.src && \
install -c ${.OBJDIR}/${t}/* ${LOCALESRCDIR}/${t}
. endif
.endfor
# "make post-install": for each requested known category, run make,
# make install and make clean inside its directory in the locale source tree.
# The directory is taken from LOCALESRCDIR, the same variable used by the
# diff and install targets.
post-install:
.for t in ${TYPES}
. if ${KNOWN:M${t}}
(cd ${.CURDIR}/../../../share/${t} && \
make && make install && make clean)
cd ${LOCALESRCDIR}/${t} && \
make && make install && make clean
. endif
.endfor
# Per-category rules: run cldr2def.pl to populate <type> and <type>.draft,
# then run the finalize script on the result.
.for t in ${TYPES}
gen-${t}:
mkdir -p ${t} ${t}.draft
perl -I tools tools/cldr2def.pl \
--unidir=$$(realpath ${UNIDIR}) \
--etc=$$(realpath ${ETCDIR}) \
CLEANDIRS+= ${t} ${t}.draft
# Create both output directories and run cldr2def.pl for this category;
# the :tA modifier passes UNIDIR and ETCDIR as absolute paths.
${t}:
mkdir -p ${t} ${t}.draft && \
perl -I ${TOOLSDIR} ${TOOLSDIR}/cldr2def.pl \
--unidir=${UNIDIR:tA} \
--etc=${ETCDIR:tA} \
--type=${t}
# Run the finalize script for this category once generation is done.
build-${t}: gen-${t}
env ${PASSON} tools/finalize ${t}
build-${t}: ${t}
${SETENV} OUTBASEDIR="${.OBJDIR}/${t}" ${TOOLSDIR}/finalize ${t}
.endfor
# Extra dependencies: ctypedef generation needs the UTF-8 rollup, and the
# colldef build needs the statically extracted collation sources.
gen-ctypedef: ctype-rollup
static-colldef: gen-colldef
static-colldef: colldef
build-colldef: static-colldef
static-colldef:
.for area enc in ${COLLATION_SPECIAL}
awk -f tools/extract-colldef.awk ${UNIDIR}/posix/${area}.${enc}.src > \
colldef.draft/${area}.${enc}.src
.endfor
ctype-rollup:
perl -I tools tools/utf8-rollup.pl --unidir=$$(realpath ${UNIDIR})
clean:
.for t in ${TYPES}
rm -rf ${t} ${t}.draft
# Extract the collation definition from the POSIX source with awk; if that
# fails, remove the partial output and still fail the rule.
colldef.draft/${area}.${enc}.src: posix/${area}.${enc}.src
awk -f ${TOOLSDIR}/extract-colldef.awk \
${.ALLSRC} > ${.TARGET} || (rm -f ${.TARGET} && false)
.endfor
BASE_LOCALES_OF_INTEREST?= \
@ -145,31 +170,71 @@ ENCODINGS= Big5 \
US-ASCII \
UTF-8
# Aggregate target for the POSIX source, collation and charmap files.
POSIX: posixsrc posixcol posixcm
# Stop with an error when the CLDR tools jar is not present.
.if !exists(${UNIDIR}/tools/java/cldr.jar)
.error check README about building cldr.jar
# CLDR files
# Every archive is fetched over HTTPS: the contents are unpacked into UNIDIR
# and the tools archive is subsequently built and executed.
# NOTE(review): the UCD URL points at "latest" while the CLDR archives are
# pinned to release 35 -- confirm whether UCD should be pinned as well.
CLDRFILES_CORE= https://unicode.org/Public/cldr/35/core.zip
CLDRFILES_KEY= https://unicode.org/Public/cldr/35/keyboards.zip
CLDRFILES_TOOLS=https://unicode.org/Public/cldr/35/tools.zip
CLDRFILES_UCD= https://www.unicode.org/Public/zipped/latest/UCD.zip
# fetch and extract targets
${UNIDIR}:
mkdir -p ${UNIDIR}
# For each archive, named by the last path component of its URL (:T):
#   <archive>          downloads it with fetch(1);
#   extract-<archive>  unzips it inside UNIDIR, overwriting existing files.
# "fetch" and "extract" aggregate the per-archive targets.
.for N in CORE KEY TOOLS UCD
${CLDRFILES_${N}:T}:
fetch ${CLDRFILES_${N}}
fetch: ${CLDRFILES_${N}:T}
extract-${CLDRFILES_${N}:T}:: ${CLDRFILES_${N}:T} ${UNIDIR}
cd ${UNIDIR} && unzip -o ../${CLDRFILES_${N}:T}
extract: extract-${CLDRFILES_${N}:T}
.endfor
# When PATCHDIR exists, feed all of its patch-* files, concatenated, to
# patch(1) from inside UNIDIR.
patch::
.if exists(${PATCHDIR})
cd ${UNIDIR} && cat ${PATCHDIR}/patch-* | patch
.endif
# Build the CLDR tools with ant only when cldr.jar does not exist yet;
# otherwise build-tools just reports that the jar is ready.
.if !exists(${UNIDIR}/tools/java/cldr.jar)
.ORDER: extract patch
build-tools: extract patch tools-test ${UNIDIR}
cd ${UNIDIR}/tools/java && ${SETENV} ant all jar
.else
build-tools:
@echo cldr.jar is ready.
.endif
# Command prefix that invokes cldr.jar with CLDR_DIR set to UNIDIR.
JAVA_CLDR= java -DCLDR_DIR=${UNIDIR:Q} -jar ${UNIDIR}/tools/java/cldr.jar
# "make posix": charmaps first, then the UTF-8 rollup, then the locale and
# collation sources, in that order.
posix: posixcm post-posixcm posixsrc posixcol
.ORDER: posixcm post-posixcm posixsrc posixcol
# Symlink ${UNIDIR}/posix to ../posix.
${UNIDIR}/posix:
ln -s -f ../posix ${.TARGET}
# Remove the posix directory together with the ${UNIDIR}/posix entry.
clean-posix:
rm -rf posix ${UNIDIR}/posix
# Run utf8-rollup.pl against UNIDIR once ${UNIDIR}/posix exists.
post-posixcm: ${UNIDIR}/posix
perl -I ${TOOLSDIR} ${TOOLSDIR}/utf8-rollup.pl \
--unidir=${UNIDIR}
# One charmap (posix/<encoding>.cm) per entry in ENCODINGS, generated with
# GenerateCharmap after the CLDR tools have been built.
.for enc in ${ENCODINGS}
posixcm: build-tools posix/${enc}.cm
.ORDER: build-tools posix/${enc}.cm
posix/${enc}.cm:
mkdir -p posix && \
${JAVA_CLDR} org.unicode.cldr.posix.GenerateCharmap \
-d posix -c ${enc}
.endfor
# One UTF-8 POSIX locale source per entry in BASE_LOCALES_OF_INTEREST,
# generated with GeneratePOSIX.
.for area in ${BASE_LOCALES_OF_INTEREST}
posixsrc: ${UNIDIR}/posix/${area}.UTF-8.src
${UNIDIR}/posix/${area}.UTF-8.src:
java -DCLDR_DIR=${UNIDIR:Q} -jar ${UNIDIR}/tools/java/cldr.jar \
org.unicode.cldr.posix.GeneratePOSIX \
-d ${UNIDIR}/posix -m ${area} -c UTF-8
# Variant that writes into the local posix directory and is ordered after
# build-tools.
posixsrc: build-tools posix/${area}.UTF-8.src
.ORDER: build-tools posix/${area}.UTF-8.src
posix/${area}.UTF-8.src:
mkdir -p posix && \
${JAVA_CLDR} org.unicode.cldr.posix.GeneratePOSIX \
-d posix -m ${area} -c UTF-8
.endfor
# One POSIX locale source per (area, encoding) pair in COLLATION_SPECIAL,
# generated with GeneratePOSIX in the pair's own encoding.
.for area encoding in ${COLLATION_SPECIAL}
posixcol: ${UNIDIR}/posix/${area}.${encoding}.src
${UNIDIR}/posix/${area}.${encoding}.src:
java -DCLDR_DIR=${UNIDIR:Q} -jar ${UNIDIR}/tools/java/cldr.jar \
org.unicode.cldr.posix.GeneratePOSIX \
-d ${UNIDIR}/posix -m ${area} -c ${encoding}
.endfor
.for enc in ${ENCODINGS}
posixcm: ${UNIDIR}/posix/${enc}.cm
${UNIDIR}/posix/${enc}.cm:
java -DCLDR_DIR=${UNIDIR:Q} -jar ${UNIDIR}/tools/java/cldr.jar \
org.unicode.cldr.posix.GenerateCharmap \
-d ${UNIDIR}/posix -c ${enc}
# Variant that writes into the local posix directory and is ordered after
# build-tools.
posixcol: build-tools posix/${area}.${encoding}.src
.ORDER: build-tools posix/${area}.${encoding}.src
posix/${area}.${encoding}.src:
mkdir -p posix && \
${JAVA_CLDR} org.unicode.cldr.posix.GeneratePOSIX \
-d posix -m ${area} -c ${encoding}
.endfor
# Remove the files directly under ${UNIDIR}/posix.
clean-POSIX:
rm -f ${UNIDIR}/posix/*
.include <bsd.obj.mk>

View File

@ -1,32 +1,58 @@
# $FreeBSD$
To generate the locales:
Files in this directory are used to generate locale source files
from files in CLDR (Unicode Common Locale Data Repository).
Tools needed:
java (openjdk >= 8)
perl
converters/p5-Text-Iconv
devel/apache-ant
devel/p5-Tie-IxHash
textproc/p5-XML-Parser
To generate the files, do the following:
1. Fetch CLDR data from: http://unicode.org/Public/cldr/. You need all of the
core.zip, keyboards.zip, and tools.zip.
2. Fetch unidata (UCD.zip) from http://www.unicode.org/Public/zipped/latest.
3. Extract:
mkdir -p ~/unicode
cd ~/unicode
unzip ~/core.zip
unzip ~/keyboards.zip
unzip ~/tools.zip
unzip ~/UCD.zip
4. Export variable:
UNIDIR=~/unicode; export UNIDIR
5. Build the CLDR tools:
cd $UNIDIR/tools/java
ant jar
6. Build POSIX data files from CLDR data:
make POSIX
7. Build and install new locale data:
make
cd /usr/src/tools/tools/locale
make obj (mandatory)
make -j16 (-jN recommended)
make diff (check if the changes are reasonable)
make install
"make" downloads the necessary files and builds them; "make install" then
installs the results into /usr/src/share/* as source files for locales.
More details are as follows:
Variables:
LOCALESRCDIR
Destination path for the generated locale files.
Default: $DESTDIR/usr/src/share.
TMPDIR
Temporary directory.
Default: /tmp
Targets:
make obj
Create a temporary directory for building.
make clean
Clean up the obj directories.
make cleandir
Remove the obj directories completely.
make tools-test
Check whether the necessary tools are installed.
If something is missing, install it manually.
make fetch
Download necessary files from CLDR.
make build-tools
Build a tool to generate locale source files.
make posix
Build POSIX locale source files.
make build
Build locale files.
make diff
Run diff(1) on the build results against $LOCALESRCDIR.
make install
Install the build results into $LOCALESRCDIR.
[EOF]

View File

@ -2969,6 +2969,7 @@ CHARMAP
<TELUGU_DIGIT_SEVEN> \xE0\xB1\xAD
<TELUGU_DIGIT_EIGHT> \xE0\xB1\xAE
<TELUGU_DIGIT_NINE> \xE0\xB1\xAF
<TELUGU_SIGN_SIDDHAM> \xE0\xB1\xB7
<TELUGU_FRACTION_DIGIT_ZERO_FOR_ODD_POWERS_OF_FOUR> \xE0\xB1\xB8
<TELUGU_FRACTION_DIGIT_ONE_FOR_ODD_POWERS_OF_FOUR> \xE0\xB1\xB9
<TELUGU_FRACTION_DIGIT_TWO_FOR_ODD_POWERS_OF_FOUR> \xE0\xB1\xBA
@ -3363,14 +3364,24 @@ CHARMAP
<LAO_LETTER_KO> \xE0\xBA\x81
<LAO_LETTER_KHO_SUNG> \xE0\xBA\x82
<LAO_LETTER_KHO_TAM> \xE0\xBA\x84
<LAO_LETTER_PALI_GHA> \xE0\xBA\x86
<LAO_LETTER_NGO> \xE0\xBA\x87
<LAO_LETTER_CO> \xE0\xBA\x88
<LAO_LETTER_PALI_CHA> \xE0\xBA\x89
<LAO_LETTER_SO_TAM> \xE0\xBA\x8A
<LAO_LETTER_PALI_JHA> \xE0\xBA\x8C
<LAO_LETTER_NYO> \xE0\xBA\x8D
<LAO_LETTER_PALI_NYA> \xE0\xBA\x8E
<LAO_LETTER_PALI_TTA> \xE0\xBA\x8F
<LAO_LETTER_PALI_TTHA> \xE0\xBA\x90
<LAO_LETTER_PALI_DDA> \xE0\xBA\x91
<LAO_LETTER_PALI_DDHA> \xE0\xBA\x92
<LAO_LETTER_PALI_NNA> \xE0\xBA\x93
<LAO_LETTER_DO> \xE0\xBA\x94
<LAO_LETTER_TO> \xE0\xBA\x95
<LAO_LETTER_THO_SUNG> \xE0\xBA\x96
<LAO_LETTER_THO_TAM> \xE0\xBA\x97
<LAO_LETTER_PALI_DHA> \xE0\xBA\x98
<LAO_LETTER_NO> \xE0\xBA\x99
<LAO_LETTER_BO> \xE0\xBA\x9A
<LAO_LETTER_PO> \xE0\xBA\x9B
@ -3378,13 +3389,17 @@ CHARMAP
<LAO_LETTER_FO_TAM> \xE0\xBA\x9D
<LAO_LETTER_PHO_TAM> \xE0\xBA\x9E
<LAO_LETTER_FO_SUNG> \xE0\xBA\x9F
<LAO_LETTER_PALI_BHA> \xE0\xBA\xA0
<LAO_LETTER_MO> \xE0\xBA\xA1
<LAO_LETTER_YO> \xE0\xBA\xA2
<LAO_LETTER_LO_LING> \xE0\xBA\xA3
<LAO_LETTER_LO_LOOT> \xE0\xBA\xA5
<LAO_LETTER_WO> \xE0\xBA\xA7
<LAO_LETTER_SANSKRIT_SHA> \xE0\xBA\xA8
<LAO_LETTER_SANSKRIT_SSA> \xE0\xBA\xA9
<LAO_LETTER_SO_SUNG> \xE0\xBA\xAA
<LAO_LETTER_HO_SUNG> \xE0\xBA\xAB
<LAO_LETTER_PALI_LLA> \xE0\xBA\xAC
<LAO_LETTER_O> \xE0\xBA\xAD
<LAO_LETTER_HO_TAM> \xE0\xBA\xAE
<LAO_ELLIPSIS> \xE0\xBA\xAF
@ -3398,6 +3413,7 @@ CHARMAP
<LAO_VOWEL_SIGN_YY> \xE0\xBA\xB7
<LAO_VOWEL_SIGN_U> \xE0\xBA\xB8
<LAO_VOWEL_SIGN_UU> \xE0\xBA\xB9
<LAO_SIGN_PALI_VIRAMA> \xE0\xBA\xBA
<LAO_VOWEL_SIGN_MAI_KON> \xE0\xBA\xBB
<LAO_SEMIVOWEL_SIGN_LO> \xE0\xBA\xBC
<LAO_SEMIVOWEL_SIGN_NYO> \xE0\xBA\xBD
@ -6656,6 +6672,7 @@ CHARMAP
<VEDIC_SIGN_ATIKRAMA> \xE1\xB3\xB7
<VEDIC_TONE_RING_ABOVE> \xE1\xB3\xB8
<VEDIC_TONE_DOUBLE_RING_ABOVE> \xE1\xB3\xB9
<VEDIC_SIGN_DOUBLE_ANUSVARA_ANTARGOMUKHA> \xE1\xB3\xBA
<LATIN_LETTER_SMALL_CAPITAL_A> \xE1\xB4\x80
<LATIN_LETTER_SMALL_CAPITAL_AE> \xE1\xB4\x81
<LATIN_SMALL_LETTER_TURNED_AE> \xE1\xB4\x82
@ -10325,6 +10342,7 @@ CHARMAP
<BLACK_MEDIUM_DOWN-POINTING_TRIANGLE_CENTRED> \xE2\xAF\x86
<BLACK_MEDIUM_LEFT-POINTING_TRIANGLE_CENTRED> \xE2\xAF\x87
<BLACK_MEDIUM_RIGHT-POINTING_TRIANGLE_CENTRED> \xE2\xAF\x88
<NEPTUNE_FORM_TWO> \xE2\xAF\x89
<TOP_HALF_BLACK_CIRCLE> \xE2\xAF\x8A
<BOTTOM_HALF_BLACK_CIRCLE> \xE2\xAF\x8B
<LIGHT_FOUR_POINTED_BLACK_CUSP> \xE2\xAF\x8C
@ -10378,6 +10396,7 @@ CHARMAP
<DOUBLED_SYMBOL> \xE2\xAF\xBC
<PASSED_SYMBOL> \xE2\xAF\xBD
<REVERSED_RIGHT_ANGLE> \xE2\xAF\xBE
<HELLSCHREIBER_PAUSE_SYMBOL> \xE2\xAF\xBF
<GLAGOLITIC_CAPITAL_LETTER_AZU> \xE2\xB0\x80
<GLAGOLITIC_CAPITAL_LETTER_BUKY> \xE2\xB0\x81
<GLAGOLITIC_CAPITAL_LETTER_VEDE> \xE2\xB0\x82
@ -10916,6 +10935,7 @@ CHARMAP
<MEDIEVAL_COMMA> \xE2\xB9\x8C
<PARAGRAPHUS_MARK> \xE2\xB9\x8D
<PUNCTUS_ELEVATUS_MARK> \xE2\xB9\x8E
<CORNISH_VERSE_DIVIDER> \xE2\xB9\x8F
<CJK_RADICAL_REPEAT> \xE2\xBA\x80
<CJK_RADICAL_CLIFF> \xE2\xBA\x81
<CJK_RADICAL_SECOND_ONE> \xE2\xBA\x82
@ -41812,6 +41832,17 @@ CHARMAP
<LATIN_SMALL_LETTER_OMEGA> \xEA\x9E\xB7
<LATIN_CAPITAL_LETTER_U_WITH_STROKE> \xEA\x9E\xB8
<LATIN_SMALL_LETTER_U_WITH_STROKE> \xEA\x9E\xB9
<LATIN_CAPITAL_LETTER_GLOTTAL_A> \xEA\x9E\xBA
<LATIN_SMALL_LETTER_GLOTTAL_A> \xEA\x9E\xBB
<LATIN_CAPITAL_LETTER_GLOTTAL_I> \xEA\x9E\xBC
<LATIN_SMALL_LETTER_GLOTTAL_I> \xEA\x9E\xBD
<LATIN_CAPITAL_LETTER_GLOTTAL_U> \xEA\x9E\xBE
<LATIN_SMALL_LETTER_GLOTTAL_U> \xEA\x9E\xBF
<LATIN_CAPITAL_LETTER_ANGLICANA_W> \xEA\x9F\x82
<LATIN_SMALL_LETTER_ANGLICANA_W> \xEA\x9F\x83
<LATIN_CAPITAL_LETTER_C_WITH_PALATAL_HOOK> \xEA\x9F\x84
<LATIN_CAPITAL_LETTER_S_WITH_HOOK> \xEA\x9F\x85
<LATIN_CAPITAL_LETTER_Z_WITH_PALATAL_HOOK> \xEA\x9F\x86
<LATIN_EPIGRAPHIC_LETTER_SIDEWAYS_I> \xEA\x9F\xB7
<MODIFIER_LETTER_CAPITAL_H_WITH_STROKE> \xEA\x9F\xB8
<MODIFIER_LETTER_SMALL_LIGATURE_OE> \xEA\x9F\xB9
@ -42577,6 +42608,8 @@ CHARMAP
<LATIN_SMALL_LETTER_UO> \xEA\xAD\xA3
<LATIN_SMALL_LETTER_INVERTED_ALPHA> \xEA\xAD\xA4
<GREEK_LETTER_SMALL_CAPITAL_OMEGA> \xEA\xAD\xA5
<LATIN_SMALL_LETTER_DZ_DIGRAPH_WITH_RETROFLEX_HOOK> \xEA\xAD\xA6
<LATIN_SMALL_LETTER_TS_DIGRAPH_WITH_RETROFLEX_HOOK> \xEA\xAD\xA7
<CHEROKEE_SMALL_LETTER_A> \xEA\xAD\xB0
<CHEROKEE_SMALL_LETTER_E> \xEA\xAD\xB1
<CHEROKEE_SMALL_LETTER_I> \xEA\xAD\xB2
@ -64381,6 +64414,29 @@ CHARMAP
<SOGDIAN_PUNCTUATION_CIRCLE_WITH_DOT> \xF0\x90\xBD\x97
<SOGDIAN_PUNCTUATION_TWO_CIRCLES_WITH_DOTS> \xF0\x90\xBD\x98
<SOGDIAN_PUNCTUATION_HALF_CIRCLE_WITH_DOT> \xF0\x90\xBD\x99
<ELYMAIC_LETTER_ALEPH> \xF0\x90\xBF\xA0
<ELYMAIC_LETTER_BETH> \xF0\x90\xBF\xA1
<ELYMAIC_LETTER_GIMEL> \xF0\x90\xBF\xA2
<ELYMAIC_LETTER_DALETH> \xF0\x90\xBF\xA3
<ELYMAIC_LETTER_HE> \xF0\x90\xBF\xA4
<ELYMAIC_LETTER_WAW> \xF0\x90\xBF\xA5
<ELYMAIC_LETTER_ZAYIN> \xF0\x90\xBF\xA6
<ELYMAIC_LETTER_HETH> \xF0\x90\xBF\xA7
<ELYMAIC_LETTER_TETH> \xF0\x90\xBF\xA8
<ELYMAIC_LETTER_YODH> \xF0\x90\xBF\xA9
<ELYMAIC_LETTER_KAPH> \xF0\x90\xBF\xAA
<ELYMAIC_LETTER_LAMEDH> \xF0\x90\xBF\xAB
<ELYMAIC_LETTER_MEM> \xF0\x90\xBF\xAC
<ELYMAIC_LETTER_NUN> \xF0\x90\xBF\xAD
<ELYMAIC_LETTER_SAMEKH> \xF0\x90\xBF\xAE
<ELYMAIC_LETTER_AYIN> \xF0\x90\xBF\xAF
<ELYMAIC_LETTER_PE> \xF0\x90\xBF\xB0
<ELYMAIC_LETTER_SADHE> \xF0\x90\xBF\xB1
<ELYMAIC_LETTER_QOPH> \xF0\x90\xBF\xB2
<ELYMAIC_LETTER_RESH> \xF0\x90\xBF\xB3
<ELYMAIC_LETTER_SHIN> \xF0\x90\xBF\xB4
<ELYMAIC_LETTER_TAW> \xF0\x90\xBF\xB5
<ELYMAIC_LIGATURE_ZAYIN-YODH> \xF0\x90\xBF\xB6
<BRAHMI_SIGN_CANDRABINDU> \xF0\x91\x80\x80
<BRAHMI_SIGN_ANUSVARA> \xF0\x91\x80\x81
<BRAHMI_SIGN_VISARGA> \xF0\x91\x80\x82
@ -65163,6 +65219,7 @@ CHARMAP
<NEWA_PLACEHOLDER_MARK> \xF0\x91\x91\x9B
<NEWA_INSERTION_SIGN> \xF0\x91\x91\x9D
<NEWA_SANDHI_MARK> \xF0\x91\x91\x9E
<NEWA_LETTER_VEDIC_ANUSVARA> \xF0\x91\x91\x9F
<TIRHUTA_ANJI> \xF0\x91\x92\x80
<TIRHUTA_LETTER_A> \xF0\x91\x92\x81
<TIRHUTA_LETTER_AA> \xF0\x91\x92\x82
@ -65485,6 +65542,7 @@ CHARMAP
<TAKRI_VOWEL_SIGN_AU> \xF0\x91\x9A\xB5
<TAKRI_SIGN_VIRAMA> \xF0\x91\x9A\xB6
<TAKRI_SIGN_NUKTA> \xF0\x91\x9A\xB7
<TAKRI_LETTER_ARCHAIC_KHA> \xF0\x91\x9A\xB8
<TAKRI_DIGIT_ZERO> \xF0\x91\x9B\x80
<TAKRI_DIGIT_ONE> \xF0\x91\x9B\x81
<TAKRI_DIGIT_TWO> \xF0\x91\x9B\x82
@ -65697,6 +65755,71 @@ CHARMAP
<WARANG_CITI_NUMBER_EIGHTY> \xF0\x91\xA3\xB1
<WARANG_CITI_NUMBER_NINETY> \xF0\x91\xA3\xB2
<WARANG_CITI_OM> \xF0\x91\xA3\xBF
<NANDINAGARI_LETTER_A> \xF0\x91\xA6\xA0
<NANDINAGARI_LETTER_AA> \xF0\x91\xA6\xA1
<NANDINAGARI_LETTER_I> \xF0\x91\xA6\xA2
<NANDINAGARI_LETTER_II> \xF0\x91\xA6\xA3
<NANDINAGARI_LETTER_U> \xF0\x91\xA6\xA4
<NANDINAGARI_LETTER_UU> \xF0\x91\xA6\xA5
<NANDINAGARI_LETTER_VOCALIC_R> \xF0\x91\xA6\xA6
<NANDINAGARI_LETTER_VOCALIC_RR> \xF0\x91\xA6\xA7
<NANDINAGARI_LETTER_E> \xF0\x91\xA6\xAA
<NANDINAGARI_LETTER_AI> \xF0\x91\xA6\xAB
<NANDINAGARI_LETTER_O> \xF0\x91\xA6\xAC
<NANDINAGARI_LETTER_AU> \xF0\x91\xA6\xAD
<NANDINAGARI_LETTER_KA> \xF0\x91\xA6\xAE
<NANDINAGARI_LETTER_KHA> \xF0\x91\xA6\xAF
<NANDINAGARI_LETTER_GA> \xF0\x91\xA6\xB0
<NANDINAGARI_LETTER_GHA> \xF0\x91\xA6\xB1
<NANDINAGARI_LETTER_NGA> \xF0\x91\xA6\xB2
<NANDINAGARI_LETTER_CA> \xF0\x91\xA6\xB3
<NANDINAGARI_LETTER_CHA> \xF0\x91\xA6\xB4
<NANDINAGARI_LETTER_JA> \xF0\x91\xA6\xB5
<NANDINAGARI_LETTER_JHA> \xF0\x91\xA6\xB6
<NANDINAGARI_LETTER_NYA> \xF0\x91\xA6\xB7
<NANDINAGARI_LETTER_TTA> \xF0\x91\xA6\xB8
<NANDINAGARI_LETTER_TTHA> \xF0\x91\xA6\xB9
<NANDINAGARI_LETTER_DDA> \xF0\x91\xA6\xBA
<NANDINAGARI_LETTER_DDHA> \xF0\x91\xA6\xBB
<NANDINAGARI_LETTER_NNA> \xF0\x91\xA6\xBC
<NANDINAGARI_LETTER_TA> \xF0\x91\xA6\xBD
<NANDINAGARI_LETTER_THA> \xF0\x91\xA6\xBE
<NANDINAGARI_LETTER_DA> \xF0\x91\xA6\xBF
<NANDINAGARI_LETTER_DHA> \xF0\x91\xA7\x80
<NANDINAGARI_LETTER_NA> \xF0\x91\xA7\x81
<NANDINAGARI_LETTER_PA> \xF0\x91\xA7\x82
<NANDINAGARI_LETTER_PHA> \xF0\x91\xA7\x83
<NANDINAGARI_LETTER_BA> \xF0\x91\xA7\x84
<NANDINAGARI_LETTER_BHA> \xF0\x91\xA7\x85
<NANDINAGARI_LETTER_MA> \xF0\x91\xA7\x86
<NANDINAGARI_LETTER_YA> \xF0\x91\xA7\x87
<NANDINAGARI_LETTER_RA> \xF0\x91\xA7\x88
<NANDINAGARI_LETTER_LA> \xF0\x91\xA7\x89
<NANDINAGARI_LETTER_VA> \xF0\x91\xA7\x8A
<NANDINAGARI_LETTER_SHA> \xF0\x91\xA7\x8B
<NANDINAGARI_LETTER_SSA> \xF0\x91\xA7\x8C
<NANDINAGARI_LETTER_SA> \xF0\x91\xA7\x8D
<NANDINAGARI_LETTER_HA> \xF0\x91\xA7\x8E
<NANDINAGARI_LETTER_LLA> \xF0\x91\xA7\x8F
<NANDINAGARI_LETTER_RRA> \xF0\x91\xA7\x90
<NANDINAGARI_VOWEL_SIGN_AA> \xF0\x91\xA7\x91
<NANDINAGARI_VOWEL_SIGN_I> \xF0\x91\xA7\x92
<NANDINAGARI_VOWEL_SIGN_II> \xF0\x91\xA7\x93
<NANDINAGARI_VOWEL_SIGN_U> \xF0\x91\xA7\x94
<NANDINAGARI_VOWEL_SIGN_UU> \xF0\x91\xA7\x95
<NANDINAGARI_VOWEL_SIGN_VOCALIC_R> \xF0\x91\xA7\x96
<NANDINAGARI_VOWEL_SIGN_VOCALIC_RR> \xF0\x91\xA7\x97
<NANDINAGARI_VOWEL_SIGN_E> \xF0\x91\xA7\x9A
<NANDINAGARI_VOWEL_SIGN_AI> \xF0\x91\xA7\x9B
<NANDINAGARI_VOWEL_SIGN_O> \xF0\x91\xA7\x9C
<NANDINAGARI_VOWEL_SIGN_AU> \xF0\x91\xA7\x9D
<NANDINAGARI_SIGN_ANUSVARA> \xF0\x91\xA7\x9E
<NANDINAGARI_SIGN_VISARGA> \xF0\x91\xA7\x9F
<NANDINAGARI_SIGN_VIRAMA> \xF0\x91\xA7\xA0
<NANDINAGARI_SIGN_AVAGRAHA> \xF0\x91\xA7\xA1
<NANDINAGARI_SIGN_SIDDHAM> \xF0\x91\xA7\xA2
<NANDINAGARI_HEADSTROKE> \xF0\x91\xA7\xA3
<NANDINAGARI_VOWEL_SIGN_PRISHTHAMATRA_E> \xF0\x91\xA7\xA4
<ZANABAZAR_SQUARE_LETTER_A> \xF0\x91\xA8\x80
<ZANABAZAR_SQUARE_VOWEL_SIGN_I> \xF0\x91\xA8\x81
<ZANABAZAR_SQUARE_VOWEL_SIGN_UE> \xF0\x91\xA8\x82
@ -65821,6 +65944,8 @@ CHARMAP
<SOYOMBO_LETTER_SA> \xF0\x91\xAA\x81
<SOYOMBO_LETTER_HA> \xF0\x91\xAA\x82
<SOYOMBO_LETTER_KSSA> \xF0\x91\xAA\x83
<SOYOMBO_SIGN_JIHVAMULIYA> \xF0\x91\xAA\x84
<SOYOMBO_SIGN_UPADHMANIYA> \xF0\x91\xAA\x85
<SOYOMBO_CLUSTER-INITIAL_LETTER_RA> \xF0\x91\xAA\x86
<SOYOMBO_CLUSTER-INITIAL_LETTER_LA> \xF0\x91\xAA\x87
<SOYOMBO_CLUSTER-INITIAL_LETTER_SHA> \xF0\x91\xAA\x88
@ -66235,6 +66360,57 @@ CHARMAP
<MAKASAR_VOWEL_SIGN_O> \xF0\x91\xBB\xB6
<MAKASAR_PASSIMBANG> \xF0\x91\xBB\xB7
<MAKASAR_END_OF_SECTION> \xF0\x91\xBB\xB8
<TAMIL_FRACTION_ONE_THREE-HUNDRED-AND-TWENTIETH> \xF0\x91\xBF\x80
<TAMIL_FRACTION_ONE_ONE-HUNDRED-AND-SIXTIETH> \xF0\x91\xBF\x81
<TAMIL_FRACTION_ONE_EIGHTIETH> \xF0\x91\xBF\x82
<TAMIL_FRACTION_ONE_SIXTY-FOURTH> \xF0\x91\xBF\x83
<TAMIL_FRACTION_ONE_FORTIETH> \xF0\x91\xBF\x84
<TAMIL_FRACTION_ONE_THIRTY-SECOND> \xF0\x91\xBF\x85
<TAMIL_FRACTION_THREE_EIGHTIETHS> \xF0\x91\xBF\x86
<TAMIL_FRACTION_THREE_SIXTY-FOURTHS> \xF0\x91\xBF\x87
<TAMIL_FRACTION_ONE_TWENTIETH> \xF0\x91\xBF\x88
<TAMIL_FRACTION_ONE_SIXTEENTH-1> \xF0\x91\xBF\x89
<TAMIL_FRACTION_ONE_SIXTEENTH-2> \xF0\x91\xBF\x8A
<TAMIL_FRACTION_ONE_TENTH> \xF0\x91\xBF\x8B
<TAMIL_FRACTION_ONE_EIGHTH> \xF0\x91\xBF\x8C
<TAMIL_FRACTION_THREE_TWENTIETHS> \xF0\x91\xBF\x8D
<TAMIL_FRACTION_THREE_SIXTEENTHS> \xF0\x91\xBF\x8E
<TAMIL_FRACTION_ONE_FIFTH> \xF0\x91\xBF\x8F
<TAMIL_FRACTION_ONE_QUARTER> \xF0\x91\xBF\x90
<TAMIL_FRACTION_ONE_HALF-1> \xF0\x91\xBF\x91
<TAMIL_FRACTION_ONE_HALF-2> \xF0\x91\xBF\x92
<TAMIL_FRACTION_THREE_QUARTERS> \xF0\x91\xBF\x93
<TAMIL_FRACTION_DOWNSCALING_FACTOR_KIIZH> \xF0\x91\xBF\x94
<TAMIL_SIGN_NEL> \xF0\x91\xBF\x95
<TAMIL_SIGN_CEVITU> \xF0\x91\xBF\x96
<TAMIL_SIGN_AAZHAAKKU> \xF0\x91\xBF\x97
<TAMIL_SIGN_UZHAKKU> \xF0\x91\xBF\x98
<TAMIL_SIGN_MUUVUZHAKKU> \xF0\x91\xBF\x99
<TAMIL_SIGN_KURUNI> \xF0\x91\xBF\x9A
<TAMIL_SIGN_PATHAKKU> \xF0\x91\xBF\x9B
<TAMIL_SIGN_MUKKURUNI> \xF0\x91\xBF\x9C
<TAMIL_SIGN_KAACU> \xF0\x91\xBF\x9D
<TAMIL_SIGN_PANAM> \xF0\x91\xBF\x9E
<TAMIL_SIGN_PON> \xF0\x91\xBF\x9F
<TAMIL_SIGN_VARAAKAN> \xF0\x91\xBF\xA0
<TAMIL_SIGN_PAARAM> \xF0\x91\xBF\xA1
<TAMIL_SIGN_KUZHI> \xF0\x91\xBF\xA2
<TAMIL_SIGN_VELI> \xF0\x91\xBF\xA3
<TAMIL_WET_CULTIVATION_SIGN> \xF0\x91\xBF\xA4
<TAMIL_DRY_CULTIVATION_SIGN> \xF0\x91\xBF\xA5
<TAMIL_LAND_SIGN> \xF0\x91\xBF\xA6
<TAMIL_SALT_PAN_SIGN> \xF0\x91\xBF\xA7
<TAMIL_TRADITIONAL_CREDIT_SIGN> \xF0\x91\xBF\xA8
<TAMIL_TRADITIONAL_NUMBER_SIGN> \xF0\x91\xBF\xA9
<TAMIL_CURRENT_SIGN> \xF0\x91\xBF\xAA
<TAMIL_AND_ODD_SIGN> \xF0\x91\xBF\xAB
<TAMIL_SPENT_SIGN> \xF0\x91\xBF\xAC
<TAMIL_TOTAL_SIGN> \xF0\x91\xBF\xAD
<TAMIL_IN_POSSESSION_SIGN> \xF0\x91\xBF\xAE
<TAMIL_STARTING_FROM_SIGN> \xF0\x91\xBF\xAF
<TAMIL_SIGN_MUTHALIYA> \xF0\x91\xBF\xB0
<TAMIL_SIGN_VAKAIYARAA> \xF0\x91\xBF\xB1
<TAMIL_PUNCTUATION_END_OF_TEXT> \xF0\x91\xBF\xBF
<CUNEIFORM_SIGN_A> \xF0\x92\x80\x80
<CUNEIFORM_SIGN_A_TIMES_A> \xF0\x92\x80\x81
<CUNEIFORM_SIGN_A_TIMES_BAD> \xF0\x92\x80\x82
@ -68540,6 +68716,15 @@ CHARMAP
<EGYPTIAN_HIEROGLYPH_AA030> \xF0\x93\x90\xAC
<EGYPTIAN_HIEROGLYPH_AA031> \xF0\x93\x90\xAD
<EGYPTIAN_HIEROGLYPH_AA032> \xF0\x93\x90\xAE
<EGYPTIAN_HIEROGLYPH_VERTICAL_JOINER> \xF0\x93\x90\xB0
<EGYPTIAN_HIEROGLYPH_HORIZONTAL_JOINER> \xF0\x93\x90\xB1
<EGYPTIAN_HIEROGLYPH_INSERT_AT_TOP_START> \xF0\x93\x90\xB2
<EGYPTIAN_HIEROGLYPH_INSERT_AT_BOTTOM_START> \xF0\x93\x90\xB3
<EGYPTIAN_HIEROGLYPH_INSERT_AT_TOP_END> \xF0\x93\x90\xB4
<EGYPTIAN_HIEROGLYPH_INSERT_AT_BOTTOM_END> \xF0\x93\x90\xB5
<EGYPTIAN_HIEROGLYPH_OVERLAY_MIDDLE> \xF0\x93\x90\xB6
<EGYPTIAN_HIEROGLYPH_BEGIN_SEGMENT> \xF0\x93\x90\xB7
<EGYPTIAN_HIEROGLYPH_END_SEGMENT> \xF0\x93\x90\xB8
<ANATOLIAN_HIEROGLYPH_A001> \xF0\x94\x90\x80
<ANATOLIAN_HIEROGLYPH_A002> \xF0\x94\x90\x81
<ANATOLIAN_HIEROGLYPH_A003> \xF0\x94\x90\x82
@ -70058,6 +70243,13 @@ CHARMAP
<MIAO_LETTER_WA> \xF0\x96\xBD\x82
<MIAO_LETTER_AH> \xF0\x96\xBD\x83
<MIAO_LETTER_HHA> \xF0\x96\xBD\x84
<MIAO_LETTER_BRI> \xF0\x96\xBD\x85
<MIAO_LETTER_SYI> \xF0\x96\xBD\x86
<MIAO_LETTER_DZYI> \xF0\x96\xBD\x87
<MIAO_LETTER_TE> \xF0\x96\xBD\x88
<MIAO_LETTER_TSE> \xF0\x96\xBD\x89
<MIAO_LETTER_RTE> \xF0\x96\xBD\x8A
<MIAO_SIGN_CONSONANT_MODIFIER_BAR> \xF0\x96\xBD\x8F
<MIAO_LETTER_NASALIZATION> \xF0\x96\xBD\x90
<MIAO_SIGN_ASPIRATION> \xF0\x96\xBD\x91
<MIAO_SIGN_REFORMED_VOICING> \xF0\x96\xBD\x92
@ -70105,6 +70297,15 @@ CHARMAP
<MIAO_VOWEL_SIGN_OU> \xF0\x96\xBD\xBC
<MIAO_VOWEL_SIGN_N> \xF0\x96\xBD\xBD
<MIAO_VOWEL_SIGN_NG> \xF0\x96\xBD\xBE
<MIAO_VOWEL_SIGN_UOG> \xF0\x96\xBD\xBF
<MIAO_VOWEL_SIGN_YUI> \xF0\x96\xBE\x80
<MIAO_VOWEL_SIGN_OG> \xF0\x96\xBE\x81
<MIAO_VOWEL_SIGN_OER> \xF0\x96\xBE\x82
<MIAO_VOWEL_SIGN_VW> \xF0\x96\xBE\x83
<MIAO_VOWEL_SIGN_IG> \xF0\x96\xBE\x84
<MIAO_VOWEL_SIGN_EA> \xF0\x96\xBE\x85
<MIAO_VOWEL_SIGN_IONG> \xF0\x96\xBE\x86
<MIAO_VOWEL_SIGN_UI> \xF0\x96\xBE\x87
<MIAO_TONE_RIGHT> \xF0\x96\xBE\x8F
<MIAO_TONE_TOP_RIGHT> \xF0\x96\xBE\x90
<MIAO_TONE_ABOVE> \xF0\x96\xBE\x91
@ -70124,6 +70325,8 @@ CHARMAP
<MIAO_LETTER_REFORMED_TONE-8> \xF0\x96\xBE\x9F
<TANGUT_ITERATION_MARK> \xF0\x96\xBF\xA0
<NUSHU_ITERATION_MARK> \xF0\x96\xBF\xA1
<OLD_CHINESE_HOOK_MARK> \xF0\x96\xBF\xA2
<OLD_CHINESE_ITERATION_MARK> \xF0\x96\xBF\xA3
<TANGUT_IDEOGRAPH-17000> \xF0\x97\x80\x80
<TANGUT_IDEOGRAPH-17001> \xF0\x97\x80\x81
<TANGUT_IDEOGRAPH-17002> \xF0\x97\x80\x82
@ -76254,6 +76457,12 @@ CHARMAP
<TANGUT_IDEOGRAPH-187EF> \xF0\x98\x9F\xAF
<TANGUT_IDEOGRAPH-187F0> \xF0\x98\x9F\xB0
<TANGUT_IDEOGRAPH-187F1> \xF0\x98\x9F\xB1
<TANGUT_IDEOGRAPH-187F2> \xF0\x98\x9F\xB2
<TANGUT_IDEOGRAPH-187F3> \xF0\x98\x9F\xB3
<TANGUT_IDEOGRAPH-187F4> \xF0\x98\x9F\xB4
<TANGUT_IDEOGRAPH-187F5> \xF0\x98\x9F\xB5
<TANGUT_IDEOGRAPH-187F6> \xF0\x98\x9F\xB6
<TANGUT_IDEOGRAPH-187F7> \xF0\x98\x9F\xB7
<TANGUT_COMPONENT-001> \xF0\x98\xA0\x80
<TANGUT_COMPONENT-002> \xF0\x98\xA0\x81
<TANGUT_COMPONENT-003> \xF0\x98\xA0\x82
@ -77296,6 +77505,13 @@ CHARMAP
<HENTAIGANA_LETTER_WO-7> \xF0\x9B\x84\x9C
<HENTAIGANA_LETTER_N-MU-MO-1> \xF0\x9B\x84\x9D
<HENTAIGANA_LETTER_N-MU-MO-2> \xF0\x9B\x84\x9E
<HIRAGANA_LETTER_SMALL_WI> \xF0\x9B\x85\x90
<HIRAGANA_LETTER_SMALL_WE> \xF0\x9B\x85\x91
<HIRAGANA_LETTER_SMALL_WO> \xF0\x9B\x85\x92
<KATAKANA_LETTER_SMALL_WI> \xF0\x9B\x85\xA4
<KATAKANA_LETTER_SMALL_WE> \xF0\x9B\x85\xA5
<KATAKANA_LETTER_SMALL_WO> \xF0\x9B\x85\xA6
<KATAKANA_LETTER_SMALL_N> \xF0\x9B\x85\xA7
<NUSHU_CHARACTER-1B170> \xF0\x9B\x85\xB0
<NUSHU_CHARACTER-1B171> \xF0\x9B\x85\xB1
<NUSHU_CHARACTER-1B172> \xF0\x9B\x85\xB2
@ -80224,6 +80440,136 @@ CHARMAP
<COMBINING_GLAGOLITIC_LETTER_BIG_YUS> \xF0\x9E\x80\xA8
<COMBINING_GLAGOLITIC_LETTER_IOTATED_BIG_YUS> \xF0\x9E\x80\xA9
<COMBINING_GLAGOLITIC_LETTER_FITA> \xF0\x9E\x80\xAA
<NYIAKENG_PUACHUE_HMONG_LETTER_MA> \xF0\x9E\x84\x80
<NYIAKENG_PUACHUE_HMONG_LETTER_TSA> \xF0\x9E\x84\x81
<NYIAKENG_PUACHUE_HMONG_LETTER_NTA> \xF0\x9E\x84\x82
<NYIAKENG_PUACHUE_HMONG_LETTER_TA> \xF0\x9E\x84\x83
<NYIAKENG_PUACHUE_HMONG_LETTER_HA> \xF0\x9E\x84\x84
<NYIAKENG_PUACHUE_HMONG_LETTER_NA> \xF0\x9E\x84\x85
<NYIAKENG_PUACHUE_HMONG_LETTER_XA> \xF0\x9E\x84\x86
<NYIAKENG_PUACHUE_HMONG_LETTER_NKA> \xF0\x9E\x84\x87
<NYIAKENG_PUACHUE_HMONG_LETTER_CA> \xF0\x9E\x84\x88
<NYIAKENG_PUACHUE_HMONG_LETTER_LA> \xF0\x9E\x84\x89
<NYIAKENG_PUACHUE_HMONG_LETTER_SA> \xF0\x9E\x84\x8A
<NYIAKENG_PUACHUE_HMONG_LETTER_ZA> \xF0\x9E\x84\x8B
<NYIAKENG_PUACHUE_HMONG_LETTER_NCA> \xF0\x9E\x84\x8C
<NYIAKENG_PUACHUE_HMONG_LETTER_NTSA> \xF0\x9E\x84\x8D
<NYIAKENG_PUACHUE_HMONG_LETTER_KA> \xF0\x9E\x84\x8E
<NYIAKENG_PUACHUE_HMONG_LETTER_DA> \xF0\x9E\x84\x8F
<NYIAKENG_PUACHUE_HMONG_LETTER_NYA> \xF0\x9E\x84\x90
<NYIAKENG_PUACHUE_HMONG_LETTER_NRA> \xF0\x9E\x84\x91
<NYIAKENG_PUACHUE_HMONG_LETTER_VA> \xF0\x9E\x84\x92
<NYIAKENG_PUACHUE_HMONG_LETTER_NTXA> \xF0\x9E\x84\x93
<NYIAKENG_PUACHUE_HMONG_LETTER_TXA> \xF0\x9E\x84\x94
<NYIAKENG_PUACHUE_HMONG_LETTER_FA> \xF0\x9E\x84\x95
<NYIAKENG_PUACHUE_HMONG_LETTER_RA> \xF0\x9E\x84\x96
<NYIAKENG_PUACHUE_HMONG_LETTER_QA> \xF0\x9E\x84\x97
<NYIAKENG_PUACHUE_HMONG_LETTER_YA> \xF0\x9E\x84\x98
<NYIAKENG_PUACHUE_HMONG_LETTER_NQA> \xF0\x9E\x84\x99
<NYIAKENG_PUACHUE_HMONG_LETTER_PA> \xF0\x9E\x84\x9A
<NYIAKENG_PUACHUE_HMONG_LETTER_XYA> \xF0\x9E\x84\x9B
<NYIAKENG_PUACHUE_HMONG_LETTER_NPA> \xF0\x9E\x84\x9C
<NYIAKENG_PUACHUE_HMONG_LETTER_DLA> \xF0\x9E\x84\x9D
<NYIAKENG_PUACHUE_HMONG_LETTER_NPLA> \xF0\x9E\x84\x9E
<NYIAKENG_PUACHUE_HMONG_LETTER_HAH> \xF0\x9E\x84\x9F
<NYIAKENG_PUACHUE_HMONG_LETTER_MLA> \xF0\x9E\x84\xA0
<NYIAKENG_PUACHUE_HMONG_LETTER_PLA> \xF0\x9E\x84\xA1
<NYIAKENG_PUACHUE_HMONG_LETTER_GA> \xF0\x9E\x84\xA2
<NYIAKENG_PUACHUE_HMONG_LETTER_RRA> \xF0\x9E\x84\xA3
<NYIAKENG_PUACHUE_HMONG_LETTER_A> \xF0\x9E\x84\xA4
<NYIAKENG_PUACHUE_HMONG_LETTER_AA> \xF0\x9E\x84\xA5
<NYIAKENG_PUACHUE_HMONG_LETTER_I> \xF0\x9E\x84\xA6
<NYIAKENG_PUACHUE_HMONG_LETTER_U> \xF0\x9E\x84\xA7
<NYIAKENG_PUACHUE_HMONG_LETTER_O> \xF0\x9E\x84\xA8
<NYIAKENG_PUACHUE_HMONG_LETTER_OO> \xF0\x9E\x84\xA9
<NYIAKENG_PUACHUE_HMONG_LETTER_E> \xF0\x9E\x84\xAA
<NYIAKENG_PUACHUE_HMONG_LETTER_EE> \xF0\x9E\x84\xAB
<NYIAKENG_PUACHUE_HMONG_LETTER_W> \xF0\x9E\x84\xAC
<NYIAKENG_PUACHUE_HMONG_TONE-B> \xF0\x9E\x84\xB0
<NYIAKENG_PUACHUE_HMONG_TONE-M> \xF0\x9E\x84\xB1
<NYIAKENG_PUACHUE_HMONG_TONE-J> \xF0\x9E\x84\xB2
<NYIAKENG_PUACHUE_HMONG_TONE-V> \xF0\x9E\x84\xB3
<NYIAKENG_PUACHUE_HMONG_TONE-S> \xF0\x9E\x84\xB4
<NYIAKENG_PUACHUE_HMONG_TONE-G> \xF0\x9E\x84\xB5
<NYIAKENG_PUACHUE_HMONG_TONE-D> \xF0\x9E\x84\xB6
<NYIAKENG_PUACHUE_HMONG_SIGN_FOR_PERSON> \xF0\x9E\x84\xB7
<NYIAKENG_PUACHUE_HMONG_SIGN_FOR_THING> \xF0\x9E\x84\xB8
<NYIAKENG_PUACHUE_HMONG_SIGN_FOR_LOCATION> \xF0\x9E\x84\xB9
<NYIAKENG_PUACHUE_HMONG_SIGN_FOR_ANIMAL> \xF0\x9E\x84\xBA
<NYIAKENG_PUACHUE_HMONG_SIGN_FOR_INVERTEBRATE> \xF0\x9E\x84\xBB
<NYIAKENG_PUACHUE_HMONG_SIGN_XW_XW> \xF0\x9E\x84\xBC
<NYIAKENG_PUACHUE_HMONG_SYLLABLE_LENGTHENER> \xF0\x9E\x84\xBD
<NYIAKENG_PUACHUE_HMONG_DIGIT_ZERO> \xF0\x9E\x85\x80
<NYIAKENG_PUACHUE_HMONG_DIGIT_ONE> \xF0\x9E\x85\x81
<NYIAKENG_PUACHUE_HMONG_DIGIT_TWO> \xF0\x9E\x85\x82
<NYIAKENG_PUACHUE_HMONG_DIGIT_THREE> \xF0\x9E\x85\x83
<NYIAKENG_PUACHUE_HMONG_DIGIT_FOUR> \xF0\x9E\x85\x84
<NYIAKENG_PUACHUE_HMONG_DIGIT_FIVE> \xF0\x9E\x85\x85
<NYIAKENG_PUACHUE_HMONG_DIGIT_SIX> \xF0\x9E\x85\x86
<NYIAKENG_PUACHUE_HMONG_DIGIT_SEVEN> \xF0\x9E\x85\x87
<NYIAKENG_PUACHUE_HMONG_DIGIT_EIGHT> \xF0\x9E\x85\x88
<NYIAKENG_PUACHUE_HMONG_DIGIT_NINE> \xF0\x9E\x85\x89
<NYIAKENG_PUACHUE_HMONG_LOGOGRAM_NYAJ> \xF0\x9E\x85\x8E
<NYIAKENG_PUACHUE_HMONG_CIRCLED_CA> \xF0\x9E\x85\x8F
<WANCHO_LETTER_AA> \xF0\x9E\x8B\x80
<WANCHO_LETTER_A> \xF0\x9E\x8B\x81
<WANCHO_LETTER_BA> \xF0\x9E\x8B\x82
<WANCHO_LETTER_CA> \xF0\x9E\x8B\x83
<WANCHO_LETTER_DA> \xF0\x9E\x8B\x84
<WANCHO_LETTER_GA> \xF0\x9E\x8B\x85
<WANCHO_LETTER_YA> \xF0\x9E\x8B\x86
<WANCHO_LETTER_PHA> \xF0\x9E\x8B\x87
<WANCHO_LETTER_LA> \xF0\x9E\x8B\x88
<WANCHO_LETTER_NA> \xF0\x9E\x8B\x89
<WANCHO_LETTER_PA> \xF0\x9E\x8B\x8A
<WANCHO_LETTER_TA> \xF0\x9E\x8B\x8B
<WANCHO_LETTER_THA> \xF0\x9E\x8B\x8C
<WANCHO_LETTER_FA> \xF0\x9E\x8B\x8D
<WANCHO_LETTER_SA> \xF0\x9E\x8B\x8E
<WANCHO_LETTER_SHA> \xF0\x9E\x8B\x8F
<WANCHO_LETTER_JA> \xF0\x9E\x8B\x90
<WANCHO_LETTER_ZA> \xF0\x9E\x8B\x91
<WANCHO_LETTER_WA> \xF0\x9E\x8B\x92
<WANCHO_LETTER_VA> \xF0\x9E\x8B\x93
<WANCHO_LETTER_KA> \xF0\x9E\x8B\x94
<WANCHO_LETTER_O> \xF0\x9E\x8B\x95
<WANCHO_LETTER_AU> \xF0\x9E\x8B\x96
<WANCHO_LETTER_RA> \xF0\x9E\x8B\x97
<WANCHO_LETTER_MA> \xF0\x9E\x8B\x98
<WANCHO_LETTER_KHA> \xF0\x9E\x8B\x99
<WANCHO_LETTER_HA> \xF0\x9E\x8B\x9A
<WANCHO_LETTER_E> \xF0\x9E\x8B\x9B
<WANCHO_LETTER_I> \xF0\x9E\x8B\x9C
<WANCHO_LETTER_NGA> \xF0\x9E\x8B\x9D
<WANCHO_LETTER_U> \xF0\x9E\x8B\x9E
<WANCHO_LETTER_LLHA> \xF0\x9E\x8B\x9F
<WANCHO_LETTER_TSA> \xF0\x9E\x8B\xA0
<WANCHO_LETTER_TRA> \xF0\x9E\x8B\xA1
<WANCHO_LETTER_ONG> \xF0\x9E\x8B\xA2
<WANCHO_LETTER_AANG> \xF0\x9E\x8B\xA3
<WANCHO_LETTER_ANG> \xF0\x9E\x8B\xA4
<WANCHO_LETTER_ING> \xF0\x9E\x8B\xA5
<WANCHO_LETTER_ON> \xF0\x9E\x8B\xA6
<WANCHO_LETTER_EN> \xF0\x9E\x8B\xA7
<WANCHO_LETTER_AAN> \xF0\x9E\x8B\xA8
<WANCHO_LETTER_NYA> \xF0\x9E\x8B\xA9
<WANCHO_LETTER_UEN> \xF0\x9E\x8B\xAA
<WANCHO_LETTER_YIH> \xF0\x9E\x8B\xAB
<WANCHO_TONE_TUP> \xF0\x9E\x8B\xAC
<WANCHO_TONE_TUPNI> \xF0\x9E\x8B\xAD
<WANCHO_TONE_KOI> \xF0\x9E\x8B\xAE
<WANCHO_TONE_KOINI> \xF0\x9E\x8B\xAF
<WANCHO_DIGIT_ZERO> \xF0\x9E\x8B\xB0
<WANCHO_DIGIT_ONE> \xF0\x9E\x8B\xB1
<WANCHO_DIGIT_TWO> \xF0\x9E\x8B\xB2
<WANCHO_DIGIT_THREE> \xF0\x9E\x8B\xB3
<WANCHO_DIGIT_FOUR> \xF0\x9E\x8B\xB4
<WANCHO_DIGIT_FIVE> \xF0\x9E\x8B\xB5
<WANCHO_DIGIT_SIX> \xF0\x9E\x8B\xB6
<WANCHO_DIGIT_SEVEN> \xF0\x9E\x8B\xB7
<WANCHO_DIGIT_EIGHT> \xF0\x9E\x8B\xB8
<WANCHO_DIGIT_NINE> \xF0\x9E\x8B\xB9
<WANCHO_NGUN_SIGN> \xF0\x9E\x8B\xBF
<MENDE_KIKAKUI_SYLLABLE_M001_KI> \xF0\x9E\xA0\x80
<MENDE_KIKAKUI_SYLLABLE_M002_KA> \xF0\x9E\xA0\x81
<MENDE_KIKAKUI_SYLLABLE_M003_KU> \xF0\x9E\xA0\x82
@ -80512,6 +80858,7 @@ CHARMAP
<ADLAM_CONSONANT_MODIFIER> \xF0\x9E\xA5\x88
<ADLAM_GEMINATE_CONSONANT_MODIFIER> \xF0\x9E\xA5\x89
<ADLAM_NUKTA> \xF0\x9E\xA5\x8A
<ADLAM_NASALIZATION_MARK> \xF0\x9E\xA5\x8B
<ADLAM_DIGIT_ZERO> \xF0\x9E\xA5\x90
<ADLAM_DIGIT_ONE> \xF0\x9E\xA5\x91
<ADLAM_DIGIT_TWO> \xF0\x9E\xA5\x92
@ -80592,6 +80939,67 @@ CHARMAP
<INDIC_SIYAQ_NUMBER_ALTERNATE_TWO> \xF0\x9E\xB2\xB2
<INDIC_SIYAQ_NUMBER_ALTERNATE_TEN_THOUSAND> \xF0\x9E\xB2\xB3
<INDIC_SIYAQ_ALTERNATE_LAKH_MARK> \xF0\x9E\xB2\xB4
<OTTOMAN_SIYAQ_NUMBER_ONE> \xF0\x9E\xB4\x81
<OTTOMAN_SIYAQ_NUMBER_TWO> \xF0\x9E\xB4\x82
<OTTOMAN_SIYAQ_NUMBER_THREE> \xF0\x9E\xB4\x83
<OTTOMAN_SIYAQ_NUMBER_FOUR> \xF0\x9E\xB4\x84
<OTTOMAN_SIYAQ_NUMBER_FIVE> \xF0\x9E\xB4\x85
<OTTOMAN_SIYAQ_NUMBER_SIX> \xF0\x9E\xB4\x86
<OTTOMAN_SIYAQ_NUMBER_SEVEN> \xF0\x9E\xB4\x87
<OTTOMAN_SIYAQ_NUMBER_EIGHT> \xF0\x9E\xB4\x88
<OTTOMAN_SIYAQ_NUMBER_NINE> \xF0\x9E\xB4\x89
<OTTOMAN_SIYAQ_NUMBER_TEN> \xF0\x9E\xB4\x8A
<OTTOMAN_SIYAQ_NUMBER_TWENTY> \xF0\x9E\xB4\x8B
<OTTOMAN_SIYAQ_NUMBER_THIRTY> \xF0\x9E\xB4\x8C
<OTTOMAN_SIYAQ_NUMBER_FORTY> \xF0\x9E\xB4\x8D
<OTTOMAN_SIYAQ_NUMBER_FIFTY> \xF0\x9E\xB4\x8E
<OTTOMAN_SIYAQ_NUMBER_SIXTY> \xF0\x9E\xB4\x8F
<OTTOMAN_SIYAQ_NUMBER_SEVENTY> \xF0\x9E\xB4\x90
<OTTOMAN_SIYAQ_NUMBER_EIGHTY> \xF0\x9E\xB4\x91
<OTTOMAN_SIYAQ_NUMBER_NINETY> \xF0\x9E\xB4\x92
<OTTOMAN_SIYAQ_NUMBER_ONE_HUNDRED> \xF0\x9E\xB4\x93
<OTTOMAN_SIYAQ_NUMBER_TWO_HUNDRED> \xF0\x9E\xB4\x94
<OTTOMAN_SIYAQ_NUMBER_THREE_HUNDRED> \xF0\x9E\xB4\x95
<OTTOMAN_SIYAQ_NUMBER_FOUR_HUNDRED> \xF0\x9E\xB4\x96
<OTTOMAN_SIYAQ_NUMBER_FIVE_HUNDRED> \xF0\x9E\xB4\x97
<OTTOMAN_SIYAQ_NUMBER_SIX_HUNDRED> \xF0\x9E\xB4\x98
<OTTOMAN_SIYAQ_NUMBER_SEVEN_HUNDRED> \xF0\x9E\xB4\x99
<OTTOMAN_SIYAQ_NUMBER_EIGHT_HUNDRED> \xF0\x9E\xB4\x9A
<OTTOMAN_SIYAQ_NUMBER_NINE_HUNDRED> \xF0\x9E\xB4\x9B
<OTTOMAN_SIYAQ_NUMBER_ONE_THOUSAND> \xF0\x9E\xB4\x9C
<OTTOMAN_SIYAQ_NUMBER_TWO_THOUSAND> \xF0\x9E\xB4\x9D
<OTTOMAN_SIYAQ_NUMBER_THREE_THOUSAND> \xF0\x9E\xB4\x9E
<OTTOMAN_SIYAQ_NUMBER_FOUR_THOUSAND> \xF0\x9E\xB4\x9F
<OTTOMAN_SIYAQ_NUMBER_FIVE_THOUSAND> \xF0\x9E\xB4\xA0
<OTTOMAN_SIYAQ_NUMBER_SIX_THOUSAND> \xF0\x9E\xB4\xA1
<OTTOMAN_SIYAQ_NUMBER_SEVEN_THOUSAND> \xF0\x9E\xB4\xA2
<OTTOMAN_SIYAQ_NUMBER_EIGHT_THOUSAND> \xF0\x9E\xB4\xA3
<OTTOMAN_SIYAQ_NUMBER_NINE_THOUSAND> \xF0\x9E\xB4\xA4
<OTTOMAN_SIYAQ_NUMBER_TEN_THOUSAND> \xF0\x9E\xB4\xA5
<OTTOMAN_SIYAQ_NUMBER_TWENTY_THOUSAND> \xF0\x9E\xB4\xA6
<OTTOMAN_SIYAQ_NUMBER_THIRTY_THOUSAND> \xF0\x9E\xB4\xA7
<OTTOMAN_SIYAQ_NUMBER_FORTY_THOUSAND> \xF0\x9E\xB4\xA8
<OTTOMAN_SIYAQ_NUMBER_FIFTY_THOUSAND> \xF0\x9E\xB4\xA9
<OTTOMAN_SIYAQ_NUMBER_SIXTY_THOUSAND> \xF0\x9E\xB4\xAA
<OTTOMAN_SIYAQ_NUMBER_SEVENTY_THOUSAND> \xF0\x9E\xB4\xAB
<OTTOMAN_SIYAQ_NUMBER_EIGHTY_THOUSAND> \xF0\x9E\xB4\xAC
<OTTOMAN_SIYAQ_NUMBER_NINETY_THOUSAND> \xF0\x9E\xB4\xAD
<OTTOMAN_SIYAQ_MARRATAN> \xF0\x9E\xB4\xAE
<OTTOMAN_SIYAQ_ALTERNATE_NUMBER_TWO> \xF0\x9E\xB4\xAF
<OTTOMAN_SIYAQ_ALTERNATE_NUMBER_THREE> \xF0\x9E\xB4\xB0
<OTTOMAN_SIYAQ_ALTERNATE_NUMBER_FOUR> \xF0\x9E\xB4\xB1
<OTTOMAN_SIYAQ_ALTERNATE_NUMBER_FIVE> \xF0\x9E\xB4\xB2
<OTTOMAN_SIYAQ_ALTERNATE_NUMBER_SIX> \xF0\x9E\xB4\xB3
<OTTOMAN_SIYAQ_ALTERNATE_NUMBER_SEVEN> \xF0\x9E\xB4\xB4
<OTTOMAN_SIYAQ_ALTERNATE_NUMBER_EIGHT> \xF0\x9E\xB4\xB5
<OTTOMAN_SIYAQ_ALTERNATE_NUMBER_NINE> \xF0\x9E\xB4\xB6
<OTTOMAN_SIYAQ_ALTERNATE_NUMBER_TEN> \xF0\x9E\xB4\xB7
<OTTOMAN_SIYAQ_ALTERNATE_NUMBER_FOUR_HUNDRED> \xF0\x9E\xB4\xB8
<OTTOMAN_SIYAQ_ALTERNATE_NUMBER_SIX_HUNDRED> \xF0\x9E\xB4\xB9
<OTTOMAN_SIYAQ_ALTERNATE_NUMBER_TWO_THOUSAND> \xF0\x9E\xB4\xBA
<OTTOMAN_SIYAQ_ALTERNATE_NUMBER_TEN_THOUSAND> \xF0\x9E\xB4\xBB
<OTTOMAN_SIYAQ_FRACTION_ONE_HALF> \xF0\x9E\xB4\xBC
<OTTOMAN_SIYAQ_FRACTION_ONE_SIXTH> \xF0\x9E\xB4\xBD
<ARABIC_MATHEMATICAL_ALEF> \xF0\x9E\xB8\x80
<ARABIC_MATHEMATICAL_BEH> \xF0\x9E\xB8\x81
<ARABIC_MATHEMATICAL_JEEM> \xF0\x9E\xB8\x82
@ -81066,6 +81474,7 @@ CHARMAP
<NEGATIVE_CIRCLED_LATIN_CAPITAL_LETTER_Z> \xF0\x9F\x85\xA9
<RAISED_MC_SIGN> \xF0\x9F\x85\xAA
<RAISED_MD_SIGN> \xF0\x9F\x85\xAB
<RAISED_MR_SIGN> \xF0\x9F\x85\xAC
<NEGATIVE_SQUARED_LATIN_CAPITAL_LETTER_A> \xF0\x9F\x85\xB0
<NEGATIVE_SQUARED_LATIN_CAPITAL_LETTER_B> \xF0\x9F\x85\xB1
<NEGATIVE_SQUARED_LATIN_CAPITAL_LETTER_C> \xF0\x9F\x85\xB2
@ -82198,6 +82607,7 @@ CHARMAP
<SHOPPING_TROLLEY> \xF0\x9F\x9B\x92
<STUPA> \xF0\x9F\x9B\x93
<PAGODA> \xF0\x9F\x9B\x94
<HINDU_TEMPLE> \xF0\x9F\x9B\x95
<HAMMER_AND_WRENCH> \xF0\x9F\x9B\xA0
<SHIELD> \xF0\x9F\x9B\xA1
<OIL_DRUM> \xF0\x9F\x9B\xA2
@ -82221,6 +82631,7 @@ CHARMAP
<SLED> \xF0\x9F\x9B\xB7
<FLYING_SAUCER> \xF0\x9F\x9B\xB8
<SKATEBOARD> \xF0\x9F\x9B\xB9
<AUTO_RICKSHAW> \xF0\x9F\x9B\xBA
<ALCHEMICAL_SYMBOL_FOR_QUINTESSENCE> \xF0\x9F\x9C\x80
<ALCHEMICAL_SYMBOL_FOR_AIR> \xF0\x9F\x9C\x81
<ALCHEMICAL_SYMBOL_FOR_FIRE> \xF0\x9F\x9C\x82
@ -82426,6 +82837,18 @@ CHARMAP
<NEGATIVE_CIRCLED_TRIANGLE> \xF0\x9F\x9F\x96
<CIRCLED_SQUARE> \xF0\x9F\x9F\x97
<NEGATIVE_CIRCLED_SQUARE> \xF0\x9F\x9F\x98
<LARGE_ORANGE_CIRCLE> \xF0\x9F\x9F\xA0
<LARGE_YELLOW_CIRCLE> \xF0\x9F\x9F\xA1
<LARGE_GREEN_CIRCLE> \xF0\x9F\x9F\xA2
<LARGE_PURPLE_CIRCLE> \xF0\x9F\x9F\xA3
<LARGE_BROWN_CIRCLE> \xF0\x9F\x9F\xA4
<LARGE_RED_SQUARE> \xF0\x9F\x9F\xA5
<LARGE_BLUE_SQUARE> \xF0\x9F\x9F\xA6
<LARGE_ORANGE_SQUARE> \xF0\x9F\x9F\xA7
<LARGE_YELLOW_SQUARE> \xF0\x9F\x9F\xA8
<LARGE_GREEN_SQUARE> \xF0\x9F\x9F\xA9
<LARGE_PURPLE_SQUARE> \xF0\x9F\x9F\xAA
<LARGE_BROWN_SQUARE> \xF0\x9F\x9F\xAB
<LEFTWARDS_ARROW_WITH_SMALL_TRIANGLE_ARROWHEAD> \xF0\x9F\xA0\x80
<UPWARDS_ARROW_WITH_SMALL_TRIANGLE_ARROWHEAD> \xF0\x9F\xA0\x81
<RIGHTWARDS_ARROW_WITH_SMALL_TRIANGLE_ARROWHEAD> \xF0\x9F\xA0\x82
@ -82586,6 +83009,9 @@ CHARMAP
<DOWNWARD_FACING_NOTCHED_HOOK> \xF0\x9F\xA4\x89
<DOWNWARD_FACING_HOOK_WITH_DOT> \xF0\x9F\xA4\x8A
<DOWNWARD_FACING_NOTCHED_HOOK_WITH_DOT> \xF0\x9F\xA4\x8B
<WHITE_HEART> \xF0\x9F\xA4\x8D
<BROWN_HEART> \xF0\x9F\xA4\x8E
<PINCHING_HAND> \xF0\x9F\xA4\x8F
<ZIPPER-MOUTH_FACE> \xF0\x9F\xA4\x90
<MONEY-MOUTH_FACE> \xF0\x9F\xA4\x91
<FACE_WITH_THERMOMETER> \xF0\x9F\xA4\x92
@ -82633,6 +83059,7 @@ CHARMAP
<WRESTLERS> \xF0\x9F\xA4\xBC
<WATER_POLO> \xF0\x9F\xA4\xBD
<HANDBALL> \xF0\x9F\xA4\xBE
<DIVING_MASK> \xF0\x9F\xA4\xBF
<WILTED_FLOWER> \xF0\x9F\xA5\x80
<DRUM_WITH_DRUMSTICKS> \xF0\x9F\xA5\x81
<CLINKING_GLASSES> \xF0\x9F\xA5\x82
@ -82682,11 +83109,13 @@ CHARMAP
<MOON_CAKE> \xF0\x9F\xA5\xAE
<BAGEL> \xF0\x9F\xA5\xAF
<SMILING_FACE_WITH_SMILING_EYES_AND_THREE_HEARTS> \xF0\x9F\xA5\xB0
<YAWNING_FACE> \xF0\x9F\xA5\xB1
<FACE_WITH_PARTY_HORN_AND_PARTY_HAT> \xF0\x9F\xA5\xB3
<FACE_WITH_UNEVEN_EYES_AND_WAVY_MOUTH> \xF0\x9F\xA5\xB4
<OVERHEATED_FACE> \xF0\x9F\xA5\xB5
<FREEZING_FACE> \xF0\x9F\xA5\xB6
<FACE_WITH_PLEADING_EYES> \xF0\x9F\xA5\xBA
<SARI> \xF0\x9F\xA5\xBB
<LAB_COAT> \xF0\x9F\xA5\xBC
<GOGGLES> \xF0\x9F\xA5\xBD
<HIKING_BOOT> \xF0\x9F\xA5\xBE
@ -82726,6 +83155,14 @@ CHARMAP
<MICROBE> \xF0\x9F\xA6\xA0
<BADGER> \xF0\x9F\xA6\xA1
<SWAN> \xF0\x9F\xA6\xA2
<SLOTH> \xF0\x9F\xA6\xA5
<OTTER> \xF0\x9F\xA6\xA6
<ORANGUTAN> \xF0\x9F\xA6\xA7
<SKUNK> \xF0\x9F\xA6\xA8
<FLAMINGO> \xF0\x9F\xA6\xA9
<OYSTER> \xF0\x9F\xA6\xAA
<GUIDE_DOG> \xF0\x9F\xA6\xAE
<PROBING_CANE> \xF0\x9F\xA6\xAF
<EMOJI_COMPONENT_RED_HAIR> \xF0\x9F\xA6\xB0
<EMOJI_COMPONENT_CURLY_HAIR> \xF0\x9F\xA6\xB1
<EMOJI_COMPONENT_BALD> \xF0\x9F\xA6\xB2
@ -82736,9 +83173,26 @@ CHARMAP
<TOOTH> \xF0\x9F\xA6\xB7
<SUPERHERO> \xF0\x9F\xA6\xB8
<SUPERVILLAIN> \xF0\x9F\xA6\xB9
<SAFETY_VEST> \xF0\x9F\xA6\xBA
<EAR_WITH_HEARING_AID> \xF0\x9F\xA6\xBB
<MOTORIZED_WHEELCHAIR> \xF0\x9F\xA6\xBC
<MANUAL_WHEELCHAIR> \xF0\x9F\xA6\xBD
<MECHANICAL_ARM> \xF0\x9F\xA6\xBE
<MECHANICAL_LEG> \xF0\x9F\xA6\xBF
<CHEESE_WEDGE> \xF0\x9F\xA7\x80
<CUPCAKE> \xF0\x9F\xA7\x81
<SALT_SHAKER> \xF0\x9F\xA7\x82
<BEVERAGE_BOX> \xF0\x9F\xA7\x83
<GARLIC> \xF0\x9F\xA7\x84
<ONION> \xF0\x9F\xA7\x85
<FALAFEL> \xF0\x9F\xA7\x86
<WAFFLE> \xF0\x9F\xA7\x87
<BUTTER> \xF0\x9F\xA7\x88
<MATE_DRINK> \xF0\x9F\xA7\x89
<ICE_CUBE> \xF0\x9F\xA7\x8A
<STANDING_PERSON> \xF0\x9F\xA7\x8D
<KNEELING_PERSON> \xF0\x9F\xA7\x8E
<DEAF_PERSON> \xF0\x9F\xA7\x8F
<FACE_WITH_MONOCLE> \xF0\x9F\xA7\x90
<ADULT> \xF0\x9F\xA7\x91
<CHILD> \xF0\x9F\xA7\x92
@ -82787,6 +83241,90 @@ CHARMAP
<SPONGE> \xF0\x9F\xA7\xBD
<RECEIPT> \xF0\x9F\xA7\xBE
<NAZAR_AMULET> \xF0\x9F\xA7\xBF
<NEUTRAL_CHESS_KING> \xF0\x9F\xA8\x80
<NEUTRAL_CHESS_QUEEN> \xF0\x9F\xA8\x81
<NEUTRAL_CHESS_ROOK> \xF0\x9F\xA8\x82
<NEUTRAL_CHESS_BISHOP> \xF0\x9F\xA8\x83
<NEUTRAL_CHESS_KNIGHT> \xF0\x9F\xA8\x84
<NEUTRAL_CHESS_PAWN> \xF0\x9F\xA8\x85
<WHITE_CHESS_KNIGHT_ROTATED_FORTY-FIVE_DEGREES> \xF0\x9F\xA8\x86
<BLACK_CHESS_KNIGHT_ROTATED_FORTY-FIVE_DEGREES> \xF0\x9F\xA8\x87
<NEUTRAL_CHESS_KNIGHT_ROTATED_FORTY-FIVE_DEGREES> \xF0\x9F\xA8\x88
<WHITE_CHESS_KING_ROTATED_NINETY_DEGREES> \xF0\x9F\xA8\x89
<WHITE_CHESS_QUEEN_ROTATED_NINETY_DEGREES> \xF0\x9F\xA8\x8A
<WHITE_CHESS_ROOK_ROTATED_NINETY_DEGREES> \xF0\x9F\xA8\x8B
<WHITE_CHESS_BISHOP_ROTATED_NINETY_DEGREES> \xF0\x9F\xA8\x8C
<WHITE_CHESS_KNIGHT_ROTATED_NINETY_DEGREES> \xF0\x9F\xA8\x8D
<WHITE_CHESS_PAWN_ROTATED_NINETY_DEGREES> \xF0\x9F\xA8\x8E
<BLACK_CHESS_KING_ROTATED_NINETY_DEGREES> \xF0\x9F\xA8\x8F
<BLACK_CHESS_QUEEN_ROTATED_NINETY_DEGREES> \xF0\x9F\xA8\x90
<BLACK_CHESS_ROOK_ROTATED_NINETY_DEGREES> \xF0\x9F\xA8\x91
<BLACK_CHESS_BISHOP_ROTATED_NINETY_DEGREES> \xF0\x9F\xA8\x92
<BLACK_CHESS_KNIGHT_ROTATED_NINETY_DEGREES> \xF0\x9F\xA8\x93
<BLACK_CHESS_PAWN_ROTATED_NINETY_DEGREES> \xF0\x9F\xA8\x94
<NEUTRAL_CHESS_KING_ROTATED_NINETY_DEGREES> \xF0\x9F\xA8\x95
<NEUTRAL_CHESS_QUEEN_ROTATED_NINETY_DEGREES> \xF0\x9F\xA8\x96
<NEUTRAL_CHESS_ROOK_ROTATED_NINETY_DEGREES> \xF0\x9F\xA8\x97
<NEUTRAL_CHESS_BISHOP_ROTATED_NINETY_DEGREES> \xF0\x9F\xA8\x98
<NEUTRAL_CHESS_KNIGHT_ROTATED_NINETY_DEGREES> \xF0\x9F\xA8\x99
<NEUTRAL_CHESS_PAWN_ROTATED_NINETY_DEGREES> \xF0\x9F\xA8\x9A
<WHITE_CHESS_KNIGHT_ROTATED_ONE_HUNDRED_THIRTY-FIVE_DEGREES> \xF0\x9F\xA8\x9B
<BLACK_CHESS_KNIGHT_ROTATED_ONE_HUNDRED_THIRTY-FIVE_DEGREES> \xF0\x9F\xA8\x9C
<NEUTRAL_CHESS_KNIGHT_ROTATED_ONE_HUNDRED_THIRTY-FIVE_DEGREES> \xF0\x9F\xA8\x9D
<WHITE_CHESS_TURNED_KING> \xF0\x9F\xA8\x9E
<WHITE_CHESS_TURNED_QUEEN> \xF0\x9F\xA8\x9F
<WHITE_CHESS_TURNED_ROOK> \xF0\x9F\xA8\xA0
<WHITE_CHESS_TURNED_BISHOP> \xF0\x9F\xA8\xA1
<WHITE_CHESS_TURNED_KNIGHT> \xF0\x9F\xA8\xA2
<WHITE_CHESS_TURNED_PAWN> \xF0\x9F\xA8\xA3
<BLACK_CHESS_TURNED_KING> \xF0\x9F\xA8\xA4
<BLACK_CHESS_TURNED_QUEEN> \xF0\x9F\xA8\xA5
<BLACK_CHESS_TURNED_ROOK> \xF0\x9F\xA8\xA6
<BLACK_CHESS_TURNED_BISHOP> \xF0\x9F\xA8\xA7
<BLACK_CHESS_TURNED_KNIGHT> \xF0\x9F\xA8\xA8
<BLACK_CHESS_TURNED_PAWN> \xF0\x9F\xA8\xA9
<NEUTRAL_CHESS_TURNED_KING> \xF0\x9F\xA8\xAA
<NEUTRAL_CHESS_TURNED_QUEEN> \xF0\x9F\xA8\xAB
<NEUTRAL_CHESS_TURNED_ROOK> \xF0\x9F\xA8\xAC
<NEUTRAL_CHESS_TURNED_BISHOP> \xF0\x9F\xA8\xAD
<NEUTRAL_CHESS_TURNED_KNIGHT> \xF0\x9F\xA8\xAE
<NEUTRAL_CHESS_TURNED_PAWN> \xF0\x9F\xA8\xAF
<WHITE_CHESS_KNIGHT_ROTATED_TWO_HUNDRED_TWENTY-FIVE_DEGREES> \xF0\x9F\xA8\xB0
<BLACK_CHESS_KNIGHT_ROTATED_TWO_HUNDRED_TWENTY-FIVE_DEGREES> \xF0\x9F\xA8\xB1
<NEUTRAL_CHESS_KNIGHT_ROTATED_TWO_HUNDRED_TWENTY-FIVE_DEGREES> \xF0\x9F\xA8\xB2
<WHITE_CHESS_KING_ROTATED_TWO_HUNDRED_SEVENTY_DEGREES> \xF0\x9F\xA8\xB3
<WHITE_CHESS_QUEEN_ROTATED_TWO_HUNDRED_SEVENTY_DEGREES> \xF0\x9F\xA8\xB4
<WHITE_CHESS_ROOK_ROTATED_TWO_HUNDRED_SEVENTY_DEGREES> \xF0\x9F\xA8\xB5
<WHITE_CHESS_BISHOP_ROTATED_TWO_HUNDRED_SEVENTY_DEGREES> \xF0\x9F\xA8\xB6
<WHITE_CHESS_KNIGHT_ROTATED_TWO_HUNDRED_SEVENTY_DEGREES> \xF0\x9F\xA8\xB7
<WHITE_CHESS_PAWN_ROTATED_TWO_HUNDRED_SEVENTY_DEGREES> \xF0\x9F\xA8\xB8
<BLACK_CHESS_KING_ROTATED_TWO_HUNDRED_SEVENTY_DEGREES> \xF0\x9F\xA8\xB9
<BLACK_CHESS_QUEEN_ROTATED_TWO_HUNDRED_SEVENTY_DEGREES> \xF0\x9F\xA8\xBA
<BLACK_CHESS_ROOK_ROTATED_TWO_HUNDRED_SEVENTY_DEGREES> \xF0\x9F\xA8\xBB
<BLACK_CHESS_BISHOP_ROTATED_TWO_HUNDRED_SEVENTY_DEGREES> \xF0\x9F\xA8\xBC
<BLACK_CHESS_KNIGHT_ROTATED_TWO_HUNDRED_SEVENTY_DEGREES> \xF0\x9F\xA8\xBD
<BLACK_CHESS_PAWN_ROTATED_TWO_HUNDRED_SEVENTY_DEGREES> \xF0\x9F\xA8\xBE
<NEUTRAL_CHESS_KING_ROTATED_TWO_HUNDRED_SEVENTY_DEGREES> \xF0\x9F\xA8\xBF
<NEUTRAL_CHESS_QUEEN_ROTATED_TWO_HUNDRED_SEVENTY_DEGREES> \xF0\x9F\xA9\x80
<NEUTRAL_CHESS_ROOK_ROTATED_TWO_HUNDRED_SEVENTY_DEGREES> \xF0\x9F\xA9\x81
<NEUTRAL_CHESS_BISHOP_ROTATED_TWO_HUNDRED_SEVENTY_DEGREES> \xF0\x9F\xA9\x82
<NEUTRAL_CHESS_KNIGHT_ROTATED_TWO_HUNDRED_SEVENTY_DEGREES> \xF0\x9F\xA9\x83
<NEUTRAL_CHESS_PAWN_ROTATED_TWO_HUNDRED_SEVENTY_DEGREES> \xF0\x9F\xA9\x84
<WHITE_CHESS_KNIGHT_ROTATED_THREE_HUNDRED_FIFTEEN_DEGREES> \xF0\x9F\xA9\x85
<BLACK_CHESS_KNIGHT_ROTATED_THREE_HUNDRED_FIFTEEN_DEGREES> \xF0\x9F\xA9\x86
<NEUTRAL_CHESS_KNIGHT_ROTATED_THREE_HUNDRED_FIFTEEN_DEGREES> \xF0\x9F\xA9\x87
<WHITE_CHESS_EQUIHOPPER> \xF0\x9F\xA9\x88
<BLACK_CHESS_EQUIHOPPER> \xF0\x9F\xA9\x89
<NEUTRAL_CHESS_EQUIHOPPER> \xF0\x9F\xA9\x8A
<WHITE_CHESS_EQUIHOPPER_ROTATED_NINETY_DEGREES> \xF0\x9F\xA9\x8B
<BLACK_CHESS_EQUIHOPPER_ROTATED_NINETY_DEGREES> \xF0\x9F\xA9\x8C
<NEUTRAL_CHESS_EQUIHOPPER_ROTATED_NINETY_DEGREES> \xF0\x9F\xA9\x8D
<WHITE_CHESS_KNIGHT-QUEEN> \xF0\x9F\xA9\x8E
<WHITE_CHESS_KNIGHT-ROOK> \xF0\x9F\xA9\x8F
<WHITE_CHESS_KNIGHT-BISHOP> \xF0\x9F\xA9\x90
<BLACK_CHESS_KNIGHT-QUEEN> \xF0\x9F\xA9\x91
<BLACK_CHESS_KNIGHT-ROOK> \xF0\x9F\xA9\x92
<BLACK_CHESS_KNIGHT-BISHOP> \xF0\x9F\xA9\x93
<XIANGQI_RED_GENERAL> \xF0\x9F\xA9\xA0
<XIANGQI_RED_MANDARIN> \xF0\x9F\xA9\xA1
<XIANGQI_RED_ELEPHANT> \xF0\x9F\xA9\xA2
@ -82801,6 +83339,22 @@ CHARMAP
<XIANGQI_BLACK_CHARIOT> \xF0\x9F\xA9\xAB
<XIANGQI_BLACK_CANNON> \xF0\x9F\xA9\xAC
<XIANGQI_BLACK_SOLDIER> \xF0\x9F\xA9\xAD
<BALLET_SHOES> \xF0\x9F\xA9\xB0
<ONE-PIECE_SWIMSUIT> \xF0\x9F\xA9\xB1
<BRIEFS> \xF0\x9F\xA9\xB2
<SHORTS> \xF0\x9F\xA9\xB3
<DROP_OF_BLOOD> \xF0\x9F\xA9\xB8
<ADHESIVE_BANDAGE> \xF0\x9F\xA9\xB9
<STETHOSCOPE> \xF0\x9F\xA9\xBA
<YO-YO> \xF0\x9F\xAA\x80
<KITE> \xF0\x9F\xAA\x81
<PARACHUTE> \xF0\x9F\xAA\x82
<RINGED_PLANET> \xF0\x9F\xAA\x90
<CHAIR> \xF0\x9F\xAA\x91
<RAZOR> \xF0\x9F\xAA\x92
<AXE> \xF0\x9F\xAA\x93
<DIYA_LAMP> \xF0\x9F\xAA\x94
<BANJO> \xF0\x9F\xAA\x95
<CJK_UNIFIED_IDEOGRAPH-20000> \xF0\xA0\x80\x80
<CJK_UNIFIED_IDEOGRAPH-20001> \xF0\xA0\x80\x81
<CJK_UNIFIED_IDEOGRAPH-20002> \xF0\xA0\x80\x82

View File

@ -0,0 +1,29 @@
--- UnicodeData.txt.orig 2020-06-29 14:05:49.483379000 +0900
+++ UnicodeData.txt 2020-06-29 14:12:09.808622000 +0900
@@ -12138,7 +12138,7 @@
33FE;IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY THIRTY-ONE;So;0;L;<compat> 0033 0031 65E5;;;;N;;;;;
33FF;SQUARE GAL;So;0;ON;<square> 0067 0061 006C;;;;N;;;;;
3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;;
-4DBF;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;;
+4DB5;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;;
4DC0;HEXAGRAM FOR THE CREATIVE HEAVEN;So;0;ON;;;;;N;;;;;
4DC1;HEXAGRAM FOR THE RECEPTIVE EARTH;So;0;ON;;;;;N;;;;;
4DC2;HEXAGRAM FOR DIFFICULTY AT THE BEGINNING;So;0;ON;;;;;N;;;;;
@@ -12204,7 +12204,7 @@
4DFE;HEXAGRAM FOR AFTER COMPLETION;So;0;ON;;;;;N;;;;;
4DFF;HEXAGRAM FOR BEFORE COMPLETION;So;0;ON;;;;;N;;;;;
4E00;<CJK Ideograph, First>;Lo;0;L;;;;;N;;;;;
-9FFC;<CJK Ideograph, Last>;Lo;0;L;;;;;N;;;;;
+9FEF;<CJK Ideograph, Last>;Lo;0;L;;;;;N;;;;;
A000;YI SYLLABLE IT;Lo;0;L;;;;;N;;;;;
A001;YI SYLLABLE IX;Lo;0;L;;;;;N;;;;;
A002;YI SYLLABLE I;Lo;0;L;;;;;N;;;;;
@@ -32901,7 +32901,7 @@
1FBF8;SEGMENTED DIGIT EIGHT;Nd;0;EN;<font> 0038;8;8;8;N;;;;;
1FBF9;SEGMENTED DIGIT NINE;Nd;0;EN;<font> 0039;9;9;9;N;;;;;
20000;<CJK Ideograph Extension B, First>;Lo;0;L;;;;;N;;;;;
-2A6DD;<CJK Ideograph Extension B, Last>;Lo;0;L;;;;;N;;;;;
+2A6D6;<CJK Ideograph Extension B, Last>;Lo;0;L;;;;;N;;;;;
2A700;<CJK Ideograph Extension C, First>;Lo;0;L;;;;;N;;;;;
2B734;<CJK Ideograph Extension C, Last>;Lo;0;L;;;;;N;;;;;
2B740;<CJK Ideograph Extension D, First>;Lo;0;L;;;;;N;;;;;

View File

@ -460,6 +460,11 @@ sub transform_ctypes {
foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
next if ($enc eq $DEFENCODING);
$filename = "$UNIDIR/posix/$file.$DEFENCODING.src";
if ($file eq 'ja_JP') {
# Override $filename for ja_JP because
# its CTYPE is not compatible with UTF-8.
$filename = "$UNIDIR/posix/$file.eucJP.src";
}
if (! -f $filename) {
print STDERR "Cannot open $filename\n";
next;

View File

@ -87,7 +87,7 @@ sub load_utf8_cm
{
my $file = shift;
open(UTF8, "$file") || die "open";
open(UTF8, "$file") || die "$!: open: $file";
while (<UTF8>) {
next if (/^#/);
@ -158,7 +158,8 @@ $mf = shift(@ARGV);
$codeset = shift(@ARGV);
my $max_mb;
load_utf8_cm("etc/final-maps/map.UTF-8");
my $etcdir = (exists $ENV{'ETCDIR'}) ? $ENV{'ETCDIR'} : "etc";
load_utf8_cm("${etcdir}/final-maps/map.UTF-8");
load_map($mf);

View File

@ -47,15 +47,21 @@ usage ()
$1 = "numericdef" -o $1 = "timedef" -o $1 = "ctypedef" ] || usage
self=$(realpath $0)
base=$(dirname ${self})
old=${base}/../${1}.draft
new=${base}/../${1}
TEMP=/tmp/${1}.locales
TEMP2=/tmp/${1}.hashes
TEMP3=/tmp/${1}.symlinks
TEMP4=/tmp/${1}.mapped
FULLMAP=/tmp/utf8-map
FULLEXTRACT=/tmp/extracted-names
base=${BASEDIR:-$(dirname ${self})}
: ${ETCDIR:=${base}/../etc}
: ${TOOLSDIR:=${base}}
: ${OUTBASEDIR:=${base}/../${1}}
: ${OLD_DIR:=${OUTBASEDIR}.draft}
: ${NEW_DIR:=${OUTBASEDIR}}
old=${OLD_DIR}
new=${NEW_DIR}
: ${TMPDIR:=/tmp}
TEMP=${TMPDIR}/${1}.locales
TEMP2=${TMPDIR}/${1}.hashes
TEMP3=${TMPDIR}/${1}.symlinks
TEMP4=${TMPDIR}/${1}.mapped
FULLMAP=${TMPDIR}/utf8-map
FULLEXTRACT=${TMPDIR}/extracted-names
AWKCMD="/## PLACEHOLDER/ { \
while ( getline line < \"${TEMP}\" ) {print line} } \
/## SYMPAIRS/ { \
@ -65,6 +71,7 @@ AWKCMD="/## PLACEHOLDER/ { \
!/## / { print \$0 }"
# Rename sources whose names have three components to the POSIX form of the name, which uses an @modifier
mkdir -p $old $new
cd $old
pwd
for i in *_*_*.*.src; do
@ -142,13 +149,13 @@ then
rm -f ${TEMP2}
/usr/bin/sed -E -e 's/[ ]+/ /g' \
${UNIDIR}/posix/UTF-8.cm \
> ${base}/../etc/final-maps/map.UTF-8
> ${ETCDIR}/final-maps/map.UTF-8
/usr/bin/sed -E -e 's/[ ]+/ /g' \
${UNIDIR}/posix/eucCN.cm \
> ${base}/../etc/final-maps/map.eucCN
> ${ETCDIR}/final-maps/map.eucCN
/usr/bin/sed -E -e 's/[ ]+/ /g' \
${UNIDIR}/posix/eucCN.cm \
> ${base}/../etc/final-maps/map.GB2312
> ${ETCDIR}/final-maps/map.GB2312
# GB18030 and Big5 are pre-generated from CLDR data
CHARMAPS="ARMSCII-8 CP1131 CP1251 \
@ -160,10 +167,11 @@ then
for map in ${CHARMAPS}
do
encoding=${map}
/usr/local/bin/perl ${base}/convert_map.pl \
${base}/../etc/charmaps/${map}.TXT ${encoding} \
env ETCDIR="${ETCDIR}" \
/usr/local/bin/perl ${TOOLSDIR}/convert_map.pl \
${ETCDIR}/charmaps/${map}.TXT ${encoding} \
| /usr/bin/sed -E -e 's/ +/ /g' \
> ${base}/../etc/final-maps/map.${map}
> ${ETCDIR}/final-maps/map.${map}
echo map ${map} converted.
done

View File

@ -30,6 +30,7 @@
use strict;
use Getopt::Long;
use Encode qw(encode decode);
if ($#ARGV != 0) {
print "Usage: $0 --unidir=<unidir>\n";
@ -52,6 +53,23 @@ generate_footer ();
############################
# utf8to32 -- convert a charmap byte sequence into a Unicode code point.
#
# Argument:
#   $_[0]  the UTF-8 encoding of one character written as "\xHH" escapes,
#          e.g. '\xF0\x9F\xA4\x8D'.
# Returns:
#   the code point as an uppercase hexadecimal string with leading zeros
#   trimmed down to a minimum of four digits (e.g. "0041", "1F90D").
sub utf8to32 {
	# Split on the literal "\x" separators; the field before the first
	# separator is empty and is dropped.
	my @kl = split /\\x/, $_[0];
	shift @kl if ($kl[0] eq '');

	# Turn the two-digit hex fields into raw bytes, then re-encode the
	# character as big-endian UTF-32 and dump it as hex digits.
	my $k = pack('H2' x scalar @kl, @kl);
	my $ux = encode('UTF-32BE', decode('UTF-8', $k));
	my $u = uc(unpack('H*', $ux));

	# No BOM removal here: the explicit-endian UTF-32BE encoder never
	# prepends a BOM, so a leading 0000FEFF can only be the character
	# U+FEFF (ZERO WIDTH NO-BREAK SPACE) itself.  Stripping it would make
	# \xEF\xBB\xBF map to an empty key.

	# Drop leading zeros, but always keep at least four hex digits.
	$u =~ s/^0+(?=[0-9A-F]{4})//;

	return $u;
}
sub get_utf8map {
my $file = shift;
@ -75,9 +93,10 @@ sub get_utf8map {
last if ($l eq "END CHARMAP");
$l =~ /^(<[^\s]+>)\s+(.*)/;
my $k = $2;
my $k = utf8to32($2); # UTF-8 char code
my $v = $1;
$k =~ s/\\x//g; # UTF-8 char code
# print STDERR "register: $k - $v\n";
$utf8map{$k} = $v;
}
}
@ -143,7 +162,7 @@ sub parse_unidata {
foreach my $l (@lines) {
my @d = split(/;/, $l, -1);
my $mb = wctomb($d[0]);
my $mb = $d[0];
my $cat;
# XXX There are code points present in UnicodeData.txt
@ -180,9 +199,9 @@ sub parse_unidata {
# Check if there's upper/lower mapping
if ($d[12] ne "") {
$data{'toupper'}{$mb} = wctomb($d[12]);
$data{'toupper'}{$mb} = $d[12];
} elsif ($d[13] ne "") {
$data{'tolower'}{$mb} = wctomb($d[13]);
$data{'tolower'}{$mb} = $d[13];
}
}
@ -193,7 +212,7 @@ sub parse_unidata {
foreach my $cat (sort keys (%data)) {
print FOUT "$cat\t";
$first = 1;
foreach my $mb (sort keys (%{$data{$cat}})) {
foreach my $mb (sort {hex($a) <=> hex($b)} keys (%{$data{$cat}})) {
if ($first == 1) {
$first = 0;
} elsif ($inrange == 1) {