Cleanup locale tools:
- Simplify the source dir specification, and update README appropriately
- Drop the LC (doonly) processing; it's broken and, even if fixed, not really useful
- Don't remove the target directories while installing new data, as that removes Makefile.depend, which we don't manage; only rm the files we are going to add/replace/delete instead
- Restrict adding bsd.endian.mk to the colldef and ctypedef Makefiles; it's not needed in the other (text-only) categories
- GC unused scripts; they don't seem to be particularly helpful standalone either

Reviewed by: bapt
Approved by: kib (mentor, implicit)
Differential Revision: https://reviews.freebsd.org/D17858
This commit is contained in:
parent
aed0dab28a
commit
96845afdec
@ -10,13 +10,10 @@
|
||||
|
||||
.OBJDIR: .
|
||||
|
||||
.if !defined(CLDRDIR)
|
||||
CLDRDIR!= grep ^cldr etc/unicode.conf | cut -f 2 -d " "
|
||||
.if !defined(UNIDIR)
|
||||
.error UNIDIR is not set
|
||||
.endif
|
||||
.if !defined(UNIDATADIR)
|
||||
UNIDATADIR!= grep ^unidata etc/unicode.conf | cut -f 2 -d " "
|
||||
.endif
|
||||
PASSON= CLDRDIR="${CLDRDIR}" UNIDATADIR="${UNIDATADIR}"
|
||||
PASSON= UNIDIR="${UNIDIR}"
|
||||
|
||||
ETCDIR= ${.CURDIR}/etc
|
||||
|
||||
@ -49,12 +46,8 @@ COLLATIONS_SPECIAL_ENV+= ${area}.${enc}
|
||||
.endfor
|
||||
PASSON+= COLLATIONS_SPECIAL="${COLLATIONS_SPECIAL_ENV}"
|
||||
|
||||
.if defined(LC)
|
||||
LC:= --lc=${LC}
|
||||
.endif
|
||||
|
||||
all:
|
||||
cp ${ETCDIR}/common.UTF-8.src ${CLDRDIR}/posix/xx_Comm_C.UTF-8.src
|
||||
cp ${ETCDIR}/common.UTF-8.src ${UNIDIR}/posix/xx_Comm_C.UTF-8.src
|
||||
.for t in ${TYPES}
|
||||
. if ${KNOWN:M${t}}
|
||||
test -d ${t} || mkdir ${t}
|
||||
@ -69,8 +62,9 @@ install: install-${t}
|
||||
install-${t}:
|
||||
. if ${KNOWN:M${t}}
|
||||
rm -rf ${.CURDIR}/${t}.draft
|
||||
rm -rf ${.CURDIR}/../../../share/${t}
|
||||
mv ${.CURDIR}/${t} ${.CURDIR}/../../../share/${t}
|
||||
rm -f ${.CURDIR}/../../../share/${t}/Makefile
|
||||
rm -f ${.CURDIR}/../../../share/${t}/*.src
|
||||
mv ${.CURDIR}/${t}/* ${.CURDIR}/../../../share/${t}/
|
||||
. endif
|
||||
.endfor
|
||||
|
||||
@ -86,10 +80,9 @@ post-install:
|
||||
gen-${t}:
|
||||
mkdir -p ${t} ${t}.draft
|
||||
perl -I tools tools/cldr2def.pl \
|
||||
--cldr=$$(realpath ${CLDRDIR}) \
|
||||
--unidata=$$(realpath ${UNIDATADIR}) \
|
||||
--unidir=$$(realpath ${UNIDIR}) \
|
||||
--etc=$$(realpath ${ETCDIR}) \
|
||||
--type=${t} ${LC}
|
||||
--type=${t}
|
||||
|
||||
build-${t}: gen-${t}
|
||||
env ${PASSON} tools/finalize ${t}
|
||||
@ -101,15 +94,16 @@ build-colldef: static-colldef
|
||||
|
||||
static-colldef:
|
||||
.for area enc in ${COLLATION_SPECIAL}
|
||||
awk -f tools/extract-colldef.awk ${CLDRDIR}/posix/${area}.${enc}.src > colldef.draft/${area}.${enc}.src
|
||||
awk -f tools/extract-colldef.awk ${UNIDIR}/posix/${area}.${enc}.src > \
|
||||
colldef.draft/${area}.${enc}.src
|
||||
.endfor
|
||||
|
||||
transfer-rollup:
|
||||
cp ${ETCDIR}/common.UTF-8.src ${CLDRDIR}/posix/xx_Comm_C.UTF-8.src
|
||||
cp ${ETCDIR}/common.UTF-8.src ${UNIDIR}/posix/xx_Comm_C.UTF-8.src
|
||||
|
||||
rollup:
|
||||
perl -I tools tools/utf8-rollup.pl \
|
||||
--cldr=$$(realpath ${CLDRDIR}) \
|
||||
--unidir=$$(realpath ${UNIDIR}) \
|
||||
--etc=$$(realpath ${ETCDIR})
|
||||
|
||||
clean:
|
||||
@ -159,33 +153,33 @@ ENCODINGS= Big5 \
|
||||
|
||||
|
||||
POSIX:
|
||||
.if exists (${CLDRDIR}/tools/java/cldr.jar)
|
||||
mkdir -p ${CLDRDIR}/posix
|
||||
.if exists (${UNIDIR}/tools/java/cldr.jar)
|
||||
mkdir -p ${UNIDIR}/posix
|
||||
. for area in ${BASE_LOCALES_OF_INTEREST}
|
||||
. if !exists(${CLDRDIR}/posix/${area}.UTF-8.src)
|
||||
java -DCLDR_DIR=${CLDRDIR:Q} -jar ${CLDRDIR}/tools/java/cldr.jar \
|
||||
. if !exists(${UNIDIR}/posix/${area}.UTF-8.src)
|
||||
java -DCLDR_DIR=${UNIDIR:Q} -jar ${UNIDIR}/tools/java/cldr.jar \
|
||||
org.unicode.cldr.posix.GeneratePOSIX \
|
||||
-d ${CLDRDIR}/posix -m ${area} -c UTF-8
|
||||
-d ${UNIDIR}/posix -m ${area} -c UTF-8
|
||||
. endif
|
||||
. endfor
|
||||
. for area encoding in ${COLLATION_SPECIAL}
|
||||
. if !exists(${CLDRDIR}/posix/${area}.${encoding}.src)
|
||||
java -DCLDR_DIR=${CLDRDIR:Q} -jar ${CLDRDIR}/tools/java/cldr.jar \
|
||||
. if !exists(${UNIDIR}/posix/${area}.${encoding}.src)
|
||||
java -DCLDR_DIR=${UNIDIR:Q} -jar ${UNIDIR}/tools/java/cldr.jar \
|
||||
org.unicode.cldr.posix.GeneratePOSIX \
|
||||
-d ${CLDRDIR}/posix -m ${area} -c ${encoding}
|
||||
-d ${UNIDIR}/posix -m ${area} -c ${encoding}
|
||||
. endif
|
||||
. endfor
|
||||
. for enc in ${ENCODINGS}
|
||||
. if !exists(${CLDRDIR}/posix/${enc}.cm)
|
||||
java -DCLDR_DIR=${CLDRDIR:Q} -jar ${CLDRDIR}/tools/java/cldr.jar \
|
||||
. if !exists(${UNIDIR}/posix/${enc}.cm)
|
||||
java -DCLDR_DIR=${UNIDIR:Q} -jar ${UNIDIR}/tools/java/cldr.jar \
|
||||
org.unicode.cldr.posix.GenerateCharmap \
|
||||
-d ${CLDRDIR}/posix -c ${enc}
|
||||
-d ${UNIDIR}/posix -c ${enc}
|
||||
. endif
|
||||
. endfor
|
||||
.else
|
||||
@echo "Please install CLDR toolset for the desired release"
|
||||
@echo "It should go at ${CLDRDIR}/tools"
|
||||
@echo "It should go at ${UNIDIR}/tools"
|
||||
.endif
|
||||
|
||||
clean-POSIX:
|
||||
rm -f ${CLDRDIR}/posix/*
|
||||
rm -f ${UNIDIR}/posix/*
|
||||
|
@ -9,30 +9,23 @@ Tools needed:
|
||||
devel/p5-Tie-IxHash
|
||||
textproc/p5-XML-Parser
|
||||
|
||||
Fetch CLDR data from: http://unicode.org/Public/cldr/. You need all of the
|
||||
1. Fetch CLDR data from: http://unicode.org/Public/cldr/. You need all of the
|
||||
core.zip, keyboards.zip, and tools.zip.
|
||||
|
||||
Extract:
|
||||
mkdir -p ~/unicode/cldr/v33.0
|
||||
cd ~/unicode/cldr/v33.0
|
||||
unzip ~/core.zip ~/keyboards.zip ~/tools.zip
|
||||
|
||||
Fetch unidata (UCD.zip) from http://www.unicode.org/Public/zipped/latest.
|
||||
|
||||
Extract:
|
||||
mkdir -p ~/unicode/UNIDATA/11.0.0
|
||||
cd ~/unicode/UNIDATA/11.0.0
|
||||
2. Fetch unidata (UCD.zip) from http://www.unicode.org/Public/zipped/latest.
|
||||
3. Extract:
|
||||
mkdir -p ~/unicode
|
||||
cd ~/unicode
|
||||
unzip ~/core.zip
|
||||
unzip ~/keyboards.zip
|
||||
unzip ~/tools.zip
|
||||
unzip ~/UCD.zip
|
||||
|
||||
Either modify tools/tools/locales/etc/unicode.conf or export variables:
|
||||
CLDRDIR=~/unicode/cldr/v33.0; export CLDRDIR
|
||||
UNIDATADIR=~/unicode/UNIDATA/9.0.0; export UNIDATADIR
|
||||
|
||||
Build the CLDR tools:
|
||||
cd $CLDRDIR/tools/java
|
||||
4. Export variable:
|
||||
UNIDIR=~/unicode; export UNIDIR
|
||||
5. Build the CLDR tools:
|
||||
cd $UNIDIR/tools/java
|
||||
ant jar
|
||||
|
||||
Run:
|
||||
6. Build POSIX data files from CLDR data:
|
||||
make POSIX
|
||||
7. Build and install new locale data:
|
||||
make
|
||||
make install
|
||||
|
@ -1,4 +0,0 @@
|
||||
# $FreeBSD$
|
||||
|
||||
cldr ~/unicode/cldr/30.0.3
|
||||
unidata ~/unicode/UNIDATA/9.0.0
|
@ -1,78 +0,0 @@
|
||||
#!/usr/bin/perl -w
#
# Rewrite a UTF-8 encoded text file using the symbolic character names
# found in <cldr dir>/posix/UTF-8.cm.
#
# Lines starting with "#" are copied verbatim.  On every other line, ASCII
# punctuation (bytes 0x21-0x7F that are not letters or digits) is copied
# through unchanged; every other byte sequence is replaced by the first
# "<NAME>" listed for it in the charmap, with underscores shown as spaces.
# Byte sequences that have no charmap entry are reported inline as
# "--HEX--" on a line of their own.
#
# The input is processed as raw bytes, so multi-byte UTF-8 sequences are
# matched by concatenating the hex value of one to four consecutive bytes.

use strict;
use Data::Dumper;

if (@ARGV != 2) {
	print STDERR "Usage: $0 <cldr dir> <input file>\n";
	exit(1);
}

my ($cldrdir, $infile) = @ARGV;
my $cmfile = "$cldrdir/posix/UTF-8.cm";

# %cm maps an upper-case hex byte string (e.g. "C3A0") to the first
# symbolic name the charmap lists for it.
my %cm = ();
open(my $cmfh, '<', $cmfile) or die "Cannot open $cmfile: $!\n";
while (my $entry = <$cmfh>) {
	chomp($entry);
	# Only "<NAME> \xHH..." lines are of interest; this also skips
	# comments and blank lines.
	next if ($entry !~ /^</);

	my @a = split(" ", $entry);
	next if (@a != 2);

	$a[1] =~ s/\\x//g;
	$a[1] = uc($a[1]);		# keys must match the %02X output below
	$a[0] =~ s/_/ /g;
	$cm{$a[1]} = $a[0] if (!defined $cm{$a[1]});
}
close($cmfh);

open(my $infh, '<', $infile) or die "Cannot open $infile: $!\n";
while (my $line = <$infh>) {
	chomp($line);

	if ($line =~ /^#/) {
		print "$line\n";
		next;
	}

	my @bytes = split(//, $line);
	my $i = 0;
	while ($i <= $#bytes) {
		my $ch = $bytes[$i];

		# ASCII punctuation is passed through as-is.
		if ($ch gt "\x20" && $ch lt "\x80" && $ch !~ /\A[A-Za-z0-9]\z/) {
			print $ch;
			$i++;
			next;
		}

		# Try the shortest byte sequence first, growing it up to the
		# UTF-8 maximum of four bytes and never reading past the end
		# of the line.  Each byte is formatted as two hex digits so
		# that values below 0x10 line up with the charmap keys.
		my $hex = "";
		my $name;
		my $len = 0;
		while (!defined $name && $len < 4 && $i + $len <= $#bytes) {
			$hex .= sprintf("%02X", ord($bytes[$i + $len]));
			$len++;
			$name = $cm{$hex};
		}

		if (defined $name) {
			print $name;
			$i += $len;
		} else {
			# Unknown sequence: flag it and resume at the next byte.
			print "\n--$hex--\n";
			$i++;
		}
	}
	print "\n";
}
close($infh);
|
@ -1,30 +0,0 @@
|
||||
#!/usr/bin/perl -w
#
# Shift the code values in the first column of a charmap table.
#
# Reads <charmap in>.TXT and writes <charmap out>.TXT.  Every line whose
# first whitespace-separated token is a "0x..." hex number gets <offset>
# added to that number; the rest of the line, and all other lines, are
# copied unchanged.  The rewritten number keeps at least as many hex
# digits as the original (zero padded).
#
# <offset> is hexadecimal, with an optional leading "-" and an optional
# "0x" prefix.

use strict;

if (@ARGV != 3) {
	print STDERR "Usage: $0 <charmap in> <charmap out> <offset>\n";
	print STDERR "offset should be in hex and can be prefixed with a -.\n";
	exit(1);
}

my ($fin, $fout, $offarg) = @ARGV;

# hex() does not understand a sign, so the "-" is split off by hand.
my ($sign, $offdigits) = $offarg =~ /^(-?)(?:0[xX])?([0-9a-fA-F]+)$/
    or die "Invalid offset '$offarg'\n";
my $offset = ($sign eq "-") ? -hex($offdigits) : hex($offdigits);

open(my $in, '<', "$fin.TXT")
    or die "Cannot open $fin.TXT for reading: $!\n";
open(my $out, '>', "$fout.TXT")
    or die "Cannot open $fout.TXT for writing: $!\n";

while (my $line = <$in>) {
	# The first token must be a complete hex literal, i.e. followed by
	# whitespace or the end of the line.
	if ($line =~ /^(\s*)0x([0-9a-fA-F]+)(?!\S)/) {
		my ($lead, $digits) = ($1, $2);
		my $rest = substr($line, $+[0]);
		my $code = hex($digits) + $offset;

		die "Offset moves 0x$digits below zero\n" if ($code < 0);

		$line = sprintf("%s0x%0*X%s",
		    $lead, length($digits), $code, $rest);
	}

	print {$out} $line;
}

close($out) or die "Cannot write $fout.TXT: $!\n";
close($in);
|
@ -6,32 +6,27 @@ use File::Copy;
|
||||
use XML::Parser;
|
||||
use Tie::IxHash;
|
||||
use Text::Iconv;
|
||||
use Data::Dumper;
|
||||
#use Data::Dumper;
|
||||
use Getopt::Long;
|
||||
use Digest::SHA qw(sha1_hex);
|
||||
require "charmaps.pm";
|
||||
|
||||
|
||||
if ($#ARGV < 2) {
|
||||
print "Usage: $0 --cldr=<cldrdir> --unidata=<unidatadir> --etc=<etcdir> --type=<type> [--lc=<la_CC>]\n";
|
||||
print "Usage: $0 --unidir=<unidir> --etc=<etcdir> --type=<type>\n";
|
||||
exit(1);
|
||||
}
|
||||
|
||||
my $DEFENCODING = "UTF-8";
|
||||
my @filter = ();
|
||||
|
||||
my $CLDRDIR = undef;
|
||||
my $UNIDATADIR = undef;
|
||||
my $UNIDIR = undef;
|
||||
my $ETCDIR = undef;
|
||||
my $TYPE = undef;
|
||||
my $doonly = undef;
|
||||
|
||||
my $result = GetOptions (
|
||||
"cldr=s" => \$CLDRDIR,
|
||||
"unidata=s" => \$UNIDATADIR,
|
||||
"unidir=s" => \$UNIDIR,
|
||||
"etc=s" => \$ETCDIR,
|
||||
"type=s" => \$TYPE,
|
||||
"lc=s" => \$doonly
|
||||
);
|
||||
|
||||
my %convertors = ();
|
||||
@ -47,8 +42,8 @@ get_languages();
|
||||
|
||||
my %utf8map = ();
|
||||
my %utf8aliases = ();
|
||||
get_unidata($UNIDATADIR);
|
||||
get_utf8map("$CLDRDIR/posix/$DEFENCODING.cm");
|
||||
get_unidata($UNIDIR);
|
||||
get_utf8map("$UNIDIR/posix/$DEFENCODING.cm");
|
||||
get_encodings("$ETCDIR/charmaps");
|
||||
|
||||
my %keys = ();
|
||||
@ -397,22 +392,6 @@ sub get_languages {
|
||||
%translations = %{$data{T}};
|
||||
%alternativemonths = %{$data{AM}};
|
||||
%encodings = %{$data{E}};
|
||||
|
||||
return if (!defined $doonly);
|
||||
|
||||
my @a = split(/_/, $doonly);
|
||||
if ($#a == 1) {
|
||||
$filter[0] = $a[0];
|
||||
$filter[1] = "x";
|
||||
$filter[2] = $a[1];
|
||||
} elsif ($#a == 2) {
|
||||
$filter[0] = $a[0];
|
||||
$filter[1] = $a[1];
|
||||
$filter[2] = $a[2];
|
||||
}
|
||||
|
||||
print Dumper(@filter);
|
||||
return;
|
||||
}
|
||||
|
||||
sub transform_ctypes {
|
||||
@ -422,8 +401,6 @@ sub transform_ctypes {
|
||||
foreach my $l (sort keys(%languages)) {
|
||||
foreach my $f (sort keys(%{$languages{$l}})) {
|
||||
foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
|
||||
next if ($#filter == 2 && ($filter[0] ne $l
|
||||
|| $filter[1] ne $f || $filter[2] ne $c));
|
||||
next if (defined $languages{$l}{$f}{definitions}
|
||||
&& $languages{$l}{$f}{definitions} !~ /$TYPE/);
|
||||
$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0; # unread
|
||||
@ -432,7 +409,7 @@ sub transform_ctypes {
|
||||
$file .= "_" . $c if ($c ne "x");
|
||||
my $actfile = $file;
|
||||
|
||||
my $filename = "$CLDRDIR/posix/xx_Comm_C.UTF-8.src";
|
||||
my $filename = "$UNIDIR/posix/xx_Comm_C.UTF-8.src";
|
||||
if (! -f $filename) {
|
||||
print STDERR "Cannot open $filename\n";
|
||||
next;
|
||||
@ -455,7 +432,7 @@ sub transform_ctypes {
|
||||
close(FOUT);
|
||||
foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
|
||||
next if ($enc eq $DEFENCODING);
|
||||
$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
|
||||
$filename = "$UNIDIR/posix/$file.$DEFENCODING.src";
|
||||
if (! -f $filename) {
|
||||
print STDERR "Cannot open $filename\n";
|
||||
next;
|
||||
@ -494,8 +471,6 @@ sub transform_collation {
|
||||
foreach my $l (sort keys(%languages)) {
|
||||
foreach my $f (sort keys(%{$languages{$l}})) {
|
||||
foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
|
||||
next if ($#filter == 2 && ($filter[0] ne $l
|
||||
|| $filter[1] ne $f || $filter[2] ne $c));
|
||||
next if (defined $languages{$l}{$f}{definitions}
|
||||
&& $languages{$l}{$f}{definitions} !~ /$TYPE/);
|
||||
$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0; # unread
|
||||
@ -505,15 +480,15 @@ sub transform_collation {
|
||||
$file .= $c;
|
||||
my $actfile = $file;
|
||||
|
||||
my $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
|
||||
my $filename = "$UNIDIR/posix/$file.$DEFENCODING.src";
|
||||
$filename = "$ETCDIR/$file.$DEFENCODING.src"
|
||||
if (! -f $filename);
|
||||
if (! -f $filename
|
||||
&& defined $languages{$l}{$f}{fallback}) {
|
||||
$file = $languages{$l}{$f}{fallback};
|
||||
$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
|
||||
$filename = "$UNIDIR/posix/$file.$DEFENCODING.src";
|
||||
}
|
||||
$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src"
|
||||
$filename = "$UNIDIR/posix/$file.$DEFENCODING.src"
|
||||
if (! -f $filename);
|
||||
if (! -f $filename) {
|
||||
print STDERR
|
||||
@ -564,8 +539,6 @@ sub get_fields {
|
||||
foreach my $l (sort keys(%languages)) {
|
||||
foreach my $f (sort keys(%{$languages{$l}})) {
|
||||
foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
|
||||
next if ($#filter == 2 && ($filter[0] ne $l
|
||||
|| $filter[1] ne $f || $filter[2] ne $c));
|
||||
next if (defined $languages{$l}{$f}{definitions}
|
||||
&& $languages{$l}{$f}{definitions} !~ /$TYPE/);
|
||||
|
||||
@ -575,15 +548,15 @@ sub get_fields {
|
||||
$file .= $f . "_" if ($f ne "x");
|
||||
$file .= $c;
|
||||
|
||||
my $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
|
||||
my $filename = "$UNIDIR/posix/$file.$DEFENCODING.src";
|
||||
$filename = "$ETCDIR/$file.$DEFENCODING.src"
|
||||
if (! -f $filename);
|
||||
if (! -f $filename
|
||||
&& defined $languages{$l}{$f}{fallback}) {
|
||||
$file = $languages{$l}{$f}{fallback};
|
||||
$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
|
||||
$filename = "$UNIDIR/posix/$file.$DEFENCODING.src";
|
||||
}
|
||||
$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src"
|
||||
$filename = "$UNIDIR/posix/$file.$DEFENCODING.src"
|
||||
if (! -f $filename);
|
||||
if (! -f $filename) {
|
||||
print STDERR
|
||||
@ -703,8 +676,6 @@ sub print_fields {
|
||||
foreach my $l (sort keys(%languages)) {
|
||||
foreach my $f (sort keys(%{$languages{$l}})) {
|
||||
foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
|
||||
next if ($#filter == 2 && ($filter[0] ne $l
|
||||
|| $filter[1] ne $f || $filter[2] ne $c));
|
||||
next if (defined $languages{$l}{$f}{definitions}
|
||||
&& $languages{$l}{$f}{definitions} !~ /$TYPE/);
|
||||
foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
|
||||
@ -851,7 +822,6 @@ EOF
|
||||
}
|
||||
|
||||
sub make_makefile {
|
||||
return if ($#filter > -1);
|
||||
print "Creating Makefile for $TYPE\n";
|
||||
my $SRCOUT;
|
||||
my $SRCOUT2;
|
||||
@ -913,8 +883,16 @@ LOCALEDIR= \${SHAREDIR}/locale
|
||||
FILESNAME= $FILESNAMES{$TYPE}
|
||||
.SUFFIXES: .src .${SRCOUT2}
|
||||
${MAPLOC}
|
||||
EOF
|
||||
|
||||
if ($TYPE eq "colldef" || $TYPE eq "ctypedef") {
|
||||
print FOUT <<EOF;
|
||||
.include <bsd.endian.mk>
|
||||
|
||||
EOF
|
||||
}
|
||||
|
||||
print FOUT <<EOF;
|
||||
.src.${SRCOUT2}:
|
||||
$SRCOUT
|
||||
|
||||
@ -979,8 +957,6 @@ EOF
|
||||
foreach my $l (sort keys(%languages)) {
|
||||
foreach my $f (sort keys(%{$languages{$l}})) {
|
||||
foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
|
||||
next if ($#filter == 2 && ($filter[0] ne $l
|
||||
|| $filter[1] ne $f || $filter[2] ne $c));
|
||||
next if (defined $languages{$l}{$f}{definitions}
|
||||
&& $languages{$l}{$f}{definitions} !~ /$TYPE/);
|
||||
if (defined $languages{$l}{$f}{data}{$c}{$DEFENCODING}
|
||||
|
@ -116,13 +116,13 @@ then
|
||||
else {last1 = $1; last2 = $2}}' ${TEMP2} > ${TEMP3}
|
||||
rm -f ${TEMP2}
|
||||
/usr/bin/sed -E -e 's/[ ]+/ /g' \
|
||||
${CLDRDIR}/posix/UTF-8.cm \
|
||||
${UNIDIR}/posix/UTF-8.cm \
|
||||
> ${base}/../etc/final-maps/map.UTF-8
|
||||
/usr/bin/sed -E -e 's/[ ]+/ /g' \
|
||||
${CLDRDIR}/posix/eucCN.cm \
|
||||
${UNIDIR}/posix/eucCN.cm \
|
||||
> ${base}/../etc/final-maps/map.eucCN
|
||||
/usr/bin/sed -E -e 's/[ ]+/ /g' \
|
||||
${CLDRDIR}/posix/eucCN.cm \
|
||||
${UNIDIR}/posix/eucCN.cm \
|
||||
> ${base}/../etc/final-maps/map.GB2312
|
||||
CHARMAPS="ARMSCII-8 Big5 CP1131 CP1251 \
|
||||
CP866 GBK ISCII-DEV ISO8859-1 \
|
||||
|
@ -1,144 +0,0 @@
|
||||
#!/usr/bin/perl -wC
|
||||
|
||||
#
|
||||
# $FreeBSD$
|
||||
#
|
||||
|
||||
use strict;
|
||||
use XML::Parser;
|
||||
use Tie::IxHash;
|
||||
use Data::Dumper;
|
||||
use Getopt::Long;
|
||||
use Digest::SHA qw(sha1_hex);
|
||||
|
||||
|
||||
# Command-line handling and driver.
#
# --cldr, --unidata, --input and --output are all consumed below, so they
# are required; --etc and --type are accepted but not used by this script.
# The usage message is printed and the script exits with status 1 when
# option parsing fails or a required option is missing.

my @filter = ();

my $CLDRDIR = undef;
my $UNIDATADIR = undef;
my $ETCDIR = undef;
my $TYPE = undef;
my $INPUT = undef;
my $OUTPUT = undef;

my $result = GetOptions (
		"cldr=s"	=> \$CLDRDIR,
		"unidata=s"	=> \$UNIDATADIR,
		"etc=s"		=> \$ETCDIR,
		"type=s"	=> \$TYPE,
		"input=s"	=> \$INPUT,
		"output=s"	=> \$OUTPUT,
	    );

if (!$result
    || grep { !defined($_) } ($CLDRDIR, $UNIDATADIR, $INPUT, $OUTPUT)) {
	print "Usage: $0 --cldr=<cldrdir> --unidata=<unidatadir> --etc=<etcdir> --input=<inputfile> --output=<outputfile>\n";
	exit(1);
}

# Lookup tables filled in by get_unidata() and get_utf8map().
my %ucd = ();
my %utf8map = ();
my %utf8aliases = ();
get_unidata($UNIDATADIR);
get_utf8map("$CLDRDIR/posix/UTF-8.cm");
convert($INPUT, $OUTPUT);
|
||||
|
||||
############################
|
||||
|
||||
# Load <directory>/UnicodeData.txt into the file-level %ucd table:
#   $ucd{code2name}{<first field>}  = <second field>	(Unicode name)
#   $ucd{name2code}{<second field>} = <first field>	(Unicode code)
# Records are ";"-separated; records with fewer than two fields (for
# example blank lines) are ignored.  Dies if the file cannot be opened.
sub get_unidata {
	my $directory = shift;
	my $path = "$directory/UnicodeData.txt";

	open(my $fh, '<', $path)
	    or die("Cannot open $path: $!");

	while (my $record = <$fh>) {
		chomp($record);

		my ($code, $name) = split(/;/, $record);
		next if (!defined $code || !defined $name);

		$ucd{code2name}{$code} = $name;	# Unicode name
		$ucd{name2code}{$name} = $code;	# Unicode code
	}

	close($fh);
}
|
||||
|
||||
# Read the CHARMAP ... END CHARMAP section of a POSIX charmap file into
# the file-level tables:
#   $utf8map{<name>}     = hex byte string with the "\x" markers removed
#   $utf8aliases{<name>} = name on the preceding entry, when both entries
#                          carry the same byte string
# Underscores in names are turned into spaces.  Comment lines, blank
# lines, and lines inside the section that do not look like
# "<NAME> value" are skipped.  Dies if the file cannot be opened.
sub get_utf8map {
	my $file = shift;

	open(my $fh, '<', $file)
	    or die("Cannot open $file: $!");

	my $prev_k = undef;
	my $prev_v = "";
	my $incharmap = 0;
	while (my $l = <$fh>) {
		chomp($l);
		$l =~ s/\r//;
		next if ($l =~ /^\#/);
		next if ($l eq "");

		if ($l eq "CHARMAP") {
			$incharmap = 1;
			next;
		}

		next if (!$incharmap);
		last if ($l eq "END CHARMAP");

		# Skip anything that is not a mapping line instead of
		# reusing the captures of an earlier match.
		my ($k, $v) = $l =~ /^<([^\s]+)>\s+(.*)/
		    or next;
		$k =~ s/_/ /g;		# unicode char string
		$v =~ s/\\x//g;		# UTF-8 char code
		$utf8map{$k} = $v;

		$utf8aliases{$k} = $prev_k if ($prev_v eq $v);

		$prev_v = $v;
		$prev_k = $k;
	}

	close($fh);
}
|
||||
|
||||
# Translate a symbolic character name into its raw byte sequence.
#
# The name is looked up in %utf8map (name -> hex byte string).  If it is
# missing there, %utf8aliases (name -> name of the entry it duplicates) is
# consulted and the result resolved through %utf8map again.
#
# Returns the bytes for any hex string made of whole byte pairs, whatever
# its length (so four-byte UTF-8 sequences work too).
# Dies with "Cannot convert <name>" when the name is unknown.  If the
# stored value is not a sequence of hex byte pairs, a message is printed
# to STDERR and the string "length = <n>" is returned.
sub decode_cldr {
	my $s = shift;

	my $v = $utf8map{$s};
	if (!defined $v && defined $utf8aliases{$s}) {
		# The alias table stores names, not codes.
		$v = $utf8map{$utf8aliases{$s}};
	}
	die "Cannot convert $s" if (!defined $v);

	return pack("H*", $v) if ($v =~ /\A(?:[0-9A-Fa-f]{2})+\z/);

	print STDERR "Cannot convert $s\n";
	return "length = " . length($v);
}
|
||||
|
||||
# Copy file $IN to file $OUT, replacing every "<NAME>" token with the
# bytes returned by decode_cldr(NAME).  Lines starting with "#" are copied
# unchanged.  Dies if either file cannot be opened or the output cannot be
# flushed.
sub convert {
	my $IN = shift;
	my $OUT = shift;

	open(my $in, '<', $IN)
	    or die("Cannot open $IN: $!");
	open(my $out, '>', $OUT)
	    or die("Cannot open $OUT: $!");

	while (my $l = <$in>) {
		chomp($l);

		if ($l !~ /^#/) {
			# Substitute in a single left-to-right pass so that a
			# decoded "<" or ">" is never parsed as the start or
			# end of another token.
			$l =~ s/<(.*?)>/decode_cldr($1)/ge;
		}
		print {$out} $l, "\n";
	}

	close($out) or die("Cannot write $OUT: $!");
	close($in);
}
|
@ -1,4 +1,5 @@
|
||||
#!/usr/local/bin/perl -wC
|
||||
# $FreeBSD$
|
||||
|
||||
use strict;
|
||||
#use File::Copy;
|
||||
@ -11,15 +12,15 @@ use Getopt::Long;
|
||||
|
||||
|
||||
if ($#ARGV != 1) {
|
||||
print "Usage: $0 --cldr=<cldrdir> --etc=<etcdir>\n";
|
||||
print "Usage: $0 --unidir=<unidir> --etc=<etcdir>\n";
|
||||
exit(1);
|
||||
}
|
||||
|
||||
my $CLDRDIR = undef;
|
||||
my $UNIDIR = undef;
|
||||
my $ETCDIR = undef;
|
||||
|
||||
my $result = GetOptions (
|
||||
"cldr=s" => \$CLDRDIR,
|
||||
"unidir=s" => \$UNIDIR,
|
||||
"etc=s" => \$ETCDIR,
|
||||
);
|
||||
|
||||
@ -118,7 +119,7 @@ my $outfilename = "$ETCDIR/common.UTF-8.src";
|
||||
my $manual_file = "$ETCDIR/manual-input.UTF-8";
|
||||
my $stars = "**********************************************************************\n";
|
||||
|
||||
get_utf8map("$CLDRDIR/posix/UTF-8.cm");
|
||||
get_utf8map("$UNIDIR/posix/UTF-8.cm");
|
||||
generate_header ();
|
||||
generate_sections ();
|
||||
generate_footer ();
|
||||
@ -252,7 +253,7 @@ sub compress_ctype {
|
||||
|
||||
my @lines = initialize_lines ($territory);
|
||||
|
||||
my $filename = "$CLDRDIR/posix/$territory.UTF-8.src";
|
||||
my $filename = "$UNIDIR/posix/$territory.UTF-8.src";
|
||||
if (! -f $filename) {
|
||||
print STDERR "Cannot open $filename\n";
|
||||
return;
|
||||
|
@ -1,29 +0,0 @@
|
||||
#!/bin/sh
#
# Look up a Unicode character name in the UTF-8 charmap, in the Unicode
# data file and in the per-encoding charmaps under etc/charmaps.
#
# Usage: <script> <unicode string>
# Must be run from the directory that contains etc/unicode.conf.

# Print the path configured for key $1 in etc/unicode.conf, expanding a
# leading "~/" (the shell does not expand a tilde that comes from a file).
conf_path() {
	grep "^$1" etc/unicode.conf | cut -f 2 -d " " | \
	    sed -e "s|^~/|${HOME}/|"
}

UNIDATA=$(conf_path unidata)
CLDR=$(conf_path cldr)
CHARMAPS=etc/charmaps

# When the configured unidata path is a directory, search the
# UnicodeData.txt file inside it.
if [ -d "${UNIDATA}" ]; then
	UNIDATA="${UNIDATA}/UnicodeData.txt"
fi

# Prefer posix/UTF-8.cm below the cldr directory when it exists
# (NOTE(review): presumably where the generated charmap lives - confirm);
# otherwise keep the original <cldr>/UTF-8.cm location.
UTF8="${CLDR}/UTF-8.cm"
if [ -f "${CLDR}/posix/UTF-8.cm" ]; then
	UTF8="${CLDR}/posix/UTF-8.cm"
fi

if [ -z "$1" ]; then
	echo "Usage: $0 <unicode string>"
	exit 1
fi

UCS=$*
# Spaces become "." so the pattern matches names written with either
# spaces or underscores.
UCS_=$(echo "$*" | sed -e 's/ /./g')
echo "UCS: ${UCS}"

echo UTF-8.cm:
grep "${UCS_}" "${UTF8}" | sed -e 's/[[:space:]][[:space:]]*/ /g'

echo UNIDATA:
grep "${UCS_}" "${UNIDATA}"

echo UCC:
# Remember the matching code points; they are searched for below.
UCC=$(grep "${UCS_}" "${UNIDATA}" | awk -F\; '{ print $1 }')
if [ -n "${UCC}" ]; then
	echo "${UCC}"
fi

echo CHARMAPS:
grep "${UCS_}" ${CHARMAPS}/* | sed -e "s|${CHARMAPS}/||g"
for code in ${UCC}; do
	grep "${code}" ${CHARMAPS}/* | sed -e "s|${CHARMAPS}/||g"
done
|
Loading…
x
Reference in New Issue
Block a user