Add a hybrid C.UTF-8 locale that is identical to the default C locale, except
that it uses the same ctype maps and functions as the other UTF-8 locales.

Reviewed by:	bapt, cem, eadler
Approved by:	kib (mentor, implicit)
Differential Revision:	https://reviews.freebsd.org/D17833
This commit is contained in:
yuripv 2018-11-04 22:13:22 +00:00
parent 57ccf4b76b
commit b6fca3ee80
6 changed files with 99 additions and 92 deletions

View File

@ -84,7 +84,8 @@ destruct_collate(void *t)
void *
__collate_load(const char *encoding, __unused locale_t unused)
{
if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) {
if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0 ||
strncmp(encoding, "C.", 2) == 0) {
return &__xlocale_C_collate;
}
struct xlocale_collate *table = calloc(sizeof(struct xlocale_collate), 1);
@ -122,7 +123,8 @@ __collate_load_tables_l(const char *encoding, struct xlocale_collate *table)
table->__collate_load_error = 1;
/* 'encoding' must be already checked. */
if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) {
if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0 ||
strncmp(encoding, "C.", 2) == 0) {
return (_LDP_CACHE);
}

View File

@ -63,7 +63,8 @@ __part_load_locale(const char *name,
size_t namesize, bufsize;
/* 'name' must be already checked. */
if (strcmp(name, "C") == 0 || strcmp(name, "POSIX") == 0) {
if (strcmp(name, "C") == 0 || strcmp(name, "POSIX") == 0 ||
strncmp(name, "C.", 2) == 0) {
*using_locale = 0;
return (_LDP_CACHE);
}

View File

@ -14,6 +14,7 @@ MAPLOC= ${.CURDIR}/../../tools/tools/locale/etc/final-maps
-f ${MAPLOC}/map.${.IMPSRC:T:R:E} \
-i ${.IMPSRC} ${.OBJDIR}/${.IMPSRC:T:R} || true
LOCALES+= C.UTF-8
LOCALES+= be_BY.CP1131
LOCALES+= ca_IT.ISO8859-1
LOCALES+= ca_IT.ISO8859-15
@ -21,7 +22,6 @@ LOCALES+= el_GR.ISO8859-7
LOCALES+= en_US.ISO8859-1
LOCALES+= en_US.ISO8859-15
LOCALES+= en_US.US-ASCII
LOCALES+= en_US.UTF-8
LOCALES+= hi_IN.ISCII-DEV
LOCALES+= hy_AM.ARMSCII-8
LOCALES+= ja_JP.SJIS
@ -45,82 +45,83 @@ LOCALES+= zh_TW.Big5
SAME+= en_US.UTF-8 ru_RU.UTF-8
SAME+= en_US.UTF-8 zh_TW.UTF-8
SAME+= en_US.UTF-8 zh_HK.UTF-8
SAME+= en_US.UTF-8 zh_CN.UTF-8
SAME+= en_US.UTF-8 uk_UA.UTF-8
SAME+= en_US.UTF-8 tr_TR.UTF-8
SAME+= en_US.UTF-8 sv_SE.UTF-8
SAME+= en_US.UTF-8 sv_FI.UTF-8
SAME+= en_US.UTF-8 sr_RS.UTF-8@latin
SAME+= en_US.UTF-8 sr_RS.UTF-8
SAME+= en_US.UTF-8 sl_SI.UTF-8
SAME+= en_US.UTF-8 sk_SK.UTF-8
SAME+= en_US.UTF-8 se_NO.UTF-8
SAME+= en_US.UTF-8 se_FI.UTF-8
SAME+= en_US.UTF-8 ro_RO.UTF-8
SAME+= en_US.UTF-8 pt_PT.UTF-8
SAME+= en_US.UTF-8 pt_BR.UTF-8
SAME+= en_US.UTF-8 pl_PL.UTF-8
SAME+= en_US.UTF-8 nn_NO.UTF-8
SAME+= en_US.UTF-8 nl_NL.UTF-8
SAME+= en_US.UTF-8 nl_BE.UTF-8
SAME+= en_US.UTF-8 nb_NO.UTF-8
SAME+= en_US.UTF-8 mn_MN.UTF-8
SAME+= en_US.UTF-8 lv_LV.UTF-8
SAME+= en_US.UTF-8 lt_LT.UTF-8
SAME+= en_US.UTF-8 ko_KR.UTF-8
SAME+= en_US.UTF-8 kk_KZ.UTF-8
SAME+= en_US.UTF-8 ja_JP.UTF-8
SAME+= en_US.UTF-8 it_IT.UTF-8
SAME+= en_US.UTF-8 it_CH.UTF-8
SAME+= en_US.UTF-8 is_IS.UTF-8
SAME+= en_US.UTF-8 hy_AM.UTF-8
SAME+= en_US.UTF-8 hu_HU.UTF-8
SAME+= en_US.UTF-8 hr_HR.UTF-8
SAME+= en_US.UTF-8 hi_IN.UTF-8
SAME+= en_US.UTF-8 he_IL.UTF-8
SAME+= en_US.UTF-8 fr_FR.UTF-8
SAME+= en_US.UTF-8 fr_CH.UTF-8
SAME+= en_US.UTF-8 fr_CA.UTF-8
SAME+= en_US.UTF-8 fr_BE.UTF-8
SAME+= en_US.UTF-8 fi_FI.UTF-8
SAME+= en_US.UTF-8 eu_ES.UTF-8
SAME+= en_US.UTF-8 et_EE.UTF-8
SAME+= en_US.UTF-8 es_MX.UTF-8
SAME+= en_US.UTF-8 es_ES.UTF-8
SAME+= en_US.UTF-8 es_CR.UTF-8
SAME+= en_US.UTF-8 es_AR.UTF-8
SAME+= en_US.UTF-8 en_ZA.UTF-8
SAME+= en_US.UTF-8 en_SG.UTF-8
SAME+= en_US.UTF-8 en_PH.UTF-8
SAME+= en_US.UTF-8 en_NZ.UTF-8
SAME+= en_US.UTF-8 en_IE.UTF-8
SAME+= en_US.UTF-8 en_HK.UTF-8
SAME+= en_US.UTF-8 en_GB.UTF-8
SAME+= en_US.UTF-8 en_CA.UTF-8
SAME+= en_US.UTF-8 en_AU.UTF-8
SAME+= en_US.UTF-8 el_GR.UTF-8
SAME+= en_US.UTF-8 de_DE.UTF-8
SAME+= en_US.UTF-8 de_CH.UTF-8
SAME+= en_US.UTF-8 de_AT.UTF-8
SAME+= en_US.UTF-8 da_DK.UTF-8
SAME+= en_US.UTF-8 cs_CZ.UTF-8
SAME+= en_US.UTF-8 ca_IT.UTF-8
SAME+= en_US.UTF-8 ca_FR.UTF-8
SAME+= en_US.UTF-8 ca_ES.UTF-8
SAME+= en_US.UTF-8 ca_AD.UTF-8
SAME+= en_US.UTF-8 bg_BG.UTF-8
SAME+= en_US.UTF-8 be_BY.UTF-8
SAME+= en_US.UTF-8 ar_SA.UTF-8
SAME+= en_US.UTF-8 ar_QA.UTF-8
SAME+= en_US.UTF-8 ar_MA.UTF-8
SAME+= en_US.UTF-8 ar_JO.UTF-8
SAME+= en_US.UTF-8 ar_EG.UTF-8
SAME+= en_US.UTF-8 ar_AE.UTF-8
SAME+= en_US.UTF-8 am_ET.UTF-8
SAME+= en_US.UTF-8 af_ZA.UTF-8
SAME+= C.UTF-8 en_US.UTF-8
SAME+= C.UTF-8 ru_RU.UTF-8
SAME+= C.UTF-8 zh_TW.UTF-8
SAME+= C.UTF-8 zh_HK.UTF-8
SAME+= C.UTF-8 zh_CN.UTF-8
SAME+= C.UTF-8 uk_UA.UTF-8
SAME+= C.UTF-8 tr_TR.UTF-8
SAME+= C.UTF-8 sv_SE.UTF-8
SAME+= C.UTF-8 sv_FI.UTF-8
SAME+= C.UTF-8 sr_RS.UTF-8@latin
SAME+= C.UTF-8 sr_RS.UTF-8
SAME+= C.UTF-8 sl_SI.UTF-8
SAME+= C.UTF-8 sk_SK.UTF-8
SAME+= C.UTF-8 se_NO.UTF-8
SAME+= C.UTF-8 se_FI.UTF-8
SAME+= C.UTF-8 ro_RO.UTF-8
SAME+= C.UTF-8 pt_PT.UTF-8
SAME+= C.UTF-8 pt_BR.UTF-8
SAME+= C.UTF-8 pl_PL.UTF-8
SAME+= C.UTF-8 nn_NO.UTF-8
SAME+= C.UTF-8 nl_NL.UTF-8
SAME+= C.UTF-8 nl_BE.UTF-8
SAME+= C.UTF-8 nb_NO.UTF-8
SAME+= C.UTF-8 mn_MN.UTF-8
SAME+= C.UTF-8 lv_LV.UTF-8
SAME+= C.UTF-8 lt_LT.UTF-8
SAME+= C.UTF-8 ko_KR.UTF-8
SAME+= C.UTF-8 kk_KZ.UTF-8
SAME+= C.UTF-8 ja_JP.UTF-8
SAME+= C.UTF-8 it_IT.UTF-8
SAME+= C.UTF-8 it_CH.UTF-8
SAME+= C.UTF-8 is_IS.UTF-8
SAME+= C.UTF-8 hy_AM.UTF-8
SAME+= C.UTF-8 hu_HU.UTF-8
SAME+= C.UTF-8 hr_HR.UTF-8
SAME+= C.UTF-8 hi_IN.UTF-8
SAME+= C.UTF-8 he_IL.UTF-8
SAME+= C.UTF-8 fr_FR.UTF-8
SAME+= C.UTF-8 fr_CH.UTF-8
SAME+= C.UTF-8 fr_CA.UTF-8
SAME+= C.UTF-8 fr_BE.UTF-8
SAME+= C.UTF-8 fi_FI.UTF-8
SAME+= C.UTF-8 eu_ES.UTF-8
SAME+= C.UTF-8 et_EE.UTF-8
SAME+= C.UTF-8 es_MX.UTF-8
SAME+= C.UTF-8 es_ES.UTF-8
SAME+= C.UTF-8 es_CR.UTF-8
SAME+= C.UTF-8 es_AR.UTF-8
SAME+= C.UTF-8 en_ZA.UTF-8
SAME+= C.UTF-8 en_SG.UTF-8
SAME+= C.UTF-8 en_PH.UTF-8
SAME+= C.UTF-8 en_NZ.UTF-8
SAME+= C.UTF-8 en_IE.UTF-8
SAME+= C.UTF-8 en_HK.UTF-8
SAME+= C.UTF-8 en_GB.UTF-8
SAME+= C.UTF-8 en_CA.UTF-8
SAME+= C.UTF-8 en_AU.UTF-8
SAME+= C.UTF-8 el_GR.UTF-8
SAME+= C.UTF-8 de_DE.UTF-8
SAME+= C.UTF-8 de_CH.UTF-8
SAME+= C.UTF-8 de_AT.UTF-8
SAME+= C.UTF-8 da_DK.UTF-8
SAME+= C.UTF-8 cs_CZ.UTF-8
SAME+= C.UTF-8 ca_IT.UTF-8
SAME+= C.UTF-8 ca_FR.UTF-8
SAME+= C.UTF-8 ca_ES.UTF-8
SAME+= C.UTF-8 ca_AD.UTF-8
SAME+= C.UTF-8 bg_BG.UTF-8
SAME+= C.UTF-8 be_BY.UTF-8
SAME+= C.UTF-8 ar_SA.UTF-8
SAME+= C.UTF-8 ar_QA.UTF-8
SAME+= C.UTF-8 ar_MA.UTF-8
SAME+= C.UTF-8 ar_JO.UTF-8
SAME+= C.UTF-8 ar_EG.UTF-8
SAME+= C.UTF-8 ar_AE.UTF-8
SAME+= C.UTF-8 am_ET.UTF-8
SAME+= C.UTF-8 af_ZA.UTF-8
SAME+= en_US.ISO8859-1 sv_SE.ISO8859-1
SAME+= en_US.ISO8859-1 sv_FI.ISO8859-1
SAME+= en_US.ISO8859-1 pt_PT.ISO8859-1

View File

@ -54,7 +54,7 @@ LC:= --lc=${LC}
.endif
all:
cp ${ETCDIR}/common.UTF-8.src ${CLDRDIR}/posix/xx_Comm_US.UTF-8.src
cp ${ETCDIR}/common.UTF-8.src ${CLDRDIR}/posix/xx_Comm_C.UTF-8.src
.for t in ${TYPES}
. if ${KNOWN:M${t}}
test -d ${t} || mkdir ${t}
@ -105,7 +105,7 @@ static-colldef:
.endfor
transfer-rollup:
cp ${ETCDIR}/common.UTF-8.src ${CLDRDIR}/posix/xx_Comm_US.UTF-8.src
cp ${ETCDIR}/common.UTF-8.src ${CLDRDIR}/posix/xx_Comm_C.UTF-8.src
rollup:
perl -I tools tools/utf8-rollup.pl \

View File

@ -416,6 +416,9 @@ sub get_languages {
}
sub transform_ctypes {
# Add the C.UTF-8
$languages{"C"}{"x"}{data}{"x"}{$DEFENCODING} = undef;
foreach my $l (sort keys(%languages)) {
foreach my $f (sort keys(%{$languages{$l}})) {
foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
@ -424,13 +427,12 @@ sub transform_ctypes {
next if (defined $languages{$l}{$f}{definitions}
&& $languages{$l}{$f}{definitions} !~ /$TYPE/);
$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0; # unread
my $file;
$file = $l . "_";
$file .= $f . "_" if ($f ne "x");
$file .= $c;
my $file = $l;
$file .= "_" . $f if ($f ne "x");
$file .= "_" . $c if ($c ne "x");
my $actfile = $file;
my $filename = "$CLDRDIR/posix/xx_Comm_US.UTF-8.src";
my $filename = "$CLDRDIR/posix/xx_Comm_C.UTF-8.src";
if (! -f $filename) {
print STDERR "Cannot open $filename\n";
next;
@ -939,8 +941,8 @@ EOF
} keys(%{$hashtable{$hash}});
} elsif ($TYPE eq "ctypedef") {
@files = sort {
if ($a eq 'en_x_US.UTF-8') { return -1; }
elsif ($b eq 'en_x_US.UTF-8') { return 1; }
if ($a eq 'C_x_x.UTF-8') { return -1; }
elsif ($b eq 'C_x_x.UTF-8') { return 1; }
if ($a =~ /^en_x_US/) { return -1; }
elsif ($b =~ /^en_x_US/) { return 1; }
@ -962,6 +964,7 @@ EOF
}
if ($#files > 0) {
my $link = shift(@files);
$link =~ s/_x_x//; # special case for C
$link =~ s/_x_/_/; # strip family if none there
foreach my $file (@files) {
my @a = split(/_/, $file);
@ -987,9 +990,9 @@ EOF
next;
}
foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
my $file = $l . "_";
$file .= $f . "_" if ($f ne "x");
$file .= $c;
my $file = $l;
$file .= "_" . $f if ($f ne "x");
$file .= "_" . $c if ($c ne "x");
next if (!defined $languages{$l}{$f}{data}{$c}{$e});
print FOUT "LOCALES+=\t$file.$e\n";
}