From cc7edd258c2564fe9e3c4a0dc839acc4a71caff9 Mon Sep 17 00:00:00 2001 From: Thomas Munro Date: Sun, 8 Nov 2020 02:50:34 +0000 Subject: [PATCH] Add collation version support to querylocale(3). Provide a way to ask for an opaque version string for a locale_t, so that potential changes in sort order can be detected. Similar to ICU's ucol_getVersion() and Windows' GetNLSVersionEx(), this API is intended to allow databases to detect when text order-based indexes might need to be rebuilt. The CLDR version is extracted from CLDR source data by the Makefile under tools/tools/locale, written into the machine-generated Makefile under share/colldef, passed to localedef -V, and then written into LC_COLLATE file headers. The initial version is 34.0. tools/tools/locale was recently updated to pull down 35.0, but the output hasn't been committed under share/colldef yet, so that will provide the first observable change when it happens. Other versioning schemes are possible in future, because the format is unspecified. 
Reviewed by: bapt, 0mp, kib, yuripv (albeit a long time ago) Differential Revision: https://reviews.freebsd.org/D17166 --- include/xlocale/_locale.h | 1 + lib/libc/locale/collate.c | 10 ++++-- lib/libc/locale/collate.h | 7 ++-- lib/libc/locale/querylocale.3 | 32 +++++++++++++++---- lib/libc/locale/xlocale.c | 19 ++++++++--- lib/libc/locale/xlocale_private.h | 5 +++ share/colldef/Makefile | 4 +++ tools/tools/locale/Makefile | 2 ++ tools/tools/locale/tools/cldr2def.pl | 19 +++++++++++ .../bootstrap/bootstrap_xlocale_private.h | 3 ++ usr.bin/localedef/collate.c | 13 +++++--- usr.bin/localedef/localedef.1 | 11 ++++++- usr.bin/localedef/localedef.c | 13 +++++++- usr.bin/localedef/localedef.h | 2 ++ 14 files changed, 119 insertions(+), 22 deletions(-) diff --git a/include/xlocale/_locale.h b/include/xlocale/_locale.h index a4e04f082fa8..c9543e576ac9 100644 --- a/include/xlocale/_locale.h +++ b/include/xlocale/_locale.h @@ -43,6 +43,7 @@ #define LC_MESSAGES_MASK (1<<5) #define LC_ALL_MASK (LC_COLLATE_MASK | LC_CTYPE_MASK | LC_MESSAGES_MASK | \ LC_MONETARY_MASK | LC_NUMERIC_MASK | LC_TIME_MASK) +#define LC_VERSION_MASK (1<<6) #define LC_GLOBAL_LOCALE ((locale_t)-1) #ifndef _LOCALE_T_DEFINED diff --git a/lib/libc/locale/collate.c b/lib/libc/locale/collate.c index c67f7bcd646e..c992d2299ab7 100644 --- a/lib/libc/locale/collate.c +++ b/lib/libc/locale/collate.c @@ -140,7 +140,9 @@ __collate_load_tables_l(const char *encoding, struct xlocale_collate *table) (void) _close(fd); return (_LDP_ERROR); } - if (sbuf.st_size < (COLLATE_STR_LEN + sizeof (info))) { + if (sbuf.st_size < (COLLATE_FMT_VERSION_LEN + + XLOCALE_DEF_VERSION_LEN + + sizeof (info))) { (void) _close(fd); errno = EINVAL; return (_LDP_ERROR); @@ -151,12 +153,14 @@ __collate_load_tables_l(const char *encoding, struct xlocale_collate *table) return (_LDP_ERROR); } - if (strncmp(TMP, COLLATE_VERSION, COLLATE_STR_LEN) != 0) { + if (strncmp(TMP, COLLATE_FMT_VERSION, COLLATE_FMT_VERSION_LEN) != 0) { (void) munmap(map, 
sbuf.st_size); errno = EINVAL; return (_LDP_ERROR); } - TMP += COLLATE_STR_LEN; + TMP += COLLATE_FMT_VERSION_LEN; + strlcat(table->header.version, TMP, sizeof (table->header.version)); + TMP += XLOCALE_DEF_VERSION_LEN; info = (void *)TMP; TMP += sizeof (*info); diff --git a/lib/libc/locale/collate.h b/lib/libc/locale/collate.h index 4abb1f936ae2..9983cdbd969d 100644 --- a/lib/libc/locale/collate.h +++ b/lib/libc/locale/collate.h @@ -53,7 +53,9 @@ #endif #define COLLATE_STR_LEN 24 /* should be 64-bit multiple */ -#define COLLATE_VERSION "BSD 1.0\n" + +#define COLLATE_FMT_VERSION_LEN 12 +#define COLLATE_FMT_VERSION "BSD 1.0\n" #define COLLATE_MAX_PRIORITY (0x7fffffff) /* max signed value */ #define COLLATE_SUBST_PRIORITY (0x40000000) /* bit indicates subst table */ @@ -69,7 +71,8 @@ /* * The collate file format is as follows: * - * char version[COLLATE_STR_LEN]; // must be COLLATE_VERSION + * char fmt_version[COLLATE_FMT_VERSION_LEN]; // must be COLLATE_FMT_VERSION + * char def_version[XLOCALE_DEF_VERSION_LEN]; // NUL-terminated, may be empty * collate_info_t info; // see below, includes padding * collate_char_pri_t char_data[256]; // 8 bit char values * collate_subst_t subst[*]; // 0 or more substitutions diff --git a/lib/libc/locale/querylocale.3 b/lib/libc/locale/querylocale.3 index d1bb688ed907..ecafee49a712 100644 --- a/lib/libc/locale/querylocale.3 +++ b/lib/libc/locale/querylocale.3 @@ -27,12 +27,12 @@ .\" .\" $FreeBSD$ .\" -.Dd May 3, 2013 +.Dd November 8, 2020 .Dt QUERYLOCALE 3 .Os .Sh NAME .Nm querylocale -.Nd Look up the locale name for a specified category +.Nd Look up the locale name or version for a specified category .Sh LIBRARY .Lb libc .Sh SYNOPSIS @@ -40,11 +40,22 @@ .Ft const char * .Fn querylocale "int mask" "locale_t locale" .Sh DESCRIPTION -Returns the name of the locale for the category specified by +Returns the name or version of the locale for the category specified by .Fa mask . 
-This possible values for the mask are the same as those in -.Xr newlocale 3 . -If more than one bit in the mask is set, the returned value is undefined. +The possible values for the mask are the same as those in +.Xr newlocale 3 , +when requesting the locale name. +Specify the bitwise OR of +.Fa LC_VERSION_MASK +and another mask value to request a version string. +Version strings can be compared to detect changes to the locale's definition. +The structure of the version string is unspecified. +Currently, version information is only available for +.Fa LC_COLLATE_MASK , +and an empty string is returned for other categories. +If more than one bit in the mask is set, not counting +.Fa LC_VERSION_MASK , +the returned value is undefined. .Sh SEE ALSO .Xr duplocale 3 , .Xr freelocale 3 , @@ -52,3 +63,12 @@ If more than one bit in the mask is set, the returned value is undefined. .Xr newlocale 3 , .Xr uselocale 3 , .Xr xlocale 3 +.Sh HISTORY +The +.Fn querylocale +function first appeared in +.Fx 9.1 , +and is based on the function of the same name in Darwin. +.Fa LC_VERSION_MASK +first appeared in +.Fx 13.0 . diff --git a/lib/libc/locale/xlocale.c b/lib/libc/locale/xlocale.c index 465172fe24eb..fb674f86bbff 100644 --- a/lib/libc/locale/xlocale.c +++ b/lib/libc/locale/xlocale.c @@ -231,6 +231,8 @@ static int dupcomponent(int type, locale_t base, locale_t new) if (new->components[type]) { strncpy(new->components[type]->locale, src->locale, ENCODING_LEN); + strncpy(new->components[type]->version, src->version, + XLOCALE_DEF_VERSION_LEN); } } else if (base->components[type]) { new->components[type] = xlocale_retain(base->components[type]); @@ -346,17 +348,24 @@ freelocale(locale_t loc) } /* - * Returns the name of the locale for a particular component of a locale_t. + * Returns the name or version of the locale for a particular component of a + * locale_t. 
*/ const char *querylocale(int mask, locale_t loc) { - int type = ffs(mask) - 1; + int type = ffs(mask & ~LC_VERSION_MASK) - 1; FIX_LOCALE(loc); if (type >= XLC_LAST) return (NULL); - if (loc->components[type]) - return (loc->components[type]->locale); - return ("C"); + if (mask & LC_VERSION_MASK) { + if (loc->components[type]) + return (loc->components[type]->version); + return (""); + } else { + if (loc->components[type]) + return (loc->components[type]->locale); + return ("C"); + } } /* diff --git a/lib/libc/locale/xlocale_private.h b/lib/libc/locale/xlocale_private.h index fc04c9dd43a3..391e375bc03d 100644 --- a/lib/libc/locale/xlocale_private.h +++ b/lib/libc/locale/xlocale_private.h @@ -91,6 +91,9 @@ struct xlocale_refcounted { /** Function used to destroy this component, if one is required*/ void(*destructor)(void*); }; + +#define XLOCALE_DEF_VERSION_LEN 12 + /** * Header for a locale component. All locale components must begin with this * header. @@ -99,6 +102,8 @@ struct xlocale_component { struct xlocale_refcounted header; /** Name of the locale used for this component. */ char locale[ENCODING_LEN+1]; + /** Version of the definition for this component. 
*/ + char version[XLOCALE_DEF_VERSION_LEN]; }; /** diff --git a/share/colldef/Makefile b/share/colldef/Makefile index f8db9608c47b..e7c93d300c91 100644 --- a/share/colldef/Makefile +++ b/share/colldef/Makefile @@ -7,10 +7,13 @@ FILESNAME= LC_COLLATE .SUFFIXES: .src .LC_COLLATE MAPLOC= ${.CURDIR}/../../tools/tools/locale/etc/final-maps +CLDR_VERSION= "34.0" + .include .src.LC_COLLATE: localedef ${LOCALEDEF_ENDIAN} -D -U -i ${.IMPSRC} \ + -V ${CLDR_VERSION} \ -f ${MAPLOC}/map.${.TARGET:T:R:E:C/@.*//} ${.OBJDIR}/${.IMPSRC:T:R} LOCALES+= af_ZA.UTF-8 @@ -227,6 +230,7 @@ FILES+= $t.LC_COLLATE FILESDIR_$t.LC_COLLATE= ${LOCALEDIR}/$t $t.LC_COLLATE: ${.CURDIR}/$f.src localedef ${LOCALEDEF_ENDIAN} -D -U -i ${.ALLSRC} \ + -V ${CLDR_VERSION} \ -f ${MAPLOC}/map.${.TARGET:T:R:E:C/@.*//} \ ${.OBJDIR}/${.TARGET:T:R} .endfor diff --git a/tools/tools/locale/Makefile b/tools/tools/locale/Makefile index aad2c2160bb5..76fff6acb17c 100644 --- a/tools/tools/locale/Makefile +++ b/tools/tools/locale/Makefile @@ -187,6 +187,8 @@ extract-${CLDRFILES_${N}:T}:: ${CLDRFILES_${N}:T} ${UNIDIR} cd ${UNIDIR} && unzip -o ../${CLDRFILES_${N}:T} extract: extract-${CLDRFILES_${N}:T} .endfor + grep 'name="version"' ${UNIDIR}/tools/build.xml | \ + sed 's/.* value="//;s/".*//' > ${UNIDIR}/cldr-version patch:: .if exists(${PATCHDIR}) cd ${UNIDIR} && cat ${PATCHDIR}/patch-* | patch diff --git a/tools/tools/locale/tools/cldr2def.pl b/tools/tools/locale/tools/cldr2def.pl index 5f756cc3895a..8617ca81ca40 100755 --- a/tools/tools/locale/tools/cldr2def.pl +++ b/tools/tools/locale/tools/cldr2def.pl @@ -50,6 +50,8 @@ my $UNIDIR = undef; my $ETCDIR = undef; my $TYPE = undef; +my $CLDR_VERSION = undef; + my $result = GetOptions ( "unidir=s" => \$UNIDIR, "etc=s" => \$ETCDIR, @@ -500,6 +502,12 @@ EOF sub transform_collation { + # Read the CLDR version + open(FIN, "$UNIDIR/cldr-version") or die "Cannot open cldr-version"; + read FIN, $CLDR_VERSION, -s FIN; + close(FIN); + $CLDR_VERSION =~ s/\s*$//; + foreach my $l 
(sort keys(%languages)) { foreach my $f (sort keys(%{$languages{$l}})) { foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) { @@ -861,8 +869,11 @@ sub make_makefile { my $SRCOUT4 = ""; my $MAPLOC; if ($TYPE eq "colldef") { + # In future, we might want to try to put the CLDR version into + # the .src files with some new syntax, instead of the makefile. $SRCOUT = "localedef \${LOCALEDEF_ENDIAN} -D -U " . "-i \${.IMPSRC} \\\n" . + "\t-V \${CLDR_VERSION} \\\n" . "\t-f \${MAPLOC}/map.\${.TARGET:T:R:E:C/@.*//} " . "\${.OBJDIR}/\${.IMPSRC:T:R}"; $MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" . @@ -875,6 +886,7 @@ sub make_makefile { "\$t.LC_COLLATE: \${.CURDIR}/\$f.src\n" . "\tlocaledef \${LOCALEDEF_ENDIAN} -D -U " . "-i \${.ALLSRC} \\\n" . + "\t-V \${CLDR_VERSION} \\\n" . "\t\t-f \${MAPLOC}/map.\${.TARGET:T:R:E:C/@.*//} \\\n" . "\t\t\${.OBJDIR}/\${.TARGET:T:R}\n" . ".endfor\n\n"; @@ -917,6 +929,13 @@ FILESNAME= $FILESNAMES{$TYPE} ${MAPLOC} EOF + if ($TYPE eq "colldef") { + print FOUT < diff --git a/usr.bin/localedef/bootstrap/bootstrap_xlocale_private.h b/usr.bin/localedef/bootstrap/bootstrap_xlocale_private.h index 243139ff09b8..48203aaa0f61 100644 --- a/usr.bin/localedef/bootstrap/bootstrap_xlocale_private.h +++ b/usr.bin/localedef/bootstrap/bootstrap_xlocale_private.h @@ -51,4 +51,7 @@ struct localedef_bootstrap_xlocale_component { char unused; }; +/* This must agree with the definition in xlocale_private.h. 
*/ +#define XLOCALE_DEF_VERSION_LEN 12 + #endif /* _LOCALDEF_BOOTSTRAP_XLOCALE_PRIVATE_H */ diff --git a/usr.bin/localedef/collate.c b/usr.bin/localedef/collate.c index 3e3c7539dd8d..0bed283b6aab 100644 --- a/usr.bin/localedef/collate.c +++ b/usr.bin/localedef/collate.c @@ -1119,7 +1119,8 @@ dump_collate(void) collelem_t *ce; collchar_t *cc; subst_t *sb; - char vers[COLLATE_STR_LEN]; + char fmt_version[COLLATE_FMT_VERSION_LEN]; + char def_version[XLOCALE_DEF_VERSION_LEN]; collate_char_t chars[UCHAR_MAX + 1]; collate_large_t *large; collate_subst_t *subst[COLL_WEIGHTS_MAX]; @@ -1160,8 +1161,11 @@ dump_collate(void) } (void) memset(&chars, 0, sizeof (chars)); - (void) memset(vers, 0, COLLATE_STR_LEN); - (void) strlcpy(vers, COLLATE_VERSION, sizeof (vers)); + (void) memset(fmt_version, 0, COLLATE_FMT_VERSION_LEN); + (void) strlcpy(fmt_version, COLLATE_FMT_VERSION, sizeof (fmt_version)); + (void) memset(def_version, 0, XLOCALE_DEF_VERSION_LEN); + if (version) + (void) strlcpy(def_version, version, sizeof (def_version)); /* * We need to make sure we arrange for the UNDEFINED field @@ -1301,7 +1305,8 @@ dump_collate(void) collinfo.chain_count = htote(chain_count); collinfo.large_count = htote(large_count); - if ((wr_category(vers, COLLATE_STR_LEN, f) < 0) || + if ((wr_category(fmt_version, COLLATE_FMT_VERSION_LEN, f) < 0) || + (wr_category(def_version, XLOCALE_DEF_VERSION_LEN, f) < 0) || (wr_category(&collinfo, sizeof (collinfo), f) < 0) || (wr_category(&chars, sizeof (chars), f) < 0)) { return; diff --git a/usr.bin/localedef/localedef.1 b/usr.bin/localedef/localedef.1 index 126193caebb3..f67672f9a0c6 100644 --- a/usr.bin/localedef/localedef.1 +++ b/usr.bin/localedef/localedef.1 @@ -33,7 +33,7 @@ .\" .\" $FreeBSD$ .\" -.Dd October 18, 2018 +.Dd November 8, 2020 .Dt LOCALEDEF 1 .Os .Sh NAME @@ -135,6 +135,14 @@ If not supplied, then default screen widths will be assumed, which will generally not account for East Asian encodings requiring more than a single character cell 
to display, nor for combining or accent marks that occupy no additional screen width. +.It Fl V Ar version +Specifies a version string describing the version of the locale definition. +This string can be retrieved with +.Xr querylocale 3 , +and is intended to allow applications to detect locale definition changes. +Currently it is stored only for the +.Sy LC_COLLATE +category. .El .Pp The following operands are required: @@ -198,6 +206,7 @@ If an error is detected, no permanent output will be created. .Xr locale 1 , .Xr iconv_open 3 , .Xr nl_langinfo 3 , +.Xr querylocale 3 , .Xr strftime 3 , .Xr environ 7 .Sh WARNINGS diff --git a/usr.bin/localedef/localedef.c b/usr.bin/localedef/localedef.c index 40b4ee58367f..390ebf8784ef 100644 --- a/usr.bin/localedef/localedef.c +++ b/usr.bin/localedef/localedef.c @@ -48,6 +48,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include "collate.h" #include "localedef.h" #include "parser.h" @@ -62,6 +63,7 @@ int undefok = 0; int warnok = 0; static char *locname = NULL; static char locpath[PATH_MAX]; +char *version = NULL; const char * category_name(void) @@ -253,6 +255,7 @@ usage(void) (void) fprintf(stderr, " -u encoding : assume encoding\n"); (void) fprintf(stderr, " -w widths : use screen widths file\n"); (void) fprintf(stderr, " -i locsrc : source file for locale\n"); + (void) fprintf(stderr, " -V version : version string for locale\n"); exit(4); } @@ -279,7 +282,7 @@ main(int argc, char **argv) (void) setlocale(LC_ALL, ""); - while ((c = getopt(argc, argv, "blw:i:cf:u:vUD")) != -1) { + while ((c = getopt(argc, argv, "blw:i:cf:u:vUDV:")) != -1) { switch (c) { case 'D': bsd = 1; @@ -314,6 +317,9 @@ main(int argc, char **argv) case '?': usage(); break; + case 'V': + version = optarg; + break; } } @@ -325,6 +331,11 @@ main(int argc, char **argv) (void) printf("Processing locale %s.\n", locname); } + if (version && strlen(version) >= XLOCALE_DEF_VERSION_LEN) { + (void) fprintf(stderr, "Version string too long.\n"); + 
exit(1); + } + if (cfname) { if (verbose) (void) printf("Loading charmap %s.\n", cfname); diff --git a/usr.bin/localedef/localedef.h b/usr.bin/localedef/localedef.h index b8c831f1d38d..ba95e27701d8 100644 --- a/usr.bin/localedef/localedef.h +++ b/usr.bin/localedef/localedef.h @@ -55,6 +55,8 @@ extern int undefok; /* mostly ignore undefined symbols */ extern int warnok; extern int warnings; +extern char *version; + int yylex(void); void yyerror(const char *); _Noreturn void errf(const char *, ...) __printflike(1, 2);