Add collation version support to querylocale(3).

Provide a way to ask for an opaque version string for a locale_t, so
that potential changes in sort order can be detected.  Similar to
ICU's ucol_getVersion() and Windows' GetNLSVersionEx(), this API is
intended to allow databases to detect when text order-based indexes
might need to be rebuilt.

The CLDR version is extracted from CLDR source data by the Makefile
under tools/tools/locale, written into the machine-generated Makefile
under share/colldef, passed to localedef -V, and then written into
LC_COLLATE file headers.  The initial version is 34.0.
tools/tools/locale was recently updated to pull down 35.0, but the
output hasn't been committed under share/colldef yet, so that will
provide the first observable change when it happens.  Other versioning
schemes are possible in future, because the format is unspecified.

Reviewed by:	bapt, 0mp, kib, yuripv (albeit a long time ago)
Differential Revision:	https://reviews.freebsd.org/D17166
This commit is contained in:
Thomas Munro 2020-11-08 02:50:34 +00:00
parent d2799054f0
commit cc7edd258c
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=367476
14 changed files with 119 additions and 22 deletions

View File

@ -43,6 +43,7 @@
#define LC_MESSAGES_MASK (1<<5)
#define LC_ALL_MASK (LC_COLLATE_MASK | LC_CTYPE_MASK | LC_MESSAGES_MASK | \
LC_MONETARY_MASK | LC_NUMERIC_MASK | LC_TIME_MASK)
#define LC_VERSION_MASK (1<<6)
#define LC_GLOBAL_LOCALE ((locale_t)-1)
#ifndef _LOCALE_T_DEFINED

View File

@ -140,7 +140,9 @@ __collate_load_tables_l(const char *encoding, struct xlocale_collate *table)
(void) _close(fd);
return (_LDP_ERROR);
}
if (sbuf.st_size < (COLLATE_STR_LEN + sizeof (info))) {
if (sbuf.st_size < (COLLATE_FMT_VERSION_LEN +
XLOCALE_DEF_VERSION_LEN +
sizeof (info))) {
(void) _close(fd);
errno = EINVAL;
return (_LDP_ERROR);
@ -151,12 +153,14 @@ __collate_load_tables_l(const char *encoding, struct xlocale_collate *table)
return (_LDP_ERROR);
}
if (strncmp(TMP, COLLATE_VERSION, COLLATE_STR_LEN) != 0) {
if (strncmp(TMP, COLLATE_FMT_VERSION, COLLATE_FMT_VERSION_LEN) != 0) {
(void) munmap(map, sbuf.st_size);
errno = EINVAL;
return (_LDP_ERROR);
}
TMP += COLLATE_STR_LEN;
TMP += COLLATE_FMT_VERSION_LEN;
strlcat(table->header.version, TMP, sizeof (table->header.version));
TMP += XLOCALE_DEF_VERSION_LEN;
info = (void *)TMP;
TMP += sizeof (*info);

View File

@ -53,7 +53,9 @@
#endif
#define COLLATE_STR_LEN 24 /* should be 64-bit multiple */
#define COLLATE_VERSION "BSD 1.0\n"
#define COLLATE_FMT_VERSION_LEN 12
#define COLLATE_FMT_VERSION "BSD 1.0\n"
#define COLLATE_MAX_PRIORITY (0x7fffffff) /* max signed value */
#define COLLATE_SUBST_PRIORITY (0x40000000) /* bit indicates subst table */
@ -69,7 +71,8 @@
/*
* The collate file format is as follows:
*
* char version[COLLATE_STR_LEN]; // must be COLLATE_VERSION
* char fmt_version[COLLATE_FMT_VERSION_LEN]; // must be COLLATE_FMT_VERSION
* char def_version[XLOCALE_DEF_VERSION_LEN]; // NUL-terminated, may be empty
* collate_info_t info; // see below, includes padding
* collate_char_pri_t char_data[256]; // 8 bit char values
* collate_subst_t subst[*]; // 0 or more substitutions

View File

@ -27,12 +27,12 @@
.\"
.\" $FreeBSD$
.\"
.Dd May 3, 2013
.Dd November 8, 2020
.Dt QUERYLOCALE 3
.Os
.Sh NAME
.Nm querylocale
.Nd Look up the locale name for a specified category
.Nd Look up the locale name or version for a specified category
.Sh LIBRARY
.Lb libc
.Sh SYNOPSIS
@ -40,11 +40,22 @@
.Ft const char *
.Fn querylocale "int mask" "locale_t locale"
.Sh DESCRIPTION
Returns the name of the locale for the category specified by
Returns the name or version of the locale for the category specified by
.Fa mask .
This possible values for the mask are the same as those in
.Xr newlocale 3 .
If more than one bit in the mask is set, the returned value is undefined.
The possible values for the mask are the same as those in
.Xr newlocale 3 ,
when requesting the locale name.
Specify the bitwise OR of
.Fa LC_VERSION_MASK
and another mask value to request a version string.
Version strings can be compared to detect changes to the locale's definition.
The structure of the version string is unspecified.
Currently, version information is only available for
.Fa LC_COLLATE_MASK ,
and an empty string is returned for other categories.
If more than one bit in the mask is set, not counting
.Fa LC_VERSION_MASK ,
the returned value is undefined.
.Sh SEE ALSO
.Xr duplocale 3 ,
.Xr freelocale 3 ,
@ -52,3 +63,12 @@ If more than one bit in the mask is set, the returned value is undefined.
.Xr newlocale 3 ,
.Xr uselocale 3 ,
.Xr xlocale 3
.Sh HISTORY
The
.Fn querylocale
function first appeared in
.Fx 9.1 ,
and is based on the function of the same name in Darwin.
.Fa LC_VERSION_MASK
first appeared in
.Fx 13.0 .

View File

@ -231,6 +231,8 @@ static int dupcomponent(int type, locale_t base, locale_t new)
if (new->components[type]) {
strncpy(new->components[type]->locale, src->locale,
ENCODING_LEN);
strncpy(new->components[type]->version, src->version,
XLOCALE_DEF_VERSION_LEN);
}
} else if (base->components[type]) {
new->components[type] = xlocale_retain(base->components[type]);
@ -346,17 +348,24 @@ freelocale(locale_t loc)
}
/*
* Returns the name of the locale for a particular component of a locale_t.
* Returns the name or version of the locale for a particular component of a
* locale_t.
*/
const char *querylocale(int mask, locale_t loc)
{
int type = ffs(mask) - 1;
int type = ffs(mask & ~LC_VERSION_MASK) - 1;
FIX_LOCALE(loc);
if (type >= XLC_LAST)
return (NULL);
if (loc->components[type])
return (loc->components[type]->locale);
return ("C");
if (mask & LC_VERSION_MASK) {
if (loc->components[type])
return (loc->components[type]->version);
return ("");
} else {
if (loc->components[type])
return (loc->components[type]->locale);
return ("C");
}
}
/*

View File

@ -91,6 +91,9 @@ struct xlocale_refcounted {
/** Function used to destroy this component, if one is required*/
void(*destructor)(void*);
};
#define XLOCALE_DEF_VERSION_LEN 12
/**
* Header for a locale component. All locale components must begin with this
* header.
@ -99,6 +102,8 @@ struct xlocale_component {
struct xlocale_refcounted header;
/** Name of the locale used for this component. */
char locale[ENCODING_LEN+1];
/** Version of the definition for this component. */
char version[XLOCALE_DEF_VERSION_LEN];
};
/**

View File

@ -7,10 +7,13 @@ FILESNAME= LC_COLLATE
.SUFFIXES: .src .LC_COLLATE
MAPLOC= ${.CURDIR}/../../tools/tools/locale/etc/final-maps
CLDR_VERSION= "34.0"
.include <bsd.endian.mk>
.src.LC_COLLATE:
localedef ${LOCALEDEF_ENDIAN} -D -U -i ${.IMPSRC} \
-V ${CLDR_VERSION} \
-f ${MAPLOC}/map.${.TARGET:T:R:E:C/@.*//} ${.OBJDIR}/${.IMPSRC:T:R}
LOCALES+= af_ZA.UTF-8
@ -227,6 +230,7 @@ FILES+= $t.LC_COLLATE
FILESDIR_$t.LC_COLLATE= ${LOCALEDIR}/$t
$t.LC_COLLATE: ${.CURDIR}/$f.src
localedef ${LOCALEDEF_ENDIAN} -D -U -i ${.ALLSRC} \
-V ${CLDR_VERSION} \
-f ${MAPLOC}/map.${.TARGET:T:R:E:C/@.*//} \
${.OBJDIR}/${.TARGET:T:R}
.endfor

View File

@ -187,6 +187,8 @@ extract-${CLDRFILES_${N}:T}:: ${CLDRFILES_${N}:T} ${UNIDIR}
cd ${UNIDIR} && unzip -o ../${CLDRFILES_${N}:T}
extract: extract-${CLDRFILES_${N}:T}
.endfor
grep 'name="version"' ${UNIDIR}/tools/build.xml | \
sed 's/.* value="//;s/".*//' > ${UNIDIR}/cldr-version
patch::
.if exists(${PATCHDIR})
cd ${UNIDIR} && cat ${PATCHDIR}/patch-* | patch

View File

@ -50,6 +50,8 @@ my $UNIDIR = undef;
my $ETCDIR = undef;
my $TYPE = undef;
my $CLDR_VERSION = undef;
my $result = GetOptions (
"unidir=s" => \$UNIDIR,
"etc=s" => \$ETCDIR,
@ -500,6 +502,12 @@ EOF
sub transform_collation {
# Read the CLDR version
open(FIN, "$UNIDIR/cldr-version") or die "Cannot open cldr-version";
read FIN, $CLDR_VERSION, -s FIN;
close(FIN);
$CLDR_VERSION =~ s/\s*$//;
foreach my $l (sort keys(%languages)) {
foreach my $f (sort keys(%{$languages{$l}})) {
foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
@ -861,8 +869,11 @@ sub make_makefile {
my $SRCOUT4 = "";
my $MAPLOC;
if ($TYPE eq "colldef") {
# In future, we might want to try to put the CLDR version into
# the .src files with some new syntax, instead of the makefile.
$SRCOUT = "localedef \${LOCALEDEF_ENDIAN} -D -U " .
"-i \${.IMPSRC} \\\n" .
"\t-V \${CLDR_VERSION} \\\n" .
"\t-f \${MAPLOC}/map.\${.TARGET:T:R:E:C/@.*//} " .
"\${.OBJDIR}/\${.IMPSRC:T:R}";
$MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
@ -875,6 +886,7 @@ sub make_makefile {
"\$t.LC_COLLATE: \${.CURDIR}/\$f.src\n" .
"\tlocaledef \${LOCALEDEF_ENDIAN} -D -U " .
"-i \${.ALLSRC} \\\n" .
"\t-V \${CLDR_VERSION} \\\n" .
"\t\t-f \${MAPLOC}/map.\${.TARGET:T:R:E:C/@.*//} \\\n" .
"\t\t\${.OBJDIR}/\${.TARGET:T:R}\n" .
".endfor\n\n";
@ -917,6 +929,13 @@ FILESNAME= $FILESNAMES{$TYPE}
${MAPLOC}
EOF
if ($TYPE eq "colldef") {
print FOUT <<EOF;
CLDR_VERSION= "${CLDR_VERSION}"
EOF
}
if ($TYPE eq "colldef" || $TYPE eq "ctypedef") {
print FOUT <<EOF;
.include <bsd.endian.mk>

View File

@ -51,4 +51,7 @@ struct localedef_bootstrap_xlocale_component {
char unused;
};
/* This must agree with the definition in xlocale_private.h. */
#define XLOCALE_DEF_VERSION_LEN 12
#endif /* _LOCALDEF_BOOTSTRAP_XLOCALE_PRIVATE_H */

View File

@ -1119,7 +1119,8 @@ dump_collate(void)
collelem_t *ce;
collchar_t *cc;
subst_t *sb;
char vers[COLLATE_STR_LEN];
char fmt_version[COLLATE_FMT_VERSION_LEN];
char def_version[XLOCALE_DEF_VERSION_LEN];
collate_char_t chars[UCHAR_MAX + 1];
collate_large_t *large;
collate_subst_t *subst[COLL_WEIGHTS_MAX];
@ -1160,8 +1161,11 @@ dump_collate(void)
}
(void) memset(&chars, 0, sizeof (chars));
(void) memset(vers, 0, COLLATE_STR_LEN);
(void) strlcpy(vers, COLLATE_VERSION, sizeof (vers));
(void) memset(fmt_version, 0, COLLATE_FMT_VERSION_LEN);
(void) strlcpy(fmt_version, COLLATE_FMT_VERSION, sizeof (fmt_version));
(void) memset(def_version, 0, XLOCALE_DEF_VERSION_LEN);
if (version)
(void) strlcpy(def_version, version, sizeof (def_version));
/*
* We need to make sure we arrange for the UNDEFINED field
@ -1301,7 +1305,8 @@ dump_collate(void)
collinfo.chain_count = htote(chain_count);
collinfo.large_count = htote(large_count);
if ((wr_category(vers, COLLATE_STR_LEN, f) < 0) ||
if ((wr_category(fmt_version, COLLATE_FMT_VERSION_LEN, f) < 0) ||
(wr_category(def_version, XLOCALE_DEF_VERSION_LEN, f) < 0) ||
(wr_category(&collinfo, sizeof (collinfo), f) < 0) ||
(wr_category(&chars, sizeof (chars), f) < 0)) {
return;

View File

@ -33,7 +33,7 @@
.\"
.\" $FreeBSD$
.\"
.Dd October 18, 2018
.Dd November 8, 2020
.Dt LOCALEDEF 1
.Os
.Sh NAME
@ -135,6 +135,14 @@ If not supplied, then default screen widths will be assumed, which will
generally not account for East Asian encodings requiring more than a single
character cell to display, nor for combining or accent marks that occupy
no additional screen width.
.It Fl V Ar version
Specifies a version string describing the version of the locale definition.
This string can be retrieved with
.Xr querylocale 3 ,
and is intended to allow applications to detect locale definition changes.
Currently it is stored only for the
.Sy LC_COLLATE
category.
.El
.Pp
The following operands are required:
@ -198,6 +206,7 @@ If an error is detected, no permanent output will be created.
.Xr locale 1 ,
.Xr iconv_open 3 ,
.Xr nl_langinfo 3 ,
.Xr querylocale 3 ,
.Xr strftime 3 ,
.Xr environ 7
.Sh WARNINGS

View File

@ -48,6 +48,7 @@ __FBSDID("$FreeBSD$");
#include <limits.h>
#include <locale.h>
#include <dirent.h>
#include "collate.h"
#include "localedef.h"
#include "parser.h"
@ -62,6 +63,7 @@ int undefok = 0;
int warnok = 0;
static char *locname = NULL;
static char locpath[PATH_MAX];
char *version = NULL;
const char *
category_name(void)
@ -253,6 +255,7 @@ usage(void)
(void) fprintf(stderr, " -u encoding : assume encoding\n");
(void) fprintf(stderr, " -w widths : use screen widths file\n");
(void) fprintf(stderr, " -i locsrc : source file for locale\n");
(void) fprintf(stderr, " -V version : version string for locale\n");
exit(4);
}
@ -279,7 +282,7 @@ main(int argc, char **argv)
(void) setlocale(LC_ALL, "");
while ((c = getopt(argc, argv, "blw:i:cf:u:vUD")) != -1) {
while ((c = getopt(argc, argv, "blw:i:cf:u:vUDV:")) != -1) {
switch (c) {
case 'D':
bsd = 1;
@ -314,6 +317,9 @@ main(int argc, char **argv)
case '?':
usage();
break;
case 'V':
version = optarg;
break;
}
}
@ -325,6 +331,11 @@ main(int argc, char **argv)
(void) printf("Processing locale %s.\n", locname);
}
if (version && strlen(version) >= XLOCALE_DEF_VERSION_LEN) {
(void) fprintf(stderr, "Version string too long.\n");
exit(1);
}
if (cfname) {
if (verbose)
(void) printf("Loading charmap %s.\n", cfname);

View File

@ -55,6 +55,8 @@ extern int undefok; /* mostly ignore undefined symbols */
extern int warnok;
extern int warnings;
extern char *version;
int yylex(void);
void yyerror(const char *);
_Noreturn void errf(const char *, ...) __printflike(1, 2);