Fix strcoll_l disagreeing with strxfrm by reworking the forward order case in

wcscoll_l().

Illumos fixed this while grabbing back our patches:
https://www.illumos.org/rb/r/402/

This does not 100% fix what postgresql folks reported as there is still a
remaining issue: https://www.illumos.org/issues/7962, it improves the situation

The initial issue was reported in postgresql mailing lists:
https://www.postgresql.org/message-id/flat/111D0E27-A8F3-4A84-A4E0-B0FB703863DF@s24.com#111D0E27-A8F3-4A84-A4E0-B0FB703863DF@s24.com

Submitted by:	Yuri Pankov <yuri.pankov@nexenta.com>
Obtained from:	Illumos
MFC after:	2 weeks
This commit is contained in:
Baptiste Daroussin 2017-04-16 19:12:07 +00:00
parent b8255b91e7
commit c48dc2a193
2 changed files with 159 additions and 62 deletions

View File

@ -1,5 +1,5 @@
/*- /*-
* Copyright 2010 Nexenta Systems, Inc. All rights reserved. * Copyright 2017 Nexenta Systems, Inc.
* Copyright (c) 2002 Tim J. Robbins * Copyright (c) 2002 Tim J. Robbins
* All rights reserved. * All rights reserved.
* *
@ -42,22 +42,22 @@ __FBSDID("$FreeBSD$");
int int
wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t locale) wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t locale)
{ {
int len1, len2, pri1, pri2, ret; int len1, len2, pri1, pri2;
wchar_t *tr1 = NULL, *tr2 = NULL; wchar_t *tr1 = NULL, *tr2 = NULL;
int direc, pass; int direc, pass;
int ret = wcscmp(ws1, ws2);
FIX_LOCALE(locale); FIX_LOCALE(locale);
struct xlocale_collate *table = struct xlocale_collate *table =
(struct xlocale_collate*)locale->components[XLC_COLLATE]; (struct xlocale_collate*)locale->components[XLC_COLLATE];
if (table->__collate_load_error) if (table->__collate_load_error || ret == 0)
/* return (ret);
* Locale has no special collating order or could not be
* loaded, do a fast binary comparison.
*/
return (wcscmp(ws1, ws2));
ret = 0; if (*ws1 == 0 && *ws2 != 0)
return (-1);
if (*ws1 != 0 && *ws2 == 0)
return (1);
/* /*
* Once upon a time we had code to try to optimize this, but * Once upon a time we had code to try to optimize this, but
@ -77,19 +77,19 @@ wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t locale)
const int32_t *st2 = NULL; const int32_t *st2 = NULL;
const wchar_t *w1 = ws1; const wchar_t *w1 = ws1;
const wchar_t *w2 = ws2; const wchar_t *w2 = ws2;
int check1, check2;
/* special pass for UNDEFINED */ /* special pass for UNDEFINED */
if (pass == table->info->directive_count) { if (pass == table->info->directive_count) {
direc = DIRECTIVE_FORWARD | DIRECTIVE_UNDEFINED; direc = DIRECTIVE_FORWARD;
} else { } else {
direc = table->info->directive[pass]; direc = table->info->directive[pass];
} }
if (direc & DIRECTIVE_BACKWARD) { if (direc & DIRECTIVE_BACKWARD) {
wchar_t *bp, *fp, c; wchar_t *bp, *fp, c;
free(tr1);
if ((tr1 = wcsdup(w1)) == NULL) if ((tr1 = wcsdup(w1)) == NULL)
goto fail; goto end;
bp = tr1; bp = tr1;
fp = tr1 + wcslen(tr1) - 1; fp = tr1 + wcslen(tr1) - 1;
while (bp < fp) { while (bp < fp) {
@ -97,8 +97,9 @@ wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t locale)
*bp++ = *fp; *bp++ = *fp;
*fp-- = c; *fp-- = c;
} }
free(tr2);
if ((tr2 = wcsdup(w2)) == NULL) if ((tr2 = wcsdup(w2)) == NULL)
goto fail; goto end;
bp = tr2; bp = tr2;
fp = tr2 + wcslen(tr2) - 1; fp = tr2 + wcslen(tr2) - 1;
while (bp < fp) { while (bp < fp) {
@ -111,6 +112,7 @@ wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t locale)
} }
if (direc & DIRECTIVE_POSITION) { if (direc & DIRECTIVE_POSITION) {
int check1, check2;
while (*w1 && *w2) { while (*w1 && *w2) {
pri1 = pri2 = 0; pri1 = pri2 = 0;
check1 = check2 = 1; check1 = check2 = 1;
@ -120,7 +122,7 @@ wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t locale)
&pri1, pass, &st1); &pri1, pass, &st1);
if (pri1 < 0) { if (pri1 < 0) {
errno = EINVAL; errno = EINVAL;
goto fail; goto end;
} }
if (!pri1) { if (!pri1) {
pri1 = COLLATE_MAX_PRIORITY; pri1 = COLLATE_MAX_PRIORITY;
@ -133,7 +135,7 @@ wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t locale)
&pri2, pass, &st2); &pri2, pass, &st2);
if (pri2 < 0) { if (pri2 < 0) {
errno = EINVAL; errno = EINVAL;
goto fail; goto end;
} }
if (!pri2) { if (!pri2) {
pri2 = COLLATE_MAX_PRIORITY; pri2 = COLLATE_MAX_PRIORITY;
@ -149,58 +151,64 @@ wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t locale)
w1 += len1; w1 += len1;
w2 += len2; w2 += len2;
} }
} else { if (!*w1) {
while (*w1 && *w2) { if (*w2) {
pri1 = pri2 = 0; ret = -(int)*w2;
check1 = check2 = 1; goto end;
while ((pri1 == pri2) && (check1 || check2)) {
while (check1 && *w1) {
_collate_lookup(table, w1,
&len1, &pri1, pass, &st1);
if (pri1 > 0)
break;
if (pri1 < 0) {
errno = EINVAL;
goto fail;
}
st1 = NULL;
w1 += 1;
}
check1 = (st1 != NULL);
while (check2 && *w2) {
_collate_lookup(table, w2,
&len2, &pri2, pass, &st2);
if (pri2 > 0)
break;
if (pri2 < 0) {
errno = EINVAL;
goto fail;
}
st2 = NULL;
w2 += 1;
}
check2 = (st2 != NULL);
if (!pri1 || !pri2)
break;
} }
if (!pri1 || !pri2) } else {
ret = *w1;
goto end;
}
} else {
int vpri1 = 0, vpri2 = 0;
while (*w1 || *w2 || st1 || st2) {
pri1 = 1;
while (*w1 || st1) {
_collate_lookup(table, w1, &len1, &pri1,
pass, &st1);
w1 += len1;
if (pri1 > 0) {
vpri1++;
break;
}
if (pri1 < 0) {
errno = EINVAL;
goto end;
}
st1 = NULL;
}
pri2 = 1;
while (*w2 || st2) {
_collate_lookup(table, w2, &len2, &pri2,
pass, &st2);
w2 += len2;
if (pri2 > 0) {
vpri2++;
break;
}
if (pri2 < 0) {
errno = EINVAL;
goto end;
}
st2 = NULL;
}
if ((!pri1 || !pri2) && (vpri1 == vpri2))
break; break;
if (pri1 != pri2) { if (pri1 != pri2) {
ret = pri1 - pri2; ret = pri1 - pri2;
goto end; goto end;
} }
w1 += len1;
w2 += len2;
} }
} if (vpri1 && !vpri2) {
if (!*w1) { ret = 1;
if (*w2) { goto end;
ret = -(int)*w2; }
if (!vpri1 && vpri2) {
ret = -1;
goto end; goto end;
} }
} else {
ret = *w1;
goto end;
} }
} }
ret = 0; ret = 0;
@ -210,10 +218,6 @@ wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t locale)
free(tr2); free(tr2);
return (ret); return (ret);
fail:
ret = wcscmp(ws1, ws2);
goto end;
} }
int int

View File

@ -1,5 +1,7 @@
/*- /*-
* Copyright (c) 2016 Baptiste Daroussin <bapt@FreeBSD.org> * Copyright (c) 2016 Baptiste Daroussin <bapt@FreeBSD.org>
* Copyright 2016 Tom Lane <tgl@sss.pgh.pa.us>
* Copyright 2017 Nexenta Systems, Inc.
* All rights reserved. * All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
@ -30,6 +32,8 @@ __FBSDID("$FreeBSD$");
#include <wchar.h> #include <wchar.h>
#include <locale.h> #include <locale.h>
#include <stdlib.h> #include <stdlib.h>
#include <time.h>
#include <errno.h>
#include <atf-c.h> #include <atf-c.h>
@ -55,9 +59,98 @@ ATF_TC_BODY(russian_collation, tc)
"Bad collation, expected: '%ls' got '%ls'", res, c); "Bad collation, expected: '%ls' got '%ls'", res, c);
} }
#define NSTRINGS 2000
#define MAXSTRLEN 20
#define MAXXFRMLEN (MAXSTRLEN * 20)
typedef struct {
char sval[MAXSTRLEN];
char xval[MAXXFRMLEN];
} cstr;
ATF_TC_WITHOUT_HEAD(strcoll_vs_strxfrm);
ATF_TC_BODY(strcoll_vs_strxfrm, tc)
{
cstr data[NSTRINGS];
char *curloc;
int i, j;
curloc = setlocale(LC_ALL, "en_US.UTF-8");
ATF_CHECK_MSG(curloc != NULL, "Fail to set locale");
/* Ensure new random() values on every run */
srandom((unsigned int) time(NULL));
/* Generate random UTF8 strings of length less than MAXSTRLEN bytes */
for (i = 0; i < NSTRINGS; i++) {
char *p;
int len;
again:
p = data[i].sval;
len = 1 + (random() % (MAXSTRLEN - 1));
while (len > 0) {
int c;
/*
* Generate random printable char in ISO8859-1 range.
* Bias towards producing a lot of spaces.
*/
if ((random() % 16) < 3) {
c = ' ';
} else {
do {
c = random() & 0xFF;
} while (!((c >= ' ' && c <= 127) ||
(c >= 0xA0 && c <= 0xFF)));
}
if (c <= 127) {
*p++ = c;
len--;
} else {
if (len < 2)
break;
/* Poor man's utf8-ification */
*p++ = 0xC0 + (c >> 6);
len--;
*p++ = 0x80 + (c & 0x3F);
len--;
}
}
*p = '\0';
/* strxfrm() each string as we produce it */
errno = 0;
ATF_CHECK_MSG(strxfrm(data[i].xval, data[i].sval,
MAXXFRMLEN) < MAXXFRMLEN, "strxfrm() result for %d-length "
" string exceeded %d bytes", (int)strlen(data[i].sval),
MAXXFRMLEN);
/*
* Amend strxfrm() failing on certain characters to be fixed and
* test later
*/
if (errno != 0)
goto again;
}
for (i = 0; i < NSTRINGS; i++) {
for (j = 0; j < NSTRINGS; j++) {
int sr = strcoll(data[i].sval, data[j].sval);
int sx = strcmp(data[i].xval, data[j].xval);
ATF_CHECK_MSG(!((sr * sx < 0) ||
(sr * sx == 0 && sr + sx != 0)),
"%s: diff for \"%s\" and \"%s\"",
curloc, data[i].sval, data[j].sval);
}
}
}
ATF_TP_ADD_TCS(tp) ATF_TP_ADD_TCS(tp)
{ {
ATF_TP_ADD_TC(tp, russian_collation); ATF_TP_ADD_TC(tp, russian_collation);
ATF_TP_ADD_TC(tp, strcoll_vs_strxfrm);
return (atf_no_error()); return (atf_no_error());
} }