Fix strcoll_l disagreeing with strxfrm by reworking the forward order case in
wcscoll_l(). Illumos fixed this while grabbing back our patches: https://www.illumos.org/rb/r/402/ This does not 100% fix what postgresql folks reported as there is still a remaining issue: https://www.illumos.org/issues/7962, it improves the situation The initial issue was reported in postgresql mailing lists: https://www.postgresql.org/message-id/flat/111D0E27-A8F3-4A84-A4E0-B0FB703863DF@s24.com#111D0E27-A8F3-4A84-A4E0-B0FB703863DF@s24.com Submitted by: Yuri Pankov <yuri.pankov@nexenta.com> Obtained from: Illumos MFC after: 2 weeks
This commit is contained in:
parent
b8255b91e7
commit
c48dc2a193
@ -1,5 +1,5 @@
|
||||
/*-
|
||||
* Copyright 2010 Nexenta Systems, Inc. All rights reserved.
|
||||
* Copyright 2017 Nexenta Systems, Inc.
|
||||
* Copyright (c) 2002 Tim J. Robbins
|
||||
* All rights reserved.
|
||||
*
|
||||
@ -42,22 +42,22 @@ __FBSDID("$FreeBSD$");
|
||||
int
|
||||
wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t locale)
|
||||
{
|
||||
int len1, len2, pri1, pri2, ret;
|
||||
int len1, len2, pri1, pri2;
|
||||
wchar_t *tr1 = NULL, *tr2 = NULL;
|
||||
int direc, pass;
|
||||
int ret = wcscmp(ws1, ws2);
|
||||
|
||||
FIX_LOCALE(locale);
|
||||
struct xlocale_collate *table =
|
||||
(struct xlocale_collate*)locale->components[XLC_COLLATE];
|
||||
|
||||
if (table->__collate_load_error)
|
||||
/*
|
||||
* Locale has no special collating order or could not be
|
||||
* loaded, do a fast binary comparison.
|
||||
*/
|
||||
return (wcscmp(ws1, ws2));
|
||||
if (table->__collate_load_error || ret == 0)
|
||||
return (ret);
|
||||
|
||||
ret = 0;
|
||||
if (*ws1 == 0 && *ws2 != 0)
|
||||
return (-1);
|
||||
if (*ws1 != 0 && *ws2 == 0)
|
||||
return (1);
|
||||
|
||||
/*
|
||||
* Once upon a time we had code to try to optimize this, but
|
||||
@ -77,19 +77,19 @@ wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t locale)
|
||||
const int32_t *st2 = NULL;
|
||||
const wchar_t *w1 = ws1;
|
||||
const wchar_t *w2 = ws2;
|
||||
int check1, check2;
|
||||
|
||||
/* special pass for UNDEFINED */
|
||||
if (pass == table->info->directive_count) {
|
||||
direc = DIRECTIVE_FORWARD | DIRECTIVE_UNDEFINED;
|
||||
direc = DIRECTIVE_FORWARD;
|
||||
} else {
|
||||
direc = table->info->directive[pass];
|
||||
}
|
||||
|
||||
if (direc & DIRECTIVE_BACKWARD) {
|
||||
wchar_t *bp, *fp, c;
|
||||
free(tr1);
|
||||
if ((tr1 = wcsdup(w1)) == NULL)
|
||||
goto fail;
|
||||
goto end;
|
||||
bp = tr1;
|
||||
fp = tr1 + wcslen(tr1) - 1;
|
||||
while (bp < fp) {
|
||||
@ -97,8 +97,9 @@ wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t locale)
|
||||
*bp++ = *fp;
|
||||
*fp-- = c;
|
||||
}
|
||||
free(tr2);
|
||||
if ((tr2 = wcsdup(w2)) == NULL)
|
||||
goto fail;
|
||||
goto end;
|
||||
bp = tr2;
|
||||
fp = tr2 + wcslen(tr2) - 1;
|
||||
while (bp < fp) {
|
||||
@ -111,6 +112,7 @@ wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t locale)
|
||||
}
|
||||
|
||||
if (direc & DIRECTIVE_POSITION) {
|
||||
int check1, check2;
|
||||
while (*w1 && *w2) {
|
||||
pri1 = pri2 = 0;
|
||||
check1 = check2 = 1;
|
||||
@ -120,7 +122,7 @@ wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t locale)
|
||||
&pri1, pass, &st1);
|
||||
if (pri1 < 0) {
|
||||
errno = EINVAL;
|
||||
goto fail;
|
||||
goto end;
|
||||
}
|
||||
if (!pri1) {
|
||||
pri1 = COLLATE_MAX_PRIORITY;
|
||||
@ -133,7 +135,7 @@ wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t locale)
|
||||
&pri2, pass, &st2);
|
||||
if (pri2 < 0) {
|
||||
errno = EINVAL;
|
||||
goto fail;
|
||||
goto end;
|
||||
}
|
||||
if (!pri2) {
|
||||
pri2 = COLLATE_MAX_PRIORITY;
|
||||
@ -149,58 +151,64 @@ wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t locale)
|
||||
w1 += len1;
|
||||
w2 += len2;
|
||||
}
|
||||
} else {
|
||||
while (*w1 && *w2) {
|
||||
pri1 = pri2 = 0;
|
||||
check1 = check2 = 1;
|
||||
while ((pri1 == pri2) && (check1 || check2)) {
|
||||
while (check1 && *w1) {
|
||||
_collate_lookup(table, w1,
|
||||
&len1, &pri1, pass, &st1);
|
||||
if (pri1 > 0)
|
||||
break;
|
||||
if (pri1 < 0) {
|
||||
errno = EINVAL;
|
||||
goto fail;
|
||||
}
|
||||
st1 = NULL;
|
||||
w1 += 1;
|
||||
}
|
||||
check1 = (st1 != NULL);
|
||||
while (check2 && *w2) {
|
||||
_collate_lookup(table, w2,
|
||||
&len2, &pri2, pass, &st2);
|
||||
if (pri2 > 0)
|
||||
break;
|
||||
if (pri2 < 0) {
|
||||
errno = EINVAL;
|
||||
goto fail;
|
||||
}
|
||||
st2 = NULL;
|
||||
w2 += 1;
|
||||
}
|
||||
check2 = (st2 != NULL);
|
||||
if (!pri1 || !pri2)
|
||||
break;
|
||||
if (!*w1) {
|
||||
if (*w2) {
|
||||
ret = -(int)*w2;
|
||||
goto end;
|
||||
}
|
||||
if (!pri1 || !pri2)
|
||||
} else {
|
||||
ret = *w1;
|
||||
goto end;
|
||||
}
|
||||
} else {
|
||||
int vpri1 = 0, vpri2 = 0;
|
||||
while (*w1 || *w2 || st1 || st2) {
|
||||
pri1 = 1;
|
||||
while (*w1 || st1) {
|
||||
_collate_lookup(table, w1, &len1, &pri1,
|
||||
pass, &st1);
|
||||
w1 += len1;
|
||||
if (pri1 > 0) {
|
||||
vpri1++;
|
||||
break;
|
||||
}
|
||||
|
||||
if (pri1 < 0) {
|
||||
errno = EINVAL;
|
||||
goto end;
|
||||
}
|
||||
st1 = NULL;
|
||||
}
|
||||
pri2 = 1;
|
||||
while (*w2 || st2) {
|
||||
_collate_lookup(table, w2, &len2, &pri2,
|
||||
pass, &st2);
|
||||
w2 += len2;
|
||||
if (pri2 > 0) {
|
||||
vpri2++;
|
||||
break;
|
||||
}
|
||||
if (pri2 < 0) {
|
||||
errno = EINVAL;
|
||||
goto end;
|
||||
}
|
||||
st2 = NULL;
|
||||
}
|
||||
if ((!pri1 || !pri2) && (vpri1 == vpri2))
|
||||
break;
|
||||
if (pri1 != pri2) {
|
||||
ret = pri1 - pri2;
|
||||
goto end;
|
||||
}
|
||||
w1 += len1;
|
||||
w2 += len2;
|
||||
}
|
||||
}
|
||||
if (!*w1) {
|
||||
if (*w2) {
|
||||
ret = -(int)*w2;
|
||||
if (vpri1 && !vpri2) {
|
||||
ret = 1;
|
||||
goto end;
|
||||
}
|
||||
if (!vpri1 && vpri2) {
|
||||
ret = -1;
|
||||
goto end;
|
||||
}
|
||||
} else {
|
||||
ret = *w1;
|
||||
goto end;
|
||||
}
|
||||
}
|
||||
ret = 0;
|
||||
@ -210,10 +218,6 @@ wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t locale)
|
||||
free(tr2);
|
||||
|
||||
return (ret);
|
||||
|
||||
fail:
|
||||
ret = wcscmp(ws1, ws2);
|
||||
goto end;
|
||||
}
|
||||
|
||||
int
|
||||
|
@ -1,5 +1,7 @@
|
||||
/*-
|
||||
* Copyright (c) 2016 Baptiste Daroussin <bapt@FreeBSD.org>
|
||||
* Copyright 2016 Tom Lane <tgl@sss.pgh.pa.us>
|
||||
* Copyright 2017 Nexenta Systems, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@ -30,6 +32,8 @@ __FBSDID("$FreeBSD$");
|
||||
#include <wchar.h>
|
||||
#include <locale.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include <atf-c.h>
|
||||
|
||||
@ -55,9 +59,98 @@ ATF_TC_BODY(russian_collation, tc)
|
||||
"Bad collation, expected: '%ls' got '%ls'", res, c);
|
||||
}
|
||||
|
||||
#define NSTRINGS 2000
|
||||
#define MAXSTRLEN 20
|
||||
#define MAXXFRMLEN (MAXSTRLEN * 20)
|
||||
|
||||
typedef struct {
|
||||
char sval[MAXSTRLEN];
|
||||
char xval[MAXXFRMLEN];
|
||||
} cstr;
|
||||
|
||||
ATF_TC_WITHOUT_HEAD(strcoll_vs_strxfrm);
|
||||
ATF_TC_BODY(strcoll_vs_strxfrm, tc)
|
||||
{
|
||||
cstr data[NSTRINGS];
|
||||
char *curloc;
|
||||
int i, j;
|
||||
|
||||
curloc = setlocale(LC_ALL, "en_US.UTF-8");
|
||||
ATF_CHECK_MSG(curloc != NULL, "Fail to set locale");
|
||||
|
||||
/* Ensure new random() values on every run */
|
||||
srandom((unsigned int) time(NULL));
|
||||
|
||||
/* Generate random UTF8 strings of length less than MAXSTRLEN bytes */
|
||||
for (i = 0; i < NSTRINGS; i++) {
|
||||
char *p;
|
||||
int len;
|
||||
|
||||
again:
|
||||
p = data[i].sval;
|
||||
len = 1 + (random() % (MAXSTRLEN - 1));
|
||||
while (len > 0) {
|
||||
int c;
|
||||
/*
|
||||
* Generate random printable char in ISO8859-1 range.
|
||||
* Bias towards producing a lot of spaces.
|
||||
*/
|
||||
|
||||
if ((random() % 16) < 3) {
|
||||
c = ' ';
|
||||
} else {
|
||||
do {
|
||||
c = random() & 0xFF;
|
||||
} while (!((c >= ' ' && c <= 127) ||
|
||||
(c >= 0xA0 && c <= 0xFF)));
|
||||
}
|
||||
|
||||
if (c <= 127) {
|
||||
*p++ = c;
|
||||
len--;
|
||||
} else {
|
||||
if (len < 2)
|
||||
break;
|
||||
/* Poor man's utf8-ification */
|
||||
*p++ = 0xC0 + (c >> 6);
|
||||
len--;
|
||||
*p++ = 0x80 + (c & 0x3F);
|
||||
len--;
|
||||
}
|
||||
}
|
||||
*p = '\0';
|
||||
/* strxfrm() each string as we produce it */
|
||||
errno = 0;
|
||||
ATF_CHECK_MSG(strxfrm(data[i].xval, data[i].sval,
|
||||
MAXXFRMLEN) < MAXXFRMLEN, "strxfrm() result for %d-length "
|
||||
" string exceeded %d bytes", (int)strlen(data[i].sval),
|
||||
MAXXFRMLEN);
|
||||
|
||||
/*
|
||||
* Amend strxfrm() failing on certain characters to be fixed and
|
||||
* test later
|
||||
*/
|
||||
if (errno != 0)
|
||||
goto again;
|
||||
}
|
||||
|
||||
for (i = 0; i < NSTRINGS; i++) {
|
||||
for (j = 0; j < NSTRINGS; j++) {
|
||||
int sr = strcoll(data[i].sval, data[j].sval);
|
||||
int sx = strcmp(data[i].xval, data[j].xval);
|
||||
|
||||
ATF_CHECK_MSG(!((sr * sx < 0) ||
|
||||
(sr * sx == 0 && sr + sx != 0)),
|
||||
"%s: diff for \"%s\" and \"%s\"",
|
||||
curloc, data[i].sval, data[j].sval);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ATF_TP_ADD_TCS(tp)
|
||||
{
|
||||
ATF_TP_ADD_TC(tp, russian_collation);
|
||||
ATF_TP_ADD_TC(tp, strcoll_vs_strxfrm);
|
||||
|
||||
return (atf_no_error());
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user