3c7b03ea74
with the major functionality and optimizations by Oleg Moskalenko. It is compatible with the latest version of POSIX and the current GNU sort version that we have in base. Besides this, it implements all the functionality introduced in later versions of GNU sort. For now, it will be installed as "bsdsort", keeping GNU sort as the default sort implementation.
1308 lines
26 KiB
C
1308 lines
26 KiB
C
/*-
|
|
* Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
|
|
* Copyright (C) 2012 Oleg Moskalenko <oleg.moskalenko@citrix.com>
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*/
|
|
|
|
#include <sys/cdefs.h>
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
#include <sys/types.h>
|
|
|
|
#include <errno.h>
|
|
#include <err.h>
|
|
#include <langinfo.h>
|
|
#include <limits.h>
|
|
#include <math.h>
|
|
#include <md5.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <wchar.h>
|
|
#include <wctype.h>
|
|
|
|
#include "coll.h"
|
|
#include "vsort.h"
|
|
|
|
/* Array of sort key specifications and the number of entries in it. */
struct key_specs *keys = NULL;
size_t keys_num = 0;

/* Punctuation symbols used when parsing numeric keys. */
wchar_t symbol_decimal_point = L'.';
/* collate rules define no default thousands separator, hence zero: */
wchar_t symbol_thousands_sep = 0;
wchar_t symbol_negative_sign = L'-';
wchar_t symbol_positive_sign = L'+';
|
|
|
|
/*
 * Forward declarations of the per-key comparison functions that
 * get_sort_func() can hand out.
 */
static int wstrcoll(struct key_value *kv1, struct key_value *kv2,
    size_t offset);
static int gnumcoll(struct key_value *kv1, struct key_value *kv2,
    size_t offset);
static int monthcoll(struct key_value *kv1, struct key_value *kv2,
    size_t offset);
static int numcoll(struct key_value *kv1, struct key_value *kv2,
    size_t offset);
static int hnumcoll(struct key_value *kv1, struct key_value *kv2,
    size_t offset);
static int randomcoll(struct key_value *kv1, struct key_value *kv2,
    size_t offset);
static int versioncoll(struct key_value *kv1, struct key_value *kv2,
    size_t offset);
|
|
|
|
/*
 * Allocate a keys array of keys_array_size() bytes and zero-fill it.
 */
struct keys_array *
keys_array_alloc(void)
{
	size_t nbytes = keys_array_size();
	struct keys_array *arr = sort_malloc(nbytes);

	memset(arr, 0, nbytes);
	return (arr);
}
|
|
|
|
/*
|
|
* Calculate whether we need key hint space
|
|
*/
|
|
static size_t
|
|
key_hint_size(void)
|
|
{
|
|
|
|
return (need_hint ? sizeof(struct key_hint) : 0);
|
|
}
|
|
|
|
/*
|
|
* Calculate keys array size
|
|
*/
|
|
size_t
|
|
keys_array_size(void)
|
|
{
|
|
|
|
return (keys_num * (sizeof(struct key_value) + key_hint_size()));
|
|
}
|
|
|
|
/*
|
|
* Clean data of keys array
|
|
*/
|
|
void
|
|
clean_keys_array(const struct bwstring *s, struct keys_array *ka)
|
|
{
|
|
|
|
if (ka) {
|
|
for (size_t i = 0; i < keys_num; ++i)
|
|
if (ka->key[i].k && ka->key[i].k != s)
|
|
bwsfree(ka->key[i].k);
|
|
memset(ka, 0, keys_array_size());
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Set value of a key in the keys set
|
|
*/
|
|
void
|
|
set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind)
|
|
{
|
|
|
|
if (ka && keys_num > ind) {
|
|
struct key_value *kv;
|
|
|
|
kv = &(ka->key[ind]);
|
|
|
|
if (kv->k && kv->k != s)
|
|
bwsfree(kv->k);
|
|
kv->k = s;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Initialize a sort list item
|
|
*/
|
|
struct sort_list_item *
|
|
sort_list_item_alloc(void)
|
|
{
|
|
struct sort_list_item *si;
|
|
size_t sz;
|
|
|
|
sz = sizeof(struct sort_list_item) + keys_array_size();
|
|
si = sort_malloc(sz);
|
|
memset(si, 0, sz);
|
|
|
|
return (si);
|
|
}
|
|
|
|
size_t
|
|
sort_list_item_size(struct sort_list_item *si)
|
|
{
|
|
size_t ret = 0;
|
|
|
|
if (si) {
|
|
ret = sizeof(struct sort_list_item) + keys_array_size();
|
|
if (si->str)
|
|
ret += bws_memsize(si->str);
|
|
for (size_t i = 0; i < keys_num; ++i) {
|
|
struct key_value *kv;
|
|
|
|
kv = &(si->ka.key[i]);
|
|
|
|
if (kv->k != si->str)
|
|
ret += bws_memsize(kv->k);
|
|
}
|
|
}
|
|
return (ret);
|
|
}
|
|
|
|
/*
|
|
* Calculate key for a sort list item
|
|
*/
|
|
static void
|
|
sort_list_item_make_key(struct sort_list_item *si)
|
|
{
|
|
|
|
preproc(si->str, &(si->ka));
|
|
}
|
|
|
|
/*
|
|
* Set value of a sort list item.
|
|
* Return combined string and keys memory size.
|
|
*/
|
|
void
|
|
sort_list_item_set(struct sort_list_item *si, struct bwstring *str)
|
|
{
|
|
|
|
if (si) {
|
|
clean_keys_array(si->str, &(si->ka));
|
|
if (si->str) {
|
|
if (si->str == str) {
|
|
/* we are trying to reset the same string */
|
|
return;
|
|
} else {
|
|
bwsfree(si->str);
|
|
si->str = NULL;
|
|
}
|
|
}
|
|
si->str = str;
|
|
sort_list_item_make_key(si);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* De-allocate a sort list item object memory
|
|
*/
|
|
void
|
|
sort_list_item_clean(struct sort_list_item *si)
|
|
{
|
|
|
|
if (si) {
|
|
clean_keys_array(si->str, &(si->ka));
|
|
if (si->str) {
|
|
bwsfree(si->str);
|
|
si->str = NULL;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
 * Advance from position `start` to column number `cols` (1-based).
 *
 * With `skip_blanks` set, blanks at `start` are stepped over first.
 * `*empty_key` is set when the resulting position is at or beyond the
 * end of the string.  A column number below 1 yields BWSLEN(s) + 1.
 */
static size_t
skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start,
    bool skip_blanks, bool *empty_key)
{

	if (cols < 1)
		return (BWSLEN(s) + 1);

	if (skip_blanks) {
		for (; start < BWSLEN(s); start++)
			if (!iswblank(BWS_GET(s, start)))
				break;
	}

	/* column 1 is the current position, so move cols - 1 steps */
	for (; cols > 1 && start < BWSLEN(s); cols--)
		start++;

	if (start >= BWSLEN(s))
		*empty_key = true;

	return (start);
}
|
|
|
|
/*
|
|
* Skip fields according to specs
|
|
*/
|
|
static size_t
|
|
skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field)
|
|
{
|
|
|
|
if (fields < 2) {
|
|
if (BWSLEN(s) == 0)
|
|
*empty_field = true;
|
|
return (0);
|
|
} else if (!(sort_opts_vals.tflag)) {
|
|
size_t cpos = 0;
|
|
bool pb = true;
|
|
|
|
while (cpos < BWSLEN(s)) {
|
|
bool isblank;
|
|
|
|
isblank = iswblank(BWS_GET(s,cpos));
|
|
|
|
if (isblank && !pb) {
|
|
--fields;
|
|
if (fields <= 1)
|
|
return (cpos);
|
|
}
|
|
pb = isblank;
|
|
++cpos;
|
|
}
|
|
if (fields > 1)
|
|
*empty_field = true;
|
|
return (cpos);
|
|
} else {
|
|
size_t cpos = 0;
|
|
|
|
while (cpos < BWSLEN(s)) {
|
|
if (BWS_GET(s,cpos) == sort_opts_vals.field_sep) {
|
|
--fields;
|
|
if (fields <= 1)
|
|
return (cpos + 1);
|
|
}
|
|
++cpos;
|
|
}
|
|
if (fields > 1)
|
|
*empty_field = true;
|
|
return (cpos);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Find fields start
|
|
*/
|
|
static void
|
|
find_field_start(const struct bwstring *s, struct key_specs *ks,
|
|
size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key)
|
|
{
|
|
|
|
*field_start = skip_fields_to_start(s, ks->f1, empty_field);
|
|
if (!*empty_field)
|
|
*key_start = skip_cols_to_start(s, ks->c1, *field_start,
|
|
ks->pos1b, empty_key);
|
|
else
|
|
*empty_key = true;
|
|
}
|
|
|
|
/*
|
|
* Find end key position
|
|
*/
|
|
static size_t
|
|
find_field_end(const struct bwstring *s, struct key_specs *ks)
|
|
{
|
|
size_t f2, next_field_start, pos_end;
|
|
bool empty_field, empty_key;
|
|
|
|
pos_end = 0;
|
|
next_field_start = 0;
|
|
empty_field = false;
|
|
empty_key = false;
|
|
f2 = ks->f2;
|
|
|
|
if (f2 == 0)
|
|
return (BWSLEN(s) + 1);
|
|
else {
|
|
if (ks->c2 == 0) {
|
|
next_field_start = skip_fields_to_start(s, f2 + 1,
|
|
&empty_field);
|
|
if ((next_field_start > 0) && sort_opts_vals.tflag &&
|
|
(sort_opts_vals.field_sep == BWS_GET(s,
|
|
next_field_start - 1)))
|
|
--next_field_start;
|
|
} else
|
|
next_field_start = skip_fields_to_start(s, f2,
|
|
&empty_field);
|
|
}
|
|
|
|
if (empty_field || (next_field_start >= BWSLEN(s)))
|
|
return (BWSLEN(s) + 1);
|
|
|
|
if (ks->c2) {
|
|
pos_end = skip_cols_to_start(s, ks->c2, next_field_start,
|
|
ks->pos2b, &empty_key);
|
|
if (pos_end < BWSLEN(s))
|
|
++pos_end;
|
|
} else
|
|
pos_end = next_field_start;
|
|
|
|
return (pos_end);
|
|
}
|
|
|
|
/*
 * Extract the part of `s` selected by the key specs `ks` into a newly
 * allocated string.  An empty string is returned when either argument
 * is NULL, when the key is empty, or when the key end lies before the
 * key start.
 */
static struct bwstring *
cut_field(const struct bwstring *s, struct key_specs *ks)
{
	struct bwstring *field;
	size_t field_start = 0, key_start = 0, key_end, len = 0;
	bool empty_field = false, empty_key = false;

	if (s == NULL || ks == NULL)
		return (bwsalloc(0));

	find_field_start(s, ks, &field_start, &key_start, &empty_field,
	    &empty_key);

	if (!empty_key) {
		key_end = find_field_end(s, ks);
		if (key_end >= key_start)
			len = key_end - key_start;
	}

	field = bwsalloc(len);
	if (len != 0)
		bwsnocpy(field, s, key_start, len);
	return (field);
}
|
|
|
|
/*
|
|
* Preprocesses a line applying the necessary transformations
|
|
* specified by command line options and returns the preprocessed
|
|
* string, which can be used to compare.
|
|
*/
|
|
int
|
|
preproc(struct bwstring *s, struct keys_array *ka)
|
|
{
|
|
|
|
if (sort_opts_vals.kflag)
|
|
for (size_t i = 0; i < keys_num; i++) {
|
|
struct bwstring *key;
|
|
struct key_specs *kspecs;
|
|
struct sort_mods *sm;
|
|
|
|
kspecs = &(keys[i]);
|
|
key = cut_field(s, kspecs);
|
|
|
|
sm = &(kspecs->sm);
|
|
if (sm->dflag)
|
|
key = dictionary_order(key);
|
|
else if (sm->iflag)
|
|
key = ignore_nonprinting(key);
|
|
if (sm->fflag || sm->Mflag)
|
|
key = ignore_case(key);
|
|
|
|
set_key_on_keys_array(ka, key, i);
|
|
}
|
|
else {
|
|
struct bwstring *ret = NULL;
|
|
struct sort_mods *sm = default_sort_mods;
|
|
|
|
if (sm->bflag) {
|
|
if (ret == NULL)
|
|
ret = bwsdup(s);
|
|
ret = ignore_leading_blanks(ret);
|
|
}
|
|
if (sm->dflag) {
|
|
if (ret == NULL)
|
|
ret = bwsdup(s);
|
|
ret = dictionary_order(ret);
|
|
} else if (sm->iflag) {
|
|
if (ret == NULL)
|
|
ret = bwsdup(s);
|
|
ret = ignore_nonprinting(ret);
|
|
}
|
|
if (sm->fflag || sm->Mflag) {
|
|
if (ret == NULL)
|
|
ret = bwsdup(s);
|
|
ret = ignore_case(ret);
|
|
}
|
|
if (ret == NULL)
|
|
set_key_on_keys_array(ka, s, 0);
|
|
else
|
|
set_key_on_keys_array(ka, ret, 0);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
cmpcoll_t
|
|
get_sort_func(struct sort_mods *sm)
|
|
{
|
|
|
|
if (sm->nflag)
|
|
return (numcoll);
|
|
else if (sm->hflag)
|
|
return (hnumcoll);
|
|
else if (sm->gflag)
|
|
return (gnumcoll);
|
|
else if (sm->Mflag)
|
|
return (monthcoll);
|
|
else if (sm->Rflag)
|
|
return (randomcoll);
|
|
else if (sm->Vflag)
|
|
return (versioncoll);
|
|
else
|
|
return (wstrcoll);
|
|
}
|
|
|
|
/*
|
|
* Compares the given strings. Returns a positive number if
|
|
* the first precedes the second, a negative number if the second is
|
|
* the preceding one, and zero if they are equal. This function calls
|
|
* the underlying collate functions, which done the actual comparison.
|
|
*/
|
|
int
|
|
key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset)
|
|
{
|
|
struct sort_mods *sm;
|
|
int res = 0;
|
|
|
|
for (size_t i = 0; i < keys_num; ++i) {
|
|
sm = &(keys[i].sm);
|
|
|
|
if (sm->rflag)
|
|
res = sm->func(&(ps2->key[i]), &(ps1->key[i]), offset);
|
|
else
|
|
res = sm->func(&(ps1->key[i]), &(ps2->key[i]), offset);
|
|
|
|
if (res)
|
|
break;
|
|
|
|
/* offset applies to only the first key */
|
|
offset = 0;
|
|
}
|
|
return (res);
|
|
}
|
|
|
|
/*
|
|
* Compare two strings.
|
|
* Plain symbol-by-symbol comparison.
|
|
*/
|
|
int
|
|
top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2)
|
|
{
|
|
|
|
if (default_sort_mods->rflag) {
|
|
const struct bwstring *tmp;
|
|
|
|
tmp = s1;
|
|
s1 = s2;
|
|
s2 = tmp;
|
|
}
|
|
|
|
return (bwscoll(s1, s2, 0));
|
|
}
|
|
|
|
/*
|
|
* Compare a string and a sort list item, according to the sort specs.
|
|
*/
|
|
int
|
|
str_list_coll(struct bwstring *str1, struct sort_list_item **ss2)
|
|
{
|
|
struct keys_array *ka1;
|
|
int ret = 0;
|
|
|
|
ka1 = keys_array_alloc();
|
|
|
|
preproc(str1, ka1);
|
|
|
|
sort_list_item_make_key(*ss2);
|
|
|
|
if (debug_sort) {
|
|
bwsprintf(stdout, str1, "; s1=<", ">");
|
|
bwsprintf(stdout, (*ss2)->str, ", s2=<", ">");
|
|
}
|
|
|
|
ret = key_coll(ka1, &((*ss2)->ka), 0);
|
|
|
|
if (debug_sort)
|
|
printf("; cmp1=%d", ret);
|
|
|
|
clean_keys_array(str1, ka1);
|
|
sort_free(ka1);
|
|
|
|
if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
|
|
ret = top_level_str_coll(str1, ((*ss2)->str));
|
|
if (debug_sort)
|
|
printf("; cmp2=%d", ret);
|
|
}
|
|
|
|
if (debug_sort)
|
|
printf("\n");
|
|
|
|
return (ret);
|
|
}
|
|
|
|
/*
|
|
* Compare two sort list items, according to the sort specs.
|
|
*/
|
|
int
|
|
list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2,
|
|
size_t offset)
|
|
{
|
|
int ret;
|
|
|
|
ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset);
|
|
|
|
if (debug_sort) {
|
|
if (offset)
|
|
printf("; offset=%d", (int) offset);
|
|
bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">");
|
|
bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">");
|
|
printf("; cmp1=%d\n", ret);
|
|
}
|
|
|
|
if (ret)
|
|
return (ret);
|
|
|
|
if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
|
|
ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str));
|
|
if (debug_sort)
|
|
printf("; cmp2=%d\n", ret);
|
|
}
|
|
|
|
return (ret);
|
|
}
|
|
|
|
/*
 * Compare two sort list items according to the sort specs, with no
 * offset.
 */
int
list_coll(struct sort_list_item **ss1, struct sort_list_item **ss2)
{
	const size_t no_offset = 0;

	return (list_coll_offset(ss1, ss2, no_offset));
}
|
|
|
|
/*
 * LSCDEF(N) defines list_coll_N(): a two-argument comparator that
 * forwards to list_coll_offset() with the fixed offset N.
 */
#define LSCDEF(N)							\
static int								\
list_coll_##N(struct sort_list_item **a, struct sort_list_item **b)	\
{									\
									\
	return (list_coll_offset(a, b, N));				\
}

/* Comparators for the offsets 1 through 20. */
LSCDEF(1)
LSCDEF(2)
LSCDEF(3)
LSCDEF(4)
LSCDEF(5)
LSCDEF(6)
LSCDEF(7)
LSCDEF(8)
LSCDEF(9)
LSCDEF(10)
LSCDEF(11)
LSCDEF(12)
LSCDEF(13)
LSCDEF(14)
LSCDEF(15)
LSCDEF(16)
LSCDEF(17)
LSCDEF(18)
LSCDEF(19)
LSCDEF(20)
|
|
|
|
listcoll_t
|
|
get_list_call_func(size_t offset)
|
|
{
|
|
static const listcoll_t lsarray[] = { list_coll, list_coll_1,
|
|
list_coll_2, list_coll_3, list_coll_4, list_coll_5,
|
|
list_coll_6, list_coll_7, list_coll_8, list_coll_9,
|
|
list_coll_10, list_coll_11, list_coll_12, list_coll_13,
|
|
list_coll_14, list_coll_15, list_coll_16, list_coll_17,
|
|
list_coll_18, list_coll_19, list_coll_20 };
|
|
|
|
if (offset <= 20)
|
|
return (lsarray[offset]);
|
|
|
|
return (list_coll);
|
|
}
|
|
|
|
/*
|
|
* Compare two sort list items, only by their original string.
|
|
*/
|
|
int
|
|
list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2)
|
|
{
|
|
|
|
return (top_level_str_coll(((*ss1)->str), ((*ss2)->str)));
|
|
}
|
|
|
|
/*
 * Upper bound on the number of digits kept for each part of a number
 * (the part before and the part after the decimal point).
 */
#define MAX_NUM_SIZE (128)
|
|
|
|
/*
 * Translate a size suffix character into its rank and store it in *si:
 * k/K = 1, M = 2, G = 3, T = 4, P = 5, E = 6, Z = 7, Y = 8.
 * Any other character yields 0.
 */
static void
setsuffix(wchar_t c, unsigned char *si)
{
	unsigned char rank;

	switch (c) {
	case L'k':
	case L'K':
		rank = 1;
		break;
	case L'M':
		rank = 2;
		break;
	case L'G':
		rank = 3;
		break;
	case L'T':
		rank = 4;
		break;
	case L'P':
		rank = 5;
		break;
	case L'E':
		rank = 6;
		break;
	case L'Z':
		rank = 7;
		break;
	case L'Y':
		rank = 8;
		break;
	default:
		rank = 0;
		break;
	}

	*si = rank;
}
|
|
|
|
/*
|
|
* Read string s and parse the string into a fixed-decimal-point number.
|
|
* sign equals -1 if the number is negative (explicit plus is not allowed,
|
|
* according to GNU sort's "info sort".
|
|
* The number part before decimal point is in the smain, after the decimal
|
|
* point is in sfrac, tail is the pointer to the remainder of the string.
|
|
*/
|
|
static int
|
|
read_number(struct bwstring *s0, int *sign, wchar_t *smain, int *main_len, wchar_t *sfrac, int *frac_len, unsigned char *si)
|
|
{
|
|
bwstring_iterator s;
|
|
|
|
s = bws_begin(s0);
|
|
|
|
/* always end the fraction with zero, even if we have no fraction */
|
|
sfrac[0] = 0;
|
|
|
|
while (iswblank(bws_get_iter_value(s)))
|
|
s = bws_iterator_inc(s, 1);
|
|
|
|
if (bws_get_iter_value(s) == symbol_negative_sign) {
|
|
*sign = -1;
|
|
s = bws_iterator_inc(s, 1);
|
|
}
|
|
|
|
// This is '0', not '\0', do not change this
|
|
while (iswdigit(bws_get_iter_value(s)) &&
|
|
(bws_get_iter_value(s) == L'0'))
|
|
s = bws_iterator_inc(s, 1);
|
|
|
|
while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) {
|
|
if (iswdigit(bws_get_iter_value(s))) {
|
|
smain[*main_len] = bws_get_iter_value(s);
|
|
s = bws_iterator_inc(s, 1);
|
|
*main_len += 1;
|
|
} else if (symbol_thousands_sep &&
|
|
(bws_get_iter_value(s) == symbol_thousands_sep))
|
|
s = bws_iterator_inc(s, 1);
|
|
else
|
|
break;
|
|
}
|
|
|
|
smain[*main_len] = 0;
|
|
|
|
if (bws_get_iter_value(s) == symbol_decimal_point) {
|
|
s = bws_iterator_inc(s, 1);
|
|
while (iswdigit(bws_get_iter_value(s)) &&
|
|
*frac_len < MAX_NUM_SIZE) {
|
|
sfrac[*frac_len] = bws_get_iter_value(s);
|
|
s = bws_iterator_inc(s, 1);
|
|
*frac_len += 1;
|
|
}
|
|
sfrac[*frac_len] = 0;
|
|
|
|
while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') {
|
|
--(*frac_len);
|
|
sfrac[*frac_len] = L'\0';
|
|
}
|
|
}
|
|
|
|
setsuffix(bws_get_iter_value(s),si);
|
|
|
|
if ((*main_len + *frac_len) == 0)
|
|
*sign = 0;
|
|
|
|
return (0);
|
|
}
|
|
|
|
/*
|
|
* Implements string sort.
|
|
*/
|
|
static int
|
|
wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
|
|
{
|
|
|
|
if (debug_sort) {
|
|
if (offset)
|
|
printf("; offset=%d\n", (int) offset);
|
|
bwsprintf(stdout, kv1->k, "; k1=<", ">");
|
|
printf("(%zu)", BWSLEN(kv1->k));
|
|
bwsprintf(stdout, kv2->k, ", k2=<", ">");
|
|
printf("(%zu)", BWSLEN(kv2->k));
|
|
}
|
|
|
|
return (bwscoll(kv1->k, kv2->k, offset));
|
|
}
|
|
|
|
/*
 * Compare two suffix ranks: the result is the difference of the two
 * values after conversion to char.
 */
static inline int
cmpsuffix(unsigned char si1, unsigned char si2)
{
	char lhs = (char)si1;
	char rhs = (char)si2;

	return (lhs - rhs);
}
|
|
|
|
/*
|
|
* Implements numeric sort for -n and -h.
|
|
*/
|
|
static int
|
|
numcoll_impl(struct key_value *kv1, struct key_value *kv2, size_t offset, bool use_suffix)
|
|
{
|
|
struct bwstring *s1, *s2;
|
|
wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1];
|
|
wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1];
|
|
int cmp_res, frac1, frac2, main1, main2, sign1, sign2;
|
|
unsigned char SI1, SI2;
|
|
bool e1, e2, key1_read, key2_read;
|
|
|
|
s1 = kv1->k;
|
|
s2 = kv2->k;
|
|
sign1 = sign2 = 0;
|
|
main1 = main2 = 0;
|
|
frac1 = frac2 = 0;
|
|
|
|
cmp_res = 0;
|
|
key1_read = key2_read = false;
|
|
|
|
UNUSED_ARG(offset);
|
|
|
|
if (debug_sort) {
|
|
bwsprintf(stdout, s1, "; k1=<", ">");
|
|
bwsprintf(stdout, s2, ", k2=<", ">");
|
|
}
|
|
|
|
if (s1 == s2)
|
|
return (0);
|
|
|
|
if (kv1->hint->status == HS_UNINITIALIZED) {
|
|
/* read the number from the string */
|
|
read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
|
|
key1_read = true;
|
|
kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10);
|
|
if(main1 < 1 && frac1 < 1)
|
|
kv1->hint->v.nh.empty=true;
|
|
kv1->hint->v.nh.si = SI1;
|
|
kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ?
|
|
HS_INITIALIZED : HS_ERROR;
|
|
kv1->hint->v.nh.neg = (sign1 < 0) ? true : false;
|
|
}
|
|
|
|
if (kv2->hint->status == HS_UNINITIALIZED) {
|
|
/* read the number from the string */
|
|
read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2,&SI2);
|
|
key2_read = true;
|
|
kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10);
|
|
if(main2 < 1 && frac2 < 1)
|
|
kv2->hint->v.nh.empty=true;
|
|
kv2->hint->v.nh.si = SI2;
|
|
kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ?
|
|
HS_INITIALIZED : HS_ERROR;
|
|
kv2->hint->v.nh.neg = (sign2 < 0) ? true : false;
|
|
}
|
|
|
|
if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status ==
|
|
HS_INITIALIZED) {
|
|
unsigned long long n1, n2;
|
|
bool neg1, neg2;
|
|
|
|
e1 = kv1->hint->v.nh.empty;
|
|
e2 = kv2->hint->v.nh.empty;
|
|
|
|
if (e1 && e2)
|
|
return (0);
|
|
|
|
neg1 = kv1->hint->v.nh.neg;
|
|
neg2 = kv2->hint->v.nh.neg;
|
|
|
|
if (neg1 && !neg2)
|
|
return (-1);
|
|
if (neg2 && !neg1)
|
|
return (+1);
|
|
|
|
if (e1)
|
|
return (neg2 ? +1 : -1);
|
|
else if (e2)
|
|
return (neg1 ? -1 : +1);
|
|
|
|
|
|
if (use_suffix) {
|
|
cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si);
|
|
if (cmp_res)
|
|
return (neg1 ? -cmp_res : cmp_res);
|
|
}
|
|
|
|
n1 = kv1->hint->v.nh.n1;
|
|
n2 = kv2->hint->v.nh.n1;
|
|
if (n1 < n2)
|
|
return (neg1 ? +1 : -1);
|
|
else if (n1 > n2)
|
|
return (neg1 ? -1 : +1);
|
|
}
|
|
|
|
/* read the numbers from the strings */
|
|
if (!key1_read)
|
|
read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
|
|
if (!key2_read)
|
|
read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
|
|
|
|
e1 = ((main1 + frac1) == 0);
|
|
e2 = ((main2 + frac2) == 0);
|
|
|
|
if (e1 && e2)
|
|
return (0);
|
|
|
|
/* we know the result if the signs are different */
|
|
if (sign1 < 0 && sign2 >= 0)
|
|
return (-1);
|
|
if (sign1 >= 0 && sign2 < 0)
|
|
return (+1);
|
|
|
|
if (e1)
|
|
return ((sign2 < 0) ? +1 : -1);
|
|
else if (e2)
|
|
return ((sign1 < 0) ? -1 : +1);
|
|
|
|
if (use_suffix) {
|
|
cmp_res = cmpsuffix(SI1, SI2);
|
|
if (cmp_res)
|
|
return ((sign1 < 0) ? -cmp_res : cmp_res);
|
|
}
|
|
|
|
/* if both numbers are empty assume that the strings are equal */
|
|
if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1)
|
|
return (0);
|
|
|
|
/*
|
|
* if the main part is of different size, we know the result
|
|
* (because the leading zeros are removed)
|
|
*/
|
|
if (main1 < main2)
|
|
cmp_res = -1;
|
|
else if (main1 > main2)
|
|
cmp_res = +1;
|
|
/* if the sizes are equal then simple non-collate string compare gives the correct result */
|
|
else
|
|
cmp_res = wcscmp(smain1, smain2);
|
|
|
|
/* check fraction */
|
|
if (!cmp_res)
|
|
cmp_res = wcscmp(sfrac1, sfrac2);
|
|
|
|
if (!cmp_res)
|
|
return (0);
|
|
|
|
/* reverse result if the signs are negative */
|
|
if (sign1 < 0 && sign2 < 0)
|
|
cmp_res = -cmp_res;
|
|
|
|
return (cmp_res);
|
|
}
|
|
|
|
/*
 * Numeric sort (-n): numcoll_impl() without suffix comparison.
 */
static int
numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
{
	const bool use_suffix = false;

	return (numcoll_impl(kv1, kv2, offset, use_suffix));
}
|
|
|
|
/*
 * 'Human' numeric sort (-h): numcoll_impl() with suffix comparison.
 */
static int
hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
{
	const bool use_suffix = true;

	return (numcoll_impl(kv1, kv2, offset, use_suffix));
}
|
|
|
|
/*
|
|
* Implements random sort (-R).
|
|
*/
|
|
static int
|
|
randomcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
|
|
{
|
|
struct bwstring *s1, *s2;
|
|
MD5_CTX ctx1, ctx2;
|
|
char *b1, *b2;
|
|
|
|
UNUSED_ARG(offset);
|
|
|
|
s1 = kv1->k;
|
|
s2 = kv2->k;
|
|
|
|
if (debug_sort) {
|
|
bwsprintf(stdout, s1, "; k1=<", ">");
|
|
bwsprintf(stdout, s2, ", k2=<", ">");
|
|
}
|
|
|
|
if (s1 == s2)
|
|
return (0);
|
|
|
|
memcpy(&ctx1,&md5_ctx,sizeof(MD5_CTX));
|
|
memcpy(&ctx2,&md5_ctx,sizeof(MD5_CTX));
|
|
|
|
MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1));
|
|
MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2));
|
|
b1 = MD5End(&ctx1, NULL);
|
|
b2 = MD5End(&ctx2, NULL);
|
|
if (b1 == NULL) {
|
|
if (b2 == NULL)
|
|
return (0);
|
|
else {
|
|
sort_free(b2);
|
|
return (-1);
|
|
}
|
|
} else if (b2 == NULL) {
|
|
sort_free(b1);
|
|
return (+1);
|
|
} else {
|
|
int cmp_res;
|
|
|
|
cmp_res = strcmp(b1,b2);
|
|
sort_free(b1);
|
|
sort_free(b2);
|
|
|
|
if (!cmp_res)
|
|
cmp_res = bwscoll(s1, s2, 0);
|
|
|
|
return (cmp_res);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Implements version sort (-V).
|
|
*/
|
|
static int
|
|
versioncoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
|
|
{
|
|
struct bwstring *s1, *s2;
|
|
|
|
UNUSED_ARG(offset);
|
|
|
|
s1 = kv1->k;
|
|
s2 = kv2->k;
|
|
|
|
if (debug_sort) {
|
|
bwsprintf(stdout, s1, "; k1=<", ">");
|
|
bwsprintf(stdout, s2, ", k2=<", ">");
|
|
}
|
|
|
|
if (s1 == s2)
|
|
return (0);
|
|
|
|
return (vcmp(s1, s2));
|
|
}
|
|
|
|
/*
 * True when a conversion that failed with ERANGE produced one of the
 * negative HUGE_VAL values.
 */
static inline bool
huge_minus(double d, int err1)
{

	return (err1 == ERANGE &&
	    (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL));
}
|
|
|
|
/*
 * True when a conversion that failed with ERANGE produced one of the
 * positive HUGE_VAL values.
 */
static inline bool
huge_plus(double d, int err1)
{

	return (err1 == ERANGE &&
	    (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL));
}
|
|
|
|
/*
 * Check whether a value is a NaN.
 *
 * Only isnan() is used: an equality test against NAN can never
 * succeed, because a NaN compares unequal to every value, itself
 * included.
 */
static bool
is_nan(double d)
{

	return (isnan(d) != 0);
}
|
|
|
|
/*
 * Three-way comparison used for two values that are both NaNs.
 *
 * Returns -1 when d1 < d2, +1 when d1 > d2 and 0 otherwise.  Ordered
 * comparisons involving a NaN are always false, so two real NaNs
 * compare equal here.
 */
static int
cmp_nans(double d1, double d2)
{

	if (d1 < d2)
		return (-1);
	if (d1 > d2)
		return (+1);
	return (0);
}
|
|
|
|
/*
|
|
* Implements general numeric sort (-g).
|
|
*/
|
|
static int
|
|
gnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
|
|
{
|
|
double d1, d2;
|
|
int err1, err2;
|
|
bool empty1, empty2, key1_read, key2_read;
|
|
|
|
d1 = d2 = 0;
|
|
err1 = err2 = 0;
|
|
key1_read = key2_read = false;
|
|
|
|
UNUSED_ARG(offset);
|
|
|
|
if (debug_sort) {
|
|
bwsprintf(stdout, kv1->k, "; k1=<", ">");
|
|
bwsprintf(stdout, kv2->k, "; k2=<", ">");
|
|
}
|
|
|
|
if (kv1->hint->status == HS_UNINITIALIZED) {
|
|
errno = 0;
|
|
d1 = bwstod(kv1->k, &empty1);
|
|
err1 = errno;
|
|
|
|
if (empty1)
|
|
kv1->hint->v.gh.notnum = true;
|
|
else if (err1 == 0) {
|
|
kv1->hint->v.gh.d = d1;
|
|
kv1->hint->v.gh.nan = is_nan(d1);
|
|
kv1->hint->status = HS_INITIALIZED;
|
|
} else
|
|
kv1->hint->status = HS_ERROR;
|
|
|
|
key1_read = true;
|
|
}
|
|
|
|
if (kv2->hint->status == HS_UNINITIALIZED) {
|
|
errno = 0;
|
|
d2 = bwstod(kv2->k, &empty2);
|
|
err2 = errno;
|
|
|
|
if (empty2)
|
|
kv2->hint->v.gh.notnum = true;
|
|
else if (err2 == 0) {
|
|
kv2->hint->v.gh.d = d2;
|
|
kv2->hint->v.gh.nan = is_nan(d2);
|
|
kv2->hint->status = HS_INITIALIZED;
|
|
} else
|
|
kv2->hint->status = HS_ERROR;
|
|
|
|
key2_read = true;
|
|
}
|
|
|
|
if (kv1->hint->status == HS_INITIALIZED &&
|
|
kv2->hint->status == HS_INITIALIZED) {
|
|
if (kv1->hint->v.gh.notnum)
|
|
return ((kv2->hint->v.gh.notnum) ? 0 : -1);
|
|
else if (kv2->hint->v.gh.notnum)
|
|
return (+1);
|
|
|
|
if (kv1->hint->v.gh.nan)
|
|
return ((kv2->hint->v.gh.nan) ?
|
|
cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) :
|
|
-1);
|
|
else if (kv2->hint->v.gh.nan)
|
|
return (+1);
|
|
|
|
d1 = kv1->hint->v.gh.d;
|
|
d2 = kv2->hint->v.gh.d;
|
|
|
|
if (d1 < d2)
|
|
return (-1);
|
|
else if (d1 > d2)
|
|
return (+1);
|
|
else
|
|
return (0);
|
|
}
|
|
|
|
if (!key1_read) {
|
|
errno = 0;
|
|
d1 = bwstod(kv1->k, &empty1);
|
|
err1 = errno;
|
|
}
|
|
|
|
if (!key2_read) {
|
|
errno = 0;
|
|
d2 = bwstod(kv2->k, &empty2);
|
|
err2 = errno;
|
|
}
|
|
|
|
/* Non-value case: */
|
|
if (empty1)
|
|
return (empty2 ? 0 : -1);
|
|
else if (empty2)
|
|
return (+1);
|
|
|
|
/* NAN case */
|
|
if (is_nan(d1))
|
|
return (is_nan(d2) ? cmp_nans(d1, d2) : -1);
|
|
else if (is_nan(d2))
|
|
return (+1);
|
|
|
|
/* Infinities */
|
|
if (err1 == ERANGE || err2 == ERANGE) {
|
|
/* Minus infinity case */
|
|
if (huge_minus(d1, err1)) {
|
|
if (huge_minus(d2, err2)) {
|
|
if (d1 < d2)
|
|
return (-1);
|
|
if (d1 > d2)
|
|
return (+1);
|
|
return (0);
|
|
} else
|
|
return (-1);
|
|
|
|
} else if (huge_minus(d2, err2)) {
|
|
if (huge_minus(d1, err1)) {
|
|
if (d1 < d2)
|
|
return (-1);
|
|
if (d1 > d2)
|
|
return (+1);
|
|
return (0);
|
|
} else
|
|
return (+1);
|
|
}
|
|
|
|
/* Plus infinity case */
|
|
if (huge_plus(d1, err1)) {
|
|
if (huge_plus(d2, err2)) {
|
|
if (d1 < d2)
|
|
return (-1);
|
|
if (d1 > d2)
|
|
return (+1);
|
|
return (0);
|
|
} else
|
|
return (+1);
|
|
} else if (huge_plus(d2, err2)) {
|
|
if (huge_plus(d1, err1)) {
|
|
if (d1 < d2)
|
|
return (-1);
|
|
if (d1 > d2)
|
|
return (+1);
|
|
return (0);
|
|
} else
|
|
return (-1);
|
|
}
|
|
}
|
|
|
|
if (d1 < d2)
|
|
return (-1);
|
|
if (d1 > d2)
|
|
return (+1);
|
|
|
|
return (0);
|
|
}
|
|
|
|
/*
|
|
* Implements month sort (-M).
|
|
*/
|
|
static int
|
|
monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
|
|
{
|
|
int val1, val2;
|
|
bool key1_read, key2_read;
|
|
|
|
val1 = val2 = 0;
|
|
key1_read = key2_read = false;
|
|
|
|
UNUSED_ARG(offset);
|
|
|
|
if (debug_sort) {
|
|
bwsprintf(stdout, kv1->k, "; k1=<", ">");
|
|
bwsprintf(stdout, kv2->k, "; k2=<", ">");
|
|
}
|
|
|
|
if (kv1->hint->status == HS_UNINITIALIZED) {
|
|
kv1->hint->v.Mh.m = bws_month_score(kv1->k);
|
|
key1_read = true;
|
|
kv1->hint->status = HS_INITIALIZED;
|
|
}
|
|
|
|
if (kv2->hint->status == HS_UNINITIALIZED) {
|
|
kv2->hint->v.Mh.m = bws_month_score(kv2->k);
|
|
key2_read = true;
|
|
kv2->hint->status = HS_INITIALIZED;
|
|
}
|
|
|
|
if (kv1->hint->status == HS_INITIALIZED) {
|
|
val1 = kv1->hint->v.Mh.m;
|
|
key1_read = true;
|
|
}
|
|
|
|
if (kv2->hint->status == HS_INITIALIZED) {
|
|
val2 = kv2->hint->v.Mh.m;
|
|
key2_read = true;
|
|
}
|
|
|
|
if (!key1_read)
|
|
val1 = bws_month_score(kv1->k);
|
|
if (!key2_read)
|
|
val2 = bws_month_score(kv2->k);
|
|
|
|
if (val1 == val2) {
|
|
return (0);
|
|
}
|
|
if (val1 < val2)
|
|
return (-1);
|
|
return (+1);
|
|
}
|