- Only use multi-threading for large files

- Do not use mmap() by default; it can be enabled by --mmap
- Add some minor optimizations for -u
- Update manual page according to the changes

Submitted by:	Oleg Moskalenko <oleg.moskalenko@citrix.com>
This commit is contained in:
Gabor Kovesdan 2012-05-25 09:30:16 +00:00
parent 39e19560d6
commit 5ca724dc59
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=235987
7 changed files with 90 additions and 23 deletions

View File

@ -499,6 +499,22 @@ bwsfgetln(FILE *f, size_t *len, bool zero_ended, struct reader_buffer *rb)
}
return (bwssbdup(ret, *len));
} else if (!zero_ended && (MB_CUR_MAX == 1)) {
char *ret;
ret = fgetln(f, len);
if (ret == NULL) {
if (!feof(f))
err(2, NULL);
return (NULL);
}
if (*len > 0) {
if (ret[*len - 1] == '\n')
--(*len);
}
return (bwscsbdup(ret, *len));
} else {
wchar_t c = 0;

View File

@ -53,6 +53,8 @@ __FBSDID("$FreeBSD$");
unsigned long long free_memory = 1000000;
unsigned long long available_free_memory = 1000000;
bool use_mmap;
const char *tmpdir = "/var/tmp";
const char *compress_program;
@ -404,23 +406,21 @@ sort_list_dump(struct sort_list *l, const char *fn)
err(2, NULL);
if (l->list) {
struct sort_list_item *last_printed_item;
size_t i;
last_printed_item = NULL;
for (i = 0; i < l->count; i++) {
struct sort_list_item *item;
item = l->list[i];
if (!(sort_opts_vals.uflag) ||
(last_printed_item == NULL) ||
list_coll(&last_printed_item, &item)) {
bwsfwrite(item->str, f,
if (!(sort_opts_vals.uflag)) {
for (i = 0; i < l->count; ++i)
bwsfwrite(l->list[i]->str, f,
sort_opts_vals.zflag);
if (sort_opts_vals.uflag)
} else {
struct sort_list_item *last_printed_item = NULL;
struct sort_list_item *item;
for (i = 0; i < l->count; ++i) {
item = l->list[i];
if ((last_printed_item == NULL) ||
list_coll(&last_printed_item, &item)) {
bwsfwrite(item->str, f, sort_opts_vals.zflag);
last_printed_item = item;
}
}
}
}
@ -657,7 +657,7 @@ file_reader_init(const char *fsrc)
ret->fname = sort_strdup(fsrc);
if (strcmp(fsrc, "-") && (compress_program == NULL)) {
if (strcmp(fsrc, "-") && (compress_program == NULL) && use_mmap) {
do {
struct stat stat_buf;
@ -1539,7 +1539,9 @@ mt_sort(struct sort_list *list,
const char* fn)
{
#if defined(SORT_THREADS)
if (nthreads < 2 || list->count < nthreads) {
if (nthreads < 2 || list->count < MT_SORT_THRESHOLD) {
size_t nthreads_save = nthreads;
nthreads = 1;
#endif
/* if single thread or small data, do simple sort */
sort_func(list->list, list->count,
@ -1547,6 +1549,7 @@ mt_sort(struct sort_list *list,
(int(*)(const void *, const void *)) list_coll);
sort_list_dump(list, fn);
#if defined(SORT_THREADS)
nthreads = nthreads_save;
} else {
/* multi-threaded sort */
struct sort_list **parts;
@ -1590,7 +1593,18 @@ mt_sort(struct sort_list *list,
pthread_attr_init(&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_DETACHED);
pthread_create(&pth, &attr, mt_sort_thread, parts[i]);
/*
 * Start the worker thread, retrying while thread creation fails with
 * EAGAIN.  pthread_create() reports failure through its return value
 * (0 on success, an error number otherwise) and does not set errno, so
 * the returned code itself is what must be examined.
 */
for (;;) {
	int res = pthread_create(&pth, &attr,
	    mt_sort_thread, parts[i]);
	if (res == 0)
		break;
	if (res == EAGAIN) {
		/* Give other threads a chance to run, then try again. */
		pthread_yield();
		continue;
	}
	/* err() formats errno, so pass it the returned error code. */
	errno = res;
	err(2, NULL);
}
pthread_attr_destroy(&attr);
}

View File

@ -84,6 +84,9 @@ struct file0_reader
extern unsigned long long free_memory;
extern unsigned long long available_free_memory;
/* Are we using mmap ? */
extern bool use_mmap;
/* temporary file dir */
extern const char *tmpdir;

View File

@ -609,7 +609,17 @@ run_top_sort_level(struct sort_level *sl)
pthread_attr_setdetachstate(&attr,
PTHREAD_DETACHED);
pthread_create(&pth, &attr, sort_thread, NULL);
/*
 * Start the sorting thread, retrying while thread creation fails with
 * EAGAIN.  pthread_create() reports failure through its return value
 * (0 on success, an error number otherwise) and does not set errno, so
 * the returned code itself is what must be examined.
 */
for (;;) {
	int res = pthread_create(&pth, &attr,
	    sort_thread, NULL);
	if (res == 0)
		break;
	if (res == EAGAIN) {
		/* Give other threads a chance to run, then try again. */
		pthread_yield();
		continue;
	}
	/* err() formats errno, so pass it the returned error code. */
	errno = res;
	err(2, NULL);
}
pthread_attr_destroy(&attr);
}
@ -626,6 +636,10 @@ run_sort(struct sort_list_item **base, size_t nmemb)
struct sort_level *sl;
#if defined(SORT_THREADS)
size_t nthreads_save = nthreads;
if (nmemb < MT_SORT_THRESHOLD)
nthreads = 1;
if (nthreads > 1) {
pthread_mutexattr_t mattr;
@ -663,6 +677,7 @@ run_sort(struct sort_list_item **base, size_t nmemb)
pthread_mutex_destroy(&g_ls_mutex);
pthread_mutex_destroy(&sort_left_mutex);
}
nthreads = nthreads_save;
#endif
}

View File

@ -33,7 +33,7 @@
.\"
.\" @(#)sort.1 8.1 (Berkeley) 6/6/93
.\"
.Dd May 6, 2012
.Dd May 25, 2012
.Dt SORT 1
.Os
.Sh NAME
@ -358,6 +358,9 @@ This sort algorithm cannot be used with
.Fl u
and
.Fl s .
.It Fl Fl mmap
Try to use the file memory mapping system call.
It may increase speed in some cases.
.El
.Pp
The following operands are available:

View File

@ -89,6 +89,7 @@ const char *nlsstr[] = { "",
"[+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] "
"[-o outfile] [--batch-size size] [--files0-from file] "
"[--heapsort] [--mergesort] [--radixsort] [--qsort] "
"[--mmap] "
#if defined(SORT_THREADS)
"[--nthreads thread_no] "
#endif
@ -138,7 +139,8 @@ enum
QSORT_OPT,
MERGESORT_OPT,
HEAPSORT_OPT,
RADIXSORT_OPT
RADIXSORT_OPT,
MMAP_OPT
};
#define NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6
@ -164,6 +166,7 @@ struct option long_options[] = {
{ "key", required_argument, NULL, 'k' },
{ "merge", no_argument, NULL, 'm' },
{ "mergesort", no_argument, NULL, MERGESORT_OPT },
{ "mmap", no_argument, NULL, MMAP_OPT },
{ "month-sort", no_argument, NULL, 'M' },
{ "numeric-sort", no_argument, NULL, 'n' },
{ "output", required_argument, NULL, 'o' },
@ -1063,12 +1066,16 @@ main(int argc, char **argv)
tmpdir = sort_strdup(optarg);
break;
case 't':
if (strlen(optarg) > 1) {
if (strcmp(optarg, "\\0")) {
while (strlen(optarg) > 1) {
if (optarg[0] != '\\') {
errx(2, "%s: %s\n",
strerror(EINVAL), optarg);
}
*optarg = 0;
optarg += 1;
if (*optarg == '0') {
*optarg = 0;
break;
}
}
sort_opts_vals.tflag = true;
sort_opts_vals.field_sep = btowc(optarg[0]);
@ -1126,6 +1133,9 @@ main(int argc, char **argv)
case MERGESORT_OPT:
sort_opts_vals.sort_method = SORT_MERGESORT;
break;
case MMAP_OPT:
use_mmap = true;
break;
case HEAPSORT_OPT:
sort_opts_vals.sort_method = SORT_HEAPSORT;
break;
@ -1258,6 +1268,11 @@ main(int argc, char **argv)
}
}
#if defined(SORT_THREADS)
if ((argc < 1) || (strcmp(outfile, "-") == 0) || (*outfile == 0))
nthreads = 1;
#endif
if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) {
struct file_list fl;
struct sort_list list;

View File

@ -55,6 +55,7 @@ extern nl_catd catalog;
extern const char *nlsstr[];
#if defined(SORT_THREADS)
#define MT_SORT_THRESHOLD (10000)
extern size_t ncpu;
extern size_t nthreads;
#endif