- Only use multi-threading for large files

- Do not use mmap() by default; it can be enabled by --mmap
- Add some minor optimizations for -u
- Update the manual page to reflect these changes

Submitted by:	Oleg Moskalenko <oleg.moskalenko@citrix.com>
This commit is contained in:
Gabor Kovesdan 2012-05-25 09:30:16 +00:00
parent 39e19560d6
commit 5ca724dc59
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=235987
7 changed files with 90 additions and 23 deletions

View File

@ -499,6 +499,22 @@ bwsfgetln(FILE *f, size_t *len, bool zero_ended, struct reader_buffer *rb)
} }
return (bwssbdup(ret, *len)); return (bwssbdup(ret, *len));
} else if (!zero_ended && (MB_CUR_MAX == 1)) {
char *ret;
ret = fgetln(f, len);
if (ret == NULL) {
if (!feof(f))
err(2, NULL);
return (NULL);
}
if (*len > 0) {
if (ret[*len - 1] == '\n')
--(*len);
}
return (bwscsbdup(ret, *len));
} else { } else {
wchar_t c = 0; wchar_t c = 0;

View File

@ -53,6 +53,8 @@ __FBSDID("$FreeBSD$");
unsigned long long free_memory = 1000000; unsigned long long free_memory = 1000000;
unsigned long long available_free_memory = 1000000; unsigned long long available_free_memory = 1000000;
bool use_mmap;
const char *tmpdir = "/var/tmp"; const char *tmpdir = "/var/tmp";
const char *compress_program; const char *compress_program;
@ -404,23 +406,21 @@ sort_list_dump(struct sort_list *l, const char *fn)
err(2, NULL); err(2, NULL);
if (l->list) { if (l->list) {
struct sort_list_item *last_printed_item;
size_t i; size_t i;
if (!(sort_opts_vals.uflag)) {
last_printed_item = NULL; for (i = 0; i < l->count; ++i)
bwsfwrite(l->list[i]->str, f,
for (i = 0; i < l->count; i++) {
struct sort_list_item *item;
item = l->list[i];
if (!(sort_opts_vals.uflag) ||
(last_printed_item == NULL) ||
list_coll(&last_printed_item, &item)) {
bwsfwrite(item->str, f,
sort_opts_vals.zflag); sort_opts_vals.zflag);
if (sort_opts_vals.uflag) } else {
struct sort_list_item *last_printed_item = NULL;
struct sort_list_item *item;
for (i = 0; i < l->count; ++i) {
item = l->list[i];
if ((last_printed_item == NULL) ||
list_coll(&last_printed_item, &item)) {
bwsfwrite(item->str, f, sort_opts_vals.zflag);
last_printed_item = item; last_printed_item = item;
}
} }
} }
} }
@ -657,7 +657,7 @@ file_reader_init(const char *fsrc)
ret->fname = sort_strdup(fsrc); ret->fname = sort_strdup(fsrc);
if (strcmp(fsrc, "-") && (compress_program == NULL)) { if (strcmp(fsrc, "-") && (compress_program == NULL) && use_mmap) {
do { do {
struct stat stat_buf; struct stat stat_buf;
@ -1539,7 +1539,9 @@ mt_sort(struct sort_list *list,
const char* fn) const char* fn)
{ {
#if defined(SORT_THREADS) #if defined(SORT_THREADS)
if (nthreads < 2 || list->count < nthreads) { if (nthreads < 2 || list->count < MT_SORT_THRESHOLD) {
size_t nthreads_save = nthreads;
nthreads = 1;
#endif #endif
/* if single thread or small data, do simple sort */ /* if single thread or small data, do simple sort */
sort_func(list->list, list->count, sort_func(list->list, list->count,
@ -1547,6 +1549,7 @@ mt_sort(struct sort_list *list,
(int(*)(const void *, const void *)) list_coll); (int(*)(const void *, const void *)) list_coll);
sort_list_dump(list, fn); sort_list_dump(list, fn);
#if defined(SORT_THREADS) #if defined(SORT_THREADS)
nthreads = nthreads_save;
} else { } else {
/* multi-threaded sort */ /* multi-threaded sort */
struct sort_list **parts; struct sort_list **parts;
@ -1590,7 +1593,18 @@ mt_sort(struct sort_list *list,
pthread_attr_init(&attr); pthread_attr_init(&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_DETACHED); pthread_attr_setdetachstate(&attr, PTHREAD_DETACHED);
pthread_create(&pth, &attr, mt_sort_thread, parts[i]); for (;;) {
int res = pthread_create(&pth, &attr,
mt_sort_thread, parts[i]);
if (res >= 0)
break;
if (errno == EAGAIN) {
pthread_yield();
continue;
}
err(2, NULL);
}
pthread_attr_destroy(&attr); pthread_attr_destroy(&attr);
} }

View File

@ -84,6 +84,9 @@ struct file0_reader
extern unsigned long long free_memory; extern unsigned long long free_memory;
extern unsigned long long available_free_memory; extern unsigned long long available_free_memory;
/* Whether input files are read via mmap(); off by default, enabled by --mmap */
extern bool use_mmap;
/* temporary file dir */ /* temporary file dir */
extern const char *tmpdir; extern const char *tmpdir;

View File

@ -609,7 +609,17 @@ run_top_sort_level(struct sort_level *sl)
pthread_attr_setdetachstate(&attr, pthread_attr_setdetachstate(&attr,
PTHREAD_DETACHED); PTHREAD_DETACHED);
pthread_create(&pth, &attr, sort_thread, NULL); for (;;) {
int res = pthread_create(&pth, &attr,
sort_thread, NULL);
if (res >= 0)
break;
if (errno == EAGAIN) {
pthread_yield();
continue;
}
err(2, NULL);
}
pthread_attr_destroy(&attr); pthread_attr_destroy(&attr);
} }
@ -626,6 +636,10 @@ run_sort(struct sort_list_item **base, size_t nmemb)
struct sort_level *sl; struct sort_level *sl;
#if defined(SORT_THREADS) #if defined(SORT_THREADS)
size_t nthreads_save = nthreads;
if (nmemb < MT_SORT_THRESHOLD)
nthreads = 1;
if (nthreads > 1) { if (nthreads > 1) {
pthread_mutexattr_t mattr; pthread_mutexattr_t mattr;
@ -663,6 +677,7 @@ run_sort(struct sort_list_item **base, size_t nmemb)
pthread_mutex_destroy(&g_ls_mutex); pthread_mutex_destroy(&g_ls_mutex);
pthread_mutex_destroy(&sort_left_mutex); pthread_mutex_destroy(&sort_left_mutex);
} }
nthreads = nthreads_save;
#endif #endif
} }

View File

@ -33,7 +33,7 @@
.\" .\"
.\" @(#)sort.1 8.1 (Berkeley) 6/6/93 .\" @(#)sort.1 8.1 (Berkeley) 6/6/93
.\" .\"
.Dd May 6, 2012 .Dd May 25, 2012
.Dt SORT 1 .Dt SORT 1
.Os .Os
.Sh NAME .Sh NAME
@ -358,6 +358,9 @@ This sort algorithm cannot be used with
.Fl u .Fl u
and and
.Fl s . .Fl s .
.It Fl Fl mmap
Try to use the file memory mapping system call.
This may increase speed in some cases.
.El .El
.Pp .Pp
The following operands are available: The following operands are available:

View File

@ -89,6 +89,7 @@ const char *nlsstr[] = { "",
"[+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] " "[+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] "
"[-o outfile] [--batch-size size] [--files0-from file] " "[-o outfile] [--batch-size size] [--files0-from file] "
"[--heapsort] [--mergesort] [--radixsort] [--qsort] " "[--heapsort] [--mergesort] [--radixsort] [--qsort] "
"[--mmap] "
#if defined(SORT_THREADS) #if defined(SORT_THREADS)
"[--nthreads thread_no] " "[--nthreads thread_no] "
#endif #endif
@ -138,7 +139,8 @@ enum
QSORT_OPT, QSORT_OPT,
MERGESORT_OPT, MERGESORT_OPT,
HEAPSORT_OPT, HEAPSORT_OPT,
RADIXSORT_OPT RADIXSORT_OPT,
MMAP_OPT
}; };
#define NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6 #define NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6
@ -164,6 +166,7 @@ struct option long_options[] = {
{ "key", required_argument, NULL, 'k' }, { "key", required_argument, NULL, 'k' },
{ "merge", no_argument, NULL, 'm' }, { "merge", no_argument, NULL, 'm' },
{ "mergesort", no_argument, NULL, MERGESORT_OPT }, { "mergesort", no_argument, NULL, MERGESORT_OPT },
{ "mmap", no_argument, NULL, MMAP_OPT },
{ "month-sort", no_argument, NULL, 'M' }, { "month-sort", no_argument, NULL, 'M' },
{ "numeric-sort", no_argument, NULL, 'n' }, { "numeric-sort", no_argument, NULL, 'n' },
{ "output", required_argument, NULL, 'o' }, { "output", required_argument, NULL, 'o' },
@ -1063,12 +1066,16 @@ main(int argc, char **argv)
tmpdir = sort_strdup(optarg); tmpdir = sort_strdup(optarg);
break; break;
case 't': case 't':
if (strlen(optarg) > 1) { while (strlen(optarg) > 1) {
if (strcmp(optarg, "\\0")) { if (optarg[0] != '\\') {
errx(2, "%s: %s\n", errx(2, "%s: %s\n",
strerror(EINVAL), optarg); strerror(EINVAL), optarg);
} }
*optarg = 0; optarg += 1;
if (*optarg == '0') {
*optarg = 0;
break;
}
} }
sort_opts_vals.tflag = true; sort_opts_vals.tflag = true;
sort_opts_vals.field_sep = btowc(optarg[0]); sort_opts_vals.field_sep = btowc(optarg[0]);
@ -1126,6 +1133,9 @@ main(int argc, char **argv)
case MERGESORT_OPT: case MERGESORT_OPT:
sort_opts_vals.sort_method = SORT_MERGESORT; sort_opts_vals.sort_method = SORT_MERGESORT;
break; break;
case MMAP_OPT:
use_mmap = true;
break;
case HEAPSORT_OPT: case HEAPSORT_OPT:
sort_opts_vals.sort_method = SORT_HEAPSORT; sort_opts_vals.sort_method = SORT_HEAPSORT;
break; break;
@ -1258,6 +1268,11 @@ main(int argc, char **argv)
} }
} }
#if defined(SORT_THREADS)
if ((argc < 1) || (strcmp(outfile, "-") == 0) || (*outfile == 0))
nthreads = 1;
#endif
if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) { if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) {
struct file_list fl; struct file_list fl;
struct sort_list list; struct sort_list list;

View File

@ -55,6 +55,7 @@ extern nl_catd catalog;
extern const char *nlsstr[]; extern const char *nlsstr[];
#if defined(SORT_THREADS) #if defined(SORT_THREADS)
#define MT_SORT_THRESHOLD (10000)
extern size_t ncpu; extern size_t ncpu;
extern size_t nthreads; extern size_t nthreads;
#endif #endif