diff --git a/usr.bin/sort/bwstring.c b/usr.bin/sort/bwstring.c index 9ebeb10d6e89..5733732eac6f 100644 --- a/usr.bin/sort/bwstring.c +++ b/usr.bin/sort/bwstring.c @@ -499,6 +499,22 @@ bwsfgetln(FILE *f, size_t *len, bool zero_ended, struct reader_buffer *rb) } return (bwssbdup(ret, *len)); + } else if (!zero_ended && (MB_CUR_MAX == 1)) { + char *ret; + + ret = fgetln(f, len); + + if (ret == NULL) { + if (!feof(f)) + err(2, NULL); + return (NULL); + } + if (*len > 0) { + if (ret[*len - 1] == '\n') + --(*len); + } + return (bwscsbdup(ret, *len)); + } else { wchar_t c = 0; diff --git a/usr.bin/sort/file.c b/usr.bin/sort/file.c index 9afa4b2655f6..68c3f548e717 100644 --- a/usr.bin/sort/file.c +++ b/usr.bin/sort/file.c @@ -53,6 +53,8 @@ __FBSDID("$FreeBSD$"); unsigned long long free_memory = 1000000; unsigned long long available_free_memory = 1000000; +bool use_mmap; + const char *tmpdir = "/var/tmp"; const char *compress_program; @@ -404,23 +406,21 @@ sort_list_dump(struct sort_list *l, const char *fn) err(2, NULL); if (l->list) { - struct sort_list_item *last_printed_item; size_t i; - - last_printed_item = NULL; - - for (i = 0; i < l->count; i++) { - struct sort_list_item *item; - - item = l->list[i]; - - if (!(sort_opts_vals.uflag) || - (last_printed_item == NULL) || - list_coll(&last_printed_item, &item)) { - bwsfwrite(item->str, f, + if (!(sort_opts_vals.uflag)) { + for (i = 0; i < l->count; ++i) + bwsfwrite(l->list[i]->str, f, sort_opts_vals.zflag); - if (sort_opts_vals.uflag) + } else { + struct sort_list_item *last_printed_item = NULL; + struct sort_list_item *item; + for (i = 0; i < l->count; ++i) { + item = l->list[i]; + if ((last_printed_item == NULL) || + list_coll(&last_printed_item, &item)) { + bwsfwrite(item->str, f, sort_opts_vals.zflag); last_printed_item = item; + } } } } @@ -657,7 +657,7 @@ file_reader_init(const char *fsrc) ret->fname = sort_strdup(fsrc); - if (strcmp(fsrc, "-") && (compress_program == NULL)) { + if (strcmp(fsrc, "-") && 
(compress_program == NULL) && use_mmap) { do { struct stat stat_buf; @@ -1539,7 +1539,9 @@ mt_sort(struct sort_list *list, const char* fn) { #if defined(SORT_THREADS) - if (nthreads < 2 || list->count < nthreads) { + if (nthreads < 2 || list->count < MT_SORT_THRESHOLD) { + size_t nthreads_save = nthreads; + nthreads = 1; #endif /* if single thread or small data, do simple sort */ sort_func(list->list, list->count, @@ -1547,6 +1549,7 @@ mt_sort(struct sort_list *list, (int(*)(const void *, const void *)) list_coll); sort_list_dump(list, fn); #if defined(SORT_THREADS) + nthreads = nthreads_save; } else { /* multi-threaded sort */ struct sort_list **parts; @@ -1590,7 +1593,18 @@ mt_sort(struct sort_list *list, pthread_attr_init(&attr); pthread_attr_setdetachstate(&attr, PTHREAD_DETACHED); - pthread_create(&pth, &attr, mt_sort_thread, parts[i]); + for (;;) { + int res = pthread_create(&pth, &attr, + mt_sort_thread, parts[i]); + + if (res == 0) /* 0 on success; failures return an error number, errno is not set */ + break; + if (res == EAGAIN) { + pthread_yield(); + continue; + } + errc(2, res, NULL); + } pthread_attr_destroy(&attr); } diff --git a/usr.bin/sort/file.h b/usr.bin/sort/file.h index fd2392221e5b..47c22992b74e 100644 --- a/usr.bin/sort/file.h +++ b/usr.bin/sort/file.h @@ -84,6 +84,9 @@ struct file0_reader extern unsigned long long free_memory; extern unsigned long long available_free_memory; +/* Are we using mmap ? 
*/ +extern bool use_mmap; + /* temporary file dir */ extern const char *tmpdir; diff --git a/usr.bin/sort/radixsort.c b/usr.bin/sort/radixsort.c index 6fa2d3ae5a8d..ccaa99469286 100644 --- a/usr.bin/sort/radixsort.c +++ b/usr.bin/sort/radixsort.c @@ -609,7 +609,17 @@ run_top_sort_level(struct sort_level *sl) pthread_attr_setdetachstate(&attr, PTHREAD_DETACHED); - pthread_create(&pth, &attr, sort_thread, NULL); + for (;;) { + int res = pthread_create(&pth, &attr, + sort_thread, NULL); + if (res == 0) /* 0 on success; failures return an error number, errno is not set */ + break; + if (res == EAGAIN) { + pthread_yield(); + continue; + } + errc(2, res, NULL); + } pthread_attr_destroy(&attr); } @@ -626,6 +636,10 @@ run_sort(struct sort_list_item **base, size_t nmemb) struct sort_level *sl; #if defined(SORT_THREADS) + size_t nthreads_save = nthreads; + if (nmemb < MT_SORT_THRESHOLD) + nthreads = 1; + if (nthreads > 1) { pthread_mutexattr_t mattr; @@ -663,6 +677,7 @@ run_sort(struct sort_list_item **base, size_t nmemb) pthread_mutex_destroy(&g_ls_mutex); pthread_mutex_destroy(&sort_left_mutex); } + nthreads = nthreads_save; #endif } diff --git a/usr.bin/sort/sort.1.in b/usr.bin/sort/sort.1.in index e24f3538d4de..12f63ecc575e 100644 --- a/usr.bin/sort/sort.1.in +++ b/usr.bin/sort/sort.1.in @@ -33,7 +33,7 @@ .\" .\" @(#)sort.1 8.1 (Berkeley) 6/6/93 .\" -.Dd May 6, 2012 +.Dd May 25, 2012 .Dt SORT 1 .Os .Sh NAME @@ -358,6 +358,9 @@ This sort algorithm cannot be used with .Fl u and .Fl s . +.It Fl Fl mmap +Try to use file memory mapping system call. +It may increase speed in some cases. 
.El .Pp The following operands are available: diff --git a/usr.bin/sort/sort.c b/usr.bin/sort/sort.c index f802ced04e01..dd0ed6828311 100644 --- a/usr.bin/sort/sort.c +++ b/usr.bin/sort/sort.c @@ -89,6 +89,7 @@ const char *nlsstr[] = { "", "[+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] " "[-o outfile] [--batch-size size] [--files0-from file] " "[--heapsort] [--mergesort] [--radixsort] [--qsort] " + "[--mmap] " #if defined(SORT_THREADS) "[--nthreads thread_no] " #endif @@ -138,7 +139,8 @@ enum QSORT_OPT, MERGESORT_OPT, HEAPSORT_OPT, - RADIXSORT_OPT + RADIXSORT_OPT, + MMAP_OPT }; #define NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6 @@ -164,6 +166,7 @@ struct option long_options[] = { { "key", required_argument, NULL, 'k' }, { "merge", no_argument, NULL, 'm' }, { "mergesort", no_argument, NULL, MERGESORT_OPT }, + { "mmap", no_argument, NULL, MMAP_OPT }, { "month-sort", no_argument, NULL, 'M' }, { "numeric-sort", no_argument, NULL, 'n' }, { "output", required_argument, NULL, 'o' }, @@ -1063,12 +1066,16 @@ main(int argc, char **argv) tmpdir = sort_strdup(optarg); break; case 't': - if (strlen(optarg) > 1) { - if (strcmp(optarg, "\\0")) { + while (strlen(optarg) > 1) { + if (optarg[0] != '\\') { errx(2, "%s: %s\n", strerror(EINVAL), optarg); } - *optarg = 0; + optarg += 1; + if (*optarg == '0') { + *optarg = 0; + break; + } } sort_opts_vals.tflag = true; sort_opts_vals.field_sep = btowc(optarg[0]); @@ -1126,6 +1133,9 @@ main(int argc, char **argv) case MERGESORT_OPT: sort_opts_vals.sort_method = SORT_MERGESORT; break; + case MMAP_OPT: + use_mmap = true; + break; case HEAPSORT_OPT: sort_opts_vals.sort_method = SORT_HEAPSORT; break; @@ -1258,6 +1268,11 @@ main(int argc, char **argv) } } +#if defined(SORT_THREADS) + if ((argc < 1) || (strcmp(outfile, "-") == 0) || (*outfile == 0)) + nthreads = 1; +#endif + if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) { struct file_list fl; struct sort_list list; diff --git a/usr.bin/sort/sort.h b/usr.bin/sort/sort.h index 
9d4d1ed8f799..f6505c9b2a99 100644 --- a/usr.bin/sort/sort.h +++ b/usr.bin/sort/sort.h @@ -55,6 +55,7 @@ extern nl_catd catalog; extern const char *nlsstr[]; #if defined(SORT_THREADS) +#define MT_SORT_THRESHOLD (10000) extern size_t ncpu; extern size_t nthreads; #endif