From 7660a41785dca8b2e0213b9cace03d4d370bece2 Mon Sep 17 00:00:00 2001 From: cem Date: Sat, 15 Feb 2020 19:13:37 +0000 Subject: [PATCH] random(6): Re-add undocumented support for floating point denominators And document it in the manual page. PR: 244139 Submitted by: Keith White (earlier version) --- usr.bin/random/random.6 | 60 +++++++++++++++++------------------ usr.bin/random/random.c | 45 ++++++++++++++------------ usr.bin/random/randomize_fd.c | 3 +- usr.bin/random/randomize_fd.h | 36 +++++++++++++++++++++ 4 files changed, 91 insertions(+), 53 deletions(-) diff --git a/usr.bin/random/random.6 b/usr.bin/random/random.6 index 75fa7a378d3d..30b121ebbfa1 100644 --- a/usr.bin/random/random.6 +++ b/usr.bin/random/random.6 @@ -28,7 +28,7 @@ .\" @(#)random.6 8.2 (Berkeley) 3/31/94 .\" $FreeBSD$ .\" -.Dd December 12, 2019 +.Dd February 15, 2020 .Dt RANDOM 6 .Os .Sh NAME @@ -42,27 +42,26 @@ .Sh DESCRIPTION .Nm Random has two distinct modes of operations. -The default is to read in lines -from the standard input and randomly write them out -to the standard output with a probability of -1 / +The default is to read lines from standard input and write them to standard +output with a probability of 1.0 / .Ar denominator . +.Ar ( denominator +is a real number greater than or equal to 1.0.) The default .Ar denominator -for this mode of operation is 2, giving each line a 50/50 chance of +for this mode of operation is 2.0, giving each line a 50% chance of being displayed. .Pp -The second mode of operation is to read in a file from -.Ar filename -and randomize the contents of the file and send it back out to +The second mode of operation, selected with the +.Fl f Ar filename +option, reads the specified file and outputs the randomized contents to standard output. -The contents can be randomized based off of newlines or based off of -space characters as determined by -.Xr isspace 3 . 
+The contents can be randomized in units of lines (split on newline characters) +or in units of words (split on space characters as determined by +.Xr isspace 3 . ) The default .Ar denominator -for this mode of operation is 1, which gives each line a chance to be -displayed, but in a random order. +for this mode of operation is 1.0, which displays every line. .Pp The options are as follows: .Bl -tag -width Ds @@ -75,6 +74,9 @@ does not read or write anything, and simply exits with a random exit value of 0 to .Ar denominator \&- 1, inclusive. +In this mode, +.Ar denominator +must be less than or equal to 256. .It Fl f Ar filename The .Fl f @@ -83,27 +85,18 @@ option is used to specify the to read from. Standard input is used if .Ar filename -is set to -.Sq Fl . +is +.Sq - . .It Fl l Randomize the input via newlines (the default). .It Fl r -The -.Fl r -option guarantees that the output is unbuffered. +Do not buffer output. .It Fl U -Tells -.Xr random 6 -that it is okay for it to reuse any given line or word when creating a -randomized output. +Reuse any given line or word when creating a randomized output. .It Fl u -Tells -.Xr random 6 -not to select the same line or word from a file more than once (the -default). +Do not select the same line or word from a file more than once (the default). This does not guarantee uniqueness if there are two of the -same tokens from the input, but it does prevent selecting the same -token more than once. +same tokens in the input. .It Fl w Randomize words separated by .Xr isspace 3 @@ -116,7 +109,12 @@ The functionality to randomizing lines and words was added in 2003 by .An Sean Chittenden Aq Mt seanc@FreeBSD.org . .Sh BUGS +This tool is a remnant of the "games" collection formerly part of +.Fx +base. +It probably should have been removed to ports with the rest of that collection. +It does not have a coherent purpose and the motivation for it to be a core base +utility is nonobvious. 
+.Pp No index is used when printing out tokens from the list which makes it rather slow for large files (10MB+). -For smaller -files, however, it should still be quite fast and efficient. diff --git a/usr.bin/random/random.c b/usr.bin/random/random.c index 9f78a2711cc9..b3b349cbb75a 100644 --- a/usr.bin/random/random.c +++ b/usr.bin/random/random.c @@ -51,6 +51,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -67,11 +68,12 @@ main(int argc, char *argv[]) { double denom; int ch, fd, random_exit, randomize_lines, random_type, ret, - selected, unique_output, unbuffer_output; + unique_output, unbuffer_output; + bool selected; char *ep; const char *filename; - denom = 0; + denom = 0.; filename = "/dev/fd/0"; random_type = RANDOM_TYPE_UNSET; random_exit = randomize_lines = unbuffer_output = 0; @@ -119,16 +121,16 @@ main(int argc, char *argv[]) switch (argc) { case 0: - denom = (randomize_lines ? 1 : 2); + denom = (randomize_lines ? 1. : 2.); break; case 1: errno = 0; denom = strtod(*argv, &ep); if (errno == ERANGE) err(1, "%s", *argv); - if (denom <= 0 || *ep != '\0') + if (denom < 1. || *ep != '\0') errx(1, "denominator is not valid."); - if (random_exit && denom > 256) + if (random_exit && denom > 256.) errx(1, "denominator must be <= 256 for random exit."); break; default: @@ -160,24 +162,25 @@ main(int argc, char *argv[]) return (arc4random_uniform(denom)); /* - * Select whether to print the first line. (Prime the pump.) - * We find a random number between 0 and denom - 1 and, if it's - * 0 (which has a 1 / denom chance of being true), we select the - * line. + * Filter stdin, selecting lines with probability 1/denom, one + * character at a time. */ - selected = (arc4random_uniform(denom) == 0); - while ((ch = getchar()) != EOF) { - if (selected) - (void)putchar(ch); - if (ch == '\n') { - /* End of that line. See if we got an error. 
*/ - if (ferror(stdout)) - err(2, "stdout"); - - /* Now see if the next line is to be printed. */ - selected = (arc4random_uniform(denom) == 0); + do { + selected = random_uniform_denom(denom); + if (selected) { + while ((ch = getchar()) != EOF) { + putchar(ch); + if (ch == '\n') + break; + } + } else { + while ((ch = getchar()) != EOF) + if (ch == '\n') + break; } - } + if (ferror(stdout)) + err(2, "stdout"); + } while (ch != EOF); if (ferror(stdin)) err(2, "stdin"); exit (0); diff --git a/usr.bin/random/randomize_fd.c b/usr.bin/random/randomize_fd.c index 21075f301dd4..a60bb0c15e68 100644 --- a/usr.bin/random/randomize_fd.c +++ b/usr.bin/random/randomize_fd.c @@ -33,6 +33,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -218,7 +219,7 @@ randomize_fd(int fd, int type, int unique, double denom) if (n->cp == NULL) break; - if (arc4random_uniform(denom) == 0) { + if (random_uniform_denom(denom)) { ret = printf("%.*s", (int)n->len - 1, n->cp); if (ret < 0) diff --git a/usr.bin/random/randomize_fd.h b/usr.bin/random/randomize_fd.h index 5b50e6265504..01fa9d6c117e 100644 --- a/usr.bin/random/randomize_fd.h +++ b/usr.bin/random/randomize_fd.h @@ -42,4 +42,40 @@ struct rand_node { int randomize_fd(int fd, int type, int unique, double denom); +/* + * Generates a random number uniformly in the range [0.0, 1.0). + */ +static inline double +random_unit_float(void) +{ + static const uint64_t denom = (1ull << 53); + static const uint64_t mask = denom - 1; + + uint64_t rand64; + + /* + * arc4random_buf(...) in this use generates integer outputs in [0, + * UINT64_MAX]. + * + * The double mantissa only has 53 bits, so we uniformly mask off the + * high 11 bits and then floating-point divide by 2^53 to achieve a + * result in [0, 1). + * + * We are not allowed to emit 1.0, so denom must be one greater than + * the possible range of the preceding step.
+ */ + arc4random_buf(&rand64, sizeof(rand64)); + rand64 &= mask; + return ((double)rand64 / denom); +} + +/* + * Returns true with probability 1 / denom (a floating point number >= 1). + * Otherwise, returns false. + */ +static inline bool +random_uniform_denom(double denom) +{ + return ((uint64_t)(denom * random_unit_float()) == 0); +} #endif