random(6): Re-add undocumented support for floating point denominators

And document it in the manual page.

PR:		244139
Submitted by:	Keith White <kwhite AT site.uottawa.ca> (earlier version)
This commit is contained in:
cem 2020-02-15 19:13:37 +00:00
parent f6abce8e79
commit 7660a41785
4 changed files with 91 additions and 53 deletions

View File

@ -28,7 +28,7 @@
.\" @(#)random.6 8.2 (Berkeley) 3/31/94 .\" @(#)random.6 8.2 (Berkeley) 3/31/94
.\" $FreeBSD$ .\" $FreeBSD$
.\" .\"
.Dd December 12, 2019 .Dd February 15, 2020
.Dt RANDOM 6 .Dt RANDOM 6
.Os .Os
.Sh NAME .Sh NAME
@ -42,27 +42,26 @@
.Sh DESCRIPTION .Sh DESCRIPTION
.Nm Random .Nm Random
has two distinct modes of operations. has two distinct modes of operations.
The default is to read in lines The default is to read lines from standard input and write them to standard
from the standard input and randomly write them out output with a probability of 1.0 /
to the standard output with a probability of
1 /
.Ar denominator . .Ar denominator .
.Ar ( denominator
is a real number greater than or equal to 1.0.)
The default The default
.Ar denominator .Ar denominator
for this mode of operation is 2, giving each line a 50/50 chance of for this mode of operation is 2.0, giving each line a 50% chance of
being displayed. being displayed.
.Pp .Pp
The second mode of operation is to read in a file from The second mode of operation, selected with the
.Ar filename .Fl f Ar filename
and randomize the contents of the file and send it back out to option, reads the specified file and outputs the randomized contents to
standard output. standard output.
The contents can be randomized based off of newlines or based off of The contents can be randomized in units of lines (split on newline characters)
space characters as determined by or in units of words (split on space characters as determined by
.Xr isspace 3 . .Xr isspace 3 . )
The default The default
.Ar denominator .Ar denominator
for this mode of operation is 1, which gives each line a chance to be for this mode of operation is 1.0, which displays every line.
displayed, but in a random order.
.Pp .Pp
The options are as follows: The options are as follows:
.Bl -tag -width Ds .Bl -tag -width Ds
@ -75,6 +74,9 @@ does not read or write anything, and simply exits with a random
exit value of 0 to exit value of 0 to
.Ar denominator .Ar denominator
\&- 1, inclusive. \&- 1, inclusive.
In this mode,
.Ar denominator
must be less than or equal to 256.
.It Fl f Ar filename .It Fl f Ar filename
The The
.Fl f .Fl f
@ -83,27 +85,18 @@ option is used to specify the
to read from. to read from.
Standard input is used if Standard input is used if
.Ar filename .Ar filename
is set to is
.Sq Fl . .Sq - .
.It Fl l .It Fl l
Randomize the input via newlines (the default). Randomize the input via newlines (the default).
.It Fl r .It Fl r
The Do not buffer output.
.Fl r
option guarantees that the output is unbuffered.
.It Fl U .It Fl U
Tells Reuse any given line or word when creating a randomized output.
.Xr random 6
that it is okay for it to reuse any given line or word when creating a
randomized output.
.It Fl u .It Fl u
Tells Do not select the same line or word from a file more than once (the default).
.Xr random 6
not to select the same line or word from a file more than once (the
default).
This does not guarantee uniqueness if there are two of the This does not guarantee uniqueness if there are two of the
same tokens from the input, but it does prevent selecting the same same tokens in the input.
token more than once.
.It Fl w .It Fl w
Randomize words separated by Randomize words separated by
.Xr isspace 3 .Xr isspace 3
@ -116,7 +109,12 @@ The
functionality to randomize lines and words was added in 2003 by functionality to randomize lines and words was added in 2003 by
.An Sean Chittenden Aq Mt seanc@FreeBSD.org . .An Sean Chittenden Aq Mt seanc@FreeBSD.org .
.Sh BUGS .Sh BUGS
This tool is a remnant of the "games" collection formerly part of
.Fx
base.
It probably should have been removed to ports with the rest of that collection.
It does not have a coherent purpose and the motivation for it to be a core base
utility is nonobvious.
.Pp
No index is used when printing out tokens from the list which No index is used when printing out tokens from the list which
makes it rather slow for large files (10MB+). makes it rather slow for large files (10MB+).
For smaller
files, however, it should still be quite fast and efficient.

View File

@ -51,6 +51,7 @@ __FBSDID("$FreeBSD$");
#include <fcntl.h> #include <fcntl.h>
#include <limits.h> #include <limits.h>
#include <locale.h> #include <locale.h>
#include <stdbool.h>
#include <stdint.h> #include <stdint.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
@ -67,11 +68,12 @@ main(int argc, char *argv[])
{ {
double denom; double denom;
int ch, fd, random_exit, randomize_lines, random_type, ret, int ch, fd, random_exit, randomize_lines, random_type, ret,
selected, unique_output, unbuffer_output; unique_output, unbuffer_output;
bool selected;
char *ep; char *ep;
const char *filename; const char *filename;
denom = 0; denom = 0.;
filename = "/dev/fd/0"; filename = "/dev/fd/0";
random_type = RANDOM_TYPE_UNSET; random_type = RANDOM_TYPE_UNSET;
random_exit = randomize_lines = unbuffer_output = 0; random_exit = randomize_lines = unbuffer_output = 0;
@ -119,16 +121,16 @@ main(int argc, char *argv[])
switch (argc) { switch (argc) {
case 0: case 0:
denom = (randomize_lines ? 1 : 2); denom = (randomize_lines ? 1. : 2.);
break; break;
case 1: case 1:
errno = 0; errno = 0;
denom = strtod(*argv, &ep); denom = strtod(*argv, &ep);
if (errno == ERANGE) if (errno == ERANGE)
err(1, "%s", *argv); err(1, "%s", *argv);
if (denom <= 0 || *ep != '\0') if (denom < 1. || *ep != '\0')
errx(1, "denominator is not valid."); errx(1, "denominator is not valid.");
if (random_exit && denom > 256) if (random_exit && denom > 256.)
errx(1, "denominator must be <= 256 for random exit."); errx(1, "denominator must be <= 256 for random exit.");
break; break;
default: default:
@ -160,24 +162,25 @@ main(int argc, char *argv[])
return (arc4random_uniform(denom)); return (arc4random_uniform(denom));
/* /*
* Select whether to print the first line. (Prime the pump.) * Filter stdin, selecting lines with probability 1/denom, one
* We find a random number between 0 and denom - 1 and, if it's * character at a time.
* 0 (which has a 1 / denom chance of being true), we select the
* line.
*/ */
selected = (arc4random_uniform(denom) == 0); do {
while ((ch = getchar()) != EOF) { selected = random_uniform_denom(denom);
if (selected) if (selected) {
(void)putchar(ch); while ((ch = getchar()) != EOF) {
if (ch == '\n') { putchar(ch);
/* End of that line. See if we got an error. */ if (ch == '\n')
if (ferror(stdout)) break;
err(2, "stdout"); }
} else {
/* Now see if the next line is to be printed. */ while ((ch = getchar()) != EOF)
selected = (arc4random_uniform(denom) == 0); if (ch == '\n')
break;
} }
} if (ferror(stdout))
err(2, "stdout");
} while (ch != EOF);
if (ferror(stdin)) if (ferror(stdin))
err(2, "stdin"); err(2, "stdin");
exit (0); exit (0);

View File

@ -33,6 +33,7 @@ __FBSDID("$FreeBSD$");
#include <ctype.h> #include <ctype.h>
#include <err.h> #include <err.h>
#include <errno.h> #include <errno.h>
#include <stdbool.h>
#include <stdlib.h> #include <stdlib.h>
#include <stdint.h> #include <stdint.h>
#include <stdio.h> #include <stdio.h>
@ -218,7 +219,7 @@ randomize_fd(int fd, int type, int unique, double denom)
if (n->cp == NULL) if (n->cp == NULL)
break; break;
if (arc4random_uniform(denom) == 0) { if (random_uniform_denom(denom)) {
ret = printf("%.*s", ret = printf("%.*s",
(int)n->len - 1, n->cp); (int)n->len - 1, n->cp);
if (ret < 0) if (ret < 0)

View File

@ -42,4 +42,40 @@ struct rand_node {
int randomize_fd(int fd, int type, int unique, double denom); int randomize_fd(int fd, int type, int unique, double denom);
/*
* Generates a random number uniformly in the range [0.0, 1.0).
*/
static inline double
random_unit_float(void)
{
static const uint64_t denom = (1ull << 53);
static const uint64_t mask = denom - 1;
uint64_t rand64;
/*
* arc4random_buf(...) in this use generates integer outputs in [0,
* UINT64_MAX].
*
* The double mantissa only has 53 bits, so we uniformly mask off the
* high 11 bits and then floating-point divide by 2^53 to achieve a
* result in [0, 1).
*
* We are not allowed to emit 1.0, so denom must be one greater than
* the possible range of the preceeding step.
*/
arc4random_buf(&rand64, sizeof(rand64));
rand64 &= mask;
return ((double)rand64 / denom);
}
/*
 * Returns true with probability 1 / denom (a floating point number >= 1).
 * Otherwise, returns false.
 */
static inline bool
random_uniform_denom(double denom)
{
	/*
	 * With u drawn from [0, 1), the product denom * u truncates to zero
	 * exactly when it is below 1.0.  Do the comparison in floating point
	 * instead of converting to uint64_t: converting a double whose value
	 * does not fit in the integer type is undefined behavior, and this
	 * function's contract places no upper bound on denom.
	 */
	return (denom * random_unit_float() < 1.0);
}
#endif #endif