Update random(6) to have the ability to randomize a file/stdin based

off of lines or words.  See the man page for details.

Reviewed by:	markm
MFC after:	3 days
This commit is contained in:
Sean Chittenden 2003-02-11 19:32:18 +00:00
parent 24cc1d58d5
commit a3b3a1cf6c
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=110723
5 changed files with 389 additions and 20 deletions

View File

@ -3,5 +3,7 @@
PROG= random
MAN= random.6
SRCS= random.c randomize_fd.c
WARNS= 5
.include <bsd.prog.mk>

View File

@ -10,7 +10,7 @@
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\" 3. All advertising materials mentioning features or use of this software
.\" must display the following acknowledgement:
.\" must display the following acknowledgment:
.\" This product includes software developed by the University of
.\" California, Berkeley and its contributors.
.\" 4. Neither the name of the University nor the names of its contributors
@ -32,7 +32,7 @@
.\" @(#)random.6 8.2 (Berkeley) 3/31/94
.\" $FreeBSD$
.\"
.Dd March 31, 1994
.Dd February 8, 2003
.Dt RANDOM 6
.Os
.Sh NAME
@ -41,14 +41,31 @@
.Sh SYNOPSIS
.Nm
.Op Fl er
.Op Fl f Ar filename
.Op Ar denominator
.Sh DESCRIPTION
.Nm Random
reads lines from the standard input and copies them to the standard
output with a probability of 1/denominator.
The default value for
has two distinct modes of operation. The default is to read in lines
from stdin and randomly write them out to stdout with a probability of
1 /
.Ar denominator .
The default
.Ar denominator
is 2.
for this mode of operation is 2, giving each line a 50/50 chance of
being displayed.
.Pp
The second mode of operation is to read in a file from
.Ar filename
and randomize the contents of the file and send it back out to stdout.
The contents can be randomized based off of newlines or based off of
space characters as determined by
.Xr isspace 3 .
The default
.Ar denominator
for this mode of operation is 1, which gives each line a chance to be
displayed, but in a
.Xr random 3
order.
.Pp
The options are as follows:
.Bl -tag -width Ds
@ -61,10 +78,46 @@ does not read or write anything, and simply exits with a random
exit value of 0 to
.Ar denominator
\&- 1, inclusive.
.It Fl f Ar filename
The
.Fl f
option is used to specify the
.Ar filename
to read from. stdin is used if the filename is set to "-".
.It Fl l
Randomize the input via newlines (the default).
.It Fl r
The
.Fl r
option guarantees that the output is unbuffered.
.It Fl u
Tells
.Xr random 6
not to select the same line or word from a file more than once (the
default). This does not guarantee uniqueness if there are two of the
same tokens from the input, but it does prevent selecting the same
token more than once.
.It Fl U
Tells
.Xr random 6
that it is okay for it to reuse any given line or word when creating a
randomized output.
.It Fl w
Randomize words separated by
.Xr isspace 3
instead of newlines.
.El
.Sh SEE ALSO
.Xr fortune 6
.Xr fortune 6 ,
.Xr random 3
.Sh BUGS
There is no index used when printing out tokens from the list, which
makes it rather slow for large files (10MB+). If this were used in
performance sensitive areas, I'd do something about it. For smaller
files, however, it should still be quite fast and efficient.
.Sh HISTORY
Original
.Xr random 6
game was brought in from BSD 4.4 Lite by jkh in 1994. The
functionality to randomize lines and words was added in 2003 by
seanc.

View File

@ -52,33 +52,60 @@ static const char rcsid[] =
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>
#include "randomize_fd.h"
void usage(void);
int
main(argc, argv)
int argc;
char *argv[];
main(int argc, char **argv)
{
double denom;
int ch, random_exit, selected, unbuffer_output;
char *ep;
int ch, fd, random_exit, randomize_lines, random_type, ret,
selected, unique_output, unbuffer_output;
char *ep, *filename;
random_exit = unbuffer_output = 0;
denom = 0;
while ((ch = getopt(argc, argv, "er")) != -1)
filename = NULL;
random_type = RANDOM_TYPE_UNSET;
random_exit = randomize_lines = random_type = unbuffer_output = 0;
unique_output = 1;
while ((ch = getopt(argc, argv, "ef:hlruUw")) != -1)
switch (ch) {
case 'e':
random_exit = 1;
break;
case 'f':
randomize_lines = 1;
if (!strcmp(optarg, "-"))
filename = strdup("/dev/fd/0");
else
filename = optarg;
break;
case 'l':
randomize_lines = 1;
random_type = RANDOM_TYPE_LINES;
break;
case 'r':
unbuffer_output = 1;
break;
case 'u':
randomize_lines = 1;
unique_output = 1;
break;
case 'U':
randomize_lines = 1;
unique_output = 0;
break;
case 'w':
randomize_lines = 1;
random_type = RANDOM_TYPE_WORDS;
break;
default:
case '?':
usage();
@ -90,7 +117,7 @@ main(argc, argv)
switch (argc) {
case 0:
denom = 2;
denom = (randomize_lines ? 1 : 2);
break;
case 1:
errno = 0;
@ -109,10 +136,6 @@ main(argc, argv)
srandomdev();
/* Compute a random exit status between 0 and denom - 1. */
if (random_exit)
return ((denom * random()) / LONG_MAX);
/*
* Act as a filter, randomly choosing lines of the standard input
* to write to the standard output.
@ -120,6 +143,22 @@ main(argc, argv)
if (unbuffer_output)
setbuf(stdout, NULL);
/*
* Act as a filter, randomizing lines read in from a given file
* descriptor and write the output to standard output.
*/
if (randomize_lines) {
if ((fd = open(filename, O_RDONLY, 0)) < 0)
err(1, "%s", optarg);
ret = randomize_fd(fd, random_type, unique_output, denom);
if (!random_exit)
return(ret);
}
/* Compute a random exit status between 0 and denom - 1. */
if (random_exit)
return ((denom * random()) / LONG_MAX);
/*
* Select whether to print the first line. (Prime the pump.)
* We find a random number between 0 and denom - 1 and, if it's
@ -148,6 +187,6 @@ void
usage()
{
(void)fprintf(stderr, "usage: random [-er] [denominator]\n");
(void)fprintf(stderr, "usage: random [-elruUw] [-f filename] [denominator]\n");
exit(1);
}

219
games/random/randomize_fd.c Normal file
View File

@ -0,0 +1,219 @@
/*
* Copyright (C) 2003 Sean Chittenden <seanc@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include "randomize_fd.h"
/*
 * Head and tail of the singly linked list of tokens.  randomize_fd()
 * resets both to NULL on entry and rand_node_append() extends the list
 * at rand_tail.
 */
struct rand_node *rand_root;
struct rand_node *rand_tail;
/*
 * Create a new, empty list node: no token text attached (cp is NULL),
 * a length of zero and no successor.  Terminates the program through
 * err(3) when memory is exhausted, so the result is never NULL.
 */
static struct rand_node *
rand_node_allocate(void)
{
	struct rand_node *node;

	node = malloc(sizeof(*node));
	if (node == NULL)
		err(1, "malloc");

	node->next = NULL;
	node->cp = NULL;
	node->len = 0;

	return (node);
}
/*
 * Release one node together with the token text it owns.  A NULL node
 * is accepted and ignored.  The node's successor is left untouched.
 */
static void
rand_node_free(struct rand_node *n)
{
	if (n == NULL)
		return;

	/* free(NULL) is a no-op, so an empty node needs no special case. */
	free(n->cp);
	free(n);
}
/*
 * Release the node n and every node reachable from it through ->next.
 * A NULL argument is accepted and ignored.
 *
 * The list holds one node per input line or word, so it can be very
 * long.  It is therefore walked iteratively: recursing once per node
 * would need stack space proportional to the size of the input.
 */
static void
rand_node_free_rec(struct rand_node *n)
{
	struct rand_node *next;

	while (n != NULL) {
		/* Fetch the successor first; rand_node_free() destroys n. */
		next = n->next;
		rand_node_free(n);
		n = next;
	}
}
/*
 * Put the node n at the end of the global token list and return it.
 * When the list is empty n becomes both its head and its tail.
 * n->next is not modified here.
 */
static struct rand_node *
rand_node_append(struct rand_node *n)
{
	if (rand_root != NULL)
		rand_tail->next = n;
	else
		rand_root = n;
	rand_tail = n;

	return (n);
}
int randomize_fd(int fd, int type, int unique, double denom)
{
u_char *buf, *p;
u_int numnode, j, selected, slen;
struct rand_node *n, *prev;
int bufc, bufleft, buflen, eof, fndstr, i, len, ret;
rand_root = rand_tail = NULL;
bufc = bufleft = eof = fndstr = numnode = 0;
if (type == RANDOM_TYPE_UNSET)
type = RANDOM_TYPE_LINES;
buflen = sizeof(u_char) * MAXBSIZE;
buf = (u_char *)malloc(buflen);
if (buf == NULL)
err(1, "malloc");
while (!eof) {
/* Check to see if we have bits in the buffer */
if (bufleft == 0) {
len = read(fd, buf, buflen);
if (len == -1)
err(1, "read");
else if (len == 0)
break;
else if (len < buflen) {
buflen = len;
eof++;
}
bufleft = len;
}
/* Look for a newline */
for (i = bufc; i <= buflen; i++, bufleft--) {
if (i == buflen) {
if (fndstr) {
if (!eof) {
memmove(buf, &buf[bufc], i - bufc);
i = i - bufc;
bufc = 0;
len = read(fd, &buf[i], buflen - i);
if (len == -1)
err(1, "read");
else if (len == 0) {
eof++;
break;
} else if (len < buflen -i )
buflen = i + len;
bufleft = len;
fndstr = 0;
}
} else {
p = (u_char *)realloc(buf, buflen * 2);
if (p == NULL)
err(1, "realloc");
buf = p;
if (!eof) {
len = read(fd, &buf[i], buflen);
if (len == -1)
err(1, "read");
else if (len == 0) {
eof++;
break;
} else if (len < buflen -i )
buflen = len;
bufleft = len;
}
buflen *= 2;
}
}
if ((type == RANDOM_TYPE_LINES && buf[i] == '\n') ||
(type == RANDOM_TYPE_WORDS && isspace((int)buf[i])) ||
(eof && i == buflen - 1)) {
n = rand_node_allocate();
slen = i - bufc;
n->len = slen + 2;
n->cp = (u_char *)malloc(slen + 2);
if (n->cp == NULL)
err(1, "malloc");
memmove(n->cp, &buf[bufc], slen);
n->cp[slen] = buf[i];
n->cp[slen + 1] = '\0';
bufc = i + 1;
fndstr = 1;
rand_node_append(n);
numnode++;
}
}
}
(void)close(fd);
for (i = numnode; i > 0; i--) {
selected = ((int)denom * random())/(((double)RAND_MAX + 1) / numnode);
for (j = 0, prev = n = rand_root; n != NULL; j++, prev = n, n = n->next) {
if (j == selected) {
ret = printf("%.*s", n->len - 1, n->cp);
if (ret < 0)
err(1, "printf");
if (unique) {
if (n == rand_root)
rand_root = n->next;
if (n == rand_tail)
rand_tail = prev;
prev->next = n->next;
rand_node_free(n);
numnode--;
break;
}
}
}
}
fflush(stdout);
if (!unique)
rand_node_free_rec(rand_root);
return(0);
}

View File

@ -0,0 +1,56 @@
/*
* Copyright (C) 2003 Sean Chittenden <seanc@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef __RANDOMIZE_FD__
#define __RANDOMIZE_FD__
#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
/*
 * Values for the `type' argument of randomize_fd().
 * UNSET: no mode was chosen; randomize_fd() treats it like LINES.
 * LINES: tokens end at a newline character.
 * WORDS: tokens end at any character for which isspace(3) is true.
 */
#define RANDOM_TYPE_UNSET 0
#define RANDOM_TYPE_LINES 1
#define RANDOM_TYPE_WORDS 2
/*
 * One token (a line or a word) read by randomize_fd(), kept on a singly
 * linked list.
 *	cp	allocated, NUL-terminated copy of the token including the
 *		delimiter that ended it; owned by the node
 *	len	number of bytes stored in cp, counting the terminating NUL
 *	next	following node on the list, or NULL for the last one
 */
struct rand_node {
u_char *cp;
u_int len;
struct rand_node *next;
};
/*
 * Read tokens from fd, close it, and print the tokens to standard output
 * in random order.  `type' is one of the RANDOM_TYPE_* values, a non-zero
 * `unique' keeps a printed token from being chosen again, and `denom'
 * scales the random selection.  Returns 0; allocation, read and output
 * errors terminate the program through err(3).
 */
int randomize_fd(int fd, int type, int unique, double denom);
#endif