Improve input parsing:

Add "-C <column>" and "-d <delims>" options to chop up input lines.

Make '#' a comment character, rest of line is ignored.

Submitted by: Dmitry Morozovsky <marck@rinet.ru>
This commit is contained in:
phk 2006-08-28 08:27:02 +00:00
parent 995c93345f
commit 40d4b9bdbe
2 changed files with 72 additions and 28 deletions

View File

@ -466,13 +466,14 @@ DumpPlot(void)
static struct dataset *
ReadSet(char *n)
ReadSet(char *n, int column, char *delim)
{
FILE *f;
char buf[BUFSIZ], *p;
char buf[BUFSIZ], *p, *t;
struct dataset *s;
double d;
int line;
int i;
if (n == NULL) {
f = stdin;
@ -490,15 +491,20 @@ ReadSet(char *n)
line = 0;
while (fgets(buf, sizeof buf, f) != NULL) {
line++;
p = strchr(buf, '#');
if (p != NULL)
*p = '\0';
p = buf + strlen(buf) - 1;
while (p >= buf && isspace(*p)) {
*p = '\0';
p--;
i = strlen(buf);
if (buf[i-1] == '\n')
buf[i-1] = '\0';
for (i = 1, t = strtok(buf, delim);
t != NULL && *t != '#';
i++, t = strtok(NULL, delim)) {
if (i == column)
break;
}
d = strtod(buf, &p);
if (t == NULL || *t == '#')
continue;
d = strtod(t, &p);
if (p != NULL && *p != '\0')
err(2, "Invalid data on line %d in %s\n", line, n);
if (*buf != '\0')
@ -520,7 +526,7 @@ usage(char const *whine)
fprintf(stderr, "%s\n", whine);
fprintf(stderr,
"Usage: ministat [ -c confidence ] [-ns] [-w width] [file [file ...]]\n");
"Usage: ministat [-C column] [-c confidence] [-d delimiter(s)] [-ns] [-w width] [file [file ...]]\n");
fprintf(stderr, "\tconfidence = {");
for (i = 0; i < NCONF; i++) {
fprintf(stderr, "%s%g%%",
@ -528,6 +534,8 @@ usage(char const *whine)
studentpct[i]);
}
fprintf(stderr, "}\n");
fprintf(stderr, "\t-C : column number to extract (starts and defaults to 1)\n");
fprintf(stderr, "\t-d : delimiter(s) string, default to \" \\t\"\n");
fprintf(stderr, "\t-n : print summary statistics only, no graph/test\n");
fprintf(stderr, "\t-s : print avg/median/stddev bars on separate lines\n");
fprintf(stderr, "\t-w : width of graph/test output (default 74 or terminal width)\n");
@ -540,8 +548,10 @@ main(int argc, char **argv)
struct dataset *ds[7];
int nds;
double a;
char *delim = " \t";
char *p;
int c, i, ci;
int column = 1;
int flag_s = 0;
int flag_n = 0;
int termwidth = 74;
@ -557,8 +567,15 @@ main(int argc, char **argv)
}
ci = -1;
while ((c = getopt(argc, argv, "c:snw:")) != -1)
while ((c = getopt(argc, argv, "C:c:d:snw:")) != -1)
switch (c) {
case 'C':
column = strtol(optarg, &p, 10);
if (p != NULL && *p != '\0')
usage("Invalid column number.");
if (column <= 0)
usage("Column number should be positive.");
break;
case 'c':
a = strtod(optarg, &p);
if (p != NULL && *p != '\0')
@ -569,6 +586,11 @@ main(int argc, char **argv)
if (ci == -1)
usage("No support for confidence level");
break;
case 'd':
if (*optarg == '\0')
usage("Can't use empty delimiter string");
delim = optarg;
break;
case 'n':
flag_n = 1;
break;
@ -592,14 +614,14 @@ main(int argc, char **argv)
argv += optind;
if (argc == 0) {
ds[0] = ReadSet("-");
ds[0] = ReadSet("-", column, delim);
nds = 1;
} else {
if (argc > (MAX_DS - 1))
usage("Too many datasets.");
nds = argc;
for (i = 0; i < nds; i++)
ds[i] = ReadSet(argv[i]);
ds[i] = ReadSet(argv[i], column, delim);
}
for (i = 0; i < nds; i++)

View File

@ -466,13 +466,14 @@ DumpPlot(void)
static struct dataset *
ReadSet(char *n)
ReadSet(char *n, int column, char *delim)
{
FILE *f;
char buf[BUFSIZ], *p;
char buf[BUFSIZ], *p, *t;
struct dataset *s;
double d;
int line;
int i;
if (n == NULL) {
f = stdin;
@ -490,15 +491,20 @@ ReadSet(char *n)
line = 0;
while (fgets(buf, sizeof buf, f) != NULL) {
line++;
p = strchr(buf, '#');
if (p != NULL)
*p = '\0';
p = buf + strlen(buf) - 1;
while (p >= buf && isspace(*p)) {
*p = '\0';
p--;
i = strlen(buf);
if (buf[i-1] == '\n')
buf[i-1] = '\0';
for (i = 1, t = strtok(buf, delim);
t != NULL && *t != '#';
i++, t = strtok(NULL, delim)) {
if (i == column)
break;
}
d = strtod(buf, &p);
if (t == NULL || *t == '#')
continue;
d = strtod(t, &p);
if (p != NULL && *p != '\0')
err(2, "Invalid data on line %d in %s\n", line, n);
if (*buf != '\0')
@ -520,7 +526,7 @@ usage(char const *whine)
fprintf(stderr, "%s\n", whine);
fprintf(stderr,
"Usage: ministat [ -c confidence ] [-ns] [-w width] [file [file ...]]\n");
"Usage: ministat [-C column] [-c confidence] [-d delimiter(s)] [-ns] [-w width] [file [file ...]]\n");
fprintf(stderr, "\tconfidence = {");
for (i = 0; i < NCONF; i++) {
fprintf(stderr, "%s%g%%",
@ -528,6 +534,8 @@ usage(char const *whine)
studentpct[i]);
}
fprintf(stderr, "}\n");
fprintf(stderr, "\t-C : column number to extract (starts and defaults to 1)\n");
fprintf(stderr, "\t-d : delimiter(s) string, default to \" \\t\"\n");
fprintf(stderr, "\t-n : print summary statistics only, no graph/test\n");
fprintf(stderr, "\t-s : print avg/median/stddev bars on separate lines\n");
fprintf(stderr, "\t-w : width of graph/test output (default 74 or terminal width)\n");
@ -540,8 +548,10 @@ main(int argc, char **argv)
struct dataset *ds[7];
int nds;
double a;
char *delim = " \t";
char *p;
int c, i, ci;
int column = 1;
int flag_s = 0;
int flag_n = 0;
int termwidth = 74;
@ -557,8 +567,15 @@ main(int argc, char **argv)
}
ci = -1;
while ((c = getopt(argc, argv, "c:snw:")) != -1)
while ((c = getopt(argc, argv, "C:c:d:snw:")) != -1)
switch (c) {
case 'C':
column = strtol(optarg, &p, 10);
if (p != NULL && *p != '\0')
usage("Invalid column number.");
if (column <= 0)
usage("Column number should be positive.");
break;
case 'c':
a = strtod(optarg, &p);
if (p != NULL && *p != '\0')
@ -569,6 +586,11 @@ main(int argc, char **argv)
if (ci == -1)
usage("No support for confidence level");
break;
case 'd':
if (*optarg == '\0')
usage("Can't use empty delimiter string");
delim = optarg;
break;
case 'n':
flag_n = 1;
break;
@ -592,14 +614,14 @@ main(int argc, char **argv)
argv += optind;
if (argc == 0) {
ds[0] = ReadSet("-");
ds[0] = ReadSet("-", column, delim);
nds = 1;
} else {
if (argc > (MAX_DS - 1))
usage("Too many datasets.");
nds = argc;
for (i = 0; i < nds; i++)
ds[i] = ReadSet(argv[i]);
ds[i] = ReadSet(argv[i], column, delim);
}
for (i = 0; i < nds; i++)