Implement the -c option correctly in locales with multibyte characters
instead of treating it as a synonym for -b.
This commit is contained in:
parent
39bd8f0099
commit
364d0a915c
Notes:
svn2git
2020-12-20 02:59:44 +00:00
svn path=/head/; revision=131194
@ -35,7 +35,7 @@
|
||||
.\" @(#)cut.1 8.1 (Berkeley) 6/6/93
|
||||
.\" $FreeBSD$
|
||||
.\"
|
||||
.Dd June 6, 1993
|
||||
.Dd June 28, 2004
|
||||
.Dt CUT 1
|
||||
.Os
|
||||
.Sh NAME
|
||||
@ -122,11 +122,9 @@ The
|
||||
.Ev LANG , LC_ALL
|
||||
and
|
||||
.Ev LC_CTYPE
|
||||
environment variables affect the execution of
|
||||
environment variables affect the execution of the
|
||||
.Nm
|
||||
if the
|
||||
.Fl n
|
||||
option is specified.
|
||||
utility.
|
||||
Their effect is described in
|
||||
.Xr environ 7 .
|
||||
.Sh EXAMPLES
|
||||
@ -158,13 +156,6 @@ command appeared in
|
||||
System III
|
||||
.Ux .
|
||||
.Sh BUGS
|
||||
The
|
||||
.Fl c
|
||||
option is a synonym for the
|
||||
.Fl b
|
||||
option, which causes incorrect behaviour in locales that support
|
||||
multibyte characters.
|
||||
.Pp
|
||||
When operating on fields
|
||||
.Fl ( f
|
||||
option is specified),
|
||||
|
@ -61,9 +61,10 @@ int fflag;
|
||||
int nflag;
|
||||
int sflag;
|
||||
|
||||
void b_n_cut(FILE *, const char *);
|
||||
void c_cut(FILE *, const char *);
|
||||
void f_cut(FILE *, const char *);
|
||||
int b_cut(FILE *, const char *);
|
||||
int b_n_cut(FILE *, const char *);
|
||||
int c_cut(FILE *, const char *);
|
||||
int f_cut(FILE *, const char *);
|
||||
void get_list(char *);
|
||||
void needpos(size_t);
|
||||
static void usage(void);
|
||||
@ -72,7 +73,7 @@ int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
FILE *fp;
|
||||
void (*fcn)(FILE *, const char *);
|
||||
int (*fcn)(FILE *, const char *);
|
||||
int ch, rval;
|
||||
|
||||
setlocale(LC_ALL, "");
|
||||
@ -80,19 +81,13 @@ main(int argc, char *argv[])
|
||||
fcn = NULL;
|
||||
dchar = '\t'; /* default delimiter is \t */
|
||||
|
||||
/*
|
||||
* Since we don't support multi-byte characters, the -c and -b
|
||||
* options are equivalent.
|
||||
*/
|
||||
while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1)
|
||||
switch(ch) {
|
||||
case 'b':
|
||||
fcn = c_cut;
|
||||
get_list(optarg);
|
||||
bflag = 1;
|
||||
break;
|
||||
case 'c':
|
||||
fcn = c_cut;
|
||||
get_list(optarg);
|
||||
cflag = 1;
|
||||
break;
|
||||
@ -102,7 +97,6 @@ main(int argc, char *argv[])
|
||||
break;
|
||||
case 'f':
|
||||
get_list(optarg);
|
||||
fcn = f_cut;
|
||||
fflag = 1;
|
||||
break;
|
||||
case 's':
|
||||
@ -126,14 +120,18 @@ main(int argc, char *argv[])
|
||||
else if (!bflag && nflag)
|
||||
usage();
|
||||
|
||||
if (nflag)
|
||||
fcn = b_n_cut;
|
||||
if (fflag)
|
||||
fcn = f_cut;
|
||||
else if (cflag)
|
||||
fcn = MB_CUR_MAX > 1 ? c_cut : b_cut;
|
||||
else if (bflag)
|
||||
fcn = nflag && MB_CUR_MAX > 1 ? b_n_cut : b_cut;
|
||||
|
||||
rval = 0;
|
||||
if (*argv)
|
||||
for (; *argv; ++argv) {
|
||||
if (strcmp(*argv, "-") == 0)
|
||||
fcn(stdin, "stdin");
|
||||
rval |= fcn(stdin, "stdin");
|
||||
else {
|
||||
if (!(fp = fopen(*argv, "r"))) {
|
||||
warn("%s", *argv);
|
||||
@ -145,7 +143,7 @@ main(int argc, char *argv[])
|
||||
}
|
||||
}
|
||||
else
|
||||
fcn(stdin, "stdin");
|
||||
rval = fcn(stdin, "stdin");
|
||||
exit(rval);
|
||||
}
|
||||
|
||||
@ -229,12 +227,41 @@ needpos(size_t n)
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Cut based on byte positions, one byte at a time with getc()/putchar().
 *
 * For every input line, up to maxval leading bytes are examined and each
 * one is written to standard output when its entry in positions[] is
 * non-zero.  Whatever follows those maxval bytes is either copied through
 * (autostop set) or discarded (autostop clear), and a newline is written
 * to end the output line.
 *
 * fp is the stream to read; fname is not used by this function.
 * Always returns 0.  NOTE(review): getc() returns EOF for read errors as
 * well as end of file, and ferror() is not consulted here, so a read
 * error is not reported to the caller.
 */
int
|
||||
b_cut(FILE *fp, const char *fname)
|
||||
{
|
||||
int ch, col;
|
||||
char *pos;
|
||||
|
||||
ch = 0;
|
||||
for (;;) {	/* one iteration per input line; left only via return */
|
||||
pos = positions + 1;	/* entry 0 is skipped; presumably columns are 1-based -- confirm in get_list() */
|
||||
for (col = maxval; col; --col) {	/* look at no more than maxval bytes */
|
||||
if ((ch = getc(fp)) == EOF)
|
||||
return (0);	/* input exhausted: no newline is written for a partial line */
|
||||
if (ch == '\n')
|
||||
break;	/* line is shorter than maxval bytes */
|
||||
if (*pos++)	/* non-zero entry: this byte position is selected */
|
||||
(void)putchar(ch);
|
||||
}
|
||||
if (ch != '\n') {	/* maxval bytes consumed without reaching end of line */
|
||||
if (autostop)	/* presumably set for an open-ended range -- confirm in get_list() */
|
||||
while ((ch = getc(fp)) != EOF && ch != '\n')
|
||||
(void)putchar(ch);	/* copy the remainder of the line */
|
||||
else
|
||||
while ((ch = getc(fp)) != EOF && ch != '\n');	/* empty body: drop the remainder */
|
||||
}
|
||||
(void)putchar('\n');
|
||||
}
|
||||
return (0);	/* not reached: the loop above exits only through return */
|
||||
}
|
||||
|
||||
/*
|
||||
* Cut based on byte positions, taking care not to split multibyte characters.
|
||||
* Although this function also handles the case where -n is not specified,
|
||||
* c_cut() ought to be much faster.
|
||||
* b_cut() ought to be much faster.
|
||||
*/
|
||||
void
|
||||
int
|
||||
b_n_cut(FILE *fp, const char *fname)
|
||||
{
|
||||
size_t col, i, lbuflen;
|
||||
@ -293,37 +320,45 @@ b_n_cut(FILE *fp, const char *fname)
|
||||
if (lbuflen > 0)
|
||||
putchar('\n');
|
||||
}
|
||||
return (warned);
|
||||
}
|
||||
|
||||
void
|
||||
c_cut(FILE *fp, const char *fname __unused)
|
||||
int
|
||||
c_cut(FILE *fp, const char *fname)
|
||||
{
|
||||
int ch, col;
|
||||
wint_t ch;
|
||||
int col;
|
||||
char *pos;
|
||||
|
||||
ch = 0;
|
||||
for (;;) {
|
||||
pos = positions + 1;
|
||||
for (col = maxval; col; --col) {
|
||||
if ((ch = getc(fp)) == EOF)
|
||||
return;
|
||||
if ((ch = getwc(fp)) == WEOF)
|
||||
goto out;
|
||||
if (ch == '\n')
|
||||
break;
|
||||
if (*pos++)
|
||||
(void)putchar(ch);
|
||||
(void)putwchar(ch);
|
||||
}
|
||||
if (ch != '\n') {
|
||||
if (autostop)
|
||||
while ((ch = getc(fp)) != EOF && ch != '\n')
|
||||
(void)putchar(ch);
|
||||
while ((ch = getwc(fp)) != WEOF && ch != '\n')
|
||||
(void)putwchar(ch);
|
||||
else
|
||||
while ((ch = getc(fp)) != EOF && ch != '\n');
|
||||
while ((ch = getwc(fp)) != WEOF && ch != '\n');
|
||||
}
|
||||
(void)putchar('\n');
|
||||
(void)putwchar('\n');
|
||||
}
|
||||
out:
|
||||
if (ferror(fp)) {
|
||||
warn("%s", fname);
|
||||
return (1);
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
void
|
||||
int
|
||||
f_cut(FILE *fp, const char *fname __unused)
|
||||
{
|
||||
int ch, field, isdelim;
|
||||
@ -386,6 +421,7 @@ f_cut(FILE *fp, const char *fname __unused)
|
||||
}
|
||||
if (mlbuf != NULL)
|
||||
free(mlbuf);
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
|
Loading…
Reference in New Issue
Block a user