Add the -m option, which counts characters (as opposed to -c, which
counts bytes). In locales that don't have multibyte characters, -m is effectively an alias for -c. This brings wc(1) up to P1003.1-2001 conformance.
This commit is contained in:
parent
63759e6467
commit
1e238aa8c3
@ -40,10 +40,10 @@
|
||||
.Os
|
||||
.Sh NAME
|
||||
.Nm wc
|
||||
.Nd word, line, and byte count
|
||||
.Nd word, line, character, and byte count
|
||||
.Sh SYNOPSIS
|
||||
.Nm
|
||||
.Op Fl clw
|
||||
.Op Fl clmw
|
||||
.Op Ar
|
||||
.Sh DESCRIPTION
|
||||
The
|
||||
@ -71,6 +71,12 @@ is written to the standard output.
|
||||
.It Fl l
|
||||
The number of lines in each input file
|
||||
is written to the standard output.
|
||||
.It Fl m
|
||||
The number of characters in each input file is written to the standard output.
|
||||
If the current locale does not support multibyte characters, this
|
||||
is equivalent to the
|
||||
.Fl c
|
||||
option.
|
||||
.It Fl w
|
||||
The number of words in each input file
|
||||
is written to the standard output.
|
||||
@ -79,10 +85,36 @@ is written to the standard output.
|
||||
When an option is specified,
|
||||
.Nm
|
||||
only reports the information requested by that option.
|
||||
The default action is equivalent to specifying all of the flags.
|
||||
The default action is equivalent to specifying the
|
||||
.Fl c ,
|
||||
.Fl l
|
||||
and
|
||||
.Fl w
|
||||
options.
|
||||
.Pp
|
||||
If no files are specified, the standard input is used and no
|
||||
file name is displayed.
|
||||
.Sh ENVIRONMENT
|
||||
The
|
||||
.Ev LANG ,
|
||||
.Ev LC_ALL
|
||||
and
|
||||
.Ev LC_CTYPE
|
||||
environment variables affect the execution of
|
||||
.Nm
|
||||
as described in
|
||||
.Xr environ 7
|
||||
when the
|
||||
.Fl m
|
||||
option is specified.
|
||||
.Sh EXAMPLES
|
||||
Count the number of characters, words and lines in each of the files
|
||||
.Pa report1
|
||||
and
|
||||
.Pa report2
|
||||
as well as the totals for both:
|
||||
.Pp
|
||||
.Dl "wc -mlw report1 report2"
|
||||
.Sh DIAGNOSTICS
|
||||
.Ex -std
|
||||
.Sh SEE ALSO
|
||||
@ -108,7 +140,7 @@ function, as required by
|
||||
The
|
||||
.Nm
|
||||
utility conforms to
|
||||
.St -p1003.2 .
|
||||
.St -p1003.1-2001 .
|
||||
.Sh HISTORY
|
||||
A
|
||||
.Nm
|
||||
|
@ -51,6 +51,7 @@ __FBSDID("$FreeBSD$");
|
||||
|
||||
#include <ctype.h>
|
||||
#include <err.h>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <locale.h>
|
||||
#include <stdint.h>
|
||||
@ -60,7 +61,7 @@ __FBSDID("$FreeBSD$");
|
||||
#include <unistd.h>
|
||||
|
||||
uintmax_t tlinect, twordct, tcharct;
|
||||
int doline, doword, dochar;
|
||||
int doline, doword, dochar, domulti;
|
||||
|
||||
static int cnt(const char *);
|
||||
static void usage(void);
|
||||
@ -74,7 +75,7 @@ main(argc, argv)
|
||||
|
||||
(void) setlocale(LC_CTYPE, "");
|
||||
|
||||
while ((ch = getopt(argc, argv, "lwc")) != -1)
|
||||
while ((ch = getopt(argc, argv, "clmw")) != -1)
|
||||
switch((char)ch) {
|
||||
case 'l':
|
||||
doline = 1;
|
||||
@ -84,6 +85,11 @@ main(argc, argv)
|
||||
break;
|
||||
case 'c':
|
||||
dochar = 1;
|
||||
domulti = 0;
|
||||
break;
|
||||
case 'm':
|
||||
domulti = 1;
|
||||
dochar = 0;
|
||||
break;
|
||||
case '?':
|
||||
default:
|
||||
@ -93,7 +99,7 @@ main(argc, argv)
|
||||
argc -= optind;
|
||||
|
||||
/* Wc's flags are on by default. */
|
||||
if (doline + doword + dochar == 0)
|
||||
if (doline + doword + dochar + domulti == 0)
|
||||
doline = doword = dochar = 1;
|
||||
|
||||
errors = 0;
|
||||
@ -117,7 +123,7 @@ main(argc, argv)
|
||||
(void)printf(" %7ju", tlinect);
|
||||
if (doword)
|
||||
(void)printf(" %7ju", twordct);
|
||||
if (dochar)
|
||||
if (dochar || domulti)
|
||||
(void)printf(" %7ju", tcharct);
|
||||
(void)printf(" total\n");
|
||||
}
|
||||
@ -130,10 +136,12 @@ cnt(file)
|
||||
{
|
||||
struct stat sb;
|
||||
uintmax_t linect, wordct, charct;
|
||||
int fd, len;
|
||||
ssize_t nread;
|
||||
int clen, fd, len, warned;
|
||||
short gotsp;
|
||||
u_char *p;
|
||||
u_char buf[MAXBSIZE], ch;
|
||||
wchar_t wch;
|
||||
|
||||
linect = wordct = charct = 0;
|
||||
if (file == NULL) {
|
||||
@ -144,7 +152,7 @@ cnt(file)
|
||||
warn("%s: open", file);
|
||||
return (1);
|
||||
}
|
||||
if (doword)
|
||||
if (doword || (domulti && MB_CUR_MAX != 1))
|
||||
goto word;
|
||||
/*
|
||||
* Line counting is split out because it's a lot faster to get
|
||||
@ -176,7 +184,7 @@ cnt(file)
|
||||
* If all we need is the number of characters and it's a
|
||||
* regular or linked file, just stat the puppy.
|
||||
*/
|
||||
if (dochar) {
|
||||
if (dochar || domulti) {
|
||||
if (fstat(fd, &sb)) {
|
||||
warn("%s: fstat", file);
|
||||
(void)close(fd);
|
||||
@ -192,22 +200,41 @@ cnt(file)
|
||||
}
|
||||
|
||||
/* Do it the hard way... */
|
||||
word: for (gotsp = 1; (len = read(fd, buf, MAXBSIZE));) {
|
||||
if (len == -1) {
|
||||
word: gotsp = 1;
|
||||
len = 0;
|
||||
warned = 0;
|
||||
while ((nread = read(fd, buf + len, MAXBSIZE - len)) != 0) {
|
||||
if (nread == -1) {
|
||||
warn("%s: read", file);
|
||||
(void)close(fd);
|
||||
return (1);
|
||||
}
|
||||
/*
|
||||
* This loses in the presence of multi-byte characters.
|
||||
* To do it right would require a function to return a
|
||||
* character while knowing how many bytes it consumed.
|
||||
*/
|
||||
charct += len;
|
||||
for (p = buf; len--;) {
|
||||
ch = *p++;
|
||||
if (ch == '\n')
|
||||
len += nread;
|
||||
p = buf;
|
||||
while (len > 0) {
|
||||
if (!domulti || MB_CUR_MAX == 1) {
|
||||
clen = 1;
|
||||
wch = (unsigned char)*p;
|
||||
} else if ((clen = mbtowc(&wch, p, len)) <= 0) {
|
||||
if (len > MB_CUR_MAX) {
|
||||
clen = 1;
|
||||
wch = (unsigned char)*p;
|
||||
if (!warned) {
|
||||
errno = EILSEQ;
|
||||
warn("%s", file);
|
||||
warned = 1;
|
||||
}
|
||||
} else {
|
||||
memmove(buf, p, len);
|
||||
break;
|
||||
}
|
||||
}
|
||||
charct++;
|
||||
len -= clen;
|
||||
p += clen;
|
||||
if (wch == L'\n')
|
||||
++linect;
|
||||
/* XXX Non-portable; should use iswspace() */
|
||||
if (isspace(ch))
|
||||
gotsp = 1;
|
||||
else if (gotsp) {
|
||||
@ -224,7 +251,7 @@ word: for (gotsp = 1; (len = read(fd, buf, MAXBSIZE));) {
|
||||
twordct += wordct;
|
||||
(void)printf(" %7ju", wordct);
|
||||
}
|
||||
if (dochar) {
|
||||
if (dochar || domulti) {
|
||||
tcharct += charct;
|
||||
(void)printf(" %7ju", charct);
|
||||
}
|
||||
@ -235,6 +262,6 @@ word: for (gotsp = 1; (len = read(fd, buf, MAXBSIZE));) {
|
||||
static void
|
||||
usage()
|
||||
{
|
||||
(void)fprintf(stderr, "usage: wc [-clw] [file ...]\n");
|
||||
(void)fprintf(stderr, "usage: wc [-clmw] [file ...]\n");
|
||||
exit(1);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user