Add support for multibyte characters, loosely based on Bruno Haible's

work in the util-linux package.
This commit is contained in:
Tim J. Robbins 2004-07-29 07:23:37 +00:00
parent a351559479
commit c3fae7442e
2 changed files with 41 additions and 18 deletions

View File

@ -35,7 +35,7 @@
.\" @(#)col.1 8.1 (Berkeley) 6/29/93
.\" $FreeBSD$
.\"
.Dd July 15, 2004
.Dd July 29, 2004
.Dt COL 1
.Os
.Sh NAME
@ -127,6 +127,15 @@ sure the character set is correct when they are output.
If the input attempts to back up to the last flushed line,
.Nm
will display a warning message.
.Sh ENVIRONMENT
The
.Ev LANG , LC_ALL
and
.Ev LC_CTYPE
environment variables affect the execution of
.Nm
as described in
.Xr environ 7 .
.Sh SEE ALSO
.Xr expand 1 ,
.Xr nroff 1 ,
@ -142,7 +151,3 @@ A
command
appeared in
.At v6 .
.Sh BUGS
The
.Nm
utility does not recognize multibyte characters.

View File

@ -49,13 +49,15 @@ static char sccsid[] = "@(#)col.c 8.5 (Berkeley) 5/4/95";
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <ctype.h>
#include <err.h>
#include <locale.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <locale.h>
#include <wchar.h>
#include <wctype.h>
#define BS '\b' /* backspace */
#define TAB '\t' /* tab */
@ -80,7 +82,8 @@ typedef struct char_str {
#define CS_ALTERNATE 2
short c_column; /* column character is in */
CSET c_set; /* character set (currently only 2) */
char c_char; /* character in question */
wchar_t c_char; /* character in question */
int c_width; /* character width */
} CHAR;
typedef struct line_str LINE;
@ -113,14 +116,14 @@ int pass_unknown_seqs; /* pass unknown control sequences */
#define PUTC(ch) \
do { \
if (putchar(ch) == EOF) \
if (putwchar(ch) == WEOF) \
errx(1, "write error"); \
} while (0)
int
main(int argc, char **argv)
{
int ch;
wint_t ch;
CHAR *c;
CSET cur_set; /* current character set */
LINE *l; /* current line */
@ -130,7 +133,7 @@ main(int argc, char **argv)
int max_line; /* max value of cur_line */
int this_line; /* line l points to */
int nflushd_lines; /* number of lines that were flushed */
int adjust, opt, warned;
int adjust, opt, warned, width;
(void)setlocale(LC_CTYPE, "");
@ -173,8 +176,8 @@ main(int argc, char **argv)
cur_set = last_set = CS_NORMAL;
lines = l = alloc_line();
while ((ch = getchar()) != EOF) {
if (!isgraph(ch)) {
while ((ch = getwchar()) != WEOF) {
if (!iswgraph(ch)) {
switch (ch) {
case BS: /* can't go back further */
if (cur_col == 0)
@ -185,7 +188,7 @@ main(int argc, char **argv)
cur_col = 0;
continue;
case ESC: /* just ignore EOF */
switch(getchar()) {
switch(getwchar()) {
case RLF:
cur_line -= 2;
break;
@ -221,6 +224,11 @@ main(int argc, char **argv)
cur_line -= 2;
continue;
}
if (iswspace(ch)) {
if ((width = wcwidth(ch)) > 0)
cur_col += width;
continue;
}
if (!pass_unknown_seqs)
continue;
}
@ -294,6 +302,7 @@ main(int argc, char **argv)
c->c_char = ch;
c->c_set = cur_set;
c->c_column = cur_col;
c->c_width = wcwidth(ch);
/*
* If things are put in out of order, they will need sorting
* when it is flushed.
@ -302,8 +311,11 @@ main(int argc, char **argv)
l->l_needs_sort = 1;
else
l->l_max_col = cur_col;
cur_col++;
if (c->c_width > 0)
cur_col += c->c_width;
}
if (ferror(stdin))
err(1, NULL);
if (max_line == 0)
exit(0); /* no lines, so just exit */
@ -386,7 +398,7 @@ void
flush_line(LINE *l)
{
CHAR *c, *endc;
int nchars, last_col, this_col;
int i, nchars, last_col, this_col;
last_col = 0;
nchars = l->l_line_len;
@ -438,8 +450,12 @@ flush_line(LINE *l)
} while (--nchars > 0 && this_col == endc->c_column);
/* if -b only print last character */
if (no_backspaces)
if (no_backspaces) {
c = endc - 1;
if (nchars > 0 &&
this_col + c->c_width > endc->c_column)
continue;
}
if (this_col > last_col) {
int nspace = this_col - last_col;
@ -464,7 +480,6 @@ flush_line(LINE *l)
PUTC(' ');
last_col = this_col;
}
last_col++;
for (;;) {
if (c->c_set != last_set) {
@ -478,10 +493,13 @@ flush_line(LINE *l)
last_set = c->c_set;
}
PUTC(c->c_char);
if ((c + 1) < endc)
for (i = 0; i < c->c_width; i++)
PUTC('\b');
if (++c >= endc)
break;
PUTC('\b');
}
last_col += (c - 1)->c_width;
}
}