Rewrite input processing to not exit with error on the first EILSEQ found

in the input data but fallback to "binary comparison" instead.

POSIX says: "The input files shall be text files", nothing more,
so the text file with illegal sequence is valid input.
BTW, GNU sort does not fails on EILSEQ too.
This commit is contained in:
Andrey A. Chernov 2010-03-08 22:27:46 +00:00
parent 157fb21c99
commit 8f82dfe5d1
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=204896

View File

@ -53,6 +53,7 @@ __FBSDID("$FreeBSD$");
#include <limits.h>
#include <locale.h>
#include <stdint.h>
#define _WITH_GETLINE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@ -60,40 +61,31 @@ __FBSDID("$FreeBSD$");
#include <wchar.h>
#include <wctype.h>
#define INITLINELEN (LINE_MAX + 1)
#define MAXLINELEN ((SIZE_MAX / sizeof(wchar_t)) / 2)
const wchar_t *tabs[] = { L"", L"\t", L"\t\t" };
int iflag;
const char *tabs[] = { "", "\t", "\t\t" };
FILE *file(const char *);
wchar_t *getline(wchar_t *, size_t *, FILE *);
void show(FILE *, const char *, const wchar_t *, wchar_t *, size_t *);
int wcsicoll(const wchar_t *, const wchar_t *);
wchar_t *convert(const char *);
void show(FILE *, const char *, const char *, char **, size_t *);
static void usage(void);
int
main(int argc, char *argv[])
{
int comp, read1, read2;
int ch, flag1, flag2, flag3, iflag;
int ch, flag1, flag2, flag3;
FILE *fp1, *fp2;
const wchar_t *col1, *col2, *col3;
const char *col1, *col2, *col3;
size_t line1len, line2len;
wchar_t *line1, *line2;
const wchar_t **p;
flag1 = flag2 = flag3 = 1;
iflag = 0;
line1len = INITLINELEN;
line2len = INITLINELEN;
line1 = malloc(line1len * sizeof(*line1));
line2 = malloc(line2len * sizeof(*line2));
if (line1 == NULL || line2 == NULL)
err(1, "malloc");
char *line1, *line2;
ssize_t n1, n2;
wchar_t *tline1, *tline2;
const char **p;
(void) setlocale(LC_ALL, "");
flag1 = flag2 = flag3 = 1;
while ((ch = getopt(argc, argv, "123i")) != -1)
switch(ch) {
case '1':
@ -131,41 +123,57 @@ main(int argc, char *argv[])
if (flag3)
col3 = *p;
line1len = line2len = 0;
line1 = line2 = NULL;
n1 = n2 = -1;
for (read1 = read2 = 1;;) {
/* read next line, check for EOF */
if (read1) {
line1 = getline(line1, &line1len, fp1);
if (line1 == NULL && ferror(fp1))
n1 = getline(&line1, &line1len, fp1);
if (n1 < 0 && ferror(fp1))
err(1, "%s", argv[0]);
if (n1 > 0 && line1[n1 - 1] == '\n')
line1[n1 - 1] = '\0';
}
if (read2) {
line2 = getline(line2, &line2len, fp2);
if (line2 == NULL && ferror(fp2))
n2 = getline(&line2, &line2len, fp2);
if (n2 < 0 && ferror(fp2))
err(1, "%s", argv[1]);
if (n2 > 0 && line2[n2 - 1] == '\n')
line2[n2 - 1] = '\0';
}
/* if one file done, display the rest of the other file */
if (line1 == NULL) {
if (line2 != NULL && col2 != NULL)
show(fp2, argv[1], col2, line2, &line2len);
if (n1 < 0) {
if (n2 >= 0 && col2 != NULL)
show(fp2, argv[1], col2, &line2, &line2len);
break;
}
if (line2 == NULL) {
if (line1 != NULL && col1 != NULL)
show(fp1, argv[0], col1, line1, &line1len);
if (n2 < 0) {
if (n1 >= 0 && col1 != NULL)
show(fp1, argv[0], col1, &line1, &line1len);
break;
}
tline2 = NULL;
if ((tline1 = convert(line1)) != NULL)
tline2 = convert(line2);
if (tline1 == NULL || tline2 == NULL)
comp = strcmp(line1, line2);
else
comp = wcscoll(tline1, tline2);
if (tline1 != NULL)
free(tline1);
if (tline2 != NULL)
free(tline2);
/* lines are the same */
if(iflag)
comp = wcsicoll(line1, line2);
else
comp = wcscoll(line1, line2);
if (!comp) {
read1 = read2 = 1;
if (col3 != NULL)
(void)printf("%ls%ls\n", col3, line1);
(void)printf("%s%s\n", col3, line1);
continue;
}
@ -174,49 +182,50 @@ main(int argc, char *argv[])
read1 = 1;
read2 = 0;
if (col1 != NULL)
(void)printf("%ls%ls\n", col1, line1);
(void)printf("%s%s\n", col1, line1);
} else {
read1 = 0;
read2 = 1;
if (col2 != NULL)
(void)printf("%ls%ls\n", col2, line2);
(void)printf("%s%s\n", col2, line2);
}
}
exit(0);
}
wchar_t *
getline(wchar_t *buf, size_t *buflen, FILE *fp)
convert(const char *str)
{
size_t bufpos;
wint_t ch;
size_t n;
wchar_t *buf, *p;
bufpos = 0;
while ((ch = getwc(fp)) != WEOF && ch != '\n') {
if (bufpos + 1 >= *buflen) {
*buflen = *buflen * 2;
if (*buflen > MAXLINELEN)
errx(1,
"Maximum line buffer length (%zu) exceeded",
MAXLINELEN);
buf = reallocf(buf, *buflen * sizeof(*buf));
if (buf == NULL)
err(1, "reallocf");
}
buf[bufpos++] = ch;
if ((n = mbstowcs(NULL, str, 0)) == (size_t)-1)
return (NULL);
if ((buf = malloc((n + 1) * sizeof(*buf))) == NULL)
err(1, "malloc");
if (mbstowcs(buf, str, n + 1) != n)
errx(1, "internal mbstowcs() error");
if (iflag) {
for (p = buf; *p != L'\0'; p++)
*p = towlower(*p);
}
buf[bufpos] = '\0';
return (bufpos != 0 || ch == '\n' ? buf : NULL);
return (buf);
}
void
show(FILE *fp, const char *fn, const wchar_t *offset, wchar_t *buf, size_t *buflen)
show(FILE *fp, const char *fn, const char *offset, char **bufp, size_t *buflenp)
{
ssize_t n;
do {
(void)printf("%ls%ls\n", offset, buf);
} while ((buf = getline(buf, buflen, fp)) != NULL);
(void)printf("%s%s\n", offset, *bufp);
if ((n = getline(bufp, buflenp, fp)) < 0)
break;
if (n > 0 && (*bufp)[n - 1] == '\n')
(*bufp)[n - 1] = '\0';
} while (1);
if (ferror(fp))
err(1, "%s", fn);
}
@ -240,52 +249,3 @@ usage(void)
(void)fprintf(stderr, "usage: comm [-123i] file1 file2\n");
exit(1);
}
static size_t wcsicoll_l1_buflen = 0, wcsicoll_l2_buflen = 0;
static wchar_t *wcsicoll_l1_buf = NULL, *wcsicoll_l2_buf = NULL;
int
wcsicoll(const wchar_t *s1, const wchar_t *s2)
{
wchar_t *p;
size_t l1, l2;
size_t new_l1_buflen, new_l2_buflen;
l1 = wcslen(s1) + 1;
l2 = wcslen(s2) + 1;
new_l1_buflen = wcsicoll_l1_buflen;
new_l2_buflen = wcsicoll_l2_buflen;
while (new_l1_buflen < l1) {
if (new_l1_buflen == 0)
new_l1_buflen = INITLINELEN;
else
new_l1_buflen *= 2;
}
while (new_l2_buflen < l2) {
if (new_l2_buflen == 0)
new_l2_buflen = INITLINELEN;
else
new_l2_buflen *= 2;
}
if (new_l1_buflen > wcsicoll_l1_buflen) {
wcsicoll_l1_buf = reallocf(wcsicoll_l1_buf, new_l1_buflen * sizeof(*wcsicoll_l1_buf));
if (wcsicoll_l1_buf == NULL)
err(1, "reallocf");
wcsicoll_l1_buflen = new_l1_buflen;
}
if (new_l2_buflen > wcsicoll_l2_buflen) {
wcsicoll_l2_buf = reallocf(wcsicoll_l2_buf, new_l2_buflen * sizeof(*wcsicoll_l2_buf));
if (wcsicoll_l2_buf == NULL)
err(1, "reallocf");
wcsicoll_l2_buflen = new_l2_buflen;
}
for (p = wcsicoll_l1_buf; *s1; s1++)
*p++ = towlower(*s1);
*p = '\0';
for (p = wcsicoll_l2_buf; *s2; s2++)
*p++ = towlower(*s2);
*p = '\0';
return (wcscoll(wcsicoll_l1_buf, wcsicoll_l2_buf));
}