bintrans: replace the quoted printable encoder/decoder

Replace the quoted-printable code with an implementation that respects
RFC 2045. Add tests for the decoder and the encoder, using examples from
the Wikipedia page.

Reviewed by:	pstef
Differential Revision: https://reviews.freebsd.org/D36314
This commit is contained in:
Baptiste Daroussin 2022-08-23 17:45:11 +02:00
parent b6ce129d24
commit 83e8c23178
7 changed files with 244 additions and 266 deletions

View File

@ -4,7 +4,7 @@
.include <src.opts.mk>
PROG= bintrans
SRCS= bintrans.c uuencode.c uudecode.c quoted-printable.c
SRCS= bintrans.c uuencode.c uudecode.c qp.c
MAN= bintrans.1 uuencode.format.5
LINKS+= ${BINDIR}/bintrans ${BINDIR}/uuencode
LINKS+= ${BINDIR}/bintrans ${BINDIR}/b64encode

198
usr.bin/bintrans/qp.c Normal file
View File

@ -0,0 +1,198 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2020 Baptiste Daroussin <bapt@FreeBSD.org>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
extern int main_quotedprintable(int, char *[]);
/*
 * Convert a single hexadecimal digit character to its numeric value.
 *
 * '0'..'9' map to 0..9.  Anything else is treated as an upper-case letter
 * and mapped relative to 'A' ('A' -> 10); no validation is performed, so
 * the caller must pass an upper-cased, already-checked hex digit.
 */
static int
hexval(int c)
{
	const int is_decimal = (c >= '0' && c <= '9');

	return (is_decimal ? c - '0' : 10 + c - 'A');
}
/*
 * Decode the "=XY" escape starting at s.
 *
 * s[0] is the '=' sign, s[1] the high hex digit and s[2] the low hex
 * digit.  Both digits are upper-cased first, so either letter case is
 * accepted.  The digits are not validated here.
 */
static int
decode_char(const char *s)
{
	const int high = hexval(toupper(s[1]));
	const int low = hexval(toupper(s[2]));

	return (16 * high + low);
}
/*
 * Decode one NUL-terminated chunk of quoted-printable text and write the
 * decoded bytes to fpo.
 *
 *  - "=\r\n" and "=\n" are soft line breaks and produce no output.
 *  - "=XY", where X and Y are hexadecimal digits of either case, produces
 *    the single byte 0xXY.
 *  - Any other '=' is not a valid escape and is copied through as is.
 *  - Every other character is copied through unchanged.
 */
static void
decode_quoted_printable(const char *body, FILE *fpo)
{
	while (*body != '\0') {
		if (*body != '=') {
			fputc(*body, fpo);
			body++;
			continue;
		}

		/*
		 * body[1] is always readable here (it is at worst the
		 * terminator), and every test below short-circuits before
		 * touching body[2] when body[1] is the terminator.
		 */
		if (body[1] == '\r' && body[2] == '\n') {
			body += 3;
			continue;
		}
		if (body[1] == '\n') {
			body += 2;
			continue;
		}

		/*
		 * Use isxdigit() rather than strchr() on a digit string:
		 * strchr() matches the terminating NUL, which made an input
		 * ending in "=A" look like a complete escape and moved the
		 * cursor past the end of the buffer.
		 */
		if (isxdigit((unsigned char)body[1]) &&
		    isxdigit((unsigned char)body[2])) {
			fputc(decode_char(body), fpo);
			body += 3;
			continue;
		}

		/* Malformed or truncated escape: keep the '=' literally. */
		fputc('=', fpo);
		body++;
	}
}
/*
 * Encode one NUL-terminated chunk of text as quoted-printable and write
 * the result to fpo.
 *
 * Escaped as "=XX" (upper-case hex):
 *  - bytes above 127 and the '=' sign;
 *  - control characters below 33 other than '\n' (this includes '\r');
 *  - a space or tab that is the last character of the chunk or is
 *    directly followed by '\n' or '\r';
 *  - a '.' directly followed by '\n' or '\r'.
 * A '\n' is written unchanged and starts a new output line.  Everything
 * else is written literally.
 *
 * A soft line break ("=\r\n") is inserted whenever the next item (one
 * literal character or one three-character escape) would push the current
 * output line past 75 characters, so that no line exceeds 76 characters
 * including the trailing '='.  An escape is never split across lines.
 */
static void
encode_quoted_printable(const char *body, FILE *fpo)
{
	/* Characters allowed on a line before the soft-break '='. */
	enum { QP_MAX_COLS = 75 };
	size_t linelen = 0;

	for (; *body != '\0'; body++) {
		/* Work on the unsigned value so high bytes compare sanely. */
		const unsigned char c = (unsigned char)*body;
		/* body[1] is at worst the terminator, so this is in bounds. */
		const bool eol_next = (body[1] == '\n' || body[1] == '\r');
		bool escape;
		size_t width;

		if (c == '\n') {
			fputc('\n', fpo);
			linelen = 0;
			continue;
		}

		if (c > 127 || c == '=')
			escape = true;
		else if (c == '.')
			escape = eol_next;
		else if (c == ' ' || c == '\t')
			escape = (body[1] == '\0' || eol_next);
		else
			escape = (c < 33);

		/* Break before the item, never in the middle of an escape. */
		width = escape ? 3 : 1;
		if (linelen + width > QP_MAX_COLS) {
			fputs("=\r\n", fpo);
			linelen = 0;
		}

		if (escape)
			fprintf(fpo, "=%02X", c);
		else
			fputc(c, fpo);
		linelen += width;
	}
}
/*
 * Run the quoted-printable codec over a whole stream.
 *
 * Input is read from fp one line at a time with getline(); each line is
 * handed to the encoder (encode == true) or the decoder (encode == false)
 * as a NUL-terminated string, and the codec writes its result to fpo.
 * Reading stops as soon as getline() returns zero or a negative value.
 */
static void
qp(FILE *fp, FILE *fpo, bool encode)
{
	void (*transform)(const char *, FILE *);
	char *buf = NULL;
	size_t bufsz = 0;

	if (encode)
		transform = encode_quoted_printable;
	else
		transform = decode_quoted_printable;

	for (;;) {
		ssize_t nread = getline(&buf, &bufsz, fp);

		if (nread <= 0)
			break;
		transform(buf, fpo);
	}

	/* getline() owns the growth of buf; we own its release. */
	free(buf);
}
/* Print the one-line synopsis of the "qp" sub-command on stderr. */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: bintrans qp [-u] [-o outputfile] [file name]\n";

	fputs(synopsis, stderr);
}
/*
 * Entry point of the "qp" sub-command.
 *
 * Arguments are scanned by hand starting at argv[1]:
 *   -u        decode instead of encode
 *   -o file   write to file instead of stdout (file is the next argument)
 *   other -x  print the usage message and exit with EXIT_FAILURE
 *   anything not starting with '-' is opened as the input file
 * When an input or output file is named more than once, the last one
 * opened is used.  Failure to open a file is reported with perror() and
 * terminates the process with EXIT_FAILURE.
 *
 * Returns EXIT_SUCCESS once the whole input has been processed.
 */
int
main_quotedprintable(int argc, char *argv[])
{
	FILE *in = stdin;
	FILE *out = stdout;
	bool encode = true;
	int i = 1;

	while (i < argc) {
		const char *arg = argv[i];

		if (arg[0] != '-') {
			/* Plain argument: the input file. */
			in = fopen(arg, "r");
			if (in == NULL) {
				perror(arg);
				exit(EXIT_FAILURE);
			}
		} else if (arg[1] == 'u') {
			encode = false;
		} else if (arg[1] == 'o') {
			/* The file name is the following argument. */
			if (++i >= argc) {
				fprintf(stderr, "qp: -o requires a file name.\n");
				exit(EXIT_FAILURE);
			}
			out = fopen(argv[i], "w");
			if (out == NULL) {
				perror(argv[i]);
				exit(EXIT_FAILURE);
			}
		} else {
			usage();
			exit(EXIT_FAILURE);
		}
		i++;
	}

	qp(in, out, encode);
	return (EXIT_SUCCESS);
}

View File

@ -1,265 +0,0 @@
/*
Copyright (c) 1991 Bell Communications Research, Inc. (Bellcore)
Permission to use, copy, modify, and distribute this material
for any purpose and without fee is hereby granted, provided
that the above copyright notice and this permission notice
appear in all copies, and that the name of Bellcore not be
used in advertising or publicity pertaining to this
material without the specific, prior written permission
of an authorized representative of Bellcore. BELLCORE
MAKES NO REPRESENTATIONS ABOUT THE ACCURACY OR SUITABILITY
OF THIS MATERIAL FOR ANY PURPOSE. IT IS PROVIDED "AS IS",
WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES.
*/
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
extern int main_quotedprintable(int, char *[]);
/*
 * Check whether line s matches one of the first *BoundaryCt entries of
 * Boundaries.
 *
 * Lines that do not begin with "--" never match.  Otherwise s is compared
 * against each boundary as a prefix, in order.  On the first hit the
 * function returns 1; if that boundary is immediately followed by "--" in
 * s, *BoundaryCt is first lowered to the index of the matching entry.
 * Returns 0 when nothing matches.
 */
static int
PendingBoundary(char *s, char **Boundaries, int *BoundaryCt)
{
	int idx = 0;

	if (!(s[0] == '-' && s[1] == '-'))
		return (0);

	while (idx < *BoundaryCt) {
		const char *candidate = Boundaries[idx];
		const size_t n = strlen(candidate);

		if (strncmp(s, candidate, n) != 0) {
			idx++;
			continue;
		}
		/* A trailing "--" drops this boundary and all later ones. */
		if (s[n] == '-' && s[n + 1] == '-')
			*BoundaryCt = idx;
		return (1);
	}
	return (0);
}
/* Upper-case hexadecimal digits, indexed by nibble value. */
#define basis_hex "0123456789ABCDEF"
/*
 * Reverse lookup table for the 128 ASCII codes: entries for '0'-'9',
 * 'A'-'F' and 'a'-'f' hold the digit's value (0-15), every other entry
 * holds -1.
 * NOTE(review): the element type is plain char, so on a platform where
 * char is unsigned the -1 entries would not read back as negative.
 */
static const char index_hex[128] = {
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1, -1,-1,-1,-1,
-1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};
/* The following version generated complaints on Solaris. */
/* #define hexchar(c) (((c) < 0 || (c) > 127) ? -1 : index_hex[(c)]) */
/* Since we're no longer ever calling it with anything signed, this should work: */
/*
 * Map a character code to its hex digit value via index_hex; codes above
 * 127 yield -1.  There is no lower-bound check, so a negative argument
 * indexes before the start of the table.
 */
#define hexchar(c) (((c) > 127) ? -1 : index_hex[(c)])
/*
 * Encode infile as quoted-printable on outfile, one byte at a time.
 *
 * ct counts the characters written on the current output line and prevc
 * remembers (approximately) the previous input character.
 *
 * Written as "=XX" (upper-case hex): control characters below 32 other
 * than '\n' and '\t', the '=' sign, bytes >= 127, and a '.' in the first
 * column of an output line.
 * A '\n' is copied through and resets ct; when the previous character was
 * a space or tab, an '=' and an additional '\n' are written first.
 * An 'F' directly after a newline triggers a lookahead for "From ": when
 * found, "=46rom" is written instead of "From".  prevc starts at 255, so
 * an 'F' at the very start of the input does not take this path.
 * After each character, a soft break ("=\n") is written once ct exceeds
 * 72.  If the input ends in the middle of a line (ct != 0), a final "=\n"
 * is appended.
 */
static void
toqp(FILE *infile, FILE *outfile)
{
int c, ct = 0, prevc = 255;
while ((c = getc(infile)) != EOF) {
if ((c < 32 && (c != '\n' && c != '\t'))
|| (c == '=')
|| (c >= 127)
/* Following line is to avoid single periods alone on lines,
which messes up some dumb smtp implementations, sigh... */
|| (ct == 0 && c == '.')) {
putc('=', outfile);
putc(basis_hex[c >> 4], outfile);
putc(basis_hex[c & 0xF], outfile);
ct += 3;
prevc = 'A'; /* close enough */
} else if (c == '\n') {
if (prevc == ' ' || prevc == '\t') {
putc('=', outfile); /* soft & hard lines */
putc(c, outfile);
}
putc(c, outfile);
ct = 0;
prevc = c;
} else {
if (c == 'F' && prevc == '\n') {
/* HORRIBLE but clever hack suggested by MTR for sendmail-avoidance */
/*
 * Each getc() below consumes one more character of a possible "From ";
 * the branch taken writes the part that was confirmed, and the last
 * character read (the first mismatch, or the ' ') is pushed back with
 * ungetc() so the main loop processes it normally.
 */
c = getc(infile);
if (c == 'r') {
c = getc(infile);
if (c == 'o') {
c = getc(infile);
if (c == 'm') {
c = getc(infile);
if (c == ' ') {
/* This is the case we are looking for */
fputs("=46rom", outfile);
ct += 6;
} else {
fputs("From", outfile);
ct += 4;
}
} else {
fputs("Fro", outfile);
ct += 3;
}
} else {
fputs("Fr", outfile);
ct += 2;
}
} else {
putc('F', outfile);
++ct;
}
ungetc(c, infile);
prevc = 'x'; /* close enough -- printable */
} else { /* END horrible hack */
putc(c, outfile);
++ct;
prevc = c;
}
}
/* Line is getting long: emit a soft line break and start over. */
if (ct > 72) {
putc('=', outfile);
putc('\n', outfile);
ct = 0;
prevc = '\n';
}
}
/* Input ended without a newline: close the partial line with a soft break. */
if (ct) {
putc('=', outfile);
putc('\n', outfile);
}
}
/*
 * Decode quoted-printable text from infile onto outfile.
 *
 * boundaries / boundaryct optionally describe multipart boundary lines;
 * boundaries may be NULL, in which case no boundary detection is done.
 * When boundaries is non-NULL and a line starts with '-', the line is
 * read into a 200-byte buffer and tested with PendingBoundary(); on a
 * match the function returns immediately, otherwise that buffered line is
 * decoded in place.
 *
 * sawnewline is true while positioned at the start of a line.
 * neednewline records a '\n' that was read but not yet written; it is
 * written just before the next output, or at end of input.
 *
 * "=" followed by '\n' is a soft line break and produces nothing; any
 * other "=XY" is written as (hexchar(X) << 4 | hexchar(Y)).
 * NOTE(review): on the unbuffered path the results of getc() are passed
 * to hexchar() without checking for EOF, and the results of hexchar() are
 * combined without checking for -1 on either path.
 */
static void
fromqp(FILE *infile, FILE *outfile, char **boundaries, int *boundaryct)
{
int c1, c2;
bool sawnewline = true, neednewline = false;
/* The neednewline hack is necessary because the newline leading into
a multipart boundary is part of the boundary, not the data */
while ((c1 = getc(infile)) != EOF) {
if (sawnewline && boundaries && c1 == '-') {
char Buf[200];
unsigned char *s;
/* Put the '-' back so that fgets() sees the complete line. */
ungetc(c1, infile);
fgets(Buf, sizeof(Buf), infile);
if (boundaries
&& Buf[0] == '-'
&& Buf[1] == '-'
&& PendingBoundary(Buf, boundaries, boundaryct)) {
return;
}
/* Not a boundary, now we must treat THIS line as q-p, sigh */
if (neednewline) {
putc('\n', outfile);
neednewline = false;
}
for (s = (unsigned char *)Buf; *s; ++s) {
if (*s == '=') {
/* Stop if the buffer ends in the middle of an escape. */
if (*++s == 0)
break;
if (*s == '\n') {
/* ignore it */
sawnewline = true;
} else {
c1 = hexchar(*s);
if (*++s == 0)
break;
c2 = hexchar(*s);
putc(c1 << 4 | c2, outfile);
}
} else {
putc(*s, outfile);
}
}
} else {
if (neednewline) {
putc('\n', outfile);
neednewline = false;
}
if (c1 == '=') {
sawnewline = false;
c1 = getc(infile);
if (c1 == '\n') {
/* ignore it */
sawnewline = true;
} else {
c2 = getc(infile);
c1 = hexchar(c1);
c2 = hexchar(c2);
putc(c1 << 4 | c2, outfile);
if (c2 == '\n')
sawnewline = true;
}
} else {
if (c1 == '\n') {
/* Hold the newline back until we know what follows it. */
sawnewline = true;
neednewline = true;
} else {
sawnewline = false;
putc(c1, outfile);
}
}
}
}
/* Flush a newline that was still being held back at end of input. */
if (neednewline) {
putc('\n', outfile);
neednewline = false;
}
}
/* Write the synopsis of the "qp" sub-command to stderr. */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: bintrans qp [-u] [-o outputfile] [file name]\n";

	fputs(synopsis, stderr);
}
/*
 * Entry point of the "qp" sub-command.
 *
 * Arguments are scanned by hand starting at argv[1]:
 *   -u        decode instead of encode
 *   -o file   write to file instead of stdout (file is the next argument)
 *   other -x  print the usage message and exit with EXIT_FAILURE
 *   anything not starting with '-' is opened as the input file
 * Failure to open a file is reported with perror() and terminates the
 * process with EXIT_FAILURE.
 *
 * Encoding is done by toqp(); decoding by fromqp() with no multipart
 * boundaries.  Returns 0.
 */
int
main_quotedprintable(int argc, char *argv[])
{
	FILE *in = stdin;
	FILE *out = stdout;
	bool encode = true;
	int i = 1;

	while (i < argc) {
		const char *arg = argv[i];

		if (arg[0] != '-') {
			/* Plain argument: the input file. */
			in = fopen(arg, "r");
			if (in == NULL) {
				perror(arg);
				exit(EXIT_FAILURE);
			}
		} else if (arg[1] == 'u') {
			encode = false;
		} else if (arg[1] == 'o') {
			/* The file name is the following argument. */
			if (++i >= argc) {
				fprintf(stderr, "qp: -o requires a file name.\n");
				exit(EXIT_FAILURE);
			}
			out = fopen(argv[i], "w");
			if (out == NULL) {
				perror(argv[i]);
				exit(EXIT_FAILURE);
			}
		} else {
			usage();
			exit(EXIT_FAILURE);
		}
		i++;
	}

	if (!encode)
		fromqp(in, out, NULL, 0);
	else
		toqp(in, out);
	return (0);
}

View File

@ -2,8 +2,10 @@
PACKAGE= tests
ATF_TESTS_SH+= bintrans_test
TAP_TESTS_SH= legacy_test
${PACKAGE}FILES+= textqpenc textqpdec
${PACKAGE}FILES+= regress.base64.in regress.base64.out
${PACKAGE}FILES+= regress.in regress.out
${PACKAGE}FILES+= regress.sh

View File

@ -0,0 +1,35 @@
# Encoding the plain-text sample (textqpdec) must reproduce the reference
# quoted-printable file (textqpenc) exactly, with nothing on stderr.
atf_test_case encode_qp
encode_qp_body()
{
	# The source directory is quoted so that a path containing blanks
	# is still passed to bintrans as a single argument.
	atf_check -e empty -o file:"$(atf_get_srcdir)/textqpenc" \
	    bintrans qp "$(atf_get_srcdir)/textqpdec"
}
# Decoder checks: each case writes a small input to the file "test" and
# compares the output of "bintrans qp -u" with the expected inline text.
# The last check decodes the reference file and compares it with the
# plain-text sample.
atf_test_case decode_qp
decode_qp_body()
{
	# A lone '=' at end of input is kept literally.
	printf "=" > test
	atf_check -e empty -o inline:"=" bintrans qp -u test
	# '=' followed by a bare CR is not a soft line break.
	printf "=\ra" > test
	atf_check -e empty -o inline:"=\ra" bintrans qp -u test
	# '=' followed by CRLF is a soft line break and disappears.
	printf "=\r\na" > test
	atf_check -e empty -o inline:"a" bintrans qp -u test
	# Text without escapes passes through unchanged.
	printf "This is a line" > test
	atf_check -e empty -o inline:"This is a line" bintrans qp -u test
	# '=' not followed by two hex digits is kept literally.
	printf "This= is a line" > test
	atf_check -e empty -o inline:"This= is a line" bintrans qp -u test
	printf "This=2 is a line" > test
	atf_check -e empty -o inline:"This=2 is a line" bintrans qp -u test
	# Well-formed escapes are replaced by the byte they encode.
	printf "This=23 is a line" > test
	atf_check -e empty -o inline:"This# is a line" bintrans qp -u test
	printf "This=3D is a line" > test
	atf_check -e empty -o inline:"This= is a line" bintrans qp -u test
	# '_' has no special meaning in quoted-printable bodies.
	printf "This_ is a line" > test
	atf_check -e empty -o inline:"This_ is a line" bintrans qp -u test
	# The source directory is quoted so that a path containing blanks
	# is still passed to bintrans as a single argument.
	atf_check -e empty -o file:"$(atf_get_srcdir)/textqpdec" \
	    bintrans qp -u "$(atf_get_srcdir)/textqpenc"
}
# Register the test cases of this file, in the order they should be listed.
atf_init_test_cases()
{
	for tc in decode_qp encode_qp; do
		atf_add_test_case "$tc"
	done
}

View File

@ -0,0 +1,2 @@
J'interdis aux marchands de vanter trop leurs marchandises. Car ils se font vite pédagogues et t'enseignent comme but ce qui n'est par essence qu'un moyen, et te trompant ainsi sur la route à suivre les voilà bientôt qui te dégradent, car si leur musique est vulgaire ils te fabriquent pour te la vendre une âme vulgaire.
Antoine de Saint-Exupéry, Citadelle (1948)

View File

@ -0,0 +1,6 @@
J'interdis aux marchands de vanter trop leurs marchandises. Car ils se font=
vite p=C3=A9dagogues et t'enseignent comme but ce qui n'est par essence qu=
'un moyen, et te trompant ainsi sur la route =C3=A0 suivre les voil=C3=A0 b=
ient=C3=B4t qui te d=C3=A9gradent, car si leur musique est vulgaire ils te =
fabriquent pour te la vendre une =C3=A2me vulgaire=2E
=E2=80=94=E2=80=89Antoine de Saint-Exup=C3=A9ry, Citadelle (1948)