Update to the latest (un)vis(3) sources from NetBSD. This adds

multibyte support[0] and the new functions strenvisx and strsenvisx.

Add MLINKS for vis(3) functions added by this and the initial import from
NetBSD[1].

PR:		bin/166364, bin/175418
Submitted by:	"J.R. Oldroyd" <fbsd@opal.com>[0]
		stefanf[1]
Obtained from:	NetBSD
MFC after:	2 weeks
This commit is contained in:
Brooks Davis 2013-03-14 23:51:47 +00:00
commit 778c12a624
7 changed files with 626 additions and 426 deletions

@ -1,4 +1,4 @@
.\" $NetBSD: unvis.3,v 1.23 2011/03/17 14:06:29 wiz Exp $
.\" $NetBSD: unvis.3,v 1.27 2012/12/15 07:34:36 wiz Exp $
.\" $FreeBSD$
.\"
.\" Copyright (c) 1989, 1991, 1993
@ -126,15 +126,17 @@ The
function has several return codes that must be handled properly.
They are:
.Bl -tag -width UNVIS_VALIDPUSH
.It Li \&0 (zero)
.It Li \&0 No (zero)
Another character is necessary; nothing has been recognized yet.
.It Dv UNVIS_VALID
A valid character has been recognized and is available at the location
pointed to by cp.
pointed to by
.Fa cp .
.It Dv UNVIS_VALIDPUSH
A valid character has been recognized and is available at the location
pointed to by cp; however, the character currently passed in should
be passed in again.
pointed to by
.Fa cp ;
however, the character currently passed in should be passed in again.
.It Dv UNVIS_NOCHAR
A valid sequence was detected, but no character was produced.
This return code is necessary to indicate a logical break between characters.
@ -150,7 +152,7 @@ one more time with flag set to
to extract any remaining character (the character passed in is ignored).
.Pp
The
.Ar flag
.Fa flag
argument is also used to specify the encoding style of the source.
If set to
.Dv VIS_HTTPSTYLE
@ -161,7 +163,8 @@ will decode URI strings as specified in RFC 1808.
If set to
.Dv VIS_HTTP1866 ,
.Fn unvis
will decode URI strings as specified in RFC 1866.
will decode entity references and numeric character references
as specified in RFC 1866.
If set to
.Dv VIS_MIMESTYLE ,
.Fn unvis
@ -169,7 +172,9 @@ will decode MIME Quoted-Printable strings as specified in RFC 2045.
If set to
.Dv VIS_NOESCAPE ,
.Fn unvis
will not decode \e quoted characters.
will not decode
.Ql \e
quoted characters.
.Pp
The following code fragment illustrates a proper use of
.Fn unvis .
@ -204,7 +209,7 @@ The functions
and
.Fn strnunvisx
will return \-1 on error and set
.Va errno
.Va errno
to:
.Bl -tag -width Er
.It Bq Er EINVAL
@ -212,7 +217,7 @@ An invalid escape sequence was detected, or the decoder is in an unknown state.
.El
.Pp
In addition the functions
.Fn strnunvis
.Fn strnunvis
and
.Fn strnunvisx
can also set
@ -244,4 +249,14 @@ and
functions appeared in
.Nx 6.0
and
.Fx 10.0 .
.Fx 9.2 .
.Sh BUGS
The names
.Dv VIS_HTTP1808
and
.Dv VIS_HTTP1866
are wrong.
Percent-encoding was defined in RFC 1738, the original RFC for URL.
RFC 1866 defines HTML 2.0, an application of SGML, from which it
inherits concepts of numeric character references and entity
references.

@ -1,4 +1,4 @@
/* $NetBSD: unvis.c,v 1.40 2012/12/14 21:31:01 christos Exp $ */
/* $NetBSD: unvis.c,v 1.41 2012/12/15 04:29:53 matt Exp $ */
/*-
* Copyright (c) 1989, 1993
@ -34,7 +34,7 @@
#if 0
static char sccsid[] = "@(#)unvis.c 8.1 (Berkeley) 6/4/93";
#else
__RCSID("$NetBSD: unvis.c,v 1.40 2012/12/14 21:31:01 christos Exp $");
__RCSID("$NetBSD: unvis.c,v 1.41 2012/12/15 04:29:53 matt Exp $");
#endif
#endif /* LIBC_SCCS and not lint */
__FBSDID("$FreeBSD$");
@ -90,7 +90,7 @@ __weak_alias(strnunvisx,_strnunvisx)
* RFC 1866
*/
static const struct nv {
const char name[7];
char name[7];
uint8_t value;
} nv[] = {
{ "AElig", 198 }, /* capital AE diphthong (ligature) */

@ -1,4 +1,4 @@
.\" $NetBSD: vis.3,v 1.29 2012/12/14 22:55:59 christos Exp $
.\" $NetBSD: vis.3,v 1.39 2013/02/20 20:05:26 christos Exp $
.\" $FreeBSD$
.\"
.\" Copyright (c) 1989, 1991, 1993
@ -30,7 +30,7 @@
.\"
.\" @(#)vis.3 8.1 (Berkeley) 6/9/93
.\"
.Dd December 14, 2012
.Dd February 19, 2013
.Dt VIS 3
.Os
.Sh NAME
@ -40,12 +40,14 @@
.Nm strnvis ,
.Nm strvisx ,
.Nm strnvisx ,
.Nm strenvisx ,
.Nm svis ,
.Nm snvis ,
.Nm strsvis ,
.Nm strsnvis ,
.Nm strsvisx
.Nm strsnvisx
.Nm strsvisx ,
.Nm strsnvisx ,
.Nm strsenvisx
.Nd visually encode characters
.Sh LIBRARY
.Lb libc
@ -63,6 +65,8 @@
.Fn strvisx "char *dst" "const char *src" "size_t len" "int flag"
.Ft int
.Fn strnvisx "char *dst" "size_t dlen" "const char *src" "size_t len" "int flag"
.Ft int
.Fn strenvisx "char *dst" "size_t dlen" "const char *src" "size_t len" "int flag" "int *cerr_ptr"
.Ft char *
.Fn svis "char *dst" "int c" "int flag" "int nextc" "const char *extra"
.Ft char *
@ -75,6 +79,8 @@
.Fn strsvisx "char *dst" "const char *src" "size_t len" "int flag" "const char *extra"
.Ft int
.Fn strsnvisx "char *dst" "size_t dlen" "const char *src" "size_t len" "int flag" "const char *extra"
.Ft int
.Fn strsenvisx "char *dst" "size_t dlen" "const char *src" "size_t len" "int flag" "const char *extra" "int *cerr_ptr"
.Sh DESCRIPTION
The
.Fn vis
@ -89,11 +95,11 @@ needs no encoding, it is copied in unaltered.
The string is null terminated, and a pointer to the end of the string is
returned.
The maximum length of any encoding is four
characters (not including the trailing
bytes (not including the trailing
.Dv NUL ) ;
thus, when
encoding a set of characters into a buffer, the size of the buffer should
be four times the number of characters encoded, plus one for the trailing
be four times the number of bytes encoded, plus one for the trailing
.Dv NUL .
The flag parameter is used for altering the default range of
characters considered for encoding and for altering the visual
@ -142,16 +148,17 @@ terminate
The size of
.Fa dst
must be four times the number
of characters encoded from
of bytes encoded from
.Fa src
(plus one for the
.Dv NUL ) .
Both
forms return the number of characters in dst (not including
the trailing
forms return the number of characters in
.Fa dst
(not including the trailing
.Dv NUL ) .
The
.Dq n
.Dq Nm n
versions of the functions also take an additional argument
.Fa dlen
that indicates the length of the
@ -159,7 +166,7 @@ that indicates the length of the
buffer.
If
.Fa dlen
is not large enough to fix the converted string then the
is not large enough to fit the converted string then the
.Fn strnvis
and
.Fn strnvisx
@ -167,6 +174,14 @@ functions return \-1 and set
.Va errno
to
.Dv ENOSPC .
The
.Fn strenvisx
function takes an additional argument,
.Fa cerr_ptr ,
that is used to pass in and out a multibyte conversion error flag.
This is useful when processing single characters at a time when
it is possible that the locale may be set to something other
than the locale of the characters in the input data.
.Pp
The functions
.Fn svis ,
@ -174,16 +189,18 @@ The functions
.Fn strsvis ,
.Fn strsnvis ,
.Fn strsvisx ,
.Fn strsnvisx ,
and
.Fn strsnvisx
.Fn strsenvisx
correspond to
.Fn vis ,
.Fn nvis ,
.Fn strvis ,
.Fn strnvis ,
.Fn strvisx ,
.Fn strnvisx ,
and
.Fn strnvisx
.Fn strenvisx
but have an additional argument
.Fa extra ,
pointing to a
@ -214,14 +231,13 @@ and
.Fn strnvisx ) ,
and the type of representation used.
By default, all non-graphic characters,
except space, tab, and newline are encoded.
(See
.Xr isgraph 3 . )
except space, tab, and newline are encoded (see
.Xr isgraph 3 ) .
The following flags
alter this:
.Bl -tag -width VIS_WHITEX
.It Dv VIS_GLOB
Also encode magic characters
Also encode the magic characters
.Ql ( * ,
.Ql \&? ,
.Ql \&[
@ -243,11 +259,13 @@ Synonym for
\&|
.Dv VIS_NL .
.It Dv VIS_SAFE
Only encode "unsafe" characters.
Only encode
.Dq unsafe
characters.
Unsafe means control characters which may cause common terminals to perform
unexpected functions.
Currently this form allows space, tab, newline, backspace, bell, and
return - in addition to all graphic characters - unencoded.
return \(em in addition to all graphic characters \(em unencoded.
.El
.Pp
(The above flags have no effect for
@ -287,8 +305,8 @@ Use an
to represent meta characters (characters with the 8th
bit set), and use caret
.Ql ^
to represent control characters see
.Pf ( Xr iscntrl 3 ) .
to represent control characters (see
.Xr iscntrl 3 ) .
The following formats are used:
.Bl -tag -width xxxxx
.It Dv \e^C
@ -335,19 +353,20 @@ Use C-style backslash sequences to represent standard non-printable
characters.
The following sequences are used to represent the indicated characters:
.Bd -unfilled -offset indent
.Li \ea Tn - BEL No (007)
.Li \eb Tn - BS No (010)
.Li \ef Tn - NP No (014)
.Li \en Tn - NL No (012)
.Li \er Tn - CR No (015)
.Li \es Tn - SP No (040)
.Li \et Tn - HT No (011)
.Li \ev Tn - VT No (013)
.Li \e0 Tn - NUL No (000)
.Li \ea Tn \(em BEL No (007)
.Li \eb Tn \(em BS No (010)
.Li \ef Tn \(em NP No (014)
.Li \en Tn \(em NL No (012)
.Li \er Tn \(em CR No (015)
.Li \es Tn \(em SP No (040)
.Li \et Tn \(em HT No (011)
.Li \ev Tn \(em VT No (013)
.Li \e0 Tn \(em NUL No (000)
.Ed
.Pp
When using this format, the nextc parameter is looked at to determine
if a
When using this format, the
.Fa nextc
parameter is looked at to determine if a
.Dv NUL
character can be encoded as
.Ql \e0
@ -374,8 +393,8 @@ represents a lower case hexadecimal digit.
.It Dv VIS_MIMESTYLE
Use MIME Quoted-Printable encoding as described in RFC 2045, only don't
break lines and don't handle CRLF.
The form is:
.Ql %XX
The form is
.Ql =XX
where
.Em X
represents an upper case hexadecimal digit.
@ -392,6 +411,41 @@ meta characters as
.Ql M-C ) .
With this flag set, the encoding is
ambiguous and non-invertible.
.Sh MULTIBYTE CHARACTER SUPPORT
These functions support multibyte character input.
The encoding conversion is influenced by the setting of the
.Ev LC_CTYPE
environment variable which defines the set of characters
that can be copied without encoding.
.Pp
When 8-bit data is present in the input,
.Ev LC_CTYPE
must be set to the correct locale or to the C locale.
If the locales of the data and the conversion are mismatched,
multibyte character recognition may fail and encoding will be performed
byte-by-byte instead.
.Pp
As noted above,
.Fa dst
must be four times the number of bytes processed from
.Fa src .
But note that each multibyte character can be up to
.Dv MB_LEN_MAX
bytes
.\" (see
.\" .Xr multibyte 3 )
so in terms of multibyte characters,
.Fa dst
must be four times
.Dv MB_LEN_MAX
times the number of characters processed from
.Fa src .
.Sh ENVIRONMENT
.Bl -tag -width ".Ev LC_CTYPE"
.It Ev LC_CTYPE
Specify the locale of the input data.
Set to C if the input data locale is unknown.
.El
.Sh ERRORS
The functions
.Fn nvis
@ -407,11 +461,11 @@ and
.Fn strsnvisx ,
will return \-1 when the
.Fa dlen
destination buffer length size is not enough to perform the conversion while
destination buffer size is not enough to perform the conversion while
setting
.Va errno
to:
.Bl -tag -width Er
.Bl -tag -width ".Bq Er ENOSPC"
.It Bq Er ENOSPC
The destination buffer size is not large enough to perform the conversion.
.El
@ -419,18 +473,23 @@ The destination buffer size is not large enough to perform the conversion.
.Xr unvis 1 ,
.Xr vis 1 ,
.Xr glob 3 ,
.\" .Xr multibyte 3 ,
.Xr unvis 3
.Rs
.%A T. Berners-Lee
.%T Uniform Resource Locators (URL)
.%O RFC1738
.%O "RFC 1738"
.Re
.Rs
.%T "Multipurpose Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies"
.%O "RFC 2045"
.Re
.Sh HISTORY
The
.Fn vis ,
.Fn strvis ,
and
.Fa strvisx
.Fn strvisx
functions first appeared in
.Bx 4.4 .
The
@ -441,7 +500,7 @@ and
functions appeared in
.Nx 1.5
and
.Fx 10.0 .
.Fx 9.2 .
The buffer size limited versions of the functions
.Po Fn nvis ,
.Fn strnvis ,
@ -451,6 +510,9 @@ The buffer size limited versions of the functions
and
.Fn strsnvisx Pc
appeared in
.Nx 6.0
and
.Fx 10.0 .
.Fx 9.2 .
Multibyte character support was added in
.Nx 7.0
and
.Fx 9.2 .

@ -1,4 +1,4 @@
/* $NetBSD: vis.c,v 1.45 2012/12/14 21:38:18 christos Exp $ */
/* $NetBSD: vis.c,v 1.60 2013/02/21 16:21:20 joerg Exp $ */
/*-
* Copyright (c) 1989, 1993
@ -57,19 +57,23 @@
#include <sys/cdefs.h>
#if defined(LIBC_SCCS) && !defined(lint)
__RCSID("$NetBSD: vis.c,v 1.45 2012/12/14 21:38:18 christos Exp $");
__RCSID("$NetBSD: vis.c,v 1.60 2013/02/21 16:21:20 joerg Exp $");
#endif /* LIBC_SCCS and not lint */
#ifdef __FBSDID
__FBSDID("$FreeBSD$");
#define _DIAGASSERT(x) assert(x)
#endif
#include "namespace.h"
#include <sys/types.h>
#include <sys/param.h>
#include <assert.h>
#include <vis.h>
#include <errno.h>
#include <stdlib.h>
#define _DIAGASSERT(x) assert(x)
#include <wchar.h>
#include <wctype.h>
#ifdef __weak_alias
__weak_alias(strvisx,_strvisx)
@ -81,65 +85,66 @@ __weak_alias(strvisx,_strvisx)
#include <stdio.h>
#include <string.h>
static char *do_svis(char *, size_t *, int, int, int, const char *);
/*
* The reason for going through the trouble to deal with character encodings
* in vis(3), is that we use this to safely encode output of commands. This
* safe encoding varies depending on the character set. For example if we
* display ps output in French, we don't want to display French characters
* as M-foo.
*/
static wchar_t *do_svis(wchar_t *, wint_t, int, wint_t, const wchar_t *);
#undef BELL
#define BELL '\a'
#define BELL L'\a'
#define isoctal(c) (((u_char)(c)) >= '0' && ((u_char)(c)) <= '7')
#define iswhite(c) (c == ' ' || c == '\t' || c == '\n')
#define issafe(c) (c == '\b' || c == BELL || c == '\r')
#define xtoa(c) "0123456789abcdef"[c]
#define XTOA(c) "0123456789ABCDEF"[c]
#define iswoctal(c) (((u_char)(c)) >= L'0' && ((u_char)(c)) <= L'7')
#define iswwhite(c) (c == L' ' || c == L'\t' || c == L'\n')
#define iswsafe(c) (c == L'\b' || c == BELL || c == L'\r')
#define xtoa(c) L"0123456789abcdef"[c]
#define XTOA(c) L"0123456789ABCDEF"[c]
#define MAXEXTRAS 9
#define MAXEXTRAS 10
#define MAKEEXTRALIST(flag, extra, orig_str) \
do { \
const char *orig = orig_str; \
const char *o = orig; \
char *e; \
while (*o++) \
continue; \
extra = malloc((size_t)((o - orig) + MAXEXTRAS)); \
if (!extra) break; \
for (o = orig, e = extra; (*e++ = *o++) != '\0';) \
continue; \
e--; \
if (flag & VIS_GLOB) { \
*e++ = '*'; \
*e++ = '?'; \
*e++ = '['; \
*e++ = '#'; \
} \
if (flag & VIS_SP) *e++ = ' '; \
if (flag & VIS_TAB) *e++ = '\t'; \
if (flag & VIS_NL) *e++ = '\n'; \
if ((flag & VIS_NOSLASH) == 0) *e++ = '\\'; \
*e = '\0'; \
} while (/*CONSTCOND*/0)
#if !HAVE_NBTOOL_CONFIG_H
#ifndef __NetBSD__
/*
* On NetBSD MB_LEN_MAX is currently 32, which does not fit in any
* integral type and is probably wrong, since currently the maximum
* number of bytes a character needs is 6. Until this is fixed, the
* loops below are using sizeof(uint64_t) - 1 instead of MB_LEN_MAX, and
* the assertion is commented out.
*/
#ifdef __FreeBSD__
/*
* On FreeBSD including <sys/systm.h> for CTASSERT only works in kernel
* mode.
*/
#ifndef CTASSERT
#define CTASSERT(x) _CTASSERT(x, __LINE__)
#define _CTASSERT(x, y) __CTASSERT(x, y)
#define __CTASSERT(x, y) typedef char __assert ## y[(x) ? 1 : -1]
#endif
#endif /* __FreeBSD__ */
CTASSERT(MB_LEN_MAX <= sizeof(uint64_t));
#endif /* !__NetBSD__ */
#endif
/*
* This is do_hvis, for HTTP style (RFC 1808)
*/
static char *
do_hvis(char *dst, size_t *dlen, int c, int flag, int nextc, const char *extra)
static wchar_t *
do_hvis(wchar_t *dst, wint_t c, int flags, wint_t nextc, const wchar_t *extra)
{
if ((isascii(c) && isalnum(c))
if (iswalnum(c)
/* safe */
|| c == '$' || c == '-' || c == '_' || c == '.' || c == '+'
|| c == L'$' || c == L'-' || c == L'_' || c == L'.' || c == L'+'
/* extra */
|| c == '!' || c == '*' || c == '\'' || c == '(' || c == ')'
|| c == ',') {
dst = do_svis(dst, dlen, c, flag, nextc, extra);
} else {
if (dlen) {
if (*dlen < 3)
return NULL;
*dlen -= 3;
}
*dst++ = '%';
|| c == L'!' || c == L'*' || c == L'\'' || c == L'(' || c == L')'
|| c == L',')
dst = do_svis(dst, c, flags, nextc, extra);
else {
*dst++ = L'%';
*dst++ = xtoa(((unsigned int)c >> 4) & 0xf);
*dst++ = xtoa((unsigned int)c & 0xf);
}
@ -151,27 +156,97 @@ do_hvis(char *dst, size_t *dlen, int c, int flag, int nextc, const char *extra)
* This is do_mvis, for Quoted-Printable MIME (RFC 2045)
* NB: No handling of long lines or CRLF.
*/
static char *
do_mvis(char *dst, size_t *dlen, int c, int flag, int nextc, const char *extra)
static wchar_t *
do_mvis(wchar_t *dst, wint_t c, int flags, wint_t nextc, const wchar_t *extra)
{
if ((c != '\n') &&
if ((c != L'\n') &&
/* Space at the end of the line */
((isspace(c) && (nextc == '\r' || nextc == '\n')) ||
((iswspace(c) && (nextc == L'\r' || nextc == L'\n')) ||
/* Out of range */
(!isspace(c) && (c < 33 || (c > 60 && c < 62) || c > 126)) ||
/* Specific char to be escaped */
strchr("#$@[\\]^`{|}~", c) != NULL)) {
if (dlen) {
if (*dlen < 3)
return NULL;
*dlen -= 3;
}
*dst++ = '=';
(!iswspace(c) && (c < 33 || (c > 60 && c < 62) || c > 126)) ||
/* Specific char to be escaped */
wcschr(L"#$@[\\]^`{|}~", c) != NULL)) {
*dst++ = L'=';
*dst++ = XTOA(((unsigned int)c >> 4) & 0xf);
*dst++ = XTOA((unsigned int)c & 0xf);
} else {
dst = do_svis(dst, dlen, c, flag, nextc, extra);
} else
dst = do_svis(dst, c, flags, nextc, extra);
return dst;
}
/*
 * do_mbyte - emit the visual encoding of a single byte of a (possibly
 * multibyte) character.
 *
 * dst:      output cursor in the wide-character buffer
 * c:        byte value to encode
 * flags:    VIS_* flag word
 * nextc:    character following c; only consulted to decide whether a
 *           VIS_CSTYLE NUL has to be written as \000 instead of \0
 * iswextra: non-zero when the character was found in the "extra" list
 *
 * Returns the advanced output cursor.  At most four wide characters are
 * stored, so the caller must guarantee that much room at dst.
 */
static wchar_t *
do_mbyte(wchar_t *dst, wint_t c, int flags, wint_t nextc, int iswextra)
{
	if (flags & VIS_CSTYLE) {
		switch (c) {
		case L'\n':
			*dst++ = L'\\'; *dst++ = L'n';
			return dst;
		case L'\r':
			*dst++ = L'\\'; *dst++ = L'r';
			return dst;
		case L'\b':
			*dst++ = L'\\'; *dst++ = L'b';
			return dst;
		case BELL:
			*dst++ = L'\\'; *dst++ = L'a';
			return dst;
		case L'\v':
			*dst++ = L'\\'; *dst++ = L'v';
			return dst;
		case L'\t':
			*dst++ = L'\\'; *dst++ = L't';
			return dst;
		case L'\f':
			*dst++ = L'\\'; *dst++ = L'f';
			return dst;
		case L' ':
			*dst++ = L'\\'; *dst++ = L's';
			return dst;
		case L'\0':
			*dst++ = L'\\'; *dst++ = L'0';
			/* Pad to \000 so a following digit is not absorbed. */
			if (iswoctal(nextc)) {
				*dst++ = L'0';
				*dst++ = L'0';
			}
			return dst;
		/*
		 * A backslash followed by one of these characters is
		 * itself an escape sequence, so "\c" would not decode
		 * back to c.  Fall through to the octal/meta encoders.
		 */
		case L'n':
		case L'r':
		case L'b':
		case L'a':
		case L'v':
		case L't':
		case L'f':
		case L's':
		case L'M':
		case L'^':
		case L'$':
			break;
		default:
			/* Octal digits would read back as an octal escape. */
			if (iswgraph(c) && !iswoctal(c)) {
				*dst++ = L'\\';
				*dst++ = c;
				return dst;
			}
		}
	}
	if (iswextra || ((c & 0177) == L' ') || (flags & VIS_OCTAL)) {
		/* Three-digit octal escape: \ddd */
		*dst++ = L'\\';
		*dst++ = (u_char)(((u_int32_t)(u_char)c >> 6) & 03) + L'0';
		*dst++ = (u_char)(((u_int32_t)(u_char)c >> 3) & 07) + L'0';
		*dst++ = (c & 07) + L'0';
	} else {
		if ((flags & VIS_NOSLASH) == 0)
			*dst++ = L'\\';
		/* Meta prefix for bytes with the 8th bit set. */
		if (c & 0200) {
			c &= 0177;
			*dst++ = L'M';
		}
		if (iswcntrl(c)) {
			/* Control character: ^C form, DEL written as ^? */
			*dst++ = L'^';
			if (c == 0177)
				*dst++ = L'?';
			else
				*dst++ = c + L'@';
		} else {
			*dst++ = L'-';
			*dst++ = c;
		}
	}
	return dst;
}
@ -179,284 +254,350 @@ do_mvis(char *dst, size_t *dlen, int c, int flag, int nextc, const char *extra)
* This is do_vis, the central code of vis.
* dst: Pointer to the destination buffer
* c: Character to encode
* flag: Flag word
* flags: Flags word
* nextc: The character following 'c'
* extra: Pointer to the list of extra characters to be
* backslash-protected.
*/
static char *
do_svis(char *dst, size_t *dlen, int c, int flag, int nextc, const char *extra)
static wchar_t *
do_svis(wchar_t *dst, wint_t c, int flags, wint_t nextc, const wchar_t *extra)
{
int isextra;
size_t odlen = dlen ? *dlen : 0;
int iswextra, i, shft;
uint64_t bmsk, wmsk;
isextra = strchr(extra, c) != NULL;
#define HAVE(x) \
do { \
if (dlen) { \
if (*dlen < (x)) \
goto out; \
*dlen -= (x); \
} \
} while (/*CONSTCOND*/0)
if (!isextra && isascii(c) && (isgraph(c) || iswhite(c) ||
((flag & VIS_SAFE) && issafe(c)))) {
HAVE(1);
iswextra = wcschr(extra, c) != NULL;
if (!iswextra && (iswgraph(c) || iswwhite(c) ||
((flags & VIS_SAFE) && iswsafe(c)))) {
*dst++ = c;
return dst;
}
if (flag & VIS_CSTYLE) {
HAVE(2);
switch (c) {
case '\n':
*dst++ = '\\'; *dst++ = 'n';
return dst;
case '\r':
*dst++ = '\\'; *dst++ = 'r';
return dst;
case '\b':
*dst++ = '\\'; *dst++ = 'b';
return dst;
case BELL:
*dst++ = '\\'; *dst++ = 'a';
return dst;
case '\v':
*dst++ = '\\'; *dst++ = 'v';
return dst;
case '\t':
*dst++ = '\\'; *dst++ = 't';
return dst;
case '\f':
*dst++ = '\\'; *dst++ = 'f';
return dst;
case ' ':
*dst++ = '\\'; *dst++ = 's';
return dst;
case '\0':
*dst++ = '\\'; *dst++ = '0';
if (isoctal(nextc)) {
HAVE(2);
*dst++ = '0';
*dst++ = '0';
}
return dst;
default:
if (isgraph(c)) {
*dst++ = '\\'; *dst++ = c;
return dst;
}
if (dlen)
*dlen = odlen;
}
}
if (isextra || ((c & 0177) == ' ') || (flag & VIS_OCTAL)) {
HAVE(4);
*dst++ = '\\';
*dst++ = (u_char)(((u_int32_t)(u_char)c >> 6) & 03) + '0';
*dst++ = (u_char)(((u_int32_t)(u_char)c >> 3) & 07) + '0';
*dst++ = (c & 07) + '0';
} else {
if ((flag & VIS_NOSLASH) == 0) {
HAVE(1);
*dst++ = '\\';
}
if (c & 0200) {
HAVE(1);
c &= 0177; *dst++ = 'M';
}
if (iscntrl(c)) {
HAVE(2);
*dst++ = '^';
if (c == 0177)
*dst++ = '?';
else
*dst++ = c + '@';
} else {
HAVE(2);
*dst++ = '-'; *dst++ = c;
}
/* See comment in istrsenvisx() output loop, below. */
wmsk = 0;
for (i = sizeof(wmsk) - 1; i >= 0; i--) {
shft = i * NBBY;
bmsk = (uint64_t)0xffLL << shft;
wmsk |= bmsk;
if ((c & wmsk) || i == 0)
dst = do_mbyte(dst, (wint_t)(
(uint64_t)(c & bmsk) >> shft),
flags, nextc, iswextra);
}
return dst;
out:
*dlen = odlen;
return NULL;
}
typedef char *(*visfun_t)(char *, size_t *, int, int, int, const char *);
typedef wchar_t *(*visfun_t)(wchar_t *, wint_t, int, wint_t, const wchar_t *);
/*
* Return the appropriate encoding function depending on the flags given.
*/
static visfun_t
getvisfun(int flag)
getvisfun(int flags)
{
if (flag & VIS_HTTPSTYLE)
if (flags & VIS_HTTPSTYLE)
return do_hvis;
if (flag & VIS_MIMESTYLE)
if (flags & VIS_MIMESTYLE)
return do_mvis;
return do_svis;
}
/*
* isnvis - visually encode characters, also encoding the characters
* pointed to by `extra'
* Build the wide-character list of extra characters to be visually encoded.
*/
static char *
isnvis(char *dst, size_t *dlen, int c, int flag, int nextc, const char *extra)
static wchar_t *
makeextralist(int flags, const char *src)
{
char *nextra = NULL;
visfun_t f;
wchar_t *dst, *d;
size_t len;
_DIAGASSERT(dst != NULL);
_DIAGASSERT(extra != NULL);
MAKEEXTRALIST(flag, nextra, extra);
if (!nextra) {
if (dlen && *dlen == 0) {
errno = ENOSPC;
return NULL;
}
*dst = '\0'; /* can't create nextra, return "" */
return dst;
}
f = getvisfun(flag);
dst = (*f)(dst, dlen, c, flag, nextc, nextra);
free(nextra);
if (dst == NULL || (dlen && *dlen == 0)) {
errno = ENOSPC;
len = strlen(src);
if ((dst = calloc(len + MAXEXTRAS, sizeof(*dst))) == NULL)
return NULL;
if (mbstowcs(dst, src, len) == (size_t)-1) {
size_t i;
for (i = 0; i < len; i++)
dst[i] = (wint_t)(u_char)src[i];
d = dst + len;
} else
d = dst + wcslen(dst);
if (flags & VIS_GLOB) {
*d++ = L'*';
*d++ = L'?';
*d++ = L'[';
*d++ = L'#';
}
*dst = '\0';
if (flags & VIS_SP) *d++ = L' ';
if (flags & VIS_TAB) *d++ = L'\t';
if (flags & VIS_NL) *d++ = L'\n';
if ((flags & VIS_NOSLASH) == 0) *d++ = L'\\';
*d = L'\0';
return dst;
}
char *
svis(char *dst, int c, int flag, int nextc, const char *extra)
{
return isnvis(dst, NULL, c, flag, nextc, extra);
}
char *
snvis(char *dst, size_t dlen, int c, int flag, int nextc, const char *extra)
{
return isnvis(dst, &dlen, c, flag, nextc, extra);
}
/*
* strsvis, strsvisx - visually encode characters from src into dst
*
* Extra is a pointer to a \0-terminated list of characters to
* be encoded, too. These functions are useful e. g. to
* encode strings in such a way so that they are not interpreted
* by a shell.
*
* Dst must be 4 times the size of src to account for possible
* expansion. The length of dst, not including the trailing NULL,
* is returned.
*
* Strsvisx encodes exactly len bytes from src into dst.
* This is useful for encoding a block of data.
* istrsenvisx()
* The main internal function.
* All user-visible functions call this one.
*/
static int
istrsnvis(char *dst, size_t *dlen, const char *csrc, int flag, const char *extra)
istrsenvisx(char *mbdst, size_t *dlen, const char *mbsrc, size_t mblength,
int flags, const char *mbextra, int *cerr_ptr)
{
int c;
char *start;
char *nextra = NULL;
const unsigned char *src = (const unsigned char *)csrc;
wchar_t *dst, *src, *pdst, *psrc, *start, *extra;
size_t len, olen;
uint64_t bmsk, wmsk;
wint_t c;
visfun_t f;
int clen = 0, cerr = 0, error = -1, i, shft;
ssize_t mbslength, maxolen;
_DIAGASSERT(dst != NULL);
_DIAGASSERT(src != NULL);
_DIAGASSERT(extra != NULL);
MAKEEXTRALIST(flag, nextra, extra);
if (!nextra) {
*dst = '\0'; /* can't create nextra, return "" */
return 0;
}
f = getvisfun(flag);
for (start = dst; (c = *src++) != '\0'; /* empty */) {
dst = (*f)(dst, dlen, c, flag, *src, nextra);
if (dst == NULL) {
errno = ENOSPC;
return -1;
}
}
free(nextra);
if (dlen && *dlen == 0) {
errno = ENOSPC;
_DIAGASSERT(mbdst != NULL);
_DIAGASSERT(mbsrc != NULL);
_DIAGASSERT(mbextra != NULL);
/*
* Input (mbsrc) is a char string considered to be multibyte
* characters. The input loop will read this string pulling
* one character, possibly multiple bytes, from mbsrc and
* converting each to wchar_t in src.
*
* The vis conversion will be done using the wide char
* wchar_t string.
*
* This will then be converted back to a multibyte string to
* return to the caller.
*/
/* Allocate space for the wide char strings */
psrc = pdst = extra = NULL;
if (!mblength)
mblength = strlen(mbsrc);
if ((psrc = calloc(mblength + 1, sizeof(*psrc))) == NULL)
return -1;
if ((pdst = calloc((4 * mblength) + 1, sizeof(*pdst))) == NULL)
goto out;
dst = pdst;
src = psrc;
/* Use caller's multibyte conversion error flag. */
if (cerr_ptr)
cerr = *cerr_ptr;
/*
* Input loop.
* Handle up to mblength characters (not bytes). We do not
* stop at NULs because we may be processing a block of data
* that includes NULs.
*/
mbslength = (ssize_t)mblength;
/*
* When inputing a single character, must also read in the
* next character for nextc, the look-ahead character.
*/
if (mbslength == 1)
mbslength++;
while (mbslength > 0) {
/* Convert one multibyte character to wchar_t. */
if (!cerr)
clen = mbtowc(src, mbsrc, MB_LEN_MAX);
if (cerr || clen < 0) {
/* Conversion error, process as a byte instead. */
*src = (wint_t)(u_char)*mbsrc;
clen = 1;
cerr = 1;
}
if (clen == 0)
/*
* NUL in input gives 0 return value. process
* as single NUL byte and keep going.
*/
clen = 1;
/* Advance buffer character pointer. */
src++;
/* Advance input pointer by number of bytes read. */
mbsrc += clen;
/* Decrement input byte count. */
mbslength -= clen;
}
*dst = '\0';
return (int)(dst - start);
}
len = src - psrc;
src = psrc;
/*
* In the single character input case, we will have actually
* processed two characters, c and nextc. Reset len back to
* just a single character.
*/
if (mblength < len)
len = mblength;
int
strsvis(char *dst, const char *csrc, int flag, const char *extra)
{
return istrsnvis(dst, NULL, csrc, flag, extra);
}
int
strsnvis(char *dst, size_t dlen, const char *csrc, int flag, const char *extra)
{
return istrsnvis(dst, &dlen, csrc, flag, extra);
}
static int
istrsnvisx(char *dst, size_t *dlen, const char *csrc, size_t len, int flag,
const char *extra)
{
unsigned char c;
char *start;
char *nextra = NULL;
const unsigned char *src = (const unsigned char *)csrc;
visfun_t f;
_DIAGASSERT(dst != NULL);
_DIAGASSERT(src != NULL);
_DIAGASSERT(extra != NULL);
MAKEEXTRALIST(flag, nextra, extra);
if (! nextra) {
/* Convert extra argument to list of characters for this mode. */
extra = makeextralist(flags, mbextra);
if (!extra) {
if (dlen && *dlen == 0) {
errno = ENOSPC;
return -1;
goto out;
}
*dst = '\0'; /* can't create nextra, return "" */
return 0;
*mbdst = '\0'; /* can't create extra, return "" */
error = 0;
goto out;
}
f = getvisfun(flag);
/* Look up which processing function to call. */
f = getvisfun(flags);
/*
* Main processing loop.
* Call do_Xvis processing function one character at a time
* with next character available for look-ahead.
*/
for (start = dst; len > 0; len--) {
c = *src++;
dst = (*f)(dst, dlen, c, flag, len > 1 ? *src : '\0', nextra);
dst = (*f)(dst, c, flags, len >= 1 ? *src : L'\0', extra);
if (dst == NULL) {
errno = ENOSPC;
return -1;
goto out;
}
}
free(nextra);
if (dlen && *dlen == 0) {
errno = ENOSPC;
return -1;
/* Terminate the string in the buffer. */
*dst = L'\0';
/*
* Output loop.
* Convert wchar_t string back to multibyte output string.
* If we have hit a multi-byte conversion error on input,
* output byte-by-byte here. Else use wctomb().
*/
len = wcslen(start);
maxolen = dlen ? *dlen : (wcslen(start) * MB_LEN_MAX + 1);
olen = 0;
for (dst = start; len > 0; len--) {
if (!cerr)
clen = wctomb(mbdst, *dst);
if (cerr || clen < 0) {
/*
* Conversion error, process as a byte(s) instead.
* Examine each byte and higher-order bytes for
* data. E.g.,
* 0x000000000000a264 -> a2 64
* 0x000000001f00a264 -> 1f 00 a2 64
*/
clen = 0;
wmsk = 0;
for (i = sizeof(wmsk) - 1; i >= 0; i--) {
shft = i * NBBY;
bmsk = (uint64_t)0xffLL << shft;
wmsk |= bmsk;
if ((*dst & wmsk) || i == 0)
mbdst[clen++] = (char)(
(uint64_t)(*dst & bmsk) >>
shft);
}
cerr = 1;
}
/* If this character would exceed our output limit, stop. */
if (olen + clen > (size_t)maxolen)
break;
/* Advance output pointer by number of bytes written. */
mbdst += clen;
/* Advance buffer character pointer. */
dst++;
/* Increment output byte count. */
olen += clen;
}
*dst = '\0';
return (int)(dst - start);
/* Terminate the output string. */
*mbdst = '\0';
/* Pass conversion error flag out. */
if (cerr_ptr)
*cerr_ptr = cerr;
free(extra);
free(pdst);
free(psrc);
return (int)olen;
out:
free(extra);
free(pdst);
free(psrc);
return error;
}
#endif
#if !HAVE_SVIS
/*
* The "svis" variants all take an "extra" arg that is a pointer
* to a NUL-terminated list of characters to be encoded, too.
* These functions are useful e. g. to encode strings in such a
* way so that they are not interpreted by a shell.
*/
/*
 * svis - visually encode the single character c into mbdst, also
 * encoding the characters listed in mbextra.
 *
 * c and nextc are packed into a two-byte buffer so istrsenvisx() can
 * use nextc as the look-ahead character; an input length of 1 is
 * requested and no destination size limit is supplied.
 *
 * Returns mbdst advanced by the number of bytes written (the position
 * of the terminating NUL), or NULL if istrsenvisx() fails.
 */
char *
svis(char *mbdst, int c, int flags, int nextc, const char *mbextra)
{
	char pair[2];
	int written;

	pair[0] = c;
	pair[1] = nextc;
	written = istrsenvisx(mbdst, NULL, pair, 1, flags, mbextra, NULL);
	return (written < 0) ? NULL : mbdst + written;
}
/*
 * snvis - like svis(), but the destination size dlen is handed to
 * istrsenvisx() as the output limit.
 *
 * Returns mbdst advanced by the number of bytes written (the position
 * of the terminating NUL), or NULL if istrsenvisx() fails.
 */
char *
snvis(char *mbdst, size_t dlen, int c, int flags, int nextc, const char *mbextra)
{
	char pair[2];
	int written;

	/* Current character plus its look-ahead. */
	pair[0] = c;
	pair[1] = nextc;
	written = istrsenvisx(mbdst, &dlen, pair, 1, flags, mbextra, NULL);
	return (written < 0) ? NULL : mbdst + written;
}
int
strsvisx(char *dst, const char *csrc, size_t len, int flag, const char *extra)
strsvis(char *mbdst, const char *mbsrc, int flags, const char *mbextra)
{
return istrsnvisx(dst, NULL, csrc, len, flag, extra);
return istrsenvisx(mbdst, NULL, mbsrc, 0, flags, mbextra, NULL);
}
int
strsnvisx(char *dst, size_t dlen, const char *csrc, size_t len, int flag,
const char *extra)
strsnvis(char *mbdst, size_t dlen, const char *mbsrc, int flags, const char *mbextra)
{
return istrsnvisx(dst, &dlen, csrc, len, flag, extra);
return istrsenvisx(mbdst, &dlen, mbsrc, 0, flags, mbextra, NULL);
}
/*
 * strsvisx - encode len bytes of mbsrc into mbdst, also encoding the
 * characters in mbextra.  No destination size limit is supplied.  A len
 * of 0 makes istrsenvisx() fall back to strlen(mbsrc).
 *
 * Returns whatever istrsenvisx() returns.
 */
int
strsvisx(char *mbdst, const char *mbsrc, size_t len, int flags, const char *mbextra)
{
	int nbytes;

	nbytes = istrsenvisx(mbdst, NULL, mbsrc, len, flags, mbextra, NULL);
	return nbytes;
}
/*
 * strsnvisx - strsvisx() with a destination size: dlen is passed to
 * istrsenvisx() as the output limit.
 *
 * Returns whatever istrsenvisx() returns.
 */
int
strsnvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flags,
const char *mbextra)
{
	int nbytes;

	nbytes = istrsenvisx(mbdst, &dlen, mbsrc, len, flags, mbextra, NULL);
	return nbytes;
}
/*
 * strsenvisx - strsnvisx() plus cerr_ptr, through which istrsenvisx()
 * reads the caller's multibyte conversion error flag on entry and
 * stores the updated flag on successful return.
 *
 * Returns whatever istrsenvisx() returns.
 */
int
strsenvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flags,
const char *mbextra, int *cerr_ptr)
{
	int nbytes;

	nbytes = istrsenvisx(mbdst, &dlen, mbsrc, len, flags, mbextra,
	    cerr_ptr);
	return nbytes;
}
#endif
@ -464,120 +605,83 @@ strsnvisx(char *dst, size_t dlen, const char *csrc, size_t len, int flag,
/*
* vis - visually encode characters
*/
static char *
invis(char *dst, size_t *dlen, int c, int flag, int nextc)
char *
vis(char *mbdst, int c, int flags, int nextc)
{
char *extra = NULL;
unsigned char uc = (unsigned char)c;
visfun_t f;
char cc[2];
int ret;
_DIAGASSERT(dst != NULL);
cc[0] = c;
cc[1] = nextc;
MAKEEXTRALIST(flag, extra, "");
if (! extra) {
if (dlen && *dlen == 0) {
errno = ENOSPC;
return NULL;
}
*dst = '\0'; /* can't create extra, return "" */
return dst;
}
f = getvisfun(flag);
dst = (*f)(dst, dlen, uc, flag, nextc, extra);
free(extra);
if (dst == NULL || (dlen && *dlen == 0)) {
errno = ENOSPC;
ret = istrsenvisx(mbdst, NULL, cc, 1, flags, "", NULL);
if (ret < 0)
return NULL;
}
*dst = '\0';
return dst;
return mbdst + ret;
}
char *
vis(char *dst, int c, int flag, int nextc)
nvis(char *mbdst, size_t dlen, int c, int flags, int nextc)
{
return invis(dst, NULL, c, flag, nextc);
}
char cc[2];
int ret;
char *
nvis(char *dst, size_t dlen, int c, int flag, int nextc)
{
return invis(dst, &dlen, c, flag, nextc);
}
cc[0] = c;
cc[1] = nextc;
ret = istrsenvisx(mbdst, &dlen, cc, 1, flags, "", NULL);
if (ret < 0)
return NULL;
return mbdst + ret;
}
/*
* strvis, strvisx - visually encode characters from src into dst
* strvis - visually encode characters from src into dst
*
* Dst must be 4 times the size of src to account for possible
* expansion. The length of dst, not including the trailing NUL,
* is returned.
*/
int
strvis(char *mbdst, const char *mbsrc, int flags)
{
	size_t *const no_dlen = NULL;	/* caller gives no destination size */
	int *const no_cerr = NULL;	/* caller gives no error pointer */

	/*
	 * A source length of 0 and an empty extra-character string are
	 * passed; presumably 0 means "up to the terminating NUL" --
	 * confirm against istrsenvisx().
	 */
	return istrsenvisx(mbdst, no_dlen, mbsrc, 0, flags, "", no_cerr);
}
/*
 * strnvis - as strvis, with the destination buffer size given in dlen.
 *
 * dlen is forwarded to istrsenvisx() by address (local copy only),
 * together with a source length of 0, an empty extra-character string
 * and no error pointer.
 */
int
strnvis(char *mbdst, size_t dlen, const char *mbsrc, int flags)
{
	int rv;

	rv = istrsenvisx(mbdst, &dlen, mbsrc, 0, flags, "", NULL);
	return rv;
}
/*
* strvisx - visually encode characters from src into dst
*
* Dst must be 4 times the size of src to account for possible
* expansion. The length of dst, not including the trailing NUL,
* is returned.
*
* Strvisx encodes exactly len bytes from src into dst.
* Strvisx encodes exactly len characters from src into dst.
* This is useful for encoding a block of data.
*/
static int
istrnvis(char *dst, size_t *dlen, const char *src, int flag)
{
char *extra = NULL;
int rv;
MAKEEXTRALIST(flag, extra, "");
if (!extra) {
if (dlen && *dlen == 0) {
errno = ENOSPC;
return -1;
}
*dst = '\0'; /* can't create extra, return "" */
return 0;
}
rv = istrsnvis(dst, dlen, src, flag, extra);
free(extra);
return rv;
int
strvisx(char *mbdst, const char *mbsrc, size_t len, int flags)
{
return istrsenvisx(mbdst, NULL, mbsrc, len, flags, "", NULL);
}
int
strvis(char *dst, const char *src, int flag)
strnvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flags)
{
return istrnvis(dst, NULL, src, flag);
return istrsenvisx(mbdst, &dlen, mbsrc, len, flags, "", NULL);
}
int
strnvis(char *dst, size_t dlen, const char *src, int flag)
strenvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flags,
int *cerr_ptr)
{
return istrnvis(dst, &dlen, src, flag);
return istrsenvisx(mbdst, &dlen, mbsrc, len, flags, "", cerr_ptr);
}
static int
istrnvisx(char *dst, size_t *dlen, const char *src, size_t len, int flag)
{
char *extra = NULL;
int rv;
MAKEEXTRALIST(flag, extra, "");
if (!extra) {
if (dlen && *dlen == 0) {
errno = ENOSPC;
return -1;
}
*dst = '\0'; /* can't create extra, return "" */
return 0;
}
rv = istrsnvisx(dst, dlen, src, len, flag, extra);
free(extra);
return rv;
}
int
strvisx(char *dst, const char *src, size_t len, int flag)
{
return istrnvisx(dst, NULL, src, len, flag);
}
int
strnvisx(char *dst, size_t dlen, const char *src, size_t len, int flag)
{
return istrnvisx(dst, &dlen, src, len, flag);
}
#endif

@ -1,4 +1,4 @@
/* $NetBSD: vis.h,v 1.20 2012/12/14 21:36:59 christos Exp $ */
/* $NetBSD: vis.h,v 1.21 2013/02/20 17:01:15 christos Exp $ */
/* $FreeBSD$ */
/*-
@ -96,9 +96,12 @@ int strsnvis(char *, size_t, const char *, int, const char *);
int strvisx(char *, const char *, size_t, int);
int strnvisx(char *, size_t, const char *, size_t, int);
int strenvisx(char *, size_t, const char *, size_t, int, int *);
int strsvisx(char *, const char *, size_t, int, const char *);
int strsnvisx(char *, size_t, const char *, size_t, int, const char *);
int strsenvisx(char *, size_t, const char *, size_t , int, const char *,
int *);
int strunvis(char *, const char *);
int strnunvis(char *, size_t, const char *);

@ -491,6 +491,20 @@ MLINKS+=ttyname.3 isatty.3 \
MLINKS+=tzset.3 tzsetwall.3
MLINKS+=unvis.3 strunvis.3 \
unvis.3 strunvisx.3
MLINKS+=vis.3 strvis.3 \
vis.3 strvisx.3
MLINKS+=vis.3 nvis.3 \
vis.3 snvis.3 \
vis.3 strenvisx.3 \
vis.3 strnunvis.3 \
vis.3 strnunvisx.3 \
vis.3 strnvis.3 \
vis.3 strnvisx.3 \
vis.3 strsenvisx.3 \
vis.3 strsnvis.3 \
vis.3 strsnvisx.3 \
vis.3 strsvis.3 \
vis.3 strsvisx.3 \
vis.3 strvis.3 \
vis.3 strvisx.3 \
vis.3 svis.3
MLINKS+=wordexp.3 wordfree.3

@ -391,10 +391,12 @@ FBSD_1.3 {
pwcache_userdb;
pwcache_groupdb;
snvis;
strenvisx;
strnunvis;
strnunvisx;
strnvis;
strnvisx;
strsenvisx;
strsnvis;
strsnvisx;
strsvis;