diff --git a/usr.bin/Makefile b/usr.bin/Makefile
index cf5604c79a06..e8df8e013ecd 100644
--- a/usr.bin/Makefile
+++ b/usr.bin/Makefile
@@ -84,6 +84,7 @@ SUBDIR=	${_addr2line} \
 	lesskey \
 	limits \
 	locale \
+	localedef \
 	lock \
 	lockf \
 	logger \
diff --git a/usr.bin/localedef/Makefile b/usr.bin/localedef/Makefile
new file mode 100644
index 000000000000..93c0515c85c6
--- /dev/null
+++ b/usr.bin/localedef/Makefile
@@ -0,0 +1,30 @@
+# $FreeBSD$
+
+PROG=	localedef
+SRCS=	charmap.c \
+	collate.c \
+	ctype.c \
+	localedef.c \
+	messages.c \
+	monetary.c \
+	numeric.c \
+	parser.y \
+	scanner.c \
+	time.c \
+	wide.c
+
+WARNS=	3
+${SRCS:M*.c}: parser.h
+parser.h: parser.y
+LIBADD=	avl
+
+IGNORE_PRAGMA=	yes
+
+CFLAGS+=	-DNEED_SOLARIS_BOOLEAN
+CFLAGS+=	-I. -I${.CURDIR}
+CFLAGS+=	-I${.CURDIR}/../../lib/libc/locale
+CFLAGS+=	-I${.CURDIR}/../../lib/libc/stdtime
+CFLAGS+=	-I${.CURDIR}/../../sys/cddl/compat/opensolaris
+CFLAGS+=	-I${.CURDIR}/../../sys/cddl/contrib/opensolaris/uts/common
+
+.include <bsd.prog.mk>
diff --git a/usr.bin/localedef/README b/usr.bin/localedef/README
new file mode 100644
index 000000000000..4d97371941c8
--- /dev/null
+++ b/usr.bin/localedef/README
@@ -0,0 +1,11 @@
+While there are tools called "localedef" in Solaris and Linux, this
+tool does not share heritage with any other implementation.  It was
+written independently by Garrett D'Amore while employed at Nexenta
+Systems, and thus carries the Nexenta Copyright.
+
+It was initially released under the CDDL license, but on 4 July 2014,
+Nexenta reissued the source under the BSD 2-clause license.  This
+code is part of the Illumos project.
+
+see:
+https://github.com/Nexenta/illumos-nexenta/commit/cf17542a37fc83d0ae093777e30d480423858c29
diff --git a/usr.bin/localedef/charmap.c b/usr.bin/localedef/charmap.c
new file mode 100644
index 000000000000..203008b7cec4
--- /dev/null
+++ b/usr.bin/localedef/charmap.c
@@ -0,0 +1,364 @@
+/*
+ * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2015 John Marino <draco@marino.st>
+ *
+ * This source code is derived from the illumos localedef command, and
+ * provided under BSD-style license terms by Nexenta Systems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * CHARMAP file handling for localedef.
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/avl.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <unistd.h>
+#include <stddef.h>
+#include <unistd.h>
+#include "localedef.h"
+#include "parser.h"
+
+static avl_tree_t	cmap_sym;
+static avl_tree_t	cmap_wc;
+
+typedef struct charmap {
+	const char *name;
+	wchar_t wc;
+	avl_node_t avl_sym;
+	avl_node_t avl_wc;
+} charmap_t;
+
+
+/*
+ * Array of POSIX specific portable characters.
+ */
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdiscarded-qualifiers"
+
+static const struct {
+	char	*name;	/* symbol name registered by add_charmap_posix() */
+	int	ch;	/* character code that the name maps to */
+} portable_chars[] = {
+	{ "NUL",		'\0' },
+	{ "alert",		'\a' },
+	{ "backspace",		'\b' },
+	{ "tab",		'\t' },
+	{ "carriage-return",	'\r' },
+	{ "newline",		'\n' },
+	{ "vertical-tab",	'\v' },
+	{ "form-feed",		'\f' },
+	{ "space",		' ' },
+	{ "exclamation-mark",	'!' },
+	{ "quotation-mark",	'"' },
+	{ "number-sign",	'#' },
+	{ "dollar-sign",	'$' },
+	{ "percent-sign",	'%' },
+	{ "ampersand",		'&' },
+	{ "apostrophe",		'\'' },
+	{ "left-parenthesis",	'(' },
+	{ "right-parenthesis",	')' },
+	{ "asterisk",		'*' },
+	{ "plus-sign",		'+' },
+	{ "comma",		',' },
+	{ "hyphen-minus",	'-' },
+	{ "hyphen",		'-' },
+	{ "full-stop",		'.' },
+	{ "period",		'.' },
+	{ "slash",		'/' },
+	{ "solidus",		'/' },
+	{ "zero",		'0' },
+	{ "one",		'1' },
+	{ "two",		'2' },
+	{ "three",		'3' },
+	{ "four",		'4' },
+	{ "five",		'5' },
+	{ "six",		'6' },
+	{ "seven",		'7' },
+	{ "eight",		'8' },
+	{ "nine",		'9' },
+	{ "colon",		':' },
+	{ "semicolon",		';' },
+	{ "less-than-sign",	'<' },
+	{ "equals-sign",	'=' },
+	{ "greater-than-sign",	'>' },
+	{ "question-mark",	'?' },
+	{ "commercial-at",	'@' },
+	{ "left-square-bracket", '[' },
+	{ "backslash",		'\\' },
+	{ "reverse-solidus",	'\\' },
+	{ "right-square-bracket", ']' },
+	{ "circumflex",		'^' },
+	{ "circumflex-accent",	'^' },
+	{ "low-line",		'_' },
+	{ "underscore",		'_' },
+	{ "grave-accent",	'`' },
+	{ "left-brace",		'{' },
+	{ "left-curly-bracket",	'{' },
+	{ "vertical-line",	'|' },
+	{ "right-brace",	'}' },
+	{ "right-curly-bracket", '}' },
+	{ "tilde",		'~' },
+	{ "A", 'A' },
+	{ "B", 'B' },
+	{ "C", 'C' },
+	{ "D", 'D' },
+	{ "E", 'E' },
+	{ "F", 'F' },
+	{ "G", 'G' },
+	{ "H", 'H' },
+	{ "I", 'I' },
+	{ "J", 'J' },
+	{ "K", 'K' },
+	{ "L", 'L' },
+	{ "M", 'M' },
+	{ "N", 'N' },
+	{ "O", 'O' },
+	{ "P", 'P' },
+	{ "Q", 'Q' },
+	{ "R", 'R' },
+	{ "S", 'S' },
+	{ "T", 'T' },
+	{ "U", 'U' },
+	{ "V", 'V' },
+	{ "W", 'W' },
+	{ "X", 'X' },
+	{ "Y", 'Y' },
+	{ "Z", 'Z' },
+	{ "a", 'a' },
+	{ "b", 'b' },
+	{ "c", 'c' },
+	{ "d", 'd' },
+	{ "e", 'e' },
+	{ "f", 'f' },
+	{ "g", 'g' },
+	{ "h", 'h' },
+	{ "i", 'i' },
+	{ "j", 'j' },
+	{ "k", 'k' },
+	{ "l", 'l' },
+	{ "m", 'm' },
+	{ "n", 'n' },
+	{ "o", 'o' },
+	{ "p", 'p' },
+	{ "q", 'q' },
+	{ "r", 'r' },
+	{ "s", 's' },
+	{ "t", 't' },
+	{ "u", 'u' },
+	{ "v", 'v' },
+	{ "w", 'w' },
+	{ "x", 'x' },
+	{ "y", 'y' },
+	{ "z", 'z' },
+	{ NULL, 0 }	/* sentinel: add_charmap_posix() stops at a NULL name */
+};
+
+#pragma GCC diagnostic pop
+
+static int	/* tree comparator: order charmap entries by symbol name */
+cmap_compare_sym(const void *n1, const void *n2)
+{
+	const charmap_t *lhs = n1;
+	const charmap_t *rhs = n2;
+	int cmp = strcmp(lhs->name, rhs->name);
+
+	/* clamp strcmp's result to exactly -1, 0 or 1 */
+	return ((cmp > 0) - (cmp < 0));
+}
+
+static int	/* tree comparator: order charmap entries by wide character */
+cmap_compare_wc(const void *n1, const void *n2)
+{
+	wchar_t a = ((const charmap_t *)n1)->wc;
+	wchar_t b = ((const charmap_t *)n2)->wc;
+
+	return ((a > b) - (a < b));
+}
+
+void
+init_charmap(void)
+{
+	avl_create(&cmap_sym, cmap_compare_sym, sizeof (charmap_t),
+	    offsetof(charmap_t, avl_sym));
+
+	avl_create(&cmap_wc, cmap_compare_wc, sizeof (charmap_t),
+	    offsetof(charmap_t, avl_wc));
+}
+
+static void
+add_charmap_impl(char *sym, wchar_t wc, int nodups)
+{
+	charmap_t	srch;
+	charmap_t	*n = NULL;
+	avl_index_t	where;
+
+	srch.wc = wc;
+	srch.name = sym;
+
+	/*
+	 * also possibly insert the wide mapping, although note that there
+	 * can only be one of these per wide character code.
+	 */
+	if ((wc != -1) && ((avl_find(&cmap_wc, &srch, &where)) == NULL)) {
+		if ((n = calloc(1, sizeof (*n))) == NULL) {
+			errf("out of memory");
+			return;
+		}
+		n->wc = wc;
+		avl_insert(&cmap_wc, n, where);
+	}
+
+	if (sym) {
+		if (avl_find(&cmap_sym, &srch, &where) != NULL) {
+			if (nodups) {
+				errf("duplicate character definition");
+			}
+			return;
+		}
+		if ((n == NULL) && ((n = calloc(1, sizeof (*n))) == NULL)) {
+			errf("out of memory");
+			return;
+		}
+		n->wc = wc;
+		n->name = sym;
+
+		avl_insert(&cmap_sym, n, where);
+	}
+}
+
+void
+add_charmap(char *sym, int c)
+{
+	add_charmap_impl(sym, c, 1);
+}
+
+void
+add_charmap_undefined(char *sym)
+{
+	charmap_t srch;
+	charmap_t *cm = NULL;
+
+	srch.name = sym;
+	cm = avl_find(&cmap_sym, &srch, NULL);
+
+	if ((undefok == 0) && ((cm == NULL) || (cm->wc == -1))) {
+		warn("undefined symbol <%s>", sym);
+		add_charmap_impl(sym, -1, 0);
+	} else {
+		free(sym);
+	}
+}
+
+/* Add one symbol per number in the range s..e, mapped to wc, wc+1, ... */
+void
+add_charmap_range(char *s, char *e, int wc)
+{
+	int	ls, le;
+	int	si;
+	int	sn, en;
+	int	i;
+	static const char *digits = "0123456789";
+
+	ls = strlen(s);
+	le = strlen(e);
+
+	/* both names must be the same non-empty prefix plus only digits */
+	if (((si = strcspn(s, digits)) == 0) || (si == ls) ||
+	    (strncmp(s, e, si) != 0) ||
+	    ((int)strspn(s + si, digits) != (ls - si)) ||
+	    ((int)strspn(e + si, digits) != (le - si)) ||
+	    ((sn = atoi(s + si)) > ((en = atoi(e + si))))) {
+		errf("malformed charmap range");
+		goto out;
+	}
+	s[si] = 0;
+
+	for (i = sn; i <= en; i++) {
+		char *nn;
+		/* nn is unspecified after a failure, so test the result */
+		if (asprintf(&nn, "%s%0*u", s, ls - si, i) < 0) {
+			errf("out of memory");
+			break;
+		}
+		add_charmap_impl(nn, wc, 1);
+		wc++;
+	}
+out:
+	/* s and e are released here on every path, not only on success */
+	free(s);
+	free(e);
+}
+
+void
+add_charmap_char(char *name, int val)
+{
+	add_charmap_impl(name, val, 0);
+}
+
+/*
+ * POSIX insists that certain entries be present, even when not in the
+ * original charmap file.
+ */
+void
+add_charmap_posix(void)
+{
+	int	i;
+
+	for (i = 0; portable_chars[i].name; i++) {
+		add_charmap_char(portable_chars[i].name, portable_chars[i].ch);
+	}
+}
+
+int	/* 0 (storing the code in *wc if wc != NULL) when sym is defined, else -1 */
+lookup_charmap(const char *sym, wchar_t *wc)
+{
+	charmap_t	key;
+	charmap_t	*hit;
+
+	key.name = sym;
+	hit = avl_find(&cmap_sym, &key, NULL);
+	/* entries recorded with wc == -1 count as not defined */
+	if (hit == NULL || hit->wc == -1)
+		return (-1);
+	if (wc != NULL)
+		*wc = hit->wc;
+	return (0);
+}
+
+int
+check_charmap(wchar_t wc)
+{
+	charmap_t srch;
+
+	srch.wc = wc;
+	return (avl_find(&cmap_wc, &srch, NULL) ? 0 : -1);
+}
diff --git a/usr.bin/localedef/collate.c b/usr.bin/localedef/collate.c
new file mode 100644
index 000000000000..5a73cbae9173
--- /dev/null
+++ b/usr.bin/localedef/collate.c
@@ -0,0 +1,1299 @@
+/*
+ * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2015 John Marino <draco@marino.st>
+ *
+ * This source code is derived from the illumos localedef command, and
+ * provided under BSD-style license terms by Nexenta Systems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * LC_COLLATE database generation routines for localedef.
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/avl.h>
+
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+#include <wchar.h>
+#include <limits.h>
+#include "localedef.h"
+#include "parser.h"
+#include "collate.h"
+
+/*
+ * Design notes.
+ *
+ * It will be extremely helpful to the reader if they have access to
+ * the localedef and locale file format specifications available.
+ * Latest versions of these are available from www.opengroup.org.
+ *
+ * The design for the collation code is a bit complex.  The goal is a
+ * single collation database as described in collate.h (in
+ * libc/port/locale).  However, there are some other tidbits:
+ *
+ * a) The substitution entries are now a directly indexable array.  A
+ * priority elsewhere in the table is taken as an index into the
+ * substitution table if it has a high bit (COLLATE_SUBST_PRIORITY)
+ * set.  (The bit is cleared and the result is the index into the
+ * table.)
+ *
+ * b) We eliminate duplicate entries into the substitution table.
+ * This saves a lot of space.
+ *
+ * c) The priorities for each level are "compressed", so that each
+ * sorting level has consecutively numbered priorities starting at 1.
+ * (0 is reserved for the ignore priority.)  This means sort levels
+ * which only have a few distinct priorities can represent the
+ * priority level in fewer bits, which makes the strxfrm output
+ * smaller.
+ *
+ * d) We record the total number of priorities so that strxfrm can
+ * figure out how many bytes to expand a numeric priority into.
+ *
+ * e) For the UNDEFINED pass (the last pass), we record the maximum
+ * number of bits needed to uniquely prioritize these entries, so that
+ * the last pass can also use smaller strxfrm output when possible.
+ *
+ * f) Priorities with the sign bit set are verboten.  This works out
+ * because no active character set needs that bit to carry significant
+ * information once the character is in wide form.
+ *
+ * To process the entire data to make the database, we actually run
+ * multiple passes over the data.
+ *
+ * The first pass, which is done at parse time, identifies elements,
+ * substitutions, and such, and records them in priority order.  As
+ * some priorities can refer to other priorities, using forward
+ * references, we use a table of references indicating whether the
+ * priority's value has been resolved, or whether it is still a
+ * reference.
+ *
+ * The second pass walks over all the items in priority order, noting
+ * that they are used directly, and not just an indirect reference.
+ * This is done by creating a "weight" structure for the item.  The
+ * weights are stashed in an AVL tree sorted by relative "priority".
+ *
+ * The third pass walks over all the weight structures, in priority
+ * order, and assigns a new monotonically increasing (per sort level)
+ * weight value to them.  These are the values that will actually be
+ * written to the file.
+ *
+ * The fourth pass just writes the data out.
+ */
+
+/*
+ * In order to resolve the priorities, we create a table of priorities.
+ * Entries in the table can be in one of three states.
+ *
+ * UNKNOWN is for newly allocated entries, and indicates that nothing
+ * is known about the priority.  (For example, when new entries are created
+ * for collating-symbols, this is the value assigned for them until the
+ * collating symbol's order has been determined.)
+ *
+ * RESOLVED is used for an entry where the priority indicates the final
+ * numeric weight.
+ *
+ * REFER is used for entries that reference other entries.  Typically
+ * this is used for forward references.  A collating-symbol can never
+ * have this value.
+ *
+ * The "pass" field is used during final resolution to aid in detection
+ * of referencing loops.  (For example <A> depends on <B>, but <B> has its
+ * priority dependent on <A>.)
+ */
+typedef enum {
+	UNKNOWN,	/* priority is totally unknown */
+	RESOLVED,	/* priority value fully resolved */
+	REFER		/* priority is a reference (index) */
+} res_t;
+
+typedef struct weight {
+	int32_t		pri;
+	int		opt;
+	avl_node_t	avl;
+} weight_t;
+
+typedef struct priority {
+	res_t		res;
+	int32_t		pri;
+	int		pass;
+	int		lineno;
+} collpri_t;
+
+#define	NUM_WT	collinfo.directive_count
+
+/*
+ * These are the abstract collating symbols, which are just a symbolic
+ * way to reference a priority.
+ */
+struct collsym {
+	char		*name;
+	int32_t		ref;
+	avl_node_t	avl;
+};
+
+/*
+ * These are also abstract collating symbols, but we allow them to have
+ * different priorities at different levels.
+ */
+typedef struct collundef {
+	char		*name;
+	int32_t		ref[COLL_WEIGHTS_MAX];
+	avl_node_t	avl;
+} collundef_t;
+
+/*
+ * These are called "chains" in libc.  This records the fact that two
+ * or more characters should be treated as a single collating entity when
+ * they appear together.  For example, in Spanish <C><h> gets collated
+ * as a character between <C> and <D>.
+ */
+struct collelem {
+	char		*symbol;
+	wchar_t		*expand;
+	int32_t		ref[COLL_WEIGHTS_MAX];
+	avl_node_t	avl_bysymbol;
+	avl_node_t	avl_byexpand;
+};
+
+/*
+ * Individual characters have a sequence of weights as well.
+ */
+typedef struct collchar {
+	wchar_t		wc;
+	int32_t		ref[COLL_WEIGHTS_MAX];
+	avl_node_t	avl;
+} collchar_t;
+
+/*
+ * Substitution entries.  The key is itself a priority.  Note that
+ * when we create one of these, we *automatically* wind up with a
+ * fully resolved priority for the key, because creation of
+ * substitutions creates a resolved priority at the same time.
+ */
+typedef struct {
+	int32_t		key;
+	int32_t		ref[COLLATE_STR_LEN];
+	avl_node_t	avl;
+	avl_node_t	avl_ref;
+} subst_t;
+
+static avl_tree_t	collsyms;
+static avl_tree_t	collundefs;
+static avl_tree_t	elem_by_symbol;
+static avl_tree_t	elem_by_expand;
+static avl_tree_t	collchars;
+static avl_tree_t	substs[COLL_WEIGHTS_MAX];
+static avl_tree_t	substs_ref[COLL_WEIGHTS_MAX];
+static avl_tree_t	weights[COLL_WEIGHTS_MAX];
+static int32_t		nweight[COLL_WEIGHTS_MAX];
+
+/*
+ * This is state tracking for the ellipsis token.  Note that we start
+ * the initial values so that the ellipsis logic will think we got a
+ * magic starting value of NUL.  It starts at minus one because the
+ * starting point is exclusive -- i.e. the starting point is not
+ * itself handled by the ellipsis code.
+ */
+static int currorder = EOF;
+static int lastorder = EOF;
+static collelem_t *currelem;
+static collchar_t *currchar;
+static collundef_t *currundef;
+static wchar_t ellipsis_start = 0;
+static int32_t ellipsis_weights[COLL_WEIGHTS_MAX];
+
+/*
+ * We keep a running tally of weights.
+ */
+static int nextpri = 1;
+static int nextsubst[COLL_WEIGHTS_MAX] = { 0 };
+
+/*
+ * This array collects up the weights for each level.
+ */
+static int32_t order_weights[COLL_WEIGHTS_MAX];
+static int curr_weight = 0;
+static int32_t subst_weights[COLLATE_STR_LEN];
+static int curr_subst = 0;
+
+/*
+ * Some initial priority values.
+ */
+static int32_t pri_undefined[COLL_WEIGHTS_MAX];
+static int32_t pri_ignore;
+
+static collate_info_t collinfo;
+
+static collpri_t	*prilist = NULL;
+static int		numpri = 0;
+static int		maxpri = 0;
+
+static void start_order(int);
+
+/* Allocate a new UNKNOWN priority slot; returns its index, or -1 on OOM. */
+static int32_t
+new_pri(void)
+{
+	if (numpri >= maxpri) {
+		int		i, grown = maxpri ? maxpri * 2 : 1024;
+		collpri_t	*p = realloc(prilist, sizeof (*p) * grown);
+		if (p == NULL) {
+			fprintf(stderr,"out of memory");
+			return (-1);	/* prilist and maxpri are untouched */
+		}
+		for (i = numpri; i < grown; i++) {
+			p[i].res = UNKNOWN;
+			p[i].pri = p[i].pass = p[i].lineno = 0;
+		}
+		prilist = p;
+		maxpri = grown;
+	}
+	return (numpri++);
+}
+
+static collpri_t *	/* entry for ref; NULL after INTERR when ref is invalid */
+get_pri(int32_t ref)
+{
+	if ((ref < 0) || (ref >= numpri)) {	/* only 0 .. numpri-1 are in use */
+		INTERR;
+		return (NULL);
+	}
+	return (&prilist[ref]);
+}
+
+/* Store v/res in slot ref; an already-set slot only triggers a warning. */
+static void
+set_pri(int32_t ref, int32_t v, res_t res)
+{
+	collpri_t	*pri;
+	/* get_pri() has already reported a bad ref (e.g. -1 from new_pri) */
+	if ((pri = get_pri(ref)) == NULL)
+		return;
+
+	if ((res == REFER) && ((v < 0) || (v >= numpri))) {
+		INTERR;
+	}
+	/* Resolve self references */
+	if ((res == REFER) && (ref == v)) {
+		v = nextpri;
+		res = RESOLVED;
+	}
+	if (pri->res != UNKNOWN) {
+		warn("repeated item in order list (first on %d)",
+		    pri->lineno);
+		return;
+	}
+	pri->lineno = lineno;
+	pri->pri = v;
+	pri->res = res;
+}
+
+/* Follow REFER links from ref to a RESOLVED value; -1 if that is impossible. */
+static int32_t
+resolve_pri(int32_t ref)
+{
+	collpri_t	*pri;
+	static int32_t	pass = 0;	/* stamps entries visited in this call */
+	/* an invalid ref was already reported by get_pri() */
+	if ((pri = get_pri(ref)) == NULL)
+		return (-1);
+	pass++;
+	while (pri->res == REFER) {
+		if (pri->pass == pass) {
+			/* report a line with the circular symbol */
+			lineno = pri->lineno;
+			fprintf(stderr,"circular reference in order list");
+			return (-1);
+		}
+		if ((pri->pri < 0) || (pri->pri >= numpri)) {
+			INTERR;
+			return (-1);
+		}
+		pri->pass = pass;
+		pri = &prilist[pri->pri];
+	}
+	if (pri->res == UNKNOWN) {
+		return (-1);
+	}
+	if (pri->res != RESOLVED)
+		INTERR;
+	return (pri->pri);
+}
+
+static int
+weight_compare(const void *n1, const void *n2)
+{
+	int32_t	k1 = ((const weight_t *)n1)->pri;
+	int32_t	k2 = ((const weight_t *)n2)->pri;
+
+	return (k1 < k2 ? -1 : k1 > k2 ? 1 : 0);
+}
+
+static int
+collsym_compare(const void *n1, const void *n2)
+{
+	const collsym_t *c1 = n1;
+	const collsym_t *c2 = n2;
+	int rv;
+
+	rv = strcmp(c1->name, c2->name);
+	return ((rv < 0) ? -1 : (rv > 0) ? 1 : 0);
+}
+
+static int
+collundef_compare(const void *n1, const void *n2)
+{
+	const collundef_t *c1 = n1;
+	const collundef_t *c2 = n2;
+	int rv;
+
+	rv = strcmp(c1->name, c2->name);
+	return ((rv < 0) ? -1 : (rv > 0) ? 1 : 0);
+}
+
+static int
+element_compare_symbol(const void *n1, const void *n2)
+{
+	const collelem_t *c1 = n1;
+	const collelem_t *c2 = n2;
+	int rv;
+
+	rv = strcmp(c1->symbol, c2->symbol);
+	return ((rv < 0) ? -1 : (rv > 0) ? 1 : 0);
+}
+
+static int
+element_compare_expand(const void *n1, const void *n2)
+{
+	const collelem_t *c1 = n1;
+	const collelem_t *c2 = n2;
+	int rv;
+
+	rv = wcscmp(c1->expand, c2->expand);
+	return ((rv < 0) ? -1 : (rv > 0) ? 1 : 0);
+}
+
+static int
+collchar_compare(const void *n1, const void *n2)
+{
+	wchar_t	k1 = ((const collchar_t *)n1)->wc;
+	wchar_t	k2 = ((const collchar_t *)n2)->wc;
+
+	return (k1 < k2 ? -1 : k1 > k2 ? 1 : 0);
+}
+
+static int
+subst_compare(const void *n1, const void *n2)
+{
+	int32_t	k1 = ((const subst_t *)n1)->key;
+	int32_t	k2 = ((const subst_t *)n2)->key;
+
+	return (k1 < k2 ? -1 : k1 > k2 ? 1 : 0);
+}
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wcast-qual"
+
+static int
+subst_compare_ref(const void *n1, const void *n2)
+{
+	int32_t *c1 = ((subst_t *)n1)->ref;
+	int32_t *c2 = ((subst_t *)n2)->ref;
+	int rv;
+
+	rv = wcscmp((wchar_t *)c1, (wchar_t *)c2);
+	return ((rv < 0) ? -1 : (rv > 0) ? 1 : 0);
+}
+
+#pragma GCC diagnostic pop
+
+/* Create the empty lookup trees and reserve the built-in priority slots. */
+void
+init_collate(void)
+{
+	int i;
+
+	avl_create(&collsyms, collsym_compare, sizeof (collsym_t),
+	    offsetof(collsym_t, avl));
+
+	/* nodes in this tree are collundef_t, so size it accordingly */
+	avl_create(&collundefs, collundef_compare, sizeof (collundef_t),
+	    offsetof(collundef_t, avl));
+
+	avl_create(&elem_by_symbol, element_compare_symbol, sizeof (collelem_t),
+	    offsetof(collelem_t, avl_bysymbol));
+	avl_create(&elem_by_expand, element_compare_expand, sizeof (collelem_t),
+	    offsetof(collelem_t, avl_byexpand));
+
+	avl_create(&collchars, collchar_compare, sizeof (collchar_t),
+	    offsetof(collchar_t, avl));
+
+	for (i = 0; i < COLL_WEIGHTS_MAX; i++) {
+		avl_create(&substs[i], subst_compare, sizeof (subst_t),
+		    offsetof(subst_t, avl));
+		avl_create(&substs_ref[i], subst_compare_ref,
+		    sizeof (subst_t), offsetof(subst_t, avl_ref));
+		avl_create(&weights[i], weight_compare, sizeof (weight_t),
+		    offsetof(weight_t, avl));
+		nweight[i] = 1;
+	}
+
+	(void) memset(&collinfo, 0, sizeof (collinfo));
+
+	/* allocate some initial priorities */
+	pri_ignore = new_pri();
+	set_pri(pri_ignore, 0, RESOLVED);
+
+	for (i = 0; i < COLL_WEIGHTS_MAX; i++) {
+		pri_undefined[i] = new_pri();
+		/* we will override this later */
+		set_pri(pri_undefined[i], COLLATE_MAX_PRIORITY, UNKNOWN);
+	}
+}
+
+/* Register collating-symbol name (stored, not copied) with a new priority. */
+void
+define_collsym(char *name)
+{
+	collsym_t	*sym;
+	avl_index_t	where;
+
+	if ((sym = calloc(1, sizeof (*sym))) == NULL) {
+		fprintf(stderr,"out of memory");
+		return;
+	}
+	sym->name = name;
+
+	if (avl_find(&collsyms, sym, &where) != NULL) {
+		/* should never happen: only called for undefined symbols */
+		INTERR;
+		free(sym);	/* never linked into the tree; don't leak it */
+		return;
+	}
+	/* take a priority slot only once the symbol is known to be new */
+	sym->ref = new_pri();
+	avl_insert(&collsyms, sym, where);
+}
+
+collsym_t *
+lookup_collsym(char *name)
+{
+	collsym_t	srch;
+
+	srch.name = name;
+	return (avl_find(&collsyms, &srch, NULL));
+}
+
+collelem_t *
+lookup_collelem(char *symbol)
+{
+	collelem_t	srch;
+
+	srch.symbol = symbol;
+	return (avl_find(&elem_by_symbol, &srch, NULL));
+}
+
+static collundef_t *	/* entry for name, created on demand; NULL on OOM */
+get_collundef(char *name)
+{
+	collundef_t	srch;
+	collundef_t	*ud;
+	avl_index_t	where;
+	int		i;
+
+	srch.name = name;
+	if ((ud = avl_find(&collundefs, &srch, &where)) == NULL) {
+		if (((ud = calloc(1, sizeof (*ud))) == NULL) ||
+		    ((ud->name = strdup(name)) == NULL)) {
+			fprintf(stderr,"out of memory");
+			free(ud);	/* NULL, or the node whose strdup failed */
+			return (NULL);
+		}
+		for (i = 0; i < NUM_WT; i++)
+			ud->ref[i] = new_pri();
+		avl_insert(&collundefs, ud, where);
+	}
+	add_charmap_undefined(name);
+	return (ud);
+}
+
+static collchar_t *
+get_collchar(wchar_t wc, int create)
+{
+	collchar_t	srch;
+	collchar_t	*cc;
+	avl_index_t	where;
+	int		i;
+
+	srch.wc = wc;
+	cc = avl_find(&collchars, &srch, &where);
+	if ((cc == NULL) && create) {
+		if ((cc = calloc(sizeof (*cc), 1)) == NULL) {
+			fprintf(stderr, "out of memory");
+			return (NULL);
+		}
+		for (i = 0; i < NUM_WT; i++) {
+			cc->ref[i] = new_pri();
+		}
+		cc->wc = wc;
+		avl_insert(&collchars, cc, where);
+	}
+	return (cc);
+}
+
+void
+end_order_collsym(collsym_t *sym)
+{
+	start_order(T_COLLSYM);
+	/* update the weight */
+
+	set_pri(sym->ref, nextpri, RESOLVED);
+	nextpri++;
+}
+
+/*
+ * Helper for end_order(): bind the weights collected for the current
+ * order line to the priority slots in refs[].  A weight that was not
+ * given, cannot be looked up, or has the value -1 makes the slot
+ * RESOLVED to self; any other weight becomes a REFER to that slot.
+ * Every consumed order_weights[] entry is reset to -1.
+ */
+static void
+end_order_apply(const int32_t *refs, int32_t self)
+{
+	int		i;
+	int32_t		ref;
+	collpri_t	*p;
+
+	for (i = 0; i < NUM_WT; i++) {
+		if (((ref = order_weights[i]) < 0) ||
+		    ((p = get_pri(ref)) == NULL) ||
+		    (p->pri == -1)) {
+			/* unspecified weight is a self reference */
+			set_pri(refs[i], self, RESOLVED);
+		} else {
+			set_pri(refs[i], ref, REFER);
+		}
+		order_weights[i] = -1;
+	}
+}
+
+/*
+ * Finish the order line opened by one of the start_order_*() functions:
+ * store its weights according to currorder and advance nextpri.
+ */
+void
+end_order(void)
+{
+	int	i;
+
+	switch (currorder) {
+	case T_CHAR:
+		/*
+		 * start_order_char() leaves currchar unset when it rejects
+		 * a malformed range or cannot allocate the character.
+		 */
+		if (currchar == NULL) {
+			INTERR;
+			break;
+		}
+		end_order_apply(currchar->ref, nextpri);
+
+		/* leave a cookie trail in case next symbol is ellipsis */
+		ellipsis_start = currchar->wc + 1;
+		currchar = NULL;
+		break;
+
+	case T_ELLIPSIS:
+		/* save off the weights where we can find them */
+		for (i = 0; i < NUM_WT; i++) {
+			ellipsis_weights[i] = order_weights[i];
+			order_weights[i] = -1;
+		}
+		break;
+
+	case T_COLLELEM:
+		if (currelem == NULL) {
+			INTERR;
+			break;
+		}
+		end_order_apply(currelem->ref, nextpri);
+		break;
+
+	case T_UNDEFINED:
+		/* an unspecified UNDEFINED weight is stored as -1, not nextpri */
+		end_order_apply(pri_undefined, -1);
+		break;
+
+	case T_SYMBOL:
+		/* get_collundef() returns NULL when out of memory */
+		if (currundef == NULL) {
+			INTERR;
+			break;
+		}
+		end_order_apply(currundef->ref, nextpri);
+		break;
+
+	default:
+		INTERR;
+	}
+
+	nextpri++;
+}
+
+static void
+start_order(int type)
+{
+	int	i;
+
+	lastorder = currorder;
+	currorder = type;
+
+	/* this is used to protect ELLIPSIS processing */
+	if ((lastorder == T_ELLIPSIS) && (type != T_CHAR)) {
+		fprintf(stderr, "character value expected");
+	}
+
+	for (i = 0; i < COLL_WEIGHTS_MAX; i++) {
+		order_weights[i] = -1;
+	}
+	curr_weight = 0;
+}
+
+void
+start_order_undefined(void)
+{
+	start_order(T_UNDEFINED);
+}
+
+void
+start_order_symbol(char *name)
+{
+	currundef = get_collundef(name);
+	start_order(T_SYMBOL);
+}
+
+void	/* begin an order line for wc, first filling in a pending "..." range */
+start_order_char(wchar_t wc)
+{
+	collchar_t	*cc;
+	int32_t		ref;
+
+	start_order(T_CHAR);
+
+	/*
+	 * If we last saw an ellipsis, then we need to close the range.
+	 * Handle that here.  Note that we have to be careful because the
+	 * items *inside* the range are treated exclusively to the items
+	 * outside of the range.  The ends of the range can have quite
+	 * different weights than the range members.
+	 */
+	if (lastorder == T_ELLIPSIS) {
+		int		i;
+
+		if (wc < ellipsis_start) {
+			fprintf(stderr, "malformed range!");
+			return;
+		}
+		while (ellipsis_start < wc) {
+			/*
+			 * pick all of the saved weights for the
+			 * ellipsis.  note that -1 encodes for the
+			 * ellipsis itself, which means to take the
+			 * current relative priority.
+			 */
+			if ((cc = get_collchar(ellipsis_start, 1)) == NULL) {
+				INTERR;
+				return;
+			}
+			for (i = 0; i < NUM_WT; i++) {
+				collpri_t *p;
+				if (((ref = ellipsis_weights[i]) == -1) ||
+				    ((p = get_pri(ref)) == NULL) ||
+				    (p->pri == -1)) {
+					set_pri(cc->ref[i], nextpri, RESOLVED);
+				} else {
+					set_pri(cc->ref[i], ref, REFER);
+				}
+				ellipsis_weights[i] = 0;	/* NOTE(review): 0, not -1, for later members -- confirm */
+			}
+			ellipsis_start++;
+			nextpri++;
+		}
+	}
+
+	currchar = get_collchar(wc, 1);
+}
+
+void
+start_order_collelem(collelem_t *e)
+{
+	start_order(T_COLLELEM);
+	currelem = e;
+}
+
+void
+start_order_ellipsis(void)
+{
+	int	i;
+
+	start_order(T_ELLIPSIS);
+
+	if (lastorder != T_CHAR) {
+		fprintf(stderr, "illegal starting point for range");
+		return;
+	}
+
+	for (i = 0; i < NUM_WT; i++) {
+		ellipsis_weights[i] = order_weights[i];
+	}
+}
+
+/* Define collating-element name for sequence wcs; both are kept, not copied. */
+void
+define_collelem(char *name, wchar_t *wcs)
+{
+	collelem_t	*e;
+	avl_index_t	where1;
+	avl_index_t	where2;
+	int		i;
+
+	if (wcslen(wcs) >= COLLATE_STR_LEN) {
+		fprintf(stderr,"expanded collation element too long");
+		return;
+	}
+	if ((e = calloc(1, sizeof (*e))) == NULL) {
+		fprintf(stderr, "out of memory");
+		return;
+	}
+	e->expand = wcs;
+	e->symbol = name;
+	/*
+	 * This is executed before the order statement, so we don't
+	 * know how many priorities we *really* need.  We allocate one
+	 * for each possible weight.  Not a big deal, as collating-elements
+	 * prove to be quite rare.
+	 */
+	for (i = 0; i < COLL_WEIGHTS_MAX; i++) {
+		e->ref[i] = new_pri();
+	}
+
+	/* A character sequence can only reduce to one element. */
+	if ((avl_find(&elem_by_symbol, e, &where1) != NULL) ||
+	    (avl_find(&elem_by_expand, e, &where2) != NULL)) {
+		fprintf(stderr, "duplicate collating element definition");
+		free(e);	/* not linked into either tree; don't leak it */
+		return;
+	}
+	avl_insert(&elem_by_symbol, e, where1);
+	avl_insert(&elem_by_expand, e, where2);
+}
+
+/* OR kw's flag into the directive being parsed; no-op past COLL_WEIGHTS_MAX. */
+void
+add_order_bit(int kw)
+{
+	uint8_t bit = DIRECTIVE_UNDEF;
+	switch (kw) {
+	case T_FORWARD:
+		bit = DIRECTIVE_FORWARD;
+		break;
+	case T_BACKWARD:
+		bit = DIRECTIVE_BACKWARD;
+		break;
+	case T_POSITION:
+		bit = DIRECTIVE_POSITION;
+		break;
+	default:
+		INTERR;
+	}
+	if (collinfo.directive_count < COLL_WEIGHTS_MAX)	/* stay in bounds */
+		collinfo.directive[collinfo.directive_count] |= bit;
+}
+
+void	/* count one more directive, refusing to exceed COLL_WEIGHTS_MAX */
+add_order_directive(void)
+{
+	if (collinfo.directive_count < COLL_WEIGHTS_MAX)
+		collinfo.directive_count++;	/* NUM_WT must stay a valid bound */
+	else
+		fprintf(stderr,"too many directives (max %d)", COLL_WEIGHTS_MAX);
+}
+
+static void
+add_order_pri(int32_t ref)
+{
+	if (curr_weight >= NUM_WT) {
+		fprintf(stderr,"too many weights (max %d)", NUM_WT);
+		return;
+	}
+	order_weights[curr_weight] = ref;
+	curr_weight++;
+}
+
+void
+add_order_collsym(collsym_t *s)
+{
+	add_order_pri(s->ref);
+}
+
+void
+add_order_char(wchar_t wc)
+{
+	collchar_t *cc;
+
+	if ((cc = get_collchar(wc, 1)) == NULL) {
+		INTERR;
+		return;
+	}
+
+	add_order_pri(cc->ref[curr_weight]);
+}
+
+void
+add_order_collelem(collelem_t *e)
+{
+	add_order_pri(e->ref[curr_weight]);
+}
+
+/*
+ * Record the shared pri_ignore reference as the next weight.
+ */
+void
+add_order_ignore(void)
+{
+	int32_t ignore_ref = pri_ignore;
+
+	add_order_pri(ignore_ref);
+}
+
+/*
+ * Record, as the next weight, the reference that the symbol named sym
+ * (looked up through get_collundef()) holds for the current weight
+ * position.  A failed lookup is reported as an internal error.
+ */
+void
+add_order_symbol(char *sym)
+{
+	collundef_t *undef = get_collundef(sym);
+
+	if (undef != NULL) {
+		add_order_pri(undef->ref[curr_weight]);
+	} else {
+		INTERR;
+	}
+}
+
+/*
+ * Record an ellipsis weight.  The reference value 0 is the special
+ * marker meaning "self reference".
+ */
+void
+add_order_ellipsis(void)
+{
+	const int32_t self_ref = 0;
+
+	add_order_pri(self_ref);
+}
+
+/*
+ * Turn the references accumulated in subst_weights[] (curr_subst of
+ * them) into a substitution for the current weight position and record
+ * its key as the next weight.
+ *
+ * An existing substitution with the same reference sequence is reused;
+ * otherwise a new one is allocated, given a fresh priority, and entered
+ * in both substs_ref[] and substs[] for the current weight position.
+ */
+void
+add_order_subst(void)
+{
+	subst_t srch;
+	subst_t	*s;
+	avl_index_t where;
+	int i;
+
+	/* Build the search key and clear the accumulator as we go. */
+	(void) memset(&srch, 0, sizeof (srch));
+	for (i = 0; i < curr_subst; i++) {
+		srch.ref[i] = subst_weights[i];
+		subst_weights[i] = 0;
+	}
+	s = avl_find(&substs_ref[curr_weight], &srch, &where);
+
+	if (s == NULL) {
+		if ((s = calloc(sizeof (*s), 1)) == NULL) {
+			fprintf(stderr,"out of memory");
+			return;
+		}
+		s->key = new_pri();
+
+		/*
+		 * We use a self reference for our key, but we set a
+		 * high bit to indicate that this is a substitution
+		 * reference.  This will expedite table lookups later,
+		 * and prevent table lookups for situations that don't
+		 * require it.  (In short, its a big win, because we
+		 * can skip a lot of binary searching.)
+		 */
+		set_pri(s->key,
+		    (nextsubst[curr_weight] | COLLATE_SUBST_PRIORITY),
+		    RESOLVED);
+		nextsubst[curr_weight] += 1;
+
+		for (i = 0; i < curr_subst; i++) {
+			s->ref[i] = srch.ref[i];
+		}
+
+		/* "where" still holds the slot found by the failed lookup. */
+		avl_insert(&substs_ref[curr_weight], s, where);
+
+		/* The node is new, so it must not be in substs[] yet. */
+		if (avl_find(&substs[curr_weight], s, &where) != NULL) {
+			INTERR;
+			return;
+		}
+		avl_insert(&substs[curr_weight], s, where);
+	}
+	/* The accumulator has been consumed; start the next one empty. */
+	curr_subst = 0;
+
+
+	/*
+	 * We are using the current (unique) priority as a search key
+	 * in the substitution table.
+	 */
+	add_order_pri(s->key);
+}
+
+/*
+ * Append reference ref to the substitution being accumulated in
+ * subst_weights[].  At most COLLATE_STR_LEN references are kept; any
+ * further one is dropped with a diagnostic.
+ */
+static void
+add_subst_pri(int32_t ref)
+{
+	if (curr_subst < COLLATE_STR_LEN) {
+		subst_weights[curr_subst] = ref;
+		curr_subst++;
+		return;
+	}
+	fprintf(stderr,"substitution string is too long");
+}
+
+/*
+ * Append to the substitution being accumulated the reference that
+ * character wc holds for the current weight position.  A failed lookup,
+ * or a lookup that yields an entry for a different character, is
+ * reported as an internal error.
+ */
+void
+add_subst_char(wchar_t wc)
+{
+	collchar_t *entry = get_collchar(wc, 1);
+
+	if (entry == NULL || entry->wc != wc) {
+		INTERR;
+		return;
+	}
+
+	/* take the weight the character has at this position */
+	add_subst_pri(entry->ref[curr_weight]);
+}
+
+/*
+ * Append to the substitution being accumulated the reference that
+ * collating element e holds for the current weight position.
+ */
+void
+add_subst_collelem(collelem_t *e)
+{
+	const int32_t *elem_refs = e->ref;
+
+	add_subst_pri(elem_refs[curr_weight]);
+}
+
+/*
+ * Append the reference of collating symbol s to the substitution being
+ * accumulated.
+ */
+void
+add_subst_collsym(collsym_t *s)
+{
+	int32_t sym_ref = s->ref;
+
+	add_subst_pri(sym_ref);
+}
+
+/*
+ * Append to the substitution being accumulated the reference that the
+ * symbol named ptr holds for the current weight position.  Nothing is
+ * added (and nothing is reported) when get_collundef() returns NULL.
+ */
+void
+add_subst_symbol(char *ptr)
+{
+	collundef_t *undef = get_collundef(ptr);
+
+	if (undef == NULL)
+		return;
+	add_subst_pri(undef->ref[curr_weight]);
+}
+
+/*
+ * Make sure the priority that ref resolves to is present in the weight
+ * tree for level "pass".  Priorities equal to 0 (ignore) and priorities
+ * carrying COLLATE_SUBST_PRIORITY are never entered.
+ */
+void
+add_weight(int32_t ref, int pass)
+{
+	weight_t key;
+	weight_t *node;
+	avl_index_t slot;
+
+	key.pri = resolve_pri(ref);
+
+	/* ignores are not translated; substitutions are not weights */
+	if (key.pri == 0 || (key.pri & COLLATE_SUBST_PRIORITY) != 0)
+		return;
+
+	/* already known for this level? */
+	if (avl_find(&weights[pass], &key, &slot) != NULL)
+		return;
+
+	node = calloc(sizeof (*node), 1);
+	if (node == NULL) {
+		fprintf(stderr, "out of memory");
+		return;
+	}
+	node->pri = key.pri;
+	avl_insert(&weights[pass], node, slot);
+}
+
+/*
+ * Register every reference in refs[] with add_weight(), using the array
+ * index as the level.  refs must provide at least NUM_WT entries.
+ */
+void
+add_weights(int32_t *refs)
+{
+	int level = 0;
+
+	while (level < NUM_WT) {
+		add_weight(refs[level], level);
+		level++;
+	}
+}
+
+/*
+ * Translate reference ref into the value to be written for level "pass".
+ *
+ * Substitution priorities and priorities <= 0 are returned unchanged.
+ * Any other priority is looked up in the weight tree of that level and
+ * its "opt" value is returned; a missing entry is an internal error and
+ * yields -1.
+ */
+int32_t
+get_weight(int32_t ref, int pass)
+{
+	weight_t	key;
+	weight_t	*found;
+	int32_t		pri = resolve_pri(ref);
+
+	if ((pri & COLLATE_SUBST_PRIORITY) || pri <= 0)
+		return (pri);
+
+	key.pri = pri;
+	found = avl_find(&weights[pass], &key, NULL);
+	if (found != NULL)
+		return (found->opt);
+
+	INTERR;
+	return (-1);
+}
+
+/*
+ * Copy at most n wide characters from s2 to s1.  Copying stops after a
+ * terminating 0 has been copied; whatever is left of the n positions is
+ * filled with 0.  If s2 holds n or more characters before its terminator,
+ * s1 is left without one.  Returns s1.
+ */
+wchar_t *
+wsncpy(wchar_t *s1, const wchar_t *s2, size_t n)
+{
+	size_t pos = 0;
+
+	/* copy phase: stops once the terminator has been transferred */
+	while (pos < n) {
+		wchar_t ch = s2[pos];
+
+		s1[pos++] = ch;
+		if (ch == 0)
+			break;
+	}
+
+	/* padding phase */
+	while (pos < n)
+		s1[pos++] = 0;
+
+	return (s1);
+}
+
+/*
+ * Write the compiled LC_COLLATE category.
+ *
+ * The priorities actually referenced are collected per level and
+ * renumbered without gaps.  The output tables are then built and written
+ * to the file returned by open_category() in this order: version string,
+ * collinfo header, table for characters 0..UCHAR_MAX, one substitution
+ * table per level, the chain (collating element) table, and the table of
+ * large (> UCHAR_MAX) characters.
+ *
+ * The function returns early, without closing the category, when an
+ * allocation, open_category() or wr_category() fails.
+ */
+void
+dump_collate(void)
+{
+	FILE			*f;
+	int			i, j, n;
+	size_t			sz;
+	int32_t			pri;
+	collelem_t		*ce;
+	collchar_t		*cc;
+	subst_t			*sb;
+	char			vers[COLLATE_STR_LEN];
+	collate_char_t		chars[UCHAR_MAX + 1];
+	collate_large_t		*large;
+	collate_subst_t		*subst[COLL_WEIGHTS_MAX];
+	collate_chain_t		*chain;
+
+	/*
+	 * We have to run through a preliminary pass to identify all the
+	 * weights that we use for each sorting level.
+	 */
+	for (i = 0; i < NUM_WT; i++) {
+		add_weight(pri_ignore, i);
+	}
+	for (i = 0; i < NUM_WT; i++) {
+		for (sb = avl_first(&substs[i]); sb;
+		    sb = AVL_NEXT(&substs[i], sb)) {
+			for (j = 0; sb->ref[j]; j++) {
+				add_weight(sb->ref[j], i);
+			}
+		}
+	}
+	for (ce = avl_first(&elem_by_expand);
+	    ce != NULL;
+	    ce = AVL_NEXT(&elem_by_expand, ce)) {
+		add_weights(ce->ref);
+	}
+	for (cc = avl_first(&collchars); cc; cc = AVL_NEXT(&collchars, cc)) {
+		add_weights(cc->ref);
+	}
+
+	/*
+	 * Now we walk the entire set of weights, removing the gaps
+	 * in the weights.  This gives us optimum usage.  The walk
+	 * occurs in priority.
+	 */
+	for (i = 0; i < NUM_WT; i++) {
+		weight_t *w;
+		for (w = avl_first(&weights[i]); w;
+		    w = AVL_NEXT(&weights[i], w)) {
+			w->opt = nweight[i];
+			nweight[i] += 1;
+		}
+	}
+
+	(void) memset(&chars, 0, sizeof (chars));
+	(void) memset(vers, 0, COLLATE_STR_LEN);
+	(void) strlcpy(vers, COLLATE_VERSION, sizeof (vers));
+
+	/*
+	 * We need to make sure we arrange for the UNDEFINED field
+	 * to show up.  Also, set the total weight counts.
+	 */
+	for (i = 0; i < NUM_WT; i++) {
+		if (resolve_pri(pri_undefined[i]) == -1) {
+			set_pri(pri_undefined[i], -1, RESOLVED);
+			/* they collate at the end of everything else */
+			collinfo.undef_pri[i] = COLLATE_MAX_PRIORITY;
+		}
+		collinfo.pri_count[i] = nweight[i];
+	}
+
+	collinfo.pri_count[NUM_WT] = max_wide();
+	collinfo.undef_pri[NUM_WT] = COLLATE_MAX_PRIORITY;
+	collinfo.directive[NUM_WT] = DIRECTIVE_UNDEFINED;
+
+	/*
+	 * Ordinary character priorities
+	 */
+	for (i = 0; i <= UCHAR_MAX; i++) {
+		if ((cc = get_collchar(i, 0)) != NULL) {
+			for (j = 0; j < NUM_WT; j++) {
+				chars[i].pri[j] = get_weight(cc->ref[j], j);
+			}
+		} else {
+			for (j = 0; j < NUM_WT; j++) {
+				chars[i].pri[j] =
+				    get_weight(pri_undefined[j], j);
+			}
+			/*
+			 * Per POSIX, for undefined characters, we
+			 * also have to add a last item, which is the
+			 * character code.
+			 */
+			chars[i].pri[NUM_WT] = i;
+		}
+	}
+
+	/*
+	 * Substitution tables
+	 */
+	for (i = 0; i < NUM_WT; i++) {
+		collate_subst_t *st = NULL;
+		n = collinfo.subst_count[i] = avl_numnodes(&substs[i]);
+		if ((st = calloc(sizeof (collate_subst_t) * n, 1)) == NULL) {
+			fprintf(stderr, "out of memory");
+			return;
+		}
+		n = 0;
+		for (sb = avl_first(&substs[i]); sb;
+		    sb = AVL_NEXT(&substs[i], sb)) {
+			if ((st[n].key = resolve_pri(sb->key)) < 0) {
+				/* by definition these resolve! */
+				INTERR;
+			}
+			/* keys are expected to come out in table order */
+			if (st[n].key != (n | COLLATE_SUBST_PRIORITY)) {
+				INTERR;
+			}
+			for (j = 0; sb->ref[j]; j++) {
+				st[n].pri[j] = get_weight(sb->ref[j], i);
+			}
+			n++;
+		}
+		if (n != collinfo.subst_count[i])
+			INTERR;
+		subst[i] = st;
+	}
+
+
+	/*
+	 * Chains, i.e. collating elements
+	 */
+	collinfo.chain_count = avl_numnodes(&elem_by_expand);
+	chain = calloc(sizeof (collate_chain_t), collinfo.chain_count);
+	if (chain == NULL) {
+		fprintf(stderr, "out of memory");
+		return;
+	}
+	for (n = 0, ce = avl_first(&elem_by_expand);
+	    ce != NULL;
+	    ce = AVL_NEXT(&elem_by_expand, ce), n++) {
+		(void) wsncpy(chain[n].str, ce->expand, COLLATE_STR_LEN);
+		for (i = 0; i < NUM_WT; i++) {
+			chain[n].pri[i] = get_weight(ce->ref[i], i);
+		}
+	}
+	if (n != collinfo.chain_count)
+		INTERR;
+
+	/*
+	 * Large (> UCHAR_MAX) character priorities
+	 */
+	large = calloc(sizeof (collate_large_t) * avl_numnodes(&collchars), 1);
+	if (large == NULL) {
+		fprintf(stderr, "out of memory");
+		return;
+	}
+
+	i = 0;
+	for (cc = avl_first(&collchars); cc; cc = AVL_NEXT(&collchars, cc)) {
+		int	undef = 0;
+		/* we already gathered those */
+		if (cc->wc <= UCHAR_MAX)
+			continue;
+		for (j = 0; j < NUM_WT; j++) {
+			if ((pri = get_weight(cc->ref[j], j)) < 0) {
+				undef = 1;
+			}
+			if (undef && (pri >= 0)) {
+				/* if undefined, then all priorities are */
+				INTERR;
+			} else {
+				large[i].pri.pri[j] = pri;
+			}
+		}
+		/*
+		 * Slot i is only kept for a fully defined character; an
+		 * undefined one leaves i alone and the slot is reused.
+		 */
+		if (!undef) {
+			large[i].val = cc->wc;
+			/*
+			 * large_count is the number of entries written
+			 * below, so it must be the count after this
+			 * entry, not the index of the entry.
+			 */
+			collinfo.large_count = ++i;
+		}
+	}
+
+	if ((f = open_category()) == NULL) {
+		return;
+	}
+
+	/* Time to write the entire data set out */
+
+	if ((wr_category(vers, COLLATE_STR_LEN, f) < 0) ||
+	    (wr_category(&collinfo, sizeof (collinfo), f) < 0) ||
+	    (wr_category(&chars, sizeof (chars), f) < 0)) {
+		return;
+	}
+
+	for (i = 0; i < NUM_WT; i++) {
+		sz =  sizeof (collate_subst_t) * collinfo.subst_count[i];
+		if (wr_category(subst[i], sz, f) < 0) {
+			return;
+		}
+	}
+	sz = sizeof (collate_chain_t) * collinfo.chain_count;
+	if (wr_category(chain, sz, f) < 0) {
+		return;
+	}
+	sz = sizeof (collate_large_t) * collinfo.large_count;
+	if (wr_category(large, sz, f) < 0) {
+		return;
+	}
+
+	close_category(f);
+}
diff --git a/usr.bin/localedef/ctype.c b/usr.bin/localedef/ctype.c
new file mode 100644
index 000000000000..8cb68135b4ee
--- /dev/null
+++ b/usr.bin/localedef/ctype.c
@@ -0,0 +1,464 @@
+/*
+ * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2012 Garrett D'Amore <garrett@damore.org>  All rights reserved.
+ * Copyright 2015 John Marino <draco@marino.st>
+ *
+ * This source code is derived from the illumos localedef command, and
+ * provided under BSD-style license terms by Nexenta Systems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * LC_CTYPE database generation routines for localedef.
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/avl.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include <sys/types.h>
+#include <wchar.h>
+#include <ctype.h>
+#include <wctype.h>
+#include <unistd.h>
+#include "localedef.h"
+#include "parser.h"
+#include "runefile.h"
+
+
+/*
+ * Short class-mask names used throughout this file, each defined in
+ * terms of a _CTYPE_* mask.  _E1, _E2 and _E5 back the phonogram,
+ * ideogram and special classes in add_ctype_impl(); _E3 and _E4
+ * (english, number) are defined as 0, so ORing them in sets no bit.
+ */
+#define _ISUPPER	_CTYPE_U
+#define _ISLOWER	_CTYPE_L
+#define	_ISDIGIT	_CTYPE_D
+#define	_ISXDIGIT	_CTYPE_X
+#define	_ISSPACE	_CTYPE_S
+#define	_ISBLANK	_CTYPE_B
+#define	_ISALPHA	_CTYPE_A
+#define	_ISPUNCT	_CTYPE_P
+#define	_ISGRAPH	_CTYPE_G
+#define	_ISPRINT	_CTYPE_R
+#define	_ISCNTRL	_CTYPE_C
+#define	_E1		_CTYPE_Q
+#define	_E2		_CTYPE_I
+#define	_E3		0
+#define	_E4		0
+#define	_E5		_CTYPE_T
+
+/* All characters seen so far, as ctype_node_t ordered by wc. */
+static avl_tree_t	ctypes;
+
+/*
+ * Character most recently handled by add_ctype() or add_ctype_range();
+ * add_ctype_range() starts one past it.
+ */
+static wchar_t		last_ctype;
+
+/* Per-character record kept in the ctypes tree. */
+typedef struct ctype_node {
+	wchar_t wc;		/* the character; tree key */
+	int32_t	ctype;		/* accumulated class and width mask bits */
+	int32_t	toupper;	/* upper-case mapping; 0 is treated as unset */
+	int32_t	tolower;	/* lower-case mapping; 0 is treated as unset */
+	avl_node_t avl;		/* linkage for the ctypes tree */
+} ctype_node_t;
+
+/* NOTE(review): not referenced by the code in this file. */
+typedef struct width_node {
+	wchar_t start;
+	wchar_t end;
+	int8_t width;
+	avl_node_t avl;
+} width_node_t;
+
+/*
+ * Ordering callback for the ctypes tree: compares two ctype_node_t by
+ * their wc member and returns -1, 0 or 1.
+ */
+static int
+ctype_compare(const void *n1, const void *n2)
+{
+	const ctype_node_t *lhs = n1;
+	const ctype_node_t *rhs = n2;
+
+	if (lhs->wc < rhs->wc)
+		return (-1);
+	if (lhs->wc > rhs->wc)
+		return (1);
+	return (0);
+}
+
+/*
+ * Set up the (empty) ctypes tree: nodes are ctype_node_t, ordered by
+ * ctype_compare(), and linked through their "avl" member.
+ */
+void
+init_ctype(void)
+{
+	const size_t node_size = sizeof (ctype_node_t);
+	const size_t link_offset = offsetof(ctype_node_t, avl);
+
+	avl_create(&ctypes, ctype_compare, node_size, link_offset);
+}
+
+
+/*
+ * Add to ctn->ctype the class bits selected by the keyword in last_kw,
+ * together with the bits this file associates with that class (for
+ * example upper also sets alpha, graph and print).  T_ISALNUM adds
+ * nothing; any other keyword is reported through errf() and leaves the
+ * node untouched.
+ */
+static void
+add_ctype_impl(ctype_node_t *ctn)
+{
+	long bits;
+
+	switch (last_kw) {
+	case T_ISUPPER:
+		bits = (_ISUPPER | _ISALPHA | _ISGRAPH | _ISPRINT);
+		break;
+	case T_ISLOWER:
+		bits = (_ISLOWER | _ISALPHA | _ISGRAPH | _ISPRINT);
+		break;
+	case T_ISALPHA:
+		bits = (_ISALPHA | _ISGRAPH | _ISPRINT);
+		break;
+	case T_ISDIGIT:
+		bits = (_ISDIGIT | _ISGRAPH | _ISPRINT | _ISXDIGIT);
+		break;
+	case T_ISSPACE:
+		bits = _ISSPACE;
+		break;
+	case T_ISCNTRL:
+		bits = _ISCNTRL;
+		break;
+	case T_ISGRAPH:
+		bits = (_ISGRAPH | _ISPRINT);
+		break;
+	case T_ISPRINT:
+		bits = _ISPRINT;
+		break;
+	case T_ISPUNCT:
+		bits = (_ISPUNCT | _ISGRAPH | _ISPRINT);
+		break;
+	case T_ISXDIGIT:
+		bits = (_ISXDIGIT | _ISPRINT);
+		break;
+	case T_ISBLANK:
+		bits = (_ISBLANK | _ISSPACE);
+		break;
+	case T_ISPHONOGRAM:
+		bits = (_E1 | _ISPRINT | _ISGRAPH);
+		break;
+	case T_ISIDEOGRAM:
+		bits = (_E2 | _ISPRINT | _ISGRAPH);
+		break;
+	case T_ISENGLISH:
+		bits = (_E3 | _ISPRINT | _ISGRAPH);
+		break;
+	case T_ISNUMBER:
+		bits = (_E4 | _ISPRINT | _ISGRAPH);
+		break;
+	case T_ISSPECIAL:
+		bits = (_E5 | _ISPRINT | _ISGRAPH);
+		break;
+	case T_ISALNUM:
+		/*
+		 * Nothing to record here: the character is expected to
+		 * have been given as a digit or an alpha already.
+		 */
+		bits = 0;
+		break;
+	default:
+		errf("not a valid character class");
+		return;
+	}
+	ctn->ctype |= bits;
+}
+
+/*
+ * Return the ctypes-tree node for character wc, creating a zeroed node
+ * and inserting it when none exists yet.  Returns NULL (after reporting
+ * through errf()) if the node cannot be allocated.
+ */
+static ctype_node_t *
+get_ctype(wchar_t wc)
+{
+	ctype_node_t	key;
+	ctype_node_t	*node;
+	avl_index_t	slot;
+
+	key.wc = wc;
+	node = avl_find(&ctypes, &key, &slot);
+	if (node != NULL)
+		return (node);
+
+	node = calloc(1, sizeof (*node));
+	if (node == NULL) {
+		errf("out of memory");
+		return (NULL);
+	}
+	node->wc = wc;
+	avl_insert(&ctypes, node, slot);
+
+	return (node);
+}
+
+/*
+ * Apply the class selected by last_kw to character val and remember the
+ * character in last_ctype so that a following range can continue from
+ * it.
+ */
+void
+add_ctype(int val)
+{
+	ctype_node_t	*node = get_ctype(val);
+
+	if (node == NULL) {
+		INTERR;
+		return;
+	}
+
+	add_ctype_impl(node);
+	last_ctype = node->wc;
+}
+
+/*
+ * Apply the class selected by last_kw to every character after
+ * last_ctype up to and including end, then make end the new last_ctype.
+ * A range whose end lies before last_ctype is rejected through errf().
+ */
+void
+add_ctype_range(int end)
+{
+	ctype_node_t	*ctn;
+	wchar_t		cur;
+
+	if (end < last_ctype) {
+		/* %u takes unsigned arguments; convert explicitly */
+		errf("malformed character range (%u ... %u)",
+		    (unsigned)last_ctype, (unsigned)end);
+		return;
+	}
+	for (cur = last_ctype + 1; cur <= end; cur++) {
+		if ((ctn = get_ctype(cur)) == NULL) {
+			INTERR;
+			return;
+		}
+		add_ctype_impl(ctn);
+	}
+	last_ctype = end;
+
+}
+
+/*
+ * A word about widths: when a width mask is present, libc honors it
+ * unconditionally.  Without one, libc assumes width 1 for printable
+ * characters and width -1 for non-printable ones (NULL is special, with
+ * width 0).  We therefore never inject defaults here: the "default"
+ * unset value of 0 tells libc to apply its own wcwidth logic as just
+ * described.
+ *
+ * add_width() clears any width bits already recorded for wc and, for a
+ * width of 0 through 3, sets the matching _CTYPE_SWn bit.  Any other
+ * width leaves the width bits cleared.
+ */
+void
+add_width(int wc, int width)
+{
+	ctype_node_t	*node = get_ctype(wc);
+
+	if (node == NULL) {
+		INTERR;
+		return;
+	}
+
+	node->ctype &= ~(_CTYPE_SWM);
+
+	if (width == 0) {
+		node->ctype |= _CTYPE_SW0;
+	} else if (width == 1) {
+		node->ctype |= _CTYPE_SW1;
+	} else if (width == 2) {
+		node->ctype |= _CTYPE_SW2;
+	} else if (width == 3) {
+		node->ctype |= _CTYPE_SW3;
+	}
+}
+
+/*
+ * Record the same width for every character from start through end,
+ * inclusive.  Nothing happens when start is greater than end.
+ */
+void
+add_width_range(int start, int end, int width)
+{
+	while (start <= end) {
+		add_width(start, width);
+		start++;
+	}
+}
+
+/*
+ * Record wc as the case mapping of character val.  Whether it is the
+ * upper- or the lower-case mapping is decided by last_kw (T_TOUPPER or
+ * T_TOLOWER); any other keyword is an internal error.
+ */
+void
+add_caseconv(int val, int wc)
+{
+	ctype_node_t	*node = get_ctype(val);
+
+	if (node == NULL) {
+		INTERR;
+		return;
+	}
+
+	if (last_kw == T_TOUPPER) {
+		node->toupper = wc;
+	} else if (last_kw == T_TOLOWER) {
+		node->tolower = wc;
+	} else {
+		INTERR;
+	}
+}
+
+/*
+ * Write the compiled LC_CTYPE category.
+ *
+ * Every node of the ctypes tree is first completed with the classes
+ * POSIX requires or implies, and checked for conflicting classes (which
+ * only produce a warning).  Characters below _CACHED_RUNES go into the
+ * fixed tables of the _FileRuneLocale header; all others are collected
+ * into three arrays of _FileRuneEntry ranges (types, lower-case map,
+ * upper-case map).  Header and arrays are then written, in that order,
+ * to the file returned by open_category().
+ *
+ * A range is only extended when the new character directly follows the
+ * previous one, so that characters which were never defined are not
+ * swallowed by a neighbouring range.
+ */
+void
+dump_ctype(void)
+{
+	FILE		*f;
+	_FileRuneLocale	rl;
+	ctype_node_t	*ctn, *last_ct, *last_lo, *last_up;
+	_FileRuneEntry	*ct = NULL;
+	_FileRuneEntry	*lo = NULL;
+	_FileRuneEntry	*up = NULL;
+	_FileRuneEntry	*grown;
+	wchar_t		wc;
+
+	(void) memset(&rl, 0, sizeof (rl));
+	last_ct = NULL;
+	last_lo = NULL;
+	last_up = NULL;
+
+	if ((f = open_category()) == NULL)
+		return;
+
+	(void) memcpy(rl.magic, _FILE_RUNE_MAGIC_1, 8);
+	(void) strncpy(rl.encoding, get_wide_encoding(), sizeof (rl.encoding));
+
+	/*
+	 * Initialize the identity map.
+	 */
+	for (wc = 0; (unsigned)wc < _CACHED_RUNES; wc++) {
+		rl.maplower[wc] = wc;
+		rl.mapupper[wc] = wc;
+	}
+
+	for (ctn = avl_first(&ctypes); ctn; ctn = AVL_NEXT(&ctypes, ctn)) {
+		int conflict = 0;
+
+
+		wc = ctn->wc;
+
+		/*
+		 * POSIX requires certain portable characters have
+		 * certain types.  Add them if they are missing.
+		 */
+		if ((wc >= 1) && (wc <= 127)) {
+			if ((wc >= 'A') && (wc <= 'Z'))
+				ctn->ctype |= _ISUPPER;
+			if ((wc >= 'a') && (wc <= 'z'))
+				ctn->ctype |= _ISLOWER;
+			if ((wc >= '0') && (wc <= '9'))
+				ctn->ctype |= _ISDIGIT;
+			if (strchr(" \f\n\r\t\v", (char)wc) != NULL)
+				ctn->ctype |= _ISSPACE;
+			if (strchr("0123456789ABCDEFabcdef", (char)wc) != NULL)
+				ctn->ctype |= _ISXDIGIT;
+			if (strchr(" \t", (char)wc))
+				ctn->ctype |= _ISBLANK;
+
+			/*
+			 * Technically these settings are only
+			 * required for the C locale.  However, it
+			 * turns out that because of the historical
+			 * version of isprint(), we need them for all
+			 * locales as well.  Note that these are not
+			 * necessarily valid punctuation characters in
+			 * the current language, but ispunct() needs
+			 * to return TRUE for them.
+			 */
+			if (strchr("!\"'#$%&()*+,-./:;<=>?@[\\]^_`{|}~",
+			    (char)wc))
+				ctn->ctype |= _ISPUNCT;
+		}
+
+		/*
+		 * POSIX also requires that certain types imply
+		 * others.  Add any inferred types here.
+		 */
+		if (ctn->ctype & (_ISUPPER |_ISLOWER))
+			ctn->ctype |= _ISALPHA;
+		if (ctn->ctype & _ISDIGIT)
+			ctn->ctype |= _ISXDIGIT;
+		if (ctn->ctype & _ISBLANK)
+			ctn->ctype |= _ISSPACE;
+		if (ctn->ctype & (_ISALPHA|_ISDIGIT|_ISXDIGIT))
+			ctn->ctype |= _ISGRAPH;
+		if (ctn->ctype & _ISGRAPH)
+			ctn->ctype |= _ISPRINT;
+
+		/*
+		 * Finally, POSIX requires that certain combinations
+		 * are invalid.  We don't flag this as a fatal error,
+		 * but we will warn about.
+		 *
+		 * Each test needs a logical AND of two separate mask
+		 * tests: a bitwise AND of two disjoint masked values
+		 * is always 0 and would never report anything.
+		 */
+		if ((ctn->ctype & _ISALPHA) &&
+		    (ctn->ctype & (_ISPUNCT|_ISDIGIT)))
+			conflict++;
+		if ((ctn->ctype & _ISPUNCT) &&
+		    (ctn->ctype & (_ISDIGIT|_ISALPHA|_ISXDIGIT)))
+			conflict++;
+		if ((ctn->ctype & _ISSPACE) && (ctn->ctype & _ISGRAPH))
+			conflict++;
+		if ((ctn->ctype & _ISCNTRL) && (ctn->ctype & _ISPRINT))
+			conflict++;
+		if ((wc == ' ') && (ctn->ctype & (_ISPUNCT|_ISGRAPH)))
+			conflict++;
+
+		if (conflict) {
+			warn("conflicting classes for character 0x%x (%x)",
+			    wc, ctn->ctype);
+		}
+		/*
+		 * Handle the lower 256 characters using the simple
+		 * optimization.  Note that if we have not defined the
+		 * upper/lower case, then we identity map it.
+		 */
+		if ((unsigned)wc < _CACHED_RUNES) {
+			rl.runetype[wc] = ctn->ctype;
+			if (ctn->tolower)
+				rl.maplower[wc] = ctn->tolower;
+			if (ctn->toupper)
+				rl.mapupper[wc] = ctn->toupper;
+			continue;
+		}
+
+		/*
+		 * Type ranges: extend the open range only for the very
+		 * next code point carrying the same type.
+		 */
+		if ((last_ct != NULL) && (last_ct->ctype == ctn->ctype) &&
+		    (last_ct->wc + 1 == wc)) {
+			ct[rl.runetype_ext_nranges - 1].max = wc;
+		} else {
+			grown = realloc(ct,
+			    sizeof (*ct) * (rl.runetype_ext_nranges + 1));
+			if (grown == NULL) {
+				errf("out of memory");
+				return;
+			}
+			ct = grown;
+			rl.runetype_ext_nranges++;
+			ct[rl.runetype_ext_nranges - 1].min = wc;
+			ct[rl.runetype_ext_nranges - 1].max = wc;
+			ct[rl.runetype_ext_nranges - 1].map = ctn->ctype;
+		}
+		last_ct = ctn;
+
+		/*
+		 * Lower-case ranges: both the character and its mapping
+		 * have to advance by exactly one to stay in the range.
+		 */
+		if (ctn->tolower == 0) {
+			last_lo = NULL;
+		} else if ((last_lo != NULL) &&
+		    (last_lo->tolower + 1 == ctn->tolower) &&
+		    (last_lo->wc + 1 == wc)) {
+			lo[rl.maplower_ext_nranges - 1].max = wc;
+			last_lo = ctn;
+		} else {
+			grown = realloc(lo,
+			    sizeof (*lo) * (rl.maplower_ext_nranges + 1));
+			if (grown == NULL) {
+				errf("out of memory");
+				return;
+			}
+			lo = grown;
+			rl.maplower_ext_nranges++;
+			lo[rl.maplower_ext_nranges - 1].min = wc;
+			lo[rl.maplower_ext_nranges - 1].max = wc;
+			lo[rl.maplower_ext_nranges - 1].map = ctn->tolower;
+			last_lo = ctn;
+		}
+
+		/* Upper-case ranges: same rule as for lower case. */
+		if (ctn->toupper == 0) {
+			last_up = NULL;
+		} else if ((last_up != NULL) &&
+		    (last_up->toupper + 1 == ctn->toupper) &&
+		    (last_up->wc + 1 == wc)) {
+			up[rl.mapupper_ext_nranges - 1].max = wc;
+			last_up = ctn;
+		} else {
+			grown = realloc(up,
+			    sizeof (*up) * (rl.mapupper_ext_nranges + 1));
+			if (grown == NULL) {
+				errf("out of memory");
+				return;
+			}
+			up = grown;
+			rl.mapupper_ext_nranges++;
+			up[rl.mapupper_ext_nranges - 1].min = wc;
+			up[rl.mapupper_ext_nranges - 1].max = wc;
+			up[rl.mapupper_ext_nranges - 1].map = ctn->toupper;
+			last_up = ctn;
+		}
+	}
+
+	if ((wr_category(&rl, sizeof (rl), f) < 0) ||
+	    (wr_category(ct, sizeof (*ct) * rl.runetype_ext_nranges, f) < 0) ||
+	    (wr_category(lo, sizeof (*lo) * rl.maplower_ext_nranges, f) < 0) ||
+	    (wr_category(up, sizeof (*up) * rl.mapupper_ext_nranges, f) < 0)) {
+		return;
+	}
+
+	close_category(f);
+}
diff --git a/usr.bin/localedef/localedef.1 b/usr.bin/localedef/localedef.1
new file mode 100644
index 000000000000..7cd026d78437
--- /dev/null
+++ b/usr.bin/localedef/localedef.1
@@ -0,0 +1,238 @@
+.\" Copyright (c) 1992, X/Open Company Limited  All Rights Reserved
+.\" Portions Copyright (c) 2003, Sun Microsystems, Inc.  All Rights Reserved
+.\" Portions Copyright 2013 DEY Storage Systems, Inc.
+.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for
+.\" permission to reproduce portions of its copyrighted documentation.
+.\" Original documentation from The Open Group can be obtained online at
+.\" http://www.opengroup.org/bookstore/.
+.\" The Institute of Electrical and Electronics Engineers and The Open Group,
+.\" have given us permission to reprint portions of their documentation. In
+.\" the following statement, the phrase "this text" refers to portions of the
+.\" system documentation. Portions of this text are reprinted and reproduced
+.\" in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1,
+.\" 2004 Edition, Standard for Information Technology -- Portable Operating
+.\" System Interface (POSIX), The Open Group Base Specifications Issue 6,
+.\" Copyright (C) 2001-2004 by the Institute of Electrical and Electronics
+.\" Engineers, Inc and The Open Group. In the event of any discrepancy between
+.\" these versions and the original IEEE and The Open Group Standard, the
+.\" original IEEE and The Open Group Standard is the referee document. The
+.\" original Standard can be obtained online at
+.\" http://www.opengroup.org/unix/online.html.
+.\"  This notice shall appear on any product containing this material.
+.\" The contents of this file are subject to the terms of the Common
+.\" Development and Distribution License (the "License").  You may not use
+.\" this file except in compliance with the License.
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or
+.\" http://www.opensolaris.org/os/licensing.  See the License for the specific
+.\" language governing permissions and limitations under the License.
+.\" When distributing Covered Code, include this CDDL HEADER in each file and
+.\" include the License file at usr/src/OPENSOLARIS.LICENSE.  If applicable,
+.\" add the following below this CDDL HEADER, with the fields enclosed by
+.\" brackets "[]" replaced with your own identifying information:
+.\" Portions Copyright [yyyy] [name of copyright owner]
+.Dd July 28, 2015
+.Dt LOCALEDEF 1
+.Os
+.Sh NAME
+.Nm localedef
+.Nd define locale environment
+.Sh SYNOPSIS
+.Nm
+.Op Fl D
+.Op Fl c
+.Op Fl v
+.Op Fl U
+.Op Fl f Ar charmap
+.Op Fl w Ar widthfile
+.Op Fl i Ar sourcefile
+.Op Fl u Ar codeset
+localename
+.Sh DESCRIPTION
+The
+.Nm localedef
+utility converts source definitions for locale categories
+into a format usable by the functions and utilities whose operational behavior
+is determined by the setting of the locale environment variables; see
+.Xr environ 5 .
+.Pp
+The utility reads source definitions for one or more locale categories
+belonging to the same locale from the file named in the
+.Fl i
+option (if specified) or from standard input.
+.Pp
+Each category source definition is identified by the corresponding environment
+variable name and terminated by an
+.Sy END
+.Em category-name
+statement. The following categories are supported:
+.Bl -tag -width LC_MONETARY
+.It LC_CTYPE
+Defines character classification and case conversion.
+.It LC_COLLATE
+Defines collation rules.
+.It LC_MONETARY
+Defines the format and symbols used in formatting of monetary information.
+.It LC_NUMERIC
+Defines the decimal delimiter, grouping and grouping symbol for non-monetary
+numeric editing.
+.It LC_TIME
+Defines the format and content of date and time information.
+.It LC_MESSAGES
+Defines the format and values of affirmative and negative responses.
+.El
+.Pp
+The following options are supported:
+.Bl -tag -width xx_sourcefile
+.It -D
+BSD-style output.  Rather than the default of creating the
+.Sy localename
+directory and creating files like LC_CTYPE, LC_COLLATE, etc, in that directory,
+the output files have the format "<localename>.<category>" and are
+dumped to the current directory.
+.It -c
+Creates permanent output even if warning messages have been issued.
+.It -v
+Emit verbose debugging output on standard output.
+.It -U
+Ignore the presence of character symbols that have no matching character
+definition.  This facilitates the use of a common locale definition file
+to be used across multiple encodings, even when some symbols are not
+present in a given encoding.
+.It -f charmap
+Specifies the pathname of a file containing a mapping of character symbols and
+collating element symbols to actual character encodings. This option must be
+specified if symbolic names (other than collating symbols defined in a
+.Sy collating-symbol
+keyword) are used. If the
+.Sy -f
+option is not present, the default character mapping will be used.
+.It -w widthfile
+The path name of the file containing character screen width definitions.
+If not supplied, then default screen widths will be assumed, which will
+generally not account for East Asian encodings requiring more than a single
+character cell to display, nor for combining or accent marks that occupy
+no additional screen width.
+.It -i sourcefile
+The path name of a file containing the source definitions. If this option is
+not present, source definitions will be read from standard input.
+.It -u codeset
+Specifies the name of a codeset used as the target mapping of character symbols
+and collating element symbols whose encoding values are defined in terms of the
+ISO/IEC 10646-1: 2000 standard position constant values. See NOTES.
+.El
+.Pp
+The following operands are required:
+.Bl -tag -width localename
+.It localename
+Identifies the locale. If the name contains one or more slash characters,
+.Ar localename
+will be interpreted as a path name where the created locale
+definitions will be stored. This capability may be restricted to users with
+appropriate privileges. (As a consequence of specifying one
+.Ar localename ,
+although several categories can be processed in one execution, only categories
+belonging to the same locale can be processed.)
+.El
+.Sh OUTPUT
+.Nm
+creates a directory of files that represents the locale's data, unless instructed
+otherwise by the
+.Sy -D
+(BSD output) option. The contents of this directory should generally be
+copied into the appropriate subdirectory of /usr/share/locale in order for the
+definitions to be visible to programs linked with libc.
+.Sh ENVIRONMENT
+See
+.Xr environ 5
+for definitions of the following environment variables that affect the execution of
+.Nm :
+.Sy LANG ,
+.Sy LC_ALL ,
+.Sy LC_COLLATE ,
+.Sy LC_CTYPE ,
+.Sy LC_MESSAGES ,
+.Sy LC_MONETARY ,
+.Sy LC_NUMERIC ,
+.Sy LC_TIME ,
+and
+.Sy NLSPATH .
+.Sh EXIT STATUS
+The following exit values are returned:
+.Bl -tag -width XX
+.It 0
+No errors occurred and the locales were successfully created.
+.It 1
+Warnings occurred and the locales were successfully created.
+.It 2
+The locale specification exceeded implementation limits or the coded character
+set or sets used were not supported by the implementation, and no locale was
+created.
+.It >3
+Warnings or errors occurred and no output was created.
+.El
+.Pp
+If an error is detected, no permanent output will be created.
+.Sh SEE ALSO
+.Xr locale 1 ,
+.Xr iconv_open 3 ,
+.Xr nl_langinfo 3 ,
+.Xr strftime 3 ,
+.Xr environ 5 .
+.Sh WARNINGS
+If warnings occur, permanent output will be created if the
+.Sy -c
+option was specified. The following conditions will cause warning messages to be issued:
+.Bl -tag -width X
+.It *
+If a symbolic name not found in the
+.Em charmap
+file is used for the descriptions of the
+.Sy LC_CTYPE
+or
+.Sy LC_COLLATE
+categories (for other categories, this will be an error condition).
+.It *
+If optional keywords not supported by the implementation are present in the
+source.
+.El
+.Sh NOTES
+When the
+.Sy -u
+option is used, the
+.Em codeset
+option-argument is interpreted as a name of a codeset to which the
+ISO/IEC 10646-1: 2000 standard position constant values are converted. Both the
+ISO/IEC 10646-1: 2000 standard position constant values and other formats (decimal,
+hexadecimal, or octal) are valid as encoding values within the charmap file. The
+codeset can be any codeset that is supported by the
+.Xr iconv_open 3
+function on the system.
+.Pp
+When conflicts occur between the charmap specification of
+.Em codeset ,
+.Em mb_cur_max ,
+or
+.Em mb_cur_min
+and the corresponding value for the codeset represented by the
+.Sy -u
+option-argument
+.Em codeset ,
+the
+.Nm
+utility fails as an error.
+.Pp
+When conflicts occur between the charmap encoding values specified for symbolic
+names of characters of the portable character set and the character encoding
+values defined by the US-ASCII, the result is unspecified.
+.Sh HISTORY
+.Nm
+first appeared in
+.Dx
+4.4. It was ported from Illumos from the point
+.An Garrett D'Amore
+.Aq garrett@nexenta.com
+added multibyte support (October 2010).
+.An John Marino
+.Aq draco@marino.st
+provided the alterations necessary to compile cleanly on
+.Dx
+as well as altered libc to use the new collation (the changes were also based
+on Illumos, but modified to work with xlocale functionality.)
diff --git a/usr.bin/localedef/localedef.c b/usr.bin/localedef/localedef.c
new file mode 100644
index 000000000000..afc2e240860e
--- /dev/null
+++ b/usr.bin/localedef/localedef.c
@@ -0,0 +1,346 @@
+/*
+ * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2015 John Marino <draco@marino.st>
+ *
+ * This source code is derived from the illumos localedef command, and
+ * provided under BSD-style license terms by Nexenta Systems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * POSIX localedef.
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <string.h>
+#include <unistd.h>
+#include <libgen.h>
+#include <stddef.h>
+#include <unistd.h>
+#include <limits.h>
+#include <locale.h>
+#include <dirent.h>
+#include "localedef.h"
+#include "parser.h"
+
+#ifndef	TEXT_DOMAIN
+#define	TEXT_DOMAIN	"SYS_TEST"
+#endif
+
+int bsd = 0;		/* set by -D: BSD-style output */
+int verbose = 0;	/* incremented by each -v */
+int undefok = 0;	/* incremented by each -U */
+int warnok = 0;		/* incremented by each -c */
+static char *locname = NULL;	/* locale name operand, set in main() */
+static char locpath[PATH_MAX];	/* buffer filled by category_file() */
+
+/* Return the name of the scanner's current category, or NULL if unknown. */
+const char *
+category_name(void)
+{
+	static const struct {
+		int tok; const char *name;
+	} names[] = {
+		{ T_CHARMAP,	"CHARMAP" },
+		{ T_WIDTH,	"WIDTH" },
+		{ T_COLLATE,	"LC_COLLATE" },
+		{ T_CTYPE,	"LC_CTYPE" },
+		{ T_MESSAGES,	"LC_MESSAGES" },
+		{ T_MONETARY,	"LC_MONETARY" },
+		{ T_NUMERIC,	"LC_NUMERIC" },
+		{ T_TIME,	"LC_TIME" },
+	};
+	int cur = get_category();
+	size_t i;
+
+	for (i = 0; i < sizeof (names) / sizeof (names[0]); i++)
+		if (names[i].tok == cur)
+			return (names[i].name);
+	INTERR;		/* no name is known for this category token */
+	return (NULL);
+}
+
+/* Format the current category's output path into locpath and return it. */
+static char *
+category_file(void)
+{
+	const char *cat = category_name();
+	if (!bsd)
+		snprintf(locpath, sizeof (locpath), "%s/%s", locname, cat);
+	else
+		snprintf(locpath, sizeof (locpath), "%s.%s", locname, cat);
+	return (locpath);
+}
+
+/*
+ * Create (or truncate) the output file of the current category, making its
+ * parent directory first unless -D is in effect.  Returns the stream, or
+ * NULL if fopen() fails.
+ */
+FILE *
+open_category(void)
+{
+	FILE *file;
+
+	if (verbose) {
+		(void) printf("Writing category %s: ", category_name());
+		(void) fflush(stdout);
+	}
+
+	/* make the parent directory */
+	if (!bsd)
+		(void) mkdir(dirname(category_file()), 0755);
+
+	/* dirname() clobbered the path above, so build it again here */
+	file = fopen(category_file(), "w");
+	/* "%s" keeps the strerror() text from being parsed as a format */
+	if (file == NULL)
+		errf("%s", strerror(errno));
+	return (file);
+}
+
+/* chmod f to 0644 and close it; on any failure unlink the file and report. */
+void
+close_category(FILE *f)
+{
+	int saved = (fchmod(fileno(f), 0644) < 0) ? errno : 0;
+
+	/* Close exactly once, keeping the first errno for the report below. */
+	if (fclose(f) < 0 && saved == 0)
+		saved = errno;
+	if (saved != 0) {
+		(void) unlink(category_file());
+		errf("%s", strerror(saved));
+	} else if (verbose) {
+		(void) fprintf(stdout, "done.\n");
+		(void) fflush(stdout);
+	}
+}
+
+/*
+ * Copy the current category from the locale "src" by hard-linking its file
+ * to our output path.  A bare locale name (no '/') that is not readable as
+ * given is retried under /usr/lib/locale.  Failures are reported on stderr.
+ */
+void
+copy_category(char *src)
+{
+	char	srcpath[PATH_MAX];
+	int	rv;
+
+	(void) snprintf(srcpath, sizeof (srcpath), "%s/%s",
+	    src, category_name());
+	rv = access(srcpath, R_OK);
+	/* Test src here: srcpath always contains the '/' added above. */
+	if ((rv != 0) && (strchr(src, '/') == NULL)) {
+		(void) snprintf(srcpath, sizeof (srcpath),
+		    "/usr/lib/locale/%s/%s", src, category_name());
+		rv = access(srcpath, R_OK);
+	}
+
+	if (rv != 0) {
+		fprintf(stderr, "source locale data unavailable: %s\n", src);
+		return;
+	}
+
+	if (verbose > 1) {
+		(void) printf("Copying category %s from %s: ",
+		    category_name(), src);
+		(void) fflush(stdout);
+	}
+
+	/* make the parent directory */
+	if (!bsd)
+		(void) mkdir(dirname(category_file()), 0755);
+
+	if (link(srcpath, category_file()) != 0) {
+		fprintf(stderr, "unable to copy locale data: %s\n",
+		    strerror(errno));
+		return;
+	}
+	if (verbose > 1)
+		(void) printf("done.\n");
+}
+
+/*
+ * Write s (when non-NULL) and a newline; on error close f, remove the file.
+ */
+int
+putl_category(const char *s, FILE *f)
+{
+	int saved;
+
+	if ((s == NULL || fputs(s, f) != EOF) && fputc('\n', f) != EOF)
+		return (0);
+	/* fclose()/unlink() may overwrite errno, so capture it first. */
+	saved = errno;
+	(void) fclose(f);
+	(void) unlink(category_file());
+	errf("%s", strerror(saved));
+	return (EOF);
+}
+
+/* Write sz bytes of buf to f; on failure close f and remove the file. */
+int
+wr_category(void *buf, size_t sz, FILE *f)
+{
+	int saved;
+
+	if (sz == 0 || fwrite(buf, sz, 1, f) == 1)
+		return (0);
+	saved = errno;	/* fclose()/unlink() may overwrite errno */
+	(void) fclose(f);
+	(void) unlink(category_file());
+	errf("%s", strerror(saved));
+	return (EOF);
+}
+
+int yyparse(void);
+
+/* Print the option summary to stderr and exit with status 4. */
+static void
+usage(void)
+{
+	(void) fputs("Usage: localedef [options] localename\n[options] are:\n"
+	    "  -D          : BSD-style output\n"
+	    "  -c          : ignore warnings\n"
+	    "  -v          : verbose output\n"
+	    "  -U          : ignore undefined symbols\n"
+	    "  -f charmap  : use given charmap file\n"
+	    "  -u encoding : assume encoding\n"
+	    "  -w widths   : use screen widths file\n"
+	    "  -i locsrc   : source file for locale\n", stderr);
+	exit(4);
+}
+
+/*
+ * Parse options, load the optional charmap/widths files and the POSIX
+ * portable characters, then compile the locale source; exit 1 on warnings.
+ */
+int
+main(int argc, char **argv)
+{
+	int c;
+	char *lfname = NULL;
+	char *cfname = NULL;
+	char *wfname = NULL;
+	DIR *dir;
+
+	init_charmap();
+	init_collate();
+	init_ctype();
+	init_messages();
+	init_monetary();
+	init_numeric();
+	init_time();
+
+	yydebug = 0;
+
+	(void) setlocale(LC_ALL, "");
+
+	while ((c = getopt(argc, argv, "w:i:cf:u:vUD")) != -1) {
+		switch (c) {
+		case 'D':
+			bsd = 1;
+			break;
+		case 'v':
+			verbose++;
+			break;
+		case 'i':
+			lfname = optarg;
+			break;
+		case 'u':
+			set_wide_encoding(optarg);
+			break;
+		case 'f':
+			cfname = optarg;
+			break;
+		case 'U':
+			undefok++;
+			break;
+		case 'c':
+			warnok++;
+			break;
+		case 'w':
+			wfname = optarg;
+			break;
+		case '?':
+			usage();
+			break;
+		}
+	}
+
+	if ((argc - 1) != (optind)) {
+		usage();
+	}
+	locname = argv[argc - 1];
+	if (verbose)
+		(void) printf("Processing locale %s.\n", locname);
+
+	if (cfname) {
+		if (verbose)
+			(void) printf("Loading charmap %s.\n", cfname);
+		reset_scanner(cfname);
+		(void) yyparse();
+	}
+
+	if (wfname) {
+		if (verbose)
+			(void) printf("Loading widths %s.\n", wfname);
+		reset_scanner(wfname);
+		(void) yyparse();
+	}
+
+	if (verbose)
+		(void) printf("Loading POSIX portable characters.\n");
+	add_charmap_posix();
+
+	/* lfname is still NULL when -i was not given */
+	reset_scanner(lfname);
+
+	/* make the directory for the locale if not already present */
+	if (!bsd) {
+		while ((dir = opendir(locname)) == NULL) {
+			if ((errno != ENOENT) ||
+			    (mkdir(locname, 0755) <  0)) {
+				/* strerror() text is data, not a format */
+				errf("%s", strerror(errno));
+			}
+		}
+		(void) closedir(dir);
+		/* NOTE(review): uses the scanner's current category; confirm */
+		(void) mkdir(dirname(category_file()), 0755);
+	}
+
+	(void) yyparse();
+	if (verbose)
+		(void) printf("All done.\n");
+	return (warnings ? 1 : 0);
+}
diff --git a/usr.bin/localedef/localedef.h b/usr.bin/localedef/localedef.h
new file mode 100644
index 000000000000..91e47ad62215
--- /dev/null
+++ b/usr.bin/localedef/localedef.h
@@ -0,0 +1,172 @@
+/*
+ * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2015 John Marino <draco@marino.st>
+ *
+ * This source code is derived from the illumos localedef command, and
+ * provided under BSD-style license terms by Nexenta Systems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * POSIX localedef.
+ */
+
+/* Common header files. */
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <sys/types.h>
+
+extern int com_char;
+extern int esc_char;
+extern int mb_cur_max;
+extern int mb_cur_min;
+extern int last_kw;
+extern int verbose;
+extern int yydebug;
+extern int lineno;
+extern int undefok;	/* mostly ignore undefined symbols */
+extern int warnok;
+extern int warnings;
+
+int yylex(void);
+void yyerror(const char *);
+void errf(const char *, ...);
+void warn(const char *, ...);
+
+int putl_category(const char *, FILE *);
+int wr_category(void *, size_t, FILE *);
+FILE *open_category(void);
+void close_category(FILE *);
+void copy_category(char *);
+const char *category_name(void);
+
+int get_category(void);
+int get_symbol(void);
+int get_escaped(int);
+int get_wide(void);
+void reset_scanner(const char *);
+void scan_to_eol(void);
+void add_wcs(wchar_t);
+void add_tok(int);
+wchar_t *get_wcs(void);
+
+/* charmap.c - CHARMAP handling */
+void init_charmap(void);
+void add_charmap(char *, int);
+void add_charmap_undefined(char *);
+void add_charmap_posix(void);
+void add_charmap_range(char *, char *, int);
+void add_charmap_char(char *name, int val);
+int lookup_charmap(const char *, wchar_t *);
+int check_charmap_undefined(char *);
+int check_charmap(wchar_t);
+
+/* collate.c - LC_COLLATE handling */
+typedef struct collelem collelem_t;
+typedef struct collsym collsym_t;
+void init_collate(void);
+void define_collsym(char *);
+void define_collelem(char *, wchar_t *);
+void add_order_directive(void);
+void add_order_bit(int);
+void dump_collate(void);
+collsym_t *lookup_collsym(char *);
+collelem_t *lookup_collelem(char *);
+void start_order_collelem(collelem_t *);
+void start_order_undefined(void);
+void start_order_symbol(char *);
+void start_order_char(wchar_t);
+void start_order_ellipsis(void);
+void end_order_collsym(collsym_t *);
+void end_order(void);
+void add_weight(int32_t, int);
+void add_weights(int32_t *);
+void add_weight_num(int);
+void add_order_collelem(collelem_t *);
+void add_order_collsym(collsym_t *);
+void add_order_char(wchar_t);
+void add_order_ignore(void);
+void add_order_ellipsis(void);
+void add_order_symbol(char *);
+void add_order_subst(void);
+void add_subst_char(wchar_t);
+void add_subst_collsym(collsym_t *);
+void add_subst_collelem(collelem_t *);
+void add_subst_symbol(char *);
+int32_t get_weight(int32_t, int);
+wchar_t * wsncpy(wchar_t *, const wchar_t *, size_t);
+
+
+/* ctype.c - LC_CTYPE handling */
+void init_ctype(void);
+void add_ctype(int);
+void add_ctype_range(int);
+void add_width(int, int);
+void add_width_range(int, int, int);
+void add_caseconv(int, int);
+void dump_ctype(void);
+
+/* messages.c - LC_MESSAGES handling */
+void init_messages(void);
+void add_message(wchar_t *);
+void dump_messages(void);
+
+/* monetary.c - LC_MONETARY handling */
+void init_monetary(void);
+void add_monetary_str(wchar_t *);
+void add_monetary_num(int);
+void reset_monetary_group(void);
+void add_monetary_group(int);
+void dump_monetary(void);
+
+/* numeric.c - LC_NUMERIC handling */
+void init_numeric(void);
+void add_numeric_str(wchar_t *);
+void reset_numeric_group(void);
+void add_numeric_group(int);
+void dump_numeric(void);
+
+/* time.c - LC_TIME handling */
+void init_time(void);
+void add_time_str(wchar_t *);
+void reset_time_list(void);
+void add_time_list(wchar_t *);
+void check_time_list(void);
+void dump_time(void);
+
+/* wide.c -  Wide character handling. */
+int to_wide(wchar_t *, const char *);
+int to_mbs(char *, wchar_t);
+int to_mb(char *, wchar_t);
+char *to_mb_string(const wchar_t *);
+void set_wide_encoding(const char *);
+void werr(const char *, ...);
+const char *get_wide_encoding(void);
+int max_wide(void);
+
+/* Report an internal inconsistency, with its source location, on stderr. */
+#define	INTERR	fprintf(stderr,"internal fault (%s:%d)\n", __FILE__, __LINE__)
diff --git a/usr.bin/localedef/messages.c b/usr.bin/localedef/messages.c
new file mode 100644
index 000000000000..0502eb5c3c1e
--- /dev/null
+++ b/usr.bin/localedef/messages.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2015 John Marino <draco@marino.st>
+ *
+ * This source code is derived from the illumos localedef command, and
+ * provided under BSD-style license terms by Nexenta Systems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * LC_MESSAGES database generation routines for localedef.
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <string.h>
+#include <unistd.h>
+#include "localedef.h"
+#include "parser.h"
+#include "lmessages.h"
+
+static struct lc_messages_T msgs;
+
+void
+init_messages(void)
+{
+	memset(&msgs, 0, sizeof msgs);	/* zero all LC_MESSAGES fields */
+}
+
+/* Store wcs (consumed) as the LC_MESSAGES string named by the last keyword. */
+void
+add_message(wchar_t *wcs)
+{
+	char *str = to_mb_string(wcs);
+
+	free(wcs);	/* always consumed, even when the conversion failed */
+	if (str == NULL) {
+		INTERR;
+		return;
+	}
+
+	switch (last_kw) {
+	case T_YESSTR:
+		msgs.yesstr = str;
+		break;
+	case T_NOSTR:
+		msgs.nostr = str;
+		break;
+	case T_YESEXPR:
+		msgs.yesexpr = str;
+		break;
+	case T_NOEXPR:
+		msgs.noexpr = str;
+		break;
+	default:
+		free(str);
+		INTERR;
+	}
+}
+
+/* Write the LC_MESSAGES file: yesexpr, noexpr, yesstr, nostr, one per line. */
+void
+dump_messages(void)
+{
+	FILE *out;
+	char *colon;
+
+	if (msgs.yesstr == NULL) {
+		warn("missing field 'yesstr'");
+		msgs.yesstr = "";
+	}
+	if (msgs.nostr == NULL) {
+		warn("missing field 'nostr'");
+		msgs.nostr = "";
+	}
+
+	/*
+	 * CLDR may give yesstr and nostr as ':'-separated lists; truncate
+	 * each at its first ':' so that only one value is written.
+	 */
+	colon = strchr(msgs.yesstr, ':');
+	if (colon != NULL)
+		*colon = 0;
+	colon = strchr(msgs.nostr, ':');
+	if (colon != NULL)
+		*colon = 0;
+
+	out = open_category();
+	if (out == NULL)
+		return;
+	if (putl_category(msgs.yesexpr, out) != EOF &&
+	    putl_category(msgs.noexpr, out) != EOF &&
+	    putl_category(msgs.yesstr, out) != EOF &&
+	    putl_category(msgs.nostr, out) != EOF)
+		close_category(out);
+}
diff --git a/usr.bin/localedef/monetary.c b/usr.bin/localedef/monetary.c
new file mode 100644
index 000000000000..4700a40799b9
--- /dev/null
+++ b/usr.bin/localedef/monetary.c
@@ -0,0 +1,216 @@
+/*
+ * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2015 John Marino <draco@marino.st>
+ *
+ * This source code is derived from the illumos localedef command, and
+ * provided under BSD-style license terms by Nexenta Systems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * LC_MONETARY database generation routines for localedef.
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <string.h>
+#include <unistd.h>
+#include "localedef.h"
+#include "parser.h"
+#include "lmonetary.h"
+
+static struct lc_monetary_T mon;
+
+void
+init_monetary(void)
+{
+	memset(&mon, 0, sizeof mon);	/* zero all LC_MONETARY fields */
+}
+
+/* Store wcs (consumed) as the LC_MONETARY string named by the last keyword. */
+void
+add_monetary_str(wchar_t *wcs)
+{
+	char *str = to_mb_string(wcs);
+
+	free(wcs);	/* always consumed, even when the conversion failed */
+	if (str == NULL) {
+		INTERR;
+		return;
+	}
+	switch (last_kw) {
+	case T_INT_CURR_SYMBOL:
+		mon.int_curr_symbol = str;
+		break;
+	case T_CURRENCY_SYMBOL:
+		mon.currency_symbol = str;
+		break;
+	case T_MON_DECIMAL_POINT:
+		mon.mon_decimal_point = str;
+		break;
+	case T_MON_THOUSANDS_SEP:
+		mon.mon_thousands_sep = str;
+		break;
+	case T_POSITIVE_SIGN:
+		mon.positive_sign = str;
+		break;
+	case T_NEGATIVE_SIGN:
+		mon.negative_sign = str;
+		break;
+	default:
+		free(str);
+		INTERR;
+	}
+}
+
+/* Store n as a decimal string in the LC_MONETARY field of the last keyword. */
+void
+add_monetary_num(int n)
+{
+	char *str;
+
+	if (asprintf(&str, "%d", n) < 0) {	/* str unset on failure */
+		fprintf(stderr, "out of memory\n");
+		return;
+	}
+
+	switch (last_kw) {
+	case T_INT_FRAC_DIGITS:
+		mon.int_frac_digits = str;
+		break;
+	case T_FRAC_DIGITS:
+		mon.frac_digits = str;
+		break;
+	case T_P_CS_PRECEDES:
+		mon.p_cs_precedes = str;
+		break;
+	case T_P_SEP_BY_SPACE:
+		mon.p_sep_by_space = str;
+		break;
+	case T_N_CS_PRECEDES:
+		mon.n_cs_precedes = str;
+		break;
+	case T_N_SEP_BY_SPACE:
+		mon.n_sep_by_space = str;
+		break;
+	case T_P_SIGN_POSN:
+		mon.p_sign_posn = str;
+		break;
+	case T_N_SIGN_POSN:
+		mon.n_sign_posn = str;
+		break;
+	case T_INT_P_CS_PRECEDES:
+		mon.int_p_cs_precedes = str;
+		break;
+	case T_INT_N_CS_PRECEDES:
+		mon.int_n_cs_precedes = str;
+		break;
+	case T_INT_P_SEP_BY_SPACE:
+		mon.int_p_sep_by_space = str;
+		break;
+	case T_INT_N_SEP_BY_SPACE:
+		mon.int_n_sep_by_space = str;
+		break;
+	case T_INT_P_SIGN_POSN:
+		mon.int_p_sign_posn = str;
+		break;
+	case T_INT_N_SIGN_POSN:
+		mon.int_n_sign_posn = str;
+		break;
+	case T_MON_GROUPING:
+		mon.mon_grouping = str;
+		break;
+	default:
+		free(str);	/* not stored anywhere, so release it */
+		INTERR;
+	}
+}
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wcast-qual"
+
+void
+reset_monetary_group(void)
+{
+	free((char *)mon.mon_grouping);	/* discard the list built so far */
+	mon.mon_grouping = NULL;
+}
+
+/* Append n to the ';'-separated mon_grouping string, replacing the old one. */
+void
+add_monetary_group(int n)
+{
+	char *s = NULL;
+	int rv = (mon.mon_grouping == NULL) ? asprintf(&s, "%d", n) :
+	    asprintf(&s, "%s;%d", mon.mon_grouping, n);
+
+	if (rv < 0) {
+		fprintf(stderr, "out of memory\n");
+		s = NULL;	/* asprintf() may leave s unspecified here */
+	}
+
+	free((char *)mon.mon_grouping);
+	mon.mon_grouping = s;
+}
+
+#pragma GCC diagnostic pop
+
+/*
+ * Write the LC_MONETARY file, one field per line, in the order of the
+ * table below.
+ */
+void
+dump_monetary(void)
+{
+	const char *fields[] = {
+		mon.int_curr_symbol, mon.currency_symbol,
+		mon.mon_decimal_point, mon.mon_thousands_sep,
+		mon.mon_grouping,
+		mon.positive_sign, mon.negative_sign,
+		mon.int_frac_digits, mon.frac_digits,
+		mon.p_cs_precedes, mon.p_sep_by_space,
+		mon.n_cs_precedes, mon.n_sep_by_space,
+		mon.p_sign_posn, mon.n_sign_posn,
+		mon.int_p_cs_precedes, mon.int_n_cs_precedes,
+		mon.int_p_sep_by_space, mon.int_n_sep_by_space,
+		mon.int_p_sign_posn, mon.int_n_sign_posn,
+	};
+	size_t i;
+	FILE *f;
+
+	f = open_category();
+	if (f == NULL)
+		return;
+
+	for (i = 0; i < sizeof (fields) / sizeof (fields[0]); i++) {
+		/* putl_category() has closed and removed the file on EOF */
+		if (putl_category(fields[i], f) == EOF)
+			return;
+	}
+	close_category(f);
+}
diff --git a/usr.bin/localedef/numeric.c b/usr.bin/localedef/numeric.c
new file mode 100644
index 000000000000..0a293ff95b66
--- /dev/null
+++ b/usr.bin/localedef/numeric.c
@@ -0,0 +1,124 @@
+/*
+ * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2015 John Marino <draco@marino.st>
+ *
+ * This source code is derived from the illumos localedef command, and
+ * provided under BSD-style license terms by Nexenta Systems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * LC_NUMERIC database generation routines for localedef.
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <string.h>
+#include <unistd.h>
+#include "localedef.h"
+#include "parser.h"
+#include "lnumeric.h"
+
+static struct lc_numeric_T numeric;
+
+void
+init_numeric(void)
+{
+	memset(&numeric, 0, sizeof numeric);	/* zero all LC_NUMERIC fields */
+}
+
+/* Store wcs (consumed) as the LC_NUMERIC string named by the last keyword. */
+void
+add_numeric_str(wchar_t *wcs)
+{
+	char *str = to_mb_string(wcs);
+
+	free(wcs);	/* always consumed, even when the conversion failed */
+	if (str == NULL) {
+		INTERR;
+		return;
+	}
+
+	switch (last_kw) {
+	case T_DECIMAL_POINT:
+		numeric.decimal_point = str;
+		break;
+	case T_THOUSANDS_SEP:
+		numeric.thousands_sep = str;
+		break;
+	default:
+		free(str);
+		INTERR;
+	}
+}
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wcast-qual"
+
+void
+reset_numeric_group(void)
+{
+	free((char *)numeric.grouping);	/* discard the list built so far */
+	numeric.grouping = NULL;
+}
+
+/* Append n to the ';'-separated grouping string, replacing the old one. */
+void
+add_numeric_group(int n)
+{
+	char *s = NULL;
+	int rv = (numeric.grouping == NULL) ? asprintf(&s, "%d", n) :
+	    asprintf(&s, "%s;%d", numeric.grouping, n);
+
+	if (rv < 0) {
+		fprintf(stderr, "out of memory\n");
+		s = NULL;	/* asprintf() may leave s unspecified here */
+	}
+
+	free((char *)numeric.grouping);
+	numeric.grouping = s;
+}
+
+#pragma GCC diagnostic pop
+
+/* Write the LC_NUMERIC file: decimal_point, thousands_sep, grouping. */
+void
+dump_numeric(void)
+{
+	FILE *out;
+
+	out = open_category();
+	if (out == NULL)
+		return;
+
+	/* putl_category() has closed and removed the file when it fails. */
+	if (putl_category(numeric.decimal_point, out) != EOF &&
+	    putl_category(numeric.thousands_sep, out) != EOF &&
+	    putl_category(numeric.grouping, out) != EOF)
+		close_category(out);
+}
diff --git a/usr.bin/localedef/parser.y b/usr.bin/localedef/parser.y
new file mode 100644
index 000000000000..87ff95d5e956
--- /dev/null
+++ b/usr.bin/localedef/parser.y
@@ -0,0 +1,706 @@
+%{
+/*
+ * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2015 John Marino <draco@marino.st>
+ *
+ * This source code is derived from the illumos localedef command, and
+ * provided under BSD-style license terms by Nexenta Systems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * POSIX localedef grammar.
+ */
+
+#include <wchar.h>
+#include <stdio.h>
+#include <limits.h>
+#include "localedef.h"
+
+%}
+%union {
+	int		num;
+	wchar_t		wc;
+	char		*token;
+	collsym_t	*collsym;
+	collelem_t	*collelem;
+}
+
+%token		T_CODE_SET
+%token		T_MB_CUR_MAX
+%token		T_MB_CUR_MIN
+%token		T_COM_CHAR
+%token		T_ESC_CHAR
+%token		T_LT
+%token		T_GT
+%token		T_NL
+%token		T_SEMI
+%token		T_COMMA
+%token		T_ELLIPSIS
+%token		T_RPAREN
+%token		T_LPAREN
+%token		T_QUOTE
+%token		T_NULL
+%token		T_WS
+%token		T_END
+%token		T_COPY
+%token		T_CHARMAP
+%token		T_WIDTH
+%token		T_CTYPE
+%token		T_ISUPPER
+%token		T_ISLOWER
+%token		T_ISALPHA
+%token		T_ISDIGIT
+%token		T_ISPUNCT
+%token		T_ISXDIGIT
+%token		T_ISSPACE
+%token		T_ISPRINT
+%token		T_ISGRAPH
+%token		T_ISBLANK
+%token		T_ISCNTRL
+%token		T_ISALNUM
+%token		T_ISSPECIAL
+%token		T_ISPHONOGRAM
+%token		T_ISIDEOGRAM
+%token		T_ISENGLISH
+%token		T_ISNUMBER
+%token		T_TOUPPER
+%token		T_TOLOWER
+%token		T_COLLATE
+%token		T_COLLATING_SYMBOL
+%token		T_COLLATING_ELEMENT
+%token		T_ORDER_START
+%token		T_ORDER_END
+%token		T_FORWARD
+%token		T_BACKWARD
+%token		T_POSITION
+%token		T_FROM
+%token		T_UNDEFINED
+%token		T_IGNORE
+%token		T_MESSAGES
+%token		T_YESSTR
+%token		T_NOSTR
+%token		T_YESEXPR
+%token		T_NOEXPR
+%token		T_MONETARY
+%token		T_INT_CURR_SYMBOL
+%token		T_CURRENCY_SYMBOL
+%token		T_MON_DECIMAL_POINT
+%token		T_MON_THOUSANDS_SEP
+%token		T_POSITIVE_SIGN
+%token		T_NEGATIVE_SIGN
+%token		T_MON_GROUPING
+%token		T_INT_FRAC_DIGITS
+%token		T_FRAC_DIGITS
+%token		T_P_CS_PRECEDES
+%token		T_P_SEP_BY_SPACE
+%token		T_N_CS_PRECEDES
+%token		T_N_SEP_BY_SPACE
+%token		T_P_SIGN_POSN
+%token		T_N_SIGN_POSN
+%token		T_INT_P_CS_PRECEDES
+%token		T_INT_N_CS_PRECEDES
+%token		T_INT_P_SEP_BY_SPACE
+%token		T_INT_N_SEP_BY_SPACE
+%token		T_INT_P_SIGN_POSN
+%token		T_INT_N_SIGN_POSN
+%token		T_NUMERIC
+%token		T_DECIMAL_POINT
+%token		T_THOUSANDS_SEP
+%token		T_GROUPING
+%token		T_TIME
+%token		T_ABDAY
+%token		T_DAY
+%token		T_ABMON
+%token		T_MON
+%token		T_ERA
+%token		T_ERA_D_FMT
+%token		T_ERA_T_FMT
+%token		T_ERA_D_T_FMT
+%token		T_ALT_DIGITS
+%token		T_D_T_FMT
+%token		T_D_FMT
+%token		T_T_FMT
+%token		T_AM_PM
+%token		T_T_FMT_AMPM
+%token		T_DATE_FMT
+%token	<wc>		T_CHAR
+%token	<token>		T_NAME
+%token	<num>		T_NUMBER
+%token	<token>		T_SYMBOL
+%token	<collsym>	T_COLLSYM
+%token	<collelem>	T_COLLELEM
+
+%%
+
+localedef	: setting_list categories
+		| categories
+		;
+
+string		: T_QUOTE charlist T_QUOTE
+		| T_QUOTE T_QUOTE
+		;
+
+charlist	: charlist T_CHAR
+		{
+			add_wcs($2);
+		}
+		| T_CHAR
+		{
+			add_wcs($1);
+		}
+		;
+
+setting_list	: setting_list setting
+		| setting
+		;
+
+
+setting		: T_COM_CHAR T_CHAR T_NL
+		{
+			com_char = $2;
+		}
+		| T_ESC_CHAR T_CHAR T_NL
+		{
+			esc_char = $2;
+		}
+		| T_MB_CUR_MAX T_NUMBER T_NL
+		{
+			mb_cur_max = $2;
+		}
+		| T_MB_CUR_MIN T_NUMBER T_NL
+		{
+			mb_cur_min = $2;
+		}
+		| T_CODE_SET string T_NL
+		{
+			wchar_t *w = get_wcs();
+			set_wide_encoding(to_mb_string(w));
+			free(w);
+		}
+		| T_CODE_SET T_NAME T_NL
+		{
+			set_wide_encoding($2);
+		}
+		;
+
+copycat		: T_COPY T_NAME T_NL
+		{
+			copy_category($2);
+		}
+		| T_COPY string T_NL
+		{
+			wchar_t *w = get_wcs();
+			copy_category(to_mb_string(w));
+			free(w);
+		}
+		;
+
+categories	: categories category
+		| category
+		;
+
+
+category	: charmap
+		| messages
+		| monetary
+		| ctype
+		| collate
+		| numeric
+		| time
+		;
+
+
+charmap		: T_CHARMAP T_NL charmap_list T_END T_CHARMAP T_NL
+		| T_WIDTH T_NL width_list T_END T_WIDTH T_NL
+		;
+
+
+charmap_list	: charmap_list charmap_entry
+		| charmap_entry
+		;
+
+
+charmap_entry	: T_SYMBOL T_CHAR
+		{
+			add_charmap($1, $2);
+			scan_to_eol();
+		}
+		| T_SYMBOL T_ELLIPSIS T_SYMBOL T_CHAR
+		{
+			add_charmap_range($1, $3, $4);
+			scan_to_eol();
+		}
+		| T_NL
+		;
+
+width_list	: width_list width_entry
+		| width_entry
+		;
+
+width_entry	: T_CHAR T_NUMBER T_NL
+		{
+			add_width($1, $2);
+		}
+		| T_SYMBOL T_NUMBER T_NL
+		{
+			add_charmap_undefined($1);
+		}
+		| T_CHAR T_ELLIPSIS T_CHAR T_NUMBER T_NL
+		{
+			add_width_range($1, $3, $4);
+		}
+		| T_SYMBOL T_ELLIPSIS T_SYMBOL T_NUMBER T_NL
+		{
+			add_charmap_undefined($1);
+			add_charmap_undefined($3);
+		}
+		| T_CHAR T_ELLIPSIS T_SYMBOL T_NUMBER T_NL
+		{
+			add_width($1, $4);
+			add_charmap_undefined($3);
+		}
+		| T_SYMBOL T_ELLIPSIS T_CHAR T_NUMBER T_NL
+		{
+			add_width($3, $4);
+			add_charmap_undefined($1);
+		}
+		| T_NL
+		;
+
+ctype		: T_CTYPE T_NL ctype_list T_END T_CTYPE T_NL
+		{
+			dump_ctype();
+		}
+		| T_CTYPE T_NL copycat  T_END T_CTYPE T_NL
+		;
+
+ctype_list	: ctype_list ctype_kw
+		| ctype_kw
+		;
+
+ctype_kw	: T_ISUPPER cc_list T_NL
+		| T_ISLOWER cc_list T_NL
+		| T_ISALPHA cc_list T_NL
+		| T_ISDIGIT cc_list T_NL
+		| T_ISPUNCT cc_list T_NL
+		| T_ISXDIGIT cc_list T_NL
+		| T_ISSPACE cc_list T_NL
+		| T_ISPRINT cc_list T_NL
+		| T_ISGRAPH cc_list T_NL
+		| T_ISBLANK cc_list T_NL
+		| T_ISCNTRL cc_list T_NL
+		| T_ISALNUM cc_list T_NL
+		| T_ISSPECIAL cc_list T_NL
+		| T_ISENGLISH cc_list T_NL
+		| T_ISNUMBER cc_list T_NL
+		| T_ISIDEOGRAM cc_list T_NL
+		| T_ISPHONOGRAM cc_list T_NL
+		| T_TOUPPER conv_list T_NL
+		| T_TOLOWER conv_list T_NL
+		;
+
+
+cc_list		: cc_list T_SEMI T_CHAR
+		{
+			add_ctype($3);
+		}
+		| cc_list T_SEMI T_SYMBOL
+		{
+			add_charmap_undefined($3);
+		}
+		| cc_list T_SEMI T_ELLIPSIS T_SEMI T_CHAR
+		{
+			/* note that the endpoints *must* be characters */
+			add_ctype_range($5);
+		}
+		| T_CHAR
+		{
+			add_ctype($1);
+		}
+		| T_SYMBOL
+		{
+			add_charmap_undefined($1);
+		}
+		;
+
+conv_list	: conv_list T_SEMI conv_pair
+		| conv_pair
+		;
+
+
+conv_pair	: T_LPAREN T_CHAR T_COMMA T_CHAR T_RPAREN
+		{
+			add_caseconv($2, $4);
+		}
+		| T_LPAREN T_SYMBOL T_COMMA T_CHAR T_RPAREN
+		{
+			add_charmap_undefined($2);
+		}
+		| T_LPAREN T_SYMBOL T_COMMA T_SYMBOL T_RPAREN
+		{
+			add_charmap_undefined($2);
+			add_charmap_undefined($4);
+		}
+		| T_LPAREN T_CHAR T_COMMA T_SYMBOL T_RPAREN
+		{
+			add_charmap_undefined($4);
+		}
+		;
+
+collate		: T_COLLATE T_NL coll_order T_END T_COLLATE T_NL
+		{
+			dump_collate();
+		}
+		| T_COLLATE T_NL coll_optional coll_order T_END T_COLLATE T_NL
+		{
+			dump_collate();
+		}
+		| T_COLLATE T_NL copycat T_END T_COLLATE T_NL
+		;
+
+
+coll_optional	: coll_optional coll_symbols
+		| coll_optional coll_elements
+		| coll_symbols
+		| coll_elements
+		;
+
+
+coll_symbols	: T_COLLATING_SYMBOL T_SYMBOL T_NL
+		{
+			define_collsym($2);
+		}
+		;
+
+
+coll_elements	: T_COLLATING_ELEMENT T_SYMBOL T_FROM string T_NL
+		{
+			define_collelem($2, get_wcs());
+		}
+		;
+
+coll_order	: T_ORDER_START T_NL order_list T_ORDER_END T_NL
+		{
+			/* No order_start arguments given: default to one forward */
+			add_order_bit(T_FORWARD);
+			add_order_directive();
+		}
+		| T_ORDER_START order_args T_NL order_list T_ORDER_END T_NL
+		;
+
+
+order_args	: order_args T_SEMI order_arg
+		{
+			add_order_directive();
+		}
+		| order_arg
+		{
+			add_order_directive();
+		}
+		;
+
+order_arg	: order_arg T_COMMA order_dir
+		| order_dir
+		;
+
+order_dir	: T_FORWARD
+		{
+			add_order_bit(T_FORWARD);
+		}
+		| T_BACKWARD
+		{
+			add_order_bit(T_BACKWARD);
+		}
+		| T_POSITION
+		{
+			add_order_bit(T_POSITION);
+		}
+		;
+
+order_list	: order_list order_item
+		| order_item
+		;
+
+order_item	: T_COLLSYM T_NL
+		{
+			end_order_collsym($1);
+		}
+		| order_itemkw T_NL
+		{
+			end_order();
+		}
+		| order_itemkw order_weights T_NL
+		{
+			end_order();
+		}
+		;
+
+order_itemkw	: T_CHAR
+		{
+			start_order_char($1);
+		}
+		| T_ELLIPSIS
+		{
+			start_order_ellipsis();
+		}
+		| T_COLLELEM
+		{
+			start_order_collelem($1);
+		}
+		| T_UNDEFINED
+		{
+			start_order_undefined();
+		}
+		| T_SYMBOL
+		{
+			start_order_symbol($1);
+		}
+		;
+
+order_weights	: order_weights T_SEMI order_weight
+		| order_weights T_SEMI
+		| order_weight
+		;
+
+order_weight	: T_COLLELEM
+		{
+			add_order_collelem($1);
+		}
+		| T_COLLSYM
+		{
+			add_order_collsym($1);
+		}
+		| T_CHAR
+		{
+			add_order_char($1);
+		}
+		| T_ELLIPSIS
+		{
+			add_order_ellipsis();
+		}
+		| T_IGNORE
+		{
+			add_order_ignore();
+		}
+		| T_SYMBOL
+		{
+			add_order_symbol($1);
+		}
+		| T_QUOTE order_str T_QUOTE
+		{
+			add_order_subst();
+		}
+		;
+
+order_str	: order_str order_stritem
+		| order_stritem
+		;
+
+order_stritem	: T_CHAR
+		{
+			add_subst_char($1);
+		}
+		| T_COLLSYM
+		{
+			add_subst_collsym($1);
+		}
+		| T_COLLELEM
+		{
+			add_subst_collelem($1);
+		}
+		| T_SYMBOL
+		{
+			add_subst_symbol($1);
+		}
+		;
+
+messages	: T_MESSAGES T_NL messages_list T_END T_MESSAGES T_NL
+		{
+			dump_messages();
+		}
+		| T_MESSAGES T_NL copycat T_END T_MESSAGES T_NL
+		;
+
+messages_list	: messages_list messages_item
+		| messages_item
+		;
+
+messages_kw	: T_YESSTR
+		| T_NOSTR
+		| T_YESEXPR
+		| T_NOEXPR
+		;
+
+messages_item	: messages_kw string T_NL
+		{
+			add_message(get_wcs());
+		}
+		;
+
+monetary	: T_MONETARY T_NL monetary_list T_END T_MONETARY T_NL
+		{
+			dump_monetary();
+		}
+		| T_MONETARY T_NL copycat T_END T_MONETARY T_NL
+		;
+
+monetary_list	: monetary_list monetary_kw
+		| monetary_kw
+		;
+
+monetary_strkw	: T_INT_CURR_SYMBOL
+		| T_CURRENCY_SYMBOL
+		| T_MON_DECIMAL_POINT
+		| T_MON_THOUSANDS_SEP
+		| T_POSITIVE_SIGN
+		| T_NEGATIVE_SIGN
+		;
+
+monetary_numkw	: T_INT_FRAC_DIGITS
+		| T_FRAC_DIGITS
+		| T_P_CS_PRECEDES
+		| T_P_SEP_BY_SPACE
+		| T_N_CS_PRECEDES
+		| T_N_SEP_BY_SPACE
+		| T_P_SIGN_POSN
+		| T_N_SIGN_POSN
+		| T_INT_P_CS_PRECEDES
+		| T_INT_N_CS_PRECEDES
+		| T_INT_P_SEP_BY_SPACE
+		| T_INT_N_SEP_BY_SPACE
+		| T_INT_P_SIGN_POSN
+		| T_INT_N_SIGN_POSN
+		;
+
+monetary_kw	: monetary_strkw string T_NL
+		{
+			add_monetary_str(get_wcs());
+		}
+		| monetary_numkw T_NUMBER T_NL
+		{
+			add_monetary_num($2);
+		}
+		| T_MON_GROUPING mon_group_list T_NL
+		;
+
+mon_group_list	: T_NUMBER
+		{
+			reset_monetary_group();
+			add_monetary_group($1);
+		}
+		| mon_group_list T_SEMI T_NUMBER
+		{
+			add_monetary_group($3);
+		}
+		;
+
+
+numeric		: T_NUMERIC T_NL numeric_list T_END T_NUMERIC T_NL
+		{
+			dump_numeric();
+		}
+		| T_NUMERIC T_NL copycat T_END T_NUMERIC T_NL
+		;
+
+
+numeric_list	: numeric_list numeric_item
+		| numeric_item
+		;
+
+
+numeric_item	: numeric_strkw string T_NL
+		{
+			add_numeric_str(get_wcs());
+		}
+		| T_GROUPING group_list T_NL
+		;
+
+numeric_strkw	: T_DECIMAL_POINT
+		| T_THOUSANDS_SEP
+		;
+
+
+group_list	: T_NUMBER
+		{
+			reset_numeric_group();
+			add_numeric_group($1);
+		}
+		| group_list T_SEMI T_NUMBER
+		{
+			add_numeric_group($3);
+		}
+		;
+
+
+time		: T_TIME T_NL time_kwlist T_END T_TIME T_NL
+		{
+			dump_time();
+		}
+		| T_TIME T_NL copycat T_END T_TIME T_NL	/* "copy" form: nothing to dump */
+		;
+
+time_kwlist	: time_kwlist time_kw
+		| time_kw
+		;
+
+time_kw		: time_strkw string T_NL
+		{
+			add_time_str(get_wcs());
+		}
+		| time_listkw time_list T_NL
+		{
+			check_time_list();
+		}
+		;
+
+time_listkw	: T_ABDAY
+		| T_DAY
+		| T_ABMON
+		| T_MON
+		| T_ERA
+		| T_ALT_DIGITS
+		| T_AM_PM
+		;
+
+time_strkw	: T_ERA_D_T_FMT
+		| T_ERA_T_FMT
+		| T_ERA_D_FMT
+		| T_D_T_FMT
+		| T_D_FMT
+		| T_T_FMT
+		| T_T_FMT_AMPM
+		| T_DATE_FMT
+		;
+
+time_list	: time_list T_SEMI string
+		{
+			add_time_list(get_wcs());
+		}
+		| string
+		{
+			reset_time_list();
+			add_time_list(get_wcs());
+		}
+		;
diff --git a/usr.bin/localedef/scanner.c b/usr.bin/localedef/scanner.c
new file mode 100644
index 000000000000..0b57e5bd120e
--- /dev/null
+++ b/usr.bin/localedef/scanner.c
@@ -0,0 +1,866 @@
+/*
+ * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2015 John Marino <draco@marino.st>
+ *
+ * This source code is derived from the illumos localedef command, and
+ * provided under BSD-style license terms by Nexenta Systems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * This file contains the "scanner", which tokenizes the input files
+ * for localedef for processing by the higher level grammar processor.
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <limits.h>
+#include <string.h>
+#include <wchar.h>
+#include <sys/types.h>
+#include <assert.h>
+#include "localedef.h"
+#include "parser.h"
+
+int			com_char = '#';
+int			esc_char = '\\';
+int			mb_cur_min = 1;
+int			mb_cur_max = 1;
+int			lineno = 1;
+int			warnings = 0;
+int			is_stdin = 1;
+FILE			*input;
+static int		nextline;
+//static FILE		*input = stdin;
+static const char	*filename = "<stdin>";
+static int		instring = 0;
+static int		escaped = 0;
+
+/*
+ * Token space ... grows on demand.
+ */
+static char *token = NULL;
+static int tokidx;
+static int toksz = 0;
+static int hadtok = 0;
+
+/*
+ * Wide string space ... grows on demand.
+ */
+static wchar_t *widestr = NULL;
+static int wideidx = 0;
+static int widesz = 0;
+
+/*
+ * The last keyword seen.  This is useful to trigger the special lexer rules
+ * for "copy" and also collating symbols and elements.
+ */
+int	last_kw = 0;
+static int	category = T_END;
+
+static struct token {
+	int id;
+	const char *name;
+} keywords[] = {
+	{ T_COM_CHAR,		"comment_char" },
+	{ T_ESC_CHAR,		"escape_char" },
+	{ T_END,		"END" },
+	{ T_COPY,		"copy" },
+	{ T_MESSAGES,		"LC_MESSAGES" },
+	{ T_YESSTR,		"yesstr" },
+	{ T_YESEXPR,		"yesexpr" },
+	{ T_NOSTR,		"nostr" },
+	{ T_NOEXPR,		"noexpr" },
+	{ T_MONETARY,		"LC_MONETARY" },
+	{ T_INT_CURR_SYMBOL,	"int_curr_symbol" },
+	{ T_CURRENCY_SYMBOL,	"currency_symbol" },
+	{ T_MON_DECIMAL_POINT,	"mon_decimal_point" },
+	{ T_MON_THOUSANDS_SEP,	"mon_thousands_sep" },
+	{ T_POSITIVE_SIGN,	"positive_sign" },
+	{ T_NEGATIVE_SIGN,	"negative_sign" },
+	{ T_MON_GROUPING,	"mon_grouping" },
+	{ T_INT_FRAC_DIGITS,	"int_frac_digits" },
+	{ T_FRAC_DIGITS,	"frac_digits" },
+	{ T_P_CS_PRECEDES,	"p_cs_precedes" },
+	{ T_P_SEP_BY_SPACE,	"p_sep_by_space" },
+	{ T_N_CS_PRECEDES,	"n_cs_precedes" },
+	{ T_N_SEP_BY_SPACE,	"n_sep_by_space" },
+	{ T_P_SIGN_POSN,	"p_sign_posn" },
+	{ T_N_SIGN_POSN,	"n_sign_posn" },
+	{ T_INT_P_CS_PRECEDES,	"int_p_cs_precedes" },
+	{ T_INT_N_CS_PRECEDES,	"int_n_cs_precedes" },
+	{ T_INT_P_SEP_BY_SPACE,	"int_p_sep_by_space" },
+	{ T_INT_N_SEP_BY_SPACE,	"int_n_sep_by_space" },
+	{ T_INT_P_SIGN_POSN,	"int_p_sign_posn" },
+	{ T_INT_N_SIGN_POSN,	"int_n_sign_posn" },
+	{ T_COLLATE,		"LC_COLLATE" },
+	{ T_COLLATING_SYMBOL,	"collating-symbol" },
+	{ T_COLLATING_ELEMENT,	"collating-element" },
+	{ T_FROM,		"from" },
+	{ T_ORDER_START,	"order_start" },
+	{ T_ORDER_END,		"order_end" },
+	{ T_FORWARD,		"forward" },
+	{ T_BACKWARD,		"backward" },
+	{ T_POSITION,		"position" },
+	{ T_IGNORE,		"IGNORE" },
+	{ T_UNDEFINED,		"UNDEFINED" },
+	{ T_NUMERIC,		"LC_NUMERIC" },
+	{ T_DECIMAL_POINT,	"decimal_point" },
+	{ T_THOUSANDS_SEP,	"thousands_sep" },
+	{ T_GROUPING,		"grouping" },
+	{ T_TIME,		"LC_TIME" },
+	{ T_ABDAY,		"abday" },
+	{ T_DAY,		"day" },
+	{ T_ABMON,		"abmon" },
+	{ T_MON,		"mon" },
+	{ T_D_T_FMT,		"d_t_fmt" },
+	{ T_D_FMT,		"d_fmt" },
+	{ T_T_FMT,		"t_fmt" },
+	{ T_AM_PM,		"am_pm" },
+	{ T_T_FMT_AMPM,		"t_fmt_ampm" },
+	{ T_ERA,		"era" },
+	{ T_ERA_D_FMT,		"era_d_fmt" },
+	{ T_ERA_T_FMT,		"era_t_fmt" },
+	{ T_ERA_D_T_FMT,	"era_d_t_fmt" },
+	{ T_ALT_DIGITS,		"alt_digits" },
+	{ T_CTYPE,		"LC_CTYPE" },
+	{ T_ISUPPER,		"upper" },
+	{ T_ISLOWER,		"lower" },
+	{ T_ISALPHA,		"alpha" },
+	{ T_ISDIGIT,		"digit" },
+	{ T_ISPUNCT,		"punct" },
+	{ T_ISXDIGIT,		"xdigit" },
+	{ T_ISSPACE,		"space" },
+	{ T_ISPRINT,		"print" },
+	{ T_ISGRAPH,		"graph" },
+	{ T_ISBLANK,		"blank" },
+	{ T_ISCNTRL,		"cntrl" },
+	/*
+	 * These entries are local additions, and not specified by
+	 * TOG.  Note that they are not guaranteed to be accurate for
+	 * all locales, and so applications should not depend on them.
+	 */
+	{ T_ISSPECIAL,		"special" },
+	{ T_ISENGLISH,		"english" },
+	{ T_ISPHONOGRAM,	"phonogram" },
+	{ T_ISIDEOGRAM,		"ideogram" },
+	{ T_ISNUMBER,		"number" },
+	/*
+	 * We have to support this in the grammar, but it would be a
+	 * syntax error to define a character as one of these without
+	 * also defining it as an alpha or digit.  We ignore it in our
+	 * parsing.
+	 */
+	{ T_ISALNUM,		"alnum" },
+	{ T_TOUPPER,		"toupper" },
+	{ T_TOLOWER,		"tolower" },
+
+	/*
+	 * These are keywords used in the charmap file.  Note that
+	 * Solaris originally used angle brackets to wrap some of them,
+	 * but we removed that to simplify our parser.  The first of these
+	 * items are "global items."
+	 */
+	{ T_CHARMAP,		"CHARMAP" },
+	{ T_WIDTH,		"WIDTH" },
+
+	{ -1, NULL },
+};
+
+/*
+ * These special words are only used in a charmap file, enclosed in <>.
+ */
+static struct token symwords[] = {
+	{ T_COM_CHAR,		"comment_char" },
+	{ T_ESC_CHAR,		"escape_char" },
+	{ T_CODE_SET,		"code_set_name" },
+	{ T_MB_CUR_MAX,		"mb_cur_max" },
+	{ T_MB_CUR_MIN,		"mb_cur_min" },
+	{ -1, NULL },
+};
+
+static int categories[] = {
+	T_CHARMAP,
+	T_CTYPE,
+	T_COLLATE,
+	T_MESSAGES,
+	T_MONETARY,
+	T_NUMERIC,
+	T_TIME,
+	T_WIDTH,
+	0
+};
+
+void
+reset_scanner(const char *fname)
+{
+	/* Drop the file left over from an earlier run before switching. */
+	if (!is_stdin)
+		(void) fclose(input);
+	if (fname == NULL) {
+		filename = "<stdin>";
+		is_stdin = 1;
+	} else {
+		if ((input = fopen(fname, "r")) == NULL) {
+			perror("fopen");
+			exit(4);
+		}
+		is_stdin = 0;
+		filename = fname;
+	}
+	com_char = '#';
+	esc_char = '\\';
+	instring = 0;
+	escaped = 0;
+	lineno = 1;
+	nextline = 1;
+	tokidx = 0;
+	wideidx = 0;
+}
+
+#define	hex(x)	/* value of one hex digit; x is evaluated more than once */ \
+	(isdigit(x) ? ((x) - '0') : ((islower(x) ? ((x) - 'a') : ((x) - 'A')) + 10))
+#define	isodigit(x)	(((x) >= '0') && ((x) <= '7'))	/* octal digit? */
+
+static int
+scanc(void)
+{
+	int	ch;
+
+	/*
+	 * Read one character from whichever stream is active.  lineno is
+	 * the line that character is on; nextline advances past a newline.
+	 */
+	ch = getc(is_stdin ? stdin : input);
+	lineno = nextline;
+	if (ch == '\n')
+		nextline++;
+	return (ch);
+}
+
+static void
+unscanc(int c)
+{
+	if (c == '\n') {	/* scanc() bumped nextline for it; undo that */
+		nextline--;
+	}
+	if (ungetc(c, is_stdin ? stdin : input) < 0) {	/* also trips when c is EOF */
+		yyerror("ungetc failed");
+	}
+}
+
+static int
+scan_hex_byte(void)
+{
+	int	digit, i;
+	int	value = 0;
+
+	/*
+	 * Exactly two hex digits make up the byte, high nibble first.
+	 */
+	for (i = 0; i < 2; i++) {
+		digit = scanc();
+		if (!isxdigit(digit)) {
+			yyerror("malformed hex digit");
+			return (0);
+		}
+		value = (value << 4) | hex(digit);
+	}
+	return (value);
+}
+
+static int
+scan_dec_byte(void)
+{
+	int	c, i;
+	int	b = 0;
+
+	/*
+	 * Two or three decimal digits are expected, and together they must
+	 * describe a single byte.
+	 */
+	for (i = 0; i < 3; i++) {
+		c = scanc();
+		if (!isdigit(c)) {
+			if (i < 2) {
+				yyerror("malformed decimal digit");
+				return (0);
+			}
+			unscanc(c);	/* the third digit is optional */
+			break;
+		}
+		b = (b * 10) + (c - '0');
+	}
+	if (b > UCHAR_MAX) {
+		yyerror("decimal constant does not fit in a byte");
+		return (0);
+	}
+	return (b);
+}
+
+static int
+scan_oct_byte(void)
+{
+	int	c, i;
+	int	b = 0;
+
+	/*
+	 * Two or three octal digits are expected, and together they must
+	 * describe a single byte (three digits can reach 0777).
+	 */
+	for (i = 0; i < 3; i++) {
+		c = scanc();
+		if (!isodigit(c)) {
+			if (i < 2) {
+				yyerror("malformed octal digit");
+				return (0);
+			}
+			unscanc(c);	/* the third digit is optional */
+			break;
+		}
+		b = (b * 8) + (c - '0');
+	}
+
+	/* Reject e.g. \400 instead of letting it be cut down to a byte. */
+	if (b > UCHAR_MAX) {
+		yyerror("octal constant does not fit in a byte");
+		return (0);
+	}
+	return (b);
+}
+
+void
+add_tok(int c)
+{
+	if ((tokidx + 1) >= toksz) {
+		toksz += 64;
+		if ((token = realloc(token, toksz)) == NULL) {
+			yyerror("out of memory");
+			tokidx = 0;
+			toksz = 0;
+			return;
+		}
+	}
+
+	token[tokidx++] = (char)c;
+	token[tokidx] = 0;
+}
+void
+add_wcs(wchar_t c)
+{
+	if ((wideidx + 1) >= widesz) {
+		widesz += 64;
+		widestr = realloc(widestr, (widesz * sizeof (wchar_t)));
+		if (widestr == NULL) {
+			yyerror("out of memory");
+			wideidx = 0;
+			widesz = 0;
+			return;
+		}
+	}
+
+	widestr[wideidx++] = c;
+	widestr[wideidx] = 0;
+}
+
+wchar_t *
+get_wcs(void)
+{
+	wchar_t *ws = widestr;
+	wideidx = 0;
+	widestr = NULL;
+	widesz = 0;
+	if (ws == NULL) {
+		if ((ws = wcsdup(L"")) == NULL) {
+			yyerror("out of memory");
+		}
+	}
+	return (ws);
+}
+
+static int
+get_byte(void)
+{
+	int	c;
+	/* Read one escaped byte constant; EOF when the input holds none. */
+	if ((c = scanc()) != esc_char) {
+		unscanc(c);
+		return (EOF);
+	}
+	c = scanc();
+	/* The character after the escape selects the numeric base. */
+	switch (c) {
+	case 'd':
+	case 'D':
+		return (scan_dec_byte());
+	case 'x':
+	case 'X':
+		return (scan_hex_byte());
+	case '0':
+	case '1':
+	case '2':
+	case '3':
+	case '4':
+	case '5':
+	case '6':
+	case '7':
+		/* put the character back so we can get it */
+		unscanc(c);
+		return (scan_oct_byte());
+	default:
+		unscanc(c);
+		unscanc(esc_char);	/* NOTE(review): 2nd pushback in a row; ISO C ungetc() only guarantees one */
+		return (EOF);
+	}
+}
+
+int
+get_escaped(int c)
+{
+	switch (c) {
+	case 'n':
+		return ('\n');
+	case 'r':
+		return ('\r');
+	case 't':
+		return ('\t');
+	case 'f':
+		return ('\f');
+	case 'v':
+		return ('\v');
+	case 'b':
+		return ('\b');
+	case 'a':
+		return ('\a');
+	default:
+		return (c);
+	}
+}
+
+int
+get_wide(void)
+{
+	static char mbs[MB_LEN_MAX + 1] = "";
+	static int mbi = 0;
+	int c;
+	wchar_t	wc;
+
+	if (mb_cur_max >= (int)sizeof (mbs)) {
+		yyerror("max multibyte character size too big");
+		mbi = 0;
+		return (T_NULL);
+	}
+	for (;;) {
+		if ((mbi == mb_cur_max) || ((c = get_byte()) == EOF)) {
+			/*
+			 * end of the byte sequence reached, but no
+			 * valid wide decoding.  fatal error.
+			 */
+			mbi = 0;
+			yyerror("not a valid character encoding");
+			return (T_NULL);
+		}
+		mbs[mbi++] = c;
+		mbs[mbi] = 0;
+
+		/* does it decode? */
+		if (to_wide(&wc, mbs) >= 0) {
+			break;
+		}
+	}
+
+	mbi = 0;
+	if ((category != T_CHARMAP) && (category != T_WIDTH)) {
+		if (check_charmap(wc) < 0) {
+			yyerror("no symbolic name for character");
+			return (T_NULL);
+		}
+	}
+
+	yylval.wc = wc;
+	return (T_CHAR);
+}
+
+int
+get_symbol(void)
+{
+	int	c;
+	/* Scan a "<name>" up to its closing '>' and classify the name. */
+	while ((c = scanc()) != EOF) {
+		if (escaped) {
+			escaped = 0;
+			if (c == '\n')
+				continue;
+			add_tok(get_escaped(c));
+			continue;
+		}
+		if (c == esc_char) {
+			escaped = 1;
+			continue;
+		}
+		if (c == '\n') {	/* well that's strange! */
+			yyerror("unterminated symbolic name");
+			continue;
+		}
+		if (c == '>') {		/* end of symbol */
+			/*
+			 * tokidx == 0 means nothing was collected (an empty
+			 * "<>"); text still in the buffer then belongs to
+			 * an earlier token and must not be looked up as
+			 * this symbol's name.
+			 */
+			if (token == NULL || tokidx == 0) {
+				yyerror("missing symbolic name");
+				return (T_NULL);
+			}
+			/* Token complete: the next character starts afresh. */
+			tokidx = 0;
+
+			/*
+			 * A few symbols are handled as keywords outside
+			 * of the normal categories.
+			 */
+			if (category == T_END) {
+				int i;
+				for (i = 0; symwords[i].name != 0; i++) {
+					if (strcmp(token, symwords[i].name) ==
+					    0) {
+						last_kw = symwords[i].id;
+						return (last_kw);
+					}
+				}
+			}
+			/*
+			 * Contextual rule: Only literal characters are
+			 * permitted in CHARMAP.  Anywhere else the symbolic
+			 * forms are fine.
+			 */
+			if ((category != T_CHARMAP) &&
+			    (lookup_charmap(token, &yylval.wc)) != -1) {
+				return (T_CHAR);
+			}
+			if ((yylval.collsym = lookup_collsym(token)) != NULL) {
+				return (T_COLLSYM);
+			}
+			if ((yylval.collelem = lookup_collelem(token)) !=
+			    NULL) {
+				return (T_COLLELEM);
+			}
+			/* it's an undefined symbol; the parser now owns the buffer */
+			yylval.token = token;
+			token = NULL;
+			toksz = 0;
+			tokidx = 0;
+			return (T_SYMBOL);
+		}
+		add_tok(c);
+	}
+
+	yyerror("unterminated symbolic name");
+	return (EOF);
+}
+
+int
+get_category(void)
+{
+	return (category);
+}
+
+static int
+consume_token(void)
+{
+	int	len = tokidx;
+	int	i;
+
+	/* Classify the text gathered in the token buffer and reset it. */
+	tokidx = 0;
+	if (token == NULL)
+		return (T_NULL);
+
+	/*
+	 * this one is special, because we don't want it to alter the
+	 * last_kw field.
+	 */
+	if (strcmp(token, "...") == 0)
+		return (T_ELLIPSIS);
+
+	/* search for reserved words first */
+	for (i = 0; keywords[i].name; i++) {
+		int j;
+		if (strcmp(keywords[i].name, token) != 0)
+			continue;
+
+		last_kw = keywords[i].id;
+
+		/* clear the top level category if we're done with it */
+		if (last_kw == T_END) {
+			category = T_END;
+		}
+
+		/* set the top level category if we're changing */
+		for (j = 0; categories[j]; j++) {
+			if (categories[j] != last_kw)
+				continue;
+			category = last_kw;
+		}
+
+		return (keywords[i].id);
+	}
+
+	/* maybe it's a numeric constant? (<ctype.h> wants unsigned char) */
+	if (isdigit((unsigned char)token[0]) ||
+	    (token[0] == '-' && isdigit((unsigned char)token[1]))) {
+		char *eptr;
+		yylval.num = strtol(token, &eptr, 10);
+		if (*eptr != 0)
+			yyerror("malformed number");
+		return (T_NUMBER);
+	}
+
+	/*
+	 * A single lone character is treated as a character literal.
+	 * Read it as an unsigned byte so it is never sign-extended.
+	 */
+	if (len == 1) {
+		yylval.wc = (unsigned char)token[0];
+		return (T_CHAR);
+	}
+
+	/* anything else is a symbolic name; the parser now owns the buffer */
+	yylval.token = token;
+	token = NULL;
+	toksz = 0;
+	tokidx = 0;
+	return (T_NAME);
+}
+
+void
+scan_to_eol(void)
+{
+	int	c;
+	while ((c = scanc()) != '\n') {
+		if (c == EOF) {
+			/* end of file without newline! */
+			errf("missing newline");
+			return;
+		}
+	}
+	assert(c == '\n');
+}
+
+int
+yylex(void)
+{
+	int		c;
+
+	while ((c = scanc()) != EOF) {
+
+		/* special handling for quoted string */
+		if (instring) {
+			if (escaped) {
+				escaped = 0;
+
+				/* if newline, just eat and forget it */
+				if (c == '\n')
+					continue;
+
+				if (strchr("xXd01234567", c)) {
+					unscanc(c);
+					unscanc(esc_char);
+					return (get_wide());
+				}
+				yylval.wc = get_escaped(c);
+				return (T_CHAR);
+			}
+			if (c == esc_char) {
+				escaped = 1;
+				continue;
+			}
+			switch (c) {
+			case '<':
+				return (get_symbol());
+			case '>':
+				/* oops! should generate syntax error  */
+				return (T_GT);
+			case '"':
+				instring = 0;
+				return (T_QUOTE);
+			default:
+				yylval.wc = c;
+				return (T_CHAR);
+			}
+		}
+
+		/* escaped characters first */
+		if (escaped) {
+			escaped = 0;
+			if (c == '\n') {
+				/* eat the newline */
+				continue;
+			}
+			hadtok = 1;
+			if (tokidx) {
+				/* an escape mid-token is nonsense */
+				return (T_NULL);
+			}
+
+			/* numeric escapes are treated as wide characters */
+			if (strchr("xXd01234567", c)) {
+				unscanc(c);
+				unscanc(esc_char);
+				return (get_wide());
+			}
+
+			add_tok(get_escaped(c));
+			continue;
+		}
+
+		/* if it is the escape character itself, note it */
+		if (c == esc_char) {
+			escaped = 1;
+			continue;
+		}
+
+		/* remove from the comment char to end of line */
+		if (c == com_char) {
+			while (c != '\n') {
+				if ((c = scanc()) == EOF) {
+					/* end of file without newline! */
+					return (EOF);
+				}
+			}
+			assert(c == '\n');
+			if (!hadtok) {
+				/*
+				 * If there were no tokens on this line,
+				 * then just pretend it didn't exist at all.
+				 */
+				continue;
+			}
+			hadtok = 0;
+			return (T_NL);
+		}
+
+		if (strchr(" \t\n;()<>,\"", c) && (tokidx != 0)) {
+			/*
+			 * These are all token delimiters.  If there
+			 * is a token already in progress, we need to
+			 * process it.
+			 */
+			unscanc(c);
+			return (consume_token());
+		}
+
+		switch (c) {
+		case '\n':
+			if (!hadtok) {
+				/*
+				 * If the line was completely devoid of tokens,
+				 * then just ignore it.
+				 */
+				continue;
+			}
+			/* we're starting a new line, reset the token state */
+			hadtok = 0;
+			return (T_NL);
+		case ',':
+			hadtok = 1;
+			return (T_COMMA);
+		case ';':
+			hadtok = 1;
+			return (T_SEMI);
+		case '(':
+			hadtok = 1;
+			return (T_LPAREN);
+		case ')':
+			hadtok = 1;
+			return (T_RPAREN);
+		case '>':
+			hadtok = 1;
+			return (T_GT);
+		case '<':
+			/* symbol start! */
+			hadtok = 1;
+			return (get_symbol());
+		case ' ':
+		case '\t':
+			/* whitespace, just ignore it */
+			continue;
+		case '"':
+			hadtok = 1;
+			instring = 1;
+			return (T_QUOTE);
+		default:
+			hadtok = 1;
+			add_tok(c);
+			continue;
+		}
+	}
+	return (EOF);
+}
+
+void
+yyerror(const char *msg)
+{
+	(void) fprintf(stderr, "%s: %d: error: %s\n",
+	    filename, lineno, msg);
+	exit(4);
+}
+
+void
+errf(const char *fmt, ...)
+{
+	char	*msg = NULL;
+	va_list	va;
+	/* Fatal error: print the formatted message with file/line, exit 4. */
+	va_start(va, fmt);
+	if (vasprintf(&msg, fmt, va) < 0)
+		msg = NULL;	/* formatting failed: show the raw format */
+	va_end(va);
+	(void) fprintf(stderr, "%s: %d: error: %s\n",
+	    filename, lineno, (msg != NULL) ? msg : fmt);
+	free(msg);
+	exit(4);
+}
+
+void
+warn(const char *fmt, ...)
+{
+	char	*msg = NULL;
+	va_list	va;
+	/* Print a formatted warning and count it; fatal unless warnok is set. */
+	va_start(va, fmt);
+	if (vasprintf(&msg, fmt, va) < 0)
+		msg = NULL;	/* formatting failed: show the raw format */
+	va_end(va);
+	(void) fprintf(stderr, "%s: %d: warning: %s\n",
+	    filename, lineno, (msg != NULL) ? msg : fmt);
+	free(msg);
+	warnings++;
+	if (!warnok)
+		exit(4);
+}
diff --git a/usr.bin/localedef/time.c b/usr.bin/localedef/time.c
new file mode 100644
index 000000000000..8a8c7107918f
--- /dev/null
+++ b/usr.bin/localedef/time.c
@@ -0,0 +1,280 @@
+/*
+ * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2015 John Marino <draco@marino.st>
+ *
+ * This source code is derived from the illumos localedef command, and
+ * provided under BSD-style license terms by Nexenta Systems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * LC_TIME database generation routines for localedef.
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <string.h>
+#include <unistd.h>
+#include "localedef.h"
+#include "parser.h"
+#include "timelocal.h"
+
+struct lc_time_T tm;	/* LC_TIME fields filled in by the routines below */
+
+void
+init_time(void)
+{
+	(void) memset(&tm, 0, sizeof (tm));	/* every slot starts out empty */
+}
+
+void
+add_time_str(wchar_t *wcs)
+{
+	char	*str = to_mb_string(wcs);	/* multibyte copy of wcs */
+
+	/* Ownership of wcs passes to us: release it on every path. */
+	free(wcs);
+	if (str == NULL) {
+		INTERR;
+		return;
+	}
+	switch (last_kw) {	/* keyword decides which tm field gets str */
+	case T_D_T_FMT:
+		tm.c_fmt = str;
+		break;
+	case T_D_FMT:
+		tm.x_fmt = str;
+		break;
+	case T_T_FMT:
+		tm.X_fmt = str;
+		break;
+	case T_T_FMT_AMPM:
+		tm.ampm_fmt = str;
+		break;
+	case T_DATE_FMT:
+		/*
+		 * This one is a Solaris extension, Too bad date just
+		 * doesn't use %c, which would be simpler.
+		 */
+		tm.date_fmt = str;
+		break;
+	case T_ERA_D_FMT:
+	case T_ERA_T_FMT:
+	case T_ERA_D_T_FMT:
+		free(str);	/* era formats are ignored; do not leak them */
+		break;
+	default:
+		free(str);
+		INTERR;
+		break;
+	}
+}
+
+static void
+add_list(const char *ptr[], char *str, int limit)
+{
+	int	i;
+	for (i = 0; i < limit; i++)	/* store str in the first empty slot */
+		if (ptr[i] == NULL) {
+			ptr[i] = str;
+			return;
+		}
+	fprintf(stderr,"too many list elements");
+	free(str);	/* list full: nothing keeps str, so release it */
+}
+
+void
+add_time_list(wchar_t *wcs)
+{
+	char *str = to_mb_string(wcs);	/* multibyte copy of wcs */
+
+	free(wcs);	/* we own wcs; release it even if conversion failed */
+	if (str == NULL) {
+		INTERR;
+		return;
+	}
+	switch (last_kw) {	/* keyword decides which list receives str */
+	case T_ABMON:
+		add_list(tm.mon, str, 12);
+		break;
+	case T_MON:
+		add_list(tm.month, str, 12);
+		break;
+	case T_ABDAY:
+		add_list(tm.wday, str, 7);
+		break;
+	case T_DAY:
+		add_list(tm.weekday, str, 7);
+		break;
+	case T_AM_PM:
+		if (tm.am == NULL) {
+			tm.am = str;
+		} else if (tm.pm == NULL) {
+			tm.pm = str;
+		} else {
+			fprintf(stderr,"too many list elements");
+			free(str);	/* both slots taken: drop it */
+		}
+		break;
+	case T_ALT_DIGITS:
+	case T_ERA:
+		free(str);
+		break;
+	default:
+		free(str);
+		INTERR;
+		break;
+	}
+}
+
+void
+check_time_list(void)
+{
+	const char *last = NULL;	/* final slot of the current list */
+
+	/*
+	 * Complain unless the final slot of the current list is filled.
+	 */
+	switch (last_kw) {
+	case T_ABMON:
+		last = tm.mon[11];
+		break;
+	case T_MON:
+		last = tm.month[11];
+		break;
+	case T_ABDAY:
+		last = tm.wday[6];
+		break;
+	case T_DAY:
+		last = tm.weekday[6];
+		break;
+	case T_AM_PM:
+		last = tm.pm;
+		break;
+	case T_ERA:
+	case T_ALT_DIGITS:
+		return;		/* these lists are never length-checked */
+	default:
+		fprintf(stderr,"unknown list");
+		break;
+	}
+	if (last == NULL)
+		fprintf(stderr,"too few items in list (%d)", last_kw);
+}
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wcast-qual"
+
+void
+reset_time_list(void)
+{
+	const char **list;	/* array selected by last_kw */
+	int count, i;
+	/* Free and NULL out the entries of the list selected by last_kw. */
+	switch (last_kw) {
+	case T_ABMON:
+		list = tm.mon;
+		count = 12;
+		break;
+	case T_MON:
+		list = tm.month;
+		count = 12;
+		break;
+	case T_ABDAY:
+		list = tm.wday;
+		count = 7;
+		break;
+	case T_DAY:
+		list = tm.weekday;
+		count = 7;
+		break;
+	case T_AM_PM:
+		free((char *)tm.am);
+		tm.am = NULL;
+		free((char *)tm.pm);
+		tm.pm = NULL;
+		return;
+	default:
+		return;
+	}
+	for (i = 0; i < count; i++) {
+		free((char *)list[i]);
+		list[i] = NULL;
+	}
+}
+
+#pragma GCC diagnostic pop
+
+void
+dump_time(void)
+{
+	/* The four name lists, in output order, paired with their lengths. */
+	const char **lists[] = { tm.mon, tm.month, tm.wday, tm.weekday };
+	const int counts[] = { 12, 12, 7, 7 };
+	const char *date;
+	FILE *f;
+	int which, i;
+
+	/*
+	 * Write every LC_TIME string through putl_category().  An EOF
+	 * result from it ends the dump immediately, without reaching
+	 * close_category().
+	 */
+	f = open_category();
+	if (f == NULL)
+		return;
+
+	for (which = 0; which < 4; which++) {
+		for (i = 0; i < counts[which]; i++) {
+			if (putl_category(lists[which][i], f) == EOF)
+				return;
+		}
+	}
+
+	if (putl_category(tm.X_fmt, f) == EOF ||
+	    putl_category(tm.x_fmt, f) == EOF ||
+	    putl_category(tm.c_fmt, f) == EOF ||
+	    putl_category(tm.am, f) == EOF ||
+	    putl_category(tm.pm, f) == EOF) {
+		return;
+	}
+
+	/*
+	 * NOTE: If date_fmt is not specified, then we'll default to
+	 * using the %c for date.  This is reasonable for most
+	 * locales, although for reasons that I don't understand
+	 * Solaris historically has had a separate format for date.
+	 */
+	date = (tm.date_fmt != NULL) ? tm.date_fmt : tm.c_fmt;
+	if (putl_category(date, f) == EOF ||
+	    putl_category(tm.ampm_fmt, f) == EOF) {
+		return;
+	}
+
+	close_category(f);
+}
diff --git a/usr.bin/localedef/wide.c b/usr.bin/localedef/wide.c
new file mode 100644
index 000000000000..1c57fed2de7c
--- /dev/null
+++ b/usr.bin/localedef/wide.c
@@ -0,0 +1,669 @@
+/*
+ * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2012 Garrett D'Amore <garrett@damore.org>  All rights reserved.
+ * Copyright 2015 John Marino <draco@marino.st>
+ *
+ * This source code is derived from the illumos localedef command, and
+ * provided under BSD-style license terms by Nexenta Systems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * The functions in this file convert from the standard multibyte forms
+ * to the wide character forms used internally by libc.  Unfortunately,
+ * this approach means that we need a method for each and every encoding.
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <ctype.h>
+#include <stdlib.h>
+#include <wchar.h>
+#include <string.h>
+#include <sys/types.h>
+#include "localedef.h"
+
+static int towide_none(wchar_t *, const char *, unsigned);
+static int towide_utf8(wchar_t *, const char *, unsigned);
+static int towide_big5(wchar_t *, const char *, unsigned);
+static int towide_gbk(wchar_t *, const char *, unsigned);
+static int towide_gb2312(wchar_t *, const char *, unsigned);
+static int towide_gb18030(wchar_t *, const char *, unsigned);
+static int towide_mskanji(wchar_t *, const char *, unsigned);
+static int towide_euccn(wchar_t *, const char *, unsigned);
+static int towide_eucjp(wchar_t *, const char *, unsigned);
+static int towide_euckr(wchar_t *, const char *, unsigned);
+static int towide_euctw(wchar_t *, const char *, unsigned);
+
+static int tomb_none(char *, wchar_t);
+static int tomb_utf8(char *, wchar_t);
+static int tomb_mbs(char *, wchar_t);
+
+static int (*_towide)(wchar_t *, const char *, unsigned) = towide_none;
+static int (*_tomb)(char *, wchar_t) = tomb_none;
+static const char *_encoding = "NONE";	/* see get_wide_encoding() */
+static int _nbits = 7;	/* NOTE(review): set_wide_encoding() resets this to 8 */
+
+/*
+ * Table of supported encodings.  We only bother to list the multibyte
+ * encodings here, because single byte locales are handled by "NONE".
+ */
+static struct {	/* table searched by set_wide_encoding() */
+	const char *name;
+	/* the name that the underlying libc implementation uses */
+	const char *cname;
+	/* the maximum number of bits required for priorities */
+	int nbits;
+	int (*towide)(wchar_t *, const char *, unsigned);
+	int (*tomb)(char *, wchar_t);
+} mb_encodings[] = {
+	/*
+	 * UTF8 values max out at 0x1fffff (although in theory there could
+	 * be later extensions, but it won't happen.)  This means we only need
+	 * 21 bits to be able to encode the entire range of priorities.
+	 */
+	{ "UTF-8",	"UTF-8",	21, towide_utf8, tomb_utf8 },
+	{ "UTF8",	"UTF-8",	21, towide_utf8, tomb_utf8 },
+	{ "utf8",	"UTF-8",	21, towide_utf8, tomb_utf8 },
+	{ "utf-8",	"UTF-8",	21, towide_utf8, tomb_utf8 },
+
+	{ "EUC-CN",	"EUC-CN",	16, towide_euccn, tomb_mbs },
+	{ "eucCN",	"EUC-CN",	16, towide_euccn, tomb_mbs },
+	/*
+	 * Because the 3-byte forms of EUC-JP use the same leading byte,
+	 * only 17 bits are required to provide unique priorities.  (The low
+	 * bit of that first byte is set.)  By setting this value low,
+	 * we can get by with only 3 bytes in the strxfrm expansion.
+	 */
+	{ "EUC-JP",	"EUC-JP",	17, towide_eucjp, tomb_mbs },
+	{ "eucJP",	"EUC-JP",	17, towide_eucjp, tomb_mbs },
+
+	{ "EUC-KR",	"EUC-KR",	16, towide_euckr, tomb_mbs },
+	{ "eucKR",	"EUC-KR",	16, towide_euckr, tomb_mbs },
+	/*
+	 * EUC-TW uses 2 bytes most of the time, but 4 bytes if the
+	 * high order byte is 0x8E.  However, with 4 byte encodings,
+	 * the third byte will be A0-B0.  So we only need to consider
+	 * the lower order 24 bits for collation.
+	 */
+	{ "EUC-TW",	"EUC-TW",	24, towide_euctw, tomb_mbs },
+	{ "eucTW",	"EUC-TW",	24, towide_euctw, tomb_mbs },
+
+	{ "MS_Kanji",	"MSKanji",	16, towide_mskanji, tomb_mbs },
+	{ "MSKanji",	"MSKanji",	16, towide_mskanji, tomb_mbs },
+	{ "PCK",	"MSKanji",	16, towide_mskanji, tomb_mbs },
+	{ "SJIS",	"MSKanji",	16, towide_mskanji, tomb_mbs },
+	{ "Shift_JIS",	"MSKanji",	16, towide_mskanji, tomb_mbs },
+
+	{ "BIG5",	"BIG5",		16, towide_big5, tomb_mbs },
+	{ "big5",	"BIG5",		16, towide_big5, tomb_mbs },
+	{ "Big5",	"BIG5",		16, towide_big5, tomb_mbs },
+
+	{ "GBK",	"GBK",		16, towide_gbk,	tomb_mbs },
+
+	/*
+	 * GB18030 can get away with just 31 bits.  This is because the
+	 * high order bit is always set for 4 byte values, and at
+	 * least one of the other bits in that 4 byte value will
+	 * be non-zero.
+	 */
+	{ "GB18030",	"GB18030",	31, towide_gb18030, tomb_mbs },
+
+	/*
+	 * This should probably be an alias for euc-cn, or vice versa.
+	 */
+	{ "GB2312",	"GB2312",	16, towide_gb2312, tomb_mbs },
+
+	{ NULL, NULL, 0, 0, 0 },
+};
+
+static char *
+show_mb(const char *mb)
+{
+	static char buf[64];	/* result; overwritten by the next call */
+
+	/* A printable ASCII first byte is shown as itself, alone. */
+	if (isascii(*mb) && isgraph(*mb)) {
+		buf[0] = *mb;
+		buf[1] = 0;
+		return (buf);
+	}
+	buf[0] = 0;	/* otherwise dump every byte as \xNN */
+	while (*mb != 0) {
+		char scr[8];	/* cast: a signed char would sign-extend in %x */
+		(void) snprintf(scr, sizeof (scr), "\\x%02x", (uint8_t)*mb);
+		(void) strlcat(buf, scr, sizeof (buf));
+		mb++;
+	}
+	return (buf);
+}
+
+static char	*widemsg;	/* last conversion error text, or NULL */
+
+void
+werr(const char *fmt, ...)
+{
+	char	*msg = NULL;
+	va_list	va;
+
+	va_start(va, fmt);
+	if (vasprintf(&msg, fmt, va) < 0)
+		msg = NULL;	/* contents undefined on error */
+	va_end(va);
+	free(widemsg);		/* replace any earlier message */
+	widemsg = msg;
+}
+
+/*
+ * This is used for 8-bit encodings.
+ */
+int
+towide_none(wchar_t *c, const char *mb, unsigned n __unused)
+{
+	if (mb_cur_max != 1) {	/* single-byte charmaps only */
+		werr("invalid or unsupported multibyte locale");
+		return (-1);
+	}
+	*c = (uint8_t)*mb;	/* the byte value is the wide value */
+	return (1);
+}
+
+int
+tomb_none(char *mb, wchar_t wc)
+{
+	if (mb_cur_max != 1) {	/* single-byte charmaps only */
+		werr("invalid or unsupported multibyte locale");
+		return (-1);
+	}
+	*(uint8_t *)mb = (wc & 0xff);	/* keep only the low 8 bits */
+	mb[1] = 0;	/* NUL-terminate: mb needs room for 2 bytes */
+	return (1);
+}
+
+/*
+ * UTF-8 stores wide characters in UTF-32 form.  Returns bytes used or -1.
+ */
+int
+towide_utf8(wchar_t *wc, const char *mb, unsigned n)
+{
+	wchar_t	c;
+	int	nb;
+	int	lv;	/* lowest legal value */
+	int	i;
+	const uint8_t *s = (const uint8_t *)mb;
+
+	c = *s;
+
+	if ((c & 0x80) == 0) {
+		/* 7-bit ASCII */
+		*wc = c;
+		return (1);
+	} else if ((c & 0xe0) == 0xc0) {
+		/* u80-u7ff - two bytes encoded */
+		nb = 2;
+		lv = 0x80;
+		c &= ~0xe0;
+	} else if ((c & 0xf0) == 0xe0) {
+		/* u800-uffff - three bytes encoded */
+		nb = 3;
+		lv = 0x800;
+		c &= ~0xf0;
+	} else if ((c & 0xf8) == 0xf0) {
+		/* u10000-u1fffff - four bytes encoded */
+		nb = 4;
+		lv = 0x10000;	/* anything lower is an overlong form */
+		c &= ~0xf8;
+	} else {
+		/* 5 and 6 byte encodings are not legal unicode */
+		werr("utf8 encoding too large (%s)", show_mb(mb));
+		return (-1);
+	}
+	if (nb > (int)n) {
+		werr("incomplete utf8 sequence (%s)", show_mb(mb));
+		return (-1);
+	}
+
+	for (i = 1; i < nb; i++) {
+		if (((s[i]) & 0xc0) != 0x80) {
+			werr("illegal utf8 byte (%x)", s[i]);
+			return (-1);
+		}
+		c <<= 6;
+		c |= (s[i] & 0x3f);
+	}
+
+	if (c < lv) {
+		werr("illegal redundant utf8 encoding (%s)", show_mb(mb));
+		return (-1);
+	}
+	*wc = c;
+	return (nb);
+}
+
+int
+tomb_utf8(char *mb, wchar_t wc)
+{
+	uint8_t *s = (uint8_t *)mb;
+	uint8_t msk;	/* length marker bits for the lead byte */
+	int cnt, i;
+
+	/* Encode wc into mb plus a NUL; returns bytes used, -1 after werr(). */
+	if (wc <= 0x7f) {
+		s[0] = wc & 0x7f;
+		s[1] = 0;
+		return (1);
+	}
+	if (wc <= 0x7ff) {
+		cnt = 2;
+		msk = 0xc0;
+	} else if (wc <= 0xffff) {
+		cnt = 3;
+		msk = 0xe0;
+	} else if (wc <= 0x1fffff) {
+		cnt = 4;
+		msk = 0xf0;
+	} else {
+		werr("illegal utf8 char (%x)", (unsigned)wc);
+		return (-1);
+	}
+	for (i = cnt - 1; i; i--) {
+		s[i] = (wc & 0x3f) | 0x80;
+		wc >>= 6;
+	}
+	s[0] = (msk) | wc;
+	s[cnt] = 0;
+	return (cnt);
+}
+
+/*
+ * Several encodings share a simplistic dual byte encoding.  In these
+ * forms, they all indicate that a two byte sequence is to be used if
+ * the first byte has its high bit set.  They all store this simple
+ * encoding as a 16-bit value, although a great many of the possible
+ * code points are not used in most character sets.  This gives a possible
+ * set of just over 32,000 valid code points.
+ *
+ * 0x00 - 0x7f		- 1 byte encoding
+ * 0x80 - 0x7fff	- illegal
+ * 0x8000 - 0xffff	- 2 byte encoding
+ */
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wcast-qual"
+
+static int
+towide_dbcs(wchar_t *wc, const char *mb, unsigned n)
+{
+	const uint8_t *bytes = (const uint8_t *)mb;
+	uint8_t lead = bytes[0];
+
+	/*
+	 * Decode one character from mb (n bytes available) into *wc.
+	 * Returns the number of bytes consumed, or -1 after werr().
+	 */
+	if (lead < 0x80) {
+		/* high bit clear: the byte stands for itself */
+		*wc = lead;
+		return (1);
+	}
+	if (n < 2) {
+		werr("incomplete character sequence (%s)", show_mb(mb));
+		return (-1);
+	}
+	/* lead byte becomes bits 8-15, trail byte bits 0-7 */
+	*wc = ((wchar_t)lead << 8) | bytes[1];
+	return (2);
+}
+
+/*
+ * Most multibyte locales just convert the wide character to the multibyte
+ * form by stripping leading null bytes, and writing the 32-bit quantity
+ * in big-endian order.
+ */
+int
+tomb_mbs(char *mb, wchar_t wc)
+{
+	uint8_t *out = (uint8_t *)mb;
+	int len, pos;
+
+	/* Length is set by the highest non-zero byte of wc, minimum one. */
+	if ((wc & 0xff000000U) != 0)
+		len = 4;
+	else if ((wc & 0x00ff0000U) != 0)
+		len = 3;
+	else if ((wc & 0x0000ff00U) != 0)
+		len = 2;
+	else
+		len = 1;
+
+	/* Fill from the last byte backwards so the result is big-endian. */
+	for (pos = len - 1; pos >= 0; pos--) {
+		out[pos] = wc & 0xff;
+		wc >>= 8;
+	}
+
+	out[len] = 0;	/* NUL-terminate; the NUL is not counted */
+	return (len);
+}
+
+
+/*
+ * big5 is a simple dual byte character set.
+ */
+int
+towide_big5(wchar_t *wc, const char *mb, unsigned n)
+{
+	return (towide_dbcs(wc, mb, n));	/* shared dual-byte decoder */
+}
+
+/*
+ * GBK encodes wides in the same way that big5 does, the high order
+ * bit of the first byte indicates a double byte character.
+ */
+int
+towide_gbk(wchar_t *wc, const char *mb, unsigned n)
+{
+	return (towide_dbcs(wc, mb, n));	/* shared dual-byte decoder */
+}
+
+/*
+ * GB2312 is another DBCS.  It's cleaner than others in that the second
+ * byte does not encode ASCII, but it supports characters.
+ */
+int
+towide_gb2312(wchar_t *wc, const char *mb, unsigned n)
+{
+	return (towide_dbcs(wc, mb, n));	/* shared dual-byte decoder */
+}
+
+/*
+ * GB18030.  This encodes as 8, 16, or 32-bits.
+ * 7-bit values are in 1 byte,  4 byte sequences are used when
+ * the second byte encodes 0x30-39 and all other sequences are 2 bytes.
+ */
+int
+towide_gb18030(wchar_t *wc, const char *mb, unsigned n)
+{
+	const uint8_t *s = (const uint8_t *)mb;
+	uint32_t v = s[0];	/* unsigned: 4-byte values reach bit 31 */
+
+	/*
+	 * Decode one GB18030 character from mb (n bytes available) into *wc.
+	 * Returns the number of bytes consumed, or -1 after calling werr().
+	 */
+	if ((v & 0x80) == 0) {
+		/* 7-bit */
+		*wc = v;
+		return (1);
+	}
+	if (n < 2) {
+		werr("incomplete character sequence (%s)", show_mb(mb));
+		return (-1);
+	}
+
+	/* pull in the second byte */
+	v = (v << 8) | s[1];
+
+	/* a second byte of 0x30-0x39 marks a four-byte sequence */
+	if ((s[1] >= 0x30) && (s[1] <= 0x39)) {
+		if (n < 4) {
+			werr("incomplete 4-byte character sequence (%s)",
+			    show_mb(mb));
+			return (-1);
+		}
+		v = (v << 16) | ((uint32_t)s[2] << 8) | s[3];
+		*wc = (wchar_t)v;
+		return (4);
+	}
+
+	*wc = (wchar_t)v;
+	return (2);
+}
+
+/*
+ * MS-Kanji (aka SJIS) is almost a clean DBCS like the others, but it
+ * also has a range of single byte characters above 0x80.  (0xa1-0xdf).
+ */
+int
+towide_mskanji(wchar_t *wc, const char *mb, unsigned n)
+{
+	const uint8_t *bytes = (const uint8_t *)mb;
+	uint8_t lead = bytes[0];
+
+	/*
+	 * Returns the bytes consumed from mb (1 or 2), or -1 after werr().
+	 */
+	if (lead <= 0x7f || (lead >= 0xa1 && lead <= 0xdf)) {
+		/* single byte: ASCII range or 0xa1-0xdf */
+		*wc = lead;
+		return (1);
+	}
+
+	if (n < 2) {
+		werr("incomplete character sequence (%s)", show_mb(mb));
+		return (-1);
+	}
+
+	/* lead byte becomes bits 8-15, trail byte bits 0-7 */
+	*wc = ((wchar_t)lead << 8) | bytes[1];
+	return (2);
+}
+
+/*
+ * EUC forms.  EUC encodings are "variable".  FreeBSD carries some additional
+ * variable data to encode these, but we're going to treat each as independent
+ * instead.  It's the only way we can sensibly move forward.
+ *
+ * Note that the way in which the different EUC forms vary is how wide
+ * CS2 and CS3 are and what the first byte of them is.
+ */
+static int
+towide_euc_impl(wchar_t *wc, const char *mb, unsigned n,
+    uint8_t cs2, uint8_t cs2width, uint8_t cs3, uint8_t cs3width)
+{
+	const uint8_t *s = (const uint8_t *)mb;
+	int i, width = 2;	/* 2 unless a cs2/cs3 lead says otherwise */
+	uint32_t c = s[0];	/* unsigned: 4-byte forms shift into bit 31 */
+
+	/* Returns bytes consumed from mb, or -1 after werr() if n is short. */
+
+	/*
+	 * All variations of EUC encode 7-bit ASCII as one byte, and use
+	 * additional bytes for more than that.
+	 */
+	if ((c & 0x80) == 0) {
+		/* 7-bit */
+		*wc = c;
+		return (1);
+	}
+
+	/*
+	 * All EUC variants reserve 0xa1-0xff to identify CS1, which
+	 * is always two bytes wide.  Note that unused CS will be zero,
+	 * and that cannot be true because we know that the high order
+	 * bit must be set.
+	 */
+	if (c >= 0xa1) {
+		width = 2;
+	} else if (c == cs2) {
+		width = cs2width;
+	} else if (c == cs3) {
+		width = cs3width;
+	}
+
+	if ((int)n < width) {
+		werr("incomplete character sequence (%s)", show_mb(mb));
+		return (-1);
+	}
+
+	for (i = 1; i < width; i++) {
+		/* pull in the next byte */
+		c <<= 8;
+		c |= s[i];
+	}
+
+	*wc = (wchar_t)c;
+	return (width);
+}
+
+#pragma GCC diagnostic pop
+
+/*
+ * EUC-CN encodes as follows.  NOTE(review): CS2 is listed as unused, but
+ * the call below passes 0x8e with width 4, same as EUC-TW -- confirm:
+ * Code set 0 (ASCII):				0x21-0x7E
+ * Code set 1 (CNS 11643-1992 Plane 1):		0xA1A1-0xFEFE
+ * Code set 2:					unused
+ * Code set 3:					unused
+ */
+int
+towide_euccn(wchar_t *wc, const char *mb, unsigned n)
+{
+	return (towide_euc_impl(wc, mb, n, 0x8e, 4, 0, 0));
+}
+
+/*
+ * EUC-JP encodes as follows (CS2 lead 0x8e, 2 bytes; CS3 lead 0x8f, 3 bytes):
+ *
+ * Code set 0 (ASCII or JIS X 0201-1976 Roman):	0x21-0x7E
+ * Code set 1 (JIS X 0208):			0xA1A1-0xFEFE
+ * Code set 2 (half-width katakana):		0x8EA1-0x8EDF
+ * Code set 3 (JIS X 0212-1990):		0x8FA1A1-0x8FFEFE
+ */
+int
+towide_eucjp(wchar_t *wc, const char *mb, unsigned n)
+{
+	return (towide_euc_impl(wc, mb, n, 0x8e, 2, 0x8f, 3));
+}
+
+/*
+ * EUC-KR encodes as follows (no CS2/CS3, hence the zero arguments):
+ *
+ * Code set 0 (ASCII or KS C 5636-1993):	0x21-0x7E
+ * Code set 1 (KS C 5601-1992):			0xA1A1-0xFEFE
+ * Code set 2:					unused
+ * Code set 3:					unused
+ */
+int
+towide_euckr(wchar_t *wc, const char *mb, unsigned n)
+{
+	return (towide_euc_impl(wc, mb, n, 0, 0, 0, 0));
+}
+
+/*
+ * EUC-TW encodes as follows (CS2 lead 0x8e starts a 4-byte sequence):
+ *
+ * Code set 0 (ASCII):				0x21-0x7E
+ * Code set 1 (CNS 11643-1992 Plane 1):		0xA1A1-0xFEFE
+ * Code set 2 (CNS 11643-1992 Planes 1-16):	0x8EA1A1A1-0x8EB0FEFE
+ * Code set 3:					unused
+ */
+int
+towide_euctw(wchar_t *wc, const char *mb, unsigned n)
+{
+	return (towide_euc_impl(wc, mb, n, 0x8e, 4, 0, 0));
+}
+
+/*
+ * Public entry points.
+ */
+
+int
+to_wide(wchar_t *wc, const char *mb)
+{
+	/* this won't fail hard: a decoder's result, -1 included, is returned */
+	return (_towide(wc, mb, strlen(mb)));
+}
+
+int
+to_mb(char *mb, wchar_t wc)
+{
+	int	rv;
+
+	if ((rv = _tomb(mb, wc)) < 0) {
+		errf("%s", widemsg);	/* never use widemsg as the format */
+		free(widemsg);
+		widemsg = NULL;
+	}
+	return (rv);
+}
+
+char *
+to_mb_string(const wchar_t *wcs)
+{
+	const wchar_t *in;
+	char	*out, *cursor;
+	int	written;
+
+	/* Worst case is mb_cur_max bytes per wide character, plus the NUL. */
+	out = malloc((wcslen(wcs) * mb_cur_max) + 1);
+	if (out == NULL) {
+		errf("out of memory");
+		return (NULL);
+	}
+	for (in = wcs, cursor = out; *in != 0; in++) {
+		written = to_mb(cursor, *in);
+		if (written < 0) {
+			INTERR;
+			free(out);
+			return (NULL);
+		}
+		cursor += written;
+	}
+	*cursor = 0;
+	return (out);	/* malloc'ed; the caller releases it */
+}
+
+void
+set_wide_encoding(const char *encoding)
+{
+	int idx = 0;
+
+	/* Start from the single-byte defaults; a table match overrides them. */
+	_towide = towide_none;
+	_tomb = tomb_none;
+	_encoding = "NONE";
+	_nbits = 8;
+	while (mb_encodings[idx].name != NULL &&
+	    strcasecmp(encoding, mb_encodings[idx].name) != 0)
+		idx++;
+	if (mb_encodings[idx].name == NULL)
+		return;		/* not a listed multibyte encoding */
+	_towide = mb_encodings[idx].towide;
+	_tomb = mb_encodings[idx].tomb;
+	_encoding = mb_encodings[idx].cname;
+	_nbits = mb_encodings[idx].nbits;
+}
+
+const char *
+get_wide_encoding(void)
+{
+	return (_encoding);	/* "NONE" or the cname of the matched entry */
+}
+
+int
+max_wide(void)
+{
+	return ((int)((1U << _nbits) - 1));	/* low _nbits bits all set */
+}