2004-07-09 02:08:07 +00:00
|
|
|
/*-
|
|
|
|
* Copyright (c) 2004 Tim J. Robbins.
|
|
|
|
* All rights reserved.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
/*
 * "Set of characters" ADT implemented as a splay tree of extents, with
 * a lookup table cache to simplify looking up the first CS_CACHE_SIZE
 * characters (which are presumably more common than others).
 */
|
|
|
|
|
|
|
|
#include <sys/cdefs.h>
|
|
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
|
|
|
|
#include <assert.h>
|
2009-12-13 03:14:06 +00:00
|
|
|
#include <stdbool.h>
|
2004-07-09 02:08:07 +00:00
|
|
|
#include <stdlib.h>
|
2009-12-13 03:14:06 +00:00
|
|
|
#include <wchar.h>
|
|
|
|
#include <wctype.h>
|
2004-07-09 02:08:07 +00:00
|
|
|
#include "cset.h"
|
|
|
|
|
|
|
|
/* Splay-tree helpers private to this file; defined further down. */
static struct csnode	*cset_delete(struct csnode *t, wchar_t ch);
static __inline int	 cset_rangecmp(struct csnode *t, wchar_t ch);
static struct csnode	*cset_splay(struct csnode *t, wchar_t ch);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* cset_alloc --
|
|
|
|
* Allocate a set of characters.
|
|
|
|
*/
|
|
|
|
struct cset *
|
|
|
|
cset_alloc(void)
|
|
|
|
{
|
|
|
|
struct cset *cs;
|
|
|
|
|
|
|
|
if ((cs = malloc(sizeof(*cs))) == NULL)
|
|
|
|
return (NULL);
|
|
|
|
cs->cs_root = NULL;
|
|
|
|
cs->cs_classes = NULL;
|
|
|
|
cs->cs_havecache = false;
|
2004-07-10 06:28:18 +00:00
|
|
|
cs->cs_invert = false;
|
2004-07-09 02:08:07 +00:00
|
|
|
return (cs);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* cset_add --
|
|
|
|
* Add a character to the set.
|
|
|
|
*/
|
|
|
|
bool
|
|
|
|
cset_add(struct cset *cs, wchar_t ch)
|
|
|
|
{
|
|
|
|
struct csnode *csn, *ncsn;
|
|
|
|
wchar_t oval;
|
|
|
|
|
|
|
|
cs->cs_havecache = false;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Inserting into empty tree; new item becomes the root.
|
|
|
|
*/
|
|
|
|
if (cs->cs_root == NULL) {
|
|
|
|
csn = malloc(sizeof(*cs->cs_root));
|
|
|
|
if (csn == NULL)
|
|
|
|
return (false);
|
|
|
|
csn->csn_left = csn->csn_right = NULL;
|
|
|
|
csn->csn_min = csn->csn_max = ch;
|
|
|
|
cs->cs_root = csn;
|
|
|
|
return (true);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Splay to check whether the item already exists, and otherwise,
|
|
|
|
* where we should put it.
|
|
|
|
*/
|
|
|
|
csn = cs->cs_root = cset_splay(cs->cs_root, ch);
|
|
|
|
|
|
|
|
/*
|
2004-07-14 08:33:14 +00:00
|
|
|
* Avoid adding duplicate nodes.
|
2004-07-09 02:08:07 +00:00
|
|
|
*/
|
|
|
|
if (cset_rangecmp(csn, ch) == 0)
|
|
|
|
return (true);
|
|
|
|
|
|
|
|
/*
|
2004-07-14 08:33:14 +00:00
|
|
|
* Allocate a new node and make it the new root.
|
2004-07-09 02:08:07 +00:00
|
|
|
*/
|
|
|
|
ncsn = malloc(sizeof(*ncsn));
|
|
|
|
if (ncsn == NULL)
|
|
|
|
return (false);
|
|
|
|
ncsn->csn_min = ncsn->csn_max = ch;
|
|
|
|
if (cset_rangecmp(csn, ch) < 0) {
|
|
|
|
ncsn->csn_left = csn->csn_left;
|
|
|
|
ncsn->csn_right = csn;
|
|
|
|
csn->csn_left = NULL;
|
|
|
|
} else {
|
|
|
|
ncsn->csn_right = csn->csn_right;
|
|
|
|
ncsn->csn_left = csn;
|
|
|
|
csn->csn_right = NULL;
|
|
|
|
}
|
|
|
|
cs->cs_root = ncsn;
|
|
|
|
|
|
|
|
/*
|
2004-07-14 08:33:14 +00:00
|
|
|
* Coalesce with left and right neighbours if possible.
|
2004-07-09 02:08:07 +00:00
|
|
|
*/
|
2004-07-14 08:33:14 +00:00
|
|
|
if (ncsn->csn_left != NULL) {
|
|
|
|
ncsn->csn_left = cset_splay(ncsn->csn_left, ncsn->csn_min - 1);
|
|
|
|
if (ncsn->csn_left->csn_max == ncsn->csn_min - 1) {
|
|
|
|
oval = ncsn->csn_left->csn_min;
|
|
|
|
ncsn->csn_left = cset_delete(ncsn->csn_left,
|
|
|
|
ncsn->csn_left->csn_min);
|
|
|
|
ncsn->csn_min = oval;
|
|
|
|
}
|
2004-07-09 02:08:07 +00:00
|
|
|
}
|
2004-07-14 08:33:14 +00:00
|
|
|
if (ncsn->csn_right != NULL) {
|
|
|
|
ncsn->csn_right = cset_splay(ncsn->csn_right,
|
|
|
|
ncsn->csn_max + 1);
|
|
|
|
if (ncsn->csn_right->csn_min == ncsn->csn_max + 1) {
|
|
|
|
oval = ncsn->csn_right->csn_max;
|
|
|
|
ncsn->csn_right = cset_delete(ncsn->csn_right,
|
|
|
|
ncsn->csn_right->csn_min);
|
|
|
|
ncsn->csn_max = oval;
|
|
|
|
}
|
2004-07-09 02:08:07 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return (true);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* cset_in_hard --
|
|
|
|
* Determine whether a character is in the set without using
|
|
|
|
* the cache.
|
|
|
|
*/
|
|
|
|
bool
|
|
|
|
cset_in_hard(struct cset *cs, wchar_t ch)
|
|
|
|
{
|
|
|
|
struct csclass *csc;
|
|
|
|
|
|
|
|
for (csc = cs->cs_classes; csc != NULL; csc = csc->csc_next)
|
2011-10-14 10:43:55 +00:00
|
|
|
if (csc->csc_invert ^ (iswctype(ch, csc->csc_type) != 0))
|
2004-07-09 02:08:07 +00:00
|
|
|
return (cs->cs_invert ^ true);
|
|
|
|
if (cs->cs_root != NULL) {
|
|
|
|
cs->cs_root = cset_splay(cs->cs_root, ch);
|
2011-10-14 10:43:55 +00:00
|
|
|
return (cs->cs_invert ^ (cset_rangecmp(cs->cs_root, ch) == 0));
|
2004-07-09 02:08:07 +00:00
|
|
|
}
|
|
|
|
return (cs->cs_invert ^ false);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* cset_cache --
|
|
|
|
* Update the cache.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
cset_cache(struct cset *cs)
|
|
|
|
{
|
|
|
|
wchar_t i;
|
|
|
|
|
|
|
|
for (i = 0; i < CS_CACHE_SIZE; i++)
|
|
|
|
cs->cs_cache[i] = cset_in_hard(cs, i);
|
|
|
|
|
|
|
|
cs->cs_havecache = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* cset_invert --
|
|
|
|
* Invert the character set.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
cset_invert(struct cset *cs)
|
|
|
|
{
|
|
|
|
|
|
|
|
cs->cs_invert ^= true;
|
|
|
|
cs->cs_havecache = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* cset_addclass --
|
|
|
|
* Add a wctype()-style character class to the set, optionally
|
|
|
|
* inverting it.
|
|
|
|
*/
|
|
|
|
bool
|
|
|
|
cset_addclass(struct cset *cs, wctype_t type, bool invert)
|
|
|
|
{
|
|
|
|
struct csclass *csc;
|
|
|
|
|
|
|
|
csc = malloc(sizeof(*csc));
|
|
|
|
if (csc == NULL)
|
|
|
|
return (false);
|
|
|
|
csc->csc_type = type;
|
|
|
|
csc->csc_invert = invert;
|
|
|
|
csc->csc_next = cs->cs_classes;
|
|
|
|
cs->cs_classes = csc;
|
|
|
|
cs->cs_havecache = false;
|
|
|
|
return (true);
|
|
|
|
}
|
|
|
|
|
|
|
|
static __inline int
|
|
|
|
cset_rangecmp(struct csnode *t, wchar_t ch)
|
|
|
|
{
|
|
|
|
|
|
|
|
if (ch < t->csn_min)
|
|
|
|
return (-1);
|
|
|
|
if (ch > t->csn_max)
|
|
|
|
return (1);
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct csnode *
|
|
|
|
cset_splay(struct csnode *t, wchar_t ch)
|
|
|
|
{
|
|
|
|
struct csnode N, *l, *r, *y;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Based on public domain code from Sleator.
|
|
|
|
*/
|
|
|
|
|
|
|
|
assert(t != NULL);
|
|
|
|
|
|
|
|
N.csn_left = N.csn_right = NULL;
|
|
|
|
l = r = &N;
|
|
|
|
for (;;) {
|
|
|
|
if (cset_rangecmp(t, ch) < 0) {
|
|
|
|
if (t->csn_left != NULL &&
|
|
|
|
cset_rangecmp(t->csn_left, ch) < 0) {
|
|
|
|
y = t->csn_left;
|
|
|
|
t->csn_left = y->csn_right;
|
|
|
|
y->csn_right = t;
|
|
|
|
t = y;
|
|
|
|
}
|
|
|
|
if (t->csn_left == NULL)
|
|
|
|
break;
|
|
|
|
r->csn_left = t;
|
|
|
|
r = t;
|
|
|
|
t = t->csn_left;
|
|
|
|
} else if (cset_rangecmp(t, ch) > 0) {
|
|
|
|
if (t->csn_right != NULL &&
|
|
|
|
cset_rangecmp(t->csn_right, ch) > 0) {
|
|
|
|
y = t->csn_right;
|
|
|
|
t->csn_right = y->csn_left;
|
|
|
|
y->csn_left = t;
|
|
|
|
t = y;
|
|
|
|
}
|
|
|
|
if (t->csn_right == NULL)
|
|
|
|
break;
|
|
|
|
l->csn_right = t;
|
|
|
|
l = t;
|
|
|
|
t = t->csn_right;
|
|
|
|
} else
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
l->csn_right = t->csn_left;
|
|
|
|
r->csn_left = t->csn_right;
|
|
|
|
t->csn_left = N.csn_right;
|
|
|
|
t->csn_right = N.csn_left;
|
|
|
|
return (t);
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct csnode *
|
|
|
|
cset_delete(struct csnode *t, wchar_t ch)
|
|
|
|
{
|
|
|
|
struct csnode *x;
|
|
|
|
|
|
|
|
assert(t != NULL);
|
|
|
|
t = cset_splay(t, ch);
|
|
|
|
assert(cset_rangecmp(t, ch) == 0);
|
|
|
|
if (t->csn_left == NULL)
|
|
|
|
x = t->csn_right;
|
|
|
|
else {
|
|
|
|
x = cset_splay(t->csn_left, ch);
|
|
|
|
x->csn_right = t->csn_right;
|
|
|
|
}
|
|
|
|
free(t);
|
|
|
|
return x;
|
|
|
|
}
|