d291df1e3f
data structures that scale better with large character sets, instead of arrays indexed by character value: - Sets of characters to delete/squeeze are stored in a new "cset" structure, which is implemented as a splay tree of extents. This structure has the ability to store character classes (ala wctype(3)), but this is not currently fully utilized. - Mappings between characters are stored in a new "cmap" structure, which is also a splay tree. - The parser no longer builds arrays containing all the characters in a particular class; instead, next() determines them on-the-fly using nextwctype(3).
84 lines
2.3 KiB
C
84 lines
2.3 KiB
C
/*-
 * Copyright (c) 2004 Tim J. Robbins.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */
|
|
|
|
#ifndef CMAP_H
|
|
#define CMAP_H
|
|
|
|
#include <limits.h>
|
|
#include <stdbool.h>
|
|
#include <wchar.h>
|
|
|
|
/*
 * One entry of a character map: a single "from" -> "to" character pair
 * plus the two child links of the binary tree that holds the entries.
 * (The accompanying change description states the map is kept as a
 * splay tree; the balancing itself is not implemented in this header.)
 */
struct cmapnode {
	wint_t		cmn_from;	/* source character (presumably the tree key; confirm in cmap.c) */
	wint_t		cmn_to;		/* character that cmn_from maps to */
	struct cmapnode	*cmn_left;	/* left child, or NULL */
	struct cmapnode	*cmn_right;	/* right child, or NULL */
};
|
|
|
|
/*
 * A character map: a tree of cmapnode entries fronted by a small
 * direct-indexed cache for low character values.
 *
 * cmap_lookup() consults cm_cache only when cm_havecache is true and the
 * character is below CM_CACHE_SIZE; every other lookup goes through
 * cmap_lookup_hard().
 */
struct cmap {
#define	CM_CACHE_SIZE	128
	wint_t		cm_cache[CM_CACHE_SIZE]; /* cached results for characters 0..CM_CACHE_SIZE-1 */
	bool		cm_havecache;	/* true once cm_cache may be used by cmap_lookup() */
	struct cmapnode	*cm_root;	/* root of the mapping tree */
	/*
	 * Special value for cm_def.  NOTE(review): the name suggests that
	 * unmapped characters map to themselves; confirm in cmap.c.
	 * Parenthesized so the negative constant is safe in any expression.
	 */
#define	CM_DEF_SELF	(-2)
	wint_t		cm_def;		/* default mapping; see CM_DEF_SELF */
	wint_t		cm_min;		/* value reported by cmap_min() */
	wint_t		cm_max;		/* value reported by cmap_max() */
};
|
|
|
|
/*
 * Out-of-line character map interface; the definitions are not part of
 * this header, so the notes below marked "presumably" need confirming
 * against the implementation file.
 */

/* Create a new character map.  Presumably returns NULL on failure. */
struct cmap	*cmap_alloc(void);

/*
 * Add a mapping from one character to another.  Presumably returns
 * false if the mapping could not be stored.
 */
bool		 cmap_add(struct cmap *cm, wint_t from, wint_t to);

/*
 * Uncached lookup of the character "from"; cmap_lookup() falls back to
 * this whenever its cache cannot answer.
 */
wint_t		 cmap_lookup_hard(struct cmap *cm, wint_t from);

/*
 * Presumably fills cm_cache and sets cm_havecache so that cmap_lookup()
 * can take its fast path.
 */
void		 cmap_cache(struct cmap *cm);

/*
 * Presumably sets the default mapping (see CM_DEF_SELF) and returns the
 * previous one.
 */
wint_t		 cmap_default(struct cmap *cm, wint_t def);
|
|
|
|
static __inline wint_t
|
|
cmap_lookup(struct cmap *cm, wint_t from)
|
|
{
|
|
|
|
if (from < CM_CACHE_SIZE && cm->cm_havecache)
|
|
return (cm->cm_cache[from]);
|
|
return (cmap_lookup_hard(cm, from));
|
|
}
|
|
|
|
static __inline wint_t
|
|
cmap_min(struct cmap *cm)
|
|
{
|
|
|
|
return (cm->cm_min);
|
|
}
|
|
|
|
static __inline wint_t
|
|
cmap_max(struct cmap *cm)
|
|
{
|
|
|
|
return (cm->cm_max);
|
|
}
|
|
|
|
#endif
|