freebsd-skq/contrib/mdocml/dbm.c
bapt 6efa6107d2 Import mandoc cvs snapshot 20170121 (pre 1.14)
Note that mandoc does not use anymore sqlite3 but a home made database format
An important improvement has been made as well in makewhatis performance:
Tests on my laptop shows makewhatis on the entire system goes from 26s to 12s
2017-01-21 13:17:25 +00:00

481 lines
9.3 KiB
C

/* $Id: dbm.c,v 1.5 2016/10/18 22:27:25 schwarze Exp $ */
/*
* Copyright (c) 2016 Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
* Map-based version of the mandoc database, for read-only access.
* The interface is defined in "dbm.h".
*/
#include "config.h"
#include <assert.h>
#if HAVE_ENDIAN
#include <endian.h>
#elif HAVE_SYS_ENDIAN
#include <sys/endian.h>
#elif HAVE_NTOHL
#include <arpa/inet.h>
#endif
#if HAVE_ERR
#include <err.h>
#endif
#include <errno.h>
#include <regex.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "mansearch.h"
#include "dbm_map.h"
#include "dbm.h"
struct macro {
int32_t value;
int32_t pages;
};
struct page {
int32_t name;
int32_t sect;
int32_t arch;
int32_t desc;
int32_t file;
};
enum iter {
ITER_NONE = 0,
ITER_NAME,
ITER_SECT,
ITER_ARCH,
ITER_DESC,
ITER_MACRO
};
static struct macro *macros[MACRO_MAX];
static int32_t nvals[MACRO_MAX];
static struct page *pages;
static int32_t npages;
static enum iter iteration;
static struct dbm_res page_bytitle(enum iter, const struct dbm_match *);
static struct dbm_res page_byarch(const struct dbm_match *);
static struct dbm_res page_bymacro(int32_t, const struct dbm_match *);
static char *macro_bypage(int32_t, int32_t);
/*** top level functions **********************************************/
/*
* Open a disk-based mandoc database for read-only access.
* Map the pages and macros[] arrays.
* Return 0 on success. Return -1 and set errno on failure.
*/
int
dbm_open(const char *fname)
{
const int32_t *mp, *ep;
int32_t im;
if (dbm_map(fname) == -1)
return -1;
if ((npages = be32toh(*dbm_getint(4))) < 0) {
warnx("dbm_open(%s): Invalid number of pages: %d",
fname, npages);
goto fail;
}
pages = (struct page *)dbm_getint(5);
if ((mp = dbm_get(*dbm_getint(2))) == NULL) {
warnx("dbm_open(%s): Invalid offset of macros array", fname);
goto fail;
}
if (be32toh(*mp) != MACRO_MAX) {
warnx("dbm_open(%s): Invalid number of macros: %d",
fname, be32toh(*mp));
goto fail;
}
for (im = 0; im < MACRO_MAX; im++) {
if ((ep = dbm_get(*++mp)) == NULL) {
warnx("dbm_open(%s): Invalid offset of macro %d",
fname, im);
goto fail;
}
nvals[im] = be32toh(*ep);
macros[im] = (struct macro *)++ep;
}
return 0;
fail:
dbm_unmap();
errno = EFTYPE;
return -1;
}
void
dbm_close(void)
{
dbm_unmap();
}
/*** functions for handling pages *************************************/
int32_t
dbm_page_count(void)
{
return npages;
}
/*
* Give the caller pointers to the data for one manual page.
*/
struct dbm_page *
dbm_page_get(int32_t ip)
{
static struct dbm_page res;
assert(ip >= 0);
assert(ip < npages);
res.name = dbm_get(pages[ip].name);
if (res.name == NULL)
res.name = "(NULL)";
res.sect = dbm_get(pages[ip].sect);
if (res.sect == NULL)
res.sect = "(NULL)";
res.arch = pages[ip].arch ? dbm_get(pages[ip].arch) : NULL;
res.desc = dbm_get(pages[ip].desc);
if (res.desc == NULL)
res.desc = "(NULL)";
res.file = dbm_get(pages[ip].file);
if (res.file == NULL)
res.file = " (NULL)";
res.addr = dbm_addr(pages + ip);
return &res;
}
/*
* Functions to start filtered iterations over manual pages.
*/
void
dbm_page_byname(const struct dbm_match *match)
{
assert(match != NULL);
page_bytitle(ITER_NAME, match);
}
void
dbm_page_bysect(const struct dbm_match *match)
{
assert(match != NULL);
page_bytitle(ITER_SECT, match);
}
void
dbm_page_byarch(const struct dbm_match *match)
{
assert(match != NULL);
page_byarch(match);
}
void
dbm_page_bydesc(const struct dbm_match *match)
{
assert(match != NULL);
page_bytitle(ITER_DESC, match);
}
void
dbm_page_bymacro(int32_t im, const struct dbm_match *match)
{
assert(im >= 0);
assert(im < MACRO_MAX);
assert(match != NULL);
page_bymacro(im, match);
}
/*
* Return the number of the next manual page in the current iteration.
*/
struct dbm_res
dbm_page_next(void)
{
struct dbm_res res = {-1, 0};
switch(iteration) {
case ITER_NONE:
return res;
case ITER_ARCH:
return page_byarch(NULL);
case ITER_MACRO:
return page_bymacro(0, NULL);
default:
return page_bytitle(iteration, NULL);
}
}
/*
* Functions implementing the iteration over manual pages.
*/
static struct dbm_res
page_bytitle(enum iter arg_iter, const struct dbm_match *arg_match)
{
static const struct dbm_match *match;
static const char *cp;
static int32_t ip;
struct dbm_res res = {-1, 0};
assert(arg_iter == ITER_NAME || arg_iter == ITER_DESC ||
arg_iter == ITER_SECT);
/* Initialize for a new iteration. */
if (arg_match != NULL) {
iteration = arg_iter;
match = arg_match;
switch (iteration) {
case ITER_NAME:
cp = dbm_get(pages[0].name);
break;
case ITER_SECT:
cp = dbm_get(pages[0].sect);
break;
case ITER_DESC:
cp = dbm_get(pages[0].desc);
break;
default:
abort();
}
if (cp == NULL) {
iteration = ITER_NONE;
match = NULL;
cp = NULL;
ip = npages;
} else
ip = 0;
return res;
}
/* Search for a name. */
while (ip < npages) {
if (iteration == ITER_NAME)
cp++;
if (dbm_match(match, cp))
break;
cp = strchr(cp, '\0') + 1;
if (iteration == ITER_DESC)
ip++;
else if (*cp == '\0') {
cp++;
ip++;
}
}
/* Reached the end without a match. */
if (ip == npages) {
iteration = ITER_NONE;
match = NULL;
cp = NULL;
return res;
}
/* Found a match; save the quality for later retrieval. */
res.page = ip;
res.bits = iteration == ITER_NAME ? cp[-1] : 0;
/* Skip the remaining names of this page. */
if (++ip < npages) {
do {
cp++;
} while (cp[-1] != '\0' ||
(iteration != ITER_DESC && cp[-2] != '\0'));
}
return res;
}
static struct dbm_res
page_byarch(const struct dbm_match *arg_match)
{
static const struct dbm_match *match;
struct dbm_res res = {-1, 0};
static int32_t ip;
const char *cp;
/* Initialize for a new iteration. */
if (arg_match != NULL) {
iteration = ITER_ARCH;
match = arg_match;
ip = 0;
return res;
}
/* Search for an architecture. */
for ( ; ip < npages; ip++)
if (pages[ip].arch)
for (cp = dbm_get(pages[ip].arch);
*cp != '\0';
cp = strchr(cp, '\0') + 1)
if (dbm_match(match, cp)) {
res.page = ip++;
return res;
}
/* Reached the end without a match. */
iteration = ITER_NONE;
match = NULL;
return res;
}
static struct dbm_res
page_bymacro(int32_t arg_im, const struct dbm_match *arg_match)
{
static const struct dbm_match *match;
static const int32_t *pp;
static const char *cp;
static int32_t im, iv;
struct dbm_res res = {-1, 0};
assert(im >= 0);
assert(im < MACRO_MAX);
/* Initialize for a new iteration. */
if (arg_match != NULL) {
iteration = ITER_MACRO;
match = arg_match;
im = arg_im;
cp = nvals[im] ? dbm_get(macros[im]->value) : NULL;
pp = NULL;
iv = -1;
return res;
}
if (iteration != ITER_MACRO)
return res;
/* Find the next matching macro value. */
while (pp == NULL || *pp == 0) {
if (++iv == nvals[im]) {
iteration = ITER_NONE;
return res;
}
if (iv)
cp = strchr(cp, '\0') + 1;
if (dbm_match(match, cp))
pp = dbm_get(macros[im][iv].pages);
}
/* Found a matching page. */
res.page = (struct page *)dbm_get(*pp++) - pages;
return res;
}
/*** functions for handling macros ************************************/
int32_t
dbm_macro_count(int32_t im)
{
assert(im >= 0);
assert(im < MACRO_MAX);
return nvals[im];
}
struct dbm_macro *
dbm_macro_get(int32_t im, int32_t iv)
{
static struct dbm_macro macro;
assert(im >= 0);
assert(im < MACRO_MAX);
assert(iv >= 0);
assert(iv < nvals[im]);
macro.value = dbm_get(macros[im][iv].value);
macro.pp = dbm_get(macros[im][iv].pages);
return &macro;
}
/*
* Filtered iteration over macro entries.
*/
void
dbm_macro_bypage(int32_t im, int32_t ip)
{
assert(im >= 0);
assert(im < MACRO_MAX);
assert(ip != 0);
macro_bypage(im, ip);
}
char *
dbm_macro_next(void)
{
return macro_bypage(MACRO_MAX, 0);
}
static char *
macro_bypage(int32_t arg_im, int32_t arg_ip)
{
static const int32_t *pp;
static int32_t im, ip, iv;
/* Initialize for a new iteration. */
if (arg_im < MACRO_MAX && arg_ip != 0) {
im = arg_im;
ip = arg_ip;
pp = dbm_get(macros[im]->pages);
iv = 0;
return NULL;
}
if (im >= MACRO_MAX)
return NULL;
/* Search for the next value. */
while (iv < nvals[im]) {
if (*pp == ip)
break;
if (*pp == 0)
iv++;
pp++;
}
/* Reached the end without a match. */
if (iv == nvals[im]) {
im = MACRO_MAX;
ip = 0;
pp = NULL;
return NULL;
}
/* Found a match; skip the remaining pages of this entry. */
if (++iv < nvals[im])
while (*pp++ != 0)
continue;
return dbm_get(macros[im][iv - 1].value);
}