6efa6107d2
Note that mandoc does not use anymore sqlite3 but a home made database format An important improvement has been made as well in makewhatis performance: Tests on my laptop shows makewhatis on the entire system goes from 26s to 12s
481 lines
9.3 KiB
C
481 lines
9.3 KiB
C
/* $Id: dbm.c,v 1.5 2016/10/18 22:27:25 schwarze Exp $ */
|
|
/*
|
|
* Copyright (c) 2016 Ingo Schwarze <schwarze@openbsd.org>
|
|
*
|
|
* Permission to use, copy, modify, and distribute this software for any
|
|
* purpose with or without fee is hereby granted, provided that the above
|
|
* copyright notice and this permission notice appear in all copies.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
|
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
|
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
|
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
|
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
|
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
|
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
*
|
|
* Map-based version of the mandoc database, for read-only access.
|
|
* The interface is defined in "dbm.h".
|
|
*/
|
|
#include "config.h"
|
|
|
|
#include <assert.h>
|
|
#if HAVE_ENDIAN
|
|
#include <endian.h>
|
|
#elif HAVE_SYS_ENDIAN
|
|
#include <sys/endian.h>
|
|
#elif HAVE_NTOHL
|
|
#include <arpa/inet.h>
|
|
#endif
|
|
#if HAVE_ERR
|
|
#include <err.h>
|
|
#endif
|
|
#include <errno.h>
|
|
#include <regex.h>
|
|
#include <stdint.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#include "mansearch.h"
|
|
#include "dbm_map.h"
|
|
#include "dbm.h"
|
|
|
|
struct macro {
|
|
int32_t value;
|
|
int32_t pages;
|
|
};
|
|
|
|
struct page {
|
|
int32_t name;
|
|
int32_t sect;
|
|
int32_t arch;
|
|
int32_t desc;
|
|
int32_t file;
|
|
};
|
|
|
|
enum iter {
|
|
ITER_NONE = 0,
|
|
ITER_NAME,
|
|
ITER_SECT,
|
|
ITER_ARCH,
|
|
ITER_DESC,
|
|
ITER_MACRO
|
|
};
|
|
|
|
static struct macro *macros[MACRO_MAX];
|
|
static int32_t nvals[MACRO_MAX];
|
|
static struct page *pages;
|
|
static int32_t npages;
|
|
static enum iter iteration;
|
|
|
|
static struct dbm_res page_bytitle(enum iter, const struct dbm_match *);
|
|
static struct dbm_res page_byarch(const struct dbm_match *);
|
|
static struct dbm_res page_bymacro(int32_t, const struct dbm_match *);
|
|
static char *macro_bypage(int32_t, int32_t);
|
|
|
|
|
|
/*** top level functions **********************************************/
|
|
|
|
/*
|
|
* Open a disk-based mandoc database for read-only access.
|
|
* Map the pages and macros[] arrays.
|
|
* Return 0 on success. Return -1 and set errno on failure.
|
|
*/
|
|
int
|
|
dbm_open(const char *fname)
|
|
{
|
|
const int32_t *mp, *ep;
|
|
int32_t im;
|
|
|
|
if (dbm_map(fname) == -1)
|
|
return -1;
|
|
|
|
if ((npages = be32toh(*dbm_getint(4))) < 0) {
|
|
warnx("dbm_open(%s): Invalid number of pages: %d",
|
|
fname, npages);
|
|
goto fail;
|
|
}
|
|
pages = (struct page *)dbm_getint(5);
|
|
|
|
if ((mp = dbm_get(*dbm_getint(2))) == NULL) {
|
|
warnx("dbm_open(%s): Invalid offset of macros array", fname);
|
|
goto fail;
|
|
}
|
|
if (be32toh(*mp) != MACRO_MAX) {
|
|
warnx("dbm_open(%s): Invalid number of macros: %d",
|
|
fname, be32toh(*mp));
|
|
goto fail;
|
|
}
|
|
for (im = 0; im < MACRO_MAX; im++) {
|
|
if ((ep = dbm_get(*++mp)) == NULL) {
|
|
warnx("dbm_open(%s): Invalid offset of macro %d",
|
|
fname, im);
|
|
goto fail;
|
|
}
|
|
nvals[im] = be32toh(*ep);
|
|
macros[im] = (struct macro *)++ep;
|
|
}
|
|
return 0;
|
|
|
|
fail:
|
|
dbm_unmap();
|
|
errno = EFTYPE;
|
|
return -1;
|
|
}
|
|
|
|
void
|
|
dbm_close(void)
|
|
{
|
|
dbm_unmap();
|
|
}
|
|
|
|
|
|
/*** functions for handling pages *************************************/
|
|
|
|
int32_t
|
|
dbm_page_count(void)
|
|
{
|
|
return npages;
|
|
}
|
|
|
|
/*
|
|
* Give the caller pointers to the data for one manual page.
|
|
*/
|
|
struct dbm_page *
|
|
dbm_page_get(int32_t ip)
|
|
{
|
|
static struct dbm_page res;
|
|
|
|
assert(ip >= 0);
|
|
assert(ip < npages);
|
|
res.name = dbm_get(pages[ip].name);
|
|
if (res.name == NULL)
|
|
res.name = "(NULL)";
|
|
res.sect = dbm_get(pages[ip].sect);
|
|
if (res.sect == NULL)
|
|
res.sect = "(NULL)";
|
|
res.arch = pages[ip].arch ? dbm_get(pages[ip].arch) : NULL;
|
|
res.desc = dbm_get(pages[ip].desc);
|
|
if (res.desc == NULL)
|
|
res.desc = "(NULL)";
|
|
res.file = dbm_get(pages[ip].file);
|
|
if (res.file == NULL)
|
|
res.file = " (NULL)";
|
|
res.addr = dbm_addr(pages + ip);
|
|
return &res;
|
|
}
|
|
|
|
/*
|
|
* Functions to start filtered iterations over manual pages.
|
|
*/
|
|
void
|
|
dbm_page_byname(const struct dbm_match *match)
|
|
{
|
|
assert(match != NULL);
|
|
page_bytitle(ITER_NAME, match);
|
|
}
|
|
|
|
void
|
|
dbm_page_bysect(const struct dbm_match *match)
|
|
{
|
|
assert(match != NULL);
|
|
page_bytitle(ITER_SECT, match);
|
|
}
|
|
|
|
void
|
|
dbm_page_byarch(const struct dbm_match *match)
|
|
{
|
|
assert(match != NULL);
|
|
page_byarch(match);
|
|
}
|
|
|
|
void
|
|
dbm_page_bydesc(const struct dbm_match *match)
|
|
{
|
|
assert(match != NULL);
|
|
page_bytitle(ITER_DESC, match);
|
|
}
|
|
|
|
void
|
|
dbm_page_bymacro(int32_t im, const struct dbm_match *match)
|
|
{
|
|
assert(im >= 0);
|
|
assert(im < MACRO_MAX);
|
|
assert(match != NULL);
|
|
page_bymacro(im, match);
|
|
}
|
|
|
|
/*
|
|
* Return the number of the next manual page in the current iteration.
|
|
*/
|
|
struct dbm_res
|
|
dbm_page_next(void)
|
|
{
|
|
struct dbm_res res = {-1, 0};
|
|
|
|
switch(iteration) {
|
|
case ITER_NONE:
|
|
return res;
|
|
case ITER_ARCH:
|
|
return page_byarch(NULL);
|
|
case ITER_MACRO:
|
|
return page_bymacro(0, NULL);
|
|
default:
|
|
return page_bytitle(iteration, NULL);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Functions implementing the iteration over manual pages.
|
|
*/
|
|
static struct dbm_res
|
|
page_bytitle(enum iter arg_iter, const struct dbm_match *arg_match)
|
|
{
|
|
static const struct dbm_match *match;
|
|
static const char *cp;
|
|
static int32_t ip;
|
|
struct dbm_res res = {-1, 0};
|
|
|
|
assert(arg_iter == ITER_NAME || arg_iter == ITER_DESC ||
|
|
arg_iter == ITER_SECT);
|
|
|
|
/* Initialize for a new iteration. */
|
|
|
|
if (arg_match != NULL) {
|
|
iteration = arg_iter;
|
|
match = arg_match;
|
|
switch (iteration) {
|
|
case ITER_NAME:
|
|
cp = dbm_get(pages[0].name);
|
|
break;
|
|
case ITER_SECT:
|
|
cp = dbm_get(pages[0].sect);
|
|
break;
|
|
case ITER_DESC:
|
|
cp = dbm_get(pages[0].desc);
|
|
break;
|
|
default:
|
|
abort();
|
|
}
|
|
if (cp == NULL) {
|
|
iteration = ITER_NONE;
|
|
match = NULL;
|
|
cp = NULL;
|
|
ip = npages;
|
|
} else
|
|
ip = 0;
|
|
return res;
|
|
}
|
|
|
|
/* Search for a name. */
|
|
|
|
while (ip < npages) {
|
|
if (iteration == ITER_NAME)
|
|
cp++;
|
|
if (dbm_match(match, cp))
|
|
break;
|
|
cp = strchr(cp, '\0') + 1;
|
|
if (iteration == ITER_DESC)
|
|
ip++;
|
|
else if (*cp == '\0') {
|
|
cp++;
|
|
ip++;
|
|
}
|
|
}
|
|
|
|
/* Reached the end without a match. */
|
|
|
|
if (ip == npages) {
|
|
iteration = ITER_NONE;
|
|
match = NULL;
|
|
cp = NULL;
|
|
return res;
|
|
}
|
|
|
|
/* Found a match; save the quality for later retrieval. */
|
|
|
|
res.page = ip;
|
|
res.bits = iteration == ITER_NAME ? cp[-1] : 0;
|
|
|
|
/* Skip the remaining names of this page. */
|
|
|
|
if (++ip < npages) {
|
|
do {
|
|
cp++;
|
|
} while (cp[-1] != '\0' ||
|
|
(iteration != ITER_DESC && cp[-2] != '\0'));
|
|
}
|
|
return res;
|
|
}
|
|
|
|
static struct dbm_res
|
|
page_byarch(const struct dbm_match *arg_match)
|
|
{
|
|
static const struct dbm_match *match;
|
|
struct dbm_res res = {-1, 0};
|
|
static int32_t ip;
|
|
const char *cp;
|
|
|
|
/* Initialize for a new iteration. */
|
|
|
|
if (arg_match != NULL) {
|
|
iteration = ITER_ARCH;
|
|
match = arg_match;
|
|
ip = 0;
|
|
return res;
|
|
}
|
|
|
|
/* Search for an architecture. */
|
|
|
|
for ( ; ip < npages; ip++)
|
|
if (pages[ip].arch)
|
|
for (cp = dbm_get(pages[ip].arch);
|
|
*cp != '\0';
|
|
cp = strchr(cp, '\0') + 1)
|
|
if (dbm_match(match, cp)) {
|
|
res.page = ip++;
|
|
return res;
|
|
}
|
|
|
|
/* Reached the end without a match. */
|
|
|
|
iteration = ITER_NONE;
|
|
match = NULL;
|
|
return res;
|
|
}
|
|
|
|
static struct dbm_res
|
|
page_bymacro(int32_t arg_im, const struct dbm_match *arg_match)
|
|
{
|
|
static const struct dbm_match *match;
|
|
static const int32_t *pp;
|
|
static const char *cp;
|
|
static int32_t im, iv;
|
|
struct dbm_res res = {-1, 0};
|
|
|
|
assert(im >= 0);
|
|
assert(im < MACRO_MAX);
|
|
|
|
/* Initialize for a new iteration. */
|
|
|
|
if (arg_match != NULL) {
|
|
iteration = ITER_MACRO;
|
|
match = arg_match;
|
|
im = arg_im;
|
|
cp = nvals[im] ? dbm_get(macros[im]->value) : NULL;
|
|
pp = NULL;
|
|
iv = -1;
|
|
return res;
|
|
}
|
|
if (iteration != ITER_MACRO)
|
|
return res;
|
|
|
|
/* Find the next matching macro value. */
|
|
|
|
while (pp == NULL || *pp == 0) {
|
|
if (++iv == nvals[im]) {
|
|
iteration = ITER_NONE;
|
|
return res;
|
|
}
|
|
if (iv)
|
|
cp = strchr(cp, '\0') + 1;
|
|
if (dbm_match(match, cp))
|
|
pp = dbm_get(macros[im][iv].pages);
|
|
}
|
|
|
|
/* Found a matching page. */
|
|
|
|
res.page = (struct page *)dbm_get(*pp++) - pages;
|
|
return res;
|
|
}
|
|
|
|
|
|
/*** functions for handling macros ************************************/
|
|
|
|
int32_t
|
|
dbm_macro_count(int32_t im)
|
|
{
|
|
assert(im >= 0);
|
|
assert(im < MACRO_MAX);
|
|
return nvals[im];
|
|
}
|
|
|
|
struct dbm_macro *
|
|
dbm_macro_get(int32_t im, int32_t iv)
|
|
{
|
|
static struct dbm_macro macro;
|
|
|
|
assert(im >= 0);
|
|
assert(im < MACRO_MAX);
|
|
assert(iv >= 0);
|
|
assert(iv < nvals[im]);
|
|
macro.value = dbm_get(macros[im][iv].value);
|
|
macro.pp = dbm_get(macros[im][iv].pages);
|
|
return ¯o;
|
|
}
|
|
|
|
/*
|
|
* Filtered iteration over macro entries.
|
|
*/
|
|
void
|
|
dbm_macro_bypage(int32_t im, int32_t ip)
|
|
{
|
|
assert(im >= 0);
|
|
assert(im < MACRO_MAX);
|
|
assert(ip != 0);
|
|
macro_bypage(im, ip);
|
|
}
|
|
|
|
char *
|
|
dbm_macro_next(void)
|
|
{
|
|
return macro_bypage(MACRO_MAX, 0);
|
|
}
|
|
|
|
static char *
|
|
macro_bypage(int32_t arg_im, int32_t arg_ip)
|
|
{
|
|
static const int32_t *pp;
|
|
static int32_t im, ip, iv;
|
|
|
|
/* Initialize for a new iteration. */
|
|
|
|
if (arg_im < MACRO_MAX && arg_ip != 0) {
|
|
im = arg_im;
|
|
ip = arg_ip;
|
|
pp = dbm_get(macros[im]->pages);
|
|
iv = 0;
|
|
return NULL;
|
|
}
|
|
if (im >= MACRO_MAX)
|
|
return NULL;
|
|
|
|
/* Search for the next value. */
|
|
|
|
while (iv < nvals[im]) {
|
|
if (*pp == ip)
|
|
break;
|
|
if (*pp == 0)
|
|
iv++;
|
|
pp++;
|
|
}
|
|
|
|
/* Reached the end without a match. */
|
|
|
|
if (iv == nvals[im]) {
|
|
im = MACRO_MAX;
|
|
ip = 0;
|
|
pp = NULL;
|
|
return NULL;
|
|
}
|
|
|
|
/* Found a match; skip the remaining pages of this entry. */
|
|
|
|
if (++iv < nvals[im])
|
|
while (*pp++ != 0)
|
|
continue;
|
|
|
|
return dbm_get(macros[im][iv - 1].value);
|
|
}
|