Revert "sqlite3: Vendor import of sqlite3 3.35.5"

The source used should be sqlite-autoconf-3350500.tar.gz
instead of the sqlite-amalgamation-3350500.zip source used
by the port.

This reverts commit eccd5a4d39.
Cy Schubert 2021-05-06 13:08:52 -07:00
parent eccd5a4d39
commit 8b10604cd1
1916 changed files with 274087 additions and 959493 deletions


@ -1 +0,0 @@
compat


@ -1 +0,0 @@
compat/*


@ -1,6 +0,0 @@
The author disclaims copyright to this source code. In place of
a legal notice, here is a blessing:
* May you do good and not evil.
* May you find forgiveness for yourself and forgive others.
* May you share freely, never taking more than you give.

File diff suppressed because it is too large


@ -1,115 +0,0 @@
#!/usr/make
#
# Makefile for SQLITE
#
# This is a template makefile for SQLite. Most people prefer to
# use the autoconf generated "configure" script to generate the
# makefile automatically. But that does not work for everybody
# and in every situation. If you are having problems with the
# "configure" script, you might want to try this makefile as an
# alternative. Create a copy of this file, edit the parameters
# below and type "make".
#
#### The toplevel directory of the source tree. This is the directory
# that contains this "Makefile.in" and the "configure.in" script.
#
TOP = ../sqlite
#### C Compiler and options for use in building executables that
# will run on the platform that is doing the build.
#
BCC = gcc -g -O0
#BCC = /opt/ancic/bin/c89 -0
#### If the target operating system supports the "usleep()" system
# call, then define the HAVE_USLEEP macro for all C modules.
#
#USLEEP =
USLEEP = -DHAVE_USLEEP=1
#### If you want the SQLite library to be safe for use within a
# multi-threaded program, then define the following macro
# appropriately:
#
#THREADSAFE = -DTHREADSAFE=1
THREADSAFE = -DTHREADSAFE=0
#### Specify any extra linker options needed to make the library
# thread safe
#
THREADLIB = -lpthread -lm -ldl
#THREADLIB =
#### Specify any extra libraries needed to access required functions.
#
#TLIBS = -lrt # fdatasync on Solaris 8
TLIBS =
#### Leave SQLITE_DEBUG undefined for maximum speed. Use SQLITE_DEBUG=1
# to check for memory leaks. Use SQLITE_DEBUG=2 to print a log of all
# malloc()s and free()s in order to track down memory leaks.
#
# SQLite uses some expensive assert() statements in the inner loop.
# You can make the library go almost twice as fast if you compile
# with -DNDEBUG=1
#
OPTS += -DSQLITE_DEBUG=1
OPTS += -DSQLITE_ENABLE_WHERETRACE
OPTS += -DSQLITE_ENABLE_SELECTTRACE
#### The suffix to add to executable files. ".exe" for windows.
# Nothing for unix.
#
#EXE = .exe
EXE =
#### C Compile and options for use in building executables that
# will run on the target platform. This is usually the same
# as BCC, unless you are cross-compiling.
#
TCC = gcc -O0
#TCC = gcc -g -O0 -Wall
#TCC = gcc -g -O0 -Wall -fprofile-arcs -ftest-coverage
#TCC = /opt/mingw/bin/i386-mingw32-gcc -O6
#TCC = /opt/ansic/bin/c89 -O +z -Wl,-a,archive
#### Tools used to build a static library.
#
AR = ar cr
#AR = /opt/mingw/bin/i386-mingw32-ar cr
RANLIB = ranlib
#RANLIB = /opt/mingw/bin/i386-mingw32-ranlib
MKSHLIB = gcc -shared
SO = so
SHPREFIX = lib
# SO = dll
# SHPREFIX =
#### Extra compiler options needed for programs that use the TCL library.
#
TCL_FLAGS = -I/home/drh/tcl/include/tcl8.6
#### Linker options needed to link against the TCL library.
#
#LIBTCL = -ltcl -lm -ldl
LIBTCL = /home/drh/tcl/lib/libtcl8.6.a -lm -lpthread -ldl -lz
#### Additional objects for SQLite library when TCL support is enabled.
#TCLOBJ =
TCLOBJ = tclsqlite.o
#### Compiler options needed for programs that use the readline() library.
#
READLINE_FLAGS =
#READLINE_FLAGS = -DHAVE_READLINE=1 -I/usr/include/readline
#### Linker options needed by programs using readline() must link against.
#
LIBREADLINE =
#LIBREADLINE = -static -lreadline -ltermcap
# You should not have to change anything below this line
###############################################################################
include $(TOP)/main.mk

File diff suppressed because it is too large

README.md

@ -1,327 +0,0 @@
<h1 align="center">SQLite Source Repository</h1>
This repository contains the complete source code for the
[SQLite database engine](https://sqlite.org/). Some test scripts
are also included. However, many other test scripts
and most of the documentation are managed separately.
## Version Control
SQLite sources are managed using the
[Fossil](https://www.fossil-scm.org/), a distributed version control system
that was specifically designed and written to support SQLite development.
The [Fossil repository](https://sqlite.org/src/timeline) contains the urtext.
If you are reading this on GitHub or some other Git repository or service,
then you are looking at a mirror. The names of check-ins and
other artifacts in a Git mirror are different from the official
names for those objects. The official names for check-ins are
found in a footer on the check-in comment for authorized mirrors.
The official check-in name can also be seen in the `manifest.uuid` file
in the root of the tree. Always use the official name, not the
Git-name, when communicating about an SQLite check-in.
If you pulled your SQLite source code from a secondary source and want to
verify its integrity, there are hints on how to do that in the
[Verifying Code Authenticity](#vauth) section below.
## Obtaining The Code
If you do not want to use Fossil, you can download tarballs or ZIP
archives or [SQLite archives](https://sqlite.org/cli.html#sqlar) as follows:
* Latest trunk check-in as
[Tarball](https://www.sqlite.org/src/tarball/sqlite.tar.gz),
[ZIP-archive](https://www.sqlite.org/src/zip/sqlite.zip), or
[SQLite-archive](https://www.sqlite.org/src/sqlar/sqlite.sqlar).
* Latest release as
[Tarball](https://www.sqlite.org/src/tarball/sqlite.tar.gz?r=release),
[ZIP-archive](https://www.sqlite.org/src/zip/sqlite.zip?r=release), or
[SQLite-archive](https://www.sqlite.org/src/sqlar/sqlite.sqlar?r=release).
* For other check-ins, substitute an appropriate branch name or
tag or hash prefix in place of "release" in the URLs of the previous
bullet. Or browse the [timeline](https://www.sqlite.org/src/timeline)
to locate the check-in desired, click on its information page link,
then click on the "Tarball" or "ZIP Archive" links on the information
page.
If you do want to use Fossil to check out the source tree,
first install Fossil version 2.0 or later.
(Source tarballs and precompiled binaries available
[here](https://www.fossil-scm.org/fossil/uv/download.html). Fossil is
a stand-alone program. To install, simply download or build the single
executable file and put that file someplace on your $PATH.)
Then run commands like this:
mkdir ~/sqlite
cd ~/sqlite
fossil clone https://www.sqlite.org/src sqlite.fossil
fossil open sqlite.fossil
After setting up a repository using the steps above, you can always
update to the latest version using:
fossil update trunk ;# latest trunk check-in
fossil update release ;# latest official release
Or type "fossil ui" to get a web-based user interface.
## Compiling
First create a directory in which to place
the build products. It is recommended, but not required, that the
build directory be separate from the source directory. Cd into the
build directory and then from the build directory run the configure
script found at the root of the source tree. Then run "make".
For example:
tar xzf sqlite.tar.gz ;# Unpack the source tree into "sqlite"
mkdir bld ;# Build will occur in a sibling directory
cd bld ;# Change to the build directory
../sqlite/configure ;# Run the configure script
make ;# Run the makefile.
make sqlite3.c ;# Build the "amalgamation" source file
make test ;# Run some tests (requires Tcl)
See the makefile for additional targets.
The configure script uses autoconf 2.61 and libtool. If the configure
script does not work out for you, there is a generic makefile named
"Makefile.linux-gcc" in the top directory of the source tree that you
can copy and edit to suit your needs. Comments on the generic makefile
show what changes are needed.
## Using MSVC
On Windows, all applicable build products can be compiled with MSVC.
First open the command prompt window associated with the desired compiler
version (e.g. "Developer Command Prompt for VS2013"). Next, use NMAKE
with the provided "Makefile.msc" to build one of the supported targets.
For example:
mkdir bld
cd bld
nmake /f Makefile.msc TOP=..\sqlite
nmake /f Makefile.msc sqlite3.c TOP=..\sqlite
nmake /f Makefile.msc sqlite3.dll TOP=..\sqlite
nmake /f Makefile.msc sqlite3.exe TOP=..\sqlite
nmake /f Makefile.msc test TOP=..\sqlite
There are several build options that can be set via the NMAKE command
line. For example, to build for WinRT, simply add the "FOR_WINRT=1" argument
to the "sqlite3.dll" command line above. When debugging into the SQLite
code, adding the "DEBUG=1" argument to one of the above command lines is
recommended.
SQLite does not require [Tcl](http://www.tcl.tk/) to run, but a Tcl installation
is required by the makefiles (including those for MSVC). SQLite contains
a lot of generated code and Tcl is used to do much of that code generation.
## Source Code Tour
Most of the core source files are in the **src/** subdirectory. The
**src/** folder also contains files used to build the "testfixture" test
harness. The names of the source files used by "testfixture" all begin
with "test".
The **src/** folder also contains the "shell.c" file
which is the main program for the "sqlite3.exe"
[command-line shell](https://sqlite.org/cli.html) and
the "tclsqlite.c" file which implements the
[Tcl bindings](https://sqlite.org/tclsqlite.html) for SQLite.
(Historical note: SQLite began as a Tcl
extension and only later escaped to the wild as an independent library.)
Test scripts and programs are found in the **test/** subdirectory.
Additional test code is found in other source repositories.
See [How SQLite Is Tested](http://www.sqlite.org/testing.html) for
additional information.
The **ext/** subdirectory contains code for extensions. The
Full-text search engine is in **ext/fts3**. The R-Tree engine is in
**ext/rtree**. The **ext/misc** subdirectory contains a number of
smaller, single-file extensions, such as a REGEXP operator.
The **tool/** subdirectory contains various scripts and programs used
for building generated source code files or for testing or for generating
accessory programs such as "sqlite3_analyzer(.exe)".
### Generated Source Code Files
Several of the C-language source files used by SQLite are generated from
other sources rather than being typed in manually by a programmer. This
section will summarize those automatically-generated files. To create all
of the automatically-generated files, simply run "make target_source".
The "target_source" make target will create a subdirectory "tsrc/" and
fill it with all the source files needed to build SQLite, both
manually-edited files and automatically-generated files.
The SQLite interface is defined by the **sqlite3.h** header file, which is
generated from src/sqlite.h.in, ./manifest.uuid, and ./VERSION. The
[Tcl script](http://www.tcl.tk) at tool/mksqlite3h.tcl does the conversion.
The manifest.uuid file contains the SHA3 hash of the particular check-in
and is used to generate the SQLITE\_SOURCE\_ID macro. The VERSION file
contains the current SQLite version number. The sqlite3.h header is really
just a copy of src/sqlite.h.in with the source-id and version number inserted
at just the right spots. Note that comment text in the sqlite3.h file is
used to generate much of the SQLite API documentation. The Tcl scripts
used to generate that documentation are in a separate source repository.
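Since those two values end up in the generated header as the SQLITE\_SOURCE\_ID macro mentioned above (alongside the standard SQLITE\_VERSION macro), and are also reported at run time by sqlite3_libversion() and sqlite3_sourceid(), a quick way to inspect the result is a small sketch like the following (the file name version_check.c is illustrative, not part of the tree):

    /* version_check.c -- print the values stamped into the generated sqlite3.h */
    #include <stdio.h>
    #include "sqlite3.h"

    int main(void){
      /* Compile-time values come from VERSION and manifest.uuid. */
      printf("header version:  %s\n", SQLITE_VERSION);
      printf("header source:   %s\n", SQLITE_SOURCE_ID);
      /* Run-time values come from whatever library is actually linked in. */
      printf("library version: %s\n", sqlite3_libversion());
      printf("library source:  %s\n", sqlite3_sourceid());
      return 0;
    }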
The SQL language parser is **parse.c**, which is generated from a grammar in
the src/parse.y file. The conversion of "parse.y" into "parse.c" is done
by the [lemon](./doc/lemon.html) LALR(1) parser generator. The source code
for lemon is at tool/lemon.c. Lemon uses the tool/lempar.c file as a
template for generating its parser.
Lemon also generates the **parse.h** header file, at the same time it
generates parse.c.
The **opcodes.h** header file contains macros that define the numbers
corresponding to opcodes in the "VDBE" virtual machine. The opcodes.h
file is generated by scanning the src/vdbe.c source file. The
Tcl script at ./mkopcodeh.tcl does this scan and generates opcodes.h.
A second Tcl script, ./mkopcodec.tcl, then scans opcodes.h to generate
the **opcodes.c** source file, which contains a reverse mapping from
opcode-number to opcode-name that is used for EXPLAIN output.
The **keywordhash.h** header file contains the definition of a hash table
that maps SQL language keywords (ex: "CREATE", "SELECT", "INDEX", etc.) into
the numeric codes used by the parse.c parser. The keywordhash.h file is
generated by a C-language program at tool/mkkeywordhash.c.
The **pragma.h** header file contains various definitions used to parse
and implement the PRAGMA statements. The header is generated by a
script **tool/mkpragmatab.tcl**. If you want to add a new PRAGMA, edit
the **tool/mkpragmatab.tcl** file to insert the information needed by the
parser for your new PRAGMA, then run the script to regenerate the
**pragma.h** header file.
### The Amalgamation
All of the individual C source code and header files (both manually-edited
and automatically-generated) can be combined into a single big source file
**sqlite3.c** called "the amalgamation". The amalgamation is the recommended
way of using SQLite in a larger application. Combining all individual
source code files into a single big source code file allows the C compiler
to perform more cross-procedure analysis and generate better code. SQLite
runs about 5% faster when compiled from the amalgamation versus when compiled
from individual source files.
The amalgamation is generated by the tool/mksqlite3c.tcl Tcl script.
First, all of the individual source files must be gathered into the tsrc/
subdirectory (using the equivalent of "make target_source") then the
tool/mksqlite3c.tcl script is run to copy them all together in just the
right order while resolving internal "#include" references.
The amalgamation source file is more than 200K lines long. Some symbolic
debuggers (most notably MSVC) are unable to deal with files longer than 64K
lines. To work around this, a separate Tcl script, tool/split-sqlite3c.tcl,
can be run on the amalgamation to break it up into a single small C file
called **sqlite3-all.c** that does #include on about seven other files
named **sqlite3-1.c**, **sqlite3-2.c**, ..., **sqlite3-7.c**. In this way,
all of the source code is contained within a single translation unit so
that the compiler can do extra cross-procedure optimization, but no
individual source file exceeds 32K lines in length.
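As a rough illustration of what "using SQLite in a larger application" via the amalgamation looks like, the sketch below would be compiled together with the single sqlite3.c file (for example "gcc demo.c sqlite3.c -lpthread -ldl -lm"); the file name demo.c and the exact link flags are assumptions for the example, not something defined by this tree:

    /* demo.c -- minimal sketch of an application built against the amalgamation. */
    #include <stdio.h>
    #include "sqlite3.h"

    int main(void){
      sqlite3 *db;
      char *errmsg = 0;
      if( sqlite3_open(":memory:", &db)!=SQLITE_OK ){
        fprintf(stderr, "open failed: %s\n", sqlite3_errmsg(db));
        sqlite3_close(db);
        return 1;
      }
      /* One-shot exec interface: create a table and insert a row. */
      if( sqlite3_exec(db, "CREATE TABLE t(x); INSERT INTO t VALUES(42);",
                       0, 0, &errmsg)!=SQLITE_OK ){
        fprintf(stderr, "exec failed: %s\n", errmsg);
        sqlite3_free(errmsg);
      }
      sqlite3_close(db);
      return 0;
    }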
## How It All Fits Together
SQLite is modular in design.
See the [architectural description](http://www.sqlite.org/arch.html)
for details. Other documents that are useful in
helping to understand how SQLite works include the
[file format](http://www.sqlite.org/fileformat2.html) description,
the [virtual machine](http://www.sqlite.org/opcode.html) that runs
prepared statements, the description of
[how transactions work](http://www.sqlite.org/atomiccommit.html), and
the [overview of the query planner](http://www.sqlite.org/optoverview.html).
Years of effort have gone into optimizing SQLite, both
for small size and high performance. And optimizations tend to result in
complex code. So there is a lot of complexity in the current SQLite
implementation. It will not be the easiest library in the world to hack.
Key files:
* **sqlite.h.in** - This file defines the public interface to the SQLite
library. Readers will need to be familiar with this interface before
trying to understand how the library works internally.
* **sqliteInt.h** - this header file defines many of the data objects
used internally by SQLite. In addition to "sqliteInt.h", some
subsystems have their own header files.
* **parse.y** - This file describes the LALR(1) grammar that SQLite uses
to parse SQL statements, and the actions that are taken at each step
in the parsing process.
* **vdbe.c** - This file implements the virtual machine that runs
prepared statements. There are various helper files whose names
begin with "vdbe". The VDBE has access to the vdbeInt.h header file
which defines internal data objects. The rest of SQLite interacts
with the VDBE through an interface defined by vdbe.h.
* **where.c** - This file (together with its helper files named
by "where*.c") analyzes the WHERE clause and generates
virtual machine code to run queries efficiently. This file is
sometimes called the "query optimizer". It has its own private
header file, whereInt.h, that defines data objects used internally.
* **btree.c** - This file contains the implementation of the B-Tree
storage engine used by SQLite. The interface to the rest of the system
is defined by "btree.h". The "btreeInt.h" header defines objects
used internally by btree.c and not published to the rest of the system.
* **pager.c** - This file contains the "pager" implementation, the
module that implements transactions. The "pager.h" header file
defines the interface between pager.c and the rest of the system.
* **os_unix.c** and **os_win.c** - These two files implement the interface
between SQLite and the underlying operating system using the run-time
pluggable VFS interface.
* **shell.c.in** - This file is not part of the core SQLite library. This
is the file that, when linked against sqlite3.a, generates the
"sqlite3.exe" command-line shell. The "shell.c.in" file is transformed
into "shell.c" as part of the build process.
* **tclsqlite.c** - This file implements the Tcl bindings for SQLite. It
is not part of the core SQLite library. But as most of the tests in this
repository are written in Tcl, the Tcl language bindings are important.
* **test*.c** - Files in the src/ folder that begin with "test" go into
building the "testfixture.exe" program. The testfixture.exe program is
an enhanced Tcl shell. The testfixture.exe program runs scripts in the
test/ folder to validate the core SQLite code. The testfixture program
(and some other test programs too) is built and run when you type
"make test".
* **ext/misc/json1.c** - This file implements the various JSON functions
that are built into SQLite.
There are many other source files. Each has a succinct header comment that
describes its purpose and role within the larger system.
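To connect the pieces above, here is a hedged sketch of the public prepare/step/finalize sequence: sqlite3_prepare_v2() runs the parse.y-generated parser and code generator to build a VDBE program, and sqlite3_step() then executes that program row by row. It assumes an already-open sqlite3* handle named db and a table t(x); both are assumptions made for the example:

    #include <stdio.h>
    #include "sqlite3.h"

    /* Print every row of t, one sqlite3_step() call per row. */
    static int print_rows(sqlite3 *db){
      sqlite3_stmt *stmt;
      int rc = sqlite3_prepare_v2(db, "SELECT x FROM t", -1, &stmt, 0);
      if( rc!=SQLITE_OK ) return rc;
      while( (rc = sqlite3_step(stmt))==SQLITE_ROW ){
        printf("x = %d\n", sqlite3_column_int(stmt, 0));
      }
      sqlite3_finalize(stmt);   /* releases the underlying VDBE program */
      return rc==SQLITE_DONE ? SQLITE_OK : rc;
    }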
<a name="vauth"></a>
## Verifying Code Authenticity
The `manifest` file at the root directory of the source tree
contains either a SHA3-256 hash (for newer files) or a SHA1 hash (for
older files) for every source file in the repository.
The SHA3-256 hash of the `manifest`
file itself is the official name of the version of the source tree that you
have. The `manifest.uuid` file should contain the SHA3-256 hash of the
`manifest` file. If all of the above hash comparisons are correct, then
you can be confident that your source tree is authentic and unadulterated.
The format of the `manifest` file should be mostly self-explanatory, but
if you want details, they are available
[here](https://fossil-scm.org/fossil/doc/trunk/www/fileformat.wiki#manifest).
## Contacts
The main SQLite website is [http://www.sqlite.org/](http://www.sqlite.org/)
with geographically distributed backups at
[http://www2.sqlite.org/](http://www2.sqlite.org) and
[http://www3.sqlite.org/](http://www3.sqlite.org).


@ -1 +0,0 @@
3.35.5

aclocal.m4 vendored

File diff suppressed because it is too large

File diff suppressed because one or more lines are too long

Binary file not shown (before: 2.2 KiB image).

Binary file not shown (before: 79 KiB image).

File diff suppressed because it is too large


@ -1,11 +0,0 @@
This directory contains components used to build an autoconf-ready package
of the SQLite amalgamation: sqlite-autoconf-30XXXXXX.tar.gz
To build the autoconf amalgamation, run from the top-level:
./configure
make amalgamation-tarball
The amalgamation-tarball target (also available in "main.mk") runs the
script tool/mkautoconfamal.sh which does the work. Refer to that script
for details.


@ -1,285 +0,0 @@
#-----------------------------------------------------------------------
# Supports the following non-standard switches.
#
# --enable-threadsafe
# --enable-readline
# --enable-editline
# --enable-static-shell
# --enable-dynamic-extensions
#
AC_PREREQ(2.61)
AC_INIT(sqlite, --SQLITE-VERSION--, http://www.sqlite.org)
AC_CONFIG_SRCDIR([sqlite3.c])
AC_CONFIG_AUX_DIR([.])
# Use automake.
AM_INIT_AUTOMAKE([foreign])
AC_SYS_LARGEFILE
# Check for required programs.
AC_PROG_CC
AC_PROG_LIBTOOL
AC_PROG_MKDIR_P
# Check for library functions that SQLite can optionally use.
AC_CHECK_FUNCS([fdatasync usleep fullfsync localtime_r gmtime_r])
AC_FUNC_STRERROR_R
AC_CONFIG_FILES([Makefile sqlite3.pc])
BUILD_CFLAGS=
AC_SUBST(BUILD_CFLAGS)
#-------------------------------------------------------------------------
# Two options to enable readline compatible libraries:
#
# --enable-editline
# --enable-readline
#
# Both are enabled by default. If, after command line processing both are
# still enabled, the script searches for editline first and automatically
# disables readline if it is found. So, to use readline explicitly, the
# user must pass "--disable-editline". To disable command line editing
# support altogether, "--disable-editline --disable-readline".
#
# When searching for either library, check for headers before libraries
# as some distros supply packages that contain libraries but not header
# files, which come as a separate development package.
#
AC_ARG_ENABLE(editline, [AS_HELP_STRING([--enable-editline],[use BSD libedit])])
AC_ARG_ENABLE(readline, [AS_HELP_STRING([--enable-readline],[use readline])])
AS_IF([ test x"$enable_editline" != xno ],[
AC_CHECK_HEADERS([editline/readline.h],[
sLIBS=$LIBS
LIBS=""
AC_SEARCH_LIBS([readline],[edit],[
AC_DEFINE([HAVE_EDITLINE],1,Define to use BSD editline)
READLINE_LIBS="$LIBS -ltinfo"
enable_readline=no
],[],[-ltinfo])
AS_UNSET(ac_cv_search_readline)
LIBS=$sLIBS
])
])
AS_IF([ test x"$enable_readline" != xno ],[
AC_CHECK_HEADERS([readline/readline.h],[
sLIBS=$LIBS
LIBS=""
AC_SEARCH_LIBS(tgetent, termcap curses ncurses ncursesw, [], [])
AC_SEARCH_LIBS(readline,[readline edit], [
AC_DEFINE([HAVE_READLINE],1,Define to use readline or wrapper)
READLINE_LIBS=$LIBS
])
LIBS=$sLIBS
])
])
AC_SUBST(READLINE_LIBS)
#-----------------------------------------------------------------------
#-----------------------------------------------------------------------
# --enable-threadsafe
#
AC_ARG_ENABLE(threadsafe, [AS_HELP_STRING(
[--enable-threadsafe], [build a thread-safe library [default=yes]])],
[], [enable_threadsafe=yes])
if test x"$enable_threadsafe" == "xno"; then
BUILD_CFLAGS="$BUILD_CFLAGS -DSQLITE_THREADSAFE=0"
else
BUILD_CFLAGS="$BUILD_CFLAGS -D_REENTRANT=1 -DSQLITE_THREADSAFE=1"
AC_SEARCH_LIBS(pthread_create, pthread)
AC_SEARCH_LIBS(pthread_mutexattr_init, pthread)
fi
#-----------------------------------------------------------------------
#-----------------------------------------------------------------------
# --enable-dynamic-extensions
#
AC_ARG_ENABLE(dynamic-extensions, [AS_HELP_STRING(
[--enable-dynamic-extensions], [support loadable extensions [default=yes]])],
[], [enable_dynamic_extensions=yes])
if test x"$enable_dynamic_extensions" != "xno"; then
AC_SEARCH_LIBS(dlopen, dl)
else
BUILD_CFLAGS="$BUILD_CFLAGS -DSQLITE_OMIT_LOAD_EXTENSION=1"
fi
AC_MSG_CHECKING([for whether to support dynamic extensions])
AC_MSG_RESULT($enable_dynamic_extensions)
#-----------------------------------------------------------------------
#-----------------------------------------------------------------------
# --enable-math
#
AC_ARG_ENABLE(math, [AS_HELP_STRING(
[--enable-math], [SQL math functions [default=yes]])],
[], [enable_math=yes])
AC_MSG_CHECKING([SQL math functions])
if test x"$enable_math" = "xyes"; then
BUILD_CFLAGS="$BUILD_CFLAGS -DSQLITE_ENABLE_MATH_FUNCTIONS"
AC_MSG_RESULT([enabled])
AC_SEARCH_LIBS(ceil, m)
else
AC_MSG_RESULT([disabled])
fi
#-----------------------------------------------------------------------
#-----------------------------------------------------------------------
# --enable-fts4
#
AC_ARG_ENABLE(fts4, [AS_HELP_STRING(
[--enable-fts4], [include fts4 support [default=yes]])],
[], [enable_fts4=yes])
AC_MSG_CHECKING([FTS4 extension])
if test x"$enable_fts4" = "xyes"; then
BUILD_CFLAGS="$BUILD_CFLAGS -DSQLITE_ENABLE_FTS4"
AC_MSG_RESULT([enabled])
else
AC_MSG_RESULT([disabled])
fi
#-----------------------------------------------------------------------
#-----------------------------------------------------------------------
# --enable-fts3
#
AC_ARG_ENABLE(fts3, [AS_HELP_STRING(
[--enable-fts3], [include fts3 support [default=no]])],
[], [])
AC_MSG_CHECKING([FTS3 extension])
if test x"$enable_fts3" = "xyes" -a x"$enable_fts4" = "xno"; then
BUILD_CFLAGS="$BUILD_CFLAGS -DSQLITE_ENABLE_FTS3"
AC_MSG_RESULT([enabled])
else
AC_MSG_RESULT([disabled])
fi
#-----------------------------------------------------------------------
#-----------------------------------------------------------------------
# --enable-fts5
#
AC_ARG_ENABLE(fts5, [AS_HELP_STRING(
[--enable-fts5], [include fts5 support [default=yes]])],
[], [enable_fts5=yes])
AC_MSG_CHECKING([FTS5 extension])
if test x"$enable_fts5" = "xyes"; then
AC_MSG_RESULT([enabled])
AC_SEARCH_LIBS(log, m)
BUILD_CFLAGS="$BUILD_CFLAGS -DSQLITE_ENABLE_FTS5"
else
AC_MSG_RESULT([disabled])
fi
#-----------------------------------------------------------------------
#-----------------------------------------------------------------------
# --enable-json1
#
AC_ARG_ENABLE(json1, [AS_HELP_STRING(
[--enable-json1], [include json1 support [default=yes]])],
[],[enable_json1=yes])
AC_MSG_CHECKING([JSON functions])
if test x"$enable_json1" = "xyes"; then
BUILD_CFLAGS="$BUILD_CFLAGS -DSQLITE_ENABLE_JSON1"
AC_MSG_RESULT([enabled])
else
AC_MSG_RESULT([disabled])
fi
#-----------------------------------------------------------------------
#-----------------------------------------------------------------------
# --enable-rtree
#
AC_ARG_ENABLE(rtree, [AS_HELP_STRING(
[--enable-rtree], [include rtree support [default=yes]])],
[], [enable_rtree=yes])
AC_MSG_CHECKING([RTREE extension])
if test x"$enable_rtree" = "xyes"; then
BUILD_CFLAGS="$BUILD_CFLAGS -DSQLITE_ENABLE_RTREE -DSQLITE_ENABLE_GEOPOLY"
AC_MSG_RESULT([enabled])
else
AC_MSG_RESULT([disabled])
fi
#-----------------------------------------------------------------------
#-----------------------------------------------------------------------
# --enable-session
#
AC_ARG_ENABLE(session, [AS_HELP_STRING(
[--enable-session], [enable the session extension [default=no]])],
[], [])
AC_MSG_CHECKING([Session extension])
if test x"$enable_session" = "xyes"; then
BUILD_CFLAGS="$BUILD_CFLAGS -DSQLITE_ENABLE_SESSION -DSQLITE_ENABLE_PREUPDATE_HOOK"
AC_MSG_RESULT([enabled])
else
AC_MSG_RESULT([disabled])
fi
#-----------------------------------------------------------------------
#-----------------------------------------------------------------------
# --enable-debug
#
AC_ARG_ENABLE(debug, [AS_HELP_STRING(
[--enable-debug], [build with debugging features enabled [default=no]])],
[], [])
AC_MSG_CHECKING([Build type])
if test x"$enable_debug" = "xyes"; then
BUILD_CFLAGS="$BUILD_CFLAGS -DSQLITE_DEBUG -DSQLITE_ENABLE_SELECTTRACE -DSQLITE_ENABLE_WHERETRACE"
CFLAGS="-g -O0"
AC_MSG_RESULT([debug])
else
AC_MSG_RESULT([release])
fi
#-----------------------------------------------------------------------
#-----------------------------------------------------------------------
# --enable-static-shell
#
AC_ARG_ENABLE(static-shell, [AS_HELP_STRING(
[--enable-static-shell],
[statically link libsqlite3 into shell tool [default=yes]])],
[], [enable_static_shell=yes])
if test x"$enable_static_shell" = "xyes"; then
EXTRA_SHELL_OBJ=sqlite3-sqlite3.$OBJEXT
else
EXTRA_SHELL_OBJ=libsqlite3.la
fi
AC_SUBST(EXTRA_SHELL_OBJ)
#-----------------------------------------------------------------------
AC_CHECK_FUNCS(posix_fallocate)
AC_CHECK_HEADERS(zlib.h,[
AC_SEARCH_LIBS(deflate,z,[BUILD_CFLAGS="$BUILD_CFLAGS -DSQLITE_HAVE_ZLIB"])
])
AC_SEARCH_LIBS(system,,,[SHELL_CFLAGS="-DSQLITE_NOHAVE_SYSTEM"])
AC_SUBST(SHELL_CFLAGS)
#-----------------------------------------------------------------------
# UPDATE: Maybe it's better if users just set CFLAGS before invoking
# configure. This option doesn't really add much...
#
# --enable-tempstore
#
# AC_ARG_ENABLE(tempstore, [AS_HELP_STRING(
# [--enable-tempstore],
# [in-memory temporary tables (never, no, yes, always) [default=no]])],
# [], [enable_tempstore=no])
# AC_MSG_CHECKING([for whether or not to store temp tables in-memory])
# case "$enable_tempstore" in
# never ) TEMP_STORE=0 ;;
# no ) TEMP_STORE=1 ;;
# always ) TEMP_STORE=3 ;;
# yes ) TEMP_STORE=3 ;;
# * )
# TEMP_STORE=1
# enable_tempstore=yes
# ;;
# esac
# AC_MSG_RESULT($enable_tempstore)
# AC_SUBST(TEMP_STORE)
#-----------------------------------------------------------------------
AC_OUTPUT

compile Executable file

@ -0,0 +1,347 @@
#! /bin/sh
# Wrapper for compilers which do not understand '-c -o'.
scriptversion=2012-10-14.11; # UTC
# Copyright (C) 1999-2014 Free Software Foundation, Inc.
# Written by Tom Tromey <tromey@cygnus.com>.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
# configuration script generated by Autoconf, you may include it under
# the same distribution terms that you use for the rest of that program.
# This file is maintained in Automake, please report
# bugs to <bug-automake@gnu.org> or send patches to
# <automake-patches@gnu.org>.
nl='
'
# We need space, tab and new line, in precisely that order. Quoting is
# there to prevent tools from complaining about whitespace usage.
IFS=" "" $nl"
file_conv=
# func_file_conv build_file lazy
# Convert a $build file to $host form and store it in $file
# Currently only supports Windows hosts. If the determined conversion
# type is listed in (the comma separated) LAZY, no conversion will
# take place.
func_file_conv ()
{
file=$1
case $file in
/ | /[!/]*) # absolute file, and not a UNC file
if test -z "$file_conv"; then
# lazily determine how to convert abs files
case `uname -s` in
MINGW*)
file_conv=mingw
;;
CYGWIN*)
file_conv=cygwin
;;
*)
file_conv=wine
;;
esac
fi
case $file_conv/,$2, in
*,$file_conv,*)
;;
mingw/*)
file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'`
;;
cygwin/*)
file=`cygpath -m "$file" || echo "$file"`
;;
wine/*)
file=`winepath -w "$file" || echo "$file"`
;;
esac
;;
esac
}
# func_cl_dashL linkdir
# Make cl look for libraries in LINKDIR
func_cl_dashL ()
{
func_file_conv "$1"
if test -z "$lib_path"; then
lib_path=$file
else
lib_path="$lib_path;$file"
fi
linker_opts="$linker_opts -LIBPATH:$file"
}
# func_cl_dashl library
# Do a library search-path lookup for cl
func_cl_dashl ()
{
lib=$1
found=no
save_IFS=$IFS
IFS=';'
for dir in $lib_path $LIB
do
IFS=$save_IFS
if $shared && test -f "$dir/$lib.dll.lib"; then
found=yes
lib=$dir/$lib.dll.lib
break
fi
if test -f "$dir/$lib.lib"; then
found=yes
lib=$dir/$lib.lib
break
fi
if test -f "$dir/lib$lib.a"; then
found=yes
lib=$dir/lib$lib.a
break
fi
done
IFS=$save_IFS
if test "$found" != yes; then
lib=$lib.lib
fi
}
# func_cl_wrapper cl arg...
# Adjust compile command to suit cl
func_cl_wrapper ()
{
# Assume a capable shell
lib_path=
shared=:
linker_opts=
for arg
do
if test -n "$eat"; then
eat=
else
case $1 in
-o)
# configure might choose to run compile as 'compile cc -o foo foo.c'.
eat=1
case $2 in
*.o | *.[oO][bB][jJ])
func_file_conv "$2"
set x "$@" -Fo"$file"
shift
;;
*)
func_file_conv "$2"
set x "$@" -Fe"$file"
shift
;;
esac
;;
-I)
eat=1
func_file_conv "$2" mingw
set x "$@" -I"$file"
shift
;;
-I*)
func_file_conv "${1#-I}" mingw
set x "$@" -I"$file"
shift
;;
-l)
eat=1
func_cl_dashl "$2"
set x "$@" "$lib"
shift
;;
-l*)
func_cl_dashl "${1#-l}"
set x "$@" "$lib"
shift
;;
-L)
eat=1
func_cl_dashL "$2"
;;
-L*)
func_cl_dashL "${1#-L}"
;;
-static)
shared=false
;;
-Wl,*)
arg=${1#-Wl,}
save_ifs="$IFS"; IFS=','
for flag in $arg; do
IFS="$save_ifs"
linker_opts="$linker_opts $flag"
done
IFS="$save_ifs"
;;
-Xlinker)
eat=1
linker_opts="$linker_opts $2"
;;
-*)
set x "$@" "$1"
shift
;;
*.cc | *.CC | *.cxx | *.CXX | *.[cC]++)
func_file_conv "$1"
set x "$@" -Tp"$file"
shift
;;
*.c | *.cpp | *.CPP | *.lib | *.LIB | *.Lib | *.OBJ | *.obj | *.[oO])
func_file_conv "$1" mingw
set x "$@" "$file"
shift
;;
*)
set x "$@" "$1"
shift
;;
esac
fi
shift
done
if test -n "$linker_opts"; then
linker_opts="-link$linker_opts"
fi
exec "$@" $linker_opts
exit 1
}
eat=
case $1 in
'')
echo "$0: No command. Try '$0 --help' for more information." 1>&2
exit 1;
;;
-h | --h*)
cat <<\EOF
Usage: compile [--help] [--version] PROGRAM [ARGS]
Wrapper for compilers which do not understand '-c -o'.
Remove '-o dest.o' from ARGS, run PROGRAM with the remaining
arguments, and rename the output as expected.
If you are trying to build a whole package this is not the
right script to run: please start by reading the file 'INSTALL'.
Report bugs to <bug-automake@gnu.org>.
EOF
exit $?
;;
-v | --v*)
echo "compile $scriptversion"
exit $?
;;
cl | *[/\\]cl | cl.exe | *[/\\]cl.exe )
func_cl_wrapper "$@" # Doesn't return...
;;
esac
ofile=
cfile=
for arg
do
if test -n "$eat"; then
eat=
else
case $1 in
-o)
# configure might choose to run compile as 'compile cc -o foo foo.c'.
# So we strip '-o arg' only if arg is an object.
eat=1
case $2 in
*.o | *.obj)
ofile=$2
;;
*)
set x "$@" -o "$2"
shift
;;
esac
;;
*.c)
cfile=$1
set x "$@" "$1"
shift
;;
*)
set x "$@" "$1"
shift
;;
esac
fi
shift
done
if test -z "$ofile" || test -z "$cfile"; then
# If no '-o' option was seen then we might have been invoked from a
# pattern rule where we don't need one. That is ok -- this is a
# normal compilation that the losing compiler can handle. If no
# '.c' file was seen then we are probably linking. That is also
# ok.
exec "$@"
fi
# Name of file we expect compiler to create.
cofile=`echo "$cfile" | sed 's|^.*[\\/]||; s|^[a-zA-Z]:||; s/\.c$/.o/'`
# Create the lock directory.
# Note: use '[/\\:.-]' here to ensure that we don't use the same name
# that we are using for the .o file. Also, base the name on the expected
# object file name, since that is what matters with a parallel build.
lockdir=`echo "$cofile" | sed -e 's|[/\\:.-]|_|g'`.d
while true; do
if mkdir "$lockdir" >/dev/null 2>&1; then
break
fi
sleep 1
done
# FIXME: race condition here if user kills between mkdir and trap.
trap "rmdir '$lockdir'; exit 1" 1 2 15
# Run the compile.
"$@"
ret=$?
if test -f "$cofile"; then
test "$cofile" = "$ofile" || mv "$cofile" "$ofile"
elif test -f "${cofile}bj"; then
test "${cofile}bj" = "$ofile" || mv "${cofile}bj" "$ofile"
fi
rmdir "$lockdir"
exit $ret
# Local Variables:
# mode: shell-script
# sh-indentation: 2
# eval: (add-hook 'write-file-hooks 'time-stamp)
# time-stamp-start: "scriptversion="
# time-stamp-format: "%:y-%02m-%02d.%02H"
# time-stamp-time-zone: "UTC"
# time-stamp-end: "; # UTC"
# End:

config.guess vendored Normal file → Executable file

@ -1,8 +1,8 @@
#! /bin/sh
# Attempt to guess a canonical system name.
# Copyright 1992-2019 Free Software Foundation, Inc.
# Copyright 1992-2018 Free Software Foundation, Inc.
timestamp='2019-05-28'
timestamp='2018-02-24'
# This file is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
@ -50,7 +50,7 @@ version="\
GNU config.guess ($timestamp)
Originally written by Per Bothner.
Copyright 1992-2019 Free Software Foundation, Inc.
Copyright 1992-2018 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
@ -84,6 +84,8 @@ if test $# != 0; then
exit 1
fi
trap 'exit 1' 1 2 15
# CC_FOR_BUILD -- compiler used by this script. Note that the use of a
# compiler to aid in system detection is discouraged as it requires
# temporary files to be created and, as you can see below, it is a
@ -94,38 +96,34 @@ fi
# Portable tmp directory creation inspired by the Autoconf team.
tmp=
# shellcheck disable=SC2172
trap 'test -z "$tmp" || rm -fr "$tmp"' 0 1 2 13 15
set_cc_for_build() {
: "${TMPDIR=/tmp}"
# shellcheck disable=SC2039
{ tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } ||
{ test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir "$tmp" 2>/dev/null) ; } ||
{ tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir "$tmp" 2>/dev/null) && echo "Warning: creating insecure temp directory" >&2 ; } ||
{ echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; }
dummy=$tmp/dummy
case ${CC_FOR_BUILD-},${HOST_CC-},${CC-} in
,,) echo "int x;" > "$dummy.c"
for driver in cc gcc c89 c99 ; do
if ($driver -c -o "$dummy.o" "$dummy.c") >/dev/null 2>&1 ; then
CC_FOR_BUILD="$driver"
break
fi
done
if test x"$CC_FOR_BUILD" = x ; then
CC_FOR_BUILD=no_compiler_found
fi
;;
,,*) CC_FOR_BUILD=$CC ;;
,*,*) CC_FOR_BUILD=$HOST_CC ;;
esac
}
set_cc_for_build='
trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ;
trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ;
: ${TMPDIR=/tmp} ;
{ tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } ||
{ test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } ||
{ tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } ||
{ echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ;
dummy=$tmp/dummy ;
tmpfiles="$dummy.c $dummy.o $dummy.rel $dummy" ;
case $CC_FOR_BUILD,$HOST_CC,$CC in
,,) echo "int x;" > "$dummy.c" ;
for c in cc gcc c89 c99 ; do
if ($c -c -o "$dummy.o" "$dummy.c") >/dev/null 2>&1 ; then
CC_FOR_BUILD="$c"; break ;
fi ;
done ;
if test x"$CC_FOR_BUILD" = x ; then
CC_FOR_BUILD=no_compiler_found ;
fi
;;
,,*) CC_FOR_BUILD=$CC ;;
,*,*) CC_FOR_BUILD=$HOST_CC ;;
esac ; set_cc_for_build= ;'
# This is needed to find uname on a Pyramid OSx when run in the BSD universe.
# (ghazi@noc.rutgers.edu 1994-08-24)
if test -f /.attbin/uname ; then
if (test -f /.attbin/uname) >/dev/null 2>&1 ; then
PATH=$PATH:/.attbin ; export PATH
fi
@ -140,7 +138,7 @@ Linux|GNU|GNU/*)
# We could probably try harder.
LIBC=gnu
set_cc_for_build
eval "$set_cc_for_build"
cat <<-EOF > "$dummy.c"
#include <features.h>
#if defined(__UCLIBC__)
@ -201,7 +199,7 @@ case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in
os=netbsdelf
;;
arm*|i386|m68k|ns32k|sh3*|sparc|vax)
set_cc_for_build
eval "$set_cc_for_build"
if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \
| grep -q __ELF__
then
@ -239,7 +237,7 @@ case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in
# Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM:
# contains redundant information, the shorter form:
# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used.
echo "$machine-${os}${release}${abi-}"
echo "$machine-${os}${release}${abi}"
exit ;;
*:Bitrig:*:*)
UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'`
@ -391,7 +389,7 @@ case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in
echo i386-pc-auroraux"$UNAME_RELEASE"
exit ;;
i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*)
set_cc_for_build
eval "$set_cc_for_build"
SUN_ARCH=i386
# If there is a compiler, see if it is configured for 64-bit objects.
# Note that the Sun cc does not turn __LP64__ into 1 like gcc does.
@ -484,7 +482,7 @@ case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in
echo clipper-intergraph-clix"$UNAME_RELEASE"
exit ;;
mips:*:*:UMIPS | mips:*:*:RISCos)
set_cc_for_build
eval "$set_cc_for_build"
sed 's/^ //' << EOF > "$dummy.c"
#ifdef __cplusplus
#include <stdio.h> /* for printf() prototype */
@ -581,7 +579,7 @@ EOF
exit ;;
*:AIX:2:3)
if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then
set_cc_for_build
eval "$set_cc_for_build"
sed 's/^ //' << EOF > "$dummy.c"
#include <sys/systemcfg.h>
@ -662,7 +660,7 @@ EOF
esac
fi
if [ "$HP_ARCH" = "" ]; then
set_cc_for_build
eval "$set_cc_for_build"
sed 's/^ //' << EOF > "$dummy.c"
#define _HPUX_SOURCE
@ -702,7 +700,7 @@ EOF
esac
if [ "$HP_ARCH" = hppa2.0w ]
then
set_cc_for_build
eval "$set_cc_for_build"
# hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating
# 32-bit code. hppa64-hp-hpux* has the same kernel and a compiler
@ -728,7 +726,7 @@ EOF
echo ia64-hp-hpux"$HPUX_REV"
exit ;;
3050*:HI-UX:*:*)
set_cc_for_build
eval "$set_cc_for_build"
sed 's/^ //' << EOF > "$dummy.c"
#include <unistd.h>
int
@ -842,17 +840,6 @@ EOF
*:BSD/OS:*:*)
echo "$UNAME_MACHINE"-unknown-bsdi"$UNAME_RELEASE"
exit ;;
arm:FreeBSD:*:*)
UNAME_PROCESSOR=`uname -p`
set_cc_for_build
if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \
| grep -q __ARM_PCS_VFP
then
echo "${UNAME_PROCESSOR}"-unknown-freebsd"`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`"-gnueabi
else
echo "${UNAME_PROCESSOR}"-unknown-freebsd"`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`"-gnueabihf
fi
exit ;;
*:FreeBSD:*:*)
UNAME_PROCESSOR=`/usr/bin/uname -p`
case "$UNAME_PROCESSOR" in
@ -894,7 +881,7 @@ EOF
echo "$UNAME_MACHINE"-pc-uwin
exit ;;
amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*)
echo x86_64-pc-cygwin
echo x86_64-unknown-cygwin
exit ;;
prep*:SunOS:5.*:*)
echo powerpcle-unknown-solaris2"`echo "$UNAME_RELEASE"|sed -e 's/[^.]*//'`"
@ -907,8 +894,8 @@ EOF
# other systems with GNU libc and userland
echo "$UNAME_MACHINE-unknown-`echo "$UNAME_SYSTEM" | sed 's,^[^/]*/,,' | tr "[:upper:]" "[:lower:]"``echo "$UNAME_RELEASE"|sed -e 's/[-(].*//'`-$LIBC"
exit ;;
*:Minix:*:*)
echo "$UNAME_MACHINE"-unknown-minix
i*86:Minix:*:*)
echo "$UNAME_MACHINE"-pc-minix
exit ;;
aarch64:Linux:*:*)
echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
@ -935,7 +922,7 @@ EOF
echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
exit ;;
arm*:Linux:*:*)
set_cc_for_build
eval "$set_cc_for_build"
if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \
| grep -q __ARM_EABI__
then
@ -984,51 +971,23 @@ EOF
echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
exit ;;
mips:Linux:*:* | mips64:Linux:*:*)
set_cc_for_build
IS_GLIBC=0
test x"${LIBC}" = xgnu && IS_GLIBC=1
eval "$set_cc_for_build"
sed 's/^ //' << EOF > "$dummy.c"
#undef CPU
#undef mips
#undef mipsel
#undef mips64
#undef mips64el
#if ${IS_GLIBC} && defined(_ABI64)
LIBCABI=gnuabi64
#else
#if ${IS_GLIBC} && defined(_ABIN32)
LIBCABI=gnuabin32
#else
LIBCABI=${LIBC}
#endif
#endif
#if ${IS_GLIBC} && defined(__mips64) && defined(__mips_isa_rev) && __mips_isa_rev>=6
CPU=mipsisa64r6
#else
#if ${IS_GLIBC} && !defined(__mips64) && defined(__mips_isa_rev) && __mips_isa_rev>=6
CPU=mipsisa32r6
#else
#if defined(__mips64)
CPU=mips64
#else
CPU=mips
#endif
#endif
#endif
#undef ${UNAME_MACHINE}
#undef ${UNAME_MACHINE}el
#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
MIPS_ENDIAN=el
CPU=${UNAME_MACHINE}el
#else
#if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB)
MIPS_ENDIAN=
CPU=${UNAME_MACHINE}
#else
MIPS_ENDIAN=
CPU=
#endif
#endif
EOF
eval "`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^CPU\|^MIPS_ENDIAN\|^LIBCABI'`"
test "x$CPU" != x && { echo "$CPU${MIPS_ENDIAN}-unknown-linux-$LIBCABI"; exit; }
eval "`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^CPU'`"
test "x$CPU" != x && { echo "$CPU-unknown-linux-$LIBC"; exit; }
;;
mips64el:Linux:*:*)
echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
@ -1087,7 +1046,11 @@ EOF
echo "$UNAME_MACHINE"-dec-linux-"$LIBC"
exit ;;
x86_64:Linux:*:*)
echo "$UNAME_MACHINE"-pc-linux-"$LIBC"
if objdump -f /bin/sh | grep -q elf32-x86-64; then
echo "$UNAME_MACHINE"-pc-linux-"$LIBC"x32
else
echo "$UNAME_MACHINE"-pc-linux-"$LIBC"
fi
exit ;;
xtensa*:Linux:*:*)
echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
@ -1141,7 +1104,7 @@ EOF
*Pentium) UNAME_MACHINE=i586 ;;
*Pent*|*Celeron) UNAME_MACHINE=i686 ;;
esac
echo "$UNAME_MACHINE-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION}"
echo "$UNAME_MACHINE-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}{$UNAME_VERSION}"
exit ;;
i*86:*:3.2:*)
if test -f /usr/options/cb.name; then
@ -1325,39 +1288,38 @@ EOF
echo "$UNAME_MACHINE"-apple-rhapsody"$UNAME_RELEASE"
exit ;;
*:Darwin:*:*)
UNAME_PROCESSOR=`uname -p`
case $UNAME_PROCESSOR in
unknown) UNAME_PROCESSOR=powerpc ;;
esac
if command -v xcode-select > /dev/null 2> /dev/null && \
! xcode-select --print-path > /dev/null 2> /dev/null ; then
# Avoid executing cc if there is no toolchain installed as
# cc will be a stub that puts up a graphical alert
# prompting the user to install developer tools.
CC_FOR_BUILD=no_compiler_found
else
set_cc_for_build
UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown
eval "$set_cc_for_build"
if test "$UNAME_PROCESSOR" = unknown ; then
UNAME_PROCESSOR=powerpc
fi
if [ "$CC_FOR_BUILD" != no_compiler_found ]; then
if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
(CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \
grep IS_64BIT_ARCH >/dev/null
then
case $UNAME_PROCESSOR in
i386) UNAME_PROCESSOR=x86_64 ;;
powerpc) UNAME_PROCESSOR=powerpc64 ;;
esac
fi
# On 10.4-10.6 one might compile for PowerPC via gcc -arch ppc
if (echo '#ifdef __POWERPC__'; echo IS_PPC; echo '#endif') | \
(CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \
grep IS_PPC >/dev/null
then
UNAME_PROCESSOR=powerpc
if test "`echo "$UNAME_RELEASE" | sed -e 's/\..*//'`" -le 10 ; then
if [ "$CC_FOR_BUILD" != no_compiler_found ]; then
if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
(CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \
grep IS_64BIT_ARCH >/dev/null
then
case $UNAME_PROCESSOR in
i386) UNAME_PROCESSOR=x86_64 ;;
powerpc) UNAME_PROCESSOR=powerpc64 ;;
esac
fi
# On 10.4-10.6 one might compile for PowerPC via gcc -arch ppc
if (echo '#ifdef __POWERPC__'; echo IS_PPC; echo '#endif') | \
(CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \
grep IS_PPC >/dev/null
then
UNAME_PROCESSOR=powerpc
fi
fi
elif test "$UNAME_PROCESSOR" = i386 ; then
# uname -m returns i386 or x86_64
UNAME_PROCESSOR=$UNAME_MACHINE
# Avoid executing cc on OS X 10.9, as it ships with a stub
# that puts up a graphical alert prompting to install
# developer tools. Any system running Mac OS X 10.7 or
# later (Darwin 11 and later) is required to have a 64-bit
# processor. This is not true of the ARM version of Darwin
# that Apple uses in portable devices.
UNAME_PROCESSOR=x86_64
fi
echo "$UNAME_PROCESSOR"-apple-darwin"$UNAME_RELEASE"
exit ;;
@ -1400,7 +1362,6 @@ EOF
# "uname -m" is not consistent, so use $cputype instead. 386
# is converted to i386 for consistency with other x86
# operating systems.
# shellcheck disable=SC2154
if test "$cputype" = 386; then
UNAME_MACHINE=i386
else
@ -1457,148 +1418,8 @@ EOF
amd64:Isilon\ OneFS:*:*)
echo x86_64-unknown-onefs
exit ;;
*:Unleashed:*:*)
echo "$UNAME_MACHINE"-unknown-unleashed"$UNAME_RELEASE"
exit ;;
esac
# No uname command or uname output not recognized.
set_cc_for_build
cat > "$dummy.c" <<EOF
#ifdef _SEQUENT_
#include <sys/types.h>
#include <sys/utsname.h>
#endif
#if defined(ultrix) || defined(_ultrix) || defined(__ultrix) || defined(__ultrix__)
#if defined (vax) || defined (__vax) || defined (__vax__) || defined(mips) || defined(__mips) || defined(__mips__) || defined(MIPS) || defined(__MIPS__)
#include <signal.h>
#if defined(_SIZE_T_) || defined(SIGLOST)
#include <sys/utsname.h>
#endif
#endif
#endif
main ()
{
#if defined (sony)
#if defined (MIPSEB)
/* BFD wants "bsd" instead of "newsos". Perhaps BFD should be changed,
I don't know.... */
printf ("mips-sony-bsd\n"); exit (0);
#else
#include <sys/param.h>
printf ("m68k-sony-newsos%s\n",
#ifdef NEWSOS4
"4"
#else
""
#endif
); exit (0);
#endif
#endif
#if defined (NeXT)
#if !defined (__ARCHITECTURE__)
#define __ARCHITECTURE__ "m68k"
#endif
int version;
version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`;
if (version < 4)
printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version);
else
printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version);
exit (0);
#endif
#if defined (MULTIMAX) || defined (n16)
#if defined (UMAXV)
printf ("ns32k-encore-sysv\n"); exit (0);
#else
#if defined (CMU)
printf ("ns32k-encore-mach\n"); exit (0);
#else
printf ("ns32k-encore-bsd\n"); exit (0);
#endif
#endif
#endif
#if defined (__386BSD__)
printf ("i386-pc-bsd\n"); exit (0);
#endif
#if defined (sequent)
#if defined (i386)
printf ("i386-sequent-dynix\n"); exit (0);
#endif
#if defined (ns32000)
printf ("ns32k-sequent-dynix\n"); exit (0);
#endif
#endif
#if defined (_SEQUENT_)
struct utsname un;
uname(&un);
if (strncmp(un.version, "V2", 2) == 0) {
printf ("i386-sequent-ptx2\n"); exit (0);
}
if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? */
printf ("i386-sequent-ptx1\n"); exit (0);
}
printf ("i386-sequent-ptx\n"); exit (0);
#endif
#if defined (vax)
#if !defined (ultrix)
#include <sys/param.h>
#if defined (BSD)
#if BSD == 43
printf ("vax-dec-bsd4.3\n"); exit (0);
#else
#if BSD == 199006
printf ("vax-dec-bsd4.3reno\n"); exit (0);
#else
printf ("vax-dec-bsd\n"); exit (0);
#endif
#endif
#else
printf ("vax-dec-bsd\n"); exit (0);
#endif
#else
#if defined(_SIZE_T_) || defined(SIGLOST)
struct utsname un;
uname (&un);
printf ("vax-dec-ultrix%s\n", un.release); exit (0);
#else
printf ("vax-dec-ultrix\n"); exit (0);
#endif
#endif
#endif
#if defined(ultrix) || defined(_ultrix) || defined(__ultrix) || defined(__ultrix__)
#if defined(mips) || defined(__mips) || defined(__mips__) || defined(MIPS) || defined(__MIPS__)
#if defined(_SIZE_T_) || defined(SIGLOST)
struct utsname *un;
uname (&un);
printf ("mips-dec-ultrix%s\n", un.release); exit (0);
#else
printf ("mips-dec-ultrix\n"); exit (0);
#endif
#endif
#endif
#if defined (alliant) && defined (i860)
printf ("i860-alliant-bsd\n"); exit (0);
#endif
exit (1);
}
EOF
$CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null && SYSTEM_NAME=`$dummy` &&
{ echo "$SYSTEM_NAME"; exit; }
# Apollos put the system type in the environment.
test -d /usr/apollo && { echo "$ISP-apollo-$SYSTYPE"; exit; }
echo "$0: unable to guess system type" >&2
case "$UNAME_MACHINE:$UNAME_SYSTEM" in
@ -1652,7 +1473,7 @@ EOF
exit 1
# Local variables:
# eval: (add-hook 'before-save-hook 'time-stamp)
# eval: (add-hook 'write-file-functions 'time-stamp)
# time-stamp-start: "timestamp='"
# time-stamp-format: "%:y-%02m-%02d"
# time-stamp-end: "'"


@ -1,131 +0,0 @@
/* config.h.in. Generated from configure.ac by autoheader. */
/* Define to 1 if you have the <dlfcn.h> header file. */
#undef HAVE_DLFCN_H
/* Define to 1 if you have the `fdatasync' function. */
#undef HAVE_FDATASYNC
/* Define to 1 if you have the `gmtime_r' function. */
#undef HAVE_GMTIME_R
/* Define to 1 if the system has the type `int16_t'. */
#undef HAVE_INT16_T
/* Define to 1 if the system has the type `int32_t'. */
#undef HAVE_INT32_T
/* Define to 1 if the system has the type `int64_t'. */
#undef HAVE_INT64_T
/* Define to 1 if the system has the type `int8_t'. */
#undef HAVE_INT8_T
/* Define to 1 if the system has the type `intptr_t'. */
#undef HAVE_INTPTR_T
/* Define to 1 if you have the <inttypes.h> header file. */
#undef HAVE_INTTYPES_H
/* Define to 1 if you have the `isnan' function. */
#undef HAVE_ISNAN
/* Define to 1 if you have the `localtime_r' function. */
#undef HAVE_LOCALTIME_R
/* Define to 1 if you have the `localtime_s' function. */
#undef HAVE_LOCALTIME_S
/* Define to 1 if you have the <malloc.h> header file. */
#undef HAVE_MALLOC_H
/* Define to 1 if you have the `malloc_usable_size' function. */
#undef HAVE_MALLOC_USABLE_SIZE
/* Define to 1 if you have the <memory.h> header file. */
#undef HAVE_MEMORY_H
/* Define to 1 if you have the pread() function. */
#undef HAVE_PREAD
/* Define to 1 if you have the pread64() function. */
#undef HAVE_PREAD64
/* Define to 1 if you have the pwrite() function. */
#undef HAVE_PWRITE
/* Define to 1 if you have the pwrite64() function. */
#undef HAVE_PWRITE64
/* Define to 1 if you have the <stdint.h> header file. */
#undef HAVE_STDINT_H
/* Define to 1 if you have the <stdlib.h> header file. */
#undef HAVE_STDLIB_H
/* Define to 1 if you have the strchrnul() function */
#undef HAVE_STRCHRNUL
/* Define to 1 if you have the <strings.h> header file. */
#undef HAVE_STRINGS_H
/* Define to 1 if you have the <string.h> header file. */
#undef HAVE_STRING_H
/* Define to 1 if you have the <sys/stat.h> header file. */
#undef HAVE_SYS_STAT_H
/* Define to 1 if you have the <sys/types.h> header file. */
#undef HAVE_SYS_TYPES_H
/* Define to 1 if the system has the type `uint16_t'. */
#undef HAVE_UINT16_T
/* Define to 1 if the system has the type `uint32_t'. */
#undef HAVE_UINT32_T
/* Define to 1 if the system has the type `uint64_t'. */
#undef HAVE_UINT64_T
/* Define to 1 if the system has the type `uint8_t'. */
#undef HAVE_UINT8_T
/* Define to 1 if the system has the type `uintptr_t'. */
#undef HAVE_UINTPTR_T
/* Define to 1 if you have the <unistd.h> header file. */
#undef HAVE_UNISTD_H
/* Define to 1 if you have the `usleep' function. */
#undef HAVE_USLEEP
/* Define to 1 if you have the utime() library function. */
#undef HAVE_UTIME
/* Define to the sub-directory in which libtool stores uninstalled libraries.
*/
#undef LT_OBJDIR
/* Define to the address where bug reports for this package should be sent. */
#undef PACKAGE_BUGREPORT
/* Define to the full name of this package. */
#undef PACKAGE_NAME
/* Define to the full name and version of this package. */
#undef PACKAGE_STRING
/* Define to the one symbol short name of this package. */
#undef PACKAGE_TARNAME
/* Define to the version of this package. */
#undef PACKAGE_VERSION
/* Define to 1 if you have the ANSI C header files. */
#undef STDC_HEADERS
/* Number of bits in a file offset, on hosts where this is settable. */
#undef _FILE_OFFSET_BITS
/* Define for large files, on AIX-style hosts. */
#undef _LARGE_FILES

config.sub vendored Normal file → Executable file

File diff suppressed because it is too large

configure vendored

File diff suppressed because it is too large

File diff suppressed because it is too large


@ -1,679 +0,0 @@
# A Tk console widget for SQLite. Invoke sqlitecon::create with a window name,
# a prompt string, a title to set a new top-level window, and the SQLite
# database handle. For example:
#
# sqlitecon::create .sqlcon {sql:- } {SQL Console} db
#
# A toplevel window is created that allows you to type in SQL commands to
# be processed on the spot.
#
# A limited set of dot-commands are supported:
#
# .table
# .schema ?TABLE?
# .mode list|column|multicolumn|line
# .exit
#
# In addition, a new SQL function named "edit()" is created. This function
# takes a single text argument and returns a text result. Whenever
# the function is called, it pops up a new toplevel window containing a
# text editor screen initialized to the argument. When the "OK" button
# is pressed, whatever revised text is in the text editor is returned as
# the result of the edit() function. This allows text fields of SQL tables
# to be edited quickly and easily as follows:
#
# UPDATE table1 SET dscr = edit(dscr) WHERE rowid=15;
#
# Create a namespace to work in
#
namespace eval ::sqlitecon {
# do nothing
}
# Create a console widget named $w. The prompt string is $prompt.
# The title at the top of the window is $title. The database connection
# object is $db
#
proc sqlitecon::create {w prompt title db} {
upvar #0 $w.t v
if {[winfo exists $w]} {destroy $w}
if {[info exists v]} {unset v}
toplevel $w
wm title $w $title
wm iconname $w $title
frame $w.mb -bd 2 -relief raised
pack $w.mb -side top -fill x
menubutton $w.mb.file -text File -menu $w.mb.file.m
menubutton $w.mb.edit -text Edit -menu $w.mb.edit.m
pack $w.mb.file $w.mb.edit -side left -padx 8 -pady 1
set m [menu $w.mb.file.m -tearoff 0]
$m add command -label {Close} -command "destroy $w"
sqlitecon::create_child $w $prompt $w.mb.edit.m
set v(db) $db
$db function edit ::sqlitecon::_edit
}
# This routine creates a console as a child window within a larger
# window. It also creates an edit menu named "$editmenu" if $editmenu!="".
# The calling function is responsible for posting the edit menu.
#
proc sqlitecon::create_child {w prompt editmenu} {
upvar #0 $w.t v
if {$editmenu!=""} {
set m [menu $editmenu -tearoff 0]
$m add command -label Cut -command "sqlitecon::Cut $w.t"
$m add command -label Copy -command "sqlitecon::Copy $w.t"
$m add command -label Paste -command "sqlitecon::Paste $w.t"
$m add command -label {Clear Screen} -command "sqlitecon::Clear $w.t"
$m add separator
$m add command -label {Save As...} -command "sqlitecon::SaveFile $w.t"
catch {$editmenu config -postcommand "sqlitecon::EnableEditMenu $w"}
}
scrollbar $w.sb -orient vertical -command "$w.t yview"
pack $w.sb -side right -fill y
text $w.t -font fixed -yscrollcommand "$w.sb set"
pack $w.t -side right -fill both -expand 1
bindtags $w.t Sqlitecon
set v(editmenu) $editmenu
set v(history) 0
set v(historycnt) 0
set v(current) -1
set v(prompt) $prompt
set v(prior) {}
set v(plength) [string length $v(prompt)]
set v(x) 0
set v(y) 0
set v(mode) column
set v(header) on
$w.t mark set insert end
$w.t tag config ok -foreground blue
$w.t tag config err -foreground red
$w.t insert end $v(prompt)
$w.t mark set out 1.0
after idle "focus $w.t"
}
bind Sqlitecon <1> {sqlitecon::Button1 %W %x %y}
bind Sqlitecon <B1-Motion> {sqlitecon::B1Motion %W %x %y}
bind Sqlitecon <B1-Leave> {sqlitecon::B1Leave %W %x %y}
bind Sqlitecon <B1-Enter> {sqlitecon::cancelMotor %W}
bind Sqlitecon <ButtonRelease-1> {sqlitecon::cancelMotor %W}
bind Sqlitecon <KeyPress> {sqlitecon::Insert %W %A}
bind Sqlitecon <Left> {sqlitecon::Left %W}
bind Sqlitecon <Control-b> {sqlitecon::Left %W}
bind Sqlitecon <Right> {sqlitecon::Right %W}
bind Sqlitecon <Control-f> {sqlitecon::Right %W}
bind Sqlitecon <BackSpace> {sqlitecon::Backspace %W}
bind Sqlitecon <Control-h> {sqlitecon::Backspace %W}
bind Sqlitecon <Delete> {sqlitecon::Delete %W}
bind Sqlitecon <Control-d> {sqlitecon::Delete %W}
bind Sqlitecon <Home> {sqlitecon::Home %W}
bind Sqlitecon <Control-a> {sqlitecon::Home %W}
bind Sqlitecon <End> {sqlitecon::End %W}
bind Sqlitecon <Control-e> {sqlitecon::End %W}
bind Sqlitecon <Return> {sqlitecon::Enter %W}
bind Sqlitecon <KP_Enter> {sqlitecon::Enter %W}
bind Sqlitecon <Up> {sqlitecon::Prior %W}
bind Sqlitecon <Control-p> {sqlitecon::Prior %W}
bind Sqlitecon <Down> {sqlitecon::Next %W}
bind Sqlitecon <Control-n> {sqlitecon::Next %W}
bind Sqlitecon <Control-k> {sqlitecon::EraseEOL %W}
bind Sqlitecon <<Cut>> {sqlitecon::Cut %W}
bind Sqlitecon <<Copy>> {sqlitecon::Copy %W}
bind Sqlitecon <<Paste>> {sqlitecon::Paste %W}
bind Sqlitecon <<Clear>> {sqlitecon::Clear %W}
# Insert a single character at the insertion cursor
#
proc sqlitecon::Insert {w a} {
$w insert insert $a
$w yview insert
}
# Move the cursor one character to the left
#
proc sqlitecon::Left {w} {
upvar #0 $w v
scan [$w index insert] %d.%d row col
if {$col>$v(plength)} {
$w mark set insert "insert -1c"
}
}
# Erase the character to the left of the cursor
#
proc sqlitecon::Backspace {w} {
upvar #0 $w v
scan [$w index insert] %d.%d row col
if {$col>$v(plength)} {
$w delete {insert -1c}
}
}
# Erase to the end of the line
#
proc sqlitecon::EraseEOL {w} {
upvar #0 $w v
scan [$w index insert] %d.%d row col
if {$col>=$v(plength)} {
$w delete insert {insert lineend}
}
}
# Move the cursor one character to the right
#
proc sqlitecon::Right {w} {
$w mark set insert "insert +1c"
}
# Erase the character to the right of the cursor
#
proc sqlitecon::Delete w {
$w delete insert
}
# Move the cursor to the beginning of the current line
#
proc sqlitecon::Home w {
upvar #0 $w v
scan [$w index insert] %d.%d row col
$w mark set insert $row.$v(plength)
}
# Move the cursor to the end of the current line
#
proc sqlitecon::End w {
$w mark set insert {insert lineend}
}
# Add a line to the history
#
proc sqlitecon::addHistory {w line} {
upvar #0 $w v
if {$v(historycnt)>0} {
set last [lindex $v(history) [expr $v(historycnt)-1]]
if {[string compare $last $line]} {
lappend v(history) $line
incr v(historycnt)
}
} else {
set v(history) [list $line]
set v(historycnt) 1
}
set v(current) $v(historycnt)
}
# Called when "Enter" is pressed. Do something with the line
# of text that was entered.
#
proc sqlitecon::Enter w {
upvar #0 $w v
scan [$w index insert] %d.%d row col
set start $row.$v(plength)
set line [$w get $start "$start lineend"]
$w insert end \n
$w mark set out end
if {$v(prior)==""} {
set cmd $line
} else {
set cmd $v(prior)\n$line
}
if {[string index $cmd 0]=="." || [$v(db) complete $cmd]} {
regsub -all {\n} [string trim $cmd] { } cmd2
addHistory $w $cmd2
set rc [catch {DoCommand $w $cmd} res]
if {![winfo exists $w]} return
if {$rc} {
$w insert end $res\n err
} elseif {[string length $res]>0} {
$w insert end $res\n ok
}
set v(prior) {}
$w insert end $v(prompt)
} else {
set v(prior) $cmd
regsub -all {[^ ]} $v(prompt) . x
$w insert end $x
}
$w mark set insert end
$w mark set out {insert linestart}
$w yview insert
}
# Execute a single SQL command. Pay special attention to control
# directives that begin with "."
#
# The return value is the text output from the command, properly
# formatted.
#
proc sqlitecon::DoCommand {w cmd} {
upvar #0 $w v
set mode $v(mode)
set header $v(header)
if {[regexp {^(\.[a-z]+)} $cmd all word]} {
if {$word==".mode"} {
regexp {^.[a-z]+ +([a-z]+)} $cmd all v(mode)
return {}
} elseif {$word==".exit"} {
destroy [winfo toplevel $w]
return {}
} elseif {$word==".header"} {
regexp {^.[a-z]+ +([a-z]+)} $cmd all v(header)
return {}
} elseif {$word==".tables"} {
set mode multicolumn
set cmd {SELECT name FROM sqlite_master WHERE type='table'
UNION ALL
SELECT name FROM sqlite_temp_master WHERE type='table'}
$v(db) eval {PRAGMA database_list} {
if {$name!="temp" && $name!="main"} {
append cmd "UNION ALL SELECT name FROM $name.sqlite_master\
WHERE type='table'"
}
}
append cmd { ORDER BY 1}
} elseif {$word==".fullschema"} {
set pattern %
regexp {^.[a-z]+ +([^ ]+)} $cmd all pattern
set mode list
set header 0
set cmd "SELECT sql FROM sqlite_master WHERE tbl_name LIKE '$pattern'
AND sql NOT NULL UNION ALL SELECT sql FROM sqlite_temp_master
WHERE tbl_name LIKE '$pattern' AND sql NOT NULL"
$v(db) eval {PRAGMA database_list} {
if {$name!="temp" && $name!="main"} {
append cmd " UNION ALL SELECT sql FROM $name.sqlite_master\
WHERE tbl_name LIKE '$pattern' AND sql NOT NULL"
}
}
} elseif {$word==".schema"} {
set pattern %
regexp {^.[a-z]+ +([^ ]+)} $cmd all pattern
set mode list
set header 0
set cmd "SELECT sql FROM sqlite_master WHERE name LIKE '$pattern'
AND sql NOT NULL UNION ALL SELECT sql FROM sqlite_temp_master
WHERE name LIKE '$pattern' AND sql NOT NULL"
$v(db) eval {PRAGMA database_list} {
if {$name!="temp" && $name!="main"} {
append cmd " UNION ALL SELECT sql FROM $name.sqlite_master\
WHERE name LIKE '$pattern' AND sql NOT NULL"
}
}
} else {
return \
".exit\n.mode line|list|column\n.schema ?TABLENAME?\n.tables"
}
}
set res {}
if {$mode=="list"} {
$v(db) eval $cmd x {
set sep {}
foreach col $x(*) {
append res $sep$x($col)
set sep |
}
append res \n
}
if {[info exists x(*)] && $header} {
set sep {}
set hdr {}
foreach col $x(*) {
append hdr $sep$col
set sep |
}
set res $hdr\n$res
}
} elseif {[string range $mode 0 2]=="col"} {
set y {}
$v(db) eval $cmd x {
foreach col $x(*) {
if {![info exists cw($col)] || $cw($col)<[string length $x($col)]} {
set cw($col) [string length $x($col)]
}
lappend y $x($col)
}
}
if {[info exists x(*)] && $header} {
set hdr {}
set ln {}
set dash ---------------------------------------------------------------
append dash ------------------------------------------------------------
foreach col $x(*) {
if {![info exists cw($col)] || $cw($col)<[string length $col]} {
set cw($col) [string length $col]
}
lappend hdr $col
lappend ln [string range $dash 1 $cw($col)]
}
set y [concat $hdr $ln $y]
}
if {[info exists x(*)]} {
set format {}
set arglist {}
set arglist2 {}
set i 0
foreach col $x(*) {
lappend arglist x$i
append arglist2 " \$x$i"
incr i
append format " %-$cw($col)s"
}
set format [string trimleft $format]\n
if {[llength $arglist]>0} {
foreach $arglist $y "append res \[format [list $format] $arglist2\]"
}
}
} elseif {$mode=="multicolumn"} {
set y [$v(db) eval $cmd]
set max 0
foreach e $y {
if {$max<[string length $e]} {set max [string length $e]}
}
set ncol [expr {int(80/($max+2))}]
if {$ncol<1} {set ncol 1}
set nelem [llength $y]
set nrow [expr {($nelem+$ncol-1)/$ncol}]
set format "%-${max}s"
for {set i 0} {$i<$nrow} {incr i} {
set j $i
while 1 {
append res [format $format [lindex $y $j]]
incr j $nrow
if {$j>=$nelem} break
append res { }
}
append res \n
}
} else {
$v(db) eval $cmd x {
foreach col $x(*) {append res "$col = $x($col)\n"}
append res \n
}
}
return [string trimright $res]
}
# Change the line to the previous line
#
proc sqlitecon::Prior w {
upvar #0 $w v
if {$v(current)<=0} return
incr v(current) -1
set line [lindex $v(history) $v(current)]
sqlitecon::SetLine $w $line
}
# Change the line to the next line
#
proc sqlitecon::Next w {
upvar #0 $w v
if {$v(current)>=$v(historycnt)} return
incr v(current) 1
set line [lindex $v(history) $v(current)]
sqlitecon::SetLine $w $line
}
# Change the contents of the entry line
#
proc sqlitecon::SetLine {w line} {
upvar #0 $w v
scan [$w index insert] %d.%d row col
set start $row.$v(plength)
$w delete $start end
$w insert end $line
$w mark set insert end
$w yview insert
}
# Called when the mouse button is pressed at position $x,$y on
# the console widget.
#
proc sqlitecon::Button1 {w x y} {
global tkPriv
upvar #0 $w v
set v(mouseMoved) 0
set v(pressX) $x
set p [sqlitecon::nearestBoundry $w $x $y]
scan [$w index insert] %d.%d ix iy
scan $p %d.%d px py
if {$px==$ix} {
$w mark set insert $p
}
$w mark set anchor $p
focus $w
}
# Find the boundary between characters that is nearest
# to $x,$y
#
proc sqlitecon::nearestBoundry {w x y} {
set p [$w index @$x,$y]
set bb [$w bbox $p]
if {![string compare $bb ""]} {return $p}
if {($x-[lindex $bb 0])<([lindex $bb 2]/2)} {return $p}
$w index "$p + 1 char"
}
# This routine extends the selection to the point specified by $x,$y
#
proc sqlitecon::SelectTo {w x y} {
upvar #0 $w v
set cur [sqlitecon::nearestBoundry $w $x $y]
if {[catch {$w index anchor}]} {
$w mark set anchor $cur
}
set anchor [$w index anchor]
if {[$w compare $cur != $anchor] || (abs($v(pressX) - $x) >= 3)} {
if {$v(mouseMoved)==0} {
$w tag remove sel 0.0 end
}
set v(mouseMoved) 1
}
if {[$w compare $cur < anchor]} {
set first $cur
set last anchor
} else {
set first anchor
set last $cur
}
if {$v(mouseMoved)} {
$w tag remove sel 0.0 $first
$w tag add sel $first $last
$w tag remove sel $last end
update idletasks
}
}
# Called whenever the mouse moves while button-1 is held down.
#
proc sqlitecon::B1Motion {w x y} {
upvar #0 $w v
set v(y) $y
set v(x) $x
sqlitecon::SelectTo $w $x $y
}
# Called whenever the mouse leaves the boundaries of the widget
# while button 1 is held down.
#
proc sqlitecon::B1Leave {w x y} {
upvar #0 $w v
set v(y) $y
set v(x) $x
sqlitecon::motor $w
}
# This routine is called to automatically scroll the window when
# the mouse drags offscreen.
#
proc sqlitecon::motor w {
upvar #0 $w v
if {![winfo exists $w]} return
if {$v(y)>=[winfo height $w]} {
$w yview scroll 1 units
} elseif {$v(y)<0} {
$w yview scroll -1 units
} else {
return
}
sqlitecon::SelectTo $w $v(x) $v(y)
set v(timer) [after 50 sqlitecon::motor $w]
}
# This routine cancels the scrolling motor if it is active
#
proc sqlitecon::cancelMotor w {
upvar #0 $w v
catch {after cancel $v(timer)}
catch {unset v(timer)}
}
# Do a Copy operation on the stuff currently selected.
#
proc sqlitecon::Copy w {
if {![catch {set text [$w get sel.first sel.last]}]} {
clipboard clear -displayof $w
clipboard append -displayof $w $text
}
}
# Return 1 if the selection exists and is contained
# entirely on the input line. Return 2 if the selection
# exists but is not entirely on the input line. Return 0
# if the selection does not exist.
#
proc sqlitecon::canCut w {
set r [catch {
scan [$w index sel.first] %d.%d s1x s1y
scan [$w index sel.last] %d.%d s2x s2y
scan [$w index insert] %d.%d ix iy
}]
if {$r==1} {return 0}
if {$s1x==$ix && $s2x==$ix} {return 1}
return 2
}
# Do a Cut operation if possible. Cuts are only allowed
# if the current selection is entirely contained on the
# current input line.
#
proc sqlitecon::Cut w {
if {[sqlitecon::canCut $w]==1} {
sqlitecon::Copy $w
$w delete sel.first sel.last
}
}
# Do a paste operation.
#
proc sqlitecon::Paste w {
if {[sqlitecon::canCut $w]==1} {
$w delete sel.first sel.last
}
if {[catch {selection get -displayof $w -selection CLIPBOARD} topaste]
&& [catch {selection get -displayof $w -selection PRIMARY} topaste]} {
return
}
if {[info exists ::$w]} {
set prior 0
foreach line [split $topaste \n] {
if {$prior} {
sqlitecon::Enter $w
update
}
set prior 1
$w insert insert $line
}
} else {
$w insert insert $topaste
}
}
# Enable or disable entries in the Edit menu
#
proc sqlitecon::EnableEditMenu w {
upvar #0 $w.t v
set m $v(editmenu)
if {$m=="" || ![winfo exists $m]} return
switch [sqlitecon::canCut $w.t] {
0 {
$m entryconf Copy -state disabled
$m entryconf Cut -state disabled
}
1 {
$m entryconf Copy -state normal
$m entryconf Cut -state normal
}
2 {
$m entryconf Copy -state normal
$m entryconf Cut -state disabled
}
}
}
# Prompt the user for the name of a writable file. Then write the
# entire contents of the console screen to that file.
#
proc sqlitecon::SaveFile w {
set types {
{{Text Files} {.txt}}
{{All Files} *}
}
set f [tk_getSaveFile -filetypes $types -title "Write Screen To..."]
if {$f!=""} {
if {[catch {open $f w} fd]} {
tk_messageBox -type ok -icon error -message $fd
} else {
puts $fd [string trimright [$w get 1.0 end] \n]
close $fd
}
}
}
# Erase everything from the console above the insertion line.
#
proc sqlitecon::Clear w {
$w delete 1.0 {insert linestart}
}
# An in-line editor for SQL
#
proc sqlitecon::_edit {origtxt {title {}}} {
for {set i 0} {[winfo exists .ed$i]} {incr i} continue
set w .ed$i
toplevel $w
wm protocol $w WM_DELETE_WINDOW "$w.b.can invoke"
wm title $w {Inline SQL Editor}
frame $w.b
pack $w.b -side bottom -fill x
button $w.b.can -text Cancel -width 6 -command [list set ::$w 0]
button $w.b.ok -text OK -width 6 -command [list set ::$w 1]
button $w.b.cut -text Cut -width 6 -command [list ::sqlitecon::Cut $w.t]
button $w.b.copy -text Copy -width 6 -command [list ::sqlitecon::Copy $w.t]
button $w.b.paste -text Paste -width 6 -command [list ::sqlitecon::Paste $w.t]
set ::$w {}
pack $w.b.cut $w.b.copy $w.b.paste $w.b.can $w.b.ok\
-side left -padx 5 -pady 5 -expand 1
if {$title!=""} {
label $w.title -text $title
pack $w.title -side top -padx 5 -pady 5
}
text $w.t -bg white -fg black -yscrollcommand [list $w.sb set]
pack $w.t -side left -fill both -expand 1
scrollbar $w.sb -orient vertical -command [list $w.t yview]
pack $w.sb -side left -fill y
$w.t insert end $origtxt
vwait ::$w
if {[set ::$w]} {
set txt [string trimright [$w.t get 1.0 end]]
} else {
set txt $origtxt
}
destroy $w
return $txt
}

791
depcomp Executable file

@@ -0,0 +1,791 @@
#! /bin/sh
# depcomp - compile a program generating dependencies as side-effects
scriptversion=2016-01-11.22; # UTC
# Copyright (C) 1999-2017 Free Software Foundation, Inc.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
# configuration script generated by Autoconf, you may include it under
# the same distribution terms that you use for the rest of that program.
# Originally written by Alexandre Oliva <oliva@dcc.unicamp.br>.
case $1 in
'')
echo "$0: No command. Try '$0 --help' for more information." 1>&2
exit 1;
;;
-h | --h*)
cat <<\EOF
Usage: depcomp [--help] [--version] PROGRAM [ARGS]
Run PROGRAMS ARGS to compile a file, generating dependencies
as side-effects.
Environment variables:
depmode Dependency tracking mode.
source Source file read by 'PROGRAMS ARGS'.
object Object file output by 'PROGRAMS ARGS'.
DEPDIR directory where to store dependencies.
depfile Dependency file to output.
tmpdepfile Temporary file to use when outputting dependencies.
libtool Whether libtool is used (yes/no).
Report bugs to <bug-automake@gnu.org>.
EOF
exit $?
;;
-v | --v*)
echo "depcomp $scriptversion"
exit $?
;;
esac
# Get the directory component of the given path, and save it in the
# global variable '$dir'. Note that this directory component will
# be either empty or ending with a '/' character. This is deliberate.
set_dir_from ()
{
case $1 in
*/*) dir=`echo "$1" | sed -e 's|/[^/]*$|/|'`;;
*) dir=;;
esac
}
# Get the suffix-stripped basename of the given path, and save it in the
# global variable '$base'.
set_base_from ()
{
base=`echo "$1" | sed -e 's|^.*/||' -e 's/\.[^.]*$//'`
}
# If no dependency file was actually created by the compiler invocation,
# we still have to create a dummy depfile, to avoid errors with the
# Makefile "include basename.Plo" scheme.
make_dummy_depfile ()
{
echo "#dummy" > "$depfile"
}
# Factor out some common post-processing of the generated depfile.
# Requires the auxiliary global variable '$tmpdepfile' to be set.
aix_post_process_depfile ()
{
# If the compiler actually managed to produce a dependency file,
# post-process it.
if test -f "$tmpdepfile"; then
# Each line is of the form 'foo.o: dependency.h'.
# Do two passes, one to just change these to
# $object: dependency.h
# and one to simply output
# dependency.h:
# which is needed to avoid the deleted-header problem.
{ sed -e "s,^.*\.[$lower]*:,$object:," < "$tmpdepfile"
sed -e "s,^.*\.[$lower]*:[$tab ]*,," -e 's,$,:,' < "$tmpdepfile"
} > "$depfile"
rm -f "$tmpdepfile"
else
make_dummy_depfile
fi
}
# A tabulation character.
tab=' '
# A newline character.
nl='
'
# Character ranges might be problematic outside the C locale.
# These definitions help.
upper=ABCDEFGHIJKLMNOPQRSTUVWXYZ
lower=abcdefghijklmnopqrstuvwxyz
digits=0123456789
alpha=${upper}${lower}
if test -z "$depmode" || test -z "$source" || test -z "$object"; then
echo "depcomp: Variables source, object and depmode must be set" 1>&2
exit 1
fi
# Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po.
depfile=${depfile-`echo "$object" |
sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`}
tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`}
rm -f "$tmpdepfile"
# Avoid interferences from the environment.
gccflag= dashmflag=
# Some modes work just like other modes, but use different flags. We
# parameterize here, but still list the modes in the big case below,
# to make depend.m4 easier to write. Note that we *cannot* use a case
# here, because this file can only contain one case statement.
if test "$depmode" = hp; then
# HP compiler uses -M and no extra arg.
gccflag=-M
depmode=gcc
fi
if test "$depmode" = dashXmstdout; then
# This is just like dashmstdout with a different argument.
dashmflag=-xM
depmode=dashmstdout
fi
cygpath_u="cygpath -u -f -"
if test "$depmode" = msvcmsys; then
# This is just like msvisualcpp but w/o cygpath translation.
# Just convert the backslash-escaped backslashes to single forward
# slashes to satisfy depend.m4
cygpath_u='sed s,\\\\,/,g'
depmode=msvisualcpp
fi
if test "$depmode" = msvc7msys; then
# This is just like msvc7 but w/o cygpath translation.
# Just convert the backslash-escaped backslashes to single forward
# slashes to satisfy depend.m4
cygpath_u='sed s,\\\\,/,g'
depmode=msvc7
fi
if test "$depmode" = xlc; then
# IBM C/C++ Compilers xlc/xlC can output gcc-like dependency information.
gccflag=-qmakedep=gcc,-MF
depmode=gcc
fi
case "$depmode" in
gcc3)
## gcc 3 implements dependency tracking that does exactly what
## we want. Yay! Note: for some reason libtool 1.4 doesn't like
## it if -MD -MP comes after the -MF stuff. Hmm.
## Unfortunately, FreeBSD c89 acceptance of flags depends upon
## the command line argument order; so add the flags where they
## appear in depend2.am. Note that the slowdown incurred here
## affects only configure: in makefiles, %FASTDEP% shortcuts this.
for arg
do
case $arg in
-c) set fnord "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" "$arg" ;;
*) set fnord "$@" "$arg" ;;
esac
shift # fnord
shift # $arg
done
"$@"
stat=$?
if test $stat -ne 0; then
rm -f "$tmpdepfile"
exit $stat
fi
mv "$tmpdepfile" "$depfile"
;;
gcc)
## Note that this doesn't just cater to obsolete pre-3.x GCC compilers,
## but also to in-use compilers like IBM xlc/xlC and the HP C compiler.
## (see the conditional assignment to $gccflag above).
## There are various ways to get dependency output from gcc. Here's
## why we pick this rather obscure method:
## - Don't want to use -MD because we'd like the dependencies to end
## up in a subdir. Having to rename by hand is ugly.
## (We might end up doing this anyway to support other compilers.)
## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like
## -MM, not -M (despite what the docs say). Also, it might not be
## supported by the other compilers which use the 'gcc' depmode.
## - Using -M directly means running the compiler twice (even worse
## than renaming).
if test -z "$gccflag"; then
gccflag=-MD,
fi
"$@" -Wp,"$gccflag$tmpdepfile"
stat=$?
if test $stat -ne 0; then
rm -f "$tmpdepfile"
exit $stat
fi
rm -f "$depfile"
echo "$object : \\" > "$depfile"
# The second -e expression handles DOS-style file names with drive
# letters.
sed -e 's/^[^:]*: / /' \
-e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile"
## This next piece of magic avoids the "deleted header file" problem.
## The problem is that when a header file which appears in a .P file
## is deleted, the dependency causes make to die (because there is
## typically no way to rebuild the header). We avoid this by adding
## dummy dependencies for each header file. Too bad gcc doesn't do
## this for us directly.
## Some versions of gcc put a space before the ':'. On the theory
## that the space means something, we add a space to the output as
## well. hp depmode also adds that space, but also prefixes the VPATH
## to the object. Take care to not repeat it in the output.
## Some versions of the HPUX 10.20 sed can't process this invocation
## correctly. Breaking it into two sed invocations is a workaround.
tr ' ' "$nl" < "$tmpdepfile" \
| sed -e 's/^\\$//' -e '/^$/d' -e "s|.*$object$||" -e '/:$/d' \
| sed -e 's/$/ :/' >> "$depfile"
rm -f "$tmpdepfile"
;;
hp)
# This case exists only to let depend.m4 do its work. It works by
# looking at the text of this script. This case will never be run,
# since it is checked for above.
exit 1
;;
sgi)
if test "$libtool" = yes; then
"$@" "-Wp,-MDupdate,$tmpdepfile"
else
"$@" -MDupdate "$tmpdepfile"
fi
stat=$?
if test $stat -ne 0; then
rm -f "$tmpdepfile"
exit $stat
fi
rm -f "$depfile"
if test -f "$tmpdepfile"; then # yes, the sourcefile depends on other files
echo "$object : \\" > "$depfile"
# Clip off the initial element (the dependent). Don't try to be
# clever and replace this with sed code, as IRIX sed won't handle
# lines with more than a fixed number of characters (4096 in
# IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines;
# the IRIX cc adds comments like '#:fec' to the end of the
# dependency line.
tr ' ' "$nl" < "$tmpdepfile" \
| sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' \
| tr "$nl" ' ' >> "$depfile"
echo >> "$depfile"
# The second pass generates a dummy entry for each header file.
tr ' ' "$nl" < "$tmpdepfile" \
| sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \
>> "$depfile"
else
make_dummy_depfile
fi
rm -f "$tmpdepfile"
;;
xlc)
# This case exists only to let depend.m4 do its work. It works by
# looking at the text of this script. This case will never be run,
# since it is checked for above.
exit 1
;;
aix)
# The C for AIX Compiler uses -M and outputs the dependencies
# in a .u file. In older versions, this file always lives in the
# current directory. Also, the AIX compiler puts '$object:' at the
# start of each line; $object doesn't have directory information.
# Version 6 uses the directory in both cases.
set_dir_from "$object"
set_base_from "$object"
if test "$libtool" = yes; then
tmpdepfile1=$dir$base.u
tmpdepfile2=$base.u
tmpdepfile3=$dir.libs/$base.u
"$@" -Wc,-M
else
tmpdepfile1=$dir$base.u
tmpdepfile2=$dir$base.u
tmpdepfile3=$dir$base.u
"$@" -M
fi
stat=$?
if test $stat -ne 0; then
rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
exit $stat
fi
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
do
test -f "$tmpdepfile" && break
done
aix_post_process_depfile
;;
tcc)
# tcc (Tiny C Compiler) understands '-MD -MF file' since version 0.9.26
# FIXME: That version is still under development at the moment of writing.
# Make sure that this statement remains true also for stable, released
# versions.
# It will wrap lines (doesn't matter whether long or short) with a
# trailing '\', as in:
#
# foo.o : \
# foo.c \
# foo.h \
#
# It will put a trailing '\' even on the last line, and will use leading
# spaces rather than leading tabs (at least since its commit 0394caf7
# "Emit spaces for -MD").
"$@" -MD -MF "$tmpdepfile"
stat=$?
if test $stat -ne 0; then
rm -f "$tmpdepfile"
exit $stat
fi
rm -f "$depfile"
# Each non-empty line is of the form 'foo.o : \' or ' dep.h \'.
# We have to change lines of the first kind to '$object: \'.
sed -e "s|.*:|$object :|" < "$tmpdepfile" > "$depfile"
# And for each line of the second kind, we have to emit a 'dep.h:'
# dummy dependency, to avoid the deleted-header problem.
sed -n -e 's|^ *\(.*\) *\\$|\1:|p' < "$tmpdepfile" >> "$depfile"
rm -f "$tmpdepfile"
;;
## The order of this option in the case statement is important, since the
## shell code in configure will try each of these formats in the order
## listed in this file. A plain '-MD' option would be understood by many
## compilers, so we must ensure this comes after the gcc and icc options.
pgcc)
# Portland's C compiler understands '-MD'.
# Will always output deps to 'file.d' where file is the root name of the
# source file under compilation, even if file resides in a subdirectory.
# The object file name does not affect the name of the '.d' file.
# pgcc 10.2 will output
# foo.o: sub/foo.c sub/foo.h
# and will wrap long lines using '\' :
# foo.o: sub/foo.c ... \
# sub/foo.h ... \
# ...
set_dir_from "$object"
# Use the source, not the object, to determine the base name, since
# that's sadly what pgcc will do too.
set_base_from "$source"
tmpdepfile=$base.d
# For projects that build the same source file twice into different object
# files, the pgcc approach of using the *source* file root name can cause
# problems in parallel builds. Use a locking strategy to avoid stomping on
# the same $tmpdepfile.
lockdir=$base.d-lock
trap "
echo '$0: caught signal, cleaning up...' >&2
rmdir '$lockdir'
exit 1
" 1 2 13 15
numtries=100
i=$numtries
while test $i -gt 0; do
# mkdir is a portable test-and-set.
if mkdir "$lockdir" 2>/dev/null; then
# This process acquired the lock.
"$@" -MD
stat=$?
# Release the lock.
rmdir "$lockdir"
break
else
# If the lock is being held by a different process, wait
# until the winning process is done or we timeout.
while test -d "$lockdir" && test $i -gt 0; do
sleep 1
i=`expr $i - 1`
done
fi
i=`expr $i - 1`
done
trap - 1 2 13 15
if test $i -le 0; then
echo "$0: failed to acquire lock after $numtries attempts" >&2
echo "$0: check lockdir '$lockdir'" >&2
exit 1
fi
if test $stat -ne 0; then
rm -f "$tmpdepfile"
exit $stat
fi
rm -f "$depfile"
# Each line is of the form `foo.o: dependent.h',
# or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'.
# Do two passes, one to just change these to
# `$object: dependent.h' and one to simply `dependent.h:'.
sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile"
# Some versions of the HPUX 10.20 sed can't process this invocation
# correctly. Breaking it into two sed invocations is a workaround.
sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" \
| sed -e 's/$/ :/' >> "$depfile"
rm -f "$tmpdepfile"
;;
hp2)
# The "hp" stanza above does not work with aCC (C++) and HP's ia64
# compilers, which have integrated preprocessors. The correct option
# to use with these is +Maked; it writes dependencies to a file named
# 'foo.d', which lands next to the object file, wherever that
# happens to be.
# Much of this is similar to the tru64 case; see comments there.
set_dir_from "$object"
set_base_from "$object"
if test "$libtool" = yes; then
tmpdepfile1=$dir$base.d
tmpdepfile2=$dir.libs/$base.d
"$@" -Wc,+Maked
else
tmpdepfile1=$dir$base.d
tmpdepfile2=$dir$base.d
"$@" +Maked
fi
stat=$?
if test $stat -ne 0; then
rm -f "$tmpdepfile1" "$tmpdepfile2"
exit $stat
fi
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2"
do
test -f "$tmpdepfile" && break
done
if test -f "$tmpdepfile"; then
sed -e "s,^.*\.[$lower]*:,$object:," "$tmpdepfile" > "$depfile"
# Add 'dependent.h:' lines.
sed -ne '2,${
s/^ *//
s/ \\*$//
s/$/:/
p
}' "$tmpdepfile" >> "$depfile"
else
make_dummy_depfile
fi
rm -f "$tmpdepfile" "$tmpdepfile2"
;;
tru64)
# The Tru64 compiler uses -MD to generate dependencies as a side
# effect. 'cc -MD -o foo.o ...' puts the dependencies into 'foo.o.d'.
# At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put
# dependencies in 'foo.d' instead, so we check for that too.
# Subdirectories are respected.
set_dir_from "$object"
set_base_from "$object"
if test "$libtool" = yes; then
# Libtool generates 2 separate objects for the 2 libraries. These
# two compilations output dependencies in $dir.libs/$base.o.d and
# in $dir$base.o.d. We have to check for both files, because
# one of the two compilations can be disabled. We should prefer
# $dir$base.o.d over $dir.libs/$base.o.d because the latter is
# automatically cleaned when .libs/ is deleted, while ignoring
# the former would cause a distcleancheck panic.
tmpdepfile1=$dir$base.o.d # libtool 1.5
tmpdepfile2=$dir.libs/$base.o.d # Likewise.
tmpdepfile3=$dir.libs/$base.d # Compaq CCC V6.2-504
"$@" -Wc,-MD
else
tmpdepfile1=$dir$base.d
tmpdepfile2=$dir$base.d
tmpdepfile3=$dir$base.d
"$@" -MD
fi
stat=$?
if test $stat -ne 0; then
rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
exit $stat
fi
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
do
test -f "$tmpdepfile" && break
done
# Same post-processing that is required for AIX mode.
aix_post_process_depfile
;;
msvc7)
if test "$libtool" = yes; then
showIncludes=-Wc,-showIncludes
else
showIncludes=-showIncludes
fi
"$@" $showIncludes > "$tmpdepfile"
stat=$?
grep -v '^Note: including file: ' "$tmpdepfile"
if test $stat -ne 0; then
rm -f "$tmpdepfile"
exit $stat
fi
rm -f "$depfile"
echo "$object : \\" > "$depfile"
# The first sed program below extracts the file names and escapes
# backslashes for cygpath. The second sed program outputs the file
# name when reading, but also accumulates all include files in the
# hold buffer in order to output them again at the end. This only
# works with sed implementations that can handle large buffers.
sed < "$tmpdepfile" -n '
/^Note: including file: *\(.*\)/ {
s//\1/
s/\\/\\\\/g
p
}' | $cygpath_u | sort -u | sed -n '
s/ /\\ /g
s/\(.*\)/'"$tab"'\1 \\/p
s/.\(.*\) \\/\1:/
H
$ {
s/.*/'"$tab"'/
G
p
}' >> "$depfile"
echo >> "$depfile" # make sure the fragment doesn't end with a backslash
rm -f "$tmpdepfile"
;;
msvc7msys)
# This case exists only to let depend.m4 do its work. It works by
# looking at the text of this script. This case will never be run,
# since it is checked for above.
exit 1
;;
#nosideeffect)
# This comment above is used by automake to tell side-effect
# dependency tracking mechanisms from slower ones.
dashmstdout)
# Important note: in order to support this mode, a compiler *must*
# always write the preprocessed file to stdout, regardless of -o.
"$@" || exit $?
# Remove the call to Libtool.
if test "$libtool" = yes; then
while test "X$1" != 'X--mode=compile'; do
shift
done
shift
fi
# Remove '-o $object'.
IFS=" "
for arg
do
case $arg in
-o)
shift
;;
$object)
shift
;;
*)
set fnord "$@" "$arg"
shift # fnord
shift # $arg
;;
esac
done
test -z "$dashmflag" && dashmflag=-M
# Require at least two characters before searching for ':'
# in the target name. This is to cope with DOS-style filenames:
# a dependency such as 'c:/foo/bar' could be seen as target 'c' otherwise.
"$@" $dashmflag |
sed "s|^[$tab ]*[^:$tab ][^:][^:]*:[$tab ]*|$object: |" > "$tmpdepfile"
rm -f "$depfile"
cat < "$tmpdepfile" > "$depfile"
# Some versions of the HPUX 10.20 sed can't process this sed invocation
# correctly. Breaking it into two sed invocations is a workaround.
tr ' ' "$nl" < "$tmpdepfile" \
| sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \
| sed -e 's/$/ :/' >> "$depfile"
rm -f "$tmpdepfile"
;;
dashXmstdout)
# This case only exists to satisfy depend.m4. It is never actually
# run, as this mode is specially recognized in the preamble.
exit 1
;;
makedepend)
"$@" || exit $?
# Remove any Libtool call
if test "$libtool" = yes; then
while test "X$1" != 'X--mode=compile'; do
shift
done
shift
fi
# X makedepend
shift
cleared=no eat=no
for arg
do
case $cleared in
no)
set ""; shift
cleared=yes ;;
esac
if test $eat = yes; then
eat=no
continue
fi
case "$arg" in
-D*|-I*)
set fnord "$@" "$arg"; shift ;;
# Strip any option that makedepend may not understand. Remove
# the object too, otherwise makedepend will parse it as a source file.
-arch)
eat=yes ;;
-*|$object)
;;
*)
set fnord "$@" "$arg"; shift ;;
esac
done
obj_suffix=`echo "$object" | sed 's/^.*\././'`
touch "$tmpdepfile"
${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@"
rm -f "$depfile"
# makedepend may prepend the VPATH from the source file name to the object.
# No need to regex-escape $object, excess matching of '.' is harmless.
sed "s|^.*\($object *:\)|\1|" "$tmpdepfile" > "$depfile"
# Some versions of the HPUX 10.20 sed can't process the last invocation
# correctly. Breaking it into two sed invocations is a workaround.
sed '1,2d' "$tmpdepfile" \
| tr ' ' "$nl" \
| sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \
| sed -e 's/$/ :/' >> "$depfile"
rm -f "$tmpdepfile" "$tmpdepfile".bak
;;
cpp)
# Important note: in order to support this mode, a compiler *must*
# always write the preprocessed file to stdout.
"$@" || exit $?
# Remove the call to Libtool.
if test "$libtool" = yes; then
while test "X$1" != 'X--mode=compile'; do
shift
done
shift
fi
# Remove '-o $object'.
IFS=" "
for arg
do
case $arg in
-o)
shift
;;
$object)
shift
;;
*)
set fnord "$@" "$arg"
shift # fnord
shift # $arg
;;
esac
done
"$@" -E \
| sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \
-e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \
| sed '$ s: \\$::' > "$tmpdepfile"
rm -f "$depfile"
echo "$object : \\" > "$depfile"
cat < "$tmpdepfile" >> "$depfile"
sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile"
rm -f "$tmpdepfile"
;;
msvisualcpp)
# Important note: in order to support this mode, a compiler *must*
# always write the preprocessed file to stdout.
"$@" || exit $?
# Remove the call to Libtool.
if test "$libtool" = yes; then
while test "X$1" != 'X--mode=compile'; do
shift
done
shift
fi
IFS=" "
for arg
do
case "$arg" in
-o)
shift
;;
$object)
shift
;;
"-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI")
set fnord "$@"
shift
shift
;;
*)
set fnord "$@" "$arg"
shift
shift
;;
esac
done
"$@" -E 2>/dev/null |
sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::\1:p' | $cygpath_u | sort -u > "$tmpdepfile"
rm -f "$depfile"
echo "$object : \\" > "$depfile"
sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::'"$tab"'\1 \\:p' >> "$depfile"
echo "$tab" >> "$depfile"
sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::\1\::p' >> "$depfile"
rm -f "$tmpdepfile"
;;
msvcmsys)
# This case exists only to let depend.m4 do its work. It works by
# looking at the text of this script. This case will never be run,
# since it is checked for above.
exit 1
;;
none)
exec "$@"
;;
*)
echo "Unknown depmode $depmode" 1>&2
exit 1
;;
esac
exit 0
# Local Variables:
# mode: shell-script
# sh-indentation: 2
# eval: (add-hook 'write-file-hooks 'time-stamp)
# time-stamp-start: "scriptversion="
# time-stamp-format: "%:y-%02m-%02d.%02H"
# time-stamp-time-zone: "UTC0"
# time-stamp-end: "; # UTC"
# End:


@@ -1,87 +0,0 @@
SQLite's OS layer contains the following definitions used in F2FS related
calls:
#define F2FS_IOCTL_MAGIC 0xf5
#define F2FS_IOC_START_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 1)
#define F2FS_IOC_COMMIT_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 2)
#define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3)
#define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5)
#define F2FS_IOC_GET_FEATURES _IOR(F2FS_IOCTL_MAGIC, 12, u32)
#define F2FS_FEATURE_ATOMIC_WRITE 0x0004
After opening a database file on Linux (including Android), SQLite determines
whether or not a file supports F2FS atomic commits as follows:
u32 flags = 0;
rc = ioctl(fd, F2FS_IOC_GET_FEATURES, &flags);
if( rc==0 && (flags & F2FS_FEATURE_ATOMIC_WRITE) ){
/* File supports F2FS atomic commits */
}else{
/* File does NOT support F2FS atomic commits */
}
where "fd" is the file-descriptor open on the database file.
Usually, when writing to a database file that supports atomic commits, SQLite
accumulates the entire transaction in heap memory, deferring all writes to the
db file until the transaction is committed.
When it is time to commit a transaction on a file that supports atomic
commits, SQLite does:
/* Take an F_WRLCK lock on the database file. This prevents any other
** SQLite clients from reading or writing the file until the lock
** is released. */
rc = fcntl(fd, F_SETLK, ...);
if( rc!=0 ) goto failed;
rc = ioctl(fd, F2FS_IOC_START_ATOMIC_WRITE);
if( rc!=0 ) goto fallback_to_legacy_journal_commit;
foreach (dirty page){
rc = write(fd, ...dirty page...);
if( rc!=0 ){
ioctl(fd, F2FS_IOC_ABORT_VOLATILE_WRITE);
goto fallback_to_legacy_journal_commit;
}
}
rc = ioctl(fd, F2FS_IOC_COMMIT_ATOMIC_WRITE);
if( rc!=0 ){
ioctl(fd, F2FS_IOC_ABORT_VOLATILE_WRITE);
goto fallback_to_legacy_journal_commit;
}
/* If we get here, the transaction has been successfully
** committed to persistent storage. The following call
** relinquishes the F_WRLCK lock. */
fcntl(fd, F_SETLK, ...);
Assumptions:
1. After either of the F2FS_IOC_ABORT_VOLATILE_WRITE calls return,
the database file is in the state that it was in before
F2FS_IOC_START_ATOMIC_WRITE was invoked. Even if the ioctl()
fails - we're ignoring the return code.
This is true regardless of the type of error that occurred in
ioctl() or write().
2. If the system fails before the F2FS_IOC_COMMIT_ATOMIC_WRITE is
completed, then following a reboot the database file is in the
state that it was in before F2FS_IOC_START_ATOMIC_WRITE was invoked.
Or, if the write was committed right before the system failed, in a
state indicating that all write() calls were successfully committed
to persistent storage before the failure occurred.
3. If the process crashes before the F2FS_IOC_COMMIT_ATOMIC_WRITE is
completed then the file is automatically restored to the state that
it was in before F2FS_IOC_START_ATOMIC_WRITE was called. This occurs
before the posix advisory lock is automatically dropped - there is
no chance that another client will be able to read the file in a
half-committed state before the rollback operation occurs.

File diff suppressed because it is too large


@@ -1,76 +0,0 @@
*** Throughout this document, a page is deemed to have been synced
automatically as soon as it is written when PRAGMA synchronous=OFF.
Otherwise, the page is not synced until the xSync method of the VFS
is called successfully on the file containing the page.
*** Definition: A page of the database file is said to be "overwriteable" if
one or more of the following are true about the page:
(a) The original content of the page as it was at the beginning of
the transaction has been written into the rollback journal and
synced.
(b) The page was a freelist leaf page at the start of the transaction.
(c) The page number is greater than the largest page that existed in
the database file at the start of the transaction.
(1) A page of the database file is never overwritten unless one of the
following is true:
(a) The page and all other pages on the same sector are overwriteable.
(b) The atomic page write optimization is enabled, and the entire
transaction other than the update of the transaction sequence
number consists of a single page change.
(2) The content of a page written into the rollback journal exactly matches
both the content in the database when the rollback journal was written
and the content in the database at the beginning of the current
transaction.
(3) Writes to the database file are an integer multiple of the page size
in length and are aligned to a page boundary.
(4) Reads from the database file are either aligned on a page boundary and
an integer multiple of the page size in length or are taken from the
first 100 bytes of the database file.
(5) All writes to the database file are synced prior to the rollback journal
being deleted, truncated, or zeroed.
(6) If a master journal file is used, then all writes to the database file
are synced prior to the master journal being deleted.
*** Definition: Two databases (or the same database at two points in time)
are said to be "logically equivalent" if they give the same answer to
all queries. Note in particular that the content of freelist leaf
pages can be changed arbitrarily without affecting the logical equivalence
of the database.
(7) At any time, if any subset, including the empty set and the total set,
of the unsynced changes to a rollback journal are removed and the
journal is rolled back, the resulting database file will be logically
equivalent to the database file at the beginning of the transaction.
(8) When a transaction is rolled back, the xTruncate method of the VFS
is called to restore the database file to the same size it was at
the beginning of the transaction. (In some VFSes, the xTruncate
method is a no-op, but that does not change the fact that SQLite will
invoke it.)
(9) Whenever the database file is modified, at least one bit in the range
of bytes from 24 through 39 inclusive will be changed prior to releasing
the EXCLUSIVE lock.
(10) The pattern of bits in bytes 24 through 39 shall not repeat in less
than one billion transactions.
(11) A database file is well-formed at the beginning and at the conclusion
of every transaction.
(12) An EXCLUSIVE lock must be held on the database file before making
any changes to the database file.
(13) A SHARED lock must be held on the database file before reading any
content out of the database file.


@@ -1,142 +0,0 @@
# The new-security-options branch
## The problem that the [new-security-options](/timeline?r=new-security-options) branch tries to solve
An attacker might modify the schema of an SQLite database by adding
structures that cause code to run when some other application opens and
reads the database. For example, the attacker might replace a table
definition with a view. Or the attacker might add triggers to tables
or views, or add new CHECK constraints or generated columns or indexes
with expressions in the index list or in the WHERE clause. If the
added features invoke SQL functions or virtual tables with side effects,
that might cause harm to the system if run by a high-privilege victim.
Or, the added features might exfiltrate information if the database is
read by a high-privilege victim.
The changes in this branch strive to make it easier for high-privilege
applications to safely read SQLite database files that might have been
maliciously corrupted by an attacker.
## Overview of changes in [new-security-options](/timeline?r=new-security-options)
The basic idea is to tag every SQL function and virtual table with one
of three risk levels:
1. Innocuous
2. Normal
3. Direct-Only
Innocuous functions/vtabs are safe and can be used at any time.
Direct-only elements, in contrast, might cause side-effects and
should only be used from top-level SQL, not from within triggers or views, nor
in elements of the schema such as CHECK constraints, DEFAULT values,
generated columns, index expressions, or the WHERE clause of a
partial index, all of which are potentially under the control of an attacker.
Normal elements behave like Innocuous if TRUSTED\_SCHEMA=on
and behave like direct-only if TRUSTED\_SCHEMA=off.
Application-defined functions and virtual tables go in as Normal unless
the application takes deliberate steps to change the risk level.
For backwards compatibility, the default is TRUSTED\_SCHEMA=on. Documentation
will be updated to recommend applications turn TRUSTED\_SCHEMA to off.
An innocuous function or virtual table is one that can only read content
from the database file in which it resides, and can only alter the database
in which it resides. Most SQL functions are innocuous. For example, there
is no harm in an attacker running the abs() function.
Direct-only elements are those that have side-effects that reach outside the
database file in which they live, or that return information from outside of
the database file.
Examples of direct-only elements include:
1. The fts3\_tokenizer() function
2. The writefile() function
3. The readfile() function
4. The zipvfs virtual table
5. The csv virtual table
We do not want an attacker to be able to add these kinds of things to
the database schema and possibly trick a high-privilege application
into performing any of these actions. Therefore, functions and vtabs
with side-effects are marked as Direct-Only.
Legacy applications might add other risky functions or vtabs. Those will
go in as "Normal" by default. For optimal security, we want those risky
app-defined functions and vtabs to be direct-only, but making that the
default might break some legacy applications. Hence, all app-defined
functions and vtabs go in as Normal, but the application can switch them
over to "Direct-Only" behavior using a single pragma.
The restrictions on the use of functions and virtual tables do not apply
to TEMP. A TEMP VIEW or a TEMP TRIGGER can use any valid SQL function
or virtual table. The idea is that TEMP views and triggers must be
directly created by the application and are thus under the control of the
application. TEMP views and triggers cannot be created by an attacker who
corrupts the schema of a persistent database file. Hence TEMP views and
triggers are safe.
## Specific changes
1. New sqlite3\_db\_config() option SQLITE\_DBCONFIG\_TRUSTED\_SCHEMA for
turning TRUSTED\_SCHEMA on and off. It defaults to ON.
2. Compile-time option -DSQLITE\_TRUSTED\_SCHEMA=0 causes the default
TRUSTED\_SCHEMA setting to be off.
3. New pragma "PRAGMA trusted\_schema=(ON\|OFF);". This provides access
to the TRUSTED_SCHEMA setting for applications coded using scripting
languages or other secondary languages that are unable to make
calls to sqlite3\_db\_config().
4. New options for the "enc" parameter to sqlite3\_create\_function() and
its kin:
<ol type="a">
<li> _SQLITE\_INNOCUOUS_ &rarr; tags the new functions as Innocuous
<li> _SQLITE\_DIRECTONLY_ &rarr; tags the new functions as Direct-Only
</ol>
5. New options to sqlite3\_vtab\_config():
<ol type="a">
<li> _SQLITE\_VTAB\_INNOCUOUS_ &rarr; tags the vtab as Innocuous
<li> _SQLITE\_VTAB\_DIRECTONLY_ &rarr; tags the vtab as Direct-Only
</ol>
6. Change many of the functions and virtual tables in the SQLite source
tree to use one of the tags above.
7. Enhanced PRAGMA function\_list and virtual-table "pragma\_function\_list"
with additional columns. The columns now are:
<ul>
<li> _name_ &rarr; Name of the function
<li> _builtin_ &rarr; 1 for built-in functions. 0 otherwise.
<li> _type_ &rarr; 's'=Scalar, 'a'=Aggregate, 'w'=Window
<li> _enc_ &rarr; 'utf8', 'utf16le', or 'utf16be'
<li> _narg_ &rarr; number of arguments
<li> _flags_ &rarr; Bitmask of SQLITE\_INNOCUOUS, SQLITE\_DIRECTONLY,
SQLITE\_DETERMINISTIC, SQLITE\_SUBTYPE, and
SQLITE\_FUNC\_INTERNAL flags.
</ul>
<p>The last four columns are new.
8. The function\_list PRAGMA now also shows all entries for each function.
So, for example, if a function can take either 2 or 3 arguments,
there are separate rows for the 2-argument and 3-argument versions of
the function.
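Putting items 1 and 4 together, a minimal sketch of how a high-privilege
application might opt in to the stricter behavior (the "list_dir" function
and its body are hypothetical, shown only to illustrate the flags):

~~~
#include <sqlite3.h>

/* Hypothetical app-defined function with side-effects (it would read the
** filesystem), so it is registered as Direct-Only. */
static void list_dir_func(sqlite3_context *ctx, int argc, sqlite3_value **argv){
  (void)argc; (void)argv;
  sqlite3_result_null(ctx);   /* real implementation omitted */
}

int harden_connection(sqlite3 *db){
  int enabled = -1;

  /* Item 1: turn TRUSTED_SCHEMA off for this connection.
  ** (Equivalent to "PRAGMA trusted_schema=OFF;" from item 3.) */
  sqlite3_db_config(db, SQLITE_DBCONFIG_TRUSTED_SCHEMA, 0, &enabled);

  /* Item 4: register a risky app-defined function as Direct-Only so that
  ** it cannot be invoked from the schema, triggers, or views. */
  return sqlite3_create_function(db, "list_dir", 1,
                                 SQLITE_UTF8 | SQLITE_DIRECTONLY,
                                 0, list_dir_func, 0, 0);
}
~~~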
## Additional Notes
The function_list enhancements allow the application to query the set
of SQL functions that meet various criteria. For example, to see all
SQL functions that are never allowed to be used in the schema or in
triggers or views:
~~~
SELECT DISTINCT name FROM pragma_function_list
WHERE (flags & 0x80000)!=0
ORDER BY name;
~~~
Doing the same is not possible for virtual tables, as a virtual table
might be Innocuous, Normal, or Direct-Only depending on the arguments
passed into the xConnect method.


@@ -1,49 +0,0 @@
20-11-2020
# Memory Allocation In vdbesort.c
Memory allocation is slightly different depending on:
* whether or not SQLITE_CONFIG_SMALL_MALLOC is set, and
* whether or not worker threads are enabled.
## SQLITE_CONFIG_SMALL_MALLOC=0
Assuming SQLITE_CONFIG_SMALL_MALLOC is not set, keys passed to the sorter are
added to an in-memory buffer. This buffer is grown using sqlite3Realloc() as
required until it reaches the size configured for the main pager cache using "PRAGMA
cache_size". i.e. if the user has executed "PRAGMA main.cache_size = -2048",
then this buffer is allowed to grow up to 2MB in size.
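As a minimal illustration (a sketch only; "db" is assumed to be an open
sqlite3* connection, the build is assumed to support worker threads, and
error checking is omitted), an application that wants the sorter to buffer
roughly 2MB of keys and to have worker threads (discussed below) available
might configure its connection like this:

~~~
/* Cap the sorter's in-memory buffer at about 2MB (2048 KiB of pager cache). */
sqlite3_exec(db, "PRAGMA main.cache_size = -2048;", 0, 0, 0);

/* Allow up to 4 auxiliary worker threads for sorting. */
sqlite3_exec(db, "PRAGMA threads = 4;", 0, 0, 0);
~~~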
Once the buffer has grown to its threshold, keys are sorted and written to
a temp file. If worker threads are not enabled, this is the only significant
allocation the sorter module makes. After keys are sorted and flushed out to
the temp file, the buffer is reused to accumulate the next batch of keys.
If worker threads are available, then the buffer is passed to a worker thread
to sort and flush once it is full, and a new buffer is allocated to allow the
main thread to continue to accumulate keys. Buffers are reused once they
have been flushed, so in this case at most (nWorker+1) buffers are allocated
and used, where nWorker is the number of configured worker threads.
There are no other significant users of heap memory in the sorter module.
Once sorted buffers of keys have been flushed to disk, they are read back
either by mapping the file (via sqlite3_file.xFetch()) or else read back
in one page at a time.
All buffers are allocated by the main thread. A sorter object is associated
with a single database connection, to which it holds a pointer.
## SQLITE_CONFIG_SMALL_MALLOC=1
This case is similar to the above, except that instead of accumulating
multiple keys in a single large buffer, sqlite3VdbeSorterWrite() stores
keys in a regular heap-memory linked list (one allocation per element).
List elements are freed as they are flushed to disk, either by the main
thread or by a worker thread.
Each time a key is added to the sorter (and an allocation is made),
sqlite3HeapNearlyFull() is called. If it returns true, the current
list of keys is flushed to a temporary file, even if it has not yet
reached the size threshold.


@@ -1,130 +0,0 @@
The 5 states of an historical rollback lock as implemented by the
xLock, xUnlock, and xCheckReservedLock methods of the sqlite3_io_methods
object are:
UNLOCKED
SHARED
RESERVED
PENDING
EXCLUSIVE
The wal-index file has a similar locking hierarchy implemented using
the xShmLock method of the sqlite3_vfs object, but with 7
states. Each connection to a wal-index file must be in one of
the following 7 states:
UNLOCKED
READ
READ_FULL
WRITE
PENDING
CHECKPOINT
RECOVER
These roughly correspond to the 5 states of a rollback lock except
that SHARED is split out into 2 states: READ and READ_FULL and
there is an extra RECOVER state used for wal-index reconstruction.
The meanings of the various wal-index locking states are as follows:
UNLOCKED - The wal-index is not in use.
READ - Some prefix of the wal-index is being read. Additional
wal-index information can be appended at any time. The
newly appended content will be ignored by the holder of
the READ lock.
READ_FULL - The entire wal-index is being read. No new information
can be added to the wal-index. The holder of a READ_FULL
lock promises never to read pages from the database file
that are available anywhere in the wal-index.
WRITE - It is OK to append to the wal-index file and to adjust
the header to indicate the new "last valid frame".
PENDING - Waiting on all READ locks to clear so that a
CHECKPOINT lock can be acquired.
CHECKPOINT - It is OK to write any WAL data into the database file
and zero the last valid frame field of the wal-index
header. The wal-index file itself may not be changed
other than to zero the last valid frame field in the
header.
RECOVER - Held during wal-index recovery. Used to prevent a
race if multiple clients try to recover a wal-index at
the same time.
A particular lock manager implementation may coalesce one or more of
the wal-index locking states, though with a reduction in concurrency.
For example, an implementation might implement only exclusive locking,
in which case all states would be equivalent to CHECKPOINT, meaning that
only one reader or one writer or one checkpointer could be active at a
time. Or, an implementation might combine READ and READ_FULL into
a single state equivalent to READ, meaning that a writer could
coexist with a reader, but no readers or writers could coexist with a
checkpointer.
The lock manager must obey the following rules:
(1) A READ cannot coexist with CHECKPOINT.
(2) A READ_FULL cannot coexist with WRITE.
(3) None of WRITE, PENDING, CHECKPOINT, or RECOVER can coexist.
The SQLite core will obey the next set of rules. These rules are
assertions on the behavior of the SQLite core which might be verified
during testing using an instrumented lock manager.
(5) No part of the wal-index will be read without holding either some
kind of SHM lock or an EXCLUSIVE lock on the original database.
The original database is the file named in the 2nd parameter to
the xShmOpen method.
(6) A holder of a READ_FULL will never read any page of the database
file that is contained anywhere in the wal-index.
(7) No part of the wal-index other than the header will be written nor
will the size of the wal-index grow without holding a WRITE or
an EXCLUSIVE on the original database file.
(8) The wal-index header will not be written without holding one of
WRITE, CHECKPOINT, or RECOVER on the wal-index or an EXCLUSIVE on
the original database files.
(9) A CHECKPOINT or RECOVER must be held on the wal-index, or an
EXCLUSIVE on the original database file, in order to reset the
last valid frame counter in the header of the wal-index back to zero.
(10) A WRITE can only increase the last valid frame pointer in the header.
The SQLite core will only ever send requests for UNLOCK, READ, WRITE,
CHECKPOINT, or RECOVER to the lock manager. The SQLite core will never
request a READ_FULL or PENDING lock though the lock manager may deliver
those locking states in response to READ and CHECKPOINT requests,
respectively, if and only if the requested READ or CHECKPOINT cannot
be delivered.
The following are the allowed lock transitions:
Original-State Request New-State
-------------- ---------- ----------
(11a) UNLOCK READ READ
(11b) UNLOCK READ READ_FULL
(11c) UNLOCK CHECKPOINT PENDING
(11d) UNLOCK CHECKPOINT CHECKPOINT
(11e) READ UNLOCK UNLOCK
(11f) READ WRITE WRITE
(11g) READ RECOVER RECOVER
(11h) READ_FULL UNLOCK UNLOCK
(11i) READ_FULL WRITE WRITE
(11j) READ_FULL RECOVER RECOVER
(11k) WRITE READ READ
(11l) PENDING UNLOCK UNLOCK
(11m) PENDING CHECKPOINT CHECKPOINT
(11n) CHECKPOINT UNLOCK UNLOCK
(11o) RECOVER READ READ
These 15 transitions are all that need to be supported. The lock
manager implementation can assert that fact. The other 27 possible
transitions among the 7 locking states will never occur.
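As an illustration only (the enum and helper below are hypothetical and not
part of the SQLite API), an instrumented lock manager could encode the table
above and assert that no other transition is ever requested:

typedef enum {
  UNLOCK, READ, READ_FULL, WRITE, PENDING, CHECKPOINT, RECOVER
} WalLockState;

/* The 15 allowed transitions (11a) through (11o) from the table above. */
static const struct { WalLockState from, to; } aAllowed[] = {
  {UNLOCK,    READ},       {UNLOCK,     READ_FULL},  {UNLOCK,    PENDING},
  {UNLOCK,    CHECKPOINT}, {READ,       UNLOCK},     {READ,      WRITE},
  {READ,      RECOVER},    {READ_FULL,  UNLOCK},     {READ_FULL, WRITE},
  {READ_FULL, RECOVER},    {WRITE,      READ},       {PENDING,   UNLOCK},
  {PENDING,   CHECKPOINT}, {CHECKPOINT, UNLOCK},     {RECOVER,   READ},
};

/* Return 1 if a transition from "from" to "to" is one of the 15 allowed
** transitions, or 0 otherwise (suitable for use inside an assert()). */
static int walTransitionAllowed(WalLockState from, WalLockState to){
  unsigned int i;
  for(i=0; i<sizeof(aAllowed)/sizeof(aAllowed[0]); i++){
    if( aAllowed[i].from==from && aAllowed[i].to==to ) return 1;
  }
  return 0;
}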


@@ -1,88 +0,0 @@
# Wal-Mode Blocking Locks
On some Unix-like systems, SQLite may be configured to use POSIX blocking locks
by:
* building the library with SQLITE\_ENABLE\_SETLK\_TIMEOUT defined, and
* configuring a timeout in ms using the sqlite3\_busy\_timeout() API.
Blocking locks may be advantageous as (a) waiting database clients do not
need to continuously poll the database lock, and (b) using blocking locks
facilitates transfer of OS priority between processes when a high priority
process is blocked by a lower priority one.
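A minimal sketch of opting in from application code, assuming the library was
built with -DSQLITE\_ENABLE\_SETLK\_TIMEOUT (error handling omitted):

~~~
sqlite3 *db = 0;
sqlite3_open("test.db", &db);

/* The busy timeout also bounds how long a blocking lock request may
** wait before SQLITE_BUSY is returned to the caller. */
sqlite3_busy_timeout(db, 5000);   /* wait up to 5000 ms */
~~~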
Only read/write clients use blocking locks. Clients that have read-only access
to the \*-shm file never use blocking locks.
Threads or processes that access a single database at a time never deadlock as
a result of blocking database locks. But it is of course possible for threads
that lock multiple databases simultaneously to do so. In most cases the OS will
detect the deadlock and return an error.
## Wal Recovery
Wal database "recovery" is a process required when the number of connected
database clients changes from zero to one. In this case, a client is
considered to connect to the database when it first reads data from it.
Before recovery commences, an exclusive WRITER lock is taken.
Without blocking locks, if two clients attempt recovery simultaneously, one
fails to obtain the WRITER lock and either invokes the busy-handler callback or
returns SQLITE\_BUSY to the user. With blocking locks configured, the second
client blocks on the WRITER lock.
## Database Readers
Usually, readers are not blocked by any other database clients, so they
have no need of blocking locks.
If a read-only transaction is being opened on a snapshot, the CHECKPOINTER
lock is required briefly as part of opening the transaction (to check that a
checkpointer is not currently overwriting the snapshot being opened). A
blocking lock is used to obtain the CHECKPOINTER lock in this case. A snapshot
opener may therefore block on and transfer priority to a checkpointer in some
cases.
## Database Writers
A database writer must obtain the exclusive WRITER lock. It uses a blocking
lock to do so if any of the following are true:
* the transaction is an implicit one consisting of a single DML or DDL
statement, or
* the transaction is opened using BEGIN IMMEDIATE or BEGIN EXCLUSIVE, or
* the first SQL statement executed following the BEGIN command is a DML or
DDL statement (not a read-only statement like a SELECT).
In other words, a blocking lock is used in all cases except when an open
read-transaction is upgraded to a write-transaction; in that case a
non-blocking lock is used.
## Database Checkpointers
A database checkpointer takes the following locks, in order:
* The exclusive CHECKPOINTER lock.
* The exclusive WRITER lock (FULL, RESTART and TRUNCATE only).
* Exclusive lock on read-mark slots 1-N. These are immediately released after being taken.
* Exclusive lock on read-mark 0.
* Exclusive lock on read-mark slots 1-N again. These are immediately released
after being taken (RESTART and TRUNCATE only).
All of the above use blocking locks.
## Summary
With blocking locks configured, the only cases in which clients should see an
SQLITE\_BUSY error are:
* if the OS does not grant a blocking lock before the configured timeout
expires, and
* when an open read-transaction is upgraded to a write-transaction.
In all other cases the blocking locks implementation should prevent clients
from having to handle SQLITE\_BUSY errors and facilitate appropriate transfer
of priorities between competing clients.
Clients that lock multiple databases simultaneously must be wary of deadlock.

View File

@ -1,8 +0,0 @@
## Loadable Extensions
Various [loadable extensions](https://www.sqlite.org/loadext.html) for
SQLite are found in subfolders.
Most subfolders are dedicated to a single loadable extension (for
example, FTS5 or RTREE). But the misc/ subfolder contains a collection
of smaller single-file extensions.

View File

@ -1,170 +0,0 @@
NOTE (2012-11-29):
The functionality implemented by this extension has been superseded
by WAL-mode. This module is no longer supported or maintained. The
code is retained for historical reference only.
------------------------------------------------------------------------------
Normally, when SQLite writes to a database file, it waits until the write
operation is finished before returning control to the calling application.
Since writing to the file-system is usually very slow compared with CPU
bound operations, this can be a performance bottleneck. This directory
contains an extension that causes SQLite to perform all write requests
using a separate thread running in the background. Although this does not
reduce the overall system resources (CPU, disk bandwidth etc.) at all, it
allows SQLite to return control to the caller quickly even when writing to
the database, eliminating the bottleneck.
1. Functionality
1.1 How it Works
1.2 Limitations
1.3 Locking and Concurrency
2. Compilation and Usage
3. Porting
1. FUNCTIONALITY
With asynchronous I/O, write requests are handled by a separate thread
running in the background. This means that the thread that initiates
a database write does not have to wait for (sometimes slow) disk I/O
to occur. The write seems to happen very quickly, though in reality
it is happening at its usual slow pace in the background.
Asynchronous I/O appears to give better responsiveness, but at a price.
You lose the Durable property. With the default I/O backend of SQLite,
once a write completes, you know that the information you wrote is
safely on disk. With the asynchronous I/O, this is not the case. If
your program crashes or if a power loss occurs after the database
write but before the asynchronous write thread has completed, then the
database change might never make it to disk and the next user of the
database might not see your change.
You lose Durability with asynchronous I/O, but you still retain the
other parts of ACID: Atomic, Consistent, and Isolated. Many
applications get along fine without the Durability.
1.1 How it Works
Asynchronous I/O works by creating a special SQLite "vfs" structure
and registering it with sqlite3_vfs_register(). When files opened via
this vfs are written to (using the vfs xWrite() method), the data is not
written directly to disk, but is placed in the "write-queue" to be
handled by the background thread.
When files opened with the asynchronous vfs are read from
(using the vfs xRead() method), the data is read from the file on
disk and the write-queue, so that from the point of view of
the vfs reader the xWrite() appears to have already completed.
The special vfs is registered (and unregistered) by calls to the
API functions sqlite3async_initialize() and sqlite3async_shutdown().
See section "Compilation and Usage" below for details.
1.2 Limitations
In order to gain experience with the main ideas surrounding asynchronous
IO, this implementation is deliberately kept simple. Additional
capabilities may be added in the future.
For example, as currently implemented, if writes are happening at a
steady stream that exceeds the I/O capability of the background writer
thread, the queue of pending write operations will grow without bound.
If this goes on for long enough, the host system could run out of memory.
A more sophisticated module could keep track of the quantity of
pending writes and stop accepting new write requests when the queue of
pending writes grows too large.
1.3 Locking and Concurrency
Multiple connections from within a single process that use this
implementation of asynchronous IO may access a single database
file concurrently. From the point of view of the user, if all
connections are from within a single process, there is no difference
between the concurrency offered by "normal" SQLite and SQLite
using the asynchronous backend.
If file-locking is enabled (it is enabled by default), then connections
from multiple processes may also read and write the database file.
However concurrency is reduced as follows:
* When a connection using asynchronous IO begins a database
transaction, the database is locked immediately. However the
lock is not released until after all relevant operations
in the write-queue have been flushed to disk. This means
(for example) that the database may remain locked for some
time after a "COMMIT" or "ROLLBACK" is issued.
* If an application using asynchronous IO executes transactions
in quick succession, other database users may be effectively
locked out of the database. This is because when a BEGIN
is executed, a database lock is established immediately. But
when the corresponding COMMIT or ROLLBACK occurs, the lock
is not released until the relevant part of the write-queue
has been flushed through. As a result, if a COMMIT is followed
by a BEGIN before the write-queue is flushed through, the database
is never unlocked, preventing other processes from accessing
the database.
File-locking may be disabled at runtime using the sqlite3async_control()
API (see below). This may improve performance when using an NFS or other
network file-system, as the synchronous round-trips to the server required
to establish file locks are avoided. However, if multiple connections
attempt to access the same database file when file-locking is disabled,
application crashes and database corruption are a likely outcome.
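For example (a sketch only; it assumes the deployment can guarantee that no
other process, and no non-asynchronous connection, accesses the database while
locking is disabled):

    /* May only be called while no database connections are using the
    ** asynchronous VFS and the write-queue is empty. */
    sqlite3async_control(SQLITEASYNC_LOCKFILES, 0);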
2. COMPILATION AND USAGE
The asynchronous IO extension consists of a single file of C code
(sqlite3async.c), and a header file (sqlite3async.h) that defines the
C API used by applications to activate and control the module's
functionality.
To use the asynchronous IO extension, compile sqlite3async.c as
part of the application that uses SQLite. Then use the API defined
in sqlite3async.h to initialize and configure the module.
The asynchronous IO VFS API is described in detail in comments in
sqlite3async.h. Using the API usually consists of the following steps:
1. Register the asynchronous IO VFS with SQLite by calling the
sqlite3async_initialize() function.
2. Create a background thread to perform write operations and call
sqlite3async_run().
3. Use the normal SQLite API to read and write to databases via
the asynchronous IO VFS.
Refer to sqlite3async.h for details.
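For illustration, a minimal sketch of steps 1 and 2 using pthreads is shown
below. Error handling is abbreviated, and the function and thread names are
examples only, not part of the extension:

    #include <pthread.h>
    #include <sqlite3.h>
    #include "sqlite3async.h"

    static void *asyncWriterThread(void *pUnused){
      (void)pUnused;
      sqlite3async_run();    /* with the default "halt" setting, never returns */
      return 0;
    }

    int startAsyncIo(void){
      pthread_t tid;
      /* Wrap the default VFS and make "sqlite3async" the process default. */
      int rc = sqlite3async_initialize(0, 1);
      if( rc!=SQLITE_OK ) return rc;
      return pthread_create(&tid, 0, asyncWriterThread, 0) ? SQLITE_ERROR : SQLITE_OK;
    }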
3. PORTING
Currently the asynchronous IO extension is compatible with win32 systems
and systems that support the pthreads interface, including Mac OSX, Linux,
and other varieties of Unix.
To port the asynchronous IO extension to another platform, the user must
implement mutex and condition variable primitives for the new platform.
Currently there is no externally available interface to allow this, but
modifying the code within sqlite3async.c to include the new platform's
concurrency primitives is relatively easy. Search within sqlite3async.c
for the comment string "PORTING FUNCTIONS" for details. Then implement
new versions of each of the following:
static void async_mutex_enter(int eMutex);
static void async_mutex_leave(int eMutex);
static void async_cond_wait(int eCond, int eMutex);
static void async_cond_signal(int eCond);
static void async_sched_yield(void);
The functionality required of each of the above functions is described
in comments in sqlite3async.c.

File diff suppressed because it is too large Load Diff

View File

@ -1,222 +0,0 @@
#ifndef __SQLITEASYNC_H_
#define __SQLITEASYNC_H_ 1
/*
** Make sure we can call this stuff from C++.
*/
#ifdef __cplusplus
extern "C" {
#endif
#define SQLITEASYNC_VFSNAME "sqlite3async"
/*
** THREAD SAFETY NOTES:
**
** Of the four API functions in this file, the following are not threadsafe:
**
** sqlite3async_initialize()
** sqlite3async_shutdown()
**
** Care must be taken that neither of these functions is called while
** another thread may be calling any sqlite3async_XXX() function
** or an sqlite3_XXX() API function related to a database handle that
** is using the asynchronous IO VFS.
**
** These functions:
**
** sqlite3async_run()
** sqlite3async_control()
**
** are threadsafe. It is quite safe to call either of these functions even
** if another thread may also be calling one of them or an sqlite3_XXX()
** function related to a database handle that uses the asynchronous IO VFS.
*/
/*
** Initialize the asynchronous IO VFS and register it with SQLite using
** sqlite3_vfs_register(). If the asynchronous VFS is already initialized
** and registered, this function is a no-op. The asynchronous IO VFS
** is registered as "sqlite3async".
**
** The asynchronous IO VFS does not make operating system IO requests
** directly. Instead, it uses an existing VFS implementation for all
** required file-system operations. If the first parameter to this function
** is NULL, then the current default VFS is used for IO. If it is not
** NULL, then it must be the name of an existing VFS. In other words, the
** first argument to this function is passed to sqlite3_vfs_find() to
** locate the VFS to use for all real IO operations. This VFS is known
** as the "parent VFS".
**
** If the second parameter to this function is non-zero, then the
** asynchronous IO VFS is registered as the default VFS for all SQLite
** database connections within the process. Otherwise, the asynchronous IO
** VFS is only used by connections opened using sqlite3_open_v2() that
** specifically request VFS "sqlite3async".
**
** If a parent VFS cannot be located, then SQLITE_ERROR is returned.
** In the unlikely event that operating system specific initialization
** fails (win32 systems create the required critical section and event
** objects within this function), then SQLITE_ERROR is also returned.
** Finally, if the call to sqlite3_vfs_register() returns an error, then
** the error code is returned to the user by this function. In all three
** of these cases, initialization has failed and the asynchronous IO VFS
** is not registered with SQLite.
**
** Otherwise, if no error occurs, SQLITE_OK is returned.
*/
int sqlite3async_initialize(const char *zParent, int isDefault);
/*
** This function unregisters the asynchronous IO VFS using
** sqlite3_vfs_unregister().
**
** On win32 platforms, this function also releases the small number of
** critical section and event objects created by sqlite3async_initialize().
*/
void sqlite3async_shutdown(void);
/*
** This function may only be called when the asynchronous IO VFS is
** installed (after a call to sqlite3async_initialize()). It processes
** zero or more queued write operations before returning. It is expected
** (but not required) that this function will be called by a different
** thread than those threads that use SQLite - a dedicated "background
** thread" that performs the IO.
**
** How many queued write operations are performed before returning
** depends on the global setting configured by passing the SQLITEASYNC_HALT
** verb to sqlite3async_control() (see below for details). By default
** this function never returns - it processes all pending operations and
** then blocks waiting for new ones.
**
** If multiple simultaneous calls are made to sqlite3async_run() from two
** or more threads, then the calls are serialized internally.
*/
void sqlite3async_run(void);
/*
** This function may only be called when the asynchronous IO VFS is
** installed (after a call to sqlite3async_initialize()). It is used
** to query or configure various parameters that affect the operation
** of the asynchronous IO VFS. At present there are three parameters
** supported:
**
** * The "halt" parameter, which configures the circumstances under
**   which the sqlite3async_run() function returns.
**
** * The "delay" parameter. Setting the delay parameter to a non-zero
** value causes the sqlite3async_run() function to sleep for the
** configured number of milliseconds between each queued write
** operation.
**
** * The "lockfiles" parameter. This parameter determines whether or
** not the asynchronous IO VFS locks the database files it operates
** on. Disabling file locking can improve throughput.
**
** This function is always passed two arguments. When setting the value
** of a parameter, the first argument must be one of SQLITEASYNC_HALT,
** SQLITEASYNC_DELAY or SQLITEASYNC_LOCKFILES. The second argument must
** be passed the new value for the parameter as type "int".
**
** When querying the current value of a parameter, the first argument must
** be one of SQLITEASYNC_GET_HALT, GET_DELAY or GET_LOCKFILES. The second
** argument to this function must be of type (int *). The current value
** of the queried parameter is copied to the memory pointed to by the
** second argument. For example:
**
** int eCurrentHalt;
** int eNewHalt = SQLITEASYNC_HALT_IDLE;
**
** sqlite3async_control(SQLITEASYNC_HALT, eNewHalt);
** sqlite3async_control(SQLITEASYNC_GET_HALT, &eCurrentHalt);
** assert( eNewHalt==eCurrentHalt );
**
** See below for more detail on each configuration parameter.
**
** SQLITEASYNC_HALT:
**
** This is used to set the value of the "halt" parameter. The second
** argument must be one of the SQLITEASYNC_HALT_XXX symbols defined
** below (NEVER, IDLE, or NOW).
**
** If the parameter is set to NEVER, then calls to sqlite3async_run()
** never return. This is the default setting. If the parameter is set
** to IDLE, then calls to sqlite3async_run() return as soon as the
** queue of pending write operations is empty. If the parameter is set
** to NOW, then calls to sqlite3async_run() return as quickly as
** possible, without processing any pending write requests.
**
** If an attempt is made to set this parameter to an integer value other
** than SQLITEASYNC_HALT_NEVER, IDLE or NOW, then sqlite3async_control()
** returns SQLITE_MISUSE and the current value of the parameter is not
** modified.
**
** Modifying the "halt" parameter affects calls to sqlite3async_run()
** made by other threads that are currently in progress.
**
** SQLITEASYNC_DELAY:
**
** This is used to set the value of the "delay" parameter. If set to
** a non-zero value, then after completing a pending write request, the
** sqlite3async_run() function sleeps for the configured number of
** milliseconds.
**
** If an attempt is made to set this parameter to a negative value,
** sqlite3async_control() returns SQLITE_MISUSE and the current value
** of the parameter is not modified.
**
** Modifying the "delay" parameter affects calls to sqlite3async_run()
** made by other threads that are currently in progress.
**
** SQLITEASYNC_LOCKFILES:
**
** This is used to set the value of the "lockfiles" parameter. This
** parameter must be set to either 0 or 1. If set to 1, then the
** asynchronous IO VFS uses the xLock() and xUnlock() methods of the
** parent VFS to lock database files being read and/or written. If
** the parameter is set to 0, then these locks are omitted.
**
** This parameter may only be set when there are no open database
** connections using the VFS and the queue of pending write requests
** is empty. Attempting to set it when this is not true, or to set it
** to a value other than 0 or 1 causes sqlite3async_control() to return
** SQLITE_MISUSE and the value of the parameter to remain unchanged.
**
** If this parameter is set to zero, then it is only safe to access the
** database via the asynchronous IO VFS from within a single process. If
** while writing to the database via the asynchronous IO VFS the database
** is also read or written from within another process, or via another
** connection that does not use the asynchronous IO VFS within the same
** process, the results are undefined (and may include crashes or database
** corruption).
**
** Alternatively, if this parameter is set to 1, then it is safe to access
** the database from multiple connections within multiple processes using
** either the asynchronous IO VFS or the parent VFS directly.
*/
int sqlite3async_control(int op, ...);
/*
** Values that can be used as the first argument to sqlite3async_control().
*/
#define SQLITEASYNC_HALT 1
#define SQLITEASYNC_GET_HALT 2
#define SQLITEASYNC_DELAY 3
#define SQLITEASYNC_GET_DELAY 4
#define SQLITEASYNC_LOCKFILES 5
#define SQLITEASYNC_GET_LOCKFILES 6
/*
** If the first argument to sqlite3async_control() is SQLITEASYNC_HALT,
** the second argument should be one of the following.
*/
#define SQLITEASYNC_HALT_NEVER 0 /* Never halt (default value) */
#define SQLITEASYNC_HALT_NOW 1 /* Halt as soon as possible */
#define SQLITEASYNC_HALT_IDLE 2 /* Halt when write-queue is empty */
#ifdef __cplusplus
} /* End of the 'extern "C"' block */
#endif
#endif /* ifndef __SQLITEASYNC_H_ */

View File

@ -1,83 +0,0 @@
## SQLite Expert Extension
This folder contains code for a simple system to propose useful indexes
given a database and a set of SQL queries. It works as follows:
1. The user database schema is copied to a temporary database.
1. All SQL queries are prepared against the temporary database.
Information regarding the WHERE and ORDER BY clauses, and other query
features that affect index selection, is recorded.
1. The information gathered in step 2 is used to create candidate
indexes - indexes that the planner might have made use of in the previous
step, had they been available.
1. A subset of the data in the user database is used to generate statistics
for all existing indexes and the candidate indexes generated in step 3
above.
1. The SQL queries are prepared a second time. If the planner uses any
of the indexes created in step 3, they are recommended to the user.
# C API
The SQLite expert C API is defined in sqlite3expert.h. Most uses will proceed
as follows:
1. An sqlite3expert object is created by calling **sqlite3\_expert\_new()**.
A database handle opened by the user is passed as an argument.
1. The sqlite3expert object is configured with one or more SQL statements
by making one or more calls to **sqlite3\_expert\_sql()**. Each call may
specify a single SQL statement, or multiple statements separated by
semi-colons.
1. Optionally, the **sqlite3\_expert\_config()** API may be used to
configure the size of the data subset used to generate index statistics.
Using a smaller subset of the data can speed up the analysis.
1. **sqlite3\_expert\_analyze()** is called to run the analysis.
1. One or more calls are made to **sqlite3\_expert\_report()** to extract
components of the results of the analysis.
1. **sqlite3\_expert\_destroy()** is called to free all resources.
Refer to comments in sqlite3expert.h for further details.
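The following sketch strings these calls together for a single query. It
assumes stdio.h, sqlite3.h and sqlite3expert.h are included; the helper name
and the 25% sample size are illustrative only:

<pre>
static int recommendIndexes(sqlite3 *db, const char *zQuery){
  char *zErr = 0;
  sqlite3expert *p = sqlite3_expert_new(db, &zErr);
  int rc = p ? SQLITE_OK : SQLITE_ERROR;
  if( rc==SQLITE_OK ) rc = sqlite3_expert_sql(p, zQuery, &zErr);
  if( rc==SQLITE_OK ) rc = sqlite3_expert_config(p, EXPERT_CONFIG_SAMPLE, 25);
  if( rc==SQLITE_OK ) rc = sqlite3_expert_analyze(p, &zErr);
  if( rc==SQLITE_OK ){
    int i;
    for(i=0; i&lt;sqlite3_expert_count(p); i++){
      const char *zIdx = sqlite3_expert_report(p, i, EXPERT_REPORT_INDEXES);
      printf("%s\n", zIdx ? zIdx : "(no new indexes)");
    }
  }else{
    fprintf(stderr, "expert error: %s\n", zErr ? zErr : "?");
  }
  sqlite3_expert_destroy(p);
  sqlite3_free(zErr);
  return rc;
}
</pre>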
# sqlite3_expert application
The file "expert.c" contains the code for a command line application that
uses the API described above. It can be compiled with (for example):
<pre>
gcc -O2 sqlite3.c expert.c sqlite3expert.c -o sqlite3_expert
</pre>
Assuming the database is named "test.db", it can then be run to analyze a
single query:
<pre>
./sqlite3_expert -sql &lt;sql-query&gt; test.db
</pre>
Or an entire text file worth of queries with:
<pre>
./sqlite3_expert -file &lt;text-file&gt; test.db
</pre>
By default, sqlite3\_expert generates index statistics using all the data in
the user database. For a large database, this may be prohibitively time
consuming. The "-sample" option may be used to configure sqlite3\_expert to
generate statistics based on an integer percentage of the user database as
follows:
<pre>
# Generate statistics based on 25% of the user database rows:
./sqlite3_expert -sample 25 -sql &lt;sql-query&gt; test.db
# Do not generate any statistics at all:
./sqlite3_expert -sample 0 -sql &lt;sql-query&gt; test.db
</pre>

View File

@ -1,156 +0,0 @@
/*
** 2017 April 07
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
*/
#include <sqlite3.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "sqlite3expert.h"
static void option_requires_argument(const char *zOpt){
fprintf(stderr, "Option requires an argument: %s\n", zOpt);
exit(-3);
}
static int option_integer_arg(const char *zVal){
return atoi(zVal);
}
static void usage(char **argv){
fprintf(stderr, "\n");
fprintf(stderr, "Usage %s ?OPTIONS? DATABASE\n", argv[0]);
fprintf(stderr, "\n");
fprintf(stderr, "Options are:\n");
fprintf(stderr, " -sql SQL (analyze SQL statements passed as argument)\n");
fprintf(stderr, " -file FILE (read SQL statements from file FILE)\n");
fprintf(stderr, " -verbose LEVEL (integer verbosity level. default 1)\n");
fprintf(stderr, " -sample PERCENT (percent of db to sample. default 100)\n");
exit(-1);
}
static int readSqlFromFile(sqlite3expert *p, const char *zFile, char **pzErr){
FILE *in = fopen(zFile, "rb");
long nIn;
size_t nRead;
char *pBuf;
int rc;
if( in==0 ){
*pzErr = sqlite3_mprintf("failed to open file %s\n", zFile);
return SQLITE_ERROR;
}
fseek(in, 0, SEEK_END);
nIn = ftell(in);
rewind(in);
pBuf = sqlite3_malloc64( nIn+1 );
if( pBuf==0 ){
fclose(in);
*pzErr = sqlite3_mprintf("out of memory\n");
return SQLITE_ERROR;
}
nRead = fread(pBuf, nIn, 1, in);
fclose(in);
if( nRead!=1 ){
sqlite3_free(pBuf);
*pzErr = sqlite3_mprintf("failed to read file %s\n", zFile);
return SQLITE_ERROR;
}
pBuf[nIn] = 0;
rc = sqlite3_expert_sql(p, pBuf, pzErr);
sqlite3_free(pBuf);
return rc;
}
int main(int argc, char **argv){
const char *zDb;
int rc = 0;
char *zErr = 0;
int i;
int iVerbose = 1; /* -verbose option */
sqlite3 *db = 0;
sqlite3expert *p = 0;
if( argc<2 ) usage(argv);
zDb = argv[argc-1];
if( zDb[0]=='-' ) usage(argv);
rc = sqlite3_open(zDb, &db);
if( rc!=SQLITE_OK ){
fprintf(stderr, "Cannot open db file: %s - %s\n", zDb, sqlite3_errmsg(db));
exit(-2);
}
p = sqlite3_expert_new(db, &zErr);
if( p==0 ){
fprintf(stderr, "Cannot run analysis: %s\n", zErr);
rc = 1;
}else{
for(i=1; i<(argc-1); i++){
char *zArg = argv[i];
int nArg;
if( zArg[0]=='-' && zArg[1]=='-' && zArg[2]!=0 ) zArg++;
nArg = (int)strlen(zArg);
if( nArg>=2 && 0==sqlite3_strnicmp(zArg, "-file", nArg) ){
if( ++i==(argc-1) ) option_requires_argument("-file");
rc = readSqlFromFile(p, argv[i], &zErr);
}
else if( nArg>=3 && 0==sqlite3_strnicmp(zArg, "-sql", nArg) ){
if( ++i==(argc-1) ) option_requires_argument("-sql");
rc = sqlite3_expert_sql(p, argv[i], &zErr);
}
else if( nArg>=3 && 0==sqlite3_strnicmp(zArg, "-sample", nArg) ){
int iSample;
if( ++i==(argc-1) ) option_requires_argument("-sample");
iSample = option_integer_arg(argv[i]);
sqlite3_expert_config(p, EXPERT_CONFIG_SAMPLE, iSample);
}
else if( nArg>=2 && 0==sqlite3_strnicmp(zArg, "-verbose", nArg) ){
if( ++i==(argc-1) ) option_requires_argument("-verbose");
iVerbose = option_integer_arg(argv[i]);
}
else{
usage(argv);
}
}
}
if( rc==SQLITE_OK ){
rc = sqlite3_expert_analyze(p, &zErr);
}
if( rc==SQLITE_OK ){
int nQuery = sqlite3_expert_count(p);
if( iVerbose>0 ){
const char *zCand = sqlite3_expert_report(p,0,EXPERT_REPORT_CANDIDATES);
fprintf(stdout, "-- Candidates -------------------------------\n");
fprintf(stdout, "%s\n", zCand);
}
for(i=0; i<nQuery; i++){
const char *zSql = sqlite3_expert_report(p, i, EXPERT_REPORT_SQL);
const char *zIdx = sqlite3_expert_report(p, i, EXPERT_REPORT_INDEXES);
const char *zEQP = sqlite3_expert_report(p, i, EXPERT_REPORT_PLAN);
if( zIdx==0 ) zIdx = "(no new indexes)\n";
if( iVerbose>0 ){
fprintf(stdout, "-- Query %d ----------------------------------\n",i+1);
fprintf(stdout, "%s\n\n", zSql);
}
fprintf(stdout, "%s\n%s\n", zIdx, zEQP);
}
}else{
fprintf(stderr, "Error: %s\n", zErr ? zErr : "?");
}
sqlite3_expert_destroy(p);
sqlite3_free(zErr);
return rc;
}

View File

@ -1,458 +0,0 @@
# 2009 Nov 11
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# The focus of this file is testing the CLI shell tool. Specifically,
# the ".recommend" command.
#
#
# Test plan:
#
#
if {![info exists testdir]} {
set testdir [file join [file dirname [info script]] .. .. test]
}
source $testdir/tester.tcl
set testprefix expert1
if {[info commands sqlite3_expert_new]==""} {
finish_test
return
}
set CLI [test_binary_name sqlite3]
set CMD [test_binary_name sqlite3_expert]
proc squish {txt} {
regsub -all {[[:space:]]+} $txt { }
}
proc do_setup_rec_test {tn setup sql res} {
reset_db
if {[info exists ::set_main_db_name]} {
dbconfig_maindbname_icecube db
}
db eval $setup
uplevel [list do_rec_test $tn $sql $res]
}
foreach {tn setup} {
1 {
if {![file executable $CMD]} { continue }
proc do_rec_test {tn sql res} {
set res [squish [string trim $res]]
set tst [subst -nocommands {
squish [string trim [exec $::CMD -verbose 0 -sql {$sql;} test.db]]
}]
uplevel [list do_test $tn $tst $res]
}
}
2 {
if {[info commands sqlite3_expert_new]==""} { continue }
proc do_rec_test {tn sql res} {
set expert [sqlite3_expert_new db]
$expert sql $sql
$expert analyze
set result [list]
for {set i 0} {$i < [$expert count]} {incr i} {
set idx [string trim [$expert report $i indexes]]
if {$idx==""} {set idx "(no new indexes)"}
lappend result $idx
lappend result [string trim [$expert report $i plan]]
}
$expert destroy
set tst [subst -nocommands {set {} [squish [join {$result}]]}]
uplevel [list do_test $tn $tst [string trim [squish $res]]]
}
}
3 {
if {[info commands sqlite3_expert_new]==""} { continue }
set ::set_main_db_name 1
}
4 {
if {![file executable $CLI]} { continue }
proc do_rec_test {tn sql res} {
set res [squish [string trim $res]]
set tst [subst -nocommands {
squish [string trim [exec $::CLI test.db ".expert" {$sql;}]]
}]
uplevel [list do_test $tn $tst $res]
}
}
} {
eval $setup
do_setup_rec_test $tn.1 { CREATE TABLE t1(a, b, c) } {
SELECT * FROM t1
} {
(no new indexes)
SCAN TABLE t1
}
do_setup_rec_test $tn.2 {
CREATE TABLE t1(a, b, c);
} {
SELECT * FROM t1 WHERE b>?;
} {
CREATE INDEX t1_idx_00000062 ON t1(b);
SEARCH TABLE t1 USING INDEX t1_idx_00000062 (b>?)
}
do_setup_rec_test $tn.3 {
CREATE TABLE t1(a, b, c);
} {
SELECT * FROM t1 WHERE b COLLATE nocase BETWEEN ? AND ?
} {
CREATE INDEX t1_idx_3e094c27 ON t1(b COLLATE NOCASE);
SEARCH TABLE t1 USING INDEX t1_idx_3e094c27 (b>? AND b<?)
}
do_setup_rec_test $tn.4 {
CREATE TABLE t1(a, b, c);
} {
SELECT a FROM t1 ORDER BY b;
} {
CREATE INDEX t1_idx_00000062 ON t1(b);
SCAN TABLE t1 USING INDEX t1_idx_00000062
}
do_setup_rec_test $tn.5 {
CREATE TABLE t1(a, b, c);
} {
SELECT a FROM t1 WHERE a=? ORDER BY b;
} {
CREATE INDEX t1_idx_000123a7 ON t1(a, b);
SEARCH TABLE t1 USING COVERING INDEX t1_idx_000123a7 (a=?)
}
if 0 {
do_setup_rec_test $tn.6 {
CREATE TABLE t1(a, b, c);
} {
SELECT min(a) FROM t1
} {
CREATE INDEX t1_idx_00000061 ON t1(a);
SEARCH TABLE t1 USING COVERING INDEX t1_idx_00000061
}
}
do_setup_rec_test $tn.7 {
CREATE TABLE t1(a, b, c);
} {
SELECT * FROM t1 ORDER BY a, b, c;
} {
CREATE INDEX t1_idx_033e95fe ON t1(a, b, c);
SCAN TABLE t1 USING COVERING INDEX t1_idx_033e95fe
}
#do_setup_rec_test $tn.1.8 {
# CREATE TABLE t1(a, b, c);
#} {
# SELECT * FROM t1 ORDER BY a ASC, b COLLATE nocase DESC, c ASC;
#} {
# CREATE INDEX t1_idx_5be6e222 ON t1(a, b COLLATE NOCASE DESC, c);
# 0|0|0|SCAN TABLE t1 USING COVERING INDEX t1_idx_5be6e222
#}
do_setup_rec_test $tn.8.1 {
CREATE TABLE t1(a COLLATE NOCase, b, c);
} {
SELECT * FROM t1 WHERE a=?
} {
CREATE INDEX t1_idx_00000061 ON t1(a);
SEARCH TABLE t1 USING INDEX t1_idx_00000061 (a=?)
}
do_setup_rec_test $tn.8.2 {
CREATE TABLE t1(a, b COLLATE nocase, c);
} {
SELECT * FROM t1 ORDER BY a ASC, b DESC, c ASC;
} {
CREATE INDEX t1_idx_5cb97285 ON t1(a, b DESC, c);
SCAN TABLE t1 USING COVERING INDEX t1_idx_5cb97285
}
# Tables with names that require quotes.
#
do_setup_rec_test $tn.9.1 {
CREATE TABLE "t t"(a, b, c);
} {
SELECT * FROM "t t" WHERE a=?
} {
CREATE INDEX 't t_idx_00000061' ON 't t'(a);
SEARCH TABLE t t USING INDEX t t_idx_00000061 (a=?)
}
do_setup_rec_test $tn.9.2 {
CREATE TABLE "t t"(a, b, c);
} {
SELECT * FROM "t t" WHERE b BETWEEN ? AND ?
} {
CREATE INDEX 't t_idx_00000062' ON 't t'(b);
SEARCH TABLE t t USING INDEX t t_idx_00000062 (b>? AND b<?)
}
# Columns with names that require quotes.
#
do_setup_rec_test $tn.10.1 {
CREATE TABLE t3(a, "b b", c);
} {
SELECT * FROM t3 WHERE "b b" = ?
} {
CREATE INDEX t3_idx_00050c52 ON t3('b b');
SEARCH TABLE t3 USING INDEX t3_idx_00050c52 (b b=?)
}
do_setup_rec_test $tn.10.2 {
CREATE TABLE t3(a, "b b", c);
} {
SELECT * FROM t3 ORDER BY "b b"
} {
CREATE INDEX t3_idx_00050c52 ON t3('b b');
SCAN TABLE t3 USING INDEX t3_idx_00050c52
}
# Transitive constraints
#
do_setup_rec_test $tn.11.1 {
CREATE TABLE t5(a, b);
CREATE TABLE t6(c, d);
} {
SELECT * FROM t5, t6 WHERE a=? AND b=c AND c=?
} {
CREATE INDEX t5_idx_000123a7 ON t5(a, b);
CREATE INDEX t6_idx_00000063 ON t6(c);
SEARCH TABLE t6 USING INDEX t6_idx_00000063 (c=?)
SEARCH TABLE t5 USING COVERING INDEX t5_idx_000123a7 (a=? AND b=?)
}
# OR terms.
#
do_setup_rec_test $tn.12.1 {
CREATE TABLE t7(a, b);
} {
SELECT * FROM t7 WHERE a=? OR b=?
} {
CREATE INDEX t7_idx_00000062 ON t7(b);
CREATE INDEX t7_idx_00000061 ON t7(a);
MULTI-INDEX OR
INDEX 1
SEARCH TABLE t7 USING INDEX t7_idx_00000061 (a=?)
INDEX 2
SEARCH TABLE t7 USING INDEX t7_idx_00000062 (b=?)
}
# rowid terms.
#
do_setup_rec_test $tn.13.1 {
CREATE TABLE t8(a, b);
} {
SELECT * FROM t8 WHERE rowid=?
} {
(no new indexes)
SEARCH TABLE t8 USING INTEGER PRIMARY KEY (rowid=?)
}
do_setup_rec_test $tn.13.2 {
CREATE TABLE t8(a, b);
} {
SELECT * FROM t8 ORDER BY rowid
} {
(no new indexes)
SCAN TABLE t8
}
do_setup_rec_test $tn.13.3 {
CREATE TABLE t8(a, b);
} {
SELECT * FROM t8 WHERE a=? ORDER BY rowid
} {
CREATE INDEX t8_idx_00000061 ON t8(a);
SEARCH TABLE t8 USING INDEX t8_idx_00000061 (a=?)
}
# Triggers
#
do_setup_rec_test $tn.14 {
CREATE TABLE t9(a, b, c);
CREATE TABLE t10(a, b, c);
CREATE TRIGGER t9t AFTER INSERT ON t9 BEGIN
UPDATE t10 SET a=new.a WHERE b = new.b;
END;
} {
INSERT INTO t9 VALUES(?, ?, ?);
} {
CREATE INDEX t10_idx_00000062 ON t10(b);
SEARCH TABLE t10 USING INDEX t10_idx_00000062 (b=?)
}
do_setup_rec_test $tn.15 {
CREATE TABLE t1(a, b);
CREATE TABLE t2(c, d);
WITH s(i) AS ( VALUES(1) UNION ALL SELECT i+1 FROM s WHERE i<100)
INSERT INTO t1 SELECT (i-1)/50, (i-1)/20 FROM s;
WITH s(i) AS ( VALUES(1) UNION ALL SELECT i+1 FROM s WHERE i<100)
INSERT INTO t2 SELECT (i-1)/20, (i-1)/5 FROM s;
} {
SELECT * FROM t2, t1 WHERE b=? AND d=? AND t2.rowid=t1.rowid
} {
CREATE INDEX t2_idx_00000064 ON t2(d);
SEARCH TABLE t2 USING INDEX t2_idx_00000064 (d=?)
SEARCH TABLE t1 USING INTEGER PRIMARY KEY (rowid=?)
}
do_setup_rec_test $tn.16 {
CREATE TABLE t1(a, b);
} {
SELECT * FROM t1 WHERE b IS NOT NULL;
} {
(no new indexes)
SCAN TABLE t1
}
do_setup_rec_test $tn.17.1 {
CREATE TABLE example (A INTEGER, B INTEGER, C INTEGER, PRIMARY KEY (A,B));
} {
SELECT * FROM example WHERE a=?
} {
(no new indexes)
SEARCH TABLE example USING INDEX sqlite_autoindex_example_1 (A=?)
}
do_setup_rec_test $tn.17.2 {
CREATE TABLE example (A INTEGER, B INTEGER, C INTEGER, PRIMARY KEY (A,B));
} {
SELECT * FROM example WHERE b=?
} {
CREATE INDEX example_idx_00000042 ON example(B);
SEARCH TABLE example USING INDEX example_idx_00000042 (B=?)
}
do_setup_rec_test $tn.17.3 {
CREATE TABLE example (A INTEGER, B INTEGER, C INTEGER, PRIMARY KEY (A,B));
} {
SELECT * FROM example WHERE a=? AND b=?
} {
(no new indexes)
SEARCH TABLE example USING INDEX sqlite_autoindex_example_1 (A=? AND B=?)
}
do_setup_rec_test $tn.17.4 {
CREATE TABLE example (A INTEGER, B INTEGER, C INTEGER, PRIMARY KEY (A,B));
} {
SELECT * FROM example WHERE a=? AND b>?
} {
(no new indexes)
SEARCH TABLE example USING INDEX sqlite_autoindex_example_1 (A=? AND B>?)
}
do_setup_rec_test $tn.17.5 {
CREATE TABLE example (A INTEGER, B INTEGER, C INTEGER, PRIMARY KEY (A,B));
} {
SELECT * FROM example WHERE a>? AND b=?
} {
CREATE INDEX example_idx_0000cb3f ON example(B, A);
SEARCH TABLE example USING INDEX example_idx_0000cb3f (B=? AND A>?)
}
do_setup_rec_test $tn.18.0 {
CREATE TABLE SomeObject (
a INTEGER PRIMARY KEY,
x TEXT GENERATED ALWAYS AS(HEX(a)) VIRTUAL
);
} {
SELECT x FROM SomeObject;
} {
(no new indexes)
SCAN TABLE SomeObject
}
do_setup_rec_test $tn.18.1 {
CREATE TABLE SomeObject (
a INTEGER PRIMARY KEY,
x TEXT GENERATED ALWAYS AS(HEX(a)) VIRTUAL
);
} {
SELECT * FROM SomeObject WHERE x=?;
} {
CREATE INDEX SomeObject_idx_00000078 ON SomeObject(x);
SEARCH TABLE SomeObject USING COVERING INDEX SomeObject_idx_00000078 (x=?)
}
}
proc do_candidates_test {tn sql res} {
set res [squish [string trim $res]]
set expert [sqlite3_expert_new db]
$expert sql $sql
$expert analyze
set candidates [squish [string trim [$expert report 0 candidates]]]
$expert destroy
uplevel [list do_test $tn [list set {} $candidates] $res]
}
reset_db
do_execsql_test 5.0 {
CREATE TABLE t1(a, b);
CREATE TABLE t2(c, d);
WITH s(i) AS ( VALUES(1) UNION ALL SELECT i+1 FROM s WHERE i<100)
INSERT INTO t1 SELECT (i-1)/50, (i-1)/20 FROM s;
WITH s(i) AS ( VALUES(1) UNION ALL SELECT i+1 FROM s WHERE i<100)
INSERT INTO t2 SELECT (i-1)/20, (i-1)/5 FROM s;
}
do_candidates_test 5.1 {
SELECT * FROM t1,t2 WHERE (b=? OR a=?) AND (c=? OR d=?)
} {
CREATE INDEX t1_idx_00000062 ON t1(b); -- stat1: 100 20
CREATE INDEX t1_idx_00000061 ON t1(a); -- stat1: 100 50
CREATE INDEX t2_idx_00000063 ON t2(c); -- stat1: 100 20
CREATE INDEX t2_idx_00000064 ON t2(d); -- stat1: 100 5
}
do_candidates_test 5.2 {
SELECT * FROM t1,t2 WHERE a=? AND b=? AND c=? AND d=?
} {
CREATE INDEX t1_idx_000123a7 ON t1(a, b); -- stat1: 100 50 17
CREATE INDEX t2_idx_0001295b ON t2(c, d); -- stat1: 100 20 5
}
do_execsql_test 5.3 {
CREATE INDEX t1_idx_00000061 ON t1(a); -- stat1: 100 50
CREATE INDEX t1_idx_00000062 ON t1(b); -- stat1: 100 20
CREATE INDEX t1_idx_000123a7 ON t1(a, b); -- stat1: 100 50 16
CREATE INDEX t2_idx_00000063 ON t2(c); -- stat1: 100 20
CREATE INDEX t2_idx_00000064 ON t2(d); -- stat1: 100 5
CREATE INDEX t2_idx_0001295b ON t2(c, d); -- stat1: 100 20 5
ANALYZE;
SELECT * FROM sqlite_stat1 ORDER BY 1, 2;
} {
t1 t1_idx_00000061 {100 50}
t1 t1_idx_00000062 {100 20}
t1 t1_idx_000123a7 {100 50 17}
t2 t2_idx_00000063 {100 20}
t2 t2_idx_00000064 {100 5}
t2 t2_idx_0001295b {100 20 5}
}
finish_test

File diff suppressed because it is too large Load Diff

View File

@ -1,168 +0,0 @@
/*
** 2017 April 07
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
*/
#if !defined(SQLITEEXPERT_H)
#define SQLITEEXPERT_H 1
#include "sqlite3.h"
typedef struct sqlite3expert sqlite3expert;
/*
** Create a new sqlite3expert object.
**
** If successful, a pointer to the new object is returned and (*pzErr) set
** to NULL. Or, if an error occurs, NULL is returned and (*pzErr) set to
** an English-language error message. In this case it is the responsibility
** of the caller to eventually free the error message buffer using
** sqlite3_free().
*/
sqlite3expert *sqlite3_expert_new(sqlite3 *db, char **pzErr);
/*
** Configure an sqlite3expert object.
**
** EXPERT_CONFIG_SAMPLE:
** By default, sqlite3_expert_analyze() generates sqlite_stat1 data for
** each candidate index. This involves scanning and sorting the entire
** contents of each user database table once for each candidate index
** associated with the table. For large databases, this can be
** prohibitively slow. This option allows the sqlite3expert object to
** be configured so that sqlite_stat1 data is instead generated based on a
** subset of each table, or so that no sqlite_stat1 data is used at all.
**
** A single integer argument is passed to this option. If the value is less
** than or equal to zero, then no sqlite_stat1 data is generated or used by
** the analysis - indexes are recommended based on the database schema only.
** Or, if the value is 100 or greater, complete sqlite_stat1 data is
** generated for each candidate index (this is the default). Finally, if the
** value falls between 0 and 100, then it represents the percentage of user
** table rows that should be considered when generating sqlite_stat1 data.
**
** Examples:
**
** // Do not generate any sqlite_stat1 data
** sqlite3_expert_config(pExpert, EXPERT_CONFIG_SAMPLE, 0);
**
** // Generate sqlite_stat1 data based on 10% of the rows in each table.
** sqlite3_expert_config(pExpert, EXPERT_CONFIG_SAMPLE, 10);
*/
int sqlite3_expert_config(sqlite3expert *p, int op, ...);
#define EXPERT_CONFIG_SAMPLE 1 /* int */
/*
** Specify zero or more SQL statements to be included in the analysis.
**
** Buffer zSql must contain zero or more complete SQL statements. This
** function parses all statements contained in the buffer and adds them
** to the internal list of statements to analyze. If successful, SQLITE_OK
** is returned and (*pzErr) set to NULL. Or, if an error occurs - for example
** due to an error in the SQL - an SQLite error code is returned and (*pzErr)
** may be set to point to an English language error message. In this case
** the caller is responsible for eventually freeing the error message buffer
** using sqlite3_free().
**
** If an error does occur while processing one of the statements in the
** buffer passed as the second argument, none of the statements in the
** buffer are added to the analysis.
**
** This function must be called before sqlite3_expert_analyze(). If a call
** to this function is made on an sqlite3expert object that has already
** been passed to sqlite3_expert_analyze() SQLITE_MISUSE is returned
** immediately and no statements are added to the analysis.
*/
int sqlite3_expert_sql(
sqlite3expert *p, /* From a successful sqlite3_expert_new() */
const char *zSql, /* SQL statement(s) to add */
char **pzErr /* OUT: Error message (if any) */
);
/*
** This function is called after the sqlite3expert object has been configured
** with all SQL statements using sqlite3_expert_sql() to actually perform
** the analysis. Once this function has been called, it is not possible to
** add further SQL statements to the analysis.
**
** If successful, SQLITE_OK is returned and (*pzErr) is set to NULL. Or, if
** an error occurs, an SQLite error code is returned and (*pzErr) set to
** point to a buffer containing an English language error message. In this
** case it is the responsibility of the caller to eventually free the buffer
** using sqlite3_free().
**
** If an error does occur within this function, the sqlite3expert object
** is no longer useful for any purpose. At that point it is no longer
** possible to add further SQL statements to the object or to re-attempt
** the analysis. The sqlite3expert object must still be freed using a call
** sqlite3_expert_destroy().
*/
int sqlite3_expert_analyze(sqlite3expert *p, char **pzErr);
/*
** Return the total number of statements loaded using sqlite3_expert_sql().
** The total number of SQL statements may be different from the total number
** of calls to sqlite3_expert_sql().
*/
int sqlite3_expert_count(sqlite3expert*);
/*
** Return a component of the report.
**
** This function is called after sqlite3_expert_analyze() to extract the
** results of the analysis. Each call to this function returns either a
** NULL pointer or a pointer to a buffer containing a nul-terminated string.
** The value passed as the third argument must be one of the EXPERT_REPORT_*
** #define constants defined below.
**
** For some EXPERT_REPORT_* parameters, the buffer returned contains
** information relating to a specific SQL statement. In these cases that
** SQL statement is identified by the value passed as the second argument.
** SQL statements are numbered from 0 in the order in which they are parsed.
** If an out-of-range value (less than zero or equal to or greater than the
** value returned by sqlite3_expert_count()) is passed as the second argument
** along with such an EXPERT_REPORT_* parameter, NULL is always returned.
**
** EXPERT_REPORT_SQL:
** Return the text of SQL statement iStmt.
**
** EXPERT_REPORT_INDEXES:
** Return a buffer containing the CREATE INDEX statements for all recommended
** indexes for statement iStmt. If there are no new recommended indexes, NULL
** is returned.
**
** EXPERT_REPORT_PLAN:
** Return a buffer containing the EXPLAIN QUERY PLAN output for SQL query
** iStmt after the proposed indexes have been added to the database schema.
**
** EXPERT_REPORT_CANDIDATES:
** Return a pointer to a buffer containing the CREATE INDEX statements
** for all indexes that were tested (for all SQL statements). The iStmt
** parameter is ignored for EXPERT_REPORT_CANDIDATES calls.
*/
const char *sqlite3_expert_report(sqlite3expert*, int iStmt, int eReport);
/*
** Values for the third argument passed to sqlite3_expert_report().
*/
#define EXPERT_REPORT_SQL 1
#define EXPERT_REPORT_INDEXES 2
#define EXPERT_REPORT_PLAN 3
#define EXPERT_REPORT_CANDIDATES 4
/*
** Free an (sqlite3expert*) handle and all associated resources. There
** should be one call to this function for each successful call to
** sqlite3_expert_new().
*/
void sqlite3_expert_destroy(sqlite3expert*);
#endif /* !defined(SQLITEEXPERT_H) */

View File

@ -1,220 +0,0 @@
/*
** 2017 April 07
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
*/
#if defined(SQLITE_TEST)
#include "sqlite3expert.h"
#include <assert.h>
#include <string.h>
#if defined(INCLUDE_SQLITE_TCL_H)
# include "sqlite_tcl.h"
#else
# include "tcl.h"
# ifndef SQLITE_TCLAPI
# define SQLITE_TCLAPI
# endif
#endif
#ifndef SQLITE_OMIT_VIRTUALTABLE
/*
** Extract an sqlite3* db handle from the object passed as the second
** argument. If successful, set *pDb to point to the db handle and return
** TCL_OK. Otherwise, return TCL_ERROR.
*/
static int dbHandleFromObj(Tcl_Interp *interp, Tcl_Obj *pObj, sqlite3 **pDb){
Tcl_CmdInfo info;
if( 0==Tcl_GetCommandInfo(interp, Tcl_GetString(pObj), &info) ){
Tcl_AppendResult(interp, "no such handle: ", Tcl_GetString(pObj), 0);
return TCL_ERROR;
}
*pDb = *(sqlite3 **)info.objClientData;
return TCL_OK;
}
/*
** Tclcmd: $expert sql SQL
** $expert analyze
** $expert count
** $expert report STMT EREPORT
** $expert destroy
*/
static int SQLITE_TCLAPI testExpertCmd(
void *clientData,
Tcl_Interp *interp,
int objc,
Tcl_Obj *CONST objv[]
){
sqlite3expert *pExpert = (sqlite3expert*)clientData;
struct Subcmd {
const char *zSub;
int nArg;
const char *zMsg;
} aSub[] = {
{ "sql", 1, "TABLE", }, /* 0 */
{ "analyze", 0, "", }, /* 1 */
{ "count", 0, "", }, /* 2 */
{ "report", 2, "STMT EREPORT", }, /* 3 */
{ "destroy", 0, "", }, /* 4 */
{ 0 }
};
int iSub;
int rc = TCL_OK;
char *zErr = 0;
if( objc<2 ){
Tcl_WrongNumArgs(interp, 1, objv, "SUBCOMMAND ...");
return TCL_ERROR;
}
rc = Tcl_GetIndexFromObjStruct(interp,
objv[1], aSub, sizeof(aSub[0]), "sub-command", 0, &iSub
);
if( rc!=TCL_OK ) return rc;
if( objc!=2+aSub[iSub].nArg ){
Tcl_WrongNumArgs(interp, 2, objv, aSub[iSub].zMsg);
return TCL_ERROR;
}
switch( iSub ){
case 0: { /* sql */
char *zArg = Tcl_GetString(objv[2]);
rc = sqlite3_expert_sql(pExpert, zArg, &zErr);
break;
}
case 1: { /* analyze */
rc = sqlite3_expert_analyze(pExpert, &zErr);
break;
}
case 2: { /* count */
int n = sqlite3_expert_count(pExpert);
Tcl_SetObjResult(interp, Tcl_NewIntObj(n));
break;
}
case 3: { /* report */
const char *aEnum[] = {
"sql", "indexes", "plan", "candidates", 0
};
int iEnum;
int iStmt;
const char *zReport;
if( Tcl_GetIntFromObj(interp, objv[2], &iStmt)
|| Tcl_GetIndexFromObj(interp, objv[3], aEnum, "report", 0, &iEnum)
){
return TCL_ERROR;
}
assert( EXPERT_REPORT_SQL==1 );
assert( EXPERT_REPORT_INDEXES==2 );
assert( EXPERT_REPORT_PLAN==3 );
assert( EXPERT_REPORT_CANDIDATES==4 );
zReport = sqlite3_expert_report(pExpert, iStmt, 1+iEnum);
Tcl_SetObjResult(interp, Tcl_NewStringObj(zReport, -1));
break;
}
default: /* destroy */
assert( iSub==4 );
Tcl_DeleteCommand(interp, Tcl_GetString(objv[0]));
break;
}
if( rc!=TCL_OK ){
if( zErr ){
Tcl_SetObjResult(interp, Tcl_NewStringObj(zErr, -1));
}else{
extern const char *sqlite3ErrName(int);
Tcl_SetObjResult(interp, Tcl_NewStringObj(sqlite3ErrName(rc), -1));
}
}
sqlite3_free(zErr);
return rc;
}
static void SQLITE_TCLAPI testExpertDel(void *clientData){
sqlite3expert *pExpert = (sqlite3expert*)clientData;
sqlite3_expert_destroy(pExpert);
}
/*
** sqlite3_expert_new DB
*/
static int SQLITE_TCLAPI test_sqlite3_expert_new(
void * clientData,
Tcl_Interp *interp,
int objc,
Tcl_Obj *CONST objv[]
){
static int iCmd = 0;
sqlite3 *db;
char *zCmd = 0;
char *zErr = 0;
sqlite3expert *pExpert;
int rc = TCL_OK;
if( objc!=2 ){
Tcl_WrongNumArgs(interp, 1, objv, "DB");
return TCL_ERROR;
}
if( dbHandleFromObj(interp, objv[1], &db) ){
return TCL_ERROR;
}
zCmd = sqlite3_mprintf("sqlite3expert%d", ++iCmd);
if( zCmd==0 ){
Tcl_AppendResult(interp, "out of memory", (char*)0);
return TCL_ERROR;
}
pExpert = sqlite3_expert_new(db, &zErr);
if( pExpert==0 ){
Tcl_AppendResult(interp, zErr, (char*)0);
rc = TCL_ERROR;
}else{
void *p = (void*)pExpert;
Tcl_CreateObjCommand(interp, zCmd, testExpertCmd, p, testExpertDel);
Tcl_SetObjResult(interp, Tcl_NewStringObj(zCmd, -1));
}
sqlite3_free(zCmd);
sqlite3_free(zErr);
return rc;
}
#endif /* ifndef SQLITE_OMIT_VIRTUALTABLE */
int TestExpert_Init(Tcl_Interp *interp){
#ifndef SQLITE_OMIT_VIRTUALTABLE
struct Cmd {
const char *zCmd;
Tcl_ObjCmdProc *xProc;
} aCmd[] = {
{ "sqlite3_expert_new", test_sqlite3_expert_new },
};
int i;
for(i=0; i<sizeof(aCmd)/sizeof(struct Cmd); i++){
struct Cmd *p = &aCmd[i];
Tcl_CreateObjCommand(interp, p->zCmd, p->xProc, 0, 0);
}
#endif
return TCL_OK;
}
#endif

View File

@ -1,2 +0,0 @@
This folder contains source code to the first full-text search
extension for SQLite.

View File

@ -1,404 +0,0 @@
/*
** 2001 September 22
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
** This is the implementation of generic hash-tables used in SQLite.
** We've modified it slightly to serve as a standalone hash table
** implementation for the full-text indexing module.
*/
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "ft_hash.h"
void *malloc_and_zero(int n){
void *p = malloc(n);
if( p ){
memset(p, 0, n);
}
return p;
}
/* Turn bulk memory into a hash table object by initializing the
** fields of the Hash structure.
**
** "pNew" is a pointer to the hash table that is to be initialized.
** keyClass is one of the constants HASH_INT, HASH_POINTER,
** HASH_BINARY, or HASH_STRING. The value of keyClass
** determines what kind of key the hash table will use. "copyKey" is
** true if the hash table should make its own private copy of keys and
** false if it should just use the supplied pointer. CopyKey only makes
** sense for HASH_STRING and HASH_BINARY and is ignored
** for other key classes.
*/
void HashInit(Hash *pNew, int keyClass, int copyKey){
assert( pNew!=0 );
assert( keyClass>=HASH_STRING && keyClass<=HASH_BINARY );
pNew->keyClass = keyClass;
#if 0
if( keyClass==HASH_POINTER || keyClass==HASH_INT ) copyKey = 0;
#endif
pNew->copyKey = copyKey;
pNew->first = 0;
pNew->count = 0;
pNew->htsize = 0;
pNew->ht = 0;
pNew->xMalloc = malloc_and_zero;
pNew->xFree = free;
}
/* Remove all entries from a hash table. Reclaim all memory.
** Call this routine to delete a hash table or to reset a hash table
** to the empty state.
*/
void HashClear(Hash *pH){
HashElem *elem; /* For looping over all elements of the table */
assert( pH!=0 );
elem = pH->first;
pH->first = 0;
if( pH->ht ) pH->xFree(pH->ht);
pH->ht = 0;
pH->htsize = 0;
while( elem ){
HashElem *next_elem = elem->next;
if( pH->copyKey && elem->pKey ){
pH->xFree(elem->pKey);
}
pH->xFree(elem);
elem = next_elem;
}
pH->count = 0;
}
#if 0 /* NOT USED */
/*
** Hash and comparison functions when the mode is HASH_INT
*/
static int intHash(const void *pKey, int nKey){
return nKey ^ (nKey<<8) ^ (nKey>>8);
}
static int intCompare(const void *pKey1, int n1, const void *pKey2, int n2){
return n2 - n1;
}
#endif
#if 0 /* NOT USED */
/*
** Hash and comparison functions when the mode is HASH_POINTER
*/
static int ptrHash(const void *pKey, int nKey){
uptr x = Addr(pKey);
return x ^ (x<<8) ^ (x>>8);
}
static int ptrCompare(const void *pKey1, int n1, const void *pKey2, int n2){
if( pKey1==pKey2 ) return 0;
if( pKey1<pKey2 ) return -1;
return 1;
}
#endif
/*
** Hash and comparison functions when the mode is HASH_STRING
*/
static int strHash(const void *pKey, int nKey){
const char *z = (const char *)pKey;
int h = 0;
if( nKey<=0 ) nKey = (int) strlen(z);
while( nKey > 0 ){
h = (h<<3) ^ h ^ *z++;
nKey--;
}
return h & 0x7fffffff;
}
static int strCompare(const void *pKey1, int n1, const void *pKey2, int n2){
if( n1!=n2 ) return 1;
return strncmp((const char*)pKey1,(const char*)pKey2,n1);
}
/*
** Hash and comparison functions when the mode is HASH_BINARY
*/
static int binHash(const void *pKey, int nKey){
int h = 0;
const char *z = (const char *)pKey;
while( nKey-- > 0 ){
h = (h<<3) ^ h ^ *(z++);
}
return h & 0x7fffffff;
}
static int binCompare(const void *pKey1, int n1, const void *pKey2, int n2){
if( n1!=n2 ) return 1;
return memcmp(pKey1,pKey2,n1);
}
/*
** Return a pointer to the appropriate hash function given the key class.
**
** The C syntax in this function definition may be unfamiliar to some
** programmers, so we provide the following additional explanation:
**
** The name of the function is "hashFunction". The function takes a
** single parameter "keyClass". The return value of hashFunction()
** is a pointer to another function. Specifically, the return value
** of hashFunction() is a pointer to a function that takes two parameters
** with types "const void*" and "int" and returns an "int".
*/
static int (*hashFunction(int keyClass))(const void*,int){
#if 0 /* HASH_INT and HASH_POINTER are never used */
switch( keyClass ){
case HASH_INT: return &intHash;
case HASH_POINTER: return &ptrHash;
case HASH_STRING: return &strHash;
case HASH_BINARY: return &binHash;
default: break;
}
return 0;
#else
if( keyClass==HASH_STRING ){
return &strHash;
}else{
assert( keyClass==HASH_BINARY );
return &binHash;
}
#endif
}
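/* Aside (not part of the original source): the declaration above may be
** easier to read with a typedef for the returned function-pointer type.
** The following equivalent formulation is a sketch only:
**
**   typedef int (*HashFunc)(const void*,int);
**
**   static HashFunc hashFunction(int keyClass){
**     return keyClass==HASH_STRING ? &strHash : &binHash;
**   }
*/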
/*
** Return a pointer to the appropriate comparison function given the key class.
**
** For help in interpreting the obscure C code in the function definition,
** see the header comment on the previous function.
*/
static int (*compareFunction(int keyClass))(const void*,int,const void*,int){
#if 0 /* HASH_INT and HASH_POINTER are never used */
switch( keyClass ){
case HASH_INT: return &intCompare;
case HASH_POINTER: return &ptrCompare;
case HASH_STRING: return &strCompare;
case HASH_BINARY: return &binCompare;
default: break;
}
return 0;
#else
if( keyClass==HASH_STRING ){
return &strCompare;
}else{
assert( keyClass==HASH_BINARY );
return &binCompare;
}
#endif
}
/* Link an element into the hash table
*/
static void insertElement(
Hash *pH, /* The complete hash table */
struct _ht *pEntry, /* The entry into which pNew is inserted */
HashElem *pNew /* The element to be inserted */
){
HashElem *pHead; /* First element already in pEntry */
pHead = pEntry->chain;
if( pHead ){
pNew->next = pHead;
pNew->prev = pHead->prev;
if( pHead->prev ){ pHead->prev->next = pNew; }
else { pH->first = pNew; }
pHead->prev = pNew;
}else{
pNew->next = pH->first;
if( pH->first ){ pH->first->prev = pNew; }
pNew->prev = 0;
pH->first = pNew;
}
pEntry->count++;
pEntry->chain = pNew;
}
/* Resize the hash table so that it contains "new_size" buckets.
** "new_size" must be a power of 2. The hash table might fail
** to resize if the memory allocation (xMalloc) fails.
*/
static void rehash(Hash *pH, int new_size){
struct _ht *new_ht; /* The new hash table */
HashElem *elem, *next_elem; /* For looping over existing elements */
int (*xHash)(const void*,int); /* The hash function */
assert( (new_size & (new_size-1))==0 );
new_ht = (struct _ht *)pH->xMalloc( new_size*sizeof(struct _ht) );
if( new_ht==0 ) return;
if( pH->ht ) pH->xFree(pH->ht);
pH->ht = new_ht;
pH->htsize = new_size;
xHash = hashFunction(pH->keyClass);
for(elem=pH->first, pH->first=0; elem; elem = next_elem){
int h = (*xHash)(elem->pKey, elem->nKey) & (new_size-1);
next_elem = elem->next;
insertElement(pH, &new_ht[h], elem);
}
}
/* This function (for internal use only) locates an element in a
** hash table that matches the given key. The hash for this key has
** already been computed and is passed as the 4th parameter.
*/
static HashElem *findElementGivenHash(
const Hash *pH, /* The pH to be searched */
const void *pKey, /* The key we are searching for */
int nKey,
int h /* The hash for this key. */
){
HashElem *elem; /* Used to loop thru the element list */
int count; /* Number of elements left to test */
int (*xCompare)(const void*,int,const void*,int); /* comparison function */
if( pH->ht ){
struct _ht *pEntry = &pH->ht[h];
elem = pEntry->chain;
count = pEntry->count;
xCompare = compareFunction(pH->keyClass);
while( count-- && elem ){
if( (*xCompare)(elem->pKey,elem->nKey,pKey,nKey)==0 ){
return elem;
}
elem = elem->next;
}
}
return 0;
}
/* Remove a single entry from the hash table given a pointer to that
** element and a hash on the element's key.
*/
static void removeElementGivenHash(
Hash *pH, /* The pH containing "elem" */
HashElem* elem, /* The element to be removed from the pH */
int h /* Hash value for the element */
){
struct _ht *pEntry;
if( elem->prev ){
elem->prev->next = elem->next;
}else{
pH->first = elem->next;
}
if( elem->next ){
elem->next->prev = elem->prev;
}
pEntry = &pH->ht[h];
if( pEntry->chain==elem ){
pEntry->chain = elem->next;
}
pEntry->count--;
if( pEntry->count<=0 ){
pEntry->chain = 0;
}
if( pH->copyKey && elem->pKey ){
pH->xFree(elem->pKey);
}
pH->xFree( elem );
pH->count--;
if( pH->count<=0 ){
assert( pH->first==0 );
assert( pH->count==0 );
HashClear(pH);
}
}
/* Attempt to locate an element of the hash table pH with a key
** that matches pKey,nKey. Return the data for this element if it is
** found, or NULL if there is no match.
*/
void *HashFind(const Hash *pH, const void *pKey, int nKey){
int h; /* A hash on key */
HashElem *elem; /* The element that matches key */
int (*xHash)(const void*,int); /* The hash function */
if( pH==0 || pH->ht==0 ) return 0;
xHash = hashFunction(pH->keyClass);
assert( xHash!=0 );
h = (*xHash)(pKey,nKey);
assert( (pH->htsize & (pH->htsize-1))==0 );
elem = findElementGivenHash(pH,pKey,nKey, h & (pH->htsize-1));
return elem ? elem->data : 0;
}
/* Insert an element into the hash table pH. The key is pKey,nKey
** and the data is "data".
**
** If no element exists with a matching key, then a new
** element is created. A copy of the key is made if the copyKey
** flag is set. NULL is returned.
**
** If another element already exists with the same key, then the
** new data replaces the old data and the old data is returned.
** The key is not copied in this instance. If a malloc fails, then
** the new data is returned and the hash table is unchanged.
**
** If the "data" parameter to this function is NULL, then the
** element corresponding to "key" is removed from the hash table.
*/
void *HashInsert(Hash *pH, const void *pKey, int nKey, void *data){
int hraw; /* Raw hash value of the key */
int h; /* the hash of the key modulo hash table size */
HashElem *elem; /* Used to loop thru the element list */
HashElem *new_elem; /* New element added to the pH */
int (*xHash)(const void*,int); /* The hash function */
assert( pH!=0 );
xHash = hashFunction(pH->keyClass);
assert( xHash!=0 );
hraw = (*xHash)(pKey, nKey);
assert( (pH->htsize & (pH->htsize-1))==0 );
h = hraw & (pH->htsize-1);
elem = findElementGivenHash(pH,pKey,nKey,h);
if( elem ){
void *old_data = elem->data;
if( data==0 ){
removeElementGivenHash(pH,elem,h);
}else{
elem->data = data;
}
return old_data;
}
if( data==0 ) return 0;
new_elem = (HashElem*)pH->xMalloc( sizeof(HashElem) );
if( new_elem==0 ) return data;
if( pH->copyKey && pKey!=0 ){
new_elem->pKey = pH->xMalloc( nKey );
if( new_elem->pKey==0 ){
pH->xFree(new_elem);
return data;
}
memcpy((void*)new_elem->pKey, pKey, nKey);
}else{
new_elem->pKey = (void*)pKey;
}
new_elem->nKey = nKey;
pH->count++;
if( pH->htsize==0 ){
rehash(pH,8);
if( pH->htsize==0 ){
pH->count = 0;
pH->xFree(new_elem);
return data;
}
}
if( pH->count > pH->htsize ){
rehash(pH,pH->htsize*2);
}
assert( pH->htsize>0 );
assert( (pH->htsize & (pH->htsize-1))==0 );
h = hraw & (pH->htsize-1);
insertElement(pH, &pH->ht[h], new_elem);
new_elem->data = data;
return 0;
}

View File

@ -1,111 +0,0 @@
/*
** 2001 September 22
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
** This is the header file for the generic hash-table implementation
** used in SQLite. We've modified it slightly to serve as a standalone
** hash table implementation for the full-text indexing module.
**
*/
#ifndef _HASH_H_
#define _HASH_H_
/* Forward declarations of structures. */
typedef struct Hash Hash;
typedef struct HashElem HashElem;
/* A complete hash table is an instance of the following structure.
** The internals of this structure are intended to be opaque -- client
** code should not attempt to access or modify the fields of this structure
** directly. Change this structure only by using the routines below.
** However, many of the "procedures" and "functions" for modifying and
** accessing this structure are really macros, so we can't really make
** this structure opaque.
*/
struct Hash {
char keyClass; /* HASH_INT, _POINTER, _STRING, _BINARY */
char copyKey; /* True if copy of key made on insert */
int count; /* Number of entries in this table */
HashElem *first; /* The first element of the array */
void *(*xMalloc)(int); /* malloc() function to use */
void (*xFree)(void *); /* free() function to use */
int htsize; /* Number of buckets in the hash table */
struct _ht { /* the hash table */
int count; /* Number of entries with this hash */
HashElem *chain; /* Pointer to first entry with this hash */
} *ht;
};
/* Each element in the hash table is an instance of the following
** structure. All elements are stored on a single doubly-linked list.
**
** Again, this structure is intended to be opaque, but it can't really
** be opaque because it is used by macros.
*/
struct HashElem {
HashElem *next, *prev; /* Next and previous elements in the table */
void *data; /* Data associated with this element */
void *pKey; int nKey; /* Key associated with this element */
};
/*
** There are 4 different modes of operation for a hash table:
**
** HASH_INT nKey is used as the key and pKey is ignored.
**
** HASH_POINTER pKey is used as the key and nKey is ignored.
**
** HASH_STRING pKey points to a string that is nKey bytes long
** (including the null-terminator, if any). Case
** is respected in comparisons.
**
** HASH_BINARY pKey points to binary data nKey bytes long.
** memcmp() is used to compare keys.
**
** A copy of the key is made for HASH_STRING and HASH_BINARY
** if the copyKey parameter to HashInit is 1.
*/
/* #define HASH_INT 1 // NOT USED */
/* #define HASH_POINTER 2 // NOT USED */
#define HASH_STRING 3
#define HASH_BINARY 4
/*
** Access routines. To delete, insert a NULL pointer.
*/
void HashInit(Hash*, int keytype, int copyKey);
void *HashInsert(Hash*, const void *pKey, int nKey, void *pData);
void *HashFind(const Hash*, const void *pKey, int nKey);
void HashClear(Hash*);
/*
** Macros for looping over all elements of a hash table. The idiom is
** like this:
**
** Hash h;
** HashElem *p;
** ...
** for(p=HashFirst(&h); p; p=HashNext(p)){
** SomeStructure *pData = HashData(p);
** // do something with pData
** }
*/
#define HashFirst(H) ((H)->first)
#define HashNext(E) ((E)->next)
#define HashData(E) ((E)->data)
#define HashKey(E) ((E)->pKey)
#define HashKeysize(E) ((E)->nKey)
/*
** Number of entries in a hash table
*/
#define HashCount(H) ((H)->count)
#endif /* _HASH_H_ */
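A minimal usage sketch of the API declared above (editorial illustration only;
it assumes the hash.c implementation earlier in this diff is linked in and
that pData is a caller-supplied pointer):

  Hash h;
  void *pData = ...;                 /* placeholder for the caller's payload */
  void *pFound;
  HashInit(&h, HASH_STRING, 1);      /* copyKey=1: table keeps its own copies */
  HashInsert(&h, "alpha", 6, pData); /* nKey counts the '\0' for HASH_STRING */
  pFound = HashFind(&h, "alpha", 6); /* == pData */
  HashInsert(&h, "alpha", 6, 0);     /* NULL data deletes the entry */
  HashClear(&h);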

File diff suppressed because it is too large Load Diff

View File

@ -1,11 +0,0 @@
#include "sqlite3.h"
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
int sqlite3Fts1Init(sqlite3 *db);
#ifdef __cplusplus
} /* extern "C" */
#endif /* __cplusplus */

View File

@ -1,369 +0,0 @@
/*
** 2001 September 22
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
** This is the implementation of generic hash-tables used in SQLite.
** We've modified it slightly to serve as a standalone hash table
** implementation for the full-text indexing module.
*/
#include <assert.h>
#include <stdlib.h>
#include <string.h>
/*
** The code in this file is only compiled if:
**
** * The FTS1 module is being built as an extension
** (in which case SQLITE_CORE is not defined), or
**
** * The FTS1 module is being built into the core of
** SQLite (in which case SQLITE_ENABLE_FTS1 is defined).
*/
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1)
#include "fts1_hash.h"
static void *malloc_and_zero(int n){
void *p = malloc(n);
if( p ){
memset(p, 0, n);
}
return p;
}
/* Turn bulk memory into a hash table object by initializing the
** fields of the Hash structure.
**
** "pNew" is a pointer to the hash table that is to be initialized.
** keyClass is one of the constants
** FTS1_HASH_BINARY or FTS1_HASH_STRING. The value of keyClass
** determines what kind of key the hash table will use. "copyKey" is
** true if the hash table should make its own private copy of keys and
** false if it should just use the supplied pointer.
*/
void sqlite3Fts1HashInit(fts1Hash *pNew, int keyClass, int copyKey){
assert( pNew!=0 );
assert( keyClass>=FTS1_HASH_STRING && keyClass<=FTS1_HASH_BINARY );
pNew->keyClass = keyClass;
pNew->copyKey = copyKey;
pNew->first = 0;
pNew->count = 0;
pNew->htsize = 0;
pNew->ht = 0;
pNew->xMalloc = malloc_and_zero;
pNew->xFree = free;
}
/* Remove all entries from a hash table. Reclaim all memory.
** Call this routine to delete a hash table or to reset a hash table
** to the empty state.
*/
void sqlite3Fts1HashClear(fts1Hash *pH){
fts1HashElem *elem; /* For looping over all elements of the table */
assert( pH!=0 );
elem = pH->first;
pH->first = 0;
if( pH->ht ) pH->xFree(pH->ht);
pH->ht = 0;
pH->htsize = 0;
while( elem ){
fts1HashElem *next_elem = elem->next;
if( pH->copyKey && elem->pKey ){
pH->xFree(elem->pKey);
}
pH->xFree(elem);
elem = next_elem;
}
pH->count = 0;
}
/*
** Hash and comparison functions when the mode is FTS1_HASH_STRING
*/
static int strHash(const void *pKey, int nKey){
const char *z = (const char *)pKey;
int h = 0;
if( nKey<=0 ) nKey = (int) strlen(z);
while( nKey > 0 ){
h = (h<<3) ^ h ^ *z++;
nKey--;
}
return h & 0x7fffffff;
}
static int strCompare(const void *pKey1, int n1, const void *pKey2, int n2){
if( n1!=n2 ) return 1;
return strncmp((const char*)pKey1,(const char*)pKey2,n1);
}
/*
** Hash and comparison functions when the mode is FTS1_HASH_BINARY
*/
static int binHash(const void *pKey, int nKey){
int h = 0;
const char *z = (const char *)pKey;
while( nKey-- > 0 ){
h = (h<<3) ^ h ^ *(z++);
}
return h & 0x7fffffff;
}
static int binCompare(const void *pKey1, int n1, const void *pKey2, int n2){
if( n1!=n2 ) return 1;
return memcmp(pKey1,pKey2,n1);
}
/*
** Return a pointer to the appropriate hash function given the key class.
**
** The C syntax in this function definition may be unfamiliar to some
** programmers, so we provide the following additional explanation:
**
** The name of the function is "hashFunction". The function takes a
** single parameter "keyClass". The return value of hashFunction()
** is a pointer to another function. Specifically, the return value
** of hashFunction() is a pointer to a function that takes two parameters
** with types "const void*" and "int" and returns an "int".
*/
static int (*hashFunction(int keyClass))(const void*,int){
if( keyClass==FTS1_HASH_STRING ){
return &strHash;
}else{
assert( keyClass==FTS1_HASH_BINARY );
return &binHash;
}
}
/*
** Return a pointer to the appropriate hash function given the key class.
**
** For help in interpreting the obscure C code in the function definition,
** see the header comment on the previous function.
*/
static int (*compareFunction(int keyClass))(const void*,int,const void*,int){
if( keyClass==FTS1_HASH_STRING ){
return &strCompare;
}else{
assert( keyClass==FTS1_HASH_BINARY );
return &binCompare;
}
}
/* Link an element into the hash table
*/
static void insertElement(
fts1Hash *pH, /* The complete hash table */
struct _fts1ht *pEntry, /* The entry into which pNew is inserted */
fts1HashElem *pNew /* The element to be inserted */
){
fts1HashElem *pHead; /* First element already in pEntry */
pHead = pEntry->chain;
if( pHead ){
pNew->next = pHead;
pNew->prev = pHead->prev;
if( pHead->prev ){ pHead->prev->next = pNew; }
else { pH->first = pNew; }
pHead->prev = pNew;
}else{
pNew->next = pH->first;
if( pH->first ){ pH->first->prev = pNew; }
pNew->prev = 0;
pH->first = pNew;
}
pEntry->count++;
pEntry->chain = pNew;
}
/* Resize the hash table so that it contains "new_size" buckets.
** "new_size" must be a power of 2. The hash table might fail
** to resize if sqliteMalloc() fails.
*/
static void rehash(fts1Hash *pH, int new_size){
struct _fts1ht *new_ht; /* The new hash table */
fts1HashElem *elem, *next_elem; /* For looping over existing elements */
int (*xHash)(const void*,int); /* The hash function */
assert( (new_size & (new_size-1))==0 );
new_ht = (struct _fts1ht *)pH->xMalloc( new_size*sizeof(struct _fts1ht) );
if( new_ht==0 ) return;
if( pH->ht ) pH->xFree(pH->ht);
pH->ht = new_ht;
pH->htsize = new_size;
xHash = hashFunction(pH->keyClass);
for(elem=pH->first, pH->first=0; elem; elem = next_elem){
int h = (*xHash)(elem->pKey, elem->nKey) & (new_size-1);
next_elem = elem->next;
insertElement(pH, &new_ht[h], elem);
}
}
/* This function (for internal use only) locates an element in a
** hash table that matches the given key. The hash for this key has
** already been computed and is passed as the 4th parameter.
*/
static fts1HashElem *findElementGivenHash(
const fts1Hash *pH, /* The pH to be searched */
const void *pKey, /* The key we are searching for */
int nKey,
int h /* The hash for this key. */
){
fts1HashElem *elem; /* Used to loop thru the element list */
int count; /* Number of elements left to test */
int (*xCompare)(const void*,int,const void*,int); /* comparison function */
if( pH->ht ){
struct _fts1ht *pEntry = &pH->ht[h];
elem = pEntry->chain;
count = pEntry->count;
xCompare = compareFunction(pH->keyClass);
while( count-- && elem ){
if( (*xCompare)(elem->pKey,elem->nKey,pKey,nKey)==0 ){
return elem;
}
elem = elem->next;
}
}
return 0;
}
/* Remove a single entry from the hash table given a pointer to that
** element and a hash on the element's key.
*/
static void removeElementGivenHash(
fts1Hash *pH, /* The pH containing "elem" */
fts1HashElem* elem, /* The element to be removed from the pH */
int h /* Hash value for the element */
){
struct _fts1ht *pEntry;
if( elem->prev ){
elem->prev->next = elem->next;
}else{
pH->first = elem->next;
}
if( elem->next ){
elem->next->prev = elem->prev;
}
pEntry = &pH->ht[h];
if( pEntry->chain==elem ){
pEntry->chain = elem->next;
}
pEntry->count--;
if( pEntry->count<=0 ){
pEntry->chain = 0;
}
if( pH->copyKey && elem->pKey ){
pH->xFree(elem->pKey);
}
pH->xFree( elem );
pH->count--;
if( pH->count<=0 ){
assert( pH->first==0 );
assert( pH->count==0 );
fts1HashClear(pH);
}
}
/* Attempt to locate an element of the hash table pH with a key
** that matches pKey,nKey. Return the data for this element if it is
** found, or NULL if there is no match.
*/
void *sqlite3Fts1HashFind(const fts1Hash *pH, const void *pKey, int nKey){
int h; /* A hash on key */
fts1HashElem *elem; /* The element that matches key */
int (*xHash)(const void*,int); /* The hash function */
if( pH==0 || pH->ht==0 ) return 0;
xHash = hashFunction(pH->keyClass);
assert( xHash!=0 );
h = (*xHash)(pKey,nKey);
assert( (pH->htsize & (pH->htsize-1))==0 );
elem = findElementGivenHash(pH,pKey,nKey, h & (pH->htsize-1));
return elem ? elem->data : 0;
}
/* Insert an element into the hash table pH. The key is pKey,nKey
** and the data is "data".
**
** If no element exists with a matching key, then a new
** element is created. A copy of the key is made if the copyKey
** flag is set. NULL is returned.
**
** If another element already exists with the same key, then the
** new data replaces the old data and the old data is returned.
** The key is not copied in this instance. If a malloc fails, then
** the new data is returned and the hash table is unchanged.
**
** If the "data" parameter to this function is NULL, then the
** element corresponding to "key" is removed from the hash table.
*/
void *sqlite3Fts1HashInsert(
fts1Hash *pH, /* The hash table to insert into */
const void *pKey, /* The key */
int nKey, /* Number of bytes in the key */
void *data /* The data */
){
int hraw; /* Raw hash value of the key */
int h; /* the hash of the key modulo hash table size */
fts1HashElem *elem; /* Used to loop thru the element list */
fts1HashElem *new_elem; /* New element added to the pH */
int (*xHash)(const void*,int); /* The hash function */
assert( pH!=0 );
xHash = hashFunction(pH->keyClass);
assert( xHash!=0 );
hraw = (*xHash)(pKey, nKey);
assert( (pH->htsize & (pH->htsize-1))==0 );
h = hraw & (pH->htsize-1);
elem = findElementGivenHash(pH,pKey,nKey,h);
if( elem ){
void *old_data = elem->data;
if( data==0 ){
removeElementGivenHash(pH,elem,h);
}else{
elem->data = data;
}
return old_data;
}
if( data==0 ) return 0;
new_elem = (fts1HashElem*)pH->xMalloc( sizeof(fts1HashElem) );
if( new_elem==0 ) return data;
if( pH->copyKey && pKey!=0 ){
new_elem->pKey = pH->xMalloc( nKey );
if( new_elem->pKey==0 ){
pH->xFree(new_elem);
return data;
}
memcpy((void*)new_elem->pKey, pKey, nKey);
}else{
new_elem->pKey = (void*)pKey;
}
new_elem->nKey = nKey;
pH->count++;
if( pH->htsize==0 ){
rehash(pH,8);
if( pH->htsize==0 ){
pH->count = 0;
pH->xFree(new_elem);
return data;
}
}
if( pH->count > pH->htsize ){
rehash(pH,pH->htsize*2);
}
assert( pH->htsize>0 );
assert( (pH->htsize & (pH->htsize-1))==0 );
h = hraw & (pH->htsize-1);
insertElement(pH, &pH->ht[h], new_elem);
new_elem->data = data;
return 0;
}
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1) */

View File

@ -1,112 +0,0 @@
/*
** 2001 September 22
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
** This is the header file for the generic hash-table implementation
** used in SQLite. We've modified it slightly to serve as a standalone
** hash table implementation for the full-text indexing module.
**
*/
#ifndef _FTS1_HASH_H_
#define _FTS1_HASH_H_
/* Forward declarations of structures. */
typedef struct fts1Hash fts1Hash;
typedef struct fts1HashElem fts1HashElem;
/* A complete hash table is an instance of the following structure.
** The internals of this structure are intended to be opaque -- client
** code should not attempt to access or modify the fields of this structure
** directly. Change this structure only by using the routines below.
** However, many of the "procedures" and "functions" for modifying and
** accessing this structure are really macros, so we can't really make
** this structure opaque.
*/
struct fts1Hash {
char keyClass; /* HASH_INT, _POINTER, _STRING, _BINARY */
char copyKey; /* True if copy of key made on insert */
int count; /* Number of entries in this table */
fts1HashElem *first; /* The first element of the array */
void *(*xMalloc)(int); /* malloc() function to use */
void (*xFree)(void *); /* free() function to use */
int htsize; /* Number of buckets in the hash table */
struct _fts1ht { /* the hash table */
int count; /* Number of entries with this hash */
fts1HashElem *chain; /* Pointer to first entry with this hash */
} *ht;
};
/* Each element in the hash table is an instance of the following
** structure. All elements are stored on a single doubly-linked list.
**
** Again, this structure is intended to be opaque, but it can't really
** be opaque because it is used by macros.
*/
struct fts1HashElem {
fts1HashElem *next, *prev; /* Next and previous elements in the table */
void *data; /* Data associated with this element */
void *pKey; int nKey; /* Key associated with this element */
};
/*
** There are 2 different modes of operation for a hash table:
**
** FTS1_HASH_STRING pKey points to a string that is nKey bytes long
** (including the null-terminator, if any). Case
** is respected in comparisons.
**
** FTS1_HASH_BINARY pKey points to binary data nKey bytes long.
** memcmp() is used to compare keys.
**
** A copy of the key is made if the copyKey parameter to fts1HashInit is 1.
*/
#define FTS1_HASH_STRING 1
#define FTS1_HASH_BINARY 2
/*
** Access routines. To delete, insert a NULL pointer.
*/
void sqlite3Fts1HashInit(fts1Hash*, int keytype, int copyKey);
void *sqlite3Fts1HashInsert(fts1Hash*, const void *pKey, int nKey, void *pData);
void *sqlite3Fts1HashFind(const fts1Hash*, const void *pKey, int nKey);
void sqlite3Fts1HashClear(fts1Hash*);
/*
** Shorthand for the functions above
*/
#define fts1HashInit sqlite3Fts1HashInit
#define fts1HashInsert sqlite3Fts1HashInsert
#define fts1HashFind sqlite3Fts1HashFind
#define fts1HashClear sqlite3Fts1HashClear
/*
** Macros for looping over all elements of a hash table. The idiom is
** like this:
**
** fts1Hash h;
** fts1HashElem *p;
** ...
** for(p=fts1HashFirst(&h); p; p=fts1HashNext(p)){
** SomeStructure *pData = fts1HashData(p);
** // do something with pData
** }
*/
#define fts1HashFirst(H) ((H)->first)
#define fts1HashNext(E) ((E)->next)
#define fts1HashData(E) ((E)->data)
#define fts1HashKey(E) ((E)->pKey)
#define fts1HashKeysize(E) ((E)->nKey)
/*
** Number of entries in a hash table
*/
#define fts1HashCount(H) ((H)->count)
#endif /* _FTS1_HASH_H_ */

View File

@ -1,643 +0,0 @@
/*
** 2006 September 30
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
** Implementation of the full-text-search tokenizer that implements
** a Porter stemmer.
*/
/*
** The code in this file is only compiled if:
**
** * The FTS1 module is being built as an extension
** (in which case SQLITE_CORE is not defined), or
**
** * The FTS1 module is being built into the core of
** SQLite (in which case SQLITE_ENABLE_FTS1 is defined).
*/
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1)
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include "fts1_tokenizer.h"
/*
** Class derived from sqlite3_tokenizer
*/
typedef struct porter_tokenizer {
sqlite3_tokenizer base; /* Base class */
} porter_tokenizer;
/*
** Class derived from sqlite3_tokenizer_cursor
*/
typedef struct porter_tokenizer_cursor {
sqlite3_tokenizer_cursor base;
const char *zInput; /* input we are tokenizing */
int nInput; /* size of the input */
int iOffset; /* current position in zInput */
int iToken; /* index of next token to be returned */
char *zToken; /* storage for current token */
int nAllocated; /* space allocated to zToken buffer */
} porter_tokenizer_cursor;
/* Forward declaration */
static const sqlite3_tokenizer_module porterTokenizerModule;
/*
** Create a new tokenizer instance.
*/
static int porterCreate(
int argc, const char * const *argv,
sqlite3_tokenizer **ppTokenizer
){
porter_tokenizer *t;
t = (porter_tokenizer *) calloc(sizeof(*t), 1);
if( t==NULL ) return SQLITE_NOMEM;
*ppTokenizer = &t->base;
return SQLITE_OK;
}
/*
** Destroy a tokenizer
*/
static int porterDestroy(sqlite3_tokenizer *pTokenizer){
free(pTokenizer);
return SQLITE_OK;
}
/*
** Prepare to begin tokenizing a particular string. The input
** string to be tokenized is zInput[0..nInput-1]. A cursor
** used to incrementally tokenize this string is returned in
** *ppCursor.
*/
static int porterOpen(
sqlite3_tokenizer *pTokenizer, /* The tokenizer */
const char *zInput, int nInput, /* String to be tokenized */
sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
){
porter_tokenizer_cursor *c;
c = (porter_tokenizer_cursor *) malloc(sizeof(*c));
if( c==NULL ) return SQLITE_NOMEM;
c->zInput = zInput;
if( zInput==0 ){
c->nInput = 0;
}else if( nInput<0 ){
c->nInput = (int)strlen(zInput);
}else{
c->nInput = nInput;
}
c->iOffset = 0; /* start tokenizing at the beginning */
c->iToken = 0;
c->zToken = NULL; /* no space allocated, yet. */
c->nAllocated = 0;
*ppCursor = &c->base;
return SQLITE_OK;
}
/*
** Close a tokenization cursor previously opened by a call to
** porterOpen() above.
*/
static int porterClose(sqlite3_tokenizer_cursor *pCursor){
porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor;
free(c->zToken);
free(c);
return SQLITE_OK;
}
/*
** Vowel or consonant
*/
static const char cType[] = {
0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0,
1, 1, 1, 2, 1
};
/*
** isConsonant() and isVowel() determine whether the first character in
** the string they point to is a consonant or a vowel, according
** to the Porter rules.
**
** A consonant is any letter other than 'a', 'e', 'i', 'o', or 'u'.
** 'Y' is a consonant unless it follows another consonant,
** in which case it is a vowel.
**
** In these routines, the letters are in reverse order.  So the 'y' rule
** is that 'y' is a consonant unless it is followed by another
** consonant.
*/
static int isVowel(const char*);
static int isConsonant(const char *z){
int j;
char x = *z;
if( x==0 ) return 0;
assert( x>='a' && x<='z' );
j = cType[x-'a'];
if( j<2 ) return j;
return z[1]==0 || isVowel(z + 1);
}
static int isVowel(const char *z){
int j;
char x = *z;
if( x==0 ) return 0;
assert( x>='a' && x<='z' );
j = cType[x-'a'];
if( j<2 ) return 1-j;
return isConsonant(z + 1);
}
/*
** Let any sequence of one or more vowels be represented by V and let
** C be a sequence of one or more consonants.  Then every word can be
** represented as:
**
** [C] (VC){m} [V]
**
** In prose:  A word is an optional consonant followed by zero or more
** vowel-consonant pairs followed by an optional vowel.  "m" is the
** number of vowel-consonant pairs.  This routine computes the value
** of m for the first i bytes of a word.
**
** Return true if the m-value for z is 1 or more. In other words,
** return true if z contains at least one vowel that is followed
** by a consonant.
**
** In this routine z[] is in reverse order. So we are really looking
** for an instance of a consonant followed by a vowel.
*/
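/* Editorial examples of m-values, not part of the original source (words are
** shown in forward order even though z[] here is reversed):
**
**     "tree"     =  C V        ->  m = 0
**     "trouble"  =  C V C V    ->  m = 1
**     "oaten"    =  V C V C    ->  m = 2
*/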
static int m_gt_0(const char *z){
while( isVowel(z) ){ z++; }
if( *z==0 ) return 0;
while( isConsonant(z) ){ z++; }
return *z!=0;
}
/* Like m_gt_0 above except we are looking for a value of m which is
** exactly 1
*/
static int m_eq_1(const char *z){
while( isVowel(z) ){ z++; }
if( *z==0 ) return 0;
while( isConsonant(z) ){ z++; }
if( *z==0 ) return 0;
while( isVowel(z) ){ z++; }
if( *z==0 ) return 1;
while( isConsonant(z) ){ z++; }
return *z==0;
}
/* Like m_gt_0 above except we are looking for a value of m>1 instead
** of m>0
*/
static int m_gt_1(const char *z){
while( isVowel(z) ){ z++; }
if( *z==0 ) return 0;
while( isConsonant(z) ){ z++; }
if( *z==0 ) return 0;
while( isVowel(z) ){ z++; }
if( *z==0 ) return 0;
while( isConsonant(z) ){ z++; }
return *z!=0;
}
/*
** Return TRUE if there is a vowel anywhere within z[0..n-1]
*/
static int hasVowel(const char *z){
while( isConsonant(z) ){ z++; }
return *z!=0;
}
/*
** Return TRUE if the word ends in a double consonant.
**
** The text is reversed here. So we are really looking at
** the first two characters of z[].
*/
static int doubleConsonant(const char *z){
return isConsonant(z) && z[0]==z[1] && isConsonant(z+1);
}
/*
** Return TRUE if the word ends with three letters which
** are consonant-vowel-consonant and where the final consonant
** is not 'w', 'x', or 'y'.
**
** The word is reversed here. So we are really checking the
** first three letters and the first one cannot be in [wxy].
*/
static int star_oh(const char *z){
return
z[0]!=0 && isConsonant(z) &&
z[0]!='w' && z[0]!='x' && z[0]!='y' &&
z[1]!=0 && isVowel(z+1) &&
z[2]!=0 && isConsonant(z+2);
}
/*
** If the word ends with zFrom and xCond() is true for the stem
** of the word that precedes the zFrom ending, then change the
** ending to zTo.
**
** The input word *pz and zFrom are both in reverse order. zTo
** is in normal order.
**
** Return TRUE if zFrom matches. Return FALSE if zFrom does not
** match.  Note that TRUE is returned even if xCond() fails and
** no substitution occurs.
*/
static int stem(
char **pz, /* The word being stemmed (Reversed) */
const char *zFrom, /* If the ending matches this... (Reversed) */
const char *zTo, /* ... change the ending to this (not reversed) */
int (*xCond)(const char*) /* Condition that must be true */
){
char *z = *pz;
while( *zFrom && *zFrom==*z ){ z++; zFrom++; }
if( *zFrom!=0 ) return 0;
if( xCond && !xCond(z) ) return 1;
while( *zTo ){
*(--z) = *(zTo++);
}
*pz = z;
return 1;
}
/*
** This is the fallback stemmer used when the porter stemmer is
** inappropriate. The input word is copied into the output with
** US-ASCII case folding. If the input word is too long (more
** than 20 bytes if it contains no digits or more than 6 bytes if
** it contains digits) then the word is truncated to 20 or 6 bytes
** by taking 10 or 3 bytes from the beginning and end.
*/
static void copy_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
int i, mx, j;
int hasDigit = 0;
for(i=0; i<nIn; i++){
int c = zIn[i];
if( c>='A' && c<='Z' ){
zOut[i] = c - 'A' + 'a';
}else{
if( c>='0' && c<='9' ) hasDigit = 1;
zOut[i] = c;
}
}
mx = hasDigit ? 3 : 10;
if( nIn>mx*2 ){
for(j=mx, i=nIn-mx; i<nIn; i++, j++){
zOut[j] = zOut[i];
}
i = j;
}
zOut[i] = 0;
*pnOut = i;
}
/*
** Stem the input word zIn[0..nIn-1]. Store the output in zOut.
** zOut is at least big enough to hold nIn bytes. Write the actual
** size of the output word (exclusive of the '\0' terminator) into *pnOut.
**
** Any upper-case characters in the US-ASCII character set ([A-Z])
** are converted to lower case. Upper-case UTF characters are
** unchanged.
**
** Words that are longer than about 20 bytes are stemmed by retaining
** a few bytes from the beginning and the end of the word. If the
** word contains digits, 3 bytes are taken from the beginning and
** 3 bytes from the end. For long words without digits, 10 bytes
** are taken from each end. US-ASCII case folding still applies.
**
** If the input word contains no digits but does contain characters not
** in [a-zA-Z] then no stemming is attempted and this routine just
** copies the input into the output with US-ASCII
** case folding.
**
** Stemming never increases the length of the word. So there is
** no chance of overflowing the zOut buffer.
*/
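/* Editorial examples, assumed from the published Porter algorithm rather than
** taken from this source:  "caresses" -> "caress", "ponies" -> "poni",
** "relational" -> "relate", "conditional" -> "condition".
*/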
static void porter_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
int i, j, c;
char zReverse[28];
char *z, *z2;
if( nIn<3 || nIn>=sizeof(zReverse)-7 ){
/* The word is too big or too small for the porter stemmer.
** Fallback to the copy stemmer */
copy_stemmer(zIn, nIn, zOut, pnOut);
return;
}
for(i=0, j=sizeof(zReverse)-6; i<nIn; i++, j--){
c = zIn[i];
if( c>='A' && c<='Z' ){
zReverse[j] = c + 'a' - 'A';
}else if( c>='a' && c<='z' ){
zReverse[j] = c;
}else{
/* The use of a character not in [a-zA-Z] means that we fallback
** to the copy stemmer */
copy_stemmer(zIn, nIn, zOut, pnOut);
return;
}
}
memset(&zReverse[sizeof(zReverse)-5], 0, 5);
z = &zReverse[j+1];
/* Step 1a */
if( z[0]=='s' ){
if(
!stem(&z, "sess", "ss", 0) &&
!stem(&z, "sei", "i", 0) &&
!stem(&z, "ss", "ss", 0)
){
z++;
}
}
/* Step 1b */
z2 = z;
if( stem(&z, "dee", "ee", m_gt_0) ){
/* Do nothing. The work was all in the test */
}else if(
(stem(&z, "gni", "", hasVowel) || stem(&z, "de", "", hasVowel))
&& z!=z2
){
if( stem(&z, "ta", "ate", 0) ||
stem(&z, "lb", "ble", 0) ||
stem(&z, "zi", "ize", 0) ){
/* Do nothing. The work was all in the test */
}else if( doubleConsonant(z) && (*z!='l' && *z!='s' && *z!='z') ){
z++;
}else if( m_eq_1(z) && star_oh(z) ){
*(--z) = 'e';
}
}
/* Step 1c */
if( z[0]=='y' && hasVowel(z+1) ){
z[0] = 'i';
}
/* Step 2 */
switch( z[1] ){
case 'a':
stem(&z, "lanoita", "ate", m_gt_0) ||
stem(&z, "lanoit", "tion", m_gt_0);
break;
case 'c':
stem(&z, "icne", "ence", m_gt_0) ||
stem(&z, "icna", "ance", m_gt_0);
break;
case 'e':
stem(&z, "rezi", "ize", m_gt_0);
break;
case 'g':
stem(&z, "igol", "log", m_gt_0);
break;
case 'l':
stem(&z, "ilb", "ble", m_gt_0) ||
stem(&z, "illa", "al", m_gt_0) ||
stem(&z, "iltne", "ent", m_gt_0) ||
stem(&z, "ile", "e", m_gt_0) ||
stem(&z, "ilsuo", "ous", m_gt_0);
break;
case 'o':
stem(&z, "noitazi", "ize", m_gt_0) ||
stem(&z, "noita", "ate", m_gt_0) ||
stem(&z, "rota", "ate", m_gt_0);
break;
case 's':
stem(&z, "msila", "al", m_gt_0) ||
stem(&z, "ssenevi", "ive", m_gt_0) ||
stem(&z, "ssenluf", "ful", m_gt_0) ||
stem(&z, "ssensuo", "ous", m_gt_0);
break;
case 't':
stem(&z, "itila", "al", m_gt_0) ||
stem(&z, "itivi", "ive", m_gt_0) ||
stem(&z, "itilib", "ble", m_gt_0);
break;
}
/* Step 3 */
switch( z[0] ){
case 'e':
stem(&z, "etaci", "ic", m_gt_0) ||
stem(&z, "evita", "", m_gt_0) ||
stem(&z, "ezila", "al", m_gt_0);
break;
case 'i':
stem(&z, "itici", "ic", m_gt_0);
break;
case 'l':
stem(&z, "laci", "ic", m_gt_0) ||
stem(&z, "luf", "", m_gt_0);
break;
case 's':
stem(&z, "ssen", "", m_gt_0);
break;
}
/* Step 4 */
switch( z[1] ){
case 'a':
if( z[0]=='l' && m_gt_1(z+2) ){
z += 2;
}
break;
case 'c':
if( z[0]=='e' && z[2]=='n' && (z[3]=='a' || z[3]=='e') && m_gt_1(z+4) ){
z += 4;
}
break;
case 'e':
if( z[0]=='r' && m_gt_1(z+2) ){
z += 2;
}
break;
case 'i':
if( z[0]=='c' && m_gt_1(z+2) ){
z += 2;
}
break;
case 'l':
if( z[0]=='e' && z[2]=='b' && (z[3]=='a' || z[3]=='i') && m_gt_1(z+4) ){
z += 4;
}
break;
case 'n':
if( z[0]=='t' ){
if( z[2]=='a' ){
if( m_gt_1(z+3) ){
z += 3;
}
}else if( z[2]=='e' ){
stem(&z, "tneme", "", m_gt_1) ||
stem(&z, "tnem", "", m_gt_1) ||
stem(&z, "tne", "", m_gt_1);
}
}
break;
case 'o':
if( z[0]=='u' ){
if( m_gt_1(z+2) ){
z += 2;
}
}else if( z[3]=='s' || z[3]=='t' ){
stem(&z, "noi", "", m_gt_1);
}
break;
case 's':
if( z[0]=='m' && z[2]=='i' && m_gt_1(z+3) ){
z += 3;
}
break;
case 't':
stem(&z, "eta", "", m_gt_1) ||
stem(&z, "iti", "", m_gt_1);
break;
case 'u':
if( z[0]=='s' && z[2]=='o' && m_gt_1(z+3) ){
z += 3;
}
break;
case 'v':
case 'z':
if( z[0]=='e' && z[2]=='i' && m_gt_1(z+3) ){
z += 3;
}
break;
}
/* Step 5a */
if( z[0]=='e' ){
if( m_gt_1(z+1) ){
z++;
}else if( m_eq_1(z+1) && !star_oh(z+1) ){
z++;
}
}
/* Step 5b */
if( m_gt_1(z) && z[0]=='l' && z[1]=='l' ){
z++;
}
/* z[] is now the stemmed word in reverse order. Flip it back
** around into forward order and return.
*/
*pnOut = i = strlen(z);
zOut[i] = 0;
while( *z ){
zOut[--i] = *(z++);
}
}
/*
** Characters that can be part of a token. We assume any character
** whose value is greater than 0x80 (any UTF character) can be
** part of a token. In other words, delimiters all must have
** values of 0x7f or lower.
*/
static const char isIdChar[] = {
/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */
};
#define idChar(C) (((ch=C)&0x80)!=0 || (ch>0x2f && isIdChar[ch-0x30]))
#define isDelim(C) (((ch=C)&0x80)==0 && (ch<0x30 || !isIdChar[ch-0x30]))
/*
** Extract the next token from a tokenization cursor. The cursor must
** have been opened by a prior call to porterOpen().
*/
static int porterNext(
sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by porterOpen */
const char **pzToken, /* OUT: *pzToken is the token text */
int *pnBytes, /* OUT: Number of bytes in token */
int *piStartOffset, /* OUT: Starting offset of token */
int *piEndOffset, /* OUT: Ending offset of token */
int *piPosition /* OUT: Position integer of token */
){
porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor;
const char *z = c->zInput;
while( c->iOffset<c->nInput ){
int iStartOffset, ch;
/* Scan past delimiter characters */
while( c->iOffset<c->nInput && isDelim(z[c->iOffset]) ){
c->iOffset++;
}
/* Count non-delimiter characters. */
iStartOffset = c->iOffset;
while( c->iOffset<c->nInput && !isDelim(z[c->iOffset]) ){
c->iOffset++;
}
if( c->iOffset>iStartOffset ){
int n = c->iOffset-iStartOffset;
if( n>c->nAllocated ){
c->nAllocated = n+20;
c->zToken = realloc(c->zToken, c->nAllocated);
if( c->zToken==NULL ) return SQLITE_NOMEM;
}
porter_stemmer(&z[iStartOffset], n, c->zToken, pnBytes);
*pzToken = c->zToken;
*piStartOffset = iStartOffset;
*piEndOffset = c->iOffset;
*piPosition = c->iToken++;
return SQLITE_OK;
}
}
return SQLITE_DONE;
}
/*
** The set of routines that implement the porter-stemmer tokenizer
*/
static const sqlite3_tokenizer_module porterTokenizerModule = {
0,
porterCreate,
porterDestroy,
porterOpen,
porterClose,
porterNext,
};
/*
** Allocate a new porter tokenizer. Return a pointer to the new
** tokenizer in *ppModule
*/
void sqlite3Fts1PorterTokenizerModule(
sqlite3_tokenizer_module const**ppModule
){
*ppModule = &porterTokenizerModule;
}
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1) */

View File

@ -1,90 +0,0 @@
/*
** 2006 July 10
**
** The author disclaims copyright to this source code.
**
*************************************************************************
** Defines the interface to tokenizers used by fulltext-search. There
** are three basic components:
**
** sqlite3_tokenizer_module is a singleton defining the tokenizer
** interface functions. This is essentially the class structure for
** tokenizers.
**
** sqlite3_tokenizer is used to define a particular tokenizer, perhaps
** including customization information defined at creation time.
**
** sqlite3_tokenizer_cursor is generated by a tokenizer to generate
** tokens from a particular input.
*/
#ifndef _FTS1_TOKENIZER_H_
#define _FTS1_TOKENIZER_H_
/* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time.
** If tokenizers are to be allowed to call sqlite3_*() functions, then
** we will need a way to register the API consistently.
*/
#include "sqlite3.h"
/*
** Structures used by the tokenizer interface.
*/
typedef struct sqlite3_tokenizer sqlite3_tokenizer;
typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor;
typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module;
struct sqlite3_tokenizer_module {
int iVersion; /* currently 0 */
/*
** Create and destroy a tokenizer. argc/argv are passed down from
** the fulltext virtual table creation to allow customization.
*/
int (*xCreate)(int argc, const char *const*argv,
sqlite3_tokenizer **ppTokenizer);
int (*xDestroy)(sqlite3_tokenizer *pTokenizer);
/*
** Tokenize a particular input. Call xOpen() to prepare to
** tokenize, xNext() repeatedly until it returns SQLITE_DONE, then
** xClose() to free any internal state. The pInput passed to
** xOpen() must exist until the cursor is closed. The ppToken
** result from xNext() is only valid until the next call to xNext()
** or until xClose() is called.
*/
/* TODO(shess) current implementation requires pInput to be
** nul-terminated. This should either be fixed, or pInput/nBytes
** should be converted to zInput.
*/
int (*xOpen)(sqlite3_tokenizer *pTokenizer,
const char *pInput, int nBytes,
sqlite3_tokenizer_cursor **ppCursor);
int (*xClose)(sqlite3_tokenizer_cursor *pCursor);
int (*xNext)(sqlite3_tokenizer_cursor *pCursor,
const char **ppToken, int *pnBytes,
int *piStartOffset, int *piEndOffset, int *piPosition);
};
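/* Editorial sketch of the driving loop implied by the comments above
** (illustrative only; error handling is omitted and the names are
** placeholders):
**
**   sqlite3_tokenizer_cursor *pCsr;
**   const char *zTok; int nTok, iStart, iEnd, iPos;
**   pModule->xOpen(pTok, zText, -1, &pCsr);
**   pCsr->pTokenizer = pTok;
**   while( pModule->xNext(pCsr, &zTok, &nTok, &iStart, &iEnd, &iPos)==SQLITE_OK ){
**     ... use the token in zTok[0..nTok-1] ...
**   }
**   pModule->xClose(pCsr);
*/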
struct sqlite3_tokenizer {
const sqlite3_tokenizer_module *pModule; /* The module for this tokenizer */
/* Tokenizer implementations will typically add additional fields */
};
struct sqlite3_tokenizer_cursor {
sqlite3_tokenizer *pTokenizer; /* Tokenizer for this cursor. */
/* Tokenizer implementations will typically add additional fields */
};
/*
** Get the module for a tokenizer which generates tokens based on a
** set of non-token characters. The default is to break tokens at any
** non-alnum character, though the set of delimiters can also be
** specified by the first argv argument to xCreate().
*/
/* TODO(shess) This doesn't belong here. Need some sort of
** registration process.
*/
void sqlite3Fts1SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
void sqlite3Fts1PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule);
#endif /* _FTS1_TOKENIZER_H_ */

View File

@ -1,221 +0,0 @@
/*
** The author disclaims copyright to this source code.
**
*************************************************************************
** Implementation of the "simple" full-text-search tokenizer.
*/
/*
** The code in this file is only compiled if:
**
** * The FTS1 module is being built as an extension
** (in which case SQLITE_CORE is not defined), or
**
** * The FTS1 module is being built into the core of
** SQLite (in which case SQLITE_ENABLE_FTS1 is defined).
*/
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1)
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include "fts1_tokenizer.h"
typedef struct simple_tokenizer {
sqlite3_tokenizer base;
char delim[128]; /* flag ASCII delimiters */
} simple_tokenizer;
typedef struct simple_tokenizer_cursor {
sqlite3_tokenizer_cursor base;
const char *pInput; /* input we are tokenizing */
int nBytes; /* size of the input */
int iOffset; /* current position in pInput */
int iToken; /* index of next token to be returned */
char *pToken; /* storage for current token */
int nTokenAllocated; /* space allocated to zToken buffer */
} simple_tokenizer_cursor;
/* Forward declaration */
static const sqlite3_tokenizer_module simpleTokenizerModule;
static int isDelim(simple_tokenizer *t, unsigned char c){
return c<0x80 && t->delim[c];
}
/*
** Create a new tokenizer instance.
*/
static int simpleCreate(
int argc, const char * const *argv,
sqlite3_tokenizer **ppTokenizer
){
simple_tokenizer *t;
t = (simple_tokenizer *) calloc(sizeof(*t), 1);
if( t==NULL ) return SQLITE_NOMEM;
/* TODO(shess) Delimiters need to remain the same from run to run,
** else we need to reindex. One solution would be a meta-table to
** track such information in the database, then we'd only want this
** information on the initial create.
*/
if( argc>1 ){
int i, n = strlen(argv[1]);
for(i=0; i<n; i++){
unsigned char ch = argv[1][i];
/* We explicitly don't support UTF-8 delimiters for now. */
if( ch>=0x80 ){
free(t);
return SQLITE_ERROR;
}
t->delim[ch] = 1;
}
} else {
/* Mark non-alphanumeric ASCII characters as delimiters */
int i;
for(i=1; i<0x80; i++){
t->delim[i] = !isalnum(i);
}
}
*ppTokenizer = &t->base;
return SQLITE_OK;
}
/*
** Destroy a tokenizer
*/
static int simpleDestroy(sqlite3_tokenizer *pTokenizer){
free(pTokenizer);
return SQLITE_OK;
}
/*
** Prepare to begin tokenizing a particular string. The input
** string to be tokenized is pInput[0..nBytes-1]. A cursor
** used to incrementally tokenize this string is returned in
** *ppCursor.
*/
static int simpleOpen(
sqlite3_tokenizer *pTokenizer, /* The tokenizer */
const char *pInput, int nBytes, /* String to be tokenized */
sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
){
simple_tokenizer_cursor *c;
c = (simple_tokenizer_cursor *) malloc(sizeof(*c));
if( c==NULL ) return SQLITE_NOMEM;
c->pInput = pInput;
if( pInput==0 ){
c->nBytes = 0;
}else if( nBytes<0 ){
c->nBytes = (int)strlen(pInput);
}else{
c->nBytes = nBytes;
}
c->iOffset = 0; /* start tokenizing at the beginning */
c->iToken = 0;
c->pToken = NULL; /* no space allocated, yet. */
c->nTokenAllocated = 0;
*ppCursor = &c->base;
return SQLITE_OK;
}
/*
** Close a tokenization cursor previously opened by a call to
** simpleOpen() above.
*/
static int simpleClose(sqlite3_tokenizer_cursor *pCursor){
simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;
free(c->pToken);
free(c);
return SQLITE_OK;
}
/*
** Extract the next token from a tokenization cursor. The cursor must
** have been opened by a prior call to simpleOpen().
*/
static int simpleNext(
sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by simpleOpen */
const char **ppToken, /* OUT: *ppToken is the token text */
int *pnBytes, /* OUT: Number of bytes in token */
int *piStartOffset, /* OUT: Starting offset of token */
int *piEndOffset, /* OUT: Ending offset of token */
int *piPosition /* OUT: Position integer of token */
){
simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;
simple_tokenizer *t = (simple_tokenizer *) pCursor->pTokenizer;
unsigned char *p = (unsigned char *)c->pInput;
while( c->iOffset<c->nBytes ){
int iStartOffset;
/* Scan past delimiter characters */
while( c->iOffset<c->nBytes && isDelim(t, p[c->iOffset]) ){
c->iOffset++;
}
/* Count non-delimiter characters. */
iStartOffset = c->iOffset;
while( c->iOffset<c->nBytes && !isDelim(t, p[c->iOffset]) ){
c->iOffset++;
}
if( c->iOffset>iStartOffset ){
int i, n = c->iOffset-iStartOffset;
if( n>c->nTokenAllocated ){
c->nTokenAllocated = n+20;
c->pToken = realloc(c->pToken, c->nTokenAllocated);
if( c->pToken==NULL ) return SQLITE_NOMEM;
}
for(i=0; i<n; i++){
/* TODO(shess) This needs expansion to handle UTF-8
** case-insensitivity.
*/
unsigned char ch = p[iStartOffset+i];
c->pToken[i] = ch<0x80 ? tolower(ch) : ch;
}
*ppToken = c->pToken;
*pnBytes = n;
*piStartOffset = iStartOffset;
*piEndOffset = c->iOffset;
*piPosition = c->iToken++;
return SQLITE_OK;
}
}
return SQLITE_DONE;
}
/*
** The set of routines that implement the simple tokenizer
*/
static const sqlite3_tokenizer_module simpleTokenizerModule = {
0,
simpleCreate,
simpleDestroy,
simpleOpen,
simpleClose,
simpleNext,
};
/*
** Allocate a new simple tokenizer. Return a pointer to the new
** tokenizer in *ppModule
*/
void sqlite3Fts1SimpleTokenizerModule(
sqlite3_tokenizer_module const**ppModule
){
*ppModule = &simpleTokenizerModule;
}
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1) */

File diff suppressed because it is too large Load Diff

View File

@ -1,11 +0,0 @@
#include "sqlite3.h"
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
int fulltext_init(sqlite3 *db);
#ifdef __cplusplus
} /* extern "C" */
#endif /* __cplusplus */

View File

@ -1,174 +0,0 @@
/*
** The author disclaims copyright to this source code.
**
*************************************************************************
** Implementation of the "simple" full-text-search tokenizer.
*/
#include <assert.h>
#if !defined(__APPLE__)
#include <malloc.h>
#else
#include <stdlib.h>
#endif
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include "tokenizer.h"
/* Duplicate a string; the caller must free() the returned string.
* (We don't use strdup() since it's not part of the standard C library and
* may not be available everywhere.) */
/* TODO(shess) Copied from fulltext.c, consider util.c for such
** things. */
static char *string_dup(const char *s){
char *str = malloc(strlen(s) + 1);
strcpy(str, s);
return str;
}
typedef struct simple_tokenizer {
sqlite3_tokenizer base;
const char *zDelim; /* token delimiters */
} simple_tokenizer;
typedef struct simple_tokenizer_cursor {
sqlite3_tokenizer_cursor base;
const char *pInput; /* input we are tokenizing */
int nBytes; /* size of the input */
const char *pCurrent; /* current position in pInput */
int iToken; /* index of next token to be returned */
char *zToken; /* storage for current token */
int nTokenBytes; /* actual size of current token */
int nTokenAllocated; /* space allocated to zToken buffer */
} simple_tokenizer_cursor;
static sqlite3_tokenizer_module simpleTokenizerModule;/* forward declaration */
static int simpleCreate(
int argc, const char **argv,
sqlite3_tokenizer **ppTokenizer
){
simple_tokenizer *t;
t = (simple_tokenizer *) malloc(sizeof(simple_tokenizer));
/* TODO(shess) Delimiters need to remain the same from run to run,
** else we need to reindex. One solution would be a meta-table to
** track such information in the database, then we'd only want this
** information on the initial create.
*/
if( argc>1 ){
t->zDelim = string_dup(argv[1]);
} else {
/* Build a string excluding alphanumeric ASCII characters */
char zDelim[0x80]; /* nul-terminated, so nul not a member */
int i, j;
for(i=1, j=0; i<0x80; i++){
if( !isalnum(i) ){
zDelim[j++] = i;
}
}
zDelim[j++] = '\0';
assert( j<=sizeof(zDelim) );
t->zDelim = string_dup(zDelim);
}
*ppTokenizer = &t->base;
return SQLITE_OK;
}
static int simpleDestroy(sqlite3_tokenizer *pTokenizer){
simple_tokenizer *t = (simple_tokenizer *) pTokenizer;
free((void *) t->zDelim);
free(t);
return SQLITE_OK;
}
static int simpleOpen(
sqlite3_tokenizer *pTokenizer,
const char *pInput, int nBytes,
sqlite3_tokenizer_cursor **ppCursor
){
simple_tokenizer_cursor *c;
c = (simple_tokenizer_cursor *) malloc(sizeof(simple_tokenizer_cursor));
c->pInput = pInput;
c->nBytes = nBytes<0 ? (int) strlen(pInput) : nBytes;
c->pCurrent = c->pInput; /* start tokenizing at the beginning */
c->iToken = 0;
c->zToken = NULL; /* no space allocated, yet. */
c->nTokenBytes = 0;
c->nTokenAllocated = 0;
*ppCursor = &c->base;
return SQLITE_OK;
}
static int simpleClose(sqlite3_tokenizer_cursor *pCursor){
simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;
if( NULL!=c->zToken ){
free(c->zToken);
}
free(c);
return SQLITE_OK;
}
static int simpleNext(
sqlite3_tokenizer_cursor *pCursor,
const char **ppToken, int *pnBytes,
int *piStartOffset, int *piEndOffset, int *piPosition
){
simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;
simple_tokenizer *t = (simple_tokenizer *) pCursor->pTokenizer;
int ii;
while( c->pCurrent-c->pInput<c->nBytes ){
int n = (int) strcspn(c->pCurrent, t->zDelim);
if( n>0 ){
if( n+1>c->nTokenAllocated ){
c->zToken = realloc(c->zToken, n+1);
}
for(ii=0; ii<n; ii++){
/* TODO(shess) This needs expansion to handle UTF-8
** case-insensitivity.
*/
char ch = c->pCurrent[ii];
c->zToken[ii] = (unsigned char)ch<0x80 ? tolower((unsigned char)ch):ch;
}
c->zToken[n] = '\0';
*ppToken = c->zToken;
*pnBytes = n;
*piStartOffset = (int) (c->pCurrent-c->pInput);
*piEndOffset = *piStartOffset+n;
*piPosition = c->iToken++;
c->pCurrent += n + 1;
return SQLITE_OK;
}
c->pCurrent += n + 1;
/* TODO(shess) could strspn() to skip delimiters en masse. Needs
** to happen in two places, though, which is annoying.
*/
}
return SQLITE_DONE;
}
static sqlite3_tokenizer_module simpleTokenizerModule = {
0,
simpleCreate,
simpleDestroy,
simpleOpen,
simpleClose,
simpleNext,
};
void get_simple_tokenizer_module(
sqlite3_tokenizer_module **ppModule
){
*ppModule = &simpleTokenizerModule;
}

View File

@ -1,89 +0,0 @@
/*
** 2006 July 10
**
** The author disclaims copyright to this source code.
**
*************************************************************************
** Defines the interface to tokenizers used by fulltext-search. There
** are three basic components:
**
** sqlite3_tokenizer_module is a singleton defining the tokenizer
** interface functions. This is essentially the class structure for
** tokenizers.
**
** sqlite3_tokenizer is used to define a particular tokenizer, perhaps
** including customization information defined at creation time.
**
** sqlite3_tokenizer_cursor is generated by a tokenizer to generate
** tokens from a particular input.
*/
#ifndef _TOKENIZER_H_
#define _TOKENIZER_H_
/* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time.
** If tokenizers are to be allowed to call sqlite3_*() functions, then
** we will need a way to register the API consistently.
*/
#include "sqlite3.h"
/*
** Structures used by the tokenizer interface.
*/
typedef struct sqlite3_tokenizer sqlite3_tokenizer;
typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor;
typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module;
struct sqlite3_tokenizer_module {
int iVersion; /* currently 0 */
/*
** Create and destroy a tokenizer. argc/argv are passed down from
** the fulltext virtual table creation to allow customization.
*/
int (*xCreate)(int argc, const char **argv,
sqlite3_tokenizer **ppTokenizer);
int (*xDestroy)(sqlite3_tokenizer *pTokenizer);
/*
** Tokenize a particular input. Call xOpen() to prepare to
** tokenize, xNext() repeatedly until it returns SQLITE_DONE, then
** xClose() to free any internal state. The pInput passed to
** xOpen() must exist until the cursor is closed. The ppToken
** result from xNext() is only valid until the next call to xNext()
** or until xClose() is called.
*/
/* TODO(shess) current implementation requires pInput to be
** nul-terminated. This should either be fixed, or pInput/nBytes
** should be converted to zInput.
*/
int (*xOpen)(sqlite3_tokenizer *pTokenizer,
const char *pInput, int nBytes,
sqlite3_tokenizer_cursor **ppCursor);
int (*xClose)(sqlite3_tokenizer_cursor *pCursor);
int (*xNext)(sqlite3_tokenizer_cursor *pCursor,
const char **ppToken, int *pnBytes,
int *piStartOffset, int *piEndOffset, int *piPosition);
};
struct sqlite3_tokenizer {
sqlite3_tokenizer_module *pModule; /* The module for this tokenizer */
/* Tokenizer implementations will typically add additional fields */
};
struct sqlite3_tokenizer_cursor {
sqlite3_tokenizer *pTokenizer; /* Tokenizer for this cursor. */
/* Tokenizer implementations will typically add additional fields */
};
/*
** Get the module for a tokenizer which generates tokens based on a
** set of non-token characters. The default is to break tokens at any
** non-alnum character, though the set of delimiters can also be
** specified by the first argv argument to xCreate().
*/
/* TODO(shess) This doesn't belong here. Need some sort of
** registration process.
*/
void get_simple_tokenizer_module(sqlite3_tokenizer_module **ppModule);
#endif /* _TOKENIZER_H_ */

View File

@ -1,133 +0,0 @@
1. FTS2 Tokenizers
When creating a new full-text table, FTS2 allows the user to select
the text tokenizer implementation to be used when indexing text
by specifying a "tokenizer" clause as part of the CREATE VIRTUAL TABLE
statement:
CREATE VIRTUAL TABLE <table-name> USING fts2(
<columns ...> [, tokenizer <tokenizer-name> [<tokenizer-args>]]
);
The built-in tokenizers (valid values to pass as <tokenizer name>) are
"simple" and "porter".
<tokenizer-args> should consist of zero or more white-space separated
arguments to pass to the selected tokenizer implementation. The
interpretation of the arguments, if any, depends on the individual
tokenizer.
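For example, with hypothetical table and column names, a porter-tokenized
table could be declared as:

  CREATE VIRTUAL TABLE papers USING fts2(title, body, tokenizer porter);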
2. Custom Tokenizers
FTS2 allows users to provide custom tokenizer implementations. The
interface used to create a new tokenizer is defined and described in
the fts2_tokenizer.h source file.
Registering a new FTS2 tokenizer is similar to registering a new
virtual table module with SQLite. The user passes a pointer to a
structure containing pointers to various callback functions that
make up the implementation of the new tokenizer type. For tokenizers,
the structure (defined in fts2_tokenizer.h) is called
"sqlite3_tokenizer_module".
FTS2 does not expose a C-function that users call to register new
tokenizer types with a database handle. Instead, the pointer must
be encoded as an SQL blob value and passed to FTS2 through the SQL
engine by evaluating a special scalar function, "fts2_tokenizer()".
The fts2_tokenizer() function may be called with one or two arguments,
as follows:
SELECT fts2_tokenizer(<tokenizer-name>);
SELECT fts2_tokenizer(<tokenizer-name>, <sqlite3_tokenizer_module ptr>);
Where <tokenizer-name> is a string identifying the tokenizer and
<sqlite3_tokenizer_module ptr> is a pointer to an sqlite3_tokenizer_module
structure encoded as an SQL blob. If the second argument is present,
it is registered as tokenizer <tokenizer-name> and a copy of it is
returned. If only one argument is passed, a pointer to the tokenizer
implementation currently registered as <tokenizer-name> is returned,
encoded as a blob. Or, if no such tokenizer exists, an SQL exception
(error) is raised.
SECURITY: If the fts2 extension is used in an environment where potentially
malicious users may execute arbitrary SQL (i.e. gears), they should be
prevented from invoking the fts2_tokenizer() function, possibly using the
authorisation callback.
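One possible shape for such an authorizer is sketched below (an editorial
illustration, not part of the original documentation; it assumes the standard
sqlite3_set_authorizer() interface, whose second string argument carries the
function name for SQLITE_FUNCTION checks):

  static int denyFts2Tokenizer(
    void *pArg, int op,
    const char *z1, const char *z2,
    const char *zDb, const char *zTrigger
  ){
    if( op==SQLITE_FUNCTION && z2 && strcmp(z2, "fts2_tokenizer")==0 ){
      return SQLITE_DENY;
    }
    return SQLITE_OK;
  }

  /* After opening the database handle: */
  sqlite3_set_authorizer(db, denyFts2Tokenizer, 0);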
See "Sample code" below for an example of calling the fts2_tokenizer()
function from C code.
3. ICU Library Tokenizers
If this extension is compiled with the SQLITE_ENABLE_ICU pre-processor
symbol defined, then there exists a built-in tokenizer named "icu"
implemented using the ICU library. The first argument passed to the
xCreate() method (see fts2_tokenizer.h) of this tokenizer may be
an ICU locale identifier. For example "tr_TR" for Turkish as used
in Turkey, or "en_AU" for English as used in Australia. For example:
"CREATE VIRTUAL TABLE thai_text USING fts2(text, tokenizer icu th_TH)"
The ICU tokenizer implementation is very simple. It splits the input
text according to the ICU rules for finding word boundaries and discards
any tokens that consist entirely of white-space. This may be suitable
for some applications in some locales, but not all. If more complex
processing is required, for example to implement stemming or
discard punctuation, this can be done by creating a tokenizer
implementation that uses the ICU tokenizer as part of its implementation.
When using the ICU tokenizer this way, it is safe to overwrite the
contents of the strings returned by the xNext() method (see
fts2_tokenizer.h).
4. Sample code.
The following two code samples illustrate the way C code should invoke
the fts2_tokenizer() scalar function:
int registerTokenizer(
sqlite3 *db,
char *zName,
const sqlite3_tokenizer_module *p
){
int rc;
sqlite3_stmt *pStmt;
const char zSql[] = "SELECT fts2_tokenizer(?, ?)";
rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
if( rc!=SQLITE_OK ){
return rc;
}
sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC);
sqlite3_step(pStmt);
return sqlite3_finalize(pStmt);
}
int queryTokenizer(
sqlite3 *db,
char *zName,
const sqlite3_tokenizer_module **pp
){
int rc;
sqlite3_stmt *pStmt;
const char zSql[] = "SELECT fts2_tokenizer(?)";
*pp = 0;
rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
if( rc!=SQLITE_OK ){
return rc;
}
sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
if( SQLITE_ROW==sqlite3_step(pStmt) ){
if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){
memcpy(pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp));
}
}
return sqlite3_finalize(pStmt);
}
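As a further illustration, the two helpers above might be combined to register
a module under some name and then verify the registration. This is a sketch
only; the function name and the tokenizer name used here are hypothetical:

  int installTokenizer(sqlite3 *db, const sqlite3_tokenizer_module *pMod){
    const sqlite3_tokenizer_module *pCheck = 0;
    char zName[] = "mytok";             /* hypothetical tokenizer name */
    int rc = registerTokenizer(db, zName, pMod);
    if( rc!=SQLITE_OK ) return rc;
    rc = queryTokenizer(db, zName, &pCheck);
    if( rc==SQLITE_OK && pCheck!=pMod ) rc = SQLITE_ERROR;
    return rc;
  }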

View File

@ -1,4 +0,0 @@
This folder contains source code to the second full-text search
extension for SQLite. While the API is the same, this version uses a
substantially different storage schema from fts1, so tables will need
to be rebuilt.

File diff suppressed because it is too large Load Diff

View File

@ -1,26 +0,0 @@
/*
** 2006 Oct 10
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
**
** This header file is used by programs that want to link against the
** FTS2 library. All it does is declare the sqlite3Fts2Init() interface.
*/
#include "sqlite3.h"
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
int sqlite3Fts2Init(sqlite3 *db);
#ifdef __cplusplus
} /* extern "C" */
#endif /* __cplusplus */

View File

@ -1,376 +0,0 @@
/*
** 2001 September 22
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
** This is the implementation of generic hash-tables used in SQLite.
** We've modified it slightly to serve as a standalone hash table
** implementation for the full-text indexing module.
*/
/*
** The code in this file is only compiled if:
**
** * The FTS2 module is being built as an extension
** (in which case SQLITE_CORE is not defined), or
**
** * The FTS2 module is being built into the core of
** SQLite (in which case SQLITE_ENABLE_FTS2 is defined).
*/
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "sqlite3.h"
#include "sqlite3ext.h"
SQLITE_EXTENSION_INIT3
#include "fts2_hash.h"
/*
** Malloc and Free functions
*/
static void *fts2HashMalloc(int n){
void *p = sqlite3_malloc(n);
if( p ){
memset(p, 0, n);
}
return p;
}
static void fts2HashFree(void *p){
sqlite3_free(p);
}
/* Turn bulk memory into a hash table object by initializing the
** fields of the Hash structure.
**
** "pNew" is a pointer to the hash table that is to be initialized.
** keyClass is one of the constants
** FTS2_HASH_BINARY or FTS2_HASH_STRING. The value of keyClass
** determines what kind of key the hash table will use. "copyKey" is
** true if the hash table should make its own private copy of keys and
** false if it should just use the supplied pointer.
*/
void sqlite3Fts2HashInit(fts2Hash *pNew, int keyClass, int copyKey){
assert( pNew!=0 );
assert( keyClass>=FTS2_HASH_STRING && keyClass<=FTS2_HASH_BINARY );
pNew->keyClass = keyClass;
pNew->copyKey = copyKey;
pNew->first = 0;
pNew->count = 0;
pNew->htsize = 0;
pNew->ht = 0;
}
/* Remove all entries from a hash table. Reclaim all memory.
** Call this routine to delete a hash table or to reset a hash table
** to the empty state.
*/
void sqlite3Fts2HashClear(fts2Hash *pH){
fts2HashElem *elem; /* For looping over all elements of the table */
assert( pH!=0 );
elem = pH->first;
pH->first = 0;
fts2HashFree(pH->ht);
pH->ht = 0;
pH->htsize = 0;
while( elem ){
fts2HashElem *next_elem = elem->next;
if( pH->copyKey && elem->pKey ){
fts2HashFree(elem->pKey);
}
fts2HashFree(elem);
elem = next_elem;
}
pH->count = 0;
}
/*
** Hash and comparison functions when the mode is FTS2_HASH_STRING
*/
static int strHash(const void *pKey, int nKey){
const char *z = (const char *)pKey;
int h = 0;
if( nKey<=0 ) nKey = (int) strlen(z);
while( nKey > 0 ){
h = (h<<3) ^ h ^ *z++;
nKey--;
}
return h & 0x7fffffff;
}
static int strCompare(const void *pKey1, int n1, const void *pKey2, int n2){
if( n1!=n2 ) return 1;
return strncmp((const char*)pKey1,(const char*)pKey2,n1);
}
/*
** Hash and comparison functions when the mode is FTS2_HASH_BINARY
*/
static int binHash(const void *pKey, int nKey){
int h = 0;
const char *z = (const char *)pKey;
while( nKey-- > 0 ){
h = (h<<3) ^ h ^ *(z++);
}
return h & 0x7fffffff;
}
static int binCompare(const void *pKey1, int n1, const void *pKey2, int n2){
if( n1!=n2 ) return 1;
return memcmp(pKey1,pKey2,n1);
}
/*
** Return a pointer to the appropriate hash function given the key class.
**
** The C syntax in this function definition may be unfamiliar to some
** programmers, so we provide the following additional explanation:
**
** The name of the function is "hashFunction". The function takes a
** single parameter "keyClass". The return value of hashFunction()
** is a pointer to another function. Specifically, the return value
** of hashFunction() is a pointer to a function that takes two parameters
** with types "const void*" and "int" and returns an "int".
*/
static int (*hashFunction(int keyClass))(const void*,int){
if( keyClass==FTS2_HASH_STRING ){
return &strHash;
}else{
assert( keyClass==FTS2_HASH_BINARY );
return &binHash;
}
}
/*
** Return a pointer to the appropriate hash function given the key class.
**
** For help in interpreting the obscure C code in the function definition,
** see the header comment on the previous function.
*/
static int (*compareFunction(int keyClass))(const void*,int,const void*,int){
if( keyClass==FTS2_HASH_STRING ){
return &strCompare;
}else{
assert( keyClass==FTS2_HASH_BINARY );
return &binCompare;
}
}
/* Link an element into the hash table
*/
static void insertElement(
fts2Hash *pH, /* The complete hash table */
struct _fts2ht *pEntry, /* The entry into which pNew is inserted */
fts2HashElem *pNew /* The element to be inserted */
){
fts2HashElem *pHead; /* First element already in pEntry */
pHead = pEntry->chain;
if( pHead ){
pNew->next = pHead;
pNew->prev = pHead->prev;
if( pHead->prev ){ pHead->prev->next = pNew; }
else { pH->first = pNew; }
pHead->prev = pNew;
}else{
pNew->next = pH->first;
if( pH->first ){ pH->first->prev = pNew; }
pNew->prev = 0;
pH->first = pNew;
}
pEntry->count++;
pEntry->chain = pNew;
}
/* Resize the hash table so that it contains "new_size" buckets.
** "new_size" must be a power of 2. The hash table might fail
** to resize if fts2HashMalloc() fails.
*/
static void rehash(fts2Hash *pH, int new_size){
struct _fts2ht *new_ht; /* The new hash table */
fts2HashElem *elem, *next_elem; /* For looping over existing elements */
int (*xHash)(const void*,int); /* The hash function */
assert( (new_size & (new_size-1))==0 );
new_ht = (struct _fts2ht *)fts2HashMalloc( new_size*sizeof(struct _fts2ht) );
if( new_ht==0 ) return;
fts2HashFree(pH->ht);
pH->ht = new_ht;
pH->htsize = new_size;
xHash = hashFunction(pH->keyClass);
for(elem=pH->first, pH->first=0; elem; elem = next_elem){
int h = (*xHash)(elem->pKey, elem->nKey) & (new_size-1);
next_elem = elem->next;
insertElement(pH, &new_ht[h], elem);
}
}
/* This function (for internal use only) locates an element in a
** hash table that matches the given key. The hash for this key has
** already been computed and is passed as the 4th parameter.
*/
static fts2HashElem *findElementGivenHash(
const fts2Hash *pH, /* The pH to be searched */
const void *pKey, /* The key we are searching for */
int nKey,
int h /* The hash for this key. */
){
fts2HashElem *elem; /* Used to loop thru the element list */
int count; /* Number of elements left to test */
int (*xCompare)(const void*,int,const void*,int); /* comparison function */
if( pH->ht ){
struct _fts2ht *pEntry = &pH->ht[h];
elem = pEntry->chain;
count = pEntry->count;
xCompare = compareFunction(pH->keyClass);
while( count-- && elem ){
if( (*xCompare)(elem->pKey,elem->nKey,pKey,nKey)==0 ){
return elem;
}
elem = elem->next;
}
}
return 0;
}
/* Remove a single entry from the hash table given a pointer to that
** element and a hash on the element's key.
*/
static void removeElementGivenHash(
fts2Hash *pH, /* The pH containing "elem" */
fts2HashElem* elem, /* The element to be removed from the pH */
int h /* Hash value for the element */
){
struct _fts2ht *pEntry;
if( elem->prev ){
elem->prev->next = elem->next;
}else{
pH->first = elem->next;
}
if( elem->next ){
elem->next->prev = elem->prev;
}
pEntry = &pH->ht[h];
if( pEntry->chain==elem ){
pEntry->chain = elem->next;
}
pEntry->count--;
if( pEntry->count<=0 ){
pEntry->chain = 0;
}
if( pH->copyKey && elem->pKey ){
fts2HashFree(elem->pKey);
}
fts2HashFree( elem );
pH->count--;
if( pH->count<=0 ){
assert( pH->first==0 );
assert( pH->count==0 );
fts2HashClear(pH);
}
}
/* Attempt to locate an element of the hash table pH with a key
** that matches pKey,nKey. Return the data for this element if it is
** found, or NULL if there is no match.
*/
void *sqlite3Fts2HashFind(const fts2Hash *pH, const void *pKey, int nKey){
int h; /* A hash on key */
fts2HashElem *elem; /* The element that matches key */
int (*xHash)(const void*,int); /* The hash function */
if( pH==0 || pH->ht==0 ) return 0;
xHash = hashFunction(pH->keyClass);
assert( xHash!=0 );
h = (*xHash)(pKey,nKey);
assert( (pH->htsize & (pH->htsize-1))==0 );
elem = findElementGivenHash(pH,pKey,nKey, h & (pH->htsize-1));
return elem ? elem->data : 0;
}
/* Insert an element into the hash table pH. The key is pKey,nKey
** and the data is "data".
**
** If no element exists with a matching key, then a new
** element is created. A copy of the key is made if the copyKey
** flag is set. NULL is returned.
**
** If another element already exists with the same key, then the
** new data replaces the old data and the old data is returned.
** The key is not copied in this instance. If a malloc fails, then
** the new data is returned and the hash table is unchanged.
**
** If the "data" parameter to this function is NULL, then the
** element corresponding to "key" is removed from the hash table.
*/
void *sqlite3Fts2HashInsert(
fts2Hash *pH, /* The hash table to insert into */
const void *pKey, /* The key */
int nKey, /* Number of bytes in the key */
void *data /* The data */
){
int hraw; /* Raw hash value of the key */
int h; /* the hash of the key modulo hash table size */
fts2HashElem *elem; /* Used to loop thru the element list */
fts2HashElem *new_elem; /* New element added to the pH */
int (*xHash)(const void*,int); /* The hash function */
assert( pH!=0 );
xHash = hashFunction(pH->keyClass);
assert( xHash!=0 );
hraw = (*xHash)(pKey, nKey);
assert( (pH->htsize & (pH->htsize-1))==0 );
h = hraw & (pH->htsize-1);
elem = findElementGivenHash(pH,pKey,nKey,h);
if( elem ){
void *old_data = elem->data;
if( data==0 ){
removeElementGivenHash(pH,elem,h);
}else{
elem->data = data;
}
return old_data;
}
if( data==0 ) return 0;
new_elem = (fts2HashElem*)fts2HashMalloc( sizeof(fts2HashElem) );
if( new_elem==0 ) return data;
if( pH->copyKey && pKey!=0 ){
new_elem->pKey = fts2HashMalloc( nKey );
if( new_elem->pKey==0 ){
fts2HashFree(new_elem);
return data;
}
memcpy((void*)new_elem->pKey, pKey, nKey);
}else{
new_elem->pKey = (void*)pKey;
}
new_elem->nKey = nKey;
pH->count++;
if( pH->htsize==0 ){
rehash(pH,8);
if( pH->htsize==0 ){
pH->count = 0;
fts2HashFree(new_elem);
return data;
}
}
if( pH->count > pH->htsize ){
rehash(pH,pH->htsize*2);
}
assert( pH->htsize>0 );
assert( (pH->htsize & (pH->htsize-1))==0 );
h = hraw & (pH->htsize-1);
insertElement(pH, &pH->ht[h], new_elem);
new_elem->data = data;
return 0;
}
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */

View File

@ -1,110 +0,0 @@
/*
** 2001 September 22
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
** This is the header file for the generic hash-table implementation
** used in SQLite. We've modified it slightly to serve as a standalone
** hash table implementation for the full-text indexing module.
**
*/
#ifndef _FTS2_HASH_H_
#define _FTS2_HASH_H_
/* Forward declarations of structures. */
typedef struct fts2Hash fts2Hash;
typedef struct fts2HashElem fts2HashElem;
/* A complete hash table is an instance of the following structure.
** The internals of this structure are intended to be opaque -- client
** code should not attempt to access or modify the fields of this structure
** directly. Change this structure only by using the routines below.
** However, many of the "procedures" and "functions" for modifying and
** accessing this structure are really macros, so we can't really make
** this structure opaque.
*/
struct fts2Hash {
char keyClass; /* FTS2_HASH_STRING or FTS2_HASH_BINARY */
char copyKey; /* True if copy of key made on insert */
int count; /* Number of entries in this table */
fts2HashElem *first; /* The first element of the array */
int htsize; /* Number of buckets in the hash table */
struct _fts2ht { /* the hash table */
int count; /* Number of entries with this hash */
fts2HashElem *chain; /* Pointer to first entry with this hash */
} *ht;
};
/* Each element in the hash table is an instance of the following
** structure. All elements are stored on a single doubly-linked list.
**
** Again, this structure is intended to be opaque, but it can't really
** be opaque because it is used by macros.
*/
struct fts2HashElem {
fts2HashElem *next, *prev; /* Next and previous elements in the table */
void *data; /* Data associated with this element */
void *pKey; int nKey; /* Key associated with this element */
};
/*
** There are 2 different modes of operation for a hash table:
**
** FTS2_HASH_STRING pKey points to a string that is nKey bytes long
** (including the null-terminator, if any). Case
** is respected in comparisons.
**
** FTS2_HASH_BINARY pKey points to binary data nKey bytes long.
** memcmp() is used to compare keys.
**
** A copy of the key is made if the copyKey parameter to fts2HashInit is 1.
*/
#define FTS2_HASH_STRING 1
#define FTS2_HASH_BINARY 2
/*
** Access routines. To delete, insert a NULL pointer.
*/
void sqlite3Fts2HashInit(fts2Hash*, int keytype, int copyKey);
void *sqlite3Fts2HashInsert(fts2Hash*, const void *pKey, int nKey, void *pData);
void *sqlite3Fts2HashFind(const fts2Hash*, const void *pKey, int nKey);
void sqlite3Fts2HashClear(fts2Hash*);
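/*
** Illustrative usage sketch (names and data here are hypothetical): build a
** table keyed by nul-terminated strings, letting the table keep private
** copies of the keys:
**
**   fts2Hash h;
**   void *pFound;
**   sqlite3Fts2HashInit(&h, FTS2_HASH_STRING, 1);
**   sqlite3Fts2HashInsert(&h, "alpha", 6, pData);   /* 6 = strlen("alpha")+1 */
**   pFound = sqlite3Fts2HashFind(&h, "alpha", 6);   /* pFound==pData */
**   sqlite3Fts2HashInsert(&h, "alpha", 6, 0);       /* deletes the entry */
**   sqlite3Fts2HashClear(&h);
*/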
/*
** Shorthand for the functions above
*/
#define fts2HashInit sqlite3Fts2HashInit
#define fts2HashInsert sqlite3Fts2HashInsert
#define fts2HashFind sqlite3Fts2HashFind
#define fts2HashClear sqlite3Fts2HashClear
/*
** Macros for looping over all elements of a hash table. The idiom is
** like this:
**
** fts2Hash h;
** fts2HashElem *p;
** ...
** for(p=fts2HashFirst(&h); p; p=fts2HashNext(p)){
** SomeStructure *pData = fts2HashData(p);
** // do something with pData
** }
*/
#define fts2HashFirst(H) ((H)->first)
#define fts2HashNext(E) ((E)->next)
#define fts2HashData(E) ((E)->data)
#define fts2HashKey(E) ((E)->pKey)
#define fts2HashKeysize(E) ((E)->nKey)
/*
** Number of entries in a hash table
*/
#define fts2HashCount(H) ((H)->count)
#endif /* _FTS2_HASH_H_ */

View File

@ -1,260 +0,0 @@
/*
** 2007 June 22
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
** This file implements a tokenizer for fts2 based on the ICU library.
**
** $Id: fts2_icu.c,v 1.3 2008/12/18 05:30:26 danielk1977 Exp $
*/
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)
#ifdef SQLITE_ENABLE_ICU
#include <assert.h>
#include <string.h>
#include "fts2_tokenizer.h"
#include <unicode/ubrk.h>
#include <unicode/ucol.h>
#include <unicode/ustring.h>
#include <unicode/utf16.h>
typedef struct IcuTokenizer IcuTokenizer;
typedef struct IcuCursor IcuCursor;
struct IcuTokenizer {
sqlite3_tokenizer base;
char *zLocale;
};
struct IcuCursor {
sqlite3_tokenizer_cursor base;
UBreakIterator *pIter; /* ICU break-iterator object */
int nChar; /* Number of UChar elements in pInput */
UChar *aChar; /* Copy of input using utf-16 encoding */
int *aOffset; /* Offsets of each character in utf-8 input */
int nBuffer;
char *zBuffer;
int iToken;
};
/*
** Create a new tokenizer instance.
*/
static int icuCreate(
int argc, /* Number of entries in argv[] */
const char * const *argv, /* Tokenizer creation arguments */
sqlite3_tokenizer **ppTokenizer /* OUT: Created tokenizer */
){
IcuTokenizer *p;
int n = 0;
if( argc>0 ){
n = strlen(argv[0])+1;
}
p = (IcuTokenizer *)sqlite3_malloc(sizeof(IcuTokenizer)+n);
if( !p ){
return SQLITE_NOMEM;
}
memset(p, 0, sizeof(IcuTokenizer));
if( n ){
p->zLocale = (char *)&p[1];
memcpy(p->zLocale, argv[0], n);
}
*ppTokenizer = (sqlite3_tokenizer *)p;
return SQLITE_OK;
}
/*
** Destroy a tokenizer
*/
static int icuDestroy(sqlite3_tokenizer *pTokenizer){
IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
sqlite3_free(p);
return SQLITE_OK;
}
/*
** Prepare to begin tokenizing a particular string. The input
** string to be tokenized is pInput[0..nBytes-1]. A cursor
** used to incrementally tokenize this string is returned in
** *ppCursor.
*/
static int icuOpen(
sqlite3_tokenizer *pTokenizer, /* The tokenizer */
const char *zInput, /* Input string */
int nInput, /* Length of zInput in bytes */
sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
){
IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
IcuCursor *pCsr;
const int32_t opt = U_FOLD_CASE_DEFAULT;
UErrorCode status = U_ZERO_ERROR;
int nChar;
UChar32 c;
int iInput = 0;
int iOut = 0;
*ppCursor = 0;
if( nInput<0 ){
nInput = strlen(zInput);
}
nChar = nInput+1;
pCsr = (IcuCursor *)sqlite3_malloc(
sizeof(IcuCursor) + /* IcuCursor */
((nChar+3)&~3) * sizeof(UChar) + /* IcuCursor.aChar[] */
(nChar+1) * sizeof(int) /* IcuCursor.aOffset[] */
);
if( !pCsr ){
return SQLITE_NOMEM;
}
memset(pCsr, 0, sizeof(IcuCursor));
pCsr->aChar = (UChar *)&pCsr[1];
pCsr->aOffset = (int *)&pCsr->aChar[(nChar+3)&~3];
pCsr->aOffset[iOut] = iInput;
U8_NEXT(zInput, iInput, nInput, c);
while( c>0 ){
int isError = 0;
c = u_foldCase(c, opt);
U16_APPEND(pCsr->aChar, iOut, nChar, c, isError);
if( isError ){
sqlite3_free(pCsr);
return SQLITE_ERROR;
}
pCsr->aOffset[iOut] = iInput;
if( iInput<nInput ){
U8_NEXT(zInput, iInput, nInput, c);
}else{
c = 0;
}
}
pCsr->pIter = ubrk_open(UBRK_WORD, p->zLocale, pCsr->aChar, iOut, &status);
if( !U_SUCCESS(status) ){
sqlite3_free(pCsr);
return SQLITE_ERROR;
}
pCsr->nChar = iOut;
ubrk_first(pCsr->pIter);
*ppCursor = (sqlite3_tokenizer_cursor *)pCsr;
return SQLITE_OK;
}
/*
** Close a tokenization cursor previously opened by a call to icuOpen().
*/
static int icuClose(sqlite3_tokenizer_cursor *pCursor){
IcuCursor *pCsr = (IcuCursor *)pCursor;
ubrk_close(pCsr->pIter);
sqlite3_free(pCsr->zBuffer);
sqlite3_free(pCsr);
return SQLITE_OK;
}
/*
** Extract the next token from a tokenization cursor.
*/
static int icuNext(
sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by icuOpen */
const char **ppToken, /* OUT: *ppToken is the token text */
int *pnBytes, /* OUT: Number of bytes in token */
int *piStartOffset, /* OUT: Starting offset of token */
int *piEndOffset, /* OUT: Ending offset of token */
int *piPosition /* OUT: Position integer of token */
){
IcuCursor *pCsr = (IcuCursor *)pCursor;
int iStart = 0;
int iEnd = 0;
int nByte = 0;
while( iStart==iEnd ){
UChar32 c;
iStart = ubrk_current(pCsr->pIter);
iEnd = ubrk_next(pCsr->pIter);
if( iEnd==UBRK_DONE ){
return SQLITE_DONE;
}
while( iStart<iEnd ){
int iWhite = iStart;
U8_NEXT(pCsr->aChar, iWhite, pCsr->nChar, c);
if( u_isspace(c) ){
iStart = iWhite;
}else{
break;
}
}
assert(iStart<=iEnd);
}
do {
UErrorCode status = U_ZERO_ERROR;
if( nByte ){
char *zNew = sqlite3_realloc(pCsr->zBuffer, nByte);
if( !zNew ){
return SQLITE_NOMEM;
}
pCsr->zBuffer = zNew;
pCsr->nBuffer = nByte;
}
u_strToUTF8(
pCsr->zBuffer, pCsr->nBuffer, &nByte, /* Output vars */
&pCsr->aChar[iStart], iEnd-iStart, /* Input vars */
&status /* Output success/failure */
);
} while( nByte>pCsr->nBuffer );
*ppToken = pCsr->zBuffer;
*pnBytes = nByte;
*piStartOffset = pCsr->aOffset[iStart];
*piEndOffset = pCsr->aOffset[iEnd];
*piPosition = pCsr->iToken++;
return SQLITE_OK;
}
/*
** The set of routines that implement the ICU tokenizer
*/
static const sqlite3_tokenizer_module icuTokenizerModule = {
0, /* iVersion */
icuCreate, /* xCreate */
icuDestroy, /* xDestroy */
icuOpen, /* xOpen */
icuClose, /* xClose */
icuNext, /* xNext */
};
/*
** Set *ppModule to point at the implementation of the ICU tokenizer.
*/
void sqlite3Fts2IcuTokenizerModule(
sqlite3_tokenizer_module const**ppModule
){
*ppModule = &icuTokenizerModule;
}
#endif /* defined(SQLITE_ENABLE_ICU) */
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */

View File

@ -1,644 +0,0 @@
/*
** 2006 September 30
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
** Implementation of the full-text-search tokenizer that implements
** a Porter stemmer.
*/
/*
** The code in this file is only compiled if:
**
** * The FTS2 module is being built as an extension
** (in which case SQLITE_CORE is not defined), or
**
** * The FTS2 module is being built into the core of
** SQLite (in which case SQLITE_ENABLE_FTS2 is defined).
*/
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "sqlite3.h"
#include "sqlite3ext.h"
SQLITE_EXTENSION_INIT3
#include "fts2_tokenizer.h"
/*
** Class derived from sqlite3_tokenizer
*/
typedef struct porter_tokenizer {
sqlite3_tokenizer base; /* Base class */
} porter_tokenizer;
/*
** Class derived from sqlite3_tokenizer_cursor
*/
typedef struct porter_tokenizer_cursor {
sqlite3_tokenizer_cursor base;
const char *zInput; /* input we are tokenizing */
int nInput; /* size of the input */
int iOffset; /* current position in zInput */
int iToken; /* index of next token to be returned */
char *zToken; /* storage for current token */
int nAllocated; /* space allocated to zToken buffer */
} porter_tokenizer_cursor;
/* Forward declaration */
static const sqlite3_tokenizer_module porterTokenizerModule;
/*
** Create a new tokenizer instance.
*/
static int porterCreate(
int argc, const char * const *argv,
sqlite3_tokenizer **ppTokenizer
){
porter_tokenizer *t;
t = (porter_tokenizer *) sqlite3_malloc(sizeof(*t));
if( t==NULL ) return SQLITE_NOMEM;
memset(t, 0, sizeof(*t));
*ppTokenizer = &t->base;
return SQLITE_OK;
}
/*
** Destroy a tokenizer
*/
static int porterDestroy(sqlite3_tokenizer *pTokenizer){
sqlite3_free(pTokenizer);
return SQLITE_OK;
}
/*
** Prepare to begin tokenizing a particular string. The input
** string to be tokenized is zInput[0..nInput-1]. A cursor
** used to incrementally tokenize this string is returned in
** *ppCursor.
*/
static int porterOpen(
sqlite3_tokenizer *pTokenizer, /* The tokenizer */
const char *zInput, int nInput, /* String to be tokenized */
sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
){
porter_tokenizer_cursor *c;
c = (porter_tokenizer_cursor *) sqlite3_malloc(sizeof(*c));
if( c==NULL ) return SQLITE_NOMEM;
c->zInput = zInput;
if( zInput==0 ){
c->nInput = 0;
}else if( nInput<0 ){
c->nInput = (int)strlen(zInput);
}else{
c->nInput = nInput;
}
c->iOffset = 0; /* start tokenizing at the beginning */
c->iToken = 0;
c->zToken = NULL; /* no space allocated, yet. */
c->nAllocated = 0;
*ppCursor = &c->base;
return SQLITE_OK;
}
/*
** Close a tokenization cursor previously opened by a call to
** porterOpen() above.
*/
static int porterClose(sqlite3_tokenizer_cursor *pCursor){
porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor;
sqlite3_free(c->zToken);
sqlite3_free(c);
return SQLITE_OK;
}
/*
** Vowel or consonant
*/
static const char cType[] = {
0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0,
1, 1, 1, 2, 1
};
/*
** isConsonant() and isVowel() determine whether the first character of
** the string they point to is a consonant or a vowel, according
** to Porter's rules.
**
** A consonant is any letter other than 'a', 'e', 'i', 'o', or 'u'.
** 'Y' is a consonant unless it follows another consonant,
** in which case it is a vowel.
**
** In these routines, the letters are in reverse order. So the 'y' rule
** is that 'y' is a consonant unless it is followed by another
** consonant.
*/
static int isVowel(const char*);
static int isConsonant(const char *z){
int j;
char x = *z;
if( x==0 ) return 0;
assert( x>='a' && x<='z' );
j = cType[x-'a'];
if( j<2 ) return j;
return z[1]==0 || isVowel(z + 1);
}
static int isVowel(const char *z){
int j;
char x = *z;
if( x==0 ) return 0;
assert( x>='a' && x<='z' );
j = cType[x-'a'];
if( j<2 ) return 1-j;
return isConsonant(z + 1);
}
/*
** Let any sequence of one or more vowels be represented by V and let
** C be sequence of one or more consonants. Then every word can be
** represented as:
**
** [C] (VC){m} [V]
**
** In prose: A word is an optional consonant sequence followed by zero or
** more vowel-consonant pairs followed by an optional vowel. "m" is the
** number of vowel-consonant pairs. This routine computes the value
** of m for the word z. For example, "tree" has m=0 and "trouble" has m=1.
**
** Return true if the m-value for z is 1 or more. In other words,
** return true if z contains at least one vowel that is followed
** by a consonant.
**
** In this routine z[] is in reverse order. So we are really looking
** for an instance of a consonant followed by a vowel.
*/
static int m_gt_0(const char *z){
while( isVowel(z) ){ z++; }
if( *z==0 ) return 0;
while( isConsonant(z) ){ z++; }
return *z!=0;
}
/* Like m_gt_0 above except we are looking for a value of m which is
** exactly 1
*/
static int m_eq_1(const char *z){
while( isVowel(z) ){ z++; }
if( *z==0 ) return 0;
while( isConsonant(z) ){ z++; }
if( *z==0 ) return 0;
while( isVowel(z) ){ z++; }
if( *z==0 ) return 1;
while( isConsonant(z) ){ z++; }
return *z==0;
}
/* Like m_gt_0 above except we are looking for a value of m>1 instead
** of m>0
*/
static int m_gt_1(const char *z){
while( isVowel(z) ){ z++; }
if( *z==0 ) return 0;
while( isConsonant(z) ){ z++; }
if( *z==0 ) return 0;
while( isVowel(z) ){ z++; }
if( *z==0 ) return 0;
while( isConsonant(z) ){ z++; }
return *z!=0;
}
/*
** Return TRUE if there is a vowel anywhere within z[]
*/
static int hasVowel(const char *z){
while( isConsonant(z) ){ z++; }
return *z!=0;
}
/*
** Return TRUE if the word ends in a double consonant.
**
** The text is reversed here. So we are really looking at
** the first two characters of z[].
*/
static int doubleConsonant(const char *z){
return isConsonant(z) && z[0]==z[1] && isConsonant(z+1);
}
/*
** Return TRUE if the word ends with three letters which
** are consonant-vowel-consonant and where the final consonant
** is not 'w', 'x', or 'y'.
**
** The word is reversed here. So we are really checking the
** first three letters and the first one cannot be in [wxy].
*/
static int star_oh(const char *z){
return
z[0]!=0 && isConsonant(z) &&
z[0]!='w' && z[0]!='x' && z[0]!='y' &&
z[1]!=0 && isVowel(z+1) &&
z[2]!=0 && isConsonant(z+2);
}
/*
** If the word ends with zFrom and xCond() is true for the stem
** of the word that precedes the zFrom ending, then change the
** ending to zTo.
**
** The input word *pz and zFrom are both in reverse order. zTo
** is in normal order.
**
** Return TRUE if zFrom matches. Return FALSE if zFrom does not
** match. Note that TRUE is returned even if xCond() fails and
** no substitution occurs.
**
** For example, with the reversed word "seinop" ("ponies"), the call
** stem(&z, "sei", "i", 0) matches and rewrites it to "inop" ("poni").
*/
static int stem(
char **pz, /* The word being stemmed (Reversed) */
const char *zFrom, /* If the ending matches this... (Reversed) */
const char *zTo, /* ... change the ending to this (not reversed) */
int (*xCond)(const char*) /* Condition that must be true */
){
char *z = *pz;
while( *zFrom && *zFrom==*z ){ z++; zFrom++; }
if( *zFrom!=0 ) return 0;
if( xCond && !xCond(z) ) return 1;
while( *zTo ){
*(--z) = *(zTo++);
}
*pz = z;
return 1;
}
/*
** This is the fallback stemmer used when the porter stemmer is
** inappropriate. The input word is copied into the output with
** US-ASCII case folding. If the input word is too long (more
** than 20 bytes if it contains no digits or more than 6 bytes if
** it contains digits) then the word is truncated to 20 or 6 bytes
** by taking 10 or 3 bytes from the beginning and end.
*/
static void copy_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
int i, mx, j;
int hasDigit = 0;
for(i=0; i<nIn; i++){
int c = zIn[i];
if( c>='A' && c<='Z' ){
zOut[i] = c - 'A' + 'a';
}else{
if( c>='0' && c<='9' ) hasDigit = 1;
zOut[i] = c;
}
}
mx = hasDigit ? 3 : 10;
if( nIn>mx*2 ){
for(j=mx, i=nIn-mx; i<nIn; i++, j++){
zOut[j] = zOut[i];
}
i = j;
}
zOut[i] = 0;
*pnOut = i;
}
/*
** Stem the input word zIn[0..nIn-1]. Store the output in zOut.
** zOut is at least big enough to hold nIn bytes. Write the actual
** size of the output word (exclusive of the '\0' terminator) into *pnOut.
**
** Any upper-case characters in the US-ASCII character set ([A-Z])
** are converted to lower case. Upper-case UTF characters are
** unchanged.
**
** Words that are longer than about 20 bytes are stemmed by retaining
** a few bytes from the beginning and the end of the word. If the
** word contains digits, 3 bytes are taken from the beginning and
** 3 bytes from the end. For long words without digits, 10 bytes
** are taken from each end. US-ASCII case folding still applies.
**
** If the input word contains no digits but does contain characters not
** in [a-zA-Z] then no stemming is attempted and this routine just
** copies the input into the output with US-ASCII
** case folding.
**
** Stemming never increases the length of the word. So there is
** no chance of overflowing the zOut buffer.
*/
static void porter_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
int i, j, c;
char zReverse[28];
char *z, *z2;
if( nIn<3 || nIn>=sizeof(zReverse)-7 ){
/* The word is too big or too small for the porter stemmer.
** Fallback to the copy stemmer */
copy_stemmer(zIn, nIn, zOut, pnOut);
return;
}
for(i=0, j=sizeof(zReverse)-6; i<nIn; i++, j--){
c = zIn[i];
if( c>='A' && c<='Z' ){
zReverse[j] = c + 'a' - 'A';
}else if( c>='a' && c<='z' ){
zReverse[j] = c;
}else{
/* The use of a character not in [a-zA-Z] means that we fallback
** to the copy stemmer */
copy_stemmer(zIn, nIn, zOut, pnOut);
return;
}
}
memset(&zReverse[sizeof(zReverse)-5], 0, 5);
z = &zReverse[j+1];
/* Step 1a */
if( z[0]=='s' ){
if(
!stem(&z, "sess", "ss", 0) &&
!stem(&z, "sei", "i", 0) &&
!stem(&z, "ss", "ss", 0)
){
z++;
}
}
/* Step 1b */
z2 = z;
if( stem(&z, "dee", "ee", m_gt_0) ){
/* Do nothing. The work was all in the test */
}else if(
(stem(&z, "gni", "", hasVowel) || stem(&z, "de", "", hasVowel))
&& z!=z2
){
if( stem(&z, "ta", "ate", 0) ||
stem(&z, "lb", "ble", 0) ||
stem(&z, "zi", "ize", 0) ){
/* Do nothing. The work was all in the test */
}else if( doubleConsonant(z) && (*z!='l' && *z!='s' && *z!='z') ){
z++;
}else if( m_eq_1(z) && star_oh(z) ){
*(--z) = 'e';
}
}
/* Step 1c */
if( z[0]=='y' && hasVowel(z+1) ){
z[0] = 'i';
}
/* Step 2 */
switch( z[1] ){
case 'a':
stem(&z, "lanoita", "ate", m_gt_0) ||
stem(&z, "lanoit", "tion", m_gt_0);
break;
case 'c':
stem(&z, "icne", "ence", m_gt_0) ||
stem(&z, "icna", "ance", m_gt_0);
break;
case 'e':
stem(&z, "rezi", "ize", m_gt_0);
break;
case 'g':
stem(&z, "igol", "log", m_gt_0);
break;
case 'l':
stem(&z, "ilb", "ble", m_gt_0) ||
stem(&z, "illa", "al", m_gt_0) ||
stem(&z, "iltne", "ent", m_gt_0) ||
stem(&z, "ile", "e", m_gt_0) ||
stem(&z, "ilsuo", "ous", m_gt_0);
break;
case 'o':
stem(&z, "noitazi", "ize", m_gt_0) ||
stem(&z, "noita", "ate", m_gt_0) ||
stem(&z, "rota", "ate", m_gt_0);
break;
case 's':
stem(&z, "msila", "al", m_gt_0) ||
stem(&z, "ssenevi", "ive", m_gt_0) ||
stem(&z, "ssenluf", "ful", m_gt_0) ||
stem(&z, "ssensuo", "ous", m_gt_0);
break;
case 't':
stem(&z, "itila", "al", m_gt_0) ||
stem(&z, "itivi", "ive", m_gt_0) ||
stem(&z, "itilib", "ble", m_gt_0);
break;
}
/* Step 3 */
switch( z[0] ){
case 'e':
stem(&z, "etaci", "ic", m_gt_0) ||
stem(&z, "evita", "", m_gt_0) ||
stem(&z, "ezila", "al", m_gt_0);
break;
case 'i':
stem(&z, "itici", "ic", m_gt_0);
break;
case 'l':
stem(&z, "laci", "ic", m_gt_0) ||
stem(&z, "luf", "", m_gt_0);
break;
case 's':
stem(&z, "ssen", "", m_gt_0);
break;
}
/* Step 4 */
switch( z[1] ){
case 'a':
if( z[0]=='l' && m_gt_1(z+2) ){
z += 2;
}
break;
case 'c':
if( z[0]=='e' && z[2]=='n' && (z[3]=='a' || z[3]=='e') && m_gt_1(z+4) ){
z += 4;
}
break;
case 'e':
if( z[0]=='r' && m_gt_1(z+2) ){
z += 2;
}
break;
case 'i':
if( z[0]=='c' && m_gt_1(z+2) ){
z += 2;
}
break;
case 'l':
if( z[0]=='e' && z[2]=='b' && (z[3]=='a' || z[3]=='i') && m_gt_1(z+4) ){
z += 4;
}
break;
case 'n':
if( z[0]=='t' ){
if( z[2]=='a' ){
if( m_gt_1(z+3) ){
z += 3;
}
}else if( z[2]=='e' ){
stem(&z, "tneme", "", m_gt_1) ||
stem(&z, "tnem", "", m_gt_1) ||
stem(&z, "tne", "", m_gt_1);
}
}
break;
case 'o':
if( z[0]=='u' ){
if( m_gt_1(z+2) ){
z += 2;
}
}else if( z[3]=='s' || z[3]=='t' ){
stem(&z, "noi", "", m_gt_1);
}
break;
case 's':
if( z[0]=='m' && z[2]=='i' && m_gt_1(z+3) ){
z += 3;
}
break;
case 't':
stem(&z, "eta", "", m_gt_1) ||
stem(&z, "iti", "", m_gt_1);
break;
case 'u':
if( z[0]=='s' && z[2]=='o' && m_gt_1(z+3) ){
z += 3;
}
break;
case 'v':
case 'z':
if( z[0]=='e' && z[2]=='i' && m_gt_1(z+3) ){
z += 3;
}
break;
}
/* Step 5a */
if( z[0]=='e' ){
if( m_gt_1(z+1) ){
z++;
}else if( m_eq_1(z+1) && !star_oh(z+1) ){
z++;
}
}
/* Step 5b */
if( m_gt_1(z) && z[0]=='l' && z[1]=='l' ){
z++;
}
/* z[] is now the stemmed word in reverse order. Flip it back
** around into forward order and return.
*/
*pnOut = i = strlen(z);
zOut[i] = 0;
while( *z ){
zOut[--i] = *(z++);
}
}
/*
** Characters that can be part of a token. We assume any character
** whose value is greater than 0x80 (any UTF character) can be
** part of a token. In other words, delimiters all must have
** values of 0x7f or lower.
*/
static const char porterIdChar[] = {
/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */
};
#define isDelim(C) (((ch=C)&0x80)==0 && (ch<0x30 || !porterIdChar[ch-0x30]))
/*
** Extract the next token from a tokenization cursor. The cursor must
** have been opened by a prior call to porterOpen().
*/
static int porterNext(
sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by porterOpen */
const char **pzToken, /* OUT: *pzToken is the token text */
int *pnBytes, /* OUT: Number of bytes in token */
int *piStartOffset, /* OUT: Starting offset of token */
int *piEndOffset, /* OUT: Ending offset of token */
int *piPosition /* OUT: Position integer of token */
){
porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor;
const char *z = c->zInput;
while( c->iOffset<c->nInput ){
int iStartOffset, ch;
/* Scan past delimiter characters */
while( c->iOffset<c->nInput && isDelim(z[c->iOffset]) ){
c->iOffset++;
}
/* Count non-delimiter characters. */
iStartOffset = c->iOffset;
while( c->iOffset<c->nInput && !isDelim(z[c->iOffset]) ){
c->iOffset++;
}
if( c->iOffset>iStartOffset ){
int n = c->iOffset-iStartOffset;
if( n>c->nAllocated ){
c->nAllocated = n+20;
c->zToken = sqlite3_realloc(c->zToken, c->nAllocated);
if( c->zToken==NULL ) return SQLITE_NOMEM;
}
porter_stemmer(&z[iStartOffset], n, c->zToken, pnBytes);
*pzToken = c->zToken;
*piStartOffset = iStartOffset;
*piEndOffset = c->iOffset;
*piPosition = c->iToken++;
return SQLITE_OK;
}
}
return SQLITE_DONE;
}
/*
** The set of routines that implement the porter-stemmer tokenizer
*/
static const sqlite3_tokenizer_module porterTokenizerModule = {
0,
porterCreate,
porterDestroy,
porterOpen,
porterClose,
porterNext,
};
/*
** Allocate a new porter tokenizer. Return a pointer to the new
** tokenizer in *ppModule
*/
void sqlite3Fts2PorterTokenizerModule(
sqlite3_tokenizer_module const**ppModule
){
*ppModule = &porterTokenizerModule;
}
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */

View File

@ -1,375 +0,0 @@
/*
** 2007 June 22
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
**
** This is part of an SQLite module implementing full-text search.
** This particular file implements the generic tokenizer interface.
*/
/*
** The code in this file is only compiled if:
**
** * The FTS2 module is being built as an extension
** (in which case SQLITE_CORE is not defined), or
**
** * The FTS2 module is being built into the core of
** SQLite (in which case SQLITE_ENABLE_FTS2 is defined).
*/
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)
#include "sqlite3.h"
#include "sqlite3ext.h"
SQLITE_EXTENSION_INIT3
#include "fts2_hash.h"
#include "fts2_tokenizer.h"
#include <assert.h>
/*
** Implementation of the SQL scalar function for accessing the underlying
** hash table. This function may be called as follows:
**
** SELECT <function-name>(<key-name>);
** SELECT <function-name>(<key-name>, <pointer>);
**
** where <function-name> is the name passed as the second argument
** to the sqlite3Fts2InitHashTable() function (e.g. 'fts2_tokenizer').
**
** If the <pointer> argument is specified, it must be a blob value
** containing a pointer to be stored as the hash data corresponding
** to the string <key-name>. If <pointer> is not specified, then
** the string <key-name> must already exist in the hash table. Otherwise,
** an error is returned.
**
** Whether or not the <pointer> argument is specified, the value returned
** is a blob containing the pointer stored as the hash data corresponding
** to string <key-name> (after the hash-table is updated, if applicable).
*/
static void scalarFunc(
sqlite3_context *context,
int argc,
sqlite3_value **argv
){
fts2Hash *pHash;
void *pPtr = 0;
const unsigned char *zName;
int nName;
assert( argc==1 || argc==2 );
pHash = (fts2Hash *)sqlite3_user_data(context);
zName = sqlite3_value_text(argv[0]);
nName = sqlite3_value_bytes(argv[0])+1;
if( argc==2 ){
void *pOld;
int n = sqlite3_value_bytes(argv[1]);
if( n!=sizeof(pPtr) ){
sqlite3_result_error(context, "argument type mismatch", -1);
return;
}
pPtr = *(void **)sqlite3_value_blob(argv[1]);
pOld = sqlite3Fts2HashInsert(pHash, (void *)zName, nName, pPtr);
if( pOld==pPtr ){
sqlite3_result_error(context, "out of memory", -1);
return;
}
}else{
pPtr = sqlite3Fts2HashFind(pHash, zName, nName);
if( !pPtr ){
char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName);
sqlite3_result_error(context, zErr, -1);
sqlite3_free(zErr);
return;
}
}
sqlite3_result_blob(context, (void *)&pPtr, sizeof(pPtr), SQLITE_TRANSIENT);
}
#ifdef SQLITE_TEST
#if defined(INCLUDE_SQLITE_TCL_H)
# include "sqlite_tcl.h"
#else
# include "tcl.h"
#endif
#include <string.h>
/*
** Implementation of a special SQL scalar function for testing tokenizers
** designed to be used in concert with the Tcl testing framework. This
** function must be called with two arguments:
**
** SELECT <function-name>(<key-name>, <input-string>);
** SELECT <function-name>(<key-name>, <pointer>);
**
** where <function-name> is the name passed as the second argument
** to the sqlite3Fts2InitHashTable() function (e.g. 'fts2_tokenizer')
** concatenated with the string '_test' (e.g. 'fts2_tokenizer_test').
**
** The return value is a string that may be interpreted as a Tcl
** list. For each token in the <input-string>, three elements are
** added to the returned list. The first is the token position, the
** second is the token text (folded, stemmed, etc.) and the third is the
** substring of <input-string> associated with the token. For example,
** using the built-in "simple" tokenizer:
**
** SELECT fts_tokenizer_test('simple', 'I don't see how');
**
** will return the string:
**
** "{0 i I 1 dont don't 2 see see 3 how how}"
**
*/
static void testFunc(
sqlite3_context *context,
int argc,
sqlite3_value **argv
){
fts2Hash *pHash;
sqlite3_tokenizer_module *p;
sqlite3_tokenizer *pTokenizer = 0;
sqlite3_tokenizer_cursor *pCsr = 0;
const char *zErr = 0;
const char *zName;
int nName;
const char *zInput;
int nInput;
const char *zArg = 0;
const char *zToken;
int nToken;
int iStart;
int iEnd;
int iPos;
Tcl_Obj *pRet;
assert( argc==2 || argc==3 );
nName = sqlite3_value_bytes(argv[0]);
zName = (const char *)sqlite3_value_text(argv[0]);
nInput = sqlite3_value_bytes(argv[argc-1]);
zInput = (const char *)sqlite3_value_text(argv[argc-1]);
if( argc==3 ){
zArg = (const char *)sqlite3_value_text(argv[1]);
}
pHash = (fts2Hash *)sqlite3_user_data(context);
p = (sqlite3_tokenizer_module *)sqlite3Fts2HashFind(pHash, zName, nName+1);
if( !p ){
char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName);
sqlite3_result_error(context, zErr, -1);
sqlite3_free(zErr);
return;
}
pRet = Tcl_NewObj();
Tcl_IncrRefCount(pRet);
if( SQLITE_OK!=p->xCreate(zArg ? 1 : 0, &zArg, &pTokenizer) ){
zErr = "error in xCreate()";
goto finish;
}
pTokenizer->pModule = p;
if( SQLITE_OK!=p->xOpen(pTokenizer, zInput, nInput, &pCsr) ){
zErr = "error in xOpen()";
goto finish;
}
pCsr->pTokenizer = pTokenizer;
while( SQLITE_OK==p->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos) ){
Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos));
Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
zToken = &zInput[iStart];
nToken = iEnd-iStart;
Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
}
if( SQLITE_OK!=p->xClose(pCsr) ){
zErr = "error in xClose()";
goto finish;
}
if( SQLITE_OK!=p->xDestroy(pTokenizer) ){
zErr = "error in xDestroy()";
goto finish;
}
finish:
if( zErr ){
sqlite3_result_error(context, zErr, -1);
}else{
sqlite3_result_text(context, Tcl_GetString(pRet), -1, SQLITE_TRANSIENT);
}
Tcl_DecrRefCount(pRet);
}
static
int registerTokenizer(
sqlite3 *db,
char *zName,
const sqlite3_tokenizer_module *p
){
int rc;
sqlite3_stmt *pStmt;
const char zSql[] = "SELECT fts2_tokenizer(?, ?)";
rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
if( rc!=SQLITE_OK ){
return rc;
}
sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC);
sqlite3_step(pStmt);
return sqlite3_finalize(pStmt);
}
static
int queryFts2Tokenizer(
sqlite3 *db,
char *zName,
const sqlite3_tokenizer_module **pp
){
int rc;
sqlite3_stmt *pStmt;
const char zSql[] = "SELECT fts2_tokenizer(?)";
*pp = 0;
rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
if( rc!=SQLITE_OK ){
return rc;
}
sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
if( SQLITE_ROW==sqlite3_step(pStmt) ){
if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){
memcpy(pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp));
}
}
return sqlite3_finalize(pStmt);
}
void sqlite3Fts2SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
/*
** Implementation of the scalar function fts2_tokenizer_internal_test().
** This function is used for testing only, it is not included in the
** build unless SQLITE_TEST is defined.
**
** The purpose of this is to test that the fts2_tokenizer() function
** can be used as designed by the C-code in the queryFts2Tokenizer and
** registerTokenizer() functions above. These two functions are repeated
** in the README.tokenizer file as an example, so it is important to
** test them.
**
** To run the tests, evaluate the fts2_tokenizer_internal_test() scalar
** function with no arguments. An assert() will fail if a problem is
** detected. i.e.:
**
** SELECT fts2_tokenizer_internal_test();
**
*/
static void intTestFunc(
sqlite3_context *context,
int argc,
sqlite3_value **argv
){
int rc;
const sqlite3_tokenizer_module *p1;
const sqlite3_tokenizer_module *p2;
sqlite3 *db = (sqlite3 *)sqlite3_user_data(context);
/* Test the query function */
sqlite3Fts2SimpleTokenizerModule(&p1);
rc = queryFts2Tokenizer(db, "simple", &p2);
assert( rc==SQLITE_OK );
assert( p1==p2 );
rc = queryFts2Tokenizer(db, "nosuchtokenizer", &p2);
assert( rc==SQLITE_ERROR );
assert( p2==0 );
assert( 0==strcmp(sqlite3_errmsg(db), "unknown tokenizer: nosuchtokenizer") );
/* Test the storage function */
rc = registerTokenizer(db, "nosuchtokenizer", p1);
assert( rc==SQLITE_OK );
rc = queryFts2Tokenizer(db, "nosuchtokenizer", &p2);
assert( rc==SQLITE_OK );
assert( p2==p1 );
sqlite3_result_text(context, "ok", -1, SQLITE_STATIC);
}
#endif
/*
** Set up SQL objects in database db used to access the contents of
** the hash table pointed to by argument pHash. The hash table must
** have been initialized to use string keys, and to take a private copy
** of the key when a value is inserted. i.e. by a call similar to:
**
** sqlite3Fts2HashInit(pHash, FTS2_HASH_STRING, 1);
**
** This function adds a scalar function (see header comment above
** scalarFunc() in this file for details) and, if ENABLE_TABLE is
** defined at compilation time, a temporary virtual table (see header
** comment above struct HashTableVtab) to the database schema. Both
** provide read/write access to the contents of *pHash.
**
** The third argument to this function, zName, is used as the name
** of both the scalar and, if created, the virtual table.
*/
int sqlite3Fts2InitHashTable(
sqlite3 *db,
fts2Hash *pHash,
const char *zName
){
int rc = SQLITE_OK;
void *p = (void *)pHash;
const int any = SQLITE_ANY;
char *zTest = 0;
char *zTest2 = 0;
#ifdef SQLITE_TEST
void *pdb = (void *)db;
zTest = sqlite3_mprintf("%s_test", zName);
zTest2 = sqlite3_mprintf("%s_internal_test", zName);
if( !zTest || !zTest2 ){
rc = SQLITE_NOMEM;
}
#endif
if( rc!=SQLITE_OK
|| (rc = sqlite3_create_function(db, zName, 1, any, p, scalarFunc, 0, 0))
|| (rc = sqlite3_create_function(db, zName, 2, any, p, scalarFunc, 0, 0))
#ifdef SQLITE_TEST
|| (rc = sqlite3_create_function(db, zTest, 2, any, p, testFunc, 0, 0))
|| (rc = sqlite3_create_function(db, zTest, 3, any, p, testFunc, 0, 0))
|| (rc = sqlite3_create_function(db, zTest2, 0, any, pdb, intTestFunc, 0, 0))
#endif
);
sqlite3_free(zTest);
sqlite3_free(zTest2);
return rc;
}
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */

View File

@ -1,145 +0,0 @@
/*
** 2006 July 10
**
** The author disclaims copyright to this source code.
**
*************************************************************************
** Defines the interface to tokenizers used by fulltext-search. There
** are three basic components:
**
** sqlite3_tokenizer_module is a singleton defining the tokenizer
** interface functions. This is essentially the class structure for
** tokenizers.
**
** sqlite3_tokenizer is used to define a particular tokenizer, perhaps
** including customization information defined at creation time.
**
** sqlite3_tokenizer_cursor is generated by a tokenizer to generate
** tokens from a particular input.
*/
#ifndef _FTS2_TOKENIZER_H_
#define _FTS2_TOKENIZER_H_
/* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time.
** If tokenizers are to be allowed to call sqlite3_*() functions, then
** we will need a way to register the API consistently.
*/
#include "sqlite3.h"
/*
** Structures used by the tokenizer interface. When a new tokenizer
** implementation is registered, the caller provides a pointer to
** an sqlite3_tokenizer_module containing pointers to the callback
** functions that make up an implementation.
**
** When an fts2 table is created, it passes any arguments passed to
** the tokenizer clause of the CREATE VIRTUAL TABLE statement to the
** sqlite3_tokenizer_module.xCreate() function of the requested tokenizer
** implementation. The xCreate() function in turn returns an
** sqlite3_tokenizer structure representing the specific tokenizer to
** be used for the fts2 table (customized by the tokenizer clause arguments).
**
** To tokenize an input buffer, the sqlite3_tokenizer_module.xOpen()
** method is called. It returns an sqlite3_tokenizer_cursor object
** that may be used to tokenize a specific input buffer based on
** the tokenization rules supplied by a specific sqlite3_tokenizer
** object.
*/
typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module;
typedef struct sqlite3_tokenizer sqlite3_tokenizer;
typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor;
struct sqlite3_tokenizer_module {
/*
** Structure version. Should always be set to 0.
*/
int iVersion;
/*
** Create a new tokenizer. The values in the argv[] array are the
** arguments passed to the "tokenizer" clause of the CREATE VIRTUAL
** TABLE statement that created the fts2 table. For example, if
** the following SQL is executed:
**
** CREATE .. USING fts2( ... , tokenizer <tokenizer-name> arg1 arg2)
**
** then argc is set to 2, and the argv[] array contains pointers
** to the strings "arg1" and "arg2".
**
** This method should return either SQLITE_OK (0), or an SQLite error
** code. If SQLITE_OK is returned, then *ppTokenizer should be set
** to point at the newly created tokenizer structure. The generic
** sqlite3_tokenizer.pModule variable should not be initialized by
** this callback. The caller will do so.
*/
int (*xCreate)(
int argc, /* Size of argv array */
const char *const*argv, /* Tokenizer argument strings */
sqlite3_tokenizer **ppTokenizer /* OUT: Created tokenizer */
);
/*
** Destroy an existing tokenizer. The fts2 module calls this method
** exactly once for each successful call to xCreate().
*/
int (*xDestroy)(sqlite3_tokenizer *pTokenizer);
/*
** Create a tokenizer cursor to tokenize an input buffer. The caller
** is responsible for ensuring that the input buffer remains valid
** until the cursor is closed (using the xClose() method).
*/
int (*xOpen)(
sqlite3_tokenizer *pTokenizer, /* Tokenizer object */
const char *pInput, int nBytes, /* Input buffer */
sqlite3_tokenizer_cursor **ppCursor /* OUT: Created tokenizer cursor */
);
/*
** Destroy an existing tokenizer cursor. The fts2 module calls this
** method exactly once for each successful call to xOpen().
*/
int (*xClose)(sqlite3_tokenizer_cursor *pCursor);
/*
** Retrieve the next token from the tokenizer cursor pCursor. This
** method should either return SQLITE_OK and set the values of the
** "OUT" variables identified below, or SQLITE_DONE to indicate that
** the end of the buffer has been reached, or an SQLite error code.
**
** *ppToken should be set to point at a buffer containing the
** normalized version of the token (i.e. after any case-folding and/or
** stemming has been performed). *pnBytes should be set to the length
** of this buffer in bytes. The input text that generated the token is
** identified by the byte offsets returned in *piStartOffset and
** *piEndOffset.
**
** The buffer *ppToken is set to point at is managed by the tokenizer
** implementation. It is only required to be valid until the next call
** to xNext() or xClose().
*/
/* TODO(shess) current implementation requires pInput to be
** nul-terminated. This should either be fixed, or pInput/nBytes
** should be converted to zInput.
*/
int (*xNext)(
sqlite3_tokenizer_cursor *pCursor, /* Tokenizer cursor */
const char **ppToken, int *pnBytes, /* OUT: Normalized text for token */
int *piStartOffset, /* OUT: Byte offset of token in input buffer */
int *piEndOffset, /* OUT: Byte offset of end of token in input buffer */
int *piPosition /* OUT: Number of tokens returned before this one */
);
};
struct sqlite3_tokenizer {
const sqlite3_tokenizer_module *pModule; /* The module for this tokenizer */
/* Tokenizer implementations will typically add additional fields */
};
struct sqlite3_tokenizer_cursor {
sqlite3_tokenizer *pTokenizer; /* Tokenizer for this cursor. */
/* Tokenizer implementations will typically add additional fields */
};
#endif /* _FTS2_TOKENIZER_H_ */

View File

@ -1,233 +0,0 @@
/*
** 2006 Oct 10
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
**
** Implementation of the "simple" full-text-search tokenizer.
*/
/*
** The code in this file is only compiled if:
**
** * The FTS2 module is being built as an extension
** (in which case SQLITE_CORE is not defined), or
**
** * The FTS2 module is being built into the core of
** SQLite (in which case SQLITE_ENABLE_FTS2 is defined).
*/
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "sqlite3.h"
#include "sqlite3ext.h"
SQLITE_EXTENSION_INIT3
#include "fts2_tokenizer.h"
typedef struct simple_tokenizer {
sqlite3_tokenizer base;
char delim[128]; /* flag ASCII delimiters */
} simple_tokenizer;
typedef struct simple_tokenizer_cursor {
sqlite3_tokenizer_cursor base;
const char *pInput; /* input we are tokenizing */
int nBytes; /* size of the input */
int iOffset; /* current position in pInput */
int iToken; /* index of next token to be returned */
char *pToken; /* storage for current token */
int nTokenAllocated; /* space allocated to pToken buffer */
} simple_tokenizer_cursor;
/* Forward declaration */
static const sqlite3_tokenizer_module simpleTokenizerModule;
static int simpleDelim(simple_tokenizer *t, unsigned char c){
return c<0x80 && t->delim[c];
}
/*
** Create a new tokenizer instance.
*/
static int simpleCreate(
int argc, const char * const *argv,
sqlite3_tokenizer **ppTokenizer
){
simple_tokenizer *t;
t = (simple_tokenizer *) sqlite3_malloc(sizeof(*t));
if( t==NULL ) return SQLITE_NOMEM;
memset(t, 0, sizeof(*t));
/* TODO(shess) Delimiters need to remain the same from run to run,
** else we need to reindex. One solution would be a meta-table to
** track such information in the database, then we'd only want this
** information on the initial create.
*/
if( argc>1 ){
int i, n = strlen(argv[1]);
for(i=0; i<n; i++){
unsigned char ch = argv[1][i];
/* We explicitly don't support UTF-8 delimiters for now. */
if( ch>=0x80 ){
sqlite3_free(t);
return SQLITE_ERROR;
}
t->delim[ch] = 1;
}
} else {
/* Mark non-alphanumeric ASCII characters as delimiters */
int i;
for(i=1; i<0x80; i++){
t->delim[i] = !((i>='0' && i<='9') || (i>='A' && i<='Z') ||
(i>='a' && i<='z'));
}
}
*ppTokenizer = &t->base;
return SQLITE_OK;
}
/*
** Destroy a tokenizer
*/
static int simpleDestroy(sqlite3_tokenizer *pTokenizer){
sqlite3_free(pTokenizer);
return SQLITE_OK;
}
/*
** Prepare to begin tokenizing a particular string. The input
** string to be tokenized is pInput[0..nBytes-1]. A cursor
** used to incrementally tokenize this string is returned in
** *ppCursor.
*/
static int simpleOpen(
sqlite3_tokenizer *pTokenizer, /* The tokenizer */
const char *pInput, int nBytes, /* String to be tokenized */
sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
){
simple_tokenizer_cursor *c;
c = (simple_tokenizer_cursor *) sqlite3_malloc(sizeof(*c));
if( c==NULL ) return SQLITE_NOMEM;
c->pInput = pInput;
if( pInput==0 ){
c->nBytes = 0;
}else if( nBytes<0 ){
c->nBytes = (int)strlen(pInput);
}else{
c->nBytes = nBytes;
}
c->iOffset = 0; /* start tokenizing at the beginning */
c->iToken = 0;
c->pToken = NULL; /* no space allocated, yet. */
c->nTokenAllocated = 0;
*ppCursor = &c->base;
return SQLITE_OK;
}
/*
** Close a tokenization cursor previously opened by a call to
** simpleOpen() above.
*/
static int simpleClose(sqlite3_tokenizer_cursor *pCursor){
simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;
sqlite3_free(c->pToken);
sqlite3_free(c);
return SQLITE_OK;
}
/*
** Extract the next token from a tokenization cursor. The cursor must
** have been opened by a prior call to simpleOpen().
*/
static int simpleNext(
sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by simpleOpen */
const char **ppToken, /* OUT: *ppToken is the token text */
int *pnBytes, /* OUT: Number of bytes in token */
int *piStartOffset, /* OUT: Starting offset of token */
int *piEndOffset, /* OUT: Ending offset of token */
int *piPosition /* OUT: Position integer of token */
){
simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;
simple_tokenizer *t = (simple_tokenizer *) pCursor->pTokenizer;
unsigned char *p = (unsigned char *)c->pInput;
while( c->iOffset<c->nBytes ){
int iStartOffset;
/* Scan past delimiter characters */
while( c->iOffset<c->nBytes && simpleDelim(t, p[c->iOffset]) ){
c->iOffset++;
}
/* Count non-delimiter characters. */
iStartOffset = c->iOffset;
while( c->iOffset<c->nBytes && !simpleDelim(t, p[c->iOffset]) ){
c->iOffset++;
}
if( c->iOffset>iStartOffset ){
int i, n = c->iOffset-iStartOffset;
if( n>c->nTokenAllocated ){
c->nTokenAllocated = n+20;
c->pToken = sqlite3_realloc(c->pToken, c->nTokenAllocated);
if( c->pToken==NULL ) return SQLITE_NOMEM;
}
for(i=0; i<n; i++){
/* TODO(shess) This needs expansion to handle UTF-8
** case-insensitivity.
*/
unsigned char ch = p[iStartOffset+i];
c->pToken[i] = (ch>='A' && ch<='Z') ? (ch - 'A' + 'a') : ch;
}
*ppToken = c->pToken;
*pnBytes = n;
*piStartOffset = iStartOffset;
*piEndOffset = c->iOffset;
*piPosition = c->iToken++;
return SQLITE_OK;
}
}
return SQLITE_DONE;
}
/*
** The set of routines that implement the simple tokenizer
*/
static const sqlite3_tokenizer_module simpleTokenizerModule = {
0,
simpleCreate,
simpleDestroy,
simpleOpen,
simpleClose,
simpleNext,
};
/*
** Allocate a new simple tokenizer. Return a pointer to the new
** tokenizer in *ppModule
*/
void sqlite3Fts2SimpleTokenizerModule(
sqlite3_tokenizer_module const**ppModule
){
*ppModule = &simpleTokenizerModule;
}
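/*
** Editor's note: the following block is an illustrative sketch only and is
** not part of the original file (it is compiled out with #if 0). It shows
** how a caller is expected to drive the module defined above: create a
** tokenizer, open a cursor on some text, link the cursor back to the
** tokenizer, then pull tokens until something other than SQLITE_OK is
** returned (SQLITE_DONE on normal completion).
*/
#if 0
static void simpleTokenizerDemo(const char *zText){
  sqlite3_tokenizer *pTok = 0;
  sqlite3_tokenizer_cursor *pCur = 0;
  const char *zToken;
  int nToken, iStart, iEnd, iPos;
  if( simpleCreate(0, 0, &pTok)!=SQLITE_OK ) return;
  if( simpleOpen(pTok, zText, -1, &pCur)==SQLITE_OK ){
    pCur->pTokenizer = pTok;      /* the caller links the cursor to the tokenizer */
    while( simpleNext(pCur, &zToken, &nToken, &iStart, &iEnd, &iPos)==SQLITE_OK ){
      printf("%d: %.*s [%d..%d]\n", iPos, nToken, zToken, iStart, iEnd);
    }
    simpleClose(pCur);
  }
  simpleDestroy(pTok);
}
#endif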
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */

View File

@ -1,116 +0,0 @@
#!/usr/bin/tclsh
#
# This script builds a single C code file holding all of FTS2 code.
# The name of the output file is fts2amal.c. To build this file,
# first do:
#
# make target_source
#
# The make target above moves all of the source code files into
# a subdirectory named "tsrc". (This script expects to find the files
# there and will not work if they are not found.)
#
# After the "tsrc" directory has been created and populated, run
# this script:
#
# tclsh mkfts2amal.tcl
#
# The amalgamated FTS2 code will be written into fts2amal.c
#
# Open the output file and write a header comment at the beginning
# of the file.
#
set out [open fts2amal.c w]
set today [clock format [clock seconds] -format "%Y-%m-%d %H:%M:%S UTC" -gmt 1]
puts $out [subst \
{/******************************************************************************
** This file is an amalgamation of separate C source files from the SQLite
** Full Text Search extension 2 (fts2). By combining all the individual C
** code files into this single large file, the entire code can be compiled
as one translation unit. This allows many compilers to do optimizations
** that would not be possible if the files were compiled separately. It also
** makes the code easier to import into other projects.
**
** This amalgamation was generated on $today.
*/}]
# These are the header files used by FTS2. The first time any of these
# files are seen in a #include statement in the C code, include the complete
# text of the file in-line. The file only needs to be included once.
#
foreach hdr {
fts2.h
fts2_hash.h
fts2_tokenizer.h
sqlite3.h
sqlite3ext.h
} {
set available_hdr($hdr) 1
}
# 78 stars used for comment formatting.
set s78 \
{*****************************************************************************}
# Insert a comment into the code
#
proc section_comment {text} {
global out s78
set n [string length $text]
set nstar [expr {60 - $n}]
set stars [string range $s78 0 $nstar]
puts $out "/************** $text $stars/"
}
# Read the source file named $filename and write it into the
# fts2amal.c output file. If any #include statements are seen,
# process them appropriately.
#
proc copy_file {filename} {
global seen_hdr available_hdr out
set tail [file tail $filename]
section_comment "Begin file $tail"
set in [open $filename r]
while {![eof $in]} {
set line [gets $in]
if {[regexp {^#\s*include\s+["<]([^">]+)[">]} $line all hdr]} {
if {[info exists available_hdr($hdr)]} {
if {$available_hdr($hdr)} {
section_comment "Include $hdr in the middle of $tail"
copy_file tsrc/$hdr
section_comment "Continuing where we left off in $tail"
}
} elseif {![info exists seen_hdr($hdr)]} {
set seen_hdr($hdr) 1
puts $out $line
}
} elseif {[regexp {^#ifdef __cplusplus} $line]} {
puts $out "#if 0"
} elseif {[regexp {^#line} $line]} {
# Skip #line directives.
} else {
puts $out $line
}
}
close $in
section_comment "End of $tail"
}
# Process the source files. Process files containing commonly
# used subroutines first in order to help the compiler find
# inlining opportunities.
#
foreach file {
fts2.c
fts2_hash.c
fts2_porter.c
fts2_tokenizer.c
fts2_tokenizer1.c
fts2_icu.c
} {
copy_file tsrc/$file
}
close $out

View File

@ -1,176 +0,0 @@
FTS4 CONTENT OPTION
Normally, in order to create a full-text index on a dataset, the FTS4
module stores a copy of all indexed documents in a specially created
database table.
As of SQLite version 3.7.9, FTS4 supports a new option - "content" -
designed to extend FTS4 to support the creation of full-text indexes where:
* The indexed documents are not stored within the SQLite database
at all (a "contentless" FTS4 table), or
* The indexed documents are stored in a database table created and
managed by the user (an "external content" FTS4 table).
Because the indexed documents themselves are usually much larger than
the full-text index, the content option can sometimes be used to achieve
significant space savings.
CONTENTLESS FTS4 TABLES
In order to create an FTS4 table that does not store a copy of the indexed
documents at all, the content option should be set to an empty string.
For example, the following SQL creates such an FTS4 table with three
columns - "a", "b", and "c":
CREATE VIRTUAL TABLE t1 USING fts4(content="", a, b, c);
Data can be inserted into such an FTS4 table using INSERT statements.
However, unlike ordinary FTS4 tables, the user must supply an explicit
integer docid value. For example:
-- This statement is Ok:
INSERT INTO t1(docid, a, b, c) VALUES(1, 'a b c', 'd e f', 'g h i');
-- This statement causes an error, as no docid value has been provided:
INSERT INTO t1(a, b, c) VALUES('j k l', 'm n o', 'p q r');
It is not possible to UPDATE or DELETE a row stored in a contentless FTS4
table. Attempting to do so is an error.
Contentless FTS4 tables also support SELECT statements. However, it is
an error to attempt to retrieve the value of any table column other than
the docid column. The auxiliary function matchinfo() may be used, but
snippet() and offsets() may not. For example:
-- The following statements are Ok:
SELECT docid FROM t1 WHERE t1 MATCH 'xxx';
SELECT docid FROM t1 WHERE a MATCH 'xxx';
SELECT matchinfo(t1) FROM t1 WHERE t1 MATCH 'xxx';
-- The following statements all cause errors, as the values of columns
-- other than docid are required to evaluate them.
SELECT * FROM t1;
SELECT a, b FROM t1 WHERE t1 MATCH 'xxx';
SELECT docid FROM t1 WHERE a LIKE 'xxx%';
SELECT snippet(t1) FROM t1 WHERE t1 MATCH 'xxx';
Errors related to attempting to retrieve column values other than docid
are runtime errors that occur within sqlite3_step(). In some cases, for
example if the MATCH expression in a SELECT query matches zero rows, there
may be no error at all even if a statement does refer to column values
other than docid.
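As an illustration only (not from the original document), the C sketch below
shows where such an error surfaces when using the SQLite C API against the
contentless table "t1" created above: preparing the statement succeeds, and
the error is reported only when sqlite3_step() is called.
  /* Sketch: selecting a non-docid column of a contentless FTS4 table. */
  int queryContentlessColumn(sqlite3 *db){
    sqlite3_stmt *pStmt = 0;
    int rc = sqlite3_prepare_v2(db,
        "SELECT a FROM t1 WHERE t1 MATCH 'xxx'", -1, &pStmt, 0);
    if( rc!=SQLITE_OK ) return rc;
    rc = sqlite3_step(pStmt);    /* the error is returned here, not above */
    sqlite3_finalize(pStmt);
    return rc;
  }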
EXTERNAL CONTENT FTS4 TABLES
An "external content" FTS4 table is similar to a contentless table, except
that if evaluation of a query requires the value of a column other than
docid, FTS4 attempts to retrieve that value from a table (or view, or
virtual table) nominated by the user (hereafter referred to as the "content
table"). The FTS4 module never writes to the content table, and writing
to the content table does not affect the full-text index. It is the
responsibility of the user to ensure that the content table and the
full-text index are consistent.
An external content FTS4 table is created by setting the content option
to the name of a table (or view, or virtual table) that may be queried by
FTS4 to retrieve column values when required. If the nominated table does
not exist, then an external content table behaves in the same way as
a contentless table. For example:
CREATE TABLE t2(id INTEGER PRIMARY KEY, a, b, c);
CREATE VIRTUAL TABLE t3 USING fts4(content="t2", a, c);
Assuming the nominated table does exist, then its columns must be the same
as or a superset of those defined for the FTS table.
When a user's query on the FTS table requires a column value other than
docid, FTS attempts to read this value from the corresponding column of
the row in the content table with a rowid value equal to the current FTS
docid. Or, if such a row cannot be found in the content table, a NULL
value is used instead. For example:
CREATE TABLE t2(id INTEGER PRIMARY KEY, a, b, c, d);
CREATE VIRTUAL TABLE t3 USING fts4(content="t2", b, c);
INSERT INTO t2(id, a, b, c) VALUES(2, 'a b', 'c d', 'e f');
INSERT INTO t2(id, a, b, c) VALUES(3, 'g h', 'i j', 'k l');
INSERT INTO t3(docid, b, c) SELECT id, b, c FROM t2;
-- The following query returns a single row with two columns containing
-- the text values "i j" and "k l".
--
-- The query uses the full-text index to discover that the MATCH
-- term matches the row with docid=3. It then retrieves the values
-- of columns b and c from the row with rowid=3 in the content table
-- to return.
--
SELECT * FROM t3 WHERE t3 MATCH 'k';
-- Following the UPDATE, the query still returns a single row, this
-- time containing the text values "xxx" and "yyy". This is because the
-- full-text index still indicates that the row with docid=3 matches
-- the FTS4 query 'k', even though the documents stored in the content
-- table have been modified.
--
UPDATE t2 SET b = 'xxx', c = 'yyy' WHERE rowid = 3;
SELECT * FROM t3 WHERE t3 MATCH 'k';
-- Following the DELETE below, the query returns one row containing two
-- NULL values. NULL values are returned because FTS is unable to find
-- a row with rowid=3 within the content table.
--
DELETE FROM t2;
SELECT * FROM t3 WHERE t3 MATCH 'k';
When a row is deleted from an external content FTS4 table, FTS4 needs to
retrieve the column values of the row being deleted from the content table.
This is so that FTS4 can update the full-text index entries for each token
that occurs within the deleted row to indicate that that row has been
deleted. If the content table row cannot be found, or if it contains values
inconsistent with the contents of the FTS index, the results can be difficult
to predict. The FTS index may be left containing entries corresponding to the
deleted row, which can lead to seemingly nonsensical results being returned
by subsequent SELECT queries. The same applies when a row is updated, as
internally an UPDATE is the same as a DELETE followed by an INSERT.
Instead of writing separately to the full-text index and the content table,
some users may wish to use database triggers to keep the full-text index
up to date with respect to the set of documents stored in the content table.
For example, using the tables from earlier examples:
CREATE TRIGGER t2_bu BEFORE UPDATE ON t2 BEGIN
DELETE FROM t3 WHERE docid=old.rowid;
END;
CREATE TRIGGER t2_bd BEFORE DELETE ON t2 BEGIN
DELETE FROM t3 WHERE docid=old.rowid;
END;
CREATE TRIGGER t2_au AFTER UPDATE ON t2 BEGIN
INSERT INTO t3(docid, b, c) VALUES(new.rowid, new.b, new.c);
END;
CREATE TRIGGER t2_ai AFTER INSERT ON t2 BEGIN
INSERT INTO t3(docid, b, c) VALUES(new.rowid, new.b, new.c);
END;
The DELETE trigger must be fired before the actual delete takes place
on the content table. This is so that FTS4 can still retrieve the original
values in order to update the full-text index. And the INSERT trigger must
be fired after the new row is inserted, so as to handle the case where the
rowid is assigned automatically within the system. The UPDATE trigger must
be split into two parts, one fired before and one after the update of the
content table, for the same reasons.
FTS4 features a special command similar to the 'optimize' command that
deletes the entire full-text index and rebuilds it based on the current
set of documents in the content table. Assuming again that "t3" is the
name of the external content FTS4 table, the command is:
INSERT INTO t3(t3) VALUES('rebuild');
This command may also be used with ordinary FTS4 tables, although it may
only be useful if the full-text index has somehow become corrupt. It is an
error to attempt to rebuild the full-text index maintained by a contentless
FTS4 table.
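As a hedged illustration (not from the original document), the 'rebuild'
command can also be issued from C code with sqlite3_exec(); the helper below
assumes the FTS4 table is named "t3" as in the examples above.
  /* Sketch: rebuild the full-text index of the FTS4 table "t3". */
  int rebuildFtsIndex(sqlite3 *db){
    return sqlite3_exec(db, "INSERT INTO t3(t3) VALUES('rebuild')", 0, 0, 0);
  }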

View File

@ -1,209 +0,0 @@
1. OVERVIEW
This README file describes the syntax of the arguments that may be passed to
the FTS3 MATCH operator used for full-text queries. For example, if table
"t1" is an Fts3 virtual table, the following SQL query:
SELECT * FROM t1 WHERE <col> MATCH <full-text-query>
may be used to retrieve all rows that match the specified full-text query.
The text "<col>" should be replaced by either the name of the fts3 table
(in this case "t1"), or by the name of one of the columns of the fts3
table. <full-text-query> should be replaced by an SQL expression that
computes to a string containing an Fts3 query.
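For illustration only (not part of the original README), one common way to
supply that expression from C is to bind the query text to an SQL parameter;
the sketch below assumes the fts3 table is named "t1":
  /* Sketch: run a full-text query, binding the query text as a parameter. */
  int runFtsQuery(sqlite3 *db, const char *zQuery){
    sqlite3_stmt *pStmt = 0;
    int rc = sqlite3_prepare_v2(db,
        "SELECT docid FROM t1 WHERE t1 MATCH ?", -1, &pStmt, 0);
    if( rc!=SQLITE_OK ) return rc;
    sqlite3_bind_text(pStmt, 1, zQuery, -1, SQLITE_TRANSIENT);
    while( sqlite3_step(pStmt)==SQLITE_ROW ){
      /* sqlite3_column_int64(pStmt, 0) is the docid of a matching row */
    }
    return sqlite3_finalize(pStmt);
  }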
If the left-hand-side of the MATCH operator is set to the name of the
fts3 table, then by default the query may be matched against any column
of the table. If it is set to a column name, then by default the query
may only match the specified column. In both cases this may be overridden
as part of the query text (see sections 2 and 3 below).
As of SQLite version 3.6.8, Fts3 supports two slightly different query
formats; the standard syntax, which is used by default, and the enhanced
query syntax which can be selected by compiling with the pre-processor
symbol SQLITE_ENABLE_FTS3_PARENTHESIS defined.
-DSQLITE_ENABLE_FTS3_PARENTHESIS
2. STANDARD QUERY SYNTAX
When using the standard Fts3 query syntax, a query usually consists of a
list of terms (words) separated by white-space characters. To match a
query, a row (or column) of an Fts3 table must contain each of the specified
terms. For example, the following query:
<col> MATCH 'hello world'
matches rows (or columns, if <col> is the name of a column) that
contain at least one instance of the token "hello", and at least one
instance of the token "world". Tokens may be grouped into phrases using
quotation marks. In this case, a matching row or column must contain each
of the tokens in the phrase in the order specified, with no intervening
tokens. For example, the query:
<col> MATCH '"hello world" joe"
matches the first of the following two documents, but not the second or
third:
"'Hello world', said Joe."
"One should always greet the world with a cheery hello, thought Joe."
"How many hello world programs could their be?"
As well as grouping tokens together by phrase, the binary NEAR operator
may be used to search for rows that contain two or more specified tokens
or phrases within a specified proximity of each other. The NEAR operator
must always be specified in upper case. The word "near" in lower or mixed
case is treated as an ordinary token. For example, the following query:
<col> MATCH 'engineering NEAR consultancy'
matches rows that contain both the "engineering" and "consultancy" tokens
in the same column with not more than 10 other words between them. It does
not matter which of the two terms occurs first in the document, only that
they be separated by 10 tokens or fewer. The user may also specify
a different required proximity by adding "/N" immediately after the NEAR
operator, where N is an integer. For example:
<col> MATCH 'engineering NEAR/5 consultancy'
searches for a row containing an instance of each specified token separated
by not more than 5 other tokens. More than one NEAR operator can be used
in a sequence. For example, this query:
<col> MATCH 'reliable NEAR/2 engineering NEAR/5 consultancy'
searches for a row that contains an instance of the token "reliable"
separated by not more than two tokens from an instance of "engineering",
which is in turn separated by not more than 5 other tokens from an
instance of the term "consultancy". Phrases enclosed in quotes may
also be used as arguments to the NEAR operator.
Similar to the NEAR operator, one or more tokens or phrases may be
separated by OR operators. In this case, only one of the specified tokens
or phrases must appear in the document. For example, the query:
<col> MATCH 'hello OR world'
matches rows that contain either the term "hello", or the term "world",
or both. Note that unlike in many programming languages, the OR operator
has a higher precedence than the AND operators implied between white-space
separated tokens. The following query matches documents that contain the
term 'sqlite' and at least one of the terms 'fantastic' or 'impressive',
not those that contain both 'sqlite' and 'fantastic' or 'impressive':
<col> MATCH 'sqlite fantastic OR impressive'
Any token that is part of an Fts3 query expression, whether or not it is
part of a phrase enclosed in quotes, may have a '*' character appended to
it. In this case, the token matches all terms that begin with the characters
of the token, not just those that exactly match it. For example, the
following query:
<col> MATCH 'sql*'
matches all rows that contain the term "SQLite", as well as those that
contain "SQL".
A token that is not part of a quoted phrase may be preceded by a '-'
character, which indicates that matching rows must not contain the
specified term. For example, the following:
<col> MATCH '"database engine" -sqlite'
matches rows that contain the phrase "database engine" but do not contain
the term "sqlite". If the '-' character occurs inside a quoted phrase,
it is ignored. It is possible to use both the '-' prefix and the '*' postfix
on a single term. At this time, all Fts3 queries must contain at least
one term or phrase that is not preceded by the '-' prefix.
Regardless of whether or not a table name or column name is used on the
left hand side of the MATCH operator, a specific column of the fts3 table
may be associated with each token in a query by preceding a token with
a column name followed by a ':' character. For example, regardless of what
is specified for <col>, the following query requires that column "col1"
of the table contains the term "hello", and that column "col2" of the
table contains the term "world". If the table does not contain columns
named "col1" and "col2", then an error is returned and the query is
not run.
<col> MATCH 'col1:hello col2:world'
It is not possible to associate a specific table column with a quoted
phrase or a term preceded by a '-' operator. A '*' character may be
appended to a term associated with a specific column for prefix matching.
3. ENHANCED QUERY SYNTAX
The enhanced query syntax is quite similar to the standard query syntax,
with the following four differences:
1) Parentheses are supported. When using the enhanced query syntax,
parentheses may be used to overcome the built-in precedence of the
supplied binary operators. For example, the following query:
<col> MATCH '(hello world) OR (simple example)'
matches documents that contain both "hello" and "world", and documents
that contain both "simple" and "example". It is not possible to forumlate
such a query using the standard syntax.
2) Instead of separating tokens and phrases by whitespace, an AND operator
may be explicitly specified. This does not change query processing at
all, but may be used to improve readability. For example, the following
query is handled identically to the one above:
<col> MATCH '(hello AND world) OR (simple AND example)'
As with the OR and NEAR operators, the AND operator must be specified
in upper case. The word "and" specified in lower or mixed case is
handled as a regular token.
3) The '-' token prefix is not supported. Instead, a new binary operator,
NOT, is included. The NOT operator requires that the query specified
as its left-hand operator matches, but that the query specified as the
right-hand operator does not. For example, to query for all rows that
contain the term "example" but not the term "simple", the following
query could be used:
<col> MATCH 'example NOT simple'
As for all other operators, the NOT operator must be specified in
upper case. Otherwise it will be treated as a regular token.
4) Unlike in the standard syntax, where the OR operator has a higher
precedence than the implicit AND operator, when using the enhanced
syntax implicit and explicit AND operators have a higher precedence
than OR operators. Using the enhanced syntax, the following two
queries are equivalent:
<col> MATCH 'sqlite fantastic OR impressive'
<col> MATCH '(sqlite AND fantastic) OR impressive'
however, when using the standard syntax, the query:
<col> MATCH 'sqlite fantastic OR impressive'
is equivalent to the enhanced syntax query:
<col> MATCH 'sqlite AND (fantastic OR impressive)'
The precedence of all enhanced syntax operators, in order from highest
to lowest, is:
NEAR (highest precedence, tightest grouping)
NOT
AND
OR (lowest precedence, loosest grouping)
Using the enhanced syntax, it is possible to specify expressions enclosed
in parentheses as operands to the NOT, AND and OR operators. However both
the left and right hand side operands of NEAR operators must be either
tokens or phrases. Attempting the following query will return an error:
<col> MATCH 'sqlite NEAR (fantastic OR impressive)'
Queries of this form must be re-written as:
<col> MATCH 'sqlite NEAR fantastic OR sqlite NEAR impressive'

View File

@ -1,135 +0,0 @@
1. FTS3 Tokenizers
When creating a new full-text table, FTS3 allows the user to select
the text tokenizer implementation to be used when indexing text
by specifying a "tokenize" clause as part of the CREATE VIRTUAL TABLE
statement:
CREATE VIRTUAL TABLE <table-name> USING fts3(
<columns ...> [, tokenize <tokenizer-name> [<tokenizer-args>]]
);
The built-in tokenizers (valid values to pass as <tokenizer-name>) are
"simple", "porter" and "unicode".
<tokenizer-args> should consist of zero or more white-space separated
arguments to pass to the selected tokenizer implementation. The
interpretation of the arguments, if any, depends on the individual
tokenizer.
2. Custom Tokenizers
FTS3 allows users to provide custom tokenizer implementations. The
interface used to create a new tokenizer is defined and described in
the fts3_tokenizer.h source file.
Registering a new FTS3 tokenizer is similar to registering a new
virtual table module with SQLite. The user passes a pointer to a
structure containing pointers to various callback functions that
make up the implementation of the new tokenizer type. For tokenizers,
the structure (defined in fts3_tokenizer.h) is called
"sqlite3_tokenizer_module".
FTS3 does not expose a C-function that users call to register new
tokenizer types with a database handle. Instead, the pointer must
be encoded as an SQL blob value and passed to FTS3 through the SQL
engine by evaluating a special scalar function, "fts3_tokenizer()".
The fts3_tokenizer() function may be called with one or two arguments,
as follows:
SELECT fts3_tokenizer(<tokenizer-name>);
SELECT fts3_tokenizer(<tokenizer-name>, <sqlite3_tokenizer_module ptr>);
Where <tokenizer-name> is a string identifying the tokenizer and
<sqlite3_tokenizer_module ptr> is a pointer to an sqlite3_tokenizer_module
structure encoded as an SQL blob. If the second argument is present,
it is registered as tokenizer <tokenizer-name> and a copy of it is
returned. If only one argument is passed, a pointer to the tokenizer
implementation currently registered as <tokenizer-name> is returned,
encoded as a blob. Or, if no such tokenizer exists, an SQL exception
(error) is raised.
SECURITY: If the fts3 extension is used in an environment where potentially
malicious users may execute arbitrary SQL (i.e. gears), they should be
prevented from invoking the fts3_tokenizer() function. The
fts3_tokenizer() function is disabled by default. It is only enabled
by SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER. Do not enable it in
security sensitive environments.
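As an illustration only (not part of the original README), an application
that does need custom tokenizers, and accepts the caveat above, can enable
the fts3_tokenizer() function for a single connection with sqlite3_db_config():
  /* Sketch: opt in to fts3_tokenizer() registration on this connection. */
  int enableTokenizerRegistration(sqlite3 *db){
    int isEnabled = 0;
    return sqlite3_db_config(db,
        SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER, 1, &isEnabled);
  }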
See "Sample code" below for an example of calling the fts3_tokenizer()
function from C code.
3. ICU Library Tokenizers
If this extension is compiled with the SQLITE_ENABLE_ICU pre-processor
symbol defined, then there exists a built-in tokenizer named "icu"
implemented using the ICU library. The first argument passed to the
xCreate() method (see fts3_tokenizer.h) of this tokenizer may be
an ICU locale identifier. For example "tr_TR" for Turkish as used
in Turkey, or "en_AU" for English as used in Australia. For example:
"CREATE VIRTUAL TABLE thai_text USING fts3(text, tokenizer icu th_TH)"
The ICU tokenizer implementation is very simple. It splits the input
text according to the ICU rules for finding word boundaries and discards
any tokens that consist entirely of white-space. This may be suitable
for some applications in some locales, but not all. If more complex
processing is required, for example to implement stemming or
discard punctuation, this can be done by creating a tokenizer
implementation that uses the ICU tokenizer as part of its implementation.
When using the ICU tokenizer this way, it is safe to overwrite the
contents of the strings returned by the xNext() method (see
fts3_tokenizer.h).
4. Sample code.
The following two code samples illustrate the way C code should invoke
the fts3_tokenizer() scalar function:
int registerTokenizer(
sqlite3 *db,
char *zName,
const sqlite3_tokenizer_module *p
){
int rc;
sqlite3_stmt *pStmt;
const char zSql[] = "SELECT fts3_tokenizer(?, ?)";
rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
if( rc!=SQLITE_OK ){
return rc;
}
sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC);
sqlite3_step(pStmt);
return sqlite3_finalize(pStmt);
}
int queryTokenizer(
sqlite3 *db,
char *zName,
const sqlite3_tokenizer_module **pp
){
int rc;
sqlite3_stmt *pStmt;
const char zSql[] = "SELECT fts3_tokenizer(?)";
*pp = 0;
rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
if( rc!=SQLITE_OK ){
return rc;
}
sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
if( SQLITE_ROW==sqlite3_step(pStmt) ){
if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){
memcpy(pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp));
}
}
return sqlite3_finalize(pStmt);
}
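Building on the two helpers above, a hedged usage sketch (the module pointer
"pMyModule" and the tokenizer name "mytok" are hypothetical) might look like:
  /* Sketch: register a custom tokenizer, then create a table that uses it. */
  int installAndUseTokenizer(
    sqlite3 *db,
    const sqlite3_tokenizer_module *pMyModule
  ){
    int rc = registerTokenizer(db, "mytok", pMyModule);
    if( rc!=SQLITE_OK ) return rc;
    return sqlite3_exec(db,
        "CREATE VIRTUAL TABLE docs USING fts3(body, tokenize mytok)", 0, 0, 0);
  }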

View File

@ -1,4 +0,0 @@
This folder contains source code to the second full-text search
extension for SQLite. While the API is the same, this version uses a
substantially different storage schema from fts1, so tables will need
to be rebuilt.

File diff suppressed because it is too large Load Diff

View File

@ -1,26 +0,0 @@
/*
** 2006 Oct 10
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
**
** This header file is used by programs that want to link against the
** FTS3 library. All it does is declare the sqlite3Fts3Init() interface.
*/
#include "sqlite3.h"
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
int sqlite3Fts3Init(sqlite3 *db);
#ifdef __cplusplus
} /* extern "C" */
#endif /* __cplusplus */

View File

@ -1,652 +0,0 @@
/*
** 2009 Nov 12
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
**
*/
#ifndef _FTSINT_H
#define _FTSINT_H
#if !defined(NDEBUG) && !defined(SQLITE_DEBUG)
# define NDEBUG 1
#endif
/* FTS3/FTS4 require virtual tables */
#ifdef SQLITE_OMIT_VIRTUALTABLE
# undef SQLITE_ENABLE_FTS3
# undef SQLITE_ENABLE_FTS4
#endif
/*
** FTS4 is really an extension for FTS3. It is enabled using the
** SQLITE_ENABLE_FTS3 macro. But to avoid confusion we also allow
** the SQLITE_ENABLE_FTS4 macro to serve as an alias for SQLITE_ENABLE_FTS3.
*/
#if defined(SQLITE_ENABLE_FTS4) && !defined(SQLITE_ENABLE_FTS3)
# define SQLITE_ENABLE_FTS3
#endif
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
/* If not building as part of the core, include sqlite3ext.h. */
#ifndef SQLITE_CORE
# include "sqlite3ext.h"
SQLITE_EXTENSION_INIT3
#endif
#include "sqlite3.h"
#include "fts3_tokenizer.h"
#include "fts3_hash.h"
/*
** This constant determines the maximum depth of an FTS expression tree
** that the library will create and use. FTS uses recursion to perform
** various operations on the query tree, so the disadvantage of a large
** limit is that it may allow very large queries to use large amounts
** of stack space (perhaps causing a stack overflow).
*/
#ifndef SQLITE_FTS3_MAX_EXPR_DEPTH
# define SQLITE_FTS3_MAX_EXPR_DEPTH 12
#endif
/*
** This constant controls how often segments are merged. Once there are
** FTS3_MERGE_COUNT segments of level N, they are merged into a single
** segment of level N+1.
*/
#define FTS3_MERGE_COUNT 16
/*
** This is the maximum amount of data (in bytes) to store in the
** Fts3Table.pendingTerms hash table. Normally, the hash table is
** populated as documents are inserted/updated/deleted in a transaction
** and used to create a new segment when the transaction is committed.
** However if this limit is reached midway through a transaction, a new
** segment is created and the hash table cleared immediately.
*/
#define FTS3_MAX_PENDING_DATA (1*1024*1024)
/*
** Macro to return the number of elements in an array. SQLite has a
** similar macro called ArraySize(). Use a different name to avoid
** a collision when building an amalgamation with built-in FTS3.
*/
#define SizeofArray(X) ((int)(sizeof(X)/sizeof(X[0])))
#ifndef MIN
# define MIN(x,y) ((x)<(y)?(x):(y))
#endif
#ifndef MAX
# define MAX(x,y) ((x)>(y)?(x):(y))
#endif
/*
** Maximum length of a varint encoded integer. The varint format is different
** from that used by SQLite, so the maximum length is 10, not 9.
*/
#define FTS3_VARINT_MAX 10
#define FTS3_BUFFER_PADDING 8
/*
** FTS4 virtual tables may maintain multiple indexes - one index of all terms
** in the document set and zero or more prefix indexes. All indexes are stored
** as one or more b+-trees in the %_segments and %_segdir tables.
**
** It is possible to determine which index a b+-tree belongs to based on the
** value stored in the "%_segdir.level" column. Given this value L, the index
** that the b+-tree belongs to is (L>>10). In other words, all b+-trees with
** level values between 0 and 1023 (inclusive) belong to index 0, all levels
** between 1024 and 2047 to index 1, and so on.
**
** It is considered impossible for an index to use more than 1024 levels. In
** theory this may happen, but only after at least
** (FTS3_MERGE_COUNT^1024) separate flushes of the pending-terms tables.
*/
#define FTS3_SEGDIR_MAXLEVEL 1024
#define FTS3_SEGDIR_MAXLEVEL_STR "1024"
/*
** The testcase() macro is only used by the amalgamation. If undefined,
** make it a no-op.
*/
#ifndef testcase
# define testcase(X)
#endif
/*
** Terminator values for position-lists and column-lists.
*/
#define POS_COLUMN (1) /* Column-list terminator */
#define POS_END (0) /* Position-list terminator */
/*
** The assert_fts3_nc() macro is similar to the assert() macro, except that it
** is used for assert() conditions that are true only if it can be
** guaranteed that the database is not corrupt.
*/
#if defined(SQLITE_DEBUG) || defined(SQLITE_TEST)
extern int sqlite3_fts3_may_be_corrupt;
# define assert_fts3_nc(x) assert(sqlite3_fts3_may_be_corrupt || (x))
#else
# define assert_fts3_nc(x) assert(x)
#endif
/*
** This section provides definitions to allow the
** FTS3 extension to be compiled outside of the
** amalgamation.
*/
#ifndef SQLITE_AMALGAMATION
/*
** Macros indicating that conditional expressions are always true or
** false.
*/
#ifdef SQLITE_COVERAGE_TEST
# define ALWAYS(x) (1)
# define NEVER(X) (0)
#elif defined(SQLITE_DEBUG)
# define ALWAYS(x) sqlite3Fts3Always((x)!=0)
# define NEVER(x) sqlite3Fts3Never((x)!=0)
int sqlite3Fts3Always(int b);
int sqlite3Fts3Never(int b);
#else
# define ALWAYS(x) (x)
# define NEVER(x) (x)
#endif
/*
** Internal types used by SQLite.
*/
typedef unsigned char u8; /* 1-byte (or larger) unsigned integer */
typedef short int i16; /* 2-byte (or larger) signed integer */
typedef unsigned int u32; /* 4-byte unsigned integer */
typedef sqlite3_uint64 u64; /* 8-byte unsigned integer */
typedef sqlite3_int64 i64; /* 8-byte signed integer */
/*
** Macro used to suppress compiler warnings for unused parameters.
*/
#define UNUSED_PARAMETER(x) (void)(x)
/*
** Activate assert() only if SQLITE_TEST is enabled.
*/
#if !defined(NDEBUG) && !defined(SQLITE_DEBUG)
# define NDEBUG 1
#endif
/*
** The TESTONLY macro is used to enclose variable declarations or
** other bits of code that are needed to support the arguments
** within testcase() and assert() macros.
*/
#if defined(SQLITE_DEBUG) || defined(SQLITE_COVERAGE_TEST)
# define TESTONLY(X) X
#else
# define TESTONLY(X)
#endif
#define LARGEST_INT64 (0xffffffff|(((i64)0x7fffffff)<<32))
#define SMALLEST_INT64 (((i64)-1) - LARGEST_INT64)
#define deliberate_fall_through
#endif /* SQLITE_AMALGAMATION */
#ifdef SQLITE_DEBUG
int sqlite3Fts3Corrupt(void);
# define FTS_CORRUPT_VTAB sqlite3Fts3Corrupt()
#else
# define FTS_CORRUPT_VTAB SQLITE_CORRUPT_VTAB
#endif
typedef struct Fts3Table Fts3Table;
typedef struct Fts3Cursor Fts3Cursor;
typedef struct Fts3Expr Fts3Expr;
typedef struct Fts3Phrase Fts3Phrase;
typedef struct Fts3PhraseToken Fts3PhraseToken;
typedef struct Fts3Doclist Fts3Doclist;
typedef struct Fts3SegFilter Fts3SegFilter;
typedef struct Fts3DeferredToken Fts3DeferredToken;
typedef struct Fts3SegReader Fts3SegReader;
typedef struct Fts3MultiSegReader Fts3MultiSegReader;
typedef struct MatchinfoBuffer MatchinfoBuffer;
/*
** A connection to a fulltext index is an instance of the following
** structure. The xCreate and xConnect methods create an instance
** of this structure and xDestroy and xDisconnect free that instance.
** All other methods receive a pointer to the structure as one of their
** arguments.
*/
struct Fts3Table {
sqlite3_vtab base; /* Base class used by SQLite core */
sqlite3 *db; /* The database connection */
const char *zDb; /* logical database name */
const char *zName; /* virtual table name */
int nColumn; /* number of named columns in virtual table */
char **azColumn; /* column names. malloced */
u8 *abNotindexed; /* True for 'notindexed' columns */
sqlite3_tokenizer *pTokenizer; /* tokenizer for inserts and queries */
char *zContentTbl; /* content=xxx option, or NULL */
char *zLanguageid; /* languageid=xxx option, or NULL */
int nAutoincrmerge; /* Value configured by 'automerge' */
u32 nLeafAdd; /* Number of leaf blocks added this trans */
int bLock; /* Used to prevent recursive content= tbls */
/* Precompiled statements used by the implementation. Each of these
** statements is run and reset within a single virtual table API call.
*/
sqlite3_stmt *aStmt[40];
sqlite3_stmt *pSeekStmt; /* Cache for fts3CursorSeekStmt() */
char *zReadExprlist;
char *zWriteExprlist;
int nNodeSize; /* Soft limit for node size */
u8 bFts4; /* True for FTS4, false for FTS3 */
u8 bHasStat; /* True if %_stat table exists (2==unknown) */
u8 bHasDocsize; /* True if %_docsize table exists */
u8 bDescIdx; /* True if doclists are in reverse order */
u8 bIgnoreSavepoint; /* True to ignore xSavepoint invocations */
int nPgsz; /* Page size for host database */
char *zSegmentsTbl; /* Name of %_segments table */
sqlite3_blob *pSegments; /* Blob handle open on %_segments table */
/*
** The following array of hash tables is used to buffer pending index
** updates during transactions. All pending updates buffered at any one
** time must share a common language-id (see the FTS4 langid= feature).
** The current language id is stored in variable iPrevLangid.
**
** A single FTS4 table may have multiple full-text indexes. For each index
** there is an entry in the aIndex[] array. Index 0 is an index of all the
** terms that appear in the document set. Each subsequent index in aIndex[]
** is an index of prefixes of a specific length.
**
** Variable nPendingData contains an estimate of the memory consumed by the
** pending data structures, including hash table overhead, but not including
** malloc overhead. When nPendingData exceeds nMaxPendingData, all hash
** tables are flushed to disk. Variable iPrevDocid is the docid of the most
** recently inserted record.
*/
int nIndex; /* Size of aIndex[] */
struct Fts3Index {
int nPrefix; /* Prefix length (0 for main terms index) */
Fts3Hash hPending; /* Pending terms table for this index */
} *aIndex;
int nMaxPendingData; /* Max pending data before flush to disk */
int nPendingData; /* Current bytes of pending data */
sqlite_int64 iPrevDocid; /* Docid of most recently inserted document */
int iPrevLangid; /* Langid of recently inserted document */
int bPrevDelete; /* True if last operation was a delete */
#if defined(SQLITE_DEBUG) || defined(SQLITE_COVERAGE_TEST)
/* State variables used for validating that the transaction control
** methods of the virtual table are called at appropriate times. These
** values do not contribute to FTS functionality; they are used for
** verifying the operation of the SQLite core.
*/
int inTransaction; /* True after xBegin but before xCommit/xRollback */
int mxSavepoint; /* Largest valid xSavepoint integer */
#endif
#if defined(SQLITE_DEBUG) || defined(SQLITE_TEST)
/* True to disable the incremental doclist optimization. This is controlled
** by special insert command 'test-no-incr-doclist'. */
int bNoIncrDoclist;
/* Number of segments in a level */
int nMergeCount;
#endif
};
/* Macro to find the number of segments to merge */
#if defined(SQLITE_DEBUG) || defined(SQLITE_TEST)
# define MergeCount(P) ((P)->nMergeCount)
#else
# define MergeCount(P) FTS3_MERGE_COUNT
#endif
/*
** When the core wants to read from the virtual table, it creates a
** virtual table cursor (an instance of the following structure) using
** the xOpen method. Cursors are destroyed using the xClose method.
*/
struct Fts3Cursor {
sqlite3_vtab_cursor base; /* Base class used by SQLite core */
i16 eSearch; /* Search strategy (see below) */
u8 isEof; /* True if at End Of Results */
u8 isRequireSeek; /* True if must seek pStmt to %_content row */
u8 bSeekStmt; /* True if pStmt is a seek */
sqlite3_stmt *pStmt; /* Prepared statement in use by the cursor */
Fts3Expr *pExpr; /* Parsed MATCH query string */
int iLangid; /* Language being queried for */
int nPhrase; /* Number of matchable phrases in query */
Fts3DeferredToken *pDeferred; /* Deferred search tokens, if any */
sqlite3_int64 iPrevId; /* Previous id read from aDoclist */
char *pNextId; /* Pointer into the body of aDoclist */
char *aDoclist; /* List of docids for full-text queries */
int nDoclist; /* Size of buffer at aDoclist */
u8 bDesc; /* True to sort in descending order */
int eEvalmode; /* An FTS3_EVAL_XX constant */
int nRowAvg; /* Average size of database rows, in pages */
sqlite3_int64 nDoc; /* Documents in table */
i64 iMinDocid; /* Minimum docid to return */
i64 iMaxDocid; /* Maximum docid to return */
int isMatchinfoNeeded; /* True when aMatchinfo[] needs filling in */
MatchinfoBuffer *pMIBuffer; /* Buffer for matchinfo data */
};
#define FTS3_EVAL_FILTER 0
#define FTS3_EVAL_NEXT 1
#define FTS3_EVAL_MATCHINFO 2
/*
** The Fts3Cursor.eSearch member is always set to one of the following.
** Actually, Fts3Cursor.eSearch can be greater than or equal to
** FTS3_FULLTEXT_SEARCH. If so, then Fts3Cursor.eSearch - 2 is the index
** of the column to be searched. For example, in
**
** CREATE VIRTUAL TABLE ex1 USING fts3(a,b,c,d);
** SELECT docid FROM ex1 WHERE b MATCH 'one two three';
**
** Because the LHS of the MATCH operator is 2nd column "b",
** Fts3Cursor.eSearch will be set to FTS3_FULLTEXT_SEARCH+1. (+0 for a,
** +1 for b, +2 for c, +3 for d.) If the LHS of MATCH were "ex1"
** indicating that all columns should be searched,
** then eSearch would be set to FTS3_FULLTEXT_SEARCH+4.
*/
#define FTS3_FULLSCAN_SEARCH 0 /* Linear scan of %_content table */
#define FTS3_DOCID_SEARCH 1 /* Lookup by rowid on %_content table */
#define FTS3_FULLTEXT_SEARCH 2 /* Full-text index search */
/*
** The lower 16-bits of the sqlite3_index_info.idxNum value set by
** the xBestIndex() method contains the Fts3Cursor.eSearch value described
** above. The upper 16-bits contain a combination of the following
** bits, used to describe extra constraints on full-text searches.
*/
#define FTS3_HAVE_LANGID 0x00010000 /* languageid=? */
#define FTS3_HAVE_DOCID_GE 0x00020000 /* docid>=? */
#define FTS3_HAVE_DOCID_LE 0x00040000 /* docid<=? */
struct Fts3Doclist {
char *aAll; /* Array containing doclist (or NULL) */
int nAll; /* Size of a[] in bytes */
char *pNextDocid; /* Pointer to next docid */
sqlite3_int64 iDocid; /* Current docid (if pList!=0) */
int bFreeList; /* True if pList should be sqlite3_free()d */
char *pList; /* Pointer to position list following iDocid */
int nList; /* Length of position list */
};
/*
** A "phrase" is a sequence of one or more tokens that must match in
** sequence. A single token is the base case and the most common case.
** For a sequence of tokens contained in double-quotes (i.e. "one two three")
** nToken will be the number of tokens in the string.
*/
struct Fts3PhraseToken {
char *z; /* Text of the token */
int n; /* Number of bytes in buffer z */
int isPrefix; /* True if token ends with a "*" character */
int bFirst; /* True if token must appear at position 0 */
/* Variables above this point are populated when the expression is
** parsed (by code in fts3_expr.c). Below this point the variables are
** used when evaluating the expression. */
Fts3DeferredToken *pDeferred; /* Deferred token object for this token */
Fts3MultiSegReader *pSegcsr; /* Segment-reader for this token */
};
struct Fts3Phrase {
/* Cache of doclist for this phrase. */
Fts3Doclist doclist;
int bIncr; /* True if doclist is loaded incrementally */
int iDoclistToken;
/* Used by sqlite3Fts3EvalPhrasePoslist() if this is a descendent of an
** OR condition. */
char *pOrPoslist;
i64 iOrDocid;
/* Variables below this point are populated by fts3_expr.c when parsing
** a MATCH expression. Everything above is part of the evaluation phase.
*/
int nToken; /* Number of tokens in the phrase */
int iColumn; /* Index of column this phrase must match */
Fts3PhraseToken aToken[1]; /* One entry for each token in the phrase */
};
/*
** A tree of these objects forms the RHS of a MATCH operator.
**
** If Fts3Expr.eType is FTSQUERY_PHRASE and isLoaded is true, then aDoclist
** points to a malloced buffer, size nDoclist bytes, containing the results
** of this phrase query in FTS3 doclist format. As usual, the initial
** "Length" field found in doclists stored on disk is omitted from this
** buffer.
**
** Variable aMI is used only for FTSQUERY_NEAR nodes to store the global
** matchinfo data. If it is not NULL, it points to an array of size nCol*3,
** where nCol is the number of columns in the queried FTS table. The array
** is populated as follows:
**
** aMI[iCol*3 + 0] = Undefined
** aMI[iCol*3 + 1] = Number of occurrences
** aMI[iCol*3 + 2] = Number of rows containing at least one instance
**
** The aMI array is allocated using sqlite3_malloc(). It should be freed
** when the expression node is.
*/
struct Fts3Expr {
int eType; /* One of the FTSQUERY_XXX values defined below */
int nNear; /* Valid if eType==FTSQUERY_NEAR */
Fts3Expr *pParent; /* pParent->pLeft==this or pParent->pRight==this */
Fts3Expr *pLeft; /* Left operand */
Fts3Expr *pRight; /* Right operand */
Fts3Phrase *pPhrase; /* Valid if eType==FTSQUERY_PHRASE */
/* The following are used by the fts3_eval.c module. */
sqlite3_int64 iDocid; /* Current docid */
u8 bEof; /* True this expression is at EOF already */
u8 bStart; /* True if iDocid is valid */
u8 bDeferred; /* True if this expression is entirely deferred */
/* The following are used by the fts3_snippet.c module. */
int iPhrase; /* Index of this phrase in matchinfo() results */
u32 *aMI; /* See above */
};
/*
** Candidate values for Fts3Query.eType. Note that the order of the first
** four values is in order of precedence when parsing expressions. For
** example, the following:
**
** "a OR b AND c NOT d NEAR e"
**
** is equivalent to:
**
** "a OR (b AND (c NOT (d NEAR e)))"
*/
#define FTSQUERY_NEAR 1
#define FTSQUERY_NOT 2
#define FTSQUERY_AND 3
#define FTSQUERY_OR 4
#define FTSQUERY_PHRASE 5
/* fts3_write.c */
int sqlite3Fts3UpdateMethod(sqlite3_vtab*,int,sqlite3_value**,sqlite3_int64*);
int sqlite3Fts3PendingTermsFlush(Fts3Table *);
void sqlite3Fts3PendingTermsClear(Fts3Table *);
int sqlite3Fts3Optimize(Fts3Table *);
int sqlite3Fts3SegReaderNew(int, int, sqlite3_int64,
sqlite3_int64, sqlite3_int64, const char *, int, Fts3SegReader**);
int sqlite3Fts3SegReaderPending(
Fts3Table*,int,const char*,int,int,Fts3SegReader**);
void sqlite3Fts3SegReaderFree(Fts3SegReader *);
int sqlite3Fts3AllSegdirs(Fts3Table*, int, int, int, sqlite3_stmt **);
int sqlite3Fts3ReadBlock(Fts3Table*, sqlite3_int64, char **, int*, int*);
int sqlite3Fts3SelectDoctotal(Fts3Table *, sqlite3_stmt **);
int sqlite3Fts3SelectDocsize(Fts3Table *, sqlite3_int64, sqlite3_stmt **);
#ifndef SQLITE_DISABLE_FTS4_DEFERRED
void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *);
int sqlite3Fts3DeferToken(Fts3Cursor *, Fts3PhraseToken *, int);
int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *);
void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *);
int sqlite3Fts3DeferredTokenList(Fts3DeferredToken *, char **, int *);
#else
# define sqlite3Fts3FreeDeferredTokens(x)
# define sqlite3Fts3DeferToken(x,y,z) SQLITE_OK
# define sqlite3Fts3CacheDeferredDoclists(x) SQLITE_OK
# define sqlite3Fts3FreeDeferredDoclists(x)
# define sqlite3Fts3DeferredTokenList(x,y,z) SQLITE_OK
#endif
void sqlite3Fts3SegmentsClose(Fts3Table *);
int sqlite3Fts3MaxLevel(Fts3Table *, int *);
/* Special values interpreted by sqlite3SegReaderCursor() */
#define FTS3_SEGCURSOR_PENDING -1
#define FTS3_SEGCURSOR_ALL -2
int sqlite3Fts3SegReaderStart(Fts3Table*, Fts3MultiSegReader*, Fts3SegFilter*);
int sqlite3Fts3SegReaderStep(Fts3Table *, Fts3MultiSegReader *);
void sqlite3Fts3SegReaderFinish(Fts3MultiSegReader *);
int sqlite3Fts3SegReaderCursor(Fts3Table *,
int, int, int, const char *, int, int, int, Fts3MultiSegReader *);
/* Flags allowed as part of the 4th argument to SegmentReaderIterate() */
#define FTS3_SEGMENT_REQUIRE_POS 0x00000001
#define FTS3_SEGMENT_IGNORE_EMPTY 0x00000002
#define FTS3_SEGMENT_COLUMN_FILTER 0x00000004
#define FTS3_SEGMENT_PREFIX 0x00000008
#define FTS3_SEGMENT_SCAN 0x00000010
#define FTS3_SEGMENT_FIRST 0x00000020
/* Type passed as 4th argument to SegmentReaderIterate() */
struct Fts3SegFilter {
const char *zTerm;
int nTerm;
int iCol;
int flags;
};
struct Fts3MultiSegReader {
/* Used internally by sqlite3Fts3SegReaderXXX() calls */
Fts3SegReader **apSegment; /* Array of Fts3SegReader objects */
int nSegment; /* Size of apSegment array */
int nAdvance; /* How many seg-readers to advance */
Fts3SegFilter *pFilter; /* Pointer to filter object */
char *aBuffer; /* Buffer to merge doclists in */
int nBuffer; /* Allocated size of aBuffer[] in bytes */
int iColFilter; /* If >=0, filter for this column */
int bRestart;
/* Used by fts3.c only. */
int nCost; /* Cost of running iterator */
int bLookup; /* True if a lookup of a single entry. */
/* Output values. Valid only after Fts3SegReaderStep() returns SQLITE_ROW. */
char *zTerm; /* Pointer to term buffer */
int nTerm; /* Size of zTerm in bytes */
char *aDoclist; /* Pointer to doclist buffer */
int nDoclist; /* Size of aDoclist[] in bytes */
};
int sqlite3Fts3Incrmerge(Fts3Table*,int,int);
#define fts3GetVarint32(p, piVal) ( \
(*(u8*)(p)&0x80) ? sqlite3Fts3GetVarint32(p, piVal) : (*piVal=*(u8*)(p), 1) \
)
/* fts3.c */
void sqlite3Fts3ErrMsg(char**,const char*,...);
int sqlite3Fts3PutVarint(char *, sqlite3_int64);
int sqlite3Fts3GetVarint(const char *, sqlite_int64 *);
int sqlite3Fts3GetVarintU(const char *, sqlite_uint64 *);
int sqlite3Fts3GetVarintBounded(const char*,const char*,sqlite3_int64*);
int sqlite3Fts3GetVarint32(const char *, int *);
int sqlite3Fts3VarintLen(sqlite3_uint64);
void sqlite3Fts3Dequote(char *);
void sqlite3Fts3DoclistPrev(int,char*,int,char**,sqlite3_int64*,int*,u8*);
int sqlite3Fts3EvalPhraseStats(Fts3Cursor *, Fts3Expr *, u32 *);
int sqlite3Fts3FirstFilter(sqlite3_int64, char *, int, char *);
void sqlite3Fts3CreateStatTable(int*, Fts3Table*);
int sqlite3Fts3EvalTestDeferred(Fts3Cursor *pCsr, int *pRc);
int sqlite3Fts3ReadInt(const char *z, int *pnOut);
/* fts3_tokenizer.c */
const char *sqlite3Fts3NextToken(const char *, int *);
int sqlite3Fts3InitHashTable(sqlite3 *, Fts3Hash *, const char *);
int sqlite3Fts3InitTokenizer(Fts3Hash *pHash, const char *,
sqlite3_tokenizer **, char **
);
int sqlite3Fts3IsIdChar(char);
/* fts3_snippet.c */
void sqlite3Fts3Offsets(sqlite3_context*, Fts3Cursor*);
void sqlite3Fts3Snippet(sqlite3_context *, Fts3Cursor *, const char *,
const char *, const char *, int, int
);
void sqlite3Fts3Matchinfo(sqlite3_context *, Fts3Cursor *, const char *);
void sqlite3Fts3MIBufferFree(MatchinfoBuffer *p);
/* fts3_expr.c */
int sqlite3Fts3ExprParse(sqlite3_tokenizer *, int,
char **, int, int, int, const char *, int, Fts3Expr **, char **
);
void sqlite3Fts3ExprFree(Fts3Expr *);
#ifdef SQLITE_TEST
int sqlite3Fts3ExprInitTestInterface(sqlite3 *db, Fts3Hash*);
int sqlite3Fts3InitTerm(sqlite3 *db);
#endif
int sqlite3Fts3OpenTokenizer(sqlite3_tokenizer *, int, const char *, int,
sqlite3_tokenizer_cursor **
);
/* fts3_aux.c */
int sqlite3Fts3InitAux(sqlite3 *db);
void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *);
int sqlite3Fts3MsrIncrStart(
Fts3Table*, Fts3MultiSegReader*, int, const char*, int);
int sqlite3Fts3MsrIncrNext(
Fts3Table *, Fts3MultiSegReader *, sqlite3_int64 *, char **, int *);
int sqlite3Fts3EvalPhrasePoslist(Fts3Cursor *, Fts3Expr *, int iCol, char **);
int sqlite3Fts3MsrOvfl(Fts3Cursor *, Fts3MultiSegReader *, int *);
int sqlite3Fts3MsrIncrRestart(Fts3MultiSegReader *pCsr);
/* fts3_tokenize_vtab.c */
int sqlite3Fts3InitTok(sqlite3*, Fts3Hash *);
/* fts3_unicode2.c (functions generated by parsing unicode text files) */
#ifndef SQLITE_DISABLE_FTS3_UNICODE
int sqlite3FtsUnicodeFold(int, int);
int sqlite3FtsUnicodeIsalnum(int);
int sqlite3FtsUnicodeIsdiacritic(int);
#endif
#endif /* !SQLITE_CORE || SQLITE_ENABLE_FTS3 */
#endif /* _FTSINT_H */

View File

@ -1,551 +0,0 @@
/*
** 2011 Jan 27
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
**
*/
#include "fts3Int.h"
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
#include <string.h>
#include <assert.h>
typedef struct Fts3auxTable Fts3auxTable;
typedef struct Fts3auxCursor Fts3auxCursor;
struct Fts3auxTable {
sqlite3_vtab base; /* Base class used by SQLite core */
Fts3Table *pFts3Tab;
};
struct Fts3auxCursor {
sqlite3_vtab_cursor base; /* Base class used by SQLite core */
Fts3MultiSegReader csr; /* Must be right after "base" */
Fts3SegFilter filter;
char *zStop;
int nStop; /* Byte-length of string zStop */
int iLangid; /* Language id to query */
int isEof; /* True if cursor is at EOF */
sqlite3_int64 iRowid; /* Current rowid */
int iCol; /* Current value of 'col' column */
int nStat; /* Size of aStat[] array */
struct Fts3auxColstats {
sqlite3_int64 nDoc; /* 'documents' values for current csr row */
sqlite3_int64 nOcc; /* 'occurrences' values for current csr row */
} *aStat;
};
/*
** Schema of the terms table.
*/
#define FTS3_AUX_SCHEMA \
"CREATE TABLE x(term, col, documents, occurrences, languageid HIDDEN)"
/*
** This function does all the work for both the xConnect and xCreate methods.
** These tables have no persistent representation of their own, so xConnect
** and xCreate are identical operations.
*/
static int fts3auxConnectMethod(
sqlite3 *db, /* Database connection */
void *pUnused, /* Unused */
int argc, /* Number of elements in argv array */
const char * const *argv, /* xCreate/xConnect argument array */
sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */
char **pzErr /* OUT: sqlite3_malloc'd error message */
){
char const *zDb; /* Name of database (e.g. "main") */
char const *zFts3; /* Name of fts3 table */
int nDb; /* Result of strlen(zDb) */
int nFts3; /* Result of strlen(zFts3) */
sqlite3_int64 nByte; /* Bytes of space to allocate here */
int rc; /* value returned by declare_vtab() */
Fts3auxTable *p; /* Virtual table object to return */
UNUSED_PARAMETER(pUnused);
/* The user should invoke this in one of two forms:
**
** CREATE VIRTUAL TABLE xxx USING fts4aux(fts4-table);
** CREATE VIRTUAL TABLE xxx USING fts4aux(fts4-table-db, fts4-table);
*/
if( argc!=4 && argc!=5 ) goto bad_args;
zDb = argv[1];
nDb = (int)strlen(zDb);
if( argc==5 ){
if( nDb==4 && 0==sqlite3_strnicmp("temp", zDb, 4) ){
zDb = argv[3];
nDb = (int)strlen(zDb);
zFts3 = argv[4];
}else{
goto bad_args;
}
}else{
zFts3 = argv[3];
}
nFts3 = (int)strlen(zFts3);
rc = sqlite3_declare_vtab(db, FTS3_AUX_SCHEMA);
if( rc!=SQLITE_OK ) return rc;
nByte = sizeof(Fts3auxTable) + sizeof(Fts3Table) + nDb + nFts3 + 2;
p = (Fts3auxTable *)sqlite3_malloc64(nByte);
if( !p ) return SQLITE_NOMEM;
memset(p, 0, nByte);
p->pFts3Tab = (Fts3Table *)&p[1];
p->pFts3Tab->zDb = (char *)&p->pFts3Tab[1];
p->pFts3Tab->zName = &p->pFts3Tab->zDb[nDb+1];
p->pFts3Tab->db = db;
p->pFts3Tab->nIndex = 1;
memcpy((char *)p->pFts3Tab->zDb, zDb, nDb);
memcpy((char *)p->pFts3Tab->zName, zFts3, nFts3);
sqlite3Fts3Dequote((char *)p->pFts3Tab->zName);
*ppVtab = (sqlite3_vtab *)p;
return SQLITE_OK;
bad_args:
sqlite3Fts3ErrMsg(pzErr, "invalid arguments to fts4aux constructor");
return SQLITE_ERROR;
}
/*
** This function does the work for both the xDisconnect and xDestroy methods.
** These tables have no persistent representation of their own, so xDisconnect
** and xDestroy are identical operations.
*/
static int fts3auxDisconnectMethod(sqlite3_vtab *pVtab){
Fts3auxTable *p = (Fts3auxTable *)pVtab;
Fts3Table *pFts3 = p->pFts3Tab;
int i;
/* Free any prepared statements held */
for(i=0; i<SizeofArray(pFts3->aStmt); i++){
sqlite3_finalize(pFts3->aStmt[i]);
}
sqlite3_free(pFts3->zSegmentsTbl);
sqlite3_free(p);
return SQLITE_OK;
}
#define FTS4AUX_EQ_CONSTRAINT 1
#define FTS4AUX_GE_CONSTRAINT 2
#define FTS4AUX_LE_CONSTRAINT 4
/*
** xBestIndex - Analyze a WHERE and ORDER BY clause.
*/
static int fts3auxBestIndexMethod(
sqlite3_vtab *pVTab,
sqlite3_index_info *pInfo
){
int i;
int iEq = -1;
int iGe = -1;
int iLe = -1;
int iLangid = -1;
int iNext = 1; /* Next free argvIndex value */
UNUSED_PARAMETER(pVTab);
/* This vtab always delivers results in "ORDER BY term ASC" order. */
if( pInfo->nOrderBy==1
&& pInfo->aOrderBy[0].iColumn==0
&& pInfo->aOrderBy[0].desc==0
){
pInfo->orderByConsumed = 1;
}
/* Search for equality and range constraints on the "term" column.
** And equality constraints on the hidden "languageid" column. */
for(i=0; i<pInfo->nConstraint; i++){
if( pInfo->aConstraint[i].usable ){
int op = pInfo->aConstraint[i].op;
int iCol = pInfo->aConstraint[i].iColumn;
if( iCol==0 ){
if( op==SQLITE_INDEX_CONSTRAINT_EQ ) iEq = i;
if( op==SQLITE_INDEX_CONSTRAINT_LT ) iLe = i;
if( op==SQLITE_INDEX_CONSTRAINT_LE ) iLe = i;
if( op==SQLITE_INDEX_CONSTRAINT_GT ) iGe = i;
if( op==SQLITE_INDEX_CONSTRAINT_GE ) iGe = i;
}
if( iCol==4 ){
if( op==SQLITE_INDEX_CONSTRAINT_EQ ) iLangid = i;
}
}
}
if( iEq>=0 ){
pInfo->idxNum = FTS4AUX_EQ_CONSTRAINT;
pInfo->aConstraintUsage[iEq].argvIndex = iNext++;
pInfo->estimatedCost = 5;
}else{
pInfo->idxNum = 0;
pInfo->estimatedCost = 20000;
if( iGe>=0 ){
pInfo->idxNum += FTS4AUX_GE_CONSTRAINT;
pInfo->aConstraintUsage[iGe].argvIndex = iNext++;
pInfo->estimatedCost /= 2;
}
if( iLe>=0 ){
pInfo->idxNum += FTS4AUX_LE_CONSTRAINT;
pInfo->aConstraintUsage[iLe].argvIndex = iNext++;
pInfo->estimatedCost /= 2;
}
}
if( iLangid>=0 ){
pInfo->aConstraintUsage[iLangid].argvIndex = iNext++;
pInfo->estimatedCost--;
}
return SQLITE_OK;
}
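/* Illustrative example (not part of the original source) of the idxNum
** scheme produced by the function above: the WHERE clause
** "term>='a' AND term<='m'" yields
** idxNum = FTS4AUX_GE_CONSTRAINT|FTS4AUX_LE_CONSTRAINT (6), with apVal[0]='a'
** and apVal[1]='m' passed to xFilter below. An equality constraint on "term"
** takes priority and yields FTS4AUX_EQ_CONSTRAINT on its own.
*/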
/*
** xOpen - Open a cursor.
*/
static int fts3auxOpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){
Fts3auxCursor *pCsr; /* Pointer to cursor object to return */
UNUSED_PARAMETER(pVTab);
pCsr = (Fts3auxCursor *)sqlite3_malloc(sizeof(Fts3auxCursor));
if( !pCsr ) return SQLITE_NOMEM;
memset(pCsr, 0, sizeof(Fts3auxCursor));
*ppCsr = (sqlite3_vtab_cursor *)pCsr;
return SQLITE_OK;
}
/*
** xClose - Close a cursor.
*/
static int fts3auxCloseMethod(sqlite3_vtab_cursor *pCursor){
Fts3Table *pFts3 = ((Fts3auxTable *)pCursor->pVtab)->pFts3Tab;
Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor;
sqlite3Fts3SegmentsClose(pFts3);
sqlite3Fts3SegReaderFinish(&pCsr->csr);
sqlite3_free((void *)pCsr->filter.zTerm);
sqlite3_free(pCsr->zStop);
sqlite3_free(pCsr->aStat);
sqlite3_free(pCsr);
return SQLITE_OK;
}
static int fts3auxGrowStatArray(Fts3auxCursor *pCsr, int nSize){
if( nSize>pCsr->nStat ){
struct Fts3auxColstats *aNew;
aNew = (struct Fts3auxColstats *)sqlite3_realloc64(pCsr->aStat,
sizeof(struct Fts3auxColstats) * nSize
);
if( aNew==0 ) return SQLITE_NOMEM;
memset(&aNew[pCsr->nStat], 0,
sizeof(struct Fts3auxColstats) * (nSize - pCsr->nStat)
);
pCsr->aStat = aNew;
pCsr->nStat = nSize;
}
return SQLITE_OK;
}
/*
** xNext - Advance the cursor to the next row, if any.
*/
static int fts3auxNextMethod(sqlite3_vtab_cursor *pCursor){
Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor;
Fts3Table *pFts3 = ((Fts3auxTable *)pCursor->pVtab)->pFts3Tab;
int rc;
/* Increment our pretend rowid value. */
pCsr->iRowid++;
for(pCsr->iCol++; pCsr->iCol<pCsr->nStat; pCsr->iCol++){
if( pCsr->aStat[pCsr->iCol].nDoc>0 ) return SQLITE_OK;
}
rc = sqlite3Fts3SegReaderStep(pFts3, &pCsr->csr);
if( rc==SQLITE_ROW ){
int i = 0;
int nDoclist = pCsr->csr.nDoclist;
char *aDoclist = pCsr->csr.aDoclist;
int iCol;
int eState = 0;
if( pCsr->zStop ){
int n = (pCsr->nStop<pCsr->csr.nTerm) ? pCsr->nStop : pCsr->csr.nTerm;
int mc = memcmp(pCsr->zStop, pCsr->csr.zTerm, n);
if( mc<0 || (mc==0 && pCsr->csr.nTerm>pCsr->nStop) ){
pCsr->isEof = 1;
return SQLITE_OK;
}
}
if( fts3auxGrowStatArray(pCsr, 2) ) return SQLITE_NOMEM;
memset(pCsr->aStat, 0, sizeof(struct Fts3auxColstats) * pCsr->nStat);
iCol = 0;
while( i<nDoclist ){
sqlite3_int64 v = 0;
i += sqlite3Fts3GetVarint(&aDoclist[i], &v);
switch( eState ){
/* State 0. In this state the integer just read was a docid. */
case 0:
pCsr->aStat[0].nDoc++;
eState = 1;
iCol = 0;
break;
/* State 1. In this state we are expecting either a 1, indicating
** that the following integer will be a column number, or the
** start of a position list for column 0.
**
** The only difference between state 1 and state 2 is that if the
** integer encountered in state 1 is not 0 or 1, then we need to
** increment the column 0 "nDoc" count for this term.
*/
case 1:
assert( iCol==0 );
if( v>1 ){
pCsr->aStat[1].nDoc++;
}
eState = 2;
/* fall through */
case 2:
if( v==0 ){ /* 0x00. Next integer will be a docid. */
eState = 0;
}else if( v==1 ){ /* 0x01. Next integer will be a column number. */
eState = 3;
}else{ /* 2 or greater. A position. */
pCsr->aStat[iCol+1].nOcc++;
pCsr->aStat[0].nOcc++;
}
break;
/* State 3. The integer just read is a column number. */
default: assert( eState==3 );
iCol = (int)v;
if( fts3auxGrowStatArray(pCsr, iCol+2) ) return SQLITE_NOMEM;
pCsr->aStat[iCol+1].nDoc++;
eState = 2;
break;
}
}
pCsr->iCol = 0;
rc = SQLITE_OK;
}else{
pCsr->isEof = 1;
}
return rc;
}
/*
** xFilter - Initialize a cursor to point at the start of its data.
*/
static int fts3auxFilterMethod(
sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */
int idxNum, /* Strategy index */
const char *idxStr, /* Unused */
int nVal, /* Number of elements in apVal */
sqlite3_value **apVal /* Arguments for the indexing scheme */
){
Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor;
Fts3Table *pFts3 = ((Fts3auxTable *)pCursor->pVtab)->pFts3Tab;
int rc;
int isScan = 0;
int iLangVal = 0; /* Language id to query */
int iEq = -1; /* Index of term=? value in apVal */
int iGe = -1; /* Index of term>=? value in apVal */
int iLe = -1; /* Index of term<=? value in apVal */
int iLangid = -1; /* Index of languageid=? value in apVal */
int iNext = 0;
UNUSED_PARAMETER(nVal);
UNUSED_PARAMETER(idxStr);
assert( idxStr==0 );
assert( idxNum==FTS4AUX_EQ_CONSTRAINT || idxNum==0
|| idxNum==FTS4AUX_LE_CONSTRAINT || idxNum==FTS4AUX_GE_CONSTRAINT
|| idxNum==(FTS4AUX_LE_CONSTRAINT|FTS4AUX_GE_CONSTRAINT)
);
if( idxNum==FTS4AUX_EQ_CONSTRAINT ){
iEq = iNext++;
}else{
isScan = 1;
if( idxNum & FTS4AUX_GE_CONSTRAINT ){
iGe = iNext++;
}
if( idxNum & FTS4AUX_LE_CONSTRAINT ){
iLe = iNext++;
}
}
if( iNext<nVal ){
iLangid = iNext++;
}
/* In case this cursor is being reused, close and zero it. */
testcase(pCsr->filter.zTerm);
sqlite3Fts3SegReaderFinish(&pCsr->csr);
sqlite3_free((void *)pCsr->filter.zTerm);
sqlite3_free(pCsr->aStat);
memset(&pCsr->csr, 0, ((u8*)&pCsr[1]) - (u8*)&pCsr->csr);
pCsr->filter.flags = FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY;
if( isScan ) pCsr->filter.flags |= FTS3_SEGMENT_SCAN;
if( iEq>=0 || iGe>=0 ){
const unsigned char *zStr = sqlite3_value_text(apVal[0]);
assert( (iEq==0 && iGe==-1) || (iEq==-1 && iGe==0) );
if( zStr ){
pCsr->filter.zTerm = sqlite3_mprintf("%s", zStr);
if( pCsr->filter.zTerm==0 ) return SQLITE_NOMEM;
pCsr->filter.nTerm = (int)strlen(pCsr->filter.zTerm);
}
}
if( iLe>=0 ){
pCsr->zStop = sqlite3_mprintf("%s", sqlite3_value_text(apVal[iLe]));
if( pCsr->zStop==0 ) return SQLITE_NOMEM;
pCsr->nStop = (int)strlen(pCsr->zStop);
}
if( iLangid>=0 ){
iLangVal = sqlite3_value_int(apVal[iLangid]);
/* If the user specified a negative value for the languageid, use zero
** instead. This works, as the "languageid=?" constraint will also
** be tested by the VDBE layer. The test will always be false (since
** this module will not return a row with a negative languageid), and
** so the overall query will return zero rows. */
if( iLangVal<0 ) iLangVal = 0;
}
pCsr->iLangid = iLangVal;
rc = sqlite3Fts3SegReaderCursor(pFts3, iLangVal, 0, FTS3_SEGCURSOR_ALL,
pCsr->filter.zTerm, pCsr->filter.nTerm, 0, isScan, &pCsr->csr
);
if( rc==SQLITE_OK ){
rc = sqlite3Fts3SegReaderStart(pFts3, &pCsr->csr, &pCsr->filter);
}
if( rc==SQLITE_OK ) rc = fts3auxNextMethod(pCursor);
return rc;
}
/*
** xEof - Return true if the cursor is at EOF, or false otherwise.
*/
static int fts3auxEofMethod(sqlite3_vtab_cursor *pCursor){
Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor;
return pCsr->isEof;
}
/*
** xColumn - Return a column value.
*/
static int fts3auxColumnMethod(
sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */
sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */
int iCol /* Index of column to read value from */
){
Fts3auxCursor *p = (Fts3auxCursor *)pCursor;
assert( p->isEof==0 );
switch( iCol ){
case 0: /* term */
sqlite3_result_text(pCtx, p->csr.zTerm, p->csr.nTerm, SQLITE_TRANSIENT);
break;
case 1: /* col */
if( p->iCol ){
sqlite3_result_int(pCtx, p->iCol-1);
}else{
sqlite3_result_text(pCtx, "*", -1, SQLITE_STATIC);
}
break;
case 2: /* documents */
sqlite3_result_int64(pCtx, p->aStat[p->iCol].nDoc);
break;
case 3: /* occurrences */
sqlite3_result_int64(pCtx, p->aStat[p->iCol].nOcc);
break;
default: /* languageid */
assert( iCol==4 );
sqlite3_result_int(pCtx, p->iLangid);
break;
}
return SQLITE_OK;
}
/*
** xRowid - Return the current rowid for the cursor.
*/
static int fts3auxRowidMethod(
sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */
sqlite_int64 *pRowid /* OUT: Rowid value */
){
Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor;
*pRowid = pCsr->iRowid;
return SQLITE_OK;
}
/*
** Register the fts3aux module with database connection db. Return SQLITE_OK
** if successful or an error code if sqlite3_create_module() fails.
*/
int sqlite3Fts3InitAux(sqlite3 *db){
static const sqlite3_module fts3aux_module = {
0, /* iVersion */
fts3auxConnectMethod, /* xCreate */
fts3auxConnectMethod, /* xConnect */
fts3auxBestIndexMethod, /* xBestIndex */
fts3auxDisconnectMethod, /* xDisconnect */
fts3auxDisconnectMethod, /* xDestroy */
fts3auxOpenMethod, /* xOpen */
fts3auxCloseMethod, /* xClose */
fts3auxFilterMethod, /* xFilter */
fts3auxNextMethod, /* xNext */
fts3auxEofMethod, /* xEof */
fts3auxColumnMethod, /* xColumn */
fts3auxRowidMethod, /* xRowid */
0, /* xUpdate */
0, /* xBegin */
0, /* xSync */
0, /* xCommit */
0, /* xRollback */
0, /* xFindFunction */
0, /* xRename */
0, /* xSavepoint */
0, /* xRelease */
0, /* xRollbackTo */
0 /* xShadowName */
};
int rc; /* Return code */
rc = sqlite3_create_module(db, "fts4aux", &fts3aux_module, 0);
return rc;
}
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */

File diff suppressed because it is too large Load Diff

View File

@ -1,383 +0,0 @@
/*
** 2001 September 22
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
** This is the implementation of generic hash-tables used in SQLite.
** We've modified it slightly to serve as a standalone hash table
** implementation for the full-text indexing module.
*/
/*
** The code in this file is only compiled if:
**
** * The FTS3 module is being built as an extension
** (in which case SQLITE_CORE is not defined), or
**
** * The FTS3 module is being built into the core of
** SQLite (in which case SQLITE_ENABLE_FTS3 is defined).
*/
#include "fts3Int.h"
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "fts3_hash.h"
/*
** Malloc and Free functions
*/
static void *fts3HashMalloc(sqlite3_int64 n){
void *p = sqlite3_malloc64(n);
if( p ){
memset(p, 0, n);
}
return p;
}
static void fts3HashFree(void *p){
sqlite3_free(p);
}
/* Turn bulk memory into a hash table object by initializing the
** fields of the Hash structure.
**
** "pNew" is a pointer to the hash table that is to be initialized.
** keyClass is one of the constants
** FTS3_HASH_BINARY or FTS3_HASH_STRING. The value of keyClass
** determines what kind of key the hash table will use. "copyKey" is
** true if the hash table should make its own private copy of keys and
** false if it should just use the supplied pointer.
*/
void sqlite3Fts3HashInit(Fts3Hash *pNew, char keyClass, char copyKey){
assert( pNew!=0 );
assert( keyClass>=FTS3_HASH_STRING && keyClass<=FTS3_HASH_BINARY );
pNew->keyClass = keyClass;
pNew->copyKey = copyKey;
pNew->first = 0;
pNew->count = 0;
pNew->htsize = 0;
pNew->ht = 0;
}
/* Remove all entries from a hash table. Reclaim all memory.
** Call this routine to delete a hash table or to reset a hash table
** to the empty state.
*/
void sqlite3Fts3HashClear(Fts3Hash *pH){
Fts3HashElem *elem; /* For looping over all elements of the table */
assert( pH!=0 );
elem = pH->first;
pH->first = 0;
fts3HashFree(pH->ht);
pH->ht = 0;
pH->htsize = 0;
while( elem ){
Fts3HashElem *next_elem = elem->next;
if( pH->copyKey && elem->pKey ){
fts3HashFree(elem->pKey);
}
fts3HashFree(elem);
elem = next_elem;
}
pH->count = 0;
}
/*
** Hash and comparison functions when the mode is FTS3_HASH_STRING
*/
static int fts3StrHash(const void *pKey, int nKey){
const char *z = (const char *)pKey;
unsigned h = 0;
if( nKey<=0 ) nKey = (int) strlen(z);
while( nKey > 0 ){
h = (h<<3) ^ h ^ *z++;
nKey--;
}
return (int)(h & 0x7fffffff);
}
static int fts3StrCompare(const void *pKey1, int n1, const void *pKey2, int n2){
if( n1!=n2 ) return 1;
return strncmp((const char*)pKey1,(const char*)pKey2,n1);
}
/*
** Hash and comparison functions when the mode is FTS3_HASH_BINARY
*/
static int fts3BinHash(const void *pKey, int nKey){
int h = 0;
const char *z = (const char *)pKey;
while( nKey-- > 0 ){
h = (h<<3) ^ h ^ *(z++);
}
return h & 0x7fffffff;
}
static int fts3BinCompare(const void *pKey1, int n1, const void *pKey2, int n2){
if( n1!=n2 ) return 1;
return memcmp(pKey1,pKey2,n1);
}
/*
** Return a pointer to the appropriate hash function given the key class.
**
** The C syntax in this function definition may be unfamiliar to some
** programmers, so we provide the following additional explanation:
**
** The name of the function is "ftsHashFunction". The function takes a
** single parameter "keyClass". The return value of ftsHashFunction()
** is a pointer to another function. Specifically, the return value
** of ftsHashFunction() is a pointer to a function that takes two parameters
** with types "const void*" and "int" and returns an "int".
*/
static int (*ftsHashFunction(int keyClass))(const void*,int){
if( keyClass==FTS3_HASH_STRING ){
return &fts3StrHash;
}else{
assert( keyClass==FTS3_HASH_BINARY );
return &fts3BinHash;
}
}
/*
** Return a pointer to the appropriate hash function given the key class.
**
** For help in interpreting the obscure C code in the function definition,
** see the header comment on the previous function.
*/
static int (*ftsCompareFunction(int keyClass))(const void*,int,const void*,int){
if( keyClass==FTS3_HASH_STRING ){
return &fts3StrCompare;
}else{
assert( keyClass==FTS3_HASH_BINARY );
return &fts3BinCompare;
}
}
/* Link an element into the hash table
*/
static void fts3HashInsertElement(
Fts3Hash *pH, /* The complete hash table */
struct _fts3ht *pEntry, /* The entry into which pNew is inserted */
Fts3HashElem *pNew /* The element to be inserted */
){
Fts3HashElem *pHead; /* First element already in pEntry */
pHead = pEntry->chain;
if( pHead ){
pNew->next = pHead;
pNew->prev = pHead->prev;
if( pHead->prev ){ pHead->prev->next = pNew; }
else { pH->first = pNew; }
pHead->prev = pNew;
}else{
pNew->next = pH->first;
if( pH->first ){ pH->first->prev = pNew; }
pNew->prev = 0;
pH->first = pNew;
}
pEntry->count++;
pEntry->chain = pNew;
}
/* Resize the hash table so that it contains "new_size" buckets.
** "new_size" must be a power of 2. The hash table might fail
** to resize if sqliteMalloc() fails.
**
** Return non-zero if a memory allocation error occurs.
*/
static int fts3Rehash(Fts3Hash *pH, int new_size){
struct _fts3ht *new_ht; /* The new hash table */
Fts3HashElem *elem, *next_elem; /* For looping over existing elements */
int (*xHash)(const void*,int); /* The hash function */
assert( (new_size & (new_size-1))==0 );
new_ht = (struct _fts3ht *)fts3HashMalloc( new_size*sizeof(struct _fts3ht) );
if( new_ht==0 ) return 1;
fts3HashFree(pH->ht);
pH->ht = new_ht;
pH->htsize = new_size;
xHash = ftsHashFunction(pH->keyClass);
for(elem=pH->first, pH->first=0; elem; elem = next_elem){
int h = (*xHash)(elem->pKey, elem->nKey) & (new_size-1);
next_elem = elem->next;
fts3HashInsertElement(pH, &new_ht[h], elem);
}
return 0;
}
/* This function (for internal use only) locates an element in a
** hash table that matches the given key. The hash for this key has
** already been computed and is passed as the 4th parameter.
*/
static Fts3HashElem *fts3FindElementByHash(
const Fts3Hash *pH, /* The pH to be searched */
const void *pKey, /* The key we are searching for */
int nKey,
int h /* The hash for this key. */
){
Fts3HashElem *elem; /* Used to loop thru the element list */
int count; /* Number of elements left to test */
int (*xCompare)(const void*,int,const void*,int); /* comparison function */
if( pH->ht ){
struct _fts3ht *pEntry = &pH->ht[h];
elem = pEntry->chain;
count = pEntry->count;
xCompare = ftsCompareFunction(pH->keyClass);
while( count-- && elem ){
if( (*xCompare)(elem->pKey,elem->nKey,pKey,nKey)==0 ){
return elem;
}
elem = elem->next;
}
}
return 0;
}
/* Remove a single entry from the hash table given a pointer to that
** element and a hash on the element's key.
*/
static void fts3RemoveElementByHash(
Fts3Hash *pH, /* The pH containing "elem" */
Fts3HashElem* elem, /* The element to be removed from the pH */
int h /* Hash value for the element */
){
struct _fts3ht *pEntry;
if( elem->prev ){
elem->prev->next = elem->next;
}else{
pH->first = elem->next;
}
if( elem->next ){
elem->next->prev = elem->prev;
}
pEntry = &pH->ht[h];
if( pEntry->chain==elem ){
pEntry->chain = elem->next;
}
pEntry->count--;
if( pEntry->count<=0 ){
pEntry->chain = 0;
}
if( pH->copyKey && elem->pKey ){
fts3HashFree(elem->pKey);
}
fts3HashFree( elem );
pH->count--;
if( pH->count<=0 ){
assert( pH->first==0 );
assert( pH->count==0 );
fts3HashClear(pH);
}
}
Fts3HashElem *sqlite3Fts3HashFindElem(
const Fts3Hash *pH,
const void *pKey,
int nKey
){
int h; /* A hash on key */
int (*xHash)(const void*,int); /* The hash function */
if( pH==0 || pH->ht==0 ) return 0;
xHash = ftsHashFunction(pH->keyClass);
assert( xHash!=0 );
h = (*xHash)(pKey,nKey);
assert( (pH->htsize & (pH->htsize-1))==0 );
return fts3FindElementByHash(pH,pKey,nKey, h & (pH->htsize-1));
}
/*
** Attempt to locate an element of the hash table pH with a key
** that matches pKey,nKey. Return the data for this element if it is
** found, or NULL if there is no match.
*/
void *sqlite3Fts3HashFind(const Fts3Hash *pH, const void *pKey, int nKey){
Fts3HashElem *pElem; /* The element that matches key (if any) */
pElem = sqlite3Fts3HashFindElem(pH, pKey, nKey);
return pElem ? pElem->data : 0;
}
/* Insert an element into the hash table pH. The key is pKey,nKey
** and the data is "data".
**
** If no element exists with a matching key, then a new
** element is created. A copy of the key is made if the copyKey
** flag is set. NULL is returned.
**
** If another element already exists with the same key, then the
** new data replaces the old data and the old data is returned.
** The key is not copied in this instance. If a malloc fails, then
** the new data is returned and the hash table is unchanged.
**
** If the "data" parameter to this function is NULL, then the
** element corresponding to "key" is removed from the hash table.
*/
void *sqlite3Fts3HashInsert(
Fts3Hash *pH, /* The hash table to insert into */
const void *pKey, /* The key */
int nKey, /* Number of bytes in the key */
void *data /* The data */
){
int hraw; /* Raw hash value of the key */
int h; /* the hash of the key modulo hash table size */
Fts3HashElem *elem; /* Used to loop thru the element list */
Fts3HashElem *new_elem; /* New element added to the pH */
int (*xHash)(const void*,int); /* The hash function */
assert( pH!=0 );
xHash = ftsHashFunction(pH->keyClass);
assert( xHash!=0 );
hraw = (*xHash)(pKey, nKey);
assert( (pH->htsize & (pH->htsize-1))==0 );
h = hraw & (pH->htsize-1);
elem = fts3FindElementByHash(pH,pKey,nKey,h);
if( elem ){
void *old_data = elem->data;
if( data==0 ){
fts3RemoveElementByHash(pH,elem,h);
}else{
elem->data = data;
}
return old_data;
}
if( data==0 ) return 0;
if( (pH->htsize==0 && fts3Rehash(pH,8))
|| (pH->count>=pH->htsize && fts3Rehash(pH, pH->htsize*2))
){
pH->count = 0;
return data;
}
assert( pH->htsize>0 );
new_elem = (Fts3HashElem*)fts3HashMalloc( sizeof(Fts3HashElem) );
if( new_elem==0 ) return data;
if( pH->copyKey && pKey!=0 ){
new_elem->pKey = fts3HashMalloc( nKey );
if( new_elem->pKey==0 ){
fts3HashFree(new_elem);
return data;
}
memcpy((void*)new_elem->pKey, pKey, nKey);
}else{
new_elem->pKey = (void*)pKey;
}
new_elem->nKey = nKey;
pH->count++;
assert( pH->htsize>0 );
assert( (pH->htsize & (pH->htsize-1))==0 );
h = hraw & (pH->htsize-1);
fts3HashInsertElement(pH, &pH->ht[h], new_elem);
new_elem->data = data;
return 0;
}
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */

View File

@ -1,112 +0,0 @@
/*
** 2001 September 22
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
** This is the header file for the generic hash-table implementation
** used in SQLite. We've modified it slightly to serve as a standalone
** hash table implementation for the full-text indexing module.
**
*/
#ifndef _FTS3_HASH_H_
#define _FTS3_HASH_H_
/* Forward declarations of structures. */
typedef struct Fts3Hash Fts3Hash;
typedef struct Fts3HashElem Fts3HashElem;
/* A complete hash table is an instance of the following structure.
** The internals of this structure are intended to be opaque -- client
** code should not attempt to access or modify the fields of this structure
** directly. Change this structure only by using the routines below.
** However, many of the "procedures" and "functions" for modifying and
** accessing this structure are really macros, so we can't really make
** this structure opaque.
*/
struct Fts3Hash {
char keyClass; /* HASH_INT, _POINTER, _STRING, _BINARY */
char copyKey; /* True if copy of key made on insert */
int count; /* Number of entries in this table */
Fts3HashElem *first; /* The first element of the array */
int htsize; /* Number of buckets in the hash table */
struct _fts3ht { /* the hash table */
int count; /* Number of entries with this hash */
Fts3HashElem *chain; /* Pointer to first entry with this hash */
} *ht;
};
/* Each element in the hash table is an instance of the following
** structure. All elements are stored on a single doubly-linked list.
**
** Again, this structure is intended to be opaque, but it can't really
** be opaque because it is used by macros.
*/
struct Fts3HashElem {
Fts3HashElem *next, *prev; /* Next and previous elements in the table */
void *data; /* Data associated with this element */
void *pKey; int nKey; /* Key associated with this element */
};
/*
** There are 2 different modes of operation for a hash table:
**
** FTS3_HASH_STRING pKey points to a string that is nKey bytes long
** (including the null-terminator, if any). Case
** is respected in comparisons.
**
** FTS3_HASH_BINARY pKey points to binary data nKey bytes long.
** memcmp() is used to compare keys.
**
** A copy of the key is made if the copyKey parameter to fts3HashInit is 1.
*/
#define FTS3_HASH_STRING 1
#define FTS3_HASH_BINARY 2
/*
** Access routines. To delete, insert a NULL pointer.
*/
void sqlite3Fts3HashInit(Fts3Hash *pNew, char keyClass, char copyKey);
void *sqlite3Fts3HashInsert(Fts3Hash*, const void *pKey, int nKey, void *pData);
void *sqlite3Fts3HashFind(const Fts3Hash*, const void *pKey, int nKey);
void sqlite3Fts3HashClear(Fts3Hash*);
Fts3HashElem *sqlite3Fts3HashFindElem(const Fts3Hash *, const void *, int);
/*
** Shorthand for the functions above
*/
#define fts3HashInit sqlite3Fts3HashInit
#define fts3HashInsert sqlite3Fts3HashInsert
#define fts3HashFind sqlite3Fts3HashFind
#define fts3HashClear sqlite3Fts3HashClear
#define fts3HashFindElem sqlite3Fts3HashFindElem
/*
** Macros for looping over all elements of a hash table. The idiom is
** like this:
**
** Fts3Hash h;
** Fts3HashElem *p;
** ...
** for(p=fts3HashFirst(&h); p; p=fts3HashNext(p)){
** SomeStructure *pData = fts3HashData(p);
** // do something with pData
** }
*/
#define fts3HashFirst(H) ((H)->first)
#define fts3HashNext(E) ((E)->next)
#define fts3HashData(E) ((E)->data)
#define fts3HashKey(E) ((E)->pKey)
#define fts3HashKeysize(E) ((E)->nKey)
/*
** Number of entries in a hash table
*/
#define fts3HashCount(H) ((H)->count)
#endif /* _FTS3_HASH_H_ */
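/* Illustrative sketch (not part of the original header; the helper name is
** hypothetical): exercising the string-keyed hash through the shorthand
** macros above. Key lengths here include the nul terminator, and copyKey=1
** asks the table to keep private copies of the keys. Assumes <string.h>
** is available for strlen().
*/
static int fts3HashExample(void){
  Fts3Hash h;
  Fts3HashElem *p;
  static int one = 1, two = 2;
  int nSum = 0;
  int *pVal;

  fts3HashInit(&h, FTS3_HASH_STRING, 1);
  fts3HashInsert(&h, "alpha", (int)strlen("alpha")+1, &one);
  fts3HashInsert(&h, "beta",  (int)strlen("beta")+1,  &two);

  /* Point lookup: fts3HashFind() returns the stored data pointer, or 0 */
  pVal = (int *)fts3HashFind(&h, "alpha", (int)strlen("alpha")+1);

  /* Iterate over every element, using the idiom documented above */
  for(p=fts3HashFirst(&h); p; p=fts3HashNext(p)){
    nSum += *(int *)fts3HashData(p);
  }

  fts3HashClear(&h);          /* Releases all entries and private key copies */
  return pVal ? nSum : -1;    /* nSum is 3 when both inserts succeeded */
}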

View File

@ -1,262 +0,0 @@
/*
** 2007 June 22
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
** This file implements a tokenizer for fts3 based on the ICU library.
*/
#include "fts3Int.h"
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
#ifdef SQLITE_ENABLE_ICU
#include <assert.h>
#include <string.h>
#include "fts3_tokenizer.h"
#include <unicode/ubrk.h>
#include <unicode/ucol.h>
#include <unicode/ustring.h>
#include <unicode/utf16.h>
typedef struct IcuTokenizer IcuTokenizer;
typedef struct IcuCursor IcuCursor;
struct IcuTokenizer {
sqlite3_tokenizer base;
char *zLocale;
};
struct IcuCursor {
sqlite3_tokenizer_cursor base;
UBreakIterator *pIter; /* ICU break-iterator object */
int nChar; /* Number of UChar elements in pInput */
UChar *aChar; /* Copy of input using utf-16 encoding */
int *aOffset; /* Offsets of each character in utf-8 input */
int nBuffer;
char *zBuffer;
int iToken;
};
/*
** Create a new tokenizer instance.
*/
static int icuCreate(
int argc, /* Number of entries in argv[] */
const char * const *argv, /* Tokenizer creation arguments */
sqlite3_tokenizer **ppTokenizer /* OUT: Created tokenizer */
){
IcuTokenizer *p;
int n = 0;
if( argc>0 ){
n = strlen(argv[0])+1;
}
p = (IcuTokenizer *)sqlite3_malloc64(sizeof(IcuTokenizer)+n);
if( !p ){
return SQLITE_NOMEM;
}
memset(p, 0, sizeof(IcuTokenizer));
if( n ){
p->zLocale = (char *)&p[1];
memcpy(p->zLocale, argv[0], n);
}
*ppTokenizer = (sqlite3_tokenizer *)p;
return SQLITE_OK;
}
/*
** Destroy a tokenizer
*/
static int icuDestroy(sqlite3_tokenizer *pTokenizer){
IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
sqlite3_free(p);
return SQLITE_OK;
}
/*
** Prepare to begin tokenizing a particular string. The input
** string to be tokenized is pInput[0..nBytes-1]. A cursor
** used to incrementally tokenize this string is returned in
** *ppCursor.
*/
static int icuOpen(
sqlite3_tokenizer *pTokenizer, /* The tokenizer */
const char *zInput, /* Input string */
int nInput, /* Length of zInput in bytes */
sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
){
IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
IcuCursor *pCsr;
const int32_t opt = U_FOLD_CASE_DEFAULT;
UErrorCode status = U_ZERO_ERROR;
int nChar;
UChar32 c;
int iInput = 0;
int iOut = 0;
*ppCursor = 0;
if( zInput==0 ){
nInput = 0;
zInput = "";
}else if( nInput<0 ){
nInput = strlen(zInput);
}
nChar = nInput+1;
pCsr = (IcuCursor *)sqlite3_malloc64(
sizeof(IcuCursor) + /* IcuCursor */
((nChar+3)&~3) * sizeof(UChar) + /* IcuCursor.aChar[] */
(nChar+1) * sizeof(int) /* IcuCursor.aOffset[] */
);
if( !pCsr ){
return SQLITE_NOMEM;
}
memset(pCsr, 0, sizeof(IcuCursor));
pCsr->aChar = (UChar *)&pCsr[1];
pCsr->aOffset = (int *)&pCsr->aChar[(nChar+3)&~3];
pCsr->aOffset[iOut] = iInput;
U8_NEXT(zInput, iInput, nInput, c);
while( c>0 ){
int isError = 0;
c = u_foldCase(c, opt);
U16_APPEND(pCsr->aChar, iOut, nChar, c, isError);
if( isError ){
sqlite3_free(pCsr);
return SQLITE_ERROR;
}
pCsr->aOffset[iOut] = iInput;
if( iInput<nInput ){
U8_NEXT(zInput, iInput, nInput, c);
}else{
c = 0;
}
}
pCsr->pIter = ubrk_open(UBRK_WORD, p->zLocale, pCsr->aChar, iOut, &status);
if( !U_SUCCESS(status) ){
sqlite3_free(pCsr);
return SQLITE_ERROR;
}
pCsr->nChar = iOut;
ubrk_first(pCsr->pIter);
*ppCursor = (sqlite3_tokenizer_cursor *)pCsr;
return SQLITE_OK;
}
/*
** Close a tokenization cursor previously opened by a call to icuOpen().
*/
static int icuClose(sqlite3_tokenizer_cursor *pCursor){
IcuCursor *pCsr = (IcuCursor *)pCursor;
ubrk_close(pCsr->pIter);
sqlite3_free(pCsr->zBuffer);
sqlite3_free(pCsr);
return SQLITE_OK;
}
/*
** Extract the next token from a tokenization cursor.
*/
static int icuNext(
sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by simpleOpen */
const char **ppToken, /* OUT: *ppToken is the token text */
int *pnBytes, /* OUT: Number of bytes in token */
int *piStartOffset, /* OUT: Starting offset of token */
int *piEndOffset, /* OUT: Ending offset of token */
int *piPosition /* OUT: Position integer of token */
){
IcuCursor *pCsr = (IcuCursor *)pCursor;
int iStart = 0;
int iEnd = 0;
int nByte = 0;
while( iStart==iEnd ){
UChar32 c;
iStart = ubrk_current(pCsr->pIter);
iEnd = ubrk_next(pCsr->pIter);
if( iEnd==UBRK_DONE ){
return SQLITE_DONE;
}
while( iStart<iEnd ){
int iWhite = iStart;
U16_NEXT(pCsr->aChar, iWhite, pCsr->nChar, c);
if( u_isspace(c) ){
iStart = iWhite;
}else{
break;
}
}
assert(iStart<=iEnd);
}
do {
UErrorCode status = U_ZERO_ERROR;
if( nByte ){
char *zNew = sqlite3_realloc(pCsr->zBuffer, nByte);
if( !zNew ){
return SQLITE_NOMEM;
}
pCsr->zBuffer = zNew;
pCsr->nBuffer = nByte;
}
u_strToUTF8(
pCsr->zBuffer, pCsr->nBuffer, &nByte, /* Output vars */
&pCsr->aChar[iStart], iEnd-iStart, /* Input vars */
&status /* Output success/failure */
);
} while( nByte>pCsr->nBuffer );
*ppToken = pCsr->zBuffer;
*pnBytes = nByte;
*piStartOffset = pCsr->aOffset[iStart];
*piEndOffset = pCsr->aOffset[iEnd];
*piPosition = pCsr->iToken++;
return SQLITE_OK;
}
/*
** The set of routines that implement the ICU tokenizer
*/
static const sqlite3_tokenizer_module icuTokenizerModule = {
0, /* iVersion */
icuCreate, /* xCreate */
  icuDestroy, /* xDestroy */
icuOpen, /* xOpen */
icuClose, /* xClose */
icuNext, /* xNext */
0, /* xLanguageid */
};
/*
** Set *ppModule to point at the implementation of the ICU tokenizer.
*/
void sqlite3Fts3IcuTokenizerModule(
sqlite3_tokenizer_module const**ppModule
){
*ppModule = &icuTokenizerModule;
}
#endif /* defined(SQLITE_ENABLE_ICU) */
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
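/* Illustrative sketch (not part of the original source; the helper name is
** hypothetical): selecting the ICU tokenizer implemented above when creating
** an FTS table. This assumes a build with both FTS3/FTS4 and
** SQLITE_ENABLE_ICU; the word following "icu" is the locale string handed to
** icuCreate() as argv[0].
*/
static int fts3IcuExample(sqlite3 *db){
  return sqlite3_exec(db,
      "CREATE VIRTUAL TABLE thai_text USING fts4(body, tokenize=icu th_TH);",
      0, 0, 0);
}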

View File

@ -1,662 +0,0 @@
/*
** 2006 September 30
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
** Implementation of the full-text-search tokenizer that implements
** a Porter stemmer.
*/
/*
** The code in this file is only compiled if:
**
** * The FTS3 module is being built as an extension
** (in which case SQLITE_CORE is not defined), or
**
** * The FTS3 module is being built into the core of
** SQLite (in which case SQLITE_ENABLE_FTS3 is defined).
*/
#include "fts3Int.h"
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "fts3_tokenizer.h"
/*
** Class derived from sqlite3_tokenizer
*/
typedef struct porter_tokenizer {
sqlite3_tokenizer base; /* Base class */
} porter_tokenizer;
/*
** Class derived from sqlite3_tokenizer_cursor
*/
typedef struct porter_tokenizer_cursor {
sqlite3_tokenizer_cursor base;
const char *zInput; /* input we are tokenizing */
int nInput; /* size of the input */
int iOffset; /* current position in zInput */
int iToken; /* index of next token to be returned */
char *zToken; /* storage for current token */
int nAllocated; /* space allocated to zToken buffer */
} porter_tokenizer_cursor;
/*
** Create a new tokenizer instance.
*/
static int porterCreate(
int argc, const char * const *argv,
sqlite3_tokenizer **ppTokenizer
){
porter_tokenizer *t;
UNUSED_PARAMETER(argc);
UNUSED_PARAMETER(argv);
t = (porter_tokenizer *) sqlite3_malloc(sizeof(*t));
if( t==NULL ) return SQLITE_NOMEM;
memset(t, 0, sizeof(*t));
*ppTokenizer = &t->base;
return SQLITE_OK;
}
/*
** Destroy a tokenizer
*/
static int porterDestroy(sqlite3_tokenizer *pTokenizer){
sqlite3_free(pTokenizer);
return SQLITE_OK;
}
/*
** Prepare to begin tokenizing a particular string. The input
** string to be tokenized is zInput[0..nInput-1]. A cursor
** used to incrementally tokenize this string is returned in
** *ppCursor.
*/
static int porterOpen(
sqlite3_tokenizer *pTokenizer, /* The tokenizer */
const char *zInput, int nInput, /* String to be tokenized */
sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
){
porter_tokenizer_cursor *c;
UNUSED_PARAMETER(pTokenizer);
c = (porter_tokenizer_cursor *) sqlite3_malloc(sizeof(*c));
if( c==NULL ) return SQLITE_NOMEM;
c->zInput = zInput;
if( zInput==0 ){
c->nInput = 0;
}else if( nInput<0 ){
c->nInput = (int)strlen(zInput);
}else{
c->nInput = nInput;
}
c->iOffset = 0; /* start tokenizing at the beginning */
c->iToken = 0;
c->zToken = NULL; /* no space allocated, yet. */
c->nAllocated = 0;
*ppCursor = &c->base;
return SQLITE_OK;
}
/*
** Close a tokenization cursor previously opened by a call to
** porterOpen() above.
*/
static int porterClose(sqlite3_tokenizer_cursor *pCursor){
porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor;
sqlite3_free(c->zToken);
sqlite3_free(c);
return SQLITE_OK;
}
/*
** Vowel or consonant
*/
static const char cType[] = {
0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0,
1, 1, 1, 2, 1
};
/*
** isConsonant() and isVowel() determine whether the first character in
** the string they point to is a consonant or a vowel, according
** to Porter's rules.
**
** A consonant is any letter other than 'a', 'e', 'i', 'o', or 'u'.
** 'Y' is a consonant unless it follows another consonant,
** in which case it is a vowel.
**
** In these routines, the letters are in reverse order. So the 'y' rule
** is that 'y' is a consonant unless it is followed by another
** consonant.
*/
static int isVowel(const char*);
static int isConsonant(const char *z){
int j;
char x = *z;
if( x==0 ) return 0;
assert( x>='a' && x<='z' );
j = cType[x-'a'];
if( j<2 ) return j;
return z[1]==0 || isVowel(z + 1);
}
static int isVowel(const char *z){
int j;
char x = *z;
if( x==0 ) return 0;
assert( x>='a' && x<='z' );
j = cType[x-'a'];
if( j<2 ) return 1-j;
return isConsonant(z + 1);
}
/*
** Let any sequence of one or more vowels be represented by V and let
** C be sequence of one or more consonants. Then every word can be
** represented as:
**
** [C] (VC){m} [V]
**
** In prose: A word is an optional consonant followed by zero or more
** vowel-consonant pairs followed by an optional vowel. "m" is the
** number of vowel consonant pairs. This routine computes the value
** of m for the first i bytes of a word.
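** For example (values as in Porter's original paper): "tree" and "by"
** have m=0, "trouble" and "oats" have m=1, and "troubles" and "private"
** have m=2.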
**
** Return true if the m-value for z is 1 or more. In other words,
** return true if z contains at least one vowel that is followed
** by a consonant.
**
** In this routine z[] is in reverse order. So we are really looking
** for an instance of a consonant followed by a vowel.
*/
static int m_gt_0(const char *z){
while( isVowel(z) ){ z++; }
if( *z==0 ) return 0;
while( isConsonant(z) ){ z++; }
return *z!=0;
}
/* Like m_gt_0() above except we are looking for a value of m which is
** exactly 1.
*/
static int m_eq_1(const char *z){
while( isVowel(z) ){ z++; }
if( *z==0 ) return 0;
while( isConsonant(z) ){ z++; }
if( *z==0 ) return 0;
while( isVowel(z) ){ z++; }
if( *z==0 ) return 1;
while( isConsonant(z) ){ z++; }
return *z==0;
}
/* Like m_gt_0() above except we are looking for a value of m>1 instead
** of m>0.
*/
static int m_gt_1(const char *z){
while( isVowel(z) ){ z++; }
if( *z==0 ) return 0;
while( isConsonant(z) ){ z++; }
if( *z==0 ) return 0;
while( isVowel(z) ){ z++; }
if( *z==0 ) return 0;
while( isConsonant(z) ){ z++; }
return *z!=0;
}
/*
** Return TRUE if there is a vowel anywhere within z[0..n-1]
*/
static int hasVowel(const char *z){
while( isConsonant(z) ){ z++; }
return *z!=0;
}
/*
** Return TRUE if the word ends in a double consonant.
**
** The text is reversed here. So we are really looking at
** the first two characters of z[].
*/
static int doubleConsonant(const char *z){
return isConsonant(z) && z[0]==z[1];
}
/*
** Return TRUE if the word ends with three letters which
** are consonant-vowel-consonant and where the final consonant
** is not 'w', 'x', or 'y'.
**
** The word is reversed here. So we are really checking the
** first three letters and the first one cannot be in [wxy].
*/
static int star_oh(const char *z){
return
isConsonant(z) &&
z[0]!='w' && z[0]!='x' && z[0]!='y' &&
isVowel(z+1) &&
isConsonant(z+2);
}
/*
** If the word ends with zFrom and xCond() is true for the stem
** of the word that precedes the zFrom ending, then change the
** ending to zTo.
**
** The input word *pz and zFrom are both in reverse order. zTo
** is in normal order.
**
** Return TRUE if zFrom matches. Return FALSE if zFrom does not
** match. Note that TRUE is returned even if xCond() fails and
** no substitution occurs.
*/
static int stem(
char **pz, /* The word being stemmed (Reversed) */
const char *zFrom, /* If the ending matches this... (Reversed) */
const char *zTo, /* ... change the ending to this (not reversed) */
int (*xCond)(const char*) /* Condition that must be true */
){
char *z = *pz;
while( *zFrom && *zFrom==*z ){ z++; zFrom++; }
if( *zFrom!=0 ) return 0;
if( xCond && !xCond(z) ) return 1;
while( *zTo ){
*(--z) = *(zTo++);
}
*pz = z;
return 1;
}
/*
** This is the fallback stemmer used when the porter stemmer is
** inappropriate. The input word is copied into the output with
** US-ASCII case folding. If the input word is too long (more
** than 20 bytes if it contains no digits or more than 6 bytes if
** it contains digits) then the word is truncated to 20 or 6 bytes
** by taking 10 or 3 bytes from the beginning and end.
*/
static void copy_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
int i, mx, j;
int hasDigit = 0;
for(i=0; i<nIn; i++){
char c = zIn[i];
if( c>='A' && c<='Z' ){
zOut[i] = c - 'A' + 'a';
}else{
if( c>='0' && c<='9' ) hasDigit = 1;
zOut[i] = c;
}
}
mx = hasDigit ? 3 : 10;
if( nIn>mx*2 ){
for(j=mx, i=nIn-mx; i<nIn; i++, j++){
zOut[j] = zOut[i];
}
i = j;
}
zOut[i] = 0;
*pnOut = i;
}
/*
** Stem the input word zIn[0..nIn-1]. Store the output in zOut.
** zOut is at least big enough to hold nIn bytes. Write the actual
** size of the output word (exclusive of the '\0' terminator) into *pnOut.
**
** Any upper-case characters in the US-ASCII character set ([A-Z])
** are converted to lower case. Upper-case UTF characters are
** unchanged.
**
** Words that are longer than about 20 bytes are stemmed by retaining
** a few bytes from the beginning and the end of the word. If the
** word contains digits, 3 bytes are taken from the beginning and
** 3 bytes from the end. For long words without digits, 10 bytes
** are taken from each end. US-ASCII case folding still applies.
**
** If the input word contains no digits but does contain characters not
** in [a-zA-Z] then no stemming is attempted and this routine just
** copies the input into the output with US-ASCII
** case folding.
**
** Stemming never increases the length of the word. So there is
** no chance of overflowing the zOut buffer.
*/
static void porter_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
int i, j;
char zReverse[28];
char *z, *z2;
if( nIn<3 || nIn>=(int)sizeof(zReverse)-7 ){
/* The word is too big or too small for the porter stemmer.
** Fallback to the copy stemmer */
copy_stemmer(zIn, nIn, zOut, pnOut);
return;
}
for(i=0, j=sizeof(zReverse)-6; i<nIn; i++, j--){
char c = zIn[i];
if( c>='A' && c<='Z' ){
zReverse[j] = c + 'a' - 'A';
}else if( c>='a' && c<='z' ){
zReverse[j] = c;
}else{
/* The use of a character not in [a-zA-Z] means that we fallback
** to the copy stemmer */
copy_stemmer(zIn, nIn, zOut, pnOut);
return;
}
}
memset(&zReverse[sizeof(zReverse)-5], 0, 5);
z = &zReverse[j+1];
/* Step 1a */
if( z[0]=='s' ){
if(
!stem(&z, "sess", "ss", 0) &&
!stem(&z, "sei", "i", 0) &&
!stem(&z, "ss", "ss", 0)
){
z++;
}
}
/* Step 1b */
z2 = z;
if( stem(&z, "dee", "ee", m_gt_0) ){
/* Do nothing. The work was all in the test */
}else if(
(stem(&z, "gni", "", hasVowel) || stem(&z, "de", "", hasVowel))
&& z!=z2
){
if( stem(&z, "ta", "ate", 0) ||
stem(&z, "lb", "ble", 0) ||
stem(&z, "zi", "ize", 0) ){
/* Do nothing. The work was all in the test */
}else if( doubleConsonant(z) && (*z!='l' && *z!='s' && *z!='z') ){
z++;
}else if( m_eq_1(z) && star_oh(z) ){
*(--z) = 'e';
}
}
/* Step 1c */
if( z[0]=='y' && hasVowel(z+1) ){
z[0] = 'i';
}
/* Step 2 */
switch( z[1] ){
case 'a':
if( !stem(&z, "lanoita", "ate", m_gt_0) ){
stem(&z, "lanoit", "tion", m_gt_0);
}
break;
case 'c':
if( !stem(&z, "icne", "ence", m_gt_0) ){
stem(&z, "icna", "ance", m_gt_0);
}
break;
case 'e':
stem(&z, "rezi", "ize", m_gt_0);
break;
case 'g':
stem(&z, "igol", "log", m_gt_0);
break;
case 'l':
if( !stem(&z, "ilb", "ble", m_gt_0)
&& !stem(&z, "illa", "al", m_gt_0)
&& !stem(&z, "iltne", "ent", m_gt_0)
&& !stem(&z, "ile", "e", m_gt_0)
){
stem(&z, "ilsuo", "ous", m_gt_0);
}
break;
case 'o':
if( !stem(&z, "noitazi", "ize", m_gt_0)
&& !stem(&z, "noita", "ate", m_gt_0)
){
stem(&z, "rota", "ate", m_gt_0);
}
break;
case 's':
if( !stem(&z, "msila", "al", m_gt_0)
&& !stem(&z, "ssenevi", "ive", m_gt_0)
&& !stem(&z, "ssenluf", "ful", m_gt_0)
){
stem(&z, "ssensuo", "ous", m_gt_0);
}
break;
case 't':
if( !stem(&z, "itila", "al", m_gt_0)
&& !stem(&z, "itivi", "ive", m_gt_0)
){
stem(&z, "itilib", "ble", m_gt_0);
}
break;
}
/* Step 3 */
switch( z[0] ){
case 'e':
if( !stem(&z, "etaci", "ic", m_gt_0)
&& !stem(&z, "evita", "", m_gt_0)
){
stem(&z, "ezila", "al", m_gt_0);
}
break;
case 'i':
stem(&z, "itici", "ic", m_gt_0);
break;
case 'l':
if( !stem(&z, "laci", "ic", m_gt_0) ){
stem(&z, "luf", "", m_gt_0);
}
break;
case 's':
stem(&z, "ssen", "", m_gt_0);
break;
}
/* Step 4 */
switch( z[1] ){
case 'a':
if( z[0]=='l' && m_gt_1(z+2) ){
z += 2;
}
break;
case 'c':
if( z[0]=='e' && z[2]=='n' && (z[3]=='a' || z[3]=='e') && m_gt_1(z+4) ){
z += 4;
}
break;
case 'e':
if( z[0]=='r' && m_gt_1(z+2) ){
z += 2;
}
break;
case 'i':
if( z[0]=='c' && m_gt_1(z+2) ){
z += 2;
}
break;
case 'l':
if( z[0]=='e' && z[2]=='b' && (z[3]=='a' || z[3]=='i') && m_gt_1(z+4) ){
z += 4;
}
break;
case 'n':
if( z[0]=='t' ){
if( z[2]=='a' ){
if( m_gt_1(z+3) ){
z += 3;
}
}else if( z[2]=='e' ){
if( !stem(&z, "tneme", "", m_gt_1)
&& !stem(&z, "tnem", "", m_gt_1)
){
stem(&z, "tne", "", m_gt_1);
}
}
}
break;
case 'o':
if( z[0]=='u' ){
if( m_gt_1(z+2) ){
z += 2;
}
}else if( z[3]=='s' || z[3]=='t' ){
stem(&z, "noi", "", m_gt_1);
}
break;
case 's':
if( z[0]=='m' && z[2]=='i' && m_gt_1(z+3) ){
z += 3;
}
break;
case 't':
if( !stem(&z, "eta", "", m_gt_1) ){
stem(&z, "iti", "", m_gt_1);
}
break;
case 'u':
if( z[0]=='s' && z[2]=='o' && m_gt_1(z+3) ){
z += 3;
}
break;
case 'v':
case 'z':
if( z[0]=='e' && z[2]=='i' && m_gt_1(z+3) ){
z += 3;
}
break;
}
/* Step 5a */
if( z[0]=='e' ){
if( m_gt_1(z+1) ){
z++;
}else if( m_eq_1(z+1) && !star_oh(z+1) ){
z++;
}
}
/* Step 5b */
if( m_gt_1(z) && z[0]=='l' && z[1]=='l' ){
z++;
}
/* z[] is now the stemmed word in reverse order. Flip it back
** around into forward order and return.
*/
*pnOut = i = (int)strlen(z);
zOut[i] = 0;
while( *z ){
zOut[--i] = *(z++);
}
}
/*
** Characters that can be part of a token. We assume any character
** whose value is greater than 0x80 (any UTF character) can be
** part of a token. In other words, delimiters all must have
** values of 0x7f or lower.
*/
static const char porterIdChar[] = {
/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */
};
#define isDelim(C) (((ch=C)&0x80)==0 && (ch<0x30 || !porterIdChar[ch-0x30]))
/*
** Extract the next token from a tokenization cursor. The cursor must
** have been opened by a prior call to porterOpen().
*/
static int porterNext(
sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by porterOpen */
const char **pzToken, /* OUT: *pzToken is the token text */
int *pnBytes, /* OUT: Number of bytes in token */
int *piStartOffset, /* OUT: Starting offset of token */
int *piEndOffset, /* OUT: Ending offset of token */
int *piPosition /* OUT: Position integer of token */
){
porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor;
const char *z = c->zInput;
while( c->iOffset<c->nInput ){
int iStartOffset, ch;
/* Scan past delimiter characters */
while( c->iOffset<c->nInput && isDelim(z[c->iOffset]) ){
c->iOffset++;
}
/* Count non-delimiter characters. */
iStartOffset = c->iOffset;
while( c->iOffset<c->nInput && !isDelim(z[c->iOffset]) ){
c->iOffset++;
}
if( c->iOffset>iStartOffset ){
int n = c->iOffset-iStartOffset;
if( n>c->nAllocated ){
char *pNew;
c->nAllocated = n+20;
pNew = sqlite3_realloc(c->zToken, c->nAllocated);
if( !pNew ) return SQLITE_NOMEM;
c->zToken = pNew;
}
porter_stemmer(&z[iStartOffset], n, c->zToken, pnBytes);
*pzToken = c->zToken;
*piStartOffset = iStartOffset;
*piEndOffset = c->iOffset;
*piPosition = c->iToken++;
return SQLITE_OK;
}
}
return SQLITE_DONE;
}
/*
** The set of routines that implement the porter-stemmer tokenizer
*/
static const sqlite3_tokenizer_module porterTokenizerModule = {
0,
porterCreate,
porterDestroy,
porterOpen,
porterClose,
porterNext,
0
};
/*
** Allocate a new porter tokenizer. Return a pointer to the new
** tokenizer in *ppModule
*/
void sqlite3Fts3PorterTokenizerModule(
sqlite3_tokenizer_module const**ppModule
){
*ppModule = &porterTokenizerModule;
}
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
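/* Illustrative sketch (not part of the original source; the helper name is
** hypothetical): selecting the porter tokenizer registered above when an
** FTS4 table is created. With "tokenize=porter", a MATCH query for
** "frustrated" also finds rows containing "frustration", because both words
** reduce to the same stem.
*/
static int fts3PorterExample(sqlite3 *db){
  return sqlite3_exec(db,
      "CREATE VIRTUAL TABLE papers USING fts4(body, tokenize=porter);"
      "INSERT INTO papers(body) VALUES('a study of frustration');"
      "SELECT rowid FROM papers WHERE body MATCH 'frustrated';",
      0, 0, 0);
}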

File diff suppressed because it is too large Load Diff

View File

@ -1,374 +0,0 @@
/*
** 2011 Jan 27
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
**
** This file is not part of the production FTS code. It is only used for
** testing. It contains a virtual table implementation that provides direct
** access to the full-text index of an FTS table.
*/
#include "fts3Int.h"
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
#ifdef SQLITE_TEST
#include <string.h>
#include <assert.h>
#include <stdlib.h>
typedef struct Fts3termTable Fts3termTable;
typedef struct Fts3termCursor Fts3termCursor;
struct Fts3termTable {
sqlite3_vtab base; /* Base class used by SQLite core */
int iIndex; /* Index for Fts3Table.aIndex[] */
Fts3Table *pFts3Tab;
};
struct Fts3termCursor {
sqlite3_vtab_cursor base; /* Base class used by SQLite core */
Fts3MultiSegReader csr; /* Must be right after "base" */
Fts3SegFilter filter;
int isEof; /* True if cursor is at EOF */
char *pNext;
sqlite3_int64 iRowid; /* Current 'rowid' value */
sqlite3_int64 iDocid; /* Current 'docid' value */
int iCol; /* Current 'col' value */
int iPos; /* Current 'pos' value */
};
/*
** Schema of the terms table.
*/
#define FTS3_TERMS_SCHEMA "CREATE TABLE x(term, docid, col, pos)"
/*
** This function does all the work for both the xConnect and xCreate methods.
** These tables have no persistent representation of their own, so xConnect
** and xCreate are identical operations.
*/
static int fts3termConnectMethod(
sqlite3 *db, /* Database connection */
void *pCtx, /* Non-zero for an fts4prefix table */
int argc, /* Number of elements in argv array */
const char * const *argv, /* xCreate/xConnect argument array */
sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */
char **pzErr /* OUT: sqlite3_malloc'd error message */
){
char const *zDb; /* Name of database (e.g. "main") */
char const *zFts3; /* Name of fts3 table */
int nDb; /* Result of strlen(zDb) */
int nFts3; /* Result of strlen(zFts3) */
sqlite3_int64 nByte; /* Bytes of space to allocate here */
int rc; /* value returned by declare_vtab() */
Fts3termTable *p; /* Virtual table object to return */
int iIndex = 0;
UNUSED_PARAMETER(pCtx);
if( argc==5 ){
iIndex = atoi(argv[4]);
argc--;
}
/* The user should specify a single argument - the name of an fts3 table. */
if( argc!=4 ){
sqlite3Fts3ErrMsg(pzErr,
"wrong number of arguments to fts4term constructor"
);
return SQLITE_ERROR;
}
zDb = argv[1];
nDb = (int)strlen(zDb);
zFts3 = argv[3];
nFts3 = (int)strlen(zFts3);
rc = sqlite3_declare_vtab(db, FTS3_TERMS_SCHEMA);
if( rc!=SQLITE_OK ) return rc;
nByte = sizeof(Fts3termTable) + sizeof(Fts3Table) + nDb + nFts3 + 2;
p = (Fts3termTable *)sqlite3_malloc64(nByte);
if( !p ) return SQLITE_NOMEM;
memset(p, 0, (size_t)nByte);
p->pFts3Tab = (Fts3Table *)&p[1];
p->pFts3Tab->zDb = (char *)&p->pFts3Tab[1];
p->pFts3Tab->zName = &p->pFts3Tab->zDb[nDb+1];
p->pFts3Tab->db = db;
p->pFts3Tab->nIndex = iIndex+1;
p->iIndex = iIndex;
memcpy((char *)p->pFts3Tab->zDb, zDb, nDb);
memcpy((char *)p->pFts3Tab->zName, zFts3, nFts3);
sqlite3Fts3Dequote((char *)p->pFts3Tab->zName);
*ppVtab = (sqlite3_vtab *)p;
return SQLITE_OK;
}
/*
** This function does the work for both the xDisconnect and xDestroy methods.
** These tables have no persistent representation of their own, so xDisconnect
** and xDestroy are identical operations.
*/
static int fts3termDisconnectMethod(sqlite3_vtab *pVtab){
Fts3termTable *p = (Fts3termTable *)pVtab;
Fts3Table *pFts3 = p->pFts3Tab;
int i;
/* Free any prepared statements held */
for(i=0; i<SizeofArray(pFts3->aStmt); i++){
sqlite3_finalize(pFts3->aStmt[i]);
}
sqlite3_free(pFts3->zSegmentsTbl);
sqlite3_free(p);
return SQLITE_OK;
}
#define FTS4AUX_EQ_CONSTRAINT 1
#define FTS4AUX_GE_CONSTRAINT 2
#define FTS4AUX_LE_CONSTRAINT 4
/*
** xBestIndex - Analyze a WHERE and ORDER BY clause.
*/
static int fts3termBestIndexMethod(
sqlite3_vtab *pVTab,
sqlite3_index_info *pInfo
){
UNUSED_PARAMETER(pVTab);
/* This vtab naturally does "ORDER BY term, docid, col, pos". */
if( pInfo->nOrderBy ){
int i;
for(i=0; i<pInfo->nOrderBy; i++){
if( pInfo->aOrderBy[i].iColumn!=i || pInfo->aOrderBy[i].desc ) break;
}
if( i==pInfo->nOrderBy ){
pInfo->orderByConsumed = 1;
}
}
return SQLITE_OK;
}
/*
** xOpen - Open a cursor.
*/
static int fts3termOpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){
Fts3termCursor *pCsr; /* Pointer to cursor object to return */
UNUSED_PARAMETER(pVTab);
pCsr = (Fts3termCursor *)sqlite3_malloc(sizeof(Fts3termCursor));
if( !pCsr ) return SQLITE_NOMEM;
memset(pCsr, 0, sizeof(Fts3termCursor));
*ppCsr = (sqlite3_vtab_cursor *)pCsr;
return SQLITE_OK;
}
/*
** xClose - Close a cursor.
*/
static int fts3termCloseMethod(sqlite3_vtab_cursor *pCursor){
Fts3Table *pFts3 = ((Fts3termTable *)pCursor->pVtab)->pFts3Tab;
Fts3termCursor *pCsr = (Fts3termCursor *)pCursor;
sqlite3Fts3SegmentsClose(pFts3);
sqlite3Fts3SegReaderFinish(&pCsr->csr);
sqlite3_free(pCsr);
return SQLITE_OK;
}
/*
** xNext - Advance the cursor to the next row, if any.
*/
static int fts3termNextMethod(sqlite3_vtab_cursor *pCursor){
Fts3termCursor *pCsr = (Fts3termCursor *)pCursor;
Fts3Table *pFts3 = ((Fts3termTable *)pCursor->pVtab)->pFts3Tab;
int rc;
sqlite3_int64 v;
/* Increment our pretend rowid value. */
pCsr->iRowid++;
/* Advance to the next term in the full-text index. */
if( pCsr->csr.aDoclist==0
|| pCsr->pNext>=&pCsr->csr.aDoclist[pCsr->csr.nDoclist-1]
){
rc = sqlite3Fts3SegReaderStep(pFts3, &pCsr->csr);
if( rc!=SQLITE_ROW ){
pCsr->isEof = 1;
return rc;
}
pCsr->iCol = 0;
pCsr->iPos = 0;
pCsr->iDocid = 0;
pCsr->pNext = pCsr->csr.aDoclist;
/* Read docid */
pCsr->pNext += sqlite3Fts3GetVarint(pCsr->pNext, &pCsr->iDocid);
}
pCsr->pNext += sqlite3Fts3GetVarint(pCsr->pNext, &v);
if( v==0 ){
pCsr->pNext += sqlite3Fts3GetVarint(pCsr->pNext, &v);
pCsr->iDocid += v;
pCsr->pNext += sqlite3Fts3GetVarint(pCsr->pNext, &v);
pCsr->iCol = 0;
pCsr->iPos = 0;
}
if( v==1 ){
pCsr->pNext += sqlite3Fts3GetVarint(pCsr->pNext, &v);
pCsr->iCol += (int)v;
pCsr->iPos = 0;
pCsr->pNext += sqlite3Fts3GetVarint(pCsr->pNext, &v);
}
pCsr->iPos += (int)(v - 2);
return SQLITE_OK;
}
/*
** xFilter - Initialize a cursor to point at the start of its data.
*/
static int fts3termFilterMethod(
sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */
int idxNum, /* Strategy index */
const char *idxStr, /* Unused */
int nVal, /* Number of elements in apVal */
sqlite3_value **apVal /* Arguments for the indexing scheme */
){
Fts3termCursor *pCsr = (Fts3termCursor *)pCursor;
Fts3termTable *p = (Fts3termTable *)pCursor->pVtab;
Fts3Table *pFts3 = p->pFts3Tab;
int rc;
UNUSED_PARAMETER(nVal);
UNUSED_PARAMETER(idxNum);
UNUSED_PARAMETER(idxStr);
UNUSED_PARAMETER(apVal);
assert( idxStr==0 && idxNum==0 );
/* In case this cursor is being reused, close and zero it. */
testcase(pCsr->filter.zTerm);
sqlite3Fts3SegReaderFinish(&pCsr->csr);
memset(&pCsr->csr, 0, ((u8*)&pCsr[1]) - (u8*)&pCsr->csr);
pCsr->filter.flags = FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY;
pCsr->filter.flags |= FTS3_SEGMENT_SCAN;
rc = sqlite3Fts3SegReaderCursor(pFts3, 0, p->iIndex, FTS3_SEGCURSOR_ALL,
pCsr->filter.zTerm, pCsr->filter.nTerm, 0, 1, &pCsr->csr
);
if( rc==SQLITE_OK ){
rc = sqlite3Fts3SegReaderStart(pFts3, &pCsr->csr, &pCsr->filter);
}
if( rc==SQLITE_OK ){
rc = fts3termNextMethod(pCursor);
}
return rc;
}
/*
** xEof - Return true if the cursor is at EOF, or false otherwise.
*/
static int fts3termEofMethod(sqlite3_vtab_cursor *pCursor){
Fts3termCursor *pCsr = (Fts3termCursor *)pCursor;
return pCsr->isEof;
}
/*
** xColumn - Return a column value.
*/
static int fts3termColumnMethod(
sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */
sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */
int iCol /* Index of column to read value from */
){
Fts3termCursor *p = (Fts3termCursor *)pCursor;
assert( iCol>=0 && iCol<=3 );
switch( iCol ){
case 0:
sqlite3_result_text(pCtx, p->csr.zTerm, p->csr.nTerm, SQLITE_TRANSIENT);
break;
case 1:
sqlite3_result_int64(pCtx, p->iDocid);
break;
case 2:
sqlite3_result_int64(pCtx, p->iCol);
break;
default:
sqlite3_result_int64(pCtx, p->iPos);
break;
}
return SQLITE_OK;
}
/*
** xRowid - Return the current rowid for the cursor.
*/
static int fts3termRowidMethod(
sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */
sqlite_int64 *pRowid /* OUT: Rowid value */
){
Fts3termCursor *pCsr = (Fts3termCursor *)pCursor;
*pRowid = pCsr->iRowid;
return SQLITE_OK;
}
/*
** Register the fts3term module with database connection db. Return SQLITE_OK
** if successful or an error code if sqlite3_create_module() fails.
*/
int sqlite3Fts3InitTerm(sqlite3 *db){
static const sqlite3_module fts3term_module = {
0, /* iVersion */
fts3termConnectMethod, /* xCreate */
fts3termConnectMethod, /* xConnect */
fts3termBestIndexMethod, /* xBestIndex */
fts3termDisconnectMethod, /* xDisconnect */
fts3termDisconnectMethod, /* xDestroy */
fts3termOpenMethod, /* xOpen */
fts3termCloseMethod, /* xClose */
fts3termFilterMethod, /* xFilter */
fts3termNextMethod, /* xNext */
fts3termEofMethod, /* xEof */
fts3termColumnMethod, /* xColumn */
fts3termRowidMethod, /* xRowid */
0, /* xUpdate */
0, /* xBegin */
0, /* xSync */
0, /* xCommit */
0, /* xRollback */
0, /* xFindFunction */
0, /* xRename */
0, /* xSavepoint */
0, /* xRelease */
0, /* xRollbackTo */
0 /* xShadowName */
};
int rc; /* Return code */
rc = sqlite3_create_module(db, "fts4term", &fts3term_module, 0);
return rc;
}
#endif
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
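/* Illustrative sketch (not part of the original source): the fts4term module
** registered above is only available in SQLITE_TEST builds. Given an existing
** fts4 table named "doc", the raw full-text index can then be inspected with,
** for example:
**
**   CREATE VIRTUAL TABLE doc_index USING fts4term(doc);
**   SELECT term, docid, col, pos FROM doc_index;
*/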

View File

@ -1,621 +0,0 @@
/*
** 2011 Jun 13
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
**
** This file is not part of the production FTS code. It is only used for
** testing. It contains a Tcl command that can be used to test if a document
** matches an FTS NEAR expression.
**
** As of March 2012, it also contains a version 1 tokenizer used for testing
** that the sqlite3_tokenizer_module.xLanguage() method is invoked correctly.
*/
#if defined(INCLUDE_SQLITE_TCL_H)
# include "sqlite_tcl.h"
#else
# include "tcl.h"
# ifndef SQLITE_TCLAPI
# define SQLITE_TCLAPI
# endif
#endif
#include <string.h>
#include <assert.h>
#if defined(SQLITE_TEST)
#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)
/* Required so that the "ifdef SQLITE_ENABLE_FTS3" below works */
#include "fts3Int.h"
#define NM_MAX_TOKEN 12
typedef struct NearPhrase NearPhrase;
typedef struct NearDocument NearDocument;
typedef struct NearToken NearToken;
struct NearDocument {
 int nToken; /* Number of tokens in the document */
NearToken *aToken; /* Token array */
};
struct NearToken {
int n; /* Length of token in bytes */
const char *z; /* Pointer to token string */
};
struct NearPhrase {
int nNear; /* Preceding NEAR value */
int nToken; /* Number of tokens in this phrase */
NearToken aToken[NM_MAX_TOKEN]; /* Array of tokens in this phrase */
};
static int nm_phrase_match(
NearPhrase *p,
NearToken *aToken
){
int ii;
for(ii=0; ii<p->nToken; ii++){
NearToken *pToken = &p->aToken[ii];
if( pToken->n>0 && pToken->z[pToken->n-1]=='*' ){
if( aToken[ii].n<(pToken->n-1) ) return 0;
if( memcmp(aToken[ii].z, pToken->z, pToken->n-1) ) return 0;
}else{
if( aToken[ii].n!=pToken->n ) return 0;
if( memcmp(aToken[ii].z, pToken->z, pToken->n) ) return 0;
}
}
return 1;
}
static int nm_near_chain(
int iDir, /* Direction to iterate through aPhrase[] */
NearDocument *pDoc, /* Document to match against */
int iPos, /* Position at which iPhrase was found */
int nPhrase, /* Size of phrase array */
NearPhrase *aPhrase, /* Phrase array */
int iPhrase /* Index of phrase found */
){
int iStart;
int iStop;
int ii;
int nNear;
int iPhrase2;
NearPhrase *p;
NearPhrase *pPrev;
assert( iDir==1 || iDir==-1 );
if( iDir==1 ){
if( (iPhrase+1)==nPhrase ) return 1;
nNear = aPhrase[iPhrase+1].nNear;
}else{
if( iPhrase==0 ) return 1;
nNear = aPhrase[iPhrase].nNear;
}
pPrev = &aPhrase[iPhrase];
iPhrase2 = iPhrase+iDir;
p = &aPhrase[iPhrase2];
iStart = iPos - nNear - p->nToken;
iStop = iPos + nNear + pPrev->nToken;
if( iStart<0 ) iStart = 0;
if( iStop > pDoc->nToken - p->nToken ) iStop = pDoc->nToken - p->nToken;
for(ii=iStart; ii<=iStop; ii++){
if( nm_phrase_match(p, &pDoc->aToken[ii]) ){
if( nm_near_chain(iDir, pDoc, ii, nPhrase, aPhrase, iPhrase2) ) return 1;
}
}
return 0;
}
static int nm_match_count(
NearDocument *pDoc, /* Document to match against */
int nPhrase, /* Size of phrase array */
NearPhrase *aPhrase, /* Phrase array */
int iPhrase /* Index of phrase to count matches for */
){
int nOcc = 0;
int ii;
NearPhrase *p = &aPhrase[iPhrase];
for(ii=0; ii<(pDoc->nToken + 1 - p->nToken); ii++){
if( nm_phrase_match(p, &pDoc->aToken[ii]) ){
/* Test forward NEAR chain (i>iPhrase) */
if( 0==nm_near_chain(1, pDoc, ii, nPhrase, aPhrase, iPhrase) ) continue;
/* Test reverse NEAR chain (i<iPhrase) */
if( 0==nm_near_chain(-1, pDoc, ii, nPhrase, aPhrase, iPhrase) ) continue;
/* This is a real match. Increment the counter. */
nOcc++;
}
}
return nOcc;
}
/*
** Tclcmd: fts3_near_match DOCUMENT EXPR ?OPTIONS?
*/
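/* Illustrative sketch of invoking the command from a Tcl test script (the
** document and phrase values below are hypothetical):
**
**   # Does the document match the expression "one NEAR/2 three"?
**   fts3_near_match {one two three} {{one} 2 {three}} -phrasecountvar counts
**
** The command returns 1 (match) or 0 (no match). The optional
** -phrasecountvar variable receives a list of per-phrase hit counts.
*/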
static int SQLITE_TCLAPI fts3_near_match_cmd(
ClientData clientData,
Tcl_Interp *interp,
int objc,
Tcl_Obj *CONST objv[]
){
int nTotal = 0;
int rc;
int ii;
int nPhrase;
NearPhrase *aPhrase = 0;
NearDocument doc = {0, 0};
Tcl_Obj **apDocToken;
Tcl_Obj *pRet;
Tcl_Obj *pPhrasecount = 0;
Tcl_Obj **apExprToken;
int nExprToken;
UNUSED_PARAMETER(clientData);
/* Must have 3 or more arguments. */
if( objc<3 || (objc%2)==0 ){
Tcl_WrongNumArgs(interp, 1, objv, "DOCUMENT EXPR ?OPTION VALUE?...");
rc = TCL_ERROR;
goto near_match_out;
}
for(ii=3; ii<objc; ii+=2){
enum NM_enum { NM_PHRASECOUNTS };
struct TestnmSubcmd {
char *zName;
enum NM_enum eOpt;
} aOpt[] = {
{ "-phrasecountvar", NM_PHRASECOUNTS },
{ 0, 0 }
};
int iOpt;
if( Tcl_GetIndexFromObjStruct(
interp, objv[ii], aOpt, sizeof(aOpt[0]), "option", 0, &iOpt)
){
return TCL_ERROR;
}
switch( aOpt[iOpt].eOpt ){
case NM_PHRASECOUNTS:
pPhrasecount = objv[ii+1];
break;
}
}
rc = Tcl_ListObjGetElements(interp, objv[1], &doc.nToken, &apDocToken);
if( rc!=TCL_OK ) goto near_match_out;
doc.aToken = (NearToken *)ckalloc(doc.nToken*sizeof(NearToken));
for(ii=0; ii<doc.nToken; ii++){
doc.aToken[ii].z = Tcl_GetStringFromObj(apDocToken[ii], &doc.aToken[ii].n);
}
rc = Tcl_ListObjGetElements(interp, objv[2], &nExprToken, &apExprToken);
if( rc!=TCL_OK ) goto near_match_out;
nPhrase = (nExprToken + 1) / 2;
aPhrase = (NearPhrase *)ckalloc(nPhrase * sizeof(NearPhrase));
memset(aPhrase, 0, nPhrase * sizeof(NearPhrase));
for(ii=0; ii<nPhrase; ii++){
Tcl_Obj *pPhrase = apExprToken[ii*2];
Tcl_Obj **apToken;
int nToken;
int jj;
rc = Tcl_ListObjGetElements(interp, pPhrase, &nToken, &apToken);
if( rc!=TCL_OK ) goto near_match_out;
if( nToken>NM_MAX_TOKEN ){
Tcl_AppendResult(interp, "Too many tokens in phrase", 0);
rc = TCL_ERROR;
goto near_match_out;
}
for(jj=0; jj<nToken; jj++){
NearToken *pT = &aPhrase[ii].aToken[jj];
pT->z = Tcl_GetStringFromObj(apToken[jj], &pT->n);
}
aPhrase[ii].nToken = nToken;
}
for(ii=1; ii<nPhrase; ii++){
Tcl_Obj *pNear = apExprToken[2*ii-1];
int nNear;
rc = Tcl_GetIntFromObj(interp, pNear, &nNear);
if( rc!=TCL_OK ) goto near_match_out;
aPhrase[ii].nNear = nNear;
}
pRet = Tcl_NewObj();
Tcl_IncrRefCount(pRet);
for(ii=0; ii<nPhrase; ii++){
int nOcc = nm_match_count(&doc, nPhrase, aPhrase, ii);
Tcl_ListObjAppendElement(interp, pRet, Tcl_NewIntObj(nOcc));
nTotal += nOcc;
}
if( pPhrasecount ){
Tcl_ObjSetVar2(interp, pPhrasecount, 0, pRet, 0);
}
Tcl_DecrRefCount(pRet);
Tcl_SetObjResult(interp, Tcl_NewBooleanObj(nTotal>0));
near_match_out:
ckfree((char *)aPhrase);
ckfree((char *)doc.aToken);
return rc;
}
/*
** Tclcmd: fts3_configure_incr_load ?CHUNKSIZE THRESHOLD?
**
** Normally, FTS uses hard-coded values to determine the minimum doclist
** size eligible for incremental loading, and the size of the chunks loaded
** when a doclist is incrementally loaded. This command allows the built-in
** values to be overridden for testing purposes.
**
** If present, the first argument is the chunksize in bytes to load doclists
** in. The second argument is the minimum doclist size in bytes to use
** incremental loading with.
**
** Whether or not the arguments are present, this command returns a list of
** two integers - the initial chunksize and threshold when the command is
** invoked. This can be used to restore the default behavior after running
** tests. For example:
**
** # Override incr-load settings for testing:
** set cfg [fts3_configure_incr_load $new_chunksize $new_threshold]
**
** .... run tests ....
**
** # Restore initial incr-load settings:
** eval fts3_configure_incr_load $cfg
*/
static int SQLITE_TCLAPI fts3_configure_incr_load_cmd(
ClientData clientData,
Tcl_Interp *interp,
int objc,
Tcl_Obj *CONST objv[]
){
#ifdef SQLITE_ENABLE_FTS3
extern int test_fts3_node_chunksize;
extern int test_fts3_node_chunk_threshold;
Tcl_Obj *pRet;
if( objc!=1 && objc!=3 ){
Tcl_WrongNumArgs(interp, 1, objv, "?CHUNKSIZE THRESHOLD?");
return TCL_ERROR;
}
pRet = Tcl_NewObj();
Tcl_IncrRefCount(pRet);
Tcl_ListObjAppendElement(
interp, pRet, Tcl_NewIntObj(test_fts3_node_chunksize));
Tcl_ListObjAppendElement(
interp, pRet, Tcl_NewIntObj(test_fts3_node_chunk_threshold));
if( objc==3 ){
int iArg1;
int iArg2;
if( Tcl_GetIntFromObj(interp, objv[1], &iArg1)
|| Tcl_GetIntFromObj(interp, objv[2], &iArg2)
){
Tcl_DecrRefCount(pRet);
return TCL_ERROR;
}
test_fts3_node_chunksize = iArg1;
test_fts3_node_chunk_threshold = iArg2;
}
Tcl_SetObjResult(interp, pRet);
Tcl_DecrRefCount(pRet);
#endif
UNUSED_PARAMETER(clientData);
return TCL_OK;
}
#ifdef SQLITE_ENABLE_FTS3
/**************************************************************************
** Beginning of test tokenizer code.
**
** For language 0, this tokenizer is similar to the default 'simple'
** tokenizer. For other languages L, the following:
**
** * Odd numbered languages are case-sensitive. Even numbered
** languages are not.
**
** * Language ids 100 or greater are considered an error.
**
** The implementation assumes that the input contains only ASCII characters
** (i.e. those that may be encoded in UTF-8 using a single byte).
*/
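/* For example (illustrative input): the text "Quick Fox" is tokenized as
** "quick" and "fox" when the cursor language id is 0 (case folded), but as
** "Quick" and "Fox" when the language id is 1 (odd ids are case-sensitive).
** Setting a language id of 100 or greater causes the xLanguageid() method
** to return SQLITE_ERROR.
*/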
typedef struct test_tokenizer {
sqlite3_tokenizer base;
} test_tokenizer;
typedef struct test_tokenizer_cursor {
sqlite3_tokenizer_cursor base;
const char *aInput; /* Input being tokenized */
int nInput; /* Size of the input in bytes */
int iInput; /* Current offset in aInput */
int iToken; /* Index of next token to be returned */
char *aBuffer; /* Buffer containing current token */
int nBuffer; /* Number of bytes allocated at aBuffer */
int iLangid; /* Configured language id */
} test_tokenizer_cursor;
static int testTokenizerCreate(
int argc, const char * const *argv,
sqlite3_tokenizer **ppTokenizer
){
test_tokenizer *pNew;
UNUSED_PARAMETER(argc);
UNUSED_PARAMETER(argv);
pNew = sqlite3_malloc(sizeof(test_tokenizer));
if( !pNew ) return SQLITE_NOMEM;
memset(pNew, 0, sizeof(test_tokenizer));
*ppTokenizer = (sqlite3_tokenizer *)pNew;
return SQLITE_OK;
}
static int testTokenizerDestroy(sqlite3_tokenizer *pTokenizer){
test_tokenizer *p = (test_tokenizer *)pTokenizer;
sqlite3_free(p);
return SQLITE_OK;
}
static int testTokenizerOpen(
sqlite3_tokenizer *pTokenizer, /* The tokenizer */
const char *pInput, int nBytes, /* String to be tokenized */
sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
){
int rc = SQLITE_OK; /* Return code */
test_tokenizer_cursor *pCsr; /* New cursor object */
UNUSED_PARAMETER(pTokenizer);
pCsr = (test_tokenizer_cursor *)sqlite3_malloc(sizeof(test_tokenizer_cursor));
if( pCsr==0 ){
rc = SQLITE_NOMEM;
}else{
memset(pCsr, 0, sizeof(test_tokenizer_cursor));
pCsr->aInput = pInput;
if( nBytes<0 ){
pCsr->nInput = (int)strlen(pInput);
}else{
pCsr->nInput = nBytes;
}
}
*ppCursor = (sqlite3_tokenizer_cursor *)pCsr;
return rc;
}
static int testTokenizerClose(sqlite3_tokenizer_cursor *pCursor){
test_tokenizer_cursor *pCsr = (test_tokenizer_cursor *)pCursor;
sqlite3_free(pCsr->aBuffer);
sqlite3_free(pCsr);
return SQLITE_OK;
}
static int testIsTokenChar(char c){
return (c>='a' && c<='z') || (c>='A' && c<='Z');
}
static int testTolower(char c){
char ret = c;
if( ret>='A' && ret<='Z') ret = ret - ('A'-'a');
return ret;
}
static int testTokenizerNext(
sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by testTokenizerOpen */
const char **ppToken, /* OUT: *ppToken is the token text */
int *pnBytes, /* OUT: Number of bytes in token */
int *piStartOffset, /* OUT: Starting offset of token */
int *piEndOffset, /* OUT: Ending offset of token */
int *piPosition /* OUT: Position integer of token */
){
test_tokenizer_cursor *pCsr = (test_tokenizer_cursor *)pCursor;
int rc = SQLITE_OK;
const char *p;
const char *pEnd;
p = &pCsr->aInput[pCsr->iInput];
pEnd = &pCsr->aInput[pCsr->nInput];
/* Skip past any white-space */
assert( p<=pEnd );
while( p<pEnd && testIsTokenChar(*p)==0 ) p++;
if( p==pEnd ){
rc = SQLITE_DONE;
}else{
/* Advance to the end of the token */
const char *pToken = p;
sqlite3_int64 nToken;
while( p<pEnd && testIsTokenChar(*p) ) p++;
nToken = (sqlite3_int64)(p-pToken);
/* Copy the token into the buffer */
if( nToken>pCsr->nBuffer ){
sqlite3_free(pCsr->aBuffer);
pCsr->aBuffer = sqlite3_malloc64(nToken);
}
if( pCsr->aBuffer==0 ){
rc = SQLITE_NOMEM;
}else{
int i;
if( pCsr->iLangid & 0x00000001 ){
for(i=0; i<nToken; i++) pCsr->aBuffer[i] = pToken[i];
}else{
for(i=0; i<nToken; i++) pCsr->aBuffer[i] = (char)testTolower(pToken[i]);
}
pCsr->iToken++;
pCsr->iInput = (int)(p - pCsr->aInput);
*ppToken = pCsr->aBuffer;
*pnBytes = (int)nToken;
*piStartOffset = (int)(pToken - pCsr->aInput);
*piEndOffset = (int)(p - pCsr->aInput);
*piPosition = pCsr->iToken;
}
}
return rc;
}
static int testTokenizerLanguage(
sqlite3_tokenizer_cursor *pCursor,
int iLangid
){
int rc = SQLITE_OK;
test_tokenizer_cursor *pCsr = (test_tokenizer_cursor *)pCursor;
pCsr->iLangid = iLangid;
if( pCsr->iLangid>=100 ){
rc = SQLITE_ERROR;
}
return rc;
}
#endif
static int SQLITE_TCLAPI fts3_test_tokenizer_cmd(
ClientData clientData,
Tcl_Interp *interp,
int objc,
Tcl_Obj *CONST objv[]
){
#ifdef SQLITE_ENABLE_FTS3
static const sqlite3_tokenizer_module testTokenizerModule = {
1,
testTokenizerCreate,
testTokenizerDestroy,
testTokenizerOpen,
testTokenizerClose,
testTokenizerNext,
testTokenizerLanguage
};
const sqlite3_tokenizer_module *pPtr = &testTokenizerModule;
if( objc!=1 ){
Tcl_WrongNumArgs(interp, 1, objv, "");
return TCL_ERROR;
}
Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(
(const unsigned char *)&pPtr, sizeof(sqlite3_tokenizer_module *)
));
#endif
UNUSED_PARAMETER(clientData);
return TCL_OK;
}
static int SQLITE_TCLAPI fts3_test_varint_cmd(
ClientData clientData,
Tcl_Interp *interp,
int objc,
Tcl_Obj *CONST objv[]
){
#ifdef SQLITE_ENABLE_FTS3
char aBuf[24];
int rc;
Tcl_WideInt w;
sqlite3_int64 w2;
int nByte, nByte2;
if( objc!=2 ){
Tcl_WrongNumArgs(interp, 1, objv, "INTEGER");
return TCL_ERROR;
}
rc = Tcl_GetWideIntFromObj(interp, objv[1], &w);
if( rc!=TCL_OK ) return rc;
nByte = sqlite3Fts3PutVarint(aBuf, w);
nByte2 = sqlite3Fts3GetVarint(aBuf, &w2);
if( w!=w2 || nByte!=nByte2 ){
char *zErr = sqlite3_mprintf("error testing %lld", w);
Tcl_ResetResult(interp);
Tcl_AppendResult(interp, zErr, 0);
return TCL_ERROR;
}
if( w<=2147483647 && w>=0 ){
int i;
nByte2 = fts3GetVarint32(aBuf, &i);
if( (int)w!=i || nByte!=nByte2 ){
char *zErr = sqlite3_mprintf("error testing %lld (32-bit)", w);
Tcl_ResetResult(interp);
Tcl_AppendResult(interp, zErr, 0);
return TCL_ERROR;
}
}
#endif
UNUSED_PARAMETER(clientData);
return TCL_OK;
}
/*
** End of tokenizer code.
**************************************************************************/
/*
** sqlite3_fts3_may_be_corrupt BOOLEAN
**
** Set or clear the global "may-be-corrupt" flag. Return the old value.
*/
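/* Illustrative sketch of how a test script might use this command to
** bracket a corruption test, restoring the previous value afterwards:
**
**   set old [sqlite3_fts3_may_be_corrupt 1]
**   # ... run queries against a deliberately corrupted FTS table ...
**   sqlite3_fts3_may_be_corrupt $old
*/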
static int SQLITE_TCLAPI fts3_may_be_corrupt(
void * clientData,
Tcl_Interp *interp,
int objc,
Tcl_Obj *CONST objv[]
){
int bOld = sqlite3_fts3_may_be_corrupt;
if( objc!=2 && objc!=1 ){
Tcl_WrongNumArgs(interp, 1, objv, "?BOOLEAN?");
return TCL_ERROR;
}
if( objc==2 ){
int bNew;
if( Tcl_GetBooleanFromObj(interp, objv[1], &bNew) ) return TCL_ERROR;
sqlite3_fts3_may_be_corrupt = bNew;
}
Tcl_SetObjResult(interp, Tcl_NewIntObj(bOld));
return TCL_OK;
}
int Sqlitetestfts3_Init(Tcl_Interp *interp){
Tcl_CreateObjCommand(interp, "fts3_near_match", fts3_near_match_cmd, 0, 0);
Tcl_CreateObjCommand(interp,
"fts3_configure_incr_load", fts3_configure_incr_load_cmd, 0, 0
);
Tcl_CreateObjCommand(
interp, "fts3_test_tokenizer", fts3_test_tokenizer_cmd, 0, 0
);
Tcl_CreateObjCommand(
interp, "fts3_test_varint", fts3_test_varint_cmd, 0, 0
);
Tcl_CreateObjCommand(
interp, "sqlite3_fts3_may_be_corrupt", fts3_may_be_corrupt, 0, 0
);
return TCL_OK;
}
#endif /* SQLITE_ENABLE_FTS3 || SQLITE_ENABLE_FTS4 */
#endif /* ifdef SQLITE_TEST */

View File

@ -1,456 +0,0 @@
/*
** 2013 Apr 22
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
**
** This file contains code for the "fts3tokenize" virtual table module.
** An fts3tokenize virtual table is created as follows:
**
** CREATE VIRTUAL TABLE <tbl> USING fts3tokenize(
** <tokenizer-name>, <arg-1>, ...
** );
**
** The table created has the following schema:
**
** CREATE TABLE <tbl>(input, token, start, end, position)
**
** When queried, the query must include a WHERE clause of type:
**
** input = <string>
**
** The virtual table module tokenizes this <string>, using the FTS3
** tokenizer specified by the arguments to the CREATE VIRTUAL TABLE
** statement, and returns one row for each token in the result, with
** fields set as follows:
**
** input: Always set to a copy of <string>
** token: A token from the input.
** start: Byte offset of the token within the input <string>.
** end: Byte offset of the byte immediately following the end of the
** token within the input string.
** position: Token offset of token within input.
**
*/
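/* Minimal usage sketch (table and input values are illustrative):
**
**   CREATE VIRTUAL TABLE t1 USING fts3tokenize(simple);
**   SELECT token, start, end, position FROM t1
**    WHERE input = 'These are some words';
**
** Each returned row describes one token produced by the "simple" tokenizer
** for the supplied input string.
*/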
#include "fts3Int.h"
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
#include <string.h>
#include <assert.h>
typedef struct Fts3tokTable Fts3tokTable;
typedef struct Fts3tokCursor Fts3tokCursor;
/*
** Virtual table structure.
*/
struct Fts3tokTable {
sqlite3_vtab base; /* Base class used by SQLite core */
const sqlite3_tokenizer_module *pMod;
sqlite3_tokenizer *pTok;
};
/*
** Virtual table cursor structure.
*/
struct Fts3tokCursor {
sqlite3_vtab_cursor base; /* Base class used by SQLite core */
char *zInput; /* Input string */
sqlite3_tokenizer_cursor *pCsr; /* Cursor to iterate through zInput */
int iRowid; /* Current 'rowid' value */
const char *zToken; /* Current 'token' value */
int nToken; /* Size of zToken in bytes */
int iStart; /* Current 'start' value */
int iEnd; /* Current 'end' value */
int iPos; /* Current 'pos' value */
};
/*
** Query FTS for the tokenizer implementation named zName.
*/
static int fts3tokQueryTokenizer(
Fts3Hash *pHash,
const char *zName,
const sqlite3_tokenizer_module **pp,
char **pzErr
){
sqlite3_tokenizer_module *p;
int nName = (int)strlen(zName);
p = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, zName, nName+1);
if( !p ){
sqlite3Fts3ErrMsg(pzErr, "unknown tokenizer: %s", zName);
return SQLITE_ERROR;
}
*pp = p;
return SQLITE_OK;
}
/*
** The second argument, argv[], is an array of pointers to nul-terminated
** strings. This function makes a copy of the array and strings into a
** single block of memory. It then dequotes any of the strings that appear
** to be quoted.
**
** If successful, output parameter *pazDequote is set to point at the
** array of dequoted strings and SQLITE_OK is returned. The caller is
** responsible for eventually calling sqlite3_free() to free the array
** in this case. Or, if an error occurs, an SQLite error code is returned.
** The final value of *pazDequote is undefined in this case.
*/
static int fts3tokDequoteArray(
int argc, /* Number of elements in argv[] */
const char * const *argv, /* Input array */
char ***pazDequote /* Output array */
){
int rc = SQLITE_OK; /* Return code */
if( argc==0 ){
*pazDequote = 0;
}else{
int i;
int nByte = 0;
char **azDequote;
for(i=0; i<argc; i++){
nByte += (int)(strlen(argv[i]) + 1);
}
*pazDequote = azDequote = sqlite3_malloc64(sizeof(char *)*argc + nByte);
if( azDequote==0 ){
rc = SQLITE_NOMEM;
}else{
char *pSpace = (char *)&azDequote[argc];
for(i=0; i<argc; i++){
int n = (int)strlen(argv[i]);
azDequote[i] = pSpace;
memcpy(pSpace, argv[i], n+1);
sqlite3Fts3Dequote(pSpace);
pSpace += (n+1);
}
}
}
return rc;
}
/*
** Schema of the tokenizer table.
*/
#define FTS3_TOK_SCHEMA "CREATE TABLE x(input, token, start, end, position)"
/*
** This function does all the work for both the xConnect and xCreate methods.
** These tables have no persistent representation of their own, so xConnect
** and xCreate are identical operations.
**
** argv[0]: module name
** argv[1]: database name
** argv[2]: table name
** argv[3]: first argument (tokenizer name)
*/
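/* For example (illustrative), the statement:
**
**   CREATE VIRTUAL TABLE t1 USING fts3tokenize(porter);
**
** invokes this function with argc==4 and argv[3]=="porter".
*/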
static int fts3tokConnectMethod(
sqlite3 *db, /* Database connection */
void *pHash, /* Hash table of tokenizers */
int argc, /* Number of elements in argv array */
const char * const *argv, /* xCreate/xConnect argument array */
sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */
char **pzErr /* OUT: sqlite3_malloc'd error message */
){
Fts3tokTable *pTab = 0;
const sqlite3_tokenizer_module *pMod = 0;
sqlite3_tokenizer *pTok = 0;
int rc;
char **azDequote = 0;
int nDequote;
rc = sqlite3_declare_vtab(db, FTS3_TOK_SCHEMA);
if( rc!=SQLITE_OK ) return rc;
nDequote = argc-3;
rc = fts3tokDequoteArray(nDequote, &argv[3], &azDequote);
if( rc==SQLITE_OK ){
const char *zModule;
if( nDequote<1 ){
zModule = "simple";
}else{
zModule = azDequote[0];
}
rc = fts3tokQueryTokenizer((Fts3Hash*)pHash, zModule, &pMod, pzErr);
}
assert( (rc==SQLITE_OK)==(pMod!=0) );
if( rc==SQLITE_OK ){
const char * const *azArg = 0;
if( nDequote>1 ) azArg = (const char * const *)&azDequote[1];
rc = pMod->xCreate((nDequote>1 ? nDequote-1 : 0), azArg, &pTok);
}
if( rc==SQLITE_OK ){
pTab = (Fts3tokTable *)sqlite3_malloc(sizeof(Fts3tokTable));
if( pTab==0 ){
rc = SQLITE_NOMEM;
}
}
if( rc==SQLITE_OK ){
memset(pTab, 0, sizeof(Fts3tokTable));
pTab->pMod = pMod;
pTab->pTok = pTok;
*ppVtab = &pTab->base;
}else{
if( pTok ){
pMod->xDestroy(pTok);
}
}
sqlite3_free(azDequote);
return rc;
}
/*
** This function does the work for both the xDisconnect and xDestroy methods.
** These tables have no persistent representation of their own, so xDisconnect
** and xDestroy are identical operations.
*/
static int fts3tokDisconnectMethod(sqlite3_vtab *pVtab){
Fts3tokTable *pTab = (Fts3tokTable *)pVtab;
pTab->pMod->xDestroy(pTab->pTok);
sqlite3_free(pTab);
return SQLITE_OK;
}
/*
** xBestIndex - Analyze a WHERE and ORDER BY clause.
*/
static int fts3tokBestIndexMethod(
sqlite3_vtab *pVTab,
sqlite3_index_info *pInfo
){
int i;
UNUSED_PARAMETER(pVTab);
for(i=0; i<pInfo->nConstraint; i++){
if( pInfo->aConstraint[i].usable
&& pInfo->aConstraint[i].iColumn==0
&& pInfo->aConstraint[i].op==SQLITE_INDEX_CONSTRAINT_EQ
){
pInfo->idxNum = 1;
pInfo->aConstraintUsage[i].argvIndex = 1;
pInfo->aConstraintUsage[i].omit = 1;
pInfo->estimatedCost = 1;
return SQLITE_OK;
}
}
pInfo->idxNum = 0;
assert( pInfo->estimatedCost>1000000.0 );
return SQLITE_OK;
}
/*
** xOpen - Open a cursor.
*/
static int fts3tokOpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){
Fts3tokCursor *pCsr;
UNUSED_PARAMETER(pVTab);
pCsr = (Fts3tokCursor *)sqlite3_malloc(sizeof(Fts3tokCursor));
if( pCsr==0 ){
return SQLITE_NOMEM;
}
memset(pCsr, 0, sizeof(Fts3tokCursor));
*ppCsr = (sqlite3_vtab_cursor *)pCsr;
return SQLITE_OK;
}
/*
** Reset the tokenizer cursor passed as the only argument, as if it had
** just been returned by fts3tokOpenMethod().
*/
static void fts3tokResetCursor(Fts3tokCursor *pCsr){
if( pCsr->pCsr ){
Fts3tokTable *pTab = (Fts3tokTable *)(pCsr->base.pVtab);
pTab->pMod->xClose(pCsr->pCsr);
pCsr->pCsr = 0;
}
sqlite3_free(pCsr->zInput);
pCsr->zInput = 0;
pCsr->zToken = 0;
pCsr->nToken = 0;
pCsr->iStart = 0;
pCsr->iEnd = 0;
pCsr->iPos = 0;
pCsr->iRowid = 0;
}
/*
** xClose - Close a cursor.
*/
static int fts3tokCloseMethod(sqlite3_vtab_cursor *pCursor){
Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor;
fts3tokResetCursor(pCsr);
sqlite3_free(pCsr);
return SQLITE_OK;
}
/*
** xNext - Advance the cursor to the next row, if any.
*/
static int fts3tokNextMethod(sqlite3_vtab_cursor *pCursor){
Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor;
Fts3tokTable *pTab = (Fts3tokTable *)(pCursor->pVtab);
int rc; /* Return code */
pCsr->iRowid++;
rc = pTab->pMod->xNext(pCsr->pCsr,
&pCsr->zToken, &pCsr->nToken,
&pCsr->iStart, &pCsr->iEnd, &pCsr->iPos
);
if( rc!=SQLITE_OK ){
fts3tokResetCursor(pCsr);
if( rc==SQLITE_DONE ) rc = SQLITE_OK;
}
return rc;
}
/*
** xFilter - Initialize a cursor to point at the start of its data.
*/
static int fts3tokFilterMethod(
sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */
int idxNum, /* Strategy index */
const char *idxStr, /* Unused */
int nVal, /* Number of elements in apVal */
sqlite3_value **apVal /* Arguments for the indexing scheme */
){
int rc = SQLITE_ERROR;
Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor;
Fts3tokTable *pTab = (Fts3tokTable *)(pCursor->pVtab);
UNUSED_PARAMETER(idxStr);
UNUSED_PARAMETER(nVal);
fts3tokResetCursor(pCsr);
if( idxNum==1 ){
const char *zByte = (const char *)sqlite3_value_text(apVal[0]);
int nByte = sqlite3_value_bytes(apVal[0]);
pCsr->zInput = sqlite3_malloc64(nByte+1);
if( pCsr->zInput==0 ){
rc = SQLITE_NOMEM;
}else{
if( nByte>0 ) memcpy(pCsr->zInput, zByte, nByte);
pCsr->zInput[nByte] = 0;
rc = pTab->pMod->xOpen(pTab->pTok, pCsr->zInput, nByte, &pCsr->pCsr);
if( rc==SQLITE_OK ){
pCsr->pCsr->pTokenizer = pTab->pTok;
}
}
}
if( rc!=SQLITE_OK ) return rc;
return fts3tokNextMethod(pCursor);
}
/*
** xEof - Return true if the cursor is at EOF, or false otherwise.
*/
static int fts3tokEofMethod(sqlite3_vtab_cursor *pCursor){
Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor;
return (pCsr->zToken==0);
}
/*
** xColumn - Return a column value.
*/
static int fts3tokColumnMethod(
sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */
sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */
int iCol /* Index of column to read value from */
){
Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor;
/* CREATE TABLE x(input, token, start, end, position) */
switch( iCol ){
case 0:
sqlite3_result_text(pCtx, pCsr->zInput, -1, SQLITE_TRANSIENT);
break;
case 1:
sqlite3_result_text(pCtx, pCsr->zToken, pCsr->nToken, SQLITE_TRANSIENT);
break;
case 2:
sqlite3_result_int(pCtx, pCsr->iStart);
break;
case 3:
sqlite3_result_int(pCtx, pCsr->iEnd);
break;
default:
assert( iCol==4 );
sqlite3_result_int(pCtx, pCsr->iPos);
break;
}
return SQLITE_OK;
}
/*
** xRowid - Return the current rowid for the cursor.
*/
static int fts3tokRowidMethod(
sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */
sqlite_int64 *pRowid /* OUT: Rowid value */
){
Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor;
*pRowid = (sqlite3_int64)pCsr->iRowid;
return SQLITE_OK;
}
/*
** Register the fts3tok module with database connection db. Return SQLITE_OK
** if successful or an error code if sqlite3_create_module() fails.
*/
int sqlite3Fts3InitTok(sqlite3 *db, Fts3Hash *pHash){
static const sqlite3_module fts3tok_module = {
0, /* iVersion */
fts3tokConnectMethod, /* xCreate */
fts3tokConnectMethod, /* xConnect */
fts3tokBestIndexMethod, /* xBestIndex */
fts3tokDisconnectMethod, /* xDisconnect */
fts3tokDisconnectMethod, /* xDestroy */
fts3tokOpenMethod, /* xOpen */
fts3tokCloseMethod, /* xClose */
fts3tokFilterMethod, /* xFilter */
fts3tokNextMethod, /* xNext */
fts3tokEofMethod, /* xEof */
fts3tokColumnMethod, /* xColumn */
fts3tokRowidMethod, /* xRowid */
0, /* xUpdate */
0, /* xBegin */
0, /* xSync */
0, /* xCommit */
0, /* xRollback */
0, /* xFindFunction */
0, /* xRename */
0, /* xSavepoint */
0, /* xRelease */
0, /* xRollbackTo */
0 /* xShadowName */
};
int rc; /* Return code */
rc = sqlite3_create_module(db, "fts3tokenize", &fts3tok_module, (void*)pHash);
return rc;
}
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */

View File

@ -1,520 +0,0 @@
/*
** 2007 June 22
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
**
** This is part of an SQLite module implementing full-text search.
** This particular file implements the generic tokenizer interface.
*/
/*
** The code in this file is only compiled if:
**
** * The FTS3 module is being built as an extension
** (in which case SQLITE_CORE is not defined), or
**
** * The FTS3 module is being built into the core of
** SQLite (in which case SQLITE_ENABLE_FTS3 is defined).
*/
#include "fts3Int.h"
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
#include <assert.h>
#include <string.h>
/*
** Return true if the two-argument version of fts3_tokenizer()
** has been activated via a prior call to sqlite3_db_config(db,
** SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER, 1, 0);
*/
static int fts3TokenizerEnabled(sqlite3_context *context){
sqlite3 *db = sqlite3_context_db_handle(context);
int isEnabled = 0;
sqlite3_db_config(db,SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER,-1,&isEnabled);
return isEnabled;
}
/*
** Implementation of the SQL scalar function for accessing the underlying
** hash table. This function may be called as follows:
**
** SELECT <function-name>(<key-name>);
** SELECT <function-name>(<key-name>, <pointer>);
**
** where <function-name> is the name passed as the second argument
** to the sqlite3Fts3InitHashTable() function (e.g. 'fts3_tokenizer').
**
** If the <pointer> argument is specified, it must be a blob value
** containing a pointer to be stored as the hash data corresponding
** to the string <key-name>. If <pointer> is not specified, then
** the string <key-name> must already exist in the hash table. Otherwise,
** an error is returned.
**
** Whether or not the <pointer> argument is specified, the value returned
** is a blob containing the pointer stored as the hash data corresponding
** to string <key-name> (after the hash-table is updated, if applicable).
*/
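/* Illustrative sketch of registering a custom tokenizer through this
** function from C (the module name "mytokenizer" and pointer are
** hypothetical; compare registerTokenizer() in the SQLITE_TEST section
** below, which uses the same pattern):
**
**   const sqlite3_tokenizer_module *p = &myTokenizerModule;
**   sqlite3_stmt *pStmt;
**   sqlite3_prepare_v2(db, "SELECT fts3_tokenizer(?, ?)", -1, &pStmt, 0);
**   sqlite3_bind_text(pStmt, 1, "mytokenizer", -1, SQLITE_STATIC);
**   sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC);
**   sqlite3_step(pStmt);
**   sqlite3_finalize(pStmt);
*/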
static void fts3TokenizerFunc(
sqlite3_context *context,
int argc,
sqlite3_value **argv
){
Fts3Hash *pHash;
void *pPtr = 0;
const unsigned char *zName;
int nName;
assert( argc==1 || argc==2 );
pHash = (Fts3Hash *)sqlite3_user_data(context);
zName = sqlite3_value_text(argv[0]);
nName = sqlite3_value_bytes(argv[0])+1;
if( argc==2 ){
if( fts3TokenizerEnabled(context) || sqlite3_value_frombind(argv[1]) ){
void *pOld;
int n = sqlite3_value_bytes(argv[1]);
if( zName==0 || n!=sizeof(pPtr) ){
sqlite3_result_error(context, "argument type mismatch", -1);
return;
}
pPtr = *(void **)sqlite3_value_blob(argv[1]);
pOld = sqlite3Fts3HashInsert(pHash, (void *)zName, nName, pPtr);
if( pOld==pPtr ){
sqlite3_result_error(context, "out of memory", -1);
}
}else{
sqlite3_result_error(context, "fts3tokenize disabled", -1);
return;
}
}else{
if( zName ){
pPtr = sqlite3Fts3HashFind(pHash, zName, nName);
}
if( !pPtr ){
char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName);
sqlite3_result_error(context, zErr, -1);
sqlite3_free(zErr);
return;
}
}
if( fts3TokenizerEnabled(context) || sqlite3_value_frombind(argv[0]) ){
sqlite3_result_blob(context, (void *)&pPtr, sizeof(pPtr), SQLITE_TRANSIENT);
}
}
int sqlite3Fts3IsIdChar(char c){
static const char isFtsIdChar[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */
0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */
};
return (c&0x80 || isFtsIdChar[(int)(c)]);
}
const char *sqlite3Fts3NextToken(const char *zStr, int *pn){
const char *z1;
const char *z2 = 0;
/* Find the start of the next token. */
z1 = zStr;
while( z2==0 ){
char c = *z1;
switch( c ){
case '\0': return 0; /* No more tokens here */
case '\'':
case '"':
case '`': {
z2 = z1;
while( *++z2 && (*z2!=c || *++z2==c) );
break;
}
case '[':
z2 = &z1[1];
while( *z2 && z2[0]!=']' ) z2++;
if( *z2 ) z2++;
break;
default:
if( sqlite3Fts3IsIdChar(*z1) ){
z2 = &z1[1];
while( sqlite3Fts3IsIdChar(*z2) ) z2++;
}else{
z1++;
}
}
}
*pn = (int)(z2-z1);
return z1;
}
int sqlite3Fts3InitTokenizer(
Fts3Hash *pHash, /* Tokenizer hash table */
const char *zArg, /* Tokenizer name */
sqlite3_tokenizer **ppTok, /* OUT: Tokenizer (if applicable) */
char **pzErr /* OUT: Set to malloced error message */
){
int rc;
char *z = (char *)zArg;
int n = 0;
char *zCopy;
char *zEnd; /* Pointer to nul-term of zCopy */
sqlite3_tokenizer_module *m;
zCopy = sqlite3_mprintf("%s", zArg);
if( !zCopy ) return SQLITE_NOMEM;
zEnd = &zCopy[strlen(zCopy)];
z = (char *)sqlite3Fts3NextToken(zCopy, &n);
if( z==0 ){
assert( n==0 );
z = zCopy;
}
z[n] = '\0';
sqlite3Fts3Dequote(z);
m = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash,z,(int)strlen(z)+1);
if( !m ){
sqlite3Fts3ErrMsg(pzErr, "unknown tokenizer: %s", z);
rc = SQLITE_ERROR;
}else{
char const **aArg = 0;
int iArg = 0;
z = &z[n+1];
while( z<zEnd && (NULL!=(z = (char *)sqlite3Fts3NextToken(z, &n))) ){
sqlite3_int64 nNew = sizeof(char *)*(iArg+1);
char const **aNew = (const char **)sqlite3_realloc64((void *)aArg, nNew);
if( !aNew ){
sqlite3_free(zCopy);
sqlite3_free((void *)aArg);
return SQLITE_NOMEM;
}
aArg = aNew;
aArg[iArg++] = z;
z[n] = '\0';
sqlite3Fts3Dequote(z);
z = &z[n+1];
}
rc = m->xCreate(iArg, aArg, ppTok);
assert( rc!=SQLITE_OK || *ppTok );
if( rc!=SQLITE_OK ){
sqlite3Fts3ErrMsg(pzErr, "unknown tokenizer");
}else{
(*ppTok)->pModule = m;
}
sqlite3_free((void *)aArg);
}
sqlite3_free(zCopy);
return rc;
}
#ifdef SQLITE_TEST
#if defined(INCLUDE_SQLITE_TCL_H)
# include "sqlite_tcl.h"
#else
# include "tcl.h"
#endif
#include <string.h>
/*
** Implementation of a special SQL scalar function for testing tokenizers
** designed to be used in concert with the Tcl testing framework. This
** function must be called with two or more arguments:
**
** SELECT <function-name>(<key-name>, ..., <input-string>);
**
** where <function-name> is the name passed as the second argument
** to the sqlite3Fts3InitHashTable() function (e.g. 'fts3_tokenizer')
** concatenated with the string '_test' (e.g. 'fts3_tokenizer_test').
**
** The return value is a string that may be interpreted as a Tcl
** list. For each token in the <input-string>, three elements are
** added to the returned list. The first is the token position, the
** second is the token text (folded, stemmed, etc.) and the third is the
** substring of <input-string> associated with the token. For example,
** using the built-in "simple" tokenizer:
**
** SELECT fts_tokenizer_test('simple', 'I don't see how');
**
** will return the string:
**
** "{0 i I 1 dont don't 2 see see 3 how how}"
**
*/
static void testFunc(
sqlite3_context *context,
int argc,
sqlite3_value **argv
){
Fts3Hash *pHash;
sqlite3_tokenizer_module *p;
sqlite3_tokenizer *pTokenizer = 0;
sqlite3_tokenizer_cursor *pCsr = 0;
const char *zErr = 0;
const char *zName;
int nName;
const char *zInput;
int nInput;
const char *azArg[64];
const char *zToken;
int nToken = 0;
int iStart = 0;
int iEnd = 0;
int iPos = 0;
int i;
Tcl_Obj *pRet;
if( argc<2 ){
sqlite3_result_error(context, "insufficient arguments", -1);
return;
}
nName = sqlite3_value_bytes(argv[0]);
zName = (const char *)sqlite3_value_text(argv[0]);
nInput = sqlite3_value_bytes(argv[argc-1]);
zInput = (const char *)sqlite3_value_text(argv[argc-1]);
pHash = (Fts3Hash *)sqlite3_user_data(context);
p = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, zName, nName+1);
if( !p ){
char *zErr2 = sqlite3_mprintf("unknown tokenizer: %s", zName);
sqlite3_result_error(context, zErr2, -1);
sqlite3_free(zErr2);
return;
}
pRet = Tcl_NewObj();
Tcl_IncrRefCount(pRet);
for(i=1; i<argc-1; i++){
azArg[i-1] = (const char *)sqlite3_value_text(argv[i]);
}
if( SQLITE_OK!=p->xCreate(argc-2, azArg, &pTokenizer) ){
zErr = "error in xCreate()";
goto finish;
}
pTokenizer->pModule = p;
if( sqlite3Fts3OpenTokenizer(pTokenizer, 0, zInput, nInput, &pCsr) ){
zErr = "error in xOpen()";
goto finish;
}
while( SQLITE_OK==p->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos) ){
Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos));
Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
zToken = &zInput[iStart];
nToken = iEnd-iStart;
Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
}
if( SQLITE_OK!=p->xClose(pCsr) ){
zErr = "error in xClose()";
goto finish;
}
if( SQLITE_OK!=p->xDestroy(pTokenizer) ){
zErr = "error in xDestroy()";
goto finish;
}
finish:
if( zErr ){
sqlite3_result_error(context, zErr, -1);
}else{
sqlite3_result_text(context, Tcl_GetString(pRet), -1, SQLITE_TRANSIENT);
}
Tcl_DecrRefCount(pRet);
}
static
int registerTokenizer(
sqlite3 *db,
char *zName,
const sqlite3_tokenizer_module *p
){
int rc;
sqlite3_stmt *pStmt;
const char zSql[] = "SELECT fts3_tokenizer(?, ?)";
rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
if( rc!=SQLITE_OK ){
return rc;
}
sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC);
sqlite3_step(pStmt);
return sqlite3_finalize(pStmt);
}
static
int queryTokenizer(
sqlite3 *db,
char *zName,
const sqlite3_tokenizer_module **pp
){
int rc;
sqlite3_stmt *pStmt;
const char zSql[] = "SELECT fts3_tokenizer(?)";
*pp = 0;
rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
if( rc!=SQLITE_OK ){
return rc;
}
sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
if( SQLITE_ROW==sqlite3_step(pStmt) ){
if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB
&& sqlite3_column_bytes(pStmt, 0)==sizeof(*pp)
){
memcpy((void *)pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp));
}
}
return sqlite3_finalize(pStmt);
}
void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
/*
** Implementation of the scalar function fts3_tokenizer_internal_test().
** This function is used for testing only, it is not included in the
** build unless SQLITE_TEST is defined.
**
** The purpose of this is to test that the fts3_tokenizer() function
** can be used as designed by the C-code in the queryTokenizer and
** registerTokenizer() functions above. These two functions are repeated
** in the README.tokenizer file as an example, so it is important to
** test them.
**
** To run the tests, evaluate the fts3_tokenizer_internal_test() scalar
** function with no arguments. An assert() will fail if a problem is
** detected. i.e.:
**
** SELECT fts3_tokenizer_internal_test();
**
*/
static void intTestFunc(
sqlite3_context *context,
int argc,
sqlite3_value **argv
){
int rc;
const sqlite3_tokenizer_module *p1;
const sqlite3_tokenizer_module *p2;
sqlite3 *db = (sqlite3 *)sqlite3_user_data(context);
UNUSED_PARAMETER(argc);
UNUSED_PARAMETER(argv);
/* Test the query function */
sqlite3Fts3SimpleTokenizerModule(&p1);
rc = queryTokenizer(db, "simple", &p2);
assert( rc==SQLITE_OK );
assert( p1==p2 );
rc = queryTokenizer(db, "nosuchtokenizer", &p2);
assert( rc==SQLITE_ERROR );
assert( p2==0 );
assert( 0==strcmp(sqlite3_errmsg(db), "unknown tokenizer: nosuchtokenizer") );
/* Test the storage function */
if( fts3TokenizerEnabled(context) ){
rc = registerTokenizer(db, "nosuchtokenizer", p1);
assert( rc==SQLITE_OK );
rc = queryTokenizer(db, "nosuchtokenizer", &p2);
assert( rc==SQLITE_OK );
assert( p2==p1 );
}
sqlite3_result_text(context, "ok", -1, SQLITE_STATIC);
}
#endif
/*
** Set up SQL objects in database db used to access the contents of
** the hash table pointed to by argument pHash. The hash table must have
** been initialized to use string keys, and to take a private copy
** of the key when a value is inserted. i.e. by a call similar to:
**
** sqlite3Fts3HashInit(pHash, FTS3_HASH_STRING, 1);
**
** This function adds a scalar function (see header comment above
** fts3TokenizerFunc() in this file for details) and, if ENABLE_TABLE is
** defined at compilation time, a temporary virtual table (see header
** comment above struct HashTableVtab) to the database schema. Both
** provide read/write access to the contents of *pHash.
**
** The third argument to this function, zName, is used as the name
** of both the scalar and, if created, the virtual table.
*/
int sqlite3Fts3InitHashTable(
sqlite3 *db,
Fts3Hash *pHash,
const char *zName
){
int rc = SQLITE_OK;
void *p = (void *)pHash;
const int any = SQLITE_UTF8|SQLITE_DIRECTONLY;
#ifdef SQLITE_TEST
char *zTest = 0;
char *zTest2 = 0;
void *pdb = (void *)db;
zTest = sqlite3_mprintf("%s_test", zName);
zTest2 = sqlite3_mprintf("%s_internal_test", zName);
if( !zTest || !zTest2 ){
rc = SQLITE_NOMEM;
}
#endif
if( SQLITE_OK==rc ){
rc = sqlite3_create_function(db, zName, 1, any, p, fts3TokenizerFunc, 0, 0);
}
if( SQLITE_OK==rc ){
rc = sqlite3_create_function(db, zName, 2, any, p, fts3TokenizerFunc, 0, 0);
}
#ifdef SQLITE_TEST
if( SQLITE_OK==rc ){
rc = sqlite3_create_function(db, zTest, -1, any, p, testFunc, 0, 0);
}
if( SQLITE_OK==rc ){
rc = sqlite3_create_function(db, zTest2, 0, any, pdb, intTestFunc, 0, 0);
}
#endif
#ifdef SQLITE_TEST
sqlite3_free(zTest);
sqlite3_free(zTest2);
#endif
return rc;
}
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */

View File

@ -1,161 +0,0 @@
/*
** 2006 July 10
**
** The author disclaims copyright to this source code.
**
*************************************************************************
** Defines the interface to tokenizers used by fulltext-search. There
** are three basic components:
**
** sqlite3_tokenizer_module is a singleton defining the tokenizer
** interface functions. This is essentially the class structure for
** tokenizers.
**
** sqlite3_tokenizer is used to define a particular tokenizer, perhaps
** including customization information defined at creation time.
**
** sqlite3_tokenizer_cursor is generated by a tokenizer to generate
** tokens from a particular input.
*/
#ifndef _FTS3_TOKENIZER_H_
#define _FTS3_TOKENIZER_H_
/* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time.
** If tokenizers are to be allowed to call sqlite3_*() functions, then
** we will need a way to register the API consistently.
*/
#include "sqlite3.h"
/*
** Structures used by the tokenizer interface. When a new tokenizer
** implementation is registered, the caller provides a pointer to
** an sqlite3_tokenizer_module containing pointers to the callback
** functions that make up an implementation.
**
** When an fts3 table is created, it passes any arguments passed to
** the tokenizer clause of the CREATE VIRTUAL TABLE statement to the
** sqlite3_tokenizer_module.xCreate() function of the requested tokenizer
** implementation. The xCreate() function in turn returns an
** sqlite3_tokenizer structure representing the specific tokenizer to
** be used for the fts3 table (customized by the tokenizer clause arguments).
**
** To tokenize an input buffer, the sqlite3_tokenizer_module.xOpen()
** method is called. It returns an sqlite3_tokenizer_cursor object
** that may be used to tokenize a specific input buffer based on
** the tokenization rules supplied by a specific sqlite3_tokenizer
** object.
*/
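/* Illustrative sketch of the call sequence a consumer of this interface
** typically follows (error handling omitted; pMod, zDoc and nDoc are
** assumed to be supplied by the caller):
**
**   sqlite3_tokenizer *pTok;
**   sqlite3_tokenizer_cursor *pCsr;
**   const char *zToken; int nToken, iStart, iEnd, iPos;
**
**   pMod->xCreate(0, 0, &pTok);
**   pTok->pModule = pMod;
**   pMod->xOpen(pTok, zDoc, nDoc, &pCsr);
**   pCsr->pTokenizer = pTok;
**   while( pMod->xNext(pCsr,&zToken,&nToken,&iStart,&iEnd,&iPos)==SQLITE_OK ){
**     .. consume zToken[0..nToken-1] ..
**   }
**   pMod->xClose(pCsr);
**   pMod->xDestroy(pTok);
*/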
typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module;
typedef struct sqlite3_tokenizer sqlite3_tokenizer;
typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor;
struct sqlite3_tokenizer_module {
/*
** Structure version. Should always be set to 0 or 1.
*/
int iVersion;
/*
** Create a new tokenizer. The values in the argv[] array are the
** arguments passed to the "tokenizer" clause of the CREATE VIRTUAL
** TABLE statement that created the fts3 table. For example, if
** the following SQL is executed:
**
** CREATE .. USING fts3( ... , tokenizer <tokenizer-name> arg1 arg2)
**
** then argc is set to 2, and the argv[] array contains pointers
** to the strings "arg1" and "arg2".
**
** This method should return either SQLITE_OK (0), or an SQLite error
** code. If SQLITE_OK is returned, then *ppTokenizer should be set
** to point at the newly created tokenizer structure. The generic
** sqlite3_tokenizer.pModule variable should not be initialized by
** this callback. The caller will do so.
*/
int (*xCreate)(
int argc, /* Size of argv array */
const char *const*argv, /* Tokenizer argument strings */
sqlite3_tokenizer **ppTokenizer /* OUT: Created tokenizer */
);
/*
** Destroy an existing tokenizer. The fts3 module calls this method
** exactly once for each successful call to xCreate().
*/
int (*xDestroy)(sqlite3_tokenizer *pTokenizer);
/*
** Create a tokenizer cursor to tokenize an input buffer. The caller
** is responsible for ensuring that the input buffer remains valid
** until the cursor is closed (using the xClose() method).
*/
int (*xOpen)(
sqlite3_tokenizer *pTokenizer, /* Tokenizer object */
const char *pInput, int nBytes, /* Input buffer */
sqlite3_tokenizer_cursor **ppCursor /* OUT: Created tokenizer cursor */
);
/*
** Destroy an existing tokenizer cursor. The fts3 module calls this
** method exactly once for each successful call to xOpen().
*/
int (*xClose)(sqlite3_tokenizer_cursor *pCursor);
/*
** Retrieve the next token from the tokenizer cursor pCursor. This
** method should either return SQLITE_OK and set the values of the
** "OUT" variables identified below, or SQLITE_DONE to indicate that
** the end of the buffer has been reached, or an SQLite error code.
**
** *ppToken should be set to point at a buffer containing the
** normalized version of the token (i.e. after any case-folding and/or
** stemming has been performed). *pnBytes should be set to the length
** of this buffer in bytes. The input text that generated the token is
** identified by the byte offsets returned in *piStartOffset and
** *piEndOffset. *piStartOffset should be set to the index of the first
** byte of the token in the input buffer. *piEndOffset should be set
** to the index of the first byte just past the end of the token in
** the input buffer.
**
** The buffer that *ppToken is set to point at is managed by the tokenizer
** implementation. It is only required to be valid until the next call
** to xNext() or xClose().
*/
/* TODO(shess) current implementation requires pInput to be
** nul-terminated. This should either be fixed, or pInput/nBytes
** should be converted to zInput.
*/
int (*xNext)(
sqlite3_tokenizer_cursor *pCursor, /* Tokenizer cursor */
const char **ppToken, int *pnBytes, /* OUT: Normalized text for token */
int *piStartOffset, /* OUT: Byte offset of token in input buffer */
int *piEndOffset, /* OUT: Byte offset of end of token in input buffer */
int *piPosition /* OUT: Number of tokens returned before this one */
);
/***********************************************************************
** Methods below this point are only available if iVersion>=1.
*/
/*
** Configure the language id of a tokenizer cursor.
*/
int (*xLanguageid)(sqlite3_tokenizer_cursor *pCsr, int iLangid);
};
struct sqlite3_tokenizer {
const sqlite3_tokenizer_module *pModule; /* The module for this tokenizer */
/* Tokenizer implementations will typically add additional fields */
};
struct sqlite3_tokenizer_cursor {
sqlite3_tokenizer *pTokenizer; /* Tokenizer for this cursor. */
/* Tokenizer implementations will typically add additional fields */
};
int fts3_global_term_cnt(int iTerm, int iCol);
int fts3_term_cnt(int iTerm, int iCol);
#endif /* _FTS3_TOKENIZER_H_ */

View File

@ -1,234 +0,0 @@
/*
** 2006 Oct 10
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
**
** Implementation of the "simple" full-text-search tokenizer.
*/
/*
** The code in this file is only compiled if:
**
** * The FTS3 module is being built as an extension
** (in which case SQLITE_CORE is not defined), or
**
** * The FTS3 module is being built into the core of
** SQLite (in which case SQLITE_ENABLE_FTS3 is defined).
*/
#include "fts3Int.h"
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "fts3_tokenizer.h"
typedef struct simple_tokenizer {
sqlite3_tokenizer base;
char delim[128]; /* flag ASCII delimiters */
} simple_tokenizer;
typedef struct simple_tokenizer_cursor {
sqlite3_tokenizer_cursor base;
const char *pInput; /* input we are tokenizing */
int nBytes; /* size of the input */
int iOffset; /* current position in pInput */
int iToken; /* index of next token to be returned */
char *pToken; /* storage for current token */
int nTokenAllocated; /* space allocated to pToken buffer */
} simple_tokenizer_cursor;
static int simpleDelim(simple_tokenizer *t, unsigned char c){
return c<0x80 && t->delim[c];
}
static int fts3_isalnum(int x){
return (x>='0' && x<='9') || (x>='A' && x<='Z') || (x>='a' && x<='z');
}
/*
** Create a new tokenizer instance.
*/
static int simpleCreate(
int argc, const char * const *argv,
sqlite3_tokenizer **ppTokenizer
){
simple_tokenizer *t;
t = (simple_tokenizer *) sqlite3_malloc(sizeof(*t));
if( t==NULL ) return SQLITE_NOMEM;
memset(t, 0, sizeof(*t));
/* TODO(shess) Delimiters need to remain the same from run to run,
** else we need to reindex. One solution would be a meta-table to
** track such information in the database, then we'd only want this
** information on the initial create.
*/
if( argc>1 ){
int i, n = (int)strlen(argv[1]);
for(i=0; i<n; i++){
unsigned char ch = argv[1][i];
/* We explicitly don't support UTF-8 delimiters for now. */
if( ch>=0x80 ){
sqlite3_free(t);
return SQLITE_ERROR;
}
t->delim[ch] = 1;
}
} else {
/* Mark non-alphanumeric ASCII characters as delimiters */
int i;
for(i=1; i<0x80; i++){
t->delim[i] = !fts3_isalnum(i) ? -1 : 0;
}
}
*ppTokenizer = &t->base;
return SQLITE_OK;
}
/*
** Destroy a tokenizer
*/
static int simpleDestroy(sqlite3_tokenizer *pTokenizer){
sqlite3_free(pTokenizer);
return SQLITE_OK;
}
/*
** Prepare to begin tokenizing a particular string. The input
** string to be tokenized is pInput[0..nBytes-1]. A cursor
** used to incrementally tokenize this string is returned in
** *ppCursor.
*/
static int simpleOpen(
sqlite3_tokenizer *pTokenizer, /* The tokenizer */
const char *pInput, int nBytes, /* String to be tokenized */
sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
){
simple_tokenizer_cursor *c;
UNUSED_PARAMETER(pTokenizer);
c = (simple_tokenizer_cursor *) sqlite3_malloc(sizeof(*c));
if( c==NULL ) return SQLITE_NOMEM;
c->pInput = pInput;
if( pInput==0 ){
c->nBytes = 0;
}else if( nBytes<0 ){
c->nBytes = (int)strlen(pInput);
}else{
c->nBytes = nBytes;
}
c->iOffset = 0; /* start tokenizing at the beginning */
c->iToken = 0;
c->pToken = NULL; /* no space allocated, yet. */
c->nTokenAllocated = 0;
*ppCursor = &c->base;
return SQLITE_OK;
}
/*
** Close a tokenization cursor previously opened by a call to
** simpleOpen() above.
*/
static int simpleClose(sqlite3_tokenizer_cursor *pCursor){
simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;
sqlite3_free(c->pToken);
sqlite3_free(c);
return SQLITE_OK;
}
/*
** Extract the next token from a tokenization cursor. The cursor must
** have been opened by a prior call to simpleOpen().
*/
static int simpleNext(
sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by simpleOpen */
const char **ppToken, /* OUT: *ppToken is the token text */
int *pnBytes, /* OUT: Number of bytes in token */
int *piStartOffset, /* OUT: Starting offset of token */
int *piEndOffset, /* OUT: Ending offset of token */
int *piPosition /* OUT: Position integer of token */
){
simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;
simple_tokenizer *t = (simple_tokenizer *) pCursor->pTokenizer;
unsigned char *p = (unsigned char *)c->pInput;
while( c->iOffset<c->nBytes ){
int iStartOffset;
/* Scan past delimiter characters */
while( c->iOffset<c->nBytes && simpleDelim(t, p[c->iOffset]) ){
c->iOffset++;
}
/* Count non-delimiter characters. */
iStartOffset = c->iOffset;
while( c->iOffset<c->nBytes && !simpleDelim(t, p[c->iOffset]) ){
c->iOffset++;
}
if( c->iOffset>iStartOffset ){
int i, n = c->iOffset-iStartOffset;
if( n>c->nTokenAllocated ){
char *pNew;
c->nTokenAllocated = n+20;
pNew = sqlite3_realloc(c->pToken, c->nTokenAllocated);
if( !pNew ) return SQLITE_NOMEM;
c->pToken = pNew;
}
for(i=0; i<n; i++){
/* TODO(shess) This needs expansion to handle UTF-8
** case-insensitivity.
*/
unsigned char ch = p[iStartOffset+i];
c->pToken[i] = (char)((ch>='A' && ch<='Z') ? ch-'A'+'a' : ch);
}
*ppToken = c->pToken;
*pnBytes = n;
*piStartOffset = iStartOffset;
*piEndOffset = c->iOffset;
*piPosition = c->iToken++;
return SQLITE_OK;
}
}
return SQLITE_DONE;
}
/*
** The set of routines that implement the simple tokenizer
*/
static const sqlite3_tokenizer_module simpleTokenizerModule = {
0,
simpleCreate,
simpleDestroy,
simpleOpen,
simpleClose,
simpleNext,
0,
};
/*
** Allocate a new simple tokenizer. Return a pointer to the new
** tokenizer in *ppModule
*/
void sqlite3Fts3SimpleTokenizerModule(
sqlite3_tokenizer_module const**ppModule
){
*ppModule = &simpleTokenizerModule;
}
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */

View File

@ -1,397 +0,0 @@
/*
** 2012 May 24
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
**
** Implementation of the "unicode" full-text-search tokenizer.
*/
#ifndef SQLITE_DISABLE_FTS3_UNICODE
#include "fts3Int.h"
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "fts3_tokenizer.h"
/*
** The following two macros - READ_UTF8 and WRITE_UTF8 - have been copied
** from the sqlite3 source file utf.c. If this file is compiled as part
** of the amalgamation, they are not required.
*/
#ifndef SQLITE_AMALGAMATION
static const unsigned char sqlite3Utf8Trans1[] = {
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
};
#define READ_UTF8(zIn, zTerm, c) \
c = *(zIn++); \
if( c>=0xc0 ){ \
c = sqlite3Utf8Trans1[c-0xc0]; \
while( zIn!=zTerm && (*zIn & 0xc0)==0x80 ){ \
c = (c<<6) + (0x3f & *(zIn++)); \
} \
if( c<0x80 \
|| (c&0xFFFFF800)==0xD800 \
|| (c&0xFFFFFFFE)==0xFFFE ){ c = 0xFFFD; } \
}
#define WRITE_UTF8(zOut, c) { \
if( c<0x00080 ){ \
*zOut++ = (u8)(c&0xFF); \
} \
else if( c<0x00800 ){ \
*zOut++ = 0xC0 + (u8)((c>>6)&0x1F); \
*zOut++ = 0x80 + (u8)(c & 0x3F); \
} \
else if( c<0x10000 ){ \
*zOut++ = 0xE0 + (u8)((c>>12)&0x0F); \
*zOut++ = 0x80 + (u8)((c>>6) & 0x3F); \
*zOut++ = 0x80 + (u8)(c & 0x3F); \
}else{ \
*zOut++ = 0xF0 + (u8)((c>>18) & 0x07); \
*zOut++ = 0x80 + (u8)((c>>12) & 0x3F); \
*zOut++ = 0x80 + (u8)((c>>6) & 0x3F); \
*zOut++ = 0x80 + (u8)(c & 0x3F); \
} \
}
#endif /* ifndef SQLITE_AMALGAMATION */
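/* Illustrative sketch: iterating over the codepoints of a buffer aBuf/nBuf
** with READ_UTF8 follows the same loop shape used by unicodeAddExceptions()
** below:
**
**   const unsigned char *z = (const unsigned char *)aBuf;
**   const unsigned char *zTerm = &z[nBuf];
**   unsigned int iCode;
**   while( z<zTerm ){
**     READ_UTF8(z, zTerm, iCode);
**     .. iCode now holds the next codepoint (0xFFFD for malformed input) ..
**   }
*/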
typedef struct unicode_tokenizer unicode_tokenizer;
typedef struct unicode_cursor unicode_cursor;
struct unicode_tokenizer {
sqlite3_tokenizer base;
int eRemoveDiacritic;
int nException;
int *aiException;
};
struct unicode_cursor {
sqlite3_tokenizer_cursor base;
const unsigned char *aInput; /* Input text being tokenized */
int nInput; /* Size of aInput[] in bytes */
int iOff; /* Current offset within aInput[] */
int iToken; /* Index of next token to be returned */
char *zToken; /* storage for current token */
int nAlloc; /* space allocated at zToken */
};
/*
** Destroy a tokenizer allocated by unicodeCreate().
*/
static int unicodeDestroy(sqlite3_tokenizer *pTokenizer){
if( pTokenizer ){
unicode_tokenizer *p = (unicode_tokenizer *)pTokenizer;
sqlite3_free(p->aiException);
sqlite3_free(p);
}
return SQLITE_OK;
}
/*
** As part of a tokenchars= or separators= option, the CREATE VIRTUAL TABLE
** statement has specified that the tokenizer for this table shall consider
** all characters in string zIn/nIn to be separators (if bAlnum==0) or
** token characters (if bAlnum==1).
**
** For each codepoint in the zIn/nIn string, this function checks if the
** sqlite3FtsUnicodeIsalnum() function already returns the desired result.
** If so, no action is taken. Otherwise, the codepoint is added to the
** unicode_tokenizer.aiException[] array. For the purposes of tokenization,
** the return value of sqlite3FtsUnicodeIsalnum() is inverted for all
** codepoints in the aiException[] array.
**
** If a standalone diacritic mark (one that sqlite3FtsUnicodeIsdiacritic()
** identifies as a diacritic) occurs in the zIn/nIn string it is ignored.
** It is not possible to change the behavior of the tokenizer with respect
** to these codepoints.
*/
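/* For context, an illustrative example of the CREATE VIRTUAL TABLE options
** that cause this function to be called (table and column names are
** hypothetical):
**
**   CREATE VIRTUAL TABLE docs USING fts4(
**     body, tokenize=unicode61 "tokenchars=.-" "separators=X"
**   );
**
** Here '.' and '-' become token characters and 'X' becomes a separator.
*/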
static int unicodeAddExceptions(
unicode_tokenizer *p, /* Tokenizer to add exceptions to */
int bAlnum, /* Replace Isalnum() return value with this */
const char *zIn, /* Array of characters to make exceptions */
int nIn /* Length of z in bytes */
){
const unsigned char *z = (const unsigned char *)zIn;
const unsigned char *zTerm = &z[nIn];
unsigned int iCode;
int nEntry = 0;
assert( bAlnum==0 || bAlnum==1 );
while( z<zTerm ){
READ_UTF8(z, zTerm, iCode);
assert( (sqlite3FtsUnicodeIsalnum((int)iCode) & 0xFFFFFFFE)==0 );
if( sqlite3FtsUnicodeIsalnum((int)iCode)!=bAlnum
&& sqlite3FtsUnicodeIsdiacritic((int)iCode)==0
){
nEntry++;
}
}
if( nEntry ){
int *aNew; /* New aiException[] array */
int nNew; /* Number of valid entries in array aNew[] */
aNew = sqlite3_realloc64(p->aiException,(p->nException+nEntry)*sizeof(int));
if( aNew==0 ) return SQLITE_NOMEM;
nNew = p->nException;
z = (const unsigned char *)zIn;
while( z<zTerm ){
READ_UTF8(z, zTerm, iCode);
if( sqlite3FtsUnicodeIsalnum((int)iCode)!=bAlnum
&& sqlite3FtsUnicodeIsdiacritic((int)iCode)==0
){
int i, j;
for(i=0; i<nNew && aNew[i]<(int)iCode; i++);
for(j=nNew; j>i; j--) aNew[j] = aNew[j-1];
aNew[i] = (int)iCode;
nNew++;
}
}
p->aiException = aNew;
p->nException = nNew;
}
return SQLITE_OK;
}
/*
** Return true if the p->aiException[] array contains the value iCode.
*/
static int unicodeIsException(unicode_tokenizer *p, int iCode){
if( p->nException>0 ){
int *a = p->aiException;
int iLo = 0;
int iHi = p->nException-1;
while( iHi>=iLo ){
int iTest = (iHi + iLo) / 2;
if( iCode==a[iTest] ){
return 1;
}else if( iCode>a[iTest] ){
iLo = iTest+1;
}else{
iHi = iTest-1;
}
}
}
return 0;
}
/*
** Return true if, for the purposes of tokenization, codepoint iCode is
** considered a token character (not a separator).
*/
static int unicodeIsAlnum(unicode_tokenizer *p, int iCode){
assert( (sqlite3FtsUnicodeIsalnum(iCode) & 0xFFFFFFFE)==0 );
return sqlite3FtsUnicodeIsalnum(iCode) ^ unicodeIsException(p, iCode);
}
/*
** Create a new tokenizer instance.
*/
static int unicodeCreate(
int nArg, /* Size of array argv[] */
const char * const *azArg, /* Tokenizer creation arguments */
sqlite3_tokenizer **pp /* OUT: New tokenizer handle */
){
unicode_tokenizer *pNew; /* New tokenizer object */
int i;
int rc = SQLITE_OK;
pNew = (unicode_tokenizer *) sqlite3_malloc(sizeof(unicode_tokenizer));
if( pNew==NULL ) return SQLITE_NOMEM;
memset(pNew, 0, sizeof(unicode_tokenizer));
pNew->eRemoveDiacritic = 1;
for(i=0; rc==SQLITE_OK && i<nArg; i++){
const char *z = azArg[i];
int n = (int)strlen(z);
if( n==19 && memcmp("remove_diacritics=1", z, 19)==0 ){
pNew->eRemoveDiacritic = 1;
}
else if( n==19 && memcmp("remove_diacritics=0", z, 19)==0 ){
pNew->eRemoveDiacritic = 0;
}
else if( n==19 && memcmp("remove_diacritics=2", z, 19)==0 ){
pNew->eRemoveDiacritic = 2;
}
else if( n>=11 && memcmp("tokenchars=", z, 11)==0 ){
rc = unicodeAddExceptions(pNew, 1, &z[11], n-11);
}
else if( n>=11 && memcmp("separators=", z, 11)==0 ){
rc = unicodeAddExceptions(pNew, 0, &z[11], n-11);
}
else{
/* Unrecognized argument */
rc = SQLITE_ERROR;
}
}
if( rc!=SQLITE_OK ){
unicodeDestroy((sqlite3_tokenizer *)pNew);
pNew = 0;
}
*pp = (sqlite3_tokenizer *)pNew;
return rc;
}
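/* For context, the azArg[] strings parsed above are the double-quoted
** arguments that follow the tokenizer name in a tokenize= clause.  The
** sketch below is illustrative only (the function, table and column names
** are made up) and shows how a table using this tokenizer with a
** non-default configuration might be declared, assuming a build with FTS4
** enabled:
*/
static int createUnicode61Table(sqlite3 *db){
  /* unicodeCreate() receives nArg==2 with azArg[0]=="remove_diacritics=2"
  ** and azArg[1]=="tokenchars=-_" when this statement is executed. */
  return sqlite3_exec(db,
    "CREATE VIRTUAL TABLE docs USING fts4("
    "  body,"
    "  tokenize=unicode61 \"remove_diacritics=2\" \"tokenchars=-_\""
    ");",
    0, 0, 0
  );
}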
/*
** Prepare to begin tokenizing a particular string. The input
** string to be tokenized is pInput[0..nBytes-1]. A cursor
** used to incrementally tokenize this string is returned in
** *ppCursor.
*/
static int unicodeOpen(
sqlite3_tokenizer *p, /* The tokenizer */
const char *aInput, /* Input string */
int nInput, /* Size of string aInput in bytes */
sqlite3_tokenizer_cursor **pp /* OUT: New cursor object */
){
unicode_cursor *pCsr;
pCsr = (unicode_cursor *)sqlite3_malloc(sizeof(unicode_cursor));
if( pCsr==0 ){
return SQLITE_NOMEM;
}
memset(pCsr, 0, sizeof(unicode_cursor));
pCsr->aInput = (const unsigned char *)aInput;
if( aInput==0 ){
pCsr->nInput = 0;
pCsr->aInput = (const unsigned char*)"";
}else if( nInput<0 ){
pCsr->nInput = (int)strlen(aInput);
}else{
pCsr->nInput = nInput;
}
*pp = &pCsr->base;
UNUSED_PARAMETER(p);
return SQLITE_OK;
}
/*
** Close a tokenization cursor previously opened by a call to
** simpleOpen() above.
*/
static int unicodeClose(sqlite3_tokenizer_cursor *pCursor){
unicode_cursor *pCsr = (unicode_cursor *) pCursor;
sqlite3_free(pCsr->zToken);
sqlite3_free(pCsr);
return SQLITE_OK;
}
/*
** Extract the next token from a tokenization cursor. The cursor must
** have been opened by a prior call to simpleOpen().
*/
static int unicodeNext(
sqlite3_tokenizer_cursor *pC, /* Cursor returned by simpleOpen */
const char **paToken, /* OUT: Token text */
int *pnToken, /* OUT: Number of bytes at *paToken */
int *piStart, /* OUT: Starting offset of token */
int *piEnd, /* OUT: Ending offset of token */
int *piPos /* OUT: Position integer of token */
){
unicode_cursor *pCsr = (unicode_cursor *)pC;
unicode_tokenizer *p = ((unicode_tokenizer *)pCsr->base.pTokenizer);
unsigned int iCode = 0;
char *zOut;
const unsigned char *z = &pCsr->aInput[pCsr->iOff];
const unsigned char *zStart = z;
const unsigned char *zEnd;
const unsigned char *zTerm = &pCsr->aInput[pCsr->nInput];
/* Scan past any delimiter characters before the start of the next token.
** Return SQLITE_DONE early if this takes us all the way to the end of
** the input. */
while( z<zTerm ){
READ_UTF8(z, zTerm, iCode);
if( unicodeIsAlnum(p, (int)iCode) ) break;
zStart = z;
}
if( zStart>=zTerm ) return SQLITE_DONE;
zOut = pCsr->zToken;
do {
int iOut;
/* Grow the output buffer if required. */
if( (zOut-pCsr->zToken)>=(pCsr->nAlloc-4) ){
char *zNew = sqlite3_realloc64(pCsr->zToken, pCsr->nAlloc+64);
if( !zNew ) return SQLITE_NOMEM;
zOut = &zNew[zOut - pCsr->zToken];
pCsr->zToken = zNew;
pCsr->nAlloc += 64;
}
/* Write the folded case of the last character read to the output */
zEnd = z;
iOut = sqlite3FtsUnicodeFold((int)iCode, p->eRemoveDiacritic);
if( iOut ){
WRITE_UTF8(zOut, iOut);
}
/* If the cursor is not at EOF, read the next character */
if( z>=zTerm ) break;
READ_UTF8(z, zTerm, iCode);
}while( unicodeIsAlnum(p, (int)iCode)
|| sqlite3FtsUnicodeIsdiacritic((int)iCode)
);
/* Set the output variables and return. */
pCsr->iOff = (int)(z - pCsr->aInput);
*paToken = pCsr->zToken;
*pnToken = (int)(zOut - pCsr->zToken);
*piStart = (int)(zStart - pCsr->aInput);
*piEnd = (int)(zEnd - pCsr->aInput);
*piPos = pCsr->iToken++;
return SQLITE_OK;
}
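/* Taken together, unicodeOpen(), unicodeNext() and unicodeClose() are only
** ever reached through the sqlite3_tokenizer_module vtable.  A minimal
** driver loop might look like the sketch below (illustrative only: it
** assumes <stdio.h>, and that pModule and pTokenizer were obtained
** elsewhere, for example via sqlite3Fts3UnicodeTokenizer() followed by a
** call to pModule->xCreate()):
*/
static int dumpTokens(
  const sqlite3_tokenizer_module *pModule,  /* Tokenizer implementation */
  sqlite3_tokenizer *pTokenizer,            /* Instance from pModule->xCreate() */
  const char *zText                         /* Nul-terminated text to tokenize */
){
  sqlite3_tokenizer_cursor *pCsr;
  const char *zToken;
  int nToken, iStart, iEnd, iPos;
  int rc = pModule->xOpen(pTokenizer, zText, -1, &pCsr);
  if( rc!=SQLITE_OK ) return rc;
  pCsr->pTokenizer = pTokenizer;  /* the FTS3 core normally sets this field */
  while( SQLITE_OK==(rc = pModule->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos)) ){
    printf("%3d: %.*s (bytes %d..%d)\n", iPos, nToken, zToken, iStart, iEnd);
  }
  pModule->xClose(pCsr);
  return rc==SQLITE_DONE ? SQLITE_OK : rc;
}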
/*
** Set *ppModule to a pointer to the sqlite3_tokenizer_module
** structure for the unicode tokenizer.
*/
void sqlite3Fts3UnicodeTokenizer(sqlite3_tokenizer_module const **ppModule){
static const sqlite3_tokenizer_module module = {
0,
unicodeCreate,
unicodeDestroy,
unicodeOpen,
unicodeClose,
unicodeNext,
0,
};
*ppModule = &module;
}
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
#endif /* ifndef SQLITE_DISABLE_FTS3_UNICODE */

View File

@ -1,383 +0,0 @@
/*
** 2012-05-25
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
*/
/*
** DO NOT EDIT THIS MACHINE GENERATED FILE.
*/
#ifndef SQLITE_DISABLE_FTS3_UNICODE
#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)
#include <assert.h>
/*
** Return true if the argument corresponds to a unicode codepoint
** classified as either a letter or a number. Otherwise false.
**
** The results are undefined if the value passed to this function
** is less than zero.
*/
int sqlite3FtsUnicodeIsalnum(int c){
/* Each unsigned integer in the following array corresponds to a contiguous
** range of unicode codepoints that are not either letters or numbers (i.e.
** codepoints for which this function should return 0).
**
** The most significant 22 bits in each 32-bit value contain the first
** codepoint in the range. The least significant 10 bits are used to store
** the size of the range (always at least 1). In other words, the value
** ((C<<22) + N) represents a range of N codepoints starting with codepoint
** C. It is not possible to represent a range larger than 1023 codepoints
** using this format.
*/
static const unsigned int aEntry[] = {
0x00000030, 0x0000E807, 0x00016C06, 0x0001EC2F, 0x0002AC07,
0x0002D001, 0x0002D803, 0x0002EC01, 0x0002FC01, 0x00035C01,
0x0003DC01, 0x000B0804, 0x000B480E, 0x000B9407, 0x000BB401,
0x000BBC81, 0x000DD401, 0x000DF801, 0x000E1002, 0x000E1C01,
0x000FD801, 0x00120808, 0x00156806, 0x00162402, 0x00163C01,
0x00164437, 0x0017CC02, 0x00180005, 0x00181816, 0x00187802,
0x00192C15, 0x0019A804, 0x0019C001, 0x001B5001, 0x001B580F,
0x001B9C07, 0x001BF402, 0x001C000E, 0x001C3C01, 0x001C4401,
0x001CC01B, 0x001E980B, 0x001FAC09, 0x001FD804, 0x00205804,
0x00206C09, 0x00209403, 0x0020A405, 0x0020C00F, 0x00216403,
0x00217801, 0x0023901B, 0x00240004, 0x0024E803, 0x0024F812,
0x00254407, 0x00258804, 0x0025C001, 0x00260403, 0x0026F001,
0x0026F807, 0x00271C02, 0x00272C03, 0x00275C01, 0x00278802,
0x0027C802, 0x0027E802, 0x00280403, 0x0028F001, 0x0028F805,
0x00291C02, 0x00292C03, 0x00294401, 0x0029C002, 0x0029D401,
0x002A0403, 0x002AF001, 0x002AF808, 0x002B1C03, 0x002B2C03,
0x002B8802, 0x002BC002, 0x002C0403, 0x002CF001, 0x002CF807,
0x002D1C02, 0x002D2C03, 0x002D5802, 0x002D8802, 0x002DC001,
0x002E0801, 0x002EF805, 0x002F1803, 0x002F2804, 0x002F5C01,
0x002FCC08, 0x00300403, 0x0030F807, 0x00311803, 0x00312804,
0x00315402, 0x00318802, 0x0031FC01, 0x00320802, 0x0032F001,
0x0032F807, 0x00331803, 0x00332804, 0x00335402, 0x00338802,
0x00340802, 0x0034F807, 0x00351803, 0x00352804, 0x00355C01,
0x00358802, 0x0035E401, 0x00360802, 0x00372801, 0x00373C06,
0x00375801, 0x00376008, 0x0037C803, 0x0038C401, 0x0038D007,
0x0038FC01, 0x00391C09, 0x00396802, 0x003AC401, 0x003AD006,
0x003AEC02, 0x003B2006, 0x003C041F, 0x003CD00C, 0x003DC417,
0x003E340B, 0x003E6424, 0x003EF80F, 0x003F380D, 0x0040AC14,
0x00412806, 0x00415804, 0x00417803, 0x00418803, 0x00419C07,
0x0041C404, 0x0042080C, 0x00423C01, 0x00426806, 0x0043EC01,
0x004D740C, 0x004E400A, 0x00500001, 0x0059B402, 0x005A0001,
0x005A6C02, 0x005BAC03, 0x005C4803, 0x005CC805, 0x005D4802,
0x005DC802, 0x005ED023, 0x005F6004, 0x005F7401, 0x0060000F,
0x0062A401, 0x0064800C, 0x0064C00C, 0x00650001, 0x00651002,
0x0066C011, 0x00672002, 0x00677822, 0x00685C05, 0x00687802,
0x0069540A, 0x0069801D, 0x0069FC01, 0x006A8007, 0x006AA006,
0x006C0005, 0x006CD011, 0x006D6823, 0x006E0003, 0x006E840D,
0x006F980E, 0x006FF004, 0x00709014, 0x0070EC05, 0x0071F802,
0x00730008, 0x00734019, 0x0073B401, 0x0073C803, 0x00770027,
0x0077F004, 0x007EF401, 0x007EFC03, 0x007F3403, 0x007F7403,
0x007FB403, 0x007FF402, 0x00800065, 0x0081A806, 0x0081E805,
0x00822805, 0x0082801A, 0x00834021, 0x00840002, 0x00840C04,
0x00842002, 0x00845001, 0x00845803, 0x00847806, 0x00849401,
0x00849C01, 0x0084A401, 0x0084B801, 0x0084E802, 0x00850005,
0x00852804, 0x00853C01, 0x00864264, 0x00900027, 0x0091000B,
0x0092704E, 0x00940200, 0x009C0475, 0x009E53B9, 0x00AD400A,
0x00B39406, 0x00B3BC03, 0x00B3E404, 0x00B3F802, 0x00B5C001,
0x00B5FC01, 0x00B7804F, 0x00B8C00C, 0x00BA001A, 0x00BA6C59,
0x00BC00D6, 0x00BFC00C, 0x00C00005, 0x00C02019, 0x00C0A807,
0x00C0D802, 0x00C0F403, 0x00C26404, 0x00C28001, 0x00C3EC01,
0x00C64002, 0x00C6580A, 0x00C70024, 0x00C8001F, 0x00C8A81E,
0x00C94001, 0x00C98020, 0x00CA2827, 0x00CB003F, 0x00CC0100,
0x01370040, 0x02924037, 0x0293F802, 0x02983403, 0x0299BC10,
0x029A7C01, 0x029BC008, 0x029C0017, 0x029C8002, 0x029E2402,
0x02A00801, 0x02A01801, 0x02A02C01, 0x02A08C09, 0x02A0D804,
0x02A1D004, 0x02A20002, 0x02A2D011, 0x02A33802, 0x02A38012,
0x02A3E003, 0x02A4980A, 0x02A51C0D, 0x02A57C01, 0x02A60004,
0x02A6CC1B, 0x02A77802, 0x02A8A40E, 0x02A90C01, 0x02A93002,
0x02A97004, 0x02A9DC03, 0x02A9EC01, 0x02AAC001, 0x02AAC803,
0x02AADC02, 0x02AAF802, 0x02AB0401, 0x02AB7802, 0x02ABAC07,
0x02ABD402, 0x02AF8C0B, 0x03600001, 0x036DFC02, 0x036FFC02,
0x037FFC01, 0x03EC7801, 0x03ECA401, 0x03EEC810, 0x03F4F802,
0x03F7F002, 0x03F8001A, 0x03F88007, 0x03F8C023, 0x03F95013,
0x03F9A004, 0x03FBFC01, 0x03FC040F, 0x03FC6807, 0x03FCEC06,
0x03FD6C0B, 0x03FF8007, 0x03FFA007, 0x03FFE405, 0x04040003,
0x0404DC09, 0x0405E411, 0x0406400C, 0x0407402E, 0x040E7C01,
0x040F4001, 0x04215C01, 0x04247C01, 0x0424FC01, 0x04280403,
0x04281402, 0x04283004, 0x0428E003, 0x0428FC01, 0x04294009,
0x0429FC01, 0x042CE407, 0x04400003, 0x0440E016, 0x04420003,
0x0442C012, 0x04440003, 0x04449C0E, 0x04450004, 0x04460003,
0x0446CC0E, 0x04471404, 0x045AAC0D, 0x0491C004, 0x05BD442E,
0x05BE3C04, 0x074000F6, 0x07440027, 0x0744A4B5, 0x07480046,
0x074C0057, 0x075B0401, 0x075B6C01, 0x075BEC01, 0x075C5401,
0x075CD401, 0x075D3C01, 0x075DBC01, 0x075E2401, 0x075EA401,
0x075F0C01, 0x07BBC002, 0x07C0002C, 0x07C0C064, 0x07C2800F,
0x07C2C40E, 0x07C3040F, 0x07C3440F, 0x07C4401F, 0x07C4C03C,
0x07C5C02B, 0x07C7981D, 0x07C8402B, 0x07C90009, 0x07C94002,
0x07CC0021, 0x07CCC006, 0x07CCDC46, 0x07CE0014, 0x07CE8025,
0x07CF1805, 0x07CF8011, 0x07D0003F, 0x07D10001, 0x07D108B6,
0x07D3E404, 0x07D4003E, 0x07D50004, 0x07D54018, 0x07D7EC46,
0x07D9140B, 0x07DA0046, 0x07DC0074, 0x38000401, 0x38008060,
0x380400F0,
};
static const unsigned int aAscii[4] = {
0xFFFFFFFF, 0xFC00FFFF, 0xF8000001, 0xF8000001,
};
if( (unsigned int)c<128 ){
return ( (aAscii[c >> 5] & ((unsigned int)1 << (c & 0x001F)))==0 );
}else if( (unsigned int)c<(1<<22) ){
unsigned int key = (((unsigned int)c)<<10) | 0x000003FF;
int iRes = 0;
int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
int iLo = 0;
while( iHi>=iLo ){
int iTest = (iHi + iLo) / 2;
if( key >= aEntry[iTest] ){
iRes = iTest;
iLo = iTest+1;
}else{
iHi = iTest-1;
}
}
assert( aEntry[0]<key );
assert( key>=aEntry[iRes] );
return (((unsigned int)c) >= ((aEntry[iRes]>>10) + (aEntry[iRes]&0x3FF)));
}
return 1;
}
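/* A worked decoding of the packing described above (a stand-alone sketch,
** compiled separately from this file; the two constants are copied from
** aEntry[]):
*/
#include <stdio.h>

/* Split one packed aEntry[] value into its first codepoint and range size. */
static void decodeEntry(unsigned int v){
  unsigned int iFirst = v >> 10;      /* most significant 22 bits */
  unsigned int nRange = v & 0x3FF;    /* least significant 10 bits */
  printf("codepoints %u..%u are not letters or numbers\n",
         iFirst, iFirst+nRange-1);
}

int main(void){
  decodeEntry(0x00000030);  /* 0..47: controls, space and punctuation up to '/' */
  decodeEntry(0x0000E807);  /* 58..64: ':' through '@' */
  return 0;
}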
/*
** If the argument is a codepoint corresponding to a lowercase letter
** in the ASCII range with a diacritic added, return the codepoint
** of the ASCII letter only. For example, if passed 235 - "LATIN
** SMALL LETTER E WITH DIAERESIS" - return 101 ("LATIN SMALL LETTER
** E"). The results of passing a codepoint that corresponds to an
** uppercase letter are undefined.
*/
static int remove_diacritic(int c, int bComplex){
unsigned short aDia[] = {
0, 1797, 1848, 1859, 1891, 1928, 1940, 1995,
2024, 2040, 2060, 2110, 2168, 2206, 2264, 2286,
2344, 2383, 2472, 2488, 2516, 2596, 2668, 2732,
2782, 2842, 2894, 2954, 2984, 3000, 3028, 3336,
3456, 3696, 3712, 3728, 3744, 3766, 3832, 3896,
3912, 3928, 3944, 3968, 4008, 4040, 4056, 4106,
4138, 4170, 4202, 4234, 4266, 4296, 4312, 4344,
4408, 4424, 4442, 4472, 4488, 4504, 6148, 6198,
6264, 6280, 6360, 6429, 6505, 6529, 61448, 61468,
61512, 61534, 61592, 61610, 61642, 61672, 61688, 61704,
61726, 61784, 61800, 61816, 61836, 61880, 61896, 61914,
61948, 61998, 62062, 62122, 62154, 62184, 62200, 62218,
62252, 62302, 62364, 62410, 62442, 62478, 62536, 62554,
62584, 62604, 62640, 62648, 62656, 62664, 62730, 62766,
62830, 62890, 62924, 62974, 63032, 63050, 63082, 63118,
63182, 63242, 63274, 63310, 63368, 63390,
};
#define HIBIT ((unsigned char)0x80)
unsigned char aChar[] = {
'\0', 'a', 'c', 'e', 'i', 'n',
'o', 'u', 'y', 'y', 'a', 'c',
'd', 'e', 'e', 'g', 'h', 'i',
'j', 'k', 'l', 'n', 'o', 'r',
's', 't', 'u', 'u', 'w', 'y',
'z', 'o', 'u', 'a', 'i', 'o',
'u', 'u'|HIBIT, 'a'|HIBIT, 'g', 'k', 'o',
'o'|HIBIT, 'j', 'g', 'n', 'a'|HIBIT, 'a',
'e', 'i', 'o', 'r', 'u', 's',
't', 'h', 'a', 'e', 'o'|HIBIT, 'o',
'o'|HIBIT, 'y', '\0', '\0', '\0', '\0',
'\0', '\0', '\0', '\0', 'a', 'b',
'c'|HIBIT, 'd', 'd', 'e'|HIBIT, 'e', 'e'|HIBIT,
'f', 'g', 'h', 'h', 'i', 'i'|HIBIT,
'k', 'l', 'l'|HIBIT, 'l', 'm', 'n',
'o'|HIBIT, 'p', 'r', 'r'|HIBIT, 'r', 's',
's'|HIBIT, 't', 'u', 'u'|HIBIT, 'v', 'w',
'w', 'x', 'y', 'z', 'h', 't',
'w', 'y', 'a', 'a'|HIBIT, 'a'|HIBIT, 'a'|HIBIT,
'e', 'e'|HIBIT, 'e'|HIBIT, 'i', 'o', 'o'|HIBIT,
'o'|HIBIT, 'o'|HIBIT, 'u', 'u'|HIBIT, 'u'|HIBIT, 'y',
};
unsigned int key = (((unsigned int)c)<<3) | 0x00000007;
int iRes = 0;
int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1;
int iLo = 0;
while( iHi>=iLo ){
int iTest = (iHi + iLo) / 2;
if( key >= aDia[iTest] ){
iRes = iTest;
iLo = iTest+1;
}else{
iHi = iTest-1;
}
}
assert( key>=aDia[iRes] );
if( bComplex==0 && (aChar[iRes] & 0x80) ) return c;
return (c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : ((int)aChar[iRes] & 0x7F);
}
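/* aDia[] uses the same pack-then-binary-search idea as aEntry[] in
** sqlite3FtsUnicodeIsalnum(), only with a 3-bit range length.  As an
** illustration (not part of the original sources; assumes <stdio.h>):
** aDia[1]==1797 decodes to first codepoint 1797>>3 == 224 and last
** codepoint 224 + (1797&7) == 229, and aChar[1]=='a', so U+00E0..U+00E5
** are all stripped to plain 'a':
*/
static void removeDiacriticExample(void){
  int c;
  for(c=0xE0; c<=0xE5; c++){
    printf("U+%04X -> %c\n", c, remove_diacritic(c, 0));  /* always 'a' */
  }
}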
/*
** Return true if the argument interpreted as a unicode codepoint
** is a diacritical modifier character.
*/
int sqlite3FtsUnicodeIsdiacritic(int c){
unsigned int mask0 = 0x08029FDF;
unsigned int mask1 = 0x000361F8;
if( c<768 || c>817 ) return 0;
return (c < 768+32) ?
(mask0 & ((unsigned int)1 << (c-768))) :
(mask1 & ((unsigned int)1 << (c-768-32)));
}
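/* A quick illustration of the bitmask test above (illustrative only;
** assumes <stdio.h>; a non-zero result means "is a diacritic"):
*/
static void isDiacriticExample(void){
  printf("%d %d %d\n",
    sqlite3FtsUnicodeIsdiacritic(0x0300)!=0,   /* 1: COMBINING GRAVE ACCENT */
    sqlite3FtsUnicodeIsdiacritic(0x0301)!=0,   /* 1: COMBINING ACUTE ACCENT */
    sqlite3FtsUnicodeIsdiacritic('A')!=0);     /* 0: not a combining mark */
}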
/*
** Interpret the argument as a unicode codepoint. If the codepoint
** is an upper case character that has a lower case equivalent,
** return the codepoint corresponding to the lower case version.
** Otherwise, return a copy of the argument.
**
** The results are undefined if the value passed to this function
** is less than zero.
*/
int sqlite3FtsUnicodeFold(int c, int eRemoveDiacritic){
/* Each entry in the following array defines a rule for folding a range
** of codepoints to lower case. The rule applies to a range of nRange
** codepoints starting at codepoint iCode.
**
** If the least significant bit in flags is clear, then the rule applies
** to all nRange codepoints (i.e. all nRange codepoints are upper case and
** need to be folded). Or, if it is set, then the rule only applies to
** every second codepoint in the range, starting with codepoint C.
**
** The 7 most significant bits in flags are an index into the aiOff[]
** array. If a specific codepoint C does require folding, then its lower
** case equivalent is ((C + aiOff[flags>>1]) & 0xFFFF).
**
** The contents of this array are generated by parsing the CaseFolding.txt
** file distributed as part of the "Unicode Character Database". See
** http://www.unicode.org for details.
*/
static const struct TableEntry {
unsigned short iCode;
unsigned char flags;
unsigned char nRange;
} aEntry[] = {
{65, 14, 26}, {181, 64, 1}, {192, 14, 23},
{216, 14, 7}, {256, 1, 48}, {306, 1, 6},
{313, 1, 16}, {330, 1, 46}, {376, 116, 1},
{377, 1, 6}, {383, 104, 1}, {385, 50, 1},
{386, 1, 4}, {390, 44, 1}, {391, 0, 1},
{393, 42, 2}, {395, 0, 1}, {398, 32, 1},
{399, 38, 1}, {400, 40, 1}, {401, 0, 1},
{403, 42, 1}, {404, 46, 1}, {406, 52, 1},
{407, 48, 1}, {408, 0, 1}, {412, 52, 1},
{413, 54, 1}, {415, 56, 1}, {416, 1, 6},
{422, 60, 1}, {423, 0, 1}, {425, 60, 1},
{428, 0, 1}, {430, 60, 1}, {431, 0, 1},
{433, 58, 2}, {435, 1, 4}, {439, 62, 1},
{440, 0, 1}, {444, 0, 1}, {452, 2, 1},
{453, 0, 1}, {455, 2, 1}, {456, 0, 1},
{458, 2, 1}, {459, 1, 18}, {478, 1, 18},
{497, 2, 1}, {498, 1, 4}, {502, 122, 1},
{503, 134, 1}, {504, 1, 40}, {544, 110, 1},
{546, 1, 18}, {570, 70, 1}, {571, 0, 1},
{573, 108, 1}, {574, 68, 1}, {577, 0, 1},
{579, 106, 1}, {580, 28, 1}, {581, 30, 1},
{582, 1, 10}, {837, 36, 1}, {880, 1, 4},
{886, 0, 1}, {902, 18, 1}, {904, 16, 3},
{908, 26, 1}, {910, 24, 2}, {913, 14, 17},
{931, 14, 9}, {962, 0, 1}, {975, 4, 1},
{976, 140, 1}, {977, 142, 1}, {981, 146, 1},
{982, 144, 1}, {984, 1, 24}, {1008, 136, 1},
{1009, 138, 1}, {1012, 130, 1}, {1013, 128, 1},
{1015, 0, 1}, {1017, 152, 1}, {1018, 0, 1},
{1021, 110, 3}, {1024, 34, 16}, {1040, 14, 32},
{1120, 1, 34}, {1162, 1, 54}, {1216, 6, 1},
{1217, 1, 14}, {1232, 1, 88}, {1329, 22, 38},
{4256, 66, 38}, {4295, 66, 1}, {4301, 66, 1},
{7680, 1, 150}, {7835, 132, 1}, {7838, 96, 1},
{7840, 1, 96}, {7944, 150, 8}, {7960, 150, 6},
{7976, 150, 8}, {7992, 150, 8}, {8008, 150, 6},
{8025, 151, 8}, {8040, 150, 8}, {8072, 150, 8},
{8088, 150, 8}, {8104, 150, 8}, {8120, 150, 2},
{8122, 126, 2}, {8124, 148, 1}, {8126, 100, 1},
{8136, 124, 4}, {8140, 148, 1}, {8152, 150, 2},
{8154, 120, 2}, {8168, 150, 2}, {8170, 118, 2},
{8172, 152, 1}, {8184, 112, 2}, {8186, 114, 2},
{8188, 148, 1}, {8486, 98, 1}, {8490, 92, 1},
{8491, 94, 1}, {8498, 12, 1}, {8544, 8, 16},
{8579, 0, 1}, {9398, 10, 26}, {11264, 22, 47},
{11360, 0, 1}, {11362, 88, 1}, {11363, 102, 1},
{11364, 90, 1}, {11367, 1, 6}, {11373, 84, 1},
{11374, 86, 1}, {11375, 80, 1}, {11376, 82, 1},
{11378, 0, 1}, {11381, 0, 1}, {11390, 78, 2},
{11392, 1, 100}, {11499, 1, 4}, {11506, 0, 1},
{42560, 1, 46}, {42624, 1, 24}, {42786, 1, 14},
{42802, 1, 62}, {42873, 1, 4}, {42877, 76, 1},
{42878, 1, 10}, {42891, 0, 1}, {42893, 74, 1},
{42896, 1, 4}, {42912, 1, 10}, {42922, 72, 1},
{65313, 14, 26},
};
static const unsigned short aiOff[] = {
1, 2, 8, 15, 16, 26, 28, 32,
37, 38, 40, 48, 63, 64, 69, 71,
79, 80, 116, 202, 203, 205, 206, 207,
209, 210, 211, 213, 214, 217, 218, 219,
775, 7264, 10792, 10795, 23228, 23256, 30204, 54721,
54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274,
57921, 58019, 58363, 61722, 65268, 65341, 65373, 65406,
65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462,
65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511,
65514, 65521, 65527, 65528, 65529,
};
int ret = c;
assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );
if( c<128 ){
if( c>='A' && c<='Z' ) ret = c + ('a' - 'A');
}else if( c<65536 ){
const struct TableEntry *p;
int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
int iLo = 0;
int iRes = -1;
assert( c>aEntry[0].iCode );
while( iHi>=iLo ){
int iTest = (iHi + iLo) / 2;
int cmp = (c - aEntry[iTest].iCode);
if( cmp>=0 ){
iRes = iTest;
iLo = iTest+1;
}else{
iHi = iTest-1;
}
}
assert( iRes>=0 && c>=aEntry[iRes].iCode );
p = &aEntry[iRes];
if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){
ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;
assert( ret>0 );
}
if( eRemoveDiacritic ){
ret = remove_diacritic(ret, eRemoveDiacritic==2);
}
}
else if( c>=66560 && c<66600 ){
ret = c + 40;
}
return ret;
}
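/* A few concrete folds that follow from the table entries above
** (illustrative only; assumes <stdio.h> and that this file is linked in):
*/
static void foldExamples(void){
  printf("%d\n", sqlite3FtsUnicodeFold('A', 0));    /* 97: ASCII fast path, 'A' -> 'a' */
  /* Entry {192,14,23}: flags bit 0 clear, aiOff[14>>1]==32, so all of
  ** U+00C0..U+00D6 fold by +32; U+00C0 becomes U+00E0. */
  printf("%d\n", sqlite3FtsUnicodeFold(0xC0, 0));   /* 224 (U+00E0) */
  /* With diacritic removal the result is further reduced to plain 'a'. */
  printf("%d\n", sqlite3FtsUnicodeFold(0xC0, 1));   /* 97 ('a') */
  /* Entry {256,1,48}: flags bit 0 set, so only every second codepoint is
  ** folded: U+0100 maps to U+0101, while U+0101 is already lower case. */
  printf("%d\n", sqlite3FtsUnicodeFold(0x100, 0));  /* 257 */
  printf("%d\n", sqlite3FtsUnicodeFold(0x101, 0));  /* 257 */
}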
#endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */
#endif /* !defined(SQLITE_DISABLE_FTS3_UNICODE) */

File diff suppressed because it is too large

View File

@ -1,122 +0,0 @@
#--------------------------------------------------------------------------
# This script contains several sub-programs used to test FTS3/FTS4
# performance. It does not run the queries directly, but generates SQL
# scripts that can be run using the shell tool.
#
# The following cases are tested:
#
# 1. Inserting documents into an FTS3 table.
# 2. Optimizing an FTS3 table (i.e. "INSERT INTO t1 VALUES('optimize')").
# 3. Deleting documents from an FTS3 table.
# 4. Querying FTS3 tables.
#
# Number of tokens in vocabulary. And number of tokens in each document.
#
set VOCAB_SIZE 2000
set DOC_SIZE 100
set NUM_INSERTS 100000
set NUM_SELECTS 1000
# Force everything in this script to be deterministic.
#
expr {srand(0)}
proc usage {} {
puts stderr "Usage: $::argv0 <rows> <selects>"
exit -1
}
proc sql {sql} {
puts $::fd $sql
}
# Return a list of $nWord randomly generated tokens each between 2 and 10
# characters in length.
#
proc build_vocab {nWord} {
set ret [list]
set chars [list a b c d e f g h i j k l m n o p q r s t u v w x y z]
for {set i 0} {$i<$nWord} {incr i} {
set len [expr {int((rand()*9.0)+2)}]
set term ""
for {set j 0} {$j<$len} {incr j} {
append term [lindex $chars [expr {int(rand()*[llength $chars])}]]
}
lappend ret $term
}
set ret
}
proc select_term {} {
set n [llength $::vocab]
set t [expr int(rand()*$n*3)]
if {$t>=2*$n} { set t [expr {($t-2*$n)/100}] }
if {$t>=$n} { set t [expr {($t-$n)/10}] }
lindex $::vocab $t
}
proc select_doc {nTerm} {
set ret [list]
for {set i 0} {$i<$nTerm} {incr i} {
lappend ret [select_term]
}
set ret
}
proc test_1 {nInsert} {
sql "PRAGMA synchronous = OFF;"
sql "DROP TABLE IF EXISTS t1;"
sql "CREATE VIRTUAL TABLE t1 USING fts4;"
for {set i 0} {$i < $nInsert} {incr i} {
set doc [select_doc $::DOC_SIZE]
sql "INSERT INTO t1 VALUES('$doc');"
}
}
proc test_2 {} {
sql "INSERT INTO t1(t1) VALUES('optimize');"
}
proc test_3 {nSelect} {
for {set i 0} {$i < $nSelect} {incr i} {
sql "SELECT count(*) FROM t1 WHERE t1 MATCH '[select_term]';"
}
}
proc test_4 {nSelect} {
for {set i 0} {$i < $nSelect} {incr i} {
sql "SELECT count(*) FROM t1 WHERE t1 MATCH '[select_term] [select_term]';"
}
}
if {[llength $argv]!=0} usage
set ::vocab [build_vocab $::VOCAB_SIZE]
set ::fd [open fts3speed_insert.sql w]
test_1 $NUM_INSERTS
close $::fd
set ::fd [open fts3speed_select.sql w]
test_3 $NUM_SELECTS
close $::fd
set ::fd [open fts3speed_select2.sql w]
test_4 $NUM_SELECTS
close $::fd
set ::fd [open fts3speed_optimize.sql w]
test_2
close $::fd
puts "Success. Created files:"
puts " fts3speed_insert.sql"
puts " fts3speed_select.sql"
puts " fts3speed_select2.sql"
puts " fts3speed_optimize.sql"

View File

@ -1,115 +0,0 @@
#!/usr/bin/tclsh
#
# This script builds a single C code file holding all of FTS3 code.
# The name of the output file is fts3amal.c. To build this file,
# first do:
#
# make target_source
#
# The make target above moves all of the source code files into
# a subdirectory named "tsrc". (This script expects to find the files
# there and will not work if they are not found.)
#
# After the "tsrc" directory has been created and populated, run
# this script:
#
# tclsh mkfts3amal.tcl
#
# The amalgamated FTS3 code will be written into fts3amal.c
#
# Open the output file and write a header comment at the beginning
# of the file.
#
set out [open fts3amal.c w]
set today [clock format [clock seconds] -format "%Y-%m-%d %H:%M:%S UTC" -gmt 1]
puts $out [subst \
{/******************************************************************************
** This file is an amalgamation of separate C source files from the SQLite
** Full Text Search extension 2 (fts3). By combining all the individual C
** code files into this single large file, the entire code can be compiled
** as a one translation unit. This allows many compilers to do optimizations
** that would not be possible if the files were compiled separately. It also
** makes the code easier to import into other projects.
**
** This amalgamation was generated on $today.
*/}]
# These are the header files used by FTS3. The first time any of these
# files are seen in a #include statement in the C code, include the complete
# text of the file in-line. The file only needs to be included once.
#
foreach hdr {
fts3.h
fts3_hash.h
fts3_tokenizer.h
sqlite3.h
sqlite3ext.h
} {
set available_hdr($hdr) 1
}
# 78 stars used for comment formatting.
set s78 \
{*****************************************************************************}
# Insert a comment into the code
#
proc section_comment {text} {
global out s78
set n [string length $text]
set nstar [expr {60 - $n}]
set stars [string range $s78 0 $nstar]
puts $out "/************** $text $stars/"
}
# Read the source file named $filename and write it into the
# fts3amal.c output file.  If any #include statements are seen,
# process them appropriately.
#
proc copy_file {filename} {
global seen_hdr available_hdr out
set tail [file tail $filename]
section_comment "Begin file $tail"
set in [open $filename r]
while {![eof $in]} {
set line [gets $in]
if {[regexp {^#\s*include\s+["<]([^">]+)[">]} $line all hdr]} {
if {[info exists available_hdr($hdr)]} {
if {$available_hdr($hdr)} {
section_comment "Include $hdr in the middle of $tail"
copy_file tsrc/$hdr
section_comment "Continuing where we left off in $tail"
}
} elseif {![info exists seen_hdr($hdr)]} {
set seen_hdr($hdr) 1
puts $out $line
}
} elseif {[regexp {^#ifdef __cplusplus} $line]} {
puts $out "#if 0"
} elseif {[regexp {^#line} $line]} {
# Skip #line directives.
} else {
puts $out $line
}
}
close $in
section_comment "End of $tail"
}
# Process the source files. Process files containing commonly
# used subroutines first in order to help the compiler find
# inlining opportunities.
#
foreach file {
fts3.c
fts3_hash.c
fts3_porter.c
fts3_tokenizer.c
fts3_tokenizer1.c
} {
copy_file tsrc/$file
}
close $out

View File

@ -1,16 +0,0 @@
#!/bin/sh
set -e
srcdir=`dirname $(dirname $(dirname $(dirname $0)))`
./testfixture $srcdir/test/fts3.test --output=fts3cov-out.txt
echo ""
for f in `ls $srcdir/ext/fts3/*.c`
do
f=`basename $f`
printf '%s: ' "$f"
gcov -b $f | grep Taken | sed 's/Taken at least once://'
done

View File

@ -1,875 +0,0 @@
/*
** This program is a debugging and analysis utility that displays
** information about an FTS3 or FTS4 index.
**
** Link this program against the SQLite3 amalgamation with the
** SQLITE_ENABLE_FTS4 compile-time option. Then run it as:
**
** fts3view DATABASE
**
** to get a list of all FTS3/4 tables in DATABASE, or do
**
** fts3view DATABASE TABLE COMMAND ....
**
** to see various aspects of the TABLE table. Type fts3view with no
** arguments for a list of available COMMANDs.
*/
#include <stdio.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "sqlite3.h"
/*
** Extra command-line arguments:
*/
int nExtra;
char **azExtra;
/*
** Look for a command-line argument.
*/
const char *findOption(const char *zName, int hasArg, const char *zDefault){
int i;
const char *zResult = zDefault;
for(i=0; i<nExtra; i++){
const char *z = azExtra[i];
while( z[0]=='-' ) z++;
if( strcmp(z, zName)==0 ){
int j = 1;
if( hasArg==0 || i==nExtra-1 ) j = 0;
zResult = azExtra[i+j];
while( i+j<nExtra ){
azExtra[i] = azExtra[i+j+1];
i++;
}
break;
}
}
return zResult;
}
/*
** Prepare an SQL query
*/
static sqlite3_stmt *prepare(sqlite3 *db, const char *zFormat, ...){
va_list ap;
char *zSql;
sqlite3_stmt *pStmt;
int rc;
va_start(ap, zFormat);
zSql = sqlite3_vmprintf(zFormat, ap);
va_end(ap);
rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
if( rc ){
fprintf(stderr, "Error: %s\nSQL: %s\n", sqlite3_errmsg(db), zSql);
exit(1);
}
sqlite3_free(zSql);
return pStmt;
}
/*
** Run an SQL statement
*/
static int runSql(sqlite3 *db, const char *zFormat, ...){
va_list ap;
char *zSql;
int rc;
va_start(ap, zFormat);
zSql = sqlite3_vmprintf(zFormat, ap);
rc = sqlite3_exec(db, zSql, 0, 0, 0);
va_end(ap);
return rc;
}
/*
** Show the table schema
*/
static void showSchema(sqlite3 *db, const char *zTab){
sqlite3_stmt *pStmt;
pStmt = prepare(db,
"SELECT sql FROM sqlite_schema"
" WHERE name LIKE '%q%%'"
" ORDER BY 1",
zTab);
while( sqlite3_step(pStmt)==SQLITE_ROW ){
printf("%s;\n", sqlite3_column_text(pStmt, 0));
}
sqlite3_finalize(pStmt);
pStmt = prepare(db, "PRAGMA page_size");
while( sqlite3_step(pStmt)==SQLITE_ROW ){
printf("PRAGMA page_size=%s;\n", sqlite3_column_text(pStmt, 0));
}
sqlite3_finalize(pStmt);
pStmt = prepare(db, "PRAGMA journal_mode");
while( sqlite3_step(pStmt)==SQLITE_ROW ){
printf("PRAGMA journal_mode=%s;\n", sqlite3_column_text(pStmt, 0));
}
sqlite3_finalize(pStmt);
pStmt = prepare(db, "PRAGMA auto_vacuum");
while( sqlite3_step(pStmt)==SQLITE_ROW ){
const char *zType = "???";
switch( sqlite3_column_int(pStmt, 0) ){
case 0: zType = "OFF"; break;
case 1: zType = "FULL"; break;
case 2: zType = "INCREMENTAL"; break;
}
printf("PRAGMA auto_vacuum=%s;\n", zType);
}
sqlite3_finalize(pStmt);
pStmt = prepare(db, "PRAGMA encoding");
while( sqlite3_step(pStmt)==SQLITE_ROW ){
printf("PRAGMA encoding=%s;\n", sqlite3_column_text(pStmt, 0));
}
sqlite3_finalize(pStmt);
}
/*
** Read a 64-bit variable-length integer from memory starting at p[0].
** Return the number of bytes read, or 0 on error.
** The value is stored in *v.
*/
int getVarint(const unsigned char *p, sqlite_int64 *v){
const unsigned char *q = p;
sqlite_uint64 x = 0, y = 1;
while( (*q&0x80)==0x80 && q-(unsigned char *)p<9 ){
x += y * (*q++ & 0x7f);
y <<= 7;
}
x += y * (*q++);
*v = (sqlite_int64) x;
return (int) (q - (unsigned char *)p);
}
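/* A quick sanity check of the decoder above (illustrative only): the FTS3
** varint for 300 stores the low-order 7-bit group first, with the 0x80 bit
** marking "more bytes follow", so it is the two bytes 0xAC 0x02.
*/
static void varintExample(void){
  static const unsigned char a[] = { 0xAC, 0x02 };  /* 44 + 128*2 == 300 */
  sqlite_int64 v;
  int n = getVarint(a, &v);
  printf("read %d bytes, value %lld\n", n, v);      /* "read 2 bytes, value 300" */
}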
/* Show the content of the %_stat table
*/
static void showStat(sqlite3 *db, const char *zTab){
sqlite3_stmt *pStmt;
pStmt = prepare(db, "SELECT id, value FROM '%q_stat'", zTab);
while( sqlite3_step(pStmt)==SQLITE_ROW ){
printf("stat[%d] =", sqlite3_column_int(pStmt, 0));
switch( sqlite3_column_type(pStmt, 1) ){
case SQLITE_INTEGER: {
printf(" %d\n", sqlite3_column_int(pStmt, 1));
break;
}
case SQLITE_BLOB: {
unsigned char *x = (unsigned char*)sqlite3_column_blob(pStmt, 1);
int len = sqlite3_column_bytes(pStmt, 1);
int i = 0;
sqlite3_int64 v;
while( i<len ){
i += getVarint(x+i, &v);
printf(" %lld", v);
}
printf("\n");
break;
}
}
}
sqlite3_finalize(pStmt);
}
/*
** Report on the vocabulary. This creates an fts4aux table with a random
** name, but deletes it in the end.
*/
static void showVocabulary(sqlite3 *db, const char *zTab){
char *zAux;
sqlite3_uint64 r;
sqlite3_stmt *pStmt;
int nDoc = 0;
int nToken = 0;
int nOccurrence = 0;
int nTop;
int n, i;
sqlite3_randomness(sizeof(r), &r);
zAux = sqlite3_mprintf("viewer_%llx", r);
runSql(db, "BEGIN");
pStmt = prepare(db, "SELECT count(*) FROM %Q", zTab);
while( sqlite3_step(pStmt)==SQLITE_ROW ){
nDoc = sqlite3_column_int(pStmt, 0);
}
sqlite3_finalize(pStmt);
printf("Number of documents...................... %9d\n", nDoc);
runSql(db, "CREATE VIRTUAL TABLE %s USING fts4aux(%Q)", zAux, zTab);
pStmt = prepare(db,
"SELECT count(*), sum(occurrences) FROM %s WHERE col='*'",
zAux);
while( sqlite3_step(pStmt)==SQLITE_ROW ){
nToken = sqlite3_column_int(pStmt, 0);
nOccurrence = sqlite3_column_int(pStmt, 1);
}
sqlite3_finalize(pStmt);
printf("Total tokens in all documents............ %9d\n", nOccurrence);
printf("Total number of distinct tokens.......... %9d\n", nToken);
if( nToken==0 ) goto end_vocab;
n = 0;
pStmt = prepare(db, "SELECT count(*) FROM %s"
" WHERE col='*' AND occurrences==1", zAux);
while( sqlite3_step(pStmt)==SQLITE_ROW ){
n = sqlite3_column_int(pStmt, 0);
}
sqlite3_finalize(pStmt);
printf("Tokens used exactly once................. %9d %5.2f%%\n",
n, n*100.0/nToken);
n = 0;
pStmt = prepare(db, "SELECT count(*) FROM %s"
" WHERE col='*' AND documents==1", zAux);
while( sqlite3_step(pStmt)==SQLITE_ROW ){
n = sqlite3_column_int(pStmt, 0);
}
sqlite3_finalize(pStmt);
printf("Tokens used in only one document......... %9d %5.2f%%\n",
n, n*100.0/nToken);
if( nDoc>=2000 ){
n = 0;
pStmt = prepare(db, "SELECT count(*) FROM %s"
" WHERE col='*' AND occurrences<=%d", zAux, nDoc/1000);
while( sqlite3_step(pStmt)==SQLITE_ROW ){
n = sqlite3_column_int(pStmt, 0);
}
sqlite3_finalize(pStmt);
printf("Tokens used in 0.1%% or less of docs...... %9d %5.2f%%\n",
n, n*100.0/nToken);
}
if( nDoc>=200 ){
n = 0;
pStmt = prepare(db, "SELECT count(*) FROM %s"
" WHERE col='*' AND occurrences<=%d", zAux, nDoc/100);
while( sqlite3_step(pStmt)==SQLITE_ROW ){
n = sqlite3_column_int(pStmt, 0);
}
sqlite3_finalize(pStmt);
printf("Tokens used in 1%% or less of docs........ %9d %5.2f%%\n",
n, n*100.0/nToken);
}
nTop = atoi(findOption("top", 1, "25"));
printf("The %d most common tokens:\n", nTop);
pStmt = prepare(db,
"SELECT term, documents FROM %s"
" WHERE col='*'"
" ORDER BY documents DESC, term"
" LIMIT %d", zAux, nTop);
i = 0;
while( sqlite3_step(pStmt)==SQLITE_ROW ){
i++;
n = sqlite3_column_int(pStmt, 1);
printf(" %2d. %-30s %9d docs %5.2f%%\n", i,
sqlite3_column_text(pStmt, 0), n, n*100.0/nDoc);
}
sqlite3_finalize(pStmt);
end_vocab:
runSql(db, "ROLLBACK");
sqlite3_free(zAux);
}
/*
** Report on the number and sizes of segments
*/
static void showSegmentStats(sqlite3 *db, const char *zTab){
sqlite3_stmt *pStmt;
int nSeg = 0;
sqlite3_int64 szSeg = 0, mxSeg = 0;
int nIdx = 0;
sqlite3_int64 szIdx = 0, mxIdx = 0;
int nRoot = 0;
sqlite3_int64 szRoot = 0, mxRoot = 0;
sqlite3_int64 mx;
int nLeaf;
int n;
int pgsz;
int mxLevel;
int i;
pStmt = prepare(db,
"SELECT count(*), sum(length(block)), max(length(block))"
" FROM '%q_segments'",
zTab);
while( sqlite3_step(pStmt)==SQLITE_ROW ){
nSeg = sqlite3_column_int(pStmt, 0);
szSeg = sqlite3_column_int64(pStmt, 1);
mxSeg = sqlite3_column_int64(pStmt, 2);
}
sqlite3_finalize(pStmt);
pStmt = prepare(db,
"SELECT count(*), sum(length(block)), max(length(block))"
" FROM '%q_segments' a JOIN '%q_segdir' b"
" WHERE a.blockid BETWEEN b.leaves_end_block+1 AND b.end_block",
zTab, zTab);
while( sqlite3_step(pStmt)==SQLITE_ROW ){
nIdx = sqlite3_column_int(pStmt, 0);
szIdx = sqlite3_column_int64(pStmt, 1);
mxIdx = sqlite3_column_int64(pStmt, 2);
}
sqlite3_finalize(pStmt);
pStmt = prepare(db,
"SELECT count(*), sum(length(root)), max(length(root))"
" FROM '%q_segdir'",
zTab);
while( sqlite3_step(pStmt)==SQLITE_ROW ){
nRoot = sqlite3_column_int(pStmt, 0);
szRoot = sqlite3_column_int64(pStmt, 1);
mxRoot = sqlite3_column_int64(pStmt, 2);
}
sqlite3_finalize(pStmt);
printf("Number of segments....................... %9d\n", nSeg+nRoot);
printf("Number of leaf segments.................. %9d\n", nSeg-nIdx);
printf("Number of index segments................. %9d\n", nIdx);
printf("Number of root segments.................. %9d\n", nRoot);
printf("Total size of all segments............... %9lld\n", szSeg+szRoot);
printf("Total size of all leaf segments.......... %9lld\n", szSeg-szIdx);
printf("Total size of all index segments......... %9lld\n", szIdx);
printf("Total size of all root segments.......... %9lld\n", szRoot);
if( nSeg>0 ){
printf("Average size of all segments............. %11.1f\n",
(double)(szSeg+szRoot)/(double)(nSeg+nRoot));
printf("Average size of leaf segments............ %11.1f\n",
(double)(szSeg-szIdx)/(double)(nSeg-nIdx));
}
if( nIdx>0 ){
printf("Average size of index segments........... %11.1f\n",
(double)szIdx/(double)nIdx);
}
if( nRoot>0 ){
printf("Average size of root segments............ %11.1f\n",
(double)szRoot/(double)nRoot);
}
mx = mxSeg;
if( mx<mxRoot ) mx = mxRoot;
printf("Maximum segment size..................... %9lld\n", mx);
printf("Maximum index segment size............... %9lld\n", mxIdx);
printf("Maximum root segment size................ %9lld\n", mxRoot);
pStmt = prepare(db, "PRAGMA page_size");
pgsz = 1024;
while( sqlite3_step(pStmt)==SQLITE_ROW ){
pgsz = sqlite3_column_int(pStmt, 0);
}
sqlite3_finalize(pStmt);
printf("Database page size....................... %9d\n", pgsz);
pStmt = prepare(db,
"SELECT count(*)"
" FROM '%q_segments' a JOIN '%q_segdir' b"
" WHERE a.blockid BETWEEN b.start_block AND b.leaves_end_block"
" AND length(a.block)>%d",
zTab, zTab, pgsz-45);
n = 0;
while( sqlite3_step(pStmt)==SQLITE_ROW ){
n = sqlite3_column_int(pStmt, 0);
}
sqlite3_finalize(pStmt);
nLeaf = nSeg - nIdx;
printf("Leaf segments larger than %5d bytes.... %9d %5.2f%%\n",
pgsz-45, n, nLeaf>0 ? n*100.0/nLeaf : 0.0);
pStmt = prepare(db, "SELECT max(level%%1024) FROM '%q_segdir'", zTab);
mxLevel = 0;
while( sqlite3_step(pStmt)==SQLITE_ROW ){
mxLevel = sqlite3_column_int(pStmt, 0);
}
sqlite3_finalize(pStmt);
for(i=0; i<=mxLevel; i++){
pStmt = prepare(db,
"SELECT count(*), sum(len), avg(len), max(len), sum(len>%d),"
" count(distinct idx)"
" FROM (SELECT length(a.block) AS len, idx"
" FROM '%q_segments' a JOIN '%q_segdir' b"
" WHERE (a.blockid BETWEEN b.start_block"
" AND b.leaves_end_block)"
" AND (b.level%%1024)==%d)",
pgsz-45, zTab, zTab, i);
if( sqlite3_step(pStmt)==SQLITE_ROW
&& (nLeaf = sqlite3_column_int(pStmt, 0))>0
){
sqlite3_int64 sz;
nIdx = sqlite3_column_int(pStmt, 5);
printf("For level %d:\n", i);
printf(" Number of indexes...................... %9d\n", nIdx);
printf(" Number of leaf segments................ %9d\n", nLeaf);
if( nIdx>1 ){
printf(" Average leaf segments per index........ %11.1f\n",
(double)nLeaf/(double)nIdx);
}
printf(" Total size of all leaf segments........ %9lld\n",
(sz = sqlite3_column_int64(pStmt, 1)));
printf(" Average size of leaf segments.......... %11.1f\n",
sqlite3_column_double(pStmt, 2));
if( nIdx>1 ){
printf(" Average leaf segment size per index.... %11.1f\n",
(double)sz/(double)nIdx);
}
printf(" Maximum leaf segment size.............. %9lld\n",
sqlite3_column_int64(pStmt, 3));
n = sqlite3_column_int(pStmt, 4);
printf(" Leaf segments larger than %5d bytes.. %9d %5.2f%%\n",
pgsz-45, n, n*100.0/nLeaf);
}
sqlite3_finalize(pStmt);
}
}
/*
** Print a single "tree" line of the segdir map output.
*/
static void printTreeLine(sqlite3_int64 iLower, sqlite3_int64 iUpper){
printf(" tree %9lld", iLower);
if( iUpper>iLower ){
printf(" thru %9lld (%lld blocks)", iUpper, iUpper-iLower+1);
}
printf("\n");
}
/*
** Check to see if the block of a %_segments entry is NULL.
*/
static int isNullSegment(sqlite3 *db, const char *zTab, sqlite3_int64 iBlockId){
sqlite3_stmt *pStmt;
int rc = 1;
pStmt = prepare(db, "SELECT block IS NULL FROM '%q_segments'"
" WHERE blockid=%lld", zTab, iBlockId);
if( sqlite3_step(pStmt)==SQLITE_ROW ){
rc = sqlite3_column_int(pStmt, 0);
}
sqlite3_finalize(pStmt);
return rc;
}
/*
** Show a map of segments derived from the %_segdir table.
*/
static void showSegdirMap(sqlite3 *db, const char *zTab){
int mxIndex, iIndex;
sqlite3_stmt *pStmt = 0;
sqlite3_stmt *pStmt2 = 0;
int prevLevel;
pStmt = prepare(db, "SELECT max(level/1024) FROM '%q_segdir'", zTab);
if( sqlite3_step(pStmt)==SQLITE_ROW ){
mxIndex = sqlite3_column_int(pStmt, 0);
}else{
mxIndex = 0;
}
sqlite3_finalize(pStmt);
printf("Number of inverted indices............... %3d\n", mxIndex+1);
pStmt = prepare(db,
"SELECT level, idx, start_block, leaves_end_block, end_block, rowid"
" FROM '%q_segdir'"
" WHERE level/1024==?"
" ORDER BY level DESC, idx",
zTab);
pStmt2 = prepare(db,
"SELECT blockid FROM '%q_segments'"
" WHERE blockid BETWEEN ? AND ? ORDER BY blockid",
zTab);
for(iIndex=0; iIndex<=mxIndex; iIndex++){
if( mxIndex>0 ){
printf("**************************** Index %d "
"****************************\n", iIndex);
}
sqlite3_bind_int(pStmt, 1, iIndex);
prevLevel = -1;
while( sqlite3_step(pStmt)==SQLITE_ROW ){
int iLevel = sqlite3_column_int(pStmt, 0)%1024;
int iIdx = sqlite3_column_int(pStmt, 1);
sqlite3_int64 iStart = sqlite3_column_int64(pStmt, 2);
sqlite3_int64 iLEnd = sqlite3_column_int64(pStmt, 3);
sqlite3_int64 iEnd = sqlite3_column_int64(pStmt, 4);
char rtag[20];
if( iLevel!=prevLevel ){
printf("level %2d idx %2d", iLevel, iIdx);
prevLevel = iLevel;
}else{
printf(" idx %2d", iIdx);
}
sqlite3_snprintf(sizeof(rtag), rtag, "r%lld",
sqlite3_column_int64(pStmt,5));
printf(" root %9s\n", rtag);
if( iLEnd>iStart ){
sqlite3_int64 iLower, iPrev = 0, iX;
if( iLEnd+1<=iEnd ){
sqlite3_bind_int64(pStmt2, 1, iLEnd+1);
sqlite3_bind_int64(pStmt2, 2, iEnd);
iLower = -1;
while( sqlite3_step(pStmt2)==SQLITE_ROW ){
iX = sqlite3_column_int64(pStmt2, 0);
if( iLower<0 ){
iLower = iPrev = iX;
}else if( iX==iPrev+1 ){
iPrev = iX;
}else{
printTreeLine(iLower, iPrev);
iLower = iPrev = iX;
}
}
sqlite3_reset(pStmt2);
if( iLower>=0 ){
if( iLower==iPrev && iLower==iEnd
&& isNullSegment(db,zTab,iLower)
){
printf(" null %9lld\n", iLower);
}else{
printTreeLine(iLower, iPrev);
}
}
}
printf(" leaves %9lld thru %9lld (%lld blocks)\n",
iStart, iLEnd, iLEnd - iStart + 1);
}
}
sqlite3_reset(pStmt);
}
sqlite3_finalize(pStmt);
sqlite3_finalize(pStmt2);
}
/*
** Decode a single segment block and display the results on stdout.
*/
static void decodeSegment(
const unsigned char *aData, /* Content to print */
int nData /* Number of bytes of content */
){
sqlite3_int64 iChild = 0;
sqlite3_int64 iPrefix;
sqlite3_int64 nTerm;
sqlite3_int64 n;
sqlite3_int64 iDocsz;
int iHeight;
sqlite3_int64 i = 0;
int cnt = 0;
char zTerm[1000];
i += getVarint(aData, &n);
iHeight = (int)n;
printf("height: %d\n", iHeight);
if( iHeight>0 ){
i += getVarint(aData+i, &iChild);
printf("left-child: %lld\n", iChild);
}
while( i<nData ){
if( (cnt++)>0 ){
i += getVarint(aData+i, &iPrefix);
}else{
iPrefix = 0;
}
i += getVarint(aData+i, &nTerm);
if( iPrefix+nTerm+1 >= sizeof(zTerm) ){
fprintf(stderr, "term to long\n");
exit(1);
}
memcpy(zTerm+iPrefix, aData+i, (size_t)nTerm);
zTerm[iPrefix+nTerm] = 0;
i += nTerm;
if( iHeight==0 ){
i += getVarint(aData+i, &iDocsz);
printf("term: %-25s doclist %7lld bytes offset %lld\n", zTerm, iDocsz, i);
i += iDocsz;
}else{
printf("term: %-25s child %lld\n", zTerm, ++iChild);
}
}
}
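/* For reference, an illustrative hand-built input: a leaf node (height 0)
** holding the single term "abc" whose doclist is the three bytes
** 05 02 00.  decodeSegment() prints "height: 0" followed by a "term: abc"
** line reporting a 3-byte doclist at offset 6.
*/
static void decodeSegmentExample(void){
  static const unsigned char aLeaf[] = {
    0x00,                  /* height 0: this is a leaf node */
    0x03, 'a', 'b', 'c',   /* first term: length 3, no prefix byte */
    0x03,                  /* size of the doclist that follows */
    0x05, 0x02, 0x00       /* doclist: docid 5, position 0 of column 0 */
  };
  decodeSegment(aLeaf, (int)sizeof(aLeaf));
}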
/*
** Print a blob as hex and ASCII.
*/
static void printBlob(
const unsigned char *aData, /* Content to print */
int nData /* Number of bytes of content */
){
int i, j;
const char *zOfstFmt;
const int perLine = 16;
if( (nData&~0xfff)==0 ){
zOfstFmt = " %03x: ";
}else if( (nData&~0xffff)==0 ){
zOfstFmt = " %04x: ";
}else if( (nData&~0xfffff)==0 ){
zOfstFmt = " %05x: ";
}else if( (nData&~0xffffff)==0 ){
zOfstFmt = " %06x: ";
}else{
zOfstFmt = " %08x: ";
}
for(i=0; i<nData; i += perLine){
fprintf(stdout, zOfstFmt, i);
for(j=0; j<perLine; j++){
if( i+j>=nData ){
fprintf(stdout, " ");
}else{
fprintf(stdout,"%02x ", aData[i+j]);
}
}
for(j=0; j<perLine; j++){
if( i+j>=nData ){
fprintf(stdout, " ");
}else{
fprintf(stdout,"%c", isprint(aData[i+j]) ? aData[i+j] : '.');
}
}
fprintf(stdout,"\n");
}
}
/*
** Convert text to a 64-bit integer
*/
static sqlite3_int64 atoi64(const char *z){
sqlite3_int64 v = 0;
while( z[0]>='0' && z[0]<='9' ){
v = v*10 + z[0] - '0';
z++;
}
return v;
}
/*
** Return a prepared statement which, when stepped, will return in its
** first column the blob associated with segment zId. If zId begins with
** 'r' then it is a rowid of a %_segdir entry. Otherwise it is a
** %_segment entry.
*/
static sqlite3_stmt *prepareToGetSegment(
sqlite3 *db, /* The database */
const char *zTab, /* The FTS3/4 table name */
const char *zId /* ID of the segment to open */
){
sqlite3_stmt *pStmt;
if( zId[0]=='r' ){
pStmt = prepare(db, "SELECT root FROM '%q_segdir' WHERE rowid=%lld",
zTab, atoi64(zId+1));
}else{
pStmt = prepare(db, "SELECT block FROM '%q_segments' WHERE blockid=%lld",
zTab, atoi64(zId));
}
return pStmt;
}
/*
** Print the content of a segment or of the root of a segdir. The segment
** or root is identified by azExtra[0]. If the first character of azExtra[0]
** is 'r' then the remainder is the integer rowid of the %_segdir entry.
** If the first character of azExtra[0] is not 'r', then all of
** azExtra[0] is an integer which is the block number.
**
** If the --raw option is present in azExtra, then a hex dump is provided.
** Otherwise a decoding is shown.
*/
static void showSegment(sqlite3 *db, const char *zTab){
const unsigned char *aData;
int nData;
sqlite3_stmt *pStmt;
pStmt = prepareToGetSegment(db, zTab, azExtra[0]);
if( sqlite3_step(pStmt)!=SQLITE_ROW ){
sqlite3_finalize(pStmt);
return;
}
nData = sqlite3_column_bytes(pStmt, 0);
aData = sqlite3_column_blob(pStmt, 0);
printf("Segment %s of size %d bytes:\n", azExtra[0], nData);
if( findOption("raw", 0, 0)!=0 ){
printBlob(aData, nData);
}else{
decodeSegment(aData, nData);
}
sqlite3_finalize(pStmt);
}
/*
** Decode a single doclist and display the results on stdout.
*/
static void decodeDoclist(
const unsigned char *aData, /* Content to print */
int nData /* Number of bytes of content */
){
sqlite3_int64 iPrevDocid = 0;
sqlite3_int64 iDocid;
sqlite3_int64 iPos;
sqlite3_int64 iPrevPos = 0;
sqlite3_int64 iCol;
int i = 0;
while( i<nData ){
i += getVarint(aData+i, &iDocid);
printf("docid %lld col0", iDocid+iPrevDocid);
iPrevDocid += iDocid;
iPrevPos = 0;
while( 1 ){
i += getVarint(aData+i, &iPos);
if( iPos==1 ){
i += getVarint(aData+i, &iCol);
printf(" col%lld", iCol);
iPrevPos = 0;
}else if( iPos==0 ){
printf("\n");
break;
}else{
iPrevPos += iPos - 2;
printf(" %lld", iPrevPos);
}
}
}
}
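/* The doclist format decoded above stores docids as deltas from the
** previous docid, each followed by a position list in which the value 1
** introduces a new column number, 0 terminates the entry, and any other
** value is a position delta plus 2.  As an illustrative sketch, the three
** bytes 05 02 00 therefore decode as docid 5 with a single hit at
** position 0 of column 0:
*/
static void decodeDoclistExample(void){
  static const unsigned char a[] = { 0x05, 0x02, 0x00 };
  decodeDoclist(a, (int)sizeof(a));  /* prints "docid 5 col0 0" */
}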
/*
** Print the content of a doclist. The segment or segdir-root is
** identified by azExtra[0]. If the first character of azExtra[0]
** is 'r' then the remainder is the integer rowid of the %_segdir entry.
** If the first character of azExtra[0] is not 'r', then all of
** azExtra[0] is an integer which is the block number. The offset
** into the segment is identified by azExtra[1]. The size of the doclist
** is azExtra[2].
**
** If the --raw option is present in azExtra, then a hex dump is provided.
** Otherwise a decoding is shown.
*/
static void showDoclist(sqlite3 *db, const char *zTab){
const unsigned char *aData;
sqlite3_int64 offset;
int nData;
sqlite3_stmt *pStmt;
offset = atoi64(azExtra[1]);
nData = atoi(azExtra[2]);
pStmt = prepareToGetSegment(db, zTab, azExtra[0]);
if( sqlite3_step(pStmt)!=SQLITE_ROW ){
sqlite3_finalize(pStmt);
return;
}
aData = sqlite3_column_blob(pStmt, 0);
printf("Doclist at %s offset %lld of size %d bytes:\n",
azExtra[0], offset, nData);
if( findOption("raw", 0, 0)!=0 ){
printBlob(aData+offset, nData);
}else{
decodeDoclist(aData+offset, nData);
}
sqlite3_finalize(pStmt);
}
/*
** Show the top N largest segments
*/
static void listBigSegments(sqlite3 *db, const char *zTab){
int nTop, i;
sqlite3_stmt *pStmt;
sqlite3_int64 sz;
sqlite3_int64 id;
nTop = atoi(findOption("top", 1, "25"));
printf("The %d largest segments:\n", nTop);
pStmt = prepare(db,
"SELECT blockid, length(block) AS len FROM '%q_segments'"
" ORDER BY 2 DESC, 1"
" LIMIT %d", zTab, nTop);
i = 0;
while( sqlite3_step(pStmt)==SQLITE_ROW ){
i++;
id = sqlite3_column_int64(pStmt, 0);
sz = sqlite3_column_int64(pStmt, 1);
printf(" %2d. %9lld size %lld\n", i, id, sz);
}
sqlite3_finalize(pStmt);
}
static void usage(const char *argv0){
fprintf(stderr, "Usage: %s DATABASE\n"
" or: %s DATABASE FTS3TABLE ARGS...\n", argv0, argv0);
fprintf(stderr,
"ARGS:\n"
" big-segments [--top N] show the largest segments\n"
" doclist BLOCKID OFFSET SIZE [--raw] Decode a doclist\n"
" schema FTS table schema\n"
" segdir directory of segments\n"
" segment BLOCKID [--raw] content of a segment\n"
" segment-stats info on segment sizes\n"
" stat the %%_stat table\n"
" vocabulary [--top N] document vocabulary\n"
);
exit(1);
}
int main(int argc, char **argv){
sqlite3 *db;
int rc;
const char *zTab;
const char *zCmd;
if( argc<2 ) usage(argv[0]);
rc = sqlite3_open(argv[1], &db);
if( rc ){
fprintf(stderr, "Cannot open %s\n", argv[1]);
exit(1);
}
if( argc==2 ){
sqlite3_stmt *pStmt;
int cnt = 0;
pStmt = prepare(db, "SELECT b.sql"
" FROM sqlite_schema a, sqlite_schema b"
" WHERE a.name GLOB '*_segdir'"
" AND b.name=substr(a.name,1,length(a.name)-7)"
" ORDER BY 1");
while( sqlite3_step(pStmt)==SQLITE_ROW ){
cnt++;
printf("%s;\n", sqlite3_column_text(pStmt, 0));
}
sqlite3_finalize(pStmt);
if( cnt==0 ){
printf("/* No FTS3/4 tables found in database %s */\n", argv[1]);
}
return 0;
}
if( argc<4 ) usage(argv[0]);
zTab = argv[2];
zCmd = argv[3];
nExtra = argc-4;
azExtra = argv+4;
if( strcmp(zCmd,"big-segments")==0 ){
listBigSegments(db, zTab);
}else if( strcmp(zCmd,"doclist")==0 ){
if( argc<7 ) usage(argv[0]);
showDoclist(db, zTab);
}else if( strcmp(zCmd,"schema")==0 ){
showSchema(db, zTab);
}else if( strcmp(zCmd,"segdir")==0 ){
showSegdirMap(db, zTab);
}else if( strcmp(zCmd,"segment")==0 ){
if( argc<5 ) usage(argv[0]);
showSegment(db, zTab);
}else if( strcmp(zCmd,"segment-stats")==0 ){
showSegmentStats(db, zTab);
}else if( strcmp(zCmd,"stat")==0 ){
showStat(db, zTab);
}else if( strcmp(zCmd,"vocabulary")==0 ){
showVocabulary(db, zTab);
}else{
usage(argv[0]);
}
return 0;
}

File diff suppressed because it is too large

File diff suppressed because it is too large

Some files were not shown because too many files have changed in this diff