freebsd-dev/include/search.h

/*-
 * Written by J.T. Conklin <jtc@NetBSD.org>
 * Public domain.
 *
 * $NetBSD: search.h,v 1.16 2005/02/03 04:39:32 perry Exp $
 * $FreeBSD$
 */
#ifndef _SEARCH_H_
#define _SEARCH_H_
#include <sys/cdefs.h>
#include <sys/_types.h>
#ifndef _SIZE_T_DECLARED
typedef __size_t size_t;
#define _SIZE_T_DECLARED
#endif
typedef struct entry {
	char	*key;
	void	*data;
} ENTRY;
typedef enum {
	FIND, ENTER
} ACTION;
typedef enum {
	preorder,
	postorder,
	endorder,
	leaf
} VISIT;
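/*
 * The VISIT values are passed by twalk() to its callback: preorder before
 * either subtree of an internal node has been visited, postorder after the
 * left subtree only, endorder after both subtrees, and leaf for nodes with
 * no children.
 */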
#ifdef _SEARCH_PRIVATE
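/*
 * Internal node of the binary search tree behind tsearch()/tdelete()/
 * tfind()/twalk().  The tree is kept balanced as an AVL tree; only a
 * per-node balance factor is stored, not the subtree height.
 */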
typedef struct node {
	void *key;
	struct node *llink, *rlink;
	signed char balance;
} node_t;
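/*
 * List element header expected by insque()/remque(): the structures being
 * linked must begin with these forward and backward pointers.
 */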
struct que_elem {
	struct que_elem *next;
	struct que_elem *prev;
};
#endif
#if __BSD_VISIBLE
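/*
 * Per-table state for the reentrant hcreate_r()/hsearch_r()/hdestroy_r()
 * interface; the table itself lives behind this opaque pointer, so several
 * independent hash tables can be used at once.
 */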
struct hsearch_data {
	struct __hsearch *__hsearch;
};
#endif
__BEGIN_DECLS
int hcreate(size_t);
void hdestroy(void);
ENTRY *hsearch(ENTRY, ACTION);
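/*
 * Illustrative sketch (not part of the original header) of the classic
 * hcreate()/hsearch()/hdestroy() usage; <stdio.h> is assumed and the
 * key/data strings are made up:
 *
 *	ENTRY item = { "greeting", "hello" };	// hypothetical pair
 *	if (hcreate(128) != 0) {		// size is only a hint
 *		(void)hsearch(item, ENTER);	// insert the pair
 *		ENTRY *found = hsearch(item, FIND);
 *		if (found != NULL)
 *			puts(found->data);	// prints "hello"
 *		hdestroy();
 *	}
 */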
void insque(void *, void *);
void *lfind(const void *, const void *, size_t *, size_t,
    int (*)(const void *, const void *));
void *lsearch(const void *, void *, size_t *, size_t,
    int (*)(const void *, const void *));
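/*
 * Illustrative sketch of lfind()/lsearch() on a plain array: lfind() only
 * searches, while lsearch() appends the key when it is absent (the array
 * must have room for the new element).  All names are hypothetical:
 *
 *	static int
 *	cmp_int(const void *a, const void *b)
 *	{
 *		return (*(const int *)a - *(const int *)b);
 *	}
 *
 *	int table[16] = { 3, 1, 4 };
 *	size_t nelem = 3;
 *	int key = 4;
 *	int *hit = lfind(&key, table, &nelem, sizeof(table[0]), cmp_int);
 *	key = 9;
 *	(void)lsearch(&key, table, &nelem, sizeof(table[0]), cmp_int);
 *	// nelem is now 4 and table[3] == 9
 */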
void remque(void *);
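/*
 * Illustrative sketch of insque()/remque(): the argument must point to a
 * structure whose first two members are the forward and backward links,
 * as in the hypothetical struct item below:
 *
 *	struct item {
 *		struct item *next;
 *		struct item *prev;
 *		int value;		// user payload follows the links
 *	};
 *
 *	struct item head = { NULL, NULL, 0 };
 *	struct item a = { NULL, NULL, 1 };
 *	insque(&a, &head);		// list is now head <-> a
 *	remque(&a);			// a is unlinked again
 */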
void *tdelete(const void * __restrict, void ** __restrict,
    int (*)(const void *, const void *));
void *tfind(const void *, void * const *,
    int (*)(const void *, const void *));
void *tsearch(const void *, void **, int (*)(const void *, const void *));
void twalk(const void *, void (*)(const void *, VISIT, int));
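/*
 * Illustrative sketch of the tree functions, using string keys and a
 * hypothetical comparator (assumes <stdio.h> and <string.h>).  The pointer
 * returned by tfind()/tsearch() refers to the node and can be treated as a
 * pointer to the stored key pointer:
 *
 *	static int
 *	cmp_str(const void *a, const void *b)
 *	{
 *		return (strcmp(a, b));
 *	}
 *
 *	void *root = NULL;
 *	(void)tsearch("apple", &root, cmp_str);		// insert
 *	void **hit = tfind("apple", &root, cmp_str);	// look up
 *	if (hit != NULL)
 *		printf("%s\n", *(char **)hit);		// prints "apple"
 *	(void)tdelete("apple", &root, cmp_str);		// remove
 */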
#if __BSD_VISIBLE
int hcreate_r(size_t, struct hsearch_data *);
void hdestroy_r(struct hsearch_data *);
int hsearch_r(ENTRY, ACTION, ENTRY **, struct hsearch_data *);
#endif
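/*
 * Illustrative sketch of the reentrant extensions, assuming the
 * glibc-compatible convention that they return nonzero on success; each
 * table has its own struct hsearch_data, so several can coexist:
 *
 *	struct hsearch_data tab = { 0 };
 *	ENTRY *res;
 *	if (hcreate_r(64, &tab) != 0) {
 *		(void)hsearch_r((ENTRY){ "key", "value" }, ENTER, &res, &tab);
 *		hdestroy_r(&tab);
 *	}
 */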
__END_DECLS
#endif /* !_SEARCH_H_ */