Add the strhash family of routines. They provide a number of features
that the db/hash functions don't, and they're much simpler to use for low-overhead string hashing.
This commit is contained in:
parent
7f5473b3cc
commit
4796867161
@ -4,8 +4,8 @@
|
||||
.PATH: ${.CURDIR}/${MACHINE}/stdlib ${.CURDIR}/stdlib
|
||||
|
||||
SRCS+= abort.c atexit.c atof.c atoi.c atol.c bsearch.c calloc.c div.c \
|
||||
exit.c getenv.c getopt.c getsubopt.c heapsort.c labs.c ldiv.c \
|
||||
malloc.c merge.c putenv.c qsort.c radixsort.c rand.c random.c \
|
||||
exit.c getenv.c getopt.c getsubopt.c strhash.c heapsort.c labs.c \
|
||||
ldiv.c malloc.c merge.c putenv.c qsort.c radixsort.c rand.c random.c \
|
||||
realpath.c setenv.c strtod.c strtol.c strtoq.c strtoul.c \
|
||||
strtouq.c system.c
|
||||
|
||||
|
414
lib/libc/stdlib/strhash.c
Normal file
414
lib/libc/stdlib/strhash.c
Normal file
@ -0,0 +1,414 @@
|
||||
#ifndef lint
|
||||
static char *rcsid = "$Header: /home/ncvs/src/usr.bin/dmenu/hash.c,v 1.1 1995/02/25 02:16:34 jkh Exp $";
|
||||
#endif
|
||||
|
||||
/*
|
||||
*
|
||||
* Copyright 1990
|
||||
* Terry Jones & Jordan Hubbard
|
||||
*
|
||||
* PCS Computer Systeme, GmbH.
|
||||
* Munich, West Germany
|
||||
*
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* This is unsupported software and is subject to change without notice.
|
||||
* the author makes no representations about the suitability of this software
|
||||
* for any purpose. It is supplied "as is" without express or implied
|
||||
* warranty.
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software and its
|
||||
* documentation for any purpose and without fee is hereby granted, provided
|
||||
* that the above copyright notice appear in all copies and that both that
|
||||
* copyright notice and this permission notice appear in supporting
|
||||
* documentation, and that the name of the author not be used in
|
||||
* advertising or publicity pertaining to distribution of the software
|
||||
* without specific, written prior permission.
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* This is a fairly simple open addressing hash scheme.
|
||||
* Terry did all the code, I just did the spec.
|
||||
* Thanks again, you crazy Aussie..
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* $Log: hash.c,v $
|
||||
* Revision 1.1 1995/02/25 02:16:34 jkh
|
||||
* Second version of this - now support the essentials of a basic
|
||||
* attributed file system for storing menu information and command
|
||||
* templates. This is not finished yet, but it does compile so I can
|
||||
* commit it to the tree now and continue working on it.
|
||||
*
|
||||
* Revision 2.0 90/03/26 01:44:26 jkh
|
||||
* pre-beta check-in
|
||||
*
|
||||
* Revision 1.8 90/03/09 19:22:35 jkh
|
||||
* Fixed bogus comment.
|
||||
*
|
||||
* Revision 1.7 90/03/09 19:01:08 jkh
|
||||
* Added comments, GPL.
|
||||
*
|
||||
* Revision 1.6 90/03/08 17:55:58 terry
|
||||
* Rearranged hash_purge to be a tiny bit more efficient.
|
||||
* Added verbose option to hash_stats.
|
||||
*
|
||||
* Revision 1.5 90/03/08 17:19:54 terry
|
||||
* Added hash_purge. Added arg to hash_traverse. Changed all
|
||||
* void * to Generic.
|
||||
*
|
||||
* Revision 1.4 90/03/08 12:02:35 terry
|
||||
* Fixed problems with allocation that I screwed up last night.
|
||||
* Changed bucket lists to be singly linked. Thanks to JKH, my hero.
|
||||
*
|
||||
* Revision 1.3 90/03/07 21:33:33 terry
|
||||
* Cleaned up a few decls to keep gcc -Wall quiet.
|
||||
*
|
||||
* Revision 1.2 90/03/07 21:14:53 terry
|
||||
* Comments. Added HASH_STATS define. Removed hash_find()
|
||||
* and new_node().
|
||||
*
|
||||
* Revision 1.1 90/03/07 20:49:45 terry
|
||||
* Initial revision
|
||||
*
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <strhash.h>
|
||||
|
||||
#define HASH_NULL (hash_table *)0
|
||||
#define NODE_NULL (hash_node *)0
|
||||
#define GENERIC_NULL (void *)0
|
||||
|
||||
#define HASH_SZ 97
|
||||
|
||||
|
||||
static int _hash(int size, char *key);
|
||||
static hash_node *list_find(caddr_t key, hash_node *head);
|
||||
|
||||
|
||||
/*
|
||||
* hash_create()
|
||||
*
|
||||
* Malloc room for a new hash table and then room for its
|
||||
* bucket pointers. Then set all the buckets to
|
||||
* point to 0. Return the address of the new table.
|
||||
*/
|
||||
hash_table *
|
||||
hash_create(int size)
|
||||
{
|
||||
register int i;
|
||||
hash_table *new = (hash_table *)malloc(sizeof(hash_table));
|
||||
|
||||
if (!new || size < 0){
|
||||
return HASH_NULL;
|
||||
}
|
||||
|
||||
if (size == 0){
|
||||
size = HASH_SZ;
|
||||
}
|
||||
|
||||
if (!(new->buckets = (hash_node **)malloc(size * sizeof(hash_node *)))){
|
||||
return HASH_NULL;
|
||||
}
|
||||
|
||||
for (i = 0; i < size; i++){
|
||||
new->buckets[i] = NODE_NULL;
|
||||
}
|
||||
new->size = size;
|
||||
|
||||
return new;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* list_find()
|
||||
*
|
||||
* Find the key in the linked list pointed to by head.
|
||||
*/
|
||||
static hash_node *
|
||||
list_find(caddr_t key, hash_node *head)
|
||||
{
|
||||
while (head){
|
||||
if (!strcmp(head->key, key)){
|
||||
return head;
|
||||
}
|
||||
head = head->next;
|
||||
}
|
||||
return NODE_NULL;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* _hash()
|
||||
*
|
||||
* Compute the hash value for the given key.
|
||||
*/
|
||||
static int
|
||||
_hash(int size, char *key)
|
||||
{
|
||||
unsigned int h = 0x0;
|
||||
|
||||
while (*key){
|
||||
h = (h << 1) ^ (h ^ *key++);
|
||||
}
|
||||
|
||||
h %= size;
|
||||
return h;
|
||||
}
|
||||
|
||||
/*
|
||||
* hash_destroy()
|
||||
*
|
||||
* Find the key and (if it's there) remove it entirely.
|
||||
* The function (*nukefunc)() is in charge of disposing
|
||||
* of the storage help by the data associated with the node.
|
||||
*/
|
||||
void
|
||||
hash_destroy(hash_table *table, char *key, void (*nukefunc)())
|
||||
{
|
||||
int bucket = _hash(table->size, key);
|
||||
hash_node *found = table->buckets[bucket];
|
||||
hash_node *to_free = NODE_NULL;
|
||||
|
||||
if (!found) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!strcmp(found->key, key)) {
|
||||
/*
|
||||
* It was the head of the list.
|
||||
*/
|
||||
table->buckets[bucket] = found->next;
|
||||
to_free = found;
|
||||
}
|
||||
else {
|
||||
/*
|
||||
* Walk the list, looking one ahead.
|
||||
*/
|
||||
while (found->next) {
|
||||
if (!strcmp(found->next->key, key)) {
|
||||
to_free = found->next;
|
||||
found->next = found->next->next;
|
||||
break;
|
||||
}
|
||||
found = found->next;
|
||||
}
|
||||
|
||||
if (!to_free){
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (nukefunc)
|
||||
(*nukefunc)(to_free->key, to_free->data);
|
||||
free(to_free);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* hash_search()
|
||||
*
|
||||
* Search the table for the given key. Then:
|
||||
*
|
||||
* 1) If you find it and there is no replacement function, just
|
||||
* return what you found. (This is a simple search).
|
||||
* 2) If you find it and there is a replacement function, run
|
||||
* the function on the data you found, and replace the old
|
||||
* data with whatever is passed in datum. Return 0.
|
||||
* 3) If you don't find it and there is some datum, insert a
|
||||
* new item into the table. Insertions go at the front of
|
||||
* the bucket. Return 0.
|
||||
* 4) Otherwise just return 0.
|
||||
*
|
||||
*/
|
||||
void *
|
||||
hash_search(hash_table *table, caddr_t key, void *datum,
|
||||
void (*replace_func)())
|
||||
{
|
||||
int bucket = hash(table->size, key);
|
||||
hash_node *found = list_find(key, table->buckets[bucket]);
|
||||
|
||||
if (found){
|
||||
if (!replace_func){
|
||||
return found->data;
|
||||
}
|
||||
else{
|
||||
(*replace_func)(found->data);
|
||||
found->data = datum;
|
||||
}
|
||||
}
|
||||
else{
|
||||
if (datum){
|
||||
|
||||
static int assign_key();
|
||||
|
||||
hash_node *new = (hash_node *)malloc(sizeof(hash_node));
|
||||
|
||||
if (!new || !assign_key(key, new)){
|
||||
return GENERIC_NULL;
|
||||
}
|
||||
new->data = datum;
|
||||
new->next = table->buckets[bucket];
|
||||
table->buckets[bucket] = new;
|
||||
}
|
||||
}
|
||||
return GENERIC_NULL;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* assign_key()
|
||||
*
|
||||
* Set the key value of a node to be 'key'. Get some space from
|
||||
* malloc and copy it in etc. Return 1 if all is well, 0 otherwise.
|
||||
*/
|
||||
static int
|
||||
assign_key(char *key, hash_node *node)
|
||||
{
|
||||
if (!node || !key){
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!(node->key = (char *)malloc(strlen(key) + 1))){
|
||||
return 0;
|
||||
}
|
||||
|
||||
node->key[0] = '\0';
|
||||
strcat(node->key, key);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* hash_traverse()
|
||||
*
|
||||
* Traverse the hash table and run the function func on the
|
||||
* data found at each node and the argument we're passed for it.
|
||||
*/
|
||||
void
|
||||
hash_traverse(hash_table *table, int (*func)(), void *arg)
|
||||
{
|
||||
register int i;
|
||||
register int size = table->size;
|
||||
|
||||
if (!func)
|
||||
return;
|
||||
|
||||
for (i = 0; i < size; i++) {
|
||||
hash_node *n = table->buckets[i];
|
||||
while (n) {
|
||||
if ((*func)(n->key, n->data, arg) == 0)
|
||||
return;
|
||||
n = n->next;
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* hash_purge()
|
||||
*
|
||||
* Run through the entire hash table. Call purge_func
|
||||
* on the data found at each node, and then free the node.
|
||||
* Set all the bucket pointers to 0.
|
||||
*/
|
||||
void
|
||||
hash_purge(hash_table *table, void (*purge_func)(char *p1, void *p2))
|
||||
{
|
||||
register int i;
|
||||
register int size = table->size;
|
||||
|
||||
for (i = 0; i < size; i++) {
|
||||
hash_node *n = table->buckets[i];
|
||||
if (n) {
|
||||
do {
|
||||
hash_node *to_free = n;
|
||||
if (purge_func) {
|
||||
(*purge_func)(n->key, n->data);
|
||||
}
|
||||
n = n->next;
|
||||
free(to_free);
|
||||
} while (n);
|
||||
table->buckets[i] = NODE_NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef HASH_STATS
|
||||
#include <stdio.h>
|
||||
#define min(a, b) (a) < (b) ? (a) : (b)
|
||||
|
||||
/*
|
||||
* hash_stats()
|
||||
*
|
||||
* Print statistics about the current table allocation to stdout.
|
||||
*/
|
||||
void
|
||||
hash_stats(hash_table *table, int verbose)
|
||||
{
|
||||
register int i;
|
||||
int total_elements = 0;
|
||||
int non_empty_buckets = 0;
|
||||
int max_count = 0;
|
||||
int max_repeats = 0;
|
||||
int *counts;
|
||||
int size = table->size;
|
||||
|
||||
if (!(counts = (int *)malloc(size * sizeof(int)))){
|
||||
fprintf(stderr, "malloc returns 0\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
for (i = 0; i < size; i++){
|
||||
int x = 0;
|
||||
hash_node *n = table->buckets[i];
|
||||
counts[i] = 0;
|
||||
while (n){
|
||||
if (!x){
|
||||
x = 1;
|
||||
non_empty_buckets++;
|
||||
if (verbose){
|
||||
printf("bucket %2d: ", i);
|
||||
}
|
||||
}
|
||||
if (verbose){
|
||||
printf(" %s", n->key);
|
||||
}
|
||||
counts[i]++;
|
||||
n = n->next;
|
||||
}
|
||||
|
||||
total_elements += counts[i];
|
||||
if (counts[i] > max_count){
|
||||
max_count = counts[i];
|
||||
max_repeats = 1;
|
||||
}
|
||||
else if (counts[i] == max_count){
|
||||
max_repeats++;
|
||||
}
|
||||
|
||||
if (counts[i] && verbose){
|
||||
printf(" (%d)\n", counts[i]);
|
||||
}
|
||||
}
|
||||
|
||||
printf("\n");
|
||||
printf("%d element%s in storage.\n", total_elements, total_elements == 1 ? "" : "s");
|
||||
|
||||
if (total_elements){
|
||||
printf("%d of %d (%.2f%%) buckets are in use\n", non_empty_buckets, size,
|
||||
(double)100 * (double)non_empty_buckets / (double)(size));
|
||||
printf("the maximum number of elements in a bucket is %d (%d times)\n", max_count, max_repeats);
|
||||
printf("average per bucket is %f\n", (double)total_elements / (double)non_empty_buckets);
|
||||
printf("optimal would be %f\n", (double)total_elements / (double)(min(size, total_elements)));
|
||||
}
|
||||
return;
|
||||
}
|
||||
#endif /* HASH_STATS */
|
Loading…
Reference in New Issue
Block a user