Add the strhash family of routines. They provide a number of features

that the db/hash functions don't, and they're much simpler to use for low-overhead string hashing.
1995-03-26 10:21:55 +00:00 · 1995-03-26 10:21:55 +00:00 · 4796867161
commit 4796867161
parent 7f5473b3cc
2 changed files with 416 additions and 2 deletions
--- a/lib/libc/stdlib/Makefile.inc
+++ b/lib/libc/stdlib/Makefile.inc
@ -4,8 +4,8 @@
 .PATH: ${.CURDIR}/${MACHINE}/stdlib ${.CURDIR}/stdlib

 SRCS+=	abort.c atexit.c atof.c atoi.c atol.c bsearch.c calloc.c div.c \
-	exit.c getenv.c getopt.c getsubopt.c heapsort.c labs.c ldiv.c \
-	malloc.c merge.c putenv.c qsort.c radixsort.c rand.c random.c \
+	exit.c getenv.c getopt.c getsubopt.c strhash.c heapsort.c labs.c \
+	ldiv.c malloc.c merge.c putenv.c qsort.c radixsort.c rand.c random.c \
 	realpath.c setenv.c strtod.c strtol.c strtoq.c strtoul.c \
 	strtouq.c system.c

--- a/lib/libc/stdlib/strhash.c
+++ b/lib/libc/stdlib/strhash.c
@ -0,0 +1,414 @@
+#ifndef lint
+static char *rcsid = "$Header: /home/ncvs/src/usr.bin/dmenu/hash.c,v 1.1 1995/02/25 02:16:34 jkh Exp $";
+#endif
+
+/*
+ *
+ *                      Copyright 1990
+ *               Terry Jones & Jordan Hubbard
+ *
+ *		  PCS Computer Systeme, GmbH.
+ *	             Munich, West Germany
+ *
+ *
+ *  All rights reserved.
+ * 
+ *  This is unsupported software and is subject to change without notice.
+ *  the author makes no representations about the suitability of this software
+ *  for any purpose. It is supplied "as is" without express or implied
+ *  warranty.
+ * 
+ *  Permission to use, copy, modify, and distribute this software and its
+ *  documentation for any purpose and without fee is hereby granted, provided
+ *  that the above copyright notice appear in all copies and that both that
+ *  copyright notice and this permission notice appear in supporting
+ *  documentation, and that the name of the author not be used in
+ *  advertising or publicity pertaining to distribution of the software
+ *  without specific, written prior permission.
+ *
+ */
+
+/*
+ * This is a fairly simple open addressing hash scheme.
+ * Terry did all the code, I just did the spec.
+ * Thanks again, you crazy Aussie..
+ * 
+ */
+
+/*
+ * $Log: hash.c,v $
+ * Revision 1.1  1995/02/25  02:16:34  jkh
+ * Second version of this - now support the essentials of a basic
+ * attributed file system for storing menu information and command
+ * templates.  This is not finished yet, but it does compile so I can
+ * commit it to the tree now and continue working on it.
+ *
+ * Revision 2.0  90/03/26  01:44:26  jkh
+ * pre-beta check-in
+ * 
+ * Revision 1.8  90/03/09  19:22:35  jkh
+ * Fixed bogus comment.
+ * 
+ * Revision 1.7  90/03/09  19:01:08  jkh
+ * Added comments, GPL.
+ * 
+ * Revision 1.6  90/03/08  17:55:58  terry
+ * Rearranged hash_purge to be a tiny bit more efficient.
+ * Added verbose option to hash_stats.
+ * 
+ * Revision 1.5  90/03/08  17:19:54  terry
+ * Added hash_purge. Added arg to hash_traverse. Changed all
+ * void * to Generic.
+ * 
+ * Revision 1.4  90/03/08  12:02:35  terry
+ * Fixed problems with allocation that I screwed up last night.
+ * Changed bucket lists to be singly linked. Thanks to JKH, my hero.
+ * 
+ * Revision 1.3  90/03/07  21:33:33  terry
+ * Cleaned up a few decls to keep gcc -Wall quiet.
+ * 
+ * Revision 1.2  90/03/07  21:14:53  terry
+ * Comments. Added HASH_STATS define. Removed hash_find()
+ * and new_node().
+ * 
+ * Revision 1.1  90/03/07  20:49:45  terry
+ * Initial revision
+ * 
+ *
+ */
+
+
+#include <string.h>
+#include <sys/types.h>
+#include <strhash.h>
+
+#define HASH_NULL    (hash_table *)0
+#define NODE_NULL    (hash_node *)0
+#define GENERIC_NULL (void *)0
+
+#define HASH_SZ 97
+
+
+static int _hash(int size, char *key);
+static hash_node *list_find(caddr_t key, hash_node *head);
+
+
+/*
+ * hash_create()
+ *
+ * Malloc room for a new hash table and then room for its
+ * bucket pointers. Then set all the buckets to
+ * point to 0. Return the address of the new table.
+ */
+hash_table *
+hash_create(int size)
+{
+    register int i;  
+    hash_table *new = (hash_table *)malloc(sizeof(hash_table));
+
+    if (!new || size < 0){
+	return HASH_NULL;
+    }
+    
+    if (size == 0){
+	size = HASH_SZ;
+    }
+    
+    if (!(new->buckets = (hash_node **)malloc(size * sizeof(hash_node *)))){
+	return HASH_NULL;
+    }
+
+    for (i = 0; i < size; i++){
+	new->buckets[i] = NODE_NULL;
+    } 
+    new->size = size;
+    
+    return new;
+}
+
+
+/*
+ * list_find()
+ *
+ * Find the key in the linked list pointed to by head.
+ */
+static hash_node *
+list_find(caddr_t key, hash_node *head)
+{
+    while (head){
+	if (!strcmp(head->key, key)){
+	    return head;
+	}
+	head = head->next;
+    }
+    return NODE_NULL;
+}
+
+
+/*
+ * _hash()
+ *
+ * Compute the hash value for the given key.
+ */
+static int
+_hash(int size, char *key)
+{
+    unsigned int h = 0x0;
+
+    while (*key){
+	h = (h << 1) ^ (h ^ *key++);
+    }	
+
+    h %= size;
+    return h;
+}
+
+/*
+ * hash_destroy()
+ *
+ * Find the key and (if it's there) remove it entirely.
+ * The function (*nukefunc)() is in charge of disposing
+ * of the storage help by the data associated with the node.
+ */
+void 
+hash_destroy(hash_table *table, char *key, void (*nukefunc)())
+{
+    int bucket = _hash(table->size, key);
+    hash_node *found = table->buckets[bucket];
+    hash_node *to_free = NODE_NULL;
+    
+    if (!found) {
+	return;
+    }
+    
+    if (!strcmp(found->key, key)) {
+	/*
+	 * It was the head of the list.
+	 */
+	table->buckets[bucket] = found->next;
+	to_free = found;
+    }
+    else {
+	/*
+	 * Walk the list, looking one ahead.
+	 */
+	while (found->next) {
+	    if (!strcmp(found->next->key, key)) {
+		to_free = found->next;
+		found->next = found->next->next;
+		break;
+	    }
+	    found = found->next;
+	}
+	
+	if (!to_free){
+	    return;
+	}
+    }
+    
+    if (nukefunc)
+	(*nukefunc)(to_free->key, to_free->data);
+    free(to_free);
+    return;
+}
+
+
+/*
+ * hash_search()
+ *
+ * Search the table for the given key. Then:
+ *
+ * 1) If you find it and there is no replacement function, just 
+ *    return what you found. (This is a simple search). 
+ * 2) If you find it and there is a replacement function, run
+ *    the function on the data you found, and replace the old
+ *    data with whatever is passed in datum. Return 0.
+ * 3) If you don't find it and there is some datum, insert a
+ *    new item into the table. Insertions go at the front of
+ *    the bucket. Return 0.
+ * 4) Otherwise just return 0.
+ *
+ */
+void *
+hash_search(hash_table *table, caddr_t key, void *datum,
+	    void (*replace_func)())
+{
+    int bucket = hash(table->size, key);
+    hash_node *found = list_find(key, table->buckets[bucket]);
+
+    if (found){
+	if (!replace_func){
+	    return found->data;
+	}
+	else{
+	    (*replace_func)(found->data);
+	    found->data = datum;
+	}
+    }
+    else{
+	if (datum){
+	    
+	    static int assign_key();
+	    
+	    hash_node *new = (hash_node *)malloc(sizeof(hash_node));
+	    
+	    if (!new || !assign_key(key, new)){
+		return GENERIC_NULL;
+	    }
+	    new->data = datum;
+	    new->next = table->buckets[bucket];
+	    table->buckets[bucket] = new;
+	}
+    }
+    return GENERIC_NULL;
+}
+
+
+/*
+ * assign_key()
+ *
+ * Set the key value of a node to be 'key'. Get some space from 
+ * malloc and copy it in etc. Return 1 if all is well, 0 otherwise.
+ */
+static int
+assign_key(char *key, hash_node *node)
+{
+    if (!node || !key){
+	return 0;
+    }
+    
+    if (!(node->key = (char *)malloc(strlen(key) + 1))){
+	return 0;
+    }
+    
+    node->key[0] = '\0';
+    strcat(node->key, key);
+    return 1;
+}
+
+/*
+ * hash_traverse()
+ *
+ * Traverse the hash table and run the function func on the
+ * data found at each node and the argument we're passed for it.
+ */
+void
+hash_traverse(hash_table *table, int (*func)(), void *arg)
+{
+    register int i; 
+    register int size = table->size;
+
+    if (!func)
+	return;
+    
+    for (i = 0; i < size; i++) {
+	hash_node *n = table->buckets[i];
+	while (n) {
+	    if ((*func)(n->key, n->data, arg) == 0)
+		return;
+	    n = n->next;
+	}
+    } 
+    return;
+}
+
+/*
+ * hash_purge()
+ *
+ * Run through the entire hash table. Call purge_func
+ * on the data found at each node, and then free the node.
+ * Set all the bucket pointers to 0.
+ */
+void
+hash_purge(hash_table *table, void (*purge_func)(char *p1, void *p2))
+{
+    register int i; 
+    register int size = table->size;
+    
+    for (i = 0; i < size; i++) {
+	hash_node *n = table->buckets[i];
+	if (n) {
+	    do {
+		hash_node *to_free = n;
+		if (purge_func) {
+		    (*purge_func)(n->key, n->data);
+		}
+		n = n->next;
+		free(to_free);
+	    } while (n);
+	    table->buckets[i] = NODE_NULL;
+	}
+    } 
+}
+
+#ifdef HASH_STATS
+#include <stdio.h>
+#define min(a, b) (a) < (b) ? (a) : (b)
+
+/*
+ * hash_stats()
+ *
+ * Print statistics about the current table allocation to stdout.
+ */
+void
+hash_stats(hash_table *table, int verbose)
+{
+    register int i; 
+    int total_elements = 0;
+    int non_empty_buckets = 0;
+    int max_count = 0;
+    int max_repeats = 0;
+    int *counts;
+    int size = table->size;
+    
+    if (!(counts = (int *)malloc(size * sizeof(int)))){
+	fprintf(stderr, "malloc returns 0\n");
+	exit(1); 
+    }
+    
+    for (i = 0; i < size; i++){
+	int x = 0;
+	hash_node *n = table->buckets[i];
+	counts[i] = 0;
+	while (n){
+	    if (!x){
+		x = 1;
+		non_empty_buckets++;
+		if (verbose){
+		    printf("bucket %2d: ", i);
+		}
+	    }
+	    if (verbose){
+		printf(" %s", n->key);
+	    }
+	    counts[i]++;
+	    n = n->next;
+	}
+	
+	total_elements += counts[i];
+	if (counts[i] > max_count){
+	    max_count = counts[i];
+	    max_repeats = 1;
+	}
+	else if (counts[i] == max_count){
+	    max_repeats++;
+	}
+	
+	if (counts[i] && verbose){
+	    printf(" (%d)\n", counts[i]);
+	}
+    } 
+    
+    printf("\n");
+    printf("%d element%s in storage.\n", total_elements, total_elements == 1 ? "" : "s");
+    
+    if (total_elements){
+	printf("%d of %d (%.2f%%) buckets are in use\n", non_empty_buckets, size, 
+	       (double)100 * (double)non_empty_buckets / (double)(size));
+	printf("the maximum number of elements in a bucket is %d (%d times)\n", max_count, max_repeats);
+	printf("average per bucket is %f\n", (double)total_elements / (double)non_empty_buckets);
+	printf("optimal would be %f\n", (double)total_elements / (double)(min(size, total_elements)));
+    }
+    return;
+}
+#endif /* HASH_STATS */