freebsd-dev/lib/libc/stdlib/strhash.c
1996-01-13 14:25:04 +00:00

437 lines
9.8 KiB
C

#ifndef lint
static char *rcsid = "$Header: /home/ncvs/src/lib/libc/stdlib/strhash.c,v 1.5 1995/10/22 14:53:17 phk Exp $";
#endif
/*
*
* Copyright 1990
* Terry Jones & Jordan Hubbard
*
* PCS Computer Systeme, GmbH.
* Munich, West Germany
*
*
* All rights reserved.
*
* This is unsupported software and is subject to change without notice.
* the author makes no representations about the suitability of this software
* for any purpose. It is supplied "as is" without express or implied
* warranty.
*
* Permission to use, copy, modify, and distribute this software and its
* documentation for any purpose and without fee is hereby granted, provided
* that the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of the author not be used in
* advertising or publicity pertaining to distribution of the software
* without specific, written prior permission.
*
*/
/*
* This is a fairly simple open hashing (separate chaining) scheme.
* Terry did all the code, I just did the spec.
* Thanks again, you crazy Aussie..
*
*/
/*
* $Log: strhash.c,v $
* Revision 1.5 1995/10/22 14:53:17 phk
* Mino cleanup, #includes & unused vars.
*
* Revision 1.4 1995/05/30 05:41:55 rgrimes
* Remove trailing whitespace.
*
* Revision 1.3 1995/03/28 08:41:02 jkh
* Fix a missing _hash() to prevent namespace pollution with the db/hash routines.
* Grrr. If the dbhash routines weren't grossly overengineered I wouldn't
* even need to do this! :-(
*
* Also now export the hash_stats routine. Manpage coming RSN - I promise.
*
* Revision 1.2 1995/03/26 19:32:24 ache
* Hash 8bit chars without sign extension
*
* Revision 1.1 1995/03/26 10:21:55 jkh
* Add the strhash family of routines. They provide a number of features
* that the db/hash functions don't, and they're much simpler to use for
* low-overhead string hashing.
*
* Revision 1.1 1995/02/25 02:16:34 jkh
* Second version of this - now support the essentials of a basic
* attributed file system for storing menu information and command
* templates. This is not finished yet, but it does compile so I can
* commit it to the tree now and continue working on it.
*
* Revision 2.0 90/03/26 01:44:26 jkh
* pre-beta check-in
*
* Revision 1.8 90/03/09 19:22:35 jkh
* Fixed bogus comment.
*
* Revision 1.7 90/03/09 19:01:08 jkh
* Added comments, GPL.
*
* Revision 1.6 90/03/08 17:55:58 terry
* Rearranged hash_purge to be a tiny bit more efficient.
* Added verbose option to hash_stats.
*
* Revision 1.5 90/03/08 17:19:54 terry
* Added hash_purge. Added arg to hash_traverse. Changed all
* void * to Generic.
*
* Revision 1.4 90/03/08 12:02:35 terry
* Fixed problems with allocation that I screwed up last night.
* Changed bucket lists to be singly linked. Thanks to JKH, my hero.
*
* Revision 1.3 90/03/07 21:33:33 terry
* Cleaned up a few decls to keep gcc -Wall quiet.
*
* Revision 1.2 90/03/07 21:14:53 terry
* Comments. Added HASH_STATS define. Removed hash_find()
* and new_node().
*
* Revision 1.1 90/03/07 20:49:45 terry
* Initial revision
*
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <strhash.h>
/* Typed null values used as "nothing" results by the routines in this file. */
#define HASH_NULL	((hash_table *)0)
#define NODE_NULL	((hash_node *)0)
#define GENERIC_NULL	((void *)0)

/* Bucket count hash_create() falls back to when asked for 0 buckets. */
#define HASH_SZ		97

/* File-private helpers, defined below. */
static hash_node *list_find(caddr_t key, hash_node *head);
static int _hash(int size, char *key);
/*
 * hash_create()
 *
 * Allocate a new hash table with `size' buckets, every bucket starting
 * out empty.  A size of 0 selects the default of HASH_SZ buckets.
 *
 * Returns the address of the new table, or HASH_NULL if size is
 * negative or memory could not be obtained.  No memory is left
 * allocated when HASH_NULL is returned.
 */
hash_table *
hash_create(int size)
{
    hash_table *table;
    int i;

    /* Validate before allocating so a bad size has nothing to undo. */
    if (size < 0) {
        return HASH_NULL;
    }
    if (size == 0) {
        size = HASH_SZ;
    }

    table = malloc(sizeof *table);
    if (!table) {
        return HASH_NULL;
    }

    table->buckets = malloc((size_t)size * sizeof *table->buckets);
    if (!table->buckets) {
        /* Give the header back; nobody else holds a pointer to it. */
        free(table);
        return HASH_NULL;
    }

    for (i = 0; i < size; i++) {
        table->buckets[i] = NODE_NULL;
    }
    table->size = size;
    return table;
}
/*
 * list_find()
 *
 * Scan the chain that starts at head for a node whose key compares
 * equal (per strcmp) to key.  Returns the first such node, or
 * NODE_NULL when the chain ends without a match.
 */
static hash_node *
list_find(caddr_t key, hash_node *head)
{
    hash_node *node;

    for (node = head; node != NODE_NULL; node = node->next) {
        if (strcmp(node->key, key) == 0) {
            break;
        }
    }
    /* Either the matching node, or NODE_NULL if the loop ran off the end. */
    return node;
}
/*
 * _hash()
 *
 * Map the NUL-terminated string key to a bucket index in the range
 * 0 .. size-1.  Each byte is mixed in as an unsigned value, so
 * characters with the high bit set are not sign-extended.
 */
static int
_hash(int size, char *key)
{
    unsigned int acc = 0;
    const unsigned char *p;

    for (p = (const unsigned char *)key; *p != '\0'; p++) {
        acc = (acc << 1) ^ acc ^ *p;
    }
    return (int)(acc % (unsigned int)size);
}
/*
 * hash_destroy()
 *
 * Find the key and (if it's there) remove its node from the table.
 * If nukefunc is non-null it is called as (*nukefunc)(key, data) with
 * the node's own key and data before the node is released; it is in
 * charge of disposing of the storage held by the data associated with
 * the node.  This routine itself frees only the node structure.
 * Nothing happens when the key is not in the table.
 */
void
hash_destroy(hash_table *table, char *key, void (*nukefunc)())
{
    /*
     * `link' always addresses the pointer that leads to the node under
     * inspection: first the bucket head, then each node's next field.
     * Unlinking is then the same operation wherever the match is found.
     */
    hash_node **link = &table->buckets[_hash(table->size, key)];
    hash_node *victim;

    while (*link != NODE_NULL && strcmp((*link)->key, key) != 0) {
        link = &(*link)->next;
    }

    victim = *link;
    if (victim == NODE_NULL) {
        return;
    }

    *link = victim->next;
    if (nukefunc) {
        (*nukefunc)(victim->key, victim->data);
    }
    free(victim);
}
/*
* hash_search()
*
* Search the table for the given key. Then:
*
* 1) If you find it and there is no replacement function, just
* return what you found. (This is a simple search).
* 2) If you find it and there is a replacement function, run
* the function on the data you found, and replace the old
* data with whatever is passed in datum. Return 0.
* 3) If you don't find it and there is some datum, insert a
* new item into the table. Insertions go at the front of
* the bucket. Return 0.
* 4) Otherwise just return 0.
*
*/
void *
hash_search(hash_table *table, caddr_t key, void *datum,
void (*replace_func)())
{
int bucket = _hash(table->size, key);
hash_node *found = list_find(key, table->buckets[bucket]);
if (found){
if (!replace_func){
return found->data;
}
else{
(*replace_func)(found->data);
found->data = datum;
}
}
else{
if (datum){
static int assign_key();
hash_node *new = (hash_node *)malloc(sizeof(hash_node));
if (!new || !assign_key(key, new)){
return GENERIC_NULL;
}
new->data = datum;
new->next = table->buckets[bucket];
table->buckets[bucket] = new;
return new;
}
}
return GENERIC_NULL;
}
/*
 * assign_key()
 *
 * Give node its own malloc'ed copy of the string key.  Returns 1 on
 * success.  Returns 0 if either argument is null, or if malloc fails
 * (in which case node->key has been set to the null result).
 */
static int
assign_key(char *key, hash_node *node)
{
    size_t nbytes;
    char *copy;

    if (node == NODE_NULL || key == NULL) {
        return 0;
    }

    nbytes = strlen(key) + 1;		/* include the terminating NUL */
    copy = (char *)malloc(nbytes);
    node->key = copy;
    if (copy == NULL) {
        return 0;
    }

    memcpy(copy, key, nbytes);
    return 1;
}
/*
 * hash_traverse()
 *
 * Visit every node, bucket by bucket, calling
 * (*func)(key, data, arg) for each one.  The walk is abandoned as soon
 * as func returns 0.  A null func makes this a no-op.
 */
void
hash_traverse(hash_table *table, int (*func)(), void *arg)
{
    int nbuckets = table->size;
    int b;
    hash_node *node;

    if (func) {
        for (b = 0; b < nbuckets; b++) {
            for (node = table->buckets[b]; node; node = node->next) {
                if (!(*func)(node->key, node->data, arg)) {
                    return;
                }
            }
        }
    }
}
/*
 * hash_purge()
 *
 * Empty the whole table.  For every node, purge_func (when non-null)
 * is called with the node's key and data, and then the node itself is
 * freed.  Each bucket that held nodes is reset to NODE_NULL.  The
 * table and its bucket array stay allocated.
 */
void
hash_purge(hash_table *table, void (*purge_func)(char *p1, void *p2))
{
    int nbuckets = table->size;
    int b;

    for (b = 0; b < nbuckets; b++) {
        hash_node *node = table->buckets[b];

        if (node == NODE_NULL) {
            continue;
        }
        while (node != NODE_NULL) {
            hash_node *doomed = node;

            if (purge_func) {
                (*purge_func)(doomed->key, doomed->data);
            }
            /* Step forward before the node's memory goes away. */
            node = doomed->next;
            free(doomed);
        }
        table->buckets[b] = NODE_NULL;
    }
}
#undef min
/*
 * min(a, b): the smaller of a and b.  The whole expansion is
 * parenthesized so the macro can be used as an operand of a larger
 * expression.  The selected argument is evaluated twice, so do not
 * pass expressions with side effects.
 */
#define min(a, b) ((a) < (b) ? (a) : (b))
/*
 * hash_stats()
 *
 * Print statistics about the current table allocation to stdout:
 * the number of stored elements and, when there is at least one, how
 * many buckets are in use, the longest chain length and how many
 * buckets share it, the average chain length over the non-empty
 * buckets, and the best achievable average.
 *
 * When verbose is non-zero, each non-empty bucket is also listed with
 * its keys and its element count.
 *
 * No memory is allocated; only the chain currently being walked needs
 * a counter.
 */
void
hash_stats(hash_table *table, int verbose)
{
    int i;
    int total_elements = 0;
    int non_empty_buckets = 0;
    int max_count = 0;
    int max_repeats = 0;
    int size = table->size;

    for (i = 0; i < size; i++) {
        hash_node *n;
        int count = 0;

        for (n = table->buckets[i]; n; n = n->next) {
            if (count == 0) {
                /* First node seen in this bucket. */
                non_empty_buckets++;
                if (verbose) {
                    printf("bucket %2d: ", i);
                }
            }
            if (verbose) {
                printf(" %s", n->key);
            }
            count++;
        }

        total_elements += count;
        if (count > max_count) {
            max_count = count;
            max_repeats = 1;
        }
        else if (count == max_count) {
            max_repeats++;
        }
        if (count && verbose) {
            printf(" (%d)\n", count);
        }
    }

    printf("\n");
    printf("%d element%s in storage.\n", total_elements, total_elements == 1 ? "" : "s");

    /* total_elements > 0 implies non_empty_buckets > 0, so the divisions are safe. */
    if (total_elements) {
        printf("%d of %d (%.2f%%) buckets are in use\n", non_empty_buckets, size,
               (double)100 * (double)non_empty_buckets / (double)(size));
        printf("the maximum number of elements in a bucket is %d (%d times)\n", max_count, max_repeats);
        printf("average per bucket is %f\n", (double)total_elements / (double)non_empty_buckets);
        printf("optimal would be %f\n", (double)total_elements / (double)(min(size, total_elements)));
    }
}