| /* |
| * Copyright 1993, 1995 Christopher Seiwald. |
| * |
| * This file is part of Jam - see jam.c for Copyright information. |
| */ |
| |
| # include "jam.h" |
| # include "hash.h" |
| # include "compile.h" |
| # include <assert.h> |
| |
| /* |
| * hash.c - simple in-memory hashing routines |
| * |
| * External routines: |
| * |
| * hashinit() - initialize a hash table, returning a handle |
| * hashitem() - find a record in the table, and optionally enter a new one |
| * hashdone() - free a hash table, given its handle |
| * |
| * Internal routines: |
| * |
| * hashrehash() - resize and rebuild hp->tab, the hash table |
| * |
| * 4/29/93 - ensure ITEM's are aligned |
| */ |
| |
| /* */ |
| #define HASH_DEBUG_PROFILE 1 |
| /* */ |
| |
| char *hashsccssid="@(#)hash.c 1.14 () 6/20/88"; |
| |
| /* Header attached to all data items entered into a hash table. */ |
| |
| struct hashhdr |
| { |
| struct item * next; |
| unsigned int keyval; /* for quick comparisons */ |
| }; |
| |
| /* This structure overlays the one handed to hashenter(). Its actual size is |
| * given to hashinit(). |
| */ |
| |
| struct hashdata |
| { |
| char * key; |
| /* rest of user data */ |
| }; |
| |
| typedef struct item |
| { |
| struct hashhdr hdr; |
| struct hashdata data; |
| } ITEM ; |
| |
| # define MAX_LISTS 32 |
| |
| struct hash |
| { |
| /* |
| * the hash table, just an array of item pointers |
| */ |
| struct { |
| int nel; |
| ITEM **base; |
| } tab; |
| |
| int bloat; /* tab.nel / items.nel */ |
| int inel; /* initial number of elements */ |
| |
| /* |
| * the array of records, maintained by these routines |
| * essentially a microallocator |
| */ |
| struct { |
| int more; /* how many more ITEMs fit in lists[ list ] */ |
| ITEM *free; /* free list of items */ |
| char *next; /* where to put more ITEMs in lists[ list ] */ |
| int datalen; /* length of records in this hash table */ |
| int size; /* sizeof( ITEM ) + aligned datalen */ |
| int nel; /* total ITEMs held by all lists[] */ |
| int list; /* index into lists[] */ |
| |
| struct { |
| int nel; /* total ITEMs held by this list */ |
| char *base; /* base of ITEMs array */ |
| } lists[ MAX_LISTS ]; |
| } items; |
| |
| char * name; /* just for hashstats() */ |
| }; |
| |
| static void hashrehash( struct hash *hp ); |
| static void hashstat( struct hash *hp ); |
| static void * hash_mem_alloc(size_t datalen, size_t size); |
| static void hash_mem_free(size_t datalen, void * data); |
| #ifdef OPT_BOEHM_GC |
| static void hash_mem_finalizer(char * key, struct hash * hp); |
| #endif |
| |
| static unsigned int jenkins_one_at_a_time_hash(const unsigned char *key) |
| { |
| unsigned int hash = 0; |
| |
| while ( *key ) |
| { |
| hash += *key++; |
| hash += (hash << 10); |
| hash ^= (hash >> 6); |
| } |
| hash += (hash << 3); |
| hash ^= (hash >> 11); |
| hash += (hash << 15); |
| |
| return hash; |
| } |
| |
| /* |
| static unsigned int knuth_hash(const unsigned char *key) |
| { |
| unsigned int keyval = *key; |
| while ( *key ) |
| keyval = keyval * 2147059363 + *key++; |
| return keyval; |
| } |
| */ |
| |
| static unsigned int hash_keyval( const char * key_ ) |
| { |
| /* |
| return knuth_hash((const unsigned char *)key_); |
| */ |
| return jenkins_one_at_a_time_hash((const unsigned char *)key_); |
| } |
| |
| #define hash_bucket(hp,keyval) ((hp)->tab.base + ( (keyval) % (hp)->tab.nel )) |
| |
| /* Find the hash item for the given data. Returns pointer to the |
| item and if given a pointer to the item before the found item. |
| If it's the first item in a bucket, there is no previous item, |
| and zero is returned for the previous item instead. |
| */ |
| static ITEM * hash_search( |
| struct hash *hp, |
| unsigned int keyval, |
| const char * keydata, |
| ITEM * * previous ) |
| { |
| ITEM * i = *hash_bucket(hp,keyval); |
| ITEM * p = 0; |
| |
| for ( ; i; i = i->hdr.next ) |
| { |
| if ( ( keyval == i->hdr.keyval ) && |
| !strcmp( i->data.key, keydata ) ) |
| { |
| if (previous) |
| { |
| *previous = p; |
| } |
| return i; |
| } |
| p = i; |
| } |
| |
| return 0; |
| } |
| |
| /* |
| * hash_free() - remove the given item from the table if it's there. |
| * Returns 1 if found, 0 otherwise. |
| * |
| * NOTE: 2nd argument is HASHDATA*, not HASHDATA** as elsewhere. |
| */ |
| int |
| hash_free( |
| register struct hash *hp, |
| HASHDATA *data) |
| { |
| ITEM * i = 0; |
| ITEM * prev = 0; |
| unsigned int keyval = hash_keyval(data->key); |
| |
| i = hash_search( hp, keyval, data->key, &prev ); |
| if (i) |
| { |
| /* mark it free so we skip it during enumeration */ |
| i->data.key = 0; |
| /* unlink the record from the hash chain */ |
| if (prev) prev->hdr.next = i->hdr.next; |
| else *hash_bucket(hp,keyval) = i->hdr.next; |
| /* link it into the freelist */ |
| i->hdr.next = hp->items.free; |
| hp->items.free = i; |
| /* we have another item */ |
| hp->items.more++; |
| |
| return 1; |
| } |
| return 0; |
| } |
| |
| /* |
| * hashitem() - find a record in the table, and optionally enter a new one |
| */ |
| |
| int |
| hashitem( |
| register struct hash *hp, |
| HASHDATA **data, |
| int enter ) |
| { |
| register ITEM *i; |
| char *b = (*data)->key; |
| unsigned int keyval = hash_keyval(b); |
| |
| #ifdef HASH_DEBUG_PROFILE |
| profile_frame prof[1]; |
| if ( DEBUG_PROFILE ) |
| profile_enter( 0, prof ); |
| #endif |
| |
| if ( enter && !hp->items.more ) |
| hashrehash( hp ); |
| |
| if ( !enter && !hp->items.nel ) |
| { |
| #ifdef HASH_DEBUG_PROFILE |
| if ( DEBUG_PROFILE ) |
| profile_exit( prof ); |
| #endif |
| return 0; |
| } |
| |
| i = hash_search( hp, keyval, (*data)->key, 0 ); |
| if (i) |
| { |
| *data = &i->data; |
| #ifdef HASH_DEBUG_PROFILE |
| if ( DEBUG_PROFILE ) profile_exit( prof ); |
| #endif |
| return !0; |
| } |
| |
| if ( enter ) |
| { |
| ITEM * * base = hash_bucket(hp,keyval); |
| |
| /* try to grab one from the free list */ |
| if ( hp->items.free ) |
| { |
| i = hp->items.free; |
| hp->items.free = i->hdr.next; |
| assert( i->data.key == 0 ); |
| } |
| else |
| { |
| i = (ITEM *)hp->items.next; |
| hp->items.next += hp->items.size; |
| } |
| hp->items.more--; |
| memcpy( (char *)&i->data, (char *)*data, hp->items.datalen ); |
| i->hdr.keyval = keyval; |
| i->hdr.next = *base; |
| *base = i; |
| *data = &i->data; |
| #ifdef OPT_BOEHM_GC |
| if (sizeof(HASHDATA) == hp->items.datalen) |
| { |
| GC_REGISTER_FINALIZER(i->data.key,&hash_mem_finalizer,hp,0,0); |
| } |
| #endif |
| } |
| |
| #ifdef HASH_DEBUG_PROFILE |
| if ( DEBUG_PROFILE ) |
| profile_exit( prof ); |
| #endif |
| return 0; |
| } |
| |
| /* |
| * hashrehash() - resize and rebuild hp->tab, the hash table |
| */ |
| |
| static void hashrehash( register struct hash *hp ) |
| { |
| int i = ++hp->items.list; |
| hp->items.more = i ? 2 * hp->items.nel : hp->inel; |
| hp->items.next = (char *)hash_mem_alloc( hp->items.datalen, hp->items.more * hp->items.size ); |
| hp->items.free = 0; |
| |
| hp->items.lists[i].nel = hp->items.more; |
| hp->items.lists[i].base = hp->items.next; |
| hp->items.nel += hp->items.more; |
| |
| if ( hp->tab.base ) |
| hash_mem_free( hp->items.datalen, (char *)hp->tab.base ); |
| |
| hp->tab.nel = hp->items.nel * hp->bloat; |
| hp->tab.base = (ITEM **)hash_mem_alloc( hp->items.datalen, hp->tab.nel * sizeof(ITEM **) ); |
| |
| memset( (char *)hp->tab.base, '\0', hp->tab.nel * sizeof( ITEM * ) ); |
| |
| for ( i = 0; i < hp->items.list; ++i ) |
| { |
| int nel = hp->items.lists[i].nel; |
| char *next = hp->items.lists[i].base; |
| |
| for ( ; nel--; next += hp->items.size ) |
| { |
| register ITEM *i = (ITEM *)next; |
| ITEM **ip = hp->tab.base + i->hdr.keyval % hp->tab.nel; |
| /* code currently assumes rehashing only when there are no free items */ |
| assert( i->data.key != 0 ); |
| |
| i->hdr.next = *ip; |
| *ip = i; |
| } |
| } |
| } |
| |
| void hashenumerate( struct hash * hp, void (* f)( void *, void * ), void * data ) |
| { |
| int i; |
| for ( i = 0; i <= hp->items.list; ++i ) |
| { |
| char * next = hp->items.lists[i].base; |
| int nel = hp->items.lists[i].nel; |
| if ( i == hp->items.list ) |
| nel -= hp->items.more; |
| |
| for ( ; nel--; next += hp->items.size ) |
| { |
| ITEM * i = (ITEM *)next; |
| if ( i->data.key != 0 ) /* DO not enumerate freed items. */ |
| f( &i->data, data ); |
| } |
| } |
| } |
| |
| /* --- */ |
| |
| # define ALIGNED(x) ( ( x + sizeof( ITEM ) - 1 ) & ~( sizeof( ITEM ) - 1 ) ) |
| |
| /* |
| * hashinit() - initialize a hash table, returning a handle |
| */ |
| |
| struct hash * |
| hashinit( |
| int datalen, |
| char *name ) |
| { |
| struct hash *hp = (struct hash *)hash_mem_alloc( datalen, sizeof( *hp ) ); |
| |
| hp->bloat = 3; |
| hp->tab.nel = 0; |
| hp->tab.base = (ITEM **)0; |
| hp->items.more = 0; |
| hp->items.free = 0; |
| hp->items.datalen = datalen; |
| hp->items.size = sizeof( struct hashhdr ) + ALIGNED( datalen ); |
| hp->items.list = -1; |
| hp->items.nel = 0; |
| hp->inel = 11 /* 47 */; |
| hp->name = name; |
| |
| return hp; |
| } |
| |
| /* |
| * hashdone() - free a hash table, given its handle |
| */ |
| |
| void |
| hashdone( struct hash *hp ) |
| { |
| int i; |
| |
| if ( !hp ) |
| return; |
| |
| if ( DEBUG_MEM || DEBUG_PROFILE ) |
| hashstat( hp ); |
| |
| if ( hp->tab.base ) |
| hash_mem_free( hp->items.datalen, (char *)hp->tab.base ); |
| for ( i = 0; i <= hp->items.list; ++i ) |
| hash_mem_free( hp->items.datalen, hp->items.lists[i].base ); |
| hash_mem_free( hp->items.datalen, (char *)hp ); |
| } |
| |
| static void * hash_mem_alloc(size_t datalen, size_t size) |
| { |
| if (sizeof(HASHDATA) == datalen) |
| { |
| return BJAM_MALLOC_RAW(size); |
| } |
| else |
| { |
| return BJAM_MALLOC(size); |
| } |
| } |
| |
| static void hash_mem_free(size_t datalen, void * data) |
| { |
| if (sizeof(HASHDATA) == datalen) |
| { |
| BJAM_FREE_RAW(data); |
| } |
| else |
| { |
| BJAM_FREE(data); |
| } |
| } |
| |
| #ifdef OPT_BOEHM_GC |
| static void hash_mem_finalizer(char * key, struct hash * hp) |
| { |
| HASHDATA d; |
| d.key = key; |
| hash_free(hp,&d); |
| } |
| #endif |
| |
| |
| /* ---- */ |
| |
| static void hashstat( struct hash * hp ) |
| { |
| ITEM * * tab = hp->tab.base; |
| int nel = hp->tab.nel; |
| int count = 0; |
| int sets = 0; |
| int run = ( tab[ nel - 1 ] != (ITEM *)0 ); |
| int i; |
| int here; |
| |
| for ( i = nel; i > 0; --i ) |
| { |
| if ( ( here = ( *tab++ != (ITEM *)0 ) ) ) |
| count++; |
| if ( here && !run ) |
| sets++; |
| run = here; |
| } |
| |
| printf( "%s table: %d+%d+%d (%dK+%luK) items+table+hash, %f density\n", |
| hp->name, |
| count, |
| hp->items.nel, |
| hp->tab.nel, |
| hp->items.nel * hp->items.size / 1024, |
| (long unsigned)hp->tab.nel * sizeof( ITEM ** ) / 1024, |
| (float)count / (float)sets ); |
| } |