cloudy
trunk
|
00001 /* This file is part of Cloudy and is copyright (C)1978-2008 by Gary J. Ferland and 00002 * others. For conditions of distribution and use see copyright notice in license.txt */ 00003 #include "cddefines.h" 00004 #include "hash.h" 00005 00006 /* Implement sorted linear hash table -- automatically expands to 00007 * get uniform bin filling, requires good hash function as 2^n masking is used. 00008 * 00009 * References 00010 * 00011 * W. Litwin, Linear hashing: A new tool for file and table addressing, 00012 * Proc. 6th Conference on Very Large Databases, pages 212-223, 1980. 00013 * 00014 * http://www.concentric.net/~Ttwang/tech/sorthash.htm 00015 * 00016 */ 00017 00018 /* Function to calculate 4 byte hash index -- must be good for 2^n table */ 00019 STATIC unsigned long hashfunction(const void *t, const size_t len); 00020 00021 /* Internal utility functions for hash table */ 00022 STATIC entry *lookup_key(const void *key, size_t *lkey, const hashtab *table, 00023 unsigned long *hk); 00024 STATIC void extend(hashtab *table); 00025 STATIC unsigned long getbin(unsigned long hk, const hashtab *table); 00026 00027 00028 hashtab *newhash(void (*freedata)(void *)) 00029 { 00030 hashtab *self; 00031 00032 DEBUG_ENTRY("newhash()"); 00033 00034 self = (hashtab *) MALLOC(sizeof(hashtab)); 00035 self->freedata = freedata; 00036 self->nelem = 0; 00037 self->size = 0x1; 00038 self->fullmask = 0x1; 00039 self->frontmask = 0x0; 00040 self->space = 128; 00041 self->hashfunction = hashfunction; 00042 self->tab = (entry **) MALLOC(self->space*sizeof(entry *)); 00043 self->tab[0] = NULL; 00044 00045 return self; 00046 } 00047 00048 void freehash(hashtab *table) 00049 { 00050 entry *s, *t; 00051 unsigned long i; 00052 00053 DEBUG_ENTRY("freehash()"); 00054 00055 for(i=0;i<table->size;i++) 00056 { 00057 s = table->tab[i]; 00058 while (s != NULL) 00059 { 00060 t = s->next; 00061 if(table->freedata != NULL) 00062 { 00063 table->freedata(s->data.p); 00064 } 00065 free(s); 00066 s = t; 00067 } 00068 } 00069 00070 free(table->tab); 00071 free(table); 00072 } 00073 00074 data_u *addentry(const void *key, size_t lkey, hashtab *table, int *exists) 00075 { 00076 unsigned long hk, i; 00077 entry *s; 00078 void *p; 00079 00080 DEBUG_ENTRY("addentry()"); 00081 00082 s = lookup_key(key,&lkey,table,&hk); 00083 if(s) 00084 { 00085 *exists = 1; 00086 return &(s->data); 00087 } 00088 00089 *exists = 0; 00090 p = MALLOC(sizeof(entry)+lkey); 00091 s = (entry *) p; 00092 s->hashval = hk; 00093 s->data.lkey = lkey; 00094 s->data.key = (void *)(((char *)p)+sizeof(entry)); 00095 memcpy(s->data.key,key,lkey); 00096 00097 i = getbin(hk,table); 00098 s->next = table->tab[i]; 00099 table->tab[i] = s; 00100 00101 table->nelem++; 00102 00103 extend(table); 00104 00105 return &(s->data); 00106 } 00107 00108 data_u *lookup(const void *key, size_t lkey, const hashtab *table) 00109 { 00110 unsigned long hk; 00111 entry *s; 00112 00113 if(table->nelem == 0) 00114 return NULL; 00115 s = lookup_key(key,&lkey,table,&hk); 00116 if(s == NULL) 00117 return NULL; 00118 return &(s->data); 00119 } 00120 00121 int maxchain(const hashtab *table) 00122 { 00123 unsigned long i, l, max=0; 00124 entry *s; 00125 00126 DEBUG_ENTRY("maxchain()"); 00127 00128 for(i=0;i<table->size;i++) 00129 { 00130 l = 0; 00131 for(s = table->tab[i];s != NULL;s=s->next) 00132 { 00133 l++; 00134 } 00135 if(l > max) 00136 max = l; 00137 } 00138 00139 return max; 00140 } 00141 00142 /* Internal utility functions */ 00143 /* Bernstein hash, see: 00144 http://www.cse.yorku.ca/~oz/hash.html 00145 http://www.burtleburtle.net/bob/hash/doobs.html 00146 */ 00147 enum {HASHINIT=5381,HASHMUL=33}; /* Bernstein */ 00148 /* enum {HASHINIT=0,HASHMUL=131}; */ 00149 STATIC unsigned long hashfunction(const void *t, const size_t len) 00150 { 00151 size_t i; 00152 unsigned long h = HASHINIT; 00153 unsigned char *s = (unsigned char *)t; 00154 00155 DEBUG_ENTRY("hashfunction()"); 00156 00157 for( i=0; i<len; i++ ) 00158 h = HASHMUL*h + s[i]; 00159 00160 return( h ); 00161 } 00162 00163 STATIC entry *lookup_key(const void *key, size_t *lkey, const hashtab *table, 00164 unsigned long *hk) 00165 { 00166 unsigned long i; 00167 entry *s; 00168 00169 DEBUG_ENTRY("lookup_key()"); 00170 00171 if(*lkey == 0) 00172 *lkey = strlen((char *) key)+1; 00173 00174 *hk = table->hashfunction(key,*lkey); 00175 i = getbin(*hk,table); 00176 00177 /* Look through list within bin */ 00178 for(s = table->tab[i];s!=NULL;s=s->next) 00179 { 00180 /* Check for match -- full hash value will likely distinguish */ 00181 if(*hk == s->hashval && 00182 *lkey == s->data.lkey && 00183 !memcmp(key,s->data.key,*lkey)) 00184 { 00185 return s; 00186 } 00187 } 00188 return NULL; 00189 } 00190 00191 STATIC void extend(hashtab *table) 00192 { 00193 unsigned long move, last, i, j; 00194 entry *t, *s; 00195 00196 DEBUG_ENTRY("extend()"); 00197 last = table->size; 00198 table->size++; 00199 if(last > table->fullmask) 00200 { /* Extend table when full */ 00201 table->frontmask = table->fullmask; 00202 table->fullmask <<= 1; 00203 table->fullmask |= 1; 00204 if(table->fullmask+1 > table->space) 00205 { 00206 table->space = table->fullmask+1; 00207 table->tab = (entry **) 00208 REALLOC(table->tab,(table->space)*sizeof(entry *)); 00209 } 00210 } 00211 00212 /* Split next bin in front part */ 00213 i = last & table->frontmask; 00214 t = table->tab[i]; 00215 table->tab[last] = table->tab[i] = NULL; 00216 move = table->frontmask ^ table->fullmask; 00217 while (t) 00218 { /* Go through list and sort between [last] and [i] */ 00219 if(t->hashval & move) 00220 { 00221 j = last; 00222 } 00223 else 00224 { 00225 j = i; 00226 } 00227 s = t->next; 00228 t->next = table->tab[j]; 00229 table->tab[j] = t; 00230 t = s; 00231 } 00232 } 00233 00234 STATIC unsigned long getbin(unsigned long hk, const hashtab *table) 00235 { 00236 unsigned long i; 00237 00238 DEBUG_ENTRY("getbin()"); 00239 i = hk & table->fullmask; 00240 if(i >= table->size) 00241 i &= table->frontmask; 00242 assert (i < table->size && i < table->space); 00243 return i; 00244 } 00245 00246 /* Copy data from hash into list, 00247 optionally selected according to a masking function */ 00248 unsigned long makelist(const hashtab *table, data_u **list, 00249 const unsigned long nlist, int (*maskfun)(data_u *dat)) 00250 { 00251 unsigned long i, n; 00252 entry *s; 00253 00254 DEBUG_ENTRY("makelist()"); 00255 00256 n = 0; 00257 for(i=0;i<table->size;i++) 00258 { 00259 for(s = table->tab[i];s != NULL;s = s->next) 00260 { 00261 if(maskfun == NULL || maskfun(&(s->data))) 00262 list[n++] = &(s->data); 00263 if(n > nlist) 00264 { 00265 fprintf(ioQQQ,"Too many list items for array provided in makelist\n"); 00266 cdEXIT(EXIT_FAILURE); 00267 } 00268 } 00269 } 00270 return n; 00271 } 00272 unsigned long makeplist(const hashtab *table, void **list, 00273 const unsigned long nlist, int (*maskfun)(data_u *dat)) 00274 { 00275 unsigned long i, n; 00276 entry *s; 00277 00278 DEBUG_ENTRY("makeplist()"); 00279 n = 0; 00280 for(i=0;i<table->size;i++) 00281 { 00282 for(s = table->tab[i];s != NULL;s = s->next) 00283 { 00284 if(maskfun == NULL || maskfun(&(s->data))) 00285 list[n++] = s->data.p; 00286 if(n > nlist) 00287 { 00288 fprintf(ioQQQ,"Too many list items for array provided in makeplist\n"); 00289 cdEXIT(EXIT_FAILURE); 00290 } 00291 } 00292 } 00293 return n; 00294 } 00295 00296 #ifdef TEST 00297 main() 00298 { 00299 hashtab *table; 00300 data_u *data; 00301 int i, exists; 00302 00303 char strings[][15] = {"Hello","Goodbye","Thirteen","One", 00304 "Two","Three","Four","Five","Six", 00305 "Seven","Eight"}; 00306 00307 table = newhash(NULL); 00308 for(i=0;i<sizeof(strings)/15;i++) 00309 { 00310 data = addentry(strings[i],0,table,&exists); 00311 data->i = i; 00312 } 00313 00314 for(i=0;i<sizeof(strings)/15;i++) 00315 { 00316 data = lookup(strings[i],0,table); 00317 if(data) 00318 { 00319 printf("Found data %d\n",data->i); 00320 } 00321 else 00322 { 00323 printf("Couldn't find data\n"); 00324 } 00325 } 00326 data = lookup("Banana",0,table); 00327 if(data) 00328 { 00329 printf("Found data %d\n",data->i); 00330 } 00331 else 00332 { 00333 printf("Couldn't find data (as expected)\n"); 00334 } 00335 printf("Longest chain is %d\n",maxchain(table)); 00336 } 00337 #endif