hash.c (4596B)
1 #include <u.h> 2 #include <libc.h> 3 #include <bio.h> 4 #include "hash.h" 5 6 #define mergesort bayesmergesort 7 8 /*** 9 * String hash tables. 10 */ 11 12 Stringtab *tfree; 13 14 Stringtab* 15 taballoc(void) 16 { 17 static Stringtab *t; 18 static uint nt; 19 20 if(tfree){ 21 Stringtab *tt = tfree; 22 tfree = tt->link; 23 return tt; 24 } 25 26 if(nt == 0){ 27 t = malloc(64000*sizeof(Stringtab)); 28 if(t == 0) 29 sysfatal("out of memory"); 30 nt = 64000; 31 } 32 nt--; 33 return t++; 34 } 35 36 void 37 tabfree(Stringtab *tt) 38 { 39 tt->link = tfree; 40 tfree = tt; 41 } 42 43 char* 44 xstrdup(char *s, int len) 45 { 46 char *r; 47 static char *t; 48 static int nt; 49 50 if(nt < len){ 51 t = malloc(512*1024+len); 52 if(t == 0) 53 sysfatal("out of memory"); 54 nt = 512*1024; 55 } 56 r = t; 57 t += len; 58 nt -= len; 59 memmove(r, s, len); 60 return r; 61 } 62 63 static uint 64 hash(char *s, int n) 65 { 66 uint h; 67 uchar *p, *ep; 68 h = 0; 69 for(p=(uchar*)s, ep=p+n; p<ep; p++) 70 h = h*37 + *p; 71 return h; 72 } 73 74 static void 75 rehash(Hash *hh) 76 { 77 int h; 78 Stringtab *s; 79 80 if(hh->nstab == 0) 81 hh->nstab = 1024; 82 else 83 hh->nstab = hh->ntab*2; 84 85 free(hh->stab); 86 hh->stab = mallocz(hh->nstab*sizeof(Stringtab*), 1); 87 if(hh->stab == nil) 88 sysfatal("out of memory"); 89 90 for(s=hh->all; s; s=s->link){ 91 h = hash(s->str, s->n) % hh->nstab; 92 s->hash = hh->stab[h]; 93 hh->stab[h] = s; 94 } 95 } 96 97 Stringtab* 98 findstab(Hash *hh, char *str, int n, int create) 99 { 100 uint h; 101 Stringtab *tab, **l; 102 103 if(hh->nstab == 0) 104 rehash(hh); 105 106 h = hash(str, n) % hh->nstab; 107 for(tab=hh->stab[h], l=&hh->stab[h]; tab; l=&tab->hash, tab=tab->hash) 108 if(n==tab->n && memcmp(str, tab->str, n) == 0){ 109 *l = tab->hash; 110 tab->hash = hh->stab[h]; 111 hh->stab[h] = tab; 112 return tab; 113 } 114 115 if(!create) 116 return nil; 117 118 hh->sorted = 0; 119 tab = taballoc(); 120 tab->str = xstrdup(str, n); 121 tab->hash = hh->stab[h]; 122 tab->link = hh->all; 123 hh->all = tab; 124 tab->n = n; 125 tab->count = 0; 126 tab->date = 0; 127 hh->stab[h] = tab; 128 129 hh->ntab++; 130 if(hh->ntab > 2*hh->nstab && !(hh->ntab&(hh->ntab-1))) 131 rehash(hh); 132 return tab; 133 } 134 135 int 136 scmp(Stringtab *a, Stringtab *b) 137 { 138 int n, x; 139 140 if(a == 0) 141 return 1; 142 if(b == 0) 143 return -1; 144 n = a->n; 145 if(n > b->n) 146 n = b->n; 147 x = memcmp(a->str, b->str, n); 148 if(x != 0) 149 return x; 150 if(a->n < b->n) 151 return -1; 152 if(a->n > b->n) 153 return 1; 154 return 0; /* shouldn't happen */ 155 } 156 157 Stringtab* 158 merge(Stringtab *a, Stringtab *b) 159 { 160 Stringtab *s, **l; 161 162 l = &s; 163 while(a || b){ 164 if(scmp(a, b) < 0){ 165 *l = a; 166 l = &a->link; 167 a = a->link; 168 }else{ 169 *l = b; 170 l = &b->link; 171 b = b->link; 172 } 173 } 174 *l = 0; 175 return s; 176 } 177 178 Stringtab* 179 mergesort(Stringtab *s) 180 { 181 Stringtab *a, *b; 182 int delay; 183 184 if(s == nil) 185 return nil; 186 if(s->link == nil) 187 return s; 188 189 a = b = s; 190 delay = 1; 191 while(a && b){ 192 if(delay) /* easy way to handle 2-element list */ 193 delay = 0; 194 else 195 a = a->link; 196 if(b = b->link) 197 b = b->link; 198 } 199 200 b = a->link; 201 a->link = nil; 202 203 a = mergesort(s); 204 b = mergesort(b); 205 206 return merge(a, b); 207 } 208 209 Stringtab* 210 sortstab(Hash *hh) 211 { 212 if(!hh->sorted){ 213 hh->all = mergesort(hh->all); 214 hh->sorted = 1; 215 } 216 return hh->all; 217 } 218 219 int 220 Bwritehash(Biobuf *b, Hash *hh) 221 { 222 Stringtab *s; 223 int now; 224 225 now = time(0); 226 s = sortstab(hh); 227 Bprint(b, "# hash table\n"); 228 for(; s; s=s->link){ 229 if(s->count <= 0) 230 continue; 231 /* 232 * Entries that haven't been updated in thirty days get tossed. 233 */ 234 if(s->date+30*86400 < now) 235 continue; 236 Bwrite(b, s->str, s->n); 237 Bprint(b, "\t%d %d\n", s->count, s->date); 238 } 239 if(Bflush(b) == Beof) 240 return -1; 241 return 0; 242 } 243 244 void 245 Breadhash(Biobuf *b, Hash *hh, int scale) 246 { 247 char *s; 248 char *t; 249 int n; 250 int date; 251 Stringtab *st; 252 253 s = Brdstr(b, '\n', 1); 254 if(s == nil) 255 return; 256 if(strcmp(s, "# hash table") != 0) 257 sysfatal("bad hash table format"); 258 259 while(s = Brdline(b, '\n')){ 260 s[Blinelen(b)-1] = 0; 261 t = strrchr(s, '\t'); 262 if(t == nil) 263 sysfatal("bad hash table format"); 264 *t++ = '\0'; 265 if(*t < '0' || *t > '9') 266 sysfatal("bad hash table format"); 267 n = strtol(t, &t, 10); 268 date = time(0); 269 if(*t != 0){ 270 if(*t == ' '){ 271 t++; 272 date = strtol(t, &t, 10); 273 } 274 if(*t != 0) 275 sysfatal("bad hash table format"); 276 } 277 st = findstab(hh, s, strlen(s), 1); 278 if(date > st->date) 279 st->date = date; 280 st->count += n*scale; 281 } 282 } 283 284 void 285 freehash(Hash *h) 286 { 287 Stringtab *s, *next; 288 289 for(s=h->all; s; s=next){ 290 next = s->link; 291 tabfree(s); 292 } 293 free(h->stab); 294 free(h); 295 } 296 297 Biobuf* 298 Bopenlock(char *file, int mode) 299 { 300 int i; 301 Biobuf *b; 302 char err[ERRMAX]; 303 304 b = nil; 305 for(i=0; i<120; i++){ 306 if((b = Bopen(file, mode)) != nil) 307 break; 308 rerrstr(err, sizeof err); 309 if(strstr(err, "file is locked")==nil && strstr(err, "exclusive lock")==nil) 310 break; 311 sleep(1000); 312 } 313 return b; 314 }