plan9port

fork of plan9port with libvec, libstr and libsdb
Log | Files | Refs | README | LICENSE

hash.c (4596B)


      1 #include <u.h>
      2 #include <libc.h>
      3 #include <bio.h>
      4 #include "hash.h"
      5 
      6 #define mergesort bayesmergesort
      7 
      8 /***
      9  * String hash tables.
     10  */
     11 
     12 Stringtab *tfree;
     13 
     14 Stringtab*
     15 taballoc(void)
     16 {
     17 	static Stringtab *t;
     18 	static uint nt;
     19 
     20 	if(tfree){
     21 		Stringtab *tt = tfree;
     22 		tfree = tt->link;
     23 		return tt;
     24 	}
     25 
     26 	if(nt == 0){
     27 		t = malloc(64000*sizeof(Stringtab));
     28 		if(t == 0)
     29 			sysfatal("out of memory");
     30 		nt = 64000;
     31 	}
     32 	nt--;
     33 	return t++;
     34 }
     35 
     36 void
     37 tabfree(Stringtab *tt)
     38 {
     39 	tt->link = tfree;
     40 	tfree = tt;
     41 }
     42 
     43 char*
     44 xstrdup(char *s, int len)
     45 {
     46 	char *r;
     47 	static char *t;
     48 	static int nt;
     49 
     50 	if(nt < len){
     51 		t = malloc(512*1024+len);
     52 		if(t == 0)
     53 			sysfatal("out of memory");
     54 		nt = 512*1024;
     55 	}
     56 	r = t;
     57 	t += len;
     58 	nt -= len;
     59 	memmove(r, s, len);
     60 	return r;
     61 }
     62 
     63 static uint
     64 hash(char *s, int n)
     65 {
     66 	uint h;
     67 	uchar *p, *ep;
     68 	h = 0;
     69 	for(p=(uchar*)s, ep=p+n; p<ep; p++)
     70 		h = h*37 + *p;
     71 	return h;
     72 }
     73 
     74 static void
     75 rehash(Hash *hh)
     76 {
     77 	int h;
     78 	Stringtab *s;
     79 
     80 	if(hh->nstab == 0)
     81 		hh->nstab = 1024;
     82 	else
     83 		hh->nstab = hh->ntab*2;
     84 
     85 	free(hh->stab);
     86 	hh->stab = mallocz(hh->nstab*sizeof(Stringtab*), 1);
     87 	if(hh->stab == nil)
     88 		sysfatal("out of memory");
     89 
     90 	for(s=hh->all; s; s=s->link){
     91 		h = hash(s->str, s->n) % hh->nstab;
     92 		s->hash = hh->stab[h];
     93 		hh->stab[h] = s;
     94 	}
     95 }
     96 
     97 Stringtab*
     98 findstab(Hash *hh, char *str, int n, int create)
     99 {
    100 	uint h;
    101 	Stringtab *tab, **l;
    102 
    103 	if(hh->nstab == 0)
    104 		rehash(hh);
    105 
    106 	h = hash(str, n) % hh->nstab;
    107 	for(tab=hh->stab[h], l=&hh->stab[h]; tab; l=&tab->hash, tab=tab->hash)
    108 		if(n==tab->n && memcmp(str, tab->str, n) == 0){
    109 			*l = tab->hash;
    110 			tab->hash = hh->stab[h];
    111 			hh->stab[h] = tab;
    112 			return tab;
    113 		}
    114 
    115 	if(!create)
    116 		return nil;
    117 
    118 	hh->sorted = 0;
    119 	tab = taballoc();
    120 	tab->str = xstrdup(str, n);
    121 	tab->hash = hh->stab[h];
    122 	tab->link = hh->all;
    123 	hh->all = tab;
    124 	tab->n = n;
    125 	tab->count = 0;
    126 	tab->date = 0;
    127 	hh->stab[h] = tab;
    128 
    129 	hh->ntab++;
    130 	if(hh->ntab > 2*hh->nstab && !(hh->ntab&(hh->ntab-1)))
    131 		rehash(hh);
    132 	return tab;
    133 }
    134 
    135 int
    136 scmp(Stringtab *a, Stringtab *b)
    137 {
    138 	int n, x;
    139 
    140 	if(a == 0)
    141 		return 1;
    142 	if(b == 0)
    143 		return -1;
    144 	n = a->n;
    145 	if(n > b->n)
    146 		n = b->n;
    147 	x = memcmp(a->str, b->str, n);
    148 	if(x != 0)
    149 		return x;
    150 	if(a->n < b->n)
    151 		return -1;
    152 	if(a->n > b->n)
    153 		return 1;
    154 	return 0;	/* shouldn't happen */
    155 }
    156 
    157 Stringtab*
    158 merge(Stringtab *a, Stringtab *b)
    159 {
    160 	Stringtab *s, **l;
    161 
    162 	l = &s;
    163 	while(a || b){
    164 		if(scmp(a, b) < 0){
    165 			*l = a;
    166 			l = &a->link;
    167 			a = a->link;
    168 		}else{
    169 			*l = b;
    170 			l = &b->link;
    171 			b = b->link;
    172 		}
    173 	}
    174 	*l = 0;
    175 	return s;
    176 }
    177 
    178 Stringtab*
    179 mergesort(Stringtab *s)
    180 {
    181 	Stringtab *a, *b;
    182 	int delay;
    183 
    184 	if(s == nil)
    185 		return nil;
    186 	if(s->link == nil)
    187 		return s;
    188 
    189 	a = b = s;
    190 	delay = 1;
    191 	while(a && b){
    192 		if(delay)	/* easy way to handle 2-element list */
    193 			delay = 0;
    194 		else
    195 			a = a->link;
    196 		if(b = b->link)
    197 			b = b->link;
    198 	}
    199 
    200 	b = a->link;
    201 	a->link = nil;
    202 
    203 	a = mergesort(s);
    204 	b = mergesort(b);
    205 
    206 	return merge(a, b);
    207 }
    208 
    209 Stringtab*
    210 sortstab(Hash *hh)
    211 {
    212 	if(!hh->sorted){
    213 		hh->all = mergesort(hh->all);
    214 		hh->sorted = 1;
    215 	}
    216 	return hh->all;
    217 }
    218 
    219 int
    220 Bwritehash(Biobuf *b, Hash *hh)
    221 {
    222 	Stringtab *s;
    223 	int now;
    224 
    225 	now = time(0);
    226 	s = sortstab(hh);
    227 	Bprint(b, "# hash table\n");
    228 	for(; s; s=s->link){
    229 		if(s->count <= 0)
    230 			continue;
    231 		/*
    232 		 * Entries that haven't been updated in thirty days get tossed.
    233 		 */
    234 		if(s->date+30*86400 < now)
    235 			continue;
    236 		Bwrite(b, s->str, s->n);
    237 		Bprint(b, "\t%d %d\n", s->count, s->date);
    238 	}
    239 	if(Bflush(b) == Beof)
    240 		return -1;
    241 	return 0;
    242 }
    243 
    244 void
    245 Breadhash(Biobuf *b, Hash *hh, int scale)
    246 {
    247 	char *s;
    248 	char *t;
    249 	int n;
    250 	int date;
    251 	Stringtab *st;
    252 
    253 	s = Brdstr(b, '\n', 1);
    254 	if(s == nil)
    255 		return;
    256 	if(strcmp(s, "# hash table") != 0)
    257 		sysfatal("bad hash table format");
    258 
    259 	while(s = Brdline(b, '\n')){
    260 		s[Blinelen(b)-1] = 0;
    261 		t = strrchr(s, '\t');
    262 		if(t == nil)
    263 			sysfatal("bad hash table format");
    264 		*t++ = '\0';
    265 		if(*t < '0' || *t > '9')
    266 			sysfatal("bad hash table format");
    267 		n = strtol(t, &t, 10);
    268 		date = time(0);
    269 		if(*t != 0){
    270 			if(*t == ' '){
    271 				t++;
    272 				date = strtol(t, &t, 10);
    273 			}
    274 			if(*t != 0)
    275 				sysfatal("bad hash table format");
    276 		}
    277 		st = findstab(hh, s, strlen(s), 1);
    278 		if(date > st->date)
    279 			st->date = date;
    280 		st->count += n*scale;
    281 	}
    282 }
    283 
    284 void
    285 freehash(Hash *h)
    286 {
    287 	Stringtab *s, *next;
    288 
    289 	for(s=h->all; s; s=next){
    290 		next = s->link;
    291 		tabfree(s);
    292 	}
    293 	free(h->stab);
    294 	free(h);
    295 }
    296 
    297 Biobuf*
    298 Bopenlock(char *file, int mode)
    299 {
    300 	int i;
    301 	Biobuf *b;
    302 	char err[ERRMAX];
    303 
    304 	b = nil;
    305 	for(i=0; i<120; i++){
    306 		if((b = Bopen(file, mode)) != nil)
    307 			break;
    308 		rerrstr(err, sizeof err);
    309 		if(strstr(err, "file is locked")==nil && strstr(err, "exclusive lock")==nil)
    310 			break;
    311 		sleep(1000);
    312 	}
    313 	return b;
    314 }