plan9port

fork of plan9port with libvec, libstr and libsdb
Log | Files | Refs | README | LICENSE

bloom.c (4669B)


      1 /*
      2  * Bloom filter tracking which scores are present in our arenas
      3  * and (more importantly) which are not.
      4  */
      5 
      6 #include "stdinc.h"
      7 #include "dat.h"
      8 #include "fns.h"
      9 
     10 int ignorebloom;
     11 
     12 int
     13 bloominit(Bloom *b, vlong vsize, u8int *data)
     14 {
     15 	ulong size;
     16 
     17 	size = vsize;
     18 	if(size != vsize){	/* truncation */
     19 		werrstr("bloom data too big");
     20 		return -1;
     21 	}
     22 
     23 	b->size = size;
     24 	b->nhash = 32;	/* will be fixed by caller on initialization */
     25 	if(data != nil)
     26 		if(unpackbloomhead(b, data) < 0)
     27 			return -1;
     28 
     29 	b->bitmask = (b->size<<3) - 1;
     30 	b->data = data;
     31 	return 0;
     32 }
     33 
     34 void
     35 wbbloomhead(Bloom *b)
     36 {
     37 	packbloomhead(b, b->data);
     38 }
     39 
     40 Bloom*
     41 readbloom(Part *p)
     42 {
     43 	uchar buf[512];
     44 	Bloom *b;
     45 
     46 	b = vtmallocz(sizeof *b);
     47 	if(readpart(p, 0, buf, sizeof buf) < 0)
     48 		return nil;
     49 	/*
     50 	 * pass buf as b->data so that bloominit
     51 	 * can parse header.  won't be used for
     52 	 * accessing bits (cleared below).
     53 	 */
     54 	if(bloominit(b, 0, buf) < 0){
     55 		vtfree(b);
     56 		return nil;
     57 	}else{
     58 		/*
     59 		 * default block size is system page size.
     60 		 * the bloom filter is usually very big.
     61 		 * bump the block size up to speed i/o.
     62 		 */
     63 		if(p->blocksize < (1<<20)){
     64 			p->blocksize = 1<<20;
     65 			if(p->blocksize > p->size)
     66 				p->blocksize = p->size;
     67 		}
     68 	}
     69 	b->part = p;
     70 	b->data = nil;
     71 	return b;
     72 }
     73 
     74 int
     75 resetbloom(Bloom *b)
     76 {
     77 	uchar *data;
     78 
     79 	data = vtmallocz(b->size);
     80 	b->data = data;
     81 	if(b->size == MaxBloomSize)	/* 2^32 overflows ulong */
     82 		addstat(StatBloomBits, b->size*8-1);
     83 	else
     84 		addstat(StatBloomBits, b->size*8);
     85 	return 0;
     86 }
     87 
     88 int
     89 loadbloom(Bloom *b)
     90 {
     91 	int i, n;
     92 	uint ones;
     93 	uchar *data;
     94 	u32int *a;
     95 
     96 	data = vtmallocz(b->size);
     97 	if(readpart(b->part, 0, data, b->size) < 0){
     98 		vtfree(b);
     99 		vtfree(data);
    100 		return -1;
    101 	}
    102 	b->data = data;
    103 
    104 	a = (u32int*)b->data;
    105 	n = b->size/4;
    106 	ones = 0;
    107 	for(i=0; i<n; i++)
    108 		ones += countbits(a[i]);
    109 	addstat(StatBloomOnes, ones);
    110 
    111 	if(b->size == MaxBloomSize)	/* 2^32 overflows ulong */
    112 		addstat(StatBloomBits, b->size*8-1);
    113 	else
    114 		addstat(StatBloomBits, b->size*8);
    115 
    116 	return 0;
    117 }
    118 
    119 int
    120 writebloom(Bloom *b)
    121 {
    122 	wbbloomhead(b);
    123 	if(writepart(b->part, 0, b->data, b->size) < 0)
    124 		return -1;
    125 	if(flushpart(b->part) < 0)
    126 		return -1;
    127 	return 0;
    128 }
    129 
    130 /*
    131  * Derive two random 32-bit quantities a, b from the score
    132  * and then use a+b*i as a sequence of bloom filter indices.
    133  * Michael Mitzenmacher has a recent (2005) paper saying this is okay.
    134  * We reserve the bottom bytes (BloomHeadSize*8 bits) for the header.
    135  */
    136 static void
    137 gethashes(u8int *score, ulong *h)
    138 {
    139 	int i;
    140 	u32int a, b;
    141 
    142 	a = 0;
    143 	b = 0;
    144 	for(i=4; i+8<=VtScoreSize; i+=8){
    145 		a ^= *(u32int*)(score+i);
    146 		b ^= *(u32int*)(score+i+4);
    147 	}
    148 	if(i+4 <= VtScoreSize)	/* 20 is not 4-aligned */
    149 		a ^= *(u32int*)(score+i);
    150 	for(i=0; i<BloomMaxHash; i++, a+=b)
    151 		h[i] = a < BloomHeadSize*8 ? BloomHeadSize*8 : a;
    152 }
    153 
    154 static void
    155 _markbloomfilter(Bloom *b, u8int *score)
    156 {
    157 	int i, nnew;
    158 	ulong h[BloomMaxHash];
    159 	u32int x, *y, z, *tab;
    160 
    161 	trace("markbloomfilter", "markbloomfilter %V", score);
    162 	gethashes(score, h);
    163 	nnew = 0;
    164 	tab = (u32int*)b->data;
    165 	for(i=0; i<b->nhash; i++){
    166 		x = h[i];
    167 		y = &tab[(x&b->bitmask)>>5];
    168 		z = 1<<(x&31);
    169 		if(!(*y&z)){
    170 			nnew++;
    171 			*y |= z;
    172 		}
    173 	}
    174 	if(nnew)
    175 		addstat(StatBloomOnes, nnew);
    176 
    177 	trace("markbloomfilter", "markbloomfilter exit");
    178 }
    179 
    180 static int
    181 _inbloomfilter(Bloom *b, u8int *score)
    182 {
    183 	int i;
    184 	ulong h[BloomMaxHash], x;
    185 	u32int *tab;
    186 
    187 	gethashes(score, h);
    188 	tab = (u32int*)b->data;
    189 	for(i=0; i<b->nhash; i++){
    190 		x = h[i];
    191 		if(!(tab[(x&b->bitmask)>>5] & (1<<(x&31))))
    192 			return 0;
    193 	}
    194 	return 1;
    195 }
    196 
    197 int
    198 inbloomfilter(Bloom *b, u8int *score)
    199 {
    200 	int r;
    201 
    202 	if(b == nil || b->data == nil)
    203 		return 1;
    204 
    205 	if(ignorebloom)
    206 		return 1;
    207 
    208 	rlock(&b->lk);
    209 	r = _inbloomfilter(b, score);
    210 	runlock(&b->lk);
    211 	addstat(StatBloomLookup, 1);
    212 	if(r)
    213 		addstat(StatBloomMiss, 1);
    214 	else
    215 		addstat(StatBloomHit, 1);
    216 	return r;
    217 }
    218 
    219 void
    220 markbloomfilter(Bloom *b, u8int *score)
    221 {
    222 	if(b == nil || b->data == nil)
    223 		return;
    224 
    225 	rlock(&b->lk);
    226 	qlock(&b->mod);
    227 	_markbloomfilter(b, score);
    228 	qunlock(&b->mod);
    229 	runlock(&b->lk);
    230 }
    231 
    232 void
    233 markbloomfiltern(Bloom *b, u8int score[][20], int n)
    234 {
    235 	int i;
    236 
    237 	if(b == nil || b->data == nil)
    238 		return;
    239 
    240 	rlock(&b->lk);
    241 	qlock(&b->mod);
    242 	for(i=0; i<n; i++)
    243 		_markbloomfilter(b, score[i]);
    244 	qunlock(&b->mod);
    245 	runlock(&b->lk);
    246 }
    247 
    248 static void
    249 bloomwriteproc(void *v)
    250 {
    251 	int ret;
    252 	Bloom *b;
    253 
    254 	threadsetname("bloomwriteproc");
    255 	b = v;
    256 	for(;;){
    257 		recv(b->writechan, 0);
    258 		if((ret=writebloom(b)) < 0)
    259 			fprint(2, "oops! writing bloom: %r\n");
    260 		else
    261 			ret = 0;
    262 		sendul(b->writedonechan, ret);
    263 	}
    264 }
    265 
    266 void
    267 startbloomproc(Bloom *b)
    268 {
    269 	b->writechan = chancreate(sizeof(void*), 0);
    270 	b->writedonechan = chancreate(sizeof(ulong), 0);
    271 	vtproc(bloomwriteproc, b);
    272 }