bloom.c (4669B)
1 /* 2 * Bloom filter tracking which scores are present in our arenas 3 * and (more importantly) which are not. 4 */ 5 6 #include "stdinc.h" 7 #include "dat.h" 8 #include "fns.h" 9 10 int ignorebloom; 11 12 int 13 bloominit(Bloom *b, vlong vsize, u8int *data) 14 { 15 ulong size; 16 17 size = vsize; 18 if(size != vsize){ /* truncation */ 19 werrstr("bloom data too big"); 20 return -1; 21 } 22 23 b->size = size; 24 b->nhash = 32; /* will be fixed by caller on initialization */ 25 if(data != nil) 26 if(unpackbloomhead(b, data) < 0) 27 return -1; 28 29 b->bitmask = (b->size<<3) - 1; 30 b->data = data; 31 return 0; 32 } 33 34 void 35 wbbloomhead(Bloom *b) 36 { 37 packbloomhead(b, b->data); 38 } 39 40 Bloom* 41 readbloom(Part *p) 42 { 43 uchar buf[512]; 44 Bloom *b; 45 46 b = vtmallocz(sizeof *b); 47 if(readpart(p, 0, buf, sizeof buf) < 0) 48 return nil; 49 /* 50 * pass buf as b->data so that bloominit 51 * can parse header. won't be used for 52 * accessing bits (cleared below). 53 */ 54 if(bloominit(b, 0, buf) < 0){ 55 vtfree(b); 56 return nil; 57 }else{ 58 /* 59 * default block size is system page size. 60 * the bloom filter is usually very big. 61 * bump the block size up to speed i/o. 62 */ 63 if(p->blocksize < (1<<20)){ 64 p->blocksize = 1<<20; 65 if(p->blocksize > p->size) 66 p->blocksize = p->size; 67 } 68 } 69 b->part = p; 70 b->data = nil; 71 return b; 72 } 73 74 int 75 resetbloom(Bloom *b) 76 { 77 uchar *data; 78 79 data = vtmallocz(b->size); 80 b->data = data; 81 if(b->size == MaxBloomSize) /* 2^32 overflows ulong */ 82 addstat(StatBloomBits, b->size*8-1); 83 else 84 addstat(StatBloomBits, b->size*8); 85 return 0; 86 } 87 88 int 89 loadbloom(Bloom *b) 90 { 91 int i, n; 92 uint ones; 93 uchar *data; 94 u32int *a; 95 96 data = vtmallocz(b->size); 97 if(readpart(b->part, 0, data, b->size) < 0){ 98 vtfree(b); 99 vtfree(data); 100 return -1; 101 } 102 b->data = data; 103 104 a = (u32int*)b->data; 105 n = b->size/4; 106 ones = 0; 107 for(i=0; i<n; i++) 108 ones += countbits(a[i]); 109 addstat(StatBloomOnes, ones); 110 111 if(b->size == MaxBloomSize) /* 2^32 overflows ulong */ 112 addstat(StatBloomBits, b->size*8-1); 113 else 114 addstat(StatBloomBits, b->size*8); 115 116 return 0; 117 } 118 119 int 120 writebloom(Bloom *b) 121 { 122 wbbloomhead(b); 123 if(writepart(b->part, 0, b->data, b->size) < 0) 124 return -1; 125 if(flushpart(b->part) < 0) 126 return -1; 127 return 0; 128 } 129 130 /* 131 * Derive two random 32-bit quantities a, b from the score 132 * and then use a+b*i as a sequence of bloom filter indices. 133 * Michael Mitzenmacher has a recent (2005) paper saying this is okay. 134 * We reserve the bottom bytes (BloomHeadSize*8 bits) for the header. 135 */ 136 static void 137 gethashes(u8int *score, ulong *h) 138 { 139 int i; 140 u32int a, b; 141 142 a = 0; 143 b = 0; 144 for(i=4; i+8<=VtScoreSize; i+=8){ 145 a ^= *(u32int*)(score+i); 146 b ^= *(u32int*)(score+i+4); 147 } 148 if(i+4 <= VtScoreSize) /* 20 is not 4-aligned */ 149 a ^= *(u32int*)(score+i); 150 for(i=0; i<BloomMaxHash; i++, a+=b) 151 h[i] = a < BloomHeadSize*8 ? BloomHeadSize*8 : a; 152 } 153 154 static void 155 _markbloomfilter(Bloom *b, u8int *score) 156 { 157 int i, nnew; 158 ulong h[BloomMaxHash]; 159 u32int x, *y, z, *tab; 160 161 trace("markbloomfilter", "markbloomfilter %V", score); 162 gethashes(score, h); 163 nnew = 0; 164 tab = (u32int*)b->data; 165 for(i=0; i<b->nhash; i++){ 166 x = h[i]; 167 y = &tab[(x&b->bitmask)>>5]; 168 z = 1<<(x&31); 169 if(!(*y&z)){ 170 nnew++; 171 *y |= z; 172 } 173 } 174 if(nnew) 175 addstat(StatBloomOnes, nnew); 176 177 trace("markbloomfilter", "markbloomfilter exit"); 178 } 179 180 static int 181 _inbloomfilter(Bloom *b, u8int *score) 182 { 183 int i; 184 ulong h[BloomMaxHash], x; 185 u32int *tab; 186 187 gethashes(score, h); 188 tab = (u32int*)b->data; 189 for(i=0; i<b->nhash; i++){ 190 x = h[i]; 191 if(!(tab[(x&b->bitmask)>>5] & (1<<(x&31)))) 192 return 0; 193 } 194 return 1; 195 } 196 197 int 198 inbloomfilter(Bloom *b, u8int *score) 199 { 200 int r; 201 202 if(b == nil || b->data == nil) 203 return 1; 204 205 if(ignorebloom) 206 return 1; 207 208 rlock(&b->lk); 209 r = _inbloomfilter(b, score); 210 runlock(&b->lk); 211 addstat(StatBloomLookup, 1); 212 if(r) 213 addstat(StatBloomMiss, 1); 214 else 215 addstat(StatBloomHit, 1); 216 return r; 217 } 218 219 void 220 markbloomfilter(Bloom *b, u8int *score) 221 { 222 if(b == nil || b->data == nil) 223 return; 224 225 rlock(&b->lk); 226 qlock(&b->mod); 227 _markbloomfilter(b, score); 228 qunlock(&b->mod); 229 runlock(&b->lk); 230 } 231 232 void 233 markbloomfiltern(Bloom *b, u8int score[][20], int n) 234 { 235 int i; 236 237 if(b == nil || b->data == nil) 238 return; 239 240 rlock(&b->lk); 241 qlock(&b->mod); 242 for(i=0; i<n; i++) 243 _markbloomfilter(b, score[i]); 244 qunlock(&b->mod); 245 runlock(&b->lk); 246 } 247 248 static void 249 bloomwriteproc(void *v) 250 { 251 int ret; 252 Bloom *b; 253 254 threadsetname("bloomwriteproc"); 255 b = v; 256 for(;;){ 257 recv(b->writechan, 0); 258 if((ret=writebloom(b)) < 0) 259 fprint(2, "oops! writing bloom: %r\n"); 260 else 261 ret = 0; 262 sendul(b->writedonechan, ret); 263 } 264 } 265 266 void 267 startbloomproc(Bloom *b) 268 { 269 b->writechan = chancreate(sizeof(void*), 0); 270 b->writedonechan = chancreate(sizeof(ulong), 0); 271 vtproc(bloomwriteproc, b); 272 }