plan9port

fork of plan9port with libvec, libstr and libsdb
Log | Files | Refs | README | LICENSE

index.c (19202B)


      1 /*
      2  * Index, mapping scores to log positions.
      3  *
      4  * The index is made up of some number of index sections, each of
      5  * which is typically stored on a different disk.  The blocks in all the
      6  * index sections are logically numbered, with each index section
      7  * responsible for a range of blocks.  Blocks are typically 8kB.
      8  *
      9  * The N index blocks are treated as a giant hash table.  The top 32 bits
     10  * of score are used as the key for a lookup.  Each index block holds
     11  * one hash bucket, which is responsible for ceil(2^32 / N) of the key space.
     12  *
     13  * The index is sized so that a particular bucket is extraordinarily
     14  * unlikely to overflow: assuming compressed data blocks are 4kB
     15  * on disk, and assuming each block has a 40 byte index entry,
     16  * the index data will be 1% of the total data.  Since scores are essentially
     17  * random, all buckets should be about the same fullness.
     18  * A factor of 5 gives us a wide comfort boundary to account for
     19  * random variation.  So the index disk space should be 5% of the arena disk space.
     20  */
     21 
     22 #include "stdinc.h"
     23 #include "dat.h"
     24 #include "fns.h"
     25 
     26 static int	initindex1(Index*);
     27 static ISect	*initisect1(ISect *is);
     28 
     29 #define KEY(k,d)	((d) ? (k)>>(32-(d)) : 0)
     30 
     31 static char IndexMagic[] = "venti index configuration";
     32 
     33 Index*
     34 initindex(char *name, ISect **sects, int n)
     35 {
     36 	IFile f;
     37 	Index *ix;
     38 	ISect *is;
     39 	u32int last, blocksize, tabsize;
     40 	int i;
     41 
     42 	if(n <= 0){
     43 fprint(2, "bad n\n");
     44 		seterr(EOk, "no index sections to initialize index");
     45 		return nil;
     46 	}
     47 	ix = MKZ(Index);
     48 	if(ix == nil){
     49 fprint(2, "no mem\n");
     50 		seterr(EOk, "can't initialize index: out of memory");
     51 		freeindex(ix);
     52 		return nil;
     53 	}
     54 
     55 	tabsize = sects[0]->tabsize;
     56 	if(partifile(&f, sects[0]->part, sects[0]->tabbase, tabsize) < 0)
     57 		return nil;
     58 	if(parseindex(&f, ix) < 0){
     59 		freeifile(&f);
     60 		freeindex(ix);
     61 		return nil;
     62 	}
     63 	freeifile(&f);
     64 	if(namecmp(ix->name, name) != 0){
     65 		seterr(ECorrupt, "mismatched index name: found %s expected %s", ix->name, name);
     66 		return nil;
     67 	}
     68 	if(ix->nsects != n){
     69 		seterr(ECorrupt, "mismatched number index sections: found %d expected %d", n, ix->nsects);
     70 		freeindex(ix);
     71 		return nil;
     72 	}
     73 	ix->sects = sects;
     74 	last = 0;
     75 	blocksize = ix->blocksize;
     76 	for(i = 0; i < ix->nsects; i++){
     77 		is = sects[i];
     78 		if(namecmp(is->index, ix->name) != 0) {
     79 			seterr(ECorrupt, "%s: index name is %s, not %s",
     80 				sects[i]->part->name, is->index, ix->name);
     81 		bad:
     82 			freeindex(ix);
     83 			return nil;
     84 		}
     85 		if(is->blocksize != blocksize) {
     86 			seterr(ECorrupt, "%s: blocksize is %d, not %d",
     87 				sects[i]->part->name, (int)is->blocksize, (int)blocksize);
     88 			goto bad;
     89 		}
     90 		if(is->tabsize != tabsize) {
     91 			seterr(ECorrupt, "%s: tabsize is %d, not %d",
     92 				sects[i]->part->name, (int)is->tabsize, (int)tabsize);
     93 			goto bad;
     94 		}
     95 		if(namecmp(is->name, ix->smap[i].name) != 0) {
     96 			seterr(ECorrupt, "%s: name is %s, not %s",
     97 				sects[i]->part->name, is->name, ix->smap[i].name);
     98 			goto bad;
     99 		}
    100 		if(is->start != ix->smap[i].start || is->stop != ix->smap[i].stop) {
    101 			seterr(ECorrupt, "%s: range is %lld,%lld, not %lld,%lld",
    102 				sects[i]->part->name, is->start, is->stop,
    103 				ix->smap[i].start, ix->smap[i].stop);
    104 			goto bad;
    105 		}
    106 		if(is->start > is->stop) {
    107 			seterr(ECorrupt, "%s: invalid range %lld,%lld",
    108 				sects[i]->part->name, is->start, is->stop);
    109 			goto bad;
    110 		}
    111 		if(is->start != last || is->start > is->stop) {
    112 			seterr(ECorrupt, "%s: range %lld-%lld, but last section ended at %lld",
    113 				sects[i]->part->name, is->start, is->stop, last);
    114 			goto bad;
    115 		}
    116 		last = is->stop;
    117 	}
    118 	ix->tabsize = tabsize;
    119 	ix->buckets = last;
    120 
    121 	if(initindex1(ix) < 0){
    122 		freeindex(ix);
    123 		return nil;
    124 	}
    125 
    126 	ix->arenas = MKNZ(Arena*, ix->narenas);
    127 	if(maparenas(ix->amap, ix->arenas, ix->narenas, ix->name) < 0){
    128 		freeindex(ix);
    129 		return nil;
    130 	}
    131 
    132 	return ix;
    133 }
    134 
    135 static int
    136 initindex1(Index *ix)
    137 {
    138 	u32int buckets;
    139 
    140 	ix->div = (((u64int)1 << 32) + ix->buckets - 1) / ix->buckets;
    141 	buckets = (((u64int)1 << 32) - 1) / ix->div + 1;
    142 	if(buckets != ix->buckets){
    143 		seterr(ECorrupt, "inconsistent math for divisor and buckets in %s", ix->name);
    144 		return -1;
    145 	}
    146 
    147 	return 0;
    148 }
    149 
    150 int
    151 wbindex(Index *ix)
    152 {
    153 	Fmt f;
    154 	ZBlock *b;
    155 	int i;
    156 
    157 	if(ix->nsects == 0){
    158 		seterr(EOk, "no sections in index %s", ix->name);
    159 		return -1;
    160 	}
    161 	b = alloczblock(ix->tabsize, 1, ix->blocksize);
    162 	if(b == nil){
    163 		seterr(EOk, "can't write index configuration: out of memory");
    164 		return -1;
    165 	}
    166 	fmtzbinit(&f, b);
    167 	if(outputindex(&f, ix) < 0){
    168 		seterr(EOk, "can't make index configuration: table storage too small %d", ix->tabsize);
    169 		freezblock(b);
    170 		return -1;
    171 	}
    172 	for(i = 0; i < ix->nsects; i++){
    173 		if(writepart(ix->sects[i]->part, ix->sects[i]->tabbase, b->data, ix->tabsize) < 0
    174 		|| flushpart(ix->sects[i]->part) < 0){
    175 			seterr(EOk, "can't write index: %r");
    176 			freezblock(b);
    177 			return -1;
    178 		}
    179 	}
    180 	freezblock(b);
    181 
    182 	for(i = 0; i < ix->nsects; i++)
    183 		if(wbisect(ix->sects[i]) < 0)
    184 			return -1;
    185 
    186 	return 0;
    187 }
    188 
    189 /*
    190  * index: IndexMagic '\n' version '\n' name '\n' blocksize '\n' [V2: bitblocks '\n'] sections arenas
    191  * version, blocksize: u32int
    192  * name: max. ANameSize string
    193  * sections, arenas: AMap
    194  */
    195 int
    196 outputindex(Fmt *f, Index *ix)
    197 {
    198 	if(fmtprint(f, "%s\n%ud\n%s\n%ud\n", IndexMagic, ix->version, ix->name, ix->blocksize) < 0
    199 	|| outputamap(f, ix->smap, ix->nsects) < 0
    200 	|| outputamap(f, ix->amap, ix->narenas) < 0)
    201 		return -1;
    202 	return 0;
    203 }
    204 
    205 int
    206 parseindex(IFile *f, Index *ix)
    207 {
    208 	AMapN amn;
    209 	u32int v;
    210 	char *s;
    211 
    212 	/*
    213 	 * magic
    214 	 */
    215 	s = ifileline(f);
    216 	if(s == nil || strcmp(s, IndexMagic) != 0){
    217 		seterr(ECorrupt, "bad index magic for %s", f->name);
    218 		return -1;
    219 	}
    220 
    221 	/*
    222 	 * version
    223 	 */
    224 	if(ifileu32int(f, &v) < 0){
    225 		seterr(ECorrupt, "syntax error: bad version number in %s", f->name);
    226 		return -1;
    227 	}
    228 	ix->version = v;
    229 	if(ix->version != IndexVersion){
    230 		seterr(ECorrupt, "bad version number in %s", f->name);
    231 		return -1;
    232 	}
    233 
    234 	/*
    235 	 * name
    236 	 */
    237 	if(ifilename(f, ix->name) < 0){
    238 		seterr(ECorrupt, "syntax error: bad index name in %s", f->name);
    239 		return -1;
    240 	}
    241 
    242 	/*
    243 	 * block size
    244 	 */
    245 	if(ifileu32int(f, &v) < 0){
    246 		seterr(ECorrupt, "syntax error: bad block size number in %s", f->name);
    247 		return -1;
    248 	}
    249 	ix->blocksize = v;
    250 
    251 	if(parseamap(f, &amn) < 0)
    252 		return -1;
    253 	ix->nsects = amn.n;
    254 	ix->smap = amn.map;
    255 
    256 	if(parseamap(f, &amn) < 0)
    257 		return -1;
    258 	ix->narenas = amn.n;
    259 	ix->amap = amn.map;
    260 
    261 	return 0;
    262 }
    263 
    264 /*
    265  * initialize an entirely new index
    266  */
    267 Index *
    268 newindex(char *name, ISect **sects, int n)
    269 {
    270 	Index *ix;
    271 	AMap *smap;
    272 	u64int nb;
    273 	u32int div, ub, xb, start, stop, blocksize, tabsize;
    274 	int i, j;
    275 
    276 	if(n < 1){
    277 		seterr(EOk, "creating index with no index sections");
    278 		return nil;
    279 	}
    280 
    281 	/*
    282 	 * compute the total buckets available in the index,
    283 	 * and the total buckets which are used.
    284 	 */
    285 	nb = 0;
    286 	blocksize = sects[0]->blocksize;
    287 	tabsize = sects[0]->tabsize;
    288 	for(i = 0; i < n; i++){
    289 		/*
    290 		 * allow index, start, and stop to be set if index is correct
    291 		 * and start and stop are what we would have picked.
    292 		 * this allows calling fmtindex to reformat the index after
    293 		 * replacing a bad index section with a freshly formatted one.
    294 		 * start and stop are checked below.
    295 		 */
    296 		if(sects[i]->index[0] != '\0' && strcmp(sects[i]->index, name) != 0){
    297 			seterr(EOk, "creating new index using non-empty section %s", sects[i]->name);
    298 			return nil;
    299 		}
    300 		if(blocksize != sects[i]->blocksize){
    301 			seterr(EOk, "%s has block size %d, but %s has %d",
    302 				sects[0]->part->name, (int)blocksize,
    303 				sects[i]->part->name, (int)sects[i]->blocksize);
    304 			return nil;
    305 		}
    306 		if(tabsize != sects[i]->tabsize){
    307 			seterr(EOk, "%s has table size %d, but %s has %d",
    308 				sects[0]->part->name, (int)tabsize,
    309 				sects[i]->part->name, (int)sects[i]->tabsize);
    310 			return nil;
    311 		}
    312 		nb += sects[i]->blocks;
    313 	}
    314 
    315 	/*
    316 	 * check for duplicate names
    317 	 */
    318 	for(i = 0; i < n; i++){
    319 		for(j = i + 1; j < n; j++){
    320 			if(namecmp(sects[i]->name, sects[j]->name) == 0){
    321 				seterr(EOk, "%s and %s both have section name %s",
    322 					sects[i]->part->name,
    323 					sects[j]->part->name,
    324 					sects[i]->name);
    325 				return nil;
    326 			}
    327 		}
    328 	}
    329 
    330 	if(nb >= ((u64int)1 << 32)){
    331 		fprint(2, "%s: index is 2^32 blocks or more; ignoring some of it\n",
    332 			argv0);
    333 		nb = ((u64int)1 << 32) - 1;
    334 	}
    335 
    336 	div = (((u64int)1 << 32) + nb - 1) / nb;
    337 	if(div < 100){
    338 		fprint(2, "%s: index divisor %d too coarse; "
    339 			"index larger than needed, ignoring some of it\n",
    340 			argv0, div);
    341 		div = 100;
    342 		nb = (((u64int)1 << 32) - 1) / (100 - 1);
    343 	}
    344 	ub = (((u64int)1 << 32) - 1) / div + 1;
    345 	if(ub > nb){
    346 		seterr(EBug, "index initialization math wrong");
    347 		return nil;
    348 	}
    349 	xb = nb - ub;
    350 
    351 	/*
    352 	 * initialize each of the index sections
    353 	 * and the section map table
    354 	 */
    355 	smap = MKNZ(AMap, n);
    356 	if(smap == nil){
    357 		seterr(EOk, "can't create new index: out of memory");
    358 		return nil;
    359 	}
    360 	start = 0;
    361 	for(i = 0; i < n; i++){
    362 		stop = start + sects[i]->blocks - xb / n;
    363 		if(i == n - 1)
    364 			stop = ub;
    365 
    366 		if(sects[i]->start != 0 || sects[i]->stop != 0)
    367 		if(sects[i]->start != start || sects[i]->stop != stop){
    368 			seterr(EOk, "creating new index using non-empty section %s", sects[i]->name);
    369 			return nil;
    370 		}
    371 
    372 		sects[i]->start = start;
    373 		sects[i]->stop = stop;
    374 		namecp(sects[i]->index, name);
    375 
    376 		smap[i].start = start;
    377 		smap[i].stop = stop;
    378 		namecp(smap[i].name, sects[i]->name);
    379 		start = stop;
    380 	}
    381 
    382 	/*
    383 	 * initialize the index itself
    384 	 */
    385 	ix = MKZ(Index);
    386 	if(ix == nil){
    387 		seterr(EOk, "can't create new index: out of memory");
    388 		free(smap);
    389 		return nil;
    390 	}
    391 	ix->version = IndexVersion;
    392 	namecp(ix->name, name);
    393 	ix->sects = sects;
    394 	ix->smap = smap;
    395 	ix->nsects = n;
    396 	ix->blocksize = blocksize;
    397 	ix->buckets = ub;
    398 	ix->tabsize = tabsize;
    399 	ix->div = div;
    400 
    401 	if(initindex1(ix) < 0){
    402 		free(smap);
    403 		return nil;
    404 	}
    405 
    406 	return ix;
    407 }
    408 
    409 ISect*
    410 initisect(Part *part)
    411 {
    412 	ISect *is;
    413 	ZBlock *b;
    414 	int ok;
    415 
    416 	b = alloczblock(HeadSize, 0, 0);
    417 	if(b == nil || readpart(part, PartBlank, b->data, HeadSize) < 0){
    418 		seterr(EAdmin, "can't read index section header: %r");
    419 		return nil;
    420 	}
    421 
    422 	is = MKZ(ISect);
    423 	if(is == nil){
    424 		freezblock(b);
    425 		return nil;
    426 	}
    427 	is->part = part;
    428 	ok = unpackisect(is, b->data);
    429 	freezblock(b);
    430 	if(ok < 0){
    431 		seterr(ECorrupt, "corrupted index section header: %r");
    432 		freeisect(is);
    433 		return nil;
    434 	}
    435 
    436 	if(is->version != ISectVersion1 && is->version != ISectVersion2){
    437 		seterr(EAdmin, "unknown index section version %d", is->version);
    438 		freeisect(is);
    439 		return nil;
    440 	}
    441 
    442 	return initisect1(is);
    443 }
    444 
    445 ISect*
    446 newisect(Part *part, u32int vers, char *name, u32int blocksize, u32int tabsize)
    447 {
    448 	ISect *is;
    449 	u32int tabbase;
    450 
    451 	is = MKZ(ISect);
    452 	if(is == nil)
    453 		return nil;
    454 
    455 	namecp(is->name, name);
    456 	is->version = vers;
    457 	is->part = part;
    458 	is->blocksize = blocksize;
    459 	is->start = 0;
    460 	is->stop = 0;
    461 	tabbase = (PartBlank + HeadSize + blocksize - 1) & ~(blocksize - 1);
    462 	is->blockbase = (tabbase + tabsize + blocksize - 1) & ~(blocksize - 1);
    463 	is->blocks = is->part->size / blocksize - is->blockbase / blocksize;
    464 	is->bucketmagic = 0;
    465 	if(is->version == ISectVersion2){
    466 		do{
    467 			is->bucketmagic = fastrand();
    468 		}while(is->bucketmagic==0);
    469 	}
    470 	is = initisect1(is);
    471 	if(is == nil)
    472 		return nil;
    473 
    474 	return is;
    475 }
    476 
    477 /*
    478  * initialize the computed parameters for an index
    479  */
    480 static ISect*
    481 initisect1(ISect *is)
    482 {
    483 	u64int v;
    484 
    485 	is->buckmax = (is->blocksize - IBucketSize) / IEntrySize;
    486 	is->blocklog = u64log2(is->blocksize);
    487 	if(is->blocksize != (1 << is->blocklog)){
    488 		seterr(ECorrupt, "illegal non-power-of-2 bucket size %d\n", is->blocksize);
    489 		freeisect(is);
    490 		return nil;
    491 	}
    492 	partblocksize(is->part, is->blocksize);
    493 	is->tabbase = (PartBlank + HeadSize + is->blocksize - 1) & ~(is->blocksize - 1);
    494 	if(is->tabbase >= is->blockbase){
    495 		seterr(ECorrupt, "index section config table overlaps bucket storage");
    496 		freeisect(is);
    497 		return nil;
    498 	}
    499 	is->tabsize = is->blockbase - is->tabbase;
    500 	v = is->part->size & ~(u64int)(is->blocksize - 1);
    501 	if(is->blockbase + (u64int)is->blocks * is->blocksize != v){
    502 		seterr(ECorrupt, "invalid blocks in index section %s", is->name);
    503 		/* ZZZ what to do?
    504 		freeisect(is);
    505 		return nil;
    506 		*/
    507 	}
    508 
    509 	if(is->stop - is->start > is->blocks){
    510 		seterr(ECorrupt, "index section overflows available space");
    511 		freeisect(is);
    512 		return nil;
    513 	}
    514 	if(is->start > is->stop){
    515 		seterr(ECorrupt, "invalid index section range");
    516 		freeisect(is);
    517 		return nil;
    518 	}
    519 
    520 	return is;
    521 }
    522 
    523 int
    524 wbisect(ISect *is)
    525 {
    526 	ZBlock *b;
    527 
    528 	b = alloczblock(HeadSize, 1, 0);
    529 	if(b == nil){
    530 		/* ZZZ set error? */
    531 		return -1;
    532 	}
    533 
    534 	if(packisect(is, b->data) < 0){
    535 		seterr(ECorrupt, "can't make index section header: %r");
    536 		freezblock(b);
    537 		return -1;
    538 	}
    539 	if(writepart(is->part, PartBlank, b->data, HeadSize) < 0 || flushpart(is->part) < 0){
    540 		seterr(EAdmin, "can't write index section header: %r");
    541 		freezblock(b);
    542 		return -1;
    543 	}
    544 	freezblock(b);
    545 
    546 	return 0;
    547 }
    548 
    549 void
    550 freeisect(ISect *is)
    551 {
    552 	if(is == nil)
    553 		return;
    554 	free(is);
    555 }
    556 
    557 void
    558 freeindex(Index *ix)
    559 {
    560 	int i;
    561 
    562 	if(ix == nil)
    563 		return;
    564 	free(ix->amap);
    565 	free(ix->arenas);
    566 	if(ix->sects)
    567 		for(i = 0; i < ix->nsects; i++)
    568 			freeisect(ix->sects[i]);
    569 	free(ix->sects);
    570 	free(ix->smap);
    571 	free(ix);
    572 }
    573 
    574 /*
    575  * write a clump to an available arena in the index
    576  * and return the address of the clump within the index.
    577 ZZZ question: should this distinguish between an arena
    578 filling up and real errors writing the clump?
    579  */
    580 u64int
    581 writeiclump(Index *ix, Clump *c, u8int *clbuf)
    582 {
    583 	u64int a;
    584 	int i;
    585 	IAddr ia;
    586 	AState as;
    587 
    588 	trace(TraceLump, "writeiclump enter");
    589 	qlock(&ix->writing);
    590 	for(i = ix->mapalloc; i < ix->narenas; i++){
    591 		a = writeaclump(ix->arenas[i], c, clbuf);
    592 		if(a != TWID64){
    593 			ix->mapalloc = i;
    594 			ia.addr = ix->amap[i].start + a;
    595 			ia.type = c->info.type;
    596 			ia.size = c->info.uncsize;
    597 			ia.blocks = (c->info.size + ClumpSize + (1<<ABlockLog) - 1) >> ABlockLog;
    598 			as.arena = ix->arenas[i];
    599 			as.aa = ia.addr;
    600 			as.stats = as.arena->memstats;
    601 			insertscore(c->info.score, &ia, IEDirty, &as);
    602 			qunlock(&ix->writing);
    603 			trace(TraceLump, "writeiclump exit");
    604 			return ia.addr;
    605 		}
    606 	}
    607 	qunlock(&ix->writing);
    608 
    609 	seterr(EAdmin, "no space left in arenas");
    610 	trace(TraceLump, "writeiclump failed");
    611 	return TWID64;
    612 }
    613 
    614 /*
    615  * convert an arena index to an relative arena address
    616  */
    617 Arena*
    618 amapitoa(Index *ix, u64int a, u64int *aa)
    619 {
    620 	int i, r, l, m;
    621 
    622 	l = 1;
    623 	r = ix->narenas - 1;
    624 	while(l <= r){
    625 		m = (r + l) / 2;
    626 		if(ix->amap[m].start <= a)
    627 			l = m + 1;
    628 		else
    629 			r = m - 1;
    630 	}
    631 	l--;
    632 
    633 	if(a > ix->amap[l].stop){
    634 for(i=0; i<ix->narenas; i++)
    635 	print("arena %d: %llux - %llux\n", i, ix->amap[i].start, ix->amap[i].stop);
    636 print("want arena %d for %llux\n", l, a);
    637 		seterr(ECrash, "unmapped address passed to amapitoa");
    638 		return nil;
    639 	}
    640 
    641 	if(ix->arenas[l] == nil){
    642 		seterr(ECrash, "unmapped arena selected in amapitoa");
    643 		return nil;
    644 	}
    645 	*aa = a - ix->amap[l].start;
    646 	return ix->arenas[l];
    647 }
    648 
    649 /*
    650  * convert an arena index to the bounds of the containing arena group.
    651  */
    652 Arena*
    653 amapitoag(Index *ix, u64int a, u64int *gstart, u64int *glimit, int *g)
    654 {
    655 	u64int aa;
    656 	Arena *arena;
    657 
    658 	arena = amapitoa(ix, a, &aa);
    659 	if(arena == nil)
    660 		return nil;
    661 	if(arenatog(arena, aa, gstart, glimit, g) < 0)
    662 		return nil;
    663 	*gstart += a - aa;
    664 	*glimit += a - aa;
    665 	return arena;
    666 }
    667 
    668 int
    669 iaddrcmp(IAddr *ia1, IAddr *ia2)
    670 {
    671 	return ia1->type != ia2->type
    672 		|| ia1->size != ia2->size
    673 		|| ia1->blocks != ia2->blocks
    674 		|| ia1->addr != ia2->addr;
    675 }
    676 
    677 /*
    678  * lookup the score in the partition
    679  *
    680  * nothing needs to be explicitly locked:
    681  * only static parts of ix are used, and
    682  * the bucket is locked by the DBlock lock.
    683  */
    684 int
    685 loadientry(Index *ix, u8int *score, int type, IEntry *ie)
    686 {
    687 	ISect *is;
    688 	DBlock *b;
    689 	IBucket ib;
    690 	u32int buck;
    691 	int h, ok;
    692 
    693 	ok = -1;
    694 
    695 	trace(TraceLump, "loadientry enter");
    696 
    697 	/*
    698 	qlock(&stats.lock);
    699 	stats.indexreads++;
    700 	qunlock(&stats.lock);
    701 	*/
    702 
    703 	if(!inbloomfilter(mainindex->bloom, score)){
    704 		trace(TraceLump, "loadientry bloomhit");
    705 		return -1;
    706 	}
    707 
    708 	trace(TraceLump, "loadientry loadibucket");
    709 	b = loadibucket(ix, score, &is, &buck, &ib);
    710 	trace(TraceLump, "loadientry loadedibucket");
    711 	if(b == nil)
    712 		return -1;
    713 
    714 	if(okibucket(&ib, is) < 0){
    715 		trace(TraceLump, "loadientry badbucket");
    716 		goto out;
    717 	}
    718 
    719 	h = bucklook(score, type, ib.data, ib.n);
    720 	if(h & 1){
    721 		h ^= 1;
    722 		trace(TraceLump, "loadientry found");
    723 		unpackientry(ie, &ib.data[h]);
    724 		ok = 0;
    725 		goto out;
    726 	}
    727 	trace(TraceLump, "loadientry notfound");
    728 	addstat(StatBloomFalseMiss, 1);
    729 out:
    730 	putdblock(b);
    731 	trace(TraceLump, "loadientry exit");
    732 	return ok;
    733 }
    734 
    735 int
    736 okibucket(IBucket *ib, ISect *is)
    737 {
    738 	if(ib->n <= is->buckmax)
    739 		return 0;
    740 
    741 	seterr(EICorrupt, "corrupted disk index bucket: n=%ud max=%ud, range=[%lud,%lud)",
    742 		ib->n, is->buckmax, is->start, is->stop);
    743 	return -1;
    744 }
    745 
    746 /*
    747  * look for score within data;
    748  * return 1 | byte index of matching index,
    749  * or 0 | index of least element > score
    750  */
    751 int
    752 bucklook(u8int *score, int otype, u8int *data, int n)
    753 {
    754 	int i, r, l, m, h, c, cc, type;
    755 
    756 	if(otype == -1)
    757 		type = -1;
    758 	else
    759 		type = vttodisktype(otype);
    760 	l = 0;
    761 	r = n - 1;
    762 	while(l <= r){
    763 		m = (r + l) >> 1;
    764 		h = m * IEntrySize;
    765 		for(i = 0; i < VtScoreSize; i++){
    766 			c = score[i];
    767 			cc = data[h + i];
    768 			if(c != cc){
    769 				if(c > cc)
    770 					l = m + 1;
    771 				else
    772 					r = m - 1;
    773 				goto cont;
    774 			}
    775 		}
    776 		cc = data[h + IEntryTypeOff];
    777 		if(type != cc && type != -1){
    778 			if(type > cc)
    779 				l = m + 1;
    780 			else
    781 				r = m - 1;
    782 			goto cont;
    783 		}
    784 		return h | 1;
    785 	cont:;
    786 	}
    787 
    788 	return l * IEntrySize;
    789 }
    790 
    791 /*
    792  * compare two IEntries; consistent with bucklook
    793  */
    794 int
    795 ientrycmp(const void *vie1, const void *vie2)
    796 {
    797 	u8int *ie1, *ie2;
    798 	int i, v1, v2;
    799 
    800 	ie1 = (u8int*)vie1;
    801 	ie2 = (u8int*)vie2;
    802 	for(i = 0; i < VtScoreSize; i++){
    803 		v1 = ie1[i];
    804 		v2 = ie2[i];
    805 		if(v1 != v2){
    806 			if(v1 < v2)
    807 				return -1;
    808 			return 1;
    809 		}
    810 	}
    811 	v1 = ie1[IEntryTypeOff];
    812 	v2 = ie2[IEntryTypeOff];
    813 	if(v1 != v2){
    814 		if(v1 < v2)
    815 			return -1;
    816 		return 1;
    817 	}
    818 	return 0;
    819 }
    820 
    821 /*
    822  * find the number of the index section holding bucket #buck
    823  */
    824 int
    825 indexsect0(Index *ix, u32int buck)
    826 {
    827 	int r, l, m;
    828 
    829 	l = 1;
    830 	r = ix->nsects - 1;
    831 	while(l <= r){
    832 		m = (r + l) >> 1;
    833 		if(ix->sects[m]->start <= buck)
    834 			l = m + 1;
    835 		else
    836 			r = m - 1;
    837 	}
    838 	return l - 1;
    839 }
    840 
    841 /*
    842  * load the index block at bucket #buck
    843  */
    844 static DBlock*
    845 loadibucket0(Index *ix, u32int buck, ISect **pis, u32int *pbuck, IBucket *ib, int mode)
    846 {
    847 	ISect *is;
    848 	DBlock *b;
    849 
    850 	is = ix->sects[indexsect0(ix, buck)];
    851 	if(buck < is->start || is->stop <= buck){
    852 		seterr(EAdmin, "index lookup out of range: %ud not found in index\n", buck);
    853 		return nil;
    854 	}
    855 
    856 	buck -= is->start;
    857 	if((b = getdblock(is->part, is->blockbase + ((u64int)buck << is->blocklog), mode)) == nil)
    858 		return nil;
    859 
    860 	if(pis)
    861 		*pis = is;
    862 	if(pbuck)
    863 		*pbuck = buck;
    864 	if(ib)
    865 		unpackibucket(ib, b->data, is->bucketmagic);
    866 	return b;
    867 }
    868 
    869 /*
    870  * find the number of the index section holding score
    871  */
    872 int
    873 indexsect1(Index *ix, u8int *score)
    874 {
    875 	return indexsect0(ix, hashbits(score, 32) / ix->div);
    876 }
    877 
    878 /*
    879  * load the index block responsible for score.
    880  */
    881 static DBlock*
    882 loadibucket1(Index *ix, u8int *score, ISect **pis, u32int *pbuck, IBucket *ib)
    883 {
    884 	return loadibucket0(ix, hashbits(score, 32)/ix->div, pis, pbuck, ib, OREAD);
    885 }
    886 
    887 int
    888 indexsect(Index *ix, u8int *score)
    889 {
    890 	return indexsect1(ix, score);
    891 }
    892 
    893 DBlock*
    894 loadibucket(Index *ix, u8int *score, ISect **pis, u32int *pbuck, IBucket *ib)
    895 {
    896 	return loadibucket1(ix, score, pis, pbuck, ib);
    897 }