plan9port

fork of plan9port with libvec, libstr and libsdb
Log | Files | Refs | README | LICENSE

file.c (23784B)


      1 /*
      2  * Manage tree of VtFiles stored in the block cache.
      3  *
      4  * The single point of truth for the info about the VtFiles themselves
      5  * is the block data.  Because of this, there is no explicit locking of
      6  * VtFile structures, and indeed there may be more than one VtFile
      7  * structure for a given Venti file.  They synchronize through the
      8  * block cache.
      9  *
     10  * This is a bit simpler than fossil because there are no epochs
     11  * or tags or anything else.  Just mutable local blocks and immutable
     12  * Venti blocks.
     13  */
     14 
     15 #include <u.h>
     16 #include <libc.h>
     17 #include <venti.h>
     18 
     19 #define MaxBlock (1UL<<31)
     20 
     21 static char ENotDir[] = "walk in non-directory";
     22 static char ETooBig[] = "file too big";
     23 /* static char EBadAddr[] = "bad address"; */
     24 static char ELabelMismatch[] = "label mismatch";
     25 
     26 static int	sizetodepth(uvlong s, int psize, int dsize);
     27 static VtBlock 	*fileload(VtFile *r, VtEntry *e);
     28 static int	shrinkdepth(VtFile*, VtBlock*, VtEntry*, int);
     29 static int	shrinksize(VtFile*, VtEntry*, uvlong);
     30 static int	growdepth(VtFile*, VtBlock*, VtEntry*, int);
     31 
     32 #define ISLOCKED(r)	((r)->b != nil)
     33 #define DEPTH(t)	((t)&VtTypeDepthMask)
     34 
     35 static VtFile *
     36 vtfilealloc(VtCache *c, VtBlock *b, VtFile *p, u32int offset, int mode)
     37 {
     38 	int epb;
     39 	VtEntry e;
     40 	VtFile *r;
     41 
     42 	assert(p==nil || ISLOCKED(p));
     43 
     44 	if(p == nil){
     45 		assert(offset == 0);
     46 		epb = 1;
     47 	}else
     48 		epb = p->dsize / VtEntrySize;
     49 
     50 	if(b->type != VtDirType){
     51 		werrstr("bad block type %#uo", b->type);
     52 		return nil;
     53 	}
     54 
     55 	/*
     56 	 * a non-active entry is the only thing that
     57 	 * can legitimately happen here. all the others
     58 	 * get prints.
     59 	 */
     60 	if(vtentryunpack(&e, b->data, offset % epb) < 0){
     61 		fprint(2, "vtentryunpack failed: %r (%.*H)\n", VtEntrySize, b->data+VtEntrySize*(offset%epb));
     62 		return nil;
     63 	}
     64 	if(!(e.flags & VtEntryActive)){
     65 		werrstr("entry not active");
     66 		return nil;
     67 	}
     68 
     69 	if(DEPTH(e.type) < sizetodepth(e.size, e.psize, e.dsize)){
     70 		fprint(2, "depth %ud size %llud psize %lud dsize %lud\n",
     71 			DEPTH(e.type), e.size, e.psize, e.dsize);
     72 		werrstr("bad depth");
     73 		return nil;
     74 	}
     75 
     76 	r = vtmallocz(sizeof(VtFile));
     77 	r->c = c;
     78 	r->bsize = b->size;
     79 	r->mode = mode;
     80 	r->dsize = e.dsize;
     81 	r->psize = e.psize;
     82 	r->gen = e.gen;
     83 	r->dir = (e.type & VtTypeBaseMask) == VtDirType;
     84 	r->ref = 1;
     85 	r->parent = p;
     86 	if(p){
     87 		qlock(&p->lk);
     88 		assert(mode == VtOREAD || p->mode == VtORDWR);
     89 		p->ref++;
     90 		qunlock(&p->lk);
     91 	}else{
     92 		assert(b->addr != NilBlock);
     93 		r->local = 1;
     94 	}
     95 	memmove(r->score, b->score, VtScoreSize);
     96 	r->offset = offset;
     97 	r->epb = epb;
     98 
     99 	return r;
    100 }
    101 
    102 VtFile *
    103 vtfileroot(VtCache *c, u32int addr, int mode)
    104 {
    105 	VtFile *r;
    106 	VtBlock *b;
    107 
    108 	b = vtcachelocal(c, addr, VtDirType);
    109 	if(b == nil)
    110 		return nil;
    111 	r = vtfilealloc(c, b, nil, 0, mode);
    112 	vtblockput(b);
    113 	return r;
    114 }
    115 
    116 VtFile*
    117 vtfileopenroot(VtCache *c, VtEntry *e)
    118 {
    119 	VtBlock *b;
    120 	VtFile *f;
    121 
    122 	b = vtcacheallocblock(c, VtDirType, VtEntrySize);
    123 	if(b == nil)
    124 		return nil;
    125 
    126 	vtentrypack(e, b->data, 0);
    127 	f = vtfilealloc(c, b, nil, 0, VtORDWR);
    128 	vtblockput(b);
    129 	return f;
    130 }
    131 
    132 VtFile *
    133 vtfilecreateroot(VtCache *c, int psize, int dsize, int type)
    134 {
    135 	VtEntry e;
    136 
    137 	memset(&e, 0, sizeof e);
    138 	e.flags = VtEntryActive;
    139 	e.psize = psize;
    140 	e.dsize = dsize;
    141 	e.type = type;
    142 	memmove(e.score, vtzeroscore, VtScoreSize);
    143 
    144 	return vtfileopenroot(c, &e);
    145 }
    146 
    147 VtFile *
    148 vtfileopen(VtFile *r, u32int offset, int mode)
    149 {
    150 	ulong bn;
    151 	VtBlock *b;
    152 
    153 	assert(ISLOCKED(r));
    154 	if(!r->dir){
    155 		werrstr(ENotDir);
    156 		return nil;
    157 	}
    158 
    159 	bn = offset/(r->dsize/VtEntrySize);
    160 
    161 	b = vtfileblock(r, bn, mode);
    162 	if(b == nil)
    163 		return nil;
    164 	r = vtfilealloc(r->c, b, r, offset, mode);
    165 	vtblockput(b);
    166 	return r;
    167 }
    168 
    169 VtFile*
    170 vtfilecreate(VtFile *r, int psize, int dsize, int type)
    171 {
    172 	return _vtfilecreate(r, -1, psize, dsize, type);
    173 }
    174 
    175 VtFile*
    176 _vtfilecreate(VtFile *r, int o, int psize, int dsize, int type)
    177 {
    178 	int i;
    179 	VtBlock *b;
    180 	u32int bn, size;
    181 	VtEntry e;
    182 	int epb;
    183 	VtFile *rr;
    184 	u32int offset;
    185 
    186 	assert(ISLOCKED(r));
    187 	assert(type == VtDirType || type == VtDataType);
    188 
    189 	if(!r->dir){
    190 		werrstr(ENotDir);
    191 		return nil;
    192 	}
    193 
    194 	epb = r->dsize/VtEntrySize;
    195 
    196 	size = vtfilegetdirsize(r);
    197 	/*
    198 	 * look at a random block to see if we can find an empty entry
    199 	 */
    200 	if(o == -1){
    201 		offset = lnrand(size+1);
    202 		offset -= offset % epb;
    203 	}else
    204 		offset = o;
    205 
    206 	/* try the given block and then try the last block */
    207 	for(;;){
    208 		bn = offset/epb;
    209 		b = vtfileblock(r, bn, VtORDWR);
    210 		if(b == nil)
    211 			return nil;
    212 		for(i=offset%r->epb; i<epb; i++){
    213 			if(vtentryunpack(&e, b->data, i) < 0)
    214 				continue;
    215 			if((e.flags&VtEntryActive) == 0 && e.gen != ~0)
    216 				goto Found;
    217 		}
    218 		vtblockput(b);
    219 		if(offset == size){
    220 			fprint(2, "vtfilecreate: cannot happen\n");
    221 			werrstr("vtfilecreate: cannot happen");
    222 			return nil;
    223 		}
    224 		offset = size;
    225 	}
    226 
    227 Found:
    228 	/* found an entry - gen already set */
    229 	e.psize = psize;
    230 	e.dsize = dsize;
    231 	e.flags = VtEntryActive;
    232 	e.type = type;
    233 	e.size = 0;
    234 	memmove(e.score, vtzeroscore, VtScoreSize);
    235 	vtentrypack(&e, b->data, i);
    236 
    237 	offset = bn*epb + i;
    238 	if(offset+1 > size){
    239 		if(vtfilesetdirsize(r, offset+1) < 0){
    240 			vtblockput(b);
    241 			return nil;
    242 		}
    243 	}
    244 
    245 	rr = vtfilealloc(r->c, b, r, offset, VtORDWR);
    246 	vtblockput(b);
    247 	return rr;
    248 }
    249 
    250 static int
    251 vtfilekill(VtFile *r, int doremove)
    252 {
    253 	VtEntry e;
    254 	VtBlock *b;
    255 
    256 	assert(ISLOCKED(r));
    257 	b = fileload(r, &e);
    258 	if(b == nil)
    259 		return -1;
    260 
    261 	if(doremove==0 && e.size == 0){
    262 		/* already truncated */
    263 		vtblockput(b);
    264 		return 0;
    265 	}
    266 
    267 	if(doremove){
    268 		if(e.gen != ~0)
    269 			e.gen++;
    270 		e.dsize = 0;
    271 		e.psize = 0;
    272 		e.flags = 0;
    273 	}else
    274 		e.flags &= ~VtEntryLocal;
    275 	e.type = 0;
    276 	e.size = 0;
    277 	memmove(e.score, vtzeroscore, VtScoreSize);
    278 	vtentrypack(&e, b->data, r->offset % r->epb);
    279 	vtblockput(b);
    280 
    281 	if(doremove){
    282 		vtfileunlock(r);
    283 		vtfileclose(r);
    284 	}
    285 
    286 	return 0;
    287 }
    288 
    289 int
    290 vtfileremove(VtFile *r)
    291 {
    292 	return vtfilekill(r, 1);
    293 }
    294 
    295 int
    296 vtfiletruncate(VtFile *r)
    297 {
    298 	return vtfilekill(r, 0);
    299 }
    300 
    301 uvlong
    302 vtfilegetsize(VtFile *r)
    303 {
    304 	VtEntry e;
    305 	VtBlock *b;
    306 
    307 	assert(ISLOCKED(r));
    308 	b = fileload(r, &e);
    309 	if(b == nil)
    310 		return ~(uvlong)0;
    311 	vtblockput(b);
    312 
    313 	return e.size;
    314 }
    315 
    316 static int
    317 shrinksize(VtFile *r, VtEntry *e, uvlong size)
    318 {
    319 	int i, depth, bsiz, type, isdir, ppb;
    320 	uvlong ptrsz;
    321 	uchar score[VtScoreSize];
    322 	VtBlock *b;
    323 
    324 	b = vtcacheglobal(r->c, e->score, e->type, r->dsize);
    325 	if(b == nil)
    326 		return -1;
    327 
    328 	ptrsz = e->dsize;
    329 	ppb = e->psize/VtScoreSize;
    330 	type = e->type;
    331 	depth = DEPTH(type);
    332 	for(i=0; i+1<depth; i++)
    333 		ptrsz *= ppb;
    334 
    335 	isdir = r->dir;
    336 	while(DEPTH(type) > 0){
    337 		if(b->addr == NilBlock){
    338 			/* not worth copying the block just so we can zero some of it */
    339 			vtblockput(b);
    340 			return -1;
    341 		}
    342 
    343 		/*
    344 		 * invariant: each pointer in the tree rooted at b accounts for ptrsz bytes
    345 		 */
    346 
    347 		/* zero the pointers to unnecessary blocks */
    348 		i = (size+ptrsz-1)/ptrsz;
    349 		for(; i<ppb; i++)
    350 			memmove(b->data+i*VtScoreSize, vtzeroscore, VtScoreSize);
    351 
    352 		/* recurse (go around again) on the partially necessary block */
    353 		i = size/ptrsz;
    354 		size = size%ptrsz;
    355 		if(size == 0){
    356 			vtblockput(b);
    357 			return 0;
    358 		}
    359 		ptrsz /= ppb;
    360 		type--;
    361 		memmove(score, b->data+i*VtScoreSize, VtScoreSize);
    362 		vtblockput(b);
    363 		if(type == VtDataType || type == VtDirType)
    364 			bsiz = r->dsize;
    365 		else
    366 			bsiz = r->psize;
    367 		b = vtcacheglobal(r->c, score, type, bsiz);
    368 		if(b == nil)
    369 			return -1;
    370 	}
    371 
    372 	if(b->addr == NilBlock){
    373 		vtblockput(b);
    374 		return -1;
    375 	}
    376 
    377 	/*
    378 	 * No one ever truncates BtDir blocks.
    379 	 */
    380 	if(depth==0 && !isdir && e->dsize > size)
    381 		memset(b->data+size, 0, e->dsize-size);
    382 	vtblockput(b);
    383 	return 0;
    384 }
    385 
    386 int
    387 vtfilesetsize(VtFile *r, u64int size)
    388 {
    389 	int depth, edepth;
    390 	VtEntry e;
    391 	VtBlock *b;
    392 
    393 	assert(ISLOCKED(r));
    394 	if(size == 0)
    395 		return vtfiletruncate(r);
    396 
    397 	if(size > VtMaxFileSize || size > ((uvlong)MaxBlock)*r->dsize){
    398 		werrstr(ETooBig);
    399 		return -1;
    400 	}
    401 
    402 	b = fileload(r, &e);
    403 	if(b == nil)
    404 		return -1;
    405 
    406 	/* quick out */
    407 	if(e.size == size){
    408 		vtblockput(b);
    409 		return 0;
    410 	}
    411 
    412 	depth = sizetodepth(size, e.psize, e.dsize);
    413 	edepth = DEPTH(e.type);
    414 	if(depth < edepth){
    415 		if(shrinkdepth(r, b, &e, depth) < 0){
    416 			vtblockput(b);
    417 			return -1;
    418 		}
    419 	}else if(depth > edepth){
    420 		if(growdepth(r, b, &e, depth) < 0){
    421 			vtblockput(b);
    422 			return -1;
    423 		}
    424 	}
    425 
    426 	if(size < e.size)
    427 		shrinksize(r, &e, size);
    428 
    429 	e.size = size;
    430 	vtentrypack(&e, b->data, r->offset % r->epb);
    431 	vtblockput(b);
    432 
    433 	return 0;
    434 }
    435 
    436 int
    437 vtfilesetdirsize(VtFile *r, u32int ds)
    438 {
    439 	uvlong size;
    440 	int epb;
    441 
    442 	assert(ISLOCKED(r));
    443 	epb = r->dsize/VtEntrySize;
    444 
    445 	size = (uvlong)r->dsize*(ds/epb);
    446 	size += VtEntrySize*(ds%epb);
    447 	return vtfilesetsize(r, size);
    448 }
    449 
    450 u32int
    451 vtfilegetdirsize(VtFile *r)
    452 {
    453 	ulong ds;
    454 	uvlong size;
    455 	int epb;
    456 
    457 	assert(ISLOCKED(r));
    458 	epb = r->dsize/VtEntrySize;
    459 
    460 	size = vtfilegetsize(r);
    461 	ds = epb*(size/r->dsize);
    462 	ds += (size%r->dsize)/VtEntrySize;
    463 	return ds;
    464 }
    465 
    466 int
    467 vtfilegetentry(VtFile *r, VtEntry *e)
    468 {
    469 	VtBlock *b;
    470 
    471 	assert(ISLOCKED(r));
    472 	b = fileload(r, e);
    473 	if(b == nil)
    474 		return -1;
    475 	vtblockput(b);
    476 
    477 	return 0;
    478 }
    479 
    480 int
    481 vtfilesetentry(VtFile *r, VtEntry *e)
    482 {
    483 	VtBlock *b;
    484 	VtEntry ee;
    485 
    486 	assert(ISLOCKED(r));
    487 	b = fileload(r, &ee);
    488 	if(b == nil)
    489 		return -1;
    490 	vtentrypack(e, b->data, r->offset % r->epb);
    491 	vtblockput(b);
    492 	return 0;
    493 }
    494 
    495 static VtBlock *
    496 blockwalk(VtFile *r, VtBlock *p, int index, VtCache *c, int mode, VtEntry *e)
    497 {
    498 	VtBlock *b;
    499 	int type, size;
    500 	uchar *score;
    501 
    502 	switch(p->type){
    503 	case VtDataType:
    504 		assert(0);
    505 	case VtDirType:
    506 		type = e->type;
    507 		score = e->score;
    508 		break;
    509 	default:
    510 		type = p->type - 1;
    511 		score = p->data+index*VtScoreSize;
    512 		break;
    513 	}
    514 /*print("walk from %V/%d ty %d to %V ty %d\n", p->score, index, p->type, score, type); */
    515 
    516 	if(type == VtDirType || type == VtDataType)
    517 		size = r->dsize;
    518 	else
    519 		size = r->psize;
    520 	if(mode == VtOWRITE && vtglobaltolocal(score) == NilBlock){
    521 		b = vtcacheallocblock(c, type, size);
    522 		if(b)
    523 			goto HaveCopy;
    524 	}else
    525 		b = vtcacheglobal(c, score, type, size);
    526 
    527 	if(b == nil || mode == VtOREAD)
    528 		return b;
    529 
    530 	if(vtglobaltolocal(b->score) != NilBlock)
    531 		return b;
    532 
    533 	/*
    534 	 * Copy on write.
    535 	 */
    536 	e->flags |= VtEntryLocal;
    537 
    538 	b = vtblockcopy(b/*, e->tag, fs->ehi, fs->elo*/);
    539 	if(b == nil)
    540 		return nil;
    541 
    542 HaveCopy:
    543 	if(p->type == VtDirType){
    544 		memmove(e->score, b->score, VtScoreSize);
    545 		vtentrypack(e, p->data, index);
    546 	}else{
    547 		memmove(p->data+index*VtScoreSize, b->score, VtScoreSize);
    548 	}
    549 	return b;
    550 }
    551 
    552 /*
    553  * Change the depth of the VtFile r.
    554  * The entry e for r is contained in block p.
    555  */
    556 static int
    557 growdepth(VtFile *r, VtBlock *p, VtEntry *e, int depth)
    558 {
    559 	VtBlock *b, *bb;
    560 
    561 	assert(ISLOCKED(r));
    562 	assert(depth <= VtPointerDepth);
    563 
    564 	b = vtcacheglobal(r->c, e->score, e->type, r->dsize);
    565 	if(b == nil)
    566 		return -1;
    567 
    568 	/*
    569 	 * Keep adding layers until we get to the right depth
    570 	 * or an error occurs.
    571 	 */
    572 	while(DEPTH(e->type) < depth){
    573 		bb = vtcacheallocblock(r->c, e->type+1, r->psize);
    574 		if(bb == nil)
    575 			break;
    576 		memmove(bb->data, b->score, VtScoreSize);
    577 		memmove(e->score, bb->score, VtScoreSize);
    578 		e->type++;
    579 		e->flags |= VtEntryLocal;
    580 		vtblockput(b);
    581 		b = bb;
    582 	}
    583 
    584 	vtentrypack(e, p->data, r->offset % r->epb);
    585 	vtblockput(b);
    586 
    587 	if(DEPTH(e->type) == depth)
    588 		return 0;
    589 	return -1;
    590 }
    591 
    592 static int
    593 shrinkdepth(VtFile *r, VtBlock *p, VtEntry *e, int depth)
    594 {
    595 	VtBlock *b, *nb, *ob, *rb;
    596 
    597 	assert(ISLOCKED(r));
    598 	assert(depth <= VtPointerDepth);
    599 
    600 	rb = vtcacheglobal(r->c, e->score, e->type, r->psize);
    601 	if(rb == nil)
    602 		return -1;
    603 
    604 	/*
    605 	 * Walk down to the new root block.
    606 	 * We may stop early, but something is better than nothing.
    607 	 */
    608 
    609 	ob = nil;
    610 	b = rb;
    611 	for(; DEPTH(e->type) > depth; e->type--){
    612 		nb = vtcacheglobal(r->c, b->data, e->type-1, r->psize);
    613 		if(nb == nil)
    614 			break;
    615 		if(ob!=nil && ob!=rb)
    616 			vtblockput(ob);
    617 		ob = b;
    618 		b = nb;
    619 	}
    620 
    621 	if(b == rb){
    622 		vtblockput(rb);
    623 		return 0;
    624 	}
    625 
    626 	/*
    627 	 * Right now, e points at the root block rb, b is the new root block,
    628 	 * and ob points at b.  To update:
    629 	 *
    630 	 *	(i) change e to point at b
    631 	 *	(ii) zero the pointer ob -> b
    632 	 *	(iii) free the root block
    633 	 *
    634 	 * p (the block containing e) must be written before
    635 	 * anything else.
    636  	 */
    637 
    638 	/* (i) */
    639 	memmove(e->score, b->score, VtScoreSize);
    640 	vtentrypack(e, p->data, r->offset % r->epb);
    641 
    642 	/* (ii) */
    643 	memmove(ob->data, vtzeroscore, VtScoreSize);
    644 
    645 	/* (iii) */
    646 	vtblockput(rb);
    647 	if(ob!=nil && ob!=rb)
    648 		vtblockput(ob);
    649 	vtblockput(b);
    650 
    651 	if(DEPTH(e->type) == depth)
    652 		return 0;
    653 	return -1;
    654 }
    655 
    656 static int
    657 mkindices(VtEntry *e, u32int bn, int *index)
    658 {
    659 	int i, np;
    660 
    661 	memset(index, 0, (VtPointerDepth+1)*sizeof(int));
    662 
    663 	np = e->psize/VtScoreSize;
    664 	for(i=0; bn > 0; i++){
    665 		if(i >= VtPointerDepth){
    666 			werrstr("bad address 0x%lux", (ulong)bn);
    667 			return -1;
    668 		}
    669 		index[i] = bn % np;
    670 		bn /= np;
    671 	}
    672 	return i;
    673 }
    674 
    675 VtBlock *
    676 vtfileblock(VtFile *r, u32int bn, int mode)
    677 {
    678 	VtBlock *b, *bb;
    679 	int index[VtPointerDepth+1];
    680 	VtEntry e;
    681 	int i;
    682 	int m;
    683 
    684 	assert(ISLOCKED(r));
    685 	assert(bn != NilBlock);
    686 
    687 	b = fileload(r, &e);
    688 	if(b == nil)
    689 		return nil;
    690 
    691 	i = mkindices(&e, bn, index);
    692 	if(i < 0)
    693 		goto Err;
    694 	if(i > DEPTH(e.type)){
    695 		if(mode == VtOREAD){
    696 			werrstr("bad address 0x%lux", (ulong)bn);
    697 			goto Err;
    698 		}
    699 		index[i] = 0;
    700 		if(growdepth(r, b, &e, i) < 0)
    701 			goto Err;
    702 	}
    703 
    704 assert(b->type == VtDirType);
    705 
    706 	index[DEPTH(e.type)] = r->offset % r->epb;
    707 
    708 	/* mode for intermediate block */
    709 	m = mode;
    710 	if(m == VtOWRITE)
    711 		m = VtORDWR;
    712 
    713 	for(i=DEPTH(e.type); i>=0; i--){
    714 		bb = blockwalk(r, b, index[i], r->c, i==0 ? mode : m, &e);
    715 		if(bb == nil)
    716 			goto Err;
    717 		vtblockput(b);
    718 		b = bb;
    719 	}
    720 	b->pc = getcallerpc(&r);
    721 	return b;
    722 Err:
    723 	vtblockput(b);
    724 	return nil;
    725 }
    726 
    727 int
    728 vtfileblockscore(VtFile *r, u32int bn, uchar score[VtScoreSize])
    729 {
    730 	VtBlock *b, *bb;
    731 	int index[VtPointerDepth+1];
    732 	VtEntry e;
    733 	int i;
    734 
    735 	assert(ISLOCKED(r));
    736 	assert(bn != NilBlock);
    737 
    738 	b = fileload(r, &e);
    739 	if(b == nil)
    740 		return -1;
    741 
    742 	if(DEPTH(e.type) == 0){
    743 		memmove(score, e.score, VtScoreSize);
    744 		vtblockput(b);
    745 		return 0;
    746 	}
    747 
    748 	i = mkindices(&e, bn, index);
    749 	if(i < 0){
    750 		vtblockput(b);
    751 		return -1;
    752 	}
    753 	if(i > DEPTH(e.type)){
    754 		memmove(score, vtzeroscore, VtScoreSize);
    755 		vtblockput(b);
    756 		return 0;
    757 	}
    758 
    759 	index[DEPTH(e.type)] = r->offset % r->epb;
    760 
    761 	for(i=DEPTH(e.type); i>=1; i--){
    762 		bb = blockwalk(r, b, index[i], r->c, VtOREAD, &e);
    763 		if(bb == nil)
    764 			goto Err;
    765 		vtblockput(b);
    766 		b = bb;
    767 		if(memcmp(b->score, vtzeroscore, VtScoreSize) == 0)
    768 			break;
    769 	}
    770 
    771 	memmove(score, b->data+index[0]*VtScoreSize, VtScoreSize);
    772 	vtblockput(b);
    773 	return 0;
    774 
    775 Err:
    776 	vtblockput(b);
    777 	return -1;
    778 }
    779 
    780 void
    781 vtfileincref(VtFile *r)
    782 {
    783 	qlock(&r->lk);
    784 	r->ref++;
    785 	qunlock(&r->lk);
    786 }
    787 
    788 void
    789 vtfileclose(VtFile *r)
    790 {
    791 	if(r == nil)
    792 		return;
    793 	qlock(&r->lk);
    794 	r->ref--;
    795 	if(r->ref){
    796 		qunlock(&r->lk);
    797 		return;
    798 	}
    799 	assert(r->ref == 0);
    800 	qunlock(&r->lk);
    801 	if(r->parent)
    802 		vtfileclose(r->parent);
    803 	memset(r, ~0, sizeof(*r));
    804 	vtfree(r);
    805 }
    806 
    807 /*
    808  * Retrieve the block containing the entry for r.
    809  * If a snapshot has happened, we might need
    810  * to get a new copy of the block.  We avoid this
    811  * in the common case by caching the score for
    812  * the block and the last epoch in which it was valid.
    813  *
    814  * We use r->mode to tell the difference between active
    815  * file system VtFiles (VtORDWR) and VtFiles for the
    816  * snapshot file system (VtOREAD).
    817  */
    818 static VtBlock*
    819 fileloadblock(VtFile *r, int mode)
    820 {
    821 	char e[ERRMAX];
    822 	u32int addr;
    823 	VtBlock *b;
    824 
    825 	switch(r->mode){
    826 	default:
    827 		assert(0);
    828 	case VtORDWR:
    829 		assert(r->mode == VtORDWR);
    830 		if(r->local == 1){
    831 			b = vtcacheglobal(r->c, r->score, VtDirType, r->bsize);
    832 			if(b == nil)
    833 				return nil;
    834 			b->pc = getcallerpc(&r);
    835 			return b;
    836 		}
    837 		assert(r->parent != nil);
    838 		if(vtfilelock(r->parent, VtORDWR) < 0)
    839 			return nil;
    840 		b = vtfileblock(r->parent, r->offset/r->epb, VtORDWR);
    841 		vtfileunlock(r->parent);
    842 		if(b == nil)
    843 			return nil;
    844 		memmove(r->score, b->score, VtScoreSize);
    845 		r->local = 1;
    846 		return b;
    847 
    848 	case VtOREAD:
    849 		if(mode == VtORDWR){
    850 			werrstr("read/write lock of read-only file");
    851 			return nil;
    852 		}
    853 		addr = vtglobaltolocal(r->score);
    854 		if(addr == NilBlock)
    855 			return vtcacheglobal(r->c, r->score, VtDirType, r->bsize);
    856 
    857 		b = vtcachelocal(r->c, addr, VtDirType);
    858 		if(b)
    859 			return b;
    860 
    861 		/*
    862 		 * If it failed because the epochs don't match, the block has been
    863 		 * archived and reclaimed.  Rewalk from the parent and get the
    864 		 * new pointer.  This can't happen in the VtORDWR case
    865 		 * above because blocks in the current epoch don't get
    866 		 * reclaimed.  The fact that we're VtOREAD means we're
    867 		 * a snapshot.  (Or else the file system is read-only, but then
    868 		 * the archiver isn't going around deleting blocks.)
    869 		 */
    870 		rerrstr(e, sizeof e);
    871 		if(strcmp(e, ELabelMismatch) == 0){
    872 			if(vtfilelock(r->parent, VtOREAD) < 0)
    873 				return nil;
    874 			b = vtfileblock(r->parent, r->offset/r->epb, VtOREAD);
    875 			vtfileunlock(r->parent);
    876 			if(b){
    877 				fprint(2, "vtfilealloc: lost %V found %V\n",
    878 					r->score, b->score);
    879 				memmove(r->score, b->score, VtScoreSize);
    880 				return b;
    881 			}
    882 		}
    883 		return nil;
    884 	}
    885 }
    886 
    887 int
    888 vtfilelock(VtFile *r, int mode)
    889 {
    890 	VtBlock *b;
    891 
    892 	if(mode == -1)
    893 		mode = r->mode;
    894 
    895 	b = fileloadblock(r, mode);
    896 	if(b == nil)
    897 		return -1;
    898 	/*
    899 	 * The fact that we are holding b serves as the
    900 	 * lock entitling us to write to r->b.
    901 	 */
    902 	assert(r->b == nil);
    903 	r->b = b;
    904 	b->pc = getcallerpc(&r);
    905 	return 0;
    906 }
    907 
    908 /*
    909  * Lock two (usually sibling) VtFiles.  This needs special care
    910  * because the Entries for both vtFiles might be in the same block.
    911  * We also try to lock blocks in left-to-right order within the tree.
    912  */
    913 int
    914 vtfilelock2(VtFile *r, VtFile *rr, int mode)
    915 {
    916 	VtBlock *b, *bb;
    917 
    918 	if(rr == nil)
    919 		return vtfilelock(r, mode);
    920 
    921 	if(mode == -1)
    922 		mode = r->mode;
    923 
    924 	if(r->parent==rr->parent && r->offset/r->epb == rr->offset/rr->epb){
    925 		b = fileloadblock(r, mode);
    926 		if(b == nil)
    927 			return -1;
    928 		vtblockduplock(b);
    929 		bb = b;
    930 	}else if(r->parent==rr->parent || r->offset > rr->offset){
    931 		bb = fileloadblock(rr, mode);
    932 		b = fileloadblock(r, mode);
    933 	}else{
    934 		b = fileloadblock(r, mode);
    935 		bb = fileloadblock(rr, mode);
    936 	}
    937 	if(b == nil || bb == nil){
    938 		if(b)
    939 			vtblockput(b);
    940 		if(bb)
    941 			vtblockput(bb);
    942 		return -1;
    943 	}
    944 
    945 	/*
    946 	 * The fact that we are holding b and bb serves
    947 	 * as the lock entitling us to write to r->b and rr->b.
    948 	 */
    949 	r->b = b;
    950 	rr->b = bb;
    951 	b->pc = getcallerpc(&r);
    952 	bb->pc = getcallerpc(&r);
    953 	return 0;
    954 }
    955 
    956 void
    957 vtfileunlock(VtFile *r)
    958 {
    959 	VtBlock *b;
    960 
    961 	if(r->b == nil){
    962 		fprint(2, "vtfileunlock: already unlocked\n");
    963 		abort();
    964 	}
    965 	b = r->b;
    966 	r->b = nil;
    967 	vtblockput(b);
    968 }
    969 
    970 static VtBlock*
    971 fileload(VtFile *r, VtEntry *e)
    972 {
    973 	VtBlock *b;
    974 
    975 	assert(ISLOCKED(r));
    976 	b = r->b;
    977 	if(vtentryunpack(e, b->data, r->offset % r->epb) < 0)
    978 		return nil;
    979 	vtblockduplock(b);
    980 	return b;
    981 }
    982 
    983 static int
    984 sizetodepth(uvlong s, int psize, int dsize)
    985 {
    986 	int np;
    987 	int d;
    988 
    989 	/* determine pointer depth */
    990 	np = psize/VtScoreSize;
    991 	s = (s + dsize - 1)/dsize;
    992 	for(d = 0; s > 1; d++)
    993 		s = (s + np - 1)/np;
    994 	return d;
    995 }
    996 
    997 long
    998 vtfileread(VtFile *f, void *data, long count, vlong offset)
    999 {
   1000 	int frag;
   1001 	VtBlock *b;
   1002 	VtEntry e;
   1003 
   1004 	assert(ISLOCKED(f));
   1005 
   1006 	vtfilegetentry(f, &e);
   1007 	if(count == 0)
   1008 		return 0;
   1009 	if(count < 0 || offset < 0){
   1010 		werrstr("vtfileread: bad offset or count");
   1011 		return -1;
   1012 	}
   1013 	if(offset >= e.size)
   1014 		return 0;
   1015 
   1016 	if(offset+count > e.size)
   1017 		count = e.size - offset;
   1018 
   1019 	frag = offset % e.dsize;
   1020 	if(frag+count > e.dsize)
   1021 		count = e.dsize - frag;
   1022 
   1023 	b = vtfileblock(f, offset/e.dsize, VtOREAD);
   1024 	if(b == nil)
   1025 		return -1;
   1026 
   1027 	memmove(data, b->data+frag, count);
   1028 	vtblockput(b);
   1029 	return count;
   1030 }
   1031 
   1032 static long
   1033 filewrite1(VtFile *f, void *data, long count, vlong offset)
   1034 {
   1035 	int frag, m;
   1036 	VtBlock *b;
   1037 	VtEntry e;
   1038 
   1039 	vtfilegetentry(f, &e);
   1040 	if(count < 0 || offset < 0){
   1041 		werrstr("vtfilewrite: bad offset or count");
   1042 		return -1;
   1043 	}
   1044 
   1045 	frag = offset % e.dsize;
   1046 	if(frag+count > e.dsize)
   1047 		count = e.dsize - frag;
   1048 
   1049 	m = VtORDWR;
   1050 	if(frag == 0 && count == e.dsize)
   1051 		m = VtOWRITE;
   1052 
   1053 	b = vtfileblock(f, offset/e.dsize, m);
   1054 	if(b == nil)
   1055 		return -1;
   1056 
   1057 	memmove(b->data+frag, data, count);
   1058 	if(m == VtOWRITE && frag+count < e.dsize)
   1059 		memset(b->data+frag+count, 0, e.dsize-frag-count);
   1060 
   1061 	if(offset+count > e.size){
   1062 		vtfilegetentry(f, &e);
   1063 		e.size = offset+count;
   1064 		vtfilesetentry(f, &e);
   1065 	}
   1066 
   1067 	vtblockput(b);
   1068 	return count;
   1069 }
   1070 
   1071 long
   1072 vtfilewrite(VtFile *f, void *data, long count, vlong offset)
   1073 {
   1074 	long tot, m;
   1075 
   1076 	assert(ISLOCKED(f));
   1077 
   1078 	tot = 0;
   1079 	m = 0;
   1080 	while(tot < count){
   1081 		m = filewrite1(f, (char*)data+tot, count-tot, offset+tot);
   1082 		if(m <= 0)
   1083 			break;
   1084 		tot += m;
   1085 	}
   1086 	if(tot==0)
   1087 		return m;
   1088 	return tot;
   1089 }
   1090 
   1091 static int
   1092 flushblock(VtCache *c, VtBlock *bb, uchar score[VtScoreSize], int ppb, int epb,
   1093 	int type)
   1094 {
   1095 	u32int addr;
   1096 	VtBlock *b;
   1097 	VtEntry e;
   1098 	int i;
   1099 
   1100 	addr = vtglobaltolocal(score);
   1101 	if(addr == NilBlock)
   1102 		return 0;
   1103 
   1104 	if(bb){
   1105 		b = bb;
   1106 		if(memcmp(b->score, score, VtScoreSize) != 0)
   1107 			abort();
   1108 	}else
   1109 		if((b = vtcachelocal(c, addr, type)) == nil)
   1110 			return -1;
   1111 
   1112 	switch(type){
   1113 	case VtDataType:
   1114 		break;
   1115 
   1116 	case VtDirType:
   1117 		for(i=0; i<epb; i++){
   1118 			if(vtentryunpack(&e, b->data, i) < 0)
   1119 				goto Err;
   1120 			if(!(e.flags&VtEntryActive))
   1121 				continue;
   1122 			if(flushblock(c, nil, e.score, e.psize/VtScoreSize, e.dsize/VtEntrySize,
   1123 				e.type) < 0)
   1124 				goto Err;
   1125 			vtentrypack(&e, b->data, i);
   1126 		}
   1127 		break;
   1128 
   1129 	default:	/* VtPointerTypeX */
   1130 		for(i=0; i<ppb; i++){
   1131 			if(flushblock(c, nil, b->data+VtScoreSize*i, ppb, epb, type-1) < 0)
   1132 				goto Err;
   1133 		}
   1134 		break;
   1135 	}
   1136 
   1137 	if(vtblockwrite(b) < 0)
   1138 		goto Err;
   1139 	memmove(score, b->score, VtScoreSize);
   1140 	if(b != bb)
   1141 		vtblockput(b);
   1142 	return 0;
   1143 
   1144 Err:
   1145 	if(b != bb)
   1146 		vtblockput(b);
   1147 	return -1;
   1148 }
   1149 
   1150 int
   1151 vtfileflush(VtFile *f)
   1152 {
   1153 	int ret;
   1154 	VtBlock *b;
   1155 	VtEntry e;
   1156 
   1157 	assert(ISLOCKED(f));
   1158 	b = fileload(f, &e);
   1159 	if(!(e.flags&VtEntryLocal)){
   1160 		vtblockput(b);
   1161 		return 0;
   1162 	}
   1163 
   1164 	ret = flushblock(f->c, nil, e.score, e.psize/VtScoreSize, e.dsize/VtEntrySize,
   1165 		e.type);
   1166 	if(ret < 0){
   1167 		vtblockput(b);
   1168 		return -1;
   1169 	}
   1170 
   1171 	vtentrypack(&e, b->data, f->offset % f->epb);
   1172 	vtblockput(b);
   1173 	return 0;
   1174 }
   1175 
   1176 int
   1177 vtfileflushbefore(VtFile *r, u64int offset)
   1178 {
   1179 	VtBlock *b, *bb;
   1180 	VtEntry e;
   1181 	int i, base, depth, ppb, epb, doflush;
   1182 	int index[VtPointerDepth+1], j, ret;
   1183 	VtBlock *bi[VtPointerDepth+2];
   1184 	uchar *score;
   1185 
   1186 	assert(ISLOCKED(r));
   1187 	if(offset == 0)
   1188 		return 0;
   1189 
   1190 	b = fileload(r, &e);
   1191 	if(b == nil)
   1192 		return -1;
   1193 
   1194 	/*
   1195 	 * compute path through tree for the last written byte and the next one.
   1196 	 */
   1197 	ret = -1;
   1198 	memset(bi, 0, sizeof bi);
   1199 	depth = DEPTH(e.type);
   1200 	bi[depth+1] = b;
   1201 	i = mkindices(&e, (offset-1)/e.dsize, index);
   1202 	if(i < 0)
   1203 		goto Err;
   1204 	if(i > depth)
   1205 		goto Err;
   1206 	ppb = e.psize / VtScoreSize;
   1207 	epb = e.dsize / VtEntrySize;
   1208 
   1209 	/*
   1210 	 * load the blocks along the last written byte
   1211 	 */
   1212 	index[depth] = r->offset % r->epb;
   1213 	for(i=depth; i>=0; i--){
   1214 		bb = blockwalk(r, b, index[i], r->c, VtORDWR, &e);
   1215 		if(bb == nil)
   1216 			goto Err;
   1217 		bi[i] = bb;
   1218 		b = bb;
   1219 	}
   1220 	ret = 0;
   1221 
   1222 	/*
   1223 	 * walk up the path from leaf to root, flushing anything that
   1224 	 * has been finished.
   1225 	 */
   1226 	base = e.type&~VtTypeDepthMask;
   1227 	for(i=0; i<=depth; i++){
   1228 		doflush = 0;
   1229 		if(i == 0){
   1230 			/* leaf: data or dir block */
   1231 			if(offset%e.dsize == 0)
   1232 				doflush = 1;
   1233 		}else{
   1234 			/*
   1235 			 * interior node: pointer blocks.
   1236 			 * specifically, b = bi[i] is a block whose index[i-1]'th entry
   1237 			 * points at bi[i-1].
   1238 			 */
   1239 			b = bi[i];
   1240 
   1241 			/*
   1242 			 * the index entries up to but not including index[i-1] point at
   1243 			 * finished blocks, so flush them for sure.
   1244 			 */
   1245 			for(j=0; j<index[i-1]; j++)
   1246 				if(flushblock(r->c, nil, b->data+j*VtScoreSize, ppb, epb, base+i-1) < 0)
   1247 					goto Err;
   1248 
   1249 			/*
   1250 			 * if index[i-1] is the last entry in the block and is global
   1251 			 * (i.e. the kid is flushed), then we can flush this block.
   1252 			 */
   1253 			if(j==ppb-1 && vtglobaltolocal(b->data+j*VtScoreSize)==NilBlock)
   1254 				doflush = 1;
   1255 		}
   1256 		if(doflush){
   1257 			if(i == depth)
   1258 				score = e.score;
   1259 			else
   1260 				score = bi[i+1]->data+index[i]*VtScoreSize;
   1261 			if(flushblock(r->c, bi[i], score, ppb, epb, base+i) < 0)
   1262 				goto Err;
   1263 		}
   1264 	}
   1265 
   1266 Err:
   1267 	/* top: entry.  do this always so that the score is up-to-date */
   1268 	vtentrypack(&e, bi[depth+1]->data, index[depth]);
   1269 	for(i=0; i<nelem(bi); i++)
   1270 		if(bi[i])
   1271 			vtblockput(bi[i]);
   1272 	return ret;
   1273 }