plan9port

fork of plan9port with libvec, libstr and libsdb
Log | Files | Refs | README | LICENSE

file.c (40202B)


      1 #include "stdinc.h"
      2 #include "vac.h"
      3 #include "dat.h"
      4 #include "fns.h"
      5 #include "error.h"
      6 
      7 #define debug 0
      8 
      9 /*
     10  * Vac file system.  This is a simplified version of the same code in Fossil.
     11  *
     12  * The locking order in the tree is upward: a thread can hold the lock
     13  * for a VacFile and then acquire the lock of f->up (the parent),
     14  * but not vice-versa.
     15  *
     16  * A vac file is one or two venti files.  Plain data files are one venti file,
     17  * while directories are two: a venti data file containing traditional
     18  * directory entries, and a venti directory file containing venti
     19  * directory entries.  The traditional directory entries in the data file
     20  * contain integers indexing into the venti directory entry file.
     21  * It's a little complicated, but it makes the data usable by standard
     22  * tools like venti/copy.
     23  *
     24  */
     25 
     26 static int filemetaflush(VacFile*, char*);
     27 
/*
 * In-memory handle for one file or directory of a vac tree.
 * The fields fall into three groups: immutable ones, metadata guarded
 * by the parent's lk, and per-file state guarded by this file's own lk.
 */
struct VacFile
{
	VacFs	*fs;	/* immutable */

	/* meta data for file: protected by the lk in the parent */
	int		ref;	/* holds this data structure up */

	int		partial;	/* file was never really open */
	int		removed;	/* file has been removed */
	int		dirty;	/* dir is dirty with respect to meta data in block */
	u32int	boff;		/* block number within the parent's msource holding this file's metadata; NilBlock if none */
	VacDir	dir;		/* metadata for this file */
	VacFile	*up;		/* parent file */
	VacFile	*next;	/* sibling */

	RWLock	lk;		/* lock for the following */
	VtFile	*source;	/* actual data */
	VtFile	*msource;	/* metadata for children in a directory */
	VacFile	*down;	/* children */
	int		mode;	/* open mode (e.g. VtOREAD) used when opening and locking the venti sources */

	uvlong	qidoffset;	/* qid offset; added to dir.qid by vacfilegetid */
};
     51 
     52 static VacFile*
     53 filealloc(VacFs *fs)
     54 {
     55 	VacFile *f;
     56 
     57 	f = vtmallocz(sizeof(VacFile));
     58 	f->ref = 1;
     59 	f->fs = fs;
     60 	f->boff = NilBlock;
     61 	f->mode = fs->mode;
     62 	return f;
     63 }
     64 
     65 static void
     66 filefree(VacFile *f)
     67 {
     68 	vtfileclose(f->source);
     69 	vtfileclose(f->msource);
     70 	vdcleanup(&f->dir);
     71 	memset(f, ~0, sizeof *f);	/* paranoia */
     72 	vtfree(f);
     73 }
     74 
     75 static int
     76 chksource(VacFile *f)
     77 {
     78 	if(f->partial)
     79 		return 0;
     80 
     81 	if(f->source == nil
     82 	|| ((f->dir.mode & ModeDir) && f->msource == nil)){
     83 		werrstr(ERemoved);
     84 		return -1;
     85 	}
     86 	return 0;
     87 }
     88 
     89 static int
     90 filelock(VacFile *f)
     91 {
     92 	wlock(&f->lk);
     93 	if(chksource(f) < 0){
     94 		wunlock(&f->lk);
     95 		return -1;
     96 	}
     97 	return 0;
     98 }
     99 
    100 static void
    101 fileunlock(VacFile *f)
    102 {
    103 	wunlock(&f->lk);
    104 }
    105 
    106 static int
    107 filerlock(VacFile *f)
    108 {
    109 	rlock(&f->lk);
    110 	if(chksource(f) < 0){
    111 		runlock(&f->lk);
    112 		return -1;
    113 	}
    114 	return 0;
    115 }
    116 
    117 static void
    118 filerunlock(VacFile *f)
    119 {
    120 	runlock(&f->lk);
    121 }
    122 
    123 /*
    124  * The file metadata, like f->dir and f->ref,
    125  * are synchronized via the parent's lock.
    126  * This is why locking order goes up.
    127  */
    128 static void
    129 filemetalock(VacFile *f)
    130 {
    131 	assert(f->up != nil);
    132 	wlock(&f->up->lk);
    133 }
    134 
    135 static void
    136 filemetaunlock(VacFile *f)
    137 {
    138 	wunlock(&f->up->lk);
    139 }
    140 
    141 uvlong
    142 vacfilegetid(VacFile *f)
    143 {
    144 	/* immutable */
    145 	return f->qidoffset + f->dir.qid;
    146 }
    147 
    148 uvlong
    149 vacfilegetqidoffset(VacFile *f)
    150 {
    151 	return f->qidoffset;
    152 }
    153 
    154 ulong
    155 vacfilegetmcount(VacFile *f)
    156 {
    157 	ulong mcount;
    158 
    159 	filemetalock(f);
    160 	mcount = f->dir.mcount;
    161 	filemetaunlock(f);
    162 	return mcount;
    163 }
    164 
    165 ulong
    166 vacfilegetmode(VacFile *f)
    167 {
    168 	ulong mode;
    169 
    170 	filemetalock(f);
    171 	mode = f->dir.mode;
    172 	filemetaunlock(f);
    173 	return mode;
    174 }
    175 
    176 int
    177 vacfileisdir(VacFile *f)
    178 {
    179 	/* immutable */
    180 	return (f->dir.mode & ModeDir) != 0;
    181 }
    182 
    183 int
    184 vacfileisroot(VacFile *f)
    185 {
    186 	return f == f->fs->root;
    187 }
    188 
    189 /*
    190  * The files are reference counted, and while the reference
    191  * is bigger than zero, each file can be found in its parent's
    192  * f->down list (chains via f->next), so that multiple threads
    193  * end up sharing a VacFile* when referring to the same file.
    194  *
    195  * Each VacFile holds a reference to its parent.
    196  */
    197 VacFile*
    198 vacfileincref(VacFile *vf)
    199 {
    200 	filemetalock(vf);
    201 	assert(vf->ref > 0);
    202 	vf->ref++;
    203 	filemetaunlock(vf);
    204 	return vf;
    205 }
    206 
/*
 * Drop one reference to f.
 *
 * Returns -1 if other references remain, and 0 if this was the
 * last one, in which case f has been flushed, unlinked from its
 * parent's child list, and freed, and the reference f held on its
 * parent has been dropped in turn.
 *
 * A file with no parent (never linked into the tree) must have
 * exactly one reference and is simply freed.
 */
int
vacfiledecref(VacFile *f)
{
	VacFile *p, *q, **qq;

	if(f->up == nil){
		/* never linked in */
		assert(f->ref == 1);
		filefree(f);
		return 0;
	}

	/* f->ref and the parent's down list are guarded by the parent's lock */
	filemetalock(f);
	f->ref--;
	if(f->ref > 0){
		filemetaunlock(f);
		return -1;
	}
	assert(f->ref == 0);
	assert(f->down == nil);

	/* best effort: flush whichever venti files can be locked */
	if(f->source && vtfilelock(f->source, -1) >= 0){
		vtfileflush(f->source);
		vtfileunlock(f->source);
	}
	if(f->msource && vtfilelock(f->msource, -1) >= 0){
		vtfileflush(f->msource);
		vtfileunlock(f->msource);
	}

	/*
	 * Flush f's directory information to the cache.
	 */
	filemetaflush(f, nil);

	/* find the link that points at f in the parent's child list and splice f out */
	p = f->up;
	qq = &p->down;
	for(q = *qq; q; q = *qq){
		if(q == f)
			break;
		qq = &q->next;
	}
	assert(q != nil);
	*qq = f->next;

	filemetaunlock(f);
	filefree(f);
	/* release the reference f was holding on its parent */
	vacfiledecref(p);
	return 0;
}
    257 
    258 
/*
 * Construct a vacfile for the root of a vac tree, given the
 * venti file for the root information.  That venti file is a
 * directory file containing VtEntries for three more venti files:
 * the two venti files making up the root directory, and a
 * third venti file that would be the metadata half of the
 * "root's parent".
 *
 * Fossil generates slightly different vac files, due to a now
 * impossible-to-change bug, which contain a VtEntry
 * for just one venti file, that itself contains the expected
 * three directory entries.  Sigh.
 *
 * Returns the new root VacFile, whose up pointer refers to a
 * placeholder parent holding only the third venti file as its
 * msource, or nil on failure.
 */
VacFile*
_vacfileroot(VacFs *fs, VtFile *r)
{
	int redirected;
	char err[ERRMAX];
	VtBlock *b;
	VtFile *r0, *r1, *r2;
	MetaBlock mb;
	MetaEntry me;
	VacFile *root, *mr;

	redirected = 0;
Top:
	/* reset per-attempt state; Top is re-entered at most once, for the fossil layout */
	b = nil;
	root = nil;
	mr = nil;
	r1 = nil;
	r2 = nil;

	if(vtfilelock(r, -1) < 0)
		return nil;
	r0 = vtfileopen(r, 0, fs->mode);
	if(debug)
		fprint(2, "r0 %p\n", r0);
	if(r0 == nil)
		goto Err;
	r2 = vtfileopen(r, 2, fs->mode);
	if(debug)
		fprint(2, "r2 %p\n", r2);
	if(r2 == nil){
		/*
		 * some vac files (e.g., from fossil)
		 * have an extra layer of indirection.
		 */
		rerrstr(err, sizeof err);
		if(!redirected && strstr(err, "not active")){
			/* retry once, treating entry 0 as the real root directory file */
			redirected = 1;
			vtfileunlock(r);
			r = r0;
			goto Top;
		}
		goto Err;
	}
	r1 = vtfileopen(r, 1, fs->mode);
	if(debug)
		fprint(2, "r1 %p\n", r1);
	if(r1 == nil)
		goto Err;

	/*
	 * Hand the venti files over to the VacFiles.  Each rN is
	 * cleared once transferred so the error path does not
	 * close it a second time.
	 */
	mr = filealloc(fs);
	mr->msource = r2;
	r2 = nil;

	root = filealloc(fs);
	root->boff = 0;
	root->up = mr;
	root->source = r0;
	r0 = nil;
	root->msource = r1;
	r1 = nil;

	mr->down = root;
	vtfileunlock(r);

	/* the root's own directory entry is entry 0 of block 0 of the parent's metadata */
	if(vtfilelock(mr->msource, VtOREAD) < 0)
		goto Err1;
	b = vtfileblock(mr->msource, 0, VtOREAD);
	vtfileunlock(mr->msource);
	if(b == nil)
		goto Err1;

	if(mbunpack(&mb, b->data, mr->msource->dsize) < 0)
		goto Err1;

	meunpack(&me, &mb, 0);
	if(vdunpack(&root->dir, &me) < 0)
		goto Err1;
	vtblockput(b);

	return root;
Err:
	/* reached while r is still locked */
	vtfileunlock(r);
Err1:
	/* reached with r already unlocked; release whatever was acquired */
	vtblockput(b);
	if(r0)
		vtfileclose(r0);
	if(r1)
		vtfileclose(r1);
	if(r2)
		vtfileclose(r2);
	if(mr)
		filefree(mr);
	if(root)
		filefree(root);

	return nil;
}
    369 
    370 /*
    371  * Vac directories are a sequence of metablocks, each of which
    372  * contains a bunch of metaentries sorted by file name.
    373  * The whole sequence isn't sorted, though, so you still have
    374  * to look at every block to find a given name.
    375  * Dirlookup looks in f for an element name elem.
    376  * It returns a new VacFile with the dir, boff, and mode
    377  * filled in, but the sources (venti files) are not, and f is
    378  * not yet linked into the tree.  These details must be taken
    379  * care of by the caller.
    380  *
    381  * f must be locked, f->msource must not.
    382  */
    383 static VacFile*
    384 dirlookup(VacFile *f, char *elem)
    385 {
    386 	int i;
    387 	MetaBlock mb;
    388 	MetaEntry me;
    389 	VtBlock *b;
    390 	VtFile *meta;
    391 	VacFile *ff;
    392 	u32int bo, nb;
    393 
    394 	meta = f->msource;
    395 	b = nil;
    396 	if(vtfilelock(meta, -1) < 0)
    397 		return nil;
    398 	nb = (vtfilegetsize(meta)+meta->dsize-1)/meta->dsize;
    399 	for(bo=0; bo<nb; bo++){
    400 		b = vtfileblock(meta, bo, VtOREAD);
    401 		if(b == nil)
    402 			goto Err;
    403 		if(mbunpack(&mb, b->data, meta->dsize) < 0)
    404 			goto Err;
    405 		if(mbsearch(&mb, elem, &i, &me) >= 0){
    406 			ff = filealloc(f->fs);
    407 			if(vdunpack(&ff->dir, &me) < 0){
    408 				filefree(ff);
    409 				goto Err;
    410 			}
    411 			ff->qidoffset = f->qidoffset + ff->dir.qidoffset;
    412 			vtfileunlock(meta);
    413 			vtblockput(b);
    414 			ff->boff = bo;
    415 			ff->mode = f->mode;
    416 			return ff;
    417 		}
    418 		vtblockput(b);
    419 		b = nil;
    420 	}
    421 	werrstr(ENoFile);
    422 	/* fall through */
    423 Err:
    424 	vtfileunlock(meta);
    425 	vtblockput(b);
    426 	return nil;
    427 }
    428 
    429 /*
    430  * Open the venti file at offset in the directory f->source.
    431  * f is locked.
    432  */
    433 static VtFile *
    434 fileopensource(VacFile *f, u32int offset, u32int gen, int dir, uint mode)
    435 {
    436 	VtFile *r;
    437 
    438 	if((r = vtfileopen(f->source, offset, mode)) == nil)
    439 		return nil;
    440 	if(r == nil)
    441 		return nil;
    442 	if(r->gen != gen){
    443 		werrstr(ERemoved);
    444 		vtfileclose(r);
    445 		return nil;
    446 	}
    447 	if(r->dir != dir && r->mode != -1){
    448 		werrstr(EBadMeta);
    449 		vtfileclose(r);
    450 		return nil;
    451 	}
    452 	return r;
    453 }
    454 
    455 VacFile*
    456 vacfilegetparent(VacFile *f)
    457 {
    458 	if(vacfileisroot(f))
    459 		return vacfileincref(f);
    460 	return vacfileincref(f->up);
    461 }
    462 
/*
 * Given an unlocked vacfile (directory) f,
 * return the vacfile named elem in f.
 * Interprets . and .. as a convenience to callers.
 *
 * The result carries a new reference.  Returns nil on failure:
 * EBadPath for an empty elem, ENotDir if f is not a directory,
 * or whatever error the lookup or opening of the sources set.
 */
VacFile*
vacfilewalk(VacFile *f, char *elem)
{
	VacFile *ff;

	if(elem[0] == 0){
		werrstr(EBadPath);
		return nil;
	}

	if(!vacfileisdir(f)){
		werrstr(ENotDir);
		return nil;
	}

	if(strcmp(elem, ".") == 0)
		return vacfileincref(f);

	if(strcmp(elem, "..") == 0)
		return vacfilegetparent(f);

	if(filelock(f) < 0)
		return nil;

	/*
	 * First look among the children already in memory.
	 * f's lock is also the metadata lock of its children,
	 * so ff->ref can be bumped directly here.
	 */
	for(ff = f->down; ff; ff=ff->next){
		if(strcmp(elem, ff->dir.elem) == 0 && !ff->removed){
			ff->ref++;
			goto Exit;
		}
	}

	/* not in memory: search the directory's metadata blocks */
	ff = dirlookup(f, elem);
	if(ff == nil)
		goto Err;

	if(ff->dir.mode & ModeSnapshot)
		ff->mode = VtOREAD;

	/* open the venti file(s) backing the new VacFile */
	if(vtfilelock(f->source, f->mode) < 0)
		goto Err;
	if(ff->dir.mode & ModeDir){
		ff->source = fileopensource(f, ff->dir.entry, ff->dir.gen, 1, ff->mode);
		ff->msource = fileopensource(f, ff->dir.mentry, ff->dir.mgen, 0, ff->mode);
		if(ff->source == nil || ff->msource == nil)
			goto Err1;
	}else{
		ff->source = fileopensource(f, ff->dir.entry, ff->dir.gen, 0, ff->mode);
		if(ff->source == nil)
			goto Err1;
	}
	vtfileunlock(f->source);

	/* link in and up parent ref count */
	ff->next = f->down;
	f->down = ff;
	ff->up = f;
	vacfileincref(f);
Exit:
	fileunlock(f);
	return ff;

Err1:
	vtfileunlock(f->source);
Err:
	fileunlock(f);
	/* ff was never linked in (ff->up is nil), so this just frees it */
	if(ff != nil)
		vacfiledecref(ff);
	return nil;
}
    537 
    538 /*
    539  * Open a path in the vac file system:
    540  * just walk each element one at a time.
    541  */
    542 VacFile*
    543 vacfileopen(VacFs *fs, char *path)
    544 {
    545 	VacFile *f, *ff;
    546 	char *p, elem[VtMaxStringSize], *opath;
    547 	int n;
    548 
    549 	f = fs->root;
    550 	vacfileincref(f);
    551 	opath = path;
    552 	while(*path != 0){
    553 		for(p = path; *p && *p != '/'; p++)
    554 			;
    555 		n = p - path;
    556 		if(n > 0){
    557 			if(n > VtMaxStringSize){
    558 				werrstr("%s: element too long", EBadPath);
    559 				goto Err;
    560 			}
    561 			memmove(elem, path, n);
    562 			elem[n] = 0;
    563 			ff = vacfilewalk(f, elem);
    564 			if(ff == nil){
    565 				werrstr("%.*s: %r", utfnlen(opath, p-opath), opath);
    566 				goto Err;
    567 			}
    568 			vacfiledecref(f);
    569 			f = ff;
    570 		}
    571 		if(*p == '/')
    572 			p++;
    573 		path = p;
    574 	}
    575 	return f;
    576 Err:
    577 	vacfiledecref(f);
    578 	return nil;
    579 }
    580 
    581 /*
    582  * Extract the score for the bn'th block in f.
    583  */
    584 int
    585 vacfileblockscore(VacFile *f, u32int bn, u8int *score)
    586 {
    587 	VtFile *s;
    588 	uvlong size;
    589 	int dsize, ret;
    590 
    591 	ret = -1;
    592 	if(filerlock(f) < 0)
    593 		return -1;
    594 	if(vtfilelock(f->source, VtOREAD) < 0)
    595 		goto out;
    596 
    597 	s = f->source;
    598 	dsize = s->dsize;
    599 	size = vtfilegetsize(s);
    600 	if((uvlong)bn*dsize >= size)
    601 		goto out1;
    602 	ret = vtfileblockscore(f->source, bn, score);
    603 
    604 out1:
    605 	vtfileunlock(f->source);
    606 out:
    607 	filerunlock(f);
    608 	return ret;
    609 }
    610 
    611 /*
    612  * Read data from f.
    613  */
    614 int
    615 vacfileread(VacFile *f, void *buf, int cnt, vlong offset)
    616 {
    617 	int n;
    618 
    619 	if(offset < 0){
    620 		werrstr(EBadOffset);
    621 		return -1;
    622 	}
    623 	if(filerlock(f) < 0)
    624 		return -1;
    625 	if(vtfilelock(f->source, VtOREAD) < 0){
    626 		filerunlock(f);
    627 		return -1;
    628 	}
    629 	n = vtfileread(f->source, buf, cnt, offset);
    630 	vtfileunlock(f->source);
    631 	filerunlock(f);
    632 	return n;
    633 }
    634 
    635 static int
    636 getentry(VtFile *f, VtEntry *e)
    637 {
    638 	if(vtfilelock(f, VtOREAD) < 0)
    639 		return -1;
    640 	if(vtfilegetentry(f, e) < 0){
    641 		vtfileunlock(f);
    642 		return -1;
    643 	}
    644 	vtfileunlock(f);
    645 	if(vtglobaltolocal(e->score) != NilBlock){
    646 		werrstr("internal error - data not on venti");
    647 		return -1;
    648 	}
    649 	return 0;
    650 }
    651 
    652 /*
    653  * Get the VtEntries for the data contained in f.
    654  */
    655 int
    656 vacfilegetentries(VacFile *f, VtEntry *e, VtEntry *me)
    657 {
    658 	if(filerlock(f) < 0)
    659 		return -1;
    660 	if(e && getentry(f->source, e) < 0){
    661 		filerunlock(f);
    662 		return -1;
    663 	}
    664 	if(me){
    665 		if(f->msource == nil)
    666 			memset(me, 0, sizeof *me);
    667 		else if(getentry(f->msource, me) < 0){
    668 			filerunlock(f);
    669 			return -1;
    670 		}
    671 	}
    672 	filerunlock(f);
    673 	return 0;
    674 }
    675 
    676 /*
    677  * Get the file's size.
    678  */
    679 int
    680 vacfilegetsize(VacFile *f, uvlong *size)
    681 {
    682 	if(filerlock(f) < 0)
    683 		return -1;
    684 	if(vtfilelock(f->source, VtOREAD) < 0){
    685 		filerunlock(f);
    686 		return -1;
    687 	}
    688 	*size = vtfilegetsize(f->source);
    689 	vtfileunlock(f->source);
    690 	filerunlock(f);
    691 
    692 	return 0;
    693 }
    694 
    695 /*
    696  * Directory reading.
    697  *
    698  * A VacDirEnum is a buffer containing directory entries.
    699  * Directory entries contain malloced strings and need to
    700  * be cleaned up with vdcleanup.  The invariant in the
    701  * VacDirEnum is that the directory entries between
    702  * vde->i and vde->n are owned by the vde and need to
    703  * be cleaned up if it is closed.  Those from 0 up to vde->i
    704  * have been handed to the reader, and the reader must
    705  * take care of calling vdcleanup as appropriate.
    706  */
    707 VacDirEnum*
    708 vdeopen(VacFile *f)
    709 {
    710 	VacDirEnum *vde;
    711 	VacFile *p;
    712 
    713 	if(!vacfileisdir(f)){
    714 		werrstr(ENotDir);
    715 		return nil;
    716 	}
    717 
    718 	/*
    719 	 * There might be changes to this directory's children
    720 	 * that have not been flushed out into the cache yet.
    721 	 * Those changes are only available if we look at the
    722 	 * VacFile structures directory.  But the directory reader
    723 	 * is going to read the cache blocks directly, so update them.
    724 	 */
    725 	if(filelock(f) < 0)
    726 		return nil;
    727 	for(p=f->down; p; p=p->next)
    728 		filemetaflush(p, nil);
    729 	fileunlock(f);
    730 
    731 	vde = vtmallocz(sizeof(VacDirEnum));
    732 	vde->file = vacfileincref(f);
    733 
    734 	return vde;
    735 }
    736 
    737 /*
    738  * Figure out the size of the directory entry at offset.
    739  * The rest of the metadata is kept in the data half,
    740  * but since venti has to track the data size anyway,
    741  * we just use that one and avoid updating the directory
    742  * each time the file size changes.
    743  */
    744 static int
    745 direntrysize(VtFile *s, ulong offset, ulong gen, uvlong *size)
    746 {
    747 	VtBlock *b;
    748 	ulong bn;
    749 	VtEntry e;
    750 	int epb;
    751 
    752 	epb = s->dsize/VtEntrySize;
    753 	bn = offset/epb;
    754 	offset -= bn*epb;
    755 
    756 	b = vtfileblock(s, bn, VtOREAD);
    757 	if(b == nil)
    758 		goto Err;
    759 	if(vtentryunpack(&e, b->data, offset) < 0)
    760 		goto Err;
    761 
    762 	/* dangling entries are returned as zero size */
    763 	if(!(e.flags & VtEntryActive) || e.gen != gen)
    764 		*size = 0;
    765 	else
    766 		*size = e.size;
    767 	vtblockput(b);
    768 	return 0;
    769 
    770 Err:
    771 	vtblockput(b);
    772 	return -1;
    773 }
    774 
    775 /*
    776  * Fill in vde with a new batch of directory entries.
    777  */
    778 static int
    779 vdefill(VacDirEnum *vde)
    780 {
    781 	int i, n;
    782 	VtFile *meta, *source;
    783 	MetaBlock mb;
    784 	MetaEntry me;
    785 	VacFile *f;
    786 	VtBlock *b;
    787 	VacDir *de;
    788 
    789 	/* clean up first */
    790 	for(i=vde->i; i<vde->n; i++)
    791 		vdcleanup(vde->buf+i);
    792 	vtfree(vde->buf);
    793 	vde->buf = nil;
    794 	vde->i = 0;
    795 	vde->n = 0;
    796 
    797 	f = vde->file;
    798 
    799 	source = f->source;
    800 	meta = f->msource;
    801 
    802 	b = vtfileblock(meta, vde->boff, VtOREAD);
    803 	if(b == nil)
    804 		goto Err;
    805 	if(mbunpack(&mb, b->data, meta->dsize) < 0)
    806 		goto Err;
    807 
    808 	n = mb.nindex;
    809 	vde->buf = vtmalloc(n * sizeof(VacDir));
    810 
    811 	for(i=0; i<n; i++){
    812 		de = vde->buf + i;
    813 		meunpack(&me, &mb, i);
    814 		if(vdunpack(de, &me) < 0)
    815 			goto Err;
    816 		vde->n++;
    817 		if(!(de->mode & ModeDir))
    818 		if(direntrysize(source, de->entry, de->gen, &de->size) < 0)
    819 			goto Err;
    820 	}
    821 	vde->boff++;
    822 	vtblockput(b);
    823 	return 0;
    824 Err:
    825 	vtblockput(b);
    826 	return -1;
    827 }
    828 
    829 /*
    830  * Read a single directory entry from vde into de.
    831  * Returns -1 on error, 0 on EOF, and 1 on success.
    832  * When it returns 1, it becomes the caller's responsibility
    833  * to call vdcleanup(de) to free the strings contained
    834  * inside, or else to call vdunread to give it back.
    835  */
    836 int
    837 vderead(VacDirEnum *vde, VacDir *de)
    838 {
    839 	int ret;
    840 	VacFile *f;
    841 	u32int nb;
    842 
    843 	f = vde->file;
    844 	if(filerlock(f) < 0)
    845 		return -1;
    846 
    847 	if(vtfilelock2(f->source, f->msource, VtOREAD) < 0){
    848 		filerunlock(f);
    849 		return -1;
    850 	}
    851 
    852 	nb = (vtfilegetsize(f->msource)+f->msource->dsize-1)/f->msource->dsize;
    853 
    854 	while(vde->i >= vde->n){
    855 		if(vde->boff >= nb){
    856 			ret = 0;
    857 			goto Return;
    858 		}
    859 		if(vdefill(vde) < 0){
    860 			ret = -1;
    861 			goto Return;
    862 		}
    863 	}
    864 
    865 	memmove(de, vde->buf + vde->i, sizeof(VacDir));
    866 	vde->i++;
    867 	ret = 1;
    868 
    869 Return:
    870 	vtfileunlock(f->source);
    871 	vtfileunlock(f->msource);
    872 	filerunlock(f);
    873 
    874 	return ret;
    875 }
    876 
    877 /*
    878  * "Unread" the last directory entry that was read,
    879  * so that the next vderead will return the same one.
    880  * If the caller calls vdeunread(vde) it should not call
    881  * vdcleanup on the entry being "unread".
    882  */
    883 int
    884 vdeunread(VacDirEnum *vde)
    885 {
    886 	if(vde->i > 0){
    887 		vde->i--;
    888 		return 0;
    889 	}
    890 	return -1;
    891 }
    892 
    893 /*
    894  * Close the enumerator.
    895  */
    896 void
    897 vdeclose(VacDirEnum *vde)
    898 {
    899 	int i;
    900 	if(vde == nil)
    901 		return;
    902 	/* free the strings */
    903 	for(i=vde->i; i<vde->n; i++)
    904 		vdcleanup(vde->buf+i);
    905 	vtfree(vde->buf);
    906 	vacfiledecref(vde->file);
    907 	vtfree(vde);
    908 }
    909 
    910 
    911 /*
    912  * On to mutation.  If the vac file system has been opened
    913  * read-write, then the files and directories can all be edited.
    914  * Changes are kept in the in-memory cache until flushed out
    915  * to venti, so we must be careful to explicitly flush data
    916  * that we're not likely to modify again.
    917  *
    918  * Each VacFile has its own copy of its VacDir directory entry
    919  * in f->dir, but otherwise the cache is the authoritative source
    920  * for data.  Thus, for the most part, it suffices if we just
    921  * call vtfileflushbefore and vtfileflush when we modify things.
    922  * There are a few places where we have to remember to write
    923  * changed VacDirs back into the cache.  If f->dir *is* out of sync,
    924  * then f->dirty should be set.
    925  *
    926  * The metadata in a directory is, to venti, a plain data file,
    927  * but as mentioned above it is actually a sequence of
    928  * MetaBlocks that contain sorted lists of VacDir entries.
    929  * The filemetaxxx routines manipulate that stream.
    930  */
    931 
    932 /*
    933  * Find space in fp for the directory entry dir (not yet written to disk)
    934  * and write it to disk, returning NilBlock on failure,
    935  * or the block number on success.
    936  *
    937  * Start is a suggested block number to try.
    938  * The caller must have filemetalock'ed f and have
    939  * vtfilelock'ed f->up->msource.
    940  */
    941 static u32int
    942 filemetaalloc(VacFile *fp, VacDir *dir, u32int start)
    943 {
    944 	u32int nb, bo;
    945 	VtBlock *b;
    946 	MetaBlock mb;
    947 	int nn;
    948 	uchar *p;
    949 	int i, n;
    950 	MetaEntry me;
    951 	VtFile *ms;
    952 
    953 	ms = fp->msource;
    954 	n = vdsize(dir, VacDirVersion);
    955 
    956 	/* Look for a block with room for a new entry of size n. */
    957 	nb = (vtfilegetsize(ms)+ms->dsize-1)/ms->dsize;
    958 	if(start == NilBlock){
    959 		if(nb > 0)
    960 			start = nb - 1;
    961 		else
    962 			start = 0;
    963 	}
    964 
    965 	if(start > nb)
    966 		start = nb;
    967 	for(bo=start; bo<nb; bo++){
    968 		if((b = vtfileblock(ms, bo, VtOREAD)) == nil)
    969 			goto Err;
    970 		if(mbunpack(&mb, b->data, ms->dsize) < 0)
    971 			goto Err;
    972 		nn = (mb.maxsize*FullPercentage/100) - mb.size + mb.free;
    973 		if(n <= nn && mb.nindex < mb.maxindex){
    974 			/* reopen for writing */
    975 			vtblockput(b);
    976 			if((b = vtfileblock(ms, bo, VtORDWR)) == nil)
    977 				goto Err;
    978 			mbunpack(&mb, b->data, ms->dsize);
    979 			goto Found;
    980 		}
    981 		vtblockput(b);
    982 	}
    983 
    984 	/* No block found, extend the file by one metablock. */
    985 	vtfileflushbefore(ms, nb*(uvlong)ms->dsize);
    986 	if((b = vtfileblock(ms, nb, VtORDWR)) == nil)
    987 		goto Err;
    988 	vtfilesetsize(ms, (nb+1)*ms->dsize);
    989 	mbinit(&mb, b->data, ms->dsize, ms->dsize/BytesPerEntry);
    990 
    991 Found:
    992 	/* Now we have a block; allocate space to write the entry. */
    993 	p = mballoc(&mb, n);
    994 	if(p == nil){
    995 		/* mballoc might have changed block */
    996 		mbpack(&mb);
    997 		werrstr(EBadMeta);
    998 		goto Err;
    999 	}
   1000 
   1001 	/* Figure out where to put the index entry, and write it. */
   1002 	mbsearch(&mb, dir->elem, &i, &me);
   1003 	assert(me.p == nil);	/* not already there */
   1004 	me.p = p;
   1005 	me.size = n;
   1006 	vdpack(dir, &me, VacDirVersion);
   1007 	mbinsert(&mb, i, &me);
   1008 	mbpack(&mb);
   1009 	vtblockput(b);
   1010 	return bo;
   1011 
   1012 Err:
   1013 	vtblockput(b);
   1014 	return NilBlock;
   1015 }
   1016 
   1017 /*
   1018  * Update f's directory entry in the block cache.
   1019  * We look for the directory entry by name;
   1020  * if we're trying to rename the file, oelem is the old name.
   1021  *
   1022  * Assumes caller has filemetalock'ed f.
   1023  */
   1024 static int
   1025 filemetaflush(VacFile *f, char *oelem)
   1026 {
   1027 	int i, n;
   1028 	MetaBlock mb;
   1029 	MetaEntry me, me2;
   1030 	VacFile *fp;
   1031 	VtBlock *b;
   1032 	u32int bo;
   1033 
   1034 	if(!f->dirty)
   1035 		return 0;
   1036 
   1037 	if(oelem == nil)
   1038 		oelem = f->dir.elem;
   1039 
   1040 	/*
   1041 	 * Locate f's old metadata in the parent's metadata file.
   1042 	 * We know which block it was in, but not exactly where
   1043 	 * in the block.
   1044 	 */
   1045 	fp = f->up;
   1046 	if(vtfilelock(fp->msource, -1) < 0)
   1047 		return -1;
   1048 	/* can happen if source is clri'ed out from under us */
   1049 	if(f->boff == NilBlock)
   1050 		goto Err1;
   1051 	b = vtfileblock(fp->msource, f->boff, VtORDWR);
   1052 	if(b == nil)
   1053 		goto Err1;
   1054 	if(mbunpack(&mb, b->data, fp->msource->dsize) < 0)
   1055 		goto Err;
   1056 	if(mbsearch(&mb, oelem, &i, &me) < 0)
   1057 		goto Err;
   1058 
   1059 	/*
   1060 	 * Check whether we can resize the entry and keep it
   1061 	 * in this block.
   1062 	 */
   1063 	n = vdsize(&f->dir, VacDirVersion);
   1064 	if(mbresize(&mb, &me, n) >= 0){
   1065 		/* Okay, can be done without moving to another block. */
   1066 
   1067 		/* Remove old data */
   1068 		mbdelete(&mb, i, &me);
   1069 
   1070 		/* Find new location if renaming */
   1071 		if(strcmp(f->dir.elem, oelem) != 0)
   1072 			mbsearch(&mb, f->dir.elem, &i, &me2);
   1073 
   1074 		/* Pack new data into new location. */
   1075 		vdpack(&f->dir, &me, VacDirVersion);
   1076 vdunpack(&f->dir, &me);
   1077 		mbinsert(&mb, i, &me);
   1078 		mbpack(&mb);
   1079 
   1080 		/* Done */
   1081 		vtblockput(b);
   1082 		vtfileunlock(fp->msource);
   1083 		f->dirty = 0;
   1084 		return 0;
   1085 	}
   1086 
   1087 	/*
   1088 	 * The entry must be moved to another block.
   1089 	 * This can only really happen on renames that
   1090 	 * make the name very long.
   1091 	 */
   1092 
   1093 	/* Allocate a spot in a new block. */
   1094 	if((bo = filemetaalloc(fp, &f->dir, f->boff+1)) == NilBlock){
   1095 		/* mbresize above might have modified block */
   1096 		mbpack(&mb);
   1097 		goto Err;
   1098 	}
   1099 	f->boff = bo;
   1100 
   1101 	/* Now we're committed.  Delete entry in old block. */
   1102 	mbdelete(&mb, i, &me);
   1103 	mbpack(&mb);
   1104 	vtblockput(b);
   1105 	vtfileunlock(fp->msource);
   1106 
   1107 	f->dirty = 0;
   1108 	return 0;
   1109 
   1110 Err:
   1111 	vtblockput(b);
   1112 Err1:
   1113 	vtfileunlock(fp->msource);
   1114 	return -1;
   1115 }
   1116 
   1117 /*
   1118  * Remove the directory entry for f.
   1119  */
   1120 static int
   1121 filemetaremove(VacFile *f)
   1122 {
   1123 	VtBlock *b;
   1124 	MetaBlock mb;
   1125 	MetaEntry me;
   1126 	int i;
   1127 	VacFile *fp;
   1128 
   1129 	b = nil;
   1130 	fp = f->up;
   1131 	filemetalock(f);
   1132 
   1133 	if(vtfilelock(fp->msource, VtORDWR) < 0)
   1134 		goto Err;
   1135 	b = vtfileblock(fp->msource, f->boff, VtORDWR);
   1136 	if(b == nil)
   1137 		goto Err;
   1138 
   1139 	if(mbunpack(&mb, b->data, fp->msource->dsize) < 0)
   1140 		goto Err;
   1141 	if(mbsearch(&mb, f->dir.elem, &i, &me) < 0)
   1142 		goto Err;
   1143 	mbdelete(&mb, i, &me);
   1144 	mbpack(&mb);
   1145 	vtblockput(b);
   1146 	vtfileunlock(fp->msource);
   1147 
   1148 	f->removed = 1;
   1149 	f->boff = NilBlock;
   1150 	f->dirty = 0;
   1151 
   1152 	filemetaunlock(f);
   1153 	return 0;
   1154 
   1155 Err:
   1156 	vtfileunlock(fp->msource);
   1157 	vtblockput(b);
   1158 	filemetaunlock(f);
   1159 	return -1;
   1160 }
   1161 
   1162 /*
   1163  * That was far too much effort for directory entries.
   1164  * Now we can write code that *does* things.
   1165  */
   1166 
   1167 /*
   1168  * Flush all data associated with f out of the cache and onto venti.
   1169  * If recursive is set, flush f's children too.
   1170  * Vacfiledecref knows how to flush source and msource too.
   1171  */
   1172 int
   1173 vacfileflush(VacFile *f, int recursive)
   1174 {
   1175 	int ret;
   1176 	VacFile **kids, *p;
   1177 	int i, nkids;
   1178 
   1179 	if(f->mode == VtOREAD)
   1180 		return 0;
   1181 
   1182 	ret = 0;
   1183 	filemetalock(f);
   1184 	if(filemetaflush(f, nil) < 0)
   1185 		ret = -1;
   1186 	filemetaunlock(f);
   1187 
   1188 	if(filelock(f) < 0)
   1189 		return -1;
   1190 
   1191 	/*
   1192 	 * Lock order prevents us from flushing kids while holding
   1193 	 * lock, so make a list and then flush without the lock.
   1194 	 */
   1195 	nkids = 0;
   1196 	kids = nil;
   1197 	if(recursive){
   1198 		nkids = 0;
   1199 		for(p=f->down; p; p=p->next)
   1200 			nkids++;
   1201 		kids = vtmalloc(nkids*sizeof(VacFile*));
   1202 		i = 0;
   1203 		for(p=f->down; p; p=p->next){
   1204 			kids[i++] = p;
   1205 			p->ref++;
   1206 		}
   1207 	}
   1208 	if(nkids > 0){
   1209 		fileunlock(f);
   1210 		for(i=0; i<nkids; i++){
   1211 			if(vacfileflush(kids[i], 1) < 0)
   1212 				ret = -1;
   1213 			vacfiledecref(kids[i]);
   1214 		}
   1215 		filelock(f);
   1216 	}
   1217 	free(kids);
   1218 
   1219 	/*
   1220 	 * Now we can flush our own data.
   1221 	 */
   1222 	vtfilelock(f->source, -1);
   1223 	if(vtfileflush(f->source) < 0)
   1224 		ret = -1;
   1225 	vtfileunlock(f->source);
   1226 	if(f->msource){
   1227 		vtfilelock(f->msource, -1);
   1228 		if(vtfileflush(f->msource) < 0)
   1229 			ret = -1;
   1230 		vtfileunlock(f->msource);
   1231 	}
   1232 	fileunlock(f);
   1233 
   1234 	return ret;
   1235 }
   1236 
   1237 /*
   1238  * Create a new file named elem in fp with the given mode.
   1239  * The mode can be changed later except for the ModeDir bit.
   1240  */
   1241 VacFile*
   1242 vacfilecreate(VacFile *fp, char *elem, ulong mode)
   1243 {
   1244 	VacFile *ff;
   1245 	VacDir *dir;
   1246 	VtFile *pr, *r, *mr;
   1247 	int type;
   1248 	u32int bo;
   1249 
   1250 	if(filelock(fp) < 0)
   1251 		return nil;
   1252 
   1253 	/*
   1254 	 * First, look to see that there's not a file in memory
   1255 	 * with the same name.
   1256 	 */
   1257 	for(ff = fp->down; ff; ff=ff->next){
   1258 		if(strcmp(elem, ff->dir.elem) == 0 && !ff->removed){
   1259 			ff = nil;
   1260 			werrstr(EExists);
   1261 			goto Err1;
   1262 		}
   1263 	}
   1264 
   1265 	/*
   1266 	 * Next check the venti blocks.
   1267 	 */
   1268 	ff = dirlookup(fp, elem);
   1269 	if(ff != nil){
   1270 		werrstr(EExists);
   1271 		goto Err1;
   1272 	}
   1273 
   1274 	/*
   1275 	 * By the way, you can't create in a read-only file system.
   1276 	 */
   1277 	pr = fp->source;
   1278 	if(pr->mode != VtORDWR){
   1279 		werrstr(EReadOnly);
   1280 		goto Err1;
   1281 	}
   1282 
   1283 	/*
   1284 	 * Okay, time to actually create something.  Lock the two
   1285 	 * halves of the directory and create a file.
   1286 	 */
   1287 	if(vtfilelock2(fp->source, fp->msource, -1) < 0)
   1288 		goto Err1;
   1289 	ff = filealloc(fp->fs);
   1290 	ff->qidoffset = fp->qidoffset;	/* hopefully fp->qidoffset == 0 */
   1291 	type = VtDataType;
   1292 	if(mode & ModeDir)
   1293 		type = VtDirType;
   1294 	mr = nil;
   1295 	if((r = vtfilecreate(pr, pr->psize, pr->dsize, type)) == nil)
   1296 		goto Err;
   1297 	if(mode & ModeDir)
   1298 	if((mr = vtfilecreate(pr, pr->psize, pr->dsize, VtDataType)) == nil)
   1299 		goto Err;
   1300 
   1301 	/*
   1302 	 * Fill in the directory entry and write it to disk.
   1303 	 */
   1304 	dir = &ff->dir;
   1305 	dir->elem = vtstrdup(elem);
   1306 	dir->entry = r->offset;
   1307 	dir->gen = r->gen;
   1308 	if(mode & ModeDir){
   1309 		dir->mentry = mr->offset;
   1310 		dir->mgen = mr->gen;
   1311 	}
   1312 	dir->size = 0;
   1313 	if(_vacfsnextqid(fp->fs, &dir->qid) < 0)
   1314 		goto Err;
   1315 	dir->uid = vtstrdup(fp->dir.uid);
   1316 	dir->gid = vtstrdup(fp->dir.gid);
   1317 	dir->mid = vtstrdup("");
   1318 	dir->mtime = time(0L);
   1319 	dir->mcount = 0;
   1320 	dir->ctime = dir->mtime;
   1321 	dir->atime = dir->mtime;
   1322 	dir->mode = mode;
   1323 	if((bo = filemetaalloc(fp, &ff->dir, NilBlock)) == NilBlock)
   1324 		goto Err;
   1325 
   1326 	/*
   1327 	 * Now we're committed.
   1328 	 */
   1329 	vtfileunlock(fp->source);
   1330 	vtfileunlock(fp->msource);
   1331 	ff->source = r;
   1332 	ff->msource = mr;
   1333 	ff->boff = bo;
   1334 
   1335 	/* Link into tree. */
   1336 	ff->next = fp->down;
   1337 	fp->down = ff;
   1338 	ff->up = fp;
   1339 	vacfileincref(fp);
   1340 
   1341 	fileunlock(fp);
   1342 
   1343 	filelock(ff);
   1344 	vtfilelock(ff->source, -1);
   1345 	vtfileunlock(ff->source);
   1346 	fileunlock(ff);
   1347 
   1348 	return ff;
   1349 
   1350 Err:
   1351 	vtfileunlock(fp->source);
   1352 	vtfileunlock(fp->msource);
   1353 	if(r){
   1354 		vtfilelock(r, -1);
   1355 		vtfileremove(r);
   1356 	}
   1357 	if(mr){
   1358 		vtfilelock(mr, -1);
   1359 		vtfileremove(mr);
   1360 	}
   1361 Err1:
   1362 	if(ff)
   1363 		vacfiledecref(ff);
   1364 	fileunlock(fp);
   1365 	return nil;
   1366 }
   1367 
   1368 /*
   1369  * Change the size of the file f.
   1370  */
   1371 int
   1372 vacfilesetsize(VacFile *f, uvlong size)
   1373 {
   1374 	if(vacfileisdir(f)){
   1375 		werrstr(ENotFile);
   1376 		return -1;
   1377 	}
   1378 
   1379 	if(filelock(f) < 0)
   1380 		return -1;
   1381 
   1382 	if(f->source->mode != VtORDWR){
   1383 		werrstr(EReadOnly);
   1384 		goto Err;
   1385 	}
   1386 	if(vtfilelock(f->source, -1) < 0)
   1387 		goto Err;
   1388 	if(vtfilesetsize(f->source, size) < 0){
   1389 		vtfileunlock(f->source);
   1390 		goto Err;
   1391 	}
   1392 	vtfileunlock(f->source);
   1393 	fileunlock(f);
   1394 	return 0;
   1395 
   1396 Err:
   1397 	fileunlock(f);
   1398 	return -1;
   1399 }
   1400 
   1401 /*
   1402  * Write data to f.
   1403  */
   1404 int
   1405 vacfilewrite(VacFile *f, void *buf, int cnt, vlong offset)
   1406 {
   1407 	if(vacfileisdir(f)){
   1408 		werrstr(ENotFile);
   1409 		return -1;
   1410 	}
   1411 	if(filelock(f) < 0)
   1412 		return -1;
   1413 	if(f->source->mode != VtORDWR){
   1414 		werrstr(EReadOnly);
   1415 		goto Err;
   1416 	}
   1417 	if(offset < 0){
   1418 		werrstr(EBadOffset);
   1419 		goto Err;
   1420 	}
   1421 
   1422 	if(vtfilelock(f->source, -1) < 0)
   1423 		goto Err;
   1424 	if(f->dir.mode & ModeAppend)
   1425 		offset = vtfilegetsize(f->source);
   1426 	if(vtfilewrite(f->source, buf, cnt, offset) != cnt
   1427 	|| vtfileflushbefore(f->source, offset) < 0){
   1428 		vtfileunlock(f->source);
   1429 		goto Err;
   1430 	}
   1431 	vtfileunlock(f->source);
   1432 	fileunlock(f);
   1433 	return cnt;
   1434 
   1435 Err:
   1436 	fileunlock(f);
   1437 	return -1;
   1438 }
   1439 
   1440 /*
   1441  * Set (!) the VtEntry for the data contained in f.
   1442  * This let's us efficiently copy data from one file to another.
   1443  */
   1444 int
   1445 vacfilesetentries(VacFile *f, VtEntry *e, VtEntry *me)
   1446 {
   1447 	int ret;
   1448 
   1449 	vacfileflush(f, 0);	/* flush blocks to venti, since we won't see them again */
   1450 
   1451 	if(!(e->flags&VtEntryActive)){
   1452 		werrstr("missing entry for source");
   1453 		return -1;
   1454 	}
   1455 	if(me && !(me->flags&VtEntryActive))
   1456 		me = nil;
   1457 	if(f->msource && !me){
   1458 		werrstr("missing entry for msource");
   1459 		return -1;
   1460 	}
   1461 	if(me && !f->msource){
   1462 		werrstr("no msource to set");
   1463 		return -1;
   1464 	}
   1465 
   1466 	if(filelock(f) < 0)
   1467 		return -1;
   1468 	if(f->source->mode != VtORDWR
   1469 	|| (f->msource && f->msource->mode != VtORDWR)){
   1470 		werrstr(EReadOnly);
   1471 		fileunlock(f);
   1472 		return -1;
   1473 	}
   1474 	if(vtfilelock2(f->source, f->msource, -1) < 0){
   1475 		fileunlock(f);
   1476 		return -1;
   1477 	}
   1478 	ret = 0;
   1479 	if(vtfilesetentry(f->source, e) < 0)
   1480 		ret = -1;
   1481 	else if(me && vtfilesetentry(f->msource, me) < 0)
   1482 		ret = -1;
   1483 
   1484 	vtfileunlock(f->source);
   1485 	if(f->msource)
   1486 		vtfileunlock(f->msource);
   1487 	fileunlock(f);
   1488 	return ret;
   1489 }
   1490 
   1491 /*
   1492  * Get the directory entry for f.
   1493  */
   1494 int
   1495 vacfilegetdir(VacFile *f, VacDir *dir)
   1496 {
   1497 	if(filerlock(f) < 0)
   1498 		return -1;
   1499 
   1500 	filemetalock(f);
   1501 	vdcopy(dir, &f->dir);
   1502 	filemetaunlock(f);
   1503 
   1504 	if(!vacfileisdir(f)){
   1505 		if(vtfilelock(f->source, VtOREAD) < 0){
   1506 			filerunlock(f);
   1507 			return -1;
   1508 		}
   1509 		dir->size = vtfilegetsize(f->source);
   1510 		vtfileunlock(f->source);
   1511 	}
   1512 	filerunlock(f);
   1513 
   1514 	return 0;
   1515 }
   1516 
   1517 /*
   1518  * Set the directory entry for f.
   1519  */
   1520 int
   1521 vacfilesetdir(VacFile *f, VacDir *dir)
   1522 {
   1523 	VacFile *ff;
   1524 	char *oelem;
   1525 	u32int mask;
   1526 	u64int size;
   1527 
   1528 	/* can not set permissions for the root */
   1529 	if(vacfileisroot(f)){
   1530 		werrstr(ERoot);
   1531 		return -1;
   1532 	}
   1533 
   1534 	if(filelock(f) < 0)
   1535 		return -1;
   1536 	filemetalock(f);
   1537 
   1538 	if(f->source->mode != VtORDWR){
   1539 		werrstr(EReadOnly);
   1540 		goto Err;
   1541 	}
   1542 
   1543 	/* On rename, check new name does not already exist */
   1544 	if(strcmp(f->dir.elem, dir->elem) != 0){
   1545 		for(ff = f->up->down; ff; ff=ff->next){
   1546 			if(strcmp(dir->elem, ff->dir.elem) == 0 && !ff->removed){
   1547 				werrstr(EExists);
   1548 				goto Err;
   1549 			}
   1550 		}
   1551 		ff = dirlookup(f->up, dir->elem);
   1552 		if(ff != nil){
   1553 			vacfiledecref(ff);
   1554 			werrstr(EExists);
   1555 			goto Err;
   1556 		}
   1557 		werrstr("");	/* "failed" dirlookup poisoned it */
   1558 	}
   1559 
   1560 	/* Get ready... */
   1561 	if(vtfilelock2(f->source, f->msource, -1) < 0)
   1562 		goto Err;
   1563 	if(!vacfileisdir(f)){
   1564 		size = vtfilegetsize(f->source);
   1565 		if(size != dir->size){
   1566 			if(vtfilesetsize(f->source, dir->size) < 0){
   1567 				vtfileunlock(f->source);
   1568 				if(f->msource)
   1569 					vtfileunlock(f->msource);
   1570 				goto Err;
   1571 			}
   1572 		}
   1573 	}
   1574 	/* ... now commited to changing it. */
   1575 	vtfileunlock(f->source);
   1576 	if(f->msource)
   1577 		vtfileunlock(f->msource);
   1578 
   1579 	oelem = nil;
   1580 	if(strcmp(f->dir.elem, dir->elem) != 0){
   1581 		oelem = f->dir.elem;
   1582 		f->dir.elem = vtstrdup(dir->elem);
   1583 	}
   1584 
   1585 	if(strcmp(f->dir.uid, dir->uid) != 0){
   1586 		vtfree(f->dir.uid);
   1587 		f->dir.uid = vtstrdup(dir->uid);
   1588 	}
   1589 
   1590 	if(strcmp(f->dir.gid, dir->gid) != 0){
   1591 		vtfree(f->dir.gid);
   1592 		f->dir.gid = vtstrdup(dir->gid);
   1593 	}
   1594 
   1595 	if(strcmp(f->dir.mid, dir->mid) != 0){
   1596 		vtfree(f->dir.mid);
   1597 		f->dir.mid = vtstrdup(dir->mid);
   1598 	}
   1599 
   1600 	f->dir.mtime = dir->mtime;
   1601 	f->dir.atime = dir->atime;
   1602 
   1603 	mask = ~(ModeDir|ModeSnapshot);
   1604 	f->dir.mode &= ~mask;
   1605 	f->dir.mode |= mask & dir->mode;
   1606 	f->dirty = 1;
   1607 
   1608 	if(filemetaflush(f, oelem) < 0){
   1609 		vtfree(oelem);
   1610 		goto Err;	/* that sucks */
   1611 	}
   1612 	vtfree(oelem);
   1613 
   1614 	filemetaunlock(f);
   1615 	fileunlock(f);
   1616 	return 0;
   1617 
   1618 Err:
   1619 	filemetaunlock(f);
   1620 	fileunlock(f);
   1621 	return -1;
   1622 }
   1623 
   1624 /*
   1625  * Set the qid space.
   1626  */
   1627 int
   1628 vacfilesetqidspace(VacFile *f, u64int offset, u64int max)
   1629 {
   1630 	int ret;
   1631 
   1632 	if(filelock(f) < 0)
   1633 		return -1;
   1634 	if(f->source->mode != VtORDWR){
   1635 		fileunlock(f);
   1636 		werrstr(EReadOnly);
   1637 		return -1;
   1638 	}
   1639 	filemetalock(f);
   1640 	f->dir.qidspace = 1;
   1641 	f->dir.qidoffset = offset;
   1642 	f->dir.qidmax = max;
   1643 	f->dirty = 1;
   1644 	ret = filemetaflush(f, nil);
   1645 	filemetaunlock(f);
   1646 	fileunlock(f);
   1647 	return ret;
   1648 }
   1649 
   1650 /*
   1651  * Check that the file is empty, returning 0 if it is.
   1652  * Returns -1 on error (and not being empty is an error).
   1653  */
   1654 static int
   1655 filecheckempty(VacFile *f)
   1656 {
   1657 	u32int i, n;
   1658 	VtBlock *b;
   1659 	MetaBlock mb;
   1660 	VtFile *r;
   1661 
   1662 	r = f->msource;
   1663 	n = (vtfilegetsize(r)+r->dsize-1)/r->dsize;
   1664 	for(i=0; i<n; i++){
   1665 		b = vtfileblock(r, i, VtOREAD);
   1666 		if(b == nil)
   1667 			return -1;
   1668 		if(mbunpack(&mb, b->data, r->dsize) < 0)
   1669 			goto Err;
   1670 		if(mb.nindex > 0){
   1671 			werrstr(ENotEmpty);
   1672 			goto Err;
   1673 		}
   1674 		vtblockput(b);
   1675 	}
   1676 	return 0;
   1677 
   1678 Err:
   1679 	vtblockput(b);
   1680 	return -1;
   1681 }
   1682 
   1683 /*
   1684  * Remove the vac file f.
   1685  */
   1686 int
   1687 vacfileremove(VacFile *f)
   1688 {
   1689 	VacFile *ff;
   1690 
   1691 	/* Cannot remove the root */
   1692 	if(vacfileisroot(f)){
   1693 		werrstr(ERoot);
   1694 		return -1;
   1695 	}
   1696 
   1697 	if(filelock(f) < 0)
   1698 		return -1;
   1699 	if(f->source->mode != VtORDWR){
   1700 		werrstr(EReadOnly);
   1701 		goto Err1;
   1702 	}
   1703 	if(vtfilelock2(f->source, f->msource, -1) < 0)
   1704 		goto Err1;
   1705 	if(vacfileisdir(f) && filecheckempty(f)<0)
   1706 		goto Err;
   1707 
   1708 	for(ff=f->down; ff; ff=ff->next)
   1709 		assert(ff->removed);
   1710 
   1711 	vtfileremove(f->source);
   1712 	f->source = nil;
   1713 	if(f->msource){
   1714 		vtfileremove(f->msource);
   1715 		f->msource = nil;
   1716 	}
   1717 	fileunlock(f);
   1718 
   1719 	if(filemetaremove(f) < 0)
   1720 		return -1;
   1721 	return 0;
   1722 
   1723 Err:
   1724 	vtfileunlock(f->source);
   1725 	if(f->msource)
   1726 		vtfileunlock(f->msource);
   1727 Err1:
   1728 	fileunlock(f);
   1729 	return -1;
   1730 }
   1731 
   1732 /*
   1733  * Vac file system format.
   1734  */
   1735 static char EBadVacFormat[] = "bad format for vac file";
   1736 
   1737 static VacFs *
   1738 vacfsalloc(VtConn *z, int bsize, ulong cachemem, int mode)
   1739 {
   1740 	VacFs *fs;
   1741 
   1742 	fs = vtmallocz(sizeof(VacFs));
   1743 	fs->z = z;
   1744 	fs->bsize = bsize;
   1745 	fs->mode = mode;
   1746 	fs->cache = vtcachealloc(z, cachemem);
   1747 	return fs;
   1748 }
   1749 
   1750 static int
   1751 readscore(int fd, uchar score[VtScoreSize])
   1752 {
   1753 	char buf[45], *pref;
   1754 	int n;
   1755 
   1756 	n = readn(fd, buf, sizeof(buf)-1);
   1757 	if(n < sizeof(buf)-1) {
   1758 		werrstr("short read");
   1759 		return -1;
   1760 	}
   1761 	buf[n] = 0;
   1762 
   1763 	if(vtparsescore(buf, &pref, score) < 0){
   1764 		werrstr(EBadVacFormat);
   1765 		return -1;
   1766 	}
   1767 	if(pref==nil || strcmp(pref, "vac") != 0) {
   1768 		werrstr("not a vac file");
   1769 		return -1;
   1770 	}
   1771 	return 0;
   1772 }
   1773 
   1774 VacFs*
   1775 vacfsopen(VtConn *z, char *file, int mode, ulong cachemem)
   1776 {
   1777 	int fd;
   1778 	uchar score[VtScoreSize];
   1779 	char *prefix;
   1780 
   1781 	if(vtparsescore(file, &prefix, score) >= 0){
   1782 		if(prefix == nil || strcmp(prefix, "vac") != 0){
   1783 			werrstr("not a vac file");
   1784 			return nil;
   1785 		}
   1786 	}else{
   1787 		fd = open(file, OREAD);
   1788 		if(fd < 0)
   1789 			return nil;
   1790 		if(readscore(fd, score) < 0){
   1791 			close(fd);
   1792 			return nil;
   1793 		}
   1794 		close(fd);
   1795 	}
   1796 if(debug) fprint(2, "vacfsopen %V\n", score);
   1797 	return vacfsopenscore(z, score, mode, cachemem);
   1798 }
   1799 
   1800 VacFs*
   1801 vacfsopenscore(VtConn *z, u8int *score, int mode, ulong cachemem)
   1802 {
   1803 	VacFs *fs;
   1804 	int n;
   1805 	VtRoot rt;
   1806 	uchar buf[VtRootSize];
   1807 	VacFile *root;
   1808 	VtFile *r;
   1809 	VtEntry e;
   1810 
   1811 	n = vtread(z, score, VtRootType, buf, VtRootSize);
   1812 	if(n < 0) {
   1813 if(debug) fprint(2, "read %r\n");
   1814 		return nil;
   1815 	}
   1816 	if(n != VtRootSize){
   1817 		werrstr("vtread on root too short");
   1818 if(debug) fprint(2, "size %d\n", n);
   1819 		return nil;
   1820 	}
   1821 
   1822 	if(vtrootunpack(&rt, buf) < 0) {
   1823 if(debug) fprint(2, "unpack: %r\n");
   1824 		return nil;
   1825 	}
   1826 
   1827 	if(strcmp(rt.type, "vac") != 0) {
   1828 if(debug) fprint(2, "bad type %s\n", rt.type);
   1829 		werrstr("not a vac root");
   1830 		return nil;
   1831 	}
   1832 
   1833 	fs = vacfsalloc(z, rt.blocksize, cachemem, mode);
   1834 	memmove(fs->score, score, VtScoreSize);
   1835 	fs->mode = mode;
   1836 
   1837 	memmove(e.score, rt.score, VtScoreSize);
   1838 	e.gen = 0;
   1839 
   1840 	// Don't waste cache memory on pointer blocks
   1841 	// when rt.blocksize is large.
   1842 	e.psize = (rt.blocksize/VtEntrySize)*VtEntrySize;
   1843 	if(e.psize > 60000)
   1844 		e.psize = (60000/VtEntrySize)*VtEntrySize;
   1845 
   1846 	e.dsize = rt.blocksize;
   1847 if(debug) fprint(2, "openscore %d psize %d dsize %d\n", (int)rt.blocksize, (int)e.psize, (int)e.dsize);
   1848 	e.type = VtDirType;
   1849 	e.flags = VtEntryActive;
   1850 	e.size = 3*VtEntrySize;
   1851 
   1852 	root = nil;
   1853 	if((r = vtfileopenroot(fs->cache, &e)) == nil)
   1854 		goto Err;
   1855 	if(debug)
   1856 		fprint(2, "r %p\n", r);
   1857 	root = _vacfileroot(fs, r);
   1858 	if(debug)
   1859 		fprint(2, "root %p\n", root);
   1860 	vtfileclose(r);
   1861 	if(root == nil)
   1862 		goto Err;
   1863 	fs->root = root;
   1864 	return fs;
   1865 Err:
   1866 	if(root)
   1867 		vacfiledecref(root);
   1868 	vacfsclose(fs);
   1869 	return nil;
   1870 }
   1871 
   1872 int
   1873 vacfsmode(VacFs *fs)
   1874 {
   1875 	return fs->mode;
   1876 }
   1877 
   1878 VacFile*
   1879 vacfsgetroot(VacFs *fs)
   1880 {
   1881 	return vacfileincref(fs->root);
   1882 }
   1883 
   1884 int
   1885 vacfsgetblocksize(VacFs *fs)
   1886 {
   1887 	return fs->bsize;
   1888 }
   1889 
   1890 int
   1891 vacfsgetscore(VacFs *fs, u8int *score)
   1892 {
   1893 	memmove(score, fs->score, VtScoreSize);
   1894 	return 0;
   1895 }
   1896 
   1897 int
   1898 _vacfsnextqid(VacFs *fs, uvlong *qid)
   1899 {
   1900 	++fs->qid;
   1901 	*qid = fs->qid;
   1902 	return 0;
   1903 }
   1904 
   1905 void
   1906 vacfsjumpqid(VacFs *fs, uvlong step)
   1907 {
   1908 	fs->qid += step;
   1909 }
   1910 
   1911 /*
   1912  * Set *maxqid to the maximum qid expected in this file system.
   1913  * In newer vac archives, the maximum qid is stored in the
   1914  * qidspace VacDir annotation.  In older vac archives, the root
   1915  * got created last, so it had the maximum qid.
   1916  */
   1917 int
   1918 vacfsgetmaxqid(VacFs *fs, uvlong *maxqid)
   1919 {
   1920 	VacDir vd;
   1921 
   1922 	if(vacfilegetdir(fs->root, &vd) < 0)
   1923 		return -1;
   1924 	if(vd.qidspace)
   1925 		*maxqid = vd.qidmax;
   1926 	else
   1927 		*maxqid = vd.qid;
   1928 	vdcleanup(&vd);
   1929 	return 0;
   1930 }
   1931 
   1932 
   1933 void
   1934 vacfsclose(VacFs *fs)
   1935 {
   1936 	if(fs->root)
   1937 		vacfiledecref(fs->root);
   1938 	fs->root = nil;
   1939 	vtcachefree(fs->cache);
   1940 	vtfree(fs);
   1941 }
   1942 
   1943 /*
   1944  * Create a fresh vac fs.
   1945  */
   1946 VacFs *
   1947 vacfscreate(VtConn *z, int bsize, ulong cachemem)
   1948 {
   1949 	VacFs *fs;
   1950 	VtFile *f;
   1951 	uchar buf[VtEntrySize], metascore[VtScoreSize];
   1952 	VtEntry e;
   1953 	VtBlock *b;
   1954 	MetaBlock mb;
   1955 	VacDir vd;
   1956 	MetaEntry me;
   1957 	int psize;
   1958 
   1959 	if((fs = vacfsalloc(z, bsize, cachemem, VtORDWR)) == nil)
   1960 		return nil;
   1961 
   1962 	/*
   1963 	 * Fake up an empty vac fs.
   1964 	 */
   1965 	psize = bsize/VtScoreSize*VtScoreSize;
   1966 	if(psize > 60000)
   1967 		psize = 60000/VtScoreSize*VtScoreSize;
   1968 if(debug) fprint(2, "create bsize %d psize %d\n", bsize, psize);
   1969 
   1970 	f = vtfilecreateroot(fs->cache, psize, bsize, VtDirType);
   1971 	if(f == nil)
   1972 		sysfatal("vtfilecreateroot: %r");
   1973 	vtfilelock(f, VtORDWR);
   1974 
   1975 	/* Write metablock containing root directory VacDir. */
   1976 	b = vtcacheallocblock(fs->cache, VtDataType, bsize);
   1977 	mbinit(&mb, b->data, bsize, bsize/BytesPerEntry);
   1978 	memset(&vd, 0, sizeof vd);
   1979 	vd.elem = "/";
   1980 	vd.mode = 0777|ModeDir;
   1981 	vd.uid = "vac";
   1982 	vd.gid = "vac";
   1983 	vd.mid = "";
   1984 	me.size = vdsize(&vd, VacDirVersion);
   1985 	me.p = mballoc(&mb, me.size);
   1986 	vdpack(&vd, &me, VacDirVersion);
   1987 	mbinsert(&mb, 0, &me);
   1988 	mbpack(&mb);
   1989 	vtblockwrite(b);
   1990 	memmove(metascore, b->score, VtScoreSize);
   1991 	vtblockput(b);
   1992 
   1993 	/* First entry: empty venti directory stream. */
   1994 	memset(&e, 0, sizeof e);
   1995 	e.flags = VtEntryActive;
   1996 	e.psize = psize;
   1997 	e.dsize = bsize;
   1998 	e.type = VtDirType;
   1999 	memmove(e.score, vtzeroscore, VtScoreSize);
   2000 	vtentrypack(&e, buf, 0);
   2001 	vtfilewrite(f, buf, VtEntrySize, 0);
   2002 
   2003 	/* Second entry: empty metadata stream. */
   2004 	e.type = VtDataType;
   2005 	vtentrypack(&e, buf, 0);
   2006 	vtfilewrite(f, buf, VtEntrySize, VtEntrySize);
   2007 
   2008 	/* Third entry: metadata stream with root directory. */
   2009 	memmove(e.score, metascore, VtScoreSize);
   2010 	e.size = bsize;
   2011 	vtentrypack(&e, buf, 0);
   2012 	vtfilewrite(f, buf, VtEntrySize, VtEntrySize*2);
   2013 
   2014 	vtfileflush(f);
   2015 	vtfileunlock(f);
   2016 
   2017 	/* Now open it as a vac fs. */
   2018 	fs->root = _vacfileroot(fs, f);
   2019 	if(fs->root == nil){
   2020 		werrstr("vacfileroot: %r");
   2021 		vacfsclose(fs);
   2022 		return nil;
   2023 	}
   2024 
   2025 	return fs;
   2026 }
   2027 
   2028 int
   2029 vacfssync(VacFs *fs)
   2030 {
   2031 	uchar buf[1024];
   2032 	VtEntry e;
   2033 	VtFile *f;
   2034 	VtRoot root;
   2035 
   2036 	/* Sync the entire vacfs to disk. */
   2037 	if(vacfileflush(fs->root, 1) < 0)
   2038 		return -1;
   2039 	if(vtfilelock(fs->root->up->msource, -1) < 0)
   2040 		return -1;
   2041 	if(vtfileflush(fs->root->up->msource) < 0){
   2042 		vtfileunlock(fs->root->up->msource);
   2043 		return -1;
   2044 	}
   2045 	vtfileunlock(fs->root->up->msource);
   2046 
   2047 	/* Prepare the dir stream for the root block. */
   2048 	if(getentry(fs->root->source, &e) < 0)
   2049 		return -1;
   2050 	vtentrypack(&e, buf, 0);
   2051 	if(getentry(fs->root->msource, &e) < 0)
   2052 		return -1;
   2053 	vtentrypack(&e, buf, 1);
   2054 	if(getentry(fs->root->up->msource, &e) < 0)
   2055 		return -1;
   2056 	vtentrypack(&e, buf, 2);
   2057 
   2058 	f = vtfilecreateroot(fs->cache, fs->bsize, fs->bsize, VtDirType);
   2059 	vtfilelock(f, VtORDWR);
   2060 	if(vtfilewrite(f, buf, 3*VtEntrySize, 0) < 0
   2061 	|| vtfileflush(f) < 0){
   2062 		vtfileunlock(f);
   2063 		vtfileclose(f);
   2064 		return -1;
   2065 	}
   2066 	vtfileunlock(f);
   2067 	if(getentry(f, &e) < 0){
   2068 		vtfileclose(f);
   2069 		return -1;
   2070 	}
   2071 	vtfileclose(f);
   2072 
   2073 	/* Build a root block. */
   2074 	memset(&root, 0, sizeof root);
   2075 	strcpy(root.type, "vac");
   2076 	strcpy(root.name, fs->name);
   2077 	memmove(root.score, e.score, VtScoreSize);
   2078 	root.blocksize = fs->bsize;
   2079 	memmove(root.prev, fs->score, VtScoreSize);
   2080 	vtrootpack(&root, buf);
   2081 	if(vtwrite(fs->z, fs->score, VtRootType, buf, VtRootSize) < 0){
   2082 		werrstr("writing root: %r");
   2083 		return -1;
   2084 	}
   2085 	if(vtsync(fs->z) < 0)
   2086 		return -1;
   2087 	return 0;
   2088 }
   2089 
   2090 int
   2091 vacfiledsize(VacFile *f)
   2092 {
   2093 	VtEntry e;
   2094 
   2095 	if(vacfilegetentries(f,&e,nil) < 0)
   2096 		return -1;
   2097 	return e.dsize;
   2098 }
   2099 
   2100 /*
   2101  * Does block b of f have the same SHA1 hash as the n bytes at buf?
   2102  */
   2103 int
   2104 sha1matches(VacFile *f, ulong b, uchar *buf, int n)
   2105 {
   2106 	uchar fscore[VtScoreSize];
   2107 	uchar bufscore[VtScoreSize];
   2108 
   2109 	if(vacfileblockscore(f, b, fscore) < 0)
   2110 		return 0;
   2111 	n = vtzerotruncate(VtDataType, buf, n);
   2112 	sha1(buf, n, bufscore, nil);
   2113 	if(memcmp(bufscore, fscore, VtScoreSize) == 0)
   2114 		return 1;
   2115 	return 0;
   2116 }