plan9port

fork of plan9port with libvec, libstr and libsdb
Log | Files | Refs | README | LICENSE

fs.c (21701B)


      1 #include "stdinc.h"
      2 #include "dat.h"
      3 #include "fns.h"
      4 #include "error.h"
      5 
      6 static void fsMetaFlush(void *a);
      7 static Snap *snapInit(Fs*);
      8 static void snapClose(Snap*);
      9 
     10 Fs *
     11 fsOpen(char *file, VtConn *z, long ncache, int mode)
     12 {
     13 	int fd, m;
     14 	uchar oscore[VtScoreSize];
     15 	Block *b, *bs;
     16 	Disk *disk;
     17 	Fs *fs;
     18 	Super super;
     19 	char e[ERRMAX];
     20 
     21 	switch(mode){
     22 	default:
     23 		werrstr(EBadMode);
     24 		return nil;
     25 	case OReadOnly:
     26 		m = OREAD;
     27 		break;
     28 	case OReadWrite:
     29 		m = ORDWR;
     30 		break;
     31 	}
     32 	fd = open(file, m);
     33 	if(fd < 0){
     34 		werrstr("open %s: %r", file);
     35 		return nil;
     36 	}
     37 
     38 	bwatchInit();
     39 	disk = diskAlloc(fd);
     40 	if(disk == nil){
     41 		werrstr("diskAlloc: %r");
     42 		close(fd);
     43 		return nil;
     44 	}
     45 
     46 	fs = vtmallocz(sizeof(Fs));
     47 	fs->mode = mode;
     48 	fs->name = vtstrdup(file);
     49 	fs->blockSize = diskBlockSize(disk);
     50 	fs->cache = cacheAlloc(disk, z, ncache, mode);
     51 	if(mode == OReadWrite && z)
     52 		fs->arch = archInit(fs->cache, disk, fs, z);
     53 	fs->z = z;
     54 
     55 	b = cacheLocal(fs->cache, PartSuper, 0, mode);
     56 	if(b == nil)
     57 		goto Err;
     58 	if(!superUnpack(&super, b->data)){
     59 		blockPut(b);
     60 		werrstr("bad super block");
     61 		goto Err;
     62 	}
     63 	blockPut(b);
     64 
     65 	fs->ehi = super.epochHigh;
     66 	fs->elo = super.epochLow;
     67 
     68 //fprint(2, "%s: fs->ehi %d fs->elo %d active=%d\n", argv0, fs->ehi, fs->elo, super.active);
     69 
     70 	fs->source = sourceRoot(fs, super.active, mode);
     71 	if(fs->source == nil){
     72 		/*
     73 		 * Perhaps it failed because the block is copy-on-write.
     74 		 * Do the copy and try again.
     75 		 */
     76 		rerrstr(e, sizeof e);
     77 		if(mode == OReadOnly || strcmp(e, EBadRoot) != 0)
     78 			goto Err;
     79 		b = cacheLocalData(fs->cache, super.active, BtDir, RootTag,
     80 			OReadWrite, 0);
     81 		if(b == nil){
     82 			werrstr("cacheLocalData: %r");
     83 			goto Err;
     84 		}
     85 		if(b->l.epoch == fs->ehi){
     86 			blockPut(b);
     87 			werrstr("bad root source block");
     88 			goto Err;
     89 		}
     90 		b = blockCopy(b, RootTag, fs->ehi, fs->elo);
     91 		if(b == nil)
     92 			goto Err;
     93 		localToGlobal(super.active, oscore);
     94 		super.active = b->addr;
     95 		bs = cacheLocal(fs->cache, PartSuper, 0, OReadWrite);
     96 		if(bs == nil){
     97 			blockPut(b);
     98 			werrstr("cacheLocal: %r");
     99 			goto Err;
    100 		}
    101 		superPack(&super, bs->data);
    102 		blockDependency(bs, b, 0, oscore, nil);
    103 		blockPut(b);
    104 		blockDirty(bs);
    105 		blockRemoveLink(bs, globalToLocal(oscore), BtDir, RootTag, 0);
    106 		blockPut(bs);
    107 		fs->source = sourceRoot(fs, super.active, mode);
    108 		if(fs->source == nil){
    109 			werrstr("sourceRoot: %r");
    110 			goto Err;
    111 		}
    112 	}
    113 
    114 //fprint(2, "%s: got fs source\n", argv0);
    115 
    116 	rlock(&fs->elk);
    117 	fs->file = fileRoot(fs->source);
    118 	fs->source->file = fs->file;		/* point back */
    119 	runlock(&fs->elk);
    120 	if(fs->file == nil){
    121 		werrstr("fileRoot: %r");
    122 		goto Err;
    123 	}
    124 
    125 //fprint(2, "%s: got file root\n", argv0);
    126 
    127 	if(mode == OReadWrite){
    128 		fs->metaFlush = periodicAlloc(fsMetaFlush, fs, 1000);
    129 		fs->snap = snapInit(fs);
    130 	}
    131 	return fs;
    132 
    133 Err:
    134 fprint(2, "%s: fsOpen error\n", argv0);
    135 	fsClose(fs);
    136 	return nil;
    137 }
    138 
    139 void
    140 fsClose(Fs *fs)
    141 {
    142 	rlock(&fs->elk);
    143 	periodicKill(fs->metaFlush);
    144 	snapClose(fs->snap);
    145 	if(fs->file){
    146 		fileMetaFlush(fs->file, 0);
    147 		if(!fileDecRef(fs->file))
    148 			sysfatal("fsClose: files still in use: %r");
    149 	}
    150 	fs->file = nil;
    151 	sourceClose(fs->source);
    152 	cacheFree(fs->cache);
    153 	if(fs->arch)
    154 		archFree(fs->arch);
    155 	vtfree(fs->name);
    156 	runlock(&fs->elk);
    157 	memset(fs, ~0, sizeof(Fs));
    158 	vtfree(fs);
    159 }
    160 
    161 int
    162 fsRedial(Fs *fs, char *host)
    163 {
    164 	if(vtredial(fs->z, host) < 0)
    165 		return 0;
    166 	if(vtconnect(fs->z) < 0)
    167 		return 0;
    168 	return 1;
    169 }
    170 
    171 File *
    172 fsGetRoot(Fs *fs)
    173 {
    174 	return fileIncRef(fs->file);
    175 }
    176 
    177 int
    178 fsGetBlockSize(Fs *fs)
    179 {
    180 	return fs->blockSize;
    181 }
    182 
    183 Block*
    184 superGet(Cache *c, Super* super)
    185 {
    186 	Block *b;
    187 
    188 	if((b = cacheLocal(c, PartSuper, 0, OReadWrite)) == nil){
    189 		fprint(2, "%s: superGet: cacheLocal failed: %r\n", argv0);
    190 		return nil;
    191 	}
    192 	if(!superUnpack(super, b->data)){
    193 		fprint(2, "%s: superGet: superUnpack failed: %r\n", argv0);
    194 		blockPut(b);
    195 		return nil;
    196 	}
    197 
    198 	return b;
    199 }
    200 
    201 void
    202 superWrite(Block* b, Super* super, int forceWrite)
    203 {
    204 	superPack(super, b->data);
    205 	blockDirty(b);
    206 	if(forceWrite){
    207 		while(!blockWrite(b, Waitlock)){
    208 			/* this should no longer happen */
    209 			fprint(2, "%s: could not write super block; "
    210 				"waiting 10 seconds\n", argv0);
    211 			sleep(10*1000);
    212 		}
    213 		while(b->iostate != BioClean && b->iostate != BioDirty){
    214 			assert(b->iostate == BioWriting);
    215 			rsleep(&b->ioready);
    216 		}
    217 		/*
    218 		 * it's okay that b might still be dirty.
    219 		 * that means it got written out but with an old root pointer,
    220 		 * but the other fields went out, and those are the ones
    221 		 * we really care about.  (specifically, epochHigh; see fsSnapshot).
    222 		 */
    223 	}
    224 }
    225 
    226 /*
    227  * Prepare the directory to store a snapshot.
    228  * Temporary snapshots go into /snapshot/yyyy/mmdd/hhmm[.#]
    229  * Archival snapshots go into /archive/yyyy/mmdd[.#].
    230  *
    231  * TODO This should be rewritten to eliminate most of the duplication.
    232  */
    233 static File*
    234 fileOpenSnapshot(Fs *fs, char *dstpath, int doarchive)
    235 {
    236 	int n;
    237 	char buf[30], *s, *p, *elem;
    238 	File *dir, *f;
    239 	Tm now;
    240 
    241 	if(dstpath){
    242 		if((p = strrchr(dstpath, '/')) != nil){
    243 			*p++ = '\0';
    244 			elem = p;
    245 			p = dstpath;
    246 			if(*p == '\0')
    247 				p = "/";
    248 		}else{
    249 			p = "/";
    250 			elem = dstpath;
    251 		}
    252 		if((dir = fileOpen(fs, p)) == nil)
    253 			return nil;
    254 		f = fileCreate(dir, elem, ModeDir|ModeSnapshot|0555, "adm");
    255 		fileDecRef(dir);
    256 		return f;
    257 	}else if(doarchive){
    258 		/*
    259 		 * a snapshot intended to be archived to venti.
    260 		 */
    261 		dir = fileOpen(fs, "/archive");
    262 		if(dir == nil)
    263 			return nil;
    264 		now = *localtime(time(0));
    265 
    266 		/* yyyy */
    267 		snprint(buf, sizeof(buf), "%d", now.year+1900);
    268 		f = fileWalk(dir, buf);
    269 		if(f == nil)
    270 			f = fileCreate(dir, buf, ModeDir|0555, "adm");
    271 		fileDecRef(dir);
    272 		if(f == nil)
    273 			return nil;
    274 		dir = f;
    275 
    276 		/* mmdd[#] */
    277 		snprint(buf, sizeof(buf), "%02d%02d", now.mon+1, now.mday);
    278 		s = buf+strlen(buf);
    279 		for(n=0;; n++){
    280 			if(n)
    281 				seprint(s, buf+sizeof(buf), ".%d", n);
    282 			f = fileWalk(dir, buf);
    283 			if(f != nil){
    284 				fileDecRef(f);
    285 				continue;
    286 			}
    287 			f = fileCreate(dir, buf, ModeDir|ModeSnapshot|0555, "adm");
    288 			break;
    289 		}
    290 		fileDecRef(dir);
    291 		return f;
    292 	}else{
    293 		/*
    294 		 * Just a temporary snapshot
    295 		 * We'll use /snapshot/yyyy/mmdd/hhmm.
    296 		 * There may well be a better naming scheme.
    297 		 * (I'd have used hh:mm but ':' is reserved in Microsoft file systems.)
    298 		 */
    299 		dir = fileOpen(fs, "/snapshot");
    300 		if(dir == nil)
    301 			return nil;
    302 
    303 		now = *localtime(time(0));
    304 
    305 		/* yyyy */
    306 		snprint(buf, sizeof(buf), "%d", now.year+1900);
    307 		f = fileWalk(dir, buf);
    308 		if(f == nil)
    309 			f = fileCreate(dir, buf, ModeDir|0555, "adm");
    310 		fileDecRef(dir);
    311 		if(f == nil)
    312 			return nil;
    313 		dir = f;
    314 
    315 		/* mmdd */
    316 		snprint(buf, sizeof(buf), "%02d%02d", now.mon+1, now.mday);
    317 		f = fileWalk(dir, buf);
    318 		if(f == nil)
    319 			f = fileCreate(dir, buf, ModeDir|0555, "adm");
    320 		fileDecRef(dir);
    321 		if(f == nil)
    322 			return nil;
    323 		dir = f;
    324 
    325 		/* hhmm[.#] */
    326 		snprint(buf, sizeof buf, "%02d%02d", now.hour, now.min);
    327 		s = buf+strlen(buf);
    328 		for(n=0;; n++){
    329 			if(n)
    330 				seprint(s, buf+sizeof(buf), ".%d", n);
    331 			f = fileWalk(dir, buf);
    332 			if(f != nil){
    333 				fileDecRef(f);
    334 				continue;
    335 			}
    336 			f = fileCreate(dir, buf, ModeDir|ModeSnapshot|0555, "adm");
    337 			break;
    338 		}
    339 		fileDecRef(dir);
    340 		return f;
    341 	}
    342 }
    343 
    344 static int
    345 fsNeedArch(Fs *fs, uint archMinute)
    346 {
    347 	int need;
    348 	File *f;
    349 	char buf[100];
    350 	Tm now;
    351 	ulong then;
    352 
    353 	then = time(0);
    354 	now = *localtime(then);
    355 
    356 	/* back up to yesterday if necessary */
    357 	if(now.hour < archMinute/60
    358 	|| now.hour == archMinute/60 && now.min < archMinute%60)
    359 		now = *localtime(then-86400);
    360 
    361 	snprint(buf, sizeof buf, "/archive/%d/%02d%02d",
    362 		now.year+1900, now.mon+1, now.mday);
    363 	need = 1;
    364 	rlock(&fs->elk);
    365 	f = fileOpen(fs, buf);
    366 	if(f){
    367 		need = 0;
    368 		fileDecRef(f);
    369 	}
    370 	runlock(&fs->elk);
    371 	return need;
    372 }
    373 
    374 int
    375 fsEpochLow(Fs *fs, u32int low)
    376 {
    377 	Block *bs;
    378 	Super super;
    379 
    380 	wlock(&fs->elk);
    381 	if(low > fs->ehi){
    382 		werrstr("bad low epoch (must be <= %ud)", fs->ehi);
    383 		wunlock(&fs->elk);
    384 		return 0;
    385 	}
    386 
    387 	if((bs = superGet(fs->cache, &super)) == nil){
    388 		wunlock(&fs->elk);
    389 		return 0;
    390 	}
    391 
    392 	super.epochLow = low;
    393 	fs->elo = low;
    394 	superWrite(bs, &super, 1);
    395 	blockPut(bs);
    396 	wunlock(&fs->elk);
    397 
    398 	return 1;
    399 }
    400 
    401 static int
    402 bumpEpoch(Fs *fs, int doarchive)
    403 {
    404 	uchar oscore[VtScoreSize];
    405 	u32int oldaddr;
    406 	Block *b, *bs;
    407 	Entry e;
    408 	Source *r;
    409 	Super super;
    410 
    411 	/*
    412 	 * Duplicate the root block.
    413 	 *
    414 	 * As a hint to flchk, the garbage collector,
    415 	 * and any (human) debuggers, store a pointer
    416 	 * to the old root block in entry 1 of the new root block.
    417 	 */
    418 	r = fs->source;
    419 	b = cacheGlobal(fs->cache, r->score, BtDir, RootTag, OReadOnly);
    420 	if(b == nil)
    421 		return 0;
    422 
    423 	memset(&e, 0, sizeof e);
    424 	e.flags = VtEntryActive | VtEntryLocal | _VtEntryDir;
    425 	memmove(e.score, b->score, VtScoreSize);
    426 	e.tag = RootTag;
    427 	e.snap = b->l.epoch;
    428 
    429 	b = blockCopy(b, RootTag, fs->ehi+1, fs->elo);
    430 	if(b == nil){
    431 		fprint(2, "%s: bumpEpoch: blockCopy: %r\n", argv0);
    432 		return 0;
    433 	}
    434 
    435 	if(0) fprint(2, "%s: snapshot root from %d to %d\n", argv0, oldaddr, b->addr);
    436 	entryPack(&e, b->data, 1);
    437 	blockDirty(b);
    438 
    439 	/*
    440 	 * Update the superblock with the new root and epoch.
    441 	 */
    442 	if((bs = superGet(fs->cache, &super)) == nil)
    443 		return 0;
    444 
    445 	fs->ehi++;
    446 	memmove(r->score, b->score, VtScoreSize);
    447 	r->epoch = fs->ehi;
    448 
    449 	super.epochHigh = fs->ehi;
    450 	oldaddr = super.active;
    451 	super.active = b->addr;
    452 	if(doarchive)
    453 		super.next = oldaddr;
    454 
    455 	/*
    456 	 * Record that the new super.active can't get written out until
    457 	 * the new b gets written out.  Until then, use the old value.
    458 	 */
    459 	localToGlobal(oldaddr, oscore);
    460 	blockDependency(bs, b, 0, oscore, nil);
    461 	blockPut(b);
    462 
    463 	/*
    464 	 * We force the super block to disk so that super.epochHigh gets updated.
    465 	 * Otherwise, if we crash and come back, we might incorrectly treat as active
    466 	 * some of the blocks that making up the snapshot we just created.
    467 	 * Basically every block in the active file system and all the blocks in
    468 	 * the recently-created snapshot depend on the super block now.
    469 	 * Rather than record all those dependencies, we just force the block to disk.
    470 	 *
    471 	 * Note that blockWrite might actually (will probably) send a slightly outdated
    472 	 * super.active to disk.  It will be the address of the most recent root that has
    473 	 * gone to disk.
    474 	 */
    475 	superWrite(bs, &super, 1);
    476 	blockRemoveLink(bs, globalToLocal(oscore), BtDir, RootTag, 0);
    477 	blockPut(bs);
    478 
    479 	return 1;
    480 }
    481 
    482 int
    483 saveQid(Fs *fs)
    484 {
    485 	Block *b;
    486 	Super super;
    487 	u64int qidMax;
    488 
    489 	if((b = superGet(fs->cache, &super)) == nil)
    490 		return 0;
    491 	qidMax = super.qid;
    492 	blockPut(b);
    493 
    494 	if(!fileSetQidSpace(fs->file, 0, qidMax))
    495 		return 0;
    496 
    497 	return 1;
    498 }
    499 
    500 int
    501 fsSnapshot(Fs *fs, char *srcpath, char *dstpath, int doarchive)
    502 {
    503 	File *src, *dst;
    504 
    505 	assert(fs->mode == OReadWrite);
    506 
    507 	dst = nil;
    508 
    509 	if(fs->halted){
    510 		werrstr("file system is halted");
    511 		return 0;
    512 	}
    513 
    514 	/*
    515 	 * Freeze file system activity.
    516 	 */
    517 	wlock(&fs->elk);
    518 
    519 	/*
    520 	 * Get the root of the directory we're going to save.
    521 	 */
    522 	if(srcpath == nil)
    523 		srcpath = "/active";
    524 	src = fileOpen(fs, srcpath);
    525 	if(src == nil)
    526 		goto Err;
    527 
    528 	/*
    529 	 * It is important that we maintain the invariant that:
    530 	 *	if both b and bb are marked as Active with start epoch e
    531 	 *	and b points at bb, then no other pointers to bb exist.
    532 	 *
    533 	 * When bb is unlinked from b, its close epoch is set to b's epoch.
    534 	 * A block with epoch == close epoch is
    535 	 * treated as free by cacheAllocBlock; this aggressively
    536 	 * reclaims blocks after they have been stored to Venti.
    537 	 *
    538 	 * Let's say src->source is block sb, and src->msource is block
    539 	 * mb.  Let's also say that block b holds the Entry structures for
    540 	 * both src->source and src->msource (their Entry structures might
    541 	 * be in different blocks, but the argument is the same).
    542 	 * That is, right now we have:
    543 	 *
    544 	 *	b	Active w/ epoch e, holds ptrs to sb and mb.
    545 	 *	sb	Active w/ epoch e.
    546 	 *	mb	Active w/ epoch e.
    547 	 *
    548 	 * With things as they are now, the invariant requires that
    549 	 * b holds the only pointers to sb and mb.  We want to record
    550 	 * pointers to sb and mb in new Entries corresponding to dst,
    551 	 * which breaks the invariant.  Thus we need to do something
    552 	 * about b.  Specifically, we bump the file system's epoch and
    553 	 * then rewalk the path from the root down to and including b.
    554 	 * This will copy-on-write as we walk, so now the state will be:
    555 	 *
    556 	 *	b	Snap w/ epoch e, holds ptrs to sb and mb.
    557 	 *	new-b	Active w/ epoch e+1, holds ptrs to sb and mb.
    558 	 *	sb	Active w/ epoch e.
    559 	 *	mb	Active w/ epoch e.
    560 	 *
    561 	 * In this state, it's perfectly okay to make more pointers to sb and mb.
    562 	 */
    563 	if(!bumpEpoch(fs, 0) || !fileWalkSources(src))
    564 		goto Err;
    565 
    566 	/*
    567 	 * Sync to disk.  I'm not sure this is necessary, but better safe than sorry.
    568 	 */
    569 	cacheFlush(fs->cache, 1);
    570 
    571 	/*
    572 	 * Create the directory where we will store the copy of src.
    573 	 */
    574 	dst = fileOpenSnapshot(fs, dstpath, doarchive);
    575 	if(dst == nil)
    576 		goto Err;
    577 
    578 	/*
    579 	 * Actually make the copy by setting dst's source and msource
    580 	 * to be src's.
    581 	 */
    582 	if(!fileSnapshot(dst, src, fs->ehi-1, doarchive))
    583 		goto Err;
    584 
    585 	fileDecRef(src);
    586 	fileDecRef(dst);
    587 	src = nil;
    588 	dst = nil;
    589 
    590 	/*
    591 	 * Make another copy of the file system.  This one is for the
    592 	 * archiver, so that the file system we archive has the recently
    593 	 * added snapshot both in /active and in /archive/yyyy/mmdd[.#].
    594 	 */
    595 	if(doarchive){
    596 		if(!saveQid(fs))
    597 			goto Err;
    598 		if(!bumpEpoch(fs, 1))
    599 			goto Err;
    600 	}
    601 
    602 	wunlock(&fs->elk);
    603 
    604 	/* BUG? can fs->arch fall out from under us here? */
    605 	if(doarchive && fs->arch)
    606 		archKick(fs->arch);
    607 
    608 	return 1;
    609 
    610 Err:
    611 	fprint(2, "%s: fsSnapshot: %r\n", argv0);
    612 	if(src)
    613 		fileDecRef(src);
    614 	if(dst)
    615 		fileDecRef(dst);
    616 	wunlock(&fs->elk);
    617 	return 0;
    618 }
    619 
    620 int
    621 fsVac(Fs *fs, char *name, uchar score[VtScoreSize])
    622 {
    623 	int r;
    624 	DirEntry de;
    625 	Entry e, ee;
    626 	File *f;
    627 
    628 	rlock(&fs->elk);
    629 	f = fileOpen(fs, name);
    630 	if(f == nil){
    631 		runlock(&fs->elk);
    632 		return 0;
    633 	}
    634 
    635 	if(!fileGetSources(f, &e, &ee) || !fileGetDir(f, &de)){
    636 		fileDecRef(f);
    637 		runlock(&fs->elk);
    638 		return 0;
    639 	}
    640 	fileDecRef(f);
    641 
    642 	r = mkVac(fs->z, fs->blockSize, &e, &ee, &de, score);
    643 	runlock(&fs->elk);
    644 	return r;
    645 }
    646 
    647 static int
    648 vtWriteBlock(VtConn *z, uchar *buf, uint n, uint type, uchar score[VtScoreSize])
    649 {
    650 	if(vtwrite(z, score, type, buf, n) < 0)
    651 		return 0;
    652 	if(vtsha1check(score, buf, n) < 0)
    653 		return 0;
    654 	return 1;
    655 }
    656 
    657 int
    658 mkVac(VtConn *z, uint blockSize, Entry *pe, Entry *pee, DirEntry *pde, uchar score[VtScoreSize])
    659 {
    660 	uchar buf[8192];
    661 	int i;
    662 	uchar *p;
    663 	uint n;
    664 	DirEntry de;
    665 	Entry e, ee, eee;
    666 	MetaBlock mb;
    667 	MetaEntry me;
    668 	VtRoot root;
    669 
    670 	e = *pe;
    671 	ee = *pee;
    672 	de = *pde;
    673 
    674 	if(globalToLocal(e.score) != NilBlock
    675 	|| (ee.flags&VtEntryActive && globalToLocal(ee.score) != NilBlock)){
    676 		werrstr("can only vac paths already stored on venti");
    677 		return 0;
    678 	}
    679 
    680 	/*
    681 	 * Build metadata source for root.
    682 	 */
    683 	n = deSize(&de);
    684 	if(n+MetaHeaderSize+MetaIndexSize > sizeof buf){
    685 		werrstr("DirEntry too big");
    686 		return 0;
    687 	}
    688 	memset(buf, 0, sizeof buf);
    689 	mbInit(&mb, buf, n+MetaHeaderSize+MetaIndexSize, 1);
    690 	p = mbAlloc(&mb, n);
    691 	if(p == nil)
    692 		abort();
    693 	mbSearch(&mb, de.elem, &i, &me);
    694 	assert(me.p == nil);
    695 	me.p = p;
    696 	me.size = n;
    697 	dePack(&de, &me);
    698 	mbInsert(&mb, i, &me);
    699 	mbPack(&mb);
    700 
    701 	eee.size = n+MetaHeaderSize+MetaIndexSize;
    702 	if(!vtWriteBlock(z, buf, eee.size, VtDataType, eee.score))
    703 		return 0;
    704 	eee.psize = 8192;
    705 	eee.dsize = 8192;
    706 	eee.depth = 0;
    707 	eee.flags = VtEntryActive;
    708 
    709 	/*
    710 	 * Build root source with three entries in it.
    711 	 */
    712 	entryPack(&e, buf, 0);
    713 	entryPack(&ee, buf, 1);
    714 	entryPack(&eee, buf, 2);
    715 
    716 	n = VtEntrySize*3;
    717 	memset(&root, 0, sizeof root);
    718 	if(!vtWriteBlock(z, buf, n, VtDirType, root.score))
    719 		return 0;
    720 
    721 	/*
    722 	 * Save root.
    723 	 */
    724 	strecpy(root.type, root.type+sizeof root.type, "vac");
    725 	strecpy(root.name, root.name+sizeof root.name, de.elem);
    726 	root.blocksize = blockSize;
    727 	vtrootpack(&root, buf);
    728 	if(!vtWriteBlock(z, buf, VtRootSize, VtRootType, score))
    729 		return 0;
    730 
    731 	return 1;
    732 }
    733 
    734 int
    735 fsSync(Fs *fs)
    736 {
    737 	wlock(&fs->elk);
    738 	fileMetaFlush(fs->file, 1);
    739 	cacheFlush(fs->cache, 1);
    740 	wunlock(&fs->elk);
    741 	return 1;
    742 }
    743 
    744 int
    745 fsHalt(Fs *fs)
    746 {
    747 	wlock(&fs->elk);
    748 	fs->halted = 1;
    749 	fileMetaFlush(fs->file, 1);
    750 	cacheFlush(fs->cache, 1);
    751 	return 1;
    752 }
    753 
    754 int
    755 fsUnhalt(Fs *fs)
    756 {
    757 	if(!fs->halted)
    758 		return 0;
    759 	fs->halted = 0;
    760 	wunlock(&fs->elk);
    761 	return 1;
    762 }
    763 
    764 int
    765 fsNextQid(Fs *fs, u64int *qid)
    766 {
    767 	Block *b;
    768 	Super super;
    769 
    770 	if((b = superGet(fs->cache, &super)) == nil)
    771 		return 0;
    772 
    773 	*qid = super.qid++;
    774 
    775 	/*
    776 	 * It's okay if the super block doesn't go to disk immediately,
    777 	 * since fileMetaAlloc will record a dependency between the
    778 	 * block holding this qid and the super block.  See file.c:/^fileMetaAlloc.
    779 	 */
    780 	superWrite(b, &super, 0);
    781 	blockPut(b);
    782 	return 1;
    783 }
    784 
    785 static void
    786 fsMetaFlush(void *a)
    787 {
    788 	int rv;
    789 	Fs *fs = a;
    790 
    791 	rlock(&fs->elk);
    792 	rv = fileMetaFlush(fs->file, 1);
    793 	runlock(&fs->elk);
    794 	if(rv > 0)
    795 		cacheFlush(fs->cache, 0);
    796 }
    797 
    798 static int
    799 fsEsearch1(File *f, char *path, u32int savetime, u32int *plo)
    800 {
    801 	int n, r;
    802 	DirEntry de;
    803 	DirEntryEnum *dee;
    804 	File *ff;
    805 	Entry e, ee;
    806 	char *t;
    807 
    808 	dee = deeOpen(f);
    809 	if(dee == nil)
    810 		return 0;
    811 
    812 	n = 0;
    813 	for(;;){
    814 		r = deeRead(dee, &de);
    815 		if(r <= 0)
    816 			break;
    817 		if(de.mode & ModeSnapshot){
    818 			if((ff = fileWalk(f, de.elem)) != nil){
    819 				if(fileGetSources(ff, &e, &ee))
    820 					if(de.mtime >= savetime && e.snap != 0)
    821 						if(e.snap < *plo)
    822 							*plo = e.snap;
    823 				fileDecRef(ff);
    824 			}
    825 		}
    826 		else if(de.mode & ModeDir){
    827 			if((ff = fileWalk(f, de.elem)) != nil){
    828 				t = smprint("%s/%s", path, de.elem);
    829 				n += fsEsearch1(ff, t, savetime, plo);
    830 				vtfree(t);
    831 				fileDecRef(ff);
    832 			}
    833 		}
    834 		deCleanup(&de);
    835 		if(r < 0)
    836 			break;
    837 	}
    838 	deeClose(dee);
    839 
    840 	return n;
    841 }
    842 
    843 static int
    844 fsEsearch(Fs *fs, char *path, u32int savetime, u32int *plo)
    845 {
    846 	int n;
    847 	File *f;
    848 	DirEntry de;
    849 
    850 	f = fileOpen(fs, path);
    851 	if(f == nil)
    852 		return 0;
    853 	if(!fileGetDir(f, &de)){
    854 		fileDecRef(f);
    855 		return 0;
    856 	}
    857 	if((de.mode & ModeDir) == 0){
    858 		fileDecRef(f);
    859 		deCleanup(&de);
    860 		return 0;
    861 	}
    862 	deCleanup(&de);
    863 	n = fsEsearch1(f, path, savetime, plo);
    864 	fileDecRef(f);
    865 	return n;
    866 }
    867 
    868 void
    869 fsSnapshotCleanup(Fs *fs, u32int age)
    870 {
    871 	u32int lo;
    872 
    873 	/*
    874 	 * Find the best low epoch we can use,
    875 	 * given that we need to save all the unventied archives
    876 	 * and all the snapshots younger than age.
    877 	 */
    878 	rlock(&fs->elk);
    879 	lo = fs->ehi;
    880 	fsEsearch(fs, "/archive", 0, &lo);
    881 	fsEsearch(fs, "/snapshot", time(0)-age*60, &lo);
    882 	runlock(&fs->elk);
    883 
    884 	fsEpochLow(fs, lo);
    885 	fsSnapshotRemove(fs);
    886 }
    887 
    888 /* remove all snapshots that have expired */
    889 /* return number of directory entries remaining */
    890 static int
    891 fsRsearch1(File *f, char *s)
    892 {
    893 	int n, r;
    894 	DirEntry de;
    895 	DirEntryEnum *dee;
    896 	File *ff;
    897 	char *t, e[ERRMAX];
    898 
    899 	dee = deeOpen(f);
    900 	if(dee == nil)
    901 		return 0;
    902 
    903 	n = 0;
    904 	for(;;){
    905 		r = deeRead(dee, &de);
    906 		if(r <= 0)
    907 			break;
    908 		n++;
    909 		if(de.mode & ModeSnapshot){
    910 			rerrstr(e, sizeof e);
    911 			if((ff = fileWalk(f, de.elem)) != nil)
    912 				fileDecRef(ff);
    913 			else if(strcmp(e, ESnapOld) == 0){
    914 				if(fileClri(f, de.elem, "adm"))
    915 					n--;
    916 			}
    917 		}
    918 		else if(de.mode & ModeDir){
    919 			if((ff = fileWalk(f, de.elem)) != nil){
    920 				t = smprint("%s/%s", s, de.elem);
    921 				if(fsRsearch1(ff, t) == 0)
    922 					if(fileRemove(ff, "adm"))
    923 						n--;
    924 				vtfree(t);
    925 				fileDecRef(ff);
    926 			}
    927 		}
    928 		deCleanup(&de);
    929 		if(r < 0)
    930 			break;
    931 	}
    932 	deeClose(dee);
    933 
    934 	return n;
    935 }
    936 
    937 static int
    938 fsRsearch(Fs *fs, char *path)
    939 {
    940 	File *f;
    941 	DirEntry de;
    942 
    943 	f = fileOpen(fs, path);
    944 	if(f == nil)
    945 		return 0;
    946 	if(!fileGetDir(f, &de)){
    947 		fileDecRef(f);
    948 		return 0;
    949 	}
    950 	if((de.mode & ModeDir) == 0){
    951 		fileDecRef(f);
    952 		deCleanup(&de);
    953 		return 0;
    954 	}
    955 	deCleanup(&de);
    956 	fsRsearch1(f, path);
    957 	fileDecRef(f);
    958 	return 1;
    959 }
    960 
    961 void
    962 fsSnapshotRemove(Fs *fs)
    963 {
    964 	rlock(&fs->elk);
    965 	fsRsearch(fs, "/snapshot");
    966 	runlock(&fs->elk);
    967 }
    968 
    969 struct Snap
    970 {
    971 	Fs	*fs;
    972 	Periodic*tick;
    973 	QLock	lk;
    974 	uint	snapMinutes;
    975 	uint	archMinute;
    976 	uint	snapLife;
    977 	u32int	lastSnap;
    978 	u32int	lastArch;
    979 	u32int	lastCleanup;
    980 	uint	ignore;
    981 };
    982 
    983 static void
    984 snapEvent(void *v)
    985 {
    986 	Snap *s;
    987 	u32int now, min;
    988 	Tm tm;
    989 	int need;
    990 	u32int snaplife;
    991 
    992 	s = v;
    993 
    994 	now = time(0)/60;
    995 	qlock(&s->lk);
    996 
    997 	/*
    998 	 * Snapshots happen every snapMinutes minutes.
    999 	 * If we miss a snapshot (for example, because we
   1000 	 * were down), we wait for the next one.
   1001 	 */
   1002 	if(s->snapMinutes != ~0 && s->snapMinutes != 0
   1003 	&& now%s->snapMinutes==0 && now != s->lastSnap){
   1004 		if(!fsSnapshot(s->fs, nil, nil, 0))
   1005 			fprint(2, "%s: fsSnapshot snap: %r\n", argv0);
   1006 		s->lastSnap = now;
   1007 	}
   1008 
   1009 	/*
   1010 	 * Archival snapshots happen at archMinute.
   1011 	 * If we miss an archive (for example, because we
   1012 	 * were down), we do it as soon as possible.
   1013 	 */
   1014 	tm = *localtime(now*60);
   1015 	min = tm.hour*60+tm.min;
   1016 	if(s->archMinute != ~0){
   1017 		need = 0;
   1018 		if(min == s->archMinute && now != s->lastArch)
   1019 			need = 1;
   1020 		if(s->lastArch == 0){
   1021 			s->lastArch = 1;
   1022 			if(fsNeedArch(s->fs, s->archMinute))
   1023 				need = 1;
   1024 		}
   1025 		if(need){
   1026 			fsSnapshot(s->fs, nil, nil, 1);
   1027 			s->lastArch = now;
   1028 		}
   1029 	}
   1030 
   1031 	/*
   1032 	 * Snapshot cleanup happens every snaplife or every day.
   1033 	 */
   1034 	snaplife = s->snapLife;
   1035 	if(snaplife == ~0)
   1036 		snaplife = 24*60;
   1037 	if(s->lastCleanup+snaplife < now){
   1038 		fsSnapshotCleanup(s->fs, s->snapLife);
   1039 		s->lastCleanup = now;
   1040 	}
   1041 	qunlock(&s->lk);
   1042 }
   1043 
   1044 static Snap*
   1045 snapInit(Fs *fs)
   1046 {
   1047 	Snap *s;
   1048 
   1049 	s = vtmallocz(sizeof(Snap));
   1050 	s->fs = fs;
   1051 	s->tick = periodicAlloc(snapEvent, s, 10*1000);
   1052 	s->snapMinutes = -1;
   1053 	s->archMinute = -1;
   1054 	s->snapLife = -1;
   1055 	s->ignore = 5*2;	/* wait five minutes for clock to stabilize */
   1056 	return s;
   1057 }
   1058 
   1059 void
   1060 snapGetTimes(Snap *s, u32int *arch, u32int *snap, u32int *snaplen)
   1061 {
   1062 	if(s == nil){
   1063 		*snap = -1;
   1064 		*arch = -1;
   1065 		*snaplen = -1;
   1066 		return;
   1067 	}
   1068 
   1069 	qlock(&s->lk);
   1070 	*snap = s->snapMinutes;
   1071 	*arch = s->archMinute;
   1072 	*snaplen = s->snapLife;
   1073 	qunlock(&s->lk);
   1074 }
   1075 
   1076 void
   1077 snapSetTimes(Snap *s, u32int arch, u32int snap, u32int snaplen)
   1078 {
   1079 	if(s == nil)
   1080 		return;
   1081 
   1082 	qlock(&s->lk);
   1083 	s->snapMinutes = snap;
   1084 	s->archMinute = arch;
   1085 	s->snapLife = snaplen;
   1086 	qunlock(&s->lk);
   1087 }
   1088 
   1089 static void
   1090 snapClose(Snap *s)
   1091 {
   1092 	if(s == nil)
   1093 		return;
   1094 
   1095 	periodicKill(s->tick);
   1096 	vtfree(s);
   1097 }