commit 056fe1ba7fa0b70f871dfb9005b24eb8e4cc230b
parent 9df487d720a59bf8cb0dc4ccffc30ad8eb48256a
Author: rsc <devnull@localhost>
Date:   Sun, 23 Nov 2003 18:19:58 +0000
new venti library.
Diffstat:
28 files changed, 4635 insertions(+), 0 deletions(-)
diff --git a/src/libventi/cache.c b/src/libventi/cache.c
@@ -0,0 +1,560 @@
+/*
+ * Memory-only VtBlock cache.
+ *
+ * The cached Venti blocks are in the hash chains.
+ * The cached local blocks are only in the blocks array.
+ * The free blocks are in the heap, which is supposed to
+ * be indexed by second-to-last use but actually
+ * appears to be last use.
+ */
+
+#include <u.h>
+#include <libc.h>
+#include <venti.h>
+
+int nread, ncopy, nwrite;
+
+enum {
+	BioLocal = 1,
+	BioVenti,
+	BioReading,
+	BioWriting,
+	BioEmpty,
+	BioVentiError,
+};
+enum {
+	BadHeap = ~0,
+};
+struct VtCache
+{
+	QLock	lk;
+	VtConn	*z;
+	u32int	blocksize;
+	u32int	now;		/* ticks for usage time stamps */
+	VtBlock	**hash;	/* hash table for finding addresses */
+	int		nhash;
+	VtBlock	**heap;	/* heap for finding victims */
+	int		nheap;
+	VtBlock	*block;	/* all allocated blocks */
+	int		nblock;
+	uchar	*mem;	/* memory for all blocks and data */
+	int		mode;
+};
+
+static void cachecheck(VtCache*);
+
+VtCache*
+vtcachealloc(VtConn *z, int blocksize, ulong nblock, int mode)
+{
+	uchar *p;
+	VtCache *c;
+	int i;
+	VtBlock *b;
+
+	c = vtmallocz(sizeof(VtCache));
+
+	c->z = z;
+	c->blocksize = (blocksize + 127) & ~127;
+	c->nblock = nblock;
+
+	c->nhash = nblock;
+	c->hash = vtmallocz(nblock*sizeof(VtBlock*));
+	c->heap = vtmallocz(nblock*sizeof(VtBlock*));
+	c->block = vtmallocz(nblock*sizeof(VtBlock));
+	c->mem = vtmallocz(nblock*c->blocksize);
+	c->mode = mode;
+
+	p = c->mem;
+	for(i=0; i<nblock; i++){
+		b = &c->block[i];
+		b->addr = NilBlock;
+		b->c = c;
+		b->data = p;
+		b->heap = i;
+		c->heap[i] = b;
+		p += c->blocksize;
+	}
+	c->nheap = nblock;
+	cachecheck(c);
+	return c;
+}
+
+void
+vtcachefree(VtCache *c)
+{
+	int i;
+
+	qlock(&c->lk);
+
+	cachecheck(c);
+	for(i=0; i<c->nblock; i++)
+		assert(c->block[i].ref == 0);
+
+	vtfree(c->hash);
+	vtfree(c->heap);
+	vtfree(c->block);
+	vtfree(c->mem);
+	vtfree(c);
+}
+
+static void
+vtcachedump(VtCache *c)
+{
+	int i;
+	VtBlock *b;
+
+	for(i=0; i<c->nblock; i++){
+		b = &c->block[i];
+		print("cache block %d: type %d score %V iostate %d addr %d ref %d nlock %d\n",
+			i, b->type, b->score, b->iostate, b->addr, b->ref, b->nlock);
+	}
+}
+
+static void
+cachecheck(VtCache *c)
+{
+	u32int size, now;
+	int i, k, refed;
+	VtBlock *b;
+
+	size = c->blocksize;
+	now = c->now;
+
+	for(i = 0; i < c->nheap; i++){
+		if(c->heap[i]->heap != i)
+			sysfatal("mis-heaped at %d: %d", i, c->heap[i]->heap);
+		if(i > 0 && c->heap[(i - 1) >> 1]->used - now > c->heap[i]->used - now)
+			sysfatal("bad heap ordering");
+		k = (i << 1) + 1;
+		if(k < c->nheap && c->heap[i]->used - now > c->heap[k]->used - now)
+			sysfatal("bad heap ordering");
+		k++;
+		if(k < c->nheap && c->heap[i]->used - now > c->heap[k]->used - now)
+			sysfatal("bad heap ordering");
+	}
+
+	refed = 0;
+	for(i = 0; i < c->nblock; i++){
+		b = &c->block[i];
+		if(b->data != &c->mem[i * size])
+			sysfatal("mis-blocked at %d", i);
+		if(b->ref && b->heap == BadHeap)
+			refed++;
+		else if(b->addr != NilBlock)
+			refed++;
+	}
+if(c->nheap + refed != c->nblock){
+fprint(2, "cachecheck: nheap %d refed %d nblocks %d\n", c->nheap, refed, c->nblock);
+//vtcachedump(c);
+}
+	assert(c->nheap + refed == c->nblock);
+	refed = 0;
+	for(i = 0; i < c->nblock; i++){
+		b = &c->block[i];
+		if(b->ref){
+if(1)fprint(2, "a=%ud %V ref=%d\n", b->addr, b->score, b->ref);
+			refed++;
+		}
+	}
+if(refed > 0)fprint(2, "cachecheck: in used %d\n", refed);
+}
+
+static int
+upheap(int i, VtBlock *b)
+{
+	VtBlock *bb;
+	u32int now;
+	int p;
+	VtCache *c;
+	
+	c = b->c;
+	now = c->now;
+	for(; i != 0; i = p){
+		p = (i - 1) >> 1;
+		bb = c->heap[p];
+		if(b->used - now >= bb->used - now)
+			break;
+		c->heap[i] = bb;
+		bb->heap = i;
+	}
+	c->heap[i] = b;
+	b->heap = i;
+
+	return i;
+}
+
+static int
+downheap(int i, VtBlock *b)
+{
+	VtBlock *bb;
+	u32int now;
+	int k;
+	VtCache *c;
+	
+	c = b->c;
+	now = c->now;
+	for(; ; i = k){
+		k = (i << 1) + 1;
+		if(k >= c->nheap)
+			break;
+		if(k + 1 < c->nheap && c->heap[k]->used - now > c->heap[k + 1]->used - now)
+			k++;
+		bb = c->heap[k];
+		if(b->used - now <= bb->used - now)
+			break;
+		c->heap[i] = bb;
+		bb->heap = i;
+	}
+	c->heap[i] = b;
+	b->heap = i;
+	return i;
+}
+
+/*
+ * Delete a block from the heap.
+ * Called with c->lk held.
+ */
+static void
+heapdel(VtBlock *b)
+{
+	int i, si;
+	VtCache *c;
+
+	c = b->c;
+
+	si = b->heap;
+	if(si == BadHeap)
+		return;
+	b->heap = BadHeap;
+	c->nheap--;
+	if(si == c->nheap)
+		return;
+	b = c->heap[c->nheap];
+	i = upheap(si, b);
+	if(i == si)
+		downheap(i, b);
+}
+
+/*
+ * Insert a block into the heap.
+ * Called with c->lk held.
+ */
+static void
+heapins(VtBlock *b)
+{
+	assert(b->heap == BadHeap);
+	upheap(b->c->nheap++, b);
+}
+
+/*
+ * locate the vtBlock with the oldest second to last use.
+ * remove it from the heap, and fix up the heap.
+ */
+/* called with c->lk held */
+static VtBlock*
+vtcachebumpblock(VtCache *c)
+{
+	VtBlock *b;
+
+	/*
+	 * locate the vtBlock with the oldest second to last use.
+	 * remove it from the heap, and fix up the heap.
+	 */
+	if(c->nheap == 0){
+		vtcachedump(c);
+abort();
+		sysfatal("vtcachebumpblock: no free blocks in vtCache");
+	}
+	b = c->heap[0];
+	heapdel(b);
+
+	assert(b->heap == BadHeap);
+	assert(b->ref == 0);
+
+	/*
+	 * unchain the vtBlock from hash chain if any
+	 */
+	if(b->prev){
+		*(b->prev) = b->next;
+		if(b->next)
+			b->next->prev = b->prev;
+		b->prev = nil;
+	}
+
+ 	
+if(0)fprint(2, "droping %x:%V\n", b->addr, b->score);
+	/* set vtBlock to a reasonable state */
+	b->ref = 1;
+	b->iostate = BioEmpty;
+	return b;
+}
+
+/*
+ * fetch a local block from the memory cache.
+ * if it's not there, load it, bumping some other Block.
+ * if we're out of free blocks, we're screwed.
+ */
+VtBlock*
+vtcachelocal(VtCache *c, u32int addr, int type)
+{
+	VtBlock *b;
+
+	if(addr >= c->nblock)
+		sysfatal("vtcachelocal: asked for block #%ud; only %d blocks\n",
+			addr, c->nblock);
+
+	b = &c->block[addr];
+	if(b->addr == NilBlock || b->iostate != BioLocal)
+{
+abort();
+		sysfatal("vtcachelocal: block is not local");
+}
+
+	if(b->type != type)
+{
+print("%d != %d\n", b->type, type);
+abort();
+		sysfatal("vtcachelocal: block has wrong type %d != %d", b->type, type);
+}
+
+	qlock(&c->lk);
+	b->ref++;
+	qunlock(&c->lk);
+
+	qlock(&b->lk);
+	b->nlock = 1;
+	return b;
+}
+
+VtBlock*
+vtcacheallocblock(VtCache *c, int type)
+{
+	VtBlock *b;
+
+if(type >= VtMaxType)
+	abort();
+
+	qlock(&c->lk);
+	b = vtcachebumpblock(c);
+	b->iostate = BioLocal;
+	b->type = type;
+	b->addr = b - c->block;
+	vtzeroextend(type, b->data, 0, c->blocksize);
+	vtlocaltoglobal(b->addr, b->score);
+	qunlock(&c->lk);
+
+	qlock(&b->lk);
+	b->nlock = 1;
+
+	return b;
+}
+
+/*
+ * fetch a global (Venti) block from the memory cache.
+ * if it's not there, load it, bumping some other block.
+ */
+VtBlock*
+vtcacheglobal(VtCache *c, uchar score[VtScoreSize], int type)
+{
+	VtBlock *b;
+	ulong h;
+	int n;
+	u32int addr;
+
+	addr = vtglobaltolocal(score);
+	if(addr != NilBlock)
+		return vtcachelocal(c, addr, type);
+
+	h = (u32int)(score[0]|(score[1]<<8)|(score[2]<<16)|(score[3]<<24)) % c->nhash;
+
+	/*
+	 * look for the block in the cache
+	 */
+	qlock(&c->lk);
+	for(b = c->hash[h]; b != nil; b = b->next){
+		if(b->addr != NilBlock || memcmp(b->score, score, VtScoreSize) != 0 || b->type != type)
+			continue;
+		heapdel(b);
+		b->ref++;
+		qunlock(&c->lk);
+		qlock(&b->lk);
+		b->nlock = 1;
+		return b;
+	}
+
+	/*
+	 * not found
+	 */
+	b = vtcachebumpblock(c);
+	b->addr = NilBlock;
+	b->type = type;
+	memmove(b->score, score, VtScoreSize);
+	/* chain onto correct hash */
+	b->next = c->hash[h];
+	c->hash[h] = b;
+	if(b->next != nil)
+		b->next->prev = &b->next;
+	b->prev = &c->hash[h];
+
+	/*
+	 * Lock b before unlocking c, so that others wait while we read.
+	 * 
+	 * You might think there is a race between this qlock(b) before qunlock(c)
+	 * and the qlock(c) while holding a qlock(b) in vtblockwrite.  However,
+	 * the block here can never be the block in a vtblockwrite, so we're safe.
+	 * We're certainly living on the edge.
+	 */
+	qlock(&b->lk);
+	b->nlock = 1;
+	qunlock(&c->lk);
+
+	n = vtread(c->z, score, type, b->data, c->blocksize);
+	if(n < 0){
+fprint(2, "vtread: %r\n");
+		b->iostate = BioVentiError;
+		vtblockput(b);
+		return nil;
+	}
+	vtzeroextend(type, b->data, n, c->blocksize);
+	b->iostate = BioVenti;
+	b->nlock = 1;
+	b->decrypted = 0;
+	return b;
+}
+
+/*
+ * The thread that has locked b may refer to it by
+ * multiple names.  Nlock counts the number of
+ * references the locking thread holds.  It will call
+ * vtblockput once per reference.
+ */
+void
+vtblockduplock(VtBlock *b)
+{
+	assert(b->nlock > 0);
+	b->nlock++;
+}
+
+/*
+ * we're done with the block.
+ * unlock it.  can't use it after calling this.
+ */
+void
+vtblockput(VtBlock* b)
+{
+	VtCache *c;
+
+	if(b == nil)
+		return;
+
+if(0)fprint(2, "vtblockput: %d: %x %d %d\n", getpid(), b->addr, c->nheap, b->iostate);
+
+	if(--b->nlock > 0)
+		return;
+
+	/*
+	 * b->nlock should probably stay at zero while
+	 * the vtBlock is unlocked, but diskThread and vtSleep
+	 * conspire to assume that they can just qlock(&b->lk); vtblockput(b),
+	 * so we have to keep b->nlock set to 1 even 
+	 * when the vtBlock is unlocked.
+	 */
+	assert(b->nlock == 0);
+	b->nlock = 1;
+
+	qunlock(&b->lk);
+	c = b->c;
+	qlock(&c->lk);
+
+	if(--b->ref > 0){
+		qunlock(&c->lk);
+		return;
+	}
+
+	assert(b->ref == 0);
+	switch(b->iostate){
+	case BioVenti:
+//if(b->addr != NilBlock) print("blockput %d\n", b->addr);
+		b->used = c->now++;
+	case BioVentiError:
+		heapins(b);
+		break;
+	case BioLocal:
+		break;
+	}
+	qunlock(&c->lk);
+}
+
+int
+vtblockwrite(VtBlock *b)
+{
+	uchar score[VtScoreSize];
+	VtCache *c;
+	uint h;
+	int n;
+
+	if(b->iostate != BioLocal){
+		abort();
+		sysfatal("vtBlockWrite: not a local block");
+	}
+
+	c = b->c;
+	n = vtzerotruncate(b->type, b->data, c->blocksize);
+	if(vtwrite(c->z, score, b->type, b->data, n) < 0)
+		return -1;
+
+	memmove(b->score, score, VtScoreSize);
+
+	qlock(&c->lk);
+	b->iostate = BioVenti;
+	h = (u32int)(score[0]|(score[1]<<8)|(score[2]<<16)|(score[3]<<24)) % c->nhash;
+	b->next = c->hash[h];
+	c->hash[h] = b;
+	if(b->next != nil)
+		b->next->prev = &b->next;
+	b->prev = &c->hash[h];
+	qunlock(&c->lk);
+	return 0;
+}
+
+uint
+vtcacheblocksize(VtCache *c)
+{
+	return c->blocksize;
+}
+
+VtBlock*
+vtblockcopy(VtBlock *b)
+{
+	VtBlock *bb;
+
+ncopy++;
+	bb = vtcacheallocblock(b->c, b->type);
+	if(bb == nil){
+		vtblockput(b);
+		return nil;
+	}
+	memmove(bb->data, b->data, b->c->blocksize);
+	vtblockput(b);
+	return bb;
+}
+
+void
+vtlocaltoglobal(u32int addr, uchar score[VtScoreSize])
+{
+	memset(score, 0, 16);
+	score[16] = addr>>24;
+	score[17] = addr>>16;
+	score[18] = addr>>8;
+	score[19] = addr;
+}
+
+
+u32int
+vtglobaltolocal(uchar score[VtScoreSize])
+{
+	static uchar zero[16];
+	if(memcmp(score, zero, 16) != 0)
+		return NilBlock;
+	return (score[16]<<24)|(score[17]<<16)|(score[18]<<8)|score[19];
+}
diff --git a/src/libventi/client.c b/src/libventi/client.c
@@ -0,0 +1,151 @@
+#include <u.h>
+#include <libc.h>
+#include <venti.h>
+
+static int
+vtfcallrpc(VtConn *z, VtFcall *ou, VtFcall *in)
+{
+	Packet *p;
+
+	p = vtfcallpack(ou);
+	if(p == nil)
+		return -1;
+	if((p = vtrpc(z, p)) == nil)
+		return -1;
+	if(vtfcallunpack(in, p) < 0){
+		packetfree(p);
+		return -1;
+	}
+	if(in->type == VtRerror){
+		werrstr(in->error);
+		vtfcallclear(in);
+		packetfree(p);
+		return -1;
+	}
+	if(in->type != ou->type+1){
+		werrstr("type mismatch: sent %c%d got %c%d",
+			"TR"[ou->type&1], ou->type>>1,
+			"TR"[in->type&1], in->type>>1);
+		vtfcallclear(in);
+		packetfree(p);
+		return -1;
+	}
+	packetfree(p);
+	return 0;
+}
+
+int
+vthello(VtConn *z)
+{
+	VtFcall tx, rx;
+
+	memset(&tx, 0, sizeof tx);
+	tx.type = VtThello;
+	tx.version = z->version;
+	tx.uid = z->uid;
+	if(tx.uid == nil)
+		tx.uid = "anonymous";
+	if(vtfcallrpc(z, &tx, &rx) < 0)
+		return -1;
+	z->sid = rx.sid;
+	rx.sid = 0;
+	vtfcallclear(&rx);
+	return 0;
+}
+
+Packet*
+vtreadpacket(VtConn *z, uchar score[VtScoreSize], uint type, int n)
+{
+	VtFcall tx, rx;
+
+	memset(&tx, 0, sizeof tx);
+	tx.type = VtTread;
+	tx.dtype = type;
+	tx.count = n;
+	memmove(tx.score, score, VtScoreSize);
+	if(vtfcallrpc(z, &tx, &rx) < 0)
+		return nil;
+	if(packetsize(rx.data) > n){
+		werrstr("read returned too much data");
+		packetfree(rx.data);
+		return nil;
+	}
+	packetsha1(rx.data, tx.score);
+	if(memcmp(score, tx.score, VtScoreSize) != 0){
+		werrstr("read asked for %V got %V", score, tx.score);
+		packetfree(rx.data);
+		return nil;
+	}
+
+	return rx.data;
+}
+
+int
+vtread(VtConn *z, uchar score[VtScoreSize], uint type, uchar *buf, int n)
+{
+	int nn;
+	Packet *p;
+
+	if((p = vtreadpacket(z, score, type, n)) == nil)
+		return -1;
+	nn = packetsize(p);
+	if(packetconsume(p, buf, nn) < 0)
+		abort();
+	return nn;
+}
+
+int
+vtwritepacket(VtConn *z, uchar score[VtScoreSize], uint type, Packet *p)
+{
+	VtFcall tx, rx;
+
+	tx.type = VtTwrite;
+	tx.dtype = type;
+	tx.data = p;
+	packetsha1(p, score);
+	if(vtfcallrpc(z, &tx, &rx) < 0)
+		return -1;
+	if(memcmp(score, rx.score, VtScoreSize) != 0){
+		werrstr("sha1 hash mismatch: want %V got %V", score, rx.score);
+		return -1;
+	}
+	return 0;
+}
+
+int
+vtwrite(VtConn *z, uchar score[VtScoreSize], uint type, uchar *buf, int n)
+{
+	Packet *p;
+
+	p = packetforeign(buf, n, nil, nil);
+	return vtwritepacket(z, score, type, p);
+}
+
+int
+vtsync(VtConn *z)
+{
+	VtFcall tx, rx;
+
+	tx.type = VtTsync;
+	return vtfcallrpc(z, &tx, &rx);
+}
+
+int
+vtping(VtConn *z)
+{
+	VtFcall tx, rx;
+
+	tx.type = VtTping;
+	return vtfcallrpc(z, &tx, &rx);
+}
+
+int
+vtconnect(VtConn *z)
+{
+	if(vtversion(z) < 0)
+		return -1;
+	if(vthello(z) < 0)
+		return -1;
+	return 0;
+}
+
diff --git a/src/libventi/conn.c b/src/libventi/conn.c
@@ -0,0 +1,36 @@
+#include <u.h>
+#include <libc.h>
+#include <venti.h>
+#include "queue.h"
+
+VtConn*
+vtconn(int infd, int outfd)
+{
+	VtConn *z;
+
+	z = vtmallocz(sizeof(VtConn));
+	z->tagrend.l = &z->lk;
+	z->rpcfork.l = &z->lk;
+	z->infd = infd;
+	z->outfd = outfd;
+	z->part = packetalloc();
+	return z;
+}
+
+void
+vtfreeconn(VtConn *z)
+{
+	vthangup(z);
+	qlock(&z->lk);
+	for(;;){
+		if(z->readq)
+			_vtqhangup(z->readq);
+		else if(z->writeq)
+			_vtqhangup(z->writeq);
+		else
+			break;
+		rsleep(&z->rpcfork);
+	}
+	packetfree(z->part);
+	vtfree(z);
+}
diff --git a/src/libventi/cvt.h b/src/libventi/cvt.h
@@ -0,0 +1,15 @@
+/*
+ * integer conversion routines
+ */
+#define	U8GET(p)	((p)[0])
+#define	U16GET(p)	(((p)[0]<<8)|(p)[1])
+#define	U32GET(p)	((u32int)(((p)[0]<<24)|((p)[1]<<16)|((p)[2]<<8)|(p)[3]))
+#define	U48GET(p)	(((vlong)U16GET(p)<<32)|(vlong)U32GET((p)+2))
+#define	U64GET(p)	(((vlong)U32GET(p)<<32)|(vlong)U32GET((p)+4))
+
+#define	U8PUT(p,v)	(p)[0]=(v)
+#define	U16PUT(p,v)	(p)[0]=(v)>>8;(p)[1]=(v)
+#define	U32PUT(p,v)	(p)[0]=(v)>>24;(p)[1]=(v)>>16;(p)[2]=(v)>>8;(p)[3]=(v)
+#define	U48PUT(p,v,t32)	t32=(v)>>32;U16PUT(p,t32);t32=(v);U32PUT((p)+2,t32)
+#define	U64PUT(p,v,t32)	t32=(v)>>32;U32PUT(p,t32);t32=(v);U32PUT((p)+4,t32)
+
diff --git a/src/libventi/debug.c b/src/libventi/debug.c
@@ -0,0 +1,17 @@
+#include <u.h>
+#include <libc.h>
+#include <venti.h>
+
+void
+vtdebug(VtConn *z, char *fmt, ...)
+{
+	va_list arg;
+
+	if(z->debug == 0)
+		return;
+
+	va_start(arg, fmt);
+	vfprint(2, fmt, arg);
+	va_end(arg);
+}
+
diff --git a/src/libventi/dial.c b/src/libventi/dial.c
@@ -0,0 +1,21 @@
+#include <u.h>
+#include <libc.h>
+#include <venti.h>
+
+VtConn*
+vtdial(char *addr)
+{
+	char *na;
+	int fd;
+
+	if(addr == nil)
+		addr = getenv("venti");
+	if(addr == nil)
+		addr = "$venti";
+
+	na = netmkaddr(addr, "net", "venti");
+	if((fd = dial(na, nil, nil, nil)) < 0)
+		return nil;
+
+	return vtconn(fd, fd);
+}
diff --git a/src/libventi/dtype.c b/src/libventi/dtype.c
@@ -0,0 +1,78 @@
+#include <u.h>
+#include <libc.h>
+#include <venti.h>
+
+enum {
+	OVtErrType,		/* illegal */
+
+	OVtRootType,
+	OVtDirType,
+	OVtPointerType0,
+	OVtPointerType1,
+	OVtPointerType2,
+	OVtPointerType3,
+	OVtPointerType4,
+	OVtPointerType5,
+	OVtPointerType6,
+	OVtPointerType7,		/* not used */
+	OVtPointerType8,		/* not used */
+	OVtPointerType9,		/* not used */
+	OVtDataType,
+
+	OVtMaxType
+};
+
+
+uint todisk[] = {
+	OVtDataType,
+	OVtPointerType0,
+	OVtPointerType1,
+	OVtPointerType2,
+	OVtPointerType3,
+	OVtPointerType4,
+	OVtPointerType5,
+	OVtPointerType6,
+	OVtDirType,
+	OVtPointerType0,
+	OVtPointerType1,
+	OVtPointerType2,
+	OVtPointerType3,
+	OVtPointerType4,
+	OVtPointerType5,
+	OVtPointerType6,
+	OVtRootType,
+};
+
+uint fromdisk[] = {
+	~0,
+	VtRootType,
+	VtDirType,
+	VtDirType+1,
+	VtDirType+2,
+	VtDirType+3,
+	VtDirType+4,
+	VtDirType+5,
+	VtDirType+6,
+	VtDirType+7,
+	~0,
+	~0,
+	~0,
+	VtDataType,
+};
+
+uint
+vttodisktype(uint n)
+{
+	if(n >= nelem(todisk))
+		return ~0;
+	return todisk[n];
+}
+
+uint
+vtfromdisktype(uint n)
+{
+	if(n >= nelem(fromdisk))
+		return ~0;
+	return fromdisk[n];
+}
+
diff --git a/src/libventi/entry.c b/src/libventi/entry.c
@@ -0,0 +1,85 @@
+#include <u.h>
+#include <libc.h>
+#include <venti.h>
+#include "cvt.h"
+
+static int
+checksize(int n)
+{
+	if(n < 256 || n > VtMaxLumpSize) {
+		werrstr("bad block size");
+		return -1;
+	}
+	return 0;
+}
+
+void
+vtentrypack(VtEntry *e, uchar *p, int index)
+{
+	ulong t32;
+	int flags;
+	uchar *op;
+	int depth;
+
+	p += index * VtEntrySize;
+	op = p;
+
+	U32PUT(p, e->gen);
+	p += 4;
+	U16PUT(p, e->psize);
+	p += 2;
+	U16PUT(p, e->dsize);
+	p += 2;
+	depth = e->type&VtTypeDepthMask;
+	flags = (e->flags&~(VtEntryDir|VtEntryDepthShift));
+	flags |= depth << VtEntryDepthShift;
+	if(e->type - depth == VtEntryDir)
+		flags |= VtEntryDir;
+	U8PUT(p, flags);
+	p++;
+	memset(p, 0, 5);
+	p += 5;
+	U48PUT(p, e->size, t32);
+	p += 6;
+	memmove(p, e->score, VtScoreSize);
+	p += VtScoreSize;
+
+	assert(p-op == VtEntrySize);
+}
+
+int
+vtentryunpack(VtEntry *e, uchar *p, int index)
+{
+	uchar *op;
+
+	p += index * VtEntrySize;
+	op = p;
+
+	e->gen = U32GET(p);
+	p += 4;
+	e->psize = U16GET(p);
+	p += 2;
+	e->dsize = U16GET(p);
+	p += 2;
+	e->flags = U8GET(p);
+	e->type = (e->flags&VtEntryDir) ? VtDirType : VtDataType;
+	e->type += (e->flags & VtEntryDepthMask) >> VtEntryDepthShift;
+	e->flags &= ~(VtEntryDir|VtEntryDepthMask);
+	p++;
+	p += 5;
+	e->size = U48GET(p);
+	p += 6;
+	memmove(e->score, p, VtScoreSize);
+	p += VtScoreSize;
+
+	assert(p-op == VtEntrySize);
+	
+	if(!(e->flags & VtEntryActive))
+		return 0;
+
+	if(checksize(e->psize) < 0 || checksize(e->dsize) < 0)
+		return -1;
+
+	return 0;
+}
+
diff --git a/src/libventi/fcall.c b/src/libventi/fcall.c
@@ -0,0 +1,230 @@
+#include <u.h>
+#include <libc.h>
+#include <venti.h>
+
+Packet*
+vtfcallpack(VtFcall *f)
+{
+	uchar buf[4];
+	Packet *p;
+
+	p = packetalloc();
+
+	buf[0] = f->type;
+	buf[1] = f->tag;
+	packetappend(p, buf, 2);
+
+	switch(f->type){
+	default:
+		werrstr("vtfcallpack: unknown packet type %d", f->type);
+		goto Err;
+
+	case VtRerror:
+		if(vtputstring(p, f->error) < 0)
+			goto Err;
+		break;
+
+	case VtTping:
+		break;
+
+	case VtRping:
+		break;
+
+	case VtThello:
+		if(vtputstring(p, f->version) < 0
+		|| vtputstring(p, f->uid) < 0)
+			goto Err;
+		buf[0] = f->strength;
+		buf[1] = f->ncrypto;
+		packetappend(p, buf, 2);
+		packetappend(p, f->crypto, f->ncrypto);
+		buf[0] = f->ncodec;
+		packetappend(p, buf, 1);
+		packetappend(p, f->codec, f->ncodec);
+		break;
+
+	case VtRhello:
+		if(vtputstring(p, f->sid) < 0)
+			goto Err;
+		buf[0] = f->rcrypto;
+		buf[1] = f->rcodec;
+		packetappend(p, buf, 2);
+		break;
+
+	case VtTgoodbye:
+		break;
+
+	case VtTread:
+		packetappend(p, f->score, VtScoreSize);
+		buf[0] = vttodisktype(f->dtype);
+		if(~buf[0] == 0)
+			goto Err;
+		buf[1] = 0;
+		buf[2] = f->count >> 8;
+		buf[3] = f->count;
+		packetappend(p, buf, 4);
+		break;
+
+	case VtRread:
+		packetconcat(p, f->data);
+		break;
+
+	case VtTwrite:
+		buf[0] = vttodisktype(f->dtype);
+		if(~buf[0] == 0)
+			goto Err;
+		buf[1] = 0;
+		buf[2] = 0;
+		buf[3] = 0;
+		packetappend(p, buf, 4);
+		packetconcat(p, f->data);
+		break;
+
+	case VtRwrite:
+		packetappend(p, f->score, VtScoreSize);
+		break;
+
+	case VtTsync:
+		break;
+
+	case VtRsync:
+		break;
+	}
+
+	return p;
+
+Err:
+	packetfree(p);
+	return nil;
+}
+
+int
+vtfcallunpack(VtFcall *f, Packet *p)
+{
+	uchar buf[4];
+
+	memset(f, 0, sizeof *f);
+
+	if(packetconsume(p, buf, 2) < 0)
+		return -1;
+
+	f->type = buf[0];
+	f->tag = buf[1];
+
+	switch(f->type){
+	default:
+		werrstr("vtfcallunpack: unknown bad packet type %d", f->type);
+		vtfcallclear(f);
+		return -1;
+
+	case VtRerror:
+		if(vtgetstring(p, &f->error) < 0)
+			goto Err;
+		break;
+
+	case VtTping:
+		break;
+
+	case VtRping:
+		break;
+
+	case VtThello:
+		if(vtgetstring(p, &f->version) < 0
+		|| vtgetstring(p, &f->uid) < 0
+		|| packetconsume(p, buf, 2) < 0)
+			goto Err;
+		f->strength = buf[0];
+		f->ncrypto = buf[1];
+		if(f->ncrypto){
+			f->crypto = vtmalloc(f->ncrypto);
+			if(packetconsume(p, buf, f->ncrypto) < 0)
+				goto Err;
+		}
+		if(packetconsume(p, buf, 1) < 0)
+			goto Err;
+		f->ncodec = buf[0];
+		if(f->ncodec){
+			f->codec = vtmalloc(f->ncodec);
+			if(packetconsume(p, buf, f->ncodec) < 0)
+				goto Err;
+		}
+		break;
+
+	case VtRhello:
+		if(vtgetstring(p, &f->sid) < 0
+		|| packetconsume(p, buf, 2) < 0)
+			goto Err;
+		f->rcrypto = buf[0];
+		f->rcodec = buf[1];
+		break;
+
+	case VtTgoodbye:
+		break;
+
+	case VtTread:
+		if(packetconsume(p, f->score, VtScoreSize) < 0
+		|| packetconsume(p, buf, 4) < 0)
+			goto Err;
+		f->dtype = vtfromdisktype(buf[0]);
+		if(~f->dtype == 0)
+			goto Err;
+		f->count = (buf[2] << 8) | buf[3];
+		break;
+
+	case VtRread:
+		f->data = packetalloc();
+		packetconcat(f->data, p);
+		break;
+
+	case VtTwrite:
+		if(packetconsume(p, buf, 4) < 0)
+			goto Err;
+		f->dtype = vtfromdisktype(buf[0]);
+		if(~f->dtype == 0)
+			goto Err;
+		f->data = packetalloc();
+		packetconcat(f->data, p);
+		break;
+
+	case VtRwrite:
+		if(packetconsume(p, f->score, VtScoreSize) < 0)
+			goto Err;
+		break;
+
+	case VtTsync:
+		break;
+
+	case VtRsync:
+		break;
+	}
+
+	if(packetsize(p) != 0)
+		goto Err;
+
+	return 0;
+
+Err:
+	werrstr("bad packet");
+	return -1;
+}
+
+void
+vtfcallclear(VtFcall *f)
+{
+	vtfree(f->error);
+	f->error = nil;
+	vtfree(f->uid);
+	f->uid = nil;
+	vtfree(f->sid);
+	f->sid = nil;
+	vtfree(f->version);
+	f->version = nil;
+	vtfree(f->crypto);
+	f->crypto = nil;
+	vtfree(f->codec);
+	f->codec = nil;
+	vtfree(f->auth);
+	f->auth = nil;
+	packetfree(f->data);
+	f->auth = nil;
+}
diff --git a/src/libventi/fcallfmt.c b/src/libventi/fcallfmt.c
@@ -0,0 +1,55 @@
+#include <u.h>
+#include <libc.h>
+#include <venti.h>
+
+int
+vtfcallfmt(Fmt *f)
+{
+	VtFcall *t;
+
+	t = va_arg(f->args, VtFcall*);
+	if(t == nil){
+		fmtprint(f, "<nil fcall>");
+		return 0;
+	}
+	switch(t->type){
+	default:
+		return fmtprint(f, "%c%d tag %ud", "TR"[t->type&1], t->type>>1, t->tag);
+	case VtRerror:
+		return fmtprint(f, "Rerror tag %ud error %s", t->tag, t->error);
+	case VtTping:
+		return fmtprint(f, "Tping tag %ud", t->tag);
+	case VtRping:
+		return fmtprint(f, "Rping tag %ud", t->tag);
+	case VtThello:
+		return fmtprint(f, "Thello tag %ud vers %s uid %s strength %d crypto %d:%.*H codec %d:%.*H", t->tag,
+			t->version, t->uid, t->strength, t->ncrypto, t->ncrypto, t->crypto,
+			t->ncodec, t->ncodec, t->codec);
+	case VtRhello:
+		return fmtprint(f, "Rhello tag %ud sid %s rcrypto %d rcodec %d", t->tag, t->sid, t->rcrypto, t->rcodec);
+	case VtTgoodbye:
+		return fmtprint(f, "Tgoodbye tag %ud", t->tag);
+	case VtRgoodbye:
+		return fmtprint(f, "Rgoodbye tag %ud", t->tag);
+	case VtTauth0:
+		return fmtprint(f, "Tauth0 tag %ud auth %.*H", t->tag, t->nauth, t->auth);
+	case VtRauth0:
+		return fmtprint(f, "Rauth0 tag %ud auth %.*H", t->tag, t->nauth, t->auth);
+	case VtTauth1:
+		return fmtprint(f, "Tauth1 tag %ud auth %.*H", t->tag, t->nauth, t->auth);
+	case VtRauth1:
+		return fmtprint(f, "Rauth1 tag %ud auth %.*H", t->tag, t->nauth, t->auth);
+	case VtTread:
+		return fmtprint(f, "Tread tag %ud score %V dtype %d count %d", t->tag, t->score, t->dtype, t->count);
+	case VtRread:
+		return fmtprint(f, "Rread tag %ud count %d", t->tag, packetsize(t->data));
+	case VtTwrite:
+		return fmtprint(f, "Twrite tag %ud dtype %d count %d", t->tag, t->dtype, packetsize(t->data));
+	case VtRwrite:
+		return fmtprint(f, "Rwrite tag %ud score %V", t->tag, t->score);
+	case VtTsync:
+		return fmtprint(f, "Tsync tag %ud", t->tag);
+	case VtRsync:
+		return fmtprint(f, "Rsync tag %ud", t->tag);
+	}
+}
diff --git a/src/libventi/file.c b/src/libventi/file.c
@@ -0,0 +1,1264 @@
+/*
+ * Manage tree of VtFiles stored in the block cache.
+ *
+ * The single point of truth for the info about the VtFiles themselves
+ * is the block data.  Because of this, there is no explicit locking of
+ * VtFile structures, and indeed there may be more than one VtFile
+ * structure for a given Venti file.  They synchronize through the 
+ * block cache.
+ *
+ * This is a bit simpler than fossil because there are no epochs
+ * or tags or anything else.  Just mutable local blocks and immutable
+ * Venti blocks.
+ */
+
+#include <u.h>
+#include <libc.h>
+#include <venti.h>
+
+enum
+{
+	MaxBlock = (1UL<<31),
+};
+
+struct VtFile
+{
+	QLock lk;
+	int ref;
+	int local;
+	VtBlock *b;			/* block containing this file */
+	uchar score[VtScoreSize];	/* score of block containing this file */
+
+/* immutable */
+	VtCache *c;
+	int mode;
+	u32int gen;
+	int dsize;
+	int dir;
+	VtFile *parent;
+	int epb;			/* entries per block in parent */
+	u32int offset; 			/* entry offset in parent */
+};
+
+static char EBadEntry[] = "bad VtEntry";
+static char ENotDir[] = "walk in non-directory";
+static char ETooBig[] = "file too big";
+static char EBadAddr[] = "bad address";
+static char ELabelMismatch[] = "label mismatch";
+
+static int	sizetodepth(uvlong s, int psize, int dsize);
+static VtBlock 	*fileload(VtFile *r, VtEntry *e);
+static int	shrinkdepth(VtFile*, VtBlock*, VtEntry*, int);
+static int	shrinksize(VtFile*, VtEntry*, uvlong);
+static int	growdepth(VtFile*, VtBlock*, VtEntry*, int);
+
+#define ISLOCKED(r)	((r)->b != nil)
+#define DEPTH(t)	((t)&VtTypeDepthMask)
+
+static VtFile *
+vtfilealloc(VtCache *c, VtBlock *b, VtFile *p, u32int offset, int mode)
+{
+	int epb;
+	u32int size;
+	VtEntry e;
+	VtFile *r;
+
+	assert(p==nil || ISLOCKED(p));
+
+	if(p == nil){
+		assert(offset == 0);
+		epb = 1;
+	}else
+		epb = p->dsize / VtEntrySize;
+
+	if(b->type != VtDirType)
+		goto Bad;
+
+	/*
+	 * a non-active entry is the only thing that
+	 * can legitimately happen here. all the others
+	 * get prints.
+	 */
+	if(vtentryunpack(&e, b->data, offset % epb) < 0){
+		fprint(2, "vtentryunpack failed\n");
+		goto Bad;
+	}
+	if(!(e.flags & VtEntryActive)){
+		if(0)fprint(2, "not active\n");
+		goto Bad;
+	}
+	if(e.psize < 256 || e.dsize < 256){
+		fprint(2, "psize %ud dsize %ud\n", e.psize, e.dsize);
+		goto Bad;
+	}
+
+	if(DEPTH(e.type) < sizetodepth(e.size, e.psize, e.dsize)){
+		fprint(2, "depth %ud size %llud psize %ud dsize %ud\n",
+			DEPTH(e.type), e.size, e.psize, e.dsize);
+		goto Bad;
+	}
+
+	size = vtcacheblocksize(c);
+	if(e.dsize > size || e.psize > size){
+		fprint(2, "psize %ud dsize %ud blocksize %ud\n", e.psize, e.dsize, size);
+		goto Bad;
+	}
+
+	r = vtmallocz(sizeof(VtFile));
+	r->c = c;
+	r->mode = mode;
+	r->dsize = e.dsize;
+	r->gen = e.gen;
+	r->dir = (e.flags & VtEntryDir) != 0;
+	r->ref = 1;
+	r->parent = p;
+	if(p){
+		qlock(&p->lk);
+		assert(mode == VtOREAD || p->mode == VtORDWR);
+		p->ref++;
+		qunlock(&p->lk);
+	}
+	memmove(r->score, b->score, VtScoreSize);
+	r->offset = offset;
+	r->epb = epb;
+
+	return r;
+Bad:
+	werrstr(EBadEntry);
+	return nil;
+	
+}
+
+VtFile *
+vtfileroot(VtCache *c, u32int addr, int mode)
+{
+	VtFile *r;
+	VtBlock *b;
+
+	b = vtcachelocal(c, addr, VtDirType);
+	if(b == nil)
+		return nil;
+
+	r = vtfilealloc(c, b, nil, 0, mode);
+	vtblockput(b);
+	return r;
+}
+
+VtFile*
+vtfileopenroot(VtCache *c, VtEntry *e)
+{
+	VtBlock *b;
+	VtFile *f;
+
+	b = vtcacheallocblock(c, VtDirType);
+	if(b == nil)
+		return nil;
+
+	vtentrypack(e, b->data, 0);
+	f = vtfilealloc(c, b, nil, 0, VtORDWR);
+	vtblockput(b);
+	return f;
+}
+
+VtFile *
+vtfilecreateroot(VtCache *c, int psize, int dsize, int type)
+{
+	VtEntry e;
+
+	memset(&e, 0, sizeof e);
+	e.flags = VtEntryActive;
+	e.psize = psize;
+	e.dsize = dsize;
+	if(type == VtDirType)
+		e.flags |= VtEntryDir;
+	memmove(e.score, vtzeroscore, VtScoreSize);
+
+	return vtfileopenroot(c, &e);
+}
+
+VtFile *
+vtfileopen(VtFile *r, u32int offset, int mode)
+{
+	ulong bn;
+	VtBlock *b;
+
+	assert(ISLOCKED(r));
+	if(!r->dir){
+		werrstr(ENotDir);
+		return nil;
+	}
+
+	bn = offset/(r->dsize/VtEntrySize);
+
+	b = vtfileblock(r, bn, mode);
+	if(b == nil)
+		return nil;
+	r = vtfilealloc(r->c, b, r, offset, mode);
+	vtblockput(b);
+	return r;
+}
+
+VtFile *
+vtfilecreate(VtFile *r, int psize, int dsize, int dir)
+{
+	int i;
+	VtBlock *b;
+	u32int bn, size;
+	VtEntry e;
+	int epb;
+	VtFile *rr;
+ 	u32int offset;
+
+	assert(ISLOCKED(r));
+
+	if(!r->dir){
+		werrstr(ENotDir);
+		return nil;
+	}
+
+	epb = r->dsize/VtEntrySize;
+
+	size = vtfilegetdirsize(r);
+	/*
+	 * look at a random block to see if we can find an empty entry
+	 */
+	offset = lnrand(size+1);
+	offset -= offset % epb;
+
+	/* try the given block and then try the last block */
+	for(;;){
+		bn = offset/epb;
+		b = vtfileblock(r, bn, VtORDWR);
+		if(b == nil)
+			return nil;
+		for(i=offset%r->epb; i<epb; i++){
+			if(vtentryunpack(&e, b->data, i) < 0)
+				continue;
+			if((e.flags&VtEntryActive) == 0 && e.gen != ~0)
+				goto Found;
+		}
+		vtblockput(b);
+		if(offset == size){
+			fprint(2, "vtfilecreate: cannot happen\n");
+			werrstr("vtfilecreate: cannot happen");
+			return nil;
+		}
+		offset = size;
+	}
+
+Found:
+	/* found an entry - gen already set */
+	e.psize = psize;
+	e.dsize = dsize;
+	e.flags = VtEntryActive;
+	e.type = dir ? VtDirType : VtDataType;
+	e.size = 0;
+	memmove(e.score, vtzeroscore, VtScoreSize);
+	vtentrypack(&e, b->data, i);
+
+	offset = bn*epb + i;
+	if(offset+1 > size){
+		if(vtfilesetdirsize(r, offset+1) < 0){
+			vtblockput(b);
+			return nil;
+		}
+	}
+
+	rr = vtfilealloc(r->c, b, r, offset, VtORDWR);
+	vtblockput(b);
+	return rr;
+}
+
+static int
+vtfilekill(VtFile *r, int doremove)
+{
+	VtEntry e;
+	VtBlock *b;
+
+	assert(ISLOCKED(r));
+	b = fileload(r, &e);
+	if(b == nil)
+		return -1;
+
+	if(doremove==0 && e.size == 0){
+		/* already truncated */
+		vtblockput(b);
+		return 0;
+	}
+
+	if(doremove){
+		if(e.gen != ~0)
+			e.gen++;
+		e.dsize = 0;
+		e.psize = 0;
+		e.flags = 0;
+	}else
+		e.flags &= ~VtEntryLocal;
+	e.type = 0;
+	e.size = 0;
+	memmove(e.score, vtzeroscore, VtScoreSize);
+	vtentrypack(&e, b->data, r->offset % r->epb);
+	vtblockput(b);
+
+	if(doremove){
+		vtfileunlock(r);
+		vtfileclose(r);
+	}
+
+	return 0;
+}
+
+int
+vtfileremove(VtFile *r)
+{
+	return vtfilekill(r, 1);
+}
+
+int
+vtfiletruncate(VtFile *r)
+{
+	return vtfilekill(r, 0);
+}
+
+uvlong
+vtfilegetsize(VtFile *r)
+{
+	VtEntry e;
+	VtBlock *b;
+
+	assert(ISLOCKED(r));
+	b = fileload(r, &e);
+	if(b == nil)
+		return ~(uvlong)0;
+	vtblockput(b);
+
+	return e.size;
+}
+
+static int
+shrinksize(VtFile *r, VtEntry *e, uvlong size)
+{
+	int i, depth, type, isdir, ppb;
+	uvlong ptrsz;
+	uchar score[VtScoreSize];
+	VtBlock *b;
+
+	b = vtcacheglobal(r->c, e->score, e->type);
+	if(b == nil)
+		return -1;
+
+	ptrsz = e->dsize;
+	ppb = e->psize/VtScoreSize;
+	type = e->type;
+	depth = DEPTH(type);
+	for(i=0; i+1<depth; i++)
+		ptrsz *= ppb;
+
+	isdir = r->dir;
+	while(depth > 0){
+		if(b->addr == NilBlock){
+			/* not worth copying the block just so we can zero some of it */
+			vtblockput(b);
+			return -1;
+		}
+
+		/*
+		 * invariant: each pointer in the tree rooted at b accounts for ptrsz bytes
+		 */
+
+		/* zero the pointers to unnecessary blocks */
+		i = (size+ptrsz-1)/ptrsz;
+		for(; i<ppb; i++)
+			memmove(b->data+i*VtScoreSize, vtzeroscore, VtScoreSize);
+
+		/* recurse (go around again) on the partially necessary block */
+		i = size/ptrsz;
+		size = size%ptrsz;
+		if(size == 0){
+			vtblockput(b);
+			return 0;
+		}
+		ptrsz /= ppb;
+		type--;
+		memmove(score, b->data+i*VtScoreSize, VtScoreSize);
+		vtblockput(b);
+		b = vtcacheglobal(r->c, score, type);
+		if(b == nil)
+			return -1;
+	}
+
+	if(b->addr == NilBlock){
+		vtblockput(b);
+		return -1;
+	}
+
+	/*
+	 * No one ever truncates BtDir blocks.
+	 */
+	if(depth==0 && !isdir && e->dsize > size)
+		memset(b->data+size, 0, e->dsize-size);
+	vtblockput(b);
+	return 0;
+}
+
+int
+vtfilesetsize(VtFile *r, uvlong size)
+{
+	int depth, edepth;
+	VtEntry e;
+	VtBlock *b;
+
+	assert(ISLOCKED(r));
+	if(size == 0)
+		return vtfiletruncate(r);
+
+	if(size > VtMaxFileSize || size > ((uvlong)MaxBlock)*r->dsize){
+		werrstr(ETooBig);
+		return -1;
+	}
+
+	b = fileload(r, &e);
+	if(b == nil)
+		return -1;
+
+	/* quick out */
+	if(e.size == size){
+		vtblockput(b);
+		return 0;
+	}
+
+	depth = sizetodepth(size, e.psize, e.dsize);
+	edepth = DEPTH(e.type);
+	if(depth < edepth){
+		if(shrinkdepth(r, b, &e, depth) < 0){
+			vtblockput(b);
+			return -1;
+		}
+	}else if(depth > edepth){
+		if(growdepth(r, b, &e, depth) < 0){
+			vtblockput(b);
+			return -1;
+		}
+	}
+
+	if(size < e.size)
+		shrinksize(r, &e, size);
+
+	e.size = size;
+	vtentrypack(&e, b->data, r->offset % r->epb);
+	vtblockput(b);
+
+	return 0;
+}
+
+int
+vtfilesetdirsize(VtFile *r, u32int ds)
+{
+	uvlong size;
+	int epb;
+
+	assert(ISLOCKED(r));
+	epb = r->dsize/VtEntrySize;
+
+	size = (uvlong)r->dsize*(ds/epb);
+	size += VtEntrySize*(ds%epb);
+	return vtfilesetsize(r, size);
+}
+
+u32int
+vtfilegetdirsize(VtFile *r)
+{
+	ulong ds;
+	uvlong size;
+	int epb;
+
+	assert(ISLOCKED(r));
+	epb = r->dsize/VtEntrySize;
+
+	size = vtfilegetsize(r);
+	ds = epb*(size/r->dsize);
+	ds += (size%r->dsize)/VtEntrySize;
+	return ds;
+}
+
+int
+vtfilegetentry(VtFile *r, VtEntry *e)
+{
+	VtBlock *b;
+
+	assert(ISLOCKED(r));
+	b = fileload(r, e);
+	if(b == nil)
+		return -1;
+	vtblockput(b);
+
+	return 0;
+}
+
+int
+vtfilesetentry(VtFile *r, VtEntry *e)
+{
+	VtBlock *b;
+	VtEntry ee;
+
+	assert(ISLOCKED(r));
+	b = fileload(r, &ee);
+	if(b == nil)
+		return -1;
+	vtentrypack(e, b->data, r->offset % r->epb);
+	vtblockput(b);
+	return 0;
+}
+
+static VtBlock *
+blockwalk(VtBlock *p, int index, VtCache *c, int mode, VtEntry *e)
+{
+	VtBlock *b;
+	int type;
+	uchar *score;
+	VtEntry oe;
+
+	switch(p->type){
+	case VtDataType:
+		assert(0);
+	case VtDirType:
+		type = e->type;
+		score = e->score;
+		break;
+	default:
+		type = p->type - 1;
+		score = p->data+index*VtScoreSize;
+		break;
+	}
+//print("walk from %V/%d ty %d to %V ty %d\n", p->score, index, p->type, score, type);
+
+	if(mode == VtOWRITE && vtglobaltolocal(score) == NilBlock){
+		b = vtcacheallocblock(c, type);
+		if(b)
+			goto HaveCopy;
+	}else
+		b = vtcacheglobal(c, score, type);
+
+	if(b == nil || mode == VtOREAD)
+		return b;
+
+	if(vtglobaltolocal(b->score) != NilBlock)
+		return b;
+
+	oe = *e;
+
+	/*
+	 * Copy on write.
+	 */
+	e->flags |= VtEntryLocal;
+
+	b = vtblockcopy(b/*, e->tag, fs->ehi, fs->elo*/);
+	if(b == nil)
+		return nil;
+
+HaveCopy:
+	if(p->type == VtDirType){
+		memmove(e->score, b->score, VtScoreSize);
+		vtentrypack(e, p->data, index);
+	}else{
+		memmove(p->data+index*VtScoreSize, b->score, VtScoreSize);
+	}
+	return b;
+}
+
+/*
+ * Change the depth of the VtFile r.
+ * The entry e for r is contained in block p.
+ */
+static int
+growdepth(VtFile *r, VtBlock *p, VtEntry *e, int depth)
+{
+	VtBlock *b, *bb;
+	VtEntry oe;
+
+	assert(ISLOCKED(r));
+	assert(depth <= VtPointerDepth);
+
+	b = vtcacheglobal(r->c, e->score, e->type);
+	if(b == nil)
+		return -1;
+
+	oe = *e;
+
+	/*
+	 * Keep adding layers until we get to the right depth
+	 * or an error occurs.
+	 */
+	while(DEPTH(e->type) < depth){
+		bb = vtcacheallocblock(r->c, e->type+1);
+		if(bb == nil)
+			break;
+		memmove(bb->data, b->score, VtScoreSize);
+		memmove(e->score, bb->score, VtScoreSize);	
+		e->type++;
+		e->flags |= VtEntryLocal;
+		vtblockput(b);
+		b = bb;
+	}
+
+	vtentrypack(e, p->data, r->offset % r->epb);
+	vtblockput(b);
+
+	if(DEPTH(e->type) == depth)
+		return 0;
+	return -1;
+}
+
+static int
+shrinkdepth(VtFile *r, VtBlock *p, VtEntry *e, int depth)
+{
+	VtBlock *b, *nb, *ob, *rb;
+	VtEntry oe;
+
+	assert(ISLOCKED(r));
+	assert(depth <= VtPointerDepth);
+
+	rb = vtcacheglobal(r->c, e->score, e->type);
+	if(rb == nil)
+		return 0;
+
+	/*
+	 * Walk down to the new root block.
+	 * We may stop early, but something is better than nothing.
+	 */
+	oe = *e;
+
+	ob = nil;
+	b = rb;
+	for(; DEPTH(e->type) > depth; e->type--){
+		nb = vtcacheglobal(r->c, b->data, e->type-1);
+		if(nb == nil)
+			break;
+		if(ob!=nil && ob!=rb)
+			vtblockput(ob);
+		ob = b;
+		b = nb;
+	}
+
+	if(b == rb){
+		vtblockput(rb);
+		return 0;
+	}
+
+	/*
+	 * Right now, e points at the root block rb, b is the new root block,
+	 * and ob points at b.  To update:
+	 *
+	 *	(i) change e to point at b
+	 *	(ii) zero the pointer ob -> b
+	 *	(iii) free the root block
+	 *
+	 * p (the block containing e) must be written before
+	 * anything else.
+ 	 */
+
+	/* (i) */
+	memmove(e->score, b->score, VtScoreSize);
+	vtentrypack(e, p->data, r->offset % r->epb);
+
+	/* (ii) */
+	memmove(ob->data, vtzeroscore, VtScoreSize);
+
+	/* (iii) */
+	vtblockput(rb);
+	if(ob!=nil && ob!=rb)
+		vtblockput(ob);
+	vtblockput(b);
+
+	if(DEPTH(e->type) == depth)
+		return 0;
+	return -1;
+}
+
+static int
+mkindices(VtEntry *e, u32int bn, int *index)
+{
+	int i, np;
+
+	memset(index, 0, VtPointerDepth*sizeof(int));
+
+	np = e->psize/VtScoreSize;
+	for(i=0; bn > 0; i++){
+		if(i >= VtPointerDepth){
+			werrstr(EBadAddr);
+			return -1;
+		}
+		index[i] = bn % np;
+		bn /= np;
+	}
+	return i;
+}
+	
+VtBlock *
+vtfileblock(VtFile *r, u32int bn, int mode)
+{
+	VtBlock *b, *bb;
+	int index[VtPointerDepth+1];
+	VtEntry e;
+	int i;
+	int m;
+
+	assert(ISLOCKED(r));
+	assert(bn != NilBlock);
+
+	b = fileload(r, &e);
+	if(b == nil)
+		return nil;
+
+	i = mkindices(&e, bn, index);
+	if(i < 0)
+		return nil;
+	if(i > DEPTH(e.type)){
+		if(mode == VtOREAD){
+			werrstr(EBadAddr);
+			goto Err;
+		}
+		index[i] = 0;
+		if(growdepth(r, b, &e, i) < 0)
+			goto Err;
+	}
+
+assert(b->type == VtDirType);
+
+	index[DEPTH(e.type)] = r->offset % r->epb;
+
+	/* mode for intermediate block */
+	m = mode;
+	if(m == VtOWRITE)
+		m = VtORDWR;
+
+	for(i=DEPTH(e.type); i>=0; i--){
+		bb = blockwalk(b, index[i], r->c, i==0 ? mode : m, &e);
+		if(bb == nil)
+			goto Err;
+		vtblockput(b);
+		b = bb;
+	}
+	return b;
+Err:
+	vtblockput(b);
+	return nil;
+}
+
+int
+vtfileblockhash(VtFile *r, u32int bn, uchar score[VtScoreSize])
+{
+	VtBlock *b, *bb;
+	int index[VtPointerDepth+1];
+	VtEntry e;
+	int i;
+
+	assert(ISLOCKED(r));
+	assert(bn != NilBlock);
+
+	b = fileload(r, &e);
+	if(b == nil)
+		return -1;
+
+	i = mkindices(&e, bn, index);
+	if(i < 0){
+		vtblockput(b);
+		return -1;
+	}
+	if(i > DEPTH(e.type)){
+		memmove(score, vtzeroscore, VtScoreSize);
+		vtblockput(b);
+		return 0;
+	}
+
+	index[DEPTH(e.type)] = r->offset % r->epb;
+
+	for(i=DEPTH(e.type); i>=1; i--){
+		bb = blockwalk(b, index[i], r->c, VtOREAD, &e);
+		if(bb == nil)
+			goto Err;
+		vtblockput(b);
+		b = bb;
+		if(memcmp(b->score, vtzeroscore, VtScoreSize) == 0)
+			break;
+	}
+
+	memmove(score, b->data+index[0]*VtScoreSize, VtScoreSize);
+	vtblockput(b);
+	return 0;
+
+Err:
+fprint(2, "vtfileblockhash: %r\n");
+	vtblockput(b);
+	return -1;
+}
+
+void
+vtfileincref(VtFile *r)
+{
+	qlock(&r->lk);
+	r->ref++;
+	qunlock(&r->lk);
+}
+
+void
+vtfileclose(VtFile *r)
+{
+	if(r == nil)
+		return;
+	qlock(&r->lk);
+	r->ref--;
+	if(r->ref){
+		qunlock(&r->lk);
+		return;
+	}
+	assert(r->ref == 0);
+	qunlock(&r->lk);
+	if(r->parent)
+		vtfileclose(r->parent);
+	memset(r, ~0, sizeof(*r));
+	vtfree(r);
+}
+
+/*
+ * Retrieve the block containing the entry for r.
+ * If a snapshot has happened, we might need
+ * to get a new copy of the block.  We avoid this
+ * in the common case by caching the score for
+ * the block and the last epoch in which it was valid.
+ *
+ * We use r->mode to tell the difference between active
+ * file system VtFiles (VtORDWR) and VtFiles for the
+ * snapshot file system (VtOREAD).
+ */
+static VtBlock*
+fileloadblock(VtFile *r, int mode)
+{
+	char e[ERRMAX];
+	u32int addr;
+	VtBlock *b;
+
+	switch(r->mode){
+	default:
+		assert(0);
+	case VtORDWR:
+		assert(r->mode == VtORDWR);
+		if(r->local == 1){
+			b = vtcacheglobal(r->c, r->score, VtDirType);
+			if(b == nil)
+				return nil;
+			return b;
+		}
+		assert(r->parent != nil);
+		if(vtfilelock(r->parent, VtORDWR) < 0)
+			return nil;
+		b = vtfileblock(r->parent, r->offset/r->epb, VtORDWR);
+		vtfileunlock(r->parent);
+		if(b == nil)
+			return nil;
+		memmove(r->score, b->score, VtScoreSize);
+		r->local = 1;
+		return b;
+
+	case VtOREAD:
+		if(mode == VtORDWR){
+			werrstr("read/write lock of read-only file");
+			return nil;
+		}
+		addr = vtglobaltolocal(r->score);
+		if(addr == NilBlock)
+			return vtcacheglobal(r->c, r->score, VtDirType);
+
+		b = vtcachelocal(r->c, addr, VtDirType);
+		if(b)
+			return b;
+
+		/*
+		 * If it failed because the epochs don't match, the block has been
+		 * archived and reclaimed.  Rewalk from the parent and get the
+		 * new pointer.  This can't happen in the VtORDWR case
+		 * above because blocks in the current epoch don't get
+		 * reclaimed.  The fact that we're VtOREAD means we're
+		 * a snapshot.  (Or else the file system is read-only, but then
+		 * the archiver isn't going around deleting blocks.)
+		 */
+		rerrstr(e, sizeof e);
+		if(strcmp(e, ELabelMismatch) == 0){
+			if(vtfilelock(r->parent, VtOREAD) < 0)
+				return nil;
+			b = vtfileblock(r->parent, r->offset/r->epb, VtOREAD);
+			vtfileunlock(r->parent);
+			if(b){
+				fprint(2, "vtfilealloc: lost %V found %V\n",
+					r->score, b->score);
+				memmove(r->score, b->score, VtScoreSize);
+				return b;
+			}
+		}
+		return nil;
+	}
+}
+
+int
+vtfilelock(VtFile *r, int mode)
+{
+	VtBlock *b;
+
+	if(mode == -1)
+		mode = r->mode;
+
+	b = fileloadblock(r, mode);
+	if(b == nil)
+		return -1;
+	/*
+	 * The fact that we are holding b serves as the
+	 * lock entitling us to write to r->b.
+	 */
+	assert(r->b == nil);
+	r->b = b;
+	return 0;
+}
+
+/*
+ * Lock two (usually sibling) VtFiles.  This needs special care
+ * because the Entries for both vtFiles might be in the same block.
+ * We also try to lock blocks in left-to-right order within the tree.
+ */
+int
+vtfilelock2(VtFile *r, VtFile *rr, int mode)
+{
+	VtBlock *b, *bb;
+
+	if(rr == nil)
+		return vtfilelock(r, mode);
+
+	if(mode == -1)
+		mode = r->mode;
+
+	if(r->parent==rr->parent && r->offset/r->epb == rr->offset/rr->epb){
+		b = fileloadblock(r, mode);
+		if(b == nil)
+			return -1;
+		vtblockduplock(b);
+		bb = b;
+	}else if(r->parent==rr->parent || r->offset > rr->offset){
+		bb = fileloadblock(rr, mode);
+		b = fileloadblock(r, mode);
+	}else{
+		b = fileloadblock(r, mode);
+		bb = fileloadblock(rr, mode);
+	}
+	if(b == nil || bb == nil){
+		if(b)
+			vtblockput(b);
+		if(bb)
+			vtblockput(bb);
+		return -1;
+	}
+
+	/*
+	 * The fact that we are holding b and bb serves
+	 * as the lock entitling us to write to r->b and rr->b.
+	 */
+	r->b = b;
+	rr->b = bb;
+	return 0;
+}
+
+void
+vtfileunlock(VtFile *r)
+{
+	VtBlock *b;
+
+	if(r->b == nil){
+		fprint(2, "vtfileunlock: already unlocked\n");
+		abort();
+	}
+	b = r->b;
+	r->b = nil;
+	vtblockput(b);
+}
+
+static VtBlock*
+fileload(VtFile *r, VtEntry *e)
+{
+	VtBlock *b;
+
+	assert(ISLOCKED(r));
+	b = r->b;
+	if(vtentryunpack(e, b->data, r->offset % r->epb) < 0)
+		return nil;
+	vtblockduplock(b);
+	return b;
+}
+
+static int
+sizetodepth(uvlong s, int psize, int dsize)
+{
+	int np;
+	int d;
+	
+	/* determine pointer depth */
+	np = psize/VtScoreSize;
+	s = (s + dsize - 1)/dsize;
+	for(d = 0; s > 1; d++)
+		s = (s + np - 1)/np;
+	return d;
+}
+
+long
+vtfileread(VtFile *f, void *data, long count, vlong offset)
+{
+	int frag;
+	VtBlock *b;
+	VtEntry e;
+
+	assert(ISLOCKED(f));
+
+	vtfilegetentry(f, &e);
+	if(count == 0)
+		return 0;
+	if(count < 0 || offset < 0){
+		werrstr("vtfileread: bad offset or count");
+		return -1;
+	}
+	if(offset >= e.size)
+		return 0;
+
+	if(offset+count > e.size)
+		count = e.size - offset;
+
+	frag = offset % e.dsize;
+	if(frag+count > e.dsize)
+		count = e.dsize - frag;
+
+	b = vtfileblock(f, offset/e.dsize, VtOREAD);
+	if(b == nil)
+		return -1;
+
+	memmove(data, b->data+frag, count);
+	vtblockput(b);
+	return count;
+}
+
+static long
+filewrite1(VtFile *f, void *data, long count, vlong offset)
+{
+	int frag, m;
+	VtBlock *b;
+	VtEntry e;
+
+	vtfilegetentry(f, &e);
+	if(count < 0 || offset < 0){
+		werrstr("vtfilewrite: bad offset or count");
+		return -1;
+	}
+
+	frag = offset % e.dsize;
+	if(frag+count > e.dsize)
+		count = e.dsize - frag;
+
+	m = VtORDWR;
+	if(frag == 0 && count == e.dsize)
+		m = VtOWRITE;
+
+	b = vtfileblock(f, offset/e.dsize, m);
+	if(b == nil)
+		return -1;
+
+	memmove(b->data+frag, data, count);
+
+	if(offset+count > e.size){
+		vtfilegetentry(f, &e);
+		e.size = offset+count;
+		vtfilesetentry(f, &e);
+	}
+
+	vtblockput(b);
+	return count;
+}
+
+long
+vtfilewrite(VtFile *f, void *data, long count, vlong offset)
+{
+	long tot, m;
+
+	assert(ISLOCKED(f));
+
+	tot = 0;
+	m = 0;
+	while(tot < count){
+		m = filewrite1(f, (char*)data+tot, count-tot, offset+tot);
+		if(m <= 0)
+			break;
+		tot += m;
+	}
+	if(tot==0)
+		return m;
+	return tot;
+}
+
+static int
+flushblock(VtCache *c, VtBlock *bb, uchar score[VtScoreSize], int ppb, int epb,
+	int type)
+{
+	u32int addr;
+	VtBlock *b;
+	VtEntry e;
+	int i;
+
+	addr = vtglobaltolocal(score);
+	if(addr == NilBlock)
+		return 0;
+
+	if(bb){
+		b = bb;
+		if(memcmp(b->score, score, VtScoreSize) != 0)
+			abort();
+	}else
+		if((b = vtcachelocal(c, addr, type)) == nil)
+			return -1;
+
+	switch(type){
+	case VtDataType:
+		break;
+
+	case VtDirType:
+		for(i=0; i<epb; i++){
+			if(vtentryunpack(&e, b->data, i) < 0)
+				goto Err;
+			if(flushblock(c, nil, e.score, e.psize/VtScoreSize, e.dsize/VtEntrySize,
+				e.type) < 0)
+				goto Err;
+		}
+		break;
+	
+	default:	/* VtPointerTypeX */
+		for(i=0; i<ppb; i++){
+			if(flushblock(c, nil, b->data+VtScoreSize*i, ppb, epb, type-1) < 0)
+				goto Err;
+		}
+		break;
+	}
+
+	if(vtblockwrite(b) < 0)
+		goto Err;
+	memmove(score, b->score, VtScoreSize);
+	if(b != bb)
+		vtblockput(b);
+	return 0;
+
+Err:
+	if(b != bb)
+		vtblockput(b);
+	return -1;
+}
+
+int
+vtfileflush(VtFile *f)
+{
+	int ret;
+	VtBlock *b;
+	VtEntry e;
+
+	assert(ISLOCKED(f));
+	b = fileload(f, &e);
+	if(!(e.flags&VtEntryLocal)){
+		vtblockput(b);
+		return 0;
+	}
+
+	ret = flushblock(f->c, nil, e.score, e.psize/VtScoreSize, e.dsize/VtEntrySize,
+		e.type);
+	if(!ret){
+		vtblockput(b);
+		return -1;
+	}
+
+	vtentrypack(&e, b->data, f->offset % f->epb);
+	vtblockput(b);
+	return 0;
+}
+
+int
+vtfileflushbefore(VtFile *r, u64int offset)
+{
+	VtBlock *b, *bb;
+	VtEntry e;
+	int i, base, depth, ppb, epb, ok;
+	int index[VtPointerDepth+1], index1[VtPointerDepth+1], j, ret;
+	VtBlock *bi[VtPointerDepth+2];
+	uchar *score;
+
+	assert(ISLOCKED(r));
+	if(offset == 0)
+		return 0;
+
+	b = fileload(r, &e);
+	if(b == nil)
+		return -1;
+
+	ret = -1;
+	memset(bi, 0, sizeof bi);
+	depth = DEPTH(e.type);
+	bi[depth+1] = b;
+	i = mkindices(&e, (offset-1)/e.dsize, index);
+	if(i < 0)
+		goto Err;
+	if(i > depth)
+		goto Err;
+	mkindices(&e, offset/e.dsize, index1);
+	ppb = e.psize / VtScoreSize;
+	epb = e.dsize / VtEntrySize;
+
+	index[depth] = r->offset % r->epb;
+	for(i=depth; i>=0; i--){
+		bb = blockwalk(b, index[i], r->c, VtORDWR, &e);
+		if(bb == nil)
+			goto Err;
+		bi[i] = bb;
+		b = bb;
+	}
+	ret = 0;
+
+	base = e.type&~VtTypeDepthMask;
+	for(i=0; i<depth; i++){
+		if(i == 0){
+			/* bottom: data or dir block */
+			ok = offset%e.dsize == 0;
+		}else{
+			/* middle: pointer blocks */
+			b = bi[i];
+			/*
+			 * flush everything up to the break
+			 */
+			for(j=0; j<index[i-1]; j++)
+				if(flushblock(r->c, nil, b->data+j*VtScoreSize, ppb, epb, base+i-1) < 0)
+					goto Err;
+			/*
+			 * if the rest of the block is already flushed,
+			 * we can flush the whole block.
+			 */
+			ok = 1;
+			for(; j<ppb; j++)
+				if(vtglobaltolocal(b->data+j*VtScoreSize) != NilBlock)
+					ok = 0;
+		}
+		if(ok){
+			if(i == depth)
+				score = e.score;
+			else
+				score = bi[i+1]->data+index[i]*VtScoreSize;
+			if(flushblock(r->c, bi[i], score, ppb, epb, base+i) < 0)
+				goto Err;
+		}
+	}
+
+Err:
+	/* top: entry.  do this always so that the score is up-to-date */
+	vtentrypack(&e, bi[depth+1]->data, index[depth]);
+	for(i=0; i<nelem(bi); i++)
+		if(bi[i])
+			vtblockput(bi[i]);
+	return ret;
+}
diff --git a/src/libventi/hangup.c b/src/libventi/hangup.c
@@ -0,0 +1,22 @@
+#include <u.h>
+#include <libc.h>
+#include <venti.h>
+#include "queue.h"
+
+void
+vthangup(VtConn *z)
+{
+	qlock(&z->lk);
+	z->state = VtStateClosed;
+	if(z->infd >= 0)
+		close(z->infd);
+	if(z->outfd >= 0 && z->outfd != z->infd)
+		close(z->outfd);
+	z->infd = -1;
+	z->outfd = -1;
+	if(z->writeq)
+		_vtqhangup(z->writeq);
+	if(z->readq)
+		_vtqhangup(z->readq);
+	qunlock(&z->lk);
+}
diff --git a/src/libventi/mem.c b/src/libventi/mem.c
@@ -0,0 +1,87 @@
+#include <u.h>
+#include <libc.h>
+#include <venti.h>
+
+enum {
+	IdealAlignment = 32,
+	ChunkSize 	= 128*1024,
+};
+
+
+void
+vtfree(void *p)
+{
+	if(p == 0)
+		return;
+	free(p);
+}
+
+void *
+vtmalloc(int size)
+{
+	void *p;
+
+	p = malloc(size);
+	if(p == 0)
+		sysfatal("vtmalloc: out of memory");
+	setmalloctag(p, getcallerpc(&size));
+	return p;
+}
+
+void *
+vtmallocz(int size)
+{
+	void *p = vtmalloc(size);
+	memset(p, 0, size);
+	setmalloctag(p, getcallerpc(&size));
+	return p;
+}
+
+void *
+vtrealloc(void *p, int size)
+{
+	if(p == nil)
+		return vtmalloc(size);
+	p = realloc(p, size);
+	if(p == 0)
+		sysfatal("vtMemRealloc: out of memory");
+	setrealloctag(p, getcallerpc(&size));
+	return p;
+}
+
+void *
+vtbrk(int n)
+{
+	static Lock lk;
+	static uchar *buf;
+	static int nbuf;
+	static int nchunk;
+	int align, pad;
+	void *p;
+
+	if(n >= IdealAlignment)
+		align = IdealAlignment;
+	else if(n > 8)
+		align = 8;
+	else	
+		align = 4;
+
+	lock(&lk);
+	pad = (align - (ulong)buf) & (align-1);
+	if(n + pad > nbuf) {
+		buf = vtmallocz(ChunkSize);
+		nbuf = ChunkSize;
+		pad = (align - (ulong)buf) & (align-1);
+		nchunk++;
+	}
+
+	assert(n + pad <= nbuf);	
+	
+	p = buf + pad;
+	buf += pad + n;
+	nbuf -= pad + n;
+	unlock(&lk);
+
+	return p;
+}
+
diff --git a/src/libventi/mkfile b/src/libventi/mkfile
@@ -0,0 +1,42 @@
+PLAN9=../..
+<$PLAN9/src/mkhdr
+
+LIB=libventi.a
+
+OFILES=\
+	cache.$O\
+	client.$O\
+	conn.$O\
+	dial.$O\
+	debug.$O\
+	dtype.$O\
+	entry.$O\
+	fcall.$O\
+	fcallfmt.$O\
+	file.$O\
+	hangup.$O\
+	mem.$O\
+	packet.$O\
+	queue.$O\
+	root.$O\
+	rpc.$O\
+	scorefmt.$O\
+	send.$O\
+	server.$O\
+	srvhello.$O\
+	strdup.$O\
+	string.$O\
+	version.$O\
+	zero.$O\
+	zeroscore.$O\
+
+HFILES=\
+	$PLAN9/include/venti.h\
+
+<$PLAN9/src/mksyslib
+
+send.$O: queue.h
+server.$O: queue.h
+queue.$O: queue.h
+
+
diff --git a/src/libventi/packet.c b/src/libventi/packet.c
@@ -0,0 +1,941 @@
+#include <u.h>
+#include <libc.h>
+#include <venti.h>
+#include <libsec.h>
+
+typedef struct Mem Mem;
+typedef struct Frag Frag;
+
+enum {
+	BigMemSize = MaxFragSize,
+	SmallMemSize = BigMemSize/8,
+	NLocalFrag = 2,
+};
+
+/* position to carve out of a Mem */
+enum {
+	PFront,
+	PMiddle,
+	PEnd,
+};
+
+struct Mem
+{
+	Lock lk;
+	int ref;
+	uchar *bp;
+	uchar *ep;
+	uchar *rp;
+	uchar *wp;
+	Mem *next;
+};
+
+enum {
+	FragLocalFree,
+	FragLocalAlloc,
+	FragGlobal,
+};
+	
+struct Frag
+{
+	int state;
+	Mem *mem;
+	uchar *rp;
+	uchar *wp;
+	Frag *next;
+	void (*free)(void*);
+	void *a;
+};
+
+struct Packet
+{
+	int size;
+	int asize;  /* allocated memory - greater than size unless foreign frags */
+	
+	Packet *next;
+	
+	Frag *first;
+	Frag *last;
+	
+	Frag local[NLocalFrag];
+};
+
+static Frag *fragalloc(Packet*, int n, int pos, Frag *next);
+static Frag *fragdup(Packet*, Frag*);
+static void fragfree(Frag*);
+
+static Mem *memalloc(int, int);
+static void memfree(Mem*);
+static int memhead(Mem *m, uchar *rp, int n);
+static int memtail(Mem *m, uchar *wp, int n);
+
+static char EPacketSize[] = "bad packet size";
+static char EPacketOffset[] = "bad packet offset";
+static char EBadSize[] = "bad size";
+
+static struct {
+	Lock lk;
+	Packet *packet;
+	int npacket;
+	Frag *frag;
+	int nfrag;
+	Mem *bigmem;
+	int nbigmem;
+	Mem *smallmem;
+	int nsmallmem;
+} freelist;
+
+#define FRAGSIZE(f) ((f)->wp - (f)->rp)
+#define FRAGASIZE(f) ((f)->mem ? (f)->mem->ep - (f)->mem->bp : 0)
+
+#define NOTFREE(p) assert((p)->size>=0)
+
+Packet *
+packetalloc(void)
+{
+	Packet *p;
+
+	lock(&freelist.lk);
+	p = freelist.packet;
+	if(p != nil)
+		freelist.packet = p->next;
+	else
+		freelist.npacket++;
+	unlock(&freelist.lk);
+
+	if(p == nil)
+		p = vtbrk(sizeof(Packet));
+	else
+		assert(p->size == -1);
+	p->size = 0;
+	p->asize = 0;
+	p->first = nil;
+	p->last = nil;
+	p->next = nil;
+
+if(1)fprint(2, "packetalloc %p from %08lux %08lux %08lux\n", p, *((uint*)&p+2), *((uint*)&p+3), *((uint*)&p+4));
+
+	return p;
+}
+
+void
+packetfree(Packet *p)
+{
+	Frag *f, *ff;
+
+if(1)fprint(2, "packetfree %p from %08lux\n", p, getcallerpc(&p));
+
+	if(p == nil)
+		return;
+
+	NOTFREE(p);
+	p->size = -1;
+
+	for(f=p->first; f!=nil; f=ff) {
+		ff = f->next;
+		fragfree(f);
+	}
+	p->first = nil;
+	p->last = nil;
+
+	lock(&freelist.lk);
+	p->next = freelist.packet;
+	freelist.packet = p;
+	unlock(&freelist.lk);
+}
+
+Packet *
+packetdup(Packet *p, int offset, int n)
+{	
+	Frag *f, *ff;
+	Packet *pp;
+
+	NOTFREE(p);
+	if(offset < 0 || n < 0 || offset+n > p->size) {
+		werrstr(EBadSize);
+		return nil;
+	}
+
+	pp = packetalloc();
+	if(n == 0)
+		return pp;
+
+	pp->size = n;
+
+	/* skip offset */
+	for(f=p->first; offset >= FRAGSIZE(f); f=f->next)
+		offset -= FRAGSIZE(f);
+	
+	/* first frag */
+	ff = fragdup(pp, f);
+	ff->rp += offset;
+	pp->first = ff;
+	n -= FRAGSIZE(ff);
+	pp->asize += FRAGASIZE(ff);
+
+	/* the remaining */
+	while(n > 0) {
+		f = f->next;
+		ff->next = fragdup(pp, f);
+		ff = ff->next;
+		n -= FRAGSIZE(ff);
+		pp->asize += FRAGASIZE(ff);
+	}
+	
+	/* fix up last frag: note n <= 0 */
+	ff->wp += n;
+	ff->next = nil;
+	pp->last = ff;
+
+	return pp;
+}
+
+Packet *
+packetsplit(Packet *p, int n)
+{
+	Packet *pp;
+	Frag *f, *ff;
+
+	NOTFREE(p);
+	if(n < 0 || n > p->size) {
+		werrstr(EPacketSize);
+		return nil;
+	}
+
+	pp = packetalloc();
+	if(n == 0)
+		return pp;
+
+	pp->size = n;
+	p->size -= n;
+	ff = nil;
+	for(f=p->first; n > 0 && n >= FRAGSIZE(f); f=f->next) {
+		n -= FRAGSIZE(f);
+		p->asize -= FRAGASIZE(f);
+		pp->asize += FRAGASIZE(f);
+		ff = f;
+	}
+
+	/* split shared frag */
+	if(n > 0) {
+		ff = f;
+		f = fragdup(pp, ff);
+		pp->asize += FRAGASIZE(ff);
+		ff->next = nil;
+		ff->wp = ff->rp + n;
+		f->rp += n;
+	}
+
+	pp->first = p->first;
+	pp->last = ff;
+	p->first = f;
+	return pp;
+}
+
+int
+packetconsume(Packet *p, uchar *buf, int n)
+{
+	NOTFREE(p);
+	if(buf && packetcopy(p, buf, 0, n) < 0)
+		return 0;
+	return packettrim(p, n, p->size-n);
+}
+
+int
+packettrim(Packet *p, int offset, int n)
+{
+	Frag *f, *ff;
+
+	NOTFREE(p);
+	if(offset < 0 || offset > p->size) {
+		werrstr(EPacketOffset);
+		return -1;
+	}
+
+	if(n < 0 || offset + n > p->size) {
+		werrstr(EPacketOffset);
+		return -1;
+	}
+
+	p->size = n;
+
+	/* easy case */
+	if(n == 0) {
+		for(f=p->first; f != nil; f=ff) {
+			ff = f->next;
+			fragfree(f);
+		}
+		p->first = p->last = nil;
+		p->asize = 0;
+		return 0;
+	}
+	
+	/* free before offset */
+	for(f=p->first; offset >= FRAGSIZE(f); f=ff) {
+		p->asize -= FRAGASIZE(f);
+		offset -= FRAGSIZE(f);
+		ff = f->next;
+		fragfree(f);
+	}
+
+	/* adjust frag */
+	f->rp += offset;
+	p->first = f;
+
+	/* skip middle */
+	for(; n > 0 && n > FRAGSIZE(f); f=f->next)
+		n -= FRAGSIZE(f);
+
+	/* adjust end */
+	f->wp = f->rp + n;
+	p->last = f;
+	ff = f->next;
+	f->next = nil;
+
+	/* free after */
+	for(f=ff; f != nil; f=ff) {
+		p->asize -= FRAGASIZE(f);
+		ff = f->next;
+		fragfree(f);
+	}
+	return 0;
+}
+
+uchar *
+packetheader(Packet *p, int n)
+{
+	Frag *f;
+	Mem *m;
+
+	NOTFREE(p);
+	if(n <= 0 || n > MaxFragSize) {
+		werrstr(EPacketSize);
+		return nil;
+	}
+
+	p->size += n;
+	
+	/* try and fix in current frag */
+	f = p->first;
+	if(f != nil) {
+		m = f->mem;
+		if(n <= f->rp - m->bp)
+		if(m->ref == 1 || memhead(m, f->rp, n) >= 0) {
+			f->rp -= n;
+			return f->rp;
+		}
+	}
+
+	/* add frag to front */
+	f = fragalloc(p, n, PEnd, p->first);
+	p->asize += FRAGASIZE(f);
+	if(p->first == nil)
+		p->last = f;
+	p->first = f;
+	return f->rp;
+}
+
+uchar *
+packettrailer(Packet *p, int n)
+{
+	Mem *m;
+	Frag *f;
+
+	NOTFREE(p);
+	if(n <= 0 || n > MaxFragSize) {
+		werrstr(EPacketSize);
+		return nil;
+	}
+
+	p->size += n;
+	
+	/* try and fix in current frag */
+	if(p->first != nil) {
+		f = p->last;
+		m = f->mem;
+		if(n <= m->ep - f->wp)
+		if(m->ref == 1 || memtail(m, f->wp, n) >= 0) {
+			f->wp += n;
+			return f->wp - n;
+		}
+	}
+
+	/* add frag to end */
+	f = fragalloc(p, n, (p->first == nil)?PMiddle:PFront, nil);
+	p->asize += FRAGASIZE(f);
+	if(p->first == nil)
+		p->first = f;
+	else
+		p->last->next = f;
+	p->last = f;
+	return f->rp;
+}
+
+int
+packetprefix(Packet *p, uchar *buf, int n)
+{
+	Frag *f;
+	int nn;
+	Mem *m;
+
+	NOTFREE(p);
+	if(n <= 0)
+		return 0;
+
+	p->size += n;
+
+	/* try and fix in current frag */
+	f = p->first;
+	if(f != nil) {
+		m = f->mem;
+		nn = f->rp - m->bp;
+		if(nn > n)
+			nn = n;
+		if(m->ref == 1 || memhead(m, f->rp, nn) >= 0) {
+			f->rp -= nn;
+			n -= nn;
+			memmove(f->rp, buf+n, nn);
+		}
+	}
+
+	while(n > 0) {
+		nn = n;
+		if(nn > MaxFragSize)
+			nn = MaxFragSize;
+		f = fragalloc(p, nn, PEnd, p->first);	
+		p->asize += FRAGASIZE(f);
+		if(p->first == nil)
+			p->last = f;
+		p->first = f;
+		n -= nn;
+		memmove(f->rp, buf+n, nn);
+	}
+	return 0;
+}
+
+int
+packetappend(Packet *p, uchar *buf, int n)
+{
+	Frag *f;
+	int nn;
+	Mem *m;
+
+	NOTFREE(p);
+	if(n <= 0)
+		return 0;
+
+	p->size += n;
+	/* try and fix in current frag */
+	if(p->first != nil) {
+		f = p->last;
+		m = f->mem;
+		nn = m->ep - f->wp;
+		if(nn > n)
+			nn = n;
+		if(m->ref == 1 || memtail(m, f->wp, nn) >= 0) {
+			memmove(f->wp, buf, nn);
+			f->wp += nn;
+			buf += nn;
+			n -= nn;
+		}
+	}
+	
+	while(n > 0) {
+		nn = n;
+		if(nn > MaxFragSize)
+			nn = MaxFragSize;
+		f = fragalloc(p, nn, (p->first == nil)?PMiddle:PFront, nil);
+		p->asize += FRAGASIZE(f);
+		if(p->first == nil)
+			p->first = f;
+		else
+			p->last->next = f;
+		p->last = f;
+		memmove(f->rp, buf, nn);
+		buf += nn;
+		n -= nn;
+	}
+	return 0;
+}
+
+int
+packetconcat(Packet *p, Packet *pp)
+{
+	NOTFREE(p);
+	NOTFREE(pp);
+	if(pp->size == 0)
+		return 0;
+	p->size += pp->size;
+	p->asize += pp->asize;
+
+	if(p->first != nil)
+		p->last->next = pp->first;
+	else
+		p->first = pp->first;
+	p->last = pp->last;
+	pp->size = 0;
+	pp->asize = 0;
+	pp->first = nil;
+	pp->last = nil;
+	return 0;
+}
+
+uchar *
+packetpeek(Packet *p, uchar *buf, int offset, int n)
+{
+	Frag *f;
+	int nn;
+	uchar *b;
+
+	NOTFREE(p);
+	if(n == 0)
+		return buf;
+
+	if(offset < 0 || offset >= p->size) {
+		werrstr(EPacketOffset);
+		return nil;
+	}
+
+	if(n < 0 || offset + n > p->size) {
+		werrstr(EPacketSize);
+		return nil;
+	}
+	
+	/* skip up to offset */
+	for(f=p->first; offset >= FRAGSIZE(f); f=f->next)
+		offset -= FRAGSIZE(f);
+
+	/* easy case */
+	if(offset + n <= FRAGSIZE(f))
+		return f->rp + offset;
+
+	for(b=buf; n>0; n -= nn) {
+		nn = FRAGSIZE(f) - offset;
+		if(nn > n)
+			nn = n;
+		memmove(b, f->rp+offset, nn);
+		offset = 0;
+		f = f->next;
+		b += nn;
+	}
+
+	return buf;
+}
+
+int
+packetcopy(Packet *p, uchar *buf, int offset, int n)
+{
+	uchar *b;
+
+	NOTFREE(p);
+	b = packetpeek(p, buf, offset, n);
+	if(b == nil)
+		return -1;
+	if(b != buf)
+		memmove(buf, b, n);
+	return 0;
+}
+
+int
+packetfragments(Packet *p, IOchunk *io, int nio, int offset)
+{
+	Frag *f;
+	int size;
+	IOchunk *eio;
+
+	NOTFREE(p);
+	if(p->size == 0 || nio <= 0)
+		return 0;
+	
+	if(offset < 0 || offset > p->size) {
+		werrstr(EPacketOffset);
+		return -1;
+	}
+
+	for(f=p->first; offset >= FRAGSIZE(f); f=f->next)
+		offset -= FRAGSIZE(f);
+
+	size = 0;
+	eio = io + nio;
+	for(; f != nil && io < eio; f=f->next) {
+		io->addr = f->rp + offset;
+		io->len = f->wp - (f->rp + offset);	
+		offset = 0;
+		size += io->len;
+		io++;
+	}
+
+	return size;
+}
+
+void
+packetstats(void)
+{
+	Packet *p;
+	Frag *f;
+	Mem *m;
+
+	int np, nf, nsm, nbm;
+
+	lock(&freelist.lk);
+	np = 0;
+	for(p=freelist.packet; p; p=p->next)
+		np++;
+	nf = 0;
+	for(f=freelist.frag; f; f=f->next)
+		nf++;
+	nsm = 0;
+	for(m=freelist.smallmem; m; m=m->next)
+		nsm++;
+	nbm = 0;
+	for(m=freelist.bigmem; m; m=m->next)
+		nbm++;
+	
+	fprint(2, "packet: %d/%d frag: %d/%d small mem: %d/%d big mem: %d/%d\n",
+		np, freelist.npacket,
+		nf, freelist.nfrag,
+		nsm, freelist.nsmallmem,
+		nbm, freelist.nbigmem);
+
+	unlock(&freelist.lk);
+}
+
+
+uint
+packetsize(Packet *p)
+{
+	NOTFREE(p);
+	if(0) {
+		Frag *f;
+		int size = 0;
+	
+		for(f=p->first; f; f=f->next)
+			size += FRAGSIZE(f);
+		if(size != p->size)
+			fprint(2, "packetsize %d %d\n", size, p->size);
+		assert(size == p->size);
+	}
+	return p->size;
+}
+
+uint
+packetasize(Packet *p)
+{
+	NOTFREE(p);
+	if(0) {
+		Frag *f;
+		int asize = 0;
+	
+		for(f=p->first; f; f=f->next)
+			asize += FRAGASIZE(f);
+		if(asize != p->asize)
+			fprint(2, "packetasize %d %d\n", asize, p->asize);
+		assert(asize == p->asize);
+	}
+	return p->asize;
+}
+
+void
+packetsha1(Packet *p, uchar digest[VtScoreSize])
+{
+	DigestState ds;
+	Frag *f;
+	int size;
+
+	NOTFREE(p);
+	memset(&ds, 0, sizeof ds);
+	size = p->size;
+	for(f=p->first; f; f=f->next) {
+		sha1(f->rp, FRAGSIZE(f), nil, &ds);
+		size -= FRAGSIZE(f);
+	}
+	assert(size == 0);
+	sha1(nil, 0, digest, &ds);
+}
+
+int
+packetcmp(Packet *pkt0, Packet *pkt1)
+{
+	Frag *f0, *f1;
+	int n0, n1, x;
+
+	NOTFREE(pkt0);
+	NOTFREE(pkt1);
+	f0 = pkt0->first;
+	f1 = pkt1->first;
+
+	if(f0 == nil)
+		return (f1 == nil)?0:-1;
+	if(f1 == nil)
+		return 1;
+	n0 = FRAGSIZE(f0);
+	n1 = FRAGSIZE(f1);
+
+	for(;;) {
+		if(n0 < n1) {
+			x = memcmp(f0->wp - n0, f1->wp - n1, n0);
+			if(x != 0)
+				return x;
+			n1 -= n0;
+			f0 = f0->next;
+			if(f0 == nil)
+				return -1;
+			n0 = FRAGSIZE(f0);
+		} else if (n0 > n1) {
+			x = memcmp(f0->wp - n0, f1->wp - n1, n1);
+			if(x != 0)
+				return x;
+			n0 -= n1;
+			f1 = f1->next;
+			if(f1 == nil)
+				return 1;
+			n1 = FRAGSIZE(f1);
+		} else { /* n0 == n1 */
+			x = memcmp(f0->wp - n0, f1->wp - n1, n0);
+			if(x != 0)
+				return x;
+			f0 = f0->next;
+			f1 = f1->next;
+			if(f0 == nil)
+				return (f1 == nil)?0:-1;
+			if(f1 == nil)
+				return 1;
+			n0 = FRAGSIZE(f0);
+			n1 = FRAGSIZE(f1);
+		}
+	}
+	return 0; /* for ken */
+}
+	
+
+static Frag *
+fragalloc(Packet *p, int n, int pos, Frag *next)
+{
+	Frag *f, *ef;
+	Mem *m;
+
+	/* look for local frag */
+	f = &p->local[0];
+	ef = &p->local[NLocalFrag];
+	for(;f<ef; f++) {
+		if(f->state == FragLocalFree) {
+			f->state = FragLocalAlloc;
+			goto Found;
+		}
+	}
+	lock(&freelist.lk);	
+	f = freelist.frag;
+	if(f != nil)
+		freelist.frag = f->next;
+	else
+		freelist.nfrag++;
+	unlock(&freelist.lk);
+
+	if(f == nil) {
+		f = vtbrk(sizeof(Frag));
+		f->state = FragGlobal;
+	}
+
+Found:
+	f->next = next;
+
+	if(n == 0){
+		f->mem = 0;
+		f->rp = 0;
+		f->wp = 0;
+		return f;
+	}
+
+	if(pos == PEnd && next == nil)
+		pos = PMiddle;
+	m = memalloc(n, pos);
+	f->mem = m;
+	f->rp = m->rp;
+	f->wp = m->wp;
+	return f;
+}
+
+Packet*
+packetforeign(uchar *buf, int n, void (*free)(void *a), void *a)
+{
+	Packet *p;
+	Frag *f;
+
+	p = packetalloc();
+	f = fragalloc(p, 0, 0, nil);
+	f->free = free;
+	f->a = a;
+	f->next = nil;
+	f->rp = buf;
+	f->wp = buf+n;
+
+	p->first = f;
+	p->size = n;
+	return p;
+}
+
+static Frag *
+fragdup(Packet *p, Frag *f)
+{
+	Frag *ff;
+	Mem *m;
+
+	m = f->mem;	
+
+	/*
+	 * m->rp && m->wp can be out of date when ref == 1
+	 * also, potentially reclaims space from previous frags
+	 */
+	if(m && m->ref == 1) {
+		m->rp = f->rp;
+		m->wp = f->wp;
+	}
+
+	ff = fragalloc(p, 0, 0, nil);
+	*ff = *f;
+	if(m){
+		lock(&m->lk);
+		m->ref++;
+		unlock(&m->lk);
+	}
+	return ff;
+}
+
+
+static void
+fragfree(Frag *f)
+{
+	if(f->mem == nil){
+		if(f->free)
+			(*f->free)(f->a);
+	}else{
+		memfree(f->mem);
+		f->mem = 0;
+	}
+
+	if(f->state == FragLocalAlloc) {
+		f->state = FragLocalFree;
+		return;
+	}
+
+	lock(&freelist.lk);
+	f->next = freelist.frag;
+	freelist.frag = f;
+	unlock(&freelist.lk);	
+}
+
+static Mem *
+memalloc(int n, int pos)
+{
+	Mem *m;
+	int nn;
+
+	if(n < 0 || n > MaxFragSize) {
+		werrstr(EPacketSize);
+		return 0;
+	}
+	if(n <= SmallMemSize) {
+		lock(&freelist.lk);
+		m = freelist.smallmem;
+		if(m != nil)
+			freelist.smallmem = m->next;
+		else
+			freelist.nsmallmem++;
+		unlock(&freelist.lk);
+		nn = SmallMemSize;
+	} else {
+		lock(&freelist.lk);
+		m = freelist.bigmem;
+		if(m != nil)
+			freelist.bigmem = m->next;
+		else
+			freelist.nbigmem++;
+		unlock(&freelist.lk);
+		nn = BigMemSize;
+	}
+
+	if(m == nil) {
+		m = vtbrk(sizeof(Mem));
+		m->bp = vtbrk(nn);
+		m->ep = m->bp + nn;
+	}
+	assert(m->ref == 0);	
+	m->ref = 1;
+
+	switch(pos) {
+	default:
+		assert(0);
+	case PFront:
+		m->rp = m->bp;
+		break;
+	case PMiddle:
+		/* leave a little bit at end */
+		m->rp = m->ep - n - 32;
+		break;
+	case PEnd:
+		m->rp = m->ep - n;
+		break; 
+	}
+	/* check we did not blow it */
+	if(m->rp < m->bp)
+		m->rp = m->bp;
+	m->wp = m->rp + n;
+	assert(m->rp >= m->bp && m->wp <= m->ep);
+	return m;
+}
+
+static void
+memfree(Mem *m)
+{
+	lock(&m->lk);
+	m->ref--;
+	if(m->ref > 0) {
+		unlock(&m->lk);
+		return;
+	}
+	unlock(&m->lk);
+	assert(m->ref == 0);
+
+	switch(m->ep - m->bp) {
+	default:
+		assert(0);
+	case SmallMemSize:
+		lock(&freelist.lk);
+		m->next = freelist.smallmem;
+		freelist.smallmem = m;
+		unlock(&freelist.lk);
+		break;
+	case BigMemSize:
+		lock(&freelist.lk);
+		m->next = freelist.bigmem;
+		freelist.bigmem = m;
+		unlock(&freelist.lk);
+		break;
+	}
+}
+
+static int
+memhead(Mem *m, uchar *rp, int n)
+{
+	lock(&m->lk);
+	if(m->rp != rp) {
+		unlock(&m->lk);
+		return -1;
+	}
+	m->rp -= n;
+	unlock(&m->lk);
+	return 0;
+}
+
+static int
+memtail(Mem *m, uchar *wp, int n)
+{
+	lock(&m->lk);
+	if(m->wp != wp) {
+		unlock(&m->lk);
+		return -1;
+	}
+	m->wp += n;
+	unlock(&m->lk);
+	return 0;
+}
diff --git a/src/libventi/queue.c b/src/libventi/queue.c
@@ -0,0 +1,103 @@
+#include <u.h>
+#include <libc.h>
+#include <venti.h>
+#include "queue.h"
+
+typedef struct Qel Qel;
+struct Qel
+{
+	Qel *next;
+	void *p;
+};
+
+struct Queue
+{
+	int hungup;
+	QLock lk;
+	Rendez r;
+	Qel *head;
+	Qel *tail;
+};
+
+Queue*
+_vtqalloc(void)
+{
+	Queue *q;
+
+	q = vtmallocz(sizeof(Queue));
+	q->r.l = &q->lk;
+	return q;
+}
+
+int
+_vtqsend(Queue *q, void *p)
+{
+	Qel *e;
+
+	e = vtmalloc(sizeof(Qel));
+	qlock(&q->lk);
+	if(q->hungup){
+		werrstr("hungup queue");
+		qunlock(&q->lk);
+		return -1;
+	}
+	e->p = p;
+	e->next = nil;
+	if(q->head == nil)
+		q->head = e;
+	else
+		q->tail->next = e;
+	q->tail = e;
+	rwakeup(&q->r);
+	qunlock(&q->lk);
+	return 0;
+}
+
+void*
+_vtqrecv(Queue *q)
+{
+	void *p;
+	Qel *e;
+
+	qlock(&q->lk);
+	while(q->head == nil && !q->hungup)
+		rsleep(&q->r);
+	if(q->hungup){
+		qunlock(&q->lk);
+		return nil;
+	}
+	e = q->head;
+	q->head = e->next;
+	qunlock(&q->lk);
+	p = e->p;
+	vtfree(e);
+	return p;
+}
+
+void*
+_vtnbqrecv(Queue *q)
+{
+	void *p;
+	Qel *e;
+
+	qlock(&q->lk);
+	if(q->head == nil){
+		qunlock(&q->lk);
+		return nil;
+	}
+	e = q->head;
+	q->head = e->next;
+	qunlock(&q->lk);
+	p = e->p;
+	vtfree(e);
+	return p;
+}
+
+void
+_vtqhangup(Queue *q)
+{
+	qlock(&q->lk);
+	q->hungup = 1;
+	rwakeupall(&q->r);
+	qunlock(&q->lk);
+}
diff --git a/src/libventi/queue.h b/src/libventi/queue.h
@@ -0,0 +1,6 @@
+typedef struct Queue Queue;
+Queue *_vtqalloc(void);
+int _vtqsend(Queue*, void*);
+void *_vtqrecv(Queue*);
+void _vtqhangup(Queue*);
+void *_vtnbqrecv(Queue*);
diff --git a/src/libventi/root.c b/src/libventi/root.c
@@ -0,0 +1,67 @@
+#include <u.h>
+#include <libc.h>
+#include <venti.h>
+#include "cvt.h"
+
+static int
+checksize(int n)
+{
+	if(n < 256 || n > VtMaxLumpSize) {
+		werrstr("bad block size");
+		return -1;
+	}
+	return 0;
+}
+
+void
+vtrootpack(VtRoot *r, uchar *p)
+{
+	uchar *op = p;
+
+	U16PUT(p, VtRootVersion);
+	p += 2;
+	memmove(p, r->name, sizeof(r->name));
+	p += sizeof(r->name);
+	memmove(p, r->type, sizeof(r->type));
+	p += sizeof(r->type);
+	memmove(p, r->score, VtScoreSize);
+	p +=  VtScoreSize;
+	U16PUT(p, r->blocksize);
+	p += 2;
+	memmove(p, r->prev, VtScoreSize);
+	p += VtScoreSize;
+
+	assert(p-op == VtRootSize);
+}
+
+int
+vtrootunpack(VtRoot *r, uchar *p)
+{
+	uchar *op = p;
+	uint vers;
+	memset(r, 0, sizeof(*r));
+
+	vers = U16GET(p);
+	if(vers != VtRootVersion) {
+		werrstr("unknown root version");
+		return 0;
+	}
+	p += 2;
+	memmove(r->name, p, sizeof(r->name));
+	r->name[sizeof(r->name)-1] = 0;
+	p += sizeof(r->name);
+	memmove(r->type, p, sizeof(r->type));
+	r->type[sizeof(r->type)-1] = 0;
+	p += sizeof(r->type);
+	memmove(r->score, p, VtScoreSize);
+	p +=  VtScoreSize;
+	r->blocksize = U16GET(p);
+	if(checksize(r->blocksize) < 0)
+		return -1;
+	p += 2;
+	memmove(r->prev, p, VtScoreSize);
+	p += VtScoreSize;
+
+	assert(p-op == VtRootSize);
+	return 0;
+}
diff --git a/src/libventi/rpc.c b/src/libventi/rpc.c
@@ -0,0 +1,155 @@
+/*
+ * Multiplexed Venti client.  It would be nice if we 
+ * could turn this into a generic library routine rather
+ * than keep it Venti specific.  A user-level 9P client
+ * could use something like this too.
+ *
+ * This is a little more complicated than it might be
+ * because we want it to work well within and without libthread.
+ *
+ * The mux code is inspired by tra's, which is inspired by the Plan 9 kernel.
+ */
+
+#include <u.h>
+#include <libc.h>
+#include <venti.h>
+
+typedef struct Rwait Rwait;
+struct Rwait
+{
+	Rendez r;
+	Packet *p;
+	int done;
+	int sleeping;
+};
+
+static int gettag(VtConn*, Rwait*);
+static void puttag(VtConn*, Rwait*, int);
+static void muxrpc(VtConn*, Packet*);
+Packet *vtrpc(VtConn*, Packet*);
+
+Packet*
+vtrpc(VtConn *z, Packet *p)
+{
+	int i;
+	uchar tag, buf[2], *top;
+	Rwait *r;
+
+	/* must malloc because stack could be private */
+	r = vtmallocz(sizeof(Rwait));
+
+	qlock(&z->lk);
+	r->r.l = &z->lk;
+	tag = gettag(z, r);
+
+	/* slam tag into packet */
+	top = packetpeek(p, buf, 0, 2);
+	if(top == nil){
+		packetfree(p);
+		return nil;
+	}
+	if(top == buf){
+		werrstr("first two bytes must be in same packet fragment");
+		packetfree(p);
+		return nil;
+	}
+	top[1] = tag;
+	qunlock(&z->lk);
+	if(vtsend(z, p) < 0)
+		return nil;
+
+	qlock(&z->lk);
+	/* wait for the muxer to give us our packet */
+	r->sleeping = 1;
+	z->nsleep++;
+	while(z->muxer && !r->done)
+		rsleep(&r->r);
+	z->nsleep--;
+	r->sleeping = 0;
+
+	/* if not done, there's no muxer: start muxing */
+	if(!r->done){
+		if(z->muxer)
+			abort();
+		z->muxer = 1;
+		while(!r->done){
+			qunlock(&z->lk);
+			if((p = vtrecv(z)) == nil){
+				z->muxer = 0;
+				return nil;
+			}
+			qlock(&z->lk);
+			muxrpc(z, p);
+		}
+		z->muxer = 0;
+		/* if there is anyone else sleeping, wake them to mux */
+		if(z->nsleep){
+			for(i=0; i<256; i++)
+				if(z->wait[i] != nil && ((Rwait*)z->wait[i])->sleeping)
+					break;
+			if(i==256)
+				fprint(2, "libventi: nsleep botch\n");
+			else
+				rwakeup(&((Rwait*)z->wait[i])->r);
+		}	
+	}
+
+	p = r->p;
+	puttag(z, r, tag);
+	vtfree(r);
+	qunlock(&z->lk);
+	return p;
+}
+
+static int 
+gettag(VtConn *z, Rwait *r)
+{
+	int i;
+
+Again:
+	while(z->ntag == 256)
+		rsleep(&z->tagrend);
+	for(i=0; i<256; i++)
+		if(z->wait[i] == 0){
+			z->ntag++;
+			z->wait[i] = r;
+			return i;
+		}
+	fprint(2, "libventi: ntag botch\n");
+	goto Again;
+}
+
+static void
+puttag(VtConn *z, Rwait *r, int tag)
+{
+	assert(z->wait[tag] == r);
+	z->wait[tag] = nil;
+	z->ntag--;
+	rwakeup(&z->tagrend);
+}
+
+static void
+muxrpc(VtConn *z, Packet *p)
+{
+	uchar tag, buf[2], *top;
+	Rwait *r;
+
+	if((top = packetpeek(p, buf, 0, 2)) == nil){
+		fprint(2, "libventi: short packet in vtrpc\n");
+		packetfree(p);
+		return;
+	}
+
+	tag = top[1];
+	if((r = z->wait[tag]) == nil){
+		fprint(2, "libventi: unexpected packet tag %d in vtrpc\n", tag);
+abort();
+		packetfree(p);
+		return;
+	}
+
+	r->p = p;
+	r->done = 1;
+	rwakeup(&r->r);
+}
+
diff --git a/src/libventi/scorefmt.c b/src/libventi/scorefmt.c
@@ -0,0 +1,18 @@
+#include <u.h>
+#include <libc.h>
+#include <venti.h>
+
+int
+vtscorefmt(Fmt *f)
+{
+	uchar *v;
+	int i;
+
+	v = va_arg(f->args, uchar*);
+	if(v == nil)
+		fmtprint(f, "*");
+	else
+		for(i = 0; i < VtScoreSize; i++)
+			fmtprint(f, "%2.2ux", v[i]);
+	return 0;
+}
diff --git a/src/libventi/send.c b/src/libventi/send.c
@@ -0,0 +1,212 @@
+#include <u.h>
+#include <libc.h>
+#include <venti.h>
+#include "queue.h"
+
+static int
+_vtsend(VtConn *z, Packet *p)
+{
+	IOchunk ioc;
+	int n;
+	uchar buf[2];
+	
+	if(z->state != VtStateConnected) {
+		werrstr("session not connected");
+		return -1;
+	}
+
+	/* add framing */
+	n = packetsize(p);
+	if(n >= (1<<16)) {
+		werrstr("packet too large");
+		packetfree(p);
+		return -1;
+	}
+	buf[0] = n>>8;
+	buf[1] = n;
+	packetprefix(p, buf, 2);
+
+	for(;;){
+		n = packetfragments(p, &ioc, 1, 0);
+		if(n == 0)
+			break;
+		if(write(z->outfd, ioc.addr, ioc.len) < ioc.len){
+			packetfree(p);
+			return 0;
+		}
+		packetconsume(p, nil, ioc.len);
+	}
+	packetfree(p);
+	return 1;
+}
+
+static Packet*
+_vtrecv(VtConn *z)
+{
+	uchar buf[10], *b;
+	int n;
+	Packet *p;
+	int size, len;
+
+	if(z->state != VtStateConnected) {
+		werrstr("session not connected");
+		return nil;
+	}
+
+	p = z->part;
+	/* get enough for head size */
+	size = packetsize(p);
+	while(size < 2) {
+		b = packettrailer(p, MaxFragSize);
+		assert(b != nil);
+		n = read(z->infd, b, MaxFragSize);
+		if(n <= 0)
+			goto Err;
+		size += n;
+		packettrim(p, 0, size);
+	}
+
+	if(packetconsume(p, buf, 2) < 0)
+		goto Err;
+	len = (buf[0] << 8) | buf[1];
+	size -= 2;
+
+	while(size < len) {
+		n = len - size;
+		if(n > MaxFragSize)
+			n = MaxFragSize;
+		b = packettrailer(p, n);
+		if(readn(z->infd, b, n) != n)
+			goto Err;
+		size += n;
+	}
+	p = packetsplit(p, len);
+	return p;
+Err:	
+	return nil;	
+}
+
+/*
+ * If you fork off two procs running vtrecvproc and vtsendproc,
+ * then vtrecv/vtsend (and thus vtrpc) will never block except on 
+ * rendevouses, which is nice when it's running in one thread of many.
+ */
+void
+vtrecvproc(void *v)
+{
+	Packet *p;
+	VtConn *z;
+	Queue *q;
+
+	z = v;
+	q = _vtqalloc();
+
+	qlock(&z->lk);
+	z->readq = q;
+	qlock(&z->inlk);
+	rwakeup(&z->rpcfork);
+	qunlock(&z->lk);
+
+	while((p = _vtrecv(z)) != nil)
+		if(_vtqsend(q, p) < 0){
+			packetfree(p);
+			break;
+		}
+	qunlock(&z->inlk);
+	qlock(&z->lk);
+	_vtqhangup(q);
+	while((p = _vtnbqrecv(q)) != nil)
+		packetfree(p);
+	vtfree(q);
+	z->readq = nil;
+	rwakeup(&z->rpcfork);
+	qunlock(&z->lk);
+	vthangup(z);
+}
+
+void
+vtsendproc(void *v)
+{
+	Queue *q;
+	Packet *p;
+	VtConn *z;
+
+	z = v;
+	q = _vtqalloc();
+
+	qlock(&z->lk);
+	z->writeq = q;
+	qlock(&z->outlk);
+	rwakeup(&z->rpcfork);
+	qunlock(&z->lk);
+
+	while((p = _vtqrecv(q)) != nil)
+		if(_vtsend(z, p) < 0)
+			break;
+	qunlock(&z->outlk);
+	qlock(&z->lk);
+	_vtqhangup(q);
+	while((p = _vtnbqrecv(q)) != nil)
+		packetfree(p);
+	vtfree(q);
+	z->writeq = nil;
+	rwakeup(&z->rpcfork);
+	qunlock(&z->lk);
+	return;
+}
+
+Packet*
+vtrecv(VtConn *z)
+{
+	Packet *p;
+
+	qlock(&z->lk);
+	if(z->state != VtStateConnected){
+		werrstr("not connected");
+		qunlock(&z->lk);
+		return nil;
+	}
+	if(z->readq){
+		qunlock(&z->lk);
+		return _vtqrecv(z->readq);
+	}
+
+	qlock(&z->inlk);
+	qunlock(&z->lk);
+	p = _vtrecv(z);
+	qunlock(&z->inlk);
+	if(!p)
+		vthangup(z);
+	return p;
+}
+
+int
+vtsend(VtConn *z, Packet *p)
+{
+	qlock(&z->lk);
+	if(z->state != VtStateConnected){
+		packetfree(p);
+		werrstr("not connected");
+		qunlock(&z->lk);
+		return -1;
+	}
+	if(z->writeq){
+		qunlock(&z->lk);
+		if(_vtqsend(z->writeq, p) < 0){
+			packetfree(p);
+			return -1;
+		}
+		return 0;
+	}
+
+	qlock(&z->outlk);
+	qunlock(&z->lk);
+	if(_vtsend(z, p) < 0){
+		qunlock(&z->outlk);
+		vthangup(z);
+		return -1;	
+	}
+	qunlock(&z->outlk);
+	return 0;
+}
+
diff --git a/src/libventi/server.c b/src/libventi/server.c
@@ -0,0 +1,172 @@
+#include <u.h>
+#include <libc.h>
+#include <venti.h>
+#include <thread.h>
+#include "queue.h"
+
+enum
+{
+	STACK = 8192,
+};
+
+typedef struct VtSconn VtSconn;
+struct VtSconn
+{
+	int ctl;
+	char dir[NETPATHLEN];
+	VtSrv *srv;
+	VtConn *c;
+};
+
+struct VtSrv
+{
+	int afd;
+	int dead;
+	char adir[NETPATHLEN];
+	Queue *q;	/* Queue(VtReq*) */
+};
+
+static void listenproc(void*);
+static void connproc(void*);
+
+VtSrv*
+vtlisten(char *addr)
+{
+	VtSrv *s;
+
+	s = vtmallocz(sizeof(VtSrv));
+	s->afd = announce(addr, s->adir);
+	if(s->afd < 0){
+		free(s);
+		return nil;
+	}
+	s->q = _vtqalloc();
+	proccreate(listenproc, s, STACK);
+	return s;
+}
+
+static void
+listenproc(void *v)
+{
+	int ctl;
+	char dir[NETPATHLEN];
+	VtSrv *srv;
+	VtSconn *sc;
+
+	srv = v;
+	for(;;){
+		ctl = listen(srv->adir, dir);
+		if(ctl < 0){
+			srv->dead = 1;
+			break;
+		}
+		sc = vtmallocz(sizeof(VtSconn));
+		sc->ctl = ctl;
+		sc->srv = srv;
+		strcpy(sc->dir, dir);
+		proccreate(connproc, sc, STACK);
+	}
+
+	// hangup
+}
+
+static void
+connproc(void *v)
+{
+	VtSconn *sc;
+	VtConn *c;
+	Packet *p;
+	VtReq *r;
+	int fd;
+
+	r = nil;
+	c = nil;
+	sc = v;
+	fprint(2, "new call %s on %d\n", sc->dir, sc->ctl);
+	fd = accept(sc->ctl, sc->dir);
+	close(sc->ctl);
+	if(fd < 0){
+		fprint(2, "accept %s: %r\n", sc->dir);
+		goto out;
+	}
+
+	c = vtconn(fd, fd);
+	sc->c = c;
+	if(vtversion(c) < 0){
+		fprint(2, "vtversion %s: %r\n", sc->dir);
+		goto out;
+	}
+	if(vtsrvhello(c) < 0){
+		fprint(2, "vtsrvhello %s: %r\n", sc->dir);
+		goto out;
+	}
+
+	fprint(2, "new proc %s\n", sc->dir);
+	proccreate(vtsendproc, c, STACK);
+	qlock(&c->lk);
+	while(!c->writeq)
+		rsleep(&c->rpcfork);
+	qunlock(&c->lk);
+
+	while((p = vtrecv(c)) != nil){
+		r = vtmallocz(sizeof(VtReq));
+		if(vtfcallunpack(&r->tx, p) < 0){
+			packetfree(p);
+			fprint(2, "bad packet on %s: %r\n", sc->dir);
+			continue;
+		}
+		packetfree(p);
+		if(r->tx.type == VtTgoodbye)
+			break;
+		r->rx.tag = r->tx.tag;
+		r->sc = sc;
+		if(_vtqsend(sc->srv->q, r) < 0){
+			fprint(2, "hungup queue\n");
+			break;
+		}
+		r = nil;
+	}
+
+	fprint(2, "eof on %s\n", sc->dir);
+
+out:
+	if(r){
+		vtfcallclear(&r->tx);
+		vtfree(r);
+	}
+	if(c)
+		vtfreeconn(c);
+	fprint(2, "freed %s\n", sc->dir);
+	vtfree(sc);
+	return;
+}
+
+VtReq*
+vtgetreq(VtSrv *srv)
+{
+	return _vtqrecv(srv->q);
+}
+
+void
+vtrespond(VtReq *r)
+{
+	Packet *p;
+	VtSconn *sc;
+
+	sc = r->sc;
+	if(r->rx.tag != r->tx.tag)
+		abort();
+	if(r->rx.type != r->tx.type+1 && r->rx.type != VtRerror)
+		abort();
+	if((p = vtfcallpack(&r->rx)) == nil){
+		fprint(2, "fcallpack on %s: %r\n", sc->dir);
+		packetfree(p);
+		vtfcallclear(&r->rx);
+		return;
+	}
+	vtsend(sc->c, p);
+	vtfcallclear(&r->tx);
+	vtfcallclear(&r->rx);
+	vtfree(r);
+}
+
diff --git a/src/libventi/srvhello.c b/src/libventi/srvhello.c
@@ -0,0 +1,50 @@
+#include <u.h>
+#include <libc.h>
+#include <venti.h>
+
+int
+vtsrvhello(VtConn *z)
+{
+	VtFcall tx, rx;
+	Packet *p;
+
+	if((p = vtrecv(z)) == nil)
+		return 0;
+
+	if(vtfcallunpack(&tx, p) < 0){
+		packetfree(p);
+		return 0;
+	}
+	packetfree(p);
+
+	if(tx.type != VtThello){
+		vtfcallclear(&tx);
+		werrstr("bad packet type %d; want Thello %d", tx.type, VtThello);
+		return 0;
+	}
+	if(tx.tag != 0){
+		vtfcallclear(&tx);
+		werrstr("bad tag in hello");
+		return 0;
+	}
+	if(strcmp(tx.version, z->version) != 0){
+		vtfcallclear(&tx);
+		werrstr("bad version in hello");
+		return 0;
+	}
+	vtfree(z->uid);
+	z->uid = tx.uid;
+	tx.uid = nil;
+	vtfcallclear(&tx);
+
+	memset(&rx, 0, sizeof rx);
+	rx.type = VtRhello;
+	rx.tag = tx.tag;
+	rx.sid = "anonymous";
+	if((p = vtfcallpack(&rx)) == nil)
+		return 0;
+	if(vtsend(z, p) < 0)
+		return 0;
+
+	return 1;
+}
diff --git a/src/libventi/strdup.c b/src/libventi/strdup.c
@@ -0,0 +1,18 @@
+#include <u.h>
+#include <libc.h>
+#include <venti.h>
+
+char*
+vtstrdup(char *s)
+{
+	int n;
+	char *ss;
+
+	if(s == nil)
+		return nil;
+	n = strlen(s) + 1;
+	ss = vtmalloc(n);
+	memmove(ss, s, n);
+	return ss;
+}
+
diff --git a/src/libventi/string.c b/src/libventi/string.c
@@ -0,0 +1,50 @@
+#include <u.h>
+#include <libc.h>
+#include <venti.h>
+
+int
+vtputstring(Packet *p, char *s)
+{
+	uchar buf[2];
+	int n;
+
+	if(s == nil){
+		werrstr("null string in packet");
+		return -1;
+	}
+	n = strlen(s);
+	if(n > VtMaxStringSize){
+		werrstr("string too long in packet");
+		return -1;
+	}
+	buf[0] = n>>8;
+	buf[1] = n;
+	packetappend(p, buf, 2);
+	packetappend(p, (uchar*)s, n);
+	return 0;
+}
+
+int
+vtgetstring(Packet *p, char **ps)
+{
+	uchar buf[2];
+	int n;
+	char *s;
+
+	if(packetconsume(p, buf, 2) < 0)
+		return -1;
+	n = (buf[0]<<8) + buf[1];
+	if(n > VtMaxStringSize) {
+		werrstr("string too long in packet");
+		return -1;
+	}
+	s = vtmalloc(n+1);
+	if(packetconsume(p, (uchar*)s, n) < 0){
+		vtfree(s);
+		return -1;
+	}
+	s[n] = 0;
+	*ps = s;
+	return 0;
+}
+
diff --git a/src/libventi/version.c b/src/libventi/version.c
@@ -0,0 +1,115 @@
+#include <u.h>
+#include <libc.h>
+#include <venti.h>
+
+static char *okvers[] = {
+	"02",
+	nil,
+};
+
+/*
+static char EBigString[] = "string too long";
+static char EBigPacket[] = "packet too long";
+static char ENullString[] = "missing string";
+*/
+static char EBadVersion[] = "bad format in version string";
+
+static int
+vtreadversion(VtConn *z, char *q, char *v, int nv)
+{
+	int n;
+
+	for(;;){
+		if(nv <= 1){
+			werrstr("version too long");
+			return -1;
+		}
+		n = read(z->infd, v, 1);
+		if(n <= 0){
+			if(n == 0)
+				werrstr("unexpected eof");
+			return -1;
+		}
+		if(*v == '\n'){
+			*v = 0;
+			break;
+		}
+		if((uchar)*v < ' ' || (uchar)*v > 0x7f || (*q && *v != *q)){
+			werrstr(EBadVersion);
+			return -1;
+		}
+		v++;
+		nv--;
+		if(*q)
+			q++;
+	}
+	return 0;
+}
+
+int
+vtversion(VtConn *z)
+{
+	char buf[VtMaxStringSize], *p, *ep, *prefix, *pp;
+	int i;
+
+	qlock(&z->lk);
+	if(z->state != VtStateAlloc){
+		werrstr("bad session state");
+		qunlock(&z->lk);
+		return -1;
+	}
+
+	qlock(&z->inlk);
+	qlock(&z->outlk);
+
+	p = buf;
+	ep = buf + sizeof buf;
+	prefix = "venti-";
+	p = seprint(p, ep, "%s", prefix);
+	p += strlen(p);
+	for(i=0; okvers[i]; i++)
+		p = seprint(p, ep, "%s%s", i ? ":" : "", okvers[i]);
+	p = seprint(p, ep, "-libventi\n");
+	assert(p-buf < sizeof buf);
+
+	if(write(z->outfd, buf, p-buf) != p-buf)
+		goto Err;
+	vtdebug(z, "version string out: %s", buf);
+
+	if(vtreadversion(z, prefix, buf, sizeof buf) < 0)
+		goto Err;
+	vtdebug(z, "version string in: %s", buf);
+
+	p = buf+strlen(prefix);
+	for(;;){
+		pp = strpbrk(p, ":-");
+		for(i=0; okvers[i]; i++)
+			if(strlen(okvers[i]) == pp-p && memcmp(okvers[i], p, pp-p) == 0){
+				*pp = 0;
+				z->version = vtstrdup(p);
+				goto Okay;
+			}
+	}
+	werrstr("unable to negotiate version");
+	goto Err;
+
+Okay:
+	z->state = VtStateConnected;
+	qunlock(&z->inlk);
+	qunlock(&z->outlk);
+	qunlock(&z->lk);
+	return 0;
+
+Err:
+	if(z->infd >= 0)
+		close(z->infd);
+	if(z->outfd >= 0 && z->outfd != z->infd)
+		close(z->outfd);
+	z->infd = -1;
+	z->outfd = -1;
+	z->state = VtStateClosed;
+	qunlock(&z->inlk);
+	qunlock(&z->outlk);
+	qunlock(&z->lk);
+	return -1;
+}
diff --git a/src/libventi/zero.c b/src/libventi/zero.c
@@ -0,0 +1,55 @@
+#include <u.h>
+#include <libc.h>
+#include <venti.h>
+
+void
+vtzeroextend(int type, uchar *buf, uint n, uint nn)
+{
+	uchar *p, *ep;
+
+	switch(type&7) {
+	case 0:
+		memset(buf+n, 0, nn-n);
+		break;
+	default:
+		p = buf + (n/VtScoreSize)*VtScoreSize;
+		ep = buf + (nn/VtScoreSize)*VtScoreSize;
+		while(p < ep) {
+			memmove(p, vtzeroscore, VtScoreSize);
+			p += VtScoreSize;
+		}
+		memset(p, 0, buf+nn-p);
+		break;
+	}
+}
+
+uint 
+vtzerotruncate(int type, uchar *buf, uint n)
+{
+	uchar *p;
+
+	if(type == VtRootType){
+		if(n < VtRootSize)
+			return n;
+		return VtRootSize;
+	}
+
+	switch(type&7){
+	case 0:
+		for(p = buf + n; p > buf; p--) {
+			if(p[-1] != 0)
+				break;
+		}
+		return p - buf;
+	default:
+		/* ignore slop at end of block */
+		p = buf + (n/VtScoreSize)*VtScoreSize;
+
+		while(p > buf) {
+			if(memcmp(p - VtScoreSize, vtzeroscore, VtScoreSize) != 0)
+				break;
+			p -= VtScoreSize;
+		}
+		return p - buf;
+	}
+}
diff --git a/src/libventi/zeroscore.c b/src/libventi/zeroscore.c
@@ -0,0 +1,10 @@
+#include <u.h>
+#include <libc.h>
+#include <venti.h>
+
+/* score of a zero length block */
+uchar	vtzeroscore[VtScoreSize] = {
+	0xda, 0x39, 0xa3, 0xee, 0x5e, 0x6b, 0x4b, 0x0d, 0x32, 0x55,
+	0xbf, 0xef, 0x95, 0x60, 0x18, 0x90, 0xaf, 0xd8, 0x07, 0x09
+};
+