plan9port

fork of plan9port with libvec, libstr and libsdb
Log | Files | Refs | README | LICENSE

deflate.c (30519B)


      1 #include <u.h>
      2 #include <libc.h>
      3 #include <flate.h>
      4 
      5 typedef struct Chain	Chain;
      6 typedef struct Chains	Chains;
      7 typedef struct Dyncode	Dyncode;
      8 typedef struct Huff	Huff;
      9 typedef struct LZblock	LZblock;
     10 typedef struct LZstate	LZstate;
     11 
     12 enum
     13 {
     14 	/*
     15 	 * deflate format paramaters
     16 	 */
     17 	DeflateUnc	= 0,			/* uncompressed block */
     18 	DeflateFix	= 1,			/* fixed huffman codes */
     19 	DeflateDyn	= 2,			/* dynamic huffman codes */
     20 
     21 	DeflateEob	= 256,			/* end of block code in lit/len book */
     22 	DeflateMaxBlock	= 64*1024-1,		/* maximum size of uncompressed block */
     23 
     24 	DeflateMaxExp	= 10,			/* maximum expansion for a block */
     25 
     26 	LenStart	= 257,			/* start of length codes in litlen */
     27 	Nlitlen		= 288,			/* number of litlen codes */
     28 	Noff		= 30,			/* number of offset codes */
     29 	Nclen		= 19,			/* number of codelen codes */
     30 
     31 	MaxOff		= 32*1024,
     32 	MinMatch	= 3,			/* shortest match possible */
     33 	MaxMatch	= 258,			/* longest match possible */
     34 
     35 	/*
     36 	 * huffman code paramaters
     37 	 */
     38 	MaxLeaf		= Nlitlen,
     39 	MaxHuffBits	= 16,			/* max bits in a huffman code */
     40 	ChainMem	= 2 * (MaxHuffBits - 1) * MaxHuffBits,
     41 
     42 	/*
     43 	 * coding of the lz parse
     44 	 */
     45 	LenFlag		= 1 << 3,
     46 	LenShift	= 4,			/* leaves enough space for MinMatchMaxOff */
     47 	MaxLitRun	= LenFlag - 1,
     48 
     49 	/*
     50 	 * internal lz paramaters
     51 	 */
     52 	DeflateOut	= 4096,			/* output buffer size */
     53 	BlockSize	= 8192,			/* attempted input read quanta */
     54 	DeflateBlock	= DeflateMaxBlock & ~(BlockSize - 1),
     55 	MinMatchMaxOff	= 4096,			/* max profitable offset for small match;
     56 						 * assumes 8 bits for len, 5+10 for offset
     57 						 * DONT CHANGE WITHOUT CHANGING LZPARSE CONSTANTS
     58 						 */
     59 	HistSlop	= 512,			/* must be at lead MaxMatch */
     60 	HistBlock	= 64*1024,
     61 	HistSize	= HistBlock + HistSlop,
     62 
     63 	HashLog		= 13,
     64 	HashSize	= 1<<HashLog,
     65 
     66 	MaxOffCode	= 256,			/* biggest offset looked up in direct table */
     67 
     68 	EstLitBits	= 8,
     69 	EstLenBits	= 4,
     70 	EstOffBits	= 5
     71 };
     72 
     73 /*
     74  * knuth vol. 3 multiplicative hashing
     75  * each byte x chosen according to rules
     76  * 1/4 < x < 3/10, 1/3 x < < 3/7, 4/7 < x < 2/3, 7/10 < x < 3/4
     77  * with reasonable spread between the bytes & their complements
     78  *
     79  * the 3 byte value appears to be as almost good as the 4 byte value,
     80  * and might be faster on some machines
     81  */
     82 /*
     83 #define hashit(c)	((u32int)((c) * 0x6b43a9) >> (24 - HashLog))
     84 */
     85 #define hashit(c)	((u32int)(((c) & 0xffffff) * 0x6b43a9b5) >> (32 - HashLog))
     86 
     87 /*
     88  * lempel-ziv style compression state
     89  */
     90 struct LZstate
     91 {
     92 	uchar	hist[HistSize];
     93 	ulong	pos;				/* current location in history buffer */
     94 	ulong	avail;				/* data available after pos */
     95 	int	eof;
     96 	ushort	hash[HashSize];			/* hash chains */
     97 	ushort	nexts[MaxOff];
     98 	int	now;				/* pos in hash chains */
     99 	int	dot;				/* dawn of time in history */
    100 	int	prevlen;			/* lazy matching state */
    101 	int	prevoff;
    102 	int	maxcheck;			/* compressor tuning */
    103 
    104 	uchar	obuf[DeflateOut];
    105 	uchar	*out;				/* current position in the output buffer */
    106 	uchar	*eout;
    107 	ulong	bits;				/* bit shift register */
    108 	int	nbits;
    109 	int	rbad;				/* got an error reading the buffer */
    110 	int	wbad;				/* got an error writing the buffer */
    111 	int	(*w)(void*, void*, int);
    112 	void	*wr;
    113 
    114 	ulong	totr;				/* total input size */
    115 	ulong	totw;				/* total output size */
    116 	int	debug;
    117 };
    118 
    119 struct LZblock
    120 {
    121 	ushort	parse[DeflateMaxBlock / 2 + 1];
    122 	int	lastv;				/* value being constucted for parse */
    123 	ulong	litlencount[Nlitlen];
    124 	ulong	offcount[Noff];
    125 	ushort	*eparse;			/* limit for parse table */
    126 	int	bytes;				/* consumed from the input */
    127 	int	excost;				/* cost of encoding extra len & off bits */
    128 };
    129 
    130 /*
    131  * huffman code table
    132  */
    133 struct Huff
    134 {
    135 	short	bits;				/* length of the code */
    136 	ushort	encode;				/* the code */
    137 };
    138 
    139 /*
    140  * encoding of dynamic huffman trees
    141  */
    142 struct Dyncode
    143 {
    144 	int	nlit;
    145 	int	noff;
    146 	int	nclen;
    147 	int	ncode;
    148 	Huff	codetab[Nclen];
    149 	uchar	codes[Nlitlen+Noff];
    150 	uchar	codeaux[Nlitlen+Noff];
    151 };
    152 
    153 static	int	deflateb(LZstate *lz, LZblock *lzb, void *rr, int (*r)(void*, void*, int));
    154 static	int	lzcomp(LZstate*, LZblock*, uchar*, ushort*, int finish);
    155 static	void	wrblock(LZstate*, int, ushort*, ushort*, Huff*, Huff*);
    156 static	int	bitcost(Huff*, ulong*, int);
    157 static	int	huffcodes(Dyncode*, Huff*, Huff*);
    158 static	void	wrdyncode(LZstate*, Dyncode*);
    159 static	void	lzput(LZstate*, ulong bits, int nbits);
    160 static	void	lzflushbits(LZstate*);
    161 static	void	lzflush(LZstate *lz);
    162 static	void	lzwrite(LZstate *lz, void *buf, int n);
    163 
    164 static	int	hufftabinit(Huff*, int, ulong*, int);
    165 static	int	mkgzprecode(Huff*, ulong *, int, int);
    166 
    167 static	int	mkprecode(Huff*, ulong *, int, int, ulong*);
    168 static	void	nextchain(Chains*, int);
    169 static	void	leafsort(ulong*, ushort*, int, int);
    170 
    171 /* conversion from len to code word */
    172 static int lencode[MaxMatch];
    173 
    174 /*
    175  * conversion from off to code word
    176  * off <= MaxOffCode ? offcode[off] : bigoffcode[off >> 7]
    177 */
    178 static int offcode[MaxOffCode];
    179 static int bigoffcode[256];
    180 
    181 /* litlen code words LenStart-285 extra bits */
    182 static int litlenbase[Nlitlen-LenStart];
    183 static int litlenextra[Nlitlen-LenStart] =
    184 {
    185 /* 257 */	0, 0, 0,
    186 /* 260 */	0, 0, 0, 0, 0, 1, 1, 1, 1, 2,
    187 /* 270 */	2, 2, 2, 3, 3, 3, 3, 4, 4, 4,
    188 /* 280 */	4, 5, 5, 5, 5, 0, 0, 0
    189 };
    190 
    191 /* offset code word extra bits */
    192 static int offbase[Noff];
    193 static int offextra[] =
    194 {
    195 	0,  0,  0,  0,  1,  1,  2,  2,  3,  3,
    196 	4,  4,  5,  5,  6,  6,  7,  7,  8,  8,
    197 	9,  9,  10, 10, 11, 11, 12, 12, 13, 13,
    198 	0,  0,
    199 };
    200 
    201 /* order code lengths */
    202 static int clenorder[Nclen] =
    203 {
    204         16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
    205 };
    206 
    207 /* static huffman tables */
    208 static	Huff	litlentab[Nlitlen];
    209 static	Huff	offtab[Noff];
    210 static	Huff	hofftab[Noff];
    211 
    212 /* bit reversal for brain dead endian swap in huffman codes */
    213 static	uchar	revtab[256];
    214 static	ulong	nlits;
    215 static	ulong	nmatches;
    216 
    217 int
    218 deflateinit(void)
    219 {
    220 	ulong bitcount[MaxHuffBits];
    221 	int i, j, ci, n;
    222 
    223 	/* byte reverse table */
    224 	for(i=0; i<256; i++)
    225 		for(j=0; j<8; j++)
    226 			if(i & (1<<j))
    227 				revtab[i] |= 0x80 >> j;
    228 
    229 	/* static Litlen bit lengths */
    230 	for(i=0; i<144; i++)
    231 		litlentab[i].bits = 8;
    232 	for(i=144; i<256; i++)
    233 		litlentab[i].bits = 9;
    234 	for(i=256; i<280; i++)
    235 		litlentab[i].bits = 7;
    236 	for(i=280; i<Nlitlen; i++)
    237 		litlentab[i].bits = 8;
    238 
    239 	memset(bitcount, 0, sizeof(bitcount));
    240 	bitcount[8] += 144 - 0;
    241 	bitcount[9] += 256 - 144;
    242 	bitcount[7] += 280 - 256;
    243 	bitcount[8] += Nlitlen - 280;
    244 
    245 	if(!hufftabinit(litlentab, Nlitlen, bitcount, 9))
    246 		return FlateInternal;
    247 
    248 	/* static offset bit lengths */
    249 	for(i = 0; i < Noff; i++)
    250 		offtab[i].bits = 5;
    251 
    252 	memset(bitcount, 0, sizeof(bitcount));
    253 	bitcount[5] = Noff;
    254 
    255 	if(!hufftabinit(offtab, Noff, bitcount, 5))
    256 		return FlateInternal;
    257 
    258 	bitcount[0] = 0;
    259 	bitcount[1] = 0;
    260 	if(!mkgzprecode(hofftab, bitcount, 2, MaxHuffBits))
    261 		return FlateInternal;
    262 
    263 	/* conversion tables for lens & offs to codes */
    264 	ci = 0;
    265 	for(i = LenStart; i < 286; i++){
    266 		n = ci + (1 << litlenextra[i - LenStart]);
    267 		litlenbase[i - LenStart] = ci;
    268 		for(; ci < n; ci++)
    269 			lencode[ci] = i;
    270 	}
    271 	/* patch up special case for len MaxMatch */
    272 	lencode[MaxMatch-MinMatch] = 285;
    273 	litlenbase[285-LenStart] = MaxMatch-MinMatch;
    274 
    275 	ci = 0;
    276 	for(i = 0; i < 16; i++){
    277 		n = ci + (1 << offextra[i]);
    278 		offbase[i] = ci;
    279 		for(; ci < n; ci++)
    280 			offcode[ci] = i;
    281 	}
    282 
    283 	ci = ci >> 7;
    284 	for(; i < 30; i++){
    285 		n = ci + (1 << (offextra[i] - 7));
    286 		offbase[i] = ci << 7;
    287 		for(; ci < n; ci++)
    288 			bigoffcode[ci] = i;
    289 	}
    290 	return FlateOk;
    291 }
    292 
    293 static void
    294 deflatereset(LZstate *lz, int level, int debug)
    295 {
    296 	memset(lz->nexts, 0, sizeof lz->nexts);
    297 	memset(lz->hash, 0, sizeof lz->hash);
    298 	lz->totr = 0;
    299 	lz->totw = 0;
    300 	lz->pos = 0;
    301 	lz->avail = 0;
    302 	lz->out = lz->obuf;
    303 	lz->eout = &lz->obuf[DeflateOut];
    304 	lz->prevlen = MinMatch - 1;
    305 	lz->prevoff = 0;
    306 	lz->now = MaxOff + 1;
    307 	lz->dot = lz->now;
    308 	lz->bits = 0;
    309 	lz->nbits = 0;
    310 	lz->maxcheck = (1 << level);
    311 	lz->maxcheck -= lz->maxcheck >> 2;
    312 	if(lz->maxcheck < 2)
    313 		lz->maxcheck = 2;
    314 	else if(lz->maxcheck > 1024)
    315 		lz->maxcheck = 1024;
    316 
    317 	lz->debug = debug;
    318 }
    319 
    320 int
    321 deflate(void *wr, int (*w)(void*, void*, int), void *rr, int (*r)(void*, void*, int), int level, int debug)
    322 {
    323 	LZstate *lz;
    324 	LZblock *lzb;
    325 	int ok;
    326 
    327 	lz = malloc(sizeof *lz + sizeof *lzb);
    328 	if(lz == nil)
    329 		return FlateNoMem;
    330 	lzb = (LZblock*)&lz[1];
    331 
    332 	deflatereset(lz, level, debug);
    333 	lz->w = w;
    334 	lz->wr = wr;
    335 	lz->wbad = 0;
    336 	lz->rbad = 0;
    337 	lz->eof = 0;
    338 	ok = FlateOk;
    339 	while(!lz->eof || lz->avail){
    340 		ok = deflateb(lz, lzb, rr, r);
    341 		if(ok != FlateOk)
    342 			break;
    343 	}
    344 	if(ok == FlateOk && lz->rbad)
    345 		ok = FlateInputFail;
    346 	if(ok == FlateOk && lz->wbad)
    347 		ok = FlateOutputFail;
    348 	free(lz);
    349 	return ok;
    350 }
    351 
    352 static int
    353 deflateb(LZstate *lz, LZblock *lzb, void *rr, int (*r)(void*, void*, int))
    354 {
    355 	Dyncode dyncode, hdyncode;
    356 	Huff dlitlentab[Nlitlen], dofftab[Noff], hlitlentab[Nlitlen];
    357 	ulong litcount[Nlitlen];
    358 	long nunc, ndyn, nfix, nhuff;
    359 	uchar *slop, *hslop;
    360 	ulong ep;
    361 	int i, n, m, mm, nslop;
    362 
    363 	memset(lzb->litlencount, 0, sizeof lzb->litlencount);
    364 	memset(lzb->offcount, 0, sizeof lzb->offcount);
    365 	lzb->litlencount[DeflateEob]++;
    366 
    367 	lzb->bytes = 0;
    368 	lzb->eparse = lzb->parse;
    369 	lzb->lastv = 0;
    370 	lzb->excost = 0;
    371 
    372 	slop = &lz->hist[lz->pos];
    373 	n = lz->avail;
    374 	while(n < DeflateBlock && (!lz->eof || lz->avail)){
    375 		/*
    376 		 * fill the buffer as much as possible,
    377 		 * while leaving room for MaxOff history behind lz->pos,
    378 		 * and not reading more than we can handle.
    379 		 *
    380 		 * make sure we read at least HistSlop bytes.
    381 		 */
    382 		if(!lz->eof){
    383 			ep = lz->pos + lz->avail;
    384 			if(ep >= HistBlock)
    385 				ep -= HistBlock;
    386 			m = HistBlock - MaxOff - lz->avail;
    387 			if(m > HistBlock - n)
    388 				m = HistBlock - n;
    389 			if(m > (HistBlock + HistSlop) - ep)
    390 				m = (HistBlock + HistSlop) - ep;
    391 			if(m & ~(BlockSize - 1))
    392 				m &= ~(BlockSize - 1);
    393 
    394 			/*
    395 			 * be nice to the caller: stop reads that are too small.
    396 			 * can only get here when we've already filled the buffer some
    397 			 */
    398 			if(m < HistSlop){
    399 				if(!m || !lzb->bytes)
    400 					return FlateInternal;
    401 				break;
    402 			}
    403 
    404 			mm = (*r)(rr, &lz->hist[ep], m);
    405 			if(mm > 0){
    406 				/*
    407 				 * wrap data to end if we're read it from the beginning
    408 				 * this way, we don't have to wrap searches.
    409 				 *
    410 				 * wrap reads past the end to the beginning.
    411 				 * this way, we can guarantee minimum size reads.
    412 				 */
    413 				if(ep < HistSlop)
    414 					memmove(&lz->hist[ep + HistBlock], &lz->hist[ep], HistSlop - ep);
    415 				else if(ep + mm > HistBlock)
    416 					memmove(&lz->hist[0], &lz->hist[HistBlock], ep + mm - HistBlock);
    417 
    418 				lz->totr += mm;
    419 				n += mm;
    420 				lz->avail += mm;
    421 			}else{
    422 				if(mm < 0)
    423 					lz->rbad = 1;
    424 				lz->eof = 1;
    425 			}
    426 		}
    427 		ep = lz->pos + lz->avail;
    428 		if(ep > HistSize)
    429 			ep = HistSize;
    430 		if(lzb->bytes + ep - lz->pos > DeflateMaxBlock)
    431 			ep = lz->pos + DeflateMaxBlock - lzb->bytes;
    432 		m = lzcomp(lz, lzb, &lz->hist[ep], lzb->eparse, lz->eof);
    433 		lzb->bytes += m;
    434 		lz->pos = (lz->pos + m) & (HistBlock - 1);
    435 		lz->avail -= m;
    436 	}
    437 	if(lzb->lastv)
    438 		*lzb->eparse++ = lzb->lastv;
    439 	if(lzb->eparse > lzb->parse + nelem(lzb->parse))
    440 		return FlateInternal;
    441 	nunc = lzb->bytes;
    442 
    443 	if(!mkgzprecode(dlitlentab, lzb->litlencount, Nlitlen, MaxHuffBits)
    444 	|| !mkgzprecode(dofftab, lzb->offcount, Noff, MaxHuffBits))
    445 		return FlateInternal;
    446 
    447 	ndyn = huffcodes(&dyncode, dlitlentab, dofftab);
    448 	if(ndyn < 0)
    449 		return FlateInternal;
    450 	ndyn += bitcost(dlitlentab, lzb->litlencount, Nlitlen)
    451 		+ bitcost(dofftab, lzb->offcount, Noff)
    452 		+ lzb->excost;
    453 
    454 	memset(litcount, 0, sizeof litcount);
    455 
    456 	nslop = nunc;
    457 	if(nslop > &lz->hist[HistSize] - slop)
    458 		nslop = &lz->hist[HistSize] - slop;
    459 
    460 	for(i = 0; i < nslop; i++)
    461 		litcount[slop[i]]++;
    462 	hslop = &lz->hist[HistSlop - nslop];
    463 	for(; i < nunc; i++)
    464 		litcount[hslop[i]]++;
    465 	litcount[DeflateEob]++;
    466 
    467 	if(!mkgzprecode(hlitlentab, litcount, Nlitlen, MaxHuffBits))
    468 		return FlateInternal;
    469 	nhuff = huffcodes(&hdyncode, hlitlentab, hofftab);
    470 	if(nhuff < 0)
    471 		return FlateInternal;
    472 	nhuff += bitcost(hlitlentab, litcount, Nlitlen);
    473 
    474 	nfix = bitcost(litlentab, lzb->litlencount, Nlitlen)
    475 		+ bitcost(offtab, lzb->offcount, Noff)
    476 		+ lzb->excost;
    477 
    478 	lzput(lz, lz->eof && !lz->avail, 1);
    479 
    480 	if(lz->debug){
    481 		fprint(2, "block: bytes=%lud entries=%ld extra bits=%d\n\tuncompressed=%lud fixed=%lud dynamic=%lud huffman=%lud\n",
    482 			nunc, lzb->eparse - lzb->parse, lzb->excost, (nunc + 4) * 8, nfix, ndyn, nhuff);
    483 		fprint(2, "\tnlit=%lud matches=%lud eof=%d\n", nlits, nmatches, lz->eof && !lz->avail);
    484 	}
    485 
    486 	if((nunc + 4) * 8 < ndyn && (nunc + 4) * 8 < nfix && (nunc + 4) * 8 < nhuff){
    487 		lzput(lz, DeflateUnc, 2);
    488 		lzflushbits(lz);
    489 
    490 		lzput(lz, nunc & 0xff, 8);
    491 		lzput(lz, (nunc >> 8) & 0xff, 8);
    492 		lzput(lz, ~nunc & 0xff, 8);
    493 		lzput(lz, (~nunc >> 8) & 0xff, 8);
    494 		lzflush(lz);
    495 
    496 		lzwrite(lz, slop, nslop);
    497 		lzwrite(lz, &lz->hist[HistSlop], nunc - nslop);
    498 	}else if(ndyn < nfix && ndyn < nhuff){
    499 		lzput(lz, DeflateDyn, 2);
    500 
    501 		wrdyncode(lz, &dyncode);
    502 		wrblock(lz, slop - lz->hist, lzb->parse, lzb->eparse, dlitlentab, dofftab);
    503 		lzput(lz, dlitlentab[DeflateEob].encode, dlitlentab[DeflateEob].bits);
    504 	}else if(nhuff < nfix){
    505 		lzput(lz, DeflateDyn, 2);
    506 
    507 		wrdyncode(lz, &hdyncode);
    508 
    509 		m = 0;
    510 		for(i = nunc; i > MaxLitRun; i -= MaxLitRun)
    511 			lzb->parse[m++] = MaxLitRun;
    512 		lzb->parse[m++] = i;
    513 
    514 		wrblock(lz, slop - lz->hist, lzb->parse, lzb->parse + m, hlitlentab, hofftab);
    515 		lzput(lz, hlitlentab[DeflateEob].encode, hlitlentab[DeflateEob].bits);
    516 	}else{
    517 		lzput(lz, DeflateFix, 2);
    518 
    519 		wrblock(lz, slop - lz->hist, lzb->parse, lzb->eparse, litlentab, offtab);
    520 		lzput(lz, litlentab[DeflateEob].encode, litlentab[DeflateEob].bits);
    521 	}
    522 
    523 	if(lz->eof && !lz->avail){
    524 		lzflushbits(lz);
    525 		lzflush(lz);
    526 	}
    527 	return FlateOk;
    528 }
    529 
    530 static void
    531 lzwrite(LZstate *lz, void *buf, int n)
    532 {
    533 	int nw;
    534 
    535 	if(n && lz->w){
    536 		nw = (*lz->w)(lz->wr, buf, n);
    537 		if(nw != n){
    538 			lz->w = 0;
    539 			lz->wbad = 1;
    540 		}else
    541 			lz->totw += n;
    542 	}
    543 }
    544 
    545 static void
    546 lzflush(LZstate *lz)
    547 {
    548 	lzwrite(lz, lz->obuf, lz->out - lz->obuf);
    549 	lz->out = lz->obuf;
    550 }
    551 
    552 static void
    553 lzput(LZstate *lz, ulong bits, int nbits)
    554 {
    555 	bits = (bits << lz->nbits) | lz->bits;
    556 	for(nbits += lz->nbits; nbits >= 8; nbits -= 8){
    557 		*lz->out++ = bits;
    558 		if(lz->out == lz->eout)
    559 			lzflush(lz);
    560 		bits >>= 8;
    561 	}
    562 	lz->bits = bits;
    563 	lz->nbits = nbits;
    564 }
    565 
    566 static void
    567 lzflushbits(LZstate *lz)
    568 {
    569 	if(lz->nbits)
    570 		lzput(lz, 0, 8 - (lz->nbits & 7));
    571 }
    572 
    573 /*
    574  * write out a block of n samples,
    575  * given lz encoding and counts for huffman tables
    576  */
    577 static void
    578 wrblock(LZstate *out, int litoff, ushort *soff, ushort *eoff, Huff *litlentab, Huff *offtab)
    579 {
    580 	ushort *off;
    581 	int i, run, offset, lit, len, c;
    582 
    583 	if(out->debug > 2){
    584 		for(off = soff; off < eoff; ){
    585 			offset = *off++;
    586 			run = offset & MaxLitRun;
    587 			if(run){
    588 				for(i = 0; i < run; i++){
    589 					lit = out->hist[litoff & (HistBlock - 1)];
    590 					litoff++;
    591 					fprint(2, "\tlit %.2ux %c\n", lit, lit);
    592 				}
    593 				if(!(offset & LenFlag))
    594 					continue;
    595 				len = offset >> LenShift;
    596 				offset = *off++;
    597 			}else if(offset & LenFlag){
    598 				len = offset >> LenShift;
    599 				offset = *off++;
    600 			}else{
    601 				len = 0;
    602 				offset >>= LenShift;
    603 			}
    604 			litoff += len + MinMatch;
    605 			fprint(2, "\t<%d, %d>\n", offset + 1, len + MinMatch);
    606 		}
    607 	}
    608 
    609 	for(off = soff; off < eoff; ){
    610 		offset = *off++;
    611 		run = offset & MaxLitRun;
    612 		if(run){
    613 			for(i = 0; i < run; i++){
    614 				lit = out->hist[litoff & (HistBlock - 1)];
    615 				litoff++;
    616 				lzput(out, litlentab[lit].encode, litlentab[lit].bits);
    617 			}
    618 			if(!(offset & LenFlag))
    619 				continue;
    620 			len = offset >> LenShift;
    621 			offset = *off++;
    622 		}else if(offset & LenFlag){
    623 			len = offset >> LenShift;
    624 			offset = *off++;
    625 		}else{
    626 			len = 0;
    627 			offset >>= LenShift;
    628 		}
    629 		litoff += len + MinMatch;
    630 		c = lencode[len];
    631 		lzput(out, litlentab[c].encode, litlentab[c].bits);
    632 		c -= LenStart;
    633 		if(litlenextra[c])
    634 			lzput(out, len - litlenbase[c], litlenextra[c]);
    635 
    636 		if(offset < MaxOffCode)
    637 			c = offcode[offset];
    638 		else
    639 			c = bigoffcode[offset >> 7];
    640 		lzput(out, offtab[c].encode, offtab[c].bits);
    641 		if(offextra[c])
    642 			lzput(out, offset - offbase[c], offextra[c]);
    643 	}
    644 }
    645 
    646 /*
    647  * look for the longest, closest string which matches
    648  * the next prefix.  the clever part here is looking for
    649  * a string 1 longer than the previous best match.
    650  *
    651  * follows the recommendation of limiting number of chains
    652  * which are checked.  this appears to be the best heuristic.
    653  */
    654 static int
    655 lzmatch(int now, int then, uchar *p, uchar *es, ushort *nexts, uchar *hist, int runlen, int check, int *m)
    656 {
    657 	uchar *s, *t;
    658 	int ml, off, last;
    659 
    660 	ml = check;
    661 	if(runlen >= 8)
    662 		check >>= 2;
    663 	*m = 0;
    664 	if(p + runlen >= es)
    665 		return runlen;
    666 	last = 0;
    667 	for(; check-- > 0; then = nexts[then & (MaxOff-1)]){
    668 		off = (ushort)(now - then);
    669 		if(off <= last || off > MaxOff)
    670 			break;
    671 		s = p + runlen;
    672 		t = hist + (((p - hist) - off) & (HistBlock-1));
    673 		t += runlen;
    674 		for(; s >= p; s--){
    675 			if(*s != *t)
    676 				goto matchloop;
    677 			t--;
    678 		}
    679 
    680 		/*
    681 		 * we have a new best match.
    682 		 * extend it to it's maximum length
    683 		 */
    684 		t += runlen + 2;
    685 		s += runlen + 2;
    686 		for(; s < es; s++){
    687 			if(*s != *t)
    688 				break;
    689 			t++;
    690 		}
    691 		runlen = s - p;
    692 		*m = off - 1;
    693 		if(s == es || runlen > ml)
    694 			break;
    695 matchloop:;
    696 		last = off;
    697 	}
    698 	return runlen;
    699 }
    700 
    701 static int
    702 lzcomp(LZstate *lz, LZblock *lzb, uchar *ep, ushort *parse, int finish)
    703 {
    704 	ulong cont, excost, *litlencount, *offcount;
    705 	uchar *p, *q, *s, *es;
    706 	ushort *nexts, *hash;
    707 	int v, i, h, runlen, n, now, then, m, prevlen, prevoff, maxdefer;
    708 
    709 	litlencount = lzb->litlencount;
    710 	offcount = lzb->offcount;
    711 	nexts = lz->nexts;
    712 	hash = lz->hash;
    713 	now = lz->now;
    714 
    715 	p = &lz->hist[lz->pos];
    716 	if(lz->prevlen != MinMatch - 1)
    717 		p++;
    718 
    719 	/*
    720 	 * hash in the links for any hanging link positions,
    721 	 * and calculate the hash for the current position.
    722 	 */
    723 	n = MinMatch;
    724 	if(n > ep - p)
    725 		n = ep - p;
    726 	cont = 0;
    727 	for(i = 0; i < n - 1; i++){
    728 		m = now - ((MinMatch-1) - i);
    729 		if(m < lz->dot)
    730 			continue;
    731 		s = lz->hist + (((p - lz->hist) - (now - m)) & (HistBlock-1));
    732 
    733 		cont = (s[0] << 16) | (s[1] << 8) | s[2];
    734 		h = hashit(cont);
    735 		prevoff = 0;
    736 		for(then = hash[h]; ; then = nexts[then & (MaxOff-1)]){
    737 			v = (ushort)(now - then);
    738 			if(v <= prevoff || v >= (MinMatch-1) - i)
    739 				break;
    740 			prevoff = v;
    741 		}
    742 		if(then == (ushort)m)
    743 			continue;
    744 		nexts[m & (MaxOff-1)] = hash[h];
    745 		hash[h] = m;
    746 	}
    747 	for(i = 0; i < n; i++)
    748 		cont = (cont << 8) | p[i];
    749 
    750 	/*
    751 	 * now must point to the index in the nexts array
    752 	 * corresponding to p's position in the history
    753 	 */
    754 	prevlen = lz->prevlen;
    755 	prevoff = lz->prevoff;
    756 	maxdefer = lz->maxcheck >> 2;
    757 	excost = 0;
    758 	v = lzb->lastv;
    759 	for(;;){
    760 		es = p + MaxMatch;
    761 		if(es > ep){
    762 			if(!finish || p >= ep)
    763 				break;
    764 			es = ep;
    765 		}
    766 
    767 		h = hashit(cont);
    768 		runlen = lzmatch(now, hash[h], p, es, nexts, lz->hist, prevlen, lz->maxcheck, &m);
    769 
    770 		/*
    771 		 * back out of small matches too far in the past
    772 		 */
    773 		if(runlen == MinMatch && m >= MinMatchMaxOff){
    774 			runlen = MinMatch - 1;
    775 			m = 0;
    776 		}
    777 
    778 		/*
    779 		 * record the encoding and increment counts for huffman trees
    780 		 * if we get a match, defer selecting it until we check for
    781 		 * a longer match at the next position.
    782 		 */
    783 		if(prevlen >= runlen && prevlen != MinMatch - 1){
    784 			/*
    785 			 * old match at least as good; use that one
    786 			 */
    787 			n = prevlen - MinMatch;
    788 			if(v || n){
    789 				*parse++ = v | LenFlag | (n << LenShift);
    790 				*parse++ = prevoff;
    791 			}else
    792 				*parse++ = prevoff << LenShift;
    793 			v = 0;
    794 
    795 			n = lencode[n];
    796 			litlencount[n]++;
    797 			excost += litlenextra[n - LenStart];
    798 
    799 			if(prevoff < MaxOffCode)
    800 				n = offcode[prevoff];
    801 			else
    802 				n = bigoffcode[prevoff >> 7];
    803 			offcount[n]++;
    804 			excost += offextra[n];
    805 
    806 			runlen = prevlen - 1;
    807 			prevlen = MinMatch - 1;
    808 			nmatches++;
    809 		}else if(runlen == MinMatch - 1){
    810 			/*
    811 			 * no match; just put out the literal
    812 			 */
    813 			if(++v == MaxLitRun){
    814 				*parse++ = v;
    815 				v = 0;
    816 			}
    817 			litlencount[*p]++;
    818 			nlits++;
    819 			runlen = 1;
    820 		}else{
    821 			if(prevlen != MinMatch - 1){
    822 				/*
    823 				 * longer match now. output previous literal,
    824 				 * update current match, and try again
    825 				 */
    826 				if(++v == MaxLitRun){
    827 					*parse++ = v;
    828 					v = 0;
    829 				}
    830 				litlencount[p[-1]]++;
    831 				nlits++;
    832 			}
    833 
    834 			prevoff = m;
    835 
    836 			if(runlen < maxdefer){
    837 				prevlen = runlen;
    838 				runlen = 1;
    839 			}else{
    840 				n = runlen - MinMatch;
    841 				if(v || n){
    842 					*parse++ = v | LenFlag | (n << LenShift);
    843 					*parse++ = prevoff;
    844 				}else
    845 					*parse++ = prevoff << LenShift;
    846 				v = 0;
    847 
    848 				n = lencode[n];
    849 				litlencount[n]++;
    850 				excost += litlenextra[n - LenStart];
    851 
    852 				if(prevoff < MaxOffCode)
    853 					n = offcode[prevoff];
    854 				else
    855 					n = bigoffcode[prevoff >> 7];
    856 				offcount[n]++;
    857 				excost += offextra[n];
    858 
    859 				prevlen = MinMatch - 1;
    860 				nmatches++;
    861 			}
    862 		}
    863 
    864 		/*
    865 		 * update the hash for the newly matched data
    866 		 * this is constructed so the link for the old
    867 		 * match in this position must be at the end of a chain,
    868 		 * and will expire when this match is added, ie it will
    869 		 * never be examined by the match loop.
    870 		 * add to the hash chain only if we have the real hash data.
    871 		 */
    872 		for(q = p + runlen; p != q; p++){
    873 			if(p + MinMatch <= ep){
    874 				h = hashit(cont);
    875 				nexts[now & (MaxOff-1)] = hash[h];
    876 				hash[h] = now;
    877 				if(p + MinMatch < ep)
    878 					cont = (cont << 8) | p[MinMatch];
    879 			}
    880 			now++;
    881 		}
    882 	}
    883 
    884 	/*
    885 	 * we can just store away the lazy state and
    886 	 * pick it up next time.  the last block will have finish set
    887 	 * so we won't have any pending matches
    888 	 * however, we need to correct for how much we've encoded
    889 	 */
    890 	if(prevlen != MinMatch - 1)
    891 		p--;
    892 
    893 	lzb->excost += excost;
    894 	lzb->eparse = parse;
    895 	lzb->lastv = v;
    896 
    897 	lz->now = now;
    898 	lz->prevlen = prevlen;
    899 	lz->prevoff = prevoff;
    900 
    901 	return p - &lz->hist[lz->pos];
    902 }
    903 
    904 /*
    905  * make up the dynamic code tables, and return the number of bits
    906  * needed to transmit them.
    907  */
    908 static int
    909 huffcodes(Dyncode *dc, Huff *littab, Huff *offtab)
    910 {
    911 	Huff *codetab;
    912 	uchar *codes, *codeaux;
    913 	ulong codecount[Nclen], excost;
    914 	int i, n, m, v, c, nlit, noff, ncode, nclen;
    915 
    916 	codetab = dc->codetab;
    917 	codes = dc->codes;
    918 	codeaux = dc->codeaux;
    919 
    920 	/*
    921 	 * trim the sizes of the tables
    922 	 */
    923 	for(nlit = Nlitlen; nlit > 257 && littab[nlit-1].bits == 0; nlit--)
    924 		;
    925 	for(noff = Noff; noff > 1 && offtab[noff-1].bits == 0; noff--)
    926 		;
    927 
    928 	/*
    929 	 * make the code-length code
    930 	 */
    931 	for(i = 0; i < nlit; i++)
    932 		codes[i] = littab[i].bits;
    933 	for(i = 0; i < noff; i++)
    934 		codes[i + nlit] = offtab[i].bits;
    935 
    936 	/*
    937 	 * run-length compress the code-length code
    938 	 */
    939 	excost = 0;
    940 	c = 0;
    941 	ncode = nlit+noff;
    942 	for(i = 0; i < ncode; ){
    943 		n = i + 1;
    944 		v = codes[i];
    945 		while(n < ncode && v == codes[n])
    946 			n++;
    947 		n -= i;
    948 		i += n;
    949 		if(v == 0){
    950 			while(n >= 11){
    951 				m = n;
    952 				if(m > 138)
    953 					m = 138;
    954 				codes[c] = 18;
    955 				codeaux[c++] = m - 11;
    956 				n -= m;
    957 				excost += 7;
    958 			}
    959 			if(n >= 3){
    960 				codes[c] = 17;
    961 				codeaux[c++] = n - 3;
    962 				n = 0;
    963 				excost += 3;
    964 			}
    965 		}
    966 		while(n--){
    967 			codes[c++] = v;
    968 			while(n >= 3){
    969 				m = n;
    970 				if(m > 6)
    971 					m = 6;
    972 				codes[c] = 16;
    973 				codeaux[c++] = m - 3;
    974 				n -= m;
    975 				excost += 3;
    976 			}
    977 		}
    978 	}
    979 
    980 	memset(codecount, 0, sizeof codecount);
    981 	for(i = 0; i < c; i++)
    982 		codecount[codes[i]]++;
    983 	if(!mkgzprecode(codetab, codecount, Nclen, 8))
    984 		return -1;
    985 
    986 	for(nclen = Nclen; nclen > 4 && codetab[clenorder[nclen-1]].bits == 0; nclen--)
    987 		;
    988 
    989 	dc->nlit = nlit;
    990 	dc->noff = noff;
    991 	dc->nclen = nclen;
    992 	dc->ncode = c;
    993 
    994 	return 5 + 5 + 4 + nclen * 3 + bitcost(codetab, codecount, Nclen) + excost;
    995 }
    996 
    997 static void
    998 wrdyncode(LZstate *out, Dyncode *dc)
    999 {
   1000 	Huff *codetab;
   1001 	uchar *codes, *codeaux;
   1002 	int i, v, c;
   1003 
   1004 	/*
   1005 	 * write out header, then code length code lengths,
   1006 	 * and code lengths
   1007 	 */
   1008 	lzput(out, dc->nlit-257, 5);
   1009 	lzput(out, dc->noff-1, 5);
   1010 	lzput(out, dc->nclen-4, 4);
   1011 
   1012 	codetab = dc->codetab;
   1013 	for(i = 0; i < dc->nclen; i++)
   1014 		lzput(out, codetab[clenorder[i]].bits, 3);
   1015 
   1016 	codes = dc->codes;
   1017 	codeaux = dc->codeaux;
   1018 	c = dc->ncode;
   1019 	for(i = 0; i < c; i++){
   1020 		v = codes[i];
   1021 		lzput(out, codetab[v].encode, codetab[v].bits);
   1022 		if(v >= 16){
   1023 			if(v == 16)
   1024 				lzput(out, codeaux[i], 2);
   1025 			else if(v == 17)
   1026 				lzput(out, codeaux[i], 3);
   1027 			else /* v == 18 */
   1028 				lzput(out, codeaux[i], 7);
   1029 		}
   1030 	}
   1031 }
   1032 
   1033 static int
   1034 bitcost(Huff *tab, ulong *count, int n)
   1035 {
   1036 	ulong tot;
   1037 	int i;
   1038 
   1039 	tot = 0;
   1040 	for(i = 0; i < n; i++)
   1041 		tot += count[i] * tab[i].bits;
   1042 	return tot;
   1043 }
   1044 
   1045 static int
   1046 mkgzprecode(Huff *tab, ulong *count, int n, int maxbits)
   1047 {
   1048 	ulong bitcount[MaxHuffBits];
   1049 	int i, nbits;
   1050 
   1051 	nbits = mkprecode(tab, count, n, maxbits, bitcount);
   1052 	for(i = 0; i < n; i++){
   1053 		if(tab[i].bits == -1)
   1054 			tab[i].bits = 0;
   1055 		else if(tab[i].bits == 0){
   1056 			if(nbits != 0 || bitcount[0] != 1)
   1057 				return 0;
   1058 			bitcount[1] = 1;
   1059 			bitcount[0] = 0;
   1060 			nbits = 1;
   1061 			tab[i].bits = 1;
   1062 		}
   1063 	}
   1064 	if(bitcount[0] != 0)
   1065 		return 0;
   1066 	return hufftabinit(tab, n, bitcount, nbits);
   1067 }
   1068 
   1069 static int
   1070 hufftabinit(Huff *tab, int n, ulong *bitcount, int nbits)
   1071 {
   1072 	ulong code, nc[MaxHuffBits];
   1073 	int i, bits;
   1074 
   1075 	code = 0;
   1076 	for(bits = 1; bits <= nbits; bits++){
   1077 		code = (code + bitcount[bits-1]) << 1;
   1078 		nc[bits] = code;
   1079 	}
   1080 
   1081 	for(i = 0; i < n; i++){
   1082 		bits = tab[i].bits;
   1083 		if(bits){
   1084 			code = nc[bits]++ << (16 - bits);
   1085 			if(code & ~0xffff)
   1086 				return 0;
   1087 			tab[i].encode = revtab[code >> 8] | (revtab[code & 0xff] << 8);
   1088 		}
   1089 	}
   1090 	return 1;
   1091 }
   1092 
   1093 
   1094 /*
   1095  * this should be in a library
   1096  */
   1097 struct Chain
   1098 {
   1099 	ulong	count;				/* occurances of everything in the chain */
   1100 	ushort	leaf;				/* leaves to the left of chain, or leaf value */
   1101 	char	col;				/* ref count for collecting unused chains */
   1102 	char	gen;				/* need to generate chains for next lower level */
   1103 	Chain	*up;				/* Chain up in the lists */
   1104 };
   1105 
   1106 struct Chains
   1107 {
   1108 	Chain	*lists[(MaxHuffBits - 1) * 2];
   1109 	ulong	leafcount[MaxLeaf];		/* sorted list of leaf counts */
   1110 	ushort	leafmap[MaxLeaf];		/* map to actual leaf number */
   1111 	int	nleaf;				/* number of leaves */
   1112 	Chain	chains[ChainMem];
   1113 	Chain	*echains;
   1114 	Chain	*free;
   1115 	char	col;
   1116 	int	nlists;
   1117 };
   1118 
   1119 /*
   1120  * fast, low space overhead algorithm for max depth huffman type codes
   1121  *
   1122  * J. Katajainen, A. Moffat and A. Turpin, "A fast and space-economical
   1123  * algorithm for length-limited coding," Proc. Intl. Symp. on Algorithms
   1124  * and Computation, Cairns, Australia, Dec. 1995, Lecture Notes in Computer
   1125  * Science, Vol 1004, J. Staples, P. Eades, N. Katoh, and A. Moffat, eds.,
   1126  * pp 12-21, Springer Verlag, New York, 1995.
   1127  */
   1128 static int
   1129 mkprecode(Huff *tab, ulong *count, int n, int maxbits, ulong *bitcount)
   1130 {
   1131 	Chains cs;
   1132 	Chain *c;
   1133 	int i, m, em, bits;
   1134 
   1135 	memset(&cs, 0, sizeof cs);
   1136 
   1137 	/*
   1138 	 * set up the sorted list of leaves
   1139 	 */
   1140 	m = 0;
   1141 	for(i = 0; i < n; i++){
   1142 		tab[i].bits = -1;
   1143 		tab[i].encode = 0;
   1144 		if(count[i] != 0){
   1145 			cs.leafcount[m] = count[i];
   1146 			cs.leafmap[m] = i;
   1147 			m++;
   1148 		}
   1149 	}
   1150 	if(m < 2){
   1151 		if(m != 0){
   1152 			tab[cs.leafmap[0]].bits = 0;
   1153 			bitcount[0] = 1;
   1154 		}else
   1155 			bitcount[0] = 0;
   1156 		return 0;
   1157 	}
   1158 	cs.nleaf = m;
   1159 	leafsort(cs.leafcount, cs.leafmap, 0, m);
   1160 
   1161 	for(i = 0; i < m; i++)
   1162 		cs.leafcount[i] = count[cs.leafmap[i]];
   1163 
   1164 	/*
   1165 	 * set up free list
   1166 	 */
   1167 	cs.free = &cs.chains[2];
   1168 	cs.echains = &cs.chains[ChainMem];
   1169 	cs.col = 1;
   1170 
   1171 	/*
   1172 	 * initialize chains for each list
   1173 	 */
   1174 	c = &cs.chains[0];
   1175 	c->count = cs.leafcount[0];
   1176 	c->leaf = 1;
   1177 	c->col = cs.col;
   1178 	c->up = nil;
   1179 	c->gen = 0;
   1180 	cs.chains[1] = cs.chains[0];
   1181 	cs.chains[1].leaf = 2;
   1182 	cs.chains[1].count = cs.leafcount[1];
   1183 	for(i = 0; i < maxbits-1; i++){
   1184 		cs.lists[i * 2] = &cs.chains[0];
   1185 		cs.lists[i * 2 + 1] = &cs.chains[1];
   1186 	}
   1187 
   1188 	cs.nlists = 2 * (maxbits - 1);
   1189 	m = 2 * m - 2;
   1190 	for(i = 2; i < m; i++)
   1191 		nextchain(&cs, cs.nlists - 2);
   1192 
   1193 	bits = 0;
   1194 	bitcount[0] = cs.nleaf;
   1195 	for(c = cs.lists[cs.nlists - 1]; c != nil; c = c->up){
   1196 		m = c->leaf;
   1197 		bitcount[bits++] -= m;
   1198 		bitcount[bits] = m;
   1199 	}
   1200 	m = 0;
   1201 	for(i = bits; i >= 0; i--)
   1202 		for(em = m + bitcount[i]; m < em; m++)
   1203 			tab[cs.leafmap[m]].bits = i;
   1204 
   1205 	return bits;
   1206 }
   1207 
   1208 /*
   1209  * calculate the next chain on the list
   1210  * we can always toss out the old chain
   1211  */
   1212 static void
   1213 nextchain(Chains *cs, int list)
   1214 {
   1215 	Chain *c, *oc;
   1216 	int i, nleaf, sumc;
   1217 
   1218 	oc = cs->lists[list + 1];
   1219 	cs->lists[list] = oc;
   1220 	if(oc == nil)
   1221 		return;
   1222 
   1223 	/*
   1224 	 * make sure we have all chains needed to make sumc
   1225 	 * note it is possible to generate only one of these,
   1226 	 * use twice that value for sumc, and then generate
   1227 	 * the second if that preliminary sumc would be chosen.
   1228 	 * however, this appears to be slower on current tests
   1229 	 */
   1230 	if(oc->gen){
   1231 		nextchain(cs, list - 2);
   1232 		nextchain(cs, list - 2);
   1233 		oc->gen = 0;
   1234 	}
   1235 
   1236 	/*
   1237 	 * pick up the chain we're going to add;
   1238 	 * collect unused chains no free ones are left
   1239 	 */
   1240 	for(c = cs->free; ; c++){
   1241 		if(c >= cs->echains){
   1242 			cs->col++;
   1243 			for(i = 0; i < cs->nlists; i++)
   1244 				for(c = cs->lists[i]; c != nil; c = c->up)
   1245 					c->col = cs->col;
   1246 			c = cs->chains;
   1247 		}
   1248 		if(c->col != cs->col)
   1249 			break;
   1250 	}
   1251 
   1252 	/*
   1253 	 * pick the cheapest of
   1254 	 * 1) the next package from the previous list
   1255 	 * 2) the next leaf
   1256 	 */
   1257 	nleaf = oc->leaf;
   1258 	sumc = 0;
   1259 	if(list > 0 && cs->lists[list-1] != nil)
   1260 		sumc = cs->lists[list-2]->count + cs->lists[list-1]->count;
   1261 	if(sumc != 0 && (nleaf >= cs->nleaf || cs->leafcount[nleaf] > sumc)){
   1262 		c->count = sumc;
   1263 		c->leaf = oc->leaf;
   1264 		c->up = cs->lists[list-1];
   1265 		c->gen = 1;
   1266 	}else if(nleaf >= cs->nleaf){
   1267 		cs->lists[list + 1] = nil;
   1268 		return;
   1269 	}else{
   1270 		c->leaf = nleaf + 1;
   1271 		c->count = cs->leafcount[nleaf];
   1272 		c->up = oc->up;
   1273 		c->gen = 0;
   1274 	}
   1275 	cs->free = c + 1;
   1276 
   1277 	cs->lists[list + 1] = c;
   1278 	c->col = cs->col;
   1279 }
   1280 
   1281 static int
   1282 pivot(ulong *c, int a, int n)
   1283 {
   1284 	int j, pi, pj, pk;
   1285 
   1286 	j = n/6;
   1287 	pi = a + j;	/* 1/6 */
   1288 	j += j;
   1289 	pj = pi + j;	/* 1/2 */
   1290 	pk = pj + j;	/* 5/6 */
   1291 	if(c[pi] < c[pj]){
   1292 		if(c[pi] < c[pk]){
   1293 			if(c[pj] < c[pk])
   1294 				return pj;
   1295 			return pk;
   1296 		}
   1297 		return pi;
   1298 	}
   1299 	if(c[pj] < c[pk]){
   1300 		if(c[pi] < c[pk])
   1301 			return pi;
   1302 		return pk;
   1303 	}
   1304 	return pj;
   1305 }
   1306 
   1307 static	void
   1308 leafsort(ulong *leafcount, ushort *leafmap, int a, int n)
   1309 {
   1310 	ulong t;
   1311 	int j, pi, pj, pn;
   1312 
   1313 	while(n > 1){
   1314 		if(n > 10){
   1315 			pi = pivot(leafcount, a, n);
   1316 		}else
   1317 			pi = a + (n>>1);
   1318 
   1319 		t = leafcount[pi];
   1320 		leafcount[pi] = leafcount[a];
   1321 		leafcount[a] = t;
   1322 		t = leafmap[pi];
   1323 		leafmap[pi] = leafmap[a];
   1324 		leafmap[a] = t;
   1325 		pi = a;
   1326 		pn = a + n;
   1327 		pj = pn;
   1328 		for(;;){
   1329 			do
   1330 				pi++;
   1331 			while(pi < pn && (leafcount[pi] < leafcount[a] || leafcount[pi] == leafcount[a] && leafmap[pi] > leafmap[a]));
   1332 			do
   1333 				pj--;
   1334 			while(pj > a && (leafcount[pj] > leafcount[a] || leafcount[pj] == leafcount[a] && leafmap[pj] < leafmap[a]));
   1335 			if(pj < pi)
   1336 				break;
   1337 			t = leafcount[pi];
   1338 			leafcount[pi] = leafcount[pj];
   1339 			leafcount[pj] = t;
   1340 			t = leafmap[pi];
   1341 			leafmap[pi] = leafmap[pj];
   1342 			leafmap[pj] = t;
   1343 		}
   1344 		t = leafcount[a];
   1345 		leafcount[a] = leafcount[pj];
   1346 		leafcount[pj] = t;
   1347 		t = leafmap[a];
   1348 		leafmap[a] = leafmap[pj];
   1349 		leafmap[pj] = t;
   1350 		j = pj - a;
   1351 
   1352 		n = n-j-1;
   1353 		if(j >= n){
   1354 			leafsort(leafcount, leafmap, a, j);
   1355 			a += j+1;
   1356 		}else{
   1357 			leafsort(leafcount, leafmap, a + (j+1), n);
   1358 			n = j;
   1359 		}
   1360 	}
   1361 }