plan9port

fork of plan9port with libvec, libstr and libsdb
Log | Files | Refs | README | LICENSE

whack.c (6419B)


      1 #include "stdinc.h"
      2 #include "whack.h"
      3 
      4 typedef struct Huff	Huff;
      5 int compressblocks = 1;
      6 
      7 enum
      8 {
      9 	MaxFastLen	= 9,
     10 	BigLenCode	= 0x1f4,	/* minimum code for large lenth encoding */
     11 	BigLenBits	= 9,
     12 	BigLenBase	= 4,		/* starting items to encode for big lens */
     13 
     14 	MinOffBits	= 6,
     15 	MaxOffBits	= MinOffBits + 8,
     16 
     17 	MaxLen		= 2051		/* max. length encodable in 24 bits */
     18 };
     19 
     20 enum
     21 {
     22 	StatBytes,
     23 	StatOutBytes,
     24 	StatLits,
     25 	StatMatches,
     26 	StatLitBits,
     27 	StatOffBits,
     28 	StatLenBits,
     29 
     30 	MaxStat
     31 };
     32 
     33 struct Huff
     34 {
     35 	short	bits;				/* length of the code */
     36 	ulong	encode;				/* the code */
     37 };
     38 
     39 static	Huff	lentab[MaxFastLen] =
     40 {
     41 	{2,	0x2},		/* 10 */
     42 	{3,	0x6},		/* 110 */
     43 	{5,	0x1c},		/* 11100 */
     44 	{5,	0x1d},		/* 11101 */
     45 	{6,	0x3c},		/* 111100 */
     46 	{7,	0x7a},		/* 1111010 */
     47 	{7,	0x7b},		/* 1111011 */
     48 	{8,	0xf8},		/* 11111000 */
     49 	{8,	0xf9},		/* 11111001 */
     50 };
     51 
     52 static int	thwmaxcheck;
     53 
     54 void
     55 whackinit(Whack *tw, int level)
     56 {
     57 	thwmaxcheck = (1 << level);
     58 	thwmaxcheck -= thwmaxcheck >> 2;
     59 	if(thwmaxcheck < 2)
     60 		thwmaxcheck = 2;
     61 	else if(thwmaxcheck > 1024)
     62 		thwmaxcheck = 1024;
     63 	memset(tw, 0, sizeof *tw);
     64 	tw->begin = 2 * WhackMaxOff;
     65 }
     66 
     67 /*
     68  * find a string in the dictionary
     69  */
     70 static int
     71 whackmatch(Whack *b, uchar **ss, uchar *esrc, ulong h, ulong now)
     72 {
     73 	ushort then, off, last;
     74 	int bestoff, bestlen, check;
     75 	uchar *s, *t;
     76 
     77 	s = *ss;
     78 	if(esrc < s + MinMatch)
     79 		return -1;
     80 	if(s + MaxLen < esrc)
     81 		esrc = s + MaxLen;
     82 
     83 	bestoff = 0;
     84 	bestlen = 0;
     85 	check = thwmaxcheck;
     86 	last = 0;
     87 	for(then = b->hash[h]; check-- > 0; then = b->next[then & (WhackMaxOff - 1)]){
     88 		off = now - then;
     89 		if(off <= last || off > WhackMaxOff)
     90 			break;
     91 
     92 		/*
     93 		 * don't need to check for the end because
     94 		 * 1) s too close check above
     95 		 */
     96 		t = s - off;
     97 		if(s[0] == t[0] && s[1] == t[1] && s[2] == t[2]){
     98 			if(!bestlen || esrc - s > bestlen && s[bestlen] == t[bestlen]){
     99 				t += 3;
    100 				for(s += 3; s < esrc; s++){
    101 					if(*s != *t)
    102 						break;
    103 					t++;
    104 				}
    105 				if(s - *ss > bestlen){
    106 					bestlen = s - *ss;
    107 					bestoff = off;
    108 					if(bestlen > thwmaxcheck)
    109 						break;
    110 				}
    111 			}
    112 		}
    113 		s = *ss;
    114 		last = off;
    115 	}
    116 	*ss += bestlen;
    117 	return bestoff;
    118 }
    119 
    120 /*
    121  * knuth vol. 3 multiplicative hashing
    122  * each byte x chosen according to rules
    123  * 1/4 < x < 3/10, 1/3 x < < 3/7, 4/7 < x < 2/3, 7/10 < x < 3/4
    124  * with reasonable spread between the bytes & their complements
    125  *
    126  * the 3 byte value appears to be as almost good as the 4 byte value,
    127  * and might be faster on some machines
    128  */
    129 /*
    130 #define hashit(c)	((((ulong)(c) * 0x6b43a9) >> (24 - HashLog)) & HashMask)
    131 */
    132 #define hashit(c)	(((((ulong)(c) & 0xffffff) * 0x6b43a9b5) >> (32 - HashLog)) & HashMask)
    133 
    134 /*
    135  * lz77 compression with single lookup in a hash table for each block
    136  */
    137 int
    138 whack(Whack *w, uchar *dst, uchar *src, int n, ulong stats[WhackStats])
    139 {
    140 	uchar *s, *ss, *sss, *esrc, *half, *wdst, *wdmax;
    141 	ulong cont, code, wbits;
    142 	ushort now;
    143 	int toff, lithist, h, len, bits, use, wnbits, lits, matches, offbits, lenbits;
    144 
    145 	if(!compressblocks || n < MinMatch)
    146 		return -1;
    147 
    148 	wdst = dst;
    149 	wdmax = dst + n;
    150 
    151 	now = w->begin;
    152 	s = src;
    153 	w->data = s;
    154 
    155 	cont = (s[0] << 16) | (s[1] << 8) | s[2];
    156 
    157 	esrc = s + n;
    158 	half = s + (n >> 1);
    159 	wnbits = 0;
    160 	wbits = 0;
    161 	lits = 0;
    162 	matches = 0;
    163 	offbits = 0;
    164 	lenbits = 0;
    165 	lithist = ~0;
    166 	while(s < esrc){
    167 		h = hashit(cont);
    168 
    169 		sss = s;
    170 		toff = whackmatch(w, &sss, esrc, h, now);
    171 		ss = sss;
    172 
    173 		len = ss - s;
    174 		for(; wnbits >= 8; wnbits -= 8){
    175 			if(wdst >= wdmax){
    176 				w->begin = now;
    177 				return -1;
    178 			}
    179 			*wdst++ = wbits >> (wnbits - 8);
    180 		}
    181 		if(len < MinMatch){
    182 			toff = *s;
    183 			lithist = (lithist << 1) | toff < 32 | toff > 127;
    184 			if(lithist & 0x1e){
    185 				wbits = (wbits << 9) | toff;
    186 				wnbits += 9;
    187 			}else if(lithist & 1){
    188 				toff = (toff + 64) & 0xff;
    189 				if(toff < 96){
    190 					wbits = (wbits << 10) | toff;
    191 					wnbits += 10;
    192 				}else{
    193 					wbits = (wbits << 11) | toff;
    194 					wnbits += 11;
    195 				}
    196 			}else{
    197 				wbits = (wbits << 8) | toff;
    198 				wnbits += 8;
    199 			}
    200 			lits++;
    201 
    202 			/*
    203 			 * speed hack
    204 			 * check for compression progress, bail if none achieved
    205 			 */
    206 			if(s > half){
    207 				if(4 * (s - src) < 5 * lits){
    208 					w->begin = now;
    209 					return -1;
    210 				}
    211 				half = esrc;
    212 			}
    213 
    214 			if(s + MinMatch <= esrc){
    215 				w->next[now & (WhackMaxOff - 1)] = w->hash[h];
    216 				w->hash[h] = now;
    217 				if(s + MinMatch < esrc)
    218 					cont = (cont << 8) | s[MinMatch];
    219 			}
    220 			now++;
    221 			s++;
    222 			continue;
    223 		}
    224 
    225 		matches++;
    226 
    227 		/*
    228 		 * length of match
    229 		 */
    230 		if(len > MaxLen){
    231 			len = MaxLen;
    232 			ss = s + len;
    233 		}
    234 		len -= MinMatch;
    235 		if(len < MaxFastLen){
    236 			bits = lentab[len].bits;
    237 			wbits = (wbits << bits) | lentab[len].encode;
    238 			wnbits += bits;
    239 			lenbits += bits;
    240 		}else{
    241 			code = BigLenCode;
    242 			bits = BigLenBits;
    243 			use = BigLenBase;
    244 			len -= MaxFastLen;
    245 			while(len >= use){
    246 				len -= use;
    247 				code = (code + use) << 1;
    248 				use <<= (bits & 1) ^ 1;
    249 				bits++;
    250 			}
    251 
    252 			wbits = (wbits << bits) | (code + len);
    253 			wnbits += bits;
    254 			lenbits += bits;
    255 
    256 			for(; wnbits >= 8; wnbits -= 8){
    257 				if(wdst >= wdmax){
    258 					w->begin = now;
    259 					return -1;
    260 				}
    261 				*wdst++ = wbits >> (wnbits - 8);
    262 			}
    263 		}
    264 
    265 		/*
    266 		 * offset in history
    267 		 */
    268 		toff--;
    269 		for(bits = MinOffBits; toff >= (1 << bits); bits++)
    270 			;
    271 		if(bits < MaxOffBits-1){
    272 			wbits = (wbits << 3) | (bits - MinOffBits);
    273 			if(bits != MinOffBits)
    274 				bits--;
    275 			wnbits += bits + 3;
    276 			offbits += bits + 3;
    277 		}else{
    278 			wbits = (wbits << 4) | 0xe | (bits - (MaxOffBits-1));
    279 			bits--;
    280 			wnbits += bits + 4;
    281 			offbits += bits + 4;
    282 		}
    283 		wbits = (wbits << bits) | toff & ((1 << bits) - 1);
    284 
    285 		for(; s != ss; s++){
    286 			if(s + MinMatch <= esrc){
    287 				h = hashit(cont);
    288 				w->next[now & (WhackMaxOff - 1)] = w->hash[h];
    289 				w->hash[h] = now;
    290 				if(s + MinMatch < esrc)
    291 					cont = (cont << 8) | s[MinMatch];
    292 			}
    293 			now++;
    294 		}
    295 	}
    296 
    297 	w->begin = now;
    298 
    299 	stats[StatBytes] += esrc - src;
    300 	stats[StatLits] += lits;
    301 	stats[StatMatches] += matches;
    302 	stats[StatLitBits] += (wdst - (dst + 2)) * 8 + wnbits - offbits - lenbits;
    303 	stats[StatOffBits] += offbits;
    304 	stats[StatLenBits] += lenbits;
    305 
    306 	if(wnbits & 7){
    307 		wbits <<= 8 - (wnbits & 7);
    308 		wnbits += 8 - (wnbits & 7);
    309 	}
    310 	for(; wnbits >= 8; wnbits -= 8){
    311 		if(wdst >= wdmax)
    312 			return -1;
    313 		*wdst++ = wbits >> (wnbits - 8);
    314 	}
    315 
    316 	stats[StatOutBytes] += wdst - dst;
    317 
    318 	return wdst - dst;
    319 }
    320 
    321 int
    322 whackblock(uchar *dst, uchar *src, int ssize)
    323 {
    324 	Whack w;
    325 	ulong stats[MaxStat];
    326 	int r;
    327 
    328 	whackinit(&w, 6);
    329 	r = whack(&w, dst, src, ssize, stats);
    330 	return r;
    331 }