whack.c (6419B)
1 #include "stdinc.h" 2 #include "whack.h" 3 4 typedef struct Huff Huff; 5 int compressblocks = 1; 6 7 enum 8 { 9 MaxFastLen = 9, 10 BigLenCode = 0x1f4, /* minimum code for large lenth encoding */ 11 BigLenBits = 9, 12 BigLenBase = 4, /* starting items to encode for big lens */ 13 14 MinOffBits = 6, 15 MaxOffBits = MinOffBits + 8, 16 17 MaxLen = 2051 /* max. length encodable in 24 bits */ 18 }; 19 20 enum 21 { 22 StatBytes, 23 StatOutBytes, 24 StatLits, 25 StatMatches, 26 StatLitBits, 27 StatOffBits, 28 StatLenBits, 29 30 MaxStat 31 }; 32 33 struct Huff 34 { 35 short bits; /* length of the code */ 36 ulong encode; /* the code */ 37 }; 38 39 static Huff lentab[MaxFastLen] = 40 { 41 {2, 0x2}, /* 10 */ 42 {3, 0x6}, /* 110 */ 43 {5, 0x1c}, /* 11100 */ 44 {5, 0x1d}, /* 11101 */ 45 {6, 0x3c}, /* 111100 */ 46 {7, 0x7a}, /* 1111010 */ 47 {7, 0x7b}, /* 1111011 */ 48 {8, 0xf8}, /* 11111000 */ 49 {8, 0xf9}, /* 11111001 */ 50 }; 51 52 static int thwmaxcheck; 53 54 void 55 whackinit(Whack *tw, int level) 56 { 57 thwmaxcheck = (1 << level); 58 thwmaxcheck -= thwmaxcheck >> 2; 59 if(thwmaxcheck < 2) 60 thwmaxcheck = 2; 61 else if(thwmaxcheck > 1024) 62 thwmaxcheck = 1024; 63 memset(tw, 0, sizeof *tw); 64 tw->begin = 2 * WhackMaxOff; 65 } 66 67 /* 68 * find a string in the dictionary 69 */ 70 static int 71 whackmatch(Whack *b, uchar **ss, uchar *esrc, ulong h, ulong now) 72 { 73 ushort then, off, last; 74 int bestoff, bestlen, check; 75 uchar *s, *t; 76 77 s = *ss; 78 if(esrc < s + MinMatch) 79 return -1; 80 if(s + MaxLen < esrc) 81 esrc = s + MaxLen; 82 83 bestoff = 0; 84 bestlen = 0; 85 check = thwmaxcheck; 86 last = 0; 87 for(then = b->hash[h]; check-- > 0; then = b->next[then & (WhackMaxOff - 1)]){ 88 off = now - then; 89 if(off <= last || off > WhackMaxOff) 90 break; 91 92 /* 93 * don't need to check for the end because 94 * 1) s too close check above 95 */ 96 t = s - off; 97 if(s[0] == t[0] && s[1] == t[1] && s[2] == t[2]){ 98 if(!bestlen || esrc - s > bestlen && s[bestlen] == t[bestlen]){ 99 t += 3; 100 for(s += 3; s < esrc; s++){ 101 if(*s != *t) 102 break; 103 t++; 104 } 105 if(s - *ss > bestlen){ 106 bestlen = s - *ss; 107 bestoff = off; 108 if(bestlen > thwmaxcheck) 109 break; 110 } 111 } 112 } 113 s = *ss; 114 last = off; 115 } 116 *ss += bestlen; 117 return bestoff; 118 } 119 120 /* 121 * knuth vol. 3 multiplicative hashing 122 * each byte x chosen according to rules 123 * 1/4 < x < 3/10, 1/3 x < < 3/7, 4/7 < x < 2/3, 7/10 < x < 3/4 124 * with reasonable spread between the bytes & their complements 125 * 126 * the 3 byte value appears to be as almost good as the 4 byte value, 127 * and might be faster on some machines 128 */ 129 /* 130 #define hashit(c) ((((ulong)(c) * 0x6b43a9) >> (24 - HashLog)) & HashMask) 131 */ 132 #define hashit(c) (((((ulong)(c) & 0xffffff) * 0x6b43a9b5) >> (32 - HashLog)) & HashMask) 133 134 /* 135 * lz77 compression with single lookup in a hash table for each block 136 */ 137 int 138 whack(Whack *w, uchar *dst, uchar *src, int n, ulong stats[WhackStats]) 139 { 140 uchar *s, *ss, *sss, *esrc, *half, *wdst, *wdmax; 141 ulong cont, code, wbits; 142 ushort now; 143 int toff, lithist, h, len, bits, use, wnbits, lits, matches, offbits, lenbits; 144 145 if(!compressblocks || n < MinMatch) 146 return -1; 147 148 wdst = dst; 149 wdmax = dst + n; 150 151 now = w->begin; 152 s = src; 153 w->data = s; 154 155 cont = (s[0] << 16) | (s[1] << 8) | s[2]; 156 157 esrc = s + n; 158 half = s + (n >> 1); 159 wnbits = 0; 160 wbits = 0; 161 lits = 0; 162 matches = 0; 163 offbits = 0; 164 lenbits = 0; 165 lithist = ~0; 166 while(s < esrc){ 167 h = hashit(cont); 168 169 sss = s; 170 toff = whackmatch(w, &sss, esrc, h, now); 171 ss = sss; 172 173 len = ss - s; 174 for(; wnbits >= 8; wnbits -= 8){ 175 if(wdst >= wdmax){ 176 w->begin = now; 177 return -1; 178 } 179 *wdst++ = wbits >> (wnbits - 8); 180 } 181 if(len < MinMatch){ 182 toff = *s; 183 lithist = (lithist << 1) | toff < 32 | toff > 127; 184 if(lithist & 0x1e){ 185 wbits = (wbits << 9) | toff; 186 wnbits += 9; 187 }else if(lithist & 1){ 188 toff = (toff + 64) & 0xff; 189 if(toff < 96){ 190 wbits = (wbits << 10) | toff; 191 wnbits += 10; 192 }else{ 193 wbits = (wbits << 11) | toff; 194 wnbits += 11; 195 } 196 }else{ 197 wbits = (wbits << 8) | toff; 198 wnbits += 8; 199 } 200 lits++; 201 202 /* 203 * speed hack 204 * check for compression progress, bail if none achieved 205 */ 206 if(s > half){ 207 if(4 * (s - src) < 5 * lits){ 208 w->begin = now; 209 return -1; 210 } 211 half = esrc; 212 } 213 214 if(s + MinMatch <= esrc){ 215 w->next[now & (WhackMaxOff - 1)] = w->hash[h]; 216 w->hash[h] = now; 217 if(s + MinMatch < esrc) 218 cont = (cont << 8) | s[MinMatch]; 219 } 220 now++; 221 s++; 222 continue; 223 } 224 225 matches++; 226 227 /* 228 * length of match 229 */ 230 if(len > MaxLen){ 231 len = MaxLen; 232 ss = s + len; 233 } 234 len -= MinMatch; 235 if(len < MaxFastLen){ 236 bits = lentab[len].bits; 237 wbits = (wbits << bits) | lentab[len].encode; 238 wnbits += bits; 239 lenbits += bits; 240 }else{ 241 code = BigLenCode; 242 bits = BigLenBits; 243 use = BigLenBase; 244 len -= MaxFastLen; 245 while(len >= use){ 246 len -= use; 247 code = (code + use) << 1; 248 use <<= (bits & 1) ^ 1; 249 bits++; 250 } 251 252 wbits = (wbits << bits) | (code + len); 253 wnbits += bits; 254 lenbits += bits; 255 256 for(; wnbits >= 8; wnbits -= 8){ 257 if(wdst >= wdmax){ 258 w->begin = now; 259 return -1; 260 } 261 *wdst++ = wbits >> (wnbits - 8); 262 } 263 } 264 265 /* 266 * offset in history 267 */ 268 toff--; 269 for(bits = MinOffBits; toff >= (1 << bits); bits++) 270 ; 271 if(bits < MaxOffBits-1){ 272 wbits = (wbits << 3) | (bits - MinOffBits); 273 if(bits != MinOffBits) 274 bits--; 275 wnbits += bits + 3; 276 offbits += bits + 3; 277 }else{ 278 wbits = (wbits << 4) | 0xe | (bits - (MaxOffBits-1)); 279 bits--; 280 wnbits += bits + 4; 281 offbits += bits + 4; 282 } 283 wbits = (wbits << bits) | toff & ((1 << bits) - 1); 284 285 for(; s != ss; s++){ 286 if(s + MinMatch <= esrc){ 287 h = hashit(cont); 288 w->next[now & (WhackMaxOff - 1)] = w->hash[h]; 289 w->hash[h] = now; 290 if(s + MinMatch < esrc) 291 cont = (cont << 8) | s[MinMatch]; 292 } 293 now++; 294 } 295 } 296 297 w->begin = now; 298 299 stats[StatBytes] += esrc - src; 300 stats[StatLits] += lits; 301 stats[StatMatches] += matches; 302 stats[StatLitBits] += (wdst - (dst + 2)) * 8 + wnbits - offbits - lenbits; 303 stats[StatOffBits] += offbits; 304 stats[StatLenBits] += lenbits; 305 306 if(wnbits & 7){ 307 wbits <<= 8 - (wnbits & 7); 308 wnbits += 8 - (wnbits & 7); 309 } 310 for(; wnbits >= 8; wnbits -= 8){ 311 if(wdst >= wdmax) 312 return -1; 313 *wdst++ = wbits >> (wnbits - 8); 314 } 315 316 stats[StatOutBytes] += wdst - dst; 317 318 return wdst - dst; 319 } 320 321 int 322 whackblock(uchar *dst, uchar *src, int ssize) 323 { 324 Whack w; 325 ulong stats[MaxStat]; 326 int r; 327 328 whackinit(&w, 6); 329 r = whack(&w, dst, src, ssize, stats); 330 return r; 331 }