deflate.c (30519B)
1 #include <u.h> 2 #include <libc.h> 3 #include <flate.h> 4 5 typedef struct Chain Chain; 6 typedef struct Chains Chains; 7 typedef struct Dyncode Dyncode; 8 typedef struct Huff Huff; 9 typedef struct LZblock LZblock; 10 typedef struct LZstate LZstate; 11 12 enum 13 { 14 /* 15 * deflate format paramaters 16 */ 17 DeflateUnc = 0, /* uncompressed block */ 18 DeflateFix = 1, /* fixed huffman codes */ 19 DeflateDyn = 2, /* dynamic huffman codes */ 20 21 DeflateEob = 256, /* end of block code in lit/len book */ 22 DeflateMaxBlock = 64*1024-1, /* maximum size of uncompressed block */ 23 24 DeflateMaxExp = 10, /* maximum expansion for a block */ 25 26 LenStart = 257, /* start of length codes in litlen */ 27 Nlitlen = 288, /* number of litlen codes */ 28 Noff = 30, /* number of offset codes */ 29 Nclen = 19, /* number of codelen codes */ 30 31 MaxOff = 32*1024, 32 MinMatch = 3, /* shortest match possible */ 33 MaxMatch = 258, /* longest match possible */ 34 35 /* 36 * huffman code paramaters 37 */ 38 MaxLeaf = Nlitlen, 39 MaxHuffBits = 16, /* max bits in a huffman code */ 40 ChainMem = 2 * (MaxHuffBits - 1) * MaxHuffBits, 41 42 /* 43 * coding of the lz parse 44 */ 45 LenFlag = 1 << 3, 46 LenShift = 4, /* leaves enough space for MinMatchMaxOff */ 47 MaxLitRun = LenFlag - 1, 48 49 /* 50 * internal lz paramaters 51 */ 52 DeflateOut = 4096, /* output buffer size */ 53 BlockSize = 8192, /* attempted input read quanta */ 54 DeflateBlock = DeflateMaxBlock & ~(BlockSize - 1), 55 MinMatchMaxOff = 4096, /* max profitable offset for small match; 56 * assumes 8 bits for len, 5+10 for offset 57 * DONT CHANGE WITHOUT CHANGING LZPARSE CONSTANTS 58 */ 59 HistSlop = 512, /* must be at lead MaxMatch */ 60 HistBlock = 64*1024, 61 HistSize = HistBlock + HistSlop, 62 63 HashLog = 13, 64 HashSize = 1<<HashLog, 65 66 MaxOffCode = 256, /* biggest offset looked up in direct table */ 67 68 EstLitBits = 8, 69 EstLenBits = 4, 70 EstOffBits = 5 71 }; 72 73 /* 74 * knuth vol. 3 multiplicative hashing 75 * each byte x chosen according to rules 76 * 1/4 < x < 3/10, 1/3 x < < 3/7, 4/7 < x < 2/3, 7/10 < x < 3/4 77 * with reasonable spread between the bytes & their complements 78 * 79 * the 3 byte value appears to be as almost good as the 4 byte value, 80 * and might be faster on some machines 81 */ 82 /* 83 #define hashit(c) ((u32int)((c) * 0x6b43a9) >> (24 - HashLog)) 84 */ 85 #define hashit(c) ((u32int)(((c) & 0xffffff) * 0x6b43a9b5) >> (32 - HashLog)) 86 87 /* 88 * lempel-ziv style compression state 89 */ 90 struct LZstate 91 { 92 uchar hist[HistSize]; 93 ulong pos; /* current location in history buffer */ 94 ulong avail; /* data available after pos */ 95 int eof; 96 ushort hash[HashSize]; /* hash chains */ 97 ushort nexts[MaxOff]; 98 int now; /* pos in hash chains */ 99 int dot; /* dawn of time in history */ 100 int prevlen; /* lazy matching state */ 101 int prevoff; 102 int maxcheck; /* compressor tuning */ 103 104 uchar obuf[DeflateOut]; 105 uchar *out; /* current position in the output buffer */ 106 uchar *eout; 107 ulong bits; /* bit shift register */ 108 int nbits; 109 int rbad; /* got an error reading the buffer */ 110 int wbad; /* got an error writing the buffer */ 111 int (*w)(void*, void*, int); 112 void *wr; 113 114 ulong totr; /* total input size */ 115 ulong totw; /* total output size */ 116 int debug; 117 }; 118 119 struct LZblock 120 { 121 ushort parse[DeflateMaxBlock / 2 + 1]; 122 int lastv; /* value being constucted for parse */ 123 ulong litlencount[Nlitlen]; 124 ulong offcount[Noff]; 125 ushort *eparse; /* limit for parse table */ 126 int bytes; /* consumed from the input */ 127 int excost; /* cost of encoding extra len & off bits */ 128 }; 129 130 /* 131 * huffman code table 132 */ 133 struct Huff 134 { 135 short bits; /* length of the code */ 136 ushort encode; /* the code */ 137 }; 138 139 /* 140 * encoding of dynamic huffman trees 141 */ 142 struct Dyncode 143 { 144 int nlit; 145 int noff; 146 int nclen; 147 int ncode; 148 Huff codetab[Nclen]; 149 uchar codes[Nlitlen+Noff]; 150 uchar codeaux[Nlitlen+Noff]; 151 }; 152 153 static int deflateb(LZstate *lz, LZblock *lzb, void *rr, int (*r)(void*, void*, int)); 154 static int lzcomp(LZstate*, LZblock*, uchar*, ushort*, int finish); 155 static void wrblock(LZstate*, int, ushort*, ushort*, Huff*, Huff*); 156 static int bitcost(Huff*, ulong*, int); 157 static int huffcodes(Dyncode*, Huff*, Huff*); 158 static void wrdyncode(LZstate*, Dyncode*); 159 static void lzput(LZstate*, ulong bits, int nbits); 160 static void lzflushbits(LZstate*); 161 static void lzflush(LZstate *lz); 162 static void lzwrite(LZstate *lz, void *buf, int n); 163 164 static int hufftabinit(Huff*, int, ulong*, int); 165 static int mkgzprecode(Huff*, ulong *, int, int); 166 167 static int mkprecode(Huff*, ulong *, int, int, ulong*); 168 static void nextchain(Chains*, int); 169 static void leafsort(ulong*, ushort*, int, int); 170 171 /* conversion from len to code word */ 172 static int lencode[MaxMatch]; 173 174 /* 175 * conversion from off to code word 176 * off <= MaxOffCode ? offcode[off] : bigoffcode[off >> 7] 177 */ 178 static int offcode[MaxOffCode]; 179 static int bigoffcode[256]; 180 181 /* litlen code words LenStart-285 extra bits */ 182 static int litlenbase[Nlitlen-LenStart]; 183 static int litlenextra[Nlitlen-LenStart] = 184 { 185 /* 257 */ 0, 0, 0, 186 /* 260 */ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 187 /* 270 */ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 188 /* 280 */ 4, 5, 5, 5, 5, 0, 0, 0 189 }; 190 191 /* offset code word extra bits */ 192 static int offbase[Noff]; 193 static int offextra[] = 194 { 195 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 196 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 197 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 198 0, 0, 199 }; 200 201 /* order code lengths */ 202 static int clenorder[Nclen] = 203 { 204 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 205 }; 206 207 /* static huffman tables */ 208 static Huff litlentab[Nlitlen]; 209 static Huff offtab[Noff]; 210 static Huff hofftab[Noff]; 211 212 /* bit reversal for brain dead endian swap in huffman codes */ 213 static uchar revtab[256]; 214 static ulong nlits; 215 static ulong nmatches; 216 217 int 218 deflateinit(void) 219 { 220 ulong bitcount[MaxHuffBits]; 221 int i, j, ci, n; 222 223 /* byte reverse table */ 224 for(i=0; i<256; i++) 225 for(j=0; j<8; j++) 226 if(i & (1<<j)) 227 revtab[i] |= 0x80 >> j; 228 229 /* static Litlen bit lengths */ 230 for(i=0; i<144; i++) 231 litlentab[i].bits = 8; 232 for(i=144; i<256; i++) 233 litlentab[i].bits = 9; 234 for(i=256; i<280; i++) 235 litlentab[i].bits = 7; 236 for(i=280; i<Nlitlen; i++) 237 litlentab[i].bits = 8; 238 239 memset(bitcount, 0, sizeof(bitcount)); 240 bitcount[8] += 144 - 0; 241 bitcount[9] += 256 - 144; 242 bitcount[7] += 280 - 256; 243 bitcount[8] += Nlitlen - 280; 244 245 if(!hufftabinit(litlentab, Nlitlen, bitcount, 9)) 246 return FlateInternal; 247 248 /* static offset bit lengths */ 249 for(i = 0; i < Noff; i++) 250 offtab[i].bits = 5; 251 252 memset(bitcount, 0, sizeof(bitcount)); 253 bitcount[5] = Noff; 254 255 if(!hufftabinit(offtab, Noff, bitcount, 5)) 256 return FlateInternal; 257 258 bitcount[0] = 0; 259 bitcount[1] = 0; 260 if(!mkgzprecode(hofftab, bitcount, 2, MaxHuffBits)) 261 return FlateInternal; 262 263 /* conversion tables for lens & offs to codes */ 264 ci = 0; 265 for(i = LenStart; i < 286; i++){ 266 n = ci + (1 << litlenextra[i - LenStart]); 267 litlenbase[i - LenStart] = ci; 268 for(; ci < n; ci++) 269 lencode[ci] = i; 270 } 271 /* patch up special case for len MaxMatch */ 272 lencode[MaxMatch-MinMatch] = 285; 273 litlenbase[285-LenStart] = MaxMatch-MinMatch; 274 275 ci = 0; 276 for(i = 0; i < 16; i++){ 277 n = ci + (1 << offextra[i]); 278 offbase[i] = ci; 279 for(; ci < n; ci++) 280 offcode[ci] = i; 281 } 282 283 ci = ci >> 7; 284 for(; i < 30; i++){ 285 n = ci + (1 << (offextra[i] - 7)); 286 offbase[i] = ci << 7; 287 for(; ci < n; ci++) 288 bigoffcode[ci] = i; 289 } 290 return FlateOk; 291 } 292 293 static void 294 deflatereset(LZstate *lz, int level, int debug) 295 { 296 memset(lz->nexts, 0, sizeof lz->nexts); 297 memset(lz->hash, 0, sizeof lz->hash); 298 lz->totr = 0; 299 lz->totw = 0; 300 lz->pos = 0; 301 lz->avail = 0; 302 lz->out = lz->obuf; 303 lz->eout = &lz->obuf[DeflateOut]; 304 lz->prevlen = MinMatch - 1; 305 lz->prevoff = 0; 306 lz->now = MaxOff + 1; 307 lz->dot = lz->now; 308 lz->bits = 0; 309 lz->nbits = 0; 310 lz->maxcheck = (1 << level); 311 lz->maxcheck -= lz->maxcheck >> 2; 312 if(lz->maxcheck < 2) 313 lz->maxcheck = 2; 314 else if(lz->maxcheck > 1024) 315 lz->maxcheck = 1024; 316 317 lz->debug = debug; 318 } 319 320 int 321 deflate(void *wr, int (*w)(void*, void*, int), void *rr, int (*r)(void*, void*, int), int level, int debug) 322 { 323 LZstate *lz; 324 LZblock *lzb; 325 int ok; 326 327 lz = malloc(sizeof *lz + sizeof *lzb); 328 if(lz == nil) 329 return FlateNoMem; 330 lzb = (LZblock*)&lz[1]; 331 332 deflatereset(lz, level, debug); 333 lz->w = w; 334 lz->wr = wr; 335 lz->wbad = 0; 336 lz->rbad = 0; 337 lz->eof = 0; 338 ok = FlateOk; 339 while(!lz->eof || lz->avail){ 340 ok = deflateb(lz, lzb, rr, r); 341 if(ok != FlateOk) 342 break; 343 } 344 if(ok == FlateOk && lz->rbad) 345 ok = FlateInputFail; 346 if(ok == FlateOk && lz->wbad) 347 ok = FlateOutputFail; 348 free(lz); 349 return ok; 350 } 351 352 static int 353 deflateb(LZstate *lz, LZblock *lzb, void *rr, int (*r)(void*, void*, int)) 354 { 355 Dyncode dyncode, hdyncode; 356 Huff dlitlentab[Nlitlen], dofftab[Noff], hlitlentab[Nlitlen]; 357 ulong litcount[Nlitlen]; 358 long nunc, ndyn, nfix, nhuff; 359 uchar *slop, *hslop; 360 ulong ep; 361 int i, n, m, mm, nslop; 362 363 memset(lzb->litlencount, 0, sizeof lzb->litlencount); 364 memset(lzb->offcount, 0, sizeof lzb->offcount); 365 lzb->litlencount[DeflateEob]++; 366 367 lzb->bytes = 0; 368 lzb->eparse = lzb->parse; 369 lzb->lastv = 0; 370 lzb->excost = 0; 371 372 slop = &lz->hist[lz->pos]; 373 n = lz->avail; 374 while(n < DeflateBlock && (!lz->eof || lz->avail)){ 375 /* 376 * fill the buffer as much as possible, 377 * while leaving room for MaxOff history behind lz->pos, 378 * and not reading more than we can handle. 379 * 380 * make sure we read at least HistSlop bytes. 381 */ 382 if(!lz->eof){ 383 ep = lz->pos + lz->avail; 384 if(ep >= HistBlock) 385 ep -= HistBlock; 386 m = HistBlock - MaxOff - lz->avail; 387 if(m > HistBlock - n) 388 m = HistBlock - n; 389 if(m > (HistBlock + HistSlop) - ep) 390 m = (HistBlock + HistSlop) - ep; 391 if(m & ~(BlockSize - 1)) 392 m &= ~(BlockSize - 1); 393 394 /* 395 * be nice to the caller: stop reads that are too small. 396 * can only get here when we've already filled the buffer some 397 */ 398 if(m < HistSlop){ 399 if(!m || !lzb->bytes) 400 return FlateInternal; 401 break; 402 } 403 404 mm = (*r)(rr, &lz->hist[ep], m); 405 if(mm > 0){ 406 /* 407 * wrap data to end if we're read it from the beginning 408 * this way, we don't have to wrap searches. 409 * 410 * wrap reads past the end to the beginning. 411 * this way, we can guarantee minimum size reads. 412 */ 413 if(ep < HistSlop) 414 memmove(&lz->hist[ep + HistBlock], &lz->hist[ep], HistSlop - ep); 415 else if(ep + mm > HistBlock) 416 memmove(&lz->hist[0], &lz->hist[HistBlock], ep + mm - HistBlock); 417 418 lz->totr += mm; 419 n += mm; 420 lz->avail += mm; 421 }else{ 422 if(mm < 0) 423 lz->rbad = 1; 424 lz->eof = 1; 425 } 426 } 427 ep = lz->pos + lz->avail; 428 if(ep > HistSize) 429 ep = HistSize; 430 if(lzb->bytes + ep - lz->pos > DeflateMaxBlock) 431 ep = lz->pos + DeflateMaxBlock - lzb->bytes; 432 m = lzcomp(lz, lzb, &lz->hist[ep], lzb->eparse, lz->eof); 433 lzb->bytes += m; 434 lz->pos = (lz->pos + m) & (HistBlock - 1); 435 lz->avail -= m; 436 } 437 if(lzb->lastv) 438 *lzb->eparse++ = lzb->lastv; 439 if(lzb->eparse > lzb->parse + nelem(lzb->parse)) 440 return FlateInternal; 441 nunc = lzb->bytes; 442 443 if(!mkgzprecode(dlitlentab, lzb->litlencount, Nlitlen, MaxHuffBits) 444 || !mkgzprecode(dofftab, lzb->offcount, Noff, MaxHuffBits)) 445 return FlateInternal; 446 447 ndyn = huffcodes(&dyncode, dlitlentab, dofftab); 448 if(ndyn < 0) 449 return FlateInternal; 450 ndyn += bitcost(dlitlentab, lzb->litlencount, Nlitlen) 451 + bitcost(dofftab, lzb->offcount, Noff) 452 + lzb->excost; 453 454 memset(litcount, 0, sizeof litcount); 455 456 nslop = nunc; 457 if(nslop > &lz->hist[HistSize] - slop) 458 nslop = &lz->hist[HistSize] - slop; 459 460 for(i = 0; i < nslop; i++) 461 litcount[slop[i]]++; 462 hslop = &lz->hist[HistSlop - nslop]; 463 for(; i < nunc; i++) 464 litcount[hslop[i]]++; 465 litcount[DeflateEob]++; 466 467 if(!mkgzprecode(hlitlentab, litcount, Nlitlen, MaxHuffBits)) 468 return FlateInternal; 469 nhuff = huffcodes(&hdyncode, hlitlentab, hofftab); 470 if(nhuff < 0) 471 return FlateInternal; 472 nhuff += bitcost(hlitlentab, litcount, Nlitlen); 473 474 nfix = bitcost(litlentab, lzb->litlencount, Nlitlen) 475 + bitcost(offtab, lzb->offcount, Noff) 476 + lzb->excost; 477 478 lzput(lz, lz->eof && !lz->avail, 1); 479 480 if(lz->debug){ 481 fprint(2, "block: bytes=%lud entries=%ld extra bits=%d\n\tuncompressed=%lud fixed=%lud dynamic=%lud huffman=%lud\n", 482 nunc, lzb->eparse - lzb->parse, lzb->excost, (nunc + 4) * 8, nfix, ndyn, nhuff); 483 fprint(2, "\tnlit=%lud matches=%lud eof=%d\n", nlits, nmatches, lz->eof && !lz->avail); 484 } 485 486 if((nunc + 4) * 8 < ndyn && (nunc + 4) * 8 < nfix && (nunc + 4) * 8 < nhuff){ 487 lzput(lz, DeflateUnc, 2); 488 lzflushbits(lz); 489 490 lzput(lz, nunc & 0xff, 8); 491 lzput(lz, (nunc >> 8) & 0xff, 8); 492 lzput(lz, ~nunc & 0xff, 8); 493 lzput(lz, (~nunc >> 8) & 0xff, 8); 494 lzflush(lz); 495 496 lzwrite(lz, slop, nslop); 497 lzwrite(lz, &lz->hist[HistSlop], nunc - nslop); 498 }else if(ndyn < nfix && ndyn < nhuff){ 499 lzput(lz, DeflateDyn, 2); 500 501 wrdyncode(lz, &dyncode); 502 wrblock(lz, slop - lz->hist, lzb->parse, lzb->eparse, dlitlentab, dofftab); 503 lzput(lz, dlitlentab[DeflateEob].encode, dlitlentab[DeflateEob].bits); 504 }else if(nhuff < nfix){ 505 lzput(lz, DeflateDyn, 2); 506 507 wrdyncode(lz, &hdyncode); 508 509 m = 0; 510 for(i = nunc; i > MaxLitRun; i -= MaxLitRun) 511 lzb->parse[m++] = MaxLitRun; 512 lzb->parse[m++] = i; 513 514 wrblock(lz, slop - lz->hist, lzb->parse, lzb->parse + m, hlitlentab, hofftab); 515 lzput(lz, hlitlentab[DeflateEob].encode, hlitlentab[DeflateEob].bits); 516 }else{ 517 lzput(lz, DeflateFix, 2); 518 519 wrblock(lz, slop - lz->hist, lzb->parse, lzb->eparse, litlentab, offtab); 520 lzput(lz, litlentab[DeflateEob].encode, litlentab[DeflateEob].bits); 521 } 522 523 if(lz->eof && !lz->avail){ 524 lzflushbits(lz); 525 lzflush(lz); 526 } 527 return FlateOk; 528 } 529 530 static void 531 lzwrite(LZstate *lz, void *buf, int n) 532 { 533 int nw; 534 535 if(n && lz->w){ 536 nw = (*lz->w)(lz->wr, buf, n); 537 if(nw != n){ 538 lz->w = 0; 539 lz->wbad = 1; 540 }else 541 lz->totw += n; 542 } 543 } 544 545 static void 546 lzflush(LZstate *lz) 547 { 548 lzwrite(lz, lz->obuf, lz->out - lz->obuf); 549 lz->out = lz->obuf; 550 } 551 552 static void 553 lzput(LZstate *lz, ulong bits, int nbits) 554 { 555 bits = (bits << lz->nbits) | lz->bits; 556 for(nbits += lz->nbits; nbits >= 8; nbits -= 8){ 557 *lz->out++ = bits; 558 if(lz->out == lz->eout) 559 lzflush(lz); 560 bits >>= 8; 561 } 562 lz->bits = bits; 563 lz->nbits = nbits; 564 } 565 566 static void 567 lzflushbits(LZstate *lz) 568 { 569 if(lz->nbits) 570 lzput(lz, 0, 8 - (lz->nbits & 7)); 571 } 572 573 /* 574 * write out a block of n samples, 575 * given lz encoding and counts for huffman tables 576 */ 577 static void 578 wrblock(LZstate *out, int litoff, ushort *soff, ushort *eoff, Huff *litlentab, Huff *offtab) 579 { 580 ushort *off; 581 int i, run, offset, lit, len, c; 582 583 if(out->debug > 2){ 584 for(off = soff; off < eoff; ){ 585 offset = *off++; 586 run = offset & MaxLitRun; 587 if(run){ 588 for(i = 0; i < run; i++){ 589 lit = out->hist[litoff & (HistBlock - 1)]; 590 litoff++; 591 fprint(2, "\tlit %.2ux %c\n", lit, lit); 592 } 593 if(!(offset & LenFlag)) 594 continue; 595 len = offset >> LenShift; 596 offset = *off++; 597 }else if(offset & LenFlag){ 598 len = offset >> LenShift; 599 offset = *off++; 600 }else{ 601 len = 0; 602 offset >>= LenShift; 603 } 604 litoff += len + MinMatch; 605 fprint(2, "\t<%d, %d>\n", offset + 1, len + MinMatch); 606 } 607 } 608 609 for(off = soff; off < eoff; ){ 610 offset = *off++; 611 run = offset & MaxLitRun; 612 if(run){ 613 for(i = 0; i < run; i++){ 614 lit = out->hist[litoff & (HistBlock - 1)]; 615 litoff++; 616 lzput(out, litlentab[lit].encode, litlentab[lit].bits); 617 } 618 if(!(offset & LenFlag)) 619 continue; 620 len = offset >> LenShift; 621 offset = *off++; 622 }else if(offset & LenFlag){ 623 len = offset >> LenShift; 624 offset = *off++; 625 }else{ 626 len = 0; 627 offset >>= LenShift; 628 } 629 litoff += len + MinMatch; 630 c = lencode[len]; 631 lzput(out, litlentab[c].encode, litlentab[c].bits); 632 c -= LenStart; 633 if(litlenextra[c]) 634 lzput(out, len - litlenbase[c], litlenextra[c]); 635 636 if(offset < MaxOffCode) 637 c = offcode[offset]; 638 else 639 c = bigoffcode[offset >> 7]; 640 lzput(out, offtab[c].encode, offtab[c].bits); 641 if(offextra[c]) 642 lzput(out, offset - offbase[c], offextra[c]); 643 } 644 } 645 646 /* 647 * look for the longest, closest string which matches 648 * the next prefix. the clever part here is looking for 649 * a string 1 longer than the previous best match. 650 * 651 * follows the recommendation of limiting number of chains 652 * which are checked. this appears to be the best heuristic. 653 */ 654 static int 655 lzmatch(int now, int then, uchar *p, uchar *es, ushort *nexts, uchar *hist, int runlen, int check, int *m) 656 { 657 uchar *s, *t; 658 int ml, off, last; 659 660 ml = check; 661 if(runlen >= 8) 662 check >>= 2; 663 *m = 0; 664 if(p + runlen >= es) 665 return runlen; 666 last = 0; 667 for(; check-- > 0; then = nexts[then & (MaxOff-1)]){ 668 off = (ushort)(now - then); 669 if(off <= last || off > MaxOff) 670 break; 671 s = p + runlen; 672 t = hist + (((p - hist) - off) & (HistBlock-1)); 673 t += runlen; 674 for(; s >= p; s--){ 675 if(*s != *t) 676 goto matchloop; 677 t--; 678 } 679 680 /* 681 * we have a new best match. 682 * extend it to it's maximum length 683 */ 684 t += runlen + 2; 685 s += runlen + 2; 686 for(; s < es; s++){ 687 if(*s != *t) 688 break; 689 t++; 690 } 691 runlen = s - p; 692 *m = off - 1; 693 if(s == es || runlen > ml) 694 break; 695 matchloop:; 696 last = off; 697 } 698 return runlen; 699 } 700 701 static int 702 lzcomp(LZstate *lz, LZblock *lzb, uchar *ep, ushort *parse, int finish) 703 { 704 ulong cont, excost, *litlencount, *offcount; 705 uchar *p, *q, *s, *es; 706 ushort *nexts, *hash; 707 int v, i, h, runlen, n, now, then, m, prevlen, prevoff, maxdefer; 708 709 litlencount = lzb->litlencount; 710 offcount = lzb->offcount; 711 nexts = lz->nexts; 712 hash = lz->hash; 713 now = lz->now; 714 715 p = &lz->hist[lz->pos]; 716 if(lz->prevlen != MinMatch - 1) 717 p++; 718 719 /* 720 * hash in the links for any hanging link positions, 721 * and calculate the hash for the current position. 722 */ 723 n = MinMatch; 724 if(n > ep - p) 725 n = ep - p; 726 cont = 0; 727 for(i = 0; i < n - 1; i++){ 728 m = now - ((MinMatch-1) - i); 729 if(m < lz->dot) 730 continue; 731 s = lz->hist + (((p - lz->hist) - (now - m)) & (HistBlock-1)); 732 733 cont = (s[0] << 16) | (s[1] << 8) | s[2]; 734 h = hashit(cont); 735 prevoff = 0; 736 for(then = hash[h]; ; then = nexts[then & (MaxOff-1)]){ 737 v = (ushort)(now - then); 738 if(v <= prevoff || v >= (MinMatch-1) - i) 739 break; 740 prevoff = v; 741 } 742 if(then == (ushort)m) 743 continue; 744 nexts[m & (MaxOff-1)] = hash[h]; 745 hash[h] = m; 746 } 747 for(i = 0; i < n; i++) 748 cont = (cont << 8) | p[i]; 749 750 /* 751 * now must point to the index in the nexts array 752 * corresponding to p's position in the history 753 */ 754 prevlen = lz->prevlen; 755 prevoff = lz->prevoff; 756 maxdefer = lz->maxcheck >> 2; 757 excost = 0; 758 v = lzb->lastv; 759 for(;;){ 760 es = p + MaxMatch; 761 if(es > ep){ 762 if(!finish || p >= ep) 763 break; 764 es = ep; 765 } 766 767 h = hashit(cont); 768 runlen = lzmatch(now, hash[h], p, es, nexts, lz->hist, prevlen, lz->maxcheck, &m); 769 770 /* 771 * back out of small matches too far in the past 772 */ 773 if(runlen == MinMatch && m >= MinMatchMaxOff){ 774 runlen = MinMatch - 1; 775 m = 0; 776 } 777 778 /* 779 * record the encoding and increment counts for huffman trees 780 * if we get a match, defer selecting it until we check for 781 * a longer match at the next position. 782 */ 783 if(prevlen >= runlen && prevlen != MinMatch - 1){ 784 /* 785 * old match at least as good; use that one 786 */ 787 n = prevlen - MinMatch; 788 if(v || n){ 789 *parse++ = v | LenFlag | (n << LenShift); 790 *parse++ = prevoff; 791 }else 792 *parse++ = prevoff << LenShift; 793 v = 0; 794 795 n = lencode[n]; 796 litlencount[n]++; 797 excost += litlenextra[n - LenStart]; 798 799 if(prevoff < MaxOffCode) 800 n = offcode[prevoff]; 801 else 802 n = bigoffcode[prevoff >> 7]; 803 offcount[n]++; 804 excost += offextra[n]; 805 806 runlen = prevlen - 1; 807 prevlen = MinMatch - 1; 808 nmatches++; 809 }else if(runlen == MinMatch - 1){ 810 /* 811 * no match; just put out the literal 812 */ 813 if(++v == MaxLitRun){ 814 *parse++ = v; 815 v = 0; 816 } 817 litlencount[*p]++; 818 nlits++; 819 runlen = 1; 820 }else{ 821 if(prevlen != MinMatch - 1){ 822 /* 823 * longer match now. output previous literal, 824 * update current match, and try again 825 */ 826 if(++v == MaxLitRun){ 827 *parse++ = v; 828 v = 0; 829 } 830 litlencount[p[-1]]++; 831 nlits++; 832 } 833 834 prevoff = m; 835 836 if(runlen < maxdefer){ 837 prevlen = runlen; 838 runlen = 1; 839 }else{ 840 n = runlen - MinMatch; 841 if(v || n){ 842 *parse++ = v | LenFlag | (n << LenShift); 843 *parse++ = prevoff; 844 }else 845 *parse++ = prevoff << LenShift; 846 v = 0; 847 848 n = lencode[n]; 849 litlencount[n]++; 850 excost += litlenextra[n - LenStart]; 851 852 if(prevoff < MaxOffCode) 853 n = offcode[prevoff]; 854 else 855 n = bigoffcode[prevoff >> 7]; 856 offcount[n]++; 857 excost += offextra[n]; 858 859 prevlen = MinMatch - 1; 860 nmatches++; 861 } 862 } 863 864 /* 865 * update the hash for the newly matched data 866 * this is constructed so the link for the old 867 * match in this position must be at the end of a chain, 868 * and will expire when this match is added, ie it will 869 * never be examined by the match loop. 870 * add to the hash chain only if we have the real hash data. 871 */ 872 for(q = p + runlen; p != q; p++){ 873 if(p + MinMatch <= ep){ 874 h = hashit(cont); 875 nexts[now & (MaxOff-1)] = hash[h]; 876 hash[h] = now; 877 if(p + MinMatch < ep) 878 cont = (cont << 8) | p[MinMatch]; 879 } 880 now++; 881 } 882 } 883 884 /* 885 * we can just store away the lazy state and 886 * pick it up next time. the last block will have finish set 887 * so we won't have any pending matches 888 * however, we need to correct for how much we've encoded 889 */ 890 if(prevlen != MinMatch - 1) 891 p--; 892 893 lzb->excost += excost; 894 lzb->eparse = parse; 895 lzb->lastv = v; 896 897 lz->now = now; 898 lz->prevlen = prevlen; 899 lz->prevoff = prevoff; 900 901 return p - &lz->hist[lz->pos]; 902 } 903 904 /* 905 * make up the dynamic code tables, and return the number of bits 906 * needed to transmit them. 907 */ 908 static int 909 huffcodes(Dyncode *dc, Huff *littab, Huff *offtab) 910 { 911 Huff *codetab; 912 uchar *codes, *codeaux; 913 ulong codecount[Nclen], excost; 914 int i, n, m, v, c, nlit, noff, ncode, nclen; 915 916 codetab = dc->codetab; 917 codes = dc->codes; 918 codeaux = dc->codeaux; 919 920 /* 921 * trim the sizes of the tables 922 */ 923 for(nlit = Nlitlen; nlit > 257 && littab[nlit-1].bits == 0; nlit--) 924 ; 925 for(noff = Noff; noff > 1 && offtab[noff-1].bits == 0; noff--) 926 ; 927 928 /* 929 * make the code-length code 930 */ 931 for(i = 0; i < nlit; i++) 932 codes[i] = littab[i].bits; 933 for(i = 0; i < noff; i++) 934 codes[i + nlit] = offtab[i].bits; 935 936 /* 937 * run-length compress the code-length code 938 */ 939 excost = 0; 940 c = 0; 941 ncode = nlit+noff; 942 for(i = 0; i < ncode; ){ 943 n = i + 1; 944 v = codes[i]; 945 while(n < ncode && v == codes[n]) 946 n++; 947 n -= i; 948 i += n; 949 if(v == 0){ 950 while(n >= 11){ 951 m = n; 952 if(m > 138) 953 m = 138; 954 codes[c] = 18; 955 codeaux[c++] = m - 11; 956 n -= m; 957 excost += 7; 958 } 959 if(n >= 3){ 960 codes[c] = 17; 961 codeaux[c++] = n - 3; 962 n = 0; 963 excost += 3; 964 } 965 } 966 while(n--){ 967 codes[c++] = v; 968 while(n >= 3){ 969 m = n; 970 if(m > 6) 971 m = 6; 972 codes[c] = 16; 973 codeaux[c++] = m - 3; 974 n -= m; 975 excost += 3; 976 } 977 } 978 } 979 980 memset(codecount, 0, sizeof codecount); 981 for(i = 0; i < c; i++) 982 codecount[codes[i]]++; 983 if(!mkgzprecode(codetab, codecount, Nclen, 8)) 984 return -1; 985 986 for(nclen = Nclen; nclen > 4 && codetab[clenorder[nclen-1]].bits == 0; nclen--) 987 ; 988 989 dc->nlit = nlit; 990 dc->noff = noff; 991 dc->nclen = nclen; 992 dc->ncode = c; 993 994 return 5 + 5 + 4 + nclen * 3 + bitcost(codetab, codecount, Nclen) + excost; 995 } 996 997 static void 998 wrdyncode(LZstate *out, Dyncode *dc) 999 { 1000 Huff *codetab; 1001 uchar *codes, *codeaux; 1002 int i, v, c; 1003 1004 /* 1005 * write out header, then code length code lengths, 1006 * and code lengths 1007 */ 1008 lzput(out, dc->nlit-257, 5); 1009 lzput(out, dc->noff-1, 5); 1010 lzput(out, dc->nclen-4, 4); 1011 1012 codetab = dc->codetab; 1013 for(i = 0; i < dc->nclen; i++) 1014 lzput(out, codetab[clenorder[i]].bits, 3); 1015 1016 codes = dc->codes; 1017 codeaux = dc->codeaux; 1018 c = dc->ncode; 1019 for(i = 0; i < c; i++){ 1020 v = codes[i]; 1021 lzput(out, codetab[v].encode, codetab[v].bits); 1022 if(v >= 16){ 1023 if(v == 16) 1024 lzput(out, codeaux[i], 2); 1025 else if(v == 17) 1026 lzput(out, codeaux[i], 3); 1027 else /* v == 18 */ 1028 lzput(out, codeaux[i], 7); 1029 } 1030 } 1031 } 1032 1033 static int 1034 bitcost(Huff *tab, ulong *count, int n) 1035 { 1036 ulong tot; 1037 int i; 1038 1039 tot = 0; 1040 for(i = 0; i < n; i++) 1041 tot += count[i] * tab[i].bits; 1042 return tot; 1043 } 1044 1045 static int 1046 mkgzprecode(Huff *tab, ulong *count, int n, int maxbits) 1047 { 1048 ulong bitcount[MaxHuffBits]; 1049 int i, nbits; 1050 1051 nbits = mkprecode(tab, count, n, maxbits, bitcount); 1052 for(i = 0; i < n; i++){ 1053 if(tab[i].bits == -1) 1054 tab[i].bits = 0; 1055 else if(tab[i].bits == 0){ 1056 if(nbits != 0 || bitcount[0] != 1) 1057 return 0; 1058 bitcount[1] = 1; 1059 bitcount[0] = 0; 1060 nbits = 1; 1061 tab[i].bits = 1; 1062 } 1063 } 1064 if(bitcount[0] != 0) 1065 return 0; 1066 return hufftabinit(tab, n, bitcount, nbits); 1067 } 1068 1069 static int 1070 hufftabinit(Huff *tab, int n, ulong *bitcount, int nbits) 1071 { 1072 ulong code, nc[MaxHuffBits]; 1073 int i, bits; 1074 1075 code = 0; 1076 for(bits = 1; bits <= nbits; bits++){ 1077 code = (code + bitcount[bits-1]) << 1; 1078 nc[bits] = code; 1079 } 1080 1081 for(i = 0; i < n; i++){ 1082 bits = tab[i].bits; 1083 if(bits){ 1084 code = nc[bits]++ << (16 - bits); 1085 if(code & ~0xffff) 1086 return 0; 1087 tab[i].encode = revtab[code >> 8] | (revtab[code & 0xff] << 8); 1088 } 1089 } 1090 return 1; 1091 } 1092 1093 1094 /* 1095 * this should be in a library 1096 */ 1097 struct Chain 1098 { 1099 ulong count; /* occurances of everything in the chain */ 1100 ushort leaf; /* leaves to the left of chain, or leaf value */ 1101 char col; /* ref count for collecting unused chains */ 1102 char gen; /* need to generate chains for next lower level */ 1103 Chain *up; /* Chain up in the lists */ 1104 }; 1105 1106 struct Chains 1107 { 1108 Chain *lists[(MaxHuffBits - 1) * 2]; 1109 ulong leafcount[MaxLeaf]; /* sorted list of leaf counts */ 1110 ushort leafmap[MaxLeaf]; /* map to actual leaf number */ 1111 int nleaf; /* number of leaves */ 1112 Chain chains[ChainMem]; 1113 Chain *echains; 1114 Chain *free; 1115 char col; 1116 int nlists; 1117 }; 1118 1119 /* 1120 * fast, low space overhead algorithm for max depth huffman type codes 1121 * 1122 * J. Katajainen, A. Moffat and A. Turpin, "A fast and space-economical 1123 * algorithm for length-limited coding," Proc. Intl. Symp. on Algorithms 1124 * and Computation, Cairns, Australia, Dec. 1995, Lecture Notes in Computer 1125 * Science, Vol 1004, J. Staples, P. Eades, N. Katoh, and A. Moffat, eds., 1126 * pp 12-21, Springer Verlag, New York, 1995. 1127 */ 1128 static int 1129 mkprecode(Huff *tab, ulong *count, int n, int maxbits, ulong *bitcount) 1130 { 1131 Chains cs; 1132 Chain *c; 1133 int i, m, em, bits; 1134 1135 memset(&cs, 0, sizeof cs); 1136 1137 /* 1138 * set up the sorted list of leaves 1139 */ 1140 m = 0; 1141 for(i = 0; i < n; i++){ 1142 tab[i].bits = -1; 1143 tab[i].encode = 0; 1144 if(count[i] != 0){ 1145 cs.leafcount[m] = count[i]; 1146 cs.leafmap[m] = i; 1147 m++; 1148 } 1149 } 1150 if(m < 2){ 1151 if(m != 0){ 1152 tab[cs.leafmap[0]].bits = 0; 1153 bitcount[0] = 1; 1154 }else 1155 bitcount[0] = 0; 1156 return 0; 1157 } 1158 cs.nleaf = m; 1159 leafsort(cs.leafcount, cs.leafmap, 0, m); 1160 1161 for(i = 0; i < m; i++) 1162 cs.leafcount[i] = count[cs.leafmap[i]]; 1163 1164 /* 1165 * set up free list 1166 */ 1167 cs.free = &cs.chains[2]; 1168 cs.echains = &cs.chains[ChainMem]; 1169 cs.col = 1; 1170 1171 /* 1172 * initialize chains for each list 1173 */ 1174 c = &cs.chains[0]; 1175 c->count = cs.leafcount[0]; 1176 c->leaf = 1; 1177 c->col = cs.col; 1178 c->up = nil; 1179 c->gen = 0; 1180 cs.chains[1] = cs.chains[0]; 1181 cs.chains[1].leaf = 2; 1182 cs.chains[1].count = cs.leafcount[1]; 1183 for(i = 0; i < maxbits-1; i++){ 1184 cs.lists[i * 2] = &cs.chains[0]; 1185 cs.lists[i * 2 + 1] = &cs.chains[1]; 1186 } 1187 1188 cs.nlists = 2 * (maxbits - 1); 1189 m = 2 * m - 2; 1190 for(i = 2; i < m; i++) 1191 nextchain(&cs, cs.nlists - 2); 1192 1193 bits = 0; 1194 bitcount[0] = cs.nleaf; 1195 for(c = cs.lists[cs.nlists - 1]; c != nil; c = c->up){ 1196 m = c->leaf; 1197 bitcount[bits++] -= m; 1198 bitcount[bits] = m; 1199 } 1200 m = 0; 1201 for(i = bits; i >= 0; i--) 1202 for(em = m + bitcount[i]; m < em; m++) 1203 tab[cs.leafmap[m]].bits = i; 1204 1205 return bits; 1206 } 1207 1208 /* 1209 * calculate the next chain on the list 1210 * we can always toss out the old chain 1211 */ 1212 static void 1213 nextchain(Chains *cs, int list) 1214 { 1215 Chain *c, *oc; 1216 int i, nleaf, sumc; 1217 1218 oc = cs->lists[list + 1]; 1219 cs->lists[list] = oc; 1220 if(oc == nil) 1221 return; 1222 1223 /* 1224 * make sure we have all chains needed to make sumc 1225 * note it is possible to generate only one of these, 1226 * use twice that value for sumc, and then generate 1227 * the second if that preliminary sumc would be chosen. 1228 * however, this appears to be slower on current tests 1229 */ 1230 if(oc->gen){ 1231 nextchain(cs, list - 2); 1232 nextchain(cs, list - 2); 1233 oc->gen = 0; 1234 } 1235 1236 /* 1237 * pick up the chain we're going to add; 1238 * collect unused chains no free ones are left 1239 */ 1240 for(c = cs->free; ; c++){ 1241 if(c >= cs->echains){ 1242 cs->col++; 1243 for(i = 0; i < cs->nlists; i++) 1244 for(c = cs->lists[i]; c != nil; c = c->up) 1245 c->col = cs->col; 1246 c = cs->chains; 1247 } 1248 if(c->col != cs->col) 1249 break; 1250 } 1251 1252 /* 1253 * pick the cheapest of 1254 * 1) the next package from the previous list 1255 * 2) the next leaf 1256 */ 1257 nleaf = oc->leaf; 1258 sumc = 0; 1259 if(list > 0 && cs->lists[list-1] != nil) 1260 sumc = cs->lists[list-2]->count + cs->lists[list-1]->count; 1261 if(sumc != 0 && (nleaf >= cs->nleaf || cs->leafcount[nleaf] > sumc)){ 1262 c->count = sumc; 1263 c->leaf = oc->leaf; 1264 c->up = cs->lists[list-1]; 1265 c->gen = 1; 1266 }else if(nleaf >= cs->nleaf){ 1267 cs->lists[list + 1] = nil; 1268 return; 1269 }else{ 1270 c->leaf = nleaf + 1; 1271 c->count = cs->leafcount[nleaf]; 1272 c->up = oc->up; 1273 c->gen = 0; 1274 } 1275 cs->free = c + 1; 1276 1277 cs->lists[list + 1] = c; 1278 c->col = cs->col; 1279 } 1280 1281 static int 1282 pivot(ulong *c, int a, int n) 1283 { 1284 int j, pi, pj, pk; 1285 1286 j = n/6; 1287 pi = a + j; /* 1/6 */ 1288 j += j; 1289 pj = pi + j; /* 1/2 */ 1290 pk = pj + j; /* 5/6 */ 1291 if(c[pi] < c[pj]){ 1292 if(c[pi] < c[pk]){ 1293 if(c[pj] < c[pk]) 1294 return pj; 1295 return pk; 1296 } 1297 return pi; 1298 } 1299 if(c[pj] < c[pk]){ 1300 if(c[pi] < c[pk]) 1301 return pi; 1302 return pk; 1303 } 1304 return pj; 1305 } 1306 1307 static void 1308 leafsort(ulong *leafcount, ushort *leafmap, int a, int n) 1309 { 1310 ulong t; 1311 int j, pi, pj, pn; 1312 1313 while(n > 1){ 1314 if(n > 10){ 1315 pi = pivot(leafcount, a, n); 1316 }else 1317 pi = a + (n>>1); 1318 1319 t = leafcount[pi]; 1320 leafcount[pi] = leafcount[a]; 1321 leafcount[a] = t; 1322 t = leafmap[pi]; 1323 leafmap[pi] = leafmap[a]; 1324 leafmap[a] = t; 1325 pi = a; 1326 pn = a + n; 1327 pj = pn; 1328 for(;;){ 1329 do 1330 pi++; 1331 while(pi < pn && (leafcount[pi] < leafcount[a] || leafcount[pi] == leafcount[a] && leafmap[pi] > leafmap[a])); 1332 do 1333 pj--; 1334 while(pj > a && (leafcount[pj] > leafcount[a] || leafcount[pj] == leafcount[a] && leafmap[pj] < leafmap[a])); 1335 if(pj < pi) 1336 break; 1337 t = leafcount[pi]; 1338 leafcount[pi] = leafcount[pj]; 1339 leafcount[pj] = t; 1340 t = leafmap[pi]; 1341 leafmap[pi] = leafmap[pj]; 1342 leafmap[pj] = t; 1343 } 1344 t = leafcount[a]; 1345 leafcount[a] = leafcount[pj]; 1346 leafcount[pj] = t; 1347 t = leafmap[a]; 1348 leafmap[a] = leafmap[pj]; 1349 leafmap[pj] = t; 1350 j = pj - a; 1351 1352 n = n-j-1; 1353 if(j >= n){ 1354 leafsort(leafcount, leafmap, a, j); 1355 a += j+1; 1356 }else{ 1357 leafsort(leafcount, leafmap, a + (j+1), n); 1358 n = j; 1359 } 1360 } 1361 }