plan9port

fork of plan9port with libvec, libstr and libsdb
Log | Files | Refs | README | LICENSE

compress.c (32464B)


      1 /*
      2  * compress - File compression ala IEEE Computer, June 1984.
      3  *
      4  * Algorithm from "A Technique for High Performance Data Compression",
      5  * Terry A. Welch, IEEE Computer Vol 17, No 6 (June 1984), pp 8-19.
      6  *
      7  * Usage: compress [-dfvc] [-b bits] [file ...]
      8  * Inputs:
      9  *	-b:	limit the max number of bits/code.
     10  *	-c:	write output on stdout, don't remove original.
     11  *	-d:	decompress instead.
     12  *	-f:	Forces output file to be generated, even if one already
     13  *		exists, and even if no space is saved by compressing.
     14  *		If -f is not used, the user will be prompted if stdin is
     15  *		a tty, otherwise, the output file will not be overwritten.
     16  *	-v:	Write compression statistics
     17  *
     18  * 	file ...: Files to be compressed.  If none specified, stdin is used.
     19  * Outputs:
     20  *	file.Z:	Compressed form of file with same mode, owner, and utimes
     21  * 		or stdout (if stdin used as input)
     22  *
     23  * Assumptions:
     24  *	When filenames are given, replaces with the compressed version
     25  *	(.Z suffix) only if the file decreases in size.
     26  * Algorithm:
     27  * 	Modified Lempel-Ziv method (LZW).  Basically finds common
     28  * substrings and replaces them with a variable size code.  This is
     29  * deterministic, and can be done on the fly.  Thus, the decompression
     30  * procedure needs no input table, but tracks the way the table was built.
     31 
     32  * Authors:	Spencer W. Thomas	(decvax!harpo!utah-cs!utah-gr!thomas)
     33  *		Jim McKie		(decvax!mcvax!jim)
     34  *		Steve Davies		(decvax!vax135!petsd!peora!srd)
     35  *		Ken Turkowski		(decvax!decwrl!turtlevax!ken)
     36  *		James A. Woods		(decvax!ihnp4!ames!jaw)
     37  *		Joe Orost		(decvax!vax135!petsd!joe)
     38  */
     39 
     40 #ifndef USED
     41 #  define USED(x) if(x);else
     42 #endif
     43 
     44 #define _PLAN9_SOURCE
     45 #define _BSD_EXTENSION
     46 #define _POSIX_SOURCE
     47 
     48 #include <u.h>
     49 #include <stdio.h>
     50 #include <ctype.h>
     51 #include <stdlib.h>
     52 #include <unistd.h>
     53 #include <string.h>
     54 #include <signal.h>
     55 #include <utime.h>
     56 #include <sys/types.h>
     57 #include <sys/stat.h>
     58 
     59 #define	min(a,b)	((a>b) ? b : a)
     60 
     61 #define BITS	16
     62 #define HSIZE	69001		/* 95% occupancy */
     63 
     64 /*
     65  * a code_int must be able to hold 2**BITS values of type int, and also -1
     66  */
     67 typedef long	code_int;
     68 typedef long	count_int;
     69 
     70 static char rcs_ident[] = "$Header: compress.c,v 4.0 85/07/30 12:50:00 joe Release $";
     71 
     72 uchar magic_header[] = { 0x1F, 0x9D };	/* 1F 9D */
     73 
     74 /* Defines for third byte of header */
     75 #define BIT_MASK	0x1f
     76 #define BLOCK_MASK	0x80
     77 /* Masks 0x40 and 0x20 are free.  I think 0x20 should mean that there is
     78 	a fourth header byte (for expansion).
     79 */
     80 #define INIT_BITS 9			/* initial number of bits/code */
     81 
     82 #define ARGVAL() (*++(*argv) || (--argc && *++argv))
     83 
     84 int n_bits;				/* number of bits/code */
     85 int maxbits = BITS;			/* user settable max # bits/code */
     86 code_int maxcode;			/* maximum code, given n_bits */
     87 code_int maxmaxcode = 1 << BITS;	/* should NEVER generate this code */
     88 
     89 #define MAXCODE(n_bits)	((1 << (n_bits)) - 1)
     90 
     91 count_int htab[HSIZE];
     92 ushort codetab[HSIZE];
     93 
     94 #define htabof(i)	htab[i]
     95 #define codetabof(i)	codetab[i]
     96 
     97 code_int hsize = HSIZE;			/* for dynamic table sizing */
     98 count_int fsize;
     99 
    100 /*
    101  * To save much memory, we overlay the table used by compress() with those
    102  * used by decompress().  The tab_prefix table is the same size and type
    103  * as the codetab.  The tab_suffix table needs 2**BITS characters.  We
    104  * get this from the beginning of htab.  The output stack uses the rest
    105  * of htab, and contains characters.  There is plenty of room for any
    106  * possible stack (stack used to be 8000 characters).
    107  */
    108 
    109 #define tab_prefixof(i)	codetabof(i)
    110 #define tab_suffixof(i)	((uchar *)(htab))[i]
    111 #define de_stack		((uchar *)&tab_suffixof(1<<BITS))
    112 
    113 code_int free_ent = 0;			/* first unused entry */
    114 int exit_stat = 0;
    115 
    116 void	cl_block(void);
    117 void	cl_hash(count_int);
    118 void	compress(void);
    119 void	copystat(char *, char *);
    120 void	decompress(void);
    121 int	foreground(void);
    122 code_int getcode(void);
    123 void	onintr(int);
    124 void	oops(int);
    125 void	output(code_int);
    126 void	prratio(FILE *, long, long);
    127 void	version(void);
    128 void	writeerr(void);
    129 
    130 void
    131 Usage(void)
    132 {
    133 #ifdef DEBUG
    134 	fprintf(stderr,"Usage: compress [-cdfDV] [-b maxbits] [file ...]\n");
    135 #else
    136 	fprintf(stderr,"Usage: compress [-cdfvV] [-b maxbits] [file ...]\n");
    137 #endif /* DEBUG */
    138 }
    139 
    140 int debug = 0;
    141 int nomagic = 0;	/* Use a 3-byte magic number header, unless old file */
    142 int zcat_flg = 0;	/* Write output on stdout, suppress messages */
    143 int quiet = 1;		/* don't tell me about compression */
    144 
    145 /*
    146  * block compression parameters -- after all codes are used up,
    147  * and compression rate changes, start over.
    148  */
    149 int block_compress = BLOCK_MASK;
    150 int clear_flg = 0;
    151 long ratio = 0;
    152 #define CHECK_GAP 10000	/* ratio check interval */
    153 count_int checkpoint = CHECK_GAP;
    154 /*
    155  * the next two codes should not be changed lightly, as they must not
    156  * lie within the contiguous general code space.
    157  */
    158 #define FIRST	257	/* first free entry */
    159 #define	CLEAR	256	/* table clear output code */
    160 
    161 int force = 0;
    162 char ofname [100];
    163 #ifdef DEBUG
    164 int verbose = 0;
    165 #endif /* DEBUG */
    166 void (*bgnd_flag)(int);
    167 
    168 int do_decomp = 0;
    169 
    170 int
    171 main(argc, argv)
    172 int argc;
    173 char **argv;
    174 {
    175 	int overwrite = 0;	/* Do not overwrite unless given -f flag */
    176 	char tempname[512];
    177 	char **filelist, **fileptr;
    178 	char *cp;
    179 	struct stat statbuf;
    180 
    181 	if ( (bgnd_flag = signal ( SIGINT, SIG_IGN )) != SIG_IGN ) {
    182 		signal(SIGINT, onintr);
    183 		signal(SIGSEGV, oops);
    184 	}
    185 
    186 	filelist = fileptr = (char **)(malloc(argc * sizeof(*argv)));
    187 	*filelist = NULL;
    188 
    189 	if((cp = strrchr(argv[0], '/')) != 0)
    190 		cp++;
    191 	else
    192 		cp = argv[0];
    193 	if(strcmp(cp, "uncompress") == 0)
    194 		do_decomp = 1;
    195 	else if(strcmp(cp, "zcat") == 0) {
    196 		do_decomp = 1;
    197 		zcat_flg = 1;
    198 	}
    199 
    200 	/*
    201 	 * Argument Processing
    202 	 * All flags are optional.
    203 	 * -C	generate output compatible with compress 2.0.
    204 	 * -D	debug
    205 	 * -V	print Version; debug verbose
    206 	 * -b maxbits	maxbits.  If -b is specified, then maxbits MUST be
    207 	 *		given also.
    208 	 * -c	cat all output to stdout
    209 	 * -d	do_decomp
    210 	 * -f	force overwrite of output file
    211 	 * -n	no header: useful to uncompress old files
    212 	 * -v	unquiet
    213 	 * if a string is left, must be an input filename.
    214 	 */
    215 	for (argc--, argv++; argc > 0; argc--, argv++) {
    216 	if (**argv == '-') {	/* A flag argument */
    217 		while (*++(*argv)) {	/* Process all flags in this arg */
    218 		switch (**argv) {
    219 		case 'C':
    220 			block_compress = 0;
    221 			break;
    222 #ifdef DEBUG
    223 		case 'D':
    224 			debug = 1;
    225 			break;
    226 		case 'V':
    227 			verbose = 1;
    228 			version();
    229 			break;
    230 #else
    231 		case 'V':
    232 			version();
    233 			break;
    234 #endif
    235 		case 'b':
    236 			if (!ARGVAL()) {
    237 				fprintf(stderr, "Missing maxbits\n");
    238 				Usage();
    239 				exit(1);
    240 			}
    241 			maxbits = atoi(*argv);
    242 			goto nextarg;
    243 		case 'c':
    244 			zcat_flg = 1;
    245 			break;
    246 		case 'd':
    247 			do_decomp = 1;
    248 			break;
    249 		case 'f':
    250 		case 'F':
    251 			overwrite = 1;
    252 			force = 1;
    253 			break;
    254 		case 'n':
    255 			nomagic = 1;
    256 			break;
    257 		case 'q':
    258 			quiet = 1;
    259 			break;
    260 		case 'v':
    261 			quiet = 0;
    262 			break;
    263 		default:
    264 			fprintf(stderr, "Unknown flag: '%c'; ", **argv);
    265 			Usage();
    266 			exit(1);
    267 		}
    268 		}
    269 	} else {			/* Input file name */
    270 		*fileptr++ = *argv;	/* Build input file list */
    271 		*fileptr = NULL;
    272 		/* process nextarg; */
    273 	}
    274 nextarg:
    275 		continue;
    276 	}
    277 
    278 	if(maxbits < INIT_BITS) maxbits = INIT_BITS;
    279 	if (maxbits > BITS) maxbits = BITS;
    280 	maxmaxcode = 1 << maxbits;
    281 
    282 	if (*filelist != NULL) {
    283 	for (fileptr = filelist; *fileptr; fileptr++) {
    284 		exit_stat = 0;
    285 		if (do_decomp != 0) {			/* DECOMPRESSION */
    286 		/* Check for .Z suffix */
    287 		if (strcmp(*fileptr + strlen(*fileptr) - 2, ".Z") != 0) {
    288 			/* No .Z: tack one on */
    289 			strcpy(tempname, *fileptr);
    290 			strcat(tempname, ".Z");
    291 			*fileptr = tempname;
    292 		}
    293 		/* Open input file */
    294 		if ((freopen(*fileptr, "r", stdin)) == NULL) {
    295 			perror(*fileptr);
    296 			continue;
    297 		}
    298 		/* Check the magic number */
    299 		if (nomagic == 0) {
    300 			if ((getchar() != (magic_header[0] & 0xFF))
    301 			|| (getchar() != (magic_header[1] & 0xFF))) {
    302 				fprintf(stderr, "%s: not in compressed format\n",
    303 					*fileptr);
    304 				continue;
    305 			}
    306 			maxbits = getchar();	/* set -b from file */
    307 			block_compress = maxbits & BLOCK_MASK;
    308 			maxbits &= BIT_MASK;
    309 			maxmaxcode = 1 << maxbits;
    310 			if(maxbits > BITS) {
    311 				fprintf(stderr,
    312 		"%s: compressed with %d bits, can only handle %d bits\n",
    313 					*fileptr, maxbits, BITS);
    314 				continue;
    315 			}
    316 		}
    317 		/* Generate output filename */
    318 		strcpy(ofname, *fileptr);
    319 		ofname[strlen(*fileptr) - 2] = '\0';  /* Strip off .Z */
    320 		} else {				/* COMPRESSION */
    321 		if (strcmp(*fileptr + strlen(*fileptr) - 2, ".Z") == 0) {
    322 			fprintf(stderr,
    323 				"%s: already has .Z suffix -- no change\n",
    324 				*fileptr);
    325 			continue;
    326 		}
    327 		/* Open input file */
    328 		if ((freopen(*fileptr, "r", stdin)) == NULL) {
    329 			perror(*fileptr);
    330 			continue;
    331 		}
    332 		(void) stat(*fileptr, &statbuf);
    333 		fsize = (long) statbuf.st_size;
    334 		/*
    335 		 * tune hash table size for small files -- ad hoc,
    336 		 * but the sizes match earlier #defines, which
    337 		 * serve as upper bounds on the number of output codes.
    338 		 */
    339 		hsize = HSIZE;
    340 		if (fsize < (1 << 12))
    341 			hsize = min(5003, HSIZE);
    342 		else if (fsize < (1 << 13))
    343 			hsize = min(9001, HSIZE);
    344 		else if (fsize < (1 << 14))
    345 			hsize = min (18013, HSIZE);
    346 		else if (fsize < (1 << 15))
    347 			hsize = min (35023, HSIZE);
    348 		else if (fsize < 47000)
    349 			hsize = min (50021, HSIZE);
    350 
    351 		/* Generate output filename */
    352 		strcpy(ofname, *fileptr);
    353 #ifndef BSD4_2
    354 		if ((cp=strrchr(ofname,'/')) != NULL)
    355 			cp++;
    356 		else
    357 			cp = ofname;
    358 		/*
    359 		 *** changed 12 to 25;  should be NAMELEN-3, but I don't want
    360 		 * to fight the headers.	ehg 5 Nov 92 **
    361 		 */
    362 		if (strlen(cp) > 25) {
    363 			fprintf(stderr, "%s: filename too long to tack on .Z\n",
    364 				cp);
    365 			continue;
    366 		}
    367 #endif
    368 		strcat(ofname, ".Z");
    369 		}
    370 		/* Check for overwrite of existing file */
    371 		if (overwrite == 0 && zcat_flg == 0 &&
    372 		   stat(ofname, &statbuf) == 0) {
    373 			char response[2];
    374 
    375 			response[0] = 'n';
    376 			fprintf(stderr, "%s already exists;", ofname);
    377 			if (foreground()) {
    378 				fprintf(stderr,
    379 				    " do you wish to overwrite %s (y or n)? ",
    380 					ofname);
    381 				fflush(stderr);
    382 				(void) read(2, response, 2);
    383 				while (response[1] != '\n')
    384 					if (read(2, response+1, 1) < 0) {
    385 						/* Ack! */
    386 						perror("stderr");
    387 						break;
    388 					}
    389 			}
    390 			if (response[0] != 'y') {
    391 				fprintf(stderr, "\tnot overwritten\n");
    392 				continue;
    393 			}
    394 		}
    395 		if(zcat_flg == 0) {		/* Open output file */
    396 			if (freopen(ofname, "w", stdout) == NULL) {
    397 				perror(ofname);
    398 				continue;
    399 			}
    400 			if(!quiet)
    401 				fprintf(stderr, "%s: ", *fileptr);
    402 		}
    403 
    404 		/* Actually do the compression/decompression */
    405 		if (do_decomp == 0)
    406 			compress();
    407 #ifndef DEBUG
    408 		else
    409 			decompress();
    410 #else
    411 		else if (debug == 0)
    412 			decompress();
    413 		else
    414 			printcodes();
    415 		if (verbose)
    416 			dump_tab();
    417 #endif						/* DEBUG */
    418 		if(zcat_flg == 0) {
    419 			copystat(*fileptr, ofname);	/* Copy stats */
    420 			if (exit_stat == 1 || !quiet)
    421 				putc('\n', stderr);
    422 		}
    423 	}
    424 	} else {		/* Standard input */
    425 	if (do_decomp == 0) {
    426 		compress();
    427 #ifdef DEBUG
    428 		if(verbose)
    429 			dump_tab();
    430 #endif
    431 		if(!quiet)
    432 			putc('\n', stderr);
    433 	} else {
    434 		/* Check the magic number */
    435 		if (nomagic == 0) {
    436 		if ((getchar()!=(magic_header[0] & 0xFF))
    437 		 || (getchar()!=(magic_header[1] & 0xFF))) {
    438 			fprintf(stderr, "stdin: not in compressed format\n");
    439 			exit(1);
    440 		}
    441 		maxbits = getchar();	/* set -b from file */
    442 		block_compress = maxbits & BLOCK_MASK;
    443 		maxbits &= BIT_MASK;
    444 		maxmaxcode = 1 << maxbits;
    445 		fsize = 100000;		/* assume stdin large for USERMEM */
    446 		if(maxbits > BITS) {
    447 			fprintf(stderr,
    448 			"stdin: compressed with %d bits, can only handle %d bits\n",
    449 			maxbits, BITS);
    450 			exit(1);
    451 		}
    452 		}
    453 #ifndef DEBUG
    454 		decompress();
    455 #else
    456 		if (debug == 0)
    457 			decompress();
    458 		else
    459 			printcodes();
    460 		if (verbose)
    461 			dump_tab();
    462 #endif						/* DEBUG */
    463 	}
    464 	}
    465 	exit(exit_stat);
    466 	return 0;
    467 }
    468 
    469 static int offset;
    470 long in_count = 1;		/* length of input */
    471 long bytes_out;			/* length of compressed output */
    472 long out_count = 0;		/* # of codes output (for debugging) */
    473 
    474 /*
    475  * compress stdin to stdout
    476  *
    477  * Algorithm:  use open addressing double hashing (no chaining) on the
    478  * prefix code / next character combination.  We do a variant of Knuth's
    479  * algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime
    480  * secondary probe.  Here, the modular division first probe is gives way
    481  * to a faster exclusive-or manipulation.  Also do block compression with
    482  * an adaptive reset, whereby the code table is cleared when the compression
    483  * ratio decreases, but after the table fills.  The variable-length output
    484  * codes are re-sized at this point, and a special CLEAR code is generated
    485  * for the decompressor.  Late addition:  construct the table according to
    486  * file size for noticeable speed improvement on small files.  Please direct
    487  * questions about this implementation to ames!jaw.
    488  */
    489 void
    490 compress(void)
    491 {
    492 	code_int ent, hsize_reg;
    493 	code_int i;
    494 	int c, disp, hshift;
    495 	long fcode;
    496 
    497 	if (nomagic == 0) {
    498 		putchar(magic_header[0]);
    499 		putchar(magic_header[1]);
    500 		putchar((char)(maxbits | block_compress));
    501 		if(ferror(stdout))
    502 			writeerr();
    503 	}
    504 	offset = 0;
    505 	bytes_out = 3;		/* includes 3-byte header mojo */
    506 	out_count = 0;
    507 	clear_flg = 0;
    508 	ratio = 0;
    509 	in_count = 1;
    510 	checkpoint = CHECK_GAP;
    511 	maxcode = MAXCODE(n_bits = INIT_BITS);
    512 	free_ent = (block_compress? FIRST: 256);
    513 
    514 	ent = getchar ();
    515 
    516 	hshift = 0;
    517 	for (fcode = (long)hsize;  fcode < 65536L; fcode *= 2)
    518 		hshift++;
    519 	hshift = 8 - hshift;		/* set hash code range bound */
    520 
    521 	hsize_reg = hsize;
    522 	cl_hash( (count_int) hsize_reg);		/* clear hash table */
    523 
    524 	while ((c = getchar()) != EOF) {
    525 		in_count++;
    526 		fcode = (long) (((long) c << maxbits) + ent);
    527 	 	i = ((c << hshift) ^ ent);	/* xor hashing */
    528 
    529 		if (htabof (i) == fcode) {
    530 			ent = codetabof(i);
    531 			continue;
    532 		} else if ((long)htabof(i) < 0 )	/* empty slot */
    533 			goto nomatch;
    534 	 	disp = hsize_reg - i;	/* secondary hash (after G. Knott) */
    535 		if (i == 0)
    536 			disp = 1;
    537 probe:
    538 		if ((i -= disp) < 0)
    539 			i += hsize_reg;
    540 
    541 		if (htabof (i) == fcode) {
    542 			ent = codetabof(i);
    543 			continue;
    544 		}
    545 		if ((long)htabof(i) > 0)
    546 			goto probe;
    547 nomatch:
    548 		output((code_int)ent);
    549 		out_count++;
    550 	 	ent = c;
    551 		if (free_ent < maxmaxcode) {
    552 	 		codetabof(i) = free_ent++;	/* code -> hashtable */
    553 			htabof(i) = fcode;
    554 		} else if ((count_int)in_count >= checkpoint && block_compress)
    555 			cl_block ();
    556 	}
    557 	/*
    558 	* Put out the final code.
    559 	*/
    560 	output( (code_int)ent );
    561 	out_count++;
    562 	output( (code_int)-1 );
    563 
    564 	/*
    565 	* Print out stats on stderr
    566 	*/
    567 	if(zcat_flg == 0 && !quiet) {
    568 #ifdef DEBUG
    569 	fprintf( stderr,
    570 		"%ld chars in, %ld codes (%ld bytes) out, compression factor: ",
    571 		in_count, out_count, bytes_out );
    572 	prratio( stderr, in_count, bytes_out );
    573 	fprintf( stderr, "\n");
    574 	fprintf( stderr, "\tCompression as in compact: " );
    575 	prratio( stderr, in_count-bytes_out, in_count );
    576 	fprintf( stderr, "\n");
    577 	fprintf( stderr, "\tLargest code (of last block) was %d (%d bits)\n",
    578 		free_ent - 1, n_bits );
    579 #else /* !DEBUG */
    580 	fprintf( stderr, "Compression: " );
    581 	prratio( stderr, in_count-bytes_out, in_count );
    582 #endif /* DEBUG */
    583 	}
    584 	if(bytes_out > in_count)	/* exit(2) if no savings */
    585 		exit_stat = 2;
    586 }
    587 
    588 /*
    589  * TAG( output )
    590  *
    591  * Output the given code.
    592  * Inputs:
    593  * 	code:	A n_bits-bit integer.  If == -1, then EOF.  This assumes
    594  *		that n_bits =< (long)wordsize - 1.
    595  * Outputs:
    596  * 	Outputs code to the file.
    597  * Assumptions:
    598  *	Chars are 8 bits long.
    599  * Algorithm:
    600  * 	Maintain a BITS character long buffer (so that 8 codes will
    601  * fit in it exactly).  When the buffer fills up empty it and start over.
    602  */
    603 
    604 static char buf[BITS];
    605 
    606 uchar lmask[9] = {0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00};
    607 uchar rmask[9] = {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};
    608 
    609 void
    610 output( code )
    611 code_int  code;
    612 {
    613 #ifdef DEBUG
    614 	static int col = 0;
    615 #endif
    616 	int r_off = offset, bits= n_bits;
    617 	char *bp = buf;
    618 
    619 #ifdef DEBUG
    620 	if (verbose)
    621 		fprintf(stderr, "%5d%c", code,
    622 			(col+=6) >= 74? (col = 0, '\n'): ' ');
    623 #endif
    624 	if (code >= 0) {
    625 		/*
    626 		 * byte/bit numbering on the VAX is simulated by the
    627 		 * following code
    628 		 */
    629 		/*
    630 		 * Get to the first byte.
    631 		 */
    632 		bp += (r_off >> 3);
    633 		r_off &= 7;
    634 		/*
    635 		 * Since code is always >= 8 bits, only need to mask the first
    636 		 * hunk on the left.
    637 		 */
    638 		*bp = (*bp & rmask[r_off]) | (code << r_off) & lmask[r_off];
    639 		bp++;
    640 		bits -=  8 - r_off;
    641 		code >>= 8 - r_off;
    642 		/* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
    643 		if ( bits >= 8 ) {
    644 			*bp++ = code;
    645 			code >>= 8;
    646 			bits -= 8;
    647 		}
    648 		/* Last bits. */
    649 		if(bits)
    650 			*bp = code;
    651 
    652 		offset += n_bits;
    653 		if ( offset == (n_bits << 3) ) {
    654 			bp = buf;
    655 			bits = n_bits;
    656 			bytes_out += bits;
    657 			do {
    658 				putchar(*bp++);
    659 			} while(--bits);
    660 			offset = 0;
    661 		}
    662 
    663 		/*
    664 		 * If the next entry is going to be too big for the code size,
    665 		 * then increase it, if possible.
    666 		 */
    667 		if ( free_ent > maxcode || (clear_flg > 0)) {
    668 			/*
    669 			* Write the whole buffer, because the input side won't
    670 			* discover the size increase until after it has read it.
    671 			*/
    672 			if ( offset > 0 ) {
    673 				if( fwrite( buf, 1, n_bits, stdout ) != n_bits)
    674 					writeerr();
    675 				bytes_out += n_bits;
    676 			}
    677 			offset = 0;
    678 
    679 			if ( clear_flg ) {
    680 				maxcode = MAXCODE (n_bits = INIT_BITS);
    681 				clear_flg = 0;
    682 			} else {
    683 				n_bits++;
    684 				if ( n_bits == maxbits )
    685 					maxcode = maxmaxcode;
    686 				else
    687 					maxcode = MAXCODE(n_bits);
    688 			}
    689 #ifdef DEBUG
    690 			if ( debug ) {
    691 				fprintf(stderr,
    692 					"\nChange to %d bits\n", n_bits);
    693 				col = 0;
    694 			}
    695 #endif
    696 		}
    697 	} else {
    698 		/*
    699 		 * At EOF, write the rest of the buffer.
    700 		 */
    701 		if ( offset > 0 )
    702 			fwrite( buf, 1, (offset + 7) / 8, stdout );
    703 		bytes_out += (offset + 7) / 8;
    704 		offset = 0;
    705 		fflush( stdout );
    706 #ifdef DEBUG
    707 		if ( verbose )
    708 			fprintf( stderr, "\n" );
    709 #endif
    710 		if( ferror( stdout ) )
    711 			writeerr();
    712 	}
    713 }
    714 
    715 /*
    716  * Decompress stdin to stdout.  This routine adapts to the codes in the
    717  * file building the "string" table on-the-fly; requiring no table to
    718  * be stored in the compressed file.  The tables used herein are shared
    719  * with those of the compress() routine.  See the definitions above.
    720  */
    721 void
    722 decompress(void)
    723 {
    724 	int finchar;
    725 	code_int code, oldcode, incode;
    726 	uchar *stackp;
    727 
    728 	/*
    729 	* As above, initialize the first 256 entries in the table.
    730 	*/
    731 	maxcode = MAXCODE(n_bits = INIT_BITS);
    732 	for (code = 255; code >= 0; code--) {
    733 		tab_prefixof(code) = 0;
    734 		tab_suffixof(code) = (uchar)code;
    735 	}
    736 	free_ent = (block_compress? FIRST: 256);
    737 
    738 	finchar = oldcode = getcode();
    739 	if(oldcode == -1)		/* EOF already? */
    740 		return;			/* Get out of here */
    741 	putchar((char)finchar);		/* first code must be 8 bits = char */
    742 	if(ferror(stdout))		/* Crash if can't write */
    743 		writeerr();
    744 	stackp = de_stack;
    745 
    746 	while ((code = getcode()) > -1) {
    747 		if ((code == CLEAR) && block_compress) {
    748 			for (code = 255; code >= 0; code--)
    749 				tab_prefixof(code) = 0;
    750 			clear_flg = 1;
    751 			free_ent = FIRST - 1;
    752 			if ((code = getcode()) == -1)	/* O, untimely death! */
    753 				break;
    754 		}
    755 		incode = code;
    756 		/*
    757 		 * Special case for KwKwK string.
    758 		 */
    759 		if (code >= free_ent) {
    760 			*stackp++ = finchar;
    761 			code = oldcode;
    762 		}
    763 
    764 		/*
    765 		 * Generate output characters in reverse order
    766 		 */
    767 		while (code >= 256) {
    768 			*stackp++ = tab_suffixof(code);
    769 			code = tab_prefixof(code);
    770 		}
    771 		*stackp++ = finchar = tab_suffixof(code);
    772 
    773 		/*
    774 		 * And put them out in forward order
    775 		 */
    776 		do {
    777 			putchar(*--stackp);
    778 		} while (stackp > de_stack);
    779 
    780 		/*
    781 		 * Generate the new entry.
    782 		 */
    783 		if ( (code=free_ent) < maxmaxcode ) {
    784 			tab_prefixof(code) = (ushort)oldcode;
    785 			tab_suffixof(code) = finchar;
    786 			free_ent = code+1;
    787 		}
    788 		/*
    789 		 * Remember previous code.
    790 		 */
    791 		oldcode = incode;
    792 	}
    793 	fflush(stdout);
    794 	if(ferror(stdout))
    795 		writeerr();
    796 }
    797 
    798 /*
    799  * TAG( getcode )
    800  *
    801  * Read one code from the standard input.  If EOF, return -1.
    802  * Inputs:
    803  * 	stdin
    804  * Outputs:
    805  * 	code or -1 is returned.
    806  */
    807 code_int
    808 getcode()
    809 {
    810 	int r_off, bits;
    811 	code_int code;
    812 	static int offset = 0, size = 0;
    813 	static uchar buf[BITS];
    814 	uchar *bp = buf;
    815 
    816 	if ( clear_flg > 0 || offset >= size || free_ent > maxcode ) {
    817 		/*
    818 		 * If the next entry will be too big for the current code
    819 		 * size, then we must increase the size.  This implies reading
    820 		 * a new buffer full, too.
    821 		 */
    822 		if ( free_ent > maxcode ) {
    823 			n_bits++;
    824 			if ( n_bits == maxbits )
    825 				maxcode = maxmaxcode; /* won't get any bigger now */
    826 			else
    827 				maxcode = MAXCODE(n_bits);
    828 		}
    829 		if ( clear_flg > 0) {
    830 			maxcode = MAXCODE(n_bits = INIT_BITS);
    831 			clear_flg = 0;
    832 		}
    833 		size = fread(buf, 1, n_bits, stdin);
    834 		if (size <= 0)
    835 			return -1;			/* end of file */
    836 		offset = 0;
    837 		/* Round size down to integral number of codes */
    838 		size = (size << 3) - (n_bits - 1);
    839 	}
    840 	r_off = offset;
    841 	bits = n_bits;
    842 	/*
    843 	 * Get to the first byte.
    844 	 */
    845 	bp += (r_off >> 3);
    846 	r_off &= 7;
    847 	/* Get first part (low order bits) */
    848 	code = (*bp++ >> r_off);
    849 	bits -= (8 - r_off);
    850 	r_off = 8 - r_off;		/* now, offset into code word */
    851 	/* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
    852 	if (bits >= 8) {
    853 		code |= *bp++ << r_off;
    854 		r_off += 8;
    855 		bits -= 8;
    856 	}
    857 	/* high order bits. */
    858 	code |= (*bp & rmask[bits]) << r_off;
    859 	offset += n_bits;
    860 	return code;
    861 }
    862 
    863 #ifdef DEBUG
    864 printcodes()
    865 {
    866 	/*
    867 	* Just print out codes from input file.  For debugging.
    868 	*/
    869 	code_int code;
    870 	int col = 0, bits;
    871 
    872 	bits = n_bits = INIT_BITS;
    873 	maxcode = MAXCODE(n_bits);
    874 	free_ent = ((block_compress) ? FIRST : 256 );
    875 	while ( ( code = getcode() ) >= 0 ) {
    876 	if ( (code == CLEAR) && block_compress ) {
    877 			free_ent = FIRST - 1;
    878 			clear_flg = 1;
    879 	}
    880 	else if ( free_ent < maxmaxcode )
    881 		free_ent++;
    882 	if ( bits != n_bits ) {
    883 		fprintf(stderr, "\nChange to %d bits\n", n_bits );
    884 		bits = n_bits;
    885 		col = 0;
    886 	}
    887 	fprintf(stderr, "%5d%c", code, (col+=6) >= 74 ? (col = 0, '\n') : ' ' );
    888 	}
    889 	putc( '\n', stderr );
    890 	exit( 0 );
    891 }
    892 
    893 code_int sorttab[1<<BITS];	/* sorted pointers into htab */
    894 
    895 #define STACK_SIZE	15000
    896 
    897 dump_tab()	/* dump string table */
    898 {
    899 	int i, first, c, ent;
    900 	int stack_top = STACK_SIZE;
    901 
    902 	if(do_decomp == 0) {	/* compressing */
    903 	int flag = 1;
    904 
    905 	for(i=0; i<hsize; i++) {	/* build sort pointers */
    906 		if((long)htabof(i) >= 0) {
    907 			sorttab[codetabof(i)] = i;
    908 		}
    909 	}
    910 	first = block_compress ? FIRST : 256;
    911 	for(i = first; i < free_ent; i++) {
    912 		fprintf(stderr, "%5d: \"", i);
    913 		de_stack[--stack_top] = '\n';
    914 		de_stack[--stack_top] = '"';
    915 		stack_top = in_stack((htabof(sorttab[i])>>maxbits)&0xff,
    916 	stack_top);
    917 		for(ent=htabof(sorttab[i]) & ((1<<maxbits)-1);
    918 			ent > 256;
    919 			ent=htabof(sorttab[ent]) & ((1<<maxbits)-1)) {
    920 			stack_top = in_stack(htabof(sorttab[ent]) >> maxbits,
    921 						stack_top);
    922 		}
    923 		stack_top = in_stack(ent, stack_top);
    924 		fwrite( &de_stack[stack_top], 1, STACK_SIZE-stack_top, stderr);
    925 			stack_top = STACK_SIZE;
    926 	}
    927 	} else if(!debug) {	/* decompressing */
    928 
    929 	for ( i = 0; i < free_ent; i++ ) {
    930 		ent = i;
    931 		c = tab_suffixof(ent);
    932 		if ( isascii(c) && isprint(c) )
    933 		fprintf( stderr, "%5d: %5d/'%c'  \"",
    934 				ent, tab_prefixof(ent), c );
    935 		else
    936 		fprintf( stderr, "%5d: %5d/\\%03o \"",
    937 				ent, tab_prefixof(ent), c );
    938 		de_stack[--stack_top] = '\n';
    939 		de_stack[--stack_top] = '"';
    940 		for ( ; ent != NULL;
    941 			ent = (ent >= FIRST ? tab_prefixof(ent) : NULL) ) {
    942 		stack_top = in_stack(tab_suffixof(ent), stack_top);
    943 		}
    944 		fwrite( &de_stack[stack_top], 1, STACK_SIZE - stack_top, stderr );
    945 		stack_top = STACK_SIZE;
    946 	}
    947 	}
    948 }
    949 
    950 int
    951 in_stack(int c, int stack_top)
    952 {
    953 	if ( (isascii(c) && isprint(c) && c != '\\') || c == ' ' ) {
    954 		de_stack[--stack_top] = c;
    955 	} else {
    956 		switch( c ) {
    957 		case '\n': de_stack[--stack_top] = 'n'; break;
    958 		case '\t': de_stack[--stack_top] = 't'; break;
    959 		case '\b': de_stack[--stack_top] = 'b'; break;
    960 		case '\f': de_stack[--stack_top] = 'f'; break;
    961 		case '\r': de_stack[--stack_top] = 'r'; break;
    962 		case '\\': de_stack[--stack_top] = '\\'; break;
    963 		default:
    964 	 	de_stack[--stack_top] = '0' + c % 8;
    965 	 	de_stack[--stack_top] = '0' + (c / 8) % 8;
    966 	 	de_stack[--stack_top] = '0' + c / 64;
    967 	 	break;
    968 		}
    969 		de_stack[--stack_top] = '\\';
    970 	}
    971 	return stack_top;
    972 }
    973 #endif /* DEBUG */
    974 
    975 void
    976 writeerr(void)
    977 {
    978 	perror(ofname);
    979 	unlink(ofname);
    980 	exit(1);
    981 }
    982 
    983 void
    984 copystat(ifname, ofname)
    985 char *ifname, *ofname;
    986 {
    987 	int mode;
    988 	time_t timep[2];			/* should be struct utimbuf */
    989 	struct stat statbuf;
    990 
    991 	fclose(stdout);
    992 	if (stat(ifname, &statbuf)) {		/* Get stat on input file */
    993 		perror(ifname);
    994 		return;
    995 	}
    996 	if (!S_ISREG(statbuf.st_mode)) {
    997 		if (quiet)
    998 			fprintf(stderr, "%s: ", ifname);
    999 		fprintf(stderr, " -- not a regular file: unchanged");
   1000 		exit_stat = 1;
   1001 	} else if (exit_stat == 2 && !force) {
   1002 		/* No compression: remove file.Z */
   1003 		if (!quiet)
   1004 			fprintf(stderr, " -- file unchanged");
   1005 	} else {			/* Successful Compression */
   1006 		exit_stat = 0;
   1007 		mode = statbuf.st_mode & 0777;
   1008 		if (chmod(ofname, mode))		/* Copy modes */
   1009 			perror(ofname);
   1010 		/* Copy ownership */
   1011 		chown(ofname, statbuf.st_uid, statbuf.st_gid);
   1012 		timep[0] = statbuf.st_atime;
   1013 		timep[1] = statbuf.st_mtime;
   1014 		/* Update last accessed and modified times */
   1015 		utime(ofname, (struct utimbuf *)timep);
   1016 //		if (unlink(ifname))	/* Remove input file */
   1017 //			perror(ifname);
   1018 		return;			/* success */
   1019 	}
   1020 
   1021 	/* Unsuccessful return -- one of the tests failed */
   1022 	if (unlink(ofname))
   1023 		perror(ofname);
   1024 }
   1025 
   1026 /*
   1027  * This routine returns 1 if we are running in the foreground and stderr
   1028  * is a tty.
   1029  */
   1030 int
   1031 foreground(void)
   1032 {
   1033 	if(bgnd_flag)			/* background? */
   1034 		return 0;
   1035 	else				/* foreground */
   1036 		return isatty(2);	/* and stderr is a tty */
   1037 }
   1038 
   1039 void
   1040 onintr(int x)
   1041 {
   1042 	USED(x);
   1043 	unlink(ofname);
   1044 	exit(1);
   1045 }
   1046 
   1047 void
   1048 oops(int x)		/* wild pointer -- assume bad input */
   1049 {
   1050 	USED(x);
   1051 	if (do_decomp == 1)
   1052 		fprintf(stderr, "uncompress: corrupt input\n");
   1053 	unlink(ofname);
   1054 	exit(1);
   1055 }
   1056 
   1057 void
   1058 cl_block(void)		/* table clear for block compress */
   1059 {
   1060 	long rat;
   1061 
   1062 	checkpoint = in_count + CHECK_GAP;
   1063 #ifdef DEBUG
   1064 	if ( debug ) {
   1065 		fprintf ( stderr, "count: %ld, ratio: ", in_count );
   1066 		prratio ( stderr, in_count, bytes_out );
   1067 		fprintf ( stderr, "\n");
   1068 	}
   1069 #endif /* DEBUG */
   1070 
   1071 	if (in_count > 0x007fffff) {	/* shift will overflow */
   1072 		rat = bytes_out >> 8;
   1073 		if (rat == 0)		/* Don't divide by zero */
   1074 			rat = 0x7fffffff;
   1075 		else
   1076 			rat = in_count / rat;
   1077 	} else
   1078 		rat = (in_count << 8) / bytes_out;	/* 8 fractional bits */
   1079 	if (rat > ratio)
   1080 		ratio = rat;
   1081 	else {
   1082 		ratio = 0;
   1083 #ifdef DEBUG
   1084 		if (verbose)
   1085 			dump_tab();	/* dump string table */
   1086 #endif
   1087 		cl_hash((count_int)hsize);
   1088 		free_ent = FIRST;
   1089 		clear_flg = 1;
   1090 		output((code_int)CLEAR);
   1091 #ifdef DEBUG
   1092 		if (debug)
   1093 			fprintf(stderr, "clear\n");
   1094 #endif /* DEBUG */
   1095 	}
   1096 }
   1097 
   1098 void
   1099 cl_hash(count_int hsize)		/* reset code table */
   1100 {
   1101 	count_int *htab_p = htab+hsize;
   1102 	long i;
   1103 	long m1 = -1;
   1104 
   1105 	i = hsize - 16;
   1106  	do {				/* might use Sys V memset(3) here */
   1107 		*(htab_p-16) = m1;
   1108 		*(htab_p-15) = m1;
   1109 		*(htab_p-14) = m1;
   1110 		*(htab_p-13) = m1;
   1111 		*(htab_p-12) = m1;
   1112 		*(htab_p-11) = m1;
   1113 		*(htab_p-10) = m1;
   1114 		*(htab_p-9) = m1;
   1115 		*(htab_p-8) = m1;
   1116 		*(htab_p-7) = m1;
   1117 		*(htab_p-6) = m1;
   1118 		*(htab_p-5) = m1;
   1119 		*(htab_p-4) = m1;
   1120 		*(htab_p-3) = m1;
   1121 		*(htab_p-2) = m1;
   1122 		*(htab_p-1) = m1;
   1123 		htab_p -= 16;
   1124 	} while ((i -= 16) >= 0);
   1125 	for ( i += 16; i > 0; i-- )
   1126 		*--htab_p = m1;
   1127 }
   1128 
   1129 void
   1130 prratio(stream, num, den)
   1131 FILE *stream;
   1132 long num, den;
   1133 {
   1134 	int q;				/* Doesn't need to be long */
   1135 
   1136 	if(num > 214748L)		/* 2147483647/10000 */
   1137 		q = num / (den / 10000L);
   1138 	else
   1139 		q = 10000L * num / den;	/* Long calculations, though */
   1140 	if (q < 0) {
   1141 		putc('-', stream);
   1142 		q = -q;
   1143 	}
   1144 	fprintf(stream, "%d.%02d%%", q / 100, q % 100);
   1145 }
   1146 
   1147 void
   1148 version(void)
   1149 {
   1150 	fprintf(stderr, "%s\n", rcs_ident);
   1151 	fprintf(stderr, "Options: ");
   1152 #ifdef DEBUG
   1153 	fprintf(stderr, "DEBUG, ");
   1154 #endif
   1155 #ifdef BSD4_2
   1156 	fprintf(stderr, "BSD4_2, ");
   1157 #endif
   1158 	fprintf(stderr, "BITS = %d\n", BITS);
   1159 }
   1160 
   1161 /*
   1162  * The revision-history novel:
   1163  *
   1164  * $Header: compress.c,v 4.0 85/07/30 12:50:00 joe Release $
   1165  * $Log:	compress.c,v $
   1166  * Revision 4.0  85/07/30  12:50:00  joe
   1167  * Removed ferror() calls in output routine on every output except first.
   1168  * Prepared for release to the world.
   1169  *
   1170  * Revision 3.6  85/07/04  01:22:21  joe
   1171  * Remove much wasted storage by overlaying hash table with the tables
   1172  * used by decompress: tab_suffix[1<<BITS], stack[8000].  Updated USERMEM
   1173  * computations.  Fixed dump_tab() DEBUG routine.
   1174  *
   1175  * Revision 3.5  85/06/30  20:47:21  jaw
   1176  * Change hash function to use exclusive-or.  Rip out hash cache.  These
   1177  * speedups render the megamemory version defunct, for now.  Make decoder
   1178  * stack global.  Parts of the RCS trunks 2.7, 2.6, and 2.1 no longer apply.
   1179  *
   1180  * Revision 3.4  85/06/27  12:00:00  ken
   1181  * Get rid of all floating-point calculations by doing all compression ratio
   1182  * calculations in fixed point.
   1183  *
   1184  * Revision 3.3  85/06/24  21:53:24  joe
   1185  * Incorporate portability suggestion for M_XENIX.  Got rid of text on #else
   1186  * and #endif lines.  Cleaned up #ifdefs for vax and interdata.
   1187  *
   1188  * Revision 3.2  85/06/06  21:53:24  jaw
   1189  * Incorporate portability suggestions for Z8000, IBM PC/XT from mailing list.
   1190  * Default to "quiet" output (no compression statistics).
   1191  *
   1192  * Revision 3.1  85/05/12  18:56:13  jaw
   1193  * Integrate decompress() stack speedups (from early pointer mods by McKie).
   1194  * Repair multi-file USERMEM gaffe.  Unify 'force' flags to mimic semantics
   1195  * of SVR2 'pack'.  Streamline block-compress table clear logic.  Increase
   1196  * output byte count by magic number size.
   1197  *
   1198  * Revision 3.0	84/11/27  11:50:00  petsd!joe
   1199  * Set HSIZE depending on BITS.  Set BITS depending on USERMEM.  Unrolled
   1200  * loops in clear routines.  Added "-C" flag for 2.0 compatibility.  Used
   1201  * unsigned compares on Perkin-Elmer.  Fixed foreground check.
   1202  *
   1203  * Revision 2.7	84/11/16  19:35:39  ames!jaw
   1204  * Cache common hash codes based on input statistics; this improves
   1205  * performance for low-density raster images.  Pass on #ifdef bundle
   1206  * from Turkowski.
   1207  *
   1208  * Revision 2.6	84/11/05  19:18:21  ames!jaw
   1209  * Vary size of hash tables to reduce time for small files.
   1210  * Tune PDP-11 hash function.
   1211  *
   1212  * Revision 2.5	84/10/30  20:15:14  ames!jaw
   1213  * Junk chaining; replace with the simpler (and, on the VAX, faster)
   1214  * double hashing, discussed within.  Make block compression standard.
   1215  *
   1216  * Revision 2.4	84/10/16  11:11:11  ames!jaw
   1217  * Introduce adaptive reset for block compression, to boost the rate
   1218  * another several percent.  (See mailing list notes.)
   1219  *
   1220  * Revision 2.3	84/09/22  22:00:00  petsd!joe
   1221  * Implemented "-B" block compress.  Implemented REVERSE sorting of tab_next.
   1222  * Bug fix for last bits.  Changed fwrite to putchar loop everywhere.
   1223  *
   1224  * Revision 2.2	84/09/18  14:12:21  ames!jaw
   1225  * Fold in news changes, small machine typedef from thomas,
   1226  * #ifdef interdata from joe.
   1227  *
   1228  * Revision 2.1	84/09/10  12:34:56  ames!jaw
   1229  * Configured fast table lookup for 32-bit machines.
   1230  * This cuts user time in half for b <= FBITS, and is useful for news batching
   1231  * from VAX to PDP sites.  Also sped up decompress() [fwrite->putc] and
   1232  * added signal catcher [plus beef in writeerr()] to delete effluvia.
   1233  *
   1234  * Revision 2.0	84/08/28  22:00:00  petsd!joe
   1235  * Add check for foreground before prompting user.  Insert maxbits into
   1236  * compressed file.  Force file being uncompressed to end with ".Z".
   1237  * Added "-c" flag and "zcat".  Prepared for release.
   1238  *
   1239  * Revision 1.10  84/08/24  18:28:00  turtlevax!ken
   1240  * Will only compress regular files (no directories), added a magic number
   1241  * header (plus an undocumented -n flag to handle old files without headers),
   1242  * added -f flag to force overwriting of possibly existing destination file,
   1243  * otherwise the user is prompted for a response.  Will tack on a .Z to a
   1244  * filename if it doesn't have one when decompressing.  Will only replace
   1245  * file if it was compressed.
   1246  *
   1247  * Revision 1.9  84/08/16  17:28:00  turtlevax!ken
   1248  * Removed scanargs(), getopt(), added .Z extension and unlimited number of
   1249  * filenames to compress.  Flags may be clustered (-Ddvb12) or separated
   1250  * (-D -d -v -b 12), or combination thereof.  Modes and other status is
   1251  * copied with copystat().  -O bug for 4.2 seems to have disappeared with
   1252  * 1.8.
   1253  *
   1254  * Revision 1.8  84/08/09  23:15:00  joe
   1255  * Made it compatible with vax version, installed jim's fixes/enhancements
   1256  *
   1257  * Revision 1.6  84/08/01  22:08:00  joe
   1258  * Sped up algorithm significantly by sorting the compress chain.
   1259  *
   1260  * Revision 1.5  84/07/13  13:11:00  srd
   1261  * Added C version of vax asm routines.  Changed structure to arrays to
   1262  * save much memory.  Do unsigned compares where possible (faster on
   1263  * Perkin-Elmer)
   1264  *
   1265  * Revision 1.4  84/07/05  03:11:11  thomas
   1266  * Clean up the code a little and lint it.  (Lint complains about all
   1267  * the regs used in the asm, but I'm not going to "fix" this.)
   1268  *
   1269  * Revision 1.3  84/07/05  02:06:54  thomas
   1270  * Minor fixes.
   1271  *
   1272  * Revision 1.2  84/07/05  00:27:27  thomas
   1273  * Add variable bit length output.
   1274  */