plan9port

fork of plan9port with libvec, libstr and libsdb
Log | Files | Refs | README | LICENSE

postreverse.c (16701B)


      1 /*
      2  *
      3  * postreverse - reverse the page order in certain PostScript files.
      4  *
      5  * Page reversal relies on being able to locate sections of a document using file
      6  * structuring comments defined by Adobe (ie. the 1.0 and now 2.0 conventions) and
      7  * a few I've added. Among other things a minimally conforming document, according
      8  * to the 1.0 conventions,
      9  *
     10  *	1) Marks the end of the prologue with an %%EndProlog comment.
     11  *
     12  *	2) Starts each page with a %%Page: comment.
     13  *
     14  *	3) Marks the end of all the pages with a %%Trailer comment.
     15  *
     16  *	4) Obeys page independence (ie. pages can be arbitrarily rearranged).
     17  *
     18  * The most important change (at least for this program) that Adobe made in going
     19  * from the 1.0 to the 2.0 structuring conventions was in the prologue. They now
     20  * say the prologue should only define things, and the global initialization that
     21  * was in the prologue (1.0 conventions) should now come after the %%EndProlog
     22  * comment but before the first %%Page: comment and be bracketed by %%BeginSetup
     23  * and %%EndSetup comments. So a document that conforms to Adobe's 2.0 conventions,
     24  *
     25  *	1) Marks the end of the prologue (only definitions) with %%EndProlog.
     26  *
     27  *	2) Brackets global initialization with %%BeginSetup and %%EndSetup comments
     28  *	   which come after the prologue but before the first %%Page: comment.
     29  *
     30  *	3) Starts each page with a %%Page: comment.
     31  *
     32  *	4) Marks the end of all the pages with a %%Trailer comment.
     33  *
     34  *	5) Obeys page independence.
     35  *
     36  * postreverse can handle documents that follow the 1.0 or 2.0 conventions, but has
     37  * also been extended slightly so it works properly with the translators (primarily
     38  * dpost) supplied with this package. The page independence requirement has been
     39  * relaxed some. In particular definitions exported to the global environment from
     40  * within a page should be bracketed by %%BeginGlobal and %%EndGlobal comments.
     41  * postreverse pulls them out of each page and inserts them in the setup section
     42  * of the document, immediately before it writes the %%EndProlog (for version 1.0)
     43  * or %%EndSetup (for version 2.0) comments.
     44  *
     45  * In addition postreverse accepts documents that choose to mark the end of each
     46  * page with a %%EndPage: comment, which from a translator's point of view is often
     47  * a more natural approach. Both page boundary comments (ie. %%Page: and %%EndPage:)
     48  * are also accepted, but be warned that everything between consecutive %%EndPage:
     49  * and %%Page: comments will be ignored.
     50  *
     51  * So a document that will reverse properly with postreverse,
     52  *
     53  *	1) Marks the end of the prologue with %%EndProlog.
     54  *
     55  *	2) May have a %%BeginSetup/%%EndSetup comment pair before the first %%Page:
     56  *	   comment that brackets any global initialization.
     57  *
     58  *	3) Marks the start of each page with a %%Page: comment, or the end of each
     59  *	   page with a %%EndPage: comment. Both page boundary comments are allowed.
     60  *
     61  *	4) Marks the end of all the pages with a %%Trailer comment.
     62  *
     63  *	5) Obeys page independence or violates it to a rather limited extent and
     64  *	   marks the violations with %%BeginGlobal and %%EndGlobal comments.
     65  *
     66  * If no file arguments are given postreverse copies stdin to a temporary file and
     67  * then processes that file. That means the input is read three times (rather than
     68  * two) whenever we handle stdin. That's expensive, and shouldn't be too difficult
     69  * to fix, but I haven't gotten around to it yet.
     70  *
     71  */
     72 
     73 #include <stdio.h>
     74 #include <signal.h>
     75 #include <sys/types.h>
     76 #include <fcntl.h>
     77 
     78 #include "comments.h"			/* PostScript file structuring comments */
     79 #include "gen.h"			/* general purpose definitions */
     80 #include "path.h"			/* for temporary directory */
     81 #include "ext.h"			/* external variable declarations */
     82 #include "postreverse.h"		/* a few special definitions */
     83 
/*
 * File-scope state shared by the routines below.
 */

int	page = 1;			/* current page number; readpages() passes it to in_olist() and bumps it */
int	forms = 1;			/* forms per page in the input file; set by -n or a FORMSPERPAGE comment */

char	*temp_dir = TEMPDIR;		/* temp directory for copying stdin; changed by -T */

Pages	pages[1000];			/* byte offsets for all pages; filled by readpages(), used by writepages() */
int	next_page = 0;			/* next page goes here, ie. the number of pages[] entries in use */
long	start;				/* starting offset for next page (not referenced in this file - presumably historical) */
long	endoff = -1;			/* offset where TRAILER was found; stays -1 if it never was */
int	noreverse = FALSE;		/* don't reverse pages if TRUE; set by -r */
char	*endprolog = ENDPROLOG;		/* occasionally changed to ENDSETUP; written out by writepages() */

double	version = 3.3;			/* of the input file; replaced by the value in a VERSION comment */
int	ignoreversion = FALSE;		/* ignore possible forms.ps problems; set by -v */

char	buf[2048];			/* line buffer for input file; also the copy buffer in copystdin() */

FILE	*fp_in;				/* stuff is read from this file */
FILE	*fp_out;			/* and written here */
    103 
    104 /*****************************************************************************/
    105 
    106 main(agc, agv)
    107 
    108     int		agc;
    109     char	*agv[];
    110 
    111 {
    112 
    113 /*
    114  *
    115  * A simple program that reverses the pages in specially formatted PostScript
    116  * files. Will work with all the translators in this package, and should handle
    117  * any document that conforms to Adobe's version 1.0 or 2.0 file structuring
    118  * conventions. Only one input file is allowed, and it can either be a named (on
    119  * the command line) file or stdin.
    120  *
    121  */
    122 
    123     argc = agc;				/* other routines may want them */
    124     argv = agv;
    125 
    126     prog_name = argv[0];		/* just for error messages */
    127 
    128     fp_in = stdin;
    129     fp_out = stdout;
    130 
    131     init_signals();			/* sets up interrupt handling */
    132     options();				/* first get command line options */
    133     arguments();			/* then process non-option arguments */
    134     done();				/* and clean things up */
    135 
    136     exit(x_stat);			/* not much could be wrong */
    137 
    138 }   /* End of main */
    139 
    140 /*****************************************************************************/
    141 
    142 init_signals()
    143 
    144 {
    145 
    146 /*
    147  *
    148  * Makes sure we handle interrupts properly.
    149  *
    150  */
    151 
    152     if ( signal(SIGINT, interrupt) == SIG_IGN )  {
    153 	signal(SIGINT, SIG_IGN);
    154 	signal(SIGQUIT, SIG_IGN);
    155 	signal(SIGHUP, SIG_IGN);
    156     } else {
    157 	signal(SIGHUP, interrupt);
    158 	signal(SIGQUIT, interrupt);
    159     }   /* End else */
    160 
    161     signal(SIGTERM, interrupt);
    162 
    163 }   /* End of init_signals */
    164 
    165 /*****************************************************************************/
    166 
    167 options()
    168 
    169 {
    170 
    171     int		ch;			/* return value from getopt() */
    172     char	*optnames = "n:o:rvT:DI";
    173 
    174     extern char	*optarg;		/* used by getopt() */
    175     extern int	optind;
    176 
    177 /*
    178  *
    179  * Reads and processes the command line options. The -r option (ie. the one that
    180  * turns page reversal off) is really only useful if you want to take dpost output
    181  * and produce a page independent output file. In that case global definitions
    182  * made within pages and bracketed by %%BeginGlobal/%%EndGlobal comments will be
    183  * moved into the prologue or setup section of the document.
    184  *
    185  */
    186 
    187     while ( (ch = getopt(argc, argv, optnames)) != EOF )  {
    188 	switch ( ch )  {
    189 	    case 'n':			/* forms per page */
    190 		    if ( (forms = atoi(optarg)) <= 0 )
    191 			error(FATAL, "illegal forms request %s", optarg);
    192 		    break;
    193 
    194 	    case 'o':			/* output page list */
    195 		    out_list(optarg);
    196 		    break;
    197 
    198 	    case 'r':			/* don't reverse the pages */
    199 		    noreverse = TRUE;
    200 		    break;
    201 
    202 	    case 'v':			/* ignore possible forms.ps problems */
    203 		    ignoreversion = TRUE;
    204 		    break;
    205 
    206 	    case 'T':			/* temporary file directory */
    207 		    temp_dir = optarg;
    208 		    break;
    209 
    210 	    case 'D':			/* debug flag */
    211 		    debug = ON;
    212 		    break;
    213 
    214 	    case 'I':			/* ignore FATAL errors */
    215 		    ignore = ON;
    216 		    break;
    217 
    218 	    case '?':			/* don't understand the option */
    219 		    error(FATAL, "");
    220 		    break;
    221 
    222 	    default:			/* don't know what to do for ch */
    223 		    error(FATAL, "missing case for option %c\n", ch);
    224 		    break;
    225 	}   /* End switch */
    226     }   /* End while */
    227 
    228     argc -= optind;			/* get ready for non-option args */
    229     argv += optind;
    230 
    231 }   /* End of options */
    232 
    233 /*****************************************************************************/
    234 
    235 arguments()
    236 
    237 {
    238 
    239     char	*name;			/* name of the input file */
    240 
    241 /*
    242  *
    243  * postreverse only handles one input file at a time, so if there's more than one
    244  * argument left when we get here we'll quit. If none remain we copy stdin to a
    245  * temporary file and process that file.
    246  *
    247  */
    248 
    249     if ( argc > 1 )			/* can't handle more than one file */
    250 	error(FATAL, "too many arguments");
    251 
    252     if ( argc == 0 )			/* copy stdin to a temporary file */
    253 	name = copystdin();
    254     else name = *argv;
    255 
    256     if ( (fp_in = fopen(name, "r")) == NULL )
    257 	error(FATAL, "can't open %s", name);
    258 
    259     reverse();
    260 
    261 }   /* End of arguments */
    262 
    263 /*****************************************************************************/
    264 
    265 done()
    266 
    267 {
    268 
    269 /*
    270  *
    271  * Cleans things up after we've finished reversing the pages in the input file.
    272  * All that's really left to do is remove the temp file, provided we used one.
    273  *
    274  */
    275 
    276     if ( temp_file != NULL )
    277 	unlink(temp_file);
    278 
    279 }   /* End of done */
    280 
    281 /*****************************************************************************/
    282 
    283 char *copystdin()
    284 
    285 {
    286 
    287     int		fd_out;			/* for the temporary file */
    288     int		fd_in;			/* for stdin */
    289     int		count;			/* number of bytes put in buf[] */
    290 
    291 /*
    292  *
    293  * Copies stdin to a temporary file and returns the pathname of that file to the
    294  * caller. It's an expensive way of doing things, because it means we end up
    295  * reading the input file three times - rather than just twice. Could probably be
    296  * fixed by creating the temporary file on the fly as we read the file the first
    297  * time.
    298  *
    299  */
    300 
    301     if ( (temp_file = tempnam(temp_dir, "post")) == NULL )
    302 	error(FATAL, "can't generate temp file name");
    303 
    304     if ( (fd_out = creat(temp_file, 0660)) == -1 )
    305 	error(FATAL, "can't open %s", temp_file);
    306 
    307     fd_in = fileno(stdin);
    308 
    309     while ( (count = read(fd_in, buf, sizeof(buf))) > 0 )
    310 	if ( write(fd_out, buf, count) != count )
    311 	    error(FATAL, "error writing to %s", temp_file);
    312 
    313     close(fd_out);
    314 
    315     return(temp_file);
    316 
    317 }   /* End of copystdin */
    318 
    319 /*****************************************************************************/
    320 
    321 reverse()
    322 
    323 {
    324 
    325 /*
    326  *
    327  * Begins by looking for the ENDPROLOG comment in the input file. Everything up to
    328  * that comment is copied to the output file. If the comment isn't found the entire
    329  * input file is copied and moreprolog() returns FALSE. Otherwise readpages() reads
    330  * the rest of the input file and remembers (in pages[]) where each page starts and
    331  * ends. In addition everything bracketed by %%BeginGlobal and %%EndGlobal comments
    332  * is immediately added to the new prologue (or setup section) and ends up being
    333  * removed from the individual pages. When readpages() finds the TRAILER comment
    334  * or gets to the end of the input file we go back to the pages[] array and use
    335  * the saved offsets to write the pages out in reverse order. Finally everything
    336  * from the TRAILER comment to the end of the input file is copied to the output
    337  * file.
    338  *
    339  */
    340 
    341     if ( moreprolog(ENDPROLOG) == TRUE )  {
    342 	readpages();
    343 	writepages();
    344 	trailer();
    345     }	/* End if */
    346 
    347 }   /* End of reverse */
    348 
    349 /*****************************************************************************/
    350 
    351 moreprolog(str)
    352 
    353     char	*str;			/* copy everything up to this string */
    354 
    355 {
    356 
    357     int		len;			/* length of FORMSPERPAGE string */
    358     int		vlen;			/* length of VERSION string */
    359 
    360 /*
    361  *
    362  * Looks for string *str at the start of a line and copies everything up to that
    363  * string to the output file. If *str isn't found the entire input file will end
    364  * up being copied to the output file and FALSE will be returned to the caller.
    365  * The first call (made from reverse()) looks for ENDPROLOG. Any other call comes
    366  * from readpages() and will be looking for the ENDSETUP comment.
    367  *
    368  */
    369 
    370     len = strlen(FORMSPERPAGE);
    371     vlen = strlen(VERSION);
    372 
    373     while ( fgets(buf, sizeof(buf), fp_in) != NULL )  {
    374 	if ( strcmp(buf, str) == 0 )
    375 	    return(TRUE);
    376 	else if ( strncmp(buf, FORMSPERPAGE, len) == 0 )
    377 	    forms = atoi(&buf[len+1]);
    378 	else if ( strncmp(buf, VERSION, vlen) == 0 )
    379 	    version = atof(&buf[vlen+1]);
    380 	fprintf(fp_out, "%s", buf);
    381     }	/* End while */
    382 
    383     return(FALSE);
    384 
    385 }   /* End of moreprolog */
    386 
    387 /*****************************************************************************/
    388 
    389 readpages()
    390 
    391 {
    392 
    393     int		endpagelen;		/* length of ENDPAGE */
    394     int		pagelen;		/* and PAGE strings */
    395     int		sawendpage = TRUE;	/* ENDPAGE equivalent marked last page */
    396     int		gotpage = FALSE;	/* TRUE disables BEGINSETUP stuff */
    397 
    398 /*
    399  *
    400  * Records starting and ending positions of the requested pages (usually all of
    401  * them), puts global definitions in the prologue, and remembers where the TRAILER
    402  * was found.
    403  *
    404  * Page boundaries are marked by the strings PAGE, ENDPAGE, or perhaps both.
    405  * Application programs will normally find one or the other more convenient, so
    406  * in most cases only one kind of page delimiter will be found in a particular
    407  * document.
    408  *
    409  */
    410 
    411     pages[0].start = ftell(fp_in);	/* first page starts after ENDPROLOG */
    412     endprolog = ENDPROLOG;
    413 
    414     endpagelen = strlen(ENDPAGE);
    415     pagelen = strlen(PAGE);
    416 
    417     while ( fgets(buf, sizeof(buf), fp_in) != NULL )
    418 	if ( buf[0] != '%' )
    419 	    continue;
    420 	else if ( strncmp(buf, ENDPAGE, endpagelen) == 0 )  {
    421 	    if ( in_olist(page++) == ON )  {
    422 		pages[next_page].empty = FALSE;
    423 		pages[next_page++].stop = ftell(fp_in);
    424 	    }	/* End if */
    425 	    pages[next_page].start = ftell(fp_in);
    426 	    sawendpage = TRUE;
    427 	    gotpage = TRUE;
    428 	} else if ( strncmp(buf, PAGE, pagelen) == 0 )  {
    429 	    if ( sawendpage == FALSE && in_olist(page++) == ON )  {
    430 		pages[next_page].empty = FALSE;
    431 		pages[next_page++].stop = ftell(fp_in) - strlen(buf);
    432 	    }	/* End if */
    433 	    pages[next_page].start = ftell(fp_in) - strlen(buf);
    434 	    sawendpage = FALSE;
    435 	    gotpage = TRUE;
    436 	} else if ( gotpage == FALSE && strcmp(buf, BEGINSETUP) == 0 )  {
    437 	    fprintf(fp_out, "%s", endprolog);
    438 	    fprintf(fp_out, "%s", BEGINSETUP);
    439 	    moreprolog(ENDSETUP);
    440 	    endprolog = ENDSETUP;
    441 	} else if ( strcmp(buf, BEGINGLOBAL) == 0 )  {
    442 	    moreprolog(ENDGLOBAL);
    443 	} else if ( strcmp(buf, TRAILER) == 0 )  {
    444 	    if ( sawendpage == FALSE )
    445 		pages[next_page++].stop = ftell(fp_in) - strlen(buf);
    446 	    endoff = ftell(fp_in);
    447 	    break;
    448 	}   /* End if */
    449 
    450 }   /* End of readpages */
    451 
    452 /*****************************************************************************/
    453 
    454 writepages()
    455 
    456 {
    457 
    458     int		i, j, k;		/* loop indices */
    459 
    460 /*
    461  *
    462  * Goes through the pages[] array, usually from the bottom up, and writes out all
    463  * the pages. Documents that print more than one form per page cause things to get
    464  * a little more complicated. Each physical page has to have its subpages printed
    465  * in the correct order, and we have to build a few dummy subpages for the last
    466  * (and now first) sheet of paper, otherwise things will only occasionally work.
    467  *
    468  */
    469 
    470     fprintf(fp_out, "%s", endprolog);
    471 
    472     if ( noreverse == FALSE )		/* fill out the first page */
    473 	for ( i = (forms - next_page % forms) % forms; i > 0; i--, next_page++ )
    474 	    pages[next_page].empty = TRUE;
    475     else forms = next_page;		/* turns reversal off in next loop */
    476 
    477     for ( i = next_page - forms; i >= 0; i -= forms )
    478 	for ( j = i, k = 0; k < forms; j++, k++ )
    479 	    if ( pages[j].empty == TRUE ) {
    480 		if ( ignoreversion == TRUE || version > 3.1 ) {
    481 		    fprintf(fp_out, "%s 0 0\n", PAGE);
    482 		    fprintf(fp_out, "/saveobj save def\n");
    483 		    fprintf(fp_out, "showpage\n");
    484 		    fprintf(fp_out, "saveobj restore\n");
    485 		    fprintf(fp_out, "%s 0 0\n", ENDPAGE);
    486 		} else {
    487 		    fprintf(fp_out, "%s 0 0\n", PAGE);
    488 		    fprintf(fp_out, "save showpage restore\n");
    489 		    fprintf(fp_out, "%s 0 0\n", ENDPAGE);
    490 		}   /* End else */
    491 	    } else copypage(pages[j].start, pages[j].stop);
    492 
    493 }   /* End of writepages */
    494 
    495 /*****************************************************************************/
    496 
    497 copypage(start, stop)
    498 
    499     long	start;			/* starting from this offset */
    500     long	stop;			/* and ending here */
    501 
    502 {
    503 
    504 /*
    505  *
    506  * Copies the page beginning at offset start and ending at stop to the output
    507  * file. Global definitions are skipped since they've already been added to the
    508  * prologue.
    509  *
    510  */
    511 
    512     fseek(fp_in, start, 0);
    513 
    514     while ( ftell(fp_in) < stop && fgets(buf, sizeof(buf), fp_in) != NULL )
    515 	if ( buf[0] == '%' && strcmp(buf, BEGINGLOBAL) == 0 )
    516 	    while ( fgets(buf, sizeof(buf), fp_in) != NULL && strcmp(buf, ENDGLOBAL) != 0 ) ;
    517 	else fprintf(fp_out, "%s", buf);
    518 
    519 }   /* End of copypage */
    520 
    521 /*****************************************************************************/
    522 
    523 trailer()
    524 
    525 {
    526 
    527 /*
    528  *
    529  * Makes sure everything from the TRAILER string to EOF is copied to the output
    530  * file.
    531  *
    532  */
    533 
    534     if ( endoff > 0 )  {
    535 	fprintf(fp_out, "%s", TRAILER);
    536 	fseek(fp_in, endoff, 0);
    537 	while ( fgets(buf, sizeof(buf), fp_in) != NULL )
    538 	    fprintf(fp_out, "%s", buf);
    539     }	/* End if */
    540 
    541 }   /* End of trailer */
    542 
    543 /*****************************************************************************/