plan9port

fork of plan9port with libvec, libstr and libsdb
Log | Files | Refs | README | LICENSE

pcollins.c (4498B)


      1 #include <u.h>
      2 #include <libc.h>
      3 #include <bio.h>
      4 #include "dict.h"
      5 
      6 /*
      7  * Routines for handling dictionaries in the "Paperback Collins"
      8  * format (with tags surrounded by >....<)
      9  */
     10 enum {
     11 	Buflen=1000
     12 };
     13 
     14 /* More special runes */
     15 enum {
     16 	B = MULTIE+1,	/* bold */
     17 	H,		/* headword start */
     18 	I,		/* italics */
     19 	Ps,		/* pronunciation start */
     20 	Pe,		/* pronunciation end */
     21 	R,		/* roman */
     22 	X		/* headword end */
     23 };
     24 
     25 /* Assoc tables must be sorted on first field */
     26 
     27 static Assoc tagtab[] = {
     28 	{"AA",		0xc5},
     29 	{"AC",		LACU},
     30 	{"B",		B},
     31 	{"CE",		LCED},
     32 	{"CI",		LFRN},
     33 	{"Di",		0x131},
     34 	{"EL",		0x2d},
     35 	{"GR",		LGRV},
     36 	{"H",		H},
     37 	{"I",		I},
     38 	{"OE",		0x152},
     39 	{"R",		R},
     40 	{"TI",		LTIL},
     41 	{"UM",		LUML},
     42 	{"X",		X},
     43 	{"[",		Ps},
     44 	{"]",		Pe},
     45 	{"ac",		LACU},
     46 	{"ce",		LCED},
     47 	{"ci",		LFRN},
     48 	{"gr",		LGRV},
     49 	{"oe",		0x153},
     50 	{"supe",	0x65},		/* should be raised */
     51 	{"supo",	0x6f},		/* should be raised */
     52 	{"ti",		LTIL},
     53 	{"um",		LUML},
     54 	{"{",		Ps},
     55 	{"~",		0x7e},
     56 	{"~~",		MTT}
     57 };
     58 
     59 static Rune normtab[128] = {
     60 	/*0*/	/*1*/	/*2*/	/*3*/	/*4*/	/*5*/	/*6*/	/*7*/
     61 /*00*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
     62 	NONE,	NONE,	0x20,	NONE,	NONE,	NONE,	NONE,	NONE,
     63 /*10*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
     64 	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
     65 /*20*/	0x20,	0x21,	0x22,	0x23,	0x24,	0x25,	0x26,	'\'',
     66 	0x28,	0x29,	0x2a,	0x2b,	0x2c,	0x2d,	0x2e,	0x2f,
     67 /*30*/  0x30,	0x31,	0x32,	0x33,	0x34,	0x35,	0x36,	0x37,
     68 	0x38,	0x39,	0x3a,	0x3b,	TAGE,	0x3d,	TAGS,	0x3f,
     69 /*40*/  0x40,	0x41,	0x42,	0x43,	0x44,	0x45,	0x46,	0x47,
     70 	0x48,	0x49,	0x4a,	0x4b,	'L',	0x4d,	0x4e,	0x4f,
     71 /*50*/	0x50,	0x51,	0x52,	0x53,	0x54,	0x55,	0x56,	0x57,
     72 	0x58,	0x59,	0x5a,	0x5b,	'\\',	0x5d,	0x5e,	0x5f,
     73 /*60*/	0x60,	0x61,	0x62,	0x63,	0x64,	0x65,	0x66,	0x67,
     74 	0x68,	0x69,	0x6a,	0x6b,	0x6c,	0x6d,	0x6e,	0x6f,
     75 /*70*/	0x70,	0x71,	0x72,	0x73,	0x74,	0x75,	0x76,	0x77,
     76 	0x78,	0x79,	0x7a,	0x7b,	0x7c,	0x7d,	0x7e,	NONE
     77 };
     78 
     79 static char *gettag(char *, char *);
     80 
     81 static Entry	curentry;
     82 static char	tag[Buflen];
     83 #define cursize (curentry.end-curentry.start)
     84 
     85 void
     86 pcollprintentry(Entry e, int cmd)
     87 {
     88 	char *p, *pe;
     89 	long r, rprev, t, rlig;
     90 	int saveoi;
     91 	Rune *transtab;
     92 
     93 	p = e.start;
     94 	pe = e.end;
     95 	transtab = normtab;
     96 	rprev = NONE;
     97 	changett(0, 0, 0);
     98 	curentry = e;
     99 	saveoi = 0;
    100 	if(cmd == 'h')
    101 		outinhibit = 1;
    102 	while(p < pe) {
    103 		if(cmd == 'r') {
    104 			outchar(*p++);
    105 			continue;
    106 		}
    107 		r = transtab[(*p++)&0x7F];
    108 		if(r < NONE) {
    109 			/* Emit the rune, but buffer in case of ligature */
    110 			if(rprev != NONE)
    111 				outrune(rprev);
    112 			rprev = r;
    113 		} else if(r == TAGS) {
    114 			p = gettag(p, pe);
    115 			t = lookassoc(tagtab, asize(tagtab), tag);
    116 			if(t == -1) {
    117 				if(debug && !outinhibit)
    118 					err("tag %ld %d %s",
    119 						e.doff, cursize, tag);
    120 				continue;
    121 			}
    122 			if(t < NONE) {
    123 				if(rprev != NONE)
    124 					outrune(rprev);
    125 				rprev = t;
    126 			} else if(t >= LIGS && t < LIGE) {
    127 				/* handle possible ligature */
    128 				rlig = liglookup(t, rprev);
    129 				if(rlig != NONE)
    130 					rprev = rlig;	/* overwrite rprev */
    131 				else {
    132 					/* could print accent, but let's not */
    133 					if(rprev != NONE) outrune(rprev);
    134 					rprev = NONE;
    135 				}
    136 			} else if(t >= MULTI && t < MULTIE) {
    137 				if(rprev != NONE) {
    138 					outrune(rprev);
    139 					rprev = NONE;
    140 				}
    141 				outrunes(multitab[t-MULTI]);
    142 			} else {
    143 				if(rprev != NONE) {
    144 					outrune(rprev);
    145 					rprev = NONE;
    146 				}
    147 				switch(t){
    148 				case H:
    149 					if(cmd == 'h')
    150 						outinhibit = 0;
    151 					else
    152 						outnl(0);
    153 					break;
    154 				case X:
    155 					if(cmd == 'h')
    156 						outinhibit = 1;
    157 					else
    158 						outchars(".  ");
    159 					break;
    160 				case Ps:
    161 					/* don't know enough of pron. key yet */
    162 					saveoi = outinhibit;
    163 					outinhibit = 1;
    164 					break;
    165 				case Pe:
    166 					outinhibit = saveoi;
    167 					break;
    168 				}
    169 			}
    170 		}
    171 	}
    172 	if(cmd == 'h')
    173 		outinhibit = 0;
    174 	outnl(0);
    175 }
    176 
    177 long
    178 pcollnextoff(long fromoff)
    179 {
    180 	long a;
    181 	char *p;
    182 
    183 	a = Bseek(bdict, fromoff, 0);
    184 	if(a < 0)
    185 		return -1;
    186 	for(;;) {
    187 		p = Brdline(bdict, '\n');
    188 		if(!p)
    189 			break;
    190 		if(p[0] == '>' && p[1] == 'H' && p[2] == '<')
    191 			return (Boffset(bdict)-Blinelen(bdict));
    192 	}
    193 	return -1;
    194 }
    195 
    196 void
    197 pcollprintkey(void)
    198 {
    199 	Bprint(bout, "No pronunciation key yet\n");
    200 }
    201 
    202 /*
    203  * f points just after '>'; fe points at end of entry.
    204  * Expect next characters from bin to match:
    205  *  [^ <]+<
    206  *     tag
    207  * Accumulate the tag in tag[].
    208  * Return pointer to after final '<'.
    209  */
    210 static char *
    211 gettag(char *f, char *fe)
    212 {
    213 	char *t;
    214 	int c, i;
    215 
    216 	t = tag;
    217 	i = Buflen;
    218 	while(--i > 0) {
    219 		c = *f++;
    220 		if(c == '<' || f == fe)
    221 			break;
    222 		*t++ = c;
    223 	}
    224 	*t = 0;
    225 	return f;
    226 }