/* pcollins.c (4498B) */
1 #include <u.h> 2 #include <libc.h> 3 #include <bio.h> 4 #include "dict.h" 5 6 /* 7 * Routines for handling dictionaries in the "Paperback Collins" 8 * format (with tags surrounded by >....<) 9 */ 10 enum { 11 Buflen=1000 12 }; 13 14 /* More special runes */ 15 enum { 16 B = MULTIE+1, /* bold */ 17 H, /* headword start */ 18 I, /* italics */ 19 Ps, /* pronunciation start */ 20 Pe, /* pronunciation end */ 21 R, /* roman */ 22 X /* headword end */ 23 }; 24 25 /* Assoc tables must be sorted on first field */ 26 27 static Assoc tagtab[] = { 28 {"AA", 0xc5}, 29 {"AC", LACU}, 30 {"B", B}, 31 {"CE", LCED}, 32 {"CI", LFRN}, 33 {"Di", 0x131}, 34 {"EL", 0x2d}, 35 {"GR", LGRV}, 36 {"H", H}, 37 {"I", I}, 38 {"OE", 0x152}, 39 {"R", R}, 40 {"TI", LTIL}, 41 {"UM", LUML}, 42 {"X", X}, 43 {"[", Ps}, 44 {"]", Pe}, 45 {"ac", LACU}, 46 {"ce", LCED}, 47 {"ci", LFRN}, 48 {"gr", LGRV}, 49 {"oe", 0x153}, 50 {"supe", 0x65}, /* should be raised */ 51 {"supo", 0x6f}, /* should be raised */ 52 {"ti", LTIL}, 53 {"um", LUML}, 54 {"{", Ps}, 55 {"~", 0x7e}, 56 {"~~", MTT} 57 }; 58 59 static Rune normtab[128] = { 60 /*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/ 61 /*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, 62 NONE, NONE, 0x20, NONE, NONE, NONE, NONE, NONE, 63 /*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, 64 NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, 65 /*20*/ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, '\'', 66 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 67 /*30*/ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 68 0x38, 0x39, 0x3a, 0x3b, TAGE, 0x3d, TAGS, 0x3f, 69 /*40*/ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 70 0x48, 0x49, 0x4a, 0x4b, 'L', 0x4d, 0x4e, 0x4f, 71 /*50*/ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 72 0x58, 0x59, 0x5a, 0x5b, '\\', 0x5d, 0x5e, 0x5f, 73 /*60*/ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 74 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 75 /*70*/ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 76 0x78, 0x79, 0x7a, 0x7b, 
0x7c, 0x7d, 0x7e, NONE 77 }; 78 79 static char *gettag(char *, char *); 80 81 static Entry curentry; 82 static char tag[Buflen]; 83 #define cursize (curentry.end-curentry.start) 84 85 void 86 pcollprintentry(Entry e, int cmd) 87 { 88 char *p, *pe; 89 long r, rprev, t, rlig; 90 int saveoi; 91 Rune *transtab; 92 93 p = e.start; 94 pe = e.end; 95 transtab = normtab; 96 rprev = NONE; 97 changett(0, 0, 0); 98 curentry = e; 99 saveoi = 0; 100 if(cmd == 'h') 101 outinhibit = 1; 102 while(p < pe) { 103 if(cmd == 'r') { 104 outchar(*p++); 105 continue; 106 } 107 r = transtab[(*p++)&0x7F]; 108 if(r < NONE) { 109 /* Emit the rune, but buffer in case of ligature */ 110 if(rprev != NONE) 111 outrune(rprev); 112 rprev = r; 113 } else if(r == TAGS) { 114 p = gettag(p, pe); 115 t = lookassoc(tagtab, asize(tagtab), tag); 116 if(t == -1) { 117 if(debug && !outinhibit) 118 err("tag %ld %d %s", 119 e.doff, cursize, tag); 120 continue; 121 } 122 if(t < NONE) { 123 if(rprev != NONE) 124 outrune(rprev); 125 rprev = t; 126 } else if(t >= LIGS && t < LIGE) { 127 /* handle possible ligature */ 128 rlig = liglookup(t, rprev); 129 if(rlig != NONE) 130 rprev = rlig; /* overwrite rprev */ 131 else { 132 /* could print accent, but let's not */ 133 if(rprev != NONE) outrune(rprev); 134 rprev = NONE; 135 } 136 } else if(t >= MULTI && t < MULTIE) { 137 if(rprev != NONE) { 138 outrune(rprev); 139 rprev = NONE; 140 } 141 outrunes(multitab[t-MULTI]); 142 } else { 143 if(rprev != NONE) { 144 outrune(rprev); 145 rprev = NONE; 146 } 147 switch(t){ 148 case H: 149 if(cmd == 'h') 150 outinhibit = 0; 151 else 152 outnl(0); 153 break; 154 case X: 155 if(cmd == 'h') 156 outinhibit = 1; 157 else 158 outchars(". "); 159 break; 160 case Ps: 161 /* don't know enough of pron. 
key yet */ 162 saveoi = outinhibit; 163 outinhibit = 1; 164 break; 165 case Pe: 166 outinhibit = saveoi; 167 break; 168 } 169 } 170 } 171 } 172 if(cmd == 'h') 173 outinhibit = 0; 174 outnl(0); 175 } 176 177 long 178 pcollnextoff(long fromoff) 179 { 180 long a; 181 char *p; 182 183 a = Bseek(bdict, fromoff, 0); 184 if(a < 0) 185 return -1; 186 for(;;) { 187 p = Brdline(bdict, '\n'); 188 if(!p) 189 break; 190 if(p[0] == '>' && p[1] == 'H' && p[2] == '<') 191 return (Boffset(bdict)-Blinelen(bdict)); 192 } 193 return -1; 194 } 195 196 void 197 pcollprintkey(void) 198 { 199 Bprint(bout, "No pronunciation key yet\n"); 200 } 201 202 /* 203 * f points just after '>'; fe points at end of entry. 204 * Expect next characters from bin to match: 205 * [^ <]+< 206 * tag 207 * Accumulate the tag in tag[]. 208 * Return pointer to after final '<'. 209 */ 210 static char * 211 gettag(char *f, char *fe) 212 { 213 char *t; 214 int c, i; 215 216 t = tag; 217 i = Buflen; 218 while(--i > 0) { 219 c = *f++; 220 if(c == '<' || f == fe) 221 break; 222 *t++ = c; 223 } 224 *t = 0; 225 return f; 226 }