plan9port

fork of plan9port with libvec, libstr and libsdb
Log | Files | Refs | README | LICENSE

oed.c (36018B)


      1 #include <u.h>
      2 #include <libc.h>
      3 #include <bio.h>
      4 #include "dict.h"
      5 
      6 enum {
      7 	Buflen=1000,
      8 	Maxaux=5
      9 };
     10 
     11 /* Possible tags */
     12 enum {
     13 	A,		/* author in quote (small caps) */
     14 	B,		/* bold */
     15 	Ba,		/* author inside bib */
     16 	Bch,		/* builtup chem component */
     17 	Bib,		/* surrounds word 'in' for bibliographic ref */
     18 	Bl,		/* bold */
     19 	Bo,		/* bond over */
     20 	Bu,		/* bond under */
     21 	Cb,		/* ? block of stuff (indent) */
     22 	Cf,		/* cross ref to another entry (italics) */
     23 	Chem,		/* chemistry formula */
     24 	Co,		/* over (preceding sum, integral, etc.) */
     25 	Col,		/* column of table (aux just may be r) */
     26 	Cu,		/* under (preceding sum, integral, etc.) */
     27 	Dat,		/* date */
     28 	Db,		/* def block? indent */
     29 	Dn,		/* denominator of fraction */
     30 	E,		/* main entry */
     31 	Ed,		/* editor's comments (in [...]) */
     32 	Etym,		/* etymology (in [...]) */
     33 	Fq,		/* frequency count (superscript) */
     34 	Form,		/* formula */
     35 	Fr,		/* fraction (contains <nu>, then <dn>) */
     36 	Gk,		/* greek (transliteration) */
     37 	Gr,		/* grammar? (e.g., around 'pa.' in 'pa. pple.') */
     38 	Hg,		/* headword group */
     39 	Hm,		/* homonym (superscript) */
     40 	Hw,		/* headword (bold) */
     41 	I,		/* italics */
     42 	Il,		/* italic list? */
     43 	In,		/* inferior (subscript) */
     44 	L,		/* row of col of table */
     45 	La,		/* status or usage label (italic) */
     46 	Lc,		/* chapter/verse sort of thing for works */
     47 	N,		/* note (smaller type) */
     48 	Nu,		/* numerator of fraction */
     49 	Ov,		/* needs overline */
     50 	P,		/* paragraph (indent) */
     51 	Ph,		/* pronunciation (transliteration) */
     52 	Pi,		/* pile (frac without line) */
     53 	Pqp,		/* subblock of quote */
     54 	Pr,		/* pronunciation (in (...)) */
     55 	Ps,		/* position (e.g., adv.) (italic) */
     56 	Pt,		/* part (in lc) */
     57 	Q,		/* quote in quote block */
     58 	Qd,		/* quote date (bold) */
     59 	Qig,		/* quote number (greek) */
     60 	Qla,		/* status or usage label in quote (italic) */
     61 	Qp,		/* quote block (small type, indent) */
     62 	Qsn,		/* quote number */
     63 	Qt,		/* quote words */
     64 	R,		/* roman type style */
     65 	Rx,		/* relative cross reference (e.g., next) */
     66 	S,		/* another form? (italic) */
     67 	S0,		/* sense (sometimes surrounds several sx's) */
     68 	S1,		/* sense (aux num: indented bold letter) */
     69 	S2,		/* sense (aux num: indented bold capital rom num) */
     70 	S3,		/* sense (aux num: indented number of asterisks) */
     71 	S4,		/* sense (aux num: indented bold number) */
     72 	S5,		/* sense (aux num: indented number of asterisks) */
     73 	S6,		/* subsense (aux num: bold letter) */
     74 	S7a,		/* subsense (aux num: letter) */
     75 	S7n,		/* subsense (aux num: roman numeral) */
     76 	Sc,		/* small caps */
     77 	Sgk,		/* subsense (aux num: transliterated greek) */
     78 	Sn,		/* sense of subdefinition (aux num: roman letter) */
     79 	Ss,		/* sans serif */
     80 	Ssb,		/* sans serif bold */
     81 	Ssi,		/* sans serif italic */
     82 	Su,		/* superior (superscript) */
     83 	Sub,		/* subdefinition */
     84 	Table,		/* table (aux cols=number of columns) */
     85 	Tt,		/* title? (italics) */
     86 	Vd,		/* numeric label for variant form */
     87 	Ve,		/* variant entry */
     88 	Vf,		/* variant form (light bold) */
     89 	Vfl,		/* list of vf's (starts with Also or Forms) */
     90 	W,		/* work (e.g., Beowulf) (italics) */
     91 	X,		/* cross reference to main word (small caps) */
     92 	Xd,		/* cross reference to quotation by date */
     93 	Xi,		/* internal cross reference ? (italic) */
     94 	Xid,		/* cross reference identifer, in quote ? */
     95 	Xs,		/* cross reference sense (lower number) */
     96 	Xr,		/* list of x's */
     97 	Ntag		/* end of tags */
     98 };
     99 
    100 /* Assoc tables must be sorted on first field */
    101 
    102 static Assoc tagtab[] = {
    103 	{"a",		A},
    104 	{"b",		B},
    105 	{"ba",		Ba},
    106 	{"bch",		Bch},
    107 	{"bib",		Bib},
    108 	{"bl",		Bl},
    109 	{"bo",		Bo},
    110 	{"bu",		Bu},
    111 	{"cb",		Cb},
    112 	{"cf",		Cf},
    113 	{"chem",	Chem},
    114 	{"co",		Co},
    115 	{"col",		Col},
    116 	{"cu",		Cu},
    117 	{"dat",		Dat},
    118 	{"db",		Db},
    119 	{"dn",		Dn},
    120 	{"e",		E},
    121 	{"ed",		Ed},
    122 	{"et",		Etym},
    123 	{"etym",	Etym},
    124 	{"form",	Form},
    125 	{"fq",		Fq},
    126 	{"fr",		Fr},
    127 	{"frac",	Fr},
    128 	{"gk",		Gk},
    129 	{"gr",		Gr},
    130 	{"hg",		Hg},
    131 	{"hm",		Hm},
    132 	{"hw",		Hw},
    133 	{"i",		I},
    134 	{"il",		Il},
    135 	{"in",		In},
    136 	{"l",		L},
    137 	{"la",		La},
    138 	{"lc",		Lc},
    139 	{"n",		N},
    140 	{"nu",		Nu},
    141 	{"ov",		Ov},
    142 	{"p",		P},
    143 	{"ph",		Ph},
    144 	{"pi",		Pi},
    145 	{"pqp",		Pqp},
    146 	{"pr",		Pr},
    147 	{"ps",		Ps},
    148 	{"pt",		Pt},
    149 	{"q",		Q},
    150 	{"qd",		Qd},
    151 	{"qig",		Qig},
    152 	{"qla",		Qla},
    153 	{"qp",		Qp},
    154 	{"qsn",		Qsn},
    155 	{"qt",		Qt},
    156 	{"r",		R},
    157 	{"rx",		Rx},
    158 	{"s",		S},
    159 	{"s0",		S0},
    160 	{"s1",		S1},
    161 	{"s2",		S2},
    162 	{"s3",		S3},
    163 	{"s4",		S4},
    164 	{"s5",		S5},
    165 	{"s6",		S6},
    166 	{"s7a",		S7a},
    167 	{"s7n",		S7n},
    168 	{"sc",		Sc},
    169 	{"sgk",		Sgk},
    170 	{"sn",		Sn},
    171 	{"ss",		Ss,},
    172 	{"ssb",		Ssb},
    173 	{"ssi",		Ssi},
    174 	{"su",		Su},
    175 	{"sub",		Sub},
    176 	{"table",	Table},
    177 	{"tt",		Tt},
    178 	{"vd",		Vd},
    179 	{"ve",		Ve},
    180 	{"vf",		Vf},
    181 	{"vfl",		Vfl},
    182 	{"w",		W},
    183 	{"x",		X},
    184 	{"xd",		Xd},
    185 	{"xi",		Xi},
    186 	{"xid",		Xid},
    187 	{"xr",		Xr},
    188 	{"xs",		Xs}
    189 };
    190 
    191 /* Possible tag auxilliary info */
    192 enum {
    193 	Cols,		/* number of columns in a table */
    194 	Num,		/* letter or number, for a sense */
    195 	St,		/* status (e.g., obs) */
    196 	Naux
    197 };
    198 
    199 static Assoc auxtab[] = {
    200 	{"cols",	Cols},
    201 	{"num",		Num},
    202 	{"st",		St}
    203 };
    204 
    205 static Assoc spectab[] = {
    206 	{"3on4",	0xbe},
    207 	{"Aacu",	0xc1},
    208 	{"Aang",	0xc5},
    209 	{"Abarab",	0x100},
    210 	{"Acirc",	0xc2},
    211 	{"Ae",		0xc6},
    212 	{"Agrave",	0xc0},
    213 	{"Alpha",	0x391},
    214 	{"Amac",	0x100},
    215 	{"Asg",		0x1b7},		/* Unicyle. Cf "Sake" */
    216 	{"Auml",	0xc4},
    217 	{"Beta",	0x392},
    218 	{"Cced",	0xc7},
    219 	{"Chacek",	0x10c},
    220 	{"Chi",		0x3a7},
    221 	{"Chirho",	0x2627},		/* Chi Rho U+2627 */
    222 	{"Csigma",	0x3da},
    223 	{"Delta",	0x394},
    224 	{"Eacu",	0xc9},
    225 	{"Ecirc",	0xca},
    226 	{"Edh",		0xd0},
    227 	{"Epsilon",	0x395},
    228 	{"Eta",		0x397},
    229 	{"Gamma",	0x393},
    230 	{"Iacu",	0xcd},
    231 	{"Icirc",	0xce},
    232 	{"Imac",	0x12a},
    233 	{"Integ",	0x222b},
    234 	{"Iota",	0x399},
    235 	{"Kappa",	0x39a},
    236 	{"Koppa",	0x3de},
    237 	{"Lambda",	0x39b},
    238 	{"Lbar",	0x141},
    239 	{"Mu",		0x39c},
    240 	{"Naira",	0x4e},		/* should have bar through */
    241 	{"Nplus",	0x4e},		/* should have plus above */
    242 	{"Ntilde",	0xd1},
    243 	{"Nu",		0x39d},
    244 	{"Oacu",	0xd3},
    245 	{"Obar",	0xd8},
    246 	{"Ocirc",	0xd4},
    247 	{"Oe",		0x152},
    248 	{"Omega",	0x3a9},
    249 	{"Omicron",	0x39f},
    250 	{"Ouml",	0xd6},
    251 	{"Phi",		0x3a6},
    252 	{"Pi",		0x3a0},
    253 	{"Psi",		0x3a8},
    254 	{"Rho",		0x3a1},
    255 	{"Sacu",	0x15a},
    256 	{"Sigma",	0x3a3},
    257 	{"Summ",	0x2211},
    258 	{"Tau",		0x3a4},
    259 	{"Th",		0xde},
    260 	{"Theta",	0x398},
    261 	{"Tse",		0x426},
    262 	{"Uacu",	0xda},
    263 	{"Ucirc",	0xdb},
    264 	{"Upsilon",	0x3a5},
    265 	{"Uuml",	0xdc},
    266 	{"Wyn",		0x1bf},		/* wynn U+01BF */
    267 	{"Xi",		0x39e},
    268 	{"Ygh",		0x1b7},		/* Yogh	U+01B7 */
    269 	{"Zeta",	0x396},
    270 	{"Zh",		0x1b7},		/* looks like Yogh. Cf "Sake" */
    271 	{"a",		0x61},		/* ante */
    272 	{"aacu",	0xe1},
    273 	{"aang",	0xe5},
    274 	{"aasper",	MAAS},
    275 	{"abreve",	0x103},
    276 	{"acirc",	0xe2},
    277 	{"acu",		LACU},
    278 	{"ae",		0xe6},
    279 	{"agrave",	0xe0},
    280 	{"ahook",	0x105},
    281 	{"alenis",	MALN},
    282 	{"alpha",	0x3b1},
    283 	{"amac",	0x101},
    284 	{"amp",		0x26},
    285 	{"and",		MAND},
    286 	{"ang",		LRNG},
    287 	{"angle",	0x2220},
    288 	{"ankh",	0x2625},		/* ankh U+2625 */
    289 	{"ante",	0x61},		/* before (year) */
    290 	{"aonq",	MAOQ},
    291 	{"appreq",	0x2243},
    292 	{"aquar",	0x2652},
    293 	{"arDadfull",	0x636},		/* Dad U+0636 */
    294 	{"arHa",	0x62d},		/* haa U+062D */
    295 	{"arTa",	0x62a},		/* taa U+062A */
    296 	{"arain",	0x639},		/* ain U+0639 */
    297 	{"arainfull",	0x639},		/* ain U+0639 */
    298 	{"aralif",	0x627},		/* alef U+0627 */
    299 	{"arba",	0x628},		/* baa U+0628 */
    300 	{"arha",	0x647},		/* ha U+0647 */
    301 	{"aries",	0x2648},
    302 	{"arnun",	0x646},		/* noon U+0646 */
    303 	{"arnunfull",	0x646},		/* noon U+0646 */
    304 	{"arpa",	0x647},		/* ha U+0647 */
    305 	{"arqoph",	0x642},		/* qaf U+0642 */
    306 	{"arshinfull",	0x634},		/* sheen U+0634 */
    307 	{"arta",	0x62a},		/* taa U+062A */
    308 	{"artafull",	0x62a},		/* taa U+062A */
    309 	{"artha",	0x62b},		/* thaa U+062B */
    310 	{"arwaw",	0x648},		/* waw U+0648 */
    311 	{"arya",	0x64a},		/* ya U+064A */
    312 	{"aryafull",	0x64a},		/* ya U+064A */
    313 	{"arzero",	0x660},		/* indic zero U+0660 */
    314 	{"asg",		0x292},		/* unicycle character. Cf "hallow" */
    315 	{"asper",	LASP},
    316 	{"assert",	0x22a2},
    317 	{"astm",	0x2042},		/* asterism: should be upside down */
    318 	{"at",		0x40},
    319 	{"atilde",	0xe3},
    320 	{"auml",	0xe4},
    321 	{"ayin",	0x639},		/* arabic ain U+0639 */
    322 	{"b1",		0x2d},		/* single bond */
    323 	{"b2",		0x3d},		/* double bond */
    324 	{"b3",		0x2261},		/* triple bond */
    325 	{"bbar",	0x180},		/* b with bar U+0180 */
    326 	{"beta",	0x3b2},
    327 	{"bigobl",	0x2f},
    328 	{"blC",		0x43},		/* should be black letter */
    329 	{"blJ",		0x4a},		/* should be black letter */
    330 	{"blU",		0x55},		/* should be black letter */
    331 	{"blb",		0x62},		/* should be black letter */
    332 	{"blozenge",	0x25ca},		/* U+25CA; should be black */
    333 	{"bly",		0x79},		/* should be black letter */
    334 	{"bra",		MBRA},
    335 	{"brbl",	LBRB},
    336 	{"breve",	LBRV},
    337 	{"bslash",	'\\'},
    338 	{"bsquare",	0x25a0},		/* black square U+25A0 */
    339 	{"btril",	0x25c0},		/* U+25C0 */
    340 	{"btrir",	0x25b6},		/* U+25B6 */
    341 	{"c",		0x63},		/* circa */
    342 	{"cab",		0x232a},
    343 	{"cacu",	0x107},
    344 	{"canc",	0x264b},
    345 	{"capr",	0x2651},
    346 	{"caret",	0x5e},
    347 	{"cb",		0x7d},
    348 	{"cbigb",	0x7d},
    349 	{"cbigpren",	0x29},
    350 	{"cbigsb",	0x5d},
    351 	{"cced",	0xe7},
    352 	{"cdil",	LCED},
    353 	{"cdsb",	0x301b},		/* ]] U+301b */
    354 	{"cent",	0xa2},
    355 	{"chacek",	0x10d},
    356 	{"chi",		0x3c7},
    357 	{"circ",	LRNG},
    358 	{"circa",	0x63},		/* about (year) */
    359 	{"circbl",	0x325},		/* ring below accent U+0325 */
    360 	{"circle",	0x25cb},		/* U+25CB */
    361 	{"circledot",	0x2299},
    362 	{"click",	0x296},
    363 	{"club",	0x2663},
    364 	{"comtime",	0x43},
    365 	{"conj",	0x260c},
    366 	{"cprt",	0xa9},
    367 	{"cq",		'\''},
    368 	{"cqq",		0x201d},
    369 	{"cross",	0x2720},		/* maltese cross U+2720 */
    370 	{"crotchet",	0x2669},
    371 	{"csb",		0x5d},
    372 	{"ctilde",	0x63},		/* +tilde */
    373 	{"ctlig",	MLCT},
    374 	{"cyra",	0x430},
    375 	{"cyre",	0x435},
    376 	{"cyrhard",	0x44a},
    377 	{"cyrjat",	0x463},
    378 	{"cyrm",	0x43c},
    379 	{"cyrn",	0x43d},
    380 	{"cyrr",	0x440},
    381 	{"cyrsoft",	0x44c},
    382 	{"cyrt",	0x442},
    383 	{"cyry",	0x44b},
    384 	{"dag",		0x2020},
    385 	{"dbar",	0x111},
    386 	{"dblar",	0x21cb},
    387 	{"dblgt",	0x226b},
    388 	{"dbllt",	0x226a},
    389 	{"dced",	0x64},		/* +cedilla */
    390 	{"dd",		MDD},
    391 	{"ddag",	0x2021},
    392 	{"ddd",		MDDD},
    393 	{"decr",	0x2193},
    394 	{"deg",		0xb0},
    395 	{"dele",	0x64},		/* should be dele */
    396 	{"delta",	0x3b4},
    397 	{"descnode",	0x260b},		/* descending node U+260B */
    398 	{"diamond",	0x2662},
    399 	{"digamma",	0x3dd},
    400 	{"div",		0xf7},
    401 	{"dlessi",	0x131},
    402 	{"dlessj1",	0x6a},		/* should be dotless */
    403 	{"dlessj2",	0x6a},		/* should be dotless */
    404 	{"dlessj3",	0x6a},		/* should be dotless */
    405 	{"dollar",	0x24},
    406 	{"dotab",	LDOT},
    407 	{"dotbl",	LDTB},
    408 	{"drachm",	0x292},
    409 	{"dubh",	0x2d},
    410 	{"eacu",	0xe9},
    411 	{"earth",	0x2641},
    412 	{"easper",	MEAS},
    413 	{"ebreve",	0x115},
    414 	{"ecirc",	0xea},
    415 	{"edh",		0xf0},
    416 	{"egrave",	0xe8},
    417 	{"ehacek",	0x11b},
    418 	{"ehook",	0x119},
    419 	{"elem",	0x220a},
    420 	{"elenis",	MELN},
    421 	{"em",		0x2014},
    422 	{"emac",	0x113},
    423 	{"emem",	MEMM},
    424 	{"en",		0x2013},
    425 	{"epsilon",	0x3b5},
    426 	{"equil",	0x21cb},
    427 	{"ergo",	0x2234},
    428 	{"es",		MES},
    429 	{"eszett",	0xdf},
    430 	{"eta",		0x3b7},
    431 	{"eth",		0xf0},
    432 	{"euml",	0xeb},
    433 	{"expon",	0x2191},
    434 	{"fact",	0x21},
    435 	{"fata",	0x251},
    436 	{"fatpara",	0xb6},		/* should have fatter, filled in bowl */
    437 	{"female",	0x2640},
    438 	{"ffilig",	MLFFI},
    439 	{"fflig",	MLFF},
    440 	{"ffllig",	MLFFL},
    441 	{"filig",	MLFI},
    442 	{"flat",	0x266d},
    443 	{"fllig",	MLFL},
    444 	{"frE",		0x45},		/* should be curly */
    445 	{"frL",		'L'},		/* should be curly */
    446 	{"frR",		0x52},		/* should be curly */
    447 	{"frakB",	0x42},		/* should have fraktur style */
    448 	{"frakG",	0x47},
    449 	{"frakH",	0x48},
    450 	{"frakI",	0x49},
    451 	{"frakM",	0x4d},
    452 	{"frakU",	0x55},
    453 	{"frakX",	0x58},
    454 	{"frakY",	0x59},
    455 	{"frakh",	0x68},
    456 	{"frbl",	LFRB},
    457 	{"frown",	LFRN},
    458 	{"fs",		0x20},
    459 	{"fsigma",	0x3c2},
    460 	{"gAacu",	0xc1},		/* should be Α+acute */
    461 	{"gaacu",	0x3b1},		/* +acute */
    462 	{"gabreve",	0x3b1},		/* +breve */
    463 	{"gafrown",	0x3b1},		/* +frown */
    464 	{"gagrave",	0x3b1},		/* +grave */
    465 	{"gamac",	0x3b1},		/* +macron */
    466 	{"gamma",	0x3b3},
    467 	{"gauml",	0x3b1},		/* +umlaut */
    468 	{"ge",		0x2267},
    469 	{"geacu",	0x3b5},		/* +acute */
    470 	{"gegrave",	0x3b5},		/* +grave */
    471 	{"ghacu",	0x3b7},		/* +acute */
    472 	{"ghfrown",	0x3b7},		/* +frown */
    473 	{"ghgrave",	0x3b7},		/* +grave */
    474 	{"ghmac",	0x3b7},		/* +macron */
    475 	{"giacu",	0x3b9},		/* +acute */
    476 	{"gibreve",	0x3b9},		/* +breve */
    477 	{"gifrown",	0x3b9},		/* +frown */
    478 	{"gigrave",	0x3b9},		/* +grave */
    479 	{"gimac",	0x3b9},		/* +macron */
    480 	{"giuml",	0x3b9},		/* +umlaut */
    481 	{"glagjat",	0x467},
    482 	{"glots",	0x2c0},
    483 	{"goacu",	0x3bf},		/* +acute */
    484 	{"gobreve",	0x3bf},		/* +breve */
    485 	{"grave",	LGRV},
    486 	{"gt",		0x3e},
    487 	{"guacu",	0x3c5},		/* +acute */
    488 	{"gufrown",	0x3c5},		/* +frown */
    489 	{"gugrave",	0x3c5},		/* +grave */
    490 	{"gumac",	0x3c5},		/* +macron */
    491 	{"guuml",	0x3c5},		/* +umlaut */
    492 	{"gwacu",	0x3c9},		/* +acute */
    493 	{"gwfrown",	0x3c9},		/* +frown */
    494 	{"gwgrave",	0x3c9},		/* +grave */
    495 	{"hacek",	LHCK},
    496 	{"halft",	0x2308},
    497 	{"hash",	0x23},
    498 	{"hasper",	MHAS},
    499 	{"hatpath",	0x5b2},		/* hataf patah U+05B2 */
    500 	{"hatqam",	0x5b3},		/* hataf qamats U+05B3 */
    501 	{"hatseg",	0x5b1},		/* hataf segol U+05B1 */
    502 	{"hbar",	0x127},
    503 	{"heart",	0x2661},
    504 	{"hebaleph",	0x5d0},		/* aleph U+05D0 */
    505 	{"hebayin",	0x5e2},		/* ayin U+05E2 */
    506 	{"hebbet",	0x5d1},		/* bet U+05D1 */
    507 	{"hebbeth",	0x5d1},		/* bet U+05D1 */
    508 	{"hebcheth",	0x5d7},		/* bet U+05D7 */
    509 	{"hebdaleth",	0x5d3},		/* dalet U+05D3 */
    510 	{"hebgimel",	0x5d2},		/* gimel U+05D2 */
    511 	{"hebhe",	0x5d4},		/* he U+05D4 */
    512 	{"hebkaph",	0x5db},		/* kaf U+05DB */
    513 	{"heblamed",	0x5dc},		/* lamed U+05DC */
    514 	{"hebmem",	0x5de},		/* mem U+05DE */
    515 	{"hebnun",	0x5e0},		/* nun U+05E0 */
    516 	{"hebnunfin",	0x5df},		/* final nun U+05DF */
    517 	{"hebpe",	0x5e4},		/* pe U+05E4 */
    518 	{"hebpedag",	0x5e3},		/* final pe? U+05E3 */
    519 	{"hebqoph",	0x5e7},		/* qof U+05E7 */
    520 	{"hebresh",	0x5e8},		/* resh U+05E8 */
    521 	{"hebshin",	0x5e9},		/* shin U+05E9 */
    522 	{"hebtav",	0x5ea},		/* tav U+05EA */
    523 	{"hebtsade",	0x5e6},		/* tsadi U+05E6 */
    524 	{"hebwaw",	0x5d5},		/* vav? U+05D5 */
    525 	{"hebyod",	0x5d9},		/* yod U+05D9 */
    526 	{"hebzayin",	0x5d6},		/* zayin U+05D6 */
    527 	{"hgz",		0x292},		/* ??? Cf "alet" */
    528 	{"hireq",	0x5b4},		/* U+05B4 */
    529 	{"hlenis",	MHLN},
    530 	{"hook",	LOGO},
    531 	{"horizE",	0x45},		/* should be on side */
    532 	{"horizP",	0x50},		/* should be on side */
    533 	{"horizS",	0x223d},
    534 	{"horizT",	0x22a3},
    535 	{"horizb",	0x7b},		/* should be underbrace */
    536 	{"ia",		0x3b1},
    537 	{"iacu",	0xed},
    538 	{"iasper",	MIAS},
    539 	{"ib",		0x3b2},
    540 	{"ibar",	0x268},
    541 	{"ibreve",	0x12d},
    542 	{"icirc",	0xee},
    543 	{"id",		0x3b4},
    544 	{"ident",	0x2261},
    545 	{"ie",		0x3b5},
    546 	{"ifilig",	MLFI},
    547 	{"ifflig",	MLFF},
    548 	{"ig",		0x3b3},
    549 	{"igrave",	0xec},
    550 	{"ih",		0x3b7},
    551 	{"ii",		0x3b9},
    552 	{"ik",		0x3ba},
    553 	{"ilenis",	MILN},
    554 	{"imac",	0x12b},
    555 	{"implies",	0x21d2},
    556 	{"index",	0x261e},
    557 	{"infin",	0x221e},
    558 	{"integ",	0x222b},
    559 	{"intsec",	0x2229},
    560 	{"invpri",	0x2cf},
    561 	{"iota",	0x3b9},
    562 	{"iq",		0x3c8},
    563 	{"istlig",	MLST},
    564 	{"isub",	0x3f5},		/* iota below accent */
    565 	{"iuml",	0xef},
    566 	{"iz",		0x3b6},
    567 	{"jup",		0x2643},
    568 	{"kappa",	0x3ba},
    569 	{"koppa",	0x3df},
    570 	{"lambda",	0x3bb},
    571 	{"lar",		0x2190},
    572 	{"lbar",	0x142},
    573 	{"le",		0x2266},
    574 	{"lenis",	LLEN},
    575 	{"leo",		0x264c},
    576 	{"lhalfbr",	0x2308},
    577 	{"lhshoe",	0x2283},
    578 	{"libra",	0x264e},
    579 	{"llswing",	MLLS},
    580 	{"lm",		0x2d0},
    581 	{"logicand",	0x2227},
    582 	{"logicor",	0x2228},
    583 	{"longs",	0x283},
    584 	{"lrar",	0x2194},
    585 	{"lt",		0x3c},
    586 	{"ltappr",	0x227e},
    587 	{"ltflat",	0x2220},
    588 	{"lumlbl",	0x6c},		/* +umlaut below */
    589 	{"mac",		LMAC},
    590 	{"male",	0x2642},
    591 	{"mc",		0x63},		/* should be raised */
    592 	{"merc",	0x263f},		/* mercury U+263F */
    593 	{"min",		0x2212},
    594 	{"moonfq",	0x263d},		/* first quarter moon U+263D */
    595 	{"moonlq",	0x263e},		/* last quarter moon U+263E */
    596 	{"msylab",	0x6d},		/* +sylab (ˌ) */
    597 	{"mu",		0x3bc},
    598 	{"nacu",	0x144},
    599 	{"natural",	0x266e},
    600 	{"neq",		0x2260},
    601 	{"nfacu",	0x2032},
    602 	{"nfasper",	0x2bd},
    603 	{"nfbreve",	0x2d8},
    604 	{"nfced",	0xb8},
    605 	{"nfcirc",	0x2c6},
    606 	{"nffrown",	0x2322},
    607 	{"nfgra",	0x2cb},
    608 	{"nfhacek",	0x2c7},
    609 	{"nfmac",	0xaf},
    610 	{"nftilde",	0x2dc},
    611 	{"nfuml",	0xa8},
    612 	{"ng",		0x14b},
    613 	{"not",		0xac},
    614 	{"notelem",	0x2209},
    615 	{"ntilde",	0xf1},
    616 	{"nu",		0x3bd},
    617 	{"oab",		0x2329},
    618 	{"oacu",	0xf3},
    619 	{"oasper",	MOAS},
    620 	{"ob",		0x7b},
    621 	{"obar",	0xf8},
    622 	{"obigb",	0x7b},		/* should be big */
    623 	{"obigpren",	0x28},
    624 	{"obigsb",	0x5b},		/* should be big */
    625 	{"obreve",	0x14f},
    626 	{"ocirc",	0xf4},
    627 	{"odsb",	0x301a},		/* [[ U+301A */
    628 	{"oe",		0x153},
    629 	{"oeamp",	0x26},
    630 	{"ograve",	0xf2},
    631 	{"ohook",	0x6f},		/* +hook */
    632 	{"olenis",	MOLN},
    633 	{"omac",	0x14d},
    634 	{"omega",	0x3c9},
    635 	{"omicron",	0x3bf},
    636 	{"ope",		0x25b},
    637 	{"opp",		0x260d},
    638 	{"oq",		0x60},
    639 	{"oqq",		0x201c},
    640 	{"or",		MOR},
    641 	{"osb",		0x5b},
    642 	{"otilde",	0xf5},
    643 	{"ouml",	0xf6},
    644 	{"ounce",	0x2125},		/* ounce U+2125 */
    645 	{"ovparen",	0x2322},		/* should be sideways ( */
    646 	{"p",		0x2032},
    647 	{"pa",		0x2202},
    648 	{"page",	0x50},
    649 	{"pall",	0x28e},
    650 	{"paln",	0x272},
    651 	{"par",		PAR},
    652 	{"para",	0xb6},
    653 	{"pbar",	0x70},		/* +bar */
    654 	{"per",		0x2118},		/* per U+2118 */
    655 	{"phi",		0x3c6},
    656 	{"phi2",	0x3d5},
    657 	{"pi",		0x3c0},
    658 	{"pisces",	0x2653},
    659 	{"planck",	0x127},
    660 	{"plantinJ",	0x4a},		/* should be script */
    661 	{"pm",		0xb1},
    662 	{"pmil",	0x2030},
    663 	{"pp",		0x2033},
    664 	{"ppp",		0x2034},
    665 	{"prop",	0x221d},
    666 	{"psi",		0x3c8},
    667 	{"pstlg",	0xa3},
    668 	{"q",		0x3f},		/* should be raised */
    669 	{"qamets",	0x5b3},		/* U+05B3 */
    670 	{"quaver",	0x266a},
    671 	{"rar",		0x2192},
    672 	{"rasper",	MRAS},
    673 	{"rdot",	0xb7},
    674 	{"recipe",	0x211e},		/* U+211E */
    675 	{"reg",		0xae},
    676 	{"revC",	0x186},		/* open O U+0186 */
    677 	{"reva",	0x252},
    678 	{"revc",	0x254},
    679 	{"revope",	0x25c},
    680 	{"revr",	0x279},
    681 	{"revsc",	0x2d2},		/* upside-down semicolon */
    682 	{"revv",	0x28c},
    683 	{"rfa",		0x6f},		/* +hook (Cf "goal") */
    684 	{"rhacek",	0x159},
    685 	{"rhalfbr",	0x2309},
    686 	{"rho",		0x3c1},
    687 	{"rhshoe",	0x2282},
    688 	{"rlenis",	MRLN},
    689 	{"rsylab",	0x72},		/* +sylab */
    690 	{"runash",	0x46},		/* should be runic 'ash' */
    691 	{"rvow",	0x2d4},
    692 	{"sacu",	0x15b},
    693 	{"sagit",	0x2650},
    694 	{"sampi",	0x3e1},
    695 	{"saturn",	0x2644},
    696 	{"sced",	0x15f},
    697 	{"schwa",	0x259},
    698 	{"scorpio",	0x264f},
    699 	{"scrA",	0x41},		/* should be script */
    700 	{"scrC",	0x43},
    701 	{"scrE",	0x45},
    702 	{"scrF",	0x46},
    703 	{"scrI",	0x49},
    704 	{"scrJ",	0x4a},
    705 	{"scrL",	'L'},
    706 	{"scrO",	0x4f},
    707 	{"scrP",	0x50},
    708 	{"scrQ",	0x51},
    709 	{"scrS",	0x53},
    710 	{"scrT",	0x54},
    711 	{"scrb",	0x62},
    712 	{"scrd",	0x64},
    713 	{"scrh",	0x68},
    714 	{"scrl",	0x6c},
    715 	{"scruple",	0x2108},		/* U+2108 */
    716 	{"sdd",		0x2d0},
    717 	{"sect",	0xa7},
    718 	{"semE",	0x2203},
    719 	{"sh",		0x283},
    720 	{"shacek",	0x161},
    721 	{"sharp",	0x266f},
    722 	{"sheva",	0x5b0},		/* U+05B0 */
    723 	{"shti",	0x26a},
    724 	{"shtsyll",	0x222a},
    725 	{"shtu",	0x28a},
    726 	{"sidetri",	0x22b2},
    727 	{"sigma",	0x3c3},
    728 	{"since",	0x2235},
    729 	{"slge",	0x2265},		/* should have slanted line under */
    730 	{"slle",	0x2264},		/* should have slanted line under */
    731 	{"sm",		0x2c8},
    732 	{"smm",		0x2cc},
    733 	{"spade",	0x2660},
    734 	{"sqrt",	0x221a},
    735 	{"square",	0x25a1},		/* U+25A1 */
    736 	{"ssChi",	0x3a7},		/* should be sans serif */
    737 	{"ssIota",	0x399},
    738 	{"ssOmicron",	0x39f},
    739 	{"ssPi",	0x3a0},
    740 	{"ssRho",	0x3a1},
    741 	{"ssSigma",	0x3a3},
    742 	{"ssTau",	0x3a4},
    743 	{"star",	0x2a},
    744 	{"stlig",	MLST},
    745 	{"sup2",	0x2072},
    746 	{"supgt",	0x2c3},
    747 	{"suplt",	0x2c2},
    748 	{"sur",		0x2b3},
    749 	{"swing",	0x223c},
    750 	{"tau",		0x3c4},
    751 	{"taur",	0x2649},
    752 	{"th",		0xfe},
    753 	{"thbar",	0xfe},		/* +bar */
    754 	{"theta",	0x3b8},
    755 	{"thinqm",	0x3f},		/* should be thinner */
    756 	{"tilde",	LTIL},
    757 	{"times",	0xd7},
    758 	{"tri",		0x2206},
    759 	{"trli",	0x2016},
    760 	{"ts",		0x2009},
    761 	{"uacu",	0xfa},
    762 	{"uasper",	MUAS},
    763 	{"ubar",	0x75},		/* +bar */
    764 	{"ubreve",	0x16d},
    765 	{"ucirc",	0xfb},
    766 	{"udA",		0x2200},
    767 	{"udT",		0x22a5},
    768 	{"uda",		0x250},
    769 	{"udh",		0x265},
    770 	{"udqm",	0xbf},
    771 	{"udpsi",	0x22d4},
    772 	{"udtr",	0x2207},
    773 	{"ugrave",	0xf9},
    774 	{"ulenis",	MULN},
    775 	{"umac",	0x16b},
    776 	{"uml",		LUML},
    777 	{"undl",	0x2cd},		/* underline accent */
    778 	{"union",	0x222a},
    779 	{"upsilon",	0x3c5},
    780 	{"uuml",	0xfc},
    781 	{"vavpath",	0x5d5},		/* vav U+05D5 (+patah) */
    782 	{"vavsheva",	0x5d5},		/* vav U+05D5 (+sheva) */
    783 	{"vb",		0x7c},
    784 	{"vddd",	0x22ee},
    785 	{"versicle2",	0x2123},		/* U+2123 */
    786 	{"vinc",	0xaf},
    787 	{"virgo",	0x264d},
    788 	{"vpal",	0x25f},
    789 	{"vvf",		0x263},
    790 	{"wasper",	MWAS},
    791 	{"wavyeq",	0x2248},
    792 	{"wlenis",	MWLN},
    793 	{"wyn",		0x1bf},		/* wynn U+01BF */
    794 	{"xi",		0x3be},
    795 	{"yacu",	0xfd},
    796 	{"ycirc",	0x177},
    797 	{"ygh",		0x292},
    798 	{"ymac",	0x79},		/* +macron */
    799 	{"yuml",	0xff},
    800 	{"zced",	0x7a},		/* +cedilla */
    801 	{"zeta",	0x3b6},
    802 	{"zh",		0x292},
    803 	{"zhacek",	0x17e}
    804 };
    805 /*
    806    The following special characters don't have close enough
    807    equivalents in Unicode, so aren't in the above table.
    808 	22n		2^(2^n) Cf Fermat
    809 	2on4		2/4
    810 	3on8		3/8
    811 	Bantuo		Bantu O. Cf Otshi-herero
    812 	Car		C with circular arrow on top
    813 	albrtime 	cut-time: C with vertical line
    814 	ardal		Cf dental
    815 	bantuo		Bantu o. Cf Otshi-herero
    816 	bbc1		single chem bond below
    817 	bbc2		double chem bond below
    818 	bbl1		chem bond like /
    819 	bbl2		chem bond like //
    820 	bbr1		chem bond like \
    821 	bbr2		chem bond \\
    822 	bcop1		copper symbol. Cf copper
    823 	bcop2		copper symbol. Cf copper
    824 	benchm		Cf benchmark
    825 	btc1		single chem bond above
    826 	btc2		double chem bond above
    827 	btl1		chem bond like \
    828 	btl2		chem bond like \\
    829 	btr1		chem bond like /
    830 	btr2		chem bond like //
    831 	burman		Cf Burman
    832 	devph		sanskrit letter. Cf ph
    833 	devrfls		sanskrit letter. Cf cerebral
    834 	duplong[12]	musical note
    835 	egchi		early form of chi
    836 	eggamma[12]	early form of gamma
    837 	egiota		early form of iota
    838 	egkappa		early form of kappa
    839 	eglambda	early form of lambda
    840 	egmu[12]	early form of mu
    841 	egnu[12]	early form of nu
    842 	egpi[123]	early form of pi
    843 	egrho[12]	early form of rho
    844 	egsampi		early form of sampi
    845 	egsan		early form of san
    846 	egsigma[12]	early form of sigma
    847 	egxi[123]	early form of xi
    848 	elatS		early form of S
    849 	elatc[12]	early form of C
    850 	elatg[12]	early form of G
    851 	glagjeri	Slavonic Glagolitic jeri
    852 	glagjeru	Slavonic Glagolitic jeru
    853 	hypolem		hypolemisk (line with underdot)
    854 	lhrbr		lower half }
    855 	longmord	long mordent
    856 	mbwvow		backwards scretched C. Cf retract.
    857 	mord		music symbol.  Cf mordent
    858 	mostra		Cf direct
    859 	ohgcirc		old form of circumflex
    860 	oldbeta		old form of β. Cf perturbate
    861 	oldsemibr[12]	old forms of semibreve. Cf prolation
    862 	ormg		old form of g. Cf G
    863 	para[12345]	form of ¶
    864 	pauseo		musical pause sign
    865 	pauseu		musical pause sign
    866 	pharyng		Cf pharyngal
    867 	ragr		Black letter ragged r
    868 	repetn		musical repeat. Cf retort
    869 	segno		musical segno sign
    870 	semain[12]	semitic ain
    871 	semhe		semitic he
    872 	semheth		semitic heth
    873 	semkaph		semitic kaph
    874 	semlamed[12]	semitic lamed
    875 	semmem		semitic mem
    876 	semnum		semitic nun
    877 	sempe		semitic pe
    878 	semqoph[123]	semitic qoph
    879 	semresh		semitic resh
    880 	semtav[1234]	semitic tav
    881 	semyod		semitic yod
    882 	semzayin[123]	semitic zayin
    883 	shtlong[12]	U with underbar. Cf glyconic
    884 	sigmatau	σ,τ combination
    885 	squaver		sixteenth note
    886 	sqbreve		square musical breve note
    887 	swast		swastika
    888 	uhrbr		upper half of big }
    889 	versicle1		Cf versicle
    890  */
    891 
    892 
    893 static Rune normtab[128] = {
    894 	/*0*/	/*1*/	/*2*/	/*3*/	/*4*/	/*5*/	/*6*/	/*7*/
    895 /*00*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
    896 	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
    897 /*10*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
    898 	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
    899 /*20*/	0x20,	0x21,	0x22,	0x23,	0x24,	0x25,	SPCS,	'\'',
    900 	0x28,	0x29,	0x2a,	0x2b,	0x2c,	0x2d,	0x2e,	0x2f,
    901 /*30*/  0x30,	0x31,	0x32,	0x33,	0x34,	0x35,	0x36,	0x37,
    902 	0x38,	0x39,	0x3a,	0x3b,	TAGS,	0x3d,	TAGE,	0x3f,
    903 /*40*/  0x40,	0x41,	0x42,	0x43,	0x44,	0x45,	0x46,	0x47,
    904 	0x48,	0x49,	0x4a,	0x4b,	'L',	0x4d,	0x4e,	0x4f,
    905 /*50*/	0x50,	0x51,	0x52,	0x53,	0x54,	0x55,	0x56,	0x57,
    906 	0x58,	0x59,	0x5a,	0x5b,	'\\',	0x5d,	0x5e,	0x5f,
    907 /*60*/	0x60,	0x61,	0x62,	0x63,	0x64,	0x65,	0x66,	0x67,
    908 	0x68,	0x69,	0x6a,	0x6b,	0x6c,	0x6d,	0x6e,	0x6f,
    909 /*70*/	0x70,	0x71,	0x72,	0x73,	0x74,	0x75,	0x76,	0x77,
    910 	0x78,	0x79,	0x7a,	0x7b,	0x7c,	0x7d,	0x7e,	NONE
    911 };
    912 static Rune phtab[128] = {
    913 	/*0*/	/*1*/	/*2*/	/*3*/	/*4*/	/*5*/	/*6*/	/*7*/
    914 /*00*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
    915 	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
    916 /*10*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
    917 	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
    918 /*20*/	0x20,	0x21,	0x2c8,	0x23,	0x24,	0x2cc,	0xe6,	'\'',
    919 	0x28,	0x29,	0x2a,	0x2b,	0x2c,	0x2d,	0x2e,	0x2f,
    920 /*30*/  0x30,	0x31,	0x32,	0x25c,	0x34,	0x35,	0x36,	0x37,
    921 	0x38,	0xf8,	0x2d0,	0x3b,	TAGS,	0x3d,	TAGE,	0x3f,
    922 /*40*/  0x259,	0x251,	0x42,	0x43,	0xf0,	0x25b,	0x46,	0x47,
    923 	0x48,	0x26a,	0x4a,	0x4b,	'L',	0x4d,	0x14b,	0x254,
    924 /*50*/	0x50,	0x252,	0x52,	0x283,	0x3b8,	0x28a,	0x28c,	0x57,
    925 	0x58,	0x59,	0x292,	0x5b,	'\\',	0x5d,	0x5e,	0x5f,
    926 /*60*/	0x60,	0x61,	0x62,	0x63,	0x64,	0x65,	0x66,	0x67,
    927 	0x68,	0x69,	0x6a,	0x6b,	0x6c,	0x6d,	0x6e,	0x6f,
    928 /*70*/	0x70,	0x71,	0x72,	0x73,	0x74,	0x75,	0x76,	0x77,
    929 	0x78,	0x79,	0x7a,	0x7b,	0x7c,	0x7d,	0x7e,	NONE
    930 };
/*
 * Translation table that oedprintentry switches to on the Gk (greek
 * transliteration) tag; it is also consulted directly for the number
 * of an Sgk sense.  Indexed by the input byte masked to 7 bits.
 * Most Latin letters map to Greek letters (e.g. 'a' to 0x3b1,
 * 'A' to 0x391); 'V' and 'v' are left unchanged.  NONE entries
 * produce no output; TAGS, TAGE and SPCS sit at '<', '>' and '&'.
 */
static Rune grtab[128] = {
	/*0*/	/*1*/	/*2*/	/*3*/	/*4*/	/*5*/	/*6*/	/*7*/
/*00*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
/*10*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
/*20*/	0x20,	0x21,	0x22,	0x23,	0x24,	0x25,	SPCS,	'\'',
	0x28,	0x29,	0x2a,	0x2b,	0x2c,	0x2d,	0x2e,	0x2f,
/*30*/  0x30,	0x31,	0x32,	0x33,	0x34,	0x35,	0x36,	0x37,
	0x38,	0x39,	0x3a,	0x3b,	TAGS,	0x3d,	TAGE,	0x3f,
/*40*/  0x40,	0x391,	0x392,	0x39e,	0x394,	0x395,	0x3a6,	0x393,
	0x397,	0x399,	0x3da,	0x39a,	0x39b,	0x39c,	0x39d,	0x39f,
/*50*/	0x3a0,	0x398,	0x3a1,	0x3a3,	0x3a4,	0x3a5,	0x56,	0x3a9,
	0x3a7,	0x3a8,	0x396,	0x5b,	'\\',	0x5d,	0x5e,	0x5f,
/*60*/	0x60,	0x3b1,	0x3b2,	0x3be,	0x3b4,	0x3b5,	0x3c6,	0x3b3,
	0x3b7,	0x3b9,	0x3c2,	0x3ba,	0x3bb,	0x3bc,	0x3bd,	0x3bf,
/*70*/	0x3c0,	0x3b8,	0x3c1,	0x3c3,	0x3c4,	0x3c5,	0x76,	0x3c9,
	0x3c7,	0x3c8,	0x3b6,	0x7b,	0x7c,	0x7d,	0x7e,	NONE
};
/*
 * Translation table that oedprintentry switches to on the In
 * (inferior, i.e. subscript) tag.  Indexed by the input byte masked
 * to 7 bits.  Digits map to 0x2080-0x2089 and '(', ')', '+', '-', '='
 * to 0x208d, 0x208e, 0x208a, 0x208b, 0x208c; other printable codes
 * map to themselves.  NONE entries produce no output; TAGS, TAGE and
 * SPCS sit at '<', '>' and '&'.
 */
static Rune subtab[128] = {
	/*0*/	/*1*/	/*2*/	/*3*/	/*4*/	/*5*/	/*6*/	/*7*/
/*00*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
/*10*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
/*20*/	0x20,	0x21,	0x22,	0x23,	0x24,	0x25,	SPCS,	'\'',
	0x208d,	0x208e,	0x2a,	0x208a,	0x2c,	0x208b,	0x2e,	0x2f,
/*30*/  0x2080,	0x2081,	0x2082,	0x2083,	0x2084,	0x2085,	0x2086,	0x2087,
	0x2088,	0x2089,	0x3a,	0x3b,	TAGS,	0x208c,	TAGE,	0x3f,
/*40*/  0x40,	0x41,	0x42,	0x43,	0x44,	0x45,	0x46,	0x47,
	0x48,	0x49,	0x4a,	0x4b,	'L',	0x4d,	0x4e,	0x4f,
/*50*/	0x50,	0x51,	0x52,	0x53,	0x54,	0x55,	0x56,	0x57,
	0x58,	0x59,	0x5a,	0x5b,	'\\',	0x5d,	0x5e,	0x5f,
/*60*/	0x60,	0x61,	0x62,	0x63,	0x64,	0x65,	0x66,	0x67,
	0x68,	0x69,	0x6a,	0x6b,	0x6c,	0x6d,	0x6e,	0x6f,
/*70*/	0x70,	0x71,	0x72,	0x73,	0x74,	0x75,	0x76,	0x77,
	0x78,	0x79,	0x7a,	0x7b,	0x7c,	0x7d,	0x7e,	NONE
};
    969 static Rune suptab[128] = {
    970 	/*0*/	/*1*/	/*2*/	/*3*/	/*4*/	/*5*/	/*6*/	/*7*/
    971 /*00*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
    972 	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
    973 /*10*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
    974 	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
    975 /*20*/	0x20,	0x21,	0x22,	0x23,	0x24,	0x25,	SPCS,	'\'',
    976 	0x207d,	0x207e,	0x2a,	0x207a,	0x2c,	0x207b,	0x2e,	0x2f,
    977 /*30*/  0x2070,	0x2071,	0x2072,	0x2073,	0x2074,	0x2075,	0x2076,	0x2077,
    978 	0x2078,	0x2079,	0x3a,	0x3b,	TAGS,	0x207c,	TAGE,	0x3f,
    979 /*40*/  0x40,	0x41,	0x42,	0x43,	0x44,	0x45,	0x46,	0x47,
    980 	0x48,	0x49,	0x4a,	0x4b,	'L',	0x4d,	0x4e,	0x4f,
    981 /*50*/	0x50,	0x51,	0x52,	0x53,	0x54,	0x55,	0x56,	0x57,
    982 	0x58,	0x59,	0x5a,	0x5b,	'\\',	0x5d,	0x5e,	0x5f,
    983 /*60*/	0x60,	0x61,	0x62,	0x63,	0x64,	0x65,	0x66,	0x67,
    984 	0x68,	0x69,	0x6a,	0x6b,	0x6c,	0x6d,	0x6e,	0x6f,
    985 /*70*/	0x70,	0x71,	0x72,	0x73,	0x74,	0x75,	0x76,	0x77,
    986 	0x78,	0x79,	0x7a,	0x7b,	0x7c,	0x7d,	0x7e,	NONE
    987 };
    988 
    989 static int	tagstarts;
    990 static char	tag[Buflen];
    991 static int	naux;
    992 static char	auxname[Maxaux][Buflen];
    993 static char	auxval[Maxaux][Buflen];
    994 static char	spec[Buflen];
    995 static char	*auxstate[Naux];	/* vals for most recent tag */
    996 static Entry	curentry;
    997 #define cursize (curentry.end-curentry.start)
    998 
    999 static char	*getspec(char *, char *);
   1000 static char	*gettag(char *, char *);
   1001 static void	dostatus(void);
   1002 
   1003 /*
   1004  * cmd is one of:
   1005  *    'p': normal print
   1006  *    'h': just print headwords
   1007  *    'P': print raw
   1008  */
   1009 void
   1010 oedprintentry(Entry e, int cmd)
   1011 {
   1012 	char *p, *pe;
   1013 	int t, a, i;
   1014 	long r, rprev, rlig;
   1015 	Rune *transtab;
   1016 
   1017 	p = e.start;
   1018 	pe = e.end;
   1019 	transtab = normtab;
   1020 	rprev = NONE;
   1021 	changett(0, 0, 0);
   1022 	curentry = e;
   1023 	if(cmd == 'h')
   1024 		outinhibit = 1;
   1025 	while(p < pe) {
   1026 		if(cmd == 'r') {
   1027 			outchar(*p++);
   1028 			continue;
   1029 		}
   1030 		r = transtab[(*p++)&0x7F];
   1031 		if(r < NONE) {
   1032 			/* Emit the rune, but buffer in case of ligature */
   1033 			if(rprev != NONE)
   1034 				outrune(rprev);
   1035 			rprev = r;
   1036 		} else if(r == SPCS) {
   1037 			/* Start of special character name */
   1038 			p = getspec(p, pe);
   1039 			r = lookassoc(spectab, asize(spectab), spec);
   1040 			if(r == -1) {
   1041 				if(debug)
   1042 					err("spec %ld %d %s",
   1043 						e.doff, cursize, spec);
   1044 				r = 0xfffd;
   1045 			}
   1046 			if(r >= LIGS && r < LIGE) {
   1047 				/* handle possible ligature */
   1048 				rlig = liglookup(r, rprev);
   1049 				if(rlig != NONE)
   1050 					rprev = rlig;	/* overwrite rprev */
   1051 				else {
   1052 					/* could print accent, but let's not */
   1053 					if(rprev != NONE) outrune(rprev);
   1054 					rprev = NONE;
   1055 				}
   1056 			} else if(r >= MULTI && r < MULTIE) {
   1057 				if(rprev != NONE) {
   1058 					outrune(rprev);
   1059 					rprev = NONE;
   1060 				}
   1061 				outrunes(multitab[r-MULTI]);
   1062 			} else if(r == PAR) {
   1063 				if(rprev != NONE) {
   1064 					outrune(rprev);
   1065 					rprev = NONE;
   1066 				}
   1067 				outnl(1);
   1068 			} else {
   1069 				if(rprev != NONE) outrune(rprev);
   1070 				rprev = r;
   1071 			}
   1072 		} else if(r == TAGS) {
   1073 			/* Start of tag name */
   1074 			if(rprev != NONE) {
   1075 				outrune(rprev);
   1076 				rprev = NONE;
   1077 			}
   1078 			p = gettag(p, pe);
   1079 			t = lookassoc(tagtab, asize(tagtab), tag);
   1080 			if(t == -1) {
   1081 				if(debug)
   1082 					err("tag %ld %d %s",
   1083 						e.doff, cursize, tag);
   1084 				continue;
   1085 			}
   1086 			for(i = 0; i < Naux; i++)
   1087 				auxstate[i] = 0;
   1088 			for(i = 0; i < naux; i++) {
   1089 				a = lookassoc(auxtab, asize(auxtab), auxname[i]);
   1090 				if(a == -1) {
   1091 					if(debug)
   1092 						err("aux %ld %d %s",
   1093 							e.doff, cursize, auxname[i]);
   1094 				} else
   1095 					auxstate[a] = auxval[i];
   1096 			}
   1097 			switch(t){
   1098 			case E:
   1099 			case Ve:
   1100 				outnl(0);
   1101 				if(tagstarts)
   1102 					dostatus();
   1103 				break;
   1104 			case Ed:
   1105 			case Etym:
   1106 				outchar(tagstarts? '[' : ']');
   1107 				break;
   1108 			case Pr:
   1109 				outchar(tagstarts? '(' : ')');
   1110 				break;
   1111 			case In:
   1112 				transtab = changett(transtab, subtab, tagstarts);
   1113 				break;
   1114 			case Hm:
   1115 			case Su:
   1116 			case Fq:
   1117 				transtab = changett(transtab, suptab, tagstarts);
   1118 				break;
   1119 			case Gk:
   1120 				transtab = changett(transtab, grtab, tagstarts);
   1121 				break;
   1122 			case Ph:
   1123 				transtab = changett(transtab, phtab, tagstarts);
   1124 				break;
   1125 			case Hw:
   1126 				if(cmd == 'h') {
   1127 					if(!tagstarts)
   1128 						outchar(' ');
   1129 					outinhibit = !tagstarts;
   1130 				}
   1131 				break;
   1132 			case S0:
   1133 			case S1:
   1134 			case S2:
   1135 			case S3:
   1136 			case S4:
   1137 			case S5:
   1138 			case S6:
   1139 			case S7a:
   1140 			case S7n:
   1141 			case Sn:
   1142 			case Sgk:
   1143 				if(tagstarts) {
   1144 					outnl(2);
   1145 					dostatus();
   1146 					if(auxstate[Num]) {
   1147 						if(t == S3 || t == S5) {
   1148 							i = atoi(auxstate[Num]);
   1149 							while(i--)
   1150 								outchar('*');
   1151 							outchars("  ");
   1152 						} else if(t == S7a || t == S7n || t == Sn) {
   1153 							outchar('(');
   1154 							outchars(auxstate[Num]);
   1155 							outchars(") ");
   1156 						} else if(t == Sgk) {
   1157 							i = grtab[(uchar)auxstate[Num][0]];
   1158 							if(i != NONE)
   1159 								outrune(i);
   1160 							outchars(".  ");
   1161 						} else {
   1162 							outchars(auxstate[Num]);
   1163 							outchars(".  ");
   1164 						}
   1165 					}
   1166 				}
   1167 				break;
   1168 			case Cb:
   1169 			case Db:
   1170 			case Qp:
   1171 			case P:
   1172 				if(tagstarts)
   1173 					outnl(1);
   1174 				break;
   1175 			case Table:
   1176 				/*
   1177 				 * Todo: gather columns, justify them, etc.
   1178 				 * For now, just let colums come out as rows
   1179 				 */
   1180 				if(!tagstarts)
   1181 					outnl(0);
   1182 				break;
   1183 			case Col:
   1184 				if(tagstarts)
   1185 					outnl(0);
   1186 				break;
   1187 			case Dn:
   1188 				if(tagstarts)
   1189 					outchar('/');
   1190 				break;
   1191 			}
   1192 		}
   1193 	}
   1194 	if(cmd == 'h') {
   1195 		outinhibit = 0;
   1196 		outnl(0);
   1197 	}
   1198 }
   1199 
   1200 /*
   1201  * Return offset into bdict where next oed entry after fromoff starts.
   1202  * Oed entries start with <e>, <ve>, <e st=...>, or <ve st=...>
   1203  */
   1204 long
   1205 oednextoff(long fromoff)
   1206 {
   1207 	long a, n;
   1208 	int c;
   1209 
   1210 	a = Bseek(bdict, fromoff, 0);
   1211 	if(a < 0)
   1212 		return -1;
   1213 	n = 0;
   1214 	for(;;) {
   1215 		c = Bgetc(bdict);
   1216 		if(c < 0)
   1217 			break;
   1218 		if(c == '<') {
   1219 			c = Bgetc(bdict);
   1220 			if(c == 'e') {
   1221 				c = Bgetc(bdict);
   1222 				if(c == '>' || c == ' ')
   1223 					n = 3;
   1224 			} else if(c == 'v' && Bgetc(bdict) == 'e') {
   1225 				c = Bgetc(bdict);
   1226 				if(c == '>' || c == ' ')
   1227 					n = 4;
   1228 			}
   1229 			if(n)
   1230 				break;
   1231 		}
   1232 	}
   1233 	return (Boffset(bdict)-n);
   1234 }
   1235 
/*
 * Pronunciation key, part 1 of 6: title and English consonants.
 * oedprintkey prints prkey1 through prkey6 in order.
 */
static char *prkey1 =
"KEY TO THE PRONUNCIATION\n"
"\n"
"I. CONSONANTS\n"
"b, d, f, k, l, m, n, p, t, v, z: usual English values\n"
"\n"
"g as in go (gəʊ)\n"
"h  ...  ho! (həʊ)\n"
"r  ...  run (rʌn), terrier (ˈtɛriə(r))\n"
"(r)...  her (hɜː(r))\n"
"s  ...  see (siː), success (səkˈsɜs)\n"
"w  ...  wear (wɛə(r))\n"
"hw ...  when (hwɛn)\n"
"j  ...  yes (jɛs)\n"
"θ  ...  thin (θin), bath (bɑːθ)\n"
"ð  ...  then (ðɛn), bathe (beɪð)\n"
"ʃ  ...  shop (ʃɒp), dish (dɪʃ)\n"
"tʃ ...  chop (tʃɒp), ditch (dɪtʃ)\n"
"ʒ  ...  vision (ˈvɪʒən), déjeuner (deʒøne)\n"
;
/* Pronunciation key, part 2 of 6: remaining English consonants and the foreign ones. */
static char *prkey2 =
"dʒ ...  judge (dʒʌdʒ)\n"
"ŋ  ...  singing (ˈsɪŋɪŋ), think (θiŋk)\n"
"ŋg ...  finger (ˈfiŋgə(r))\n"
"\n"
"Foreign\n"
"ʎ as in It. seraglio (serˈraʎo)\n"
"ɲ  ...  Fr. cognac (kɔɲak)\n"
"x  ...  Ger. ach (ax), Sc. loch (lɒx)\n"
"ç  ...  Ger. ich (ɪç), Sc. nicht (nɪçt)\n"
"ɣ  ...  North Ger. sagen (ˈzaːɣən)\n"
"c  ...  Afrikaans baardmannetjie (ˈbaːrtmanəci)\n"
"ɥ  ...  Fr. cuisine (kɥizin)\n"
"\n"
;
   1271 static char *prkey3 =
   1272 "II. VOWELS AND DIPTHONGS\n"
   1273 "\n"
   1274 "Short\n"
   1275 "ɪ as in pit (pɪt), -ness (-nɪs)\n"
   1276 "ɛ  ...  pet (pɛt), Fr. sept (sɛt)\n"
   1277 "æ  ...  pat (pæt)\n"
   1278 "ʌ  ...  putt (pʌt)\n"
   1279 "ɒ  ...  pot (pɒt)\n"
   1280 "ʊ  ...  put (pʊt)\n"
   1281 "ə  ...  another (əˈnʌðə(r))\n"
   1282 "(ə)...  beaten (ˈbiːt(ə)n)\n"
   1283 "i  ...  Fr. si (si)\n"
   1284 "e  ...  Fr. bébé (bebe)\n"
   1285 "a  ...  Fr. mari (mari)\n"
   1286 "ɑ  ...  Fr. bâtiment (bɑtimã)\n"
   1287 "ɔ  ...  Fr. homme (ɔm)\n"
   1288 "o  ...  Fr. eau (o)\n"
   1289 "ø  ...  Fr. peu (pø)\n"
   1290 ;
/* Pronunciation key, part 4 of 6: remaining short vowels and the long vowels. */
static char *prkey4 =
"œ  ...  Fr. boeuf (bœf), coeur (kœr)\n"
"u  ...  Fr. douce (dus)\n"
"ʏ  ...  Ger. Müller (ˈmʏlər)\n"
"y  ...  Fr. du (dy)\n"
"\n"
"Long\n"
"iː as in bean (biːn)\n"
"ɑː ...  barn (bɑːn)\n"
"ɔː ...  born (bɔːn)\n"
"uː ...  boon (buːn)\n"
"ɜː ...  burn (bɜːn)\n"
"eː ...  Ger. Schnee (ʃneː)\n"
"ɛː ...  Ger. Fähre (ˈfɛːrə)\n"
"aː ...  Ger. Tag (taːk)\n"
"oː ...  Ger. Sohn (zoːn)\n"
"øː ...  Ger. Goethe (gøːtə)\n"
"yː ...  Ger. grün (gryːn)\n"
"\n"
;
   1311 static char *prkey5 =
   1312 "Nasal\n"
   1313 "ɛ˜, æ˜ as in Fr. fin (fɛ˜, fæ˜)\n"
   1314 "ã  ...  Fr. franc (frã)\n"
   1315 "ɔ˜ ...  Fr. bon (bɔ˜n)\n"
   1316 "œ˜ ...  Fr. un (œ˜)\n"
   1317 "\n"
   1318 "Dipthongs, etc.\n"
   1319 "eɪ as in bay (beɪ)\n"
   1320 "aɪ ...  buy (baɪ)\n"
   1321 "ɔɪ ...  boy (bɔɪ)\n"
   1322 "əʊ ...  no (nəʊ)\n"
   1323 "aʊ ...  now (naʊ)\n"
   1324 "ɪə ...  peer (pɪə(r))\n"
   1325 "ɛə ...  pair (pɛə(r))\n"
   1326 "ʊə ...  tour (tʊə(r))\n"
   1327 "ɔə ...  boar (bɔə(r))\n"
   1328 "\n"
   1329 ;
/* Pronunciation key, part 6 of 6: stress marks. */
static char *prkey6 =
"III. STRESS\n"
"\n"
"Main stress: ˈ preceding stressed syllable\n"
"Secondary stress: ˌ preceding stressed syllable\n"
"\n"
"E.g.: pronunciation (prəˌnʌnsɪˈeɪʃ(ə)n)\n";
/* TODO: find transcriptions of foreign consonants, œ, ʏ, nasals */
   1338 
   1339 void
   1340 oedprintkey(void)
   1341 {
   1342 	Bprint(bout, "%s%s%s%s%s%s",
   1343 		prkey1, prkey2, prkey3, prkey4, prkey5, prkey6);
   1344 }
   1345 
   1346 /*
   1347  * f points just after a '&', fe points at end of entry.
   1348  * Accumulate the special name, starting after the &
   1349  * and continuing until the next '.', in spec[].
   1350  * Return pointer to char after '.'.
   1351  */
   1352 static char *
   1353 getspec(char *f, char *fe)
   1354 {
   1355 	char *t;
   1356 	int c, i;
   1357 
   1358 	t = spec;
   1359 	i = sizeof spec;
   1360 	while(--i > 0) {
   1361 		c = *f++;
   1362 		if(c == '.' || f == fe)
   1363 			break;
   1364 		*t++ = c;
   1365 	}
   1366 	*t = 0;
   1367 	return f;
   1368 }
   1369 
   1370 /*
   1371  * f points just after '<'; fe points at end of entry.
   1372  * Expect next characters from bin to match:
   1373  *  [/][^ >]+( [^>=]+=[^ >]+)*>
   1374  *      tag   auxname auxval
   1375  * Accumulate the tag and its auxilliary information in
   1376  * tag[], auxname[][] and auxval[][].
   1377  * Set tagstarts=1 if the tag is 'starting' (has no '/'), else 0.
   1378  * Set naux to the number of aux pairs found.
   1379  * Return pointer to after final '>'.
   1380  */
   1381 static char *
   1382 gettag(char *f, char *fe)
   1383 {
   1384 	char *t;
   1385 	int c, i;
   1386 
   1387 	t = tag;
   1388 	c = *f++;
   1389 	if(c == '/')
   1390 		tagstarts = 0;
   1391 	else {
   1392 		tagstarts = 1;
   1393 		*t++ = c;
   1394 	}
   1395 	i = Buflen;
   1396 	naux = 0;
   1397 	while(--i > 0) {
   1398 		c = *f++;
   1399 		if(c == '>' || f == fe)
   1400 			break;
   1401 		if(c == ' ') {
   1402 			*t = 0;
   1403 			t = auxname[naux];
   1404 			i = Buflen;
   1405 			if(naux < Maxaux-1)
   1406 				naux++;
   1407 		} else if(naux && c == '=') {
   1408 			*t = 0;
   1409 			t = auxval[naux-1];
   1410 			i = Buflen;
   1411 		} else
   1412 			*t++ = c;
   1413 	}
   1414 	*t = 0;
   1415 	return f;
   1416 }
   1417 
   1418 static void
   1419 dostatus(void)
   1420 {
   1421 	char *s;
   1422 
   1423 	s = auxstate[St];
   1424 	if(s) {
   1425 		if(strcmp(s, "obs") == 0)
   1426 			outrune(0x2020);
   1427 		else if(strcmp(s, "ali") == 0)
   1428 			outrune(0x2016);
   1429 		else if(strcmp(s, "err") == 0 || strcmp(s, "spu") == 0)
   1430 			outrune(0xb6);
   1431 		else if(strcmp(s, "xref") == 0)
   1432 			{/* nothing */}
   1433 		else if(debug)
   1434 			err("status %ld %d %s", curentry.doff, cursize, s);
   1435 	}
   1436 }