plan9port

fork of plan9port with libvec, libstr and libsdb
Log | Files | Refs | README | LICENSE

pgw.c (29812B)


      1 /* thanks to Caerwyn Jones <caerwyn@comcast.net> for this module */
      2 #include <u.h>
      3 #include <libc.h>
      4 #include <bio.h>
      5 #include "dict.h"
      6 
      7 enum {
      8 	Buflen=1000,
      9 	Maxaux=5
     10 };
     11 
     12 /* Possible tags */
     13 enum {
     14 	B,		/* Bold */
     15 	Blockquote,	/* Block quote */
     16 	Br,		/* Break line */
     17 	Cd,		/* ? coloquial data */
     18 	Col,		/* ? Coloquial */
     19 	Def,		/* Definition */
     20 	Hw, 		/* Head Word */
     21 	I,		/* Italics */
     22 	P,		/* Paragraph */
     23 	Pos,		/* Part of Speach */
     24 	Sn,		/* Sense */
     25 	U,		/* ? cross reference*/
     26 	Wf,		/* ? word form */
     27 	Ntag		/* end of tags */
     28 };
     29 
     30 /* Assoc tables must be sorted on first field */
     31 
     32 static Assoc tagtab[] = {
     33 	{"b",			B},
     34 	{"blockquote",	Blockquote},
     35 	{"BR",		Br},
     36 	{"cd",		Cd},
     37 	{"col",		Col},
     38 	{"def",		Def},
     39 	{"hw",		Hw},
     40 	{"i",			I},
     41 	{"p",			P},
     42 	{"pos",		Pos},
     43 	{"sn",		Sn},
     44 	{"u",			U},
     45 	{"wf",		Wf}
     46 };
     47 
     48 /* Possible tag auxilliary info */
     49 enum {
     50 	Cols,		/* number of columns in a table */
     51 	Num,		/* letter or number, for a sense */
     52 	St,		/* status (e.g., obs) */
     53 	Naux
     54 };
     55 
     56 #if 0
     57 static Assoc auxtab[] = {
     58 	{"cols",	Cols},
     59 	{"num",		Num},
     60 	{"st",		St}
     61 };
     62 #endif
     63 
     64 static Assoc spectab[] = {
     65 	{"3on4",	0xbe},
     66 	{"AElig",		0xc6},
     67 	{"Aacute",	0xc1},
     68 	{"Aang",	0xc5},
     69 	{"Abarab",	0x100},
     70 	{"Acirc",	0xc2},
     71 	{"Agrave",	0xc0},
     72 	{"Alpha",	0x391},
     73 	{"Amacr",	0x100},
     74 	{"Asg",		0x1b7},		/* Unicyle. Cf "Sake" */
     75 	{"Auml",	0xc4},
     76 	{"Beta",	0x392},
     77 	{"Cced",	0xc7},
     78 	{"Chacek",	0x10c},
     79 	{"Chi",		0x3a7},
     80 	{"Chirho",	0x2627},		/* Chi Rho U+2627 */
     81 	{"Csigma",	0x3da},
     82 	{"Delta",	0x394},
     83 	{"Eacute",	0xc9},
     84 	{"Ecirc",	0xca},
     85 	{"Edh",		0xd0},
     86 	{"Epsilon",	0x395},
     87 	{"Eta",		0x397},
     88 	{"Gamma",	0x393},
     89 	{"Iacute",	0xcd},
     90 	{"Icirc",	0xce},
     91 	{"Imacr",	0x12a},
     92 	{"Integ",	0x222b},
     93 	{"Iota",	0x399},
     94 	{"Kappa",	0x39a},
     95 	{"Koppa",	0x3de},
     96 	{"Lambda",	0x39b},
     97 	{"Lbar",	0x141},
     98 	{"Mu",		0x39c},
     99 	{"Naira",	0x4e},		/* should have bar through */
    100 	{"Nplus",	0x4e},		/* should have plus above */
    101 	{"Ntilde",	0xd1},
    102 	{"Nu",		0x39d},
    103 	{"Oacute",	0xd3},
    104 	{"Obar",	0xd8},
    105 	{"Ocirc",	0xd4},
    106 	{"Oe",		0x152},
    107 	{"Omega",	0x3a9},
    108 	{"Omicron",	0x39f},
    109 	{"Ouml",	0xd6},
    110 	{"Phi",		0x3a6},
    111 	{"Pi",		0x3a0},
    112 	{"Psi",		0x3a8},
    113 	{"Rho",		0x3a1},
    114 	{"Sacute",	0x15a},
    115 	{"Sigma",	0x3a3},
    116 	{"Summ",	0x2211},
    117 	{"Tau",		0x3a4},
    118 	{"Th",		0xde},
    119 	{"Theta",	0x398},
    120 	{"Tse",		0x426},
    121 	{"Uacute",	0xda},
    122 	{"Ucirc",	0xdb},
    123 	{"Upsilon",	0x3a5},
    124 	{"Uuml",	0xdc},
    125 	{"Wyn",		0x1bf},		/* wynn U+01BF */
    126 	{"Xi",		0x39e},
    127 	{"Ygh",		0x1b7},		/* Yogh	U+01B7 */
    128 	{"Zeta",	0x396},
    129 	{"Zh",		0x1b7},		/* looks like Yogh. Cf "Sake" */
    130 	{"a",		0x61},		/* ante */
    131 	{"aacute",	0xe1},
    132 	{"aang",	0xe5},
    133 	{"aasper",	MAAS},
    134 	{"abreve",	0x103},
    135 	{"acirc",	0xe2},
    136 	{"acute",		LACU},
    137 	{"aelig",		0xe6},
    138 	{"agrave",	0xe0},
    139 	{"ahook",	0x105},
    140 	{"alenis",	MALN},
    141 	{"alpha",	0x3b1},
    142 	{"amacr",	0x101},
    143 	{"amp",		0x26},
    144 	{"and",		MAND},
    145 	{"ang",		LRNG},
    146 	{"angle",	0x2220},
    147 	{"ankh",	0x2625},		/* ankh U+2625 */
    148 	{"ante",	0x61},		/* before (year) */
    149 	{"aonq",	MAOQ},
    150 	{"appreq",	0x2243},
    151 	{"aquar",	0x2652},
    152 	{"arDadfull",	0x636},		/* Dad U+0636 */
    153 	{"arHa",	0x62d},		/* haa U+062D */
    154 	{"arTa",	0x62a},		/* taa U+062A */
    155 	{"arain",	0x639},		/* ain U+0639 */
    156 	{"arainfull",	0x639},		/* ain U+0639 */
    157 	{"aralif",	0x627},		/* alef U+0627 */
    158 	{"arba",	0x628},		/* baa U+0628 */
    159 	{"arha",	0x647},		/* ha U+0647 */
    160 	{"aries",	0x2648},
    161 	{"arnun",	0x646},		/* noon U+0646 */
    162 	{"arnunfull",	0x646},		/* noon U+0646 */
    163 	{"arpa",	0x647},		/* ha U+0647 */
    164 	{"arqoph",	0x642},		/* qaf U+0642 */
    165 	{"arshinfull",	0x634},		/* sheen U+0634 */
    166 	{"arta",	0x62a},		/* taa U+062A */
    167 	{"artafull",	0x62a},		/* taa U+062A */
    168 	{"artha",	0x62b},		/* thaa U+062B */
    169 	{"arwaw",	0x648},		/* waw U+0648 */
    170 	{"arya",	0x64a},		/* ya U+064A */
    171 	{"aryafull",	0x64a},		/* ya U+064A */
    172 	{"arzero",	0x660},		/* indic zero U+0660 */
    173 	{"asg",		0x292},		/* unicycle character. Cf "hallow" */
    174 	{"asper",	LASP},
    175 	{"assert",	0x22a2},
    176 	{"astm",	0x2042},		/* asterism: should be upside down */
    177 	{"at",		0x40},
    178 	{"atilde",	0xe3},
    179 	{"auml",	0xe4},
    180 	{"ayin",	0x639},		/* arabic ain U+0639 */
    181 	{"b1",		0x2d},		/* single bond */
    182 	{"b2",		0x3d},		/* double bond */
    183 	{"b3",		0x2261},		/* triple bond */
    184 	{"bbar",	0x180},		/* b with bar U+0180 */
    185 	{"beta",	0x3b2},
    186 	{"bigobl",	0x2f},
    187 	{"blC",		0x43},		/* should be black letter */
    188 	{"blJ",		0x4a},		/* should be black letter */
    189 	{"blU",		0x55},		/* should be black letter */
    190 	{"blb",		0x62},		/* should be black letter */
    191 	{"blozenge",	0x25ca},		/* U+25CA; should be black */
    192 	{"bly",		0x79},		/* should be black letter */
    193 	{"bra",		MBRA},
    194 	{"brbl",	LBRB},
    195 	{"breve",	LBRV},
    196 	{"bslash",'\\'},
    197 	{"bsquare",	0x25a0},		/* black square U+25A0 */
    198 	{"btril",	0x25c0},		/* U+25C0 */
    199 	{"btrir",	0x25b6},		/* U+25B6 */
    200 	{"c",		0x63},		/* circa */
    201 	{"cab",		0x232a},
    202 	{"cacute",	0x107},
    203 	{"canc",	0x264b},
    204 	{"capr",	0x2651},
    205 	{"caret",	0x5e},
    206 	{"cb",		0x7d},
    207 	{"cbigb",	0x7d},
    208 	{"cbigpren",	0x29},
    209 	{"cbigsb",	0x5d},
    210 	{"cced",	0xe7},
    211 	{"cdil",	LCED},
    212 	{"cdsb",	0x301b},		/* ]] U+301b */
    213 	{"cent",	0xa2},
    214 	{"chacek",	0x10d},
    215 	{"chi",		0x3c7},
    216 	{"circ",	LRNG},
    217 	{"circa",	0x63},		/* about (year) */
    218 	{"circbl",	0x325},		/* ring below accent U+0325 */
    219 	{"circle",	0x25cb},		/* U+25CB */
    220 	{"circledot",	0x2299},
    221 	{"click",	0x296},
    222 	{"club",	0x2663},
    223 	{"comtime",	0x43},
    224 	{"conj",	0x260c},
    225 	{"cprt",	0xa9},
    226 	{"cq",		'\''},
    227 	{"cqq",		0x201d},
    228 	{"cross",	0x2720},		/* maltese cross U+2720 */
    229 	{"crotchet",	0x2669},
    230 	{"csb",		0x5d},
    231 	{"ctilde",	0x63},		/* +tilde */
    232 	{"ctlig",	MLCT},
    233 	{"cyra",	0x430},
    234 	{"cyre",	0x435},
    235 	{"cyrhard",	0x44a},
    236 	{"cyrjat",	0x463},
    237 	{"cyrm",	0x43c},
    238 	{"cyrn",	0x43d},
    239 	{"cyrr",	0x440},
    240 	{"cyrsoft",	0x44c},
    241 	{"cyrt",	0x442},
    242 	{"cyry",	0x44b},
    243 	{"dag",		0x2020},
    244 	{"dbar",	0x111},
    245 	{"dblar",	0x21cb},
    246 	{"dblgt",	0x226b},
    247 	{"dbllt",	0x226a},
    248 	{"dced",	0x64},		/* +cedilla */
    249 	{"dd",		MDD},
    250 	{"ddag",	0x2021},
    251 	{"ddd",		MDDD},
    252 	{"decr",	0x2193},
    253 	{"deg",		0xb0},
    254 	{"dele",	0x64},		/* should be dele */
    255 	{"delta",	0x3b4},
    256 	{"descnode",	0x260b},		/* descending node U+260B */
    257 	{"diamond",	0x2662},
    258 	{"digamma",	0x3dd},
    259 	{"div",		0xf7},
    260 	{"dlessi",	0x131},
    261 	{"dlessj1",	0x6a},		/* should be dotless */
    262 	{"dlessj2",	0x6a},		/* should be dotless */
    263 	{"dlessj3",	0x6a},		/* should be dotless */
    264 	{"dollar",	0x24},
    265 	{"dotab",	LDOT},
    266 	{"dotbl",	LDTB},
    267 	{"drachm",	0x292},
    268 	{"dubh",	0x2d},
    269 	{"eacute",	0xe9},
    270 	{"earth",	0x2641},
    271 	{"easper",	MEAS},
    272 	{"ebreve",	0x115},
    273 	{"ecirc",	0xea},
    274 	{"edh",		0xf0},
    275 	{"egrave",	0xe8},
    276 	{"ehacek",	0x11b},
    277 	{"ehook",	0x119},
    278 	{"elem",	0x220a},
    279 	{"elenis",	MELN},
    280 	{"em",		0x2014},
    281 	{"emacr",	0x113},
    282 	{"emem",	MEMM},
    283 	{"en",		0x2013},
    284 	{"epsilon",	0x3b5},
    285 	{"equil",	0x21cb},
    286 	{"ergo",	0x2234},
    287 	{"es",		MES},
    288 	{"eszett",	0xdf},
    289 	{"eta",		0x3b7},
    290 	{"eth",		0xf0},
    291 	{"euml",	0xeb},
    292 	{"expon",	0x2191},
    293 	{"fact",	0x21},
    294 	{"fata",	0x251},
    295 	{"fatpara",	0xb6},		/* should have fatter, filled in bowl */
    296 	{"female",	0x2640},
    297 	{"ffilig",	MLFFI},
    298 	{"fflig",	MLFF},
    299 	{"ffllig",	MLFFL},
    300 	{"filig",	MLFI},
    301 	{"flat",	0x266d},
    302 	{"fllig",	MLFL},
    303 	{"frE",		0x45},		/* should be curly */
    304 	{"frL",	'L'},		/* should be curly */
    305 	{"frR",		0x52},		/* should be curly */
    306 	{"frakB",	0x42},		/* should have fraktur style */
    307 	{"frakG",	0x47},
    308 	{"frakH",	0x48},
    309 	{"frakI",	0x49},
    310 	{"frakM",	0x4d},
    311 	{"frakU",	0x55},
    312 	{"frakX",	0x58},
    313 	{"frakY",	0x59},
    314 	{"frakh",	0x68},
    315 	{"frbl",	LFRB},
    316 	{"frown",	LFRN},
    317 	{"fs",		0x20},
    318 	{"fsigma",	0x3c2},
    319 	{"gAacute",	0xc1},		/* should be Α+acute */
    320 	{"gaacute",	0x3b1},		/* +acute */
    321 	{"gabreve",	0x3b1},		/* +breve */
    322 	{"gafrown",	0x3b1},		/* +frown */
    323 	{"gagrave",	0x3b1},		/* +grave */
    324 	{"gamacr",	0x3b1},		/* +macron */
    325 	{"gamma",	0x3b3},
    326 	{"gauml",	0x3b1},		/* +umlaut */
    327 	{"ge",		0x2267},
    328 	{"geacute",	0x3b5},		/* +acute */
    329 	{"gegrave",	0x3b5},		/* +grave */
    330 	{"ghacute",	0x3b7},		/* +acute */
    331 	{"ghfrown",	0x3b7},		/* +frown */
    332 	{"ghgrave",	0x3b7},		/* +grave */
    333 	{"ghmacr",	0x3b7},		/* +macron */
    334 	{"giacute",	0x3b9},		/* +acute */
    335 	{"gibreve",	0x3b9},		/* +breve */
    336 	{"gifrown",	0x3b9},		/* +frown */
    337 	{"gigrave",	0x3b9},		/* +grave */
    338 	{"gimacr",	0x3b9},		/* +macron */
    339 	{"giuml",	0x3b9},		/* +umlaut */
    340 	{"glagjat",	0x467},
    341 	{"glots",	0x2c0},
    342 	{"goacute",	0x3bf},		/* +acute */
    343 	{"gobreve",	0x3bf},		/* +breve */
    344 	{"grave",	LGRV},
    345 	{"gt",		0x3e},
    346 	{"guacute",	0x3c5},		/* +acute */
    347 	{"gufrown",	0x3c5},		/* +frown */
    348 	{"gugrave",	0x3c5},		/* +grave */
    349 	{"gumacr",	0x3c5},		/* +macron */
    350 	{"guuml",	0x3c5},		/* +umlaut */
    351 	{"gwacute",	0x3c9},		/* +acute */
    352 	{"gwfrown",	0x3c9},		/* +frown */
    353 	{"gwgrave",	0x3c9},		/* +grave */
    354 	{"hacek",	LHCK},
    355 	{"halft",	0x2308},
    356 	{"hash",	0x23},
    357 	{"hasper",	MHAS},
    358 	{"hatpath",	0x5b2},		/* hataf patah U+05B2 */
    359 	{"hatqam",	0x5b3},		/* hataf qamats U+05B3 */
    360 	{"hatseg",	0x5b1},		/* hataf segol U+05B1 */
    361 	{"hbar",	0x127},
    362 	{"heart",	0x2661},
    363 	{"hebaleph",	0x5d0},		/* aleph U+05D0 */
    364 	{"hebayin",	0x5e2},		/* ayin U+05E2 */
    365 	{"hebbet",	0x5d1},		/* bet U+05D1 */
    366 	{"hebbeth",	0x5d1},		/* bet U+05D1 */
    367 	{"hebcheth",	0x5d7},		/* bet U+05D7 */
    368 	{"hebdaleth",	0x5d3},		/* dalet U+05D3 */
    369 	{"hebgimel",	0x5d2},		/* gimel U+05D2 */
    370 	{"hebhe",	0x5d4},		/* he U+05D4 */
    371 	{"hebkaph",	0x5db},		/* kaf U+05DB */
    372 	{"heblamed",	0x5dc},		/* lamed U+05DC */
    373 	{"hebmem",	0x5de},		/* mem U+05DE */
    374 	{"hebnun",	0x5e0},		/* nun U+05E0 */
    375 	{"hebnunfin",	0x5df},		/* final nun U+05DF */
    376 	{"hebpe",	0x5e4},		/* pe U+05E4 */
    377 	{"hebpedag",	0x5e3},		/* final pe? U+05E3 */
    378 	{"hebqoph",	0x5e7},		/* qof U+05E7 */
    379 	{"hebresh",	0x5e8},		/* resh U+05E8 */
    380 	{"hebshin",	0x5e9},		/* shin U+05E9 */
    381 	{"hebtav",	0x5ea},		/* tav U+05EA */
    382 	{"hebtsade",	0x5e6},		/* tsadi U+05E6 */
    383 	{"hebwaw",	0x5d5},		/* vav? U+05D5 */
    384 	{"hebyod",	0x5d9},		/* yod U+05D9 */
    385 	{"hebzayin",	0x5d6},		/* zayin U+05D6 */
    386 	{"hgz",		0x292},		/* ??? Cf "alet" */
    387 	{"hireq",	0x5b4},		/* U+05B4 */
    388 	{"hlenis",	MHLN},
    389 	{"hook",	LOGO},
    390 	{"horizE",	0x45},		/* should be on side */
    391 	{"horizP",	0x50},		/* should be on side */
    392 	{"horizS",	0x223d},
    393 	{"horizT",	0x22a3},
    394 	{"horizb",	0x7b},		/* should be underbrace */
    395 	{"ia",		0x3b1},
    396 	{"iacute",	0xed},
    397 	{"iasper",	MIAS},
    398 	{"ib",		0x3b2},
    399 	{"ibar",	0x268},
    400 	{"ibreve",	0x12d},
    401 	{"icirc",	0xee},
    402 	{"id",		0x3b4},
    403 	{"ident",	0x2261},
    404 	{"ie",		0x3b5},
    405 	{"ifilig",	MLFI},
    406 	{"ifflig",	MLFF},
    407 	{"ig",		0x3b3},
    408 	{"igrave",	0xec},
    409 	{"ih",		0x3b7},
    410 	{"ii",		0x3b9},
    411 	{"ik",		0x3ba},
    412 	{"ilenis",	MILN},
    413 	{"imacr",	0x12b},
    414 	{"implies",	0x21d2},
    415 	{"index",	0x261e},
    416 	{"infin",	0x221e},
    417 	{"integ",	0x222b},
    418 	{"intsec",	0x2229},
    419 	{"invpri",	0x2cf},
    420 	{"iota",	0x3b9},
    421 	{"iq",		0x3c8},
    422 	{"istlig",	MLST},
    423 	{"isub",	0x3f5},		/* iota below accent */
    424 	{"iuml",	0xef},
    425 	{"iz",		0x3b6},
    426 	{"jup",		0x2643},
    427 	{"kappa",	0x3ba},
    428 	{"koppa",	0x3df},
    429 	{"lambda",	0x3bb},
    430 	{"lar",		0x2190},
    431 	{"lbar",	0x142},
    432 	{"le",		0x2266},
    433 	{"lenis",	LLEN},
    434 	{"leo",		0x264c},
    435 	{"lhalfbr",	0x2308},
    436 	{"lhshoe",	0x2283},
    437 	{"libra",	0x264e},
    438 	{"llswing",	MLLS},
    439 	{"lm",		0x2d0},
    440 	{"logicand",	0x2227},
    441 	{"logicor",	0x2228},
    442 	{"longs",	0x283},
    443 	{"lrar",	0x2194},
    444 	{"lt",		0x3c},
    445 	{"ltappr",	0x227e},
    446 	{"ltflat",	0x2220},
    447 	{"lumlbl",	0x6c},		/* +umlaut below */
    448 	{"mac",		LMAC},
    449 	{"male",	0x2642},
    450 	{"mc",		0x63},		/* should be raised */
    451 	{"merc",	0x263f},		/* mercury U+263F */
    452 	{"min",		0x2212},
    453 	{"moonfq",	0x263d},		/* first quarter moon U+263D */
    454 	{"moonlq",	0x263e},		/* last quarter moon U+263E */
    455 	{"msylab",	0x6d},		/* +sylab (ˌ) */
    456 	{"mu",		0x3bc},
    457 	{"nacute",	0x144},
    458 	{"natural",	0x266e},
    459 	{"neq",		0x2260},
    460 	{"nfacute",	0x2032},
    461 	{"nfasper",	0x2bd},
    462 	{"nfbreve",	0x2d8},
    463 	{"nfced",	0xb8},
    464 	{"nfcirc",	0x2c6},
    465 	{"nffrown",	0x2322},
    466 	{"nfgra",	0x2cb},
    467 	{"nfhacek",	0x2c7},
    468 	{"nfmac",	0xaf},
    469 	{"nftilde",	0x2dc},
    470 	{"nfuml",	0xa8},
    471 	{"ng",		0x14b},
    472 	{"not",		0xac},
    473 	{"notelem",	0x2209},
    474 	{"ntilde",	0xf1},
    475 	{"nu",		0x3bd},
    476 	{"oab",		0x2329},
    477 	{"oacute",	0xf3},
    478 	{"oasper",	MOAS},
    479 	{"ob",		0x7b},
    480 	{"obar",	0xf8},
    481 	{"obigb",	0x7b},		/* should be big */
    482 	{"obigpren",	0x28},
    483 	{"obigsb",	0x5b},		/* should be big */
    484 	{"obreve",	0x14f},
    485 	{"ocirc",	0xf4},
    486 	{"odsb",	0x301a},		/* [[ U+301A */
    487 	{"oelig",		0x153},
    488 	{"oeamp",	0x26},
    489 	{"ograve",	0xf2},
    490 	{"ohook",	0x6f},		/* +hook */
    491 	{"olenis",	MOLN},
    492 	{"omacr",	0x14d},
    493 	{"omega",	0x3c9},
    494 	{"omicron",	0x3bf},
    495 	{"ope",		0x25b},
    496 	{"opp",		0x260d},
    497 	{"oq",		0x60},
    498 	{"oqq",		0x201c},
    499 	{"or",		MOR},
    500 	{"osb",		0x5b},
    501 	{"otilde",	0xf5},
    502 	{"ouml",	0xf6},
    503 	{"ounce",	0x2125},		/* ounce U+2125 */
    504 	{"ovparen",	0x2322},		/* should be sideways ( */
    505 	{"p",		0x2032},
    506 	{"pa",		0x2202},
    507 	{"page",	0x50},
    508 	{"pall",	0x28e},
    509 	{"paln",	0x272},
    510 	{"par",		PAR},
    511 	{"para",	0xb6},
    512 	{"pbar",	0x70},		/* +bar */
    513 	{"per",		0x2118},		/* per U+2118 */
    514 	{"phi",		0x3c6},
    515 	{"phi2",	0x3d5},
    516 	{"pi",		0x3c0},
    517 	{"pisces",	0x2653},
    518 	{"planck",	0x127},
    519 	{"plantinJ",	0x4a},		/* should be script */
    520 	{"pm",		0xb1},
    521 	{"pmil",	0x2030},
    522 	{"pp",		0x2033},
    523 	{"ppp",		0x2034},
    524 	{"prop",	0x221d},
    525 	{"psi",		0x3c8},
    526 	{"pstlg",	0xa3},
    527 	{"q",		0x3f},		/* should be raised */
    528 	{"qamets",	0x5b3},		/* U+05B3 */
    529 	{"quaver",	0x266a},
    530 	{"rar",		0x2192},
    531 	{"rasper",	MRAS},
    532 	{"rdot",	0xb7},
    533 	{"recipe",	0x211e},		/* U+211E */
    534 	{"reg",		0xae},
    535 	{"revC",	0x186},		/* open O U+0186 */
    536 	{"reva",	0x252},
    537 	{"revc",	0x254},
    538 	{"revope",	0x25c},
    539 	{"revr",	0x279},
    540 	{"revsc",	0x2d2},		/* upside-down semicolon */
    541 	{"revv",	0x28c},
    542 	{"rfa",		0x6f},		/* +hook (Cf "goal") */
    543 	{"rhacek",	0x159},
    544 	{"rhalfbr",	0x2309},
    545 	{"rho",		0x3c1},
    546 	{"rhshoe",	0x2282},
    547 	{"rlenis",	MRLN},
    548 	{"rsylab",	0x72},		/* +sylab */
    549 	{"runash",	0x46},		/* should be runic 'ash' */
    550 	{"rvow",	0x2d4},
    551 	{"sacute",	0x15b},
    552 	{"sagit",	0x2650},
    553 	{"sampi",	0x3e1},
    554 	{"saturn",	0x2644},
    555 	{"sced",	0x15f},
    556 	{"schwa",	0x259},
    557 	{"scorpio",	0x264f},
    558 	{"scrA",	0x41},		/* should be script */
    559 	{"scrC",	0x43},
    560 	{"scrE",	0x45},
    561 	{"scrF",	0x46},
    562 	{"scrI",	0x49},
    563 	{"scrJ",	0x4a},
    564 	{"scrL",'L'},
    565 	{"scrO",	0x4f},
    566 	{"scrP",	0x50},
    567 	{"scrQ",	0x51},
    568 	{"scrS",	0x53},
    569 	{"scrT",	0x54},
    570 	{"scrb",	0x62},
    571 	{"scrd",	0x64},
    572 	{"scrh",	0x68},
    573 	{"scrl",	0x6c},
    574 	{"scruple",	0x2108},		/* U+2108 */
    575 	{"sdd",		0x2d0},
    576 	{"sect",	0xa7},
    577 	{"semE",	0x2203},
    578 	{"sh",		0x283},
    579 	{"shacek",	0x161},
    580 	{"sharp",	0x266f},
    581 	{"sheva",	0x5b0},		/* U+05B0 */
    582 	{"shti",	0x26a},
    583 	{"shtsyll",	0x222a},
    584 	{"shtu",	0x28a},
    585 	{"sidetri",	0x22b2},
    586 	{"sigma",	0x3c3},
    587 	{"since",	0x2235},
    588 	{"slge",	0x2265},		/* should have slanted line under */
    589 	{"slle",	0x2264},		/* should have slanted line under */
    590 	{"sm",		0x2c8},
    591 	{"smm",		0x2cc},
    592 	{"spade",	0x2660},
    593 	{"sqrt",	0x221a},
    594 	{"square",	0x25a1},		/* U+25A1 */
    595 	{"ssChi",	0x3a7},		/* should be sans serif */
    596 	{"ssIota",	0x399},
    597 	{"ssOmicron",	0x39f},
    598 	{"ssPi",	0x3a0},
    599 	{"ssRho",	0x3a1},
    600 	{"ssSigma",	0x3a3},
    601 	{"ssTau",	0x3a4},
    602 	{"star",	0x2a},
    603 	{"stlig",	MLST},
    604 	{"sup2",	0x2072},
    605 	{"supgt",	0x2c3},
    606 	{"suplt",	0x2c2},
    607 	{"sur",		0x2b3},
    608 	{"swing",	0x223c},
    609 	{"tau",		0x3c4},
    610 	{"taur",	0x2649},
    611 	{"th",		0xfe},
    612 	{"thbar",	0xfe},		/* +bar */
    613 	{"theta",	0x3b8},
    614 	{"thinqm",	0x3f},		/* should be thinner */
    615 	{"tilde",	LTIL},
    616 	{"times",	0xd7},
    617 	{"tri",		0x2206},
    618 	{"trli",	0x2016},
    619 	{"ts",		0x2009},
    620 	{"uacute",	0xfa},
    621 	{"uasper",	MUAS},
    622 	{"ubar",	0x75},		/* +bar */
    623 	{"ubreve",	0x16d},
    624 	{"ucirc",	0xfb},
    625 	{"udA",		0x2200},
    626 	{"udT",		0x22a5},
    627 	{"uda",		0x250},
    628 	{"udh",		0x265},
    629 	{"udqm",	0xbf},
    630 	{"udpsi",	0x22d4},
    631 	{"udtr",	0x2207},
    632 	{"ugrave",	0xf9},
    633 	{"ulenis",	MULN},
    634 	{"umacr",	0x16b},
    635 	{"uml",		LUML},
    636 	{"undl",	0x2cd},		/* underline accent */
    637 	{"union",	0x222a},
    638 	{"upsilon",	0x3c5},
    639 	{"uuml",	0xfc},
    640 	{"vavpath",	0x5d5},		/* vav U+05D5 (+patah) */
    641 	{"vavsheva",	0x5d5},		/* vav U+05D5 (+sheva) */
    642 	{"vb",		0x7c},
    643 	{"vddd",	0x22ee},
    644 	{"versicle2",	0x2123},		/* U+2123 */
    645 	{"vinc",	0xaf},
    646 	{"virgo",	0x264d},
    647 	{"vpal",	0x25f},
    648 	{"vvf",		0x263},
    649 	{"wasper",	MWAS},
    650 	{"wavyeq",	0x2248},
    651 	{"wlenis",	MWLN},
    652 	{"wyn",		0x1bf},		/* wynn U+01BF */
    653 	{"xi",		0x3be},
    654 	{"yacute",	0xfd},
    655 	{"ycirc",	0x177},
    656 	{"ygh",		0x292},
    657 	{"ymacr",	0x79},		/* +macron */
    658 	{"yuml",	0xff},
    659 	{"zced",	0x7a},		/* +cedilla */
    660 	{"zeta",	0x3b6},
    661 	{"zh",		0x292},
    662 	{"zhacek",	0x17e}
    663 };
    664 /*
    665    The following special characters don't have close enough
    666    equivalents in Unicode, so aren't in the above table.
    667 	22n		2^(2^n) Cf Fermat
    668 	2on4		2/4
    669 	3on8		3/8
    670 	Bantuo		Bantu O. Cf Otshi-herero
    671 	Car		C with circular arrow on top
    672 	albrtime 	cut-time: C with vertical line
    673 	ardal		Cf dental
    674 	bantuo		Bantu o. Cf Otshi-herero
    675 	bbc1		single chem bond below
    676 	bbc2		double chem bond below
    677 	bbl1		chem bond like /
    678 	bbl2		chem bond like //
    679 	bbr1		chem bond like \
    680 	bbr2		chem bond \\
    681 	bcop1		copper symbol. Cf copper
    682 	bcop2		copper symbol. Cf copper
    683 	benchm		Cf benchmark
    684 	btc1		single chem bond above
    685 	btc2		double chem bond above
    686 	btl1		chem bond like \
    687 	btl2		chem bond like \\
    688 	btr1		chem bond like /
    689 	btr2		chem bond like //
    690 	burman		Cf Burman
    691 	devph		sanskrit letter. Cf ph
    692 	devrfls		sanskrit letter. Cf cerebral
    693 	duplong[12]	musical note
    694 	egchi		early form of chi
    695 	eggamma[12]	early form of gamma
    696 	egiota		early form of iota
    697 	egkappa		early form of kappa
    698 	eglambda	early form of lambda
    699 	egmu[12]	early form of mu
    700 	egnu[12]	early form of nu
    701 	egpi[123]	early form of pi
    702 	egrho[12]	early form of rho
    703 	egsampi		early form of sampi
    704 	egsan		early form of san
    705 	egsigma[12]	early form of sigma
    706 	egxi[123]	early form of xi
    707 	elatS		early form of S
    708 	elatc[12]	early form of C
    709 	elatg[12]	early form of G
    710 	glagjeri	Slavonic Glagolitic jeri
    711 	glagjeru	Slavonic Glagolitic jeru
    712 	hypolem		hypolemisk (line with underdot)
    713 	lhrbr		lower half }
    714 	longmord	long mordent
    715 	mbwvow		backwards scretched C. Cf retract.
    716 	mord		music symbol.  Cf mordent
    717 	mostra		Cf direct
    718 	ohgcirc		old form of circumflex
    719 	oldbeta		old form of β. Cf perturbate
    720 	oldsemibr[12]	old forms of semibreve. Cf prolation
    721 	ormg		old form of g. Cf G
    722 	para[12345]	form of ¶
    723 	pauseo		musical pause sign
    724 	pauseu		musical pause sign
    725 	pharyng		Cf pharyngal
    726 	ragr		Black letter ragged r
    727 	repetn		musical repeat. Cf retort
    728 	segno		musical segno sign
    729 	semain[12]	semitic ain
    730 	semhe		semitic he
    731 	semheth		semitic heth
    732 	semkaph		semitic kaph
    733 	semlamed[12]	semitic lamed
    734 	semmem		semitic mem
    735 	semnum		semitic nun
    736 	sempe		semitic pe
    737 	semqoph[123]	semitic qoph
    738 	semresh		semitic resh
    739 	semtav[1234]	semitic tav
    740 	semyod		semitic yod
    741 	semzayin[123]	semitic zayin
    742 	shtlong[12]	U with underbar. Cf glyconic
    743 	sigmatau	σ,τ combination
    744 	squaver		sixteenth note
    745 	sqbreve		square musical breve note
    746 	swast		swastika
    747 	uhrbr		upper half of big }
    748 	versicle1		Cf versicle
    749  */
    750 
    751 
    752 static Rune normtab[128] = {
    753 	/*0*/	/*1*/	/*2*/	/*3*/	/*4*/	/*5*/	/*6*/	/*7*/
    754 /*00*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
    755 	NONE,	NONE,	' ',	NONE,	NONE,	NONE,	NONE,	NONE,
    756 /*10*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
    757 	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
    758 /*20*/	0x20,	0x21,	0x22,	0x23,	0x24,	0x25,	SPCS,	'\'',
    759 	0x28,	0x29,	0x2a,	0x2b,	0x2c,	0x2d,	0x2e,	0x2f,
    760 /*30*/  0x30,	0x31,	0x32,	0x33,	0x34,	0x35,	0x36,	0x37,
    761 	0x38,	0x39,	0x3a,	0x3b,	TAGS,	0x3d,	TAGE,	0x3f,
    762 /*40*/  0x40,	0x41,	0x42,	0x43,	0x44,	0x45,	0x46,	0x47,
    763 	0x48,	0x49,	0x4a,	0x4b,'L',	0x4d,	0x4e,	0x4f,
    764 /*50*/	0x50,	0x51,	0x52,	0x53,	0x54,	0x55,	0x56,	0x57,
    765 	0x58,	0x59,	0x5a,	0x5b,'\\',	0x5d,	0x5e,	0x5f,
    766 /*60*/	0x60,	0x61,	0x62,	0x63,	0x64,	0x65,	0x66,	0x67,
    767 	0x68,	0x69,	0x6a,	0x6b,	0x6c,	0x6d,	0x6e,	0x6f,
    768 /*70*/	0x70,	0x71,	0x72,	0x73,	0x74,	0x75,	0x76,	0x77,
    769 	0x78,	0x79,	0x7a,	0x7b,	0x7c,	0x7d,	0x7e,	NONE
    770 };
    771 #if 0
    772 static Rune phtab[128] = {
    773 	/*0*/	/*1*/	/*2*/	/*3*/	/*4*/	/*5*/	/*6*/	/*7*/
    774 /*00*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
    775 	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
    776 /*10*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
    777 	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
    778 /*20*/	0x20,	0x21,	0x2c8,	0x23,	0x24,	0x2cc,	0xe6,	'\'',
    779 	0x28,	0x29,	0x2a,	0x2b,	0x2c,	0x2d,	0x2e,	0x2f,
    780 /*30*/  0x30,	0x31,	0x32,	0x25c,	0x34,	0x35,	0x36,	0x37,
    781 	0x38,	0xf8,	0x2d0,	0x3b,	TAGS,	0x3d,	TAGE,	0x3f,
    782 /*40*/  0x259,	0x251,	0x42,	0x43,	0xf0,	0x25b,	0x46,	0x47,
    783 	0x48,	0x26a,	0x4a,	0x4b,'L',	0x4d,	0x14b,	0x254,
    784 /*50*/	0x50,	0x252,	0x52,	0x283,	0x3b8,	0x28a,	0x28c,	0x57,
    785 	0x58,	0x59,	0x292,	0x5b,'\\',	0x5d,	0x5e,	0x5f,
    786 /*60*/	0x60,	0x61,	0x62,	0x63,	0x64,	0x65,	0x66,	0x67,
    787 	0x68,	0x69,	0x6a,	0x6b,	0x6c,	0x6d,	0x6e,	0x6f,
    788 /*70*/	0x70,	0x71,	0x72,	0x73,	0x74,	0x75,	0x76,	0x77,
    789 	0x78,	0x79,	0x7a,	0x7b,	0x7c,	0x7d,	0x7e,	NONE
    790 };
    791 static Rune grtab[128] = {
    792 	/*0*/	/*1*/	/*2*/	/*3*/	/*4*/	/*5*/	/*6*/	/*7*/
    793 /*00*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
    794 	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
    795 /*10*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
    796 	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
    797 /*20*/	0x20,	0x21,	0x22,	0x23,	0x24,	0x25,	SPCS,	'\'',
    798 	0x28,	0x29,	0x2a,	0x2b,	0x2c,	0x2d,	0x2e,	0x2f,
    799 /*30*/  0x30,	0x31,	0x32,	0x33,	0x34,	0x35,	0x36,	0x37,
    800 	0x38,	0x39,	0x3a,	0x3b,	TAGS,	0x3d,	TAGE,	0x3f,
    801 /*40*/  0x40,	0x391,	0x392,	0x39e,	0x394,	0x395,	0x3a6,	0x393,
    802 	0x397,	0x399,	0x3da,	0x39a,	0x39b,	0x39c,	0x39d,	0x39f,
    803 /*50*/	0x3a0,	0x398,	0x3a1,	0x3a3,	0x3a4,	0x3a5,	0x56,	0x3a9,
    804 	0x3a7,	0x3a8,	0x396,	0x5b,'\\',	0x5d,	0x5e,	0x5f,
    805 /*60*/	0x60,	0x3b1,	0x3b2,	0x3be,	0x3b4,	0x3b5,	0x3c6,	0x3b3,
    806 	0x3b7,	0x3b9,	0x3c2,	0x3ba,	0x3bb,	0x3bc,	0x3bd,	0x3bf,
    807 /*70*/	0x3c0,	0x3b8,	0x3c1,	0x3c3,	0x3c4,	0x3c5,	0x76,	0x3c9,
    808 	0x3c7,	0x3c8,	0x3b6,	0x7b,	0x7c,	0x7d,	0x7e,	NONE
    809 };
    810 static Rune subtab[128] = {
    811 	/*0*/	/*1*/	/*2*/	/*3*/	/*4*/	/*5*/	/*6*/	/*7*/
    812 /*00*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
    813 	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
    814 /*10*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
    815 	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
    816 /*20*/	0x20,	0x21,	0x22,	0x23,	0x24,	0x25,	SPCS,	'\'',
    817 	0x208d,	0x208e,	0x2a,	0x208a,	0x2c,	0x208b,	0x2e,	0x2f,
    818 /*30*/  0x2080,	0x2081,	0x2082,	0x2083,	0x2084,	0x2085,	0x2086,	0x2087,
    819 	0x2088,	0x2089,	0x3a,	0x3b,	TAGS,	0x208c,	TAGE,	0x3f,
    820 /*40*/  0x40,	0x41,	0x42,	0x43,	0x44,	0x45,	0x46,	0x47,
    821 	0x48,	0x49,	0x4a,	0x4b,'L',	0x4d,	0x4e,	0x4f,
    822 /*50*/	0x50,	0x51,	0x52,	0x53,	0x54,	0x55,	0x56,	0x57,
    823 	0x58,	0x59,	0x5a,	0x5b,'\\',	0x5d,	0x5e,	0x5f,
    824 /*60*/	0x60,	0x61,	0x62,	0x63,	0x64,	0x65,	0x66,	0x67,
    825 	0x68,	0x69,	0x6a,	0x6b,	0x6c,	0x6d,	0x6e,	0x6f,
    826 /*70*/	0x70,	0x71,	0x72,	0x73,	0x74,	0x75,	0x76,	0x77,
    827 	0x78,	0x79,	0x7a,	0x7b,	0x7c,	0x7d,	0x7e,	NONE
    828 };
    829 static Rune suptab[128] = {
    830 	/*0*/	/*1*/	/*2*/	/*3*/	/*4*/	/*5*/	/*6*/	/*7*/
    831 /*00*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
    832 	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
    833 /*10*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
    834 	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
    835 /*20*/	0x20,	0x21,	0x22,	0x23,	0x24,	0x25,	SPCS,	'\'',
    836 	0x207d,	0x207e,	0x2a,	0x207a,	0x2c,	0x207b,	0x2e,	0x2f,
    837 /*30*/  0x2070,	0x2071,	0x2072,	0x2073,	0x2074,	0x2075,	0x2076,	0x2077,
    838 	0x2078,	0x2079,	0x3a,	0x3b,	TAGS,	0x207c,	TAGE,	0x3f,
    839 /*40*/  0x40,	0x41,	0x42,	0x43,	0x44,	0x45,	0x46,	0x47,
    840 	0x48,	0x49,	0x4a,	0x4b,'L',	0x4d,	0x4e,	0x4f,
    841 /*50*/	0x50,	0x51,	0x52,	0x53,	0x54,	0x55,	0x56,	0x57,
    842 	0x58,	0x59,	0x5a,	0x5b,'\\',	0x5d,	0x5e,	0x5f,
    843 /*60*/	0x60,	0x61,	0x62,	0x63,	0x64,	0x65,	0x66,	0x67,
    844 	0x68,	0x69,	0x6a,	0x6b,	0x6c,	0x6d,	0x6e,	0x6f,
    845 /*70*/	0x70,	0x71,	0x72,	0x73,	0x74,	0x75,	0x76,	0x77,
    846 	0x78,	0x79,	0x7a,	0x7b,	0x7c,	0x7d,	0x7e,	NONE
    847 };
    848 #endif
    849 
    850 static int	tagstarts;
    851 static char	tag[Buflen];
    852 static char	spec[Buflen];
    853 static Entry	curentry;
    854 #define cursize (curentry.end-curentry.start)
    855 
    856 static char	*getspec(char *, char *);
    857 static char	*gettag(char *, char *);
    858 
    859 /*
    860  * cmd is one of:
    861  *    'p': normal print
    862  *    'h': just print headwords
    863  *    'P': print raw
    864  */
    865 void
    866 pgwprintentry(Entry e, int cmd)
    867 {
    868 	char *p, *pe;
    869 	int t;
    870 	long r, rprev, rlig;
    871 	Rune *transtab;
    872 
    873 	p = e.start;
    874 	pe = e.end;
    875 	transtab = normtab;
    876 	rprev = NONE;
    877 	changett(0, 0, 0);
    878 	curentry = e;
    879 	if(cmd == 'h')
    880 		outinhibit = 1;
    881 	while(p < pe) {
    882 		if(cmd == 'r') {
    883 			outchar(*p++);
    884 			continue;
    885 		}
    886 		r = transtab[(*p++)&0x7F];
    887 		if(r < NONE) {
    888 			/* Emit the rune, but buffer in case of ligature */
    889 			if(rprev != NONE)
    890 				outrune(rprev);
    891 			rprev = r;
    892 		} else if(r == SPCS) {
    893 			/* Start of special character name */
    894 			p = getspec(p, pe);
    895 			r = lookassoc(spectab, asize(spectab), spec);
    896 			if(r == -1) {
    897 				if(debug)
    898 					err("spec %ld %d %s",
    899 						e.doff, cursize, spec);
    900 				r = 0xfffd;
    901 			}
    902 			if(r >= LIGS && r < LIGE) {
    903 				/* handle possible ligature */
    904 				rlig = liglookup(r, rprev);
    905 				if(rlig != NONE)
    906 					rprev = rlig;	/* overwrite rprev */
    907 				else {
    908 					/* could print accent, but let's not */
    909 					if(rprev != NONE) outrune(rprev);
    910 					rprev = NONE;
    911 				}
    912 			} else if(r >= MULTI && r < MULTIE) {
    913 				if(rprev != NONE) {
    914 					outrune(rprev);
    915 					rprev = NONE;
    916 				}
    917 				outrunes(multitab[r-MULTI]);
    918 			} else if(r == PAR) {
    919 				if(rprev != NONE) {
    920 					outrune(rprev);
    921 					rprev = NONE;
    922 				}
    923 				outnl(1);
    924 			} else {
    925 				if(rprev != NONE) outrune(rprev);
    926 				rprev = r;
    927 			}
    928 		} else if(r == TAGS) {
    929 			/* Start of tag name */
    930 			if(rprev != NONE) {
    931 				outrune(rprev);
    932 				rprev = NONE;
    933 			}
    934 			p = gettag(p, pe);
    935 			t = lookassoc(tagtab, asize(tagtab), tag);
    936 			if(t == -1) {
    937 				if(debug)
    938 					err("tag %ld %d %s",
    939 						e.doff, cursize, tag);
    940 				continue;
    941 			}
    942 			switch(t){
    943 			case Hw:
    944 				if(cmd == 'h') {
    945 					if(!tagstarts)
    946 						outchar(' ');
    947 					outinhibit = !tagstarts;
    948 				}
    949 				break;
    950 			case Sn:
    951 				if(tagstarts) {
    952 					outnl(2);
    953 				}
    954 				break;
    955 			case P:
    956 				outnl(tagstarts);
    957 				break;
    958 			case Col:
    959 			case Br:
    960 			case Blockquote:
    961 				if(tagstarts)
    962 					outnl(1);
    963 				break;
    964 			case U:
    965 				outchar('/');
    966 			}
    967 		}
    968 	}
    969 	if(cmd == 'h') {
    970 		outinhibit = 0;
    971 		outnl(0);
    972 	}
    973 }
    974 
    975 /*
    976  * Return offset into bdict where next webster entry after fromoff starts.
    977  * Webster entries start with <p><hw>
    978  */
    979 long
    980 pgwnextoff(long fromoff)
    981 {
    982 	long a, n;
    983 	int c;
    984 
    985 	a = Bseek(bdict, fromoff, 0);
    986 	if(a != fromoff)
    987 		return -1;
    988 	n = 0;
    989 	for(;;) {
    990 		c = Bgetc(bdict);
    991 		if(c < 0)
    992 			break;
    993 		if(c == '<' && Bgetc(bdict) == 'p' && Bgetc(bdict) == '>') {
    994 			c = Bgetc(bdict);
    995 			if(c == '<') {
    996 				if (Bgetc(bdict) == 'h' && Bgetc(bdict) == 'w'
    997 					&& Bgetc(bdict) == '>')
    998 						n = 7;
    999 			}else if (c == '{')
   1000 				n = 4;
   1001 			if(n)
   1002 				break;
   1003 		}
   1004 	}
   1005 	return (Boffset(bdict)-n);
   1006 }
   1007 
/*
 * Text of the pronunciation key, held as six consecutive pieces
 * (prkey1 .. prkey6) that pgwprintkey prints one after another.
 * NOTE(review): presumably split to keep each string literal short
 * enough for the compiler -- confirm.
 */
static char *prkey1 =
"KEY TO THE PRONUNCIATION\n"
"\n"
"I. CONSONANTS\n"
"b, d, f, k, l, m, n, p, t, v, z: usual English values\n"
"\n"
"g as in go (gəʊ)\n"
"h  ...  ho! (həʊ)\n"
"r  ...  run (rʌn), terrier (ˈtɛriə(r))\n"
"(r)...  her (hɜː(r))\n"
"s  ...  see (siː), success (səkˈsɜs)\n"
"w  ...  wear (wɛə(r))\n"
"hw ...  when (hwɛn)\n"
"j  ...  yes (jɛs)\n"
"θ  ...  thin (θin), bath (bɑːθ)\n"
"ð  ...  then (ðɛn), bathe (beɪð)\n"
"ʃ  ...  shop (ʃɒp), dish (dɪʃ)\n"
"tʃ ...  chop (tʃɒp), ditch (dɪtʃ)\n"
"ʒ  ...  vision (ˈvɪʒən), déjeuner (deʒøne)\n"
;
static char *prkey2 =
"dʒ ...  judge (dʒʌdʒ)\n"
"ŋ  ...  singing (ˈsɪŋɪŋ), think (θiŋk)\n"
"ŋg ...  finger (ˈfiŋgə(r))\n"
"\n"
"Foreign\n"
"ʎ as in It. seraglio (serˈraʎo)\n"
"ɲ  ...  Fr. cognac (kɔɲak)\n"
"x  ...  Ger. ach (ax), Sc. loch (lɒx)\n"
"ç  ...  Ger. ich (ɪç), Sc. nicht (nɪçt)\n"
"ɣ  ...  North Ger. sagen (ˈzaːɣən)\n"
"c  ...  Afrikaans baardmannetjie (ˈbaːrtmanəci)\n"
"ɥ  ...  Fr. cuisine (kɥizin)\n"
"\n"
;
static char *prkey3 =
"II. VOWELS AND DIPTHONGS\n"
"\n"
"Short\n"
"ɪ as in pit (pɪt), -ness (-nɪs)\n"
"ɛ  ...  pet (pɛt), Fr. sept (sɛt)\n"
"æ  ...  pat (pæt)\n"
"ʌ  ...  putt (pʌt)\n"
"ɒ  ...  pot (pɒt)\n"
"ʊ  ...  put (pʊt)\n"
"ə  ...  another (əˈnʌðə(r))\n"
"(ə)...  beaten (ˈbiːt(ə)n)\n"
"i  ...  Fr. si (si)\n"
"e  ...  Fr. bébé (bebe)\n"
"a  ...  Fr. mari (mari)\n"
"ɑ  ...  Fr. bâtiment (bɑtimã)\n"
"ɔ  ...  Fr. homme (ɔm)\n"
"o  ...  Fr. eau (o)\n"
"ø  ...  Fr. peu (pø)\n"
;
static char *prkey4 =
"œ  ...  Fr. boeuf (bœf), coeur (kœr)\n"
"u  ...  Fr. douce (dus)\n"
"ʏ  ...  Ger. Müller (ˈmʏlər)\n"
"y  ...  Fr. du (dy)\n"
"\n"
"Long\n"
"iː as in bean (biːn)\n"
"ɑː ...  barn (bɑːn)\n"
"ɔː ...  born (bɔːn)\n"
"uː ...  boon (buːn)\n"
"ɜː ...  burn (bɜːn)\n"
"eː ...  Ger. Schnee (ʃneː)\n"
"ɛː ...  Ger. Fähre (ˈfɛːrə)\n"
"aː ...  Ger. Tag (taːk)\n"
"oː ...  Ger. Sohn (zoːn)\n"
"øː ...  Ger. Goethe (gøːtə)\n"
"yː ...  Ger. grün (gryːn)\n"
"\n"
;
static char *prkey5 =
"Nasal\n"
"ɛ˜, æ˜ as in Fr. fin (fɛ˜, fæ˜)\n"
"ã  ...  Fr. franc (frã)\n"
"ɔ˜ ...  Fr. bon (bɔ˜n)\n"
"œ˜ ...  Fr. un (œ˜)\n"
"\n"
"Dipthongs, etc.\n"
"eɪ as in bay (beɪ)\n"
"aɪ ...  buy (baɪ)\n"
"ɔɪ ...  boy (bɔɪ)\n"
"əʊ ...  no (nəʊ)\n"
"aʊ ...  now (naʊ)\n"
"ɪə ...  peer (pɪə(r))\n"
"ɛə ...  pair (pɛə(r))\n"
"ʊə ...  tour (tʊə(r))\n"
"ɔə ...  boar (bɔə(r))\n"
"\n"
;
static char *prkey6 =
"III. STRESS\n"
"\n"
"Main stress: ˈ preceding stressed syllable\n"
"Secondary stress: ˌ preceding stressed syllable\n"
"\n"
"E.g.: pronunciation (prəˌnʌnsɪˈeɪʃ(ə)n)\n";
/* TODO: find transcriptions of foreign consonants, œ, ʏ, nasals */
   1110 
/*
 * Print the pronunciation key: the six strings prkey1 .. prkey6,
 * in that order and with nothing between them, to bout.
 */
void
pgwprintkey(void)
{
	Bprint(bout, "%s%s%s%s%s%s",
		prkey1, prkey2, prkey3, prkey4, prkey5, prkey6);
}
   1117 
   1118 /*
   1119  * f points just after a '&', fe points at end of entry.
   1120  * Accumulate the special name, starting after the &
   1121  * and continuing until the next ';', in spec[].
   1122  * Return pointer to char after ';'.
   1123  */
   1124 static char *
   1125 getspec(char *f, char *fe)
   1126 {
   1127 	char *t;
   1128 	int c, i;
   1129 
   1130 	t = spec;
   1131 	i = sizeof spec;
   1132 	while(--i > 0) {
   1133 		c = *f++;
   1134 		if(c == ';' || f == fe)
   1135 			break;
   1136 		*t++ = c;
   1137 	}
   1138 	*t = 0;
   1139 	return f;
   1140 }
   1141 
   1142 /*
   1143  * f points just after '<'; fe points at end of entry.
   1144  * Expect next characters from bin to match:
   1145  *  [/][^ >]+( [^>=]+=[^ >]+)*>
   1146  *      tag   auxname auxval
   1147  * Accumulate the tag and its auxilliary information in
   1148  * tag[], auxname[][] and auxval[][].
   1149  * Set tagstarts=1 if the tag is 'starting' (has no '/'), else 0.
   1150  * Set naux to the number of aux pairs found.
   1151  * Return pointer to after final '>'.
   1152  */
   1153 static char *
   1154 gettag(char *f, char *fe)
   1155 {
   1156 	char *t;
   1157 	int c, i;
   1158 
   1159 	t = tag;
   1160 	c = *f++;
   1161 	if(c == '/')
   1162 		tagstarts = 0;
   1163 	else {
   1164 		tagstarts = 1;
   1165 		*t++ = c;
   1166 	}
   1167 	i = Buflen;
   1168 	while(--i > 0) {
   1169 		c = *f++;
   1170 		if(c == '>' || f == fe)
   1171 			break;
   1172 		*t++ = c;
   1173 	}
   1174 	*t = 0;
   1175 	return f;
   1176 }