pgw.c (29812B)
1 /* thanks to Caerwyn Jones <caerwyn@comcast.net> for this module */ 2 #include <u.h> 3 #include <libc.h> 4 #include <bio.h> 5 #include "dict.h" 6 7 enum { 8 Buflen=1000, 9 Maxaux=5 10 }; 11 12 /* Possible tags */ 13 enum { 14 B, /* Bold */ 15 Blockquote, /* Block quote */ 16 Br, /* Break line */ 17 Cd, /* ? coloquial data */ 18 Col, /* ? Coloquial */ 19 Def, /* Definition */ 20 Hw, /* Head Word */ 21 I, /* Italics */ 22 P, /* Paragraph */ 23 Pos, /* Part of Speach */ 24 Sn, /* Sense */ 25 U, /* ? cross reference*/ 26 Wf, /* ? word form */ 27 Ntag /* end of tags */ 28 }; 29 30 /* Assoc tables must be sorted on first field */ 31 32 static Assoc tagtab[] = { 33 {"b", B}, 34 {"blockquote", Blockquote}, 35 {"BR", Br}, 36 {"cd", Cd}, 37 {"col", Col}, 38 {"def", Def}, 39 {"hw", Hw}, 40 {"i", I}, 41 {"p", P}, 42 {"pos", Pos}, 43 {"sn", Sn}, 44 {"u", U}, 45 {"wf", Wf} 46 }; 47 48 /* Possible tag auxilliary info */ 49 enum { 50 Cols, /* number of columns in a table */ 51 Num, /* letter or number, for a sense */ 52 St, /* status (e.g., obs) */ 53 Naux 54 }; 55 56 #if 0 57 static Assoc auxtab[] = { 58 {"cols", Cols}, 59 {"num", Num}, 60 {"st", St} 61 }; 62 #endif 63 64 static Assoc spectab[] = { 65 {"3on4", 0xbe}, 66 {"AElig", 0xc6}, 67 {"Aacute", 0xc1}, 68 {"Aang", 0xc5}, 69 {"Abarab", 0x100}, 70 {"Acirc", 0xc2}, 71 {"Agrave", 0xc0}, 72 {"Alpha", 0x391}, 73 {"Amacr", 0x100}, 74 {"Asg", 0x1b7}, /* Unicyle. Cf "Sake" */ 75 {"Auml", 0xc4}, 76 {"Beta", 0x392}, 77 {"Cced", 0xc7}, 78 {"Chacek", 0x10c}, 79 {"Chi", 0x3a7}, 80 {"Chirho", 0x2627}, /* Chi Rho U+2627 */ 81 {"Csigma", 0x3da}, 82 {"Delta", 0x394}, 83 {"Eacute", 0xc9}, 84 {"Ecirc", 0xca}, 85 {"Edh", 0xd0}, 86 {"Epsilon", 0x395}, 87 {"Eta", 0x397}, 88 {"Gamma", 0x393}, 89 {"Iacute", 0xcd}, 90 {"Icirc", 0xce}, 91 {"Imacr", 0x12a}, 92 {"Integ", 0x222b}, 93 {"Iota", 0x399}, 94 {"Kappa", 0x39a}, 95 {"Koppa", 0x3de}, 96 {"Lambda", 0x39b}, 97 {"Lbar", 0x141}, 98 {"Mu", 0x39c}, 99 {"Naira", 0x4e}, /* should have bar through */ 100 {"Nplus", 0x4e}, /* should have plus above */ 101 {"Ntilde", 0xd1}, 102 {"Nu", 0x39d}, 103 {"Oacute", 0xd3}, 104 {"Obar", 0xd8}, 105 {"Ocirc", 0xd4}, 106 {"Oe", 0x152}, 107 {"Omega", 0x3a9}, 108 {"Omicron", 0x39f}, 109 {"Ouml", 0xd6}, 110 {"Phi", 0x3a6}, 111 {"Pi", 0x3a0}, 112 {"Psi", 0x3a8}, 113 {"Rho", 0x3a1}, 114 {"Sacute", 0x15a}, 115 {"Sigma", 0x3a3}, 116 {"Summ", 0x2211}, 117 {"Tau", 0x3a4}, 118 {"Th", 0xde}, 119 {"Theta", 0x398}, 120 {"Tse", 0x426}, 121 {"Uacute", 0xda}, 122 {"Ucirc", 0xdb}, 123 {"Upsilon", 0x3a5}, 124 {"Uuml", 0xdc}, 125 {"Wyn", 0x1bf}, /* wynn U+01BF */ 126 {"Xi", 0x39e}, 127 {"Ygh", 0x1b7}, /* Yogh U+01B7 */ 128 {"Zeta", 0x396}, 129 {"Zh", 0x1b7}, /* looks like Yogh. Cf "Sake" */ 130 {"a", 0x61}, /* ante */ 131 {"aacute", 0xe1}, 132 {"aang", 0xe5}, 133 {"aasper", MAAS}, 134 {"abreve", 0x103}, 135 {"acirc", 0xe2}, 136 {"acute", LACU}, 137 {"aelig", 0xe6}, 138 {"agrave", 0xe0}, 139 {"ahook", 0x105}, 140 {"alenis", MALN}, 141 {"alpha", 0x3b1}, 142 {"amacr", 0x101}, 143 {"amp", 0x26}, 144 {"and", MAND}, 145 {"ang", LRNG}, 146 {"angle", 0x2220}, 147 {"ankh", 0x2625}, /* ankh U+2625 */ 148 {"ante", 0x61}, /* before (year) */ 149 {"aonq", MAOQ}, 150 {"appreq", 0x2243}, 151 {"aquar", 0x2652}, 152 {"arDadfull", 0x636}, /* Dad U+0636 */ 153 {"arHa", 0x62d}, /* haa U+062D */ 154 {"arTa", 0x62a}, /* taa U+062A */ 155 {"arain", 0x639}, /* ain U+0639 */ 156 {"arainfull", 0x639}, /* ain U+0639 */ 157 {"aralif", 0x627}, /* alef U+0627 */ 158 {"arba", 0x628}, /* baa U+0628 */ 159 {"arha", 0x647}, /* ha U+0647 */ 160 {"aries", 0x2648}, 161 {"arnun", 0x646}, /* noon U+0646 */ 162 {"arnunfull", 0x646}, /* noon U+0646 */ 163 {"arpa", 0x647}, /* ha U+0647 */ 164 {"arqoph", 0x642}, /* qaf U+0642 */ 165 {"arshinfull", 0x634}, /* sheen U+0634 */ 166 {"arta", 0x62a}, /* taa U+062A */ 167 {"artafull", 0x62a}, /* taa U+062A */ 168 {"artha", 0x62b}, /* thaa U+062B */ 169 {"arwaw", 0x648}, /* waw U+0648 */ 170 {"arya", 0x64a}, /* ya U+064A */ 171 {"aryafull", 0x64a}, /* ya U+064A */ 172 {"arzero", 0x660}, /* indic zero U+0660 */ 173 {"asg", 0x292}, /* unicycle character. Cf "hallow" */ 174 {"asper", LASP}, 175 {"assert", 0x22a2}, 176 {"astm", 0x2042}, /* asterism: should be upside down */ 177 {"at", 0x40}, 178 {"atilde", 0xe3}, 179 {"auml", 0xe4}, 180 {"ayin", 0x639}, /* arabic ain U+0639 */ 181 {"b1", 0x2d}, /* single bond */ 182 {"b2", 0x3d}, /* double bond */ 183 {"b3", 0x2261}, /* triple bond */ 184 {"bbar", 0x180}, /* b with bar U+0180 */ 185 {"beta", 0x3b2}, 186 {"bigobl", 0x2f}, 187 {"blC", 0x43}, /* should be black letter */ 188 {"blJ", 0x4a}, /* should be black letter */ 189 {"blU", 0x55}, /* should be black letter */ 190 {"blb", 0x62}, /* should be black letter */ 191 {"blozenge", 0x25ca}, /* U+25CA; should be black */ 192 {"bly", 0x79}, /* should be black letter */ 193 {"bra", MBRA}, 194 {"brbl", LBRB}, 195 {"breve", LBRV}, 196 {"bslash",'\\'}, 197 {"bsquare", 0x25a0}, /* black square U+25A0 */ 198 {"btril", 0x25c0}, /* U+25C0 */ 199 {"btrir", 0x25b6}, /* U+25B6 */ 200 {"c", 0x63}, /* circa */ 201 {"cab", 0x232a}, 202 {"cacute", 0x107}, 203 {"canc", 0x264b}, 204 {"capr", 0x2651}, 205 {"caret", 0x5e}, 206 {"cb", 0x7d}, 207 {"cbigb", 0x7d}, 208 {"cbigpren", 0x29}, 209 {"cbigsb", 0x5d}, 210 {"cced", 0xe7}, 211 {"cdil", LCED}, 212 {"cdsb", 0x301b}, /* ]] U+301b */ 213 {"cent", 0xa2}, 214 {"chacek", 0x10d}, 215 {"chi", 0x3c7}, 216 {"circ", LRNG}, 217 {"circa", 0x63}, /* about (year) */ 218 {"circbl", 0x325}, /* ring below accent U+0325 */ 219 {"circle", 0x25cb}, /* U+25CB */ 220 {"circledot", 0x2299}, 221 {"click", 0x296}, 222 {"club", 0x2663}, 223 {"comtime", 0x43}, 224 {"conj", 0x260c}, 225 {"cprt", 0xa9}, 226 {"cq", '\''}, 227 {"cqq", 0x201d}, 228 {"cross", 0x2720}, /* maltese cross U+2720 */ 229 {"crotchet", 0x2669}, 230 {"csb", 0x5d}, 231 {"ctilde", 0x63}, /* +tilde */ 232 {"ctlig", MLCT}, 233 {"cyra", 0x430}, 234 {"cyre", 0x435}, 235 {"cyrhard", 0x44a}, 236 {"cyrjat", 0x463}, 237 {"cyrm", 0x43c}, 238 {"cyrn", 0x43d}, 239 {"cyrr", 0x440}, 240 {"cyrsoft", 0x44c}, 241 {"cyrt", 0x442}, 242 {"cyry", 0x44b}, 243 {"dag", 0x2020}, 244 {"dbar", 0x111}, 245 {"dblar", 0x21cb}, 246 {"dblgt", 0x226b}, 247 {"dbllt", 0x226a}, 248 {"dced", 0x64}, /* +cedilla */ 249 {"dd", MDD}, 250 {"ddag", 0x2021}, 251 {"ddd", MDDD}, 252 {"decr", 0x2193}, 253 {"deg", 0xb0}, 254 {"dele", 0x64}, /* should be dele */ 255 {"delta", 0x3b4}, 256 {"descnode", 0x260b}, /* descending node U+260B */ 257 {"diamond", 0x2662}, 258 {"digamma", 0x3dd}, 259 {"div", 0xf7}, 260 {"dlessi", 0x131}, 261 {"dlessj1", 0x6a}, /* should be dotless */ 262 {"dlessj2", 0x6a}, /* should be dotless */ 263 {"dlessj3", 0x6a}, /* should be dotless */ 264 {"dollar", 0x24}, 265 {"dotab", LDOT}, 266 {"dotbl", LDTB}, 267 {"drachm", 0x292}, 268 {"dubh", 0x2d}, 269 {"eacute", 0xe9}, 270 {"earth", 0x2641}, 271 {"easper", MEAS}, 272 {"ebreve", 0x115}, 273 {"ecirc", 0xea}, 274 {"edh", 0xf0}, 275 {"egrave", 0xe8}, 276 {"ehacek", 0x11b}, 277 {"ehook", 0x119}, 278 {"elem", 0x220a}, 279 {"elenis", MELN}, 280 {"em", 0x2014}, 281 {"emacr", 0x113}, 282 {"emem", MEMM}, 283 {"en", 0x2013}, 284 {"epsilon", 0x3b5}, 285 {"equil", 0x21cb}, 286 {"ergo", 0x2234}, 287 {"es", MES}, 288 {"eszett", 0xdf}, 289 {"eta", 0x3b7}, 290 {"eth", 0xf0}, 291 {"euml", 0xeb}, 292 {"expon", 0x2191}, 293 {"fact", 0x21}, 294 {"fata", 0x251}, 295 {"fatpara", 0xb6}, /* should have fatter, filled in bowl */ 296 {"female", 0x2640}, 297 {"ffilig", MLFFI}, 298 {"fflig", MLFF}, 299 {"ffllig", MLFFL}, 300 {"filig", MLFI}, 301 {"flat", 0x266d}, 302 {"fllig", MLFL}, 303 {"frE", 0x45}, /* should be curly */ 304 {"frL", 'L'}, /* should be curly */ 305 {"frR", 0x52}, /* should be curly */ 306 {"frakB", 0x42}, /* should have fraktur style */ 307 {"frakG", 0x47}, 308 {"frakH", 0x48}, 309 {"frakI", 0x49}, 310 {"frakM", 0x4d}, 311 {"frakU", 0x55}, 312 {"frakX", 0x58}, 313 {"frakY", 0x59}, 314 {"frakh", 0x68}, 315 {"frbl", LFRB}, 316 {"frown", LFRN}, 317 {"fs", 0x20}, 318 {"fsigma", 0x3c2}, 319 {"gAacute", 0xc1}, /* should be Α+acute */ 320 {"gaacute", 0x3b1}, /* +acute */ 321 {"gabreve", 0x3b1}, /* +breve */ 322 {"gafrown", 0x3b1}, /* +frown */ 323 {"gagrave", 0x3b1}, /* +grave */ 324 {"gamacr", 0x3b1}, /* +macron */ 325 {"gamma", 0x3b3}, 326 {"gauml", 0x3b1}, /* +umlaut */ 327 {"ge", 0x2267}, 328 {"geacute", 0x3b5}, /* +acute */ 329 {"gegrave", 0x3b5}, /* +grave */ 330 {"ghacute", 0x3b7}, /* +acute */ 331 {"ghfrown", 0x3b7}, /* +frown */ 332 {"ghgrave", 0x3b7}, /* +grave */ 333 {"ghmacr", 0x3b7}, /* +macron */ 334 {"giacute", 0x3b9}, /* +acute */ 335 {"gibreve", 0x3b9}, /* +breve */ 336 {"gifrown", 0x3b9}, /* +frown */ 337 {"gigrave", 0x3b9}, /* +grave */ 338 {"gimacr", 0x3b9}, /* +macron */ 339 {"giuml", 0x3b9}, /* +umlaut */ 340 {"glagjat", 0x467}, 341 {"glots", 0x2c0}, 342 {"goacute", 0x3bf}, /* +acute */ 343 {"gobreve", 0x3bf}, /* +breve */ 344 {"grave", LGRV}, 345 {"gt", 0x3e}, 346 {"guacute", 0x3c5}, /* +acute */ 347 {"gufrown", 0x3c5}, /* +frown */ 348 {"gugrave", 0x3c5}, /* +grave */ 349 {"gumacr", 0x3c5}, /* +macron */ 350 {"guuml", 0x3c5}, /* +umlaut */ 351 {"gwacute", 0x3c9}, /* +acute */ 352 {"gwfrown", 0x3c9}, /* +frown */ 353 {"gwgrave", 0x3c9}, /* +grave */ 354 {"hacek", LHCK}, 355 {"halft", 0x2308}, 356 {"hash", 0x23}, 357 {"hasper", MHAS}, 358 {"hatpath", 0x5b2}, /* hataf patah U+05B2 */ 359 {"hatqam", 0x5b3}, /* hataf qamats U+05B3 */ 360 {"hatseg", 0x5b1}, /* hataf segol U+05B1 */ 361 {"hbar", 0x127}, 362 {"heart", 0x2661}, 363 {"hebaleph", 0x5d0}, /* aleph U+05D0 */ 364 {"hebayin", 0x5e2}, /* ayin U+05E2 */ 365 {"hebbet", 0x5d1}, /* bet U+05D1 */ 366 {"hebbeth", 0x5d1}, /* bet U+05D1 */ 367 {"hebcheth", 0x5d7}, /* bet U+05D7 */ 368 {"hebdaleth", 0x5d3}, /* dalet U+05D3 */ 369 {"hebgimel", 0x5d2}, /* gimel U+05D2 */ 370 {"hebhe", 0x5d4}, /* he U+05D4 */ 371 {"hebkaph", 0x5db}, /* kaf U+05DB */ 372 {"heblamed", 0x5dc}, /* lamed U+05DC */ 373 {"hebmem", 0x5de}, /* mem U+05DE */ 374 {"hebnun", 0x5e0}, /* nun U+05E0 */ 375 {"hebnunfin", 0x5df}, /* final nun U+05DF */ 376 {"hebpe", 0x5e4}, /* pe U+05E4 */ 377 {"hebpedag", 0x5e3}, /* final pe? U+05E3 */ 378 {"hebqoph", 0x5e7}, /* qof U+05E7 */ 379 {"hebresh", 0x5e8}, /* resh U+05E8 */ 380 {"hebshin", 0x5e9}, /* shin U+05E9 */ 381 {"hebtav", 0x5ea}, /* tav U+05EA */ 382 {"hebtsade", 0x5e6}, /* tsadi U+05E6 */ 383 {"hebwaw", 0x5d5}, /* vav? U+05D5 */ 384 {"hebyod", 0x5d9}, /* yod U+05D9 */ 385 {"hebzayin", 0x5d6}, /* zayin U+05D6 */ 386 {"hgz", 0x292}, /* ??? Cf "alet" */ 387 {"hireq", 0x5b4}, /* U+05B4 */ 388 {"hlenis", MHLN}, 389 {"hook", LOGO}, 390 {"horizE", 0x45}, /* should be on side */ 391 {"horizP", 0x50}, /* should be on side */ 392 {"horizS", 0x223d}, 393 {"horizT", 0x22a3}, 394 {"horizb", 0x7b}, /* should be underbrace */ 395 {"ia", 0x3b1}, 396 {"iacute", 0xed}, 397 {"iasper", MIAS}, 398 {"ib", 0x3b2}, 399 {"ibar", 0x268}, 400 {"ibreve", 0x12d}, 401 {"icirc", 0xee}, 402 {"id", 0x3b4}, 403 {"ident", 0x2261}, 404 {"ie", 0x3b5}, 405 {"ifilig", MLFI}, 406 {"ifflig", MLFF}, 407 {"ig", 0x3b3}, 408 {"igrave", 0xec}, 409 {"ih", 0x3b7}, 410 {"ii", 0x3b9}, 411 {"ik", 0x3ba}, 412 {"ilenis", MILN}, 413 {"imacr", 0x12b}, 414 {"implies", 0x21d2}, 415 {"index", 0x261e}, 416 {"infin", 0x221e}, 417 {"integ", 0x222b}, 418 {"intsec", 0x2229}, 419 {"invpri", 0x2cf}, 420 {"iota", 0x3b9}, 421 {"iq", 0x3c8}, 422 {"istlig", MLST}, 423 {"isub", 0x3f5}, /* iota below accent */ 424 {"iuml", 0xef}, 425 {"iz", 0x3b6}, 426 {"jup", 0x2643}, 427 {"kappa", 0x3ba}, 428 {"koppa", 0x3df}, 429 {"lambda", 0x3bb}, 430 {"lar", 0x2190}, 431 {"lbar", 0x142}, 432 {"le", 0x2266}, 433 {"lenis", LLEN}, 434 {"leo", 0x264c}, 435 {"lhalfbr", 0x2308}, 436 {"lhshoe", 0x2283}, 437 {"libra", 0x264e}, 438 {"llswing", MLLS}, 439 {"lm", 0x2d0}, 440 {"logicand", 0x2227}, 441 {"logicor", 0x2228}, 442 {"longs", 0x283}, 443 {"lrar", 0x2194}, 444 {"lt", 0x3c}, 445 {"ltappr", 0x227e}, 446 {"ltflat", 0x2220}, 447 {"lumlbl", 0x6c}, /* +umlaut below */ 448 {"mac", LMAC}, 449 {"male", 0x2642}, 450 {"mc", 0x63}, /* should be raised */ 451 {"merc", 0x263f}, /* mercury U+263F */ 452 {"min", 0x2212}, 453 {"moonfq", 0x263d}, /* first quarter moon U+263D */ 454 {"moonlq", 0x263e}, /* last quarter moon U+263E */ 455 {"msylab", 0x6d}, /* +sylab (ˌ) */ 456 {"mu", 0x3bc}, 457 {"nacute", 0x144}, 458 {"natural", 0x266e}, 459 {"neq", 0x2260}, 460 {"nfacute", 0x2032}, 461 {"nfasper", 0x2bd}, 462 {"nfbreve", 0x2d8}, 463 {"nfced", 0xb8}, 464 {"nfcirc", 0x2c6}, 465 {"nffrown", 0x2322}, 466 {"nfgra", 0x2cb}, 467 {"nfhacek", 0x2c7}, 468 {"nfmac", 0xaf}, 469 {"nftilde", 0x2dc}, 470 {"nfuml", 0xa8}, 471 {"ng", 0x14b}, 472 {"not", 0xac}, 473 {"notelem", 0x2209}, 474 {"ntilde", 0xf1}, 475 {"nu", 0x3bd}, 476 {"oab", 0x2329}, 477 {"oacute", 0xf3}, 478 {"oasper", MOAS}, 479 {"ob", 0x7b}, 480 {"obar", 0xf8}, 481 {"obigb", 0x7b}, /* should be big */ 482 {"obigpren", 0x28}, 483 {"obigsb", 0x5b}, /* should be big */ 484 {"obreve", 0x14f}, 485 {"ocirc", 0xf4}, 486 {"odsb", 0x301a}, /* [[ U+301A */ 487 {"oelig", 0x153}, 488 {"oeamp", 0x26}, 489 {"ograve", 0xf2}, 490 {"ohook", 0x6f}, /* +hook */ 491 {"olenis", MOLN}, 492 {"omacr", 0x14d}, 493 {"omega", 0x3c9}, 494 {"omicron", 0x3bf}, 495 {"ope", 0x25b}, 496 {"opp", 0x260d}, 497 {"oq", 0x60}, 498 {"oqq", 0x201c}, 499 {"or", MOR}, 500 {"osb", 0x5b}, 501 {"otilde", 0xf5}, 502 {"ouml", 0xf6}, 503 {"ounce", 0x2125}, /* ounce U+2125 */ 504 {"ovparen", 0x2322}, /* should be sideways ( */ 505 {"p", 0x2032}, 506 {"pa", 0x2202}, 507 {"page", 0x50}, 508 {"pall", 0x28e}, 509 {"paln", 0x272}, 510 {"par", PAR}, 511 {"para", 0xb6}, 512 {"pbar", 0x70}, /* +bar */ 513 {"per", 0x2118}, /* per U+2118 */ 514 {"phi", 0x3c6}, 515 {"phi2", 0x3d5}, 516 {"pi", 0x3c0}, 517 {"pisces", 0x2653}, 518 {"planck", 0x127}, 519 {"plantinJ", 0x4a}, /* should be script */ 520 {"pm", 0xb1}, 521 {"pmil", 0x2030}, 522 {"pp", 0x2033}, 523 {"ppp", 0x2034}, 524 {"prop", 0x221d}, 525 {"psi", 0x3c8}, 526 {"pstlg", 0xa3}, 527 {"q", 0x3f}, /* should be raised */ 528 {"qamets", 0x5b3}, /* U+05B3 */ 529 {"quaver", 0x266a}, 530 {"rar", 0x2192}, 531 {"rasper", MRAS}, 532 {"rdot", 0xb7}, 533 {"recipe", 0x211e}, /* U+211E */ 534 {"reg", 0xae}, 535 {"revC", 0x186}, /* open O U+0186 */ 536 {"reva", 0x252}, 537 {"revc", 0x254}, 538 {"revope", 0x25c}, 539 {"revr", 0x279}, 540 {"revsc", 0x2d2}, /* upside-down semicolon */ 541 {"revv", 0x28c}, 542 {"rfa", 0x6f}, /* +hook (Cf "goal") */ 543 {"rhacek", 0x159}, 544 {"rhalfbr", 0x2309}, 545 {"rho", 0x3c1}, 546 {"rhshoe", 0x2282}, 547 {"rlenis", MRLN}, 548 {"rsylab", 0x72}, /* +sylab */ 549 {"runash", 0x46}, /* should be runic 'ash' */ 550 {"rvow", 0x2d4}, 551 {"sacute", 0x15b}, 552 {"sagit", 0x2650}, 553 {"sampi", 0x3e1}, 554 {"saturn", 0x2644}, 555 {"sced", 0x15f}, 556 {"schwa", 0x259}, 557 {"scorpio", 0x264f}, 558 {"scrA", 0x41}, /* should be script */ 559 {"scrC", 0x43}, 560 {"scrE", 0x45}, 561 {"scrF", 0x46}, 562 {"scrI", 0x49}, 563 {"scrJ", 0x4a}, 564 {"scrL",'L'}, 565 {"scrO", 0x4f}, 566 {"scrP", 0x50}, 567 {"scrQ", 0x51}, 568 {"scrS", 0x53}, 569 {"scrT", 0x54}, 570 {"scrb", 0x62}, 571 {"scrd", 0x64}, 572 {"scrh", 0x68}, 573 {"scrl", 0x6c}, 574 {"scruple", 0x2108}, /* U+2108 */ 575 {"sdd", 0x2d0}, 576 {"sect", 0xa7}, 577 {"semE", 0x2203}, 578 {"sh", 0x283}, 579 {"shacek", 0x161}, 580 {"sharp", 0x266f}, 581 {"sheva", 0x5b0}, /* U+05B0 */ 582 {"shti", 0x26a}, 583 {"shtsyll", 0x222a}, 584 {"shtu", 0x28a}, 585 {"sidetri", 0x22b2}, 586 {"sigma", 0x3c3}, 587 {"since", 0x2235}, 588 {"slge", 0x2265}, /* should have slanted line under */ 589 {"slle", 0x2264}, /* should have slanted line under */ 590 {"sm", 0x2c8}, 591 {"smm", 0x2cc}, 592 {"spade", 0x2660}, 593 {"sqrt", 0x221a}, 594 {"square", 0x25a1}, /* U+25A1 */ 595 {"ssChi", 0x3a7}, /* should be sans serif */ 596 {"ssIota", 0x399}, 597 {"ssOmicron", 0x39f}, 598 {"ssPi", 0x3a0}, 599 {"ssRho", 0x3a1}, 600 {"ssSigma", 0x3a3}, 601 {"ssTau", 0x3a4}, 602 {"star", 0x2a}, 603 {"stlig", MLST}, 604 {"sup2", 0x2072}, 605 {"supgt", 0x2c3}, 606 {"suplt", 0x2c2}, 607 {"sur", 0x2b3}, 608 {"swing", 0x223c}, 609 {"tau", 0x3c4}, 610 {"taur", 0x2649}, 611 {"th", 0xfe}, 612 {"thbar", 0xfe}, /* +bar */ 613 {"theta", 0x3b8}, 614 {"thinqm", 0x3f}, /* should be thinner */ 615 {"tilde", LTIL}, 616 {"times", 0xd7}, 617 {"tri", 0x2206}, 618 {"trli", 0x2016}, 619 {"ts", 0x2009}, 620 {"uacute", 0xfa}, 621 {"uasper", MUAS}, 622 {"ubar", 0x75}, /* +bar */ 623 {"ubreve", 0x16d}, 624 {"ucirc", 0xfb}, 625 {"udA", 0x2200}, 626 {"udT", 0x22a5}, 627 {"uda", 0x250}, 628 {"udh", 0x265}, 629 {"udqm", 0xbf}, 630 {"udpsi", 0x22d4}, 631 {"udtr", 0x2207}, 632 {"ugrave", 0xf9}, 633 {"ulenis", MULN}, 634 {"umacr", 0x16b}, 635 {"uml", LUML}, 636 {"undl", 0x2cd}, /* underline accent */ 637 {"union", 0x222a}, 638 {"upsilon", 0x3c5}, 639 {"uuml", 0xfc}, 640 {"vavpath", 0x5d5}, /* vav U+05D5 (+patah) */ 641 {"vavsheva", 0x5d5}, /* vav U+05D5 (+sheva) */ 642 {"vb", 0x7c}, 643 {"vddd", 0x22ee}, 644 {"versicle2", 0x2123}, /* U+2123 */ 645 {"vinc", 0xaf}, 646 {"virgo", 0x264d}, 647 {"vpal", 0x25f}, 648 {"vvf", 0x263}, 649 {"wasper", MWAS}, 650 {"wavyeq", 0x2248}, 651 {"wlenis", MWLN}, 652 {"wyn", 0x1bf}, /* wynn U+01BF */ 653 {"xi", 0x3be}, 654 {"yacute", 0xfd}, 655 {"ycirc", 0x177}, 656 {"ygh", 0x292}, 657 {"ymacr", 0x79}, /* +macron */ 658 {"yuml", 0xff}, 659 {"zced", 0x7a}, /* +cedilla */ 660 {"zeta", 0x3b6}, 661 {"zh", 0x292}, 662 {"zhacek", 0x17e} 663 }; 664 /* 665 The following special characters don't have close enough 666 equivalents in Unicode, so aren't in the above table. 667 22n 2^(2^n) Cf Fermat 668 2on4 2/4 669 3on8 3/8 670 Bantuo Bantu O. Cf Otshi-herero 671 Car C with circular arrow on top 672 albrtime cut-time: C with vertical line 673 ardal Cf dental 674 bantuo Bantu o. Cf Otshi-herero 675 bbc1 single chem bond below 676 bbc2 double chem bond below 677 bbl1 chem bond like / 678 bbl2 chem bond like // 679 bbr1 chem bond like \ 680 bbr2 chem bond \\ 681 bcop1 copper symbol. Cf copper 682 bcop2 copper symbol. Cf copper 683 benchm Cf benchmark 684 btc1 single chem bond above 685 btc2 double chem bond above 686 btl1 chem bond like \ 687 btl2 chem bond like \\ 688 btr1 chem bond like / 689 btr2 chem bond line // 690 burman Cf Burman 691 devph sanskrit letter. Cf ph 692 devrfls sanskrit letter. Cf cerebral 693 duplong[12] musical note 694 egchi early form of chi 695 eggamma[12] early form of gamma 696 egiota early form of iota 697 egkappa early form of kappa 698 eglambda early form of lambda 699 egmu[12] early form of mu 700 egnu[12] early form of nu 701 egpi[123] early form of pi 702 egrho[12] early form of rho 703 egsampi early form of sampi 704 egsan early form of san 705 egsigma[12] early form of sigma 706 egxi[123] early form of xi 707 elatS early form of S 708 elatc[12] early form of C 709 elatg[12] early form of G 710 glagjeri Slavonic Glagolitic jeri 711 glagjeru Slavonic Glagolitic jeru 712 hypolem hypolemisk (line with underdot) 713 lhrbr lower half } 714 longmord long mordent 715 mbwvow backwards scretched C. Cf retract. 716 mord music symbol. Cf mordent 717 mostra Cf direct 718 ohgcirc old form of circumflex 719 oldbeta old form of β. Cf perturbate 720 oldsemibr[12] old forms of semibreve. Cf prolation 721 ormg old form of g. Cf G 722 para[12345] form of ¶ 723 pauseo musical pause sign 724 pauseu musical pause sign 725 pharyng Cf pharyngal 726 ragr Black letter ragged r 727 repetn musical repeat. Cf retort 728 segno musical segno sign 729 semain[12] semitic ain 730 semhe semitic he 731 semheth semitic heth 732 semkaph semitic kaph 733 semlamed[12] semitic lamed 734 semmem semitic mem 735 semnum semitic nun 736 sempe semitic pe 737 semqoph[123] semitic qoph 738 semresh semitic resh 739 semtav[1234] semitic tav 740 semyod semitic yod 741 semzayin[123] semitic zayin 742 shtlong[12] U with underbar. Cf glyconic 743 sigmatau σ,τ combination 744 squaver sixteenth note 745 sqbreve square musical breve note 746 swast swastika 747 uhrbr upper half of big } 748 versicle1 Cf versicle 749 */ 750 751 752 static Rune normtab[128] = { 753 /*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/ 754 /*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, 755 NONE, NONE, ' ', NONE, NONE, NONE, NONE, NONE, 756 /*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, 757 NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, 758 /*20*/ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, SPCS, '\'', 759 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 760 /*30*/ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 761 0x38, 0x39, 0x3a, 0x3b, TAGS, 0x3d, TAGE, 0x3f, 762 /*40*/ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 763 0x48, 0x49, 0x4a, 0x4b,'L', 0x4d, 0x4e, 0x4f, 764 /*50*/ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 765 0x58, 0x59, 0x5a, 0x5b,'\\', 0x5d, 0x5e, 0x5f, 766 /*60*/ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 767 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 768 /*70*/ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 769 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, NONE 770 }; 771 #if 0 772 static Rune phtab[128] = { 773 /*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/ 774 /*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, 775 NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, 776 /*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, 777 NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, 778 /*20*/ 0x20, 0x21, 0x2c8, 0x23, 0x24, 0x2cc, 0xe6, '\'', 779 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 780 /*30*/ 0x30, 0x31, 0x32, 0x25c, 0x34, 0x35, 0x36, 0x37, 781 0x38, 0xf8, 0x2d0, 0x3b, TAGS, 0x3d, TAGE, 0x3f, 782 /*40*/ 0x259, 0x251, 0x42, 0x43, 0xf0, 0x25b, 0x46, 0x47, 783 0x48, 0x26a, 0x4a, 0x4b,'L', 0x4d, 0x14b, 0x254, 784 /*50*/ 0x50, 0x252, 0x52, 0x283, 0x3b8, 0x28a, 0x28c, 0x57, 785 0x58, 0x59, 0x292, 0x5b,'\\', 0x5d, 0x5e, 0x5f, 786 /*60*/ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 787 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 788 /*70*/ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 789 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, NONE 790 }; 791 static Rune grtab[128] = { 792 /*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/ 793 /*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, 794 NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, 795 /*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, 796 NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, 797 /*20*/ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, SPCS, '\'', 798 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 799 /*30*/ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 800 0x38, 0x39, 0x3a, 0x3b, TAGS, 0x3d, TAGE, 0x3f, 801 /*40*/ 0x40, 0x391, 0x392, 0x39e, 0x394, 0x395, 0x3a6, 0x393, 802 0x397, 0x399, 0x3da, 0x39a, 0x39b, 0x39c, 0x39d, 0x39f, 803 /*50*/ 0x3a0, 0x398, 0x3a1, 0x3a3, 0x3a4, 0x3a5, 0x56, 0x3a9, 804 0x3a7, 0x3a8, 0x396, 0x5b,'\\', 0x5d, 0x5e, 0x5f, 805 /*60*/ 0x60, 0x3b1, 0x3b2, 0x3be, 0x3b4, 0x3b5, 0x3c6, 0x3b3, 806 0x3b7, 0x3b9, 0x3c2, 0x3ba, 0x3bb, 0x3bc, 0x3bd, 0x3bf, 807 /*70*/ 0x3c0, 0x3b8, 0x3c1, 0x3c3, 0x3c4, 0x3c5, 0x76, 0x3c9, 808 0x3c7, 0x3c8, 0x3b6, 0x7b, 0x7c, 0x7d, 0x7e, NONE 809 }; 810 static Rune subtab[128] = { 811 /*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/ 812 /*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, 813 NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, 814 /*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, 815 NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, 816 /*20*/ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, SPCS, '\'', 817 0x208d, 0x208e, 0x2a, 0x208a, 0x2c, 0x208b, 0x2e, 0x2f, 818 /*30*/ 0x2080, 0x2081, 0x2082, 0x2083, 0x2084, 0x2085, 0x2086, 0x2087, 819 0x2088, 0x2089, 0x3a, 0x3b, TAGS, 0x208c, TAGE, 0x3f, 820 /*40*/ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 821 0x48, 0x49, 0x4a, 0x4b,'L', 0x4d, 0x4e, 0x4f, 822 /*50*/ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 823 0x58, 0x59, 0x5a, 0x5b,'\\', 0x5d, 0x5e, 0x5f, 824 /*60*/ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 825 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 826 /*70*/ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 827 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, NONE 828 }; 829 static Rune suptab[128] = { 830 /*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/ 831 /*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, 832 NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, 833 /*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, 834 NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, 835 /*20*/ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, SPCS, '\'', 836 0x207d, 0x207e, 0x2a, 0x207a, 0x2c, 0x207b, 0x2e, 0x2f, 837 /*30*/ 0x2070, 0x2071, 0x2072, 0x2073, 0x2074, 0x2075, 0x2076, 0x2077, 838 0x2078, 0x2079, 0x3a, 0x3b, TAGS, 0x207c, TAGE, 0x3f, 839 /*40*/ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 840 0x48, 0x49, 0x4a, 0x4b,'L', 0x4d, 0x4e, 0x4f, 841 /*50*/ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 842 0x58, 0x59, 0x5a, 0x5b,'\\', 0x5d, 0x5e, 0x5f, 843 /*60*/ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 844 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 845 /*70*/ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 846 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, NONE 847 }; 848 #endif 849 850 static int tagstarts; 851 static char tag[Buflen]; 852 static char spec[Buflen]; 853 static Entry curentry; 854 #define cursize (curentry.end-curentry.start) 855 856 static char *getspec(char *, char *); 857 static char *gettag(char *, char *); 858 859 /* 860 * cmd is one of: 861 * 'p': normal print 862 * 'h': just print headwords 863 * 'P': print raw 864 */ 865 void 866 pgwprintentry(Entry e, int cmd) 867 { 868 char *p, *pe; 869 int t; 870 long r, rprev, rlig; 871 Rune *transtab; 872 873 p = e.start; 874 pe = e.end; 875 transtab = normtab; 876 rprev = NONE; 877 changett(0, 0, 0); 878 curentry = e; 879 if(cmd == 'h') 880 outinhibit = 1; 881 while(p < pe) { 882 if(cmd == 'r') { 883 outchar(*p++); 884 continue; 885 } 886 r = transtab[(*p++)&0x7F]; 887 if(r < NONE) { 888 /* Emit the rune, but buffer in case of ligature */ 889 if(rprev != NONE) 890 outrune(rprev); 891 rprev = r; 892 } else if(r == SPCS) { 893 /* Start of special character name */ 894 p = getspec(p, pe); 895 r = lookassoc(spectab, asize(spectab), spec); 896 if(r == -1) { 897 if(debug) 898 err("spec %ld %d %s", 899 e.doff, cursize, spec); 900 r = 0xfffd; 901 } 902 if(r >= LIGS && r < LIGE) { 903 /* handle possible ligature */ 904 rlig = liglookup(r, rprev); 905 if(rlig != NONE) 906 rprev = rlig; /* overwrite rprev */ 907 else { 908 /* could print accent, but let's not */ 909 if(rprev != NONE) outrune(rprev); 910 rprev = NONE; 911 } 912 } else if(r >= MULTI && r < MULTIE) { 913 if(rprev != NONE) { 914 outrune(rprev); 915 rprev = NONE; 916 } 917 outrunes(multitab[r-MULTI]); 918 } else if(r == PAR) { 919 if(rprev != NONE) { 920 outrune(rprev); 921 rprev = NONE; 922 } 923 outnl(1); 924 } else { 925 if(rprev != NONE) outrune(rprev); 926 rprev = r; 927 } 928 } else if(r == TAGS) { 929 /* Start of tag name */ 930 if(rprev != NONE) { 931 outrune(rprev); 932 rprev = NONE; 933 } 934 p = gettag(p, pe); 935 t = lookassoc(tagtab, asize(tagtab), tag); 936 if(t == -1) { 937 if(debug) 938 err("tag %ld %d %s", 939 e.doff, cursize, tag); 940 continue; 941 } 942 switch(t){ 943 case Hw: 944 if(cmd == 'h') { 945 if(!tagstarts) 946 outchar(' '); 947 outinhibit = !tagstarts; 948 } 949 break; 950 case Sn: 951 if(tagstarts) { 952 outnl(2); 953 } 954 break; 955 case P: 956 outnl(tagstarts); 957 break; 958 case Col: 959 case Br: 960 case Blockquote: 961 if(tagstarts) 962 outnl(1); 963 break; 964 case U: 965 outchar('/'); 966 } 967 } 968 } 969 if(cmd == 'h') { 970 outinhibit = 0; 971 outnl(0); 972 } 973 } 974 975 /* 976 * Return offset into bdict where next webster entry after fromoff starts. 977 * Webster entries start with <p><hw> 978 */ 979 long 980 pgwnextoff(long fromoff) 981 { 982 long a, n; 983 int c; 984 985 a = Bseek(bdict, fromoff, 0); 986 if(a != fromoff) 987 return -1; 988 n = 0; 989 for(;;) { 990 c = Bgetc(bdict); 991 if(c < 0) 992 break; 993 if(c == '<' && Bgetc(bdict) == 'p' && Bgetc(bdict) == '>') { 994 c = Bgetc(bdict); 995 if(c == '<') { 996 if (Bgetc(bdict) == 'h' && Bgetc(bdict) == 'w' 997 && Bgetc(bdict) == '>') 998 n = 7; 999 }else if (c == '{') 1000 n = 4; 1001 if(n) 1002 break; 1003 } 1004 } 1005 return (Boffset(bdict)-n); 1006 } 1007 1008 static char *prkey1 = 1009 "KEY TO THE PRONUNCIATION\n" 1010 "\n" 1011 "I. CONSONANTS\n" 1012 "b, d, f, k, l, m, n, p, t, v, z: usual English values\n" 1013 "\n" 1014 "g as in go (gəʊ)\n" 1015 "h ... ho! (həʊ)\n" 1016 "r ... run (rʌn), terrier (ˈtɛriə(r))\n" 1017 "(r)... her (hɜː(r))\n" 1018 "s ... see (siː), success (səkˈsɜs)\n" 1019 "w ... wear (wɛə(r))\n" 1020 "hw ... when (hwɛn)\n" 1021 "j ... yes (jɛs)\n" 1022 "θ ... thin (θin), bath (bɑːθ)\n" 1023 "ð ... then (ðɛn), bathe (beɪð)\n" 1024 "ʃ ... shop (ʃɒp), dish (dɪʃ)\n" 1025 "tʃ ... chop (tʃɒp), ditch (dɪtʃ)\n" 1026 "ʒ ... vision (ˈvɪʒən), déjeuner (deʒøne)\n" 1027 ; 1028 static char *prkey2 = 1029 "dʒ ... judge (dʒʌdʒ)\n" 1030 "ŋ ... singing (ˈsɪŋɪŋ), think (θiŋk)\n" 1031 "ŋg ... finger (ˈfiŋgə(r))\n" 1032 "\n" 1033 "Foreign\n" 1034 "ʎ as in It. seraglio (serˈraʎo)\n" 1035 "ɲ ... Fr. cognac (kɔɲak)\n" 1036 "x ... Ger. ach (ax), Sc. loch (lɒx)\n" 1037 "ç ... Ger. ich (ɪç), Sc. nicht (nɪçt)\n" 1038 "ɣ ... North Ger. sagen (ˈzaːɣən)\n" 1039 "c ... Afrikaans baardmannetjie (ˈbaːrtmanəci)\n" 1040 "ɥ ... Fr. cuisine (kɥizin)\n" 1041 "\n" 1042 ; 1043 static char *prkey3 = 1044 "II. VOWELS AND DIPTHONGS\n" 1045 "\n" 1046 "Short\n" 1047 "ɪ as in pit (pɪt), -ness (-nɪs)\n" 1048 "ɛ ... pet (pɛt), Fr. sept (sɛt)\n" 1049 "æ ... pat (pæt)\n" 1050 "ʌ ... putt (pʌt)\n" 1051 "ɒ ... pot (pɒt)\n" 1052 "ʊ ... put (pʊt)\n" 1053 "ə ... another (əˈnʌðə(r))\n" 1054 "(ə)... beaten (ˈbiːt(ə)n)\n" 1055 "i ... Fr. si (si)\n" 1056 "e ... Fr. bébé (bebe)\n" 1057 "a ... Fr. mari (mari)\n" 1058 "ɑ ... Fr. bâtiment (bɑtimã)\n" 1059 "ɔ ... Fr. homme (ɔm)\n" 1060 "o ... Fr. eau (o)\n" 1061 "ø ... Fr. peu (pø)\n" 1062 ; 1063 static char *prkey4 = 1064 "œ ... Fr. boeuf (bœf), coeur (kœr)\n" 1065 "u ... Fr. douce (dus)\n" 1066 "ʏ ... Ger. Müller (ˈmʏlər)\n" 1067 "y ... Fr. du (dy)\n" 1068 "\n" 1069 "Long\n" 1070 "iː as in bean (biːn)\n" 1071 "ɑː ... barn (bɑːn)\n" 1072 "ɔː ... born (bɔːn)\n" 1073 "uː ... boon (buːn)\n" 1074 "ɜː ... burn (bɜːn)\n" 1075 "eː ... Ger. Schnee (ʃneː)\n" 1076 "ɛː ... Ger. Fähre (ˈfɛːrə)\n" 1077 "aː ... Ger. Tag (taːk)\n" 1078 "oː ... Ger. Sohn (zoːn)\n" 1079 "øː ... Ger. Goethe (gøːtə)\n" 1080 "yː ... Ger. grün (gryːn)\n" 1081 "\n" 1082 ; 1083 static char *prkey5 = 1084 "Nasal\n" 1085 "ɛ˜, æ˜ as in Fr. fin (fɛ˜, fæ˜)\n" 1086 "ã ... Fr. franc (frã)\n" 1087 "ɔ˜ ... Fr. bon (bɔ˜n)\n" 1088 "œ˜ ... Fr. un (œ˜)\n" 1089 "\n" 1090 "Dipthongs, etc.\n" 1091 "eɪ as in bay (beɪ)\n" 1092 "aɪ ... buy (baɪ)\n" 1093 "ɔɪ ... boy (bɔɪ)\n" 1094 "əʊ ... no (nəʊ)\n" 1095 "aʊ ... now (naʊ)\n" 1096 "ɪə ... peer (pɪə(r))\n" 1097 "ɛə ... pair (pɛə(r))\n" 1098 "ʊə ... tour (tʊə(r))\n" 1099 "ɔə ... boar (bɔə(r))\n" 1100 "\n" 1101 ; 1102 static char *prkey6 = 1103 "III. STRESS\n" 1104 "\n" 1105 "Main stress: ˈ preceding stressed syllable\n" 1106 "Secondary stress: ˌ preceding stressed syllable\n" 1107 "\n" 1108 "E.g.: pronunciation (prəˌnʌnsɪˈeɪʃ(ə)n)\n"; 1109 /* TODO: find transcriptions of foreign consonents, œ, ʏ, nasals */ 1110 1111 void 1112 pgwprintkey(void) 1113 { 1114 Bprint(bout, "%s%s%s%s%s%s", 1115 prkey1, prkey2, prkey3, prkey4, prkey5, prkey6); 1116 } 1117 1118 /* 1119 * f points just after a '&', fe points at end of entry. 1120 * Accumulate the special name, starting after the & 1121 * and continuing until the next ';', in spec[]. 1122 * Return pointer to char after ';'. 1123 */ 1124 static char * 1125 getspec(char *f, char *fe) 1126 { 1127 char *t; 1128 int c, i; 1129 1130 t = spec; 1131 i = sizeof spec; 1132 while(--i > 0) { 1133 c = *f++; 1134 if(c == ';' || f == fe) 1135 break; 1136 *t++ = c; 1137 } 1138 *t = 0; 1139 return f; 1140 } 1141 1142 /* 1143 * f points just after '<'; fe points at end of entry. 1144 * Expect next characters from bin to match: 1145 * [/][^ >]+( [^>=]+=[^ >]+)*> 1146 * tag auxname auxval 1147 * Accumulate the tag and its auxilliary information in 1148 * tag[], auxname[][] and auxval[][]. 1149 * Set tagstarts=1 if the tag is 'starting' (has no '/'), else 0. 1150 * Set naux to the number of aux pairs found. 1151 * Return pointer to after final '>'. 1152 */ 1153 static char * 1154 gettag(char *f, char *fe) 1155 { 1156 char *t; 1157 int c, i; 1158 1159 t = tag; 1160 c = *f++; 1161 if(c == '/') 1162 tagstarts = 0; 1163 else { 1164 tagstarts = 1; 1165 *t++ = c; 1166 } 1167 i = Buflen; 1168 while(--i > 0) { 1169 c = *f++; 1170 if(c == '>' || f == fe) 1171 break; 1172 *t++ = c; 1173 } 1174 *t = 0; 1175 return f; 1176 }