plan9port

fork of plan9port with libvec, libstr and libsdb
Log | Files | Refs | README | LICENSE

world.c (4215B)


      1 #include <u.h>
      2 #include <libc.h>
      3 #include <bio.h>
      4 #include "dict.h"
      5 #include "kuten.h"
      6 
      7 /*
      8  * Routines for handling dictionaries in the "Languages of the World"
      9  * format.  worldnextoff *must* be called with <address of valid entry>+1.
     10  */
     11 
     12 #define	GSHORT(p)	(((p)[0]<<8)|(p)[1])
     13 
     14 #define putchar dictputchar
     15 
     16 static void	putchar(int, int*);
     17 
     18 #define	NONE	0xffff
     19 
     20 /* adapted from jhelling@cs.ruu.nl (Jeroen Hellingman) */
     21 
     22 static Rune chartab[] = {
     23 
     24 /*00*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
     25 	NONE,	NONE,'\n',	0xe6,	0xf8,	0xe5,	0xe4,	0xf6,
     26 /*10*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
     27 	NONE,	NONE,	NONE,	0xc6,	0xd8,	0xc5,	0xc4,	0xd6,
     28 
     29 /*20*/	0x20,	0x21,	0x22,	0x23,	0x24,	0x25,	0x26,	'\'',
     30 	0x28,	0x29,	0x2a,	0x2b,	0x2c,	0x2d,	0x2e,	0x2f,
     31 /*30*/  0x30,	0x31,	0x32,	0x33,	0x34,	0x35,	0x36,	0x37,
     32 	0x38,	0x39,	0x3a,	0x3b,	0x3c,	0x3d,	0x3e,	0x3f,
     33 /*40*/  0x40,	0x41,	0x42,	0x43,	0x44,	0x45,	0x46,	0x47,
     34 	0x48,	0x49,	0x4a,	0x4b,'L',	0x4d,	0x4e,	0x4f,
     35 /*50*/	0x50,	0x51,	0x52,	0x53,	0x54,	0x55,	0x56,	0x57,
     36 	0x58,	0x59,	0x5a,	0x5b,'\\',	0x5d,	0x5e,	0x5f,
     37 /*60*/	0x60,	0x61,	0x62,	0x63,	0x64,	0x65,	0x66,	0x67,
     38 	0x68,	0x69,	0x6a,	0x6b,	0x6c,	0x6d,	0x6e,	0x6f,
     39 /*70*/	0x70,	0x71,	0x72,	0x73,	0x74,	0x75,	0x76,	0x77,
     40 	0x78,	0x79,	0x7a,	0x7b,	0x7c,	0x7d,	0x7e,	NONE,
     41 
     42 /*80*/	0xc7,	0xfc,	0xe9,	0xe2,	0xe4,	0xe0,	0xe5,	0xe7,
     43 	0xea,	0xeb,	0xe8,	0xef,	0xee,	0xec,	0xc4,	0xc5,
     44 /*90*/	0xc9,	0xe6,	0xc6,	0xf4,	0xf6,	0xf2,	0xfb,	0xf9,
     45 	0xff,	0xd6,	0xdc,	0xa2,	0xa3,	0xa5,	0x20a7,	0x283,
     46 /*a0*/	0xe1,	0xed,	0xf3,	0xfa,	0xf1,	0xd1,	0xaa,	0xba,
     47 	0xbf,	0x2310,	0xac,	0xbd,	0xbc,	0xa1,	0xab,	0xbb,
     48 
     49 /*b0*/	0x254,	0x259,	0xf0,	0x283,	0x292,	0x14b,	0x251,	0x7a,
     50 	0x26a,	0xf0,	0x292,	0xe3,	0x153,	0x169,	0x28c,	0x265,
     51 /*c0*/	0x280,	0xeb,	0x6c,	0x28c,	0xf5,	0xf1,	0x152,	NONE,
     52 	NONE,	0x53,	0x73,	0x5a,	0x7a,	NONE,	NONE,	NONE,
     53 /*d0*/	0xdf,	NONE,	NONE,	0x101,	0x12b,	0x16b,	0x113,	0x14d,
     54 	NONE,	NONE,	NONE,	0x20,	NONE,	NONE,	NONE,	NONE,
     55 
     56 /*e0*/	0x3b1,	0x3b2,	0x3b3,	0x3c0,	0x3a3,	0x3c3,	0xb5,	0x3c4,
     57 	0x3a6,	0x398,	0x3a9,	0x3b4,	0x221e,	0xd8,	0x3b5,	0x2229,
     58 /*f0*/	0x2261,	0xb1,	0x2265,	0x2264,	0x2320,	0x2321,	0xf7,	0x2248,
     59 	0xb0,	0x2219,	0xb7,	NONE,	NONE,	NONE,	NONE,	NONE
     60 };
     61 
     62 enum{ Utf, Kanahi, Kanalo=Kanahi+1, GBhi, GBlo=GBhi+1 };
     63 
     64 void
     65 worldprintentry(Entry e, int cmd)
     66 {
     67 	int nh, state[3];
     68 	uchar *p, *pe;
     69 
     70 	p = (uchar *)e.start;
     71 	pe = (uchar *)e.end;
     72 	nh = GSHORT(p);
     73 	p += 6;
     74 	if(cmd == 'h')
     75 		pe = p+nh;
     76 	state[0] = Utf;
     77 	state[1] = 0;
     78 	state[2] = 0;
     79 	while(p < pe){
     80 		if(cmd == 'r')
     81 			outchar(*p++);
     82 		else
     83 			putchar(*p++, state);
     84 	}
     85 	outnl(0);
     86 }
     87 
     88 long
     89 worldnextoff(long fromoff)
     90 {
     91 	int nh, np, nd;
     92 	uchar buf[6];
     93 
     94 	if(Bseek(bdict, fromoff-1, 0) < 0)
     95 		return -1;
     96 	if(Bread(bdict, buf, 6) != 6)
     97 		return -1;
     98 	nh = GSHORT(buf);
     99 	np = GSHORT(buf+2);
    100 	nd = GSHORT(buf+4);
    101 	return fromoff-1 + 6 + nh + np + nd;
    102 }
    103 
    104 static void
    105 putchar(int c, int *state)
    106 {
    107 	int xflag = 0;
    108 	Rune r;
    109 	int hi, lo;
    110 
    111 	switch(state[0]){
    112 	case Kanahi:
    113 	case GBhi:
    114 		if(CANS2JH(c) || c == 0xff){
    115 			state[0]++;
    116 			state[1] = c;
    117 			break;
    118 		}
    119 		/* fall through */
    120 	case Utf:
    121 		if(c == 0xfe){
    122 			state[0] = Kanahi;
    123 			break;
    124 		}else if(c == 0xff){
    125 			state[0] = GBhi;
    126 			break;
    127 		}
    128 		r = chartab[c];
    129 		if(r < 0x80 && state[2] == 0)
    130 			outchar(r);
    131 		else if(r == NONE){
    132 			switch(c){
    133 			case 0xfb:
    134 				if(!xflag){
    135 					state[2] = 1;
    136 					break;
    137 				}
    138 			case 0xfc:
    139 				if(!xflag){
    140 					state[2] = 0;
    141 					break;
    142 				}
    143 			case 0x10:
    144 			case 0xc7: case 0xc8:
    145 			case 0xd8: case 0xd9: case 0xda:
    146 			case 0xdc: case 0xdd: case 0xde: case 0xdf:
    147 			case 0xfd:
    148 				if(!xflag)
    149 					break;
    150 				/* fall through */
    151 			default:
    152 				outprint("\\%.2ux", c);
    153 			}
    154 		}else if(state[2] == 0)
    155 			outrune(r);
    156 		break;
    157 	case Kanalo:
    158 	case GBlo:
    159 		if(state[1] == 0xff && c == 0xff){
    160 			state[0] = Utf;
    161 			break;
    162 		}
    163 		state[0]--;
    164 		hi = state[1];
    165 		lo = c;
    166 		S2J(hi, lo);		/* convert to JIS */
    167 		r = hi*100 + lo - 3232;	/* convert to jis208 */
    168 		if(state[0] == Kanahi && r < JIS208MAX)
    169 			r = tabjis208[r];
    170 		else if(state[0] == GBhi && r < GB2312MAX)
    171 			r = tabgb2312[r];
    172 		else
    173 			r = NONE;
    174 		if(r == NONE)
    175 			outprint("\\%.2ux\\%.2ux", state[1], c);
    176 		else
    177 			outrune(r);
    178 		break;
    179 	}
    180 }
    181 
    182 void
    183 worldprintkey(void)
    184 {
    185 	Bprint(bout, "No pronunciation key.\n");
    186 }