plan9port

fork of plan9port with libvec, libstr and libsdb
Log | Files | Refs | README | LICENSE

rune.c (2136B)


      1 #include	"rune.h"
      2 
      3 enum
      4 {
      5 	Bit1	= 7,
      6 	Bitx	= 6,
      7 	Bit2	= 5,
      8 	Bit3	= 4,
      9 	Bit4	= 3,
     10 
     11 	T1	= ((1<<(Bit1+1))-1) ^ 0xFF,	/* 0000 0000 */
     12 	Tx	= ((1<<(Bitx+1))-1) ^ 0xFF,	/* 1000 0000 */
     13 	T2	= ((1<<(Bit2+1))-1) ^ 0xFF,	/* 1100 0000 */
     14 	T3	= ((1<<(Bit3+1))-1) ^ 0xFF,	/* 1110 0000 */
     15 	T4	= ((1<<(Bit4+1))-1) ^ 0xFF,	/* 1111 0000 */
     16 
     17 	Rune1	= (1<<(Bit1+0*Bitx))-1,		/* 0000 0000 0111 1111 */
     18 	Rune2	= (1<<(Bit2+1*Bitx))-1,		/* 0000 0111 1111 1111 */
     19 	Rune3	= (1<<(Bit3+2*Bitx))-1,		/* 1111 1111 1111 1111 */
     20 
     21 	Maskx	= (1<<Bitx)-1,			/* 0011 1111 */
     22 	Testx	= Maskx ^ 0xFF,			/* 1100 0000 */
     23 
     24 	Bad	= Runeerror
     25 };
     26 
     27 int
     28 chartorune(Rune *rune, char *str)
     29 {
     30 	int c, c1, c2;
     31 	long l;
     32 
     33 	/*
     34 	 * one character sequence
     35 	 *	00000-0007F => T1
     36 	 */
     37 	c = *(unsigned char*)str;
     38 	if(c < Tx) {
     39 		*rune = c;
     40 		return 1;
     41 	}
     42 
     43 	/*
     44 	 * two character sequence
     45 	 *	0080-07FF => T2 Tx
     46 	 */
     47 	c1 = *(unsigned char*)(str+1) ^ Tx;
     48 	if(c1 & Testx)
     49 		goto bad;
     50 	if(c < T3) {
     51 		if(c < T2)
     52 			goto bad;
     53 		l = ((c << Bitx) | c1) & Rune2;
     54 		if(l <= Rune1)
     55 			goto bad;
     56 		*rune = l;
     57 		return 2;
     58 	}
     59 
     60 	/*
     61 	 * three character sequence
     62 	 *	0800-FFFF => T3 Tx Tx
     63 	 */
     64 	c2 = *(unsigned char*)(str+2) ^ Tx;
     65 	if(c2 & Testx)
     66 		goto bad;
     67 	if(c < T4) {
     68 		l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
     69 		if(l <= Rune2)
     70 			goto bad;
     71 		*rune = l;
     72 		return 3;
     73 	}
     74 
     75 	/*
     76 	 * bad decoding
     77 	 */
     78 bad:
     79 	*rune = Bad;
     80 	return 1;
     81 }
     82 
     83 int
     84 runetochar(char *str, Rune *rune)
     85 {
     86 	long c;
     87 
     88 	/*
     89 	 * one character sequence
     90 	 *	00000-0007F => 00-7F
     91 	 */
     92 	c = *rune;
     93 	if(c <= Rune1) {
     94 		str[0] = c;
     95 		return 1;
     96 	}
     97 
     98 	/*
     99 	 * two character sequence
    100 	 *	0080-07FF => T2 Tx
    101 	 */
    102 	if(c <= Rune2) {
    103 		str[0] = T2 | (c >> 1*Bitx);
    104 		str[1] = Tx | (c & Maskx);
    105 		return 2;
    106 	}
    107 
    108 	/*
    109 	 * three character sequence
    110 	 *	0800-FFFF => T3 Tx Tx
    111 	 */
    112 	str[0] = T3 |  (c >> 2*Bitx);
    113 	str[1] = Tx | ((c >> 1*Bitx) & Maskx);
    114 	str[2] = Tx |  (c & Maskx);
    115 	return 3;
    116 }
    117 
    118 int
    119 runelen(long c)
    120 {
    121 	Rune rune;
    122 	char str[10];
    123 
    124 	rune = c;
    125 	return runetochar(str, &rune);
    126 }
    127 
    128 int
    129 fullrune(char *str, int n)
    130 {
    131 	int c;
    132 
    133 	if(n > 0) {
    134 		c = *(unsigned char*)str;
    135 		if(c < Tx)
    136 			return 1;
    137 		if(n > 1)
    138 			if(c < T3 || n > 2)
    139 				return 1;
    140 	}
    141 	return 0;
    142 }