plan9port

fork of plan9port with libvec, libstr and libsdb
Log | Files | Refs | README | LICENSE

mbwc.c (2253B)


      1 #include <stdlib.h>
      2 
      3 /*
      4  * Use the FSS-UTF transformation proposed by posix.
      5  *	We define 4 byte types:
      6  *	T0	0xxxxxxx	7 free bits
      7  *	Tx	10xxxxxx	6 free bits
      8  *	T1	110xxxxx	5 free bits
      9  *	T2	1110xxxx	4 free bits
     10  *
     11  *	Encoding is as follows.
     12  *	From hex	Thru hex	Sequence		Bits
     13  *	00000000	0000007F	T0			7
     14  *	00000080	000007FF	T1 Tx			11
     15  *	00000800	0000FFFF	T2 Tx Tx		16
     16  */
     17 
     18 int
     19 mblen(const char *s, size_t n)
     20 {
     21 
     22 	return mbtowc(0, s, n);
     23 }
     24 
     25 int
     26 mbtowc(wchar_t *pwc, const char *s, size_t n)
     27 {
     28 	int c, c1, c2;
     29 	long l;
     30 
     31 	if(!s)
     32 		return 0;
     33 
     34 	if(n < 1)
     35 		goto bad;
     36 	c = s[0] & 0xff;
     37 	if((c & 0x80) == 0x00) {
     38 		if(pwc)
     39 			*pwc = c;
     40 		if(c == 0)
     41 			return 0;
     42 		return 1;
     43 	}
     44 
     45 	if(n < 2)
     46 		goto bad;
     47 	c1 = (s[1] ^ 0x80) & 0xff;
     48 	if((c1 & 0xC0) != 0x00)
     49 		goto bad;
     50 	if((c & 0xE0) == 0xC0) {
     51 		l = ((c << 6) | c1) & 0x7FF;
     52 		if(l < 0x080)
     53 			goto bad;
     54 		if(pwc)
     55 			*pwc = l;
     56 		return 2;
     57 	}
     58 
     59 	if(n < 3)
     60 		goto bad;
     61 	c2 = (s[2] ^ 0x80) & 0xff;
     62 	if((c2 & 0xC0) != 0x00)
     63 		goto bad;
     64 	if((c & 0xF0) == 0xE0) {
     65 		l = ((((c << 6) | c1) << 6) | c2) & 0xFFFF;
     66 		if(l < 0x0800)
     67 			goto bad;
     68 		if(pwc)
     69 			*pwc = l;
     70 		return 3;
     71 	}
     72 
     73 	/*
     74 	 * bad decoding
     75 	 */
     76 bad:
     77 	return -1;
     78 
     79 }
     80 
     81 int
     82 wctomb(char *s, wchar_t wchar)
     83 {
     84 	long c;
     85 
     86 	if(!s)
     87 		return 0;
     88 
     89 	c = wchar & 0xFFFF;
     90 	if(c < 0x80) {
     91 		s[0] = c;
     92 		return 1;
     93 	}
     94 
     95 	if(c < 0x800) {
     96 		s[0] = 0xC0 | (c >> 6);
     97 		s[1] = 0x80 | (c & 0x3F);
     98 		return 2;
     99 	}
    100 
    101 	s[0] = 0xE0 |  (c >> 12);
    102 	s[1] = 0x80 | ((c >> 6) & 0x3F);
    103 	s[2] = 0x80 |  (c & 0x3F);
    104 	return 3;
    105 }
    106 
    107 size_t
    108 mbstowcs(wchar_t *pwcs, const char *s, size_t n)
    109 {
    110 	int i, d, c;
    111 
    112 	for(i=0; i < n; i++) {
    113 		c = *s & 0xff;
    114 		if(c < 0x80) {
    115 			*pwcs = c;
    116 			if(c == 0)
    117 				break;
    118 			s++;
    119 		} else {
    120 			d = mbtowc(pwcs, s, 3);
    121 			if(d <= 0)
    122 				return (size_t)((d<0) ? -1 : i);
    123 			s += d;
    124 		}
    125 		pwcs++;
    126 	}
    127 	return i;
    128 }
    129 
    130 size_t
    131 wcstombs(char *s, const wchar_t *pwcs, size_t n)
    132 {
    133 	int d;
    134 	long c;
    135 	char *p, *pe;
    136 	char buf[3];
    137 
    138 	p = s;
    139 	pe = p+n-3;
    140 	while(p < pe) {
    141 		c = *pwcs++;
    142 		if(c < 0x80)
    143 			*p++ = c;
    144 		else
    145 			p += wctomb(p, c);
    146 		if(c == 0)
    147 			return p-s;
    148 	}
    149 	while(p < pe+3) {
    150 		c = *pwcs++;
    151 		d = wctomb(buf, c);
    152 		if(p+d <= pe+3) {
    153 			*p++ = buf[0];
    154 			if(d > 1) {
    155 				*p++ = buf[1];
    156 				if(d > 2)
    157 					*p++ = buf[2];
    158 			}
    159 		}
    160 		if(c == 0)
    161 			break;
    162 	}
    163 	return p-s;
    164 }