/* mbwc.c (2253B) */
#include <stdlib.h>

/*
 * Use the FSS-UTF transformation proposed by posix.
 *	We use 4 byte types:
 *	T0	0xxxxxxx	7 free bits
 *	Tx	10xxxxxx	6 free bits
 *	T1	110xxxxx	5 free bits
 *	T2	1110xxxx	4 free bits
 *
 *	Encoding is as follows.
 *	From hex	Thru hex	Sequence		Bits
 *	00000000	0000007F	T0			7
 *	00000080	000007FF	T1 Tx			11
 *	00000800	0000FFFF	T2 Tx Tx		16
 *
 * Only values up to 0xFFFF are handled: sequences longer than three
 * bytes are rejected when decoding, and wide characters are masked
 * to 16 bits when encoding.
 */

/*
 * Return the length in bytes of the multibyte character at s,
 * examining at most n bytes.  Same results as mbtowc with no
 * destination: 0 for a null s or a NUL character, -1 for an
 * invalid or truncated sequence.
 */
int
mblen(const char *s, size_t n)
{

	return mbtowc(0, s, n);
}

/*
 * Decode one multibyte character from s, examining at most n bytes.
 * The decoded value is stored through pwc when pwc is not null.
 *
 * Returns
 *	0	if s is null (the encoding has no shift state) or the
 *		character is NUL;
 *	1..3	the number of bytes consumed;
 *	-1	if the sequence is malformed, overlong, longer than
 *		three bytes, or not complete within n bytes.
 */
int
mbtowc(wchar_t *pwc, const char *s, size_t n)
{
	int c, c1, c2;
	long l;

	if(!s)
		return 0;

	/*
	 * one byte: T0
	 */
	if(n < 1)
		goto bad;
	c = s[0] & 0xff;
	if((c & 0x80) == 0x00) {
		if(pwc)
			*pwc = c;
		if(c == 0)
			return 0;
		return 1;
	}

	/*
	 * two bytes: T1 Tx
	 * Flipping the top bit turns a valid Tx (10xxxxxx) into
	 * 00xxxxxx, so any bit left in 0xC0 means "not a Tx".
	 */
	if(n < 2)
		goto bad;
	c1 = (s[1] ^ 0x80) & 0xff;
	if((c1 & 0xC0) != 0x00)
		goto bad;
	if((c & 0xE0) == 0xC0) {
		l = ((c << 6) | c1) & 0x7FF;
		if(l < 0x080)		/* overlong encoding */
			goto bad;
		if(pwc)
			*pwc = l;
		return 2;
	}

	/*
	 * three bytes: T2 Tx Tx
	 */
	if(n < 3)
		goto bad;
	c2 = (s[2] ^ 0x80) & 0xff;
	if((c2 & 0xC0) != 0x00)
		goto bad;
	if((c & 0xF0) == 0xE0) {
		l = ((((c << 6) | c1) << 6) | c2) & 0xFFFF;
		if(l < 0x0800)		/* overlong encoding */
			goto bad;
		if(pwc)
			*pwc = l;
		return 3;
	}

	/*
	 * bad decoding
	 */
bad:
	return -1;

}

/*
 * Encode wchar into s, which must have room for up to 3 bytes.
 * Returns the number of bytes stored (1..3), or 0 if s is null
 * (the encoding has no shift state).  Only the low 16 bits of
 * wchar are encoded.
 */
int
wctomb(char *s, wchar_t wchar)
{
	long c;

	if(!s)
		return 0;

	c = wchar & 0xFFFF;
	if(c < 0x80) {
		s[0] = c;
		return 1;
	}

	if(c < 0x800) {
		s[0] = 0xC0 | (c >> 6);
		s[1] = 0x80 | (c & 0x3F);
		return 2;
	}

	s[0] = 0xE0 | (c >> 12);
	s[1] = 0x80 | ((c >> 6) & 0x3F);
	s[2] = 0x80 | (c & 0x3F);
	return 3;
}

/*
 * Convert the NUL-terminated multibyte string s to wide characters,
 * storing at most n of them in pwcs.  The terminating NUL is stored
 * if there is room for it but is never counted.
 *
 * If pwcs is null nothing is stored, n is ignored, and the number of
 * wide characters needed (excluding the terminator) is returned.
 *
 * Returns the number of wide characters converted, or (size_t)-1 if
 * s contains an invalid sequence.
 */
size_t
mbstowcs(wchar_t *pwcs, const char *s, size_t n)
{
	size_t i;
	int d, c;
	wchar_t wc;

	for(i = 0; !pwcs || i < n; i++) {
		c = *s & 0xff;
		if(c < 0x80) {
			if(pwcs)
				pwcs[i] = c;
			if(c == 0)
				break;
			s++;
		} else {
			/*
			 * A limit of 3 cannot read past the terminator:
			 * NUL is not a continuation byte, so mbtowc
			 * fails as soon as it meets one.
			 */
			d = mbtowc(&wc, s, 3);
			if(d <= 0)
				return (d < 0) ? (size_t)-1 : i;
			if(pwcs)
				pwcs[i] = wc;
			s += d;
		}
	}
	return i;
}

/*
 * Convert the NUL-terminated wide string pwcs to multibyte form,
 * storing at most n bytes in s.  Conversion stops at the first
 * character whose encoding does not fit completely in the space
 * left; a partial character is never stored and later characters
 * are never moved up to fill the gap.  The terminating NUL is
 * stored if there is room for it but is never counted.
 *
 * If s is null nothing is stored, n is ignored, and the number of
 * bytes needed (excluding the terminator) is returned.
 *
 * Returns the number of bytes produced, excluding any terminator.
 */
size_t
wcstombs(char *s, const wchar_t *pwcs, size_t n)
{
	size_t used;
	int d, j;
	long c;
	char buf[3];

	used = 0;
	for(;;) {
		c = *pwcs++;
		d = wctomb(buf, c);
		if(s) {
			/* used <= n always holds here, so n-used cannot wrap */
			if((size_t)d > n - used)
				break;
			for(j = 0; j < d; j++)
				s[used+j] = buf[j];
		}
		if(c == 0)
			break;
		used += d;
	}
	return used;
}