plan9port

fork of plan9port with libvec, libstr and libsdb
Log | Files | Refs | README | LICENSE

utils.c (9991B)


      1 #include <u.h>
      2 #include <limits.h>
      3 #include <libc.h>
      4 #include <draw.h>
      5 #include <html.h>
      6 #include "impl.h"
      7 
      8 Rune whitespace[] = { ' ', '\t', '\n', '\r', '\0' };
      9 Rune notwhitespace[] = { '^', ' ', '\t', '\n', '\r' , '\0'};
     10 
     11 /* All lists start out like List structure. */
     12 /* List itself can be used as list of int. */
     13 int
     14 _listlen(List* l)
     15 {
     16 	int n = 0;
     17 
     18 	while(l != nil) {
     19 		l = l->next;
     20 		n++;
     21 	}
     22 	return n;
     23 }
     24 
     25 /* Cons */
     26 List*
     27 _newlist(int val, List* rest)
     28 {
     29 	List* ans;
     30 
     31 	ans = (List*)emalloc(sizeof(List));
     32 	ans->val = val;
     33 	ans->next = rest;
     34 	return ans;
     35 }
     36 
     37 /* Reverse a list in place */
     38 List*
     39 _revlist(List* l)
     40 {
     41 	List* newl;
     42 	List* nextl;
     43 
     44 	newl = nil;
     45 	while(l != nil) {
     46 		nextl = l->next;
     47 		l->next = newl;
     48 		newl = l;
     49 		l = nextl;
     50 	}
     51 	return newl;
     52 }
     53 
/* The next few routines take a "character class" as argument. */
/*    e.g., "a-zA-Z", or "^ \t\n" */
/* (ranges indicated by - except in first position; */
/*  ^ in first position means "not in" the following class) */
     58 
     59 /* Splitl splits s[0:n] just before first character of class cl. */
     60 /* Answers go in (p1, n1) and (p2, n2). */
     61 /* If no split, the whole thing goes in the first component. */
     62 /* Note: answers contain pointers into original string. */
     63 void
     64 _splitl(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2)
     65 {
     66 	Rune* p;
     67 
     68 	p = _Strnclass(s, cl, n);
     69 	*p1 = s;
     70 	if(p == nil) {
     71 		*n1 = n;
     72 		*p2 = nil;
     73 		*n2 = 0;
     74 	}
     75 	else {
     76 		*p2 = p;
     77 		*n1 = p-s;
     78 		*n2 = n-*n1;
     79 	}
     80 }
     81 
     82 /* Splitr splits s[0:n] just after last character of class cl. */
     83 /* Answers go in (p1, n1) and (p2, n2). */
     84 /* If no split, the whole thing goes in the last component. */
     85 /* Note: answers contain pointers into original string. */
     86 void
     87 _splitr(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2)
     88 {
     89 	Rune* p;
     90 
     91 	p = _Strnrclass(s, cl, n);
     92 	if(p == nil) {
     93 		*p1 = nil;
     94 		*n1 = 0;
     95 		*p2 = s;
     96 		*n2 = n;
     97 	}
     98 	else {
     99 		*p1 = s;
    100 		*p2 = p+1;
    101 		*n1 = *p2-s;
    102 		*n2 = n-*n1;
    103 	}
    104 }
    105 
    106 /* Splitall splits s[0:n] into parts that are separated by characters from class cl. */
    107 /* Each part will have nonzero length. */
    108 /* At most alen parts are found, and pointers to their starts go into */
    109 /* the strarr array, while their lengths go into the lenarr array. */
    110 /* The return value is the number of parts found. */
    111 int
    112 _splitall(Rune* s, int n, Rune* cl, Rune** strarr, int* lenarr, int alen)
    113 {
    114 	int i;
    115 	Rune* p;
    116 	Rune* q;
    117 	Rune* slast;
    118 
    119 	if(s == nil || n == 0)
    120 		return 0;
    121 	i = 0;
    122 	p = s;
    123 	slast = s+n;
    124 	while(p < slast && i < alen) {
    125 		while(p < slast && _inclass(*p, cl))
    126 			p++;
    127 		if(p == slast)
    128 			break;
    129 		q = _Strnclass(p, cl, slast-p);
    130 		if(q == nil)
    131 			q = slast;
    132 		assert(q > p && q <= slast);
    133 		strarr[i] = p;
    134 		lenarr[i] = q-p;
    135 		i++;
    136 		p = q;
    137 	}
    138 	return i;
    139 }
    140 
    141 /* Find part of s that excludes leading and trailing whitespace, */
    142 /* and return that part in *pans (and its length in *panslen). */
    143 void
    144 _trimwhite(Rune* s, int n, Rune** pans, int* panslen)
    145 {
    146 	Rune* p;
    147 	Rune* q;
    148 
    149 	p = nil;
    150 	if(n > 0) {
    151 		p = _Strnclass(s, notwhitespace, n);
    152 		if(p != nil) {
    153 			q = _Strnrclass(s, notwhitespace, n);
    154 			assert(q != nil);
    155 			n = q+1-p;
    156 		}
    157 	}
    158 	*pans = p;
    159 	*panslen = n;
    160 }
    161 
    162 /* _Strclass returns a pointer to the first element of s that is */
    163 /* a member of class cl, nil if none. */
    164 Rune*
    165 _Strclass(Rune* s, Rune* cl)
    166 {
    167 	Rune* p;
    168 
    169 	for(p = s; *p != 0; p++)
    170 		if(_inclass(*p, cl))
    171 			return p;
    172 	return nil;
    173 }
    174 
    175 /* _Strnclass returns a pointer to the first element of s[0:n] that is */
    176 /* a member of class cl, nil if none. */
    177 Rune*
    178 _Strnclass(Rune* s, Rune* cl, int n)
    179 {
    180 	Rune* p;
    181 
    182 	for(p = s; n-- && *p != 0; p++)
    183 		if(_inclass(*p, cl))
    184 			return p;
    185 	return nil;
    186 }
    187 
    188 /* _Strrclass returns a pointer to the last element of s that is */
    189 /* a member of class cl, nil if none */
    190 Rune*
    191 _Strrclass(Rune* s, Rune* cl)
    192 {
    193 	Rune* p;
    194 
    195 	if(s == nil || *s == 0)
    196 		return nil;
    197 	p = s + runestrlen(s) - 1;
    198 	while(p >= s) {
    199 		if(_inclass(*p, cl))
    200 			return p;
    201 		p--;
    202 	};
    203 	return nil;
    204 }
    205 
    206 /* _Strnrclass returns a pointer to the last element of s[0:n] that is */
    207 /* a member of class cl, nil if none */
    208 Rune*
    209 _Strnrclass(Rune* s, Rune* cl, int n)
    210 {
    211 	Rune* p;
    212 
    213 	if(s == nil || *s == 0 || n == 0)
    214 		return nil;
    215 	p = s + n - 1;
    216 	while(p >= s) {
    217 		if(_inclass(*p, cl))
    218 			return p;
    219 		p--;
    220 	};
    221 	return nil;
    222 }
    223 
    224 /* Is c in the class cl? */
    225 int
    226 _inclass(Rune c, Rune* cl)
    227 {
    228 	int	n;
    229 	int	ans;
    230 	int	negate;
    231 	int	i;
    232 
    233 	n = _Strlen(cl);
    234 	if(n == 0)
    235 		return 0;
    236 	ans = 0;
    237 	negate = 0;
    238 	if(cl[0] == '^') {
    239 		negate = 1;
    240 		cl++;
    241 		n--;
    242 	}
    243 	for(i = 0; i < n; i++) {
    244 		if(cl[i] == '-' && i > 0 && i < n - 1) {
    245 			if(c >= cl[i - 1] && c <= cl[i + 1]) {
    246 				ans = 1;
    247 				break;
    248 			}
    249 			i++;
    250 		}
    251 		else if(c == cl[i]) {
    252 			ans = 1;
    253 			break;
    254 		}
    255 	}
    256 	if(negate)
    257 		ans = !ans;
    258 	return ans;
    259 }
    260 
    261 /* Is pre a prefix of s? */
    262 int
    263 _prefix(Rune* pre, Rune* s)
    264 {
    265 	int	ns;
    266 	int	n;
    267 	int	k;
    268 
    269 	ns = _Strlen(s);
    270 	n = _Strlen(pre);
    271 	if(ns < n)
    272 		return 0;
    273 	for(k = 0; k < n; k++) {
    274 		if(pre[k] != s[k])
    275 			return 0;
    276 	}
    277 	return 1;
    278 }
    279 
    280 /* Number of runes in (null-terminated) s */
    281 int
    282 _Strlen(Rune* s)
    283 {
    284 	if(s == nil)
    285 		return 0;
    286 	return runestrlen(s);
    287 }
    288 
    289 /* -1, 0, 1 as s1 is lexicographically less, equal greater than s2 */
    290 int
    291 _Strcmp(Rune *s1, Rune *s2)
    292 {
    293 	if(s1 == nil)
    294 		return (s2 == nil || *s2 == 0) ? 0 : -1;
    295 	if(s2 == nil)
    296 		return (*s1 == 0) ? 0 : 1;
    297 	return runestrcmp(s1, s2);
    298 }
    299 
    300 /* Like Strcmp, but use exactly n chars of s1 (assume s1 has at least n chars). */
    301 /* Also, do a case-insensitive match, assuming s2 */
    302 /* has no chars in [A-Z], only their lowercase versions. */
    303 /* (This routine is used for in-place keyword lookup, where s2 is in a keyword */
    304 /* list and s1 is some substring, possibly mixed-case, in a buffer.) */
    305 int
    306 _Strncmpci(Rune *s1, int n1, Rune *s2)
    307 {
    308 	Rune c1, c2;
    309 
    310 	for(;;) {
    311 		if(n1-- == 0) {
    312 			if(*s2 == 0)
    313 				return 0;
    314 			return -1;
    315 		}
    316 		c1 = *s1++;
    317 		c2 = *s2++;
    318 		if(c1 >= 'A' && c1 <= 'Z')
    319 			c1 = c1 - 'A' + 'a';
    320 		if(c1 != c2) {
    321 			if(c1 > c2)
    322 				return 1;
    323 			return -1;
    324 		}
    325 	}
    326 }
    327 
    328 /* emalloc and copy */
    329 Rune*
    330 _Strdup(Rune* s)
    331 {
    332 	if(s == nil)
    333 		return nil;
    334 	return _Strndup(s, runestrlen(s));
    335 }
    336 
    337 /* emalloc and copy n chars of s (assume s is at least that long), */
    338 /* and add 0 terminator. */
    339 /* Return nil if n==0. */
    340 Rune*
    341 _Strndup(Rune* s, int n)
    342 {
    343 	Rune* ans;
    344 
    345 	if(n <= 0)
    346 		return nil;
    347 	ans = _newstr(n);
    348 	memmove(ans, s, n*sizeof(Rune));
    349 	ans[n] = 0;
    350 	return ans;
    351 }
    352 /* emalloc enough room for n Runes, plus 1 null terminator. */
    353 /* (Not initialized to anything.) */
    354 Rune*
    355 _newstr(int n)
    356 {
    357 	return (Rune*)emalloc((n+1)*sizeof(Rune));
    358 }
    359 
    360 /* emalloc and copy s+t */
    361 Rune*
    362 _Strdup2(Rune* s, Rune* t)
    363 {
    364 	int ns, nt;
    365 	Rune* ans;
    366 	Rune* p;
    367 
    368 	ns = _Strlen(s);
    369 	nt = _Strlen(t);
    370 	if(ns+nt == 0)
    371 		return nil;
    372 	ans = _newstr(ns+nt);
    373 	p = _Stradd(ans, s, ns);
    374 	p = _Stradd(p, t, nt);
    375 	*p = 0;
    376 	return ans;
    377 }
    378 
    379 /* Return emalloc'd substring s[start:stop], */
    380 Rune*
    381 _Strsubstr(Rune* s, int start, int stop)
    382 {
    383 	Rune* t;
    384 
    385 	if(start == stop)
    386 		return nil;
    387 	t = _Strndup(s+start, stop-start);
    388 	return t;
    389 }
    390 
    391 /* Copy n chars to s1 from s2, and return s1+n */
    392 Rune*
    393 _Stradd(Rune* s1, Rune* s2, int n)
    394 {
    395 	if(n == 0)
    396 		return s1;
    397 	memmove(s1, s2, n*sizeof(Rune));
    398 	return s1+n;
    399 }
    400 
    401 /* Like strtol, but converting from Rune* string */
    402 
    403 /*#define LONG_MAX	2147483647L */
    404 /*#define LONG_MIN	-2147483648L */
    405 
    406 long
    407 _Strtol(Rune* nptr, Rune** endptr, int base)
    408 {
    409 	Rune* p;
    410 	long n, nn;
    411 	int c, ovfl, v, neg, ndig;
    412 
    413 	p = nptr;
    414 	neg = 0;
    415 	n = 0;
    416 	ndig = 0;
    417 	ovfl = 0;
    418 
    419 	/*
    420 	 * White space
    421 	 */
    422 	for(;;p++){
    423 		switch(*p){
    424 		case ' ':
    425 		case '\t':
    426 		case '\n':
    427 		case '\f':
    428 		case '\r':
    429 		case '\v':
    430 			continue;
    431 		}
    432 		break;
    433 	}
    434 
    435 	/*
    436 	 * Sign
    437 	 */
    438 	if(*p=='-' || *p=='+')
    439 		if(*p++ == '-')
    440 			neg = 1;
    441 
    442 	/*
    443 	 * Base
    444 	 */
    445 	if(base==0){
    446 		if(*p != '0')
    447 			base = 10;
    448 		else{
    449 			base = 8;
    450 			if(p[1]=='x' || p[1]=='X'){
    451 				p += 2;
    452 				base = 16;
    453 			}
    454 		}
    455 	}else if(base==16 && *p=='0'){
    456 		if(p[1]=='x' || p[1]=='X')
    457 			p += 2;
    458 	}else if(base<0 || 36<base)
    459 		goto Return;
    460 
    461 	/*
    462 	 * Non-empty sequence of digits
    463 	 */
    464 	for(;; p++,ndig++){
    465 		c = *p;
    466 		v = base;
    467 		if('0'<=c && c<='9')
    468 			v = c - '0';
    469 		else if('a'<=c && c<='z')
    470 			v = c - 'a' + 10;
    471 		else if('A'<=c && c<='Z')
    472 			v = c - 'A' + 10;
    473 		if(v >= base)
    474 			break;
    475 		nn = n*base + v;
    476 		if(nn < n)
    477 			ovfl = 1;
    478 		n = nn;
    479 	}
    480 
    481     Return:
    482 	if(ndig == 0)
    483 		p = nptr;
    484 	if(endptr)
    485 		*endptr = p;
    486 	if(ovfl){
    487 		if(neg)
    488 			return LONG_MIN;
    489 		return LONG_MAX;
    490 	}
    491 	if(neg)
    492 		return -n;
    493 	return n;
    494 }
    495 
    496 /* Convert buf[0:n], bytes whose character set is chset, */
    497 /* into a emalloc'd null-terminated Unicode string. */
    498 Rune*
    499 toStr(uchar* buf, int n, int chset)
    500 {
    501 	int i;
    502 	int m;
    503 	Rune ch;
    504 	Rune* ans;
    505 
    506 	switch(chset) {
    507 	case US_Ascii:
    508 	case ISO_8859_1:
    509 		ans = (Rune*)emalloc((n+1)*sizeof(Rune));
    510 		for(i = 0; i < n; i++)
    511 			ans[i] = buf[i];
    512 		ans[n] = 0;
    513 		break;
    514 
    515 	case UTF_8:
    516 		m = 0;
    517 		for(i = 0; i < n; ) {
    518 			i += chartorune(&ch, (char*)(buf+i));
    519 			m++;
    520 		}
    521 		ans = (Rune*)emalloc((m+1)*sizeof(Rune));
    522 		m = 0;
    523 		for(i = 0; i < n; ) {
    524 			i += chartorune(&ch, (char*)(buf+i));
    525 			ans[m++] = ch;
    526 		}
    527 		ans[m] = 0;
    528 		break;
    529 
    530 	default:
    531 		ans = nil;
    532 		assert(0);
    533 	}
    534 	return ans;
    535 }
    536 
    537 /* Convert buf[0:n], Unicode characters, */
    538 /* into an emalloc'd null-terminated string in character set chset. */
    539 /* Use 0x80 for unconvertable characters. */
    540 uchar*
    541 fromStr(Rune* buf, int n, int chset)
    542 {
    543 	uchar* ans;
    544 	int i, lim, m;
    545 	Rune ch;
    546 	uchar* p;
    547 	uchar s[UTFmax];
    548 
    549 	ans = nil;
    550 	switch(chset) {
    551 	case US_Ascii:
    552 	case ISO_8859_1:
    553 		ans = (uchar*)emalloc(n+1);
    554 		lim = (chset==US_Ascii)? 127 : 255;
    555 		for(i = 0; i < n; i++) {
    556 			ch = buf[i];
    557 			if(ch > lim)
    558 				ch = 0x80;
    559 			ans[i] = ch;
    560 		}
    561 		ans[n] = 0;
    562 		break;
    563 
    564 	case UTF_8:
    565 		m = 0;
    566 		for(i = 0; i < n; i++) {
    567 			m += runetochar((char*)s, &buf[i]);
    568 		}
    569 		ans = (uchar*)emalloc(m+1);
    570 		p = ans;
    571 		for(i = 0; i < n; i++)
    572 			p += runetochar((char*)p, &buf[i]);
    573 		*p = 0;
    574 		break;
    575 
    576 	default:
    577 		assert(0);
    578 	}
    579 	return ans;
    580 
    581 }
    582 
    583 /* Convert n to emalloc'd String. */
    584 Rune*
    585 _ltoStr(int n)
    586 {
    587 	int m;
    588 	uchar buf[20];
    589 
    590 	m = snprint((char*)buf, sizeof(buf), "%d", n);
    591 	return toStr(buf, m, US_Ascii);
    592 }