utils.c (9991B)
1 #include <u.h> 2 #include <limits.h> 3 #include <libc.h> 4 #include <draw.h> 5 #include <html.h> 6 #include "impl.h" 7 8 Rune whitespace[] = { ' ', '\t', '\n', '\r', '\0' }; 9 Rune notwhitespace[] = { '^', ' ', '\t', '\n', '\r' , '\0'}; 10 11 /* All lists start out like List structure. */ 12 /* List itself can be used as list of int. */ 13 int 14 _listlen(List* l) 15 { 16 int n = 0; 17 18 while(l != nil) { 19 l = l->next; 20 n++; 21 } 22 return n; 23 } 24 25 /* Cons */ 26 List* 27 _newlist(int val, List* rest) 28 { 29 List* ans; 30 31 ans = (List*)emalloc(sizeof(List)); 32 ans->val = val; 33 ans->next = rest; 34 return ans; 35 } 36 37 /* Reverse a list in place */ 38 List* 39 _revlist(List* l) 40 { 41 List* newl; 42 List* nextl; 43 44 newl = nil; 45 while(l != nil) { 46 nextl = l->next; 47 l->next = newl; 48 newl = l; 49 l = nextl; 50 } 51 return newl; 52 } 53 54 /* The next few routines take a "character class" as argument. */ 55 /* e.g., "a-zA-Z", or "^ \t\n" */ 56 /* (ranges indicated by - except in first position; */ 57 /* ^ is first position means "not in" the following class) */ 58 59 /* Splitl splits s[0:n] just before first character of class cl. */ 60 /* Answers go in (p1, n1) and (p2, n2). */ 61 /* If no split, the whole thing goes in the first component. */ 62 /* Note: answers contain pointers into original string. */ 63 void 64 _splitl(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2) 65 { 66 Rune* p; 67 68 p = _Strnclass(s, cl, n); 69 *p1 = s; 70 if(p == nil) { 71 *n1 = n; 72 *p2 = nil; 73 *n2 = 0; 74 } 75 else { 76 *p2 = p; 77 *n1 = p-s; 78 *n2 = n-*n1; 79 } 80 } 81 82 /* Splitr splits s[0:n] just after last character of class cl. */ 83 /* Answers go in (p1, n1) and (p2, n2). */ 84 /* If no split, the whole thing goes in the last component. */ 85 /* Note: answers contain pointers into original string. */ 86 void 87 _splitr(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2) 88 { 89 Rune* p; 90 91 p = _Strnrclass(s, cl, n); 92 if(p == nil) { 93 *p1 = nil; 94 *n1 = 0; 95 *p2 = s; 96 *n2 = n; 97 } 98 else { 99 *p1 = s; 100 *p2 = p+1; 101 *n1 = *p2-s; 102 *n2 = n-*n1; 103 } 104 } 105 106 /* Splitall splits s[0:n] into parts that are separated by characters from class cl. */ 107 /* Each part will have nonzero length. */ 108 /* At most alen parts are found, and pointers to their starts go into */ 109 /* the strarr array, while their lengths go into the lenarr array. */ 110 /* The return value is the number of parts found. */ 111 int 112 _splitall(Rune* s, int n, Rune* cl, Rune** strarr, int* lenarr, int alen) 113 { 114 int i; 115 Rune* p; 116 Rune* q; 117 Rune* slast; 118 119 if(s == nil || n == 0) 120 return 0; 121 i = 0; 122 p = s; 123 slast = s+n; 124 while(p < slast && i < alen) { 125 while(p < slast && _inclass(*p, cl)) 126 p++; 127 if(p == slast) 128 break; 129 q = _Strnclass(p, cl, slast-p); 130 if(q == nil) 131 q = slast; 132 assert(q > p && q <= slast); 133 strarr[i] = p; 134 lenarr[i] = q-p; 135 i++; 136 p = q; 137 } 138 return i; 139 } 140 141 /* Find part of s that excludes leading and trailing whitespace, */ 142 /* and return that part in *pans (and its length in *panslen). */ 143 void 144 _trimwhite(Rune* s, int n, Rune** pans, int* panslen) 145 { 146 Rune* p; 147 Rune* q; 148 149 p = nil; 150 if(n > 0) { 151 p = _Strnclass(s, notwhitespace, n); 152 if(p != nil) { 153 q = _Strnrclass(s, notwhitespace, n); 154 assert(q != nil); 155 n = q+1-p; 156 } 157 } 158 *pans = p; 159 *panslen = n; 160 } 161 162 /* _Strclass returns a pointer to the first element of s that is */ 163 /* a member of class cl, nil if none. */ 164 Rune* 165 _Strclass(Rune* s, Rune* cl) 166 { 167 Rune* p; 168 169 for(p = s; *p != 0; p++) 170 if(_inclass(*p, cl)) 171 return p; 172 return nil; 173 } 174 175 /* _Strnclass returns a pointer to the first element of s[0:n] that is */ 176 /* a member of class cl, nil if none. */ 177 Rune* 178 _Strnclass(Rune* s, Rune* cl, int n) 179 { 180 Rune* p; 181 182 for(p = s; n-- && *p != 0; p++) 183 if(_inclass(*p, cl)) 184 return p; 185 return nil; 186 } 187 188 /* _Strrclass returns a pointer to the last element of s that is */ 189 /* a member of class cl, nil if none */ 190 Rune* 191 _Strrclass(Rune* s, Rune* cl) 192 { 193 Rune* p; 194 195 if(s == nil || *s == 0) 196 return nil; 197 p = s + runestrlen(s) - 1; 198 while(p >= s) { 199 if(_inclass(*p, cl)) 200 return p; 201 p--; 202 }; 203 return nil; 204 } 205 206 /* _Strnrclass returns a pointer to the last element of s[0:n] that is */ 207 /* a member of class cl, nil if none */ 208 Rune* 209 _Strnrclass(Rune* s, Rune* cl, int n) 210 { 211 Rune* p; 212 213 if(s == nil || *s == 0 || n == 0) 214 return nil; 215 p = s + n - 1; 216 while(p >= s) { 217 if(_inclass(*p, cl)) 218 return p; 219 p--; 220 }; 221 return nil; 222 } 223 224 /* Is c in the class cl? */ 225 int 226 _inclass(Rune c, Rune* cl) 227 { 228 int n; 229 int ans; 230 int negate; 231 int i; 232 233 n = _Strlen(cl); 234 if(n == 0) 235 return 0; 236 ans = 0; 237 negate = 0; 238 if(cl[0] == '^') { 239 negate = 1; 240 cl++; 241 n--; 242 } 243 for(i = 0; i < n; i++) { 244 if(cl[i] == '-' && i > 0 && i < n - 1) { 245 if(c >= cl[i - 1] && c <= cl[i + 1]) { 246 ans = 1; 247 break; 248 } 249 i++; 250 } 251 else if(c == cl[i]) { 252 ans = 1; 253 break; 254 } 255 } 256 if(negate) 257 ans = !ans; 258 return ans; 259 } 260 261 /* Is pre a prefix of s? */ 262 int 263 _prefix(Rune* pre, Rune* s) 264 { 265 int ns; 266 int n; 267 int k; 268 269 ns = _Strlen(s); 270 n = _Strlen(pre); 271 if(ns < n) 272 return 0; 273 for(k = 0; k < n; k++) { 274 if(pre[k] != s[k]) 275 return 0; 276 } 277 return 1; 278 } 279 280 /* Number of runes in (null-terminated) s */ 281 int 282 _Strlen(Rune* s) 283 { 284 if(s == nil) 285 return 0; 286 return runestrlen(s); 287 } 288 289 /* -1, 0, 1 as s1 is lexicographically less, equal greater than s2 */ 290 int 291 _Strcmp(Rune *s1, Rune *s2) 292 { 293 if(s1 == nil) 294 return (s2 == nil || *s2 == 0) ? 0 : -1; 295 if(s2 == nil) 296 return (*s1 == 0) ? 0 : 1; 297 return runestrcmp(s1, s2); 298 } 299 300 /* Like Strcmp, but use exactly n chars of s1 (assume s1 has at least n chars). */ 301 /* Also, do a case-insensitive match, assuming s2 */ 302 /* has no chars in [A-Z], only their lowercase versions. */ 303 /* (This routine is used for in-place keyword lookup, where s2 is in a keyword */ 304 /* list and s1 is some substring, possibly mixed-case, in a buffer.) */ 305 int 306 _Strncmpci(Rune *s1, int n1, Rune *s2) 307 { 308 Rune c1, c2; 309 310 for(;;) { 311 if(n1-- == 0) { 312 if(*s2 == 0) 313 return 0; 314 return -1; 315 } 316 c1 = *s1++; 317 c2 = *s2++; 318 if(c1 >= 'A' && c1 <= 'Z') 319 c1 = c1 - 'A' + 'a'; 320 if(c1 != c2) { 321 if(c1 > c2) 322 return 1; 323 return -1; 324 } 325 } 326 } 327 328 /* emalloc and copy */ 329 Rune* 330 _Strdup(Rune* s) 331 { 332 if(s == nil) 333 return nil; 334 return _Strndup(s, runestrlen(s)); 335 } 336 337 /* emalloc and copy n chars of s (assume s is at least that long), */ 338 /* and add 0 terminator. */ 339 /* Return nil if n==0. */ 340 Rune* 341 _Strndup(Rune* s, int n) 342 { 343 Rune* ans; 344 345 if(n <= 0) 346 return nil; 347 ans = _newstr(n); 348 memmove(ans, s, n*sizeof(Rune)); 349 ans[n] = 0; 350 return ans; 351 } 352 /* emalloc enough room for n Runes, plus 1 null terminator. */ 353 /* (Not initialized to anything.) */ 354 Rune* 355 _newstr(int n) 356 { 357 return (Rune*)emalloc((n+1)*sizeof(Rune)); 358 } 359 360 /* emalloc and copy s+t */ 361 Rune* 362 _Strdup2(Rune* s, Rune* t) 363 { 364 int ns, nt; 365 Rune* ans; 366 Rune* p; 367 368 ns = _Strlen(s); 369 nt = _Strlen(t); 370 if(ns+nt == 0) 371 return nil; 372 ans = _newstr(ns+nt); 373 p = _Stradd(ans, s, ns); 374 p = _Stradd(p, t, nt); 375 *p = 0; 376 return ans; 377 } 378 379 /* Return emalloc'd substring s[start:stop], */ 380 Rune* 381 _Strsubstr(Rune* s, int start, int stop) 382 { 383 Rune* t; 384 385 if(start == stop) 386 return nil; 387 t = _Strndup(s+start, stop-start); 388 return t; 389 } 390 391 /* Copy n chars to s1 from s2, and return s1+n */ 392 Rune* 393 _Stradd(Rune* s1, Rune* s2, int n) 394 { 395 if(n == 0) 396 return s1; 397 memmove(s1, s2, n*sizeof(Rune)); 398 return s1+n; 399 } 400 401 /* Like strtol, but converting from Rune* string */ 402 403 /*#define LONG_MAX 2147483647L */ 404 /*#define LONG_MIN -2147483648L */ 405 406 long 407 _Strtol(Rune* nptr, Rune** endptr, int base) 408 { 409 Rune* p; 410 long n, nn; 411 int c, ovfl, v, neg, ndig; 412 413 p = nptr; 414 neg = 0; 415 n = 0; 416 ndig = 0; 417 ovfl = 0; 418 419 /* 420 * White space 421 */ 422 for(;;p++){ 423 switch(*p){ 424 case ' ': 425 case '\t': 426 case '\n': 427 case '\f': 428 case '\r': 429 case '\v': 430 continue; 431 } 432 break; 433 } 434 435 /* 436 * Sign 437 */ 438 if(*p=='-' || *p=='+') 439 if(*p++ == '-') 440 neg = 1; 441 442 /* 443 * Base 444 */ 445 if(base==0){ 446 if(*p != '0') 447 base = 10; 448 else{ 449 base = 8; 450 if(p[1]=='x' || p[1]=='X'){ 451 p += 2; 452 base = 16; 453 } 454 } 455 }else if(base==16 && *p=='0'){ 456 if(p[1]=='x' || p[1]=='X') 457 p += 2; 458 }else if(base<0 || 36<base) 459 goto Return; 460 461 /* 462 * Non-empty sequence of digits 463 */ 464 for(;; p++,ndig++){ 465 c = *p; 466 v = base; 467 if('0'<=c && c<='9') 468 v = c - '0'; 469 else if('a'<=c && c<='z') 470 v = c - 'a' + 10; 471 else if('A'<=c && c<='Z') 472 v = c - 'A' + 10; 473 if(v >= base) 474 break; 475 nn = n*base + v; 476 if(nn < n) 477 ovfl = 1; 478 n = nn; 479 } 480 481 Return: 482 if(ndig == 0) 483 p = nptr; 484 if(endptr) 485 *endptr = p; 486 if(ovfl){ 487 if(neg) 488 return LONG_MIN; 489 return LONG_MAX; 490 } 491 if(neg) 492 return -n; 493 return n; 494 } 495 496 /* Convert buf[0:n], bytes whose character set is chset, */ 497 /* into a emalloc'd null-terminated Unicode string. */ 498 Rune* 499 toStr(uchar* buf, int n, int chset) 500 { 501 int i; 502 int m; 503 Rune ch; 504 Rune* ans; 505 506 switch(chset) { 507 case US_Ascii: 508 case ISO_8859_1: 509 ans = (Rune*)emalloc((n+1)*sizeof(Rune)); 510 for(i = 0; i < n; i++) 511 ans[i] = buf[i]; 512 ans[n] = 0; 513 break; 514 515 case UTF_8: 516 m = 0; 517 for(i = 0; i < n; ) { 518 i += chartorune(&ch, (char*)(buf+i)); 519 m++; 520 } 521 ans = (Rune*)emalloc((m+1)*sizeof(Rune)); 522 m = 0; 523 for(i = 0; i < n; ) { 524 i += chartorune(&ch, (char*)(buf+i)); 525 ans[m++] = ch; 526 } 527 ans[m] = 0; 528 break; 529 530 default: 531 ans = nil; 532 assert(0); 533 } 534 return ans; 535 } 536 537 /* Convert buf[0:n], Unicode characters, */ 538 /* into an emalloc'd null-terminated string in character set chset. */ 539 /* Use 0x80 for unconvertable characters. */ 540 uchar* 541 fromStr(Rune* buf, int n, int chset) 542 { 543 uchar* ans; 544 int i, lim, m; 545 Rune ch; 546 uchar* p; 547 uchar s[UTFmax]; 548 549 ans = nil; 550 switch(chset) { 551 case US_Ascii: 552 case ISO_8859_1: 553 ans = (uchar*)emalloc(n+1); 554 lim = (chset==US_Ascii)? 127 : 255; 555 for(i = 0; i < n; i++) { 556 ch = buf[i]; 557 if(ch > lim) 558 ch = 0x80; 559 ans[i] = ch; 560 } 561 ans[n] = 0; 562 break; 563 564 case UTF_8: 565 m = 0; 566 for(i = 0; i < n; i++) { 567 m += runetochar((char*)s, &buf[i]); 568 } 569 ans = (uchar*)emalloc(m+1); 570 p = ans; 571 for(i = 0; i < n; i++) 572 p += runetochar((char*)p, &buf[i]); 573 *p = 0; 574 break; 575 576 default: 577 assert(0); 578 } 579 return ans; 580 581 } 582 583 /* Convert n to emalloc'd String. */ 584 Rune* 585 _ltoStr(int n) 586 { 587 int m; 588 uchar buf[20]; 589 590 m = snprint((char*)buf, sizeof(buf), "%d", n); 591 return toStr(buf, m, US_Ascii); 592 }