manglegcc2.c (11626B)
1 /* 2 * gcc2 name demangler. 3 * 4 * gcc2 follows the C++ Annotated Reference Manual section 7.2.1 5 * name mangling description with a few changes. 6 * See gpcompare.texi, gxxint_15.html in this directory for the changes. 7 * 8 * Not implemented: 9 * unicode mangling 10 * renaming of operator functions 11 */ 12 /* 13 RULES TO ADD: 14 15 _10CycleTimer.cycles_per_ms_ => CycleTimer::cycles_per_ms_ 16 17 18 */ 19 #include <u.h> 20 #include <libc.h> 21 #include <bio.h> 22 #include <mach.h> 23 24 #define debug 0 25 26 typedef struct Chartab Chartab; 27 struct Chartab 28 { 29 char c; 30 char *s; 31 }; 32 33 static char* 34 chartabsearch(Chartab *ct, int c) 35 { 36 for(; ct->c; ct++) 37 if(ct->c == c) 38 return ct->s; 39 return nil; 40 } 41 42 static Chartab typetab[] = 43 { 44 'b', "bool", 45 'c', "char", 46 'd', "double", 47 'e', "...", 48 'f', "float", 49 'i', "int", 50 'J', "complex", 51 'l', "long", 52 'r', "long double", 53 's', "short", 54 'v', "void", 55 'w', "wchar_t", 56 'x', "long long", 57 0, 0 58 }; 59 60 static Chartab modifiertab[] = 61 { 62 'C', "const", 63 'S', "signed", /* means static for member functions */ 64 'U', "unsigned", 65 'V', "volatile", 66 67 'G', "garbage", /* no idea what this is */ 68 0, 0 69 }; 70 71 static char constructor[] = "constructor"; 72 static char destructor[] = "destructor"; 73 static char gconstructor[] = "$gconstructor"; /* global destructor */ 74 static char gdestructor[] = "$gdestructor"; /* global destructor */ 75 76 static char manglestarts[] = "123456789CFHQSUVt"; 77 78 static int gccname(char**, char**); 79 static char *demanglegcc2a(char*, char*); 80 static char *demanglegcc2b(char*, char*); 81 static char *demanglegcc2c(char*, char*); 82 static int gccnumber(char**, int*, int); 83 84 char* 85 demanglegcc2(char *s, char *buf) 86 { 87 char *name, *os, *p, *t; 88 int isfn, namelen; 89 90 91 /* 92 * Pick off some cases that seem not to fit the pattern. 93 */ 94 if((t = demanglegcc2a(s, buf)) != nil) 95 return t; 96 if((t = demanglegcc2b(s, buf)) != nil) 97 return t; 98 if((t = demanglegcc2c(s, buf)) != nil) 99 return t; 100 101 /* 102 * First, figure out whether this is a mangled name. 103 * The name begins with a short version of the name, then __. 104 * Of course, some C names begin with __ too, so the ultimate 105 * test is whether what follows __ looks reasonable. 106 * We use a test on the first letter instead. 107 * 108 * Constructors have no name - they begin __ (double underscore). 109 * Destructors break the rule - they begin _._ (underscore, dot, underscore). 110 */ 111 os = s; 112 isfn = 0; 113 if(memcmp(s, "_._", 3) == 0){ 114 isfn = 1; 115 name = destructor; 116 namelen = strlen(name); 117 s += 3; 118 }else if(memcmp(s, "_GLOBAL_.D.__", 13) == 0){ 119 isfn = 1; 120 name = gdestructor; 121 namelen = strlen(name); 122 s += 13; 123 }else if(memcmp(s, "_GLOBAL_.D._", 12) == 0){ 124 isfn = 0; 125 name = gdestructor; 126 namelen = strlen(name); 127 s += 12; 128 }else if(memcmp(s, "_GLOBAL_.I.__", 13) == 0){ 129 isfn = 1; 130 name = gconstructor; 131 namelen = strlen(name); 132 s += 13; 133 }else if(memcmp(s, "_GLOBAL_.I._", 12) == 0){ 134 isfn = 0; 135 name = gconstructor; 136 namelen = strlen(name); 137 s += 12; 138 }else{ 139 t = strstr(os, "__"); 140 if(t == nil) 141 return os; 142 do{ 143 s = t; 144 if(strchr(manglestarts, *(s+2))) 145 break; 146 }while((t = strstr(s+1, "__")) != nil); 147 148 name = os; 149 namelen = s - os; 150 if(namelen == 0){ 151 isfn = 1; 152 name = constructor; 153 namelen = strlen(name); 154 } 155 s += 2; 156 } 157 158 /* 159 * Now s points at the mangled crap (maybe). 160 * and name is the final element of the name. 161 */ 162 if(strchr(manglestarts, *s) == nil) 163 return os; 164 165 p = buf; 166 if(*s == 'F'){ 167 /* global function, no extra name pieces, just types */ 168 isfn = 1; 169 }else{ 170 /* parse extra name pieces */ 171 if(!gccname(&s, &p)){ 172 if(debug) 173 fprint(2, "parsename %s: %r\n", s); 174 return os; 175 } 176 177 /* if we have a constructor or destructor, try to use the C++ name */ 178 t = nil; 179 if(name == constructor || name == destructor){ 180 *p = 0; 181 t = strrchr(buf, ':'); 182 if(t) 183 t++; 184 else 185 t = buf; 186 } 187 strcpy(p, "::"); 188 p += 2; 189 if(t){ 190 namelen = strlen(t)-2; 191 if(name == destructor) 192 *p++ = '~'; 193 name = t; 194 } 195 } 196 if(p >= buf+2 && memcmp(p-2, "::", 2) == 0 && *(p-3) == ')') 197 p -= 2; 198 memmove(p, name, namelen); 199 p += namelen; 200 201 if(*s == 'F'){ 202 /* might be from above, or might follow name pieces */ 203 s++; 204 isfn = 1; 205 } 206 207 /* the rest of the name is argument types - could skip this */ 208 if(*s || isfn){ 209 *p++ = '('; 210 while(*s != 0 && *s != '_'){ 211 if(!gccname(&s, &p)) 212 break; 213 *p++ = ','; 214 } 215 if(*(p-1) == ',') 216 p--; 217 *p++ = ')'; 218 } 219 220 if(*s == '_'){ 221 /* return type (left over from H) */ 222 } 223 224 *p = 0; 225 return buf; 226 } 227 228 /* 229 * _10CycleTimer.cycles_per_ms_ => CycleTimer::cycles_per_ms_ 230 * _t12basic_string3ZcZt11char_traits1ZcZt9allocator1Zc.npos 231 * (maybe the funny syntax means they are private) 232 */ 233 static char* 234 demanglegcc2a(char *s, char *buf) 235 { 236 char *p; 237 238 if(*s != '_' || strchr(manglestarts, *(s+1)) == nil) 239 return nil; 240 p = buf; 241 s++; 242 if(!gccname(&s, &p)) 243 return nil; 244 if(*s != '.') 245 return nil; 246 s++; 247 strcpy(p, "::"); 248 p += 2; 249 strcpy(p, s); 250 return buf; 251 } 252 253 /* 254 * _tfb => type info for bool 255 * __vt_7ostream => vtbl for ostream 256 */ 257 static char* 258 demanglegcc2b(char *s, char *buf) 259 { 260 char *p; 261 char *t; 262 263 if(memcmp(s, "__ti", 4) == 0){ 264 t = "$typeinfo"; 265 s += 4; 266 }else if(memcmp(s, "__tf", 4) == 0){ 267 t = "$typeinfofn"; 268 s += 4; 269 }else if(memcmp(s, "__vt_", 5) == 0){ 270 t = "$vtbl"; 271 s += 5; 272 }else 273 return nil; 274 275 p = buf; 276 for(;;){ 277 if(*s == 0 || !gccname(&s, &p)) 278 return nil; 279 if(*s == 0) 280 break; 281 if(*s != '.' && *s != '$') 282 return nil; 283 strcpy(p, "::"); 284 p += 2; 285 s++; 286 } 287 strcpy(p, "::"); 288 p += 2; 289 strcpy(p, t); 290 return buf; 291 } 292 293 /* 294 * __thunk_176__._Q210LogMessage9LogStream => thunk (offset -176) for LogMessage::LogStream 295 */ 296 static char* 297 demanglegcc2c(char *s, char *buf) 298 { 299 int n; 300 char *p; 301 302 if(memcmp(s, "__thunk_", 8) != 0) 303 return nil; 304 s += 8; 305 if(!gccnumber(&s, &n, 1)) 306 return nil; 307 if(memcmp(s, "__._", 4) != 0) /* might as well be morse code */ 308 return nil; 309 s += 4; 310 p = buf; 311 if(!gccname(&s, &p)) 312 return nil; 313 strcpy(p, "::$thunk"); 314 return buf; 315 } 316 317 /* 318 * Parse a number, a non-empty run of digits. 319 * If many==0, then only one digit is used, even 320 * if it is followed by more. When we need a big 321 * number in a one-digit slot, it gets bracketed by underscores. 322 */ 323 static int 324 gccnumber(char **ps, int *pn, int many) 325 { 326 char *s; 327 int n, eatunderscore; 328 329 s = *ps; 330 eatunderscore = 0; 331 if(!many && *s == '_'){ 332 many = 1; 333 s++; 334 eatunderscore = 1; 335 } 336 if(!isdigit((uchar)*s)){ 337 bad: 338 werrstr("bad number %.20s", *ps); 339 return 0; 340 } 341 if(many) 342 n = strtol(s, &s, 10); 343 else 344 n = *s++ - '0'; 345 if(eatunderscore){ 346 if(*s != '_') 347 goto bad; 348 s++; 349 } 350 *ps = s; 351 *pn = n; 352 return 1; 353 } 354 355 /* 356 * Pick apart the next mangled name section. 357 * Names and types are treated as the same. 358 * Let's see how far we can go before that becomes a problem. 359 */ 360 static int 361 gccname(char **ps, char **pp) 362 { 363 int i, n, m, val; 364 char *os, *s, *t, *p, *p0, *p1; 365 366 s = *ps; 367 os = s; 368 p = *pp; 369 370 /* print("\tgccname: %s\n", s); */ 371 372 /* basic types */ 373 if((t = chartabsearch(typetab, *s)) != nil){ 374 s++; 375 strcpy(p, t); 376 p += strlen(t); 377 goto out; 378 } 379 380 /* modifiers */ 381 if((t = chartabsearch(modifiertab, *s)) != nil){ 382 s++; 383 if(!gccname(&s, &p)) 384 return 0; 385 /* 386 * These don't end up in the right place 387 * and i don't care anyway 388 * (AssertHeld__C17ReaderWriterMutex) 389 */ 390 /* 391 *p++ = ' '; 392 strcpy(p, t); 393 p += strlen(p); 394 */ 395 goto out; 396 } 397 398 switch(*s){ 399 default: 400 bad: 401 if(debug) 402 fprint(2, "gccname: %s (%s)\n", os, s); 403 werrstr("bad name %.20s", s); 404 return 0; 405 406 case '1': case '2': case '3': case '4': /* length-prefixed string */ 407 case '5': case '6': case '7': case '8': case '9': 408 if(!gccnumber(&s, &n, 1)) 409 return 0; 410 memmove(p, s, n); 411 p += n; 412 s += n; 413 break; 414 415 case 'A': /* array */ 416 t = s; 417 s++; 418 if(!gccnumber(&s, &n, 1)) 419 return 0; 420 if(*s != '_'){ 421 werrstr("bad array %.20s", t); 422 return 0; 423 } 424 s++; 425 sprint(p, "array[%d] ", n); 426 p += strlen(p); 427 break; 428 429 case 'F': /* function */ 430 t = s; 431 s++; 432 strcpy(p, "fn("); 433 p += 3; 434 /* arguments */ 435 while(*s && *s != '_') 436 if(!gccname(&s, &p)) 437 return 0; 438 if(*s != '_'){ 439 werrstr("unexpected end in function: %s", t); 440 return 0; 441 } 442 s++; 443 strcpy(p, " => "); 444 p += 4; 445 /* return type */ 446 if(!gccname(&s, &p)) 447 return 0; 448 *p++ = ')'; 449 break; 450 451 case 'H': /* template specialization */ 452 if(memcmp(s-2, "__", 2) != 0) 453 fprint(2, "wow: %s\n", s-2); 454 t = s; 455 s++; 456 if(!gccnumber(&s, &n, 0)) 457 return 0; 458 p0 = p; 459 /* template arguments */ 460 *p++ = '<'; 461 for(i=0; i<n; i++){ 462 val = 1; 463 if(*s == 'Z'){ /* argument is a type, not value */ 464 val = 0; 465 s++; 466 } 467 if(!gccname(&s, &p)) 468 return 0; 469 if(val){ 470 if(!gccnumber(&s, &m, 1)) /* gccnumber: 1 or 0? */ 471 return 0; 472 sprint(p, "=%d", m); 473 p += strlen(p); 474 } 475 if(i+1<n) 476 *p++ = ','; 477 } 478 *p++ = '>'; 479 if(*s != '_'){ 480 werrstr("bad template %s", t); 481 return 0; 482 } 483 s++; 484 485 /* 486 * Can't seem to tell difference between a qualifying name 487 * and arguments. Not sure which is which. It appears that if 488 * you get a name, use it, otherwise look for types. 489 * The G type qualifier appears to have no effect other than 490 * turning an ambiguous name into a definite type. 491 * 492 * SetFlag__H1Zb_P15FlagSettingMode_v 493 * => void SetFlag<bool>(FlagSettingMode *) 494 * SetFlag__H1Zb_15FlagSettingMode_v 495 * => void FlagSettingMode::SetFlag<bool>() 496 * SetFlag__H1Zb_G15FlagSettingMode_v 497 * => void SetFlag<bool>(FlagSettingMode) 498 */ 499 if(strchr("ACFGPRSUVX", *s)){ 500 /* args */ 501 t = s; 502 p1 = p; 503 *p++ = '('; 504 while(*s != '_'){ 505 if(*s == 0 || !gccname(&s, &p)){ 506 werrstr("bad H args: %s", t); 507 return 0; 508 } 509 } 510 *p++ = ')'; 511 s++; 512 }else{ 513 p1 = p; 514 /* name */ 515 if(!gccname(&s, &p)) 516 return 0; 517 } 518 /* 519 * Need to do some rearrangement of <> () and names here. 520 * Doesn't matter since we strip out the <> and () anyway. 521 */ 522 break; 523 524 case 'M': /* M1S: pointer to member */ 525 if(*(s+1) != '1' || *(s+2) != 'S') 526 goto bad; 527 s += 3; 528 strcpy(p, "mptr "); 529 p += 5; 530 if(!gccname(&s, &p)) 531 return 0; 532 break; 533 534 case 'N': /* multiply-repeated type */ 535 s++; 536 if(!gccnumber(&s, &n, 0) || !gccnumber(&s, &m, 0)) 537 return 0; 538 sprint(p, "T%dx%d", m, n); 539 p += strlen(p); 540 break; 541 542 case 'P': /* pointer */ 543 s++; 544 strcpy(p, "ptr "); 545 p += 4; 546 if(!gccname(&s, &p)) 547 return 0; 548 break; 549 550 case 'Q': /* qualified name */ 551 s++; 552 if(!gccnumber(&s, &n, 0)) 553 return 0; 554 for(i=0; i<n; i++){ 555 if(!gccname(&s, &p)){ 556 werrstr("in hierarchy: %r"); 557 return 0; 558 } 559 if(i+1 < n){ 560 strcpy(p, "::"); 561 p += 2; 562 } 563 } 564 break; 565 566 case 'R': /* reference */ 567 s++; 568 strcpy(p, "ref "); 569 p += 4; 570 if(!gccname(&s, &p)) 571 return 0; 572 break; 573 574 case 't': /* class template instantiation */ 575 /* should share code with case 'H' */ 576 t = s; 577 s++; 578 if(!gccname(&s, &p)) 579 return 0; 580 if(!gccnumber(&s, &n, 0)) 581 return 0; 582 p0 = p; 583 /* template arguments */ 584 *p++ = '<'; 585 for(i=0; i<n; i++){ 586 val = 1; 587 if(*s == 'Z'){ /* argument is a type, not value */ 588 val = 0; 589 s++; 590 } 591 if(!gccname(&s, &p)) 592 return 0; 593 if(val){ 594 if(!gccnumber(&s, &m, 1)) /* gccnumber: 1 or 0? */ 595 return 0; 596 sprint(p, "=%d", m); 597 p += strlen(p); 598 } 599 if(i+1<n) 600 *p++ = ','; 601 } 602 *p++ = '>'; 603 break; 604 605 case 'T': /* once-repeated type */ 606 s++; 607 if(!gccnumber(&s, &n, 0)) 608 return 0; 609 sprint(p, "T%d", n); 610 p += strlen(p); 611 break; 612 613 case 'X': /* type parameter in 'H' */ 614 if(!isdigit((uchar)*(s+1)) || !isdigit((uchar)*(s+2))) 615 goto bad; 616 memmove(p, s, 3); 617 p += 3; 618 s += 3; 619 break; 620 } 621 622 USED(p1); 623 USED(p0); 624 625 out: 626 *ps = s; 627 *pp = p; 628 return 1; 629 }