plan9port

fork of plan9port with libvec, libstr and libsdb
Log | Files | Refs | README | LICENSE

manglegcc2.c (11626B)


      1 /*
      2  * gcc2 name demangler.
      3  *
      4  * gcc2 follows the C++ Annotated Reference Manual section 7.2.1
      5  * name mangling description with a few changes.
      6  * See gpcompare.texi, gxxint_15.html in this directory for the changes.
      7  *
      8  * Not implemented:
      9  *	unicode mangling
     10  *	renaming of operator functions
     11  */
     12 /*
     13 RULES TO ADD:
     14 
     15 _10CycleTimer.cycles_per_ms_ => CycleTimer::cycles_per_ms_
     16 
     17 
     18 */
     19 #include <u.h>
     20 #include <libc.h>
     21 #include <bio.h>
     22 #include <mach.h>
     23 
/* Set to non-zero to print parse-failure diagnostics on file descriptor 2. */
#define debug 0
     25 
/*
 * One entry of a character-keyed lookup table.
 * A table is terminated by an entry whose c is 0 (see chartabsearch).
 */
typedef struct Chartab Chartab;
struct Chartab
{
	char c;		/* key character */
	char *s;	/* text associated with c */
};
     32 
     33 static char*
     34 chartabsearch(Chartab *ct, int c)
     35 {
     36 	for(; ct->c; ct++)
     37 		if(ct->c == c)
     38 			return ct->s;
     39 	return nil;
     40 }
     41 
     42 static Chartab typetab[] =
     43 {
     44 	'b',	"bool",
     45 	'c',	"char",
     46 	'd',	"double",
     47 	'e',	"...",
     48 	'f',	"float",
     49 	'i',	"int",
     50 	'J',	"complex",
     51 	'l',	"long",
     52 	'r',	"long double",
     53 	's',	"short",
     54 	'v',	"void",
     55 	'w',	"wchar_t",
     56 	'x',	"long long",
     57 	0, 0
     58 };
     59 
     60 static Chartab modifiertab[] =
     61 {
     62 	'C',	"const",
     63 	'S',	"signed",		/* means static for member functions */
     64 	'U',	"unsigned",
     65 	'V',	"volatile",
     66 
     67 	'G',	"garbage",	/* no idea what this is */
     68 	0, 0
     69 };
     70 
/*
 * Stand-in names for symbols that carry no short name of their own.
 * demanglegcc2 compares name against constructor and destructor by
 * address to decide whether to substitute the class name.
 */
static char constructor[] = "constructor";
static char destructor[] = "destructor";
static char gconstructor[] = "$gconstructor";	/* global constructor */
static char gdestructor[] = "$gdestructor";	/* global destructor */

/* Characters that may begin the mangled part of a name. */
static char manglestarts[] = "123456789CFHQSUVt";

/* Parsers defined later in this file. */
static int gccname(char**, char**);
static char *demanglegcc2a(char*, char*);
static char *demanglegcc2b(char*, char*);
static char *demanglegcc2c(char*, char*);
static int gccnumber(char**, int*, int);
     83 
     84 char*
     85 demanglegcc2(char *s, char *buf)
     86 {
     87 	char *name, *os, *p, *t;
     88 	int isfn, namelen;
     89 
     90 
     91 	/*
     92 	 * Pick off some cases that seem not to fit the pattern.
     93 	 */
     94 	if((t = demanglegcc2a(s, buf)) != nil)
     95 		return t;
     96 	if((t = demanglegcc2b(s, buf)) != nil)
     97 		return t;
     98 	if((t = demanglegcc2c(s, buf)) != nil)
     99 		return t;
    100 
    101 	/*
    102 	 * First, figure out whether this is a mangled name.
    103 	 * The name begins with a short version of the name, then __.
    104 	 * Of course, some C names begin with __ too, so the ultimate
    105 	 * test is whether what follows __ looks reasonable.
    106 	 * We use a test on the first letter instead.
    107 	 *
    108 	 * Constructors have no name - they begin __ (double underscore).
    109 	 * Destructors break the rule - they begin _._ (underscore, dot, underscore).
    110 	 */
    111 	os = s;
    112 	isfn = 0;
    113 	if(memcmp(s, "_._", 3) == 0){
    114 		isfn = 1;
    115 		name = destructor;
    116 		namelen = strlen(name);
    117 		s += 3;
    118 	}else if(memcmp(s, "_GLOBAL_.D.__", 13) == 0){
    119 		isfn = 1;
    120 		name = gdestructor;
    121 		namelen = strlen(name);
    122 		s += 13;
    123 	}else if(memcmp(s, "_GLOBAL_.D._", 12) == 0){
    124 		isfn = 0;
    125 		name = gdestructor;
    126 		namelen = strlen(name);
    127 		s += 12;
    128 	}else if(memcmp(s, "_GLOBAL_.I.__", 13) == 0){
    129 		isfn = 1;
    130 		name = gconstructor;
    131 		namelen = strlen(name);
    132 		s += 13;
    133 	}else if(memcmp(s, "_GLOBAL_.I._", 12) == 0){
    134 		isfn = 0;
    135 		name = gconstructor;
    136 		namelen = strlen(name);
    137 		s += 12;
    138 	}else{
    139 		t = strstr(os, "__");
    140 		if(t == nil)
    141 			return os;
    142 		do{
    143 			s = t;
    144 			if(strchr(manglestarts, *(s+2)))
    145 				break;
    146 		}while((t = strstr(s+1, "__")) != nil);
    147 
    148 		name = os;
    149 		namelen = s - os;
    150 		if(namelen == 0){
    151 			isfn = 1;
    152 			name = constructor;
    153 			namelen = strlen(name);
    154 		}
    155 		s += 2;
    156 	}
    157 
    158 	/*
    159 	 * Now s points at the mangled crap (maybe).
    160 	 * and name is the final element of the name.
    161 	 */
    162 	if(strchr(manglestarts, *s) == nil)
    163 		return os;
    164 
    165 	p = buf;
    166 	if(*s == 'F'){
    167 		/* global function, no extra name pieces, just types */
    168 		isfn = 1;
    169 	}else{
    170 		/* parse extra name pieces */
    171 		if(!gccname(&s, &p)){
    172 			if(debug)
    173 				fprint(2, "parsename %s: %r\n", s);
    174 			return os;
    175 		}
    176 
    177 		/* if we have a constructor or destructor, try to use the C++ name */
    178 		t = nil;
    179 		if(name == constructor || name == destructor){
    180 			*p = 0;
    181 			t = strrchr(buf, ':');
    182 			if(t)
    183 				t++;
    184 			else
    185 				t = buf;
    186 		}
    187 		strcpy(p, "::");
    188 		p += 2;
    189 		if(t){
    190 			namelen = strlen(t)-2;
    191 			if(name == destructor)
    192 				*p++ = '~';
    193 			name = t;
    194 		}
    195 	}
    196 	if(p >= buf+2 && memcmp(p-2, "::", 2) == 0 && *(p-3) == ')')
    197 		p -= 2;
    198 	memmove(p, name, namelen);
    199 	p += namelen;
    200 
    201 	if(*s == 'F'){
    202 		/* might be from above, or might follow name pieces */
    203 		s++;
    204 		isfn = 1;
    205 	}
    206 
    207 	/* the rest of the name is argument types - could skip this */
    208 	if(*s || isfn){
    209 		*p++ = '(';
    210 		while(*s != 0 && *s != '_'){
    211 			if(!gccname(&s, &p))
    212 				break;
    213 			*p++ = ',';
    214 		}
    215 		if(*(p-1) == ',')
    216 			p--;
    217 		*p++ = ')';
    218 	}
    219 
    220 	if(*s == '_'){
    221 		/* return type (left over from H) */
    222 	}
    223 
    224 	*p = 0;
    225 	return buf;
    226 }
    227 
    228 /*
    229  * _10CycleTimer.cycles_per_ms_ => CycleTimer::cycles_per_ms_
    230  * _t12basic_string3ZcZt11char_traits1ZcZt9allocator1Zc.npos
    231  * (maybe the funny syntax means they are private)
    232  */
    233 static char*
    234 demanglegcc2a(char *s, char *buf)
    235 {
    236 	char *p;
    237 
    238 	if(*s != '_' || strchr(manglestarts, *(s+1)) == nil)
    239 		return nil;
    240 	p = buf;
    241 	s++;
    242 	if(!gccname(&s, &p))
    243 		return nil;
    244 	if(*s != '.')
    245 		return nil;
    246 	s++;
    247 	strcpy(p, "::");
    248 	p += 2;
    249 	strcpy(p, s);
    250 	return buf;
    251 }
    252 
    253 /*
    254  * _tfb => type info for bool
    255  * __vt_7ostream => vtbl for ostream
    256  */
    257 static char*
    258 demanglegcc2b(char *s, char *buf)
    259 {
    260 	char *p;
    261 	char *t;
    262 
    263 	if(memcmp(s, "__ti", 4) == 0){
    264 		t = "$typeinfo";
    265 		s += 4;
    266 	}else if(memcmp(s, "__tf", 4) == 0){
    267 		t = "$typeinfofn";
    268 		s += 4;
    269 	}else if(memcmp(s, "__vt_", 5) == 0){
    270 		t = "$vtbl";
    271 		s += 5;
    272 	}else
    273 		return nil;
    274 
    275 	p = buf;
    276 	for(;;){
    277 		if(*s == 0 || !gccname(&s, &p))
    278 			return nil;
    279 		if(*s == 0)
    280 			break;
    281 		if(*s != '.' && *s != '$')
    282 			return nil;
    283 		strcpy(p, "::");
    284 		p += 2;
    285 		s++;
    286 	}
    287 	strcpy(p, "::");
    288 	p += 2;
    289 	strcpy(p, t);
    290 	return buf;
    291 }
    292 
    293 /*
    294  * __thunk_176__._Q210LogMessage9LogStream => thunk (offset -176) for LogMessage::LogStream
    295  */
    296 static char*
    297 demanglegcc2c(char *s, char *buf)
    298 {
    299 	int n;
    300 	char *p;
    301 
    302 	if(memcmp(s, "__thunk_", 8) != 0)
    303 		return nil;
    304 	s += 8;
    305 	if(!gccnumber(&s, &n, 1))
    306 		return nil;
    307 	if(memcmp(s, "__._", 4) != 0)	/* might as well be morse code */
    308 		return nil;
    309 	s += 4;
    310 	p = buf;
    311 	if(!gccname(&s, &p))
    312 		return nil;
    313 	strcpy(p, "::$thunk");
    314 	return buf;
    315 }
    316 
    317 /*
    318  * Parse a number, a non-empty run of digits.
    319  * If many==0, then only one digit is used, even
    320  * if it is followed by more.  When we need a big
    321  * number in a one-digit slot, it gets bracketed by underscores.
    322  */
    323 static int
    324 gccnumber(char **ps, int *pn, int many)
    325 {
    326 	char *s;
    327 	int n, eatunderscore;
    328 
    329 	s = *ps;
    330 	eatunderscore = 0;
    331 	if(!many && *s == '_'){
    332 		many = 1;
    333 		s++;
    334 		eatunderscore = 1;
    335 	}
    336 	if(!isdigit((uchar)*s)){
    337 	bad:
    338 		werrstr("bad number %.20s", *ps);
    339 		return 0;
    340 	}
    341 	if(many)
    342 		n = strtol(s, &s, 10);
    343 	else
    344 		n = *s++ - '0';
    345 	if(eatunderscore){
    346 		if(*s != '_')
    347 			goto bad;
    348 		s++;
    349 	}
    350 	*ps = s;
    351 	*pn = n;
    352 	return 1;
    353 }
    354 
    355 /*
    356  * Pick apart the next mangled name section.
    357  * Names and types are treated as the same.
    358  * Let's see how far we can go before that becomes a problem.
    359  */
    360 static int
    361 gccname(char **ps, char **pp)
    362 {
    363 	int i, n, m, val;
    364 	char *os, *s, *t, *p, *p0, *p1;
    365 
    366 	s = *ps;
    367 	os = s;
    368 	p = *pp;
    369 
    370 /*	print("\tgccname: %s\n", s); */
    371 
    372 	/* basic types */
    373 	if((t = chartabsearch(typetab, *s)) != nil){
    374 		s++;
    375 		strcpy(p, t);
    376 		p += strlen(t);
    377 		goto out;
    378 	}
    379 
    380 	/* modifiers */
    381 	if((t = chartabsearch(modifiertab, *s)) != nil){
    382 		s++;
    383 		if(!gccname(&s, &p))
    384 			return 0;
    385 		/*
    386 		 * These don't end up in the right place
    387 		 * and i don't care anyway
    388 		 * (AssertHeld__C17ReaderWriterMutex)
    389 		 */
    390 		/*
    391 		*p++ = ' ';
    392 		strcpy(p, t);
    393 		p += strlen(p);
    394 		*/
    395 		goto out;
    396 	}
    397 
    398 	switch(*s){
    399 	default:
    400 	bad:
    401 		if(debug)
    402 			fprint(2, "gccname: %s (%s)\n", os, s);
    403 		werrstr("bad name %.20s", s);
    404 		return 0;
    405 
    406 	case '1': case '2': case '3': case '4':	/* length-prefixed string */
    407 	case '5': case '6': case '7': case '8': case '9':
    408 		if(!gccnumber(&s, &n, 1))
    409 			return 0;
    410 		memmove(p, s, n);
    411 		p += n;
    412 		s += n;
    413 		break;
    414 
    415 	case 'A':	/* array */
    416 		t = s;
    417 		s++;
    418 		if(!gccnumber(&s, &n, 1))
    419 			return 0;
    420 		if(*s != '_'){
    421 			werrstr("bad array %.20s", t);
    422 			return 0;
    423 		}
    424 		s++;
    425 		sprint(p, "array[%d] ", n);
    426 		p += strlen(p);
    427 		break;
    428 
    429 	case 'F':	/* function */
    430 		t = s;
    431 		s++;
    432 		strcpy(p, "fn(");
    433 		p += 3;
    434 		/* arguments */
    435 		while(*s && *s != '_')
    436 			if(!gccname(&s, &p))
    437 				return 0;
    438 		if(*s != '_'){
    439 			werrstr("unexpected end in function: %s", t);
    440 			return 0;
    441 		}
    442 		s++;
    443 		strcpy(p, " => ");
    444 		p += 4;
    445 		/* return type */
    446 		if(!gccname(&s, &p))
    447 			return 0;
    448 		*p++ = ')';
    449 		break;
    450 
    451 	case 'H':	/* template specialization */
    452 		if(memcmp(s-2, "__", 2) != 0)
    453 			fprint(2, "wow: %s\n", s-2);
    454 		t = s;
    455 		s++;
    456 		if(!gccnumber(&s, &n, 0))
    457 			return 0;
    458 		p0 = p;
    459 		/* template arguments */
    460 		*p++ = '<';
    461 		for(i=0; i<n; i++){
    462 			val = 1;
    463 			if(*s == 'Z'){	/* argument is a type, not value */
    464 				val = 0;
    465 				s++;
    466 			}
    467 			if(!gccname(&s, &p))
    468 				return 0;
    469 			if(val){
    470 				if(!gccnumber(&s, &m, 1))	/* gccnumber: 1 or 0? */
    471 					return 0;
    472 				sprint(p, "=%d", m);
    473 				p += strlen(p);
    474 			}
    475 			if(i+1<n)
    476 				*p++ = ',';
    477 		}
    478 		*p++ = '>';
    479 		if(*s != '_'){
    480 			werrstr("bad template %s", t);
    481 			return 0;
    482 		}
    483 		s++;
    484 
    485 		/*
    486 		 * Can't seem to tell difference between a qualifying name
    487 		 * and arguments.  Not sure which is which.  It appears that if
    488 		 * you get a name, use it, otherwise look for types.
    489 		 * The G type qualifier appears to have no effect other than
    490 		 * turning an ambiguous name into a definite type.
    491 		 *
    492 		 *	SetFlag__H1Zb_P15FlagSettingMode_v
    493 		 *	=>	void SetFlag<bool>(FlagSettingMode *)
    494 		 *	SetFlag__H1Zb_15FlagSettingMode_v
    495 		 *	=>	void FlagSettingMode::SetFlag<bool>()
    496 		 *	SetFlag__H1Zb_G15FlagSettingMode_v
    497 		 *	=>	void SetFlag<bool>(FlagSettingMode)
    498 		 */
    499 		if(strchr("ACFGPRSUVX", *s)){
    500 			/* args */
    501 			t = s;
    502 			p1 = p;
    503 			*p++ = '(';
    504 			while(*s != '_'){
    505 				if(*s == 0 || !gccname(&s, &p)){
    506 					werrstr("bad H args: %s", t);
    507 					return 0;
    508 				}
    509 			}
    510 			*p++ = ')';
    511 			s++;
    512 		}else{
    513 			p1 = p;
    514 			/* name */
    515 			if(!gccname(&s, &p))
    516 				return 0;
    517 		}
    518 		/*
    519 		 * Need to do some rearrangement of <> () and names here.
    520 		 * Doesn't matter since we strip out the <> and () anyway.
    521 		 */
    522 		break;
    523 
    524 	case 'M':	/* M1S: pointer to member */
    525 		if(*(s+1) != '1' || *(s+2) != 'S')
    526 			goto bad;
    527 		s += 3;
    528 		strcpy(p, "mptr ");
    529 		p += 5;
    530 		if(!gccname(&s, &p))
    531 			return 0;
    532 		break;
    533 
    534 	case 'N':	/* multiply-repeated type */
    535 		s++;
    536 		if(!gccnumber(&s, &n, 0) || !gccnumber(&s, &m, 0))
    537 			return 0;
    538 		sprint(p, "T%dx%d", m, n);
    539 		p += strlen(p);
    540 		break;
    541 
    542 	case 'P':	/* pointer */
    543 		s++;
    544 		strcpy(p, "ptr ");
    545 		p += 4;
    546 		if(!gccname(&s, &p))
    547 			return 0;
    548 		break;
    549 
    550 	case 'Q':	/* qualified name */
    551 		s++;
    552 		if(!gccnumber(&s, &n, 0))
    553 			return 0;
    554 		for(i=0; i<n; i++){
    555 			if(!gccname(&s, &p)){
    556 				werrstr("in hierarchy: %r");
    557 				return 0;
    558 			}
    559 			if(i+1 < n){
    560 				strcpy(p, "::");
    561 				p += 2;
    562 			}
    563 		}
    564 		break;
    565 
    566 	case 'R':	/* reference */
    567 		s++;
    568 		strcpy(p, "ref ");
    569 		p += 4;
    570 		if(!gccname(&s, &p))
    571 			return 0;
    572 		break;
    573 
    574 	case 't':	/* class template instantiation */
    575 		/* should share code with case 'H' */
    576 		t = s;
    577 		s++;
    578 		if(!gccname(&s, &p))
    579 			return 0;
    580 		if(!gccnumber(&s, &n, 0))
    581 			return 0;
    582 		p0 = p;
    583 		/* template arguments */
    584 		*p++ = '<';
    585 		for(i=0; i<n; i++){
    586 			val = 1;
    587 			if(*s == 'Z'){	/* argument is a type, not value */
    588 				val = 0;
    589 				s++;
    590 			}
    591 			if(!gccname(&s, &p))
    592 				return 0;
    593 			if(val){
    594 				if(!gccnumber(&s, &m, 1))	/* gccnumber: 1 or 0? */
    595 					return 0;
    596 				sprint(p, "=%d", m);
    597 				p += strlen(p);
    598 			}
    599 			if(i+1<n)
    600 				*p++ = ',';
    601 		}
    602 		*p++ = '>';
    603 		break;
    604 
    605 	case 'T':	/* once-repeated type */
    606 		s++;
    607 		if(!gccnumber(&s, &n, 0))
    608 			return 0;
    609 		sprint(p, "T%d", n);
    610 		p += strlen(p);
    611 		break;
    612 
    613 	case 'X':	/* type parameter in 'H' */
    614 		if(!isdigit((uchar)*(s+1)) || !isdigit((uchar)*(s+2)))
    615 			goto bad;
    616 		memmove(p, s, 3);
    617 		p += 3;
    618 		s += 3;
    619 		break;
    620 	}
    621 
    622 	USED(p1);
    623 	USED(p0);
    624 
    625 out:
    626 	*ps = s;
    627 	*pp = p;
    628 	return 1;
    629 }