plan9port

fork of plan9port with libvec, libstr and libsdb
Log | Files | Refs | README | LICENSE

comp.c (4240B)


      1 #include	"grep.h"
      2 
      3 /*
      4  * incremental compiler.
      5  * add the branch c to the
      6  * state s.
      7  */
      8 void
      9 increment(State *s, int c)
     10 {
     11 	int i;
     12 	State *t, **tt;
     13 	Re *re1, *re2;
     14 
     15 	nfollow = 0;
     16 	gen++;
     17 	matched = 0;
     18 	for(i=0; i<s->count; i++)
     19 		fol1(s->re[i], c);
     20 	qsort(follow, nfollow, sizeof(*follow), fcmp);
     21 	for(tt=&state0; t = *tt;) {
     22 		if(t->count > nfollow) {
     23 			tt = &t->linkleft;
     24 			goto cont;
     25 		}
     26 		if(t->count < nfollow) {
     27 			tt = &t->linkright;
     28 			goto cont;
     29 		}
     30 		for(i=0; i<nfollow; i++) {
     31 			re1 = t->re[i];
     32 			re2 = follow[i];
     33 			if(re1 > re2) {
     34 				tt = &t->linkleft;
     35 				goto cont;
     36 			}
     37 			if(re1 < re2) {
     38 				tt = &t->linkright;
     39 				goto cont;
     40 			}
     41 		}
     42 		if(!!matched && !t->match) {
     43 			tt = &t->linkleft;
     44 			goto cont;
     45 		}
     46 		if(!matched && !!t->match) {
     47 			tt = &t->linkright;
     48 			goto cont;
     49 		}
     50 		s->next[c] = t;
     51 		return;
     52 	cont:;
     53 	}
     54 
     55 	t = sal(nfollow);
     56 	*tt = t;
     57 	for(i=0; i<nfollow; i++) {
     58 		re1 = follow[i];
     59 		t->re[i] = re1;
     60 	}
     61 	s->next[c] = t;
     62 	t->match = matched;
     63 }
     64 
     65 int
     66 fcmp(const void *va, const void *vb)
     67 {
     68 	Re **aa, **bb;
     69 	Re *a, *b;
     70 
     71 	aa = (Re**)va;
     72 	bb = (Re**)vb;
     73 	a = *aa;
     74 	b = *bb;
     75 	if(a > b)
     76 		return 1;
     77 	if(a < b)
     78 		return -1;
     79 	return 0;
     80 }
     81 
     82 void
     83 fol1(Re *r, int c)
     84 {
     85 	Re *r1;
     86 
     87 loop:
     88 	if(r->gen == gen)
     89 		return;
     90 	if(nfollow >= maxfollow)
     91 		error("nfollow");
     92 	r->gen = gen;
     93 	switch(r->type) {
     94 	default:
     95 		error("fol1");
     96 
     97 	case Tcase:
     98 		if(c >= 0 && c < 256)
     99 		if(r1 = r->u.cases[c])
    100 			follow[nfollow++] = r1;
    101 		if(r = r->next)
    102 			goto loop;
    103 		break;
    104 
    105 	case Talt:
    106 	case Tor:
    107 		fol1(r->u.alt, c);
    108 		r = r->next;
    109 		goto loop;
    110 
    111 	case Tbegin:
    112 		if(c == '\n' || c == Cbegin)
    113 			follow[nfollow++] = r->next;
    114 		break;
    115 
    116 	case Tend:
    117 		if(c == '\n') {
    118 			if(r->next == 0) {
    119 				matched = 1;
    120 				break;
    121 			}
    122 			r = r->next;
    123 			goto loop;
    124 		}
    125 		break;
    126 
    127 	case Tclass:
    128 		if(c >= r->u.x.lo && c <= r->u.x.hi)
    129 			follow[nfollow++] = r->next;
    130 		break;
    131 	}
    132 }
    133 
    134 Rune	tab1[] =
    135 {
    136 	0x007f,
    137 	0x07ff
    138 };
    139 Rune	tab2[] =
    140 {
    141 	0x003f,
    142 	0x0fff
    143 };
    144 
    145 Re2
    146 rclass(Rune p0, Rune p1)
    147 {
    148 	char xc0[6], xc1[6];
    149 	int i, n, m;
    150 	Re2 x;
    151 
    152 	if(p0 > p1)
    153 		return re2char(0xff, 0xff);	/* no match */
    154 
    155 	/*
    156 	 * bust range into same length
    157 	 * character sequences
    158 	 */
    159 	for(i=0; i<nelem(tab1); i++) {
    160 		m = tab1[i];
    161 		if(p0 <= m && p1 > m)
    162 			return re2or(rclass(p0, m), rclass(m+1, p1));
    163 	}
    164 
    165 	/*
    166 	 * bust range into part of a single page
    167 	 * or into full pages
    168 	 */
    169 	for(i=0; i<nelem(tab2); i++) {
    170 		m = tab2[i];
    171 		if((p0 & ~m) != (p1 & ~m)) {
    172 			if((p0 & m) != 0)
    173 				return re2or(rclass(p0, p0|m), rclass((p0|m)+1, p1));
    174 			if((p1 & m) != m)
    175 				return re2or(rclass(p0, (p1&~m)-1), rclass(p1&~m, p1));
    176 		}
    177 	}
    178 
    179 	n = runetochar(xc0, &p0);
    180 	i = runetochar(xc1, &p1);
    181 	if(i != n)
    182 		error("length");
    183 
    184 	x = re2char(xc0[0], xc1[0]);
    185 	for(i=1; i<n; i++)
    186 		x = re2cat(x, re2char(xc0[i], xc1[i]));
    187 	return x;
    188 }
    189 
    190 int
    191 pcmp(const void *va, const void *vb)
    192 {
    193 	int n;
    194 	Rune *a, *b;
    195 
    196 	a = (Rune*)va;
    197 	b = (Rune*)vb;
    198 
    199 	n = a[0] - b[0];
    200 	if(n)
    201 		return n;
    202 	return a[1] - b[1];
    203 }
    204 
    205 /*
    206  * convert character chass into
    207  * run-pair ranges of matches.
    208  * this is 10646/utf specific and
    209  * needs to be changed for some
    210  * other input character set.
    211  * this is the key to a fast
    212  * regular search of characters
    213  * by looking at sequential bytes.
    214  */
    215 Re2
    216 re2class(char *s)
    217 {
    218 	Rune pairs[200], *p, *q, ov;
    219 	int nc;
    220 	Re2 x;
    221 
    222 	nc = 0;
    223 	if(*s == '^') {
    224 		nc = 1;
    225 		s++;
    226 	}
    227 
    228 	p = pairs;
    229 	s += chartorune(p, s);
    230 	for(;;) {
    231 		if(*p == '\\')
    232 			s += chartorune(p, s);
    233 		if(*p == 0)
    234 			break;
    235 		p[1] = *p;
    236 		p += 2;
    237 		s += chartorune(p, s);
    238 		if(*p != '-')
    239 			continue;
    240 		s += chartorune(p, s);
    241 		if(*p == '\\')
    242 			s += chartorune(p, s);
    243 		if(*p == 0)
    244 			break;
    245 		p[-1] = *p;
    246 		s += chartorune(p, s);
    247 	}
    248 	*p = 0;
    249 	qsort(pairs, (p-pairs)/2, 2*sizeof(*pairs), pcmp);
    250 
    251 	q = pairs;
    252 	for(p=pairs+2; *p; p+=2) {
    253 		if(p[0] > p[1])
    254 			continue;
    255 		if(p[0] > q[1] || p[1] < q[0]) {
    256 			q[2] = p[0];
    257 			q[3] = p[1];
    258 			q += 2;
    259 			continue;
    260 		}
    261 		if(p[0] < q[0])
    262 			q[0] = p[0];
    263 		if(p[1] > q[1])
    264 			q[1] = p[1];
    265 	}
    266 	q[2] = 0;
    267 
    268 	p = pairs;
    269 	if(nc) {
    270 		x = rclass(0, p[0]-1);
    271 		ov = p[1]+1;
    272 		for(p+=2; *p; p+=2) {
    273 			x = re2or(x, rclass(ov, p[0]-1));
    274 			ov = p[1]+1;
    275 		}
    276 		x = re2or(x, rclass(ov, 0xffff));
    277 	} else {
    278 		x = rclass(p[0], p[1]);
    279 		for(p+=2; *p; p+=2)
    280 			x = re2or(x, rclass(p[0], p[1]));
    281 	}
    282 	return x;
    283 }