comp.c (4240B)
1 #include "grep.h" 2 3 /* 4 * incremental compiler. 5 * add the branch c to the 6 * state s. 7 */ 8 void 9 increment(State *s, int c) 10 { 11 int i; 12 State *t, **tt; 13 Re *re1, *re2; 14 15 nfollow = 0; 16 gen++; 17 matched = 0; 18 for(i=0; i<s->count; i++) 19 fol1(s->re[i], c); 20 qsort(follow, nfollow, sizeof(*follow), fcmp); 21 for(tt=&state0; t = *tt;) { 22 if(t->count > nfollow) { 23 tt = &t->linkleft; 24 goto cont; 25 } 26 if(t->count < nfollow) { 27 tt = &t->linkright; 28 goto cont; 29 } 30 for(i=0; i<nfollow; i++) { 31 re1 = t->re[i]; 32 re2 = follow[i]; 33 if(re1 > re2) { 34 tt = &t->linkleft; 35 goto cont; 36 } 37 if(re1 < re2) { 38 tt = &t->linkright; 39 goto cont; 40 } 41 } 42 if(!!matched && !t->match) { 43 tt = &t->linkleft; 44 goto cont; 45 } 46 if(!matched && !!t->match) { 47 tt = &t->linkright; 48 goto cont; 49 } 50 s->next[c] = t; 51 return; 52 cont:; 53 } 54 55 t = sal(nfollow); 56 *tt = t; 57 for(i=0; i<nfollow; i++) { 58 re1 = follow[i]; 59 t->re[i] = re1; 60 } 61 s->next[c] = t; 62 t->match = matched; 63 } 64 65 int 66 fcmp(const void *va, const void *vb) 67 { 68 Re **aa, **bb; 69 Re *a, *b; 70 71 aa = (Re**)va; 72 bb = (Re**)vb; 73 a = *aa; 74 b = *bb; 75 if(a > b) 76 return 1; 77 if(a < b) 78 return -1; 79 return 0; 80 } 81 82 void 83 fol1(Re *r, int c) 84 { 85 Re *r1; 86 87 loop: 88 if(r->gen == gen) 89 return; 90 if(nfollow >= maxfollow) 91 error("nfollow"); 92 r->gen = gen; 93 switch(r->type) { 94 default: 95 error("fol1"); 96 97 case Tcase: 98 if(c >= 0 && c < 256) 99 if(r1 = r->u.cases[c]) 100 follow[nfollow++] = r1; 101 if(r = r->next) 102 goto loop; 103 break; 104 105 case Talt: 106 case Tor: 107 fol1(r->u.alt, c); 108 r = r->next; 109 goto loop; 110 111 case Tbegin: 112 if(c == '\n' || c == Cbegin) 113 follow[nfollow++] = r->next; 114 break; 115 116 case Tend: 117 if(c == '\n') { 118 if(r->next == 0) { 119 matched = 1; 120 break; 121 } 122 r = r->next; 123 goto loop; 124 } 125 break; 126 127 case Tclass: 128 if(c >= r->u.x.lo && c <= r->u.x.hi) 129 follow[nfollow++] = r->next; 130 break; 131 } 132 } 133 134 Rune tab1[] = 135 { 136 0x007f, 137 0x07ff 138 }; 139 Rune tab2[] = 140 { 141 0x003f, 142 0x0fff 143 }; 144 145 Re2 146 rclass(Rune p0, Rune p1) 147 { 148 char xc0[6], xc1[6]; 149 int i, n, m; 150 Re2 x; 151 152 if(p0 > p1) 153 return re2char(0xff, 0xff); /* no match */ 154 155 /* 156 * bust range into same length 157 * character sequences 158 */ 159 for(i=0; i<nelem(tab1); i++) { 160 m = tab1[i]; 161 if(p0 <= m && p1 > m) 162 return re2or(rclass(p0, m), rclass(m+1, p1)); 163 } 164 165 /* 166 * bust range into part of a single page 167 * or into full pages 168 */ 169 for(i=0; i<nelem(tab2); i++) { 170 m = tab2[i]; 171 if((p0 & ~m) != (p1 & ~m)) { 172 if((p0 & m) != 0) 173 return re2or(rclass(p0, p0|m), rclass((p0|m)+1, p1)); 174 if((p1 & m) != m) 175 return re2or(rclass(p0, (p1&~m)-1), rclass(p1&~m, p1)); 176 } 177 } 178 179 n = runetochar(xc0, &p0); 180 i = runetochar(xc1, &p1); 181 if(i != n) 182 error("length"); 183 184 x = re2char(xc0[0], xc1[0]); 185 for(i=1; i<n; i++) 186 x = re2cat(x, re2char(xc0[i], xc1[i])); 187 return x; 188 } 189 190 int 191 pcmp(const void *va, const void *vb) 192 { 193 int n; 194 Rune *a, *b; 195 196 a = (Rune*)va; 197 b = (Rune*)vb; 198 199 n = a[0] - b[0]; 200 if(n) 201 return n; 202 return a[1] - b[1]; 203 } 204 205 /* 206 * convert character chass into 207 * run-pair ranges of matches. 208 * this is 10646/utf specific and 209 * needs to be changed for some 210 * other input character set. 211 * this is the key to a fast 212 * regular search of characters 213 * by looking at sequential bytes. 214 */ 215 Re2 216 re2class(char *s) 217 { 218 Rune pairs[200], *p, *q, ov; 219 int nc; 220 Re2 x; 221 222 nc = 0; 223 if(*s == '^') { 224 nc = 1; 225 s++; 226 } 227 228 p = pairs; 229 s += chartorune(p, s); 230 for(;;) { 231 if(*p == '\\') 232 s += chartorune(p, s); 233 if(*p == 0) 234 break; 235 p[1] = *p; 236 p += 2; 237 s += chartorune(p, s); 238 if(*p != '-') 239 continue; 240 s += chartorune(p, s); 241 if(*p == '\\') 242 s += chartorune(p, s); 243 if(*p == 0) 244 break; 245 p[-1] = *p; 246 s += chartorune(p, s); 247 } 248 *p = 0; 249 qsort(pairs, (p-pairs)/2, 2*sizeof(*pairs), pcmp); 250 251 q = pairs; 252 for(p=pairs+2; *p; p+=2) { 253 if(p[0] > p[1]) 254 continue; 255 if(p[0] > q[1] || p[1] < q[0]) { 256 q[2] = p[0]; 257 q[3] = p[1]; 258 q += 2; 259 continue; 260 } 261 if(p[0] < q[0]) 262 q[0] = p[0]; 263 if(p[1] > q[1]) 264 q[1] = p[1]; 265 } 266 q[2] = 0; 267 268 p = pairs; 269 if(nc) { 270 x = rclass(0, p[0]-1); 271 ov = p[1]+1; 272 for(p+=2; *p; p+=2) { 273 x = re2or(x, rclass(ov, p[0]-1)); 274 ov = p[1]+1; 275 } 276 x = re2or(x, rclass(ov, 0xffff)); 277 } else { 278 x = rclass(p[0], p[1]); 279 for(p+=2; *p; p+=2) 280 x = re2or(x, rclass(p[0], p[1])); 281 } 282 return x; 283 }