rregexec.c (4728B)
1 #include "lib9.h" 2 #include "regexp9.h" 3 #include "regcomp.h" 4 5 /* 6 * return 0 if no match 7 * >0 if a match 8 * <0 if we ran out of _relist space 9 */ 10 static int 11 rregexec1(Reprog *progp, /* program to run */ 12 Rune *bol, /* string to run machine on */ 13 Resub *mp, /* subexpression elements */ 14 int ms, /* number of elements at mp */ 15 Reljunk *j) 16 { 17 int flag=0; 18 Reinst *inst; 19 Relist *tlp; 20 Rune *s; 21 int i, checkstart; 22 Rune r, *rp, *ep; 23 Relist* tl; /* This list, next list */ 24 Relist* nl; 25 Relist* tle; /* ends of this and next list */ 26 Relist* nle; 27 int match; 28 Rune *p; 29 30 match = 0; 31 checkstart = j->startchar; 32 if(mp) 33 for(i=0; i<ms; i++) { 34 mp[i].s.rsp = 0; 35 mp[i].e.rep = 0; 36 } 37 j->relist[0][0].inst = 0; 38 j->relist[1][0].inst = 0; 39 40 /* Execute machine once for each character, including terminal NUL */ 41 s = j->rstarts; 42 do{ 43 /* fast check for first char */ 44 if(checkstart) { 45 switch(j->starttype) { 46 case RUNE: 47 p = runestrchr(s, j->startchar); 48 if(p == 0 || s == j->reol) 49 return match; 50 s = p; 51 break; 52 case BOL: 53 if(s == bol) 54 break; 55 p = runestrchr(s, '\n'); 56 if(p == 0 || s == j->reol) 57 return match; 58 s = p+1; 59 break; 60 } 61 } 62 63 r = *s; 64 65 /* switch run lists */ 66 tl = j->relist[flag]; 67 tle = j->reliste[flag]; 68 nl = j->relist[flag^=1]; 69 nle = j->reliste[flag]; 70 nl->inst = 0; 71 72 /* Add first instruction to current list */ 73 _rrenewemptythread(tl, progp->startinst, ms, s); 74 75 /* Execute machine until current list is empty */ 76 for(tlp=tl; tlp->inst; tlp++){ 77 for(inst=tlp->inst; ; inst = inst->u2.next){ 78 switch(inst->type){ 79 case RUNE: /* regular character */ 80 if(inst->u1.r == r) 81 if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) 82 return -1; 83 break; 84 case LBRA: 85 tlp->se.m[inst->u1.subid].s.rsp = s; 86 continue; 87 case RBRA: 88 tlp->se.m[inst->u1.subid].e.rep = s; 89 continue; 90 case ANY: 91 if(r != '\n') 92 if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) 93 return -1; 94 break; 95 case ANYNL: 96 if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) 97 return -1; 98 break; 99 case BOL: 100 if(s == bol || *(s-1) == '\n') 101 continue; 102 break; 103 case EOL: 104 if(s == j->reol || r == 0 || r == '\n') 105 continue; 106 break; 107 case CCLASS: 108 ep = inst->u1.cp->end; 109 for(rp = inst->u1.cp->spans; rp < ep; rp += 2) 110 if(r >= rp[0] && r <= rp[1]){ 111 if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) 112 return -1; 113 break; 114 } 115 break; 116 case NCCLASS: 117 ep = inst->u1.cp->end; 118 for(rp = inst->u1.cp->spans; rp < ep; rp += 2) 119 if(r >= rp[0] && r <= rp[1]) 120 break; 121 if(rp == ep) 122 if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) 123 return -1; 124 break; 125 case OR: 126 /* evaluate right choice later */ 127 if(_renewthread(tlp, inst->u1.right, ms, &tlp->se) == tle) 128 return -1; 129 /* efficiency: advance and re-evaluate */ 130 continue; 131 case END: /* Match! */ 132 match = 1; 133 tlp->se.m[0].e.rep = s; 134 if(mp != 0) 135 _renewmatch(mp, ms, &tlp->se); 136 break; 137 } 138 break; 139 } 140 } 141 if(s == j->reol) 142 break; 143 checkstart = j->startchar && nl->inst==0; 144 s++; 145 }while(r); 146 return match; 147 } 148 149 static int 150 rregexec2(Reprog *progp, /* program to run */ 151 Rune *bol, /* string to run machine on */ 152 Resub *mp, /* subexpression elements */ 153 int ms, /* number of elements at mp */ 154 Reljunk *j 155 ) 156 { 157 Relist relist0[5*LISTSIZE], relist1[5*LISTSIZE]; 158 159 /* mark space */ 160 j->relist[0] = relist0; 161 j->relist[1] = relist1; 162 j->reliste[0] = relist0 + nelem(relist0) - 2; 163 j->reliste[1] = relist1 + nelem(relist1) - 2; 164 165 return rregexec1(progp, bol, mp, ms, j); 166 } 167 168 extern int 169 rregexec(Reprog *progp, /* program to run */ 170 Rune *bol, /* string to run machine on */ 171 Resub *mp, /* subexpression elements */ 172 int ms) /* number of elements at mp */ 173 { 174 Reljunk j; 175 Relist relist0[LISTSIZE], relist1[LISTSIZE]; 176 int rv; 177 178 /* 179 * use user-specified starting/ending location if specified 180 */ 181 j.rstarts = bol; 182 j.reol = 0; 183 if(mp && ms>0){ 184 if(mp->s.sp) 185 j.rstarts = mp->s.rsp; 186 if(mp->e.ep) 187 j.reol = mp->e.rep; 188 } 189 j.starttype = 0; 190 j.startchar = 0; 191 if(progp->startinst->type == RUNE && progp->startinst->u1.r < Runeself) { 192 j.starttype = RUNE; 193 j.startchar = progp->startinst->u1.r; 194 } 195 if(progp->startinst->type == BOL) 196 j.starttype = BOL; 197 198 /* mark space */ 199 j.relist[0] = relist0; 200 j.relist[1] = relist1; 201 j.reliste[0] = relist0 + nelem(relist0) - 2; 202 j.reliste[1] = relist1 + nelem(relist1) - 2; 203 204 rv = rregexec1(progp, bol, mp, ms, &j); 205 if(rv >= 0) 206 return rv; 207 rv = rregexec2(progp, bol, mp, ms, &j); 208 if(rv >= 0) 209 return rv; 210 return -1; 211 }