plan9port

fork of plan9port with libvec, libstr and libsdb
Log | Files | Refs | README | LICENSE

rregexec.c (4728B)


      1 #include "lib9.h"
      2 #include "regexp9.h"
      3 #include "regcomp.h"
      4 
      5 /*
      6  *  return	0 if no match
      7  *		>0 if a match
      8  *		<0 if we ran out of _relist space
      9  */
     10 static int
     11 rregexec1(Reprog *progp,	/* program to run */
     12 	Rune *bol,		/* string to run machine on */
     13 	Resub *mp,		/* subexpression elements */
     14 	int ms,			/* number of elements at mp */
     15 	Reljunk *j)
     16 {
     17 	int flag=0;
     18 	Reinst *inst;
     19 	Relist *tlp;
     20 	Rune *s;
     21 	int i, checkstart;
     22 	Rune r, *rp, *ep;
     23 	Relist* tl;		/* This list, next list */
     24 	Relist* nl;
     25 	Relist* tle;		/* ends of this and next list */
     26 	Relist* nle;
     27 	int match;
     28 	Rune *p;
     29 
     30 	match = 0;
     31 	checkstart = j->startchar;
     32 	if(mp)
     33 		for(i=0; i<ms; i++) {
     34 			mp[i].s.rsp = 0;
     35 			mp[i].e.rep = 0;
     36 		}
     37 	j->relist[0][0].inst = 0;
     38 	j->relist[1][0].inst = 0;
     39 
     40 	/* Execute machine once for each character, including terminal NUL */
     41 	s = j->rstarts;
     42 	do{
     43 		/* fast check for first char */
     44 		if(checkstart) {
     45 			switch(j->starttype) {
     46 			case RUNE:
     47 				p = runestrchr(s, j->startchar);
     48 				if(p == 0 || s == j->reol)
     49 					return match;
     50 				s = p;
     51 				break;
     52 			case BOL:
     53 				if(s == bol)
     54 					break;
     55 				p = runestrchr(s, '\n');
     56 				if(p == 0 || s == j->reol)
     57 					return match;
     58 				s = p+1;
     59 				break;
     60 			}
     61 		}
     62 
     63 		r = *s;
     64 
     65 		/* switch run lists */
     66 		tl = j->relist[flag];
     67 		tle = j->reliste[flag];
     68 		nl = j->relist[flag^=1];
     69 		nle = j->reliste[flag];
     70 		nl->inst = 0;
     71 
     72 		/* Add first instruction to current list */
     73 		_rrenewemptythread(tl, progp->startinst, ms, s);
     74 
     75 		/* Execute machine until current list is empty */
     76 		for(tlp=tl; tlp->inst; tlp++){
     77 			for(inst=tlp->inst; ; inst = inst->u2.next){
     78 				switch(inst->type){
     79 				case RUNE:	/* regular character */
     80 					if(inst->u1.r == r)
     81 						if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
     82 							return -1;
     83 					break;
     84 				case LBRA:
     85 					tlp->se.m[inst->u1.subid].s.rsp = s;
     86 					continue;
     87 				case RBRA:
     88 					tlp->se.m[inst->u1.subid].e.rep = s;
     89 					continue;
     90 				case ANY:
     91 					if(r != '\n')
     92 						if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
     93 							return -1;
     94 					break;
     95 				case ANYNL:
     96 					if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
     97 							return -1;
     98 					break;
     99 				case BOL:
    100 					if(s == bol || *(s-1) == '\n')
    101 						continue;
    102 					break;
    103 				case EOL:
    104 					if(s == j->reol || r == 0 || r == '\n')
    105 						continue;
    106 					break;
    107 				case CCLASS:
    108 					ep = inst->u1.cp->end;
    109 					for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
    110 						if(r >= rp[0] && r <= rp[1]){
    111 							if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
    112 								return -1;
    113 							break;
    114 						}
    115 					break;
    116 				case NCCLASS:
    117 					ep = inst->u1.cp->end;
    118 					for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
    119 						if(r >= rp[0] && r <= rp[1])
    120 							break;
    121 					if(rp == ep)
    122 						if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
    123 							return -1;
    124 					break;
    125 				case OR:
    126 					/* evaluate right choice later */
    127 					if(_renewthread(tlp, inst->u1.right, ms, &tlp->se) == tle)
    128 						return -1;
    129 					/* efficiency: advance and re-evaluate */
    130 					continue;
    131 				case END:	/* Match! */
    132 					match = 1;
    133 					tlp->se.m[0].e.rep = s;
    134 					if(mp != 0)
    135 						_renewmatch(mp, ms, &tlp->se);
    136 					break;
    137 				}
    138 				break;
    139 			}
    140 		}
    141 		if(s == j->reol)
    142 			break;
    143 		checkstart = j->startchar && nl->inst==0;
    144 		s++;
    145 	}while(r);
    146 	return match;
    147 }
    148 
    149 static int
    150 rregexec2(Reprog *progp,	/* program to run */
    151 	Rune *bol,	/* string to run machine on */
    152 	Resub *mp,	/* subexpression elements */
    153 	int ms,		/* number of elements at mp */
    154 	Reljunk *j
    155 )
    156 {
    157 	Relist relist0[5*LISTSIZE], relist1[5*LISTSIZE];
    158 
    159 	/* mark space */
    160 	j->relist[0] = relist0;
    161 	j->relist[1] = relist1;
    162 	j->reliste[0] = relist0 + nelem(relist0) - 2;
    163 	j->reliste[1] = relist1 + nelem(relist1) - 2;
    164 
    165 	return rregexec1(progp, bol, mp, ms, j);
    166 }
    167 
    168 extern int
    169 rregexec(Reprog *progp,	/* program to run */
    170 	Rune *bol,	/* string to run machine on */
    171 	Resub *mp,	/* subexpression elements */
    172 	int ms)		/* number of elements at mp */
    173 {
    174 	Reljunk j;
    175 	Relist relist0[LISTSIZE], relist1[LISTSIZE];
    176 	int rv;
    177 
    178 	/*
    179  	 *  use user-specified starting/ending location if specified
    180 	 */
    181 	j.rstarts = bol;
    182 	j.reol = 0;
    183 	if(mp && ms>0){
    184 		if(mp->s.sp)
    185 			j.rstarts = mp->s.rsp;
    186 		if(mp->e.ep)
    187 			j.reol = mp->e.rep;
    188 	}
    189 	j.starttype = 0;
    190 	j.startchar = 0;
    191 	if(progp->startinst->type == RUNE && progp->startinst->u1.r < Runeself) {
    192 		j.starttype = RUNE;
    193 		j.startchar = progp->startinst->u1.r;
    194 	}
    195 	if(progp->startinst->type == BOL)
    196 		j.starttype = BOL;
    197 
    198 	/* mark space */
    199 	j.relist[0] = relist0;
    200 	j.relist[1] = relist1;
    201 	j.reliste[0] = relist0 + nelem(relist0) - 2;
    202 	j.reliste[1] = relist1 + nelem(relist1) - 2;
    203 
    204 	rv = rregexec1(progp, bol, mp, ms, &j);
    205 	if(rv >= 0)
    206 		return rv;
    207 	rv = rregexec2(progp, bol, mp, ms, &j);
    208 	if(rv >= 0)
    209 		return rv;
    210 	return -1;
    211 }