commit 3d31240bfdaee03aff28103b530ff593e9ddbbc5
parent da3ed55e4e4d2d3052b03df3dd0a40d19f99ea70
Author: David du Colombier <0intro@gmail.com>
Date:   Tue,  2 Jul 2013 06:39:17 +0200
libregexp: update from Plan 9
R=rsc
https://codereview.appspot.com/10690044
Diffstat:
5 files changed, 25 insertions(+), 24 deletions(-)
diff --git a/src/libregexp/regcomp.c b/src/libregexp/regcomp.c
@@ -15,6 +15,12 @@ struct Node
 	Reinst*	last;
 }Node;
 
+/* max character classes per program is nelem(reprog->class) */
+static Reprog	*reprog;
+
+/* max rune ranges per character class is nelem(classp->spans)/2 */
+#define NCCRUNE	nelem(classp->spans)
+
 #define	NSTACK	20
 static	Node	andstack[NSTACK];
 static	Node	*andp;
@@ -321,8 +327,8 @@ dump(Reprog *pp)
 static	Reclass*
 newclass(void)
 {
-	if(nclass >= NCLASS)
-		regerr2("too many character classes; limit", NCLASS+'0');
+	if(nclass >= nelem(reprog->class))
+		rcerror("too many character classes; increase Reprog.class size");
 	return &(classp[nclass++]);
 }
 
@@ -407,7 +413,7 @@ bldcclass(void)
 	}
 
 	/* parse class into a set of spans */
-	for(; ep<&r[NCCRUNE];){
+	while(ep < &r[NCCRUNE-1]){
 		if(rune == 0){
 			rcerror("malformed '[]'");
 			return 0;
@@ -431,6 +437,10 @@ bldcclass(void)
 		}
 		quoted = nextc(&rune);
 	}
+	if(ep >= &r[NCCRUNE-1]) {
+		rcerror("char class too large; increase Reclass.spans size");
+		return 0;
+	}
 
 	/* sort on span start */
 	for(p = r; p < ep; p += 2){
@@ -454,9 +464,10 @@ bldcclass(void)
 		np[0] = *p++;
 		np[1] = *p++;
 		for(; p < ep; p += 2)
-			if(p[0] <= np[1]){
-				if(p[1] > np[1])
-					np[1] = p[1];
+			/* overlapping or adjacent ranges? */
+			if(p[0] <= np[1] + 1){
+				if(p[1] >= np[1])
+					np[1] = p[1];	/* coalesce */
 			} else {
 				np += 2;
 				np[0] = p[0];
diff --git a/src/libregexp/regcomp.h b/src/libregexp/regcomp.h
@@ -12,13 +12,6 @@ struct	Resublist
 	Resub	m[NSUBEXP];
 };
 
-/* max character classes per program */
-extern Reprog	RePrOg;
-#define	NCLASS	(sizeof(RePrOg.class)/sizeof(Reclass))
-
-/* max rune ranges per character class */
-#define NCCRUNE	(sizeof(Reclass)/sizeof(Rune))
-
 /*
  * Actions and Tokens (Reinst types)
  *
@@ -48,7 +41,7 @@ extern Reprog	RePrOg;
  *  regexec execution lists
  */
 #define LISTSIZE	10
-#define BIGLISTSIZE	(10*LISTSIZE)
+#define BIGLISTSIZE	(25*LISTSIZE)
 typedef struct Relist	Relist;
 struct Relist
 {
diff --git a/src/libregexp/regsub.c b/src/libregexp/regsub.c
@@ -27,7 +27,7 @@ regsub(char *sp,	/* source string */
 			case '8':
 			case '9':
 				i = *sp-'0';
-				if(mp[i].s.sp != 0 && mp!=0 && ms>i)
+				if(mp!=0 && mp[i].s.sp != 0 && ms>i)
 					for(ssp = mp[i].s.sp;
 					     ssp < mp[i].e.ep;
 					     ssp++)
@@ -46,9 +46,8 @@ regsub(char *sp,	/* source string */
 					*dp++ = *sp;
 				break;
 			}
-		}else if(*sp == '&'){				
-			if(mp[0].s.sp != 0 && mp!=0 && ms>0)
-			if(mp[0].s.sp != 0)
+		}else if(*sp == '&'){
+			if(mp!=0 && mp[0].s.sp != 0 && ms>0)
 				for(ssp = mp[0].s.sp;
 				     ssp < mp[0].e.ep; ssp++)
 					if(dp < ep)
diff --git a/src/libregexp/test.c b/src/libregexp/test.c
@@ -22,17 +22,16 @@ struct x t[] = {
 	{ 0, 0, 0 },
 };
 
+int
 main(int ac, char **av)
 {
 	Resub rs[10];
 	char dst[128];
-	int n;
 	struct x *tp;
 
 	for(tp = t; tp->re; tp++)
 		tp->p = regcomp(tp->re);
 
-
 	for(tp = t; tp->re; tp++){
 		print("%s VIA %s", av[1], tp->re);
 		memset(rs, 0, sizeof rs);
diff --git a/src/libregexp/test2.c b/src/libregexp/test2.c
@@ -1,20 +1,19 @@
 #include "lib9.h"
 #include <regexp9.h>
 
-
+int
 main(int ac, char **av)
 {
 	Resub rs[10];
 	Reprog *p;
 	char *s;
-	int i;
 
 	p = regcomp("[^a-z]");
 	s = "\n";
 	if(regexec(p, s, rs, 10))
-		print("%s %lux %lux %lux\n", s, s, rs[0].sp, rs[0].ep);
+		print("%s %lux %lux %lux\n", s, s, rs[0].s.sp, rs[0].e.ep);
 	s = "0";
 	if(regexec(p, s, rs, 10))
-		print("%s %lux %lux %lux\n", s, s, rs[0].sp, rs[0].ep);
+		print("%s %lux %lux %lux\n", s, s, rs[0].s.sp, rs[0].e.ep);
 	exit(0);
 }