plan9port

fork of plan9port with libvec, libstr and libsdb
Log | Files | Refs | README | LICENSE

regexp.3 (3556B)


      1 .TH REGEXP 3
      2 .SH NAME
      3 regcomp, regcomplit, regcompnl, regexec, regsub, rregexec, rregsub, regerror \- regular expression
      4 .SH SYNOPSIS
      5 .B #include <u.h>
      6 .br
      7 .B #include <libc.h>
      8 .br
      9 .B #include <regexp.h>
     10 .PP
     11 .ta \w'\fLRegprog 'u
     12 .B
     13 Reprog	*regcomp(char *exp)
     14 .PP
     15 .B
     16 Reprog	*regcomplit(char *exp)
     17 .PP
     18 .B
     19 Reprog	*regcompnl(char *exp)
     20 .PP
     21 .nf
     22 .B
     23 int  regexec(Reprog *prog, char *string, Resub *match, int msize)
     24 .PP
     25 .nf
     26 .B
     27 void regsub(char *source, char *dest, int dlen, Resub *match, int msize)
     28 .PP
     29 .nf
     30 .B
     31 int  rregexec(Reprog *prog, Rune *string, Resub *match, int msize)
     32 .PP
     33 .nf
     34 .B
     35 void rregsub(Rune *source, Rune *dest, int dlen, Resub *match, int msize)
     36 .PP
     37 .B
     38 void regerror(char *msg)
     39 .SH DESCRIPTION
     40 .I Regcomp
     41 compiles a
     42 regular expression and returns
     43 a pointer to the generated description.
     44 The space is allocated by
     45 .MR malloc (3)
     46 and may be released by
     47 .IR free .
     48 Regular expressions are exactly as in
     49 .MR regexp (7) .
     50 .PP
     51 .I Regcomplit
     52 is like
     53 .I regcomp
     54 except that all characters are treated literally.
     55 .I Regcompnl
     56 is like
     57 .I regcomp
     58 except that the
     59 .B .
     60 metacharacter matches all characters, including newlines.
     61 .PP
     62 .I Regexec
     63 matches a null-terminated
     64 .I string
     65 against the compiled regular expression in
     66 .IR prog .
     67 If it matches,
     68 .I regexec
     69 returns
     70 .B 1
     71 and fills in the array
     72 .I match
     73 with character pointers to the substrings of
     74 .I string
     75 that correspond to the
     76 parenthesized subexpressions of 
     77 .IR exp :
     78 .BI match[ i ].s.sp
     79 points to the beginning and
     80 .BI match[ i ].e.ep
     81 points just beyond
     82 the end of the
     83 .IR i th
     84 substring.
     85 (Subexpression
     86 .I i
     87 begins at the
     88 .IR i th
     89 left parenthesis, counting from 1.)
     90 Pointers in
     91 .B match[0]
     92 pick out the substring that corresponds to
     93 the whole regular expression.
     94 Unused elements of
     95 .I match
     96 are filled with zeros.
     97 Matches involving
     98 .LR * ,
     99 .LR + ,
    100 and 
    101 .L ?
    102 are extended as far as possible.
    103 The number of array elements in 
    104 .I match
    105 is given by
    106 .IR msize .
    107 The structure of elements of
    108 .I match 
    109 is:
    110 .IP
    111 .EX
    112 typedef struct {
    113 	union {
    114 	   char *sp;
    115 	   Rune *rsp;
    116 	} s;
    117 	union {
    118 	   char *ep;
    119 	   Rune *rep;
    120 	} e;
    121 } Resub;
    122 .EE
    123 .LP
    124 If
    125 .B match[0].s.sp
    126 is nonzero on entry,
    127 .I regexec
    128 starts matching at that point within
    129 .IR string .
    130 If
    131 .B match[0].e.ep
    132 is nonzero on entry,
    133 the last character matched is the one
    134 preceding that point.
    135 .PP
    136 .I Regsub
    137 places in
    138 .I dest
    139 a substitution instance of
    140 .I source
    141 in the context of the last
    142 .I regexec
    143 performed using
    144 .IR match .
    145 Each instance of
    146 .BI \e n\f1,
    147 where
    148 .I n
    149 is a digit, is replaced by the
    150 string delimited by
    151 .BI match[ n ].s.sp
    152 and
    153 .BI match[ n ].e.ep\f1.
    154 Each instance of 
    155 .L &
    156 is replaced by the string delimited by
    157 .B match[0].s.sp
    158 and
    159 .BR match[0].e.ep .
    160 The substitution will always be null-terminated and
    161 trimmed to fit into \fIdlen\f1 bytes.
    162 .PP
    163 .IR Regerror ,
    164 called whenever an error is detected in
    165 .IR regcomp ,
    166 writes the string
    167 .I msg
    168 on the standard error file and exits.
    169 .I Regerror
    170 can be replaced to perform
    171 special error processing.
    172 If the user-supplied
    173 .I regerror
    174 returns rather than exits,
    175 .I regcomp
    176 will return 0. 
    177 .PP
    178 .I Rregexec
    179 and
    180 .I rregsub
    181 are variants of 
    182 .I regexec
    183 and
    184 .I regsub
    185 that use strings of
    186 .B Runes
    187 instead of strings of
    188 .BR chars .
    189 With these routines, the 
    190 .I rsp
    191 and
    192 .I rep
    193 fields of the
    194 .I match
    195 array elements should be used.
    196 .SH SOURCE
    197 .B \*9/src/libregexp
    198 .SH "SEE ALSO"
    199 .MR grep (1)
    200 .SH DIAGNOSTICS
    201 .I Regcomp
    202 returns 
    203 .B 0
    204 for an illegal expression
    205 or other failure.
    206 .I Regexec
    207 returns 0
    208 if
    209 .I string
    210 is not matched.
    211 .SH BUGS
    212 There is no way to specify or match a NUL character; NULs terminate patterns and strings.