0
|
1 /*
|
|
2 * CDDL HEADER START
|
|
3 *
|
|
4 * The contents of this file are subject to the terms of the
|
|
5 * Common Development and Distribution License, Version 1.0 only
|
|
6 * (the "License"). You may not use this file except in compliance
|
|
7 * with the License.
|
|
8 *
|
|
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
|
10 * or http://www.opensolaris.org/os/licensing.
|
|
11 * See the License for the specific language governing permissions
|
|
12 * and limitations under the License.
|
|
13 *
|
|
14 * When distributing Covered Code, include this CDDL HEADER in each
|
|
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
|
16 * If applicable, add the following below this CDDL HEADER, with the
|
|
17 * fields enclosed by brackets "[]" replaced with your own identifying
|
|
18 * information: Portions Copyright [yyyy] [name of copyright owner]
|
|
19 *
|
|
20 * CDDL HEADER END
|
|
21 */
|
|
22
|
|
23 /*
|
|
24 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
|
|
25 * Use is subject to license terms.
|
|
26 */
|
|
27
|
|
28 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
|
|
29 /* All Rights Reserved */
|
|
30
|
|
31 #pragma ident "@(#)b.c 1.12 05/07/27 SMI"
|
|
32
|
|
33 #define DEBUG
|
|
34
|
|
35 #include "awk.h"
|
|
36 #include "y.tab.h"
|
|
37
|
|
38 #define HAT (NCHARS-1) /* matches ^ in regular expr */
|
|
39 /* NCHARS is 2**n */
|
|
40 #define MAXLIN (3 * LINE_MAX)
|
|
41
|
|
42 #define type(v) (v)->nobj
|
|
43 #define left(v) (v)->narg[0]
|
|
44 #define right(v) (v)->narg[1]
|
|
45 #define parent(v) (v)->nnext
|
|
46
|
|
47 #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
|
|
48 #define UNARY case STAR: case PLUS: case QUEST:
|
|
49
|
|
50 /*
|
|
51 * encoding in tree Nodes:
|
|
52 * leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL):
|
|
53 * left is index, right contains value or pointer to value
|
|
54 * unary (STAR, PLUS, QUEST): left is child, right is null
|
|
55 * binary (CAT, OR): left and right are children
|
|
56 * parent contains pointer to parent
|
|
57 */
|
|
58
|
|
59 int setvec[MAXLIN];
|
|
60 int tmpset[MAXLIN];
|
|
61 Node *point[MAXLIN];
|
|
62
|
|
63 int rtok; /* next token in current re */
|
|
64 int rlxval;
|
|
65 uchar *rlxstr;
|
|
66 uchar *prestr; /* current position in current re */
|
|
67 uchar *lastre; /* origin of last re */
|
|
68
|
|
69 static int setcnt;
|
|
70 static int poscnt;
|
|
71
|
|
72 uchar *patbeg;
|
|
73 int patlen;
|
|
74
|
|
75 #define NFA 20 /* cache this many dynamic fa's */
|
|
76 fa *fatab[NFA];
|
|
77 int nfatab = 0; /* entries in fatab */
|
|
78
|
|
79 static fa *mkdfa(uchar *, int);
|
|
80 static int makeinit(fa *, int);
|
|
81 static void penter(Node *);
|
|
82 static void freetr(Node *);
|
|
83 static void overflo(char *);
|
|
84 static void cfoll(fa *, Node *);
|
|
85 static void follow(Node *);
|
|
86 static Node *reparse(uchar *);
|
|
87 static int relex(void);
|
|
88 static void freefa(fa *);
|
|
89 static int cgoto(fa *, int, int);
|
|
90
|
|
91 fa *
|
|
92 makedfa(uchar *s, int anchor) /* returns dfa for reg expr s */
|
|
93 {
|
|
94 int i, use, nuse;
|
|
95 fa *pfa;
|
|
96
|
|
97 if (compile_time) /* a constant for sure */
|
|
98 return (mkdfa(s, anchor));
|
|
99 for (i = 0; i < nfatab; i++) { /* is it there already? */
|
|
100 if (fatab[i]->anchor == anchor &&
|
|
101 strcmp((char *)fatab[i]->restr, (char *)s) == 0) {
|
|
102 fatab[i]->use++;
|
|
103 return (fatab[i]);
|
|
104 }
|
|
105 }
|
|
106 pfa = mkdfa(s, anchor);
|
|
107 if (nfatab < NFA) { /* room for another */
|
|
108 fatab[nfatab] = pfa;
|
|
109 fatab[nfatab]->use = 1;
|
|
110 nfatab++;
|
|
111 return (pfa);
|
|
112 }
|
|
113 use = fatab[0]->use; /* replace least-recently used */
|
|
114 nuse = 0;
|
|
115 for (i = 1; i < nfatab; i++)
|
|
116 if (fatab[i]->use < use) {
|
|
117 use = fatab[i]->use;
|
|
118 nuse = i;
|
|
119 }
|
|
120 freefa(fatab[nuse]);
|
|
121 fatab[nuse] = pfa;
|
|
122 pfa->use = 1;
|
|
123 return (pfa);
|
|
124 }
|
|
125
|
|
126 fa *
|
|
127 mkdfa(uchar *s, int anchor) /* does the real work of making a dfa */
|
|
128 /* anchor = 1 for anchored matches, else 0 */
|
|
129 {
|
|
130 Node *p, *p1;
|
|
131 fa *f;
|
|
132
|
|
133 p = reparse(s);
|
|
134 p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
|
|
135 /* put ALL STAR in front of reg. exp. */
|
|
136 p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
|
|
137 /* put FINAL after reg. exp. */
|
|
138
|
|
139 poscnt = 0;
|
|
140 penter(p1); /* enter parent pointers and leaf indices */
|
|
141 if ((f = (fa *)calloc(1, sizeof (fa) + poscnt * sizeof (rrow))) == NULL)
|
|
142 overflo("no room for fa");
|
|
143 /* penter has computed number of positions in re */
|
|
144 f->accept = poscnt-1;
|
|
145 cfoll(f, p1); /* set up follow sets */
|
|
146 freetr(p1);
|
|
147 if ((f->posns[0] =
|
|
148 (int *)calloc(1, *(f->re[0].lfollow) * sizeof (int))) == NULL) {
|
|
149 overflo("out of space in makedfa");
|
|
150 }
|
|
151 if ((f->posns[1] = (int *)calloc(1, sizeof (int))) == NULL)
|
|
152 overflo("out of space in makedfa");
|
|
153 *f->posns[1] = 0;
|
|
154 f->initstat = makeinit(f, anchor);
|
|
155 f->anchor = anchor;
|
|
156 f->restr = tostring(s);
|
|
157 return (f);
|
|
158 }
|
|
159
|
|
160 static int
|
|
161 makeinit(fa *f, int anchor)
|
|
162 {
|
|
163 register int i, k;
|
|
164
|
|
165 f->curstat = 2;
|
|
166 f->out[2] = 0;
|
|
167 f->reset = 0;
|
|
168 k = *(f->re[0].lfollow);
|
|
169 xfree(f->posns[2]);
|
|
170 if ((f->posns[2] = (int *)calloc(1, (k+1) * sizeof (int))) == NULL)
|
|
171 overflo("out of space in makeinit");
|
|
172 for (i = 0; i <= k; i++) {
|
|
173 (f->posns[2])[i] = (f->re[0].lfollow)[i];
|
|
174 }
|
|
175 if ((f->posns[2])[1] == f->accept)
|
|
176 f->out[2] = 1;
|
|
177 for (i = 0; i < NCHARS; i++)
|
|
178 f->gototab[2][i] = 0;
|
|
179 f->curstat = cgoto(f, 2, HAT);
|
|
180 if (anchor) {
|
|
181 *f->posns[2] = k-1; /* leave out position 0 */
|
|
182 for (i = 0; i < k; i++) {
|
|
183 (f->posns[0])[i] = (f->posns[2])[i];
|
|
184 }
|
|
185
|
|
186 f->out[0] = f->out[2];
|
|
187 if (f->curstat != 2)
|
|
188 --(*f->posns[f->curstat]);
|
|
189 }
|
|
190 return (f->curstat);
|
|
191 }
|
|
192
|
|
193 void
|
|
194 penter(Node *p) /* set up parent pointers and leaf indices */
|
|
195 {
|
|
196 switch (type(p)) {
|
|
197 LEAF
|
|
198 left(p) = (Node *) poscnt;
|
|
199 point[poscnt++] = p;
|
|
200 break;
|
|
201 UNARY
|
|
202 penter(left(p));
|
|
203 parent(left(p)) = p;
|
|
204 break;
|
|
205 case CAT:
|
|
206 case OR:
|
|
207 penter(left(p));
|
|
208 penter(right(p));
|
|
209 parent(left(p)) = p;
|
|
210 parent(right(p)) = p;
|
|
211 break;
|
|
212 default:
|
|
213 ERROR "unknown type %d in penter", type(p) FATAL;
|
|
214 break;
|
|
215 }
|
|
216 }
|
|
217
|
|
218 static void
|
|
219 freetr(Node *p) /* free parse tree */
|
|
220 {
|
|
221 switch (type(p)) {
|
|
222 LEAF
|
|
223 xfree(p);
|
|
224 break;
|
|
225 UNARY
|
|
226 freetr(left(p));
|
|
227 xfree(p);
|
|
228 break;
|
|
229 case CAT:
|
|
230 case OR:
|
|
231 freetr(left(p));
|
|
232 freetr(right(p));
|
|
233 xfree(p);
|
|
234 break;
|
|
235 default:
|
|
236 ERROR "unknown type %d in freetr", type(p) FATAL;
|
|
237 break;
|
|
238 }
|
|
239 }
|
|
240
|
|
241 uchar *
|
|
242 cclenter(uchar *p)
|
|
243 {
|
|
244 register int i, c;
|
|
245 uchar *op, *chars, *ret;
|
|
246 size_t bsize;
|
|
247
|
|
248 init_buf(&chars, &bsize, LINE_INCR);
|
|
249 op = p;
|
|
250 i = 0;
|
|
251 while ((c = *p++) != 0) {
|
|
252 if (c == '\\') {
|
|
253 if ((c = *p++) == 't')
|
|
254 c = '\t';
|
|
255 else if (c == 'n')
|
|
256 c = '\n';
|
|
257 else if (c == 'f')
|
|
258 c = '\f';
|
|
259 else if (c == 'r')
|
|
260 c = '\r';
|
|
261 else if (c == 'b')
|
|
262 c = '\b';
|
|
263 else if (c == '\\')
|
|
264 c = '\\';
|
|
265 else if (isdigit(c)) {
|
|
266 int n = c - '0';
|
|
267 if (isdigit(*p)) {
|
|
268 n = 8 * n + *p++ - '0';
|
|
269 if (isdigit(*p))
|
|
270 n = 8 * n + *p++ - '0';
|
|
271 }
|
|
272 c = n;
|
|
273 } /* else */
|
|
274 /* c = c; */
|
|
275 } else if (c == '-' && i > 0 && chars[i-1] != 0) {
|
|
276 if (*p != 0) {
|
|
277 c = chars[i-1];
|
|
278 while ((uchar)c < *p) { /* fails if *p is \\ */
|
|
279 expand_buf(&chars, &bsize, i);
|
|
280 chars[i++] = ++c;
|
|
281 }
|
|
282 p++;
|
|
283 continue;
|
|
284 }
|
|
285 }
|
|
286 expand_buf(&chars, &bsize, i);
|
|
287 chars[i++] = c;
|
|
288 }
|
|
289 chars[i++] = '\0';
|
|
290 dprintf(("cclenter: in = |%s|, out = |%s|\n", op, chars));
|
|
291 xfree(op);
|
|
292 ret = tostring(chars);
|
|
293 free(chars);
|
|
294 return (ret);
|
|
295 }
|
|
296
|
|
297 static void
|
|
298 overflo(char *s)
|
|
299 {
|
|
300 ERROR "regular expression too big: %s", gettext((char *)s) FATAL;
|
|
301 }
|
|
302
|
|
303 /* enter follow set of each leaf of vertex v into lfollow[leaf] */
|
|
304 static void
|
|
305 cfoll(fa *f, Node *v)
|
|
306 {
|
|
307 register int i;
|
|
308 register int *p;
|
|
309
|
|
310 switch (type(v)) {
|
|
311 LEAF
|
|
312 f->re[(int)left(v)].ltype = type(v);
|
|
313 f->re[(int)left(v)].lval = (int)right(v);
|
|
314 for (i = 0; i <= f->accept; i++)
|
|
315 setvec[i] = 0;
|
|
316 setcnt = 0;
|
|
317 follow(v); /* computes setvec and setcnt */
|
|
318 if ((p = (int *)calloc(1, (setcnt+1) * sizeof (int))) == NULL)
|
|
319 overflo("follow set overflow");
|
|
320 f->re[(int)left(v)].lfollow = p;
|
|
321 *p = setcnt;
|
|
322 for (i = f->accept; i >= 0; i--) {
|
|
323 if (setvec[i] == 1)
|
|
324 *++p = i;
|
|
325 }
|
|
326 break;
|
|
327 UNARY
|
|
328 cfoll(f, left(v));
|
|
329 break;
|
|
330 case CAT:
|
|
331 case OR:
|
|
332 cfoll(f, left(v));
|
|
333 cfoll(f, right(v));
|
|
334 break;
|
|
335 default:
|
|
336 ERROR "unknown type %d in cfoll", type(v) FATAL;
|
|
337 }
|
|
338 }
|
|
339
|
|
340 /*
|
|
341 * collects initially active leaves of p into setvec
|
|
342 * returns 0 or 1 depending on whether p matches empty string
|
|
343 */
|
|
344 static int
|
|
345 first(Node *p)
|
|
346 {
|
|
347 register int b;
|
|
348
|
|
349 switch (type(p)) {
|
|
350 LEAF
|
|
351 if (setvec[(int)left(p)] != 1) {
|
|
352 setvec[(int)left(p)] = 1;
|
|
353 setcnt++;
|
|
354 }
|
|
355 if (type(p) == CCL && (*(uchar *)right(p)) == '\0')
|
|
356 return (0); /* empty CCL */
|
|
357 else
|
|
358 return (1);
|
|
359 case PLUS:
|
|
360 if (first(left(p)) == 0)
|
|
361 return (0);
|
|
362 return (1);
|
|
363 case STAR:
|
|
364 case QUEST:
|
|
365 (void) first(left(p));
|
|
366 return (0);
|
|
367 case CAT:
|
|
368 if (first(left(p)) == 0 && first(right(p)) == 0)
|
|
369 return (0);
|
|
370 return (1);
|
|
371 case OR:
|
|
372 b = first(right(p));
|
|
373 if (first(left(p)) == 0 || b == 0)
|
|
374 return (0);
|
|
375 return (1);
|
|
376 }
|
|
377 ERROR "unknown type %d in first", type(p) FATAL;
|
|
378 return (-1);
|
|
379 }
|
|
380
|
|
381 /* collects leaves that can follow v into setvec */
|
|
382 static void
|
|
383 follow(Node *v)
|
|
384 {
|
|
385 Node *p;
|
|
386
|
|
387 if (type(v) == FINAL)
|
|
388 return;
|
|
389 p = parent(v);
|
|
390 switch (type(p)) {
|
|
391 case STAR:
|
|
392 case PLUS:
|
|
393 (void) first(v);
|
|
394 follow(p);
|
|
395 return;
|
|
396
|
|
397 case OR:
|
|
398 case QUEST:
|
|
399 follow(p);
|
|
400 return;
|
|
401
|
|
402 case CAT:
|
|
403 if (v == left(p)) { /* v is left child of p */
|
|
404 if (first(right(p)) == 0) {
|
|
405 follow(p);
|
|
406 return;
|
|
407 }
|
|
408 } else /* v is right child */
|
|
409 follow(p);
|
|
410 return;
|
|
411 default:
|
|
412 ERROR "unknown type %d in follow", type(p) FATAL;
|
|
413 break;
|
|
414 }
|
|
415 }
|
|
416
|
|
417 static int
|
|
418 member(uchar c, uchar *s) /* is c in s? */
|
|
419 {
|
|
420 while (*s)
|
|
421 if (c == *s++)
|
|
422 return (1);
|
|
423 return (0);
|
|
424 }
|
|
425
|
|
426
|
|
427 int
|
|
428 match(fa *f, uchar *p)
|
|
429 {
|
|
430 register int s, ns;
|
|
431
|
|
432 s = f->reset ? makeinit(f, 0) : f->initstat;
|
|
433 if (f->out[s])
|
|
434 return (1);
|
|
435 do {
|
|
436 if ((ns = f->gototab[s][*p]) != 0)
|
|
437 s = ns;
|
|
438 else
|
|
439 s = cgoto(f, s, *p);
|
|
440 if (f->out[s])
|
|
441 return (1);
|
|
442 } while (*p++ != 0);
|
|
443 return (0);
|
|
444 }
|
|
445
|
|
446 int
|
|
447 pmatch(fa *f, uchar *p)
|
|
448 {
|
|
449 register int s, ns;
|
|
450 register uchar *q;
|
|
451 int i, k;
|
|
452
|
|
453 if (f->reset) {
|
|
454 f->initstat = s = makeinit(f, 1);
|
|
455 } else {
|
|
456 s = f->initstat;
|
|
457 }
|
|
458 patbeg = p;
|
|
459 patlen = -1;
|
|
460 do {
|
|
461 q = p;
|
|
462 do {
|
|
463 if (f->out[s]) /* final state */
|
|
464 patlen = q-p;
|
|
465 if ((ns = f->gototab[s][*q]) != 0)
|
|
466 s = ns;
|
|
467 else
|
|
468 s = cgoto(f, s, *q);
|
|
469 if (s == 1) { /* no transition */
|
|
470 if (patlen >= 0) {
|
|
471 patbeg = p;
|
|
472 return (1);
|
|
473 } else
|
|
474 goto nextin; /* no match */
|
|
475 }
|
|
476 } while (*q++ != 0);
|
|
477 if (f->out[s])
|
|
478 patlen = q - p - 1; /* don't count $ */
|
|
479 if (patlen >= 0) {
|
|
480 patbeg = p;
|
|
481 return (1);
|
|
482 }
|
|
483 nextin:
|
|
484 s = 2;
|
|
485 if (f->reset) {
|
|
486 for (i = 2; i <= f->curstat; i++)
|
|
487 xfree(f->posns[i]);
|
|
488 k = *f->posns[0];
|
|
489 if ((f->posns[2] =
|
|
490 (int *)calloc(1, (k + 1) * sizeof (int))) == NULL) {
|
|
491 overflo("out of space in pmatch");
|
|
492 }
|
|
493 for (i = 0; i <= k; i++)
|
|
494 (f->posns[2])[i] = (f->posns[0])[i];
|
|
495 f->initstat = f->curstat = 2;
|
|
496 f->out[2] = f->out[0];
|
|
497 for (i = 0; i < NCHARS; i++)
|
|
498 f->gototab[2][i] = 0;
|
|
499 }
|
|
500 } while (*p++ != 0);
|
|
501 return (0);
|
|
502 }
|
|
503
|
|
504 int
|
|
505 nematch(fa *f, uchar *p)
|
|
506 {
|
|
507 register int s, ns;
|
|
508 register uchar *q;
|
|
509 int i, k;
|
|
510
|
|
511 if (f->reset) {
|
|
512 f->initstat = s = makeinit(f, 1);
|
|
513 } else {
|
|
514 s = f->initstat;
|
|
515 }
|
|
516 patlen = -1;
|
|
517 while (*p) {
|
|
518 q = p;
|
|
519 do {
|
|
520 if (f->out[s]) /* final state */
|
|
521 patlen = q-p;
|
|
522 if ((ns = f->gototab[s][*q]) != 0)
|
|
523 s = ns;
|
|
524 else
|
|
525 s = cgoto(f, s, *q);
|
|
526 if (s == 1) { /* no transition */
|
|
527 if (patlen > 0) {
|
|
528 patbeg = p;
|
|
529 return (1);
|
|
530 } else
|
|
531 goto nnextin; /* no nonempty match */
|
|
532 }
|
|
533 } while (*q++ != 0);
|
|
534 if (f->out[s])
|
|
535 patlen = q - p - 1; /* don't count $ */
|
|
536 if (patlen > 0) {
|
|
537 patbeg = p;
|
|
538 return (1);
|
|
539 }
|
|
540 nnextin:
|
|
541 s = 2;
|
|
542 if (f->reset) {
|
|
543 for (i = 2; i <= f->curstat; i++)
|
|
544 xfree(f->posns[i]);
|
|
545 k = *f->posns[0];
|
|
546 if ((f->posns[2] =
|
|
547 (int *)calloc(1, (k + 1) * sizeof (int))) == NULL) {
|
|
548 overflo("out of state space");
|
|
549 }
|
|
550 for (i = 0; i <= k; i++)
|
|
551 (f->posns[2])[i] = (f->posns[0])[i];
|
|
552 f->initstat = f->curstat = 2;
|
|
553 f->out[2] = f->out[0];
|
|
554 for (i = 0; i < NCHARS; i++)
|
|
555 f->gototab[2][i] = 0;
|
|
556 }
|
|
557 p++;
|
|
558 }
|
|
559 return (0);
|
|
560 }
|
|
561
|
|
562 static Node *regexp(void), *primary(void), *concat(Node *);
|
|
563 static Node *alt(Node *), *unary(Node *);
|
|
564
|
|
565 static Node *
|
|
566 reparse(uchar *p)
|
|
567 {
|
|
568 /* parses regular expression pointed to by p */
|
|
569 /* uses relex() to scan regular expression */
|
|
570 Node *np;
|
|
571
|
|
572 dprintf(("reparse <%s>\n", p));
|
|
573 lastre = prestr = p; /* prestr points to string to be parsed */
|
|
574 rtok = relex();
|
|
575 if (rtok == '\0')
|
|
576 ERROR "empty regular expression" FATAL;
|
|
577 np = regexp();
|
|
578 if (rtok == '\0') {
|
|
579 return (np);
|
|
580 } else {
|
|
581 ERROR "syntax error in regular expression %s at %s",
|
|
582 lastre, prestr FATAL;
|
|
583 }
|
|
584 /*NOTREACHED*/
|
|
585 return (NULL);
|
|
586 }
|
|
587
|
|
588 static Node *
|
|
589 regexp(void)
|
|
590 {
|
|
591 return (alt(concat(primary())));
|
|
592 }
|
|
593
|
|
594 static Node *
|
|
595 primary(void)
|
|
596 {
|
|
597 Node *np;
|
|
598
|
|
599 switch (rtok) {
|
|
600 case CHAR:
|
|
601 np = op2(CHAR, NIL, (Node *)rlxval);
|
|
602 rtok = relex();
|
|
603 return (unary(np));
|
|
604 case ALL:
|
|
605 rtok = relex();
|
|
606 return (unary(op2(ALL, NIL, NIL)));
|
|
607 case DOT:
|
|
608 rtok = relex();
|
|
609 return (unary(op2(DOT, NIL, NIL)));
|
|
610 case CCL:
|
|
611 /*LINTED align*/
|
|
612 np = op2(CCL, NIL, (Node *)cclenter(rlxstr));
|
|
613 rtok = relex();
|
|
614 return (unary(np));
|
|
615 case NCCL:
|
|
616 /*LINTED align*/
|
|
617 np = op2(NCCL, NIL, (Node *)cclenter(rlxstr));
|
|
618 rtok = relex();
|
|
619 return (unary(np));
|
|
620 case '^':
|
|
621 rtok = relex();
|
|
622 return (unary(op2(CHAR, NIL, (Node *)HAT)));
|
|
623 case '$':
|
|
624 rtok = relex();
|
|
625 return (unary(op2(CHAR, NIL, NIL)));
|
|
626 case '(':
|
|
627 rtok = relex();
|
|
628 if (rtok == ')') { /* special pleading for () */
|
|
629 rtok = relex();
|
|
630 return (unary(op2(CCL, NIL,
|
|
631 /*LINTED align*/
|
|
632 (Node *)tostring((uchar *)""))));
|
|
633 }
|
|
634 np = regexp();
|
|
635 if (rtok == ')') {
|
|
636 rtok = relex();
|
|
637 return (unary(np));
|
|
638 } else {
|
|
639 ERROR "syntax error in regular expression %s at %s",
|
|
640 lastre, prestr FATAL;
|
|
641 }
|
|
642 default:
|
|
643 ERROR "illegal primary in regular expression %s at %s",
|
|
644 lastre, prestr FATAL;
|
|
645 }
|
|
646 /*NOTREACHED*/
|
|
647 return (NULL);
|
|
648 }
|
|
649
|
|
650 static Node *
|
|
651 concat(Node *np)
|
|
652 {
|
|
653 switch (rtok) {
|
|
654 case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
|
|
655 return (concat(op2(CAT, np, primary())));
|
|
656 default:
|
|
657 return (np);
|
|
658 }
|
|
659 }
|
|
660
|
|
661 static Node *
|
|
662 alt(Node *np)
|
|
663 {
|
|
664 if (rtok == OR) {
|
|
665 rtok = relex();
|
|
666 return (alt(op2(OR, np, concat(primary()))));
|
|
667 }
|
|
668 return (np);
|
|
669 }
|
|
670
|
|
671 static Node *
|
|
672 unary(Node *np)
|
|
673 {
|
|
674 switch (rtok) {
|
|
675 case STAR:
|
|
676 rtok = relex();
|
|
677 return (unary(op2(STAR, np, NIL)));
|
|
678 case PLUS:
|
|
679 rtok = relex();
|
|
680 return (unary(op2(PLUS, np, NIL)));
|
|
681 case QUEST:
|
|
682 rtok = relex();
|
|
683 return (unary(op2(QUEST, np, NIL)));
|
|
684 default:
|
|
685 return (np);
|
|
686 }
|
|
687 }
|
|
688
|
|
689 static int
|
|
690 relex(void) /* lexical analyzer for reparse */
|
|
691 {
|
|
692 register int c;
|
|
693 uchar *cbuf;
|
|
694 int clen, cflag;
|
|
695
|
|
696 switch (c = *prestr++) {
|
|
697 case '|': return OR;
|
|
698 case '*': return STAR;
|
|
699 case '+': return PLUS;
|
|
700 case '?': return QUEST;
|
|
701 case '.': return DOT;
|
|
702 case '\0': prestr--; return '\0';
|
|
703 case '^':
|
|
704 case '$':
|
|
705 case '(':
|
|
706 case ')':
|
|
707 return (c);
|
|
708 case '\\':
|
|
709 if ((c = *prestr++) == 't')
|
|
710 c = '\t';
|
|
711 else if (c == 'n')
|
|
712 c = '\n';
|
|
713 else if (c == 'f')
|
|
714 c = '\f';
|
|
715 else if (c == 'r')
|
|
716 c = '\r';
|
|
717 else if (c == 'b')
|
|
718 c = '\b';
|
|
719 else if (c == '\\')
|
|
720 c = '\\';
|
|
721 else if (isdigit(c)) {
|
|
722 int n = c - '0';
|
|
723 if (isdigit(*prestr)) {
|
|
724 n = 8 * n + *prestr++ - '0';
|
|
725 if (isdigit(*prestr))
|
|
726 n = 8 * n + *prestr++ - '0';
|
|
727 }
|
|
728 c = n;
|
|
729 } /* else it's now in c */
|
|
730 rlxval = c;
|
|
731 return (CHAR);
|
|
732 default:
|
|
733 rlxval = c;
|
|
734 return (CHAR);
|
|
735 case '[':
|
|
736 clen = 0;
|
|
737 if (*prestr == '^') {
|
|
738 cflag = 1;
|
|
739 prestr++;
|
|
740 } else
|
|
741 cflag = 0;
|
|
742 init_buf(&cbuf, NULL, strlen((char *)prestr) * 2 + 1);
|
|
743 for (;;) {
|
|
744 if ((c = *prestr++) == '\\') {
|
|
745 cbuf[clen++] = '\\';
|
|
746 if ((c = *prestr++) == '\0') {
|
|
747 ERROR
|
|
748 "nonterminated character class %s", lastre FATAL;
|
|
749 }
|
|
750 cbuf[clen++] = c;
|
|
751 } else if (c == ']') {
|
|
752 cbuf[clen] = 0;
|
|
753 rlxstr = tostring(cbuf);
|
|
754 free(cbuf);
|
|
755 if (cflag == 0)
|
|
756 return (CCL);
|
|
757 else
|
|
758 return (NCCL);
|
|
759 } else if (c == '\n') {
|
|
760 ERROR "newline in character class %s...",
|
|
761 lastre FATAL;
|
|
762 } else if (c == '\0') {
|
|
763 ERROR "nonterminated character class %s",
|
|
764 lastre FATAL;
|
|
765 } else
|
|
766 cbuf[clen++] = c;
|
|
767 }
|
|
768 /*NOTREACHED*/
|
|
769 }
|
|
770 return (0);
|
|
771 }
|
|
772
|
|
773 static int
|
|
774 cgoto(fa *f, int s, int c)
|
|
775 {
|
|
776 register int i, j, k;
|
|
777 register int *p, *q;
|
|
778
|
|
779 for (i = 0; i <= f->accept; i++)
|
|
780 setvec[i] = 0;
|
|
781 setcnt = 0;
|
|
782 /* compute positions of gototab[s,c] into setvec */
|
|
783 p = f->posns[s];
|
|
784 for (i = 1; i <= *p; i++) {
|
|
785 if ((k = f->re[p[i]].ltype) != FINAL) {
|
|
786 if (k == CHAR && c == f->re[p[i]].lval ||
|
|
787 k == DOT && c != 0 && c != HAT ||
|
|
788 k == ALL && c != 0 ||
|
|
789 k == CCL &&
|
|
790 member(c, (uchar *)f->re[p[i]].lval) ||
|
|
791 k == NCCL &&
|
|
792 !member(c, (uchar *)f->re[p[i]].lval) &&
|
|
793 c != 0 && c != HAT) {
|
|
794 q = f->re[p[i]].lfollow;
|
|
795 for (j = 1; j <= *q; j++) {
|
|
796 if (setvec[q[j]] == 0) {
|
|
797 setcnt++;
|
|
798 setvec[q[j]] = 1;
|
|
799 }
|
|
800 }
|
|
801 }
|
|
802 }
|
|
803 }
|
|
804 /* determine if setvec is a previous state */
|
|
805 tmpset[0] = setcnt;
|
|
806 j = 1;
|
|
807 for (i = f->accept; i >= 0; i--)
|
|
808 if (setvec[i]) {
|
|
809 tmpset[j++] = i;
|
|
810 }
|
|
811 /* tmpset == previous state? */
|
|
812 for (i = 1; i <= f->curstat; i++) {
|
|
813 p = f->posns[i];
|
|
814 if ((k = tmpset[0]) != p[0])
|
|
815 goto different;
|
|
816 for (j = 1; j <= k; j++)
|
|
817 if (tmpset[j] != p[j])
|
|
818 goto different;
|
|
819 /* setvec is state i */
|
|
820 f->gototab[s][c] = i;
|
|
821 return (i);
|
|
822 different:;
|
|
823 }
|
|
824
|
|
825 /* add tmpset to current set of states */
|
|
826 if (f->curstat >= NSTATES-1) {
|
|
827 f->curstat = 2;
|
|
828 f->reset = 1;
|
|
829 for (i = 2; i < NSTATES; i++)
|
|
830 xfree(f->posns[i]);
|
|
831 } else
|
|
832 ++(f->curstat);
|
|
833 for (i = 0; i < NCHARS; i++)
|
|
834 f->gototab[f->curstat][i] = 0;
|
|
835 xfree(f->posns[f->curstat]);
|
|
836 if ((p = (int *)calloc(1, (setcnt + 1) * sizeof (int))) == NULL)
|
|
837 overflo("out of space in cgoto");
|
|
838
|
|
839 f->posns[f->curstat] = p;
|
|
840 f->gototab[s][c] = f->curstat;
|
|
841 for (i = 0; i <= setcnt; i++)
|
|
842 p[i] = tmpset[i];
|
|
843 if (setvec[f->accept])
|
|
844 f->out[f->curstat] = 1;
|
|
845 else
|
|
846 f->out[f->curstat] = 0;
|
|
847 return (f->curstat);
|
|
848 }
|
|
849
|
|
850 static void
|
|
851 freefa(fa *f)
|
|
852 {
|
|
853
|
|
854 register int i;
|
|
855
|
|
856 if (f == NULL)
|
|
857 return;
|
|
858 for (i = 0; i <= f->curstat; i++)
|
|
859 xfree(f->posns[i]);
|
|
860 for (i = 0; i <= f->accept; i++)
|
|
861 xfree(f->re[i].lfollow);
|
|
862 xfree(f->restr);
|
|
863 xfree(f);
|
|
864 }
|