This is Unofficial EPICS BASE Doxygen Site
nfa.c
Go to the documentation of this file.
1 /*************************************************************************\
2 * Copyright (c) 2002 The University of Chicago, as Operator of Argonne
3 * National Laboratory.
4 * Copyright (c) 2002 The Regents of the University of California, as
5 * Operator of Los Alamos National Laboratory.
6 * EPICS BASE is distributed subject to a Software License Agreement found
7 * in file LICENSE that is included with this distribution.
8 \*************************************************************************/
9 /* nfa - NFA construction routines */
10 
11 /*-
12  * Copyright (c) 1990 The Regents of the University of California.
13  * All rights reserved.
14  *
15  * This code is derived from software contributed to Berkeley by
16  * Vern Paxson.
17  *
18  * The United States Government has rights in this work pursuant
19  * to contract no. DE-AC03-76SF00098 between the United States
20  * Department of Energy and the University of California.
21  *
22  * Redistribution and use in source and binary forms are permitted provided
23  * that: (1) source distributions retain this entire copyright notice and
24  * comment, and (2) distributions including binaries display the following
25  * acknowledgement: ``This product includes software developed by the
26  * University of California, Berkeley and its contributors'' in the
27  * documentation or other materials provided with the distribution and in
28  * all advertising materials mentioning features or use of this software.
29  * Neither the name of the University nor the names of its contributors may
30  * be used to endorse or promote products derived from this software without
31  * specific prior written permission.
32  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
33  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
34  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
35  */
36 
37 #include "flexdef.h"
38 
39 
40 /* declare functions that have forward references */
41 
42 int dupmachine (int);
43 void mkxtion (int, int);
44 
45 
46 /* add_accept - add an accepting state to a machine
47  *
48  * synopsis
49  *
50  * add_accept( mach, accepting_number );
51  *
52  * accepting_number becomes mach's accepting number.
53  */
54 
55 void add_accept(int mach, int accepting_number)
56 {
57  /* hang the accepting number off an epsilon state. if it is associated
58  * with a state that has a non-epsilon out-transition, then the state
59  * will accept BEFORE it makes that transition, i.e., one character
60  * too soon
61  */
62 
63  if ( transchar[finalst[mach]] == SYM_EPSILON )
64  accptnum[finalst[mach]] = accepting_number;
65 
66  else
67  {
68  int astate = mkstate( SYM_EPSILON );
69  accptnum[astate] = accepting_number;
70  mach = link_machines( mach, astate );
71  }
72  }
73 
74 
75 /* copysingl - make a given number of copies of a singleton machine
76  *
77  * synopsis
78  *
79  * newsng = copysingl( singl, num );
80  *
81  * newsng - a new singleton composed of num copies of singl
82  * singl - a singleton machine
83  * num - the number of copies of singl to be present in newsng
84  */
85 
86 int copysingl(int singl, int num)
87 {
88  int copy, i;
89 
90  copy = mkstate( SYM_EPSILON );
91 
92  for ( i = 1; i <= num; ++i )
93  copy = link_machines( copy, dupmachine( singl ) );
94 
95  return ( copy );
96  }
97 
98 
99 /* dumpnfa - debugging routine to write out an nfa
100  *
101  * synopsis
102  * int state1;
103  * dumpnfa( state1 );
104  */
105 
106 void dumpnfa(int state1)
107 {
108  int sym, tsp1, tsp2, anum, ns;
109 
110  fprintf( stderr, "\n\n********** beginning dump of nfa with start state %d\n",
111  state1 );
112 
113  /* we probably should loop starting at firstst[state1] and going to
114  * lastst[state1], but they're not maintained properly when we "or"
115  * all of the rules together. So we use our knowledge that the machine
116  * starts at state 1 and ends at lastnfa.
117  */
118 
119  /* for ( ns = firstst[state1]; ns <= lastst[state1]; ++ns ) */
120  for ( ns = 1; ns <= lastnfa; ++ns )
121  {
122  fprintf( stderr, "state # %4d\t", ns );
123 
124  sym = transchar[ns];
125  tsp1 = trans1[ns];
126  tsp2 = trans2[ns];
127  anum = accptnum[ns];
128 
129  fprintf( stderr, "%3d: %4d, %4d", sym, tsp1, tsp2 );
130 
131  if ( anum != NIL )
132  fprintf( stderr, " [%d]", anum );
133 
134  fprintf( stderr, "\n" );
135  }
136 
137  fprintf( stderr, "********** end of dump\n" );
138  }
139 
140 
141 /* dupmachine - make a duplicate of a given machine
142  *
143  * synopsis
144  *
145  * copy = dupmachine( mach );
146  *
147  * copy - holds duplicate of mach
148  * mach - machine to be duplicated
149  *
150  * note that the copy of mach is NOT an exact duplicate; rather, all the
151  * transition states values are adjusted so that the copy is self-contained,
152  * as the original should have been.
153  *
154  * also note that the original MUST be contiguous, with its low and high
155  * states accessible by the arrays firstst and lastst
156  */
157 
158 int dupmachine(int mach)
159 {
160  int i, init, state_offset;
161  int state = 0;
162  int last = lastst[mach];
163 
164  for ( i = firstst[mach]; i <= last; ++i )
165  {
166  state = mkstate( transchar[i] );
167 
168  if ( trans1[i] != NO_TRANSITION )
169  {
170  mkxtion( finalst[state], trans1[i] + state - i );
171 
172  if ( transchar[i] == SYM_EPSILON && trans2[i] != NO_TRANSITION )
173  mkxtion( finalst[state], trans2[i] + state - i );
174  }
175 
176  accptnum[state] = accptnum[i];
177  }
178 
179  if ( state == 0 )
180  flexfatal( "empty machine in dupmachine()" );
181 
182  state_offset = state - i + 1;
183 
184  init = mach + state_offset;
185  firstst[init] = firstst[mach] + state_offset;
186  finalst[init] = finalst[mach] + state_offset;
187  lastst[init] = lastst[mach] + state_offset;
188 
189  return ( init );
190  }
191 
192 
193 /* finish_rule - finish up the processing for a rule
194  *
195  * synopsis
196  *
197  * finish_rule( mach, variable_trail_rule, headcnt, trailcnt );
198  *
199  * An accepting number is added to the given machine. If variable_trail_rule
200  * is true then the rule has trailing context and both the head and trail
201  * are variable size. Otherwise if headcnt or trailcnt is non-zero then
202  * the machine recognizes a pattern with trailing context and headcnt is
203  * the number of characters in the matched part of the pattern, or zero
204  * if the matched part has variable length. trailcnt is the number of
205  * trailing context characters in the pattern, or zero if the trailing
206  * context has variable length.
207  */
208 
209 void finish_rule(int mach, int variable_trail_rule, int headcnt, int trailcnt)
210 {
211  add_accept( mach, num_rules );
212 
213  /* we did this in new_rule(), but it often gets the wrong
214  * number because we do it before we start parsing the current rule
215  */
217 
218  /* if this is a continued action, then the line-number has
219  * already been updated, giving us the wrong number
220  */
221  if ( continued_action )
223 
224  fprintf( temp_action_file, "case %d:\n", num_rules );
225 
226  if ( variable_trail_rule )
227  {
229 
230  if ( performance_report )
231  fprintf( stderr, "Variable trailing context rule at line %d\n",
233 
235  }
236 
237  else
238  {
240 
241  if ( headcnt > 0 || trailcnt > 0 )
242  {
243  /* do trailing context magic to not match the trailing characters */
244  char *scanner_cp = "yy_c_buf_p = yy_cp";
245  char *scanner_bp = "yy_bp";
246 
247  fprintf( temp_action_file,
248  "*yy_cp = yy_hold_char; /* undo effects of setting up yytext */\n" );
249 
250  if ( headcnt > 0 )
251  fprintf( temp_action_file, "%s = %s + %d;\n",
252  scanner_cp, scanner_bp, headcnt );
253 
254  else
255  fprintf( temp_action_file,
256  "%s -= %d;\n", scanner_cp, trailcnt );
257 
258  fprintf( temp_action_file,
259  "YY_DO_BEFORE_ACTION; /* set up yytext again */\n" );
260  }
261  }
262 
264  }
265 
266 
267 /* link_machines - connect two machines together
268  *
269  * synopsis
270  *
271  * new = link_machines( first, last );
272  *
273  * new - a machine constructed by connecting first to last
274  * first - the machine whose successor is to be last
275  * last - the machine whose predecessor is to be first
276  *
277  * note: this routine concatenates the machine first with the machine
278  * last to produce a machine new which will pattern-match first first
279  * and then last, and will fail if either of the sub-patterns fails.
280  * FIRST is set to new by the operation. last is unmolested.
281  */
282 
283 int link_machines(int first, int last)
284 {
285  if ( first == NIL )
286  return ( last );
287 
288  else if ( last == NIL )
289  return ( first );
290 
291  else
292  {
293  mkxtion( finalst[first], last );
294  finalst[first] = finalst[last];
295  lastst[first] = max( lastst[first], lastst[last] );
296  firstst[first] = min( firstst[first], firstst[last] );
297 
298  return ( first );
299  }
300  }
301 
302 
303 /* mark_beginning_as_normal - mark each "beginning" state in a machine
304  * as being a "normal" (i.e., not trailing context-
305  * associated) states
306  *
307  * synopsis
308  *
309  * mark_beginning_as_normal( mach )
310  *
311  * mach - machine to mark
312  *
313  * The "beginning" states are the epsilon closure of the first state
314  */
315 
317 {
318  switch ( state_type[mach] )
319  {
320  case STATE_NORMAL:
321  /* oh, we've already visited here */
322  return;
323 
325  state_type[mach] = STATE_NORMAL;
326 
327  if ( transchar[mach] == SYM_EPSILON )
328  {
329  if ( trans1[mach] != NO_TRANSITION )
331 
332  if ( trans2[mach] != NO_TRANSITION )
334  }
335  break;
336 
337  default:
338  flexerror( "bad state type in mark_beginning_as_normal()" );
339  break;
340  }
341  }
342 
343 
344 /* mkbranch - make a machine that branches to two machines
345  *
346  * synopsis
347  *
348  * branch = mkbranch( first, second );
349  *
350  * branch - a machine which matches either first's pattern or second's
351  * first, second - machines whose patterns are to be or'ed (the | operator)
352  *
353  * note that first and second are NEITHER destroyed by the operation. Also,
354  * the resulting machine CANNOT be used with any other "mk" operation except
355  * more mkbranch's. Compare with mkor()
356  */
357 
358 int mkbranch(int first, int second)
359 {
360  int eps;
361 
362  if ( first == NO_TRANSITION )
363  return ( second );
364 
365  else if ( second == NO_TRANSITION )
366  return ( first );
367 
368  eps = mkstate( SYM_EPSILON );
369 
370  mkxtion( eps, first );
371  mkxtion( eps, second );
372 
373  return ( eps );
374  }
375 
376 
377 /* mkclos - convert a machine into a closure
378  *
379  * synopsis
380  * new = mkclos( state );
381  *
382  * new - a new state which matches the closure of "state"
383  */
384 
385 int mkclos(int state)
386 {
387  return ( mkopt( mkposcl( state ) ) );
388  }
389 
390 
391 /* mkopt - make a machine optional
392  *
393  * synopsis
394  *
395  * new = mkopt( mach );
396  *
397  * new - a machine which optionally matches whatever mach matched
398  * mach - the machine to make optional
399  *
400  * notes:
401  * 1. mach must be the last machine created
402  * 2. mach is destroyed by the call
403  */
404 
405 int mkopt(int mach)
406 {
407  int eps;
408 
409  if ( ! SUPER_FREE_EPSILON(finalst[mach]) )
410  {
411  eps = mkstate( SYM_EPSILON );
412  mach = link_machines( mach, eps );
413  }
414 
415  /* can't skimp on the following if FREE_EPSILON(mach) is true because
416  * some state interior to "mach" might point back to the beginning
417  * for a closure
418  */
419  eps = mkstate( SYM_EPSILON );
420  mach = link_machines( eps, mach );
421 
422  mkxtion( mach, finalst[mach] );
423 
424  return ( mach );
425  }
426 
427 
428 /* mkor - make a machine that matches either one of two machines
429  *
430  * synopsis
431  *
432  * new = mkor( first, second );
433  *
434  * new - a machine which matches either first's pattern or second's
435  * first, second - machines whose patterns are to be or'ed (the | operator)
436  *
437  * note that first and second are both destroyed by the operation
438  * the code is rather convoluted because an attempt is made to minimize
439  * the number of epsilon states needed
440  */
441 
442 int mkor(int first, int second)
443 {
444  int eps, orend;
445 
446  if ( first == NIL )
447  return ( second );
448 
449  else if ( second == NIL )
450  return ( first );
451 
452  else
453  {
454  /* see comment in mkopt() about why we can't use the first state
455  * of "first" or "second" if they satisfy "FREE_EPSILON"
456  */
457  eps = mkstate( SYM_EPSILON );
458 
459  first = link_machines( eps, first );
460 
461  mkxtion( first, second );
462 
463  if ( SUPER_FREE_EPSILON(finalst[first]) &&
464  accptnum[finalst[first]] == NIL )
465  {
466  orend = finalst[first];
467  mkxtion( finalst[second], orend );
468  }
469 
470  else if ( SUPER_FREE_EPSILON(finalst[second]) &&
471  accptnum[finalst[second]] == NIL )
472  {
473  orend = finalst[second];
474  mkxtion( finalst[first], orend );
475  }
476 
477  else
478  {
479  eps = mkstate( SYM_EPSILON );
480 
481  first = link_machines( first, eps );
482  orend = finalst[first];
483 
484  mkxtion( finalst[second], orend );
485  }
486  }
487 
488  finalst[first] = orend;
489  return ( first );
490  }
491 
492 
493 /* mkposcl - convert a machine into a positive closure
494  *
495  * synopsis
496  * new = mkposcl( state );
497  *
498  * new - a machine matching the positive closure of "state"
499  */
500 
501 int mkposcl(int state)
502 {
503  int eps;
504 
505  if ( SUPER_FREE_EPSILON(finalst[state]) )
506  {
507  mkxtion( finalst[state], state );
508  return ( state );
509  }
510 
511  else
512  {
513  eps = mkstate( SYM_EPSILON );
514  mkxtion( eps, state );
515  return ( link_machines( state, eps ) );
516  }
517  }
518 
519 
520 /* mkrep - make a replicated machine
521  *
522  * synopsis
523  * new = mkrep( mach, lb, ub );
524  *
525  * new - a machine that matches whatever "mach" matched from "lb"
526  * number of times to "ub" number of times
527  *
528  * note
529  * if "ub" is INFINITY then "new" matches "lb" or more occurrences of "mach"
530  */
531 
532 int mkrep(int mach, int lb, int ub)
533 {
534  int base_mach, tail, copy, i;
535 
536  base_mach = copysingl( mach, lb - 1 );
537 
538  if ( ub == INFINITY )
539  {
540  copy = dupmachine( mach );
541  mach = link_machines( mach,
542  link_machines( base_mach, mkclos( copy ) ) );
543  }
544 
545  else
546  {
547  tail = mkstate( SYM_EPSILON );
548 
549  for ( i = lb; i < ub; ++i )
550  {
551  copy = dupmachine( mach );
552  tail = mkopt( link_machines( copy, tail ) );
553  }
554 
555  mach = link_machines( mach, link_machines( base_mach, tail ) );
556  }
557 
558  return ( mach );
559  }
560 
561 
562 /* mkstate - create a state with a transition on a given symbol
563  *
564  * synopsis
565  *
566  * state = mkstate( sym );
567  *
568  * state - a new state matching sym
569  * sym - the symbol the new state is to have an out-transition on
570  *
571  * note that this routine makes new states in ascending order through the
572  * state array (and increments LASTNFA accordingly). The routine DUPMACHINE
573  * relies on machines being made in ascending order and that they are
574  * CONTIGUOUS. Change it and you will have to rewrite DUPMACHINE (kludge
575  * that it admittedly is)
576  */
577 
578 int mkstate(int sym)
579 {
580  if ( ++lastnfa >= current_mns )
581  {
582  if ( (current_mns += MNS_INCREMENT) >= MAXIMUM_MNS )
583  lerrif( "input rules are too complicated (>= %d NFA states)",
584  current_mns );
585 
586  ++num_reallocs;
587 
597  }
598 
602  transchar[lastnfa] = sym;
605  accptnum[lastnfa] = NIL;
608 
609  /* fix up equivalence classes base on this transition. Note that any
610  * character which has its own transition gets its own equivalence class.
611  * Thus only characters which are only in character classes have a chance
612  * at being in the same equivalence class. E.g. "a|b" puts 'a' and 'b'
613  * into two different equivalence classes. "[ab]" puts them in the same
614  * equivalence class (barring other differences elsewhere in the input).
615  */
616 
617  if ( sym < 0 )
618  {
619  /* we don't have to update the equivalence classes since that was
620  * already done when the ccl was created for the first time
621  */
622  }
623 
624  else if ( sym == SYM_EPSILON )
625  ++numeps;
626 
627  else
628  {
629  if ( useecs )
630  /* map NUL's to csize */
631  mkechar( sym ? sym : csize, nextecm, ecgroup );
632  }
633 
634  return ( lastnfa );
635  }
636 
637 
638 /* mkxtion - make a transition from one state to another
639  *
640  * synopsis
641  *
642  * mkxtion( statefrom, stateto );
643  *
644  * statefrom - the state from which the transition is to be made
645  * stateto - the state to which the transition is to be made
646  */
647 
648 void mkxtion(int statefrom, int stateto)
649 {
650  if ( trans1[statefrom] == NO_TRANSITION )
651  trans1[statefrom] = stateto;
652 
653  else if ( (transchar[statefrom] != SYM_EPSILON) ||
654  (trans2[statefrom] != NO_TRANSITION) )
655  flexfatal( "found too many transitions in mkxtion()" );
656 
657  else
658  { /* second out-transition for an epsilon state */
659  ++eps2;
660  trans2[statefrom] = stateto;
661  }
662  }
663 
664 /* new_rule - initialize for a new rule
665  *
666  * synopsis
667  *
668  * new_rule();
669  *
670  * the global num_rules is incremented and the any corresponding dynamic
671  * arrays (such as rule_type[]) are grown as needed.
672  */
673 
674 void new_rule(void)
675 {
676  if ( ++num_rules >= current_max_rules )
677  {
678  ++num_reallocs;
681  rule_linenum =
683  }
684 
685  if ( num_rules > MAX_RULE )
686  lerrif( "too many rules (> %d)!", MAX_RULE );
687 
689  }
#define MAXIMUM_MNS
Definition: flexdef.h:182
#define max(x, y)
Definition: flexdef.h:81
int mkposcl(int state)
Definition: nfa.c:501
#define RULE_VARIABLE
Definition: flexdef.h:422
int continued_action
Definition: flex.c:69
int * rule_type
Definition: flex.c:78
#define MAX_RULE
Definition: flexdef.h:147
int ecgroup[CSIZE+1]
Definition: flex.c:83
int i
Definition: scan.c:967
int * firstst
Definition: flex.c:77
#define min(x, y)
Definition: flexdef.h:78
int variable_trailing_context_rules
Definition: flex.c:80
#define SYM_EPSILON
Definition: flexdef.h:192
int num_reallocs
Definition: flex.c:100
int nextecm[CSIZE+1]
Definition: flex.c:83
#define NIL
Definition: flexdef.h:153
int linenum
Definition: flex.c:71
#define STATE_TRAILING_CONTEXT
Definition: flexdef.h:414
int current_max_rules
Definition: flex.c:76
int numeps
Definition: flex.c:100
int * rule_linenum
Definition: flex.c:78
void mark_beginning_as_normal(int mach)
Definition: nfa.c:316
int mkbranch(int first, int second)
Definition: nfa.c:358
#define MAX_RULES_INCREMENT
Definition: flexdef.h:168
int mkrep(int mach, int lb, int ub)
Definition: nfa.c:532
void copy(PVValueArray< T > &pvFrom, size_t fromOffset, size_t fromStride, PVValueArray< T > &pvTo, size_t toOffset, size_t toStride, size_t count)
Copy a subarray from one scalar array to another.
int current_state_type
Definition: flex.c:79
int csize
Definition: flex.c:68
int num_rules
Definition: flex.c:76
int useecs
Definition: flex.c:67
int * finalst
Definition: flex.c:77
void finish_rule(int mach, int variable_trail_rule, int headcnt, int trailcnt)
Definition: nfa.c:209
int current_mns
Definition: flex.c:76
void lerrif(char[], int)
int dupmachine(int)
Definition: nfa.c:158
void flexerror(char[]) NORETURN
int mkor(int first, int second)
Definition: nfa.c:442
int * accptnum
Definition: flex.c:78
void dumpnfa(int state1)
Definition: nfa.c:106
#define INFINITY
Definition: flexdef.h:158
void add_accept(int mach, int accepting_number)
Definition: nfa.c:55
int performance_report
Definition: flex.c:68
int * state_type
Definition: flex.c:78
int mkstate(int sym)
Definition: nfa.c:578
int eps2
Definition: flex.c:100
int link_machines(int first, int last)
Definition: nfa.c:283
void mkxtion(int, int)
Definition: nfa.c:648
void new_rule(void)
Definition: nfa.c:674
#define reallocate_integer_array(array, size)
Definition: flexdef.h:584
#define SUPER_FREE_EPSILON(state)
Definition: flexdef.h:120
void line_directive_out(FILE *)
Definition: misc.c:479
int * assoc_rule
Definition: flex.c:78
void flexfatal(char[])
int * trans2
Definition: flex.c:77
int * trans1
Definition: flex.c:77
#define NO_TRANSITION
Definition: flexdef.h:156
#define STATE_NORMAL
Definition: flexdef.h:413
int lastnfa
Definition: flex.c:76
FILE * temp_action_file
Definition: flex.c:103
#define stderr
Definition: epicsStdio.h:32
#define MNS_INCREMENT
Definition: flexdef.h:171
int * lastst
Definition: flex.c:77
int * transchar
Definition: flex.c:77
int copysingl(int singl, int num)
Definition: nfa.c:86
int mkclos(int state)
Definition: nfa.c:385
int mkopt(int mach)
Definition: nfa.c:405
#define RULE_NORMAL
Definition: flexdef.h:421
void mkechar(int tch, int fwd[], int bck[])
Definition: ecs.c:328