1 package antlr;
2
/**
 * <b>SOFTWARE RIGHTS</b>
5 * <p>
6 * ANTLR 2.5.0 MageLang Institute, 1998
7 * <p>
8 * We reserve no legal rights to the ANTLR--it is fully in the
9 * public domain. An individual or company may do whatever
10 * they wish with source code distributed with ANTLR or the
11 * code generated by ANTLR, including the incorporation of
12 * ANTLR, or its output, into commerical software.
13 * <p>
14 * We encourage users to develop software with ANTLR. However,
15 * we do ask that credit is given to us for developing
16 * ANTLR. By "credit", we mean that if you use ANTLR or
17 * incorporate any source code into one of your programs
18 * (commercial product, research project, or otherwise) that
19 * you acknowledge this fact somewhere in the documentation,
20 * research report, etc... If you like ANTLR and have
21 * developed a nice tool with the output, please mention that
22 * you developed it using ANTLR. In addition, we ask that the
23 * headers remain intact in our source code. As long as these
24 * guidelines are kept, we expect to continue enhancing this
25 * system and expect to make other tools available as they are
26 * completed.
27 * <p>
28 * The ANTLR gang:
29 * @version ANTLR 2.5.0 MageLang Institute, 1998
30 * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a>
31 * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a>
32 */
33 import antlr.collections.impl.BitSet;
34 import antlr.collections.impl.Vector;
35
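/**
 * LL(k) grammar analyzer: computes lookahead sets for grammar elements and
 * checks decisions for determinism up to the grammar's maximum lookahead
 * depth.
 * <p>
 * All lookahead elements are sets of token types.
 *
 * @author Terence Parr, John Lilley
 * @version 2.00a
 * @see antlr.Grammar
 * @see antlr.Lookahead
 */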
public class LLkAnalyzer implements LLkGrammarAnalyzer {
    // When true, every analysis step is traced to System.out.
    // setGrammar() overwrites this with the grammar's analyzerDebug flag.
    public boolean DEBUG_ANALYZER = false;
    // Block currently under analysis; the analysis methods save and restore
    // it around nested computations.
    private AlternativeBlock currentBlock;
    // Tool through which errors and ambiguity warnings are reported.
    protected Tool tool = null;
    // Grammar being analyzed; assigned by setGrammar().
    protected Grammar grammar = null;
    // True when the grammar being analyzed is a LexerGrammar.
    protected boolean lexicalAnalysis = false;
    // Formatter handed to Lookahead.toString() when printing debug output.
    CharFormatter charFormatter = new JavaCharFormatter();
55
56
57
/**
 * Creates an analyzer bound to the given tool.  No grammar is attached
 * yet; call <tt>setGrammar</tt> before running any analysis.
 *
 * @param tool_ the tool this analyzer stores for error and warning reporting
 */
public LLkAnalyzer(Tool tool_) {
    this.tool = tool_;
}
61 * @return true if the block is deterministic
63 */
64 public boolean deterministic(AlternativeBlock blk) {
65
66 int k=1; if ( DEBUG_ANALYZER ) System.out.println("deterministic("+blk+")");
68 boolean det = true;
69 int nalts = blk.alternatives.size();
70 AlternativeBlock saveCurrentBlock = currentBlock;
71 currentBlock = blk;
72
73 if ( nalts==1 ) {
77 AlternativeElement e = blk.getAlternativeAt(0).head;
78 currentBlock.alti = 0;
79 blk.getAlternativeAt(0).cache[1] = e.look(1);
80 blk.getAlternativeAt(0).lookaheadDepth = 1; currentBlock = saveCurrentBlock;
82 return true; }
84
85 outer:
86 for (int i=0; i<nalts-1; i++) {
87 currentBlock.alti = i;
88 currentBlock.analysisAlt = i; currentBlock.altj = i+1; inner:
92 for (int j=i+1; j<nalts; j++) {
94 currentBlock.altj = j;
95 if ( DEBUG_ANALYZER ) System.out.println("comparing "+i+" against alt "+j);
96 currentBlock.analysisAlt = j; k = 1;
99 Lookahead[] r = new Lookahead[grammar.maxk+1];
102 boolean haveAmbiguity;
103 do {
104 haveAmbiguity = false;
105 if ( DEBUG_ANALYZER ) System.out.println("checking depth "+k+"<="+grammar.maxk);
106 LookaheadLookahead p = getAltLookahead(blk, i, k);
107 q = getAltLookahead(blk, j, k);
108
109 if ( DEBUG_ANALYZER ) System.out.println("p is "+p.toString(",", charFormatter, grammar));
112 if ( DEBUG_ANALYZER ) System.out.println("q is "+q.toString(",", charFormatter, grammar));
113 r[k] = p.intersection(q);
115 if ( DEBUG_ANALYZER ) System.out.println("intersection at depth "+k+" is "+r[k].toString());
116 if ( !r[k].nil() ) {
117 haveAmbiguity = true;
118 k++;
119 }
120 } while ( haveAmbiguity && k <= grammar.maxk );
122
123 Alternative ai = blk.getAlternativeAt(i);
124 Alternative aj = blk.getAlternativeAt(j);
125 if ( haveAmbiguity ) {
126 det = false;
127 ai.lookaheadDepth = NONDETERMINISTIC;
128 aj.lookaheadDepth = NONDETERMINISTIC;
129
130 * lookahead is still done for code generation, but messages
132 * should not be generated when comparing against alt j.
133 * Alternatives with syn preds that are unnecessary do
134 * not result in syn pred try-blocks.
135 */
136 if ( ai.synPred != null ) {
137 if ( DEBUG_ANALYZER ) {
138 System.out.println("alt "+i+" has a syn pred");
139 }
140 }
146
147 * lookahead is still done for code generation, but messages
149 * should not be generated when comparing against alt j.
150 */
151 else if ( ai.semPred != null ) {
152 if ( DEBUG_ANALYZER ) {
153 System.out.println("alt "+i+" has a sem pred");
154 }
155 }
156
157 * can turn off this warning IFF one of the alts is empty;
159 * that is, it points immediately at the end block.
160 */
161 else if ( !blk.warnWhenFollowAmbig &&
162 (ai.head instanceof BlockEndElement ||
163 aj.head instanceof BlockEndElement) )
164 {
165 }
168
169 * then don't generate a warning.
171 */
172 else if ( !blk.generateAmbigWarnings ) {
173 }
174
175
176 else {
177 tool.errorHandler.warnAltAmbiguity(
178 grammar,
179 blk, lexicalAnalysis, grammar.maxk, r, i, j );
186 }
187 }
188 else {
189 ai.lookaheadDepth = Math.max(ai.lookaheadDepth,k);
191 aj.lookaheadDepth = Math.max(aj.lookaheadDepth,k);
192 }
193 }
194 }
195
196 currentBlock = saveCurrentBlock;
197 return det;
198 }
199 * @return true if the block is deterministic
201 */
202 public boolean deterministic(OneOrMoreBlock blk) {
203 if ( DEBUG_ANALYZER ) System.out.println("deterministic(...)+("+blk+")");
204 AlternativeBlock saveCurrentBlock = currentBlock;
205 currentBlock = blk;
206 boolean blkOk = deterministic((AlternativeBlock)blk);
207 boolean det = deterministicImpliedPath(blk);
210 currentBlock = saveCurrentBlock;
211 return det&&blkOk;
212 }
213 * @return true if the block is deterministic
215 */
216 public boolean deterministic(ZeroOrMoreBlock blk) {
217 if ( DEBUG_ANALYZER ) System.out.println("deterministic(...)*("+blk+")");
218 AlternativeBlock saveCurrentBlock = currentBlock;
219 currentBlock = blk;
220 boolean blkOk = deterministic((AlternativeBlock)blk);
221 boolean det = deterministicImpliedPath(blk);
224 currentBlock = saveCurrentBlock;
225 return det&&blkOk;
226 }
227 * @return true if the block is deterministic
229 */
230 public boolean deterministicImpliedPath(BlockWithImpliedExitPath blk) {
231
232 int k;
233 boolean det = true;
234 Vector alts = blk.getAlternatives();
235 int nalts = alts.size();
236 currentBlock.altj = -1;
238 if ( DEBUG_ANALYZER ) System.out.println("deterministicImpliedPath");
239 for (int i=0; i<nalts; i++) { Alternative alt = blk.getAlternativeAt(i);
241 k = 1; Lookahead[] r = new Lookahead[grammar.maxk+1];
245 boolean haveAmbiguity;
246 do {
247 haveAmbiguity = false;
248 if ( DEBUG_ANALYZER ) System.out.println("checking depth "+k+"<="+grammar.maxk);
249 Lookahead p;
250 Lookahead follow = blk.next.look(k);
251 blk.exitCache[k] = follow;
252 currentBlock.alti = i;
253 p = getAltLookahead(blk, i, k);
254
255 if ( DEBUG_ANALYZER ) System.out.println("follow is "+follow.toString(",", charFormatter, grammar));
256 if ( DEBUG_ANALYZER ) System.out.println("p is "+p.toString(",", charFormatter, grammar));
257 r[k] = follow.intersection(p);
259 if ( DEBUG_ANALYZER ) System.out.println("intersection at depth "+k+" is "+r[k]);
260 if ( !r[k].nil() ) {
261 haveAmbiguity = true;
262 k++;
263 }
264 } while ( haveAmbiguity && k <= grammar.maxk );
266
267 if ( haveAmbiguity )
268 {
269 det = false;
270 alt.lookaheadDepth = NONDETERMINISTIC;
271 blk.exitLookaheadDepth = NONDETERMINISTIC;
272
273 * can turn off this warning.
275 */
276 if ( !blk.warnWhenFollowAmbig ) {
277 }
278
279 * then don't generate a warning.
281 */
282 else if ( !blk.generateAmbigWarnings ) {
283 }
284
285 else {
287 tool.errorHandler.warnAltExitAmbiguity(
288 grammar,
289 blk, lexicalAnalysis, grammar.maxk, r, i );
295 }
296 }
297 else {
298 alt.lookaheadDepth = Math.max(alt.lookaheadDepth,k);
299 blk.exitLookaheadDepth = Math.max(blk.exitLookaheadDepth,k);
300 }
301 }
302 return det;
303 }
304 * the rule associated witht the FOLLOW block.
306 */
307 public Lookahead FOLLOW(int k, RuleEndElement end) {
308 RuleBlock rb = (RuleBlock)end.block;
310 String rule;
312 if (lexicalAnalysis) {
313 rule = CodeGenerator.lexerRuleName(rb.getRuleName());
314 } else {
315 rule = rb.getRuleName();
316 }
317
318 if ( DEBUG_ANALYZER ) System.out.println("FOLLOW("+k+","+rule+")");
319
320 if ( end.lock[k] ) {
322 if ( DEBUG_ANALYZER ) System.out.println("FOLLOW cycle to "+rule);
323 return new Lookahead(rule);
324 }
325
326 if ( end.cache[k]!=null ) {
328 if ( DEBUG_ANALYZER ) {
329 System.out.println("cache entry FOLLOW("+k+") for "+rule+": "+end.cache[k].toString(",", charFormatter, grammar));
330 }
331 if ( end.cache[k].cycle==null ) {
333 return (Lookahead)end.cache[k].clone();
334 }
335 RuleSymbol rs = (RuleSymbol)grammar.getSymbol(end.cache[k].cycle);
337 RuleEndElement re = rs.getBlock().endNode;
338 if ( re.cache[k]==null ) {
341 return (Lookahead)end.cache[k].clone();
343 }
344 else {
345 end.cache[k] = re.cache[k];
351 return (Lookahead)re.cache[k].clone();
353 }
354 }
355
356 end.lock[k] = true;
358 Lookahead p = new Lookahead();
359
360 RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rule);
361
362 for (int i=0; i<rs.numReferences(); i++) {
364 RuleRefElement rr = rs.getReference(i);
365 if ( DEBUG_ANALYZER ) System.out.println("next["+rule+"] is "+rr.next.toString());
366 Lookahead q = rr.next.look(k);
367 if ( DEBUG_ANALYZER ) System.out.println("FIRST of next["+rule+"] ptr is "+q.toString());
368 * this end block, you have a cycle to yourself. Remove the
370 * cycle indication--the lookahead is complete.
371 */
372 if ( q.cycle!=null && q.cycle.equals(rule) ) {
373 q.cycle = null; }
375 p.combineWith(q);
377 if ( DEBUG_ANALYZER ) System.out.println("combined FOLLOW["+rule+"] is "+p.toString());
378 }
379
380 end.lock[k] = false;
382 if ( p.fset.nil() && p.cycle==null ) {
385 if ( grammar instanceof TreeWalkerGrammar ) {
386 p.fset.add(Token.NULL_TREE_LOOKAHEAD);
389 }
390 else if ( grammar instanceof LexerGrammar ) {
391 p.setEpsilon();
398 }
399 else {
400 p.fset.add(Token.EOF_TYPE);
401 }
402 }
403
404 if ( DEBUG_ANALYZER ) {
406 System.out.println("saving FOLLOW("+k+") for "+rule+": "+p.toString(",", charFormatter, grammar));
407 }
408 end.cache[k] = (Lookahead)p.clone();
409
410 return p;
411 }
412 private Lookahead getAltLookahead(AlternativeBlock blk, int alt, int k) {
413 Lookahead p;
414 Alternative a = blk.getAlternativeAt(alt);
415 AlternativeElement e = a.head;
416 if ( a.cache[k]==null ) {
418 p = e.look(k);
419 a.cache[k] = p;
420 }
421 else {
422 p = a.cache[k];
423 }
424 return p;
425 }
426
427 public Lookahead look(int k, ActionElement action) {
428 if ( DEBUG_ANALYZER ) System.out.println("lookAction("+k+","+action+")");
429 return action.next.look(k);
430 }
431
432 public Lookahead look(int k, AlternativeBlock blk) {
433 if ( DEBUG_ANALYZER ) System.out.println("lookAltBlk(" + k + "," + blk + ")");
434 AlternativeBlock saveCurrentBlock = currentBlock;
435 currentBlock = blk;
436 Lookahead p = new Lookahead();
437 for (int i=0; i<blk.alternatives.size(); i++) {
438 if ( DEBUG_ANALYZER ) System.out.println("alt " + i);
439 currentBlock.analysisAlt = i;
441 AlternativeElement elem = blk.getAlternativeAt(i).head;
442 Lookahead q = elem.look(k);
443 p.combineWith(q);
444 }
445 if (k == 1 && blk.not && subruleCanBeInverted(blk, lexicalAnalysis)) {
446 if (lexicalAnalysis) {
448 BitSet b = (BitSet)((LexerGrammar)grammar).charVocabulary.clone();
449 int[] elems = p.fset.toArray();
450 for (int j = 0; j < elems.length; j++) {
451 b.remove(elems[j]);
452 }
453 p.fset = b;
454 } else {
455 p.fset.notInPlace(Token.MIN_USER_TYPE, grammar.tokenManager.maxTokenType());
456 }
457 }
458 currentBlock = saveCurrentBlock;
459 return p;
460 }
461 * what begins the associated loop unless the
463 * node is locked.
464 * <p>
465 * if we hit the end of a loop, we have to include
466 * what tokens can begin the loop as well. If the start
467 * node is locked, then we simply found an empty path
468 * through this subrule while analyzing it. If the
469 * start node is not locked, then this node was hit
470 * during a FOLLOW operation and the FIRST of this
471 * block must be included in that lookahead computation.
472 */
473 public Lookahead look(int k, BlockEndElement end) {
474 if ( DEBUG_ANALYZER ) System.out.println("lookBlockEnd("+k+")");
475 if ( end.lock[k] ) {
476 return new Lookahead();
481 }
482 end.lock[k] = true;
483 Lookahead p;
484
485
486 if ( end.block instanceof ZeroOrMoreBlock ||
487 end.block instanceof OneOrMoreBlock ) {
488 p = look(k, end.block);
490 }
491 else {
492 p = new Lookahead();
493 }
494
495 * of what surrounds them. For example, A #(B C) D results in
497 * a look() for the TreeElement end of NULL_TREE_LOOKAHEAD, which
498 * indicates that nothing can follow the last node of tree #(B C)
499 */
500 if (end.block instanceof TreeElement) {
501 p.combineWith(Lookahead.of(Token.NULL_TREE_LOOKAHEAD));
502 }
503
504 * We cannot accurately say what would be matched following a
506 * syntactic predicate (you MIGHT be ok if you said it was whatever
507 * followed the alternative predicted by the predicate). Hence,
508 * (like end-of-token) we return Epsilon to indicate "unknown
509 * lookahead."
510 */
511 else if ( end.block instanceof SynPredBlock ) {
512 p.setEpsilon();
513 }
514
515 else {
517 Lookahead q = end.block.next.look(k);
518 p.combineWith(q);
519 }
520
521 end.lock[k] = false;
522 return p;
523 }
524 * <p>### Doesn't work for ( 'a' 'b' | 'a' ~'b' ) yet!!!
526 * <p>
527 * If the atom has the <tt>not</tt> flag on, then
528 * create the set complement of the tokenType
529 * which is the set of all characters referenced
530 * in the grammar with this char turned off.
531 * Also remove characters from the set that
532 * are currently allocated for predicting
533 * previous alternatives. This avoids ambiguity
534 * messages and is more properly what is meant.
535 * ( 'a' | ~'a' ) implies that the ~'a' is the
536 * "else" clause.
537 * <p>
538 * NOTE: we do <b>NOT</b> include exit path in
539 * the exclusion set. E.g.,
540 * ( 'a' | ~'a' )* 'b'
541 * should exit upon seeing a 'b' during the loop.
542 */
543 public Lookahead look(int k, CharLiteralElement atom) {
544 if ( DEBUG_ANALYZER ) System.out.println("lookCharLiteral("+k+","+atom+")");
545 if ( k>1 ) {
547 return atom.next.look(k-1);
548 }
549 if ( lexicalAnalysis) {
550 if (atom.not) {
551 BitSet b = (BitSet)((LexerGrammar)grammar).charVocabulary.clone();
552 if ( DEBUG_ANALYZER ) System.out.println("charVocab is "+b.toString());
553 removeCompetingPredictionSets(b, atom);
555 if ( DEBUG_ANALYZER ) System.out.println("charVocab after removal of prior alt lookahead "+b.toString());
556 b.clear(atom.tokenType);
558 return new Lookahead(b);
559 } else {
560 return Lookahead.of(atom.tokenType);
561 }
562 }
563 else {
564 tool.panic("Character literal reference found in parser");
566 return Lookahead.of(atom.tokenType);
568 }
569 }
570 public Lookahead look(int k, CharRangeElement r) {
571 if ( DEBUG_ANALYZER ) System.out.println("lookCharRange("+k+","+r+")");
572 if ( k>1 ) {
574 return r.next.look(k-1);
575 }
576 BitSet p = BitSet.of(r.begin);
577 for (int i=r.begin+1; i<=r.end; i++) {
578 p.add(i);
579 }
580 return new Lookahead(p);
581 }
582 public Lookahead look(int k, GrammarAtom atom) {
583 if ( DEBUG_ANALYZER ) System.out.println("look("+k+","+atom+"["+atom.tokenType+"])");
584
585 if ( lexicalAnalysis ) {
586 tool.panic("token reference found in lexer");
588 }
589 if ( k>1 ) {
591 return atom.next.look(k-1);
592 }
593 Lookahead l = Lookahead.of(atom.tokenType);
594 if (atom.not) {
595 int maxToken = grammar.tokenManager.maxTokenType();
597 l.fset.notInPlace(Token.MIN_USER_TYPE, maxToken);
598 removeCompetingPredictionSets(l.fset, atom);
600 }
601 return l;
602 }
603 * all alternatives and, if an empty path is found, the lookahead
605 * of what follows the block.
606 */
607 public Lookahead look(int k, OneOrMoreBlock blk) {
608 if ( DEBUG_ANALYZER ) System.out.println("look+"+k+","+blk+")");
609 Lookahead p = look(k, (AlternativeBlock)blk);
610 return p;
611 }
612 * Lock the node so that no other computation may come back
614 * on itself--infinite loop. This also implies infinite left-recursion
615 * in the grammar (or an error in this algorithm ;)).
616 */
617 public Lookahead look(int k, RuleBlock blk) {
618 if ( DEBUG_ANALYZER ) System.out.println("lookRuleBlk("+k+","+blk+")");
619 Lookahead p = look(k, (AlternativeBlock)blk);
620 return p;
621 }
622 * <p>
624 * Lexical rules never compute follow. They set epsilon and
625 * the code generator gens code to check for any character.
626 * The code generator must remove the tokens used to predict
627 * any previous alts in the same block.
628 * <p>
629 * When the last node of a rule is reached and noFOLLOW,
630 * it implies that a "local" FOLLOW will be computed
631 * after this call. I.e.,
632 * <pre>
633 * a : b A;
634 * b : B | ;
635 * c : b C;
636 * </pre>
637 * Here, when computing the look of rule b from rule a,
638 * we want only {B,EPSILON_TYPE} so that look(b A) will
639 * be {B,A} not {B,A,C}.
640 * <p>
641 * if the end block is not locked and the FOLLOW is
642 * wanted, the algorithm must compute the lookahead
643 * of what follows references to this rule. If
644 * end block is locked, FOLLOW will return an empty set
645 * with a cycle to the rule associated with this end block.
646 */
647 public Lookahead look(int k, RuleEndElement end) {
648 if ( DEBUG_ANALYZER )
649 System.out.println("lookRuleBlockEnd("+k+"); noFOLLOW="+end.noFOLLOW+"; lock is "+end.lock[k]);
650 if ( end.noFOLLOW ) {
651 Lookahead p = new Lookahead();
652 p.setEpsilon();
653 p.epsilonDepth = BitSet.of(k);
654 return p;
655 }
656 Lookahead p = FOLLOW(k,end);
657 return p;
658 }
659 *
661 * <p>
662 * When computing ruleref lookahead, we don't want the FOLLOW
663 * computation done if an empty path exists for the rule.
664 * The FOLLOW is too loose of a set...we want only to
665 * include the "local" FOLLOW or what can follow this
666 * particular ref to the node. In other words, we use
667 * context information to reduce the complexity of the
668 * analysis and strengthen the parser.
669 *
670 * The noFOLLOW flag is used as a means of restricting
671 * the FOLLOW to a "local" FOLLOW. This variable is
672 * orthogonal to the <tt>lock</tt> variable that prevents
673 * infinite recursion. noFOLLOW does not care about what k is.
674 */
675 public Lookahead look(int k, RuleRefElement rr) {
676 if ( DEBUG_ANALYZER ) System.out.println("lookRuleRef("+k+","+rr+")");
677 RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rr.targetRule);
678 if ( rs==null || !rs.defined ) {
679 tool.error("no definition of rule "+rr.targetRule,rr.getLine());
680 return new Lookahead();
681 }
682 RuleBlock rb = rs.getBlock();
683 RuleEndElement end = rb.endNode;
684 boolean saveEnd = end.noFOLLOW;
685 end.noFOLLOW = true;
686 Lookahead p = look(k, rr.targetRule);
688 if ( DEBUG_ANALYZER ) System.out.println("back from rule ref to "+rr.targetRule);
689 end.noFOLLOW = saveEnd;
691
692 if ( p.cycle!=null ) {
694 tool.error("infinite recursion to rule "+p.cycle+" from rule "+
695 rr.enclosingRuleName, rr.getLine());
696 }
697
698 if ( p.containsEpsilon() ) {
700 if ( DEBUG_ANALYZER )
701 System.out.println("rule ref to "+
702 rr.targetRule+" has eps, depth: "+p.epsilonDepth);
703
704 p.resetEpsilon();
706
708 int[] depths = p.epsilonDepth.toArray();
710 p.epsilonDepth = null; for (int i=0; i<depths.length; i++) {
712 int rk = k - (k-depths[i]);
713 Lookahead q = rr.next.look(rk); p.combineWith(q);
715 }
716 }
719
720 return p;
721 }
722 public Lookahead look(int k, StringLiteralElement atom) {
723 if ( DEBUG_ANALYZER ) System.out.println("lookStringLiteral("+k+","+atom+")");
724 if ( lexicalAnalysis ) {
725 if ( k > atom.processedAtomText.length() ) {
727 return atom.next.look(k - atom.processedAtomText.length());
728 }
729 else {
730 return Lookahead.of(atom.processedAtomText.charAt(k-1));
732 }
733 }
734 else {
735 if ( k>1 ) {
737 return atom.next.look(k-1);
738 }
739 Lookahead l = Lookahead.of(atom.tokenType);
740 if (atom.not) {
741 int maxToken = grammar.tokenManager.maxTokenType();
743 l.fset.notInPlace(Token.MIN_USER_TYPE, maxToken);
744 }
745 return l;
746 }
747 }
748 * what follows the block. By definition, the syntactic
750 * predicate block defies static analysis (you want to try it
751 * out at run-time). The LOOK of (a)=>A B is A for LL(1)
752 * ### is this even called?
753 */
754 public Lookahead look(int k, SynPredBlock blk) {
755 if ( DEBUG_ANALYZER ) System.out.println("look=>("+k+","+blk+")");
756 return blk.next.look(k);
757 }
758 public Lookahead look(int k, TokenRangeElement r) {
759 if ( DEBUG_ANALYZER ) System.out.println("lookTokenRange("+k+","+r+")");
760 if ( k>1 ) {
762 return r.next.look(k-1);
763 }
764 BitSet p = BitSet.of(r.begin);
765 for (int i=r.begin+1; i<=r.end; i++) {
766 p.add(i);
767 }
768 return new Lookahead(p);
769 }
770 public Lookahead look(int k, TreeElement t) {
771 if ( DEBUG_ANALYZER ) System.out.println("look("+k+","+t.root+"["+t.root.tokenType+"])");
772 if ( k>1 ) {
773 return t.next.look(k-1);
774 }
775 Lookahead l = Lookahead.of(t.root.tokenType);
776 if (t.root.not) {
777 int maxToken = grammar.tokenManager.maxTokenType();
779 l.fset.notInPlace(Token.MIN_USER_TYPE, maxToken);
780 }
781 return l;
782 }
783 public Lookahead look(int k, WildcardElement wc) {
784 if ( DEBUG_ANALYZER ) System.out.println("look(" + k + "," + wc + ")");
785
786 if ( k>1 ) {
788 return wc.next.look(k-1);
789 }
790
791 BitSet b;
792 if ( lexicalAnalysis ) {
793 b = (BitSet)((LexerGrammar)grammar).charVocabulary.clone();
795 }
796 else {
797 b = new BitSet(1);
798 int maxToken = grammar.tokenManager.maxTokenType();
800 b.notInPlace(Token.MIN_USER_TYPE, maxToken);
801 }
802
803 removeCompetingPredictionSets(b, wc);
805
806 return new Lookahead(b);
807 }
808 * follow the loop.
810 */
811 public Lookahead look(int k, ZeroOrMoreBlock blk) {
812 if ( DEBUG_ANALYZER ) System.out.println("look*("+k+","+blk+")");
813 Lookahead p = look(k, (AlternativeBlock)blk);
814 Lookahead q = blk.next.look(k);
815 p.combineWith(q);
816 return p;
817 }
818 * If the lookahead returns with epsilon, at least one epsilon
820 * path exists (one that consumes no tokens). The noFOLLOW
821 * flag being set for this endruleblk, indicates that the
822 * a rule ref invoked this rule.
823 *
824 * Currently only look(RuleRef) calls this. There is no need
825 * for the code generator to call this.
826 */
827 public Lookahead look(int k, String rule) {
828 if ( DEBUG_ANALYZER ) System.out.println("lookRuleName("+k+","+rule+")");
829 RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rule);
830 RuleBlock rb = rs.getBlock();
831
832 if ( rb.lock[k] ) {
833 if ( DEBUG_ANALYZER )
834 System.out.println("infinite recursion to rule "+rb.getRuleName());
835 return new Lookahead(rule);
836 }
837
838 if ( rb.cache[k]!=null ) {
840 if ( DEBUG_ANALYZER ) {
841 System.out.println("found depth "+k+" result in FIRST "+rule+" cache: "+
842 rb.cache[k].toString(",", charFormatter, grammar));
843 }
844 return (Lookahead)rb.cache[k].clone();
845 }
846
847 rb.lock[k] = true;
848 Lookahead p = look(k, (RuleBlock)rb);
849 rb.lock[k] = false;
850
851 rb.cache[k] = (Lookahead)p.clone();
853 if ( DEBUG_ANALYZER ) {
854 System.out.println("saving depth "+k+" result in FIRST "+rule+" cache: "+
855 rb.cache[k].toString(",", charFormatter, grammar));
856 }
857 return p;
858 }
859 * and follow set, but *only* if this element is the first element
861 * of the alternative. The class members currenBlock and
862 * currentBlock.analysisAlt must be set correctly.
863 * @param b The prediction bitset to be modified
864 * @el The element of interest
865 */
866 private void removeCompetingPredictionSets(BitSet b, AlternativeElement el)
867 {
868 if (el == currentBlock.getAlternativeAt(currentBlock.analysisAlt).head) {
871 for (int i=0; i<currentBlock.analysisAlt; i++) {
872 AlternativeElement e = currentBlock.getAlternativeAt(i).head;
873 b.subtractInPlace( e.look(1).fset );
874 }
875 }
876 }
877
878 private void reset() {
879 grammar = null;
880 DEBUG_ANALYZER = false;
881 currentBlock = null;
882 lexicalAnalysis = false;
883 }
884
885 public void setGrammar(Grammar g) {
886 if (grammar != null) {
887 reset();
888 }
889 grammar = g;
890
891 lexicalAnalysis = (grammar instanceof LexerGrammar);
893 DEBUG_ANALYZER = grammar.analyzerDebug;
894 }
895 public boolean subruleCanBeInverted(AlternativeBlock blk, boolean forLexer)
896 {
897 if (
898 blk instanceof ZeroOrMoreBlock ||
899 blk instanceof OneOrMoreBlock ||
900 blk instanceof SynPredBlock
901 ) {
902 return false;
903 }
904 if (blk.alternatives.size() == 0) {
906 return false;
907 }
908 for (int i = 0; i < blk.alternatives.size(); i++) {
911 Alternative alt = blk.getAlternativeAt(i);
912 if (alt.synPred != null || alt.semPred != null || alt.exceptionSpec != null) {
914 return false;
915 }
916 AlternativeElement elt = alt.head;
918 if (
919 !(
920 elt instanceof CharLiteralElement ||
921 elt instanceof TokenRefElement ||
922 elt instanceof CharRangeElement ||
923 elt instanceof TokenRangeElement ||
924 (elt instanceof StringLiteralElement && !forLexer)
925 ) ||
926 !(elt.next instanceof BlockEndElement) ||
927 elt.getAutoGenType() != GrammarElement.AUTO_GEN_NONE
928 ) {
929 return false;
930 }
931 }
932 return true;
933 }
934 }
935