1	package antlr;
2	
3	/**
4	 * <b>SOFTWARE RIGHTS</b>
5	 * <p>
6	 * ANTLR 2.5.0 MageLang Institute, 1998
7	 * <p>
8	 * We reserve no legal rights to the ANTLR--it is fully in the
9	 * public domain. An individual or company may do whatever
10	 * they wish with source code distributed with ANTLR or the
11	 * code generated by ANTLR, including the incorporation of
12	 * ANTLR, or its output, into commerical software.
13	 * <p>
14	 * We encourage users to develop software with ANTLR. However,
15	 * we do ask that credit is given to us for developing
16	 * ANTLR. By "credit", we mean that if you use ANTLR or
17	 * incorporate any source code into one of your programs
18	 * (commercial product, research project, or otherwise) that
19	 * you acknowledge this fact somewhere in the documentation,
20	 * research report, etc... If you like ANTLR and have
21	 * developed a nice tool with the output, please mention that
22	 * you developed it using ANTLR. In addition, we ask that the
23	 * headers remain intact in our source code. As long as these
24	 * guidelines are kept, we expect to continue enhancing this
25	 * system and expect to make other tools available as they are
26	 * completed.
27	 * <p>
28	 * The ANTLR gang:
29	 * @version ANTLR 2.5.0 MageLang Institute, 1998
30	 * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a>
31	 * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a>
32	 */
33	import antlr.collections.impl.BitSet;
34	import antlr.collections.impl.Vector;
35	
/**A linear-approximate LL(k) grammar analyzer.
37	 *
38	 * All lookahead elements are sets of token types.
39	 *
40	 * @author  Terence Parr, John Lilley
41	 * @version 2.00a
42	 * @see     antlr.Grammar
43	 * @see     antlr.Lookahead
44	 */
45	public class LLkAnalyzer implements LLkGrammarAnalyzer {
	// Set "analyzerDebug" to true
	// Debug switch: when true, the analysis methods of this class print a
	// trace of what they are doing to System.out.
	public boolean DEBUG_ANALYZER = false;
	// The block currently under analysis.  The deterministic(...) methods and
	// look(int, AlternativeBlock) save it on entry and restore it on exit.
	private AlternativeBlock currentBlock;
	// Tool handed to the constructor; used here to report errors, panics and
	// ambiguity warnings.
	protected Tool tool = null;
	// Grammar being analyzed; supplies maxk, the symbol table and the token
	// manager.  NOTE(review): never assigned in the visible part of this
	// class -- presumably set elsewhere before analysis starts; confirm.
	protected Grammar grammar = null;
	// True if analyzing a lexical grammar
	protected boolean lexicalAnalysis = false;
	// Used for formatting bit sets in default (Java) format
	CharFormatter charFormatter = new JavaCharFormatter();
55	
56	
57		/** Create an LLk analyzer */
58		public LLkAnalyzer(Tool tool_) {
59			tool = tool_;
60		}
61		/**Is this block of alternatives LL(k)?  Fill in alternative cache for this block.
62		 * @return true if the block is deterministic
63		 */
64		public boolean deterministic(AlternativeBlock blk) {
65			/** The lookahead depth for this decision */
66			int k=1;	// start at k=1
67			if ( DEBUG_ANALYZER ) System.out.println("deterministic("+blk+")");
68			boolean det = true;
69			int nalts = blk.alternatives.size();
70			AlternativeBlock saveCurrentBlock = currentBlock;
71			currentBlock = blk;
72	
73			// SPECIAL CASE: only one alternative.  We don't need to check the
74			// determinism, but other code expects the lookahead cache to be
75			// set for the single alt.
76			if ( nalts==1 ) {
77				AlternativeElement e = blk.getAlternativeAt(0).head;
78				currentBlock.alti = 0;
79				blk.getAlternativeAt(0).cache[1] = e.look(1);
80				blk.getAlternativeAt(0).lookaheadDepth = 1;	// set lookahead to LL(1)
81				currentBlock = saveCurrentBlock;
82				return true;	// always deterministic for one alt
83			}
84	
85	outer:
86			for (int i=0; i<nalts-1; i++) {
87				currentBlock.alti = i;
88				currentBlock.analysisAlt = i;	// which alt are we analyzing?
89				currentBlock.altj = i+1;		// reset this alt.  Haven't computed yet,
90												// but we need the alt number.
91	inner:
92				// compare against other alternatives with lookahead depth k
93				for (int j=i+1; j<nalts; j++) {
94					currentBlock.altj = j;
95					if ( DEBUG_ANALYZER ) System.out.println("comparing "+i+" against alt "+j);
96					currentBlock.analysisAlt = j;	// which alt are we analyzing?
97					k = 1;	// always attempt minimum lookahead possible.
98					
99					// check to see if there is a lookahead depth that distinguishes
100					// between alternatives i and j.
101					Lookahead[] r = new Lookahead[grammar.maxk+1];
102					boolean haveAmbiguity;
103					do {
104						haveAmbiguity = false;
105						if ( DEBUG_ANALYZER ) System.out.println("checking depth "+k+"<="+grammar.maxk);
106						LookaheadLookahead		p = getAltLookahead(blk, i, k);
107						q = getAltLookahead(blk, j, k);
108	
109						// compare LOOK(alt i) with LOOK(alt j).  Is there an intersection?
110						// Lookahead must be disjoint.
111						if ( DEBUG_ANALYZER ) System.out.println("p is "+p.toString(",", charFormatter, grammar));
112						if ( DEBUG_ANALYZER ) System.out.println("q is "+q.toString(",", charFormatter, grammar));
113						// r[i] = p.fset.and(q.fset);
114						r[k] = p.intersection(q);
115						if ( DEBUG_ANALYZER ) System.out.println("intersection at depth "+k+" is "+r[k].toString());
116						if ( !r[k].nil() ) {
117							haveAmbiguity = true;
118							k++;
119						}
120						// go until no more lookahead to use or no intersection
121					} while ( haveAmbiguity && k <= grammar.maxk );
122	
123					Alternative ai = blk.getAlternativeAt(i);
124					Alternative aj = blk.getAlternativeAt(j);
125					if ( haveAmbiguity ) {
126						det = false;
127						ai.lookaheadDepth = NONDETERMINISTIC;
128						aj.lookaheadDepth = NONDETERMINISTIC;
129	
130						/* if ith alt starts with a syntactic predicate, computing the
131						 * lookahead is still done for code generation, but messages
132						 * should not be generated when comparing against alt j.
133						 * Alternatives with syn preds that are unnecessary do
134						 * not result in syn pred try-blocks.
135						 */
136						if ( ai.synPred != null ) {
137							if ( DEBUG_ANALYZER ) {
138								System.out.println("alt "+i+" has a syn pred");
139							}
140							// The alt with the (...)=> block is nondeterministic for sure.
141							// If the (...)=> conflicts with alt j, j is nondeterministic.
142							// This prevents alt j from being in any switch statements.
143							// move on to next alternative=>no possible ambiguity!
144	//						continue inner;
145						}
146	
147						/* if ith alt starts with a semantic predicate, computing the
148						 * lookahead is still done for code generation, but messages
149						 * should not be generated when comparing against alt j.
150						 */
151						else if ( ai.semPred != null ) {
152							if ( DEBUG_ANALYZER ) {
153								System.out.println("alt "+i+" has a sem pred");
154							}
155						}
156						
157						/* If the user specified warnWhenFollowAmbig=false, then we
158						 * can turn off this warning IFF one of the alts is empty;
159						 * that is, it points immediately at the end block.
160						 */
161						else if ( !blk.warnWhenFollowAmbig &&
162									 (ai.head instanceof BlockEndElement ||
163								    aj.head instanceof BlockEndElement) )
164						{
165							// System.out.println("ai.head pts to "+ai.head.getClass());
166							// System.out.println("aj.head pts to "+aj.head.getClass());
167						}
168	
169						/* If they have the generateAmbigWarnings option off for the block
170						 * then don't generate a warning.
171						 */
172						else if ( !blk.generateAmbigWarnings ) {
173						}
174							
175						/* We have no choice, but to report a nondetermism */
176						else {
177							tool.errorHandler.warnAltAmbiguity(
178								grammar,
179								blk,			// the block
180								lexicalAnalysis,// true if lexical
181								grammar.maxk,	// depth of ambiguity
182								r,				// set of linear ambiguities
183								i,				// first ambiguous alternative
184								j				// second ambiguous alternative
185							);
186						}
187					}
188					else {
189						// a lookahead depth, k, was found where i and j do not conflict
190						ai.lookaheadDepth = Math.max(ai.lookaheadDepth,k);
191						aj.lookaheadDepth = Math.max(aj.lookaheadDepth,k);
192					}
193				}
194			}
195	
196			currentBlock = saveCurrentBlock;
197			return det;
198		}
199		/**Is (...)+ block LL(1)?  Fill in alternative cache for this block.
200		 * @return true if the block is deterministic
201		 */
202		public boolean deterministic(OneOrMoreBlock blk) {
203			if ( DEBUG_ANALYZER ) System.out.println("deterministic(...)+("+blk+")");
204			AlternativeBlock saveCurrentBlock = currentBlock;
205			currentBlock = blk;
206			boolean blkOk = deterministic((AlternativeBlock)blk);
207			// block has been checked, now check that what follows does not conflict
208			// with the lookahead of the (...)+ block.
209			boolean det = deterministicImpliedPath(blk);
210			currentBlock = saveCurrentBlock;
211			return det&&blkOk;
212		}
213		/**Is (...)* block LL(1)?  Fill in alternative cache for this block.
214		 * @return true if the block is deterministic
215		 */
216		public boolean deterministic(ZeroOrMoreBlock blk) {
217			if ( DEBUG_ANALYZER ) System.out.println("deterministic(...)*("+blk+")");
218			AlternativeBlock saveCurrentBlock = currentBlock;
219			currentBlock = blk;
220			boolean blkOk = deterministic((AlternativeBlock)blk);
221			// block has been checked, now check that what follows does not conflict
222			// with the lookahead of the (...)* block.
223			boolean det = deterministicImpliedPath(blk);
224			currentBlock = saveCurrentBlock;
225			return det&&blkOk;
226		}
227		/**Is this (...)* or (...)+ block LL(k)?
228		 * @return true if the block is deterministic
229		 */
230		public boolean deterministicImpliedPath(BlockWithImpliedExitPath blk) {
231			/** The lookahead depth for this decision considering implied exit path */
232			int k;
233			boolean det = true;
234			Vector alts = blk.getAlternatives();
235			int nalts = alts.size();
236			currentBlock.altj = -1;	// comparing against implicit optional/exit alt
237	
238			if ( DEBUG_ANALYZER ) System.out.println("deterministicImpliedPath");
239			for (int i=0; i<nalts; i++) {		// check follow against all alts
240				Alternative alt = blk.getAlternativeAt(i);
241				k = 1;							// assume eac alt is LL(1) with exit branch
242				// check to see if there is a lookahead depth that distinguishes
243				// between alternative i and the exit branch.
244				Lookahead[] r = new Lookahead[grammar.maxk+1];
245				boolean haveAmbiguity;
246				do {
247					haveAmbiguity = false;
248					if ( DEBUG_ANALYZER ) System.out.println("checking depth "+k+"<="+grammar.maxk);
249					Lookahead p;
250					Lookahead follow = blk.next.look(k);
251					blk.exitCache[k] = follow;
252					currentBlock.alti = i;
253					p = getAltLookahead(blk, i, k);
254	
255					if ( DEBUG_ANALYZER ) System.out.println("follow is "+follow.toString(",", charFormatter, grammar));
256					if ( DEBUG_ANALYZER ) System.out.println("p is "+p.toString(",", charFormatter, grammar));
257					//r[k] = follow.fset.and(p.fset);
258					r[k] = follow.intersection(p);
259					if ( DEBUG_ANALYZER ) System.out.println("intersection at depth "+k+" is "+r[k]);
260					if ( !r[k].nil() ) {
261						haveAmbiguity = true;
262						k++;
263					}
264					// go until no more lookahead to use or no intersection
265				} while ( haveAmbiguity && k <= grammar.maxk );
266	
267				if ( haveAmbiguity )
268				{
269					det = false;
270					alt.lookaheadDepth = NONDETERMINISTIC;
271					blk.exitLookaheadDepth = NONDETERMINISTIC;
272	
273					/* If the user specified warnWhenFollowAmbig=false, then we
274					 * can turn off this warning.
275					 */
276					if ( !blk.warnWhenFollowAmbig ) {
277					}
278	
279					/* If they have the generateAmbigWarnings option off for the block
280					 * then don't generate a warning.
281					 */
282					else if ( !blk.generateAmbigWarnings ) {
283					}
284	
285					// no choice but to generate a warning
286					else {
287						tool.errorHandler.warnAltExitAmbiguity(
288							grammar,
289							blk,			// the block
290							lexicalAnalysis,// true if lexical
291							grammar.maxk,	// depth of ambiguity
292							r,				// set of linear ambiguities
293							i				// ambiguous alternative
294						);
295					}		
296				}
297				else {
298					alt.lookaheadDepth = Math.max(alt.lookaheadDepth,k);
299					blk.exitLookaheadDepth = Math.max(blk.exitLookaheadDepth,k);
300				}
301			}
302			return det;
303		}
	/**Compute the lookahead set of whatever follows references to
	 * the rule associated with the FOLLOW block.
	 * <p>
	 * The result is built by combining <tt>look(k)</tt> of the element after
	 * every reference to the rule.  Results are cached per depth in
	 * <tt>end.cache</tt>, and <tt>end.lock</tt> guards against re-entering
	 * the same computation.  When the computation is re-entered, a Lookahead
	 * carrying only a cycle reference to the rule is returned instead.
	 * <p>
	 * If nothing follows the rule and no cycle was seen, a default is used:
	 * NULL_TREE_LOOKAHEAD for tree grammars, epsilon for lexer grammars and
	 * EOF_TYPE otherwise.
	 *
	 * @param k   the lookahead depth; also the index into the lock and cache
	 *            arrays of <tt>end</tt>
	 * @param end the end node of the rule whose FOLLOW is wanted
	 * @return the FOLLOW lookahead, which may still carry a cycle reference;
	 *         cache hits are returned as clones of the cached entry
	 */
	public Lookahead FOLLOW(int k, RuleEndElement end) {
		// what rule are we trying to compute FOLLOW of?
		RuleBlock rb = (RuleBlock)end.block;
		// rule name is different in lexer
		String rule;
		if (lexicalAnalysis) {
			rule = CodeGenerator.lexerRuleName(rb.getRuleName());
		} else {
			rule = rb.getRuleName();
		}

		if ( DEBUG_ANALYZER ) System.out.println("FOLLOW("+k+","+rule+")");

		// are we in the midst of computing this FOLLOW already?
		if ( end.lock[k] ) {
			if ( DEBUG_ANALYZER ) System.out.println("FOLLOW cycle to "+rule);
			// hand back a Lookahead that only records the cycle to this rule
			return new Lookahead(rule);
		}

		// Check to see if there is cached value
		if ( end.cache[k]!=null ) {
			if ( DEBUG_ANALYZER ) {
				System.out.println("cache entry FOLLOW("+k+") for "+rule+": "+end.cache[k].toString(",", charFormatter, grammar));
			}
			// if the cache is a complete computation then simply return entry 
			if ( end.cache[k].cycle==null ) {
				return (Lookahead)end.cache[k].clone();
			}
			// A cache entry exists, but it is a reference to a cyclic computation.
			// Look up the end node of the rule the cycle refers to.
			RuleSymbol rs = (RuleSymbol)grammar.getSymbol(end.cache[k].cycle);
			RuleEndElement re = rs.getBlock().endNode;
			// The other entry may not exist because it is still being
			// computed when this cycle cache entry was found here.
			if ( re.cache[k]==null ) {
				// return the cycle...that's all we can do at the moment.
				return (Lookahead)end.cache[k].clone();
			}
			else {
				// replace this cache entry with the entry from the referenced computation.
				// Eventually, this percolates a complete (no cycle reference) cache entry
				// to this node (or at least gets it closer and closer).  This is not
				// crucial, but makes cache lookup faster as we might have to look up
				// lots of cycle references before finding a complete reference.
				end.cache[k] = re.cache[k];
				// Return the cache entry associated with the cycle reference.
				return (Lookahead)re.cache[k].clone();
			}
		}

		end.lock[k] = true;	// prevent FOLLOW computation cycles

		// accumulator for the FOLLOW set; starts out as a fresh Lookahead
		Lookahead p = new Lookahead();

		RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rule);

		// Walk list of references to this rule to compute FOLLOW
		for (int i=0; i<rs.numReferences(); i++) {
			RuleRefElement rr = rs.getReference(i);
			if ( DEBUG_ANALYZER ) System.out.println("next["+rule+"] is "+rr.next.toString());
			// lookahead of whatever comes right after this particular reference
			Lookahead q = rr.next.look(k);
			if ( DEBUG_ANALYZER ) System.out.println("FIRST of next["+rule+"] ptr is "+q.toString());
			/* If there is a cycle then if the cycle is to the rule for
			 * this end block, you have a cycle to yourself.  Remove the
			 * cycle indication--the lookahead is complete.
			 */
			if ( q.cycle!=null && q.cycle.equals(rule) ) {
				q.cycle = null;	// don't want cycle to yourself!
			}
			// add the lookahead into the current FOLLOW computation set
			p.combineWith(q);
			if ( DEBUG_ANALYZER ) System.out.println("combined FOLLOW["+rule+"] is "+p.toString());
		}

		end.lock[k] = false; // we're not doing FOLLOW anymore

		// if no rules follow this, it can be a start symbol or called by a start sym.
		// set the follow to be end of file.
		if ( p.fset.nil() && p.cycle==null ) {
			if ( grammar instanceof TreeWalkerGrammar ) {
				// Tree grammars don't see EOF, they see end of sibling list or
				// "NULL TREE LOOKAHEAD".
				p.fset.add(Token.NULL_TREE_LOOKAHEAD);
			}
			else if ( grammar instanceof LexerGrammar ) {
				// Lexical grammars use Epsilon to indicate that the end of rule has been hit
				// EOF would be misleading; any character can follow a token rule not just EOF
				// as in a grammar (where a start symbol is followed by EOF).  There is no
				// sequence info in a lexer between tokens to indicate what is the last token
				// to be seen.
				// p.fset.add(EPSILON_TYPE);
				p.setEpsilon();
			}	
			else {
				p.fset.add(Token.EOF_TYPE);
			}
		}

		// Cache the result of the FOLLOW computation
		if ( DEBUG_ANALYZER ) {
			System.out.println("saving FOLLOW("+k+") for "+rule+": "+p.toString(",", charFormatter, grammar));
		}
		// store a clone so that later changes to the returned object
		// cannot reach the cached entry
		end.cache[k] = (Lookahead)p.clone();

		return p;
	}
412		private Lookahead getAltLookahead(AlternativeBlock blk, int alt, int k) {
413			Lookahead p;
414			Alternative a = blk.getAlternativeAt(alt);
415			AlternativeElement e = a.head;
416			//System.out.println("getAltLookahead("+k+","+e+"), cache size is "+a.cache.length);
417			if ( a.cache[k]==null ) {
418				p = e.look(k);
419				a.cache[k] = p;
420			}
421			else {
422				p = a.cache[k];
423			}
424			return p;
425		}
426		/**Actions are ignored */
427		public Lookahead look(int k, ActionElement action) {
428			if ( DEBUG_ANALYZER ) System.out.println("lookAction("+k+","+action+")");
429			return action.next.look(k);
430		}
431		/**Combine the lookahead computed for each alternative */
432		public Lookahead look(int k, AlternativeBlock blk) {
433			if ( DEBUG_ANALYZER ) System.out.println("lookAltBlk(" + k + "," + blk + ")");
434			AlternativeBlock saveCurrentBlock = currentBlock;
435			currentBlock = blk;
436			Lookahead p = new Lookahead();
437			for (int i=0; i<blk.alternatives.size(); i++) {
438				if ( DEBUG_ANALYZER ) System.out.println("alt " + i);
439				// must set analysis alt
440				currentBlock.analysisAlt = i;
441				AlternativeElement elem = blk.getAlternativeAt(i).head;
442				Lookahead q = elem.look(k);
443				p.combineWith(q);
444			}
445			if (k == 1 && blk.not && subruleCanBeInverted(blk, lexicalAnalysis)) {
446				// Invert the lookahead set
447				if (lexicalAnalysis) {
448					BitSet b = (BitSet)((LexerGrammar)grammar).charVocabulary.clone();
449					int[] elems = p.fset.toArray();
450					for (int j = 0; j < elems.length; j++) {
451						b.remove(elems[j]);
452					}
453					p.fset = b;
454				} else {
455					p.fset.notInPlace(Token.MIN_USER_TYPE, grammar.tokenManager.maxTokenType());
456				}
457			}
458			currentBlock = saveCurrentBlock;
459			return p;
460		}
461		/**Compute what follows this place-holder node and possibly
462		 * what begins the associated loop unless the
463		 * node is locked.
464		 * <p>
465		 * if we hit the end of a loop, we have to include
466		 * what tokens can begin the loop as well.  If the start
467		 * node is locked, then we simply found an empty path
468		 * through this subrule while analyzing it.  If the
469		 * start node is not locked, then this node was hit
470		 * during a FOLLOW operation and the FIRST of this
471		 * block must be included in that lookahead computation.
472		 */
473		public Lookahead look(int k, BlockEndElement end) {
474			if ( DEBUG_ANALYZER ) System.out.println("lookBlockEnd("+k+")");
475			if ( end.lock[k] ) {
476				// computation in progress => the tokens we would have
477				// computed (had we not been locked) will be included
478				// in the set by that computation with the lock on this
479				// node.
480				return new Lookahead();
481			}
482			end.lock[k] = true;
483			Lookahead p;
484			
485			/* Hitting the end of a loop means you can see what begins the loop */
486			if ( end.block instanceof ZeroOrMoreBlock ||
487				 end.block instanceof OneOrMoreBlock ) {
488				// compute what can start the block
489				p = look(k, end.block);
490			}
491			else {
492				p = new Lookahead();
493			}
494	
495			/* Tree blocks do not have any follow because they are children
496			 * of what surrounds them.  For example, A #(B C) D results in
497			 * a look() for the TreeElement end of NULL_TREE_LOOKAHEAD, which
498			 * indicates that nothing can follow the last node of tree #(B C)
499			 */
500			if (end.block instanceof TreeElement) {
501				p.combineWith(Lookahead.of(Token.NULL_TREE_LOOKAHEAD));
502			}
503			
504			/* Syntactic predicates such as ( (A)? )=> have no follow per se.
505			 * We cannot accurately say what would be matched following a
506			 * syntactic predicate (you MIGHT be ok if you said it was whatever
507			 * followed the alternative predicted by the predicate).  Hence,
508			 * (like end-of-token) we return Epsilon to indicate "unknown
509			 * lookahead."
510			 */
511			else if ( end.block instanceof SynPredBlock ) {
512				p.setEpsilon();
513			}	
514	
515			// compute what can follow the block
516			else {
517				Lookahead q = end.block.next.look(k);
518				p.combineWith(q);
519			}
520			
521			end.lock[k] = false;
522			return p;
523		}
524		/**Return this char as the lookahead if k=1.
525		 * <p>### Doesn't work for ( 'a' 'b' | 'a' ~'b' ) yet!!!
526		 * <p>
527		 * If the atom has the <tt>not</tt> flag on, then
528		 * create the set complement of the tokenType
529		 * which is the set of all characters referenced
530		 * in the grammar with this char turned off.
531		 * Also remove characters from the set that
532		 * are currently allocated for predicting
533		 * previous alternatives.  This avoids ambiguity
534		 * messages and is more properly what is meant.
535		 * ( 'a' | ~'a' ) implies that the ~'a' is the
536		 * "else" clause.
537		 * <p>
538		 * NOTE: we do <b>NOT</b> include exit path in
539		 * the exclusion set. E.g.,
540		 * ( 'a' | ~'a' )* 'b'
541		 * should exit upon seeing a 'b' during the loop.
542		 */
543		public Lookahead look(int k, CharLiteralElement atom) {
544			if ( DEBUG_ANALYZER ) System.out.println("lookCharLiteral("+k+","+atom+")");
545			// Skip until analysis hits k==1 
546			if ( k>1 ) {
547				return atom.next.look(k-1);
548			}
549			if ( lexicalAnalysis) {
550				if (atom.not) {
551					BitSet b = (BitSet)((LexerGrammar)grammar).charVocabulary.clone();
552					if ( DEBUG_ANALYZER ) System.out.println("charVocab is "+b.toString());
553					// remove stuff predicted by preceding alts and follow of block
554					removeCompetingPredictionSets(b, atom);
555					if ( DEBUG_ANALYZER ) System.out.println("charVocab after removal of prior alt lookahead "+b.toString());
556					// now remove element that is stated not to be in the set
557					b.clear(atom.tokenType);
558					return new Lookahead(b);
559				} else {
560					return Lookahead.of(atom.tokenType);
561				}
562			}
563			else {
564				// Should have been avoided by MakeGrammar
565				tool.panic("Character literal reference found in parser");
566				// ... so we make the compiler happy
567				return Lookahead.of(atom.tokenType);
568			}
569		}
570		public Lookahead look(int k, CharRangeElement r) {
571			if ( DEBUG_ANALYZER ) System.out.println("lookCharRange("+k+","+r+")");
572			// Skip until analysis hits k==1 
573			if ( k>1 ) {
574				return r.next.look(k-1);
575			}
576			BitSet p = BitSet.of(r.begin);
577			for (int i=r.begin+1; i<=r.end; i++) {
578				p.add(i);
579			}
580			return new Lookahead(p);
581		}
582		public Lookahead look(int k, GrammarAtom atom) {
583			if ( DEBUG_ANALYZER ) System.out.println("look("+k+","+atom+"["+atom.tokenType+"])");
584			
585			if ( lexicalAnalysis ) {
586				// MakeGrammar should have created a rule reference instead
587				tool.panic("token reference found in lexer");
588			}
589			// Skip until analysis hits k==1 
590			if ( k>1 ) {
591				return atom.next.look(k-1);
592			}
593			Lookahead l = Lookahead.of(atom.tokenType);
594			if (atom.not) {
595				// Invert the lookahead set against the token vocabulary
596				int maxToken = grammar.tokenManager.maxTokenType();
597				l.fset.notInPlace(Token.MIN_USER_TYPE, maxToken);
598				// remove stuff predicted by preceding alts and follow of block
599				removeCompetingPredictionSets(l.fset, atom);
600			}
601			return l;
602		}
603		/**The lookahead of a (...)+ block is the combined lookahead of
604		 * all alternatives and, if an empty path is found, the lookahead
605		 * of what follows the block.
606		 */
607		public Lookahead look(int k, OneOrMoreBlock blk) {
608			if ( DEBUG_ANALYZER ) System.out.println("look+"+k+","+blk+")");
609			Lookahead p = look(k, (AlternativeBlock)blk);
610			return p;
611		}
612		/**Combine the lookahead computed for each alternative.
613		 * Lock the node so that no other computation may come back
614		 * on itself--infinite loop.  This also implies infinite left-recursion
615		 * in the grammar (or an error in this algorithm ;)).
616		 */
617		public Lookahead look(int k, RuleBlock blk) {
618			if ( DEBUG_ANALYZER ) System.out.println("lookRuleBlk("+k+","+blk+")");
619			Lookahead p = look(k, (AlternativeBlock)blk);
620			return p;
621		}
622		/**If not locked or noFOLLOW set, compute FOLLOW of a rule.
623		 * <p>
624		 * Lexical rules never compute follow.  They set epsilon and
625		 * the code generator gens code to check for any character.
626		 * The code generator must remove the tokens used to predict
627		 * any previous alts in the same block.
628		 * <p>
629		 * When the last node of a rule is reached and noFOLLOW,
630		 * it implies that a "local" FOLLOW will be computed
631		 * after this call.  I.e.,
632		 * <pre>
633		 *		a : b A;
634		 *		b : B | ;
635		 *		c : b C;
636		 * </pre>
637		 * Here, when computing the look of rule b from rule a,
638		 * we want only {B,EPSILON_TYPE} so that look(b A) will
639		 * be {B,A} not {B,A,C}.
640		 * <p>
641		 * if the end block is not locked and the FOLLOW is
642		 * wanted, the algorithm must compute the lookahead
643		 * of what follows references to this rule.  If
644		 * end block is locked, FOLLOW will return an empty set
645		 * with a cycle to the rule associated with this end block.
646		 */
647		public Lookahead look(int k, RuleEndElement end) {
648			if ( DEBUG_ANALYZER )
649				System.out.println("lookRuleBlockEnd("+k+"); noFOLLOW="+end.noFOLLOW+"; lock is "+end.lock[k]);
650			if ( /*lexicalAnalysis ||*/ end.noFOLLOW ) {
651				Lookahead p = new Lookahead();
652				p.setEpsilon();
653				p.epsilonDepth = BitSet.of(k);
654				return p;
655			}
656			Lookahead p = FOLLOW(k,end);
657			return p;
658		}
659		/**Compute the lookahead contributed by a rule reference.
660		 *
661		 * <p>
662		 * When computing ruleref lookahead, we don't want the FOLLOW
663		 * computation done if an empty path exists for the rule.
664		 * The FOLLOW is too loose of a set...we want only to
665		 * include the "local" FOLLOW or what can follow this
666		 * particular ref to the node.  In other words, we use
667		 * context information to reduce the complexity of the
668		 * analysis and strengthen the parser.
669		 *
670		 * The noFOLLOW flag is used as a means of restricting
671		 * the FOLLOW to a "local" FOLLOW.  This variable is
672		 * orthogonal to the <tt>lock</tt> variable that prevents
673		 * infinite recursion.  noFOLLOW does not care about what k is.
674		 */
675		public Lookahead look(int k, RuleRefElement rr) {
676			if ( DEBUG_ANALYZER ) System.out.println("lookRuleRef("+k+","+rr+")");
677			RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rr.targetRule);
678			if ( rs==null || !rs.defined ) {
679				tool.error("no definition of rule "+rr.targetRule,rr.getLine());
680				return new Lookahead();
681			}
682			RuleBlock rb = rs.getBlock();
683			RuleEndElement end = rb.endNode;
684			boolean saveEnd = end.noFOLLOW;
685			end.noFOLLOW = true;
686			// go off to the rule and get the lookahead (w/o FOLLOW)
687			Lookahead p = look(k, rr.targetRule);
688			if ( DEBUG_ANALYZER ) System.out.println("back from rule ref to "+rr.targetRule);
689			// restore state of end block
690			end.noFOLLOW = saveEnd;
691			
692			// check for infinite recursion.  If a cycle is returned: trouble!
693			if ( p.cycle!=null ) {
694				tool.error("infinite recursion to rule "+p.cycle+" from rule "+
695					rr.enclosingRuleName, rr.getLine());
696			}
697	
698			// is the local FOLLOW required?
699			if ( p.containsEpsilon() ) {
700				if ( DEBUG_ANALYZER )
701					System.out.println("rule ref to "+
702						rr.targetRule+" has eps, depth: "+p.epsilonDepth);
703		
704				// remove epsilon
705				p.resetEpsilon();
706				// fset.clear(EPSILON_TYPE);
707				
708				// for each lookahead depth that saw epsilon
709				int[] depths = p.epsilonDepth.toArray();
710				p.epsilonDepth = null;		// clear all epsilon stuff
711				for (int i=0; i<depths.length; i++) {
712					int rk = k - (k-depths[i]);
713					Lookahead q = rr.next.look(rk);	// see comments in Lookahead
714					p.combineWith(q);
715				}
716				// note: any of these look() computations for local follow can
717				// set EPSILON in the set again if the end of this rule is found.
718			}
719	
720			return p;
721		}
722		public Lookahead look(int k, StringLiteralElement atom) {
723			if ( DEBUG_ANALYZER ) System.out.println("lookStringLiteral("+k+","+atom+")");
724			if ( lexicalAnalysis ) {
725				// need more lookahead than string can provide?
726				if ( k > atom.processedAtomText.length() ) {
727					return atom.next.look(k - atom.processedAtomText.length());
728				}
729				else {
730					// get char at lookahead depth k, from the processed literal text
731					return Lookahead.of(atom.processedAtomText.charAt(k-1));
732				}
733			}
734			else {
735				// Skip until analysis hits k==1 
736				if ( k>1 ) {
737					return atom.next.look(k-1);
738				}
739				Lookahead l = Lookahead.of(atom.tokenType);
740				if (atom.not) {
741					// Invert the lookahead set against the token vocabulary
742					int maxToken = grammar.tokenManager.maxTokenType();
743					l.fset.notInPlace(Token.MIN_USER_TYPE, maxToken);
744				}
745				return l;
746			}
747		}
748		/**The lookahead of a (...)=> block is the lookahead of
749		 * what follows the block.  By definition, the syntactic
750		 * predicate block defies static analysis (you want to try it
751		 * out at run-time).  The LOOK of (a)=>A B is A for LL(1)
752		 * ### is this even called?
753		 */
754		public Lookahead look(int k, SynPredBlock blk) {
755			if ( DEBUG_ANALYZER ) System.out.println("look=>("+k+","+blk+")");
756			return blk.next.look(k);
757		}
758		public Lookahead look(int k, TokenRangeElement r) {
759			if ( DEBUG_ANALYZER ) System.out.println("lookTokenRange("+k+","+r+")");
760			// Skip until analysis hits k==1 
761			if ( k>1 ) {
762				return r.next.look(k-1);
763			}
764			BitSet p = BitSet.of(r.begin);
765			for (int i=r.begin+1; i<=r.end; i++) {
766				p.add(i);
767			}
768			return new Lookahead(p);
769		}
770		public Lookahead look(int k, TreeElement t) {
771			if ( DEBUG_ANALYZER ) System.out.println("look("+k+","+t.root+"["+t.root.tokenType+"])");
772			if ( k>1 ) {
773				return t.next.look(k-1);
774			}
775			Lookahead l = Lookahead.of(t.root.tokenType);
776			if (t.root.not) {
777				// Invert the lookahead set against the token vocabulary
778				int maxToken = grammar.tokenManager.maxTokenType();
779				l.fset.notInPlace(Token.MIN_USER_TYPE, maxToken);
780			}
781			return l;
782		}
783		public Lookahead look(int k, WildcardElement wc) {
784			if ( DEBUG_ANALYZER ) System.out.println("look(" + k + "," + wc + ")");
785			
786			// Skip until analysis hits k==1 
787			if ( k>1 ) {
788				return wc.next.look(k-1);
789			}
790	
791			BitSet b;
792			if ( lexicalAnalysis ) {
793				// Copy the character vocabulary
794				b = (BitSet)((LexerGrammar)grammar).charVocabulary.clone();
795			}
796			else {
797				b = new BitSet(1);
798				// Invert the lookahead set against the token vocabulary
799				int maxToken = grammar.tokenManager.maxTokenType();
800				b.notInPlace(Token.MIN_USER_TYPE, maxToken);
801			}
802	
803			// Remove prediction sets from competing alternatives
804			removeCompetingPredictionSets(b, wc);
805	
806			return new Lookahead(b);
807		}
808		/** The (...)* element is the combined lookahead of the alternatives and what can
809		 *  follow the loop.
810		 */
811		public Lookahead look(int k, ZeroOrMoreBlock blk) {
812			if ( DEBUG_ANALYZER ) System.out.println("look*("+k+","+blk+")");
813			Lookahead p = look(k, (AlternativeBlock)blk);
814			Lookahead q = blk.next.look(k);
815			p.combineWith(q);
816			return p;
817		}
818		/**Compute the combined lookahead for all productions of a rule.
819		 * If the lookahead returns with epsilon, at least one epsilon
820		 * path exists (one that consumes no tokens).  The noFOLLOW
	 * flag being set for this endruleblk indicates that
	 * a rule ref invoked this rule.
823		 *
824		 * Currently only look(RuleRef) calls this.  There is no need
825		 * for the code generator to call this.
826		 */
827		public Lookahead look(int k, String rule) {
828			if ( DEBUG_ANALYZER ) System.out.println("lookRuleName("+k+","+rule+")");
829			RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rule);
830			RuleBlock rb = rs.getBlock();
831			
832			if ( rb.lock[k] ) {
833				if ( DEBUG_ANALYZER )
834					System.out.println("infinite recursion to rule "+rb.getRuleName());
835				return new Lookahead(rule);
836			}
837	
838			// have we computed it before?
839			if ( rb.cache[k]!=null ) {
840				if ( DEBUG_ANALYZER ) {
841					System.out.println("found depth "+k+" result in FIRST "+rule+" cache: "+
842						rb.cache[k].toString(",", charFormatter, grammar));
843				}
844				return (Lookahead)rb.cache[k].clone();
845			}
846	
847			rb.lock[k] = true;
848			Lookahead p = look(k, (RuleBlock)rb);
849			rb.lock[k] = false;
850	
851			// cache results
852			rb.cache[k] = (Lookahead)p.clone();
853			if ( DEBUG_ANALYZER ) {
854				System.out.println("saving depth "+k+" result in FIRST "+rule+" cache: "+
855					rb.cache[k].toString(",", charFormatter, grammar));
856			}
857			return p;
858		}
859		/** Remove the prediction sets from preceding alternatives
860		 * and follow set, but *only* if this element is the first element 
	 * of the alternative.  The class members currentBlock and
	 * currentBlock.analysisAlt must be set correctly.
	 * @param b The prediction bitset to be modified
	 * @param el The element of interest
865		 */
866		private void removeCompetingPredictionSets(BitSet b, AlternativeElement el)
867		{
868			// Only do this if the element is the first element of the alt, 
869			// because we are making an implicit assumption that k==1.
870			if (el == currentBlock.getAlternativeAt(currentBlock.analysisAlt).head) {
871				for (int i=0; i<currentBlock.analysisAlt; i++) {
872					AlternativeElement e = currentBlock.getAlternativeAt(i).head;
873					b.subtractInPlace( e.look(1).fset );
874				}
875			}
876		}
877		/** reset the analyzer so it looks like a new one */
878		private void reset() {
879			grammar = null;
880			DEBUG_ANALYZER = false;
881			currentBlock = null;
882			lexicalAnalysis = false;
883		}
884		/** Set the grammar for the analyzer */
885		public void setGrammar(Grammar g) {
886			if (grammar != null) {
887				reset();
888			}
889			grammar = g;
890	
891			// Is this lexical?
892			lexicalAnalysis = (grammar instanceof LexerGrammar);
893			DEBUG_ANALYZER = grammar.analyzerDebug;
894		}
895		public boolean subruleCanBeInverted(AlternativeBlock blk, boolean forLexer)
896		{
897			if (
898				blk instanceof ZeroOrMoreBlock ||
899				blk instanceof OneOrMoreBlock ||
900				blk instanceof SynPredBlock
901			) {
902				return false;
903			}
904			// Cannot invert an empty subrule
905			if (blk.alternatives.size() == 0) {
906				return false;
907			}
908			// The block must only contain alternatives with a single element,
909			// where each element is a char, token, char range, or token range.
910			for (int i = 0; i < blk.alternatives.size(); i++) {
911				Alternative alt = blk.getAlternativeAt(i);
912				// Cannot have anything interesting in the alternative ...
913				if (alt.synPred != null || alt.semPred != null || alt.exceptionSpec != null) {
914					return false;
915				}
916				// ... and there must be one simple element
917				AlternativeElement elt = alt.head;
918				if (
919					!(
920						elt instanceof CharLiteralElement ||
921						elt instanceof TokenRefElement ||
922						elt instanceof CharRangeElement ||
923						elt instanceof TokenRangeElement ||
924						(elt instanceof StringLiteralElement && !forLexer)
925					) ||
926					!(elt.next instanceof BlockEndElement) ||
927					elt.getAutoGenType() != GrammarElement.AUTO_GEN_NONE
928				) {
929					return false;
930				}
931			}
932			return true;
933		}
934	}
935