1	package antlr;
2	
3	/**
4	 * <b>SOFTWARE RIGHTS</b>
5	 * <p>
6	 * ANTLR 2.5.0 MageLang Institute, 1998
7	 * <p>
8	 * We reserve no legal rights to the ANTLR--it is fully in the
9	 * public domain. An individual or company may do whatever
10	 * they wish with source code distributed with ANTLR or the
11	 * code generated by ANTLR, including the incorporation of
12	 * ANTLR, or its output, into commerical software.
13	 * <p>
14	 * We encourage users to develop software with ANTLR. However,
15	 * we do ask that credit is given to us for developing
16	 * ANTLR. By "credit", we mean that if you use ANTLR or
17	 * incorporate any source code into one of your programs
18	 * (commercial product, research project, or otherwise) that
19	 * you acknowledge this fact somewhere in the documentation,
20	 * research report, etc... If you like ANTLR and have
21	 * developed a nice tool with the output, please mention that
22	 * you developed it using ANTLR. In addition, we ask that the
23	 * headers remain intact in our source code. As long as these
24	 * guidelines are kept, we expect to continue enhancing this
25	 * system and expect to make other tools available as they are
26	 * completed.
27	 * <p>
28	 * The ANTLR gang:
29	 * @version ANTLR 2.5.0 MageLang Institute, 1998
30	 * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a>
31	 * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a>
32	 */
33	import java.io.PrintWriter; // SAS: for proper text i/o
34	import java.io.IOException;
35	import java.io.FileWriter;  // SAS: for proper text i/o
36	import antlr.collections.impl.Vector;
37	import antlr.collections.impl.BitSet;
38	
39	/**A generic ANTLR code generator.  All code generators
40	 * Derive from this class.
41	 *
42	 * <p>
43	 * A CodeGenerator knows about a Grammar data structure and
44	 * a grammar analyzer.  The Grammar is walked to generate the
45	 * appropriate code for both a parser and lexer (if present).
46	 * This interface may change slightly so that the lexer is
47	 * itself living inside of a Grammar object (in which case,
48	 * this class generates only one recognizer).  The main method
49	 * to call is <tt>gen()</tt>, which initiates all code gen.
50	 *
51	 * <p>
52	 * The interaction of the code generator with the analyzer is
53	 * simple: each subrule block calls deterministic() before generating
54	 * code for the block.  Method deterministic() sets lookahead caches
55	 * in each Alternative object.  Technically, a code generator
56	 * doesn't need the grammar analyzer if all lookahead analysis
57	 * is done at runtime, but this would result in a slower parser.
58	 *
59	 * <p>
60	 * This class provides a set of support utilities to handle argument
61	 * list parsing and so on.
62	 *
63	 * @author  Terence Parr, John Lilley
64	 * @version 2.00a
65	 * @see     antlr.JavaCodeGenerator
66	 * @see     antlr.DiagnosticCodeGenerator
67	 * @see     antlr.LLkAnalyzer
68	 * @see     antlr.Grammar
69	 * @see     antlr.AlternativeElement
70	 * @see     antlr.Lookahead
71	 */
72	public abstract class CodeGenerator {
73		/** Current tab indentation for code output */
74		protected int tabs=0;
75		/** Current output Stream */
76		transient protected PrintWriter currentOutput; // SAS: for proper text i/o
77		/** The grammar for which we generate code */
78		protected Grammar grammar = null;
79		/** List of all bitsets that must be dumped.  These are Vectors of BitSet. */
80		protected Vector bitsetsUsed;
81		/** The antlr Tool */
82		protected Tool tool;
83		/** The grammar behavior */
84		protected DefineGrammarSymbols behavior;
85		/** The LLk analyzer */
86		protected LLkGrammarAnalyzer analyzer;
87		/** Object used to format characters in the target language.
88		  * subclass must initialize this to the language-specific formatter
89		  */
90		protected CharFormatter charFormatter;
91	
92		/** Use option "codeGenDebug" to generate debugging output */
93		protected boolean DEBUG_CODE_GENERATOR = false;
94	
95		/** Default values for code-generation thresholds */
96		protected static final int DEFAULT_MAKE_SWITCH_THRESHOLD = 2;
97		protected static final int DEFAULT_BITSET_TEST_THRESHOLD = 4;
98	
99		/** This is a hint for the language-specific code generator.
100		 * A switch() or language-specific equivalent will be generated instead
101		 * of a series of if/else statements for blocks with number of alternates
102		 * greater than or equal to this number of non-predicated LL(1) alternates.
103		 * This is modified by the grammar option "codeGenMakeSwitchThreshold"
104		 */
105		protected int makeSwitchThreshold = DEFAULT_MAKE_SWITCH_THRESHOLD;
106	
107		/** This is a hint for the language-specific code generator.
108		 * A bitset membership test will be generated instead of an
109		 * ORed series of LA(k) comparisions for lookahead sets with
110		 * degree greater than or equal to this value.
111		 * This is modified by the grammar option "codeGenBitsetTestThreshold"
112		 */
113		protected int bitsetTestThreshold = DEFAULT_BITSET_TEST_THRESHOLD;
114		
115		private static boolean OLD_ACTION_TRANSLATOR = true;
116	
117	
118		/** Construct code generator base class */
119		public CodeGenerator() {}
120		/** Output a String to the currentOutput stream.
121		 * Ignored if string is null.
122		 * @param s The string to output
123		 */
124		protected void _print(String s) {
125			if (s != null) {
126				currentOutput.print(s);
127			}
128		}
129		/** Print an action without leading tabs, attempting to
130		 * preserve the current indentation level for multi-line actions
131		 * Ignored if string is null.
132		 * @param s The action string to output
133		 */
134		protected void _printAction(String s) {
135			if (s == null) {
136				return;
137			}
138	
139			// Skip leading newlines, tabs and spaces
140			int start = 0;
141			while (start < s.length() && Character.isSpaceChar(s.charAt(start)) )
142			{
143				start++;
144			}
145	
146			// Skip leading newlines, tabs and spaces
147			int end = s.length()-1;
148			while ( end > start && Character.isSpaceChar(s.charAt(end)) ) 
149			{
150				end--;
151			}
152	
153			char c=0;
154			for (int i = start; i <= end;)
155			{
156				c = s.charAt(i);
157				i++;
158				boolean newline = false;
159				switch (c)
160				{
161				case '\n':
162					newline=true;
163					break;
164				case '\r':
165					if ( i<=end && s.charAt(i)=='\n' ) {
166						i++;
167					}
168					newline=true;
169					break;
170				default: 
171					currentOutput.print(c); 
172					break;
173				}
174				if ( newline ) {
175					currentOutput.println(); 
176					printTabs();
177					// Absorb leading whitespace
178					while (i <= end && Character.isSpaceChar(s.charAt(i)) ) {
179						i++;
180					}
181					newline=false;
182				}
183			}
184			currentOutput.println();
185		}
186		/** Output a String followed by newline, to the currentOutput stream.
187		 * Ignored if string is null.
188		 * @param s The string to output
189		 */
190		protected void _println(String s) {
191			if (s != null) {
192				currentOutput.println(s);
193			}
194		}
195		/** Test if a set element array represents a contiguous range.
196		 * @param elems The array of elements representing the set, usually from BitSet.toArray().
197		 * @return true if the elements are a contiguous range (with two or more).
198		 */
199		public static boolean elementsAreRange(int[] elems) {
200			if (elems.length==0) {
201				return false;
202			}
203			int begin = elems[0];
204			int end = elems[elems.length-1];
205			if ( elems.length<=2 ) {
206				// Not enough elements for a range expression
207				return false;
208			}
209			if ( end-begin+1 > elems.length ) {
210				// The set does not represent a contiguous range
211				return false;
212			}
213			int v = begin+1;
214			for (int i=1; i<elems.length-1; i++) {
215				if ( v != elems[i] ) {
216					// The set does not represent a contiguous range
217					return false;
218				}
219				v++;
220			}
221			return true;
222		}
223		/** Get the identifier portion of an argument-action token.
224		 * The ID of an action is assumed to be a trailing identifier.
225		 * Specific code-generators may want to override this
226		 * if the language has unusual declaration syntax.
227		 * @param t The action token
228		 * @return A string containing the text of the identifier
229		 */
230		protected String extractIdOfAction(Token t) {
231			return extractIdOfAction(t.getText(), t.getLine());
232		}
233		/** Get the identifier portion of an argument-action.
234		 * The ID of an action is assumed to be a trailing identifier.
235		 * Specific code-generators may want to override this
236		 * if the language has unusual declaration syntax.
237		 * @param s The action text
238		 * @param line Line used for error reporting.
239		 * @return A string containing the text of the identifier
240		 */
241		protected String extractIdOfAction(String s, int line) {
242			// Search back from the end for a non alphanumeric.  That marks the
243			// beginning of the identifier
244			for (int i = s.length()-2; i >=0; i--)
245			{
246				// TODO: make this work for language-independent identifiers?
247				if (!Character.isLetterOrDigit(s.charAt(i)) && s.charAt(i) != '_')
248				{
249					// Found end of type part
250					return s.substring(i+1);
251				}
252			}
253			// Something is bogus, but we cannot parse the language-specific
254			// actions any better.  The compiler will have to catch the problem.
255			tool.warning("Ill-formed action", line);
256			return "";
257		}
258		/** Get the type string out of an argument-action token.
259		 * The type of an action is assumed to precede a trailing identifier
260		 * Specific code-generators may want to override this
261		 * if the language has unusual declaration syntax.
262		 * @param t The action token
263		 * @return A string containing the text of the type
264		 */
265		protected String extractTypeOfAction(Token t) {
266			return extractTypeOfAction(t.getText(), t.getLine());
267		}
268		/** Get the type portion of an argument-action.
269		 * The type of an action is assumed to precede a trailing identifier
270		 * Specific code-generators may want to override this
271		 * if the language has unusual declaration syntax.
272		 * @param s The action text
273		 * @param line Line used for error reporting.
274		 * @return A string containing the text of the type
275		 */
276		protected String extractTypeOfAction(String s, int line) {
277			// Search back from the end for a non alphanumeric.  That marks the
278			// beginning of the identifier
279			for (int i = s.length()-2; i >=0; i--)
280			{
281				// TODO: make this work for language-independent identifiers?
282				if (!Character.isLetterOrDigit(s.charAt(i)) && s.charAt(i) != '_')
283				{
284					// Found end of type part
285					return s.substring(0,i+1);
286				}
287			}
288			// Something is bogus, but we cannot parse the language-specific
289			// actions any better.  The compiler will have to catch the problem.
290			tool.warning("Ill-formed action", line);
291			return "";
292		}
293		/** Generate the code for all grammars
294		 */
295		public abstract void gen();
296		/** Generate code for the given grammar element.
297		 * @param action The {...} action to generate
298		 */
299		public abstract void gen(ActionElement action);
300		/** Generate code for the given grammar element.
301		 * @param blk The "x|y|z|..." block to generate
302		 */
303		public abstract void gen(AlternativeBlock blk);
304		/** Generate code for the given grammar element.
305		 * @param end The block-end element to generate.  Block-end
306		 * elements are synthesized by the grammar parser to represent
307		 * the end of a block.
308		 */
309		public abstract void gen(BlockEndElement end);
310		/** Generate code for the given grammar element.
311		 * @param atom The character literal reference to generate
312		 */
313		public abstract void gen(CharLiteralElement atom);
314		/** Generate code for the given grammar element.
315		 * @param r The character-range reference to generate
316		 */
317		public abstract void gen(CharRangeElement r);
318		/** Generate the code for a parser */
319		public abstract void gen(LexerGrammar g) throws IOException;
320		/** Generate code for the given grammar element.
321		 * @param blk The (...)+ block to generate
322		 */
323		public abstract void gen(OneOrMoreBlock blk);
324		/** Generate the code for a parser */
325		public abstract void gen(ParserGrammar g) throws IOException;
326		/** Generate code for the given grammar element.
327		 * @param rr The rule-reference to generate
328		 */
329		public abstract void gen(RuleRefElement rr);
330		/** Generate code for the given grammar element.
331		 * @param atom The string-literal reference to generate
332		 */
333		public abstract void gen(StringLiteralElement atom);
334		/** Generate code for the given grammar element.
335		 * @param r The token-range reference to generate
336		 */
337		public abstract void gen(TokenRangeElement r);
338		/** Generate code for the given grammar element.
339		 * @param atom The token-reference to generate
340		 */
341		public abstract void gen(TokenRefElement atom);
342		/** Generate code for the given grammar element.
343		 * @param blk The tree to generate code for.
344		 */
345		public abstract void gen(TreeElement t);
346		/** Generate the code for a parser */
347		public abstract void gen(TreeWalkerGrammar g) throws IOException;
348		/** Generate code for the given grammar element.
349		 * @param wc The wildcard element to generate
350		 */
351		public abstract void gen(WildcardElement wc);
352		/** Generate code for the given grammar element.
353		 * @param blk The (...)* block to generate
354		 */
355		public abstract void gen(ZeroOrMoreBlock blk);
356		//
357		// Support routines, can be used by all code-generators
358		//
359	
360		/** Generate the token types as a text file for persistence across shared lexer/parser */
361		protected void genTokenInterchange(TokenManager tm) throws IOException {
362			// Open the token output Java file and set the currentOutput stream
363			currentOutput = antlr.Tool.openOutputFile(tm.getName() + "TokenTypes.txt");
364			//SAS: changed for proper text file io
365	
366			tabs = 0;
367	
368			// Header
369			println(tm.getName() + "    // tokenVocabulary name");
370	
371			// Generate a definition for each token type
372			Vector v = tm.getVocabulary();
373			for (int i = Token.MIN_USER_TYPE; i < v.size(); i++) {
374				String s = (String)v.elementAt(i);
375				if ( DEBUG_CODE_GENERATOR ) {
376					System.out.println("gen persistence file entry for: "+s);
377				}	
378				if (s != null && !s.startsWith("<") ) {
379					// if literal, find label
380					if ( s.startsWith("\"") ) {
381						StringLiteralSymbol sl = (StringLiteralSymbol)grammar.tokenManager.getTokenSymbol(s);
382						if ( sl!=null && sl.label != null ) {
383							print(sl.label+"=");
384						}	
385						println(s + "=" + i);
386					}
387					else {
388						print(s);
389						// check for a paraphrase
390						TokenSymbol ts = (TokenSymbol)grammar.tokenManager.getTokenSymbol(s);
391						if ( ts.getParaphrase()!=null ) {
392							print("("+ts.getParaphrase()+")");
393						}	
394						println("=" + i);
395					}		
396				}
397			}
398	
399			// Close the tokens output file
400			currentOutput.close();
401			currentOutput = null;
402		}
403		/** Get a string for an expression to generate creation of an AST subtree.
404		  * @param v A Vector of String, where each element is an expression in the target language yielding an AST node.
405		  */
406		public abstract String getASTCreateString(Vector v);
407		/** Get a string for an expression to generate creating of an AST node
408		  * @param str The text of the arguments to the AST construction
409		  */
410		public abstract String getASTCreateString(String str);
411		/** Given the index of a bitset in the bitset list, generate a unique name.
412		 * Specific code-generators may want to override this
413		 * if the language does not allow '_' or numerals in identifiers.
414		 * @param index  The index of the bitset in the bitset list.
415		 */
416		protected String getBitsetName(int index)
417		{
418			return "_tokenSet_" + index;
419		}
420		public static String lexerRuleName(String id) {
421			return "m"+id;
422		}
423		/** Map an identifier to it's corresponding tree-node variable.
424		  * This is context-sensitive, depending on the rule and alternative
425		  * being generated
426		  * @param id The identifier name to map
427		  * @param forInput true if the input tree node variable is to be returned, otherwise the output variable is returned.
428		  * @return The mapped id (which may be the same as the input), or null if the mapping is invalid due to duplicates
429		  */
430		public abstract String mapTreeId(String id, ActionTransInfo tInfo);
431		/** Add a bitset to the list of bitsets to be generated.
432		 * if the bitset is already in the list, ignore the request.
433		 * Always adds the bitset to the end of the list, so the
434		 * caller can rely on the position of bitsets in the list.
435		 * The returned position can be used to format the bitset 
436		 * name, since it is invariant.
437		 * @param p Bit set to mark for code generation
438		 * @param forParser true if the bitset is used for the parser, false for the lexer
439		 * @return The position of the bitset in the list.
440		 */
441		protected int markBitsetForGen(BitSet p) {
442			// Is the bitset (or an identical one) already marked for gen?
443			for (int i = 0; i < bitsetsUsed.size(); i++)
444			{
445				BitSet set = (BitSet)bitsetsUsed.elementAt(i);
446				if (p.equals(set))
447				{
448					// Use the identical one already stored
449					return i;
450				}
451			}
452	
453			// Add the new bitset
454			bitsetsUsed.appendElement(p.clone());
455			return bitsetsUsed.size()-1;
456		}
457		/** Output tab indent followed by a String, to the currentOutput stream.
458		 * Ignored if string is null.
459		 * @param s The string to output.  
460		 */
461		protected void print(String s) {
462			if (s != null) {
463				printTabs();
464				currentOutput.print(s);
465			}
466		}
467		/** Print an action with leading tabs, attempting to
468		 * preserve the current indentation level for multi-line actions
469		 * Ignored if string is null.
470		 * @param s The action string to output
471		 */
472		protected void printAction(String s) { 
473			if (s != null) {
474				printTabs();
475				_printAction(s);
476			}
477		}
478		/** Output tab indent followed by a String followed by newline,
479		 * to the currentOutput stream.  Ignored if string is null.
480		 * @param s The string to output
481		 */
482		protected void println(String s) {
483			if (s != null) {
484				printTabs();
485				currentOutput.println(s);
486			}
487		}
488		/** Output the current tab indentation.  This outputs the number of tabs 
489		 * indicated by the "tabs" variable to the currentOutput stream.
490		 */
491		protected void printTabs() {
492			for (int i=1; i<=tabs; i++) {
493				currentOutput.print("\t");
494			}
495		}
496		/** Lexically process tree-specifiers in the action.
497		 *  This will replace #id and #(...) with the appropriate
498		 *  function calls and/or variables.
499		 */
500		protected String processActionForTreeSpecifiers(String actionStr, int line, RuleBlock currentRule, ActionTransInfo tInfo) {
501			if ( actionStr==null ) return null;
502			// The action trans info tells us (at the moment) whether an
503			// assignment was done to the rule's tree root.
504			if (grammar==null) return actionStr;
505			if ( (grammar.buildAST && actionStr.indexOf('#') != -1) ||
506			     (grammar instanceof LexerGrammar && actionStr.indexOf('$') != -1) ) {
507				// Create a lexer to read an action and return the translated version
508				ActionLexerns.ActionLexer lexer = new antlr.actions.ActionLexer(actionStr, currentRule, this, tInfo);
509				lexer.setLineOffset(line);
510				lexer.setTool(tool);
511				try {
512					lexer.mACTION(true);
513					actionStr = lexer.getTokenObject().getText();
514					// System.out.println("action translated: "+actionStr);
515					// System.out.println("trans info is "+tInfo);
516				}
517				catch (ScannerException ex) {
518					lexer.reportError(ex);
519					return actionStr;
520				}
521				catch (IOException io) {
522					antlr.Tool.panic("IO error reading action:"+actionStr);
523					return actionStr;
524				}
525			}
526			return actionStr;
527		}
528		/** Set all fields back like one just created */
529		private void reset() {
530			tabs = 0;
531			// Allocate list of bitsets tagged for code generation
532			bitsetsUsed = new Vector();
533			currentOutput = null;
534			grammar = null;
535			DEBUG_CODE_GENERATOR = false;
536			makeSwitchThreshold = DEFAULT_MAKE_SWITCH_THRESHOLD;
537			bitsetTestThreshold = DEFAULT_BITSET_TEST_THRESHOLD;
538		}
539		public static String reverseLexerRuleName(String id) {
540			return id.substring(1,id.length());
541		}
542		public void setAnalyzer(LLkGrammarAnalyzer analyzer_) {
543			analyzer = analyzer_;
544		}
545		public void setBehavior(DefineGrammarSymbols behavior_) { 
546			behavior = behavior_;
547		}
548		/** Set a grammar for the code generator to use */
549		protected void setGrammar(Grammar g) {
550			reset();
551			grammar = g;
552			// Lookup make-switch threshold in the grammar generic options
553			if (grammar.hasOption("codeGenMakeSwitchThreshold")) {
554				try {
555					makeSwitchThreshold = grammar.getIntegerOption("codeGenMakeSwitchThreshold");
556					//System.out.println("setting codeGenMakeSwitchThreshold to " + makeSwitchThreshold);
557				} catch (NumberFormatException e) {
558					tool.error(
559						"option 'codeGenMakeSwitchThreshold' must be an integer",
560						grammar.getOption("codeGenMakeSwitchThreshold").getLine()
561					);
562				}
563			}
564			
565			// Lookup bitset-test threshold in the grammar generic options
566			if (grammar.hasOption("codeGenBitsetTestThreshold")) {
567				try {
568					bitsetTestThreshold = grammar.getIntegerOption("codeGenBitsetTestThreshold");
569					//System.out.println("setting codeGenBitsetTestThreshold to " + bitsetTestThreshold);
570				} catch (NumberFormatException e) {
571					tool.error(
572						"option 'codeGenBitsetTestThreshold' must be an integer",
573						grammar.getOption("codeGenBitsetTestThreshold").getLine()
574					);
575				}
576			}
577			
578			// Lookup debug code-gen in the grammar generic options
579			if (grammar.hasOption("codeGenDebug")) {
580				Token t = grammar.getOption("codeGenDebug");
581				if (t.getText().equals("true")) {
582					//System.out.println("setting code-generation debug ON");
583					DEBUG_CODE_GENERATOR = true;
584				}
585				else if (t.getText().equals("false")) {
586					//System.out.println("setting code-generation debug OFF");
587					DEBUG_CODE_GENERATOR = false;
588				}
589				else {
590					tool.error("option 'codeGenDebug' must be true or false", t.getLine());
591				}
592			}
593		}
594		public void setTool(Tool tool_) {
595			tool = tool_;
596		}
597	}
598