1 package antlr;
2
/**
 * <b>SOFTWARE RIGHTS</b>
5 * <p>
6 * ANTLR 2.5.0 MageLang Institute, 1998
7 * <p>
8 * We reserve no legal rights to the ANTLR--it is fully in the
9 * public domain. An individual or company may do whatever
10 * they wish with source code distributed with ANTLR or the
11 * code generated by ANTLR, including the incorporation of
12 * ANTLR, or its output, into commerical software.
13 * <p>
14 * We encourage users to develop software with ANTLR. However,
15 * we do ask that credit is given to us for developing
16 * ANTLR. By "credit", we mean that if you use ANTLR or
17 * incorporate any source code into one of your programs
18 * (commercial product, research project, or otherwise) that
19 * you acknowledge this fact somewhere in the documentation,
20 * research report, etc... If you like ANTLR and have
21 * developed a nice tool with the output, please mention that
22 * you developed it using ANTLR. In addition, we ask that the
23 * headers remain intact in our source code. As long as these
24 * guidelines are kept, we expect to continue enhancing this
25 * system and expect to make other tools available as they are
26 * completed.
27 * <p>
28 * The ANTLR gang:
29 * @version ANTLR 2.5.0 MageLang Institute, 1998
30 * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a>
31 * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a>
32 */
33 import java.io.PrintWriter; import java.io.IOException;
35 import java.io.FileWriter; import antlr.collections.impl.Vector;
37 import antlr.collections.impl.BitSet;
38
/**
 * Abstract base class for ANTLR code generators. All language-specific
 * code generators derive from this class.
41 *
42 * <p>
43 * A CodeGenerator knows about a Grammar data structure and
44 * a grammar analyzer. The Grammar is walked to generate the
45 * appropriate code for both a parser and lexer (if present).
46 * This interface may change slightly so that the lexer is
47 * itself living inside of a Grammar object (in which case,
48 * this class generates only one recognizer). The main method
49 * to call is <tt>gen()</tt>, which initiates all code gen.
50 *
51 * <p>
52 * The interaction of the code generator with the analyzer is
53 * simple: each subrule block calls deterministic() before generating
54 * code for the block. Method deterministic() sets lookahead caches
55 * in each Alternative object. Technically, a code generator
56 * doesn't need the grammar analyzer if all lookahead analysis
57 * is done at runtime, but this would result in a slower parser.
58 *
59 * <p>
60 * This class provides a set of support utilities to handle argument
61 * list parsing and so on.
62 *
63 * @author Terence Parr, John Lilley
64 * @version 2.00a
65 * @see antlr.JavaCodeGenerator
66 * @see antlr.DiagnosticCodeGenerator
67 * @see antlr.LLkAnalyzer
68 * @see antlr.Grammar
69 * @see antlr.AlternativeElement
70 * @see antlr.Lookahead
71 */
72 public abstract class CodeGenerator {
73
/** Current indentation level: the number of tab characters emitted by printTabs(). */
protected int tabs=0;

/** Output stream that the print helpers of this class write to. */
transient protected PrintWriter currentOutput;
/** Grammar currently being processed; assigned by setGrammar(). */
protected Grammar grammar = null;

/** List of bitsets recorded by markBitsetForGen(); re-created by reset(). */
protected Vector bitsetsUsed;

/** Tool used to report warnings and errors; assigned by setTool(). */
protected Tool tool;

/** Assigned by setBehavior(). */
protected DefineGrammarSymbols behavior;

/** Grammar analyzer; assigned by setAnalyzer(). */
protected LLkGrammarAnalyzer analyzer;
/**
 * Character formatter for the target language;
 * subclass must initialize this to the language-specific formatter
 */
protected CharFormatter charFormatter;


/**
 * When true, genTokenInterchange() echoes each vocabulary entry to System.out.
 * Controlled by the grammar option "codeGenDebug" (see setGrammar()).
 */
protected boolean DEBUG_CODE_GENERATOR = false;


/** Default values that reset() restores for the two thresholds declared below. */
protected static final int DEFAULT_MAKE_SWITCH_THRESHOLD = 2;
protected static final int DEFAULT_BITSET_TEST_THRESHOLD = 4;

/**
 * A switch() or language-specific equivalent will be generated instead
 * of a series of if/else statements for blocks with number of alternates
 * greater than or equal to this number of non-predicated LL(1) alternates.
 * This is modified by the grammar option "codeGenMakeSwitchThreshold"
 */
protected int makeSwitchThreshold = DEFAULT_MAKE_SWITCH_THRESHOLD;

/**
 * A bitset membership test will be generated instead of an
 * ORed series of LA(k) comparisons for lookahead sets with
 * degree greater than or equal to this value.
 * This is modified by the grammar option "codeGenBitsetTestThreshold"
 */
protected int bitsetTestThreshold = DEFAULT_BITSET_TEST_THRESHOLD;

// NOTE(review): not referenced anywhere in the visible part of this class -- confirm it is still needed.
private static boolean OLD_ACTION_TRANSLATOR = true;



/** Creates a code generator; performs no initialization of its own. */
public CodeGenerator() {}
120 * Ignored if string is null.
122 * @param s The string to output
123 */
124 protected void _print(String s) {
125 if (s != null) {
126 currentOutput.print(s);
127 }
128 }
129 * preserve the current indentation level for multi-line actions
131 * Ignored if string is null.
132 * @param s The action string to output
133 */
134 protected void _printAction(String s) {
135 if (s == null) {
136 return;
137 }
138
139 int start = 0;
141 while (start < s.length() && Character.isSpaceChar(s.charAt(start)) )
142 {
143 start++;
144 }
145
146 int end = s.length()-1;
148 while ( end > start && Character.isSpaceChar(s.charAt(end)) )
149 {
150 end--;
151 }
152
153 char c=0;
154 for (int i = start; i <= end;)
155 {
156 c = s.charAt(i);
157 i++;
158 boolean newline = false;
159 switch (c)
160 {
161 case '\n':
162 newline=true;
163 break;
164 case '\r':
165 if ( i<=end && s.charAt(i)=='\n' ) {
166 i++;
167 }
168 newline=true;
169 break;
170 default:
171 currentOutput.print(c);
172 break;
173 }
174 if ( newline ) {
175 currentOutput.println();
176 printTabs();
177 while (i <= end && Character.isSpaceChar(s.charAt(i)) ) {
179 i++;
180 }
181 newline=false;
182 }
183 }
184 currentOutput.println();
185 }
186 * Ignored if string is null.
188 * @param s The string to output
189 */
190 protected void _println(String s) {
191 if (s != null) {
192 currentOutput.println(s);
193 }
194 }
195 * @param elems The array of elements representing the set, usually from BitSet.toArray().
197 * @return true if the elements are a contiguous range (with two or more).
198 */
199 public static boolean elementsAreRange(int[] elems) {
200 if (elems.length==0) {
201 return false;
202 }
203 int begin = elems[0];
204 int end = elems[elems.length-1];
205 if ( elems.length<=2 ) {
206 return false;
208 }
209 if ( end-begin+1 > elems.length ) {
210 return false;
212 }
213 int v = begin+1;
214 for (int i=1; i<elems.length-1; i++) {
215 if ( v != elems[i] ) {
216 return false;
218 }
219 v++;
220 }
221 return true;
222 }
223 * The ID of an action is assumed to be a trailing identifier.
225 * Specific code-generators may want to override this
226 * if the language has unusual declaration syntax.
227 * @param t The action token
228 * @return A string containing the text of the identifier
229 */
230 protected String extractIdOfAction(Token t) {
231 return extractIdOfAction(t.getText(), t.getLine());
232 }
233 * The ID of an action is assumed to be a trailing identifier.
235 * Specific code-generators may want to override this
236 * if the language has unusual declaration syntax.
237 * @param s The action text
238 * @param line Line used for error reporting.
239 * @return A string containing the text of the identifier
240 */
241 protected String extractIdOfAction(String s, int line) {
242 for (int i = s.length()-2; i >=0; i--)
245 {
246 if (!Character.isLetterOrDigit(s.charAt(i)) && s.charAt(i) != '_')
248 {
249 return s.substring(i+1);
251 }
252 }
253 tool.warning("Ill-formed action", line);
256 return "";
257 }
258 * The type of an action is assumed to precede a trailing identifier
260 * Specific code-generators may want to override this
261 * if the language has unusual declaration syntax.
262 * @param t The action token
263 * @return A string containing the text of the type
264 */
265 protected String extractTypeOfAction(Token t) {
266 return extractTypeOfAction(t.getText(), t.getLine());
267 }
268 * The type of an action is assumed to precede a trailing identifier
270 * Specific code-generators may want to override this
271 * if the language has unusual declaration syntax.
272 * @param s The action text
273 * @param line Line used for error reporting.
274 * @return A string containing the text of the type
275 */
276 protected String extractTypeOfAction(String s, int line) {
277 for (int i = s.length()-2; i >=0; i--)
280 {
281 if (!Character.isLetterOrDigit(s.charAt(i)) && s.charAt(i) != '_')
283 {
284 return s.substring(0,i+1);
286 }
287 }
288 tool.warning("Ill-formed action", line);
291 return "";
292 }
/**
 * Main entry point of a code generator: initiates all code generation.
 */
public abstract void gen();
/**
 * Generate code for the given grammar element.
 * @param action The {...} action to generate
 */
public abstract void gen(ActionElement action);
/**
 * Generate code for the given grammar element.
 * @param blk The "x|y|z|..." block to generate
 */
public abstract void gen(AlternativeBlock blk);
/**
 * Generate code for the given grammar element.
 * @param end The block-end element to generate. Block-end
 * elements are synthesized by the grammar parser to represent
 * the end of a block.
 */
public abstract void gen(BlockEndElement end);
/**
 * Generate code for the given grammar element.
 * @param atom The character literal reference to generate
 */
public abstract void gen(CharLiteralElement atom);
/**
 * Generate code for the given grammar element.
 * @param r The character-range reference to generate
 */
public abstract void gen(CharRangeElement r);

/**
 * Generate code for the given lexer grammar.
 * @param g The lexer grammar to generate
 * @throws IOException declared for implementations; presumably raised when output cannot be written -- confirm against subclasses
 */
public abstract void gen(LexerGrammar g) throws IOException;
/**
 * Generate code for the given grammar element.
 * @param blk The (...)+ block to generate
 */
public abstract void gen(OneOrMoreBlock blk);

/**
 * Generate code for the given parser grammar.
 * @param g The parser grammar to generate
 * @throws IOException declared for implementations; presumably raised when output cannot be written -- confirm against subclasses
 */
public abstract void gen(ParserGrammar g) throws IOException;
/**
 * Generate code for the given grammar element.
 * @param rr The rule-reference to generate
 */
public abstract void gen(RuleRefElement rr);
/**
 * Generate code for the given grammar element.
 * @param atom The string-literal reference to generate
 */
public abstract void gen(StringLiteralElement atom);
/**
 * Generate code for the given grammar element.
 * @param r The token-range reference to generate
 */
public abstract void gen(TokenRangeElement r);
/**
 * Generate code for the given grammar element.
 * @param atom The token-reference to generate
 */
public abstract void gen(TokenRefElement atom);
/**
 * Generate code for the given grammar element.
 * @param t The tree to generate code for.
 */
public abstract void gen(TreeElement t);

/**
 * Generate code for the given tree-walker grammar.
 * @param g The tree-walker grammar to generate
 * @throws IOException declared for implementations; presumably raised when output cannot be written -- confirm against subclasses
 */
public abstract void gen(TreeWalkerGrammar g) throws IOException;
/**
 * Generate code for the given grammar element.
 * @param wc The wildcard element to generate
 */
public abstract void gen(WildcardElement wc);
/**
 * Generate code for the given grammar element.
 * @param blk The (...)* block to generate
 */
public abstract void gen(ZeroOrMoreBlock blk);
356
360
361 protected void genTokenInterchange(TokenManager tm) throws IOException {
362 currentOutput = antlr.Tool.openOutputFile(tm.getName() + "TokenTypes.txt");
364
366 tabs = 0;
367
368 println(tm.getName() + " // tokenVocabulary name");
370
371 Vector v = tm.getVocabulary();
373 for (int i = Token.MIN_USER_TYPE; i < v.size(); i++) {
374 String s = (String)v.elementAt(i);
375 if ( DEBUG_CODE_GENERATOR ) {
376 System.out.println("gen persistence file entry for: "+s);
377 }
378 if (s != null && !s.startsWith("<") ) {
379 if ( s.startsWith("\"") ) {
381 StringLiteralSymbol sl = (StringLiteralSymbol)grammar.tokenManager.getTokenSymbol(s);
382 if ( sl!=null && sl.label != null ) {
383 print(sl.label+"=");
384 }
385 println(s + "=" + i);
386 }
387 else {
388 print(s);
389 TokenSymbol ts = (TokenSymbol)grammar.tokenManager.getTokenSymbol(s);
391 if ( ts.getParaphrase()!=null ) {
392 print("("+ts.getParaphrase()+")");
393 }
394 println("=" + i);
395 }
396 }
397 }
398
399 currentOutput.close();
401 currentOutput = null;
402 }
/**
 * Get a string for an expression to generate creation of an AST subtree.
 * @param v A Vector of String, where each element is an expression in the target language yielding an AST node.
 * @return The target-language text produced by the subclass
 */
public abstract String getASTCreateString(Vector v);
/**
 * Get a string for an expression to generate creation of an AST node.
 * @param str The text of the arguments to the AST construction
 * @return The target-language text produced by the subclass
 */
public abstract String getASTCreateString(String str);
411 * Specific code-generators may want to override this
413 * if the language does not allow '_' or numerals in identifiers.
414 * @param index The index of the bitset in the bitset list.
415 */
416 protected String getBitsetName(int index)
417 {
418 return "_tokenSet_" + index;
419 }
420 public static String lexerRuleName(String id) {
421 return "m"+id;
422 }
/**
 * Map an identifier to its corresponding tree-node variable.
 * This is context-sensitive, depending on the rule and alternative
 * being generated
 * @param id The identifier name to map
 * @param tInfo Action translation info passed through to the implementation -- NOTE(review): the earlier doc described a boolean "forInput" parameter that no longer exists; confirm the exact role of tInfo against subclasses
 * @return The mapped id (which may be the same as the input), or null if the mapping is invalid due to duplicates
 */
public abstract String mapTreeId(String id, ActionTransInfo tInfo);
431 * if the bitset is already in the list, ignore the request.
433 * Always adds the bitset to the end of the list, so the
434 * caller can rely on the position of bitsets in the list.
435 * The returned position can be used to format the bitset
436 * name, since it is invariant.
437 * @param p Bit set to mark for code generation
438 * @param forParser true if the bitset is used for the parser, false for the lexer
439 * @return The position of the bitset in the list.
440 */
441 protected int markBitsetForGen(BitSet p) {
442 for (int i = 0; i < bitsetsUsed.size(); i++)
444 {
445 BitSet set = (BitSet)bitsetsUsed.elementAt(i);
446 if (p.equals(set))
447 {
448 return i;
450 }
451 }
452
453 bitsetsUsed.appendElement(p.clone());
455 return bitsetsUsed.size()-1;
456 }
457 * Ignored if string is null.
459 * @param s The string to output.
460 */
461 protected void print(String s) {
462 if (s != null) {
463 printTabs();
464 currentOutput.print(s);
465 }
466 }
467 * preserve the current indentation level for multi-line actions
469 * Ignored if string is null.
470 * @param s The action string to output
471 */
472 protected void printAction(String s) {
473 if (s != null) {
474 printTabs();
475 _printAction(s);
476 }
477 }
478 * to the currentOutput stream. Ignored if string is null.
480 * @param s The string to output
481 */
482 protected void println(String s) {
483 if (s != null) {
484 printTabs();
485 currentOutput.println(s);
486 }
487 }
488 * indicated by the "tabs" variable to the currentOutput stream.
490 */
491 protected void printTabs() {
492 for (int i=1; i<=tabs; i++) {
493 currentOutput.print("\t");
494 }
495 }
496 * This will replace #id and #(...) with the appropriate
498 * function calls and/or variables.
499 */
500 protected String processActionForTreeSpecifiers(String actionStr, int line, RuleBlock currentRule, ActionTransInfo tInfo) {
501 if ( actionStr==null ) return null;
502 if (grammar==null) return actionStr;
505 if ( (grammar.buildAST && actionStr.indexOf('#') != -1) ||
506 (grammar instanceof LexerGrammar && actionStr.indexOf('$') != -1) ) {
507 ActionLexerns.ActionLexer lexer = new antlr.actions.ActionLexer(actionStr, currentRule, this, tInfo);
509 lexer.setLineOffset(line);
510 lexer.setTool(tool);
511 try {
512 lexer.mACTION(true);
513 actionStr = lexer.getTokenObject().getText();
514 }
517 catch (ScannerException ex) {
518 lexer.reportError(ex);
519 return actionStr;
520 }
521 catch (IOException io) {
522 antlr.Tool.panic("IO error reading action:"+actionStr);
523 return actionStr;
524 }
525 }
526 return actionStr;
527 }
528
529 private void reset() {
530 tabs = 0;
531 bitsetsUsed = new Vector();
533 currentOutput = null;
534 grammar = null;
535 DEBUG_CODE_GENERATOR = false;
536 makeSwitchThreshold = DEFAULT_MAKE_SWITCH_THRESHOLD;
537 bitsetTestThreshold = DEFAULT_BITSET_TEST_THRESHOLD;
538 }
539 public static String reverseLexerRuleName(String id) {
540 return id.substring(1,id.length());
541 }
542 public void setAnalyzer(LLkGrammarAnalyzer analyzer_) {
543 analyzer = analyzer_;
544 }
545 public void setBehavior(DefineGrammarSymbols behavior_) {
546 behavior = behavior_;
547 }
548
549 protected void setGrammar(Grammar g) {
550 reset();
551 grammar = g;
552 if (grammar.hasOption("codeGenMakeSwitchThreshold")) {
554 try {
555 makeSwitchThreshold = grammar.getIntegerOption("codeGenMakeSwitchThreshold");
556 } catch (NumberFormatException e) {
558 tool.error(
559 "option 'codeGenMakeSwitchThreshold' must be an integer",
560 grammar.getOption("codeGenMakeSwitchThreshold").getLine()
561 );
562 }
563 }
564
565 if (grammar.hasOption("codeGenBitsetTestThreshold")) {
567 try {
568 bitsetTestThreshold = grammar.getIntegerOption("codeGenBitsetTestThreshold");
569 } catch (NumberFormatException e) {
571 tool.error(
572 "option 'codeGenBitsetTestThreshold' must be an integer",
573 grammar.getOption("codeGenBitsetTestThreshold").getLine()
574 );
575 }
576 }
577
578 if (grammar.hasOption("codeGenDebug")) {
580 Token t = grammar.getOption("codeGenDebug");
581 if (t.getText().equals("true")) {
582 DEBUG_CODE_GENERATOR = true;
584 }
585 else if (t.getText().equals("false")) {
586 DEBUG_CODE_GENERATOR = false;
588 }
589 else {
590 tool.error("option 'codeGenDebug' must be true or false", t.getLine());
591 }
592 }
593 }
594 public void setTool(Tool tool_) {
595 tool = tool_;
596 }
597 }
598