/* Copyright (c) 2003 Andy Tripp */ import antlr.*; import antlr.collections.*; import antlr.collections.impl.*; import java.io.*; import java.util.*; /** * JavaEmitter: A class that can take an ANTLR Java AST and produce reasonably formatted * Java code from it. To use it, create a JavaEmitter object, call setOut() * if you want to print to something other than System.out, and then call * print(), passing the AST. Typically, the AST node that you pass would be the * root of a tree - the ROOT_ID node that represents a Java file. See * main() for example usage. */ /* To compile, place this file and IndentingPrintStream.java in your * antlr-x.x.x/examples/java/java directory. Make sure this directory and ANTLR classes * are in your CLASSPATH. Run ANTLR on java.g and java.tree.g and then compile everything: * $ java antlr.Tool java.g * $ java antlr.Tool java.tree.g * $ javac *.java * * Test by running the main method here: * java JavaEmitter MyJavaFile.java * This will print both the AST as a tree (to stderr) and the formatted Java code (to stdout) * * This has been tested with ANTLR 2.7.1 and 2.7.2. */ public class JavaEmitter implements JavaTokenTypes { private IndentingPrintStream out = new IndentingPrintStream(System.out); private PrintStream debug = System.err; private static int ALL = -1; private String indentString = " "; private java.util.Stack stack = new java.util.Stack(); private static String[] tokenNames; private final static int ROOT_ID = 0; static { setupTokenNames(); } // Map each AST token type to a String private static void setupTokenNames() { tokenNames = new String[200]; for (int i=0; i Overall Approach
* It works by making recursive calls to print children. For example, * the root of the AST tree has type ROOT_ID. A Java AST node generated from the * ANTLR java.g file will have type ROOT_ID and will have * the following children, in this order: * 0 or 1 children of type PACKAGE_DEF * 0 or more children of type IMPORT * One child of type CLASS_DEF or type INTERFACE_DEF *

* So the code below is one big "switch" statement on the passed AST type. * In the "ROOT_ID" case, the code here does the following: *

    *
  1. calls getChild(ast, PACKAGE_DEF) to get the * child of type PACKAGE_DEF, and makes a recursive call to print() on that AST. * If there is no "PACKAGE_DEF" child, getChild() would return null and the * recursive print() call would print nothing. *
  2. Calls printChildren(), passing the AST, the "\n" separator, and * the type "IMPORT". printChildren() will print all children of the AST, * separating each by the "\n" separator. *
  3. Does the same thing will CLASS_DEF that it did with PACKAGE_DEF: calls * getChild() to get the child of type CLASS_DEF (or null if there is none), * and then makes a recursive call to print(). *
  4. Does the same thing for INTEFACE_DEF: call getChild() to get the child * of type INTEFACE_DEF, and make a recursive call. *
* *

Indenting
* One important issue is how to do proper indenting. The IndentingPrintStream * class handles indenting. Here, we simply create an IndentingPrintStream from * our normal PrintStream (either System.out or whatever was passed to setOut()). * And then we call increaseIndent() and decreaseIndent() as we see "{" and "}" * AST nodes. * *

Adding Parentheses
* * The only other tricky part here is in printing parentheses, which are not kept * as AST nodes, but are built-in ("inherent") to the structure of the AST. * The printWithParens() method is used to print all unary and binary operators. * This method uses a precendence table to determine whether it needs to print * parentheses or not. */ public void print (AST ast) { if (ast == null) { return; } AST parent = null; if (!stack.isEmpty()) { parent = (AST) stack.peek(); } stack.push(ast); AST child1 = ast.getFirstChild(); AST child2 = null; AST child3 = null; if (child1 != null) { child2 = child1.getNextSibling(); if (child2 != null) { child3 = child2.getNextSibling(); } } switch(ast.getType()) { // The top of the tree looks like this: // ROOT_ID "Whatever.java" // package // imports // class definition case ROOT_ID: print(getChild(ast, PACKAGE_DEF)); printChildren(ast, "\n", IMPORT); out.println(); out.println(); print(getChild(ast, CLASS_DEF)); // only one of these... print(getChild(ast, INTERFACE_DEF)); // will print anything out.println(); break; case PACKAGE_DEF: out.print("package "); print(ast.getFirstChild()); out.print(";"); out.println(); out.println(); break; // IMPORT has exactly one child case IMPORT: out.print("import "); print(ast.getFirstChild()); out.print(";"); break; case CLASS_DEF: case INTERFACE_DEF: print(getChild(ast, MODIFIERS)); if (ast.getType() == CLASS_DEF) { out.print("class "); } else { out.print("interface "); } print(getChild(ast, IDENT)); out.print(" "); print(getChild(ast, EXTENDS_CLAUSE)); print(getChild(ast, IMPLEMENTS_CLAUSE)); startBlock(); print(getChild(ast, OBJBLOCK)); endBlock(); break; case MODIFIERS: if (hasChildren(ast)) { printChildren(ast, " "); out.print(" "); } break; case EXTENDS_CLAUSE: if (hasChildren(ast)) { out.print("extends "); printChildren(ast, ", "); out.print(" "); } break; case IMPLEMENTS_CLAUSE: if (hasChildren(ast)) { out.print("implements "); printChildren(ast, ", "); out.print(" "); } break; // DOT always has exactly two children. case DOT: print(child1); out.print("."); print(child2); break; // the typical order of things within a class is: // variable definitions (no, we do not like these at the bottom, mr. C++ programmer) // static initialization block // instance initialization block // constructors // methods // inner classes case OBJBLOCK: if (printChildren(ast, "\n", VARIABLE_DEF)) { out.println(); } if (printChildren(ast, "\n", STATIC_INIT)) { out.println(); } if (printChildren(ast, "\n", INSTANCE_INIT)) { out.println(); } if (printChildren(ast, "\n", CTOR_DEF)) { out.println(); } if (printChildren(ast, "\n", METHOD_DEF)) { out.println(); } printChildren(ast, "\n", CLASS_DEF); break; case CTOR_DEF: case METHOD_DEF: print(getChild(ast, MODIFIERS)); if (ast.getType() != CTOR_DEF) { print(getChild(ast, TYPE)); out.print(" "); } print(getChild(ast, IDENT)); print(getChild(ast, PARAMETERS)); print(getChild(ast, LITERAL_throws)); AST methodBody = getChild(ast, SLIST); if (methodBody == null) { out.print(";"); } else { out.print(" "); print(methodBody); } break; case PARAMETERS: out.print("("); printChildren(ast, ", "); out.print(") "); break; case PARAMETER_DEF: print(getChild(ast, MODIFIERS)); print(getChild(ast, TYPE)); out.print(" "); print(getChild(ast, IDENT)); break; case VARIABLE_DEF: print(getChild(ast, MODIFIERS)); print(getChild(ast, TYPE)); out.print(" "); print(getChild(ast, IDENT)); print(getChild(ast, ASSIGN)); // don't always suffix with ';': example: "for (int i=0; i>= >>>= &= ^= |= // (12) ?: // (11) || // (10) && // ( 9) | // ( 8) ^ // ( 7) & // ( 6) == != // ( 5) < <= > >= // ( 4) << >> // ( 3) +(binary) -(binary) // ( 2) * / % // ( 1) ++ -- +(unary) -(unary) ~ ! (type) // [] () (method call) . (dot -- identifier qualification) // new () (explicit parenthesis) private static int getPrecedence(AST ast) { if (ast == null) { return -2; } switch (ast.getType()) { case EXPR: return getPrecedence(ast.getFirstChild()); case ASSIGN: case PLUS_ASSIGN: case MINUS_ASSIGN: case STAR_ASSIGN: case DIV_ASSIGN: case MOD_ASSIGN: case SR_ASSIGN: case BSR_ASSIGN: case SL_ASSIGN: case BAND_ASSIGN: case BXOR_ASSIGN: case BOR_ASSIGN: return 13; case QUESTION: return 12; case LOR: return 11; case LAND: return 10; case BOR: return 9; case BXOR: return 8; case BAND: return 7; case NOT_EQUAL: case EQUAL: return 6; case LT: case GT: case LE: case GE: case LITERAL_instanceof: return 5; case SL: case SR: case BSR: // not in chart above, but I would guess it goes here return 4; case PLUS: case MINUS: return 3; case DIV: case MOD: case STAR: return 2; case INC: case DEC: case POST_INC: case POST_DEC: case UNARY_PLUS: case UNARY_MINUS: case LNOT: case BNOT: case TYPE: return 1; case METHOD_CALL: case ARRAY_DECLARATOR: case DOT: return 0; case LITERAL_new: return -1; } // for any non-operator, return a value which will cause it to NOT need parens. return -2; } /** * This example takes a single filename on the command line, prints * the AST to stderr and the Java code to stdout. */ public static void main(String[] args) { if (args.length != 1) { System.err.println("Usage: java JavaEmitter MyFile.java"); System.exit(1); } String fileName = args[0]; File file = new File(fileName); if (!file.exists()) { System.err.println("File does not exist:" + fileName); System.exit(1); } if (!file.isFile()) { System.err.println("File is not a regular file:" + fileName); System.exit(1); } try { FileInputStream fis = new FileInputStream(fileName); // Create a scanner that reads from the input stream passed to us JavaLexer lexer = new JavaLexer(fis); lexer.setFilename(fileName); // Create a parser that reads from the scanner JavaRecognizer parser = new JavaRecognizer(lexer); parser.setFilename(fileName); // start parsing at the compilationUnit rule parser.compilationUnit(); // Create a root AST node with id 0, and its child is the AST produced by the parser: ASTFactory factory = new ASTFactory(); AST root = factory.create(ROOT_ID,"AST ROOT"); root.setFirstChild(parser.getAST()); // Look at the AST if you want, by uncommenting the line below: showTree(root, 0); // Print the AST as nice Java code: JavaEmitter emitter = new JavaEmitter(); emitter.print(root); } catch (Exception e) { System.err.println("parser exception: "+e); e.printStackTrace(); // so we can get stack trace } } // A simple method to print out an AST as a tree: private static String SPACES = " "; private static void showTree(AST t, int level) { if ( t==null ) return; System.err.print(SPACES.substring(0, level)); System.err.println("text:" + t.getText() + " type=" + t.getType()); AST child = t.getFirstChild(); showTree(child, level+2); AST next = t.getNextSibling(); showTree(next, level); } }