ANTLR Runtime Source Code

ANTLR is a powerful parser generator for multiple programming languages including Java.

ANTLR contains 2 major modules:

  • Runtime - For building and executing parsers/lexers generated in Java.
  • Tool (The Parser Generator) - For generating parsers/lexers Java class.

ANTLR Runtime Source Code files are provided in the distribution packge (antlr4-4.10.1.zip). You can download them at ANTLR Website.

You can also browse the source code below:

✍: FYIcenter

org/antlr/v4/runtime/TokenStreamRewriter.java

/*
 * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
 */
package org.antlr.v4.runtime;

import org.antlr.v4.runtime.misc.Interval;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Useful for rewriting out a buffered input token stream after doing some
 * augmentation or other manipulations on it.
 *
 * <p>
 * You can insert stuff, replace, and delete chunks. Note that the operations
 * are done lazily--only if you convert the buffer to a {@link String} with
 * {@link TokenStream#getText()}. This is very efficient because you are not
 * moving data around all the time. As the buffer of tokens is converted to
 * strings, the {@link #getText()} method(s) scan the input token stream and
 * check to see if there is an operation at the current index. If so, the
 * operation is done and then normal {@link String} rendering continues on the
 * buffer. This is like having multiple Turing machine instruction streams
 * (programs) operating on a single input tape. :)</p>
 *
 * <p>
 * This rewriter makes no modifications to the token stream. It does not ask the
 * stream to fill itself up nor does it advance the input cursor. The token
 * stream {@link TokenStream#index()} will return the same value before and
 * after any {@link #getText()} call.</p>
 *
 * <p>
 * The rewriter only works on tokens that you have in the buffer and ignores the
 * current input cursor. If you are buffering tokens on-demand, calling
 * {@link #getText()} halfway through the input will only do rewrites for those
 * tokens in the first half of the file.</p>
 *
 * <p>
 * Since the operations are done lazily at {@link #getText}-time, operations do
 * not screw up the token index values. That is, an insert operation at token
 * index {@code i} does not change the index values for tokens
 * {@code i}+1..n-1.</p>
 *
 * <p>
 * Because operations never actually alter the buffer, you may always get the
 * original token stream back without undoing anything. Since the instructions
 * are queued up, you can easily simulate transactions and roll back any changes
 * if there is an error just by removing instructions. For example,</p>
 *
 * <pre>
 * CharStream input = new ANTLRFileStream("input");
 * TLexer lex = new TLexer(input);
 * CommonTokenStream tokens = new CommonTokenStream(lex);
 * T parser = new T(tokens);
 * TokenStreamRewriter rewriter = new TokenStreamRewriter(tokens);
 * parser.startRule();
 * </pre>
 *
 * <p>
 * Then in the rules, you can execute (assuming rewriter is visible):</p>
 *
 * <pre>
 * Token t,u;
 * ...
 * rewriter.insertAfter(t, "text to put after t");}
 * rewriter.insertAfter(u, "text after u");}
 * System.out.println(rewriter.getText());
 * </pre>
 *
 * <p>
 * You can also have multiple "instruction streams" and get multiple rewrites
 * from a single pass over the input. Just name the instruction streams and use
 * that name again when printing the buffer. This could be useful for generating
 * a C file and also its header file--all from the same buffer:</p>
 *
 * <pre>
 * rewriter.insertAfter("pass1", t, "text to put after t");}
 * rewriter.insertAfter("pass2", u, "text after u");}
 * System.out.println(rewriter.getText("pass1"));
 * System.out.println(rewriter.getText("pass2"));
 * </pre>
 *
 * <p>
 * If you don't use named rewrite streams, a "default" stream is used as the
 * first example shows.</p>
 */
public class TokenStreamRewriter {
	public static final String DEFAULT_PROGRAM_NAME = "default";
	public static final int PROGRAM_INIT_SIZE = 100;
	public static final int MIN_TOKEN_INDEX = 0;

	// Define the rewrite operation hierarchy

	public class RewriteOperation {
		/** What index into rewrites List are we? */
		protected int instructionIndex;
		/** Token buffer index. */
		protected int index;
		protected Object text;

		protected RewriteOperation(int index) {
			this.index = index;
		}

		protected RewriteOperation(int index, Object text) {
			this.index = index;
			this.text = text;
		}
		/** Execute the rewrite operation by possibly adding to the buffer.
		 *  Return the index of the next token to operate on.
		 */
		public int execute(StringBuilder buf) {
			return index;
		}

		@Override
		public String toString() {
			String opName = getClass().getName();
			int $index = opName.indexOf('$');
			opName = opName.substring($index+1, opName.length());
			return "<"+opName+"@"+tokens.get(index)+
					":\""+text+"\">";
		}
	}

	class InsertBeforeOp extends RewriteOperation {
		public InsertBeforeOp(int index, Object text) {
			super(index,text);
		}

		@Override
		public int execute(StringBuilder buf) {
			buf.append(text);
			if ( tokens.get(index).getType()!=Token.EOF ) {
				buf.append(tokens.get(index).getText());
			}
			return index+1;
		}
	}

	/** Distinguish between insert after/before to do the "insert afters"
	 *  first and then the "insert befores" at same index. Implementation
	 *  of "insert after" is "insert before index+1".
	 */
    class InsertAfterOp extends InsertBeforeOp {
        public InsertAfterOp(int index, Object text) {
            super(index+1, text); // insert after is insert before index+1
        }
    }

	/** I'm going to try replacing range from x..y with (y-x)+1 ReplaceOp
	 *  instructions.
	 */
	class ReplaceOp extends RewriteOperation {
		protected int lastIndex;
		public ReplaceOp(int from, int to, Object text) {
			super(from,text);
			lastIndex = to;
		}
		@Override
		public int execute(StringBuilder buf) {
			if ( text!=null ) {
				buf.append(text);
			}
			return lastIndex+1;
		}
		@Override
		public String toString() {
			if ( text==null ) {
				return "<DeleteOp@"+tokens.get(index)+
						".."+tokens.get(lastIndex)+">";
			}
			return "<ReplaceOp@"+tokens.get(index)+
					".."+tokens.get(lastIndex)+":\""+text+"\">";
		}
	}

	/** Our source stream */
	protected final TokenStream tokens;

	/** You may have multiple, named streams of rewrite operations.
	 *  I'm calling these things "programs."
	 *  Maps String (name) &rarr; rewrite (List)
	 */
	protected final Map<String, List<RewriteOperation>> programs;

	/** Map String (program name) &rarr; Integer index */
	protected final Map<String, Integer> lastRewriteTokenIndexes;

	public TokenStreamRewriter(TokenStream tokens) {
		this.tokens = tokens;
		programs = new HashMap<String, List<RewriteOperation>>();
		programs.put(DEFAULT_PROGRAM_NAME,
					 new ArrayList<RewriteOperation>(PROGRAM_INIT_SIZE));
		lastRewriteTokenIndexes = new HashMap<String, Integer>();
	}

	public final TokenStream getTokenStream() {
		return tokens;
	}

	public void rollback(int instructionIndex) {
		rollback(DEFAULT_PROGRAM_NAME, instructionIndex);
	}

	/** Rollback the instruction stream for a program so that
	 *  the indicated instruction (via instructionIndex) is no
	 *  longer in the stream. UNTESTED!
	 */
	public void rollback(String programName, int instructionIndex) {
		List<RewriteOperation> is = programs.get(programName);
		if ( is!=null ) {
			programs.put(programName, is.subList(MIN_TOKEN_INDEX,instructionIndex));
		}
	}

	public void deleteProgram() {
		deleteProgram(DEFAULT_PROGRAM_NAME);
	}

	/** Reset the program so that no instructions exist */
	public void deleteProgram(String programName) {
		rollback(programName, MIN_TOKEN_INDEX);
	}

	public void insertAfter(Token t, Object text) {
		insertAfter(DEFAULT_PROGRAM_NAME, t, text);
	}

	public void insertAfter(int index, Object text) {
		insertAfter(DEFAULT_PROGRAM_NAME, index, text);
	}

	public void insertAfter(String programName, Token t, Object text) {
		insertAfter(programName,t.getTokenIndex(), text);
	}

	public void insertAfter(String programName, int index, Object text) {
		// to insert after, just insert before next index (even if past end)
        RewriteOperation op = new InsertAfterOp(index, text);
        List<RewriteOperation> rewrites = getProgram(programName);
        op.instructionIndex = rewrites.size();
        rewrites.add(op);
	}

	public void insertBefore(Token t, Object text) {
		insertBefore(DEFAULT_PROGRAM_NAME, t, text);
	}

	public void insertBefore(int index, Object text) {
		insertBefore(DEFAULT_PROGRAM_NAME, index, text);
	}

	public void insertBefore(String programName, Token t, Object text) {
		insertBefore(programName, t.getTokenIndex(), text);
	}

	public void insertBefore(String programName, int index, Object text) {
		RewriteOperation op = new InsertBeforeOp(index,text);
		List<RewriteOperation> rewrites = getProgram(programName);
		op.instructionIndex = rewrites.size();
		rewrites.add(op);
	}

	public void replace(int index, Object text) {
		replace(DEFAULT_PROGRAM_NAME, index, index, text);
	}

	public void replace(int from, int to, Object text) {
		replace(DEFAULT_PROGRAM_NAME, from, to, text);
	}

	public void replace(Token indexT, Object text) {
		replace(DEFAULT_PROGRAM_NAME, indexT, indexT, text);
	}

	public void replace(Token from, Token to, Object text) {
		replace(DEFAULT_PROGRAM_NAME, from, to, text);
	}

	public void replace(String programName, int from, int to, Object text) {
		if ( from > to || from<0 || to<0 || to >= tokens.size() ) {
			throw new IllegalArgumentException("replace: range invalid: "+from+".."+to+"(size="+tokens.size()+")");
		}
		RewriteOperation op = new ReplaceOp(from, to, text);
		List<RewriteOperation> rewrites = getProgram(programName);
		op.instructionIndex = rewrites.size();
		rewrites.add(op);
	}

	public void replace(String programName, Token from, Token to, Object text) {
		replace(programName,
				from.getTokenIndex(),
				to.getTokenIndex(),
				text);
	}

	public void delete(int index) {
		delete(DEFAULT_PROGRAM_NAME, index, index);
	}

	public void delete(int from, int to) {
		delete(DEFAULT_PROGRAM_NAME, from, to);
	}

	public void delete(Token indexT) {
		delete(DEFAULT_PROGRAM_NAME, indexT, indexT);
	}

	public void delete(Token from, Token to) {
		delete(DEFAULT_PROGRAM_NAME, from, to);
	}

	public void delete(String programName, int from, int to) {
		replace(programName,from,to,null);
	}

	public void delete(String programName, Token from, Token to) {
		replace(programName,from,to,null);
	}

	public int getLastRewriteTokenIndex() {
		return getLastRewriteTokenIndex(DEFAULT_PROGRAM_NAME);
	}

	protected int getLastRewriteTokenIndex(String programName) {
		Integer I = lastRewriteTokenIndexes.get(programName);
		if ( I==null ) {
			return -1;
		}
		return I;
	}

	protected void setLastRewriteTokenIndex(String programName, int i) {
		lastRewriteTokenIndexes.put(programName, i);
	}

	protected List<RewriteOperation> getProgram(String name) {
		List<RewriteOperation> is = programs.get(name);
		if ( is==null ) {
			is = initializeProgram(name);
		}
		return is;
	}

	private List<RewriteOperation> initializeProgram(String name) {
		List<RewriteOperation> is = new ArrayList<RewriteOperation>(PROGRAM_INIT_SIZE);
		programs.put(name, is);
		return is;
	}

	/** Return the text from the original tokens altered per the
	 *  instructions given to this rewriter.
 	 */
	public String getText() {
		return getText(DEFAULT_PROGRAM_NAME, Interval.of(0,tokens.size()-1));
	}

	/** Return the text from the original tokens altered per the
	 *  instructions given to this rewriter in programName.
 	 */
	public String getText(String programName) {
		return getText(programName, Interval.of(0,tokens.size()-1));
	}

	/** Return the text associated with the tokens in the interval from the
	 *  original token stream but with the alterations given to this rewriter.
	 *  The interval refers to the indexes in the original token stream.
	 *  We do not alter the token stream in any way, so the indexes
	 *  and intervals are still consistent. Includes any operations done
	 *  to the first and last token in the interval. So, if you did an
	 *  insertBefore on the first token, you would get that insertion.
	 *  The same is true if you do an insertAfter the stop token.
 	 */
	public String getText(Interval interval) {
		return getText(DEFAULT_PROGRAM_NAME, interval);
	}

	public String getText(String programName, Interval interval) {
		List<RewriteOperation> rewrites = programs.get(programName);
		int start = interval.a;
		int stop = interval.b;

		// ensure start/end are in range
		if ( stop>tokens.size()-1 ) stop = tokens.size()-1;
		if ( start<0 ) start = 0;

		if ( rewrites==null || rewrites.isEmpty() ) {
			return tokens.getText(interval); // no instructions to execute
		}
		StringBuilder buf = new StringBuilder();

		// First, optimize instruction stream
		Map<Integer, RewriteOperation> indexToOp = reduceToSingleOperationPerIndex(rewrites);

		// Walk buffer, executing instructions and emitting tokens
		int i = start;
		while ( i <= stop && i < tokens.size() ) {
			RewriteOperation op = indexToOp.get(i);
			indexToOp.remove(i); // remove so any left have index size-1
			Token t = tokens.get(i);
			if ( op==null ) {
				// no operation at that index, just dump token
				if ( t.getType()!=Token.EOF ) buf.append(t.getText());
				i++; // move to next token
			}
			else {
				i = op.execute(buf); // execute operation and skip
			}
		}

		// include stuff after end if it's last index in buffer
		// So, if they did an insertAfter(lastValidIndex, "foo"), include
		// foo if end==lastValidIndex.
		if ( stop==tokens.size()-1 ) {
			// Scan any remaining operations after last token
			// should be included (they will be inserts).
			for (RewriteOperation op : indexToOp.values()) {
				if ( op.index >= tokens.size()-1 ) buf.append(op.text);
			}
		}
		return buf.toString();
	}

	/** We need to combine operations and report invalid operations (like
	 *  overlapping replaces that are not completed nested). Inserts to
	 *  same index need to be combined etc...  Here are the cases:
	 *
	 *  I.i.u I.j.v								leave alone, nonoverlapping
	 *  I.i.u I.i.v								combine: Iivu
	 *
	 *  R.i-j.u R.x-y.v	| i-j in x-y			delete first R
	 *  R.i-j.u R.i-j.v							delete first R
	 *  R.i-j.u R.x-y.v	| x-y in i-j			ERROR
	 *  R.i-j.u R.x-y.v	| boundaries overlap	ERROR
	 *
	 *  Delete special case of replace (text==null):
	 *  D.i-j.u D.x-y.v	| boundaries overlap	combine to max(min)..max(right)
	 *
	 *  I.i.u R.x-y.v | i in (x+1)-y			delete I (since insert before
	 *											we're not deleting i)
	 *  I.i.u R.x-y.v | i not in (x+1)-y		leave alone, nonoverlapping
	 *  R.x-y.v I.i.u | i in x-y				ERROR
	 *  R.x-y.v I.x.u 							R.x-y.uv (combine, delete I)
	 *  R.x-y.v I.i.u | i not in x-y			leave alone, nonoverlapping
	 *
	 *  I.i.u = insert u before op @ index i
	 *  R.x-y.u = replace x-y indexed tokens with u
	 *
	 *  First we need to examine replaces. For any replace op:
	 *
	 * 		1. wipe out any insertions before op within that range.
	 *		2. Drop any replace op before that is contained completely within
	 *	 that range.
	 *		3. Throw exception upon boundary overlap with any previous replace.
	 *
	 *  Then we can deal with inserts:
	 *
	 * 		1. for any inserts to same index, combine even if not adjacent.
	 * 		2. for any prior replace with same left boundary, combine this
	 *	 insert with replace and delete this replace.
	 * 		3. throw exception if index in same range as previous replace
	 *
	 *  Don't actually delete; make op null in list. Easier to walk list.
	 *  Later we can throw as we add to index &rarr; op map.
	 *
	 *  Note that I.2 R.2-2 will wipe out I.2 even though, technically, the
	 *  inserted stuff would be before the replace range. But, if you
	 *  add tokens in front of a method body '{' and then delete the method
	 *  body, I think the stuff before the '{' you added should disappear too.
	 *
	 *  Return a map from token index to operation.
	 */
	protected Map<Integer, RewriteOperation> reduceToSingleOperationPerIndex(List<RewriteOperation> rewrites) {
//		System.out.println("rewrites="+rewrites);

		// WALK REPLACES
		for (int i = 0; i < rewrites.size(); i++) {
			RewriteOperation op = rewrites.get(i);
			if ( op==null ) continue;
			if ( !(op instanceof ReplaceOp) ) continue;
			ReplaceOp rop = (ReplaceOp)rewrites.get(i);
			// Wipe prior inserts within range
			List<? extends InsertBeforeOp> inserts = getKindOfOps(rewrites, InsertBeforeOp.class, i);
			for (InsertBeforeOp iop : inserts) {
				if ( iop.index == rop.index ) {
					// E.g., insert before 2, delete 2..2; update replace
					// text to include insert before, kill insert
					rewrites.set(iop.instructionIndex, null);
					rop.text = iop.text.toString() + (rop.text!=null?rop.text.toString():"");
				}
				else if ( iop.index > rop.index && iop.index <= rop.lastIndex ) {
					// delete insert as it's a no-op.
					rewrites.set(iop.instructionIndex, null);
				}
			}
			// Drop any prior replaces contained within
			List<? extends ReplaceOp> prevReplaces = getKindOfOps(rewrites, ReplaceOp.class, i);
			for (ReplaceOp prevRop : prevReplaces) {
				if ( prevRop.index>=rop.index && prevRop.lastIndex <= rop.lastIndex ) {
					// delete replace as it's a no-op.
					rewrites.set(prevRop.instructionIndex, null);
					continue;
				}
				// throw exception unless disjoint or identical
				boolean disjoint =
					prevRop.lastIndex<rop.index || prevRop.index > rop.lastIndex;
				// Delete special case of replace (text==null):
				// D.i-j.u D.x-y.v	| boundaries overlap	combine to max(min)..max(right)
				if ( prevRop.text==null && rop.text==null && !disjoint ) {
					//System.out.println("overlapping deletes: "+prevRop+", "+rop);
					rewrites.set(prevRop.instructionIndex, null); // kill first delete
					rop.index = Math.min(prevRop.index, rop.index);
					rop.lastIndex = Math.max(prevRop.lastIndex, rop.lastIndex);
					System.out.println("new rop "+rop);
				}
				else if ( !disjoint ) {
					throw new IllegalArgumentException("replace op boundaries of "+rop+" overlap with previous "+prevRop);
				}
			}
		}

		// WALK INSERTS
		for (int i = 0; i < rewrites.size(); i++) {
			RewriteOperation op = rewrites.get(i);
			if ( op==null ) continue;
			if ( !(op instanceof InsertBeforeOp) ) continue;
			InsertBeforeOp iop = (InsertBeforeOp)rewrites.get(i);
			// combine current insert with prior if any at same index
			List<? extends InsertBeforeOp> prevInserts = getKindOfOps(rewrites, InsertBeforeOp.class, i);
			for (InsertBeforeOp prevIop : prevInserts) {
				if ( prevIop.index==iop.index ) {
					if ( InsertAfterOp.class.isInstance(prevIop) ) {
						iop.text = catOpText(prevIop.text, iop.text);
						rewrites.set(prevIop.instructionIndex, null);
					}
					else if ( InsertBeforeOp.class.isInstance(prevIop) ) { // combine objects
						// convert to strings...we're in process of toString'ing
						// whole token buffer so no lazy eval issue with any templates
						iop.text = catOpText(iop.text, prevIop.text);
						// delete redundant prior insert
						rewrites.set(prevIop.instructionIndex, null);
					}
				}
			}
			// look for replaces where iop.index is in range; error
			List<? extends ReplaceOp> prevReplaces = getKindOfOps(rewrites, ReplaceOp.class, i);
			for (ReplaceOp rop : prevReplaces) {
				if ( iop.index == rop.index ) {
					rop.text = catOpText(iop.text,rop.text);
					rewrites.set(i, null);	// delete current insert
					continue;
				}
				if ( iop.index >= rop.index && iop.index <= rop.lastIndex ) {
					throw new IllegalArgumentException("insert op "+iop+" within boundaries of previous "+rop);
				}
			}
		}
		// System.out.println("rewrites after="+rewrites);
		Map<Integer, RewriteOperation> m = new HashMap<Integer, RewriteOperation>();
		for (int i = 0; i < rewrites.size(); i++) {
			RewriteOperation op = rewrites.get(i);
			if ( op==null ) continue; // ignore deleted ops
			if ( m.get(op.index)!=null ) {
				throw new Error("should only be one op per index");
			}
			m.put(op.index, op);
		}
		//System.out.println("index to op: "+m);
		return m;
	}

	protected String catOpText(Object a, Object b) {
		String x = "";
		String y = "";
		if ( a!=null ) x = a.toString();
		if ( b!=null ) y = b.toString();
		return x+y;
	}

	/** Get all operations before an index of a particular kind */
	protected <T extends RewriteOperation> List<? extends T> getKindOfOps(List<? extends RewriteOperation> rewrites, Class<T> kind, int before) {
		List<T> ops = new ArrayList<T>();
		for (int i=0; i<before && i<rewrites.size(); i++) {
			RewriteOperation op = rewrites.get(i);
			if ( op==null ) continue; // ignore deleted
			if ( kind.isInstance(op) ) {
				ops.add(kind.cast(op));
			}
		}
		return ops;
	}
}

org/antlr/v4/runtime/TokenStreamRewriter.java

 

Or download all of them as a single archive file:

File name: antlr-runtime-4.10.1-sources.jar
File size: 308953 bytes
Release date: 2022-04-15
Download 

 

ANTLR Tool Source Code

What Is ANTLR Parser Generator

Download and Review ANTLR Parser Generator

⇑⇑ FAQ for ANTLR Parser Generator

2018-10-21, 31326👍, 0💬