Skip to content

Instantly share code, notes, and snippets.

@anon767
Last active February 10, 2023 12:35
Show Gist options
  • Save anon767/a85fcf8131b2f6a64f028687c95b3e46 to your computer and use it in GitHub Desktop.
Save anon767/a85fcf8131b2f6a64f028687c95b3e46 to your computer and use it in GitHub Desktop.
/*
* [The "BSD license"]
* Copyright (c) 2010 Terence Parr
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
import org.antlr.Tool;
import org.antlr.analysis.*;
import org.antlr.misc.IntervalSet;
import org.antlr.misc.Utils;
import org.antlr.tool.CompositeGrammar;
import org.antlr.tool.Grammar;
import org.antlr.tool.Rule;
import java.io.BufferedReader;
import java.io.FileReader;
import java.util.*;
/**
* Generate a random phrase given a grammar.
* Usage:
* java org.antlr.tool.RandomPhrase grammarFile.g startRule [seed]
* <p>
* For example:
* java org.antlr.tool.RandomPhrase simple.g program 342
* <p>
* The seed acts like a unique identifier so you can get the same random
* phrase back during unit testing, for example.
* <p>
* If you do not specify a seed then the current time in milliseconds is used
* guaranteeing that you'll never see that seed again.
* <p>
* NOTE: this does not work well for large grammars...it tends to recurse
* too much and build really long strings. I need throttle control; later.
*/
public class RandomPhrase {
public static final boolean debug = false;
protected static Random random;
private static HashMap<State, Integer> seen = new HashMap<State, Integer>();
protected static NFAState getRandomState(NFAState state, Grammar g) {
int n = g.getNumberOfAltsForDecisionNFA(state);
int randomAlt = random.nextInt(n) + 1;
if (debug) System.out.println("randomAlt=" + randomAlt);
return g.getNFAStateForAltOfDecision(state, randomAlt);
}
protected static NFAState getDiscountedOptional(NFAState state, Grammar g) {
int n = g.getNumberOfAltsForDecisionNFA(state);
for (int i = 1; i < n + 1; i++) {
NFAState curState = g.getNFAStateForAltOfDecision(state, i);
if ((curState.decisionStateType != 1 && curState.decisionStateType != 3) || StdRandom.bernoulli(1f / 1000f)) {
return g.getNFAStateForAltOfDecision(state, i);
}
}
return getNoOptional(state, g);
}
protected static NFAState getSkewedDiscountedOptional(NFAState state, Grammar g) {
int n = g.getNumberOfAltsForDecisionNFA(state);
for (int i = 1; i < n + 1; i++) {
NFAState curState = g.getNFAStateForAltOfDecision(state, i);
if ((curState.decisionStateType != 1
&& curState.decisionStateType != 3)
|| (curState.decisionStateType == 2 && StdRandom.bernoulli(1f / 100))) {
return g.getNFAStateForAltOfDecision(state, i);
}
}
return getNoOptional(state, g);
}
protected static NFAState getNoOptional(NFAState state, Grammar g) {
int n = g.getNumberOfAltsForDecisionNFA(state);
for (int i = 1; i < n + 1; i++) {
NFAState curState = g.getNFAStateForAltOfDecision(state, i);
if (curState.getNumberOfTransitions() < 2 && (curState.decisionStateType != 1 && curState.decisionStateType != 3)) {
return g.getNFAStateForAltOfDecision(state, i);
}
}
return getRandomState(state, g);
}
protected static NFAState getSkewedRandomState(NFAState state, Grammar g) {
int n = g.getNumberOfAltsForDecisionNFA(state);
for (int i = 1; i < n + 1; i++) {
NFAState curState = g.getNFAStateForAltOfDecision(state, i);
if (StdRandom.bernoulli(1f / ((float) curState.getNumberOfTransitions()))) {
return g.getNFAStateForAltOfDecision(state, i);
}
}
return getRandomState(state, g);
}
protected static NFAState getSingleTransitionState(NFAState state, Grammar g) {
int n = g.getNumberOfAltsForDecisionNFA(state);
for (int i = 1; i < n + 1; i++) {
NFAState curState = g.getNFAStateForAltOfDecision(state, i);
if (curState.getNumberOfTransitions() < 2)
return g.getNFAStateForAltOfDecision(state, i);
}
return getRandomState(state, g);
}
protected static NFAState getDiscountedSkewedRandomState(NFAState state, Grammar g) {
int n = g.getNumberOfAltsForDecisionNFA(state);
for (int i = 1; i < n + 1; i++) {
NFAState curState = g.getNFAStateForAltOfDecision(state, i);
if (StdRandom.bernoulli((1f * Math.pow(0.1, seen.getOrDefault(curState, 0))))) {
System.out.println(curState);
seen.put(curState, 1 + seen.getOrDefault(curState, 0));
return curState;
}
}
return getRandomState(state, g);
}
protected static NFAState getOnlyUnseen(NFAState state, Grammar g) {
int n = g.getNumberOfAltsForDecisionNFA(state);
for (int i = 1; i < n + 1; i++) {
NFAState curState = g.getNFAStateForAltOfDecision(state, i);
if (!seen.containsKey(curState)) {
seen.put(curState, 1);
return curState;
}
}
return getRandomState(state, g);
}
/**
* an experimental method to generate random phrases for a given
* grammar given a start rule. Return a list of token types.
*/
protected static void randomPhrase(Grammar g, List<Integer> tokenTypes, String startRule) {
NFAState state = g.getRuleStartState(startRule);
NFAState stopState = g.getRuleStopState(startRule);
Stack<NFAState> ruleInvocationStack = new Stack<NFAState>();
while (true) {
if (state == stopState && ruleInvocationStack.isEmpty()) {
break;
}
if (state.getNumberOfTransitions() == 0) {
if (debug) System.out.println("dangling state: " + state);
return;
}
// end of rule node
if (state.isAcceptState()) {
NFAState invokingState = ruleInvocationStack.pop();
if (debug) System.out.println("pop invoking state " + invokingState);
//System.out.println("leave " + state.enclosingRule.name);
RuleClosureTransition invokingTransition =
(RuleClosureTransition) invokingState.transition[0];
// move to node after state that invoked this rule
state = invokingTransition.followState;
continue;
}
if (state.getNumberOfTransitions() == 1) {
// no branching, just take this path
Transition t0 = state.transition[0];
if (t0 instanceof RuleClosureTransition) {
ruleInvocationStack.push(state);
if (debug) System.out.println("push state " + state);
//System.out.println("call " + ((RuleClosureTransition) t0).rule.name);
//System.out.println("stack depth=" + ruleInvocationStack.size());
} else if (t0.label.isSet() || t0.label.isAtom()) {
tokenTypes.add(getTokenType(t0.label));
}
state = (NFAState) t0.target;
continue;
}
int decisionNumber = state.getDecisionNumber();
if (decisionNumber == 0) {
System.out.println("weird: no decision number but a choice node");
continue;
}
NFAState altStartState = getSkewedDiscountedOptional(state, g);
System.out.println(altStartState.getNumberOfTransitions() + " " + altStartState.decisionStateType);
Transition t = altStartState.transition[0];
state = (NFAState) t.target;
}
}
protected static Integer getTokenType(Label label) {
if (label.isSet()) {
// pick random element of set
IntervalSet typeSet = (IntervalSet) label.getSet();
int randomIndex = random.nextInt(typeSet.size());
return typeSet.get(randomIndex);
} else {
return Utils.integer(label.getAtom());
}
//System.out.println(t0.label.toString(g));
}
/**
* Used to generate random strings
*/
public static void main(String[] args) {
if (args.length < 2) {
System.err.println("usage: java org.antlr.tool.RandomPhrase grammarfile startrule");
return;
}
String grammarFileName = args[0];
String startRule = args[1];
long seed = System.currentTimeMillis(); // use random seed unless spec.
if (args.length == 3) {
String seedStr = args[2];
seed = Long.parseLong(seedStr);
}
try {
random = new Random(seed);
CompositeGrammar composite = new CompositeGrammar();
Tool tool = new Tool();
Grammar parser = new Grammar(tool, grammarFileName, composite);
composite.setDelegationRoot(parser);
FileReader fr = new FileReader(grammarFileName);
BufferedReader br = new BufferedReader(fr);
parser.parseAndBuildAST(br);
br.close();
parser.composite.assignTokenTypes();
parser.composite.defineGrammarSymbols();
parser.composite.createNFAs();
List<? extends Collection<? extends Rule>> leftRecursiveRules = parser.checkAllRulesForLeftRecursion();
if (leftRecursiveRules.size() > 0) {
return;
}
if (parser.getRule(startRule) == null) {
System.out.println("undefined start rule " + startRule);
return;
}
String lexerGrammarText = parser.getLexerGrammar();
Grammar lexer = new Grammar(tool);
lexer.importTokenVocabulary(parser);
lexer.setFileName(grammarFileName);
if (lexerGrammarText != null) {
lexer.setGrammarContent(lexerGrammarText);
} else {
System.err.println("no lexer grammar found in " + grammarFileName);
}
lexer.buildNFA();
leftRecursiveRules = lexer.checkAllRulesForLeftRecursion();
if (leftRecursiveRules.size() > 0) {
return;
}
//System.out.println("lexer:\n"+lexer);
List<Integer> tokenTypes = new ArrayList<Integer>(100);
randomPhrase(parser, tokenTypes, startRule);
System.out.println("token types=" + tokenTypes);
for (int i = 0; i < tokenTypes.size(); i++) {
Integer ttypeI = tokenTypes.get(i);
int ttype = ttypeI;
String ttypeDisplayName = parser.getTokenDisplayName(ttype);
if (Character.isUpperCase(ttypeDisplayName.charAt(0))) {
List<Integer> charsInToken = new ArrayList<Integer>(10);
randomPhrase(lexer, charsInToken, ttypeDisplayName);
System.out.print(" ");
for (int j = 0; j < charsInToken.size(); j++) {
Integer cI = charsInToken.get(j);
System.out.print((char) cI.intValue());
}
} else { // it's a literal
String literal =
ttypeDisplayName.substring(1, ttypeDisplayName.length() - 1);
System.out.print(" " + literal);
}
}
System.out.println("\nseed: " + seed);
} catch (Error er) {
System.err.println("Error walking " + grammarFileName + " rule " + startRule + " seed " + seed);
er.printStackTrace(System.err);
} catch (Exception e) {
System.err.println("Exception walking " + grammarFileName + " rule " + startRule + " seed " + seed);
e.printStackTrace(System.err);
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment