Skip to content

Instantly share code, notes, and snippets.

@ryan-beckett
Created February 10, 2012 11:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ryan-beckett/1788927 to your computer and use it in GitHub Desktop.
Save ryan-beckett/1788927 to your computer and use it in GitHub Desktop.
A demonstration of how to strip block-style and inline comments using a finite-state machine. Handles edge cases correctly. You can change the behavior of the class by overriding CommentStripper.doAction(int index).
import java.util.*;
/**
 * Strips {@code //} line comments and block comments from source text using a
 * table-driven finite-state machine.
 *
 * <p>Explicit transitions live in a {@link StaticTransitionTable}. Whenever the
 * table has no entry for a (state, symbol) pair, the machine moves to the
 * state's default listed in the "other" column below. String and character
 * literals are tracked so that comment-like text inside them is left alone.
 * Text blocks and unterminated literals get no special treatment.
 *
 * <pre>
 * state            |  /  |  *  | \n  |  "  |  '  |  \  | other
 * -----------------+-----+-----+-----+-----+-----+-----+------
 * 0 CODE           |  1  |     |     |  5  |  7  |     |  0
 * 1 SLASH          |  2  |  3  |     |  5  |  7  |     |  0
 * 2 LINE_COMMENT   |     |     |  0  |     |     |     |  2
 * 3 BLOCK_COMMENT  |     |  4  |     |     |     |     |  3
 * 4 BLOCK_STAR     |  0  |  4  |     |     |     |     |  3
 * 5 STRING         |     |     |     |  0  |     |  6  |  5
 * 6 STRING_ESCAPE  |     |     |     |     |     |     |  5
 * 7 CHAR           |     |     |     |     |  0  |  8  |  7
 * 8 CHAR_ESCAPE    |     |     |     |     |     |     |  7
 * </pre>
 *
 * <p>The newline that ends a line comment is kept; every other comment
 * character, including both delimiters of a block comment, is handed to
 * {@link #doAction(int)}.
 */
public class CommentStripper {
    private static final int CODE = 0;
    private static final int SLASH = 1;
    private static final int LINE_COMMENT = 2;
    private static final int BLOCK_COMMENT = 3;
    private static final int BLOCK_STAR = 4;
    private static final int STRING = 5;
    private static final int STRING_ESCAPE = 6;
    private static final int CHAR = 7;
    private static final int CHAR_ESCAPE = 8;

    /** Default successor of each state, used when the table has no explicit entry. */
    private static final int[] FALLBACK = {
        CODE,           // CODE
        CODE,           // SLASH: a lone '/' was ordinary code (e.g. division)
        LINE_COMMENT,   // LINE_COMMENT
        BLOCK_COMMENT,  // BLOCK_COMMENT
        BLOCK_COMMENT,  // BLOCK_STAR: the '*' did not start the terminator
        STRING,         // STRING
        STRING,         // STRING_ESCAPE: the escaped character is consumed
        CHAR,           // CHAR
        CHAR            // CHAR_ESCAPE: the escaped character is consumed
    };

    /** Characters that have at least one explicit transition. */
    private static final char[] SYMBOLS = {'/', '*', '\n', '"', '\'', '\\'};

    private final StaticTransitionTable transitions;
    private final StringBuilder sb;
    private int index;
    private int state;

    /**
     * Creates a stripper over a private copy of the given text.
     *
     * @param src source text to strip; must not be {@code null}
     * @throws NullPointerException if {@code src} is {@code null}
     */
    public CommentStripper(String src) {
        sb = new StringBuilder(Objects.requireNonNull(src, "src"));
        transitions = new StaticTransitionTable(FALLBACK.length, SYMBOLS.clone());
        createTransitions();
    }

    /** Registers every explicit edge of the state diagram shown in the class comment. */
    private void createTransitions() {
        newTransition(CODE, SLASH, '/');
        newTransition(CODE, STRING, '"');
        newTransition(CODE, CHAR, '\'');

        newTransition(SLASH, LINE_COMMENT, '/');
        newTransition(SLASH, BLOCK_COMMENT, '*');
        newTransition(SLASH, STRING, '"');
        newTransition(SLASH, CHAR, '\'');

        newTransition(LINE_COMMENT, CODE, '\n');

        newTransition(BLOCK_COMMENT, BLOCK_STAR, '*');
        newTransition(BLOCK_STAR, BLOCK_STAR, '*');
        newTransition(BLOCK_STAR, CODE, '/');

        newTransition(STRING, CODE, '"');
        newTransition(STRING, STRING_ESCAPE, '\\');

        newTransition(CHAR, CODE, '\'');
        newTransition(CHAR, CHAR_ESCAPE, '\\');
    }

    /**
     * Adds one edge to the table and fails fast if the table rejects it, so a
     * mistyped state or symbol cannot silently produce a broken machine.
     */
    private void newTransition(int state1, int state2, char symbol) {
        if (!transitions.newTransition(state1, state2, symbol)) {
            throw new IllegalStateException(
                "Rejected transition " + state1 + " -> " + state2 + " on symbol code " + (int) symbol);
        }
    }

    /**
     * Runs the machine over the buffer and returns the result.
     *
     * @return the buffer contents after all comment characters have been processed
     */
    public String strip() {
        runFSM();
        return sb.toString();
    }

    /**
     * Feeds each remaining character of the buffer through the machine.
     * End of input is detected by position, so no character value is reserved
     * as a sentinel.
     */
    private void runFSM() {
        this.state = CODE;
        while (this.index < sb.length()) {
            char symbol = sb.charAt(this.index++);
            int previous = this.state;
            this.state = nextState(previous, symbol);
            process(previous, this.state);
        }
    }

    /**
     * Decides, from the transition just taken, which characters are comment
     * characters and passes them to {@link #doAction(int)}.
     */
    private void process(int previous, int current) {
        boolean opensComment = previous == SLASH
            && (current == LINE_COMMENT || current == BLOCK_COMMENT);
        if (opensComment) {
            // The first '/' was held back until the second delimiter character
            // confirmed a comment. The second index is read after the first
            // call so it stays right whether or not doAction removed a character.
            doAction(this.index - 2);
            doAction(this.index - 1);
            return;
        }

        boolean lineCommentBody = previous == LINE_COMMENT && current == LINE_COMMENT;
        boolean blockCommentBody = previous == BLOCK_COMMENT || previous == BLOCK_STAR;
        if (lineCommentBody || blockCommentBody) {
            doAction(this.index - 1);
        }
    }

    /**
     * Handles a single comment character. The default removes it from the
     * buffer and steps the read position back so that no character is skipped.
     * Subclasses may override this to react to comment characters differently.
     *
     * @param index position of the comment character in the working buffer
     */
    protected void doAction(int index) {
        sb.deleteCharAt(index);
        this.index--;
    }

    /** Looks up the explicit transition and falls back to the state's default. */
    private int nextState(int state, char symbol) {
        int next = transitions.next(state, symbol);
        return next > -1 ? next : FALLBACK[state];
    }
}
/**
 * Fixed-size state-transition table indexed by state number and input symbol.
 * Each cell holds the destination state, or {@code -1} when no transition has
 * been registered for that (state, symbol) pair.
 */
public class StaticTransitionTable {
    private final int[][] transitions;
    private final char[] symbols;

    /**
     * Creates a table with every cell set to {@code -1}.
     *
     * @param numStates number of states; valid state numbers are {@code 0..numStates-1}
     * @param symbols   the recognised input symbols, one table column each
     */
    public StaticTransitionTable(int numStates, char[] symbols) {
        // Copy the array so that later changes made by the caller cannot
        // silently remap the table's columns.
        this.symbols = symbols.clone();
        transitions = new int[numStates][symbols.length];
        fillTransitions();
    }

    /** Marks every cell as "no transition". */
    private void fillTransitions() {
        for (int[] row : transitions) {
            Arrays.fill(row, -1);
        }
    }

    /**
     * Registers a transition from {@code state1} to {@code state2} on {@code symbol}.
     *
     * @return {@code true} if stored; {@code false} if either state is out of
     *         range or the symbol is not one of the table's symbols
     */
    public boolean newTransition(int state1, int state2, char symbol) {
        if (!validateState(state1) || !validateState(state2)) {
            return false;
        }
        int symbolCol = getSymbolColumn(symbol);
        if (symbolCol < 0) {
            return false;
        }
        transitions[state1][symbolCol] = state2;
        return true;
    }

    /** Returns whether {@code state} is a valid row index. */
    private boolean validateState(int state) {
        return state > -1 && state < transitions.length;
    }

    /** Returns the column of {@code symbol}, or {@code -1} if it is not recognised. */
    private int getSymbolColumn(char symbol) {
        for (int i = 0; i < symbols.length; i++) {
            if (symbols[i] == symbol) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Looks up the destination state for {@code state} on {@code symbol}.
     *
     * @return the destination state, or {@code -1} if the state is out of
     *         range, the symbol is not recognised, or no transition was registered
     */
    public int next(int state, char symbol) {
        if (!validateState(state)) {
            return -1;
        }
        int symbolCol = getSymbolColumn(symbol);
        if (symbolCol < 0) {
            return -1;
        }
        return transitions[state][symbolCol];
    }

    /** Renders one table row per line using {@link Arrays#toString(int[])}. */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        for (int[] row : transitions) {
            sb.append(Arrays.toString(row)).append('\n');
        }
        return sb.toString();
    }
}
public class TestCommentStripper {
public static void main(String[] args) {
String src = "/*"
+"\n* This class strips all single-line and block-style comments."
+"\n* A finite-state machine is used match comments."
+"\n* Of course, you could just use the regex engine, "
+"\n* but that wouldn't be any fun!"
+"\n*/"
+"\npublic class CommentStripper {"
+"\n\tprivate StaticTransitionTable tbl; //FSM implementation"
+"\n\tprivate StringBuilder sb; //source code buffer"
+"\n}";
doStrip(src);
src = "public cl/*abc*/ass/*def*/ CommentStripper { String s = \"some/* abcdefg */ string\";} ////";
doStrip(src);
}
private static void doStrip(String src) {
System.out.println("-------------------------NEW TEST------------------------\n");
System.out.println(src);
CommentStripper cs = new CommentStripper(src);
String strippedSrc = cs.strip();
System.out.println("\n-----------------------STRIPPED------------------------\n");
System.out.println(strippedSrc);
System.out.println("\n-----------------------END TEST------------------------\n\n\n");
}
}
@krstn420
Copy link

This is simply wrong:

String s = "some/* abcdefg */ string";

/* abcdefg */ is not a comment in this case.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment