Created
July 25, 2013 14:44
-
-
Save michiakig/6080396 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.util.List; | |
import java.util.ArrayList; | |
/**
 * Lexical analyzer for a Scheme-like minilanguage, for example
 * {@code (define (foo x) (bar (baz x)))}.
 *
 * <p>The input is split into three kinds of tokens: open parentheses, close
 * parentheses, and atoms. An atom is a maximal run of characters that are
 * neither whitespace nor parentheses, so symbols such as {@code +},
 * {@code foo-bar?} and numbers such as {@code 42} each lex as a single atom.
 * Whitespace only separates tokens and is never emitted.
 */
public class Lexer {

    /** The token kinds of the language: open paren, close paren, and atom. */
    public static enum Type {
        LPAREN, RPAREN, ATOM;
    }

    /** An immutable token: its kind plus the source text it was read from. */
    public static class Token {
        /** The kind of this token. */
        public final Type t;
        /** The token text; mainly of interest for {@link Type#ATOM} tokens. */
        public final String c;
        // Column and line number fields could be added here for error reporting.

        /**
         * @param t the kind of the token
         * @param c the source text of the token
         */
        public Token(Type t, String c) {
            this.t = t;
            this.c = c;
        }

        /**
         * Renders atoms as {@code ATOM<text>} and every other token as the
         * name of its type.
         */
        @Override
        public String toString() {
            if (t == Type.ATOM) {
                return "ATOM<" + c + ">";
            }
            return t.toString();
        }
    }

    /** Returns true if {@code ch} ends an atom: whitespace or a parenthesis. */
    private static boolean isDelimiter(char ch) {
        return ch == '(' || ch == ')' || Character.isWhitespace(ch);
    }

    /**
     * Returns the atom that starts at index {@code i} of {@code s}.
     *
     * <p>The atom extends up to, but not including, the next whitespace
     * character or parenthesis, or to the end of the string. Accepting every
     * non-delimiter character (rather than letters only) is what guarantees
     * that {@link #lex(String)} always makes progress: a letters-only scan
     * returns an empty atom on input such as {@code 1} or {@code +}, which
     * leaves the lexer stuck at the same index forever.
     *
     * @param s the text to read from
     * @param i the index at which the atom starts
     * @return the atom text; empty if {@code i} is at a delimiter or at the
     *         end of {@code s}
     */
    public static String getAtom(String s, int i) {
        int j = i;
        while (j < s.length() && !isDelimiter(s.charAt(j))) {
            j++;
        }
        return s.substring(i, j);
    }

    /**
     * Splits {@code input} into tokens.
     *
     * @param input the source text to tokenize
     * @return the tokens in source order; empty if the input holds only
     *         whitespace or nothing at all
     */
    public static List<Token> lex(String input) {
        List<Token> result = new ArrayList<Token>();
        for (int i = 0; i < input.length(); ) {
            char ch = input.charAt(i);
            switch (ch) {
            case '(':
                result.add(new Token(Type.LPAREN, "("));
                i++;
                break;
            case ')':
                result.add(new Token(Type.RPAREN, ")"));
                i++;
                break;
            default:
                if (Character.isWhitespace(ch)) {
                    i++;
                } else {
                    // ch is not a delimiter here, so the atom has at least
                    // one character and the index always advances.
                    String atom = getAtom(input, i);
                    i += atom.length();
                    result.add(new Token(Type.ATOM, atom));
                }
                break;
            }
        }
        return result;
    }

    /**
     * Lexes the first command-line argument and prints one token per line,
     * or prints a usage message when no argument is given.
     */
    public static void main(String[] args) {
        if (args.length < 1) {
            System.out.println("Usage: java Lexer \"((some Scheme) (code to) lex)\".");
            return;
        }
        List<Token> tokens = lex(args[0]);
        for (Token t : tokens) {
            System.out.println(t);
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment