Skip to content

Instantly share code, notes, and snippets.

@alekfrohlich
Last active November 13, 2019 00:10
Show Gist options
  • Save alekfrohlich/5920b881e833e4bbf4786a26afda6f78 to your computer and use it in GitHub Desktop.
Save alekfrohlich/5920b881e833e4bbf4786a26afda6f78 to your computer and use it in GitHub Desktop.
Simpe scheme-like lexer.
import java.util.List;
import java.util.ArrayList;
import java.math.BigDecimal;
import static java.lang.Character.*;
// non-reentrant lexer
public class Lexer {
public static enum TokenType {
BEGIN,
DEFINE,
IF,
LAMBDA,
PLUS,
MINUS,
TIMES,
DIV,
AND,
OR,
NOT,
LPAREN,
RPAREN,
NUMBER,
BOOLEAN,
VARIABLE,
}
public static class Token {
public TokenType type;
public Object value;
public Token(TokenType type, Object value) {
this.type = type;
this.value = value;
}
@Override
public String toString() {
switch(type) {
case NUMBER: return new String(((BigDecimal) value).toString());
case VARIABLE: return (String) value;
case BOOLEAN: return new String(((Boolean) value).toString());
default: return type.name();
}
}
}
public static List<Token> lex(String input) {
List<Token> tokens = new ArrayList<>();
for(int i = 0, j = 0; i < input.length(); ) {
char c = input.charAt(i);
switch(c) {
case '+':
tokens.add(new Token(TokenType.PLUS, null));
i++;
break;
case '-':
tokens.add(new Token(TokenType.MINUS, null));
i++;
break;
case '*':
tokens.add(new Token(TokenType.TIMES, null));
i++;
break;
case '/':
tokens.add(new Token(TokenType.DIV, null));
i++;
break;
case '(':
tokens.add(new Token(TokenType.LPAREN, null));
i++;
break;
case ')':
tokens.add(new Token(TokenType.RPAREN, null));
i++;
break;
default:
if (isDigit(c)) {
for (j = i; j < input.length() && isDigit(input.charAt(j)); j++);
tokens.add(new Token(TokenType.NUMBER, new BigDecimal(input.substring(i,j))));
i = j;
break;
}
if (isLetter(c)) {
for (j = i; j < input.length() && isLetterOrDigit(input.charAt(j)); j++);
String match = input.substring(i,j);
switch (match) {
// keyword
case "begin":
tokens.add(new Token(TokenType.BEGIN, null));
break;
case "define":
tokens.add(new Token(TokenType.DEFINE, null));
break;
case "if":
tokens.add(new Token(TokenType.IF, null));
break;
case "lambda":
tokens.add(new Token(TokenType.LAMBDA, null));
break;
// boolean
case "true":
tokens.add(new Token(TokenType.BOOLEAN, new Boolean(true)));
break;
case "false":
tokens.add(new Token(TokenType.BOOLEAN, new Boolean(false)));
break;
// operators
case "and":
tokens.add(new Token(TokenType.AND, null));
break;
case "or":
tokens.add(new Token(TokenType.OR, null));
break;
case "not":
tokens.add(new Token(TokenType.NOT, null));
break;
// variable
default:
tokens.add(new Token(TokenType.VARIABLE, match));
}
i = j;
break;
}
i++; // ignore the rest
}
}
return tokens;
}
public static void main(String[] args) {
String input =
"(define (sum a b) (+ a b))"
+ "(sum 1 2)"
+ "(define sum2 (lambda (a b) (+ a b)"
+ "(define TRUE (and true false))";
// create tokens and print them
List<Token> tokens = lex(input);
for (Token token : tokens)
System.out.println(token);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment