Last active
November 13, 2019 00:10
-
-
Save alekfrohlich/5920b881e833e4bbf4786a26afda6f78 to your computer and use it in GitHub Desktop.
Simpe scheme-like lexer.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.util.List; | |
import java.util.ArrayList; | |
import java.math.BigDecimal; | |
import static java.lang.Character.*; | |
// non-reentrant lexer | |
public class Lexer { | |
public static enum TokenType { | |
BEGIN, | |
DEFINE, | |
IF, | |
LAMBDA, | |
PLUS, | |
MINUS, | |
TIMES, | |
DIV, | |
AND, | |
OR, | |
NOT, | |
LPAREN, | |
RPAREN, | |
NUMBER, | |
BOOLEAN, | |
VARIABLE, | |
} | |
public static class Token { | |
public TokenType type; | |
public Object value; | |
public Token(TokenType type, Object value) { | |
this.type = type; | |
this.value = value; | |
} | |
@Override | |
public String toString() { | |
switch(type) { | |
case NUMBER: return new String(((BigDecimal) value).toString()); | |
case VARIABLE: return (String) value; | |
case BOOLEAN: return new String(((Boolean) value).toString()); | |
default: return type.name(); | |
} | |
} | |
} | |
public static List<Token> lex(String input) { | |
List<Token> tokens = new ArrayList<>(); | |
for(int i = 0, j = 0; i < input.length(); ) { | |
char c = input.charAt(i); | |
switch(c) { | |
case '+': | |
tokens.add(new Token(TokenType.PLUS, null)); | |
i++; | |
break; | |
case '-': | |
tokens.add(new Token(TokenType.MINUS, null)); | |
i++; | |
break; | |
case '*': | |
tokens.add(new Token(TokenType.TIMES, null)); | |
i++; | |
break; | |
case '/': | |
tokens.add(new Token(TokenType.DIV, null)); | |
i++; | |
break; | |
case '(': | |
tokens.add(new Token(TokenType.LPAREN, null)); | |
i++; | |
break; | |
case ')': | |
tokens.add(new Token(TokenType.RPAREN, null)); | |
i++; | |
break; | |
default: | |
if (isDigit(c)) { | |
for (j = i; j < input.length() && isDigit(input.charAt(j)); j++); | |
tokens.add(new Token(TokenType.NUMBER, new BigDecimal(input.substring(i,j)))); | |
i = j; | |
break; | |
} | |
if (isLetter(c)) { | |
for (j = i; j < input.length() && isLetterOrDigit(input.charAt(j)); j++); | |
String match = input.substring(i,j); | |
switch (match) { | |
// keyword | |
case "begin": | |
tokens.add(new Token(TokenType.BEGIN, null)); | |
break; | |
case "define": | |
tokens.add(new Token(TokenType.DEFINE, null)); | |
break; | |
case "if": | |
tokens.add(new Token(TokenType.IF, null)); | |
break; | |
case "lambda": | |
tokens.add(new Token(TokenType.LAMBDA, null)); | |
break; | |
// boolean | |
case "true": | |
tokens.add(new Token(TokenType.BOOLEAN, new Boolean(true))); | |
break; | |
case "false": | |
tokens.add(new Token(TokenType.BOOLEAN, new Boolean(false))); | |
break; | |
// operators | |
case "and": | |
tokens.add(new Token(TokenType.AND, null)); | |
break; | |
case "or": | |
tokens.add(new Token(TokenType.OR, null)); | |
break; | |
case "not": | |
tokens.add(new Token(TokenType.NOT, null)); | |
break; | |
// variable | |
default: | |
tokens.add(new Token(TokenType.VARIABLE, match)); | |
} | |
i = j; | |
break; | |
} | |
i++; // ignore the rest | |
} | |
} | |
return tokens; | |
} | |
public static void main(String[] args) { | |
String input = | |
"(define (sum a b) (+ a b))" | |
+ "(sum 1 2)" | |
+ "(define sum2 (lambda (a b) (+ a b)" | |
+ "(define TRUE (and true false))"; | |
// create tokens and print them | |
List<Token> tokens = lex(input); | |
for (Token token : tokens) | |
System.out.println(token); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment