Last active
March 4, 2024 16:52
-
-
Save aparajita31pandey/83c0b352559d4682bcd98142633b85eb to your computer and use it in GitHub Desktop.
This implements a REPL that tokenizes the given source code and prints each token.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"bufio" | |
"fmt" | |
"os" | |
"os/user" | |
) | |
// Token is a single lexical unit: its category plus the exact source
// text it was read from.
type Token struct {
	Type    TokenType // token category, e.g. IDENT, INT, PLUS
	Literal string    // literal text as it appeared in the input
}
// Lexer steps through an input string one byte at a time, producing
// tokens on demand via NextToken. It reads raw bytes, so only ASCII
// input is tokenized correctly.
type Lexer struct {
	input        string
	position     int  // current position in input (points to current char)
	nextPosition int  // next position in input (after current char)
	ch           byte // current char under examination; 0 means end of input
}
func NewLexer(input string) *Lexer { | |
l := &Lexer{input: input} | |
l.readChar() | |
return l | |
} | |
const PROMPT = ">> " | |
func main() { | |
user, err := user.Current() | |
if err != nil { | |
panic(err) | |
} | |
fmt.Printf("Hello %s! This is the yourl programming language!\n", | |
user.Username) | |
fmt.Printf("Feel free to type in commands\n") | |
scanner := bufio.NewScanner(os.Stdin) | |
for { | |
fmt.Printf(PROMPT) | |
scanned := scanner.Scan() | |
if !scanned { | |
return | |
} | |
line := scanner.Text() | |
l := NewLexer(line) | |
for tok := l.NextToken(); tok.Type != EOF; tok = l.NextToken() { | |
fmt.Printf("%+v\n", tok) | |
} | |
} | |
} | |
func (l *Lexer) NextToken() Token { | |
var tok Token | |
l.skipWhitespace() | |
switch l.ch { | |
case '=': | |
if l.peekChar() == '=' { | |
ch := l.ch | |
l.readChar() | |
literal := string(ch) + string(l.ch) | |
tok = Token{Type: EQ, Literal: literal} | |
} else { | |
tok = newToken(ASSIGN, l.ch) | |
} | |
case '+': | |
tok = newToken(PLUS, l.ch) | |
case '-': | |
tok = newToken(MINUS, l.ch) | |
case '!': | |
if l.peekChar() == '=' { | |
ch := l.ch | |
l.readChar() | |
literal := string(ch) + string(l.ch) | |
tok = Token{Type: NOT_EQ, Literal: literal} | |
} else { | |
tok = newToken(BANG, l.ch) | |
} | |
case '/': | |
tok = newToken(SLASH, l.ch) | |
case '*': | |
tok = newToken(ASTERISK, l.ch) | |
case '<': | |
tok = newToken(LT, l.ch) | |
case '>': | |
tok = newToken(GT, l.ch) | |
case ';': | |
tok = newToken(SEMICOLON, l.ch) | |
case ',': | |
tok = newToken(COMMA, l.ch) | |
case '{': | |
tok = newToken(LBRACE, l.ch) | |
case '}': | |
tok = newToken(RBRACE, l.ch) | |
case '(': | |
tok = newToken(LPAREN, l.ch) | |
case ')': | |
tok = newToken(RPAREN, l.ch) | |
case 0: | |
tok.Literal = "" | |
tok.Type = EOF | |
default: | |
if isLetter(l.ch) { | |
tok.Literal = l.readIdentifier() | |
tok.Type = LookupIdent(tok.Literal) | |
return tok | |
} else if isDigit(l.ch) { | |
tok.Type = INT | |
tok.Literal = l.readNumber() | |
return tok | |
} else { | |
tok = newToken(ILLEGAL, l.ch) | |
} | |
} | |
l.readChar() | |
return tok | |
} | |
func (l *Lexer) skipWhitespace() { | |
for l.ch == ' ' || l.ch == '\t' || l.ch == '\n' || l.ch == '\r' { | |
l.readChar() | |
} | |
} | |
func (l *Lexer) readChar() { | |
if l.nextPosition >= len(l.input) { | |
l.ch = 0 | |
} else { | |
l.ch = l.input[l.nextPosition] | |
} | |
l.position = l.nextPosition | |
l.nextPosition += 1 | |
} | |
func (l *Lexer) peekChar() byte { | |
if l.nextPosition >= len(l.input) { | |
return 0 | |
} else { | |
return l.input[l.nextPosition] | |
} | |
} | |
func (l *Lexer) readIdentifier() string { | |
position := l.position | |
for isLetter(l.ch) { | |
l.readChar() | |
} | |
return l.input[position:l.position] | |
} | |
func (l *Lexer) readNumber() string { | |
position := l.position | |
for isDigit(l.ch) { | |
l.readChar() | |
} | |
return l.input[position:l.position] | |
} | |
// isLetter reports whether ch may appear in an identifier: ASCII
// letters or underscore.
func isLetter(ch byte) bool {
	switch {
	case ch >= 'a' && ch <= 'z':
		return true
	case ch >= 'A' && ch <= 'Z':
		return true
	default:
		return ch == '_'
	}
}
// isDigit reports whether ch is an ASCII decimal digit.
func isDigit(ch byte) bool {
	return ch >= '0' && ch <= '9'
}
func newToken(Type TokenType, ch byte) Token { | |
return Token{Type: Type, Literal: string(ch)} | |
} | |
// TokenType identifies the lexical category of a token.
type TokenType string

// Token types produced by the lexer. The constants are explicitly typed
// as TokenType (rather than left as untyped strings) so the compiler can
// catch accidental mixing with plain string values.
const (
	ILLEGAL TokenType = "ILLEGAL" // a character the lexer does not recognize
	EOF     TokenType = "EOF"     // end of input

	// Identifiers + literals
	IDENT TokenType = "IDENT" // add, foobar, x, y, ...
	INT   TokenType = "INT"   // 1343456

	// Operators
	ASSIGN   TokenType = "="
	PLUS     TokenType = "+"
	MINUS    TokenType = "-"
	BANG     TokenType = "!"
	ASTERISK TokenType = "*"
	SLASH    TokenType = "/"
	LT       TokenType = "<"
	GT       TokenType = ">"
	EQ       TokenType = "=="
	NOT_EQ   TokenType = "!="

	// Delimiters
	COMMA     TokenType = ","
	SEMICOLON TokenType = ";"
	LPAREN    TokenType = "("
	RPAREN    TokenType = ")"
	LBRACE    TokenType = "{"
	RBRACE    TokenType = "}"

	// Keywords
	FUNCTION TokenType = "FUNCTION"
	LET      TokenType = "LET"
	TRUE     TokenType = "TRUE"
	FALSE    TokenType = "FALSE"
	IF       TokenType = "IF"
	ELSE     TokenType = "ELSE"
	RETURN   TokenType = "RETURN"
)
// keywords maps reserved identifier spellings to their token types.
// Identifiers absent from this map are classified as IDENT by LookupIdent.
var keywords = map[string]TokenType{
	"fn":     FUNCTION,
	"let":    LET,
	"true":   TRUE,
	"false":  FALSE,
	"if":     IF,
	"else":   ELSE,
	"return": RETURN,
}
func LookupIdent(ident string) TokenType { | |
if tok, ok := keywords[ident]; ok { | |
return tok | |
} | |
return IDENT | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment