Skip to content

Instantly share code, notes, and snippets.

@aparajita31pandey
Last active March 4, 2024 16:52
Show Gist options
  • Save aparajita31pandey/83c0b352559d4682bcd98142633b85eb to your computer and use it in GitHub Desktop.
Save aparajita31pandey/83c0b352559d4682bcd98142633b85eb to your computer and use it in GitHub Desktop.
This implements a REPL that tokenizes the given source code and prints each token.
package main
import (
"bufio"
"fmt"
"os"
"os/user"
)
// Token is a single lexeme produced by the Lexer.
type Token struct {
	Type    TokenType // category of the lexeme
	Literal string    // text of the lexeme as cut from the input
}
// Lexer scans an input string one byte at a time.
type Lexer struct {
	input        string // text being tokenized
	position     int    // index in input of the byte held in ch
	nextPosition int    // index in input of the byte the next read will load
	ch           byte   // byte currently under examination; 0 once the input is exhausted
}
// NewLexer returns a Lexer over input with the first byte already loaded,
// so NextToken can be called on it right away.
func NewLexer(input string) *Lexer {
	lex := new(Lexer)
	lex.input = input
	lex.readChar() // load the first byte into lex.ch
	return lex
}
// PROMPT is the text the REPL prints before reading each input line.
const PROMPT = ">> "
// main runs the REPL. It greets the current OS user, then reads standard
// input line by line, tokenizes each line and prints every token up to (but
// not including) EOF. The loop stops at end of input; a read error is
// reported on standard error and the process exits with status 1.
func main() {
	// Named currentUser so the imported os/user package is not shadowed.
	currentUser, err := user.Current()
	if err != nil {
		panic(err)
	}
	fmt.Printf("Hello %s! This is the yourl programming language!\n",
		currentUser.Username)
	fmt.Println("Feel free to type in commands")

	scanner := bufio.NewScanner(os.Stdin)
	for {
		// Print, not Printf: the prompt is plain text, not a format string.
		fmt.Print(PROMPT)
		if !scanner.Scan() {
			break
		}
		l := NewLexer(scanner.Text())
		for tok := l.NextToken(); tok.Type != EOF; tok = l.NextToken() {
			fmt.Printf("%+v\n", tok)
		}
	}

	// Scan returns false both at end of input and on failure (for example a
	// line too long for the scanner's buffer); only Err tells them apart.
	if err := scanner.Err(); err != nil {
		fmt.Fprintln(os.Stderr, "reading input:", err)
		os.Exit(1)
	}
}
// NextToken skips leading whitespace and returns the next token from the
// input, advancing the lexer past it.
//
// Runs of letters/underscores become keyword or IDENT tokens, runs of digits
// become INT tokens, "==" and "!=" are recognized as two-byte operators, a
// zero byte (which readChar also reports at end of input) yields EOF, and any
// other unrecognized byte yields an ILLEGAL token carrying that byte.
func (l *Lexer) NextToken() Token {
	l.skipWhitespace()

	// Identifiers and numbers span several bytes. Their readers already leave
	// l.ch on the first byte after the lexeme, so these paths return without
	// the trailing readChar used for every other token.
	if isLetter(l.ch) {
		word := l.readIdentifier()
		return Token{Type: LookupIdent(word), Literal: word}
	}
	if isDigit(l.ch) {
		return Token{Type: INT, Literal: l.readNumber()}
	}

	var tok Token
	switch first := l.ch; first {
	case 0:
		tok = Token{Type: EOF, Literal: ""}

	// Operators that may be the start of a two-byte operator.
	case '=':
		if l.peekChar() != '=' {
			tok = newToken(ASSIGN, first)
			break
		}
		l.readChar() // step onto the second '='
		tok = Token{Type: EQ, Literal: string(first) + string(l.ch)}
	case '!':
		if l.peekChar() != '=' {
			tok = newToken(BANG, first)
			break
		}
		l.readChar() // step onto the '='
		tok = Token{Type: NOT_EQ, Literal: string(first) + string(l.ch)}

	// Single-byte operators.
	case '+':
		tok = newToken(PLUS, first)
	case '-':
		tok = newToken(MINUS, first)
	case '*':
		tok = newToken(ASTERISK, first)
	case '/':
		tok = newToken(SLASH, first)
	case '<':
		tok = newToken(LT, first)
	case '>':
		tok = newToken(GT, first)

	// Delimiters.
	case ',':
		tok = newToken(COMMA, first)
	case ';':
		tok = newToken(SEMICOLON, first)
	case '(':
		tok = newToken(LPAREN, first)
	case ')':
		tok = newToken(RPAREN, first)
	case '{':
		tok = newToken(LBRACE, first)
	case '}':
		tok = newToken(RBRACE, first)

	default:
		tok = newToken(ILLEGAL, first)
	}

	// Move past the last byte of the token just produced.
	l.readChar()
	return tok
}
// skipWhitespace advances the lexer until the current byte is not a space,
// tab, newline or carriage return.
func (l *Lexer) skipWhitespace() {
	for {
		switch l.ch {
		case ' ', '\t', '\n', '\r':
			l.readChar()
		default:
			return
		}
	}
}
// readChar loads the byte at nextPosition into ch and advances both
// positions by one. When nextPosition is at or past the end of the input,
// ch is set to 0; the positions are still advanced.
func (l *Lexer) readChar() {
	var next byte // stays 0 when there is nothing left to read
	if l.nextPosition < len(l.input) {
		next = l.input[l.nextPosition]
	}
	l.ch = next
	l.position = l.nextPosition
	l.nextPosition++
}
// peekChar returns the byte at nextPosition without advancing the lexer,
// or 0 when nextPosition is at or past the end of the input.
func (l *Lexer) peekChar() byte {
	if idx := l.nextPosition; idx < len(l.input) {
		return l.input[idx]
	}
	return 0
}
// readIdentifier consumes bytes for as long as isLetter accepts the current
// one and returns the consumed slice of the input. On return, l.ch holds the
// first byte that is not part of the identifier.
func (l *Lexer) readIdentifier() string {
	start := l.position
	for ; isLetter(l.ch); l.readChar() {
	}
	return l.input[start:l.position]
}
// readNumber consumes bytes for as long as isDigit accepts the current one
// and returns the consumed slice of the input. On return, l.ch holds the
// first byte that is not a digit.
func (l *Lexer) readNumber() string {
	begin := l.position
	for ; isDigit(l.ch); l.readChar() {
	}
	return l.input[begin:l.position]
}
// isLetter reports whether ch is an ASCII letter or an underscore.
func isLetter(ch byte) bool {
	switch {
	case ch >= 'a' && ch <= 'z':
		return true
	case ch >= 'A' && ch <= 'Z':
		return true
	default:
		return ch == '_'
	}
}
// isDigit reports whether ch is an ASCII decimal digit.
func isDigit(ch byte) bool {
	if ch < '0' {
		return false
	}
	return ch <= '9'
}
// newToken builds a Token of the given type whose literal is the single
// byte ch converted to a string.
func newToken(Type TokenType, ch byte) Token {
	var tok Token
	tok.Type = Type
	tok.Literal = string(ch)
	return tok
}
// TokenType identifies the lexical category of a Token.
type TokenType string

// Token types produced by the lexer. Every constant is declared with the
// TokenType type rather than left as an untyped string constant, so the
// compiler keeps token types distinct from ordinary strings.
const (
	ILLEGAL TokenType = "ILLEGAL" // a byte the lexer does not recognize
	EOF     TokenType = "EOF"     // end of input

	// Identifiers + literals
	IDENT TokenType = "IDENT" // add, foobar, x, y, ...
	INT   TokenType = "INT"   // 1343456

	// Operators
	ASSIGN   TokenType = "="
	PLUS     TokenType = "+"
	MINUS    TokenType = "-"
	BANG     TokenType = "!"
	ASTERISK TokenType = "*"
	SLASH    TokenType = "/"
	LT       TokenType = "<"
	GT       TokenType = ">"
	EQ       TokenType = "=="
	NOT_EQ   TokenType = "!="

	// Delimiters
	COMMA     TokenType = ","
	SEMICOLON TokenType = ";"
	LPAREN    TokenType = "("
	RPAREN    TokenType = ")"
	LBRACE    TokenType = "{"
	RBRACE    TokenType = "}"

	// Keywords
	FUNCTION TokenType = "FUNCTION"
	LET      TokenType = "LET"
	TRUE     TokenType = "TRUE"
	FALSE    TokenType = "FALSE"
	IF       TokenType = "IF"
	ELSE     TokenType = "ELSE"
	RETURN   TokenType = "RETURN"
)
// keywords maps each reserved word of the language, as spelled in source
// text, to its token type. LookupIdent consults it to tell keywords apart
// from ordinary identifiers.
var keywords = map[string]TokenType{
	"fn":     FUNCTION,
	"let":    LET,
	"true":   TRUE,
	"false":  FALSE,
	"if":     IF,
	"else":   ELSE,
	"return": RETURN,
}
// LookupIdent returns the keyword token type for ident when ident is a
// reserved word, and IDENT otherwise.
func LookupIdent(ident string) TokenType {
	kind, reserved := keywords[ident]
	if !reserved {
		return IDENT
	}
	return kind
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment