Skip to content

Instantly share code, notes, and snippets.

@yangyuqian
Last active September 21, 2017 08:17
Show Gist options
  • Save yangyuqian/d3c1c74e5773fe1844147d073367a12b to your computer and use it in GitHub Desktop.
Save yangyuqian/d3c1c74e5773fe1844147d073367a12b to your computer and use it in GitHub Desktop.
lexical parser example to parse and verify a expression `<int64> + <int64>`
/*
A simple lexical parser for mathematical expression,
i.e.
1 + 2 => {1, 2, "+"}
1+2 => {1, 2, "+"}
1 => error
1 + => error
1 + A => error
a A C => error
To make it simple only "+" is supported as a operator,
only integer is supported.
*/
package main
import (
"fmt"
"log"
"os"
"reflect"
"strconv"
"strings"
"sync"
"unicode/utf8"
)
type (
tokenType int
Pos int
)
const (
ILLEGAL tokenType = iota
END
NUMBER
OPERATOR
)
type token struct {
typ tokenType
pos Pos
text string
}
const (
EOF rune = -1
_exprRunes = "0123456789+"
_numberRunes = "0123456789"
_opRunes = "+"
_ignoredRunes = " "
_endRune = ';'
)
type (
lexFn func(*lexer) lexFn
Operator string
Number int64
)
type lexer struct {
start Pos
pos Pos
lastPos Pos
width Pos
input string
state lexFn
tokens chan token
}
func (l *lexer) peek() (r rune) {
r = l.next()
l.backup()
return
}
func (l *lexer) backup() {
l.pos -= l.width
}
func (l *lexer) next() (r rune) {
if int(l.pos) >= len(l.input) {
return EOF
}
r, w := utf8.DecodeRuneInString(l.input[l.pos:])
l.width = Pos(w)
l.pos += l.width
return
}
func (l *lexer) ignore() {
l.start = l.pos
}
func (l *lexer) forget() {
l.pos = l.start
}
func (l *lexer) emit(t tokenType) {
l.tokens <- token{t, l.start, l.input[l.start:l.pos]}
l.start = l.pos
}
func (l *lexer) accept(valid string) (v bool) {
if strings.ContainsRune(valid, l.next()) {
return true
}
l.backup()
return false
}
func (l *lexer) acceptRun(valid string) {
for strings.ContainsRune(valid, l.next()) {
}
l.backup()
}
func (l *lexer) errorf(format string, args ...interface{}) lexFn {
l.tokens <- token{ILLEGAL, l.start, fmt.Sprintf(format, args...)}
return nil
}
func (l *lexer) shutdown() {
close(l.tokens)
}
func (l *lexer) tokenChan() <-chan token {
return l.tokens
}
func (l *lexer) run() {
for l.state = lexText; l.state != nil; {
l.state = l.state(l)
}
l.shutdown()
}
func newLexer(exp string) (l *lexer) {
if !strings.HasSuffix(exp, ";") {
exp = exp + ";"
}
return &lexer{
start: Pos(0),
pos: Pos(0),
input: exp,
tokens: make(chan token, 3),
}
}
func lexText(l *lexer) (fn lexFn) {
// ignore meaningless runes, such as whitespaces
l.acceptRun(_ignoredRunes)
l.ignore()
l.acceptRun(_numberRunes)
if isNumber(l.input[l.start:l.pos]) {
l.forget()
return lexNumber
}
l.acceptRun(_opRunes)
if isOp(l.input[l.start:l.pos]) {
l.forget()
return lexOp
}
if l.next() == _endRune {
return nil
} else {
l.forget()
}
return l.errorf("Illegal expression `%s`, start:pos => %d:%d", l.input[l.start:], l.start, l.pos)
}
func lexNumber(l *lexer) (fn lexFn) {
l.acceptRun(_numberRunes)
word := l.input[l.start:l.pos]
if isNumber(word) {
l.emit(NUMBER)
} else {
// do nothing if word is not a valid number
l.forget()
}
return lexText
}
func lexOp(l *lexer) (fn lexFn) {
l.acceptRun(_opRunes)
word := l.input[l.start:l.pos]
if isOp(word) {
l.emit(OPERATOR)
} else {
// do nothing if not a valid operator
l.forget()
}
return lexText
}
func isNumber(text string) bool {
_, err := strconv.ParseInt(text, 10, 64)
if err != nil {
return false
}
return true
}
func isOp(text string) bool {
switch text {
case "+":
return true
}
return false
}
type expr struct {
left Number
right Number
op Operator
}
func (e *expr) String() string {
return fmt.Sprintf("Left(%d) %s Right(%d)", e.left, e.op, e.right)
}
type parseFn func(*parser) parseFn
type parser struct {
*lexer
state parseFn
buf []token
bufSize int
expr *expr
}
func (p *parser) run() {
for p.state = parseExpr; p.state != nil; {
p.state = p.state(p)
}
}
func (p *parser) fill() {
n := p.bufSize - len(p.buf)
for i := 0; i < n; i++ {
p.buf = append(p.buf, <-p.tokenChan())
}
}
func (p *parser) peek() (t token) {
p.fill()
if len(p.buf) > 0 {
t = p.buf[0]
}
return
}
// buf is an array acts as a FIFO queue
func (p *parser) next() (t token) {
p.fill()
if len(p.buf) > 0 {
t = p.buf[0]
p.buf = p.buf[1:]
}
p.fill()
return
}
func newParser(l *lexer) *parser {
return &parser{
lexer: l,
bufSize: 3,
}
}
func parseExpr(p *parser) parseFn {
t := p.peek()
if emptyToken(t) {
return nil
}
switch t.typ {
case NUMBER:
return parseNumber
case OPERATOR:
return parseOp
}
log.Fatalf("wrong format: %s", p.lexer.input)
return nil
}
func parseNumber(p *parser) parseFn {
t := p.next()
if emptyToken(t) {
log.Fatalf("bad format: %+v", t)
return nil
}
n, err := strconv.ParseInt(t.text, 10, 64)
if err != nil {
panic(err)
}
nextToken := p.peek()
if emptyToken(nextToken) {
if p.expr == nil {
log.Fatalf("right number must be followed by EOF")
}
p.expr.right = Number(n)
return nil
return nil
}
switch nextToken.typ {
case OPERATOR:
if p.expr != nil {
panic("wrong format")
}
p.expr = &expr{left: Number(n)}
return parseOp
default:
log.Fatalf("number must be followed by operator or END: %+v", nextToken)
}
return parseExpr
}
func parseOp(p *parser) parseFn {
t := p.next()
if emptyToken(t) {
return nil
}
if t.typ != OPERATOR {
log.Fatalf("wrong operator token")
}
if p.expr == nil {
log.Fatalf("wrong format")
}
if p.expr.op != "" {
log.Fatalf("operator exist, wrong format")
}
p.expr.op = Operator(t.text)
nextToken := p.peek()
if emptyToken(nextToken) {
return nil
}
// operator must be followed by a number
if nextToken.typ == NUMBER {
if p.expr == nil {
log.Fatalf("wrong format")
}
return parseNumber
} else {
log.Fatalf("an operator must be followed by a number: %+v", nextToken)
}
return parseExpr
}
func main() {
if len(os.Args) < 2 {
log.Fatalf("input a A + B to see the parsed result")
}
wg := sync.WaitGroup{}
l := newLexer(os.Args[1])
p := newParser(l)
wg.Add(1)
go func() {
p.run()
wg.Done()
}()
go l.run()
wg.Wait()
log.Printf("expression: %+v", p.expr)
}
func emptyToken(t token) bool {
return reflect.DeepEqual(reflect.ValueOf(t).Interface(), reflect.Zero(reflect.TypeOf(t)).Interface())
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment