Created
March 8, 2015 20:57
-
-
Save currymj/52f3093e9e6acff1a8ae to your computer and use it in GitHub Desktop.
lisp parser for hacker school interview
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module SEXPParser | |
# patterns for identifiers, and both kinds of numbers | |
#const ident = r"[[:punct:][:alpha:]][[:alnum:]]*" | |
const integer = r"^-*\d+$" | |
const floating = r"^-*\d*\.\d+$" | |
# converts numbers to their proper forms, and could | |
# be extended | |
function converttoken(tok) | |
if ismatch(integer, tok) | |
return int(tok) | |
elseif ismatch(floating, tok) | |
return float(tok) | |
else | |
return tok | |
end | |
end | |
# a simple parser for s-expressions | |
function parseexpr(instring) | |
rem = instring | |
while length(rem) > 0 | |
tok, rem = eattoken(rem) | |
if tok == "(" | |
ll, rem = parselist(rem) | |
return ll | |
else | |
return converttoken(tok) | |
end | |
end | |
end | |
# parses creating a list i.e. {a b c ... } | |
# until a ) is seen. | |
# if a ( is seen, creates another list recursively | |
# returns a tuple of the parsed list and remaining | |
# input | |
function parselist(instring) | |
token, rem = eattoken(instring) | |
ll = {} | |
while token != ")" | |
if token == "(" | |
r, rem = parselist(rem) | |
push!(ll, r) | |
else | |
push!(ll, converttoken(token)) | |
end | |
token, rem = eattoken(rem) | |
end | |
return (ll, rem) | |
end | |
# eats one token from input | |
# returns a tuple of token and remaining input | |
function eattoken(instring) | |
# note that the ordering of subexpressions matters here; | |
# floats have to be checked before ints | |
tokenRE = r"([\(\)]|-*\d*\.\d+|-*\d+|[^\W\d\s]+[\d[:alpha:]]*|[^\w\(\)\s])" | |
# now i have two problems :( | |
m = match(tokenRE, instring) | |
if (m == nothing) | |
error("Error on parsing. Perhaps you forgot a )?") | |
end | |
return (m.match, string(instring[m.offset+length(m.match):end])) | |
end | |
function repl() | |
while true | |
print("> ") | |
# blocks here waiting for input | |
if eof(STDIN) | |
return | |
else | |
# if a non-EOF character is seen, we get here | |
# readline reads all characters (including the one | |
# tested by EOF). | |
r = readline(STDIN) | |
println(parseexpr(chomp(r))) | |
end | |
end | |
end | |
export repl | |
end | |
module Tests | |
using FactCheck | |
using SEXPParser | |
function runtests() | |
facts("tokens eaten correctly") do | |
@fact SEXPParser.eattoken("aaa bbb") => ("aaa", " bbb") | |
@fact SEXPParser.eattoken("i2dent bbb") => ("i2dent", " bbb") | |
end | |
facts("tokens parse correctly") do | |
@fact SEXPParser.parseexpr("i2dent") => "i2dent" | |
@fact SEXPParser.parseexpr("23") => 23 | |
@fact SEXPParser.parseexpr("-23") => -23 | |
@fact SEXPParser.parseexpr("0.234") => 0.234 | |
@fact SEXPParser.parseexpr("-0.234") => -0.234 | |
end | |
facts("lists parse") do | |
@fact SEXPParser.parseexpr("(a b c)") => {"a","b","c"} | |
@fact SEXPParser.parseexpr("(first (list 1 (+ 2 3) 9))") => {"first",{"list",1,{"+",2,3},9}} | |
end | |
return | |
end | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment