currymj/parse.jl

## parse.jl
module SEXPParser

# patterns for identifiers, and both kinds of numbers
#const ident = r"[[:punct:][:alpha:]][[:alnum:]]*"
const integer = r"^-*\d+$"
const floating = r"^-*\d*\.\d+$"


# converts numbers to their proper forms, and could
# be extended
function converttoken(tok)
    if ismatch(integer, tok)
        return int(tok)
    elseif ismatch(floating, tok)
        return float(tok)
    else
        return tok
    end
end

# a simple parser for s-expressions
function parseexpr(instring)
    rem = instring
    while length(rem) > 0
        tok, rem = eattoken(rem)
        if tok == "("
            ll, rem = parselist(rem)
            return ll
        else
            return converttoken(tok)
        end
    end
end


# parses creating a list i.e. {a b c ... }
# until a ) is seen.
# if a ( is seen, creates another list recursively
# returns a tuple of the parsed list and remaining
# input
function parselist(instring)
    token, rem = eattoken(instring)
    ll = {}
    while token != ")"
        if token == "("
            r, rem = parselist(rem)
            push!(ll, r)
        else
            push!(ll, converttoken(token))
        end
        token, rem = eattoken(rem)
    end
    return (ll, rem)
end

# eats one token from input
# returns a tuple of token and remaining input
function eattoken(instring)
    # note that the ordering of subexpressions matters here;
    # floats have to be checked before ints
    tokenRE = r"([\(\)]|-*\d*\.\d+|-*\d+|[^\W\d\s]+[\d[:alpha:]]*|[^\w\(\)\s])"
    # now i have two problems :(
    m = match(tokenRE, instring)
    if (m == nothing)
        error("Error on parsing. Perhaps you forgot a )?")
    end

    return (m.match, string(instring[m.offset+length(m.match):end]))
end

function repl()
    while true
        print("> ")
        # blocks here waiting for input
        if eof(STDIN)
            return
        else
            # if a non-EOF character is seen, we get here
            # readline reads all characters (including the one
            # tested by EOF).
            r = readline(STDIN)
            println(parseexpr(chomp(r)))
        end
    end
end

export repl

end

module Tests
using FactCheck
using SEXPParser

function runtests()
    facts("tokens eaten correctly") do
        @fact SEXPParser.eattoken("aaa bbb") => ("aaa", " bbb")
        @fact SEXPParser.eattoken("i2dent bbb") => ("i2dent", " bbb")
    end

    facts("tokens parse correctly") do
    @fact SEXPParser.parseexpr("i2dent") => "i2dent"
    @fact SEXPParser.parseexpr("23") => 23
    @fact SEXPParser.parseexpr("-23") => -23
    @fact SEXPParser.parseexpr("0.234") => 0.234
    @fact SEXPParser.parseexpr("-0.234") => -0.234
    end

    facts("lists parse") do
        @fact SEXPParser.parseexpr("(a b c)") => {"a","b","c"}
        @fact SEXPParser.parseexpr("(first (list 1 (+ 2 3) 9))") => {"first",{"list",1,{"+",2,3},9}}
    end
    return
end
end
	module SEXPParser

	# patterns for identifiers, and both kinds of numbers
	#const ident = r"[[:punct:][:alpha:]][[:alnum:]]*"
	const integer = r"^-*\d+$"
	const floating = r"^-\d\.\d+$"


	# converts numbers to their proper forms, and could
	# be extended
	function converttoken(tok)
	if ismatch(integer, tok)
	return int(tok)
	elseif ismatch(floating, tok)
	return float(tok)
	else
	return tok
	end
	end

	# a simple parser for s-expressions
	function parseexpr(instring)
	rem = instring
	while length(rem) > 0
	tok, rem = eattoken(rem)
	if tok == "("
	ll, rem = parselist(rem)
	return ll
	else
	return converttoken(tok)
	end
	end
	end


	# parses creating a list i.e. {a b c ... }
	# until a ) is seen.
	# if a ( is seen, creates another list recursively
	# returns a tuple of the parsed list and remaining
	# input
	function parselist(instring)
	token, rem = eattoken(instring)
	ll = {}
	while token != ")"
	if token == "("
	r, rem = parselist(rem)
	push!(ll, r)
	else
	push!(ll, converttoken(token))
	end
	token, rem = eattoken(rem)
	end
	return (ll, rem)
	end

	# eats one token from input
	# returns a tuple of token and remaining input
	function eattoken(instring)
	# note that the ordering of subexpressions matters here;
	# floats have to be checked before ints
	tokenRE = r"([\(\)]\|-\d\.\d+\|-\d+\|[^\W\d\s]+[\d[:alpha:]]\|[^\w\(\)\s])"
	# now i have two problems :(
	m = match(tokenRE, instring)
	if (m == nothing)
	error("Error on parsing. Perhaps you forgot a )?")
	end

	return (m.match, string(instring[m.offset+length(m.match):end]))
	end

	function repl()
	while true
	print("> ")
	# blocks here waiting for input
	if eof(STDIN)
	return
	else
	# if a non-EOF character is seen, we get here
	# readline reads all characters (including the one
	# tested by EOF).
	r = readline(STDIN)
	println(parseexpr(chomp(r)))
	end
	end
	end

	export repl

	end

	module Tests
	using FactCheck
	using SEXPParser

	function runtests()
	facts("tokens eaten correctly") do
	@fact SEXPParser.eattoken("aaa bbb") => ("aaa", " bbb")
	@fact SEXPParser.eattoken("i2dent bbb") => ("i2dent", " bbb")
	end

	facts("tokens parse correctly") do
	@fact SEXPParser.parseexpr("i2dent") => "i2dent"
	@fact SEXPParser.parseexpr("23") => 23
	@fact SEXPParser.parseexpr("-23") => -23
	@fact SEXPParser.parseexpr("0.234") => 0.234
	@fact SEXPParser.parseexpr("-0.234") => -0.234
	end

	facts("lists parse") do
	@fact SEXPParser.parseexpr("(a b c)") => {"a","b","c"}
	@fact SEXPParser.parseexpr("(first (list 1 (+ 2 3) 9))") => {"first",{"list",1,{"+",2,3},9}}
	end
	return
	end
	end