Skip to content

Instantly share code, notes, and snippets.

@Adzz
Last active November 4, 2022 13:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Adzz/e9ee6764aaa34b813b5238d938b02036 to your computer and use it in GitHub Desktop.
Save Adzz/e9ee6764aaa34b813b5238d938b02036 to your computer and use it in GitHub Desktop.
A lisp lexer / parser for a pairing exercise, written in Elixir. I chose to make this event based for some reason.
# A simple Lisp parser. You could copy paste this into iex (the elixir repl)
# or use a livebook, and you can run the parser like this:
#
# Elexer.parse("(+ 1 -2)")
# A more fully featured prject can be found here: https://github.com/Adzz/elexer
defmodule Elexer do
@moduledoc """
A simple lisp parser.
"""
defmodule SytanxError do
defexception [:message]
end
def parse(source_code, handler \\ Elexer.StackHandler) do
case Elexer.Emitter.read_source_code(source_code, handler, []) do
{:syntax_error, message} -> raise Elexer.SytanxError, message
{:syntax_error, message, arg} -> raise Elexer.SytanxError, message <> inspect(arg)
{:ok, ast} -> ast
{:halt, state} -> {:halt, state}
{:error, state} -> {:error, state}
end
end
end
defmodule Elexer.StackHandler do
@moduledoc """
Creates a stack of elements as the source code is parsed, returning a tree of sorts; a
recursive tuple data structure.
"""
def handle_event(:open_s_expression, fn_name, state) do
{:ok, [{fn_name, []} | state]}
end
def handle_event(:string_arg, string, state) do
[{fn_name, args} | rest_of_state] = state
{:ok, [{fn_name, [string | args]} | rest_of_state]}
end
def handle_event(:number_arg, number, state) do
[{fn_name, args} | rest_of_state] = state
{:ok, [{fn_name, [number | args]} | rest_of_state]}
end
def handle_event(:close_s_expression, [{fn_name, args}]) do
{:ok, {fn_name, Enum.reverse(args)}}
end
def handle_event(
:close_s_expression,
[{fn_name, args}, {parent_fn_name, parent_args} | rest_of_state]
) do
{:ok, [{parent_fn_name, [{fn_name, Enum.reverse(args)} | parent_args]} | rest_of_state]}
end
def handle_event(:close_s_expression, _state) do
raise Elexer.SytanxError, "Could not parse argument, missing closing bracket."
end
# This means we never reached the last paren
def handle_event(:end_file, [{_fn_name, _args}] = _s) do
raise Elexer.SytanxError, "Could not parse argument, missing closing bracket."
end
def handle_event(:end_file, {_fn_name, _args} = ast) do
{:ok, ast}
end
end
defmodule Elexer.Emitter do
@space " "
@speech_mark "\""
@negative_symbol "-"
@open_paren "("
@close_paren ")"
@open_fn_event :open_s_expression
@close_fn_event :close_s_expression
@string_arg_event :string_arg
@number_arg_event :number_arg
@end_file_event :end_file
def read_source_code(<<@open_paren, function_body::binary>>, handler, state) do
# For now we say you always expect there to be a function name.
case function_body |> parse_until(@space, "") do
:terminal_char_never_reached ->
{:syntax_error,
"Could not parse function name. Fn should be separated from args by a space"}
{@space, fn_name, rest} ->
case handler.handle_event(@open_fn_event, fn_name, state) do
{:ok, updated_state} ->
parse_args(rest, handler, updated_state)
end
end
end
def read_source_code(_, _handler, _state) do
{:syntax_error, "Program should start with a comment or an S - expression"}
end
defp parse_args(<<@close_paren, rest::binary>>, handler, state) do
{:ok, new_state} = handler.handle_event(@close_fn_event, state)
parse_args(rest, handler, new_state)
end
defp parse_args(<<>>, handler, state) do
handler.handle_event(@end_file_event, state)
end
# PARSE STRING
# Currently we don't support string escaping.
defp parse_args(<<@speech_mark, function_body::binary>>, handler, state) do
case function_body |> parse_until(@speech_mark, "") do
:terminal_char_never_reached ->
{:syntax_error, "Binary was missing closing \""}
{@speech_mark, string, rest_of_source_code} ->
{:ok, new_state} = handler.handle_event(@string_arg_event, string, state)
parse_args(String.trim(rest_of_source_code), handler, new_state)
end
end
# Parse S expression arg
defp parse_args(<<@open_paren, _function_body::binary>> = nested_expression, handler, state) do
read_source_code(nested_expression, handler, state)
end
# IS NEGATIVE NUMBER
defp parse_args(<<@negative_symbol, value::binary>>, handler, state) do
case value |> parse_until([@space, @close_paren], "") do
:terminal_char_never_reached ->
{:syntax_error, "Binary was missing closing", value}
{@space, number_string, rest_of_source_code} ->
# if it's a space then we found an arg so do our thang
case parse_absolute_number(number_string) do
:error ->
{:syntax_error, "Could not cast value to number: ", @negative_symbol <> number_string}
number ->
{:ok, new_state} = handler.handle_event(@number_arg_event, -number, state)
parse_args(String.trim(rest_of_source_code), handler, new_state)
end
{@close_paren, number_string, rest_of_source_code} ->
case parse_absolute_number(number_string) do
:error ->
{:syntax_error, "Could not cast value to number: ", @negative_symbol <> number_string}
number ->
{:ok, new_state} = handler.handle_event(@number_arg_event, -number, state)
{:ok, new_state} = handler.handle_event(@close_fn_event, new_state)
parse_args(String.trim(rest_of_source_code), handler, new_state)
end
end
end
# IS NUMBER
defp parse_args(value, handler, state) do
case value |> parse_until([@space, @close_paren], "") do
:terminal_char_never_reached ->
{:syntax_error, "Could not parse argument, missing closing bracket."}
{@space, number_string, rest_of_source_code} ->
# if it's a space then we found an arg so do our thang
case parse_absolute_number(number_string) do
:error ->
{:syntax_error, "Could not cast value to number: ", number_string}
number ->
{:ok, new_state} = handler.handle_event(@number_arg_event, number, state)
parse_args(String.trim(rest_of_source_code), handler, new_state)
end
{@close_paren, number_string, rest_of_source_code} ->
case parse_absolute_number(number_string) do
:error ->
{:syntax_error, "Could not cast value to number: ", number_string}
number ->
{:ok, new_state} = handler.handle_event(@number_arg_event, number, state)
{:ok, new_state} = handler.handle_event(@close_fn_event, new_state)
parse_args(String.trim(rest_of_source_code), handler, new_state)
end
end
end
defp parse_absolute_number("") do
# This is "there is a bug in the parser" and not "user typed the wrong source code".
raise "Parser Error!"
end
defp parse_absolute_number(value) do
case parse_integer(value, 0) do
:number_parse_error -> :error
:float -> parse_float(value)
int -> int
end
end
defp parse_integer("", acc), do: acc
# Kind of inefficient to throw away acc but until we know how float parsing works we have to
defp parse_integer(<<".", _rest::binary>>, _acc), do: :float
defp parse_integer(<<?0, rest::binary>>, acc), do: parse_integer(rest, acc * 10 + 0)
defp parse_integer(<<?1, rest::binary>>, acc), do: parse_integer(rest, acc * 10 + 1)
defp parse_integer(<<?2, rest::binary>>, acc), do: parse_integer(rest, acc * 10 + 2)
defp parse_integer(<<?3, rest::binary>>, acc), do: parse_integer(rest, acc * 10 + 3)
defp parse_integer(<<?4, rest::binary>>, acc), do: parse_integer(rest, acc * 10 + 4)
defp parse_integer(<<?5, rest::binary>>, acc), do: parse_integer(rest, acc * 10 + 5)
defp parse_integer(<<?6, rest::binary>>, acc), do: parse_integer(rest, acc * 10 + 6)
defp parse_integer(<<?7, rest::binary>>, acc), do: parse_integer(rest, acc * 10 + 7)
defp parse_integer(<<?8, rest::binary>>, acc), do: parse_integer(rest, acc * 10 + 8)
defp parse_integer(<<?9, rest::binary>>, acc), do: parse_integer(rest, acc * 10 + 9)
defp parse_integer(_binary, _acc), do: :number_parse_error
# We should parse this manually for fun.
defp parse_float(value) do
case Float.parse(value) do
{float, ""} -> float
{_float, _rest} -> :error
:error -> :error
end
end
# could we inline this? Would that be better?
@doc """
Splits a binary into everything up to the a terminating character, the terminating
character and everything after that.
This iterates through the binary one byte at a time which means the terminating char should
be one byte. If multiple terminating chars are provided we stop as soon as we see any one
of them.
It's faster to keep this in this module as the match context gets re-used if we do.
you can see the warnings if you play around with this:
ERL_COMPILER_OPTIONS=bin_opt_info mix compile --force
""" && false
def parse_until(<<>>, _terminal_char, _acc), do: :terminal_char_never_reached
def parse_until(<<head::binary-size(1), rest::binary>>, [_ | _] = terminal_chars, acc) do
if head in terminal_chars do
{head, acc, rest}
else
parse_until(rest, terminal_chars, acc <> head)
end
end
def parse_until(<<head::binary-size(1), rest::binary>>, terminal_char, acc) do
case head do
^terminal_char -> {head, acc, rest}
char -> parse_until(rest, terminal_char, acc <> char)
end
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment