Skip to content

Instantly share code, notes, and snippets.

@VideoCarp
Last active April 11, 2022 00:43
Show Gist options
  • Save VideoCarp/36bca88c6ef61f0a0d993d70ef6df760 to your computer and use it in GitHub Desktop.
Save VideoCarp/36bca88c6ef61f0a0d993d70ef6df760 to your computer and use it in GitHub Desktop.
A tiny lexer in Elixir.
# A small, unoptimized lexer in the Elixir programming language.
# No mutable values are used.
# This lexer was much easier for me to write and debug than my earlier attempt,
# which relied on imperative constructs such as loops and mutable values.
# It could be optimized by consuming the input string instead of indexing into it.
defmodule Lexer do
  @moduledoc """
  A small, unoptimized lexer that turns a source string into a list of
  `{text, type}` tokens.

  Token types produced by `lex/4`:

    * `:oparen`, `:cparen`, `:obrace`, `:cbrace` - the characters `(`, `)`, `{`, `}`
    * `:assignment` - `=` and the compound forms `+=`, `-=`, `*=`, `/=`, `%=`
    * `:arithmetic` - `+`, `-`, `*`, `/`, `%` when not followed by `=`
    * `:str` - the text between two double quotes (quotes excluded)
    * `:number` - a run of digits
    * `:identifier` - a run of letters, digits and underscores

  Every other character (whitespace included) is skipped.

  The input is indexed with `String.at/2` on every step rather than being
  consumed, so positions are grapheme indexes and each lookup walks the string
  from its start.
  """

  @typedoc "A lexed token: the matched text paired with its category."
  @type token :: {String.t(), atom()}

  # Characters that always form a complete token on their own.
  @single_char_tokens %{
    ")" => :cparen,
    "(" => :oparen,
    "{" => :obrace,
    "}" => :cbrace,
    "=" => :assignment
  }

  @arithmetic_operators ["+", "-", "*", "/", "%"]

  # char info

  @doc """
  Returns `true` when `char` is a string that sorts between `"0"` and `"9"`
  inclusive, and `false` otherwise (including for `nil`).
  """
  @spec numeric(term()) :: boolean()
  def numeric(char), do: between?(char, "0", "9")

  @doc """
  Returns `true` when `char` is numeric (see `numeric/1`), sorts within
  `"a".."z"` or `"A".."Z"`, or is an underscore; `false` otherwise.
  """
  @spec alphanumeric(term()) :: boolean()
  def alphanumeric(char) do
    numeric(char) or between?(char, "a", "z") or between?(char, "A", "Z") or char == "_"
  end

  @doc """
  Returns `true` when `char` is one of `+`, `-`, `*`, `/` or `%`.
  """
  @spec arithmetic(term()) :: boolean()
  def arithmetic(char), do: Enum.member?(@arithmetic_operators, char)

  # Handlers

  @doc """
  Appends to `temp` the run of numeric characters of `input_strr` starting at
  index `cursor`.

  Returns `{text, next_cursor}`, where `next_cursor` is the index of the first
  character that was not consumed (or the end of the input).
  """
  @spec handlenum(integer(), String.t(), String.t()) :: {String.t(), integer()}
  def handlenum(cursor, temp, input_strr) do
    take_while(cursor, temp, input_strr, &numeric/1)
  end

  @doc """
  Appends to `temp` every character of `input_strr` from index `cursor` up to,
  but not including, the next double quote.

  Returns `{text, next_cursor}`, where `next_cursor` is the index of the closing
  quote. When the input ends before a closing quote is found, the text gathered
  so far is returned with `next_cursor` pointing just past the end of the input.
  """
  @spec handlestring(integer(), String.t(), String.t()) :: {String.t(), integer()}
  def handlestring(cursor, temp, input_strr) do
    take_while(cursor, temp, input_strr, fn character -> character != "\"" end)
  end

  @doc """
  Appends to `temp` the run of alphanumeric characters (see `alphanumeric/1`) of
  `input_strr` starting at index `cursor`.

  Returns `{text, next_cursor}`, where `next_cursor` is the index of the first
  character that was not consumed (or the end of the input).
  """
  @spec handlealpha(integer(), String.t(), String.t()) :: {String.t(), integer()}
  def handlealpha(cursor, temp, input_strr) do
    take_while(cursor, temp, input_strr, &alphanumeric/1)
  end

  @doc """
  Lexes `input_str` and returns its tokens in source order.

  `len` is the index at which lexing stops; callers normally pass
  `String.length(input_str)`. `current` (default `0`) is the index to start
  from and `tokenstream` (default `[]`) holds already-lexed tokens in reverse
  order, so the common call is `Lexer.lex(String.length(src), src)`.

  An unterminated string literal is lexed as a `:str` token containing
  everything after the opening quote.

  ## Examples

      iex> Lexer.lex(5, "x = 1")
      [{"x", :identifier}, {"=", :assignment}, {"1", :number}]

  """
  @spec lex(non_neg_integer(), [token()], non_neg_integer(), String.t()) :: [token()]
  def lex(current \\ 0, tokenstream \\ [], len, input_str)

  def lex(current, tokenstream, len, _input_str) when current >= len do
    # Tokens are prepended while lexing (O(1) on a linked list), so the
    # accumulator is in reverse source order and is flipped once at the end.
    Enum.reverse(tokenstream)
  end

  def lex(current, tokenstream, len, input_str) do
    char = String.at(input_str, current)

    cond do
      # single-char tokens
      Map.has_key?(@single_char_tokens, char) ->
        type = Map.fetch!(@single_char_tokens, char)
        lex(current + 1, [{char, type} | tokenstream], len, input_str)

      # Tokens requiring repetition
      char == "\"" ->
        # Scan from the character after the opening quote.
        {token, cursor} = handlestring(current + 1, "", input_str)
        # cursor sits on the closing quote; step over it.
        lex(cursor + 1, [{token, :str} | tokenstream], len, input_str)

      numeric(char) ->
        {token, cursor} = handlenum(current, "", input_str)
        lex(cursor, [{token, :number} | tokenstream], len, input_str)

      alphanumeric(char) ->
        {token, cursor} = handlealpha(current, "", input_str)
        lex(cursor, [{token, :identifier} | tokenstream], len, input_str)

      # Conditional tokens: an operator directly followed by "=" is a compound
      # assignment and consumes both characters.
      arithmetic(char) ->
        if String.at(input_str, current + 1) == "=" do
          lex(current + 2, [{char <> "=", :assignment} | tokenstream], len, input_str)
        else
          lex(current + 1, [{char, :arithmetic} | tokenstream], len, input_str)
        end

      true ->
        # ignore all other characters
        lex(current + 1, tokenstream, len, input_str)
    end
  end

  # True when `char` is a string ordered between `low` and `high` inclusive.
  defp between?(char, low, high) do
    is_binary(char) and char >= low and char <= high
  end

  # Shared scanning loop for the handlers: appends characters to `temp` while
  # `accept_fun` approves them, stopping at the first rejected character or at
  # the end of the input (where String.at/2 returns nil).
  defp take_while(cursor, temp, input_strr, accept_fun) do
    case String.at(input_strr, cursor) do
      nil ->
        {temp, cursor}

      character ->
        if accept_fun.(character) do
          take_while(cursor + 1, temp <> character, input_strr, accept_fun)
        else
          {temp, cursor}
        end
    end
  end
end
# interfacing
# Reads one line from standard input, lexes it and prints the token list.
# IO.gets/1 returns :eof or {:error, reason} instead of a string when no line
# can be read, so those cases are reported rather than passed to String.length/1.
case IO.gets("Input your program: ") do
  inp when is_binary(inp) ->
    tokens = Lexer.lex(String.length(inp), inp)
    IO.inspect(tokens)

  :eof ->
    IO.puts(:stderr, "No input received.")

  {:error, reason} ->
    IO.puts(:stderr, "Could not read input: " <> inspect(reason))
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment