Skip to content

Instantly share code, notes, and snippets.

@inoas

inoas/parser.ex Secret

Created July 5, 2022 15:36
Show Gist options
  • Save inoas/7f90c7309b620daf1d07d96d5999c38c to your computer and use it in GitHub Desktop.
Save inoas/7f90c7309b620daf1d07d96d5999c38c to your computer and use it in GitHub Desktop.
parser.ex
# Script for parsing input. You can run it as:
#
# mix run priv/parser.exs
#
defmodule SearchInputParser do
  @moduledoc """
  Parses a search input string into a tagged tree, using NimbleParsec.

  ## Grammar

  Based upon knowledge @ https://www.youtube.com/watch?v=dDtZLm7HIJs#t=15m54s

      expression   ::= term or_ expression | term space* expression | term
      term         ::= factor and_ term | factor
      factor       ::= left_bracket expression right_bracket | input_string
      input_string ::= sentence | word
      sentence     ::= quotation_mark (any character except quotation_mark)+ quotation_mark
      word         ::= (any character except space, quotation_mark, brackets, and_, or_)+

      quotation_mark ::= ?"
      left_bracket   ::= ?(
      right_bracket  ::= ?)
      or_            ::= ?|
      and_           ::= ?&

  At most one space is accepted (and dropped) on each side of `|` and `&` and
  just inside brackets. Two terms that follow each other without an explicit
  operator, separated by zero or more spaces, are combined as an implicit OR.

  ## Output

  Nodes are emitted as tagged entries:

    * `{:word, binary}` - an unquoted word
    * `{:sentence, binary}` - a quoted phrase, its space-separated words joined
      with `" <-> "`
    * `{:and, [left, right]}` - `left & right`
    * `{:or_by_pipe, [left, right]}` - `left | right`
    * `{:or_by_space, [left, right]}` - `left right`
    * `{:brackets, [inner]}` - `( inner )`

  The entry point is `parse_sanitized_user_input/1`, generated by
  `NimbleParsec.defparsec/3`; it requires the whole input to be consumed.
  """

  import NimbleParsec

  @space 0x0020

  # Single-character building blocks.
  space = ascii_char([@space])
  quotation_mark = ascii_char([?"])
  left_bracket = ascii_char([?(])
  right_bracket = ascii_char([?)])
  or_ = ascii_char([?|])
  and_ = ascii_char([?&])

  # A quoted phrase: everything between two quotation marks (at least one
  # character); the quotation marks themselves are dropped.
  sentence =
    ignore(quotation_mark)
    |> utf8_string([not: ?"], min: 1)
    |> ignore(quotation_mark)
    |> post_traverse({:sentence_token, []})

  # A bare word: one or more characters that are neither a space nor one of
  # the grammar's special characters.
  word =
    utf8_string(
      [
        not: @space,
        not: ?",
        not: ?(,
        not: ?),
        not: ?&,
        not: ?|
      ],
      min: 1
    )
    |> post_traverse({:word_token, []})

  # The quoted form is tried first; a word can never start with a quotation mark.
  input_string = choice([sentence, word])

  # expression ::= term or_ expression | term space* expression | term
  #
  # Alternatives are tried in order, so an explicit `|` wins over the implicit
  # space-separated OR, and a lone term is the fallback.
  defparsec(
    :expression,
    choice([
      parsec(:term)
      |> ignore(optional(space))
      |> ignore(or_)
      |> ignore(optional(space))
      |> parsec(:expression)
      |> tag(:or_by_pipe),
      parsec(:term)
      |> ignore(repeat(space))
      |> parsec(:expression)
      |> tag(:or_by_space),
      parsec(:term)
    ])
  )

  # term ::= factor and_ term | factor
  #
  # `&` is parsed one level below the OR forms, so it binds tighter than them.
  defparsec(
    :term,
    choice([
      parsec(:factor)
      |> ignore(optional(space))
      |> ignore(and_)
      |> ignore(optional(space))
      |> parsec(:term)
      |> tag(:and),
      parsec(:factor)
    ])
  )

  # factor ::= left_bracket expression right_bracket | input_string
  defparsec(
    :factor,
    choice([
      ignore(left_bracket)
      |> ignore(optional(space))
      |> parsec(:expression)
      |> ignore(optional(space))
      |> ignore(right_bracket)
      |> tag(:brackets),
      input_string
    ])
  )

  # The public entry point must consume the entire input.
  parser =
    parsec(:expression)
    |> ignore(eos())

  defparsec(:parse_sanitized_user_input, parser, debug: false)

  # NOTE(review): both callbacks below return `{acc, context}`. Newer
  # NimbleParsec releases appear to document `{rest, acc, context}` for
  # traversal callbacks - confirm against the version pinned in mix.exs.

  # Turns the captured phrase into `sentence: "w1 <-> w2 <-> ..."`.
  #
  # `trim: true` drops the empty segments produced by leading, trailing or
  # repeated spaces, so the joined string never contains a `<->` with a
  # missing operand.
  defp sentence_token(_rest, chars, context, _line, _offset) do
    phrase =
      chars
      |> Enum.reverse()
      |> List.to_string()
      |> String.split(" ", trim: true)
      |> Enum.join(" <-> ")

    {[sentence: phrase], context}
  end

  # Wraps the captured word as `word: binary`.
  defp word_token(_rest, chars, context, _line, _offset) do
    text =
      chars
      |> Enum.reverse()
      |> List.to_string()

    {[word: text], context}
  end
end
# Sample inputs fed through the parser. The first group uses explicit `|`
# and `&` operators; the second group repeats the later samples with the `|`
# replaced by a plain space (implicit OR).
inputs = [
  "\"1 quote\"",
  "\"2 quote\" | \"quote\"",
  "\"3 quote\" & foo | \"quote\"",
  "\"4 quote\" & (foo | bar) & \"quote\"",
  "5 & (foo | (bar | \"qu( & )ux\" & batz))",
  "6&(foo|(bar|\"qu( & )ux\"&batz))",
  "7bar&\"qu( & )ux\"",
  "8 | foo & (\"b a a r\" & quux) | batz",
  "99 | ab & \"c d\"",
  "10aa & bb | cc",
  "11aa | bb & cc",
  "(12aa & bb) | cc",
  "(13aa | bb) & cc",
  "14aa & (bb | cc)",
  "15aa | (bb & cc)",
  "(16aa) & bb | cc",
  "(17aa) | bb & cc",
  "a & b",
  "a | b",
  "10aa & bb cc",
  "11aa bb & cc",
  "(12aa & bb) cc",
  "(13aa bb) & cc",
  "14aa & (bb cc)",
  "15aa (bb & cc)",
  "(16aa) & bb cc",
  "(17aa) bb & cc",
  "a & b",
  "a b"
]

# For every sample: print a zero-padded, 1-based header line, then the
# inspected parser result, then a blank separator line.
inputs
|> Enum.with_index(1)
|> Enum.each(fn {input, number} ->
  label = number |> Integer.to_string() |> String.pad_leading(2, "0")
  IO.write("input " <> label <> " => " <> input <> "\n")

  input
  |> SearchInputParser.parse_sanitized_user_input()
  |> IO.inspect()

  IO.write("\n")
end)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment