-
-
Save inoas/7f90c7309b620daf1d07d96d5999c38c to your computer and use it in GitHub Desktop.
parser.ex
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Script for parsing input. You can run it as:
#
#     mix run priv/parser.exs
#
defmodule SearchInputParser do
  @moduledoc """
  Parses a boolean search expression into a nested list of tagged tokens.

  Grammar implemented below (loosest-binding rule first):

      expression   ::= term "|" expression | term " "* expression | term
      term         ::= factor "&" term | factor
      factor       ::= "(" expression ")" | input_string
      input_string ::= quoted sentence | bare word

  Based upon knowledge @ https://www.youtube.com/watch?v=dDtZLm7HIJs#t=15m54s

  Emitted tokens and tags:

    * `word: binary` - a bare word (no space, quote, bracket, `&` or `|`)
    * `sentence: binary` - the words of a quoted phrase joined with `" <-> "`
    * `:and`, `:or_by_pipe`, `:or_by_space`, `:brackets` - tags wrapping the
      tokens of the corresponding grammar rule

  The entry point is `parse_sanitized_user_input/1`, which requires the whole
  input to be consumed.
  """

  import NimbleParsec

  @space 0x0020

  # Single-character building blocks.
  whitespace_char = ascii_char([@space])
  quotation_mark = ascii_char([?"])
  left_bracket = ascii_char([?(])
  right_bracket = ascii_char([?)])
  or_ = ascii_char([?|])
  and_ = ascii_char([?&])

  # A quoted phrase: everything between two double quotes (at least one char).
  sentence =
    ignore(quotation_mark)
    |> utf8_string([not: ?"], min: 1)
    |> ignore(quotation_mark)
    |> post_traverse({:sentence_token, []})

  # Turns the matched phrase into `sentence: "w1 <-> w2 <-> ..."`.
  #
  # `trim: true` drops the empty strings that consecutive (or leading/trailing)
  # spaces would otherwise produce, so `"a  b"` yields `"a <-> b"` rather than
  # `"a <->  <-> b"`.
  #
  # NOTE(review): the `{acc, context}` return shape is kept from the original;
  # newer NimbleParsec releases appear to expect `{rest, acc, context}` -
  # confirm against the installed version.
  defp sentence_token(_rest, chars, context, _line, _offset) do
    phrase =
      chars
      |> Enum.reverse()
      |> List.to_string()
      |> String.split(" ", trim: true)
      |> Enum.join(" <-> ")

    {[sentence: phrase], context}
  end

  # A bare word: one or more characters that are not a space, a quote,
  # a bracket or one of the operators.
  word =
    utf8_string(
      [
        not: @space,
        not: ?",
        not: ?(,
        not: ?),
        not: ?&,
        not: ?|
      ],
      min: 1
    )
    |> post_traverse({:word_token, []})

  # Turns the matched characters into `word: binary`.
  defp word_token(_rest, chars, context, _line, _offset) do
    text = chars |> Enum.reverse() |> List.to_string()
    {[word: text], context}
  end

  # The sentence is tried first so a leading quote is never read as a word.
  input_string = choice([sentence, word])

  # expression ::= term "|" expression | term " "* expression | term
  #
  # NOTE(review): `repeat/1` also matches zero spaces, so two adjacent factors
  # such as `(a)(b)` are accepted as `:or_by_space` - confirm this is intended.
  defparsec(
    :expression,
    choice([
      parsec(:term)
      |> ignore(optional(whitespace_char))
      |> ignore(or_)
      |> ignore(optional(whitespace_char))
      |> parsec(:expression)
      |> tag(:or_by_pipe),
      parsec(:term)
      |> ignore(repeat(whitespace_char))
      |> parsec(:expression)
      |> tag(:or_by_space),
      parsec(:term)
    ])
  )

  # term ::= factor "&" term | factor
  defparsec(
    :term,
    choice([
      parsec(:factor)
      |> ignore(optional(whitespace_char))
      |> ignore(and_)
      |> ignore(optional(whitespace_char))
      |> parsec(:term)
      |> tag(:and),
      parsec(:factor)
    ])
  )

  # factor ::= "(" expression ")" | input_string
  defparsec(
    :factor,
    choice([
      ignore(left_bracket)
      |> ignore(optional(whitespace_char))
      |> parsec(:expression)
      |> ignore(optional(whitespace_char))
      |> ignore(right_bracket)
      |> tag(:brackets),
      input_string
    ])
  )

  # Top-level parser: a full expression followed by end of input.
  parser =
    parsec(:expression)
    |> ignore(eos())

  defparsec(:parse_sanitized_user_input, parser, debug: false)
end
# Sample queries covering quoted phrases, both operators, brackets and the
# space-as-OR shorthand.
inputs = [
  "\"1 quote\"",
  "\"2 quote\" | \"quote\"",
  "\"3 quote\" & foo | \"quote\"",
  "\"4 quote\" & (foo | bar) & \"quote\"",
  "5 & (foo | (bar | \"qu( & )ux\" & batz))",
  "6&(foo|(bar|\"qu( & )ux\"&batz))",
  "7bar&\"qu( & )ux\"",
  "8 | foo & (\"b a a r\" & quux) | batz",
  "99 | ab & \"c d\"",
  "10aa & bb | cc",
  "11aa | bb & cc",
  "(12aa & bb) | cc",
  "(13aa | bb) & cc",
  "14aa & (bb | cc)",
  "15aa | (bb & cc)",
  "(16aa) & bb | cc",
  "(17aa) | bb & cc",
  "a & b",
  "a | b",
  "10aa & bb cc",
  "11aa bb & cc",
  "(12aa & bb) cc",
  "(13aa bb) & cc",
  "14aa & (bb cc)",
  "15aa (bb & cc)",
  "(16aa) & bb cc",
  "(17aa) bb & cc",
  "a & b",
  "a b"
]

# Print every sample, numbered from 01, followed by the raw parser result and
# a blank line. The element bound by the iteration is used directly instead of
# being looked up again by index.
inputs
|> Enum.with_index(1)
|> Enum.each(fn {input, number} ->
  label = number |> Integer.to_string() |> String.pad_leading(2, "0")
  IO.write("input " <> label <> " => " <> input <> "\n")

  input
  |> SearchInputParser.parse_sanitized_user_input()
  |> IO.inspect()

  IO.write("\n")
end)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment