Last active
August 29, 2015 14:22
-
-
Save padde/d31b1a1028d7d61f5bf4 to your computer and use it in GitHub Desktop.
Elixir Regex Interpolation – do not use in production
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
defmodule Regex.InterpolationError do
  @moduledoc """
  Raised when a regex cannot be turned into an interpolatable source string.

  The exception carries a single `:message` field describing the problem.
  """

  defexception message: nil
end
defmodule Regex.Interpolation do
  @moduledoc """
  Wraps the source of a compiled regex in a PCRE inline-option group so it can
  be embedded in another pattern while keeping its own modifiers.

  Every supported inline option is spelled out explicitly: options the regex
  was compiled with are switched on and all remaining ones are switched off,
  so the modifiers of the surrounding pattern do not apply to the embedded one.
  """

  # Inline option letters understood by this module, in the order in which the
  # switched-off ones are listed after the hyphen.
  @available_pcre_options ~w[i m s U x]

  @doc """
  Returns the source of `regex` wrapped in an inline-option group.

  Raises `Regex.InterpolationError` when the regex was compiled with an option
  that has no inline equivalent here (for example the `u` or `f` modifiers).

  ## Examples

      iex> Regex.Interpolation.wrap(~r/foo/i)
      "(?i-msUx:foo)"

  """
  @spec wrap(Regex.t()) :: String.t()
  def wrap(%Regex{opts: modifiers} = regex) do
    regex
    |> Regex.source()
    |> do_wrap(modifiers)
  end

  # Builds "(?<on>-<off>:<source>)" from the regex source and its options.
  defp do_wrap(source, modifiers) do
    {active, inactive} =
      modifiers
      |> pcre_options()
      |> complement_pcre_options()

    "(?#{Enum.join(active)}-#{Enum.join(inactive)}:#{source})"
  end

  # Translates the stored options into a duplicate-free list of inline option
  # letters. Sorting happens on the Elixir modifier letters (before the
  # translation), which fixes the order of the emitted letters.
  defp pcre_options(modifiers) do
    modifiers
    |> modifier_letters()
    |> Enum.sort()
    |> Enum.uniq()
    |> Enum.map(&pcre_option/1)
  end

  # Depending on the Elixir version, `opts` holds either the modifier string
  # (e.g. "im") or a list of `:re` compile options (e.g. [:caseless,
  # :multiline]). Both are normalised to single modifier letters here.
  defp modifier_letters(modifiers) when is_binary(modifiers) do
    modifiers
    |> String.codepoints()
    |> Enum.map(&normalize_letter/1)
  end

  defp modifier_letters(options) when is_list(options) do
    # The `s` modifier is stored as `:dotall` together with
    # `{:newline, :anycrlf}`; the newline setting has no inline letter, so it
    # is dropped only when it accompanies `:dotall`. On its own it is rejected
    # like any other unsupported option.
    dotall? = :dotall in options

    options
    |> Enum.reject(&(dotall? and &1 == {:newline, :anycrlf}))
    |> Enum.map(&modifier_letter/1)
  end

  # `U` and `r` both mean "ungreedy"; folding `U` into `r` keeps the sort
  # position (and therefore the output) the same for either spelling.
  defp normalize_letter("U"), do: "r"
  defp normalize_letter(letter), do: letter

  # Maps a `:re` compile option back to the Elixir modifier letter it stands
  # for. Unknown options are rendered with `inspect/1` so that the resulting
  # error message names them.
  defp modifier_letter(:caseless), do: "i"
  defp modifier_letter(:multiline), do: "m"
  defp modifier_letter(:ungreedy), do: "r"
  defp modifier_letter(:dotall), do: "s"
  defp modifier_letter(:extended), do: "x"
  defp modifier_letter(:unicode), do: "u"
  defp modifier_letter(:ucp), do: "u"
  defp modifier_letter(:firstline), do: "f"
  defp modifier_letter(other), do: inspect(other)

  # Maps an Elixir modifier letter to its PCRE inline option letter.
  defp pcre_option("i"), do: "i"
  defp pcre_option("m"), do: "m"
  defp pcre_option("r"), do: "U"
  defp pcre_option("s"), do: "s"
  defp pcre_option("x"), do: "x"

  defp pcre_option(mod) do
    raise Regex.InterpolationError, message: "cannot interpolate regex using modifier #{mod}"
  end

  # Pairs the active options with every available option that is not active.
  defp complement_pcre_options(active_options) do
    inactive_options = @available_pcre_options -- active_options
    {active_options, inactive_options}
  end
end
defimpl String.Chars, for: Regex do
  # Makes `"#{regex}"` and `~r/#{regex}/` expand to the wrapped source
  # produced by Regex.Interpolation.wrap/1.
  def to_string(regex) do
    Regex.Interpolation.wrap(regex)
  end
end
# Boot ExUnit so the test module below runs when this file is executed as a script.
ExUnit.start()
defmodule RegexInterpolationTest do
  @moduledoc false
  use ExUnit.Case, async: true

  # Interpolates `inner` into an otherwise empty regex and returns the source
  # of the result. Sources are compared instead of whole `%Regex{}` structs so
  # the assertions do not depend on the private compiled pattern.
  defp interpolated_source(inner), do: Regex.source(~r/#{inner}/)

  test "regex without any modifiers" do
    assert interpolated_source(~r/foo/) == "(?-imsUx:foo)"
  end

  test "regex with caseless modifier (i)" do
    assert interpolated_source(~r/foo/i) == "(?i-msUx:foo)"
  end

  test "regex with dotall modifier (s)" do
    assert interpolated_source(~r/foo/s) == "(?s-imUx:foo)"
  end

  test "regex with multiline modifier (m)" do
    assert interpolated_source(~r/foo/m) == "(?m-isUx:foo)"
  end

  test "regex with extended modifier (x)" do
    assert interpolated_source(~r/foo/x) == "(?x-imsU:foo)"
  end

  test "regex with ungreedy modifier (r) " do
    assert interpolated_source(~r/foo/r) == "(?U-imsx:foo)"
  end

  test "regex with unicode modifier (u)" do
    expected_message = "cannot interpolate regex using modifier u"

    assert_raise Regex.InterpolationError, expected_message, fn ->
      ~r/#{~r/foo/u}/
    end
  end

  test "regex with firstline modifier (f)" do
    expected_message = "cannot interpolate regex using modifier f"

    assert_raise Regex.InterpolationError, expected_message, fn ->
      ~r/#{~r/foo/f}/
    end
  end

  test "regex with multiple modifiers" do
    assert interpolated_source(~r/foo/mi) == "(?im-sUx:foo)"
  end

  test "regex with multiple modifiers in non-default order" do
    assert interpolated_source(~r/foo/rxi) == "(?iUx-ms:foo)"
  end

  test "regex with duplicate modifiers" do
    assert interpolated_source(~r/foo/ii) == "(?i-msUx:foo)"
  end

  # Start-of-pattern items such as (*UTF8) end up inside the wrapping group,
  # so compiling the composed pattern is expected to fail.
  test "regex with start-of-pattern item" do
    assert_raise Regex.CompileError, fn ->
      ~r/#{~r/(*UTF8)/}/
    end
  end

  test "interpolate a caseless regex in a non-caseless regex" do
    regex = ~r/foo/i
    assert "foobar" =~ ~r/#{regex}bar/
    assert "FOObar" =~ ~r/#{regex}bar/
    refute "fooBAR" =~ ~r/#{regex}bar/
  end

  test "interpolate a non-caseless regex in a caseless regex" do
    regex = ~r/foo/
    assert "foobar" =~ ~r/#{regex}bar/i
    assert "fooBAR" =~ ~r/#{regex}bar/i
    refute "FOObar" =~ ~r/#{regex}bar/i
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment