Created
August 6, 2014 14:53
-
-
Save Nicd/c98cb9ae18124fd0edf5 to your computer and use it in GitHub Desktop.
A URI parser written using Elixir pattern matching. It was written for Elixir 0.10 and no longer compiles on current versions because it uses records.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
defmodule Nurina do
  # Parse result. Every component is nil when it is absent from the URI.
  # `valid` is flipped to false as soon as a malformed part is found and
  # `is_ipv6` is set when the host was written as a bracketed IP literal.
  defrecord Info,
    scheme: nil,
    hier: nil,
    query: nil,
    fragment: nil,
    valid: true,
    authority: nil,
    path: nil,
    host: nil,
    port: nil,
    userinfo: nil,
    is_ipv6: false

  @doc """
  Parse an URI into components. Will return a Nurina.Info record.

  Tries to follow RFC 3986. Malformed input does not raise; the returned
  record has `valid` set to false instead. When the URI is valid and carries
  no explicit port, `port` is filled in with `URI.default_port/1` for the
  parsed scheme.

  The other `parse` arities are the states of the parser. They are invoked
  with the remaining input, the record built so far, (for parse/4) the text
  accumulated for the current part, and an atom naming the current mode.
  """
  def parse(uri) do
    parsed = parse(uri, Info.new, "", :scheme)

    # Written as an expression so that the result does not depend on a
    # variable rebound inside the `if` body leaking out of it.
    if parsed.valid and parsed.port == nil do
      parsed.port(URI.default_port(parsed.scheme))
    else
      parsed
    end
  end

  # Hier part dispatch
  # A leading "//" means an authority follows, otherwise the whole hier part
  # is the path.
  def parse(<< "//", rest :: binary >>, parsed, :hier_parse), do: parse(rest, parsed, :hier_auth)
  def parse(hier, parsed, :hier_parse), do: parse(hier, parsed, :hier_no_auth)

  def parse(hier, parsed, :hier_no_auth), do: parsed.path(nil_or(hier))

  def parse(hier, parsed, :hier_auth) do
    parsed = parse(hier, parsed, "", :authority)

    # Go inside authority to parse parts, if there is one
    if parsed.authority != nil do
      parse(parsed.authority, parsed, "", :userinfo)
    else
      parsed
    end
  end

  # Host part parsing
  # Split into IPv6 parsing if needed, :host_4 will handle IPv4 and domains
  def parse(<< "[", rest :: binary >>, parsed, :host), do: parse(rest, parsed, "", :host_6)
  def parse(hier, parsed, :host), do: parse(hier, parsed, "", :host_4)

  # Port part parsing
  # The port is everything after the ":" and must be consumed completely by
  # the integer parser; a leftover such as "80abc" or a negative number marks
  # the URI as invalid instead of being silently truncated.
  def parse(<< ":", rest :: binary >>, parsed, :port) do
    case String.to_integer(rest) do
      { port, "" } when port >= 0 -> parsed.port(port)
      _ -> parsed.valid(false)
    end
  end

  # Nothing after the host: no explicit port
  def parse("", parsed, :port), do: parsed.port(nil)

  # Anything else directly after the host (only reachable after the closing
  # "]" of an IPv6 literal) is not a port and makes the authority malformed.
  def parse(_, parsed, :port), do: parsed.valid(false)

  # Scheme part parsing
  # If URI stops at scheme, it's not valid
  def parse("", parsed, _, :scheme), do: parsed.valid(false)

  def parse(<< ":", rest :: binary >>, parsed, current_part, :scheme) do
    parsed = parsed.scheme(String.downcase(current_part))
    parse(rest, parsed, "", :hier)
  end

  # Hier part parsing
  # Hier is the hierarchical sequence of the URI. In RFC 3986 it is 'hier-part'.
  # It ends at the end of input, at "?" (query follows) or at "#" (fragment
  # follows without a query).
  def parse("", parsed, current_part, :hier), do: close_hier(parsed, current_part)

  def parse(<< "?", rest :: binary >>, parsed, current_part, :hier) do
    parsed = close_hier(parsed, current_part)
    parse(rest, parsed, "", :query)
  end

  def parse(<< "#", rest :: binary >>, parsed, current_part, :hier) do
    parsed = close_hier(parsed, current_part)
    # All the rest is just fragment
    parsed.fragment(nil_or(rest))
  end

  # Query part parsing
  def parse("", parsed, current_part, :query), do: parsed.query(nil_or(current_part))

  def parse(<< "#", rest :: binary >>, parsed, current_part, :query) do
    parsed = parsed.query(nil_or(current_part))
    # All the rest is just fragment
    parsed.fragment(nil_or(rest))
  end

  # Authority part parsing
  # The authority runs until the first "/" which starts the path
  def parse("", parsed, current_part, :authority), do: parsed.authority(nil_or(current_part))

  def parse(<< "/", rest :: binary >>, parsed, current_part, :authority) do
    parsed = parsed.authority(nil_or(current_part))
    parsed.path("/" <> rest)
  end

  # Userinfo part parsing
  # If no userinfo was found, start from the beginning and look for host instead
  def parse("", parsed, current_part, :userinfo), do: parse(current_part, parsed, :host)

  def parse(<< "@", rest :: binary >>, parsed, current_part, :userinfo) do
    parsed = parsed.userinfo(nil_or(current_part))
    parse(rest, parsed, :host)
  end

  # IPv6 host parsing
  # Host must not end without closing ]
  def parse("", parsed, _, :host_6), do: parsed.valid(false)

  def parse(<< "]", rest :: binary >>, parsed, current_part, :host_6) do
    parsed = parsed.host(nil_or(current_part))
    parsed = parsed.is_ipv6(true)
    parse(rest, parsed, :port)
  end

  # "Normal" host parsing
  def parse("", parsed, current_part, :host_4), do: parsed.host(nil_or(current_part))

  def parse(<< ":", rest :: binary >>, parsed, current_part, :host_4) do
    parsed = parsed.host(nil_or(current_part))
    # Hand the ":" back so that the :port clauses see the delimiter
    parse(":" <> rest, parsed, :port)
  end

  # Default walking function for all parsing modes, just walk through all
  # non recognised characters.
  # The input is consumed one byte at a time and the byte is appended as is.
  # Re-encoding it with `:: utf8` would treat every byte >= 128 as a code
  # point and double-encode multi-byte characters. All delimiters matched by
  # the clauses above are single ASCII bytes, so walking bytewise is safe.
  def parse(<< char, rest :: binary >>, parsed, current_part, mode) do
    parse(rest, parsed, current_part <> << char >>, mode)
  end

  # Store the finished hier part and go inside it to parse its components
  defp close_hier(parsed, hier) do
    parsed = parsed.hier(nil_or(hier))
    parse(hier, parsed, :hier_parse)
  end

  # Convert "" into nil
  defp nil_or(""), do: nil
  defp nil_or(str), do: str
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment