Skip to content

Instantly share code, notes, and snippets.

@Nicd
Created August 6, 2014 14:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Nicd/c98cb9ae18124fd0edf5 to your computer and use it in GitHub Desktop.
Save Nicd/c98cb9ae18124fd0edf5 to your computer and use it in GitHub Desktop.
A URI parser written using Elixir binary pattern matching. It was originally written for Elixir 0.10 and used records (`defrecord`), so that original form does not compile on later Elixir versions.
defmodule Nurina do
  @moduledoc """
  A URI parser built on binary pattern matching. Tries to follow RFC 3986.

  `parse/1` is the entry point. It drives a small state machine implemented by
  the `parse/3` and `parse/4` clauses:

    * `parse/4` clauses walk the input one byte at a time, accumulating the
      current component until a delimiter for the current mode is found.
    * `parse/3` clauses dispatch on an already isolated component
      (hierarchical part, host, port).

  Every step returns a `Nurina.Info` struct. Failures are reported through the
  `valid` field rather than by raising.
  """

  defmodule Info do
    @moduledoc """
    Components of a parsed URI, as returned by `Nurina.parse/1`.

    Every component is `nil` when absent. `valid` is `false` when the input
    could not be parsed, and `is_ipv6` is `true` when the host was given in
    bracketed form (`[...]`).
    """

    defstruct scheme: nil,
              hier: nil,
              query: nil,
              fragment: nil,
              valid: true,
              authority: nil,
              path: nil,
              host: nil,
              port: nil,
              userinfo: nil,
              is_ipv6: false

    @type t :: %__MODULE__{
            scheme: String.t() | nil,
            hier: binary() | nil,
            query: binary() | nil,
            fragment: binary() | nil,
            valid: boolean(),
            authority: binary() | nil,
            path: binary() | nil,
            host: binary() | nil,
            port: non_neg_integer() | nil,
            userinfo: binary() | nil,
            is_ipv6: boolean()
          }

    @doc """
    Builds an `Info` struct, optionally overriding defaults with `fields`.

    Kept so that record-style call sites using `Info.new` keep working.
    """
    @spec new(keyword()) :: t()
    def new(fields \\ []), do: struct(__MODULE__, fields)
  end

  @doc """
  Parses `uri` into its components and returns a `Nurina.Info` struct.

  When the URI is valid and carries no explicit port, the port is filled in
  from `URI.default_port/1` for the (lower-cased) scheme, which may itself be
  `nil` for schemes without a registered default.

  ## Examples

      iex> info = Nurina.parse("http://example.com/a?b=c#d")
      iex> {info.scheme, info.host, info.port, info.path, info.query, info.fragment}
      {"http", "example.com", 80, "/a", "b=c", "d"}

  """
  @spec parse(binary()) :: Info.t()
  def parse(uri) do
    parsed = parse(uri, %Info{}, "", :scheme)

    # A valid result always has a binary scheme: reaching the end of input
    # while still in :scheme mode marks the URI invalid.
    if parsed.valid and parsed.port == nil do
      %{parsed | port: URI.default_port(parsed.scheme)}
    else
      parsed
    end
  end

  # --- parse/3: dispatch on an already isolated component -------------------

  # Hier part: a leading "//" means an authority follows, otherwise the whole
  # hier part is the path.
  @doc false
  def parse(<<"//", rest::binary>>, parsed, :hier_parse), do: parse(rest, parsed, :hier_auth)
  def parse(hier, parsed, :hier_parse), do: parse(hier, parsed, :hier_no_auth)
  def parse(hier, parsed, :hier_no_auth), do: %{parsed | path: nil_or(hier)}

  def parse(hier, parsed, :hier_auth) do
    parsed = parse(hier, parsed, "", :authority)

    # Go inside the authority (userinfo, host, port) only when there is one.
    case parsed.authority do
      nil -> parsed
      authority -> parse(authority, parsed, "", :userinfo)
    end
  end

  # Host: a leading "[" selects IPv6 parsing; :host_4 handles IPv4 addresses
  # and registered names.
  def parse(<<"[", rest::binary>>, parsed, :host), do: parse(rest, parsed, "", :host_6)
  def parse(host, parsed, :host), do: parse(host, parsed, "", :host_4)

  # Port: everything after the ":" must be decimal digits. Anything else,
  # including an empty port or trailing garbage, marks the URI invalid.
  def parse(<<":", digits::binary>>, parsed, :port) do
    if digits =~ ~r/\A[0-9]+\z/ do
      %{parsed | port: String.to_integer(digits)}
    else
      %{parsed | valid: false}
    end
  end

  # Nothing after the host: no explicit port.
  def parse("", parsed, :port), do: %{parsed | port: nil}

  # Only reachable with text that follows an IPv6 "]" without a ":" separator.
  def parse(_junk, parsed, :port), do: %{parsed | valid: false}

  # --- parse/4: byte-wise walkers, one group of clauses per mode ------------

  # Scheme: a URI that ends before the ":" delimiter is not valid.
  @doc false
  def parse("", parsed, _current_part, :scheme), do: %{parsed | valid: false}

  def parse(<<":", rest::binary>>, parsed, current_part, :scheme) do
    parse(rest, %{parsed | scheme: String.downcase(current_part)}, "", :hier)
  end

  # Hier: the hierarchical sequence of the URI ('hier-part' in RFC 3986).
  # It ends at end of input, at "?" (a query follows) or at "#" (a fragment
  # follows directly, without a query).
  def parse("", parsed, current_part, :hier) do
    parsed = %{parsed | hier: nil_or(current_part)}
    parse(current_part, parsed, :hier_parse)
  end

  def parse(<<"?", rest::binary>>, parsed, current_part, :hier) do
    parsed = %{parsed | hier: nil_or(current_part)}
    parsed = parse(current_part, parsed, :hier_parse)
    parse(rest, parsed, "", :query)
  end

  def parse(<<"#", rest::binary>>, parsed, current_part, :hier) do
    parsed = %{parsed | hier: nil_or(current_part)}
    parsed = parse(current_part, parsed, :hier_parse)
    # Everything after "#" is the fragment.
    %{parsed | fragment: nil_or(rest)}
  end

  # Query: runs until end of input or "#"; all the rest is the fragment.
  def parse("", parsed, current_part, :query), do: %{parsed | query: nil_or(current_part)}

  def parse(<<"#", rest::binary>>, parsed, current_part, :query) do
    %{parsed | query: nil_or(current_part), fragment: nil_or(rest)}
  end

  # Authority: runs until end of input or the first "/", which starts the path.
  def parse("", parsed, current_part, :authority) do
    %{parsed | authority: nil_or(current_part)}
  end

  def parse(<<"/", rest::binary>>, parsed, current_part, :authority) do
    %{parsed | authority: nil_or(current_part), path: "/" <> rest}
  end

  # Userinfo: if no "@" is found, the whole authority is re-read as host[:port].
  def parse("", parsed, current_part, :userinfo), do: parse(current_part, parsed, :host)

  def parse(<<"@", rest::binary>>, parsed, current_part, :userinfo) do
    parse(rest, %{parsed | userinfo: nil_or(current_part)}, :host)
  end

  # IPv6 host: must not end without the closing "]".
  def parse("", parsed, _current_part, :host_6), do: %{parsed | valid: false}

  def parse(<<"]", rest::binary>>, parsed, current_part, :host_6) do
    parse(rest, %{parsed | host: nil_or(current_part), is_ipv6: true}, :port)
  end

  # "Normal" host (IPv4 or name): runs until end of input or the port ":".
  def parse("", parsed, current_part, :host_4), do: %{parsed | host: nil_or(current_part)}

  def parse(<<":", _::binary>> = port_part, parsed, current_part, :host_4) do
    # The ":" is kept so the :port clauses can recognise the separator.
    parse(port_part, %{parsed | host: nil_or(current_part)}, :port)
  end

  # Default walker for all modes: consume one byte that is not a delimiter for
  # the current mode. The raw byte is appended unchanged so multi-byte UTF-8
  # sequences in the input are preserved.
  def parse(<<char, rest::binary>>, parsed, current_part, mode) do
    parse(rest, parsed, <<current_part::binary, char>>, mode)
  end

  # Convert "" into nil so absent components are reported uniformly.
  defp nil_or(""), do: nil
  defp nil_or(str), do: str
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment