Skip to content

Instantly share code, notes, and snippets.

@flaviut
Created August 13, 2014 22:32
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save flaviut/66b28c32a3d07a7add78 to your computer and use it in GitHub Desktop.
Save flaviut/66b28c32a3d07a7add78 to your computer and use it in GitHub Desktop.
import parseutils, pegs, myutils, strutils, re
type
  Url* = object
    ## The individual components of a URL, as filled in by `parseUrl`.
    ##
    ## Components that may be absent from a URL are wrapped in `Option`
    ## (presumably provided by the `myutils` import -- confirm).
    scheme*: String               ## scheme text before "://", case preserved
    hostname*: String             ## host; bracketed literals keep their "[ ]"
    port*: Option[Int]            ## numeric port, when one was given
    username*: Option[String]     ## user name preceding "@", when given
    password*: Option[String]     ## password following "user:", when given
    path*: String                 ## path component; may be the empty string
    parameters*: Option[String]   ## text after "?", without the "?" itself
    fragments*: Option[String]    ## text after "#", without the "#" itself
# PEG transcription of the generic URI grammar from RFC 3986, appendix A.
#
# PEG differs from ABNF in ways that matter here:
#  * choice is ordered and never revisited once an alternative has matched,
#    so longer alternatives must be listed before their own prefixes
#    (see `dec_octet` and `host`);
#  * `e?` / `e*` are greedy and are not backtracked into, so the optional
#    groups in front of "::" are written as `h16 (":" h16)?...` -- a
#    `(h16 ":")?` prefix would swallow the first colon of "::";
#  * repetition is a suffix operator: `( e )*`, not ABNF's `*( e )`.
let uriValidator = peg"""
  Uri           <- ^ scheme ":" hier_part ("?" query)? ("#" fragment)? $

  hier_part     <- "//" authority path_abempty
                 / path_absolute   # begins with "/" but not "//"
                 / path_rootless   # begins with a segment
                 / path_empty      # zero characters

  scheme        <- \a ( \a / \d / "+" / "-" / "." )*

  authority     <- ( userinfo "@" )? host ( ":" port )?
  userinfo      <- ( unreserved / pct_encoded / sub_delims / ":" )*
  # An IPv4 match only counts when the host ends there; otherwise names such
  # as "1.2.3.4.example.com" must fall through to the reg-name alternative.
  host          <- IP_literal / IPv4address !uri_char / uri_char*
  port          <- \d*

  IP_literal    <- "[" ( IPv6address / IPvFuture ) "]"
  IPvFuture     <- "v" [0-9A-Fa-f]+ "." ( unreserved / sub_delims / ":" )+

  IPv6address   <- h16 ":" h16 ":" h16 ":" h16 ":" h16 ":" h16 ":" ls32
                 / "::" h16 ":" h16 ":" h16 ":" h16 ":" h16 ":" ls32
                 / h16? "::" h16 ":" h16 ":" h16 ":" h16 ":" ls32
                 / ( h16 (":" h16)? )?
                   "::" h16 ":" h16 ":" h16 ":" ls32
                 / ( h16 (":" h16)? (":" h16)? )?
                   "::" h16 ":" h16 ":" ls32
                 / ( h16 (":" h16)? (":" h16)? (":" h16)? )?
                   "::" h16 ":" ls32
                 / ( h16 (":" h16)? (":" h16)? (":" h16)? (":" h16)? )?
                   "::" ls32
                 / ( h16 (":" h16)? (":" h16)? (":" h16)? (":" h16)?
                     (":" h16)? )?
                   "::" h16
                 / ( h16 (":" h16)? (":" h16)? (":" h16)? (":" h16)?
                     (":" h16)? (":" h16)? )?
                   "::"

  h16           <- [0-9A-Fa-f] [0-9A-Fa-f]? [0-9A-Fa-f]? [0-9A-Fa-f]?
  ls32          <- h16 ":" h16 / IPv4address
  IPv4address   <- dec_octet "." dec_octet "." dec_octet "." dec_octet

  # Longest alternatives first: with ordered choice a leading `[0-9]` would
  # match the first digit of "192" and the remaining digits would then make
  # the enclosing sequence fail.
  dec_octet     <- '25' [0-5]
                 / '2' [0-4] [0-9]
                 / '1' [0-9] [0-9]
                 / [1-9] [0-9]
                 / [0-9]

  path_abempty  <- ( "/" pchar* )*
  path_absolute <- "/" ( pchar+ ( "/" pchar* )* )?
  path_rootless <- pchar+ ( "/" pchar* )*
  path_empty    <- ''

  pchar         <- uri_char / [:@]
  query         <- ( pchar / [/?] )*
  fragment      <- ( pchar / [/?] )*

  pct_encoded   <- "%" [0-9A-Fa-f] [0-9A-Fa-f]
  sub_delims    <- [!$&'()*+,;=]
  # RFC 3986: ALPHA / DIGIT / "-" / "." / "_" / "~"  ("/" is not unreserved)
  unreserved    <- [a-zA-Z0-9._~-]
  uri_char      <- unreserved
                 / pct_encoded
                 / sub_delims
"""
proc isValidUri*(uri: String): Bool =
  ## Reports whether `uri`, taken as a whole, matches the `uriValidator`
  ## grammar, i.e. the generic URI syntax of IETF RFC 3986 (the current
  ## standard for URIs).
  ##
  ## Only the generic syntax is checked; rules imposed by the specification
  ## of any particular protocol are not.
  result = uri.match(uriValidator)
# Captures every character in front of the first ':' as the scheme; whatever
# follows the colon is accepted without further inspection.
let schemeParser = peg"^{(![:] .)+} ':' .*$"

proc getScheme(url: String): String =
  ## Extracts the scheme of `url` (the text before the first ':') and
  ## returns it lower-cased.
  ##
  ## Raises `EInvalidValue` when `url` does not contain a non-empty scheme
  ## followed by a colon.
  if url =~ schemeParser:
    result = toLower(matches[0])
  else:
    raise newException(EInvalidValue,
      "`"& url &"` is either invalid or missing a scheme name")
# Regular expression that splits an http(s) URL into eight capture groups,
# in this order: scheme, username, password, hostname, port, path, params,
# fragmts.
#
# The pattern is laid out with insignificant whitespace, so it must be
# compiled in extended mode. The flags are passed explicitly instead of
# relying on the default flag set of `re`.
#
# The user-info and hostname classes stop at '/', '?' and '#': the authority
# part of a URL ends at the first of those characters, so an '@' or ':' that
# appears later (in the path, query or fragment) must not be read as a
# user-info or port separator.
let httpUrlParser = re("""
^
(?'scheme' (?i) https?)
:\/\/
(?:
  (?'username' [^:@\/?#]*+)?
  (?:
    : (?'password' [^@\/?#]*+)
  )?
  @
)?
(?'hostname' \[ [^\]]++ \]
           | [^:\/?#]++)
(?:
  :
  (?'port' \d++)
)?
(?'path' [^?#]*+)
(?:
  \?
  (?'params' [^#]*+)
)?
(?:
  \#
  (?'fragmts' .*+)
)?
$""", {reExtended, reStudy})
proc parseUrl*(url: String): Url =
  ## Breaks `url` down into its component parts and returns them as a `Url`.
  ##
  ## The input is assumed to be well formed. No validation is carried out, so
  ## some URLs that are not well formed may nevertheless parse successfully.
  ##
  ## Schemes and their syntax:
  ##
  ## HTTP[S]: a leading `username:password@` is not part of RFC 2616, but it
  ##          is parsed anyway because it is used so frequently.
  ##   Examples:
  ##     https://foo:bar@host.com:23/path1/path2.php?q=fd#5
  ##     http://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]?%20%20foo
  ##
  ## Raises `EInvalidValue` when the scheme is missing or unknown, when an
  ## http(s) URL does not have the expected layout, or when the port number
  ## cannot be parsed.
  let schemeName = getScheme(url)
  if schemeName != "http" and schemeName != "https":
    raise newException(EInvalidValue, "Unknown scheme `" & schemeName & "`")

  if url =~ httpUrlParser:
    # Capture groups: 0 scheme, 1 username, 2 password, 3 hostname,
    #                 4 port, 5 path, 6 parameters, 7 fragments.
    # Components that are always present:
    result.scheme = matches[0]
    result.hostname = matches[3]
    result.path = matches[5]

    # Optional components: only assigned when the group is not nil.
    if matches[1] != nil:
      result.username = Some(matches[1])
    if matches[2] != nil:
      result.password = Some(matches[2])
    if matches[4] != nil:
      var portValue = 0
      # parseutils.parseInt returns the number of characters it consumed;
      # zero means that no integer could be read.
      if parseInt(matches[4], portValue) == 0:
        raise newException(EInvalidValue,
          "Port number `" & matches[4] & "` is invalid")
      result.port = Some(portValue)
    if matches[6] != nil:
      result.parameters = Some(matches[6])
    if matches[7] != nil:
      result.fragments = Some(matches[7])
  else:
    raise newException(EInvalidValue, "Malformed http URL `" & url & "`")
when true:
  # Self-test executed at module start-up: every sample URL is parsed and
  # compared with a hand-built `Url`; each difference is echoed.

  proc optionalText(text: String): Option[String] =
    ## Wraps `text` in an `Option`, treating nil as "absent".
    result = if text == nil: None[String]() else: Some(text)

  proc nUrl(scheme: String,
            username, password: String,    # optional: nil means absent
            hostname: String,
            port: Int,                     # optional: -1 means absent
            path: String,
            parameters, fragments: String  # optional: nil means absent
           ): Url =
    ## Builds the expected `Url` for a test case from plain values.
    let expectedPort = if port == -1: None[Int]() else: Some(port)
    result = Url(scheme: scheme,
                 hostname: hostname,
                 port: expectedPort,
                 username: optionalText(username),
                 password: optionalText(password),
                 path: path,
                 parameters: optionalText(parameters),
                 fragments: optionalText(fragments))

  # Pairs of (input URL, expected parse result).
  let testcases = {
    r"https://host.com": nUrl("https", nil, nil,
      "host.com", -1, "", nil, nil),
    r"https://foo:bar@host.com:23/path1/path2.php?q=fd": nUrl("https",
      "foo", "bar", "host.com", 23, "/path1/path2.php", "q=fd", nil),
    r"https://foo:bar@host.com:23/path1/path2.php#5": nUrl("https",
      "foo", "bar", "host.com", 23, "/path1/path2.php", nil, "5"),
    r"https://foo:bar@host.com:23?q=fd#5": nUrl("https",
      "foo", "bar", "host.com", 23, "", "q=fd", "5"),
    r"https://foo:bar@host.com/path1/path2.php?q=fd#5": nUrl("https",
      "foo", "bar", "host.com", -1, "/path1/path2.php", "q=fd", "5"),
    r"https://foo@host.com:23/path1/path2.php?q=fd#5": nUrl("https",
      "foo", nil, "host.com", 23, "/path1/path2.php", "q=fd", "5"),
    r"https://host.com:23/path1/path2.php?q=fd#5": nUrl("https",
      nil, nil, "host.com", 23, "/path1/path2.php", "q=fd", "5"),
    r"https://foo:bar@host.com:23/path1/path2.php?q=fd#5": nUrl("https",
      "foo", "bar", "host.com", 23, "/path1/path2.php", "q=fd", "5"),
    r"https://foo:bar@host.com:23/path1/path2.php?q=fd#5": nUrl("https",
      "foo", "bar", "host.com", 23, "/path1/path2.php", "q=fd", "5"),
    r"http://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]?%20%20foo": nUrl(
      "http", nil, nil, "[2001:0db8:85a3:0000:0000:8a2e:0370:7334]", -1,
      "", "%20%20foo", nil),
    r"HtTpS://%20%30r1:o%32@f.cs:23/P%20/23.23#%20123": nUrl("HtTpS",
      "%20%30r1", "o%32", "f.cs", 23, "/P%20/23.23", nil, "%20123")
  }

  for entry in testcases:
    let
      expected = entry[1]
      actual = parseUrl(entry[0])
    if actual != expected:
      echo "Mismatch between \n", actual, " and\n", $expected
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment