Created
August 13, 2014 22:32
-
-
Save flaviut/66b28c32a3d07a7add78 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import parseutils, pegs, myutils, strutils, re | |
type
  Url* = object ## The components of a URL as produced by `parseUrl`.
    scheme*: String              ## Scheme exactly as written in the URL
    hostname*: String            ## Host; IPv6 literals keep their brackets
    port*: Option[Int]           ## Port number, when one was given
    username*: Option[String]    ## User name preceding the `@`, if any
    password*: Option[String]    ## Password following `username:`, if any
    path*: String                ## Path; empty when the URL has none
    parameters*: Option[String]  ## Text after `?`, without the `?`
    fragments*: Option[String]   ## Text after `#`, without the `#`
# PEG transcription of the generic URI grammar (ABNF) from IETF RFC 3986.
#
# PEG differs from ABNF in two ways that matter here:
#   * repetition operators are postfix (`x*`), not prefix (`*x`);
#   * choice (`/`) is ordered and is never reconsidered once an alternative
#     has matched, so longer alternatives must come before their prefixes.
let uriValidator = peg"""
  Uri           <- ^ scheme ":" hier_part ("?" query)? ("#" fragment)? $

  hier_part     <- "//" authority path_abempty
                 / path_absolute   # begins with "/" but not "//"
                 / path_rootless   # begins with a segment
                 / path_empty      # zero characters

  scheme        <- \a ( \a / \d / "+" / "-" / "." )*

  authority     <- ( userinfo "@" )? host ( ":" port )?
  userinfo      <- ( unreserved / pct_encoded / sub_delims / ":" )*
  # The look-ahead keeps names such as "1.2.3.4.example.com" from being cut
  # off after a leading dotted quad; they fall through to the reg-name branch.
  host          <- IP_literal / IPv4address !uri_char / uri_char*
  port          <- \d*

  IP_literal    <- "[" ( IPv6address / IPvFuture ) "]"

  IPvFuture     <- "v" [0-9A-Fa-f]+ "." ( unreserved / sub_delims / ":" )+

  # NOTE(review): literal transcription of the RFC alternatives; because PEG
  # options do not backtrack, some compressed forms may be rejected - verify
  # against real-world addresses before relying on this for IPv6.
  IPv6address   <- h16 ":" h16 ":" h16 ":" h16 ":" h16 ":" h16 ":" ls32
                 / "::" h16 ":" h16 ":" h16 ":" h16 ":" h16 ":" ls32
                 / ( h16 )? "::" h16 ":" h16 ":" h16 ":" h16 ":" ls32
                 / ( (h16 ":")? h16 )? "::" h16 ":" h16 ":" h16 ":" ls32
                 / ( (h16 ":")? (h16 ":")? h16 )? "::" h16 ":" h16 ":" ls32
                 / ( (h16 ":")? (h16 ":")? (h16 ":")? h16 )? "::" h16 ":" ls32
                 / ( (h16 ":")? (h16 ":")? (h16 ":")? (h16 ":")? h16 )? "::" ls32
                 / ( (h16 ":")? (h16 ":")? (h16 ":")? (h16 ":")? (h16 ":")? h16 )? "::" h16
                 / ( (h16 ":")? (h16 ":")? (h16 ":")? (h16 ":")? (h16 ":")? (h16 ":")? h16 )? "::"

  h16           <- [0-9A-Fa-f] [0-9A-Fa-f]? [0-9A-Fa-f]? [0-9A-Fa-f]?
  ls32          <- h16 ":" h16 / IPv4address
  IPv4address   <- dec_octet "." dec_octet "." dec_octet "." dec_octet

  # Longest alternatives first: a leading single-digit rule would always win
  # and leave the remaining digits unmatched.
  dec_octet     <- '25' [0-5]
                 / '2' [0-4][0-9]
                 / '1' [0-9][0-9]
                 / [1-9][0-9]
                 / [0-9]

  path_abempty  <- ( "/" pchar* )*
  path_absolute <- "/" ( pchar+ ( "/" pchar* )* )?
  path_rootless <- pchar+ ( "/" pchar* )*
  path_empty    <- ''

  pchar         <- uri_char / [:@]

  query         <- ( pchar / [/?] )*
  fragment      <- ( pchar / [/?] )*

  pct_encoded   <- "%" [0-9A-Fa-f] [0-9A-Fa-f]

  sub_delims    <- [!$&'()*+,;=]
  # "/" is a delimiter, not an unreserved character, so it is not listed here.
  unreserved    <- [a-zA-Z0-9-._~]

  uri_char      <- unreserved
                 / pct_encoded
                 / sub_delims
"""
proc isValidUri*(uri: String): Bool =
  ## Reports whether `uri` conforms to the generic URI grammar of IETF
  ## RFC 3986, the current standard for URIs.
  ##
  ## Only the generic syntax is checked; rules that an individual protocol
  ## places on its own URIs are not taken into account.
  result = match(uri, uriValidator)
# Captures everything before the first ':' (at least one character).
let schemeParser = peg"^{(![:] .)+} ':' .*$"

proc getScheme(url: String): String =
  ## Returns the lower-cased scheme name of `url`, i.e. the text in front of
  ## the first colon.
  ##
  ## Raises `EInvalidValue` when `url` does not match `schemeParser`.
  var captures: array[0..pegs.MaxSubpatterns-1, string]
  if not match(url, schemeParser, captures):
    raise newException(EInvalidValue, "`"& url &"` is either invalid or missing a scheme name")
  result = captures[0].toLower
# Regular expression that splits an http/https URL into its components.
#
# Capture groups, in order:
#   0 scheme, 1 username, 2 password, 3 hostname, 4 port,
#   5 path, 6 params (after `?`), 7 fragmts (after `#`)
#
# The pattern is laid out with insignificant whitespace, so `reExtended` is
# passed explicitly instead of relying on the default flags of `re`.
#
# The authority part (user info, host, port) ends at the first `/`, `?` or
# `#`; the user-info and host classes therefore exclude those characters so
# that an `@` or `:` appearing later in the path, query or fragment is not
# mistaken for part of the authority.
let httpUrlParser = re("""
  ^
  (?'scheme' (?i) https?)
  :\/\/
  (?:
    (?'username' [^:@\/?\#]*+)?
    (?:
      : (?'password' [^@\/?\#]*+)
    )?
    @
  )?
  (?'hostname' \[ [^\]]++ \]
             | [^:\/?\#]++)
  (?:
    :
    (?'port' \d++)
  )?
  (?'path' [^?#]*+)
  (?:
    \?
    (?'params' [^#]*+)
  )?
  (?:
    \#
    (?'fragmts' .*+)
  )?
  $""", {reExtended, reStudy})
proc parseUrl*(url: String): Url =
  ## Splits `url` into its component parts, assuming it is well formed.
  ## No validation is performed, so some URLs that are not well formed may
  ## nevertheless be parsed successfully.
  ##
  ## Schemes and their syntax:
  ##
  ## HTTP[S]: Note that the prefix username and password are not part of
  ##          RFC 2616, but they are parsed due to usage frequency
  ##   Examples:
  ##     https://foo:bar@host.com:23/path1/path2.php?q=fd#5
  ##     http://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]?%20%20foo
  ##
  ## Raises `EInvalidValue` when the scheme is missing or unknown, when the
  ## URL does not match the http pattern, or when the port cannot be parsed.
  let scheme = url.getScheme
  if scheme != "http" and scheme != "https":
    raise newException(EInvalidValue, "Unknown scheme `" & scheme & "`")

  # Qualified because both `re` and `pegs` export `MaxSubpatterns`.
  var parts: array[0..re.MaxSubpatterns-1, string]
  if not match(url, httpUrlParser, parts):
    raise newException(EInvalidValue, "Malformed http URL `" & url & "`")

  # Components that are always assigned after a successful match.
  result.scheme = parts[0]
  result.hostname = parts[3]
  result.path = parts[5]

  # Optional components keep their default value unless their group matched.
  if parts[1] != nil:
    result.username = Some(parts[1])
  if parts[2] != nil:
    result.password = Some(parts[2])
  if parts[6] != nil:
    result.parameters = Some(parts[6])
  if parts[7] != nil:
    result.fragments = Some(parts[7])

  if parts[4] != nil:
    var portNumber = 0
    # `parseInt` from parseutils returns the number of characters consumed.
    if parseInt(parts[4], portNumber) == 0:
      raise newException(EInvalidValue, "Port number `" & parts[4] & "` is invalid")
    result.port = Some(portNumber)
when true:
  # Ad-hoc self test: parses each sample URL and prints every result that
  # differs from the hand-written expectation.

  proc nUrl(scheme: String,
            username, password: String, # Optional
            hostname: String,
            port: Int,
            path: String,
            parameters, fragments: String): # Optional
            Url =
    ## Builds an expected `Url`. A `nil` string or a port of `-1` marks the
    ## corresponding optional component as absent.
    result.scheme = scheme
    result.hostname = hostname
    result.path = path

    if username == nil: result.username = None[String]()
    else: result.username = Some(username)

    if password == nil: result.password = None[String]()
    else: result.password = Some(password)

    if port == -1: result.port = None[Int]()
    else: result.port = Some(port)

    if parameters == nil: result.parameters = None[String]()
    else: result.parameters = Some(parameters)

    if fragments == nil: result.fragments = None[String]()
    else: result.fragments = Some(fragments)

  # Pairs of (input URL, expected parse result).
  let testcases = {
    r"https://host.com" : nUrl("https", nil, nil, "host.com", -1, "", nil, nil),
    r"https://foo:bar@host.com:23/path1/path2.php?q=fd" : nUrl("https", "foo", "bar", "host.com", 23, "/path1/path2.php", "q=fd", nil),
    r"https://foo:bar@host.com:23/path1/path2.php#5" : nUrl("https", "foo", "bar", "host.com", 23, "/path1/path2.php", nil, "5"),
    r"https://foo:bar@host.com:23?q=fd#5" : nUrl("https", "foo", "bar", "host.com", 23, "", "q=fd", "5"),
    r"https://foo:bar@host.com/path1/path2.php?q=fd#5" : nUrl("https", "foo", "bar", "host.com", -1, "/path1/path2.php", "q=fd", "5"),
    r"https://foo@host.com:23/path1/path2.php?q=fd#5" : nUrl("https", "foo", nil, "host.com", 23, "/path1/path2.php", "q=fd", "5"),
    r"https://host.com:23/path1/path2.php?q=fd#5" : nUrl("https", nil, nil, "host.com", 23, "/path1/path2.php", "q=fd", "5"),
    r"https://foo:bar@host.com:23/path1/path2.php?q=fd#5" : nUrl("https", "foo", "bar", "host.com", 23, "/path1/path2.php", "q=fd", "5"),
    r"https://foo:bar@host.com:23/path1/path2.php?q=fd#5" : nUrl("https", "foo", "bar", "host.com", 23, "/path1/path2.php", "q=fd", "5"),
    r"http://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]?%20%20foo" : nUrl("http", nil, nil, "[2001:0db8:85a3:0000:0000:8a2e:0370:7334]", -1, "", "%20%20foo", nil),
    r"HtTpS://%20%30r1:o%32@f.cs:23/P%20/23.23#%20123" : nUrl("HtTpS", "%20%30r1", "o%32", "f.cs", 23, "/P%20/23.23", nil, "%20123")
  }

  for testcase in testcases:
    let (input, expected) = testcase
    let actual = parseUrl(input)
    if actual != expected:
      echo "Mismatch between \n", actual, " and\n", $expected
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment