Skip to content

Instantly share code, notes, and snippets.

@dpk
Last active January 29, 2016 13:40
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dpk/4757681 to your computer and use it in GitHub Desktop.
Save dpk/4757681 to your computer and use it in GitHub Desktop.
RFC 3986 URI ABNF translated to a regular expression. (Uses Ruby/Oniguruma/Onigmo named capture syntax)
(?xi)
# RFC 3986, Appendix A, "URI" production as a single anchored regular
# expression (Ruby / Oniguruma / Onigmo syntax; extended and case-insensitive).
#
# Named captures, one per ABNF rule that is worth extracting:
#   scheme, hier_part, authority, userinfo, host, IP_literal, IPv6address,
#   IPvFuture, IPv4address, reg_name, port, path_abempty, path_absolute,
#   path_rootless, path_empty, query, fragment
# ABNF rule names containing "-" are spelled with "_" here, because the regex
# engine only accepts word characters inside a group name.
#
# Building blocks. They are inlined everywhere below rather than defined as
# groups, so that they do not add captures of their own:
#   unreserved   [a-z0-9\-._~]
#   sub-delims   [!$&'()*+,;=]
#   pct-encoded  %[0-9a-f]{2}
#   pchar        (?:[a-z0-9\-._~!$&'()*+,;=:@]|%[0-9a-f]{2})
#   segment      pchar*
#   segment-nz   pchar+
#   h16          [0-9a-f]{1,4}
#   dec-octet    (?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])
#   ls32         (?:h16:h16|dec-octet\.dec-octet\.dec-octet\.dec-octet)
#
# The alternatives inside dec-octet and IPv6address are ordered as in the RFC;
# a shorter alternative may match first, and the engine backtracks to a longer
# one because every use is followed by a mandatory delimiter or the end anchor.
\A
(?<scheme>[a-z][a-z0-9+\-.]*):
(?<hier_part>
  (?:
    # "//" authority path-abempty
    //(?<authority>
      (?:(?<userinfo>(?:[a-z0-9\-._~!$&'()*+,;=:]|%[0-9a-f]{2})*)@)?
      (?<host>
        (?<IP_literal>\[(?:
          (?<IPv6address>
            # 6( h16 ":" ) ls32
            (?:[0-9a-f]{1,4}:){6}
            (?:[0-9a-f]{1,4}:[0-9a-f]{1,4}|(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5]))
            |
            # "::" 5( h16 ":" ) ls32
            ::(?:[0-9a-f]{1,4}:){5}
            (?:[0-9a-f]{1,4}:[0-9a-f]{1,4}|(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5]))
            |
            # [ h16 ] "::" 4( h16 ":" ) ls32
            (?:[0-9a-f]{1,4})?::(?:[0-9a-f]{1,4}:){4}
            (?:[0-9a-f]{1,4}:[0-9a-f]{1,4}|(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5]))
            |
            # [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
            (?:(?:[0-9a-f]{1,4}:){0,1}[0-9a-f]{1,4})?::(?:[0-9a-f]{1,4}:){3}
            (?:[0-9a-f]{1,4}:[0-9a-f]{1,4}|(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5]))
            |
            # [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
            (?:(?:[0-9a-f]{1,4}:){0,2}[0-9a-f]{1,4})?::(?:[0-9a-f]{1,4}:){2}
            (?:[0-9a-f]{1,4}:[0-9a-f]{1,4}|(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5]))
            |
            # [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
            (?:(?:[0-9a-f]{1,4}:){0,3}[0-9a-f]{1,4})?::[0-9a-f]{1,4}:
            (?:[0-9a-f]{1,4}:[0-9a-f]{1,4}|(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5]))
            |
            # [ *4( h16 ":" ) h16 ] "::" ls32
            (?:(?:[0-9a-f]{1,4}:){0,4}[0-9a-f]{1,4})?::
            (?:[0-9a-f]{1,4}:[0-9a-f]{1,4}|(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5]))
            |
            # [ *5( h16 ":" ) h16 ] "::" h16
            (?:(?:[0-9a-f]{1,4}:){0,5}[0-9a-f]{1,4})?::[0-9a-f]{1,4}
            |
            # [ *6( h16 ":" ) h16 ] "::"   (the prefix is optional, so "::" alone is valid)
            (?:(?:[0-9a-f]{1,4}:){0,6}[0-9a-f]{1,4})?::
          )
          |
          # "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
          (?<IPvFuture>v[0-9a-f]+\.[a-z0-9\-._~!$&'()*+,;=:]+)
        )\])
        |
        (?<IPv4address>
          (?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])
          \.
          (?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])
          \.
          (?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])
          \.
          (?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])
        )
        |
        (?<reg_name>(?:[a-z0-9\-._~!$&'()*+,;=]|%[0-9a-f]{2})*)
      )
      # port = *DIGIT, so "host:" with nothing after the colon is allowed
      (?::(?<port>[0-9]*))?
    )
    (?<path_abempty>(?:/(?:[a-z0-9\-._~!$&'()*+,;=:@]|%[0-9a-f]{2})*)*)
    |
    # "/" [ segment-nz *( "/" segment ) ]
    (?<path_absolute>
      /(?:
        (?:[a-z0-9\-._~!$&'()*+,;=:@]|%[0-9a-f]{2})+
        (?:/(?:[a-z0-9\-._~!$&'()*+,;=:@]|%[0-9a-f]{2})*)*
      )?
    )
    |
    # segment-nz *( "/" segment )
    (?<path_rootless>
      (?:[a-z0-9\-._~!$&'()*+,;=:@]|%[0-9a-f]{2})+
      (?:/(?:[a-z0-9\-._~!$&'()*+,;=:@]|%[0-9a-f]{2})*)*
    )
    |
    (?<path_empty>)
  )
)
(?:\?(?<query>(?:[a-z0-9\-._~!$&'()*+,;=:@/?]|%[0-9a-f]{2})*))?
(?:\#(?<fragment>(?:[a-z0-9\-._~!$&'()*+,;=:@/?]|%[0-9a-f]{2})*))?
# \z rather than \Z: the latter would also accept a URI followed by a newline.
\z
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment