Skip to content

Instantly share code, notes, and snippets.

@ato
Last active November 5, 2017 05:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save ato/b9875c45171d082ca6c6738640347ecb to your computer and use it in GitHub Desktop.
Save ato/b9875c45171d082ca6c6738640347ecb to your computer and use it in GitHub Desktop.
#!/usr/bin/python3
# coding=utf-8
import re
# Pattern that splits a URL-like string into the named pieces `ssurt`
# rearranges: scheme, slashes, userinfo, host (ip6 / ip4 / domain), port, path.
# It is written for re.VERBOSE (whitespace in the pattern is layout only) and
# re.DOTALL (so the trailing `path` group can swallow newlines as well).
# Every group except `path` is optional, so the pattern matches any string.
#
# The lookahead at the end of `ip4` requires the dotted quad to be the whole
# host, i.e. followed by a host delimiter or the end of input.  Without it,
# "1.2.3.4.com" was split into host "1.2.3.4" plus path ".com" instead of
# being treated as a domain name.
SSURT_RE = r"""
\A
(?P<scheme> [a-zA-Z] [a-zA-Z0-9+.-]* : )?
(?P<authority>
  (?P<slashes> /* )
  (?P<userinfo> [^/?#\[\]@]* @ )?
  (?P<host>
      (?P<ip6> \[ [^\]]* \] )
    | (?P<ip4> [0-9]+ \. [0-9]+ \. [0-9]+ \. [0-9]+
               (?= [:/?#\[\]@] | \Z ) )
    | (?P<domain> [^:/?#\[\]@]+ )
  )
  (?P<port> : [0-9]+ )?
)?
(?P<path> .*? )
\Z
"""
def ssurt(input):
    """Rewrite a URL-like string into its host-first SSURT form.

    The string is split with SSURT_RE and re-emitted in the order
    host, port, slashes, a literal ":", scheme, userinfo, path.  A domain
    host is passed through revdomain(); an IPv4 or bracketed host is kept
    verbatim.  Pieces that are absent contribute an empty string.

    Example:
        "https://user:pass@www.example.org:1443/path"
        becomes
        "(org,example,www,):1443//:https:user:pass@/path"
    """
    parsed = re.match(SSURT_RE, input, re.VERBOSE | re.DOTALL)
    assert parsed

    domain = parsed.group('domain')
    host = revdomain(domain) if domain else parsed.group('host')

    pieces = [
        host or '',
        parsed.group('port') or '',
        parsed.group('slashes') or '',
        ":",
        parsed.group('scheme') or '',
        parsed.group('userinfo') or '',
        parsed.group("path"),
    ]
    return ''.join(pieces)
def revdomain(domain):
    """Return *domain* with its dot-separated labels reversed.

    The labels are joined with commas, followed by one trailing comma, and
    the whole thing is wrapped in parentheses:
    "www.example.org" becomes "(org,example,www,)".
    """
    labels = domain.split('.')
    labels.reverse()
    return '(' + ','.join(labels) + ',)'
# Pattern that splits an SSURT string (the output of `ssurt`) back into its
# pieces.  Layout, left to right:
#   optional authority = host (bracketed ip6 / dotted-quad ip4 / parenthesised
#   reversed domain ending in ",)"), optional ":port", then any slashes;
#   a mandatory literal ":" separator;
#   optional scheme (with its trailing ":"), optional userinfo (with its
#   trailing "@"), and finally the path, which takes everything that is left.
# Must be used with re.VERBOSE (the whitespace is layout only); `unssurt` also
# passes re.DOTALL so that `path` can contain newlines.
# NOTE(review): the scheme is tried before userinfo, so for a scheme-less
# input whose userinfo looks like "user:pass@" the "user:" part is captured as
# the scheme -- such strings do not decode back to the original URL.
UNSSURT_RE = r"""
\A
(?P<authority>
(?P<host>
(?P<ip6> \[ [^\]]* \] )
| (?P<ip4> [0-9]+ \. [0-9]+ \. [0-9]+ \. [0-9]+ )
| (?P<revdomain> \( [^:/?#\[\]@]+ , \) )
)
(?P<port> : [0-9]+ )?
(?P<slashes> /* )
)?
:
(?P<scheme> [a-zA-Z] [a-zA-Z0-9+.-]* :)?
(?P<userinfo> [^/?#\[\]@]* @ )?
(?P<path> .*? )
\Z
"""
def unssurt(input):
    """Convert an SSURT string back into URL order.

    The string is split with UNSSURT_RE and re-emitted in the order
    scheme, slashes, userinfo, host, port, path.  A parenthesised reversed
    domain is restored with unrevdomain(); an IPv4 or bracketed host is kept
    verbatim.  Pieces that are absent contribute an empty string.

    Example:
        "(org,example,www,):1443//:https:user:pass@/path"
        becomes
        "https://user:pass@www.example.org:1443/path"

    Raises:
        ValueError: if *input* does not match UNSSURT_RE, for example when
            it lacks the ":" separator that follows the authority part.
    """
    m = re.match(UNSSURT_RE, input, re.VERBOSE | re.DOTALL)
    # Explicit check rather than `assert`: assertions are removed under
    # `python -O`, which would turn bad input into an AttributeError on None.
    if m is None:
        raise ValueError('not a valid SSURT string: {!r}'.format(input))

    if m.group('revdomain'):
        host = unrevdomain(m.group('revdomain'))
    else:
        host = m.group('host')

    return ((m.group('scheme') or '')
            + (m.group('slashes') or '')
            + (m.group('userinfo') or '')
            + (host or '')
            + (m.group('port') or '')
            + m.group('path'))
def unrevdomain(revdomain):
    """Turn a reversed domain such as "(org,example,www,)" back into dotted form.

    The leading "(" and the trailing ",)" are dropped, the comma-separated
    labels are put back in their original order and joined with dots.
    """
    inner = revdomain[1:-2]
    labels = inner.split(',')
    return '.'.join(labels[::-1])
# Sample inputs for the round-trip self-check below: ordinary URLs, strings
# without a scheme or authority, non-ASCII hosts and bare ":" strings.
urls = [
    "https://user:pass@www.example.org:1443/path",
    "random!!garbage",
    "screenshot+http://user:pass@www.example.org:1443/path",
    "http:/日本.jp:80//.././[FÜNKY]",
    "https://[2001:db8::1:0:0:1]:8080/",
    "ssh://git@github.com/smola/galimatias.git",
    "dns:example.com",
    ":",
    "::",
    "x:[/]/foo",
    "//noscheme.tld/foo.html?query#frag",
]

# Print each input next to its SSURT form and check that unssurt() restores
# the original string exactly.
for url in urls:
    print(url, ssurt(url))
    assert unssurt(ssurt(url)) == url
# https://user:pass@www.example.org:1443/path (org,example,www,):1443//:https:user:pass@/path
# random!!garbage (random!!garbage,):
# screenshot+http://user:pass@www.example.org:1443/path (org,example,www,):1443//:screenshot+http:user:pass@/path
# http:/日本.jp:80//.././[FÜNKY] (jp,日本,):80/:http://.././[FÜNKY]
# https://[2001:db8::1:0:0:1]:8080/ [2001:db8::1:0:0:1]:8080//:https:/
# ssh://git@github.com/smola/galimatias.git (com,github,)//:ssh:git@/smola/galimatias.git
# dns:example.com (com,example,):dns:
# : ::
# :: :::
# x:[/]/foo [/]:x:/foo
# //noscheme.tld/foo.html?query#frag (tld,noscheme,)//:/foo.html?query#frag
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment