Skip to content

Instantly share code, notes, and snippets.

@kxepal
Created July 8, 2018 10:43
Show Gist options
  • Save kxepal/bceffb0f909a2339449f1e258d0fd433 to your computer and use it in GitHub Desktop.
Save kxepal/bceffb0f909a2339449f1e258d0fd433 to your computer and use it in GitHub Desktop.
Hypothesis url strategy
import ipaddress
import re
import unittest
from urllib.parse import unquote, urlsplit, urlunsplit
import hypothesis
import hypothesis.strategies as st
from .. import url
class UrlTestCase(unittest.TestCase):
    """Property-based tests for the strategies exposed by the ``url`` module.

    ``test_*`` methods are driven by Hypothesis; ``check_*`` methods hold
    assertions that several tests share (for example, path tests re-use the
    segment checks for every ``/``-separated piece).
    """

    # -- scheme -----------------------------------------------------------

    @hypothesis.given(url.scheme())
    def test_scheme(self, value):
        """A scheme starts with a letter, followed by ALPHA / DIGIT / +-."""
        self.assertIsInstance(value, str)
        self.assertTrue(set(value[0]).issubset(url.ALPHA))
        self.assertTrue(set(value).issubset(url.ALPHA_DIGIT + '+-.'))

    def test_scheme_custom(self):
        """A caller-supplied scheme strategy is used as is."""
        self.assertEqual('a',
                         hypothesis.find(url.scheme(st.text('ab', min_size=1)),
                                         lambda _: True))

    # -- userinfo ---------------------------------------------------------

    @hypothesis.given(url.username())
    def test_username(self, value):
        """Usernames are fully percent-encoded and round-trip via unquote."""
        self.assertIsInstance(value, str)
        self.assertTrue(set(value).issubset(url.UNRESERVED + '%'))
        self.assertEqual(value, url.quote(unquote(value)))

    @hypothesis.given(url.password())
    def test_password(self, value):
        """Passwords are fully percent-encoded and round-trip via unquote."""
        self.assertIsInstance(value, str)
        self.assertTrue(set(value).issubset(url.UNRESERVED + '%'))
        self.assertEqual(value, url.quote(unquote(value)))

    @hypothesis.given(url.username(), url.password())
    def test_userinfo(self, username, password):
        """``build_userinfo`` drops the password when there is no username."""
        userinfo = url.build_userinfo(username, password)
        if not username:
            self.assertEqual(userinfo, '')
        elif not password:
            self.assertEqual(userinfo, username)
        else:
            self.assertIn(':', userinfo)
            self.assertEqual([username, password], userinfo.split(':', 1))

    # -- host -------------------------------------------------------------

    @hypothesis.given(url.ipv4())
    def test_ipv4(self, value):
        """IPv4 values are four dotted decimal octets accepted by stdlib."""
        self.assertIsInstance(value, str)
        dec_octets = value.split('.')
        self.assertEqual(4, len(dec_octets))
        for dec_octet in dec_octets:
            self.assertTrue(0 <= int(dec_octet) <= 255, dec_octet)
        # Raises ``AddressValueError`` if the string is not a valid address.
        ipaddress.IPv4Address(value)

    def test_ipv4_minimal(self):
        """The simplest IPv4 example Hypothesis finds is all zeros."""
        self.assertEqual('0.0.0.0',
                         hypothesis.find(url.ipv4(), lambda _: True))

    @hypothesis.given(url.ipv6())
    def test_ipv6(self, value):
        self.check_ipv6(value)

    def check_ipv6(self, value):
        """Assert that ``value`` is a string accepted by ``IPv6Address``."""
        self.assertIsInstance(value, str)
        ipaddress.IPv6Address(value)

    def test_ipv6_minimal(self):
        """The simplest IPv6 example Hypothesis finds is ``::``."""
        self.assertEqual('::', hypothesis.find(url.ipv6(), lambda _: True))

    @hypothesis.given(url.ipvfuture())
    def test_ipvfuture(self, value):
        self.check_ipvfuture(value)

    def check_ipvfuture(self, value):
        """Assert the ``v<hexdigit>.<chars>`` shape of an IPvFuture value."""
        self.assertIsInstance(value, str)
        self.assertGreater(len(value), 3)
        self.assertTrue(value.startswith('v'))
        self.assertTrue(set(value[1]).issubset(url.HEXDIG))
        self.assertEqual(value[2], '.')
        valid_chars = url.UNRESERVED + url.SUB_DEMIMS + ':'
        self.assertTrue(set(value[3:]).issubset(valid_chars))

    @hypothesis.given(url.ip_literal())
    def test_ip_literal(self, value):
        """An IP literal is a bracketed IPv6 or IPvFuture address."""
        self.assertIsInstance(value, str)
        self.assertTrue(value.startswith('[') and value.endswith(']'))
        try:
            self.check_ipv6(value[1:-1])
        except (AssertionError, ipaddress.AddressValueError):
            # Not IPv6, so it has to satisfy the IPvFuture rules instead.
            self.check_ipvfuture(value[1:-1])

    @hypothesis.given(url.domainlabel())
    def test_domainlabel(self, value):
        self.check_domainlabel(value)

    def check_domainlabel(self, value):
        """Assert 1..63 chars, alphanumeric ends, alnum or ``-`` inside."""
        self.assertIsInstance(value, str)
        self.assertTrue(0 < len(value) < 64)
        self.assertTrue(set(value[0]).issubset(url.ALPHA_DIGIT))
        self.assertTrue(set(value).issubset(url.ALPHA_DIGIT + '-'))
        self.assertTrue(set(value[-1]).issubset(url.ALPHA_DIGIT))

    @hypothesis.given(url.toplabel())
    def test_toplabel(self, value):
        self.check_toplabel(value)

    def check_toplabel(self, value):
        """Like ``check_domainlabel`` but the first char must be a letter."""
        self.assertIsInstance(value, str)
        self.assertTrue(0 < len(value) < 64)
        self.assertTrue(set(value[0]).issubset(url.ALPHA))
        self.assertTrue(set(value).issubset(url.ALPHA_DIGIT + '-'))
        self.assertTrue(set(value[-1]).issubset(url.ALPHA_DIGIT))

    @hypothesis.given(url.hostname())
    def test_hostname(self, value):
        """Host names are dot-separated domain labels ending in a toplabel."""
        self.assertIsInstance(value, str)
        self.assertTrue(0 < len(value) < 256)
        *domainlabels, toplabel = value.rsplit('.', 1)
        self.check_toplabel(toplabel)
        for domainlabel in domainlabels[0].split('.'):
            self.check_domainlabel(domainlabel)

    # -- port / authority -------------------------------------------------

    @hypothesis.given(url.port())
    def test_port(self, value):
        """Ports are '' or a decimal 0..65535 without leading zeros."""
        self.assertIsInstance(value, str)
        if value:
            self.assertTrue(set(value).issubset(url.DIGIT))
            self.assertFalse(len(value) > 1 and value.startswith('0'))
            self.assertTrue(0 <= int(value) <= 65535)

    @hypothesis.given(url.userinfo(), url.host(), url.port())
    def test_authority(self, userinfo, host, port):
        """Userinfo leads (before ``@``) and port trails (after ``:``)."""
        authority = url.build_authority(userinfo, host, port)
        self.assertIsInstance(authority, str)
        if userinfo:
            self.assertIn('@', authority)
            self.assertEqual(userinfo, authority.split('@', 1)[0])
        if port:
            self.assertEqual(port, authority.rsplit(':', 1)[-1])

    # -- path segments ----------------------------------------------------

    @hypothesis.given(url.segment())
    def test_segment(self, value):
        self.check_segment(value)

    def check_segment(self, value):
        """Assert a possibly empty segment made only of pchar characters."""
        self.assertIsInstance(value, str)
        if value:
            valid_chars = url.UNRESERVED + '%:@' + url.SUB_DEMIMS
            self.assertTrue(set(value).issubset(valid_chars))
            self.assertEqual(value, url.quote_pchar(unquote(value)))

    @hypothesis.given(url.segment_nz())
    def test_segment_nz(self, value):
        self.check_segment_nz(value)

    def check_segment_nz(self, value):
        """Assert a non-empty segment made only of pchar characters."""
        self.assertIsInstance(value, str)
        self.assertGreater(len(value), 0)
        chars = url.UNRESERVED + '%:@' + url.SUB_DEMIMS
        self.assertTrue(set(value).issubset(chars))
        self.assertEqual(value, url.quote_pchar(unquote(value)))

    @hypothesis.given(url.segment_nz_nc())
    def test_segment_nz_nc(self, value):
        self.check_segment_nz_nc(value)

    def check_segment_nz_nc(self, value):
        """Assert a non-empty segment that contains no literal colon."""
        self.assertIsInstance(value, str)
        self.assertGreater(len(value), 0)
        # "nc" means "no colon": unlike the other segment checks, ':' is
        # deliberately absent from the allowed characters here.
        self.assertNotIn(':', value)
        valid_chars = url.UNRESERVED + '%@' + url.SUB_DEMIMS
        self.assertTrue(set(value).issubset(valid_chars))
        self.assertEqual(value, url.quote_segment_nz_nc(unquote(value)))

    # -- paths ------------------------------------------------------------

    @hypothesis.given(url.path_abempty())
    def test_path_abempty(self, value):
        """Empty, or starts with ``/`` and every piece is a valid segment."""
        self.assertIsInstance(value, str)
        if value:
            self.assertTrue(value.startswith('/'))
            for segment in value.split('/'):
                self.check_segment(segment)

    @hypothesis.given(url.path_absolute())
    def test_path_absolute(self, value):
        """Starts with ``/`` followed by a non-empty first segment."""
        self.assertIsInstance(value, str)
        self.assertGreater(len(value), 0)
        self.assertTrue(value.startswith('/'))
        segment_nz, *segments = value[1:].split('/')
        self.check_segment_nz(segment_nz)
        for segment in segments:
            self.check_segment(segment)

    @hypothesis.given(url.path_noscheme())
    def test_path_noscheme(self, value):
        """No leading ``/``; the first segment is non-empty and colon-free."""
        self.assertIsInstance(value, str)
        self.assertGreater(len(value), 0)
        self.assertFalse(value.startswith('/'))
        segment_nz_nc, *segments = value.split('/')
        self.check_segment_nz_nc(segment_nz_nc)
        for segment in segments:
            self.check_segment(segment)

    @hypothesis.given(url.path_rootless())
    def test_path_rootless(self, value):
        """No leading ``/``; the first segment is non-empty."""
        self.assertIsInstance(value, str)
        self.assertGreater(len(value), 0)
        self.assertFalse(value.startswith('/'))
        segment_nz, *segments = value.split('/')
        self.check_segment_nz(segment_nz)
        for segment in segments:
            self.check_segment(segment)

    @hypothesis.given(url.path_empty())
    def test_path_empty(self, value):
        self.assertIsInstance(value, str)
        self.assertEqual(len(value), 0)

    # -- query ------------------------------------------------------------

    @hypothesis.given(url.query())
    def test_query(self, value):
        """Every ``key=value`` pair only uses characters valid in a query."""
        self.assertIsInstance(value, str)
        valid_chars = url.UNRESERVED + url.SUB_DEMIMS + ':@/?%'
        for pair in value.split('&'):
            if not pair:
                continue
            key, val = pair.split('=', 1)
            if key:
                self.assertTrue(set(key).issubset(valid_chars))
            if val:
                self.assertTrue(set(val).issubset(valid_chars))

    # -- whole URLs -------------------------------------------------------

    @hypothesis.given(url.scheme(),
                      url.username(),
                      url.password(),
                      url.host(),
                      url.port(),
                      url.path(),
                      url.query(),
                      url.fragment())
    def test_against_stdlib(self, scheme, username, password, host, port,
                            path, query, fragment):
        """``build_url`` agrees with ``urllib.parse.urlunsplit``."""
        userinfo = url.build_userinfo(username, password)
        authority = url.build_authority(userinfo, host, port)
        our = url.build_url(scheme, authority, path, query, fragment)
        stdlib = urlunsplit((scheme, authority, path, query, fragment))
        self.assertEqual(our, stdlib)

    @hypothesis.given(url.scheme(),
                      url.username(),
                      url.password(),
                      url.host(),
                      url.port(),
                      url.path(),
                      url.query(),
                      url.fragment())
    def test_parse(self, scheme, username, password, host, port,
                   path, query, fragment):
        """``urlsplit`` recovers every generated component."""
        userinfo = url.build_userinfo(username, password)
        authority = url.build_authority(userinfo, host, port)
        result = urlsplit(urlunsplit(
            (scheme, authority, path, query, fragment)))

        # Normalise the inputs to the form in which ``urlsplit`` reports
        # them: lower-cased scheme/host, ``None`` for absent parts,
        # brackets stripped from IP literals and an integer port.
        scheme = scheme.lower()
        if username == '':
            username = None
        if username is None or password == '':
            password = None
        if host.startswith('[') and host.endswith(']'):
            host = host[1:-1]
        host = host.lower()
        if port == '':
            port = None
        else:
            port = int(port)

        self.assertEqual(scheme, result.scheme)
        self.assertEqual(username, result.username)
        self.assertEqual(password, result.password)
        self.assertEqual(host, result.hostname)
        self.assertEqual(port, result.port)
        self.assertEqual(path, result.path)
        self.assertEqual(query, result.query)
        self.assertEqual(fragment, result.fragment)

    @hypothesis.given(url.scheme(),
                      url.username(),
                      url.password(),
                      url.host(),
                      url.port(),
                      url.path(),
                      url.query(),
                      url.fragment())
    def test_regexp(self, scheme, username, password, host, port,
                    path, query, fragment):
        """Generated URLs decompose correctly with the RFC 3986 regexp."""
        # http://tools.ietf.org/html/rfc3986#appendix-B
        userinfo = url.build_userinfo(username, password)
        authority = url.build_authority(userinfo, host, port)
        match = re.match(
            # Raw string: ``\?`` must reach the regex engine untouched and
            # is not a valid escape sequence in a plain string literal.
            r'^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?',
            #  12            3  4          5       6  7        8 9
            # scheme = $2
            # authority = $4
            # path = $5
            # query = $7
            # fragment = $9
            url.build_url(scheme, authority, path, query, fragment))
        self.assertEqual(match.group(2), scheme)
        self.assertEqual(match.group(4), authority)
        self.assertEqual(match.group(5), path)
        self.assertEqual(match.group(7), (query or None))
        self.assertEqual(match.group(9), (fragment or None))

    def test_http(self):
        """Minimal HTTP and HTTPS examples."""
        self.assertEqual('http://0.0.0.0:0',
                         hypothesis.find(url.http(),
                                         lambda v: v.startswith('http')))
        self.assertEqual('https://0.0.0.0:0',
                         hypothesis.find(url.http(),
                                         lambda v: v.startswith('https')))

    def test_url(self):
        """Minimal generic URL example."""
        self.assertEqual('a://0.0.0.0:0',
                         hypothesis.find(url.url(), lambda _: True))
import ipaddress
import operator
import string
import urllib.parse
from functools import partial
import hypothesis
import hypothesis.strategies as st
# Names exported by ``from <module> import *``.  Every entry must be defined
# in this module, otherwise the star import raises ``AttributeError``; the
# hierarchical-part helper defined here is ``build_hierarchical_part``.
__all__ = (
    # URL strategies
    'http',
    'url',
    # component strategies
    'scheme',
    'authority',
    'userinfo',
    'username',
    'password',
    'host',
    'ipv4',
    'ipv6',
    'ipvfuture',
    'hostname',
    'domainlabel',
    'toplabel',
    'port',
    'path',
    'query',
    'fragment',
    # plain string builders
    'build_url',
    'build_hierarchical_part',
    'build_authority',
    'build_userinfo',
)
# Character classes used by the strategies and quoting helpers below.
ALPHA = string.ascii_letters
DIGIT = string.digits
ALPHA_DIGIT = ALPHA + DIGIT
HEXDIG = string.hexdigits
UNRESERVED = ALPHA_DIGIT + '-._~'
GEN_DELIMS = ':/?#[]@'
# NOTE(review): RFC 3986 lists "&" among the sub-delims and does not list
# "^"; this constant has "^" and no "&".  The default query strategy joins
# pairs with "&", so this may be deliberate -- confirm before changing.
# The name (``DEMIMS``) is kept as is because other code refers to it.
SUB_DEMIMS = '!$^\'()*+,;='
RESERVED = GEN_DELIMS + SUB_DEMIMS
def http(username_strategy=None,
         password_strategy=None,
         host_strategy=None,
         port_strategy=None,
         path_strategy=None,
         segment_strategy=None,
         query_strategy=None,
         fragment_strategy=None):
    """Generates HTTP/HTTPS URLs.

    Delegates to :func:`url` with the scheme sampled from ``'http'`` and
    ``'https'``; every other argument is forwarded unchanged.
    """
    forwarded = dict(
        username_strategy=username_strategy,
        password_strategy=password_strategy,
        host_strategy=host_strategy,
        port_strategy=port_strategy,
        path_strategy=path_strategy,
        segment_strategy=segment_strategy,
        query_strategy=query_strategy,
        fragment_strategy=fragment_strategy,
    )
    http_schemes = st.sampled_from(('http', 'https'))
    return url(scheme_strategy=http_schemes, **forwarded)
def url(scheme_strategy=None,
        username_strategy=None,
        password_strategy=None,
        host_strategy=None,
        port_strategy=None,
        path_strategy=None,
        segment_strategy=None,
        query_strategy=None,
        fragment_strategy=None):
    """Generates random URLs by given strategies of each part.

    Each ``*_strategy`` argument is handed to the matching component
    function (``segment_strategy`` goes to :func:`path` together with
    ``path_strategy``); the drawn components are assembled by
    :func:`build_url`.
    """
    scheme_part = scheme(scheme_strategy)
    authority_part = authority(username_strategy=username_strategy,
                               password_strategy=password_strategy,
                               host_strategy=host_strategy,
                               port_strategy=port_strategy)
    path_part = path(path_strategy, segment_strategy)
    query_part = query(query_strategy)
    fragment_part = fragment(fragment_strategy)
    return st.builds(build_url,
                     scheme_part,
                     authority_part,
                     path_part,
                     query_part,
                     fragment_part)
def scheme(strategy=None):
    """Generates URL schemes per :rfc:`3986#3.1`::

        scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )

    A caller-supplied ``strategy`` is returned unchanged.
    """
    if strategy is not None:
        return strategy
    # One or more letters first, then any run of the wider alphabet.
    leading_letters = st.text(alphabet=ALPHA, min_size=1)
    remainder = st.text(alphabet=ALPHA_DIGIT + '+-.')
    return st.tuples(leading_letters, remainder).map(''.join)
def authority(username_strategy=None,
              password_strategy=None,
              host_strategy=None,
              port_strategy=None):
    """Generates URL authorities out of userinfo, host and port strategies.

    The drawn parts are combined by :func:`build_authority`.
    """
    userinfo_part = userinfo(username_strategy=username_strategy,
                             password_strategy=password_strategy)
    host_part = host(host_strategy)
    port_part = port(port_strategy)
    return st.builds(build_authority, userinfo_part, host_part, port_part)
def userinfo(username_strategy=None, password_strategy=None):
    """Generates userinfo strings via :func:`build_userinfo`.

    The name and the password are drawn from :func:`username` and
    :func:`password` respectively.
    """
    name_part = username(username_strategy)
    secret_part = password(password_strategy)
    return st.builds(build_userinfo, name_part, secret_part)
def username(strategy=None):
    """Generates percent-encoded user names.

    Values are drawn from ``strategy`` (``st.text()`` when omitted) and then
    passed through :func:`quote`.
    """
    raw_names = st.text() if strategy is None else strategy
    return raw_names.map(quote)
def password(strategy=None):
    """Generates percent-encoded passwords.

    Values are drawn from ``strategy`` (``st.text()`` when omitted) and then
    passed through :func:`quote`.
    """
    raw_passwords = strategy
    if raw_passwords is None:
        raw_passwords = st.text()
    return raw_passwords.map(quote)
def host(strategy=None):
    """Generates hosts per :rfc:`3986#3.2.2`::

        host = IP-literal / IPv4address / reg-name

    A caller-supplied ``strategy`` is returned unchanged; otherwise the
    result is a choice between :func:`ipv4`, :func:`ip_literal` and
    :func:`hostname`.
    """
    if strategy is not None:
        return strategy
    return ipv4() | ip_literal() | hostname()
def ipv4():
    """Generates IPv4 addresses using ``ipaddress.IPv4Address`` object::

        IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
        dec-octet   = DIGIT                 ; 0-9
                    / %x31-39 DIGIT         ; 10-99
                    / "1" 2DIGIT            ; 100-199
                    / "2" %x30-34 DIGIT     ; 200-249
                    / "25" %x30-35          ; 250-255

    An integer covering the whole address space is drawn, wrapped into
    ``IPv4Address`` and rendered with ``str``.
    """
    highest_address = (2 ** ipaddress.IPV4LENGTH) - 1
    address_numbers = st.integers(min_value=0, max_value=highest_address)
    addresses = st.builds(ipaddress.IPv4Address, address_numbers)
    return addresses.map(str)
def ip_literal():
    """Generates IP literals (IPv6 and IPvFuture) per :rfc:`3986#3.2.2`.

    The address comes from :func:`ipv6` or :func:`ipvfuture` and is wrapped
    in square brackets.
    """
    def bracketed(address):
        return '[%s]' % address

    return (ipv6() | ipvfuture()).map(bracketed)
def ipv6():
    """Generates IPv6 addresses using ``ipaddress.IPv6Address`` object::

        IPv6address =                            6( h16 ":" ) ls32
                    /                       "::" 5( h16 ":" ) ls32
                    / [               h16 ] "::" 4( h16 ":" ) ls32
                    / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
                    / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
                    / [ *3( h16 ":" ) h16 ] "::"    h16 ":"   ls32
                    / [ *4( h16 ":" ) h16 ] "::"              ls32
                    / [ *5( h16 ":" ) h16 ] "::"              h16
                    / [ *6( h16 ":" ) h16 ] "::"

        ls32        = ( h16 ":" h16 ) / IPv4address
                    ; least-significant 32 bits of address

        h16         = 1*4HEXDIG
                    ; 16 bits of address represented in hexadecimal

    An integer covering the whole address space is drawn, wrapped into
    ``IPv6Address`` and rendered with ``str``.
    """
    highest_address = (2 ** ipaddress.IPV6LENGTH) - 1
    address_numbers = st.integers(min_value=0, max_value=highest_address)
    addresses = st.builds(ipaddress.IPv6Address, address_numbers)
    return addresses.map(str)
def ipvfuture():
    """Generates IPvFuture addresses per :rfc:`3986#3.2.2`::

        IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )

    The version part generated here is always exactly one hex digit.
    """
    version = st.text(alphabet=HEXDIG, min_size=1, max_size=1)
    address = st.text(alphabet=UNRESERVED + SUB_DEMIMS + ':', min_size=1)
    pieces = st.tuples(st.just('v'), version, st.just('.'), address)
    return pieces.map(''.join)
def hostname():
    """Generates domain names per :rfc:`1738`::

        hostname = *[ domainlabel "." ] toplabel

    IDNA host names are not implemented.

    At least one domain label is always generated in front of the top
    label, and names of 256 characters or more are filtered out.
    """
    def fits_length_limit(name):
        return len(name) < 256

    dotted_labels = st.lists(domainlabel(), min_size=1).map('.'.join)
    names = st.tuples(dotted_labels, toplabel()).map('.'.join)
    return names.filter(fits_length_limit)
@st.composite
def domainlabel(draw):
    """Generates domain labels per :rfc:`1738`::

        domainlabel = alphadigit | alphadigit *[ alphadigit | "-" ] alphadigit

    The result is at most 63 characters long: one leading character, up to
    61 inner ones and, only when the inner part ends with a hyphen, one
    closing alphanumeric character.
    """
    first = draw(st.text(alphabet=ALPHA_DIGIT, min_size=1, max_size=1))
    inner = draw(st.text(alphabet=ALPHA_DIGIT + '-', max_size=61))
    last = ''
    if inner.endswith('-'):
        # A label must not end with a hyphen, so close it properly.
        last = draw(st.text(alphabet=ALPHA_DIGIT, min_size=1, max_size=1))
    return first + inner + last
@st.composite
def toplabel(draw):
    """Generates top labels per :rfc:`1738`::

        toplabel = alpha | alpha *[ alphadigit | "-" ] alphadigit

    Same construction as a domain label, except that the first character is
    always a letter.
    """
    pieces = [
        draw(st.text(alphabet=ALPHA, min_size=1, max_size=1)),
        draw(st.text(alphabet=ALPHA_DIGIT + '-', max_size=61)),
    ]
    if pieces[-1].endswith('-'):
        # A label must not end with a hyphen, so close it properly.
        pieces.append(
            draw(st.text(alphabet=ALPHA_DIGIT, min_size=1, max_size=1)))
    return ''.join(pieces)
def port(strategy=None):
    """Generates port number per :rfc:`3986#3.2.3`::

        port = *DIGIT

    Produced string values are in range ``[0..65535]`` or empty string
    for the cases when implicit port assumes.

    A caller-supplied ``strategy`` is returned unchanged.
    """
    if strategy is not None:
        return strategy

    def render(number):
        # Negative draws stand for "no explicit port"; they exist to balance
        # implicit and explicit port definitions.
        return str(number) if number >= 0 else ''

    return st.integers(min_value=-65535, max_value=65535).map(render)
def path(strategy=None, segment_strategy=None):
    """Generates URL paths.

    A caller-supplied ``strategy`` is returned unchanged (and
    ``segment_strategy`` is then not used); otherwise paths come from
    :func:`path_abempty` built on ``segment_strategy``.
    """
    return path_abempty(segment_strategy) if strategy is None else strategy
def path_abempty(segment_strategy=None):
    """Generates absolute paths allowing empty ones per :rfc:`3986#3.3`::

        path-abempty = *( "/" segment )

    Each segment comes from :func:`segment` and gets a ``/`` prepended.
    """
    def with_leading_slash(piece):
        return '/' + piece

    slashed_segments = segment(segment_strategy).map(with_leading_slash)
    return st.lists(slashed_segments).map(''.join)
@st.composite
def path_absolute(draw, segment_strategy=None):
    """Generates strictly absolute paths per :rfc:`3986#3.3`::

        path-absolute = "/" [ segment-nz *( "/" segment ) ]

    The non-empty first segment is always present here, so a bare ``/`` is
    never produced.
    """
    # Draw order matters for reproducibility: count, first segment, rest.
    segment_count = draw(st.integers(min_value=1, max_value=255))
    first_segment = draw(segment_nz(segment_strategy))
    following = draw(st.lists(
        segment(segment_strategy).map(lambda piece: '/' + piece),
        max_size=segment_count - 1,
    ))
    return ''.join(['/', first_segment] + following)
def path_noscheme(segment_strategy=None):
    """Generates paths for scheme-less URL's per :rfc:`3986#3.3`::

        path-noscheme = segment-nz-nc *( "/" segment )
    """
    first_segment = segment_nz_nc(segment_strategy)
    remainder = path_abempty(segment_strategy)
    return st.tuples(first_segment, remainder).map(''.join)
def path_rootless(segment_strategy=None):
    """Generates rootless paths per :rfc:`3986#3.3`::

        path-rootless = segment-nz *( "/" segment )
    """
    first_segment = segment_nz(segment_strategy)
    remainder = path_abempty(segment_strategy)
    return st.tuples(first_segment, remainder).map(''.join)
def path_empty(segment_strategy=None):
    """Generates empty paths per :rfc:`3986#3.3`::

        path-empty = 0<pchar>

    ``segment_strategy`` is accepted but not used.
    """
    nothing = ''
    return st.just(nothing)
def segment(strategy=None):
    """Generates path segments per :rfc:`3986#3.3`::

        segment = *pchar

    Values are drawn from ``strategy`` (``st.text()`` when omitted) and then
    passed through :func:`quote_pchar`.
    """
    raw_segments = st.text() if strategy is None else strategy
    return raw_segments.map(quote_pchar)
def segment_nz(strategy=None):
    """Generates non-empty path segments per :rfc:`3986#3.3`::

        segment_nz = 1*pchar

    Values are passed through :func:`quote_pchar`.  Only the default
    strategy guarantees non-empty text; a caller-supplied ``strategy`` is
    used as given.
    """
    raw_segments = strategy
    if raw_segments is None:
        raw_segments = st.text(min_size=1)
    return raw_segments.map(quote_pchar)
def segment_nz_nc(strategy=None):
    """Generates non-empty path segments without colon per :rfc:`3986#3.3`::

        segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )

    Values are passed through :func:`quote_segment_nz_nc`.  Only the default
    strategy guarantees non-empty text; a caller-supplied ``strategy`` is
    used as given.
    """
    raw_segments = st.text(min_size=1) if strategy is None else strategy
    return raw_segments.map(quote_segment_nz_nc)
def query(strategy=None):
    """Generates query parameters per :rfc:`3986#3.4`::

        query = *( pchar / "/" / "?" )

    A caller-supplied ``strategy`` is returned unchanged.  By default the
    query is a list of ``key=value`` pairs joined with ``&``, where keys and
    values are passed through :func:`quote_query`.
    """
    if strategy is not None:
        return strategy
    keys = st.text().map(quote_query)
    values = st.text().map(quote_query)
    pairs = st.tuples(keys, values).map('='.join)
    return st.lists(pairs).map('&'.join)
def fragment(strategy=None):
    """Generates fragments per :rfc:`3986#3.5`::

        fragment = *( pchar / "/" / "?" )

    Values are drawn from ``strategy`` (``st.text()`` when omitted) and, in
    both cases, passed through :func:`quote_fragment`.
    """
    raw_fragments = st.text() if strategy is None else strategy
    return raw_fragments.map(quote_fragment)
def build_url(scheme, authority, path, query, fragment):
    """Assembles a URL string out of its already encoded components.

    The scheme and ``:`` are always emitted.  ``//authority``, the path,
    ``?query`` and ``#fragment`` are emitted only when the respective
    component is non-empty.
    """
    pieces = [scheme, ':']
    if authority:
        pieces += ['//', authority]
    if path:
        if not authority:
            # Without an authority a path starting with "//" would be read
            # back as one, so such examples are rejected.
            hypothesis.assume(not path.startswith('//'))
        pieces.append(path)
    if query:
        pieces += ['?', query]
    if fragment:
        pieces += ['#', fragment]
    return ''.join(pieces)
def build_hierarchical_part(authority, path):
    """Concatenates ``authority`` and ``path`` without any separator."""
    components = (authority, path)
    return ''.join(components)
def build_authority(userinfo, host, port):
    """Assembles ``[userinfo "@"] host [":" port]``.

    The userinfo and the port (with their delimiters) are emitted only when
    non-empty; the host is always emitted.
    """
    leading = [userinfo, '@'] if userinfo else []
    trailing = [':', port] if port else []
    return ''.join(leading + [host] + trailing)
def build_userinfo(username, password):
    """Assembles ``username [":" password]``.

    Returns an empty string when ``username`` is empty, even if a password
    was given; the password is appended only when non-empty.
    """
    if not username:
        return ''
    credentials = [username]
    if password:
        credentials += [':', password]
    return ''.join(credentials)
def quote(s):
    """Percent-encodes ``s`` with no extra safe characters (``/`` included)."""
    nothing_is_safe = ''
    return urllib.parse.quote(s, safe=nothing_is_safe)
def quote_pchar(s):
    """Percent-encodes ``s`` keeping the characters of :func:`pchar` as is."""
    safe_chars = pchar()
    return urllib.parse.quote(s, safe=safe_chars)
def quote_segment_nz_nc(s):
    """Percent-encodes ``s`` keeping ``@`` and ``SUB_DEMIMS`` as is.

    The colon is not among the safe characters, so it gets encoded.
    """
    safe_chars = '@' + SUB_DEMIMS
    return urllib.parse.quote(s, safe=safe_chars)
def quote_query(s):
    """Percent-encodes ``s`` keeping :func:`pchar`, ``/`` and ``?`` as is."""
    safe_chars = pchar() + '/?'
    return urllib.parse.quote(s, safe=safe_chars)
def quote_fragment(s):
    """Percent-encodes ``s`` keeping :func:`pchar`, ``/`` and ``?`` as is."""
    safe_chars = pchar() + '/?'
    return urllib.parse.quote(s, safe=safe_chars)
def pchar():
    """Returns the characters left unencoded in a path character.

    That is ``SUB_DEMIMS`` followed by ``:`` and ``@``; unreserved characters
    are not listed.
    """
    return ''.join((SUB_DEMIMS, ':@'))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment