Created
August 30, 2016 20:33
-
-
Save alexwlchan/1956efe1acb1f2947cbd575651a3d529 to your computer and use it in GitHub Desktop.
Helper functions for dealing with query strings in URLs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
"""
Utility functions for dealing with URL query strings in Python,
i.e., URLs of the form

    http://example.net?field1=value1&field2=value2

This module includes a pair of helper functions: one for getting the values
associated with a particular field; another for setting the query string
values within a URL.
"""
# The URL helpers live in ``urllib.parse`` on Python 3 but are split across
# ``urllib`` and ``urlparse`` on Python 2, so try the modern location first.
try:  # Python 3+
    from urllib.parse import (
        parse_qs, parse_qsl, urlencode, urlparse, urlunparse
    )
except ImportError:  # Python 2
    from urllib import urlencode
    from urlparse import parse_qs, parse_qsl, urlparse, urlunparse
def get_query_field(url, field):
    """
    Given a URL, return a list of values for the given ``field`` in the
    URL's query string.

    :param url: The URL whose query string is inspected.
    :param field: Name of the query string field to look up.
    :returns: A list with one entry per occurrence of ``field``, or an
        empty list if the field does not appear in the query string.

    The query string is parsed with ``parse_qs`` using its default
    settings, so occurrences of ``field`` with a blank value are not
    reported.

    >>> get_query_field('http://example.net', field='foo')
    []
    >>> get_query_field('http://example.net?foo=bar', field='foo')
    ['bar']
    >>> get_query_field('http://example.net?foo=bar&foo=baz', field='foo')
    ['bar', 'baz']
    """
    values_by_field = parse_qs(urlparse(url).query)
    # ``dict.get`` gives the "missing field" default without needing a
    # try/except around the lookup.
    return values_by_field.get(field, [])
def set_query_field(url, field, value, replace=False):
    """
    Given a URL and a new field/value pair, add the new field/value to
    the URL's query string.

    :param url: The URL to modify.
    :param field: Name of the query string field to set.
    :param value: Value to associate with ``field``.
    :param replace: If False (the default), the new pair is appended after
        any existing pairs.  If True, the first existing occurrence of
        ``field`` is overwritten in place (keeping its position in the
        query string) and any further occurrences are dropped; if the
        field is not present, the pair is appended.
    :returns: The new URL as a string.  The scheme, netloc, path, params
        and fragment of ``url`` are carried over unchanged.

    Existing fields with blank values (e.g. ``?flag=``) are kept in the
    rebuilt query string.

    >>> set_query_field('http://example.net', field='hello', value='world')
    'http://example.net?hello=world'
    >>> set_query_field('http://example.net?hello=world', 'hello', 'alex')
    'http://example.net?hello=world&hello=alex'
    >>> set_query_field('http://example.net?hello=world',
    ...                 field='hello', value='alex', replace=True)
    'http://example.net?hello=alex'
    >>> set_query_field('http://example.net?hello=world&foo=bar',
    ...                 field='hello', value='alex', replace=True)
    'http://example.net?hello=alex&foo=bar'
    """
    # Parse out the different parts of the URL (once; the query string is
    # read from the same parse result).
    components = urlparse(url)

    # keep_blank_values=True so that unrelated fields with empty values are
    # not silently lost when the query string is re-encoded.
    query_pairs = parse_qsl(components.query, keep_blank_values=True)

    if replace:
        updated_pairs = []
        replaced = False
        for name, existing_value in query_pairs:
            if name != field:
                updated_pairs.append((name, existing_value))
            elif not replaced:
                # Overwrite the first occurrence where it stands, so the
                # order of fields in the URL is preserved.
                updated_pairs.append((field, value))
                replaced = True
            # Any later occurrences of ``field`` are discarded.
        if not replaced:
            updated_pairs.append((field, value))
        query_pairs = updated_pairs
    else:
        query_pairs.append((field, value))

    new_query_str = urlencode(query_pairs)

    # Finally, construct the new URL
    new_components = (
        components.scheme,
        components.netloc,
        components.path,
        components.params,
        new_query_str,
        components.fragment
    )
    return urlunparse(new_components)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
"""
Tests for my utility functions for dealing with URL query strings.
"""
# Standard library
import string

# Third-party test tooling
from hypothesis import given
from hypothesis.strategies import text
import pytest

# Module under test
from myurlparse import get_query_field, set_query_field
@pytest.mark.parametrize(
    'url,field,expected', [
        ('http://example.net', 'foo', []),
        ('http://example.net?foo=bar', 'foo', ['bar']),
        ('http://example.net?foo=bar&foo=baz', 'foo', ['bar', 'baz']),
    ]
)
def test_simple_examples_of_query_get(url, field, expected):
    """
    get_query_field() returns the expected list of values for a URL with
    zero, one, or two occurrences of the field.
    """
    values = get_query_field(url, field)
    assert values == expected
@pytest.mark.parametrize(
    'url,field,value,expected', [
        (
            'http://example.net',
            'hello', 'world',
            'http://example.net?hello=world',
        ),
        (
            'http://example.net?hello=world',
            'hello', 'alex',
            'http://example.net?hello=world&hello=alex',
        ),
    ]
)
def test_simple_examples_of_query_set(url, field, value, expected):
    """
    With replace=False, set_query_field() appends the new field/value pair
    to whatever query string the URL already has.
    """
    new_url = set_query_field(url, field, value, replace=False)
    assert new_url == expected
def test_replacing_url_component():
    """
    With replace=True, setting a field that already exists in the query
    string swaps its value instead of adding a second occurrence.
    """
    new_url = set_query_field(
        'http://example.net?hello=world', 'hello', 'alex', replace=True
    )
    assert new_url == 'http://example.net?hello=alex'
@given(text(min_size=1))
def test_a_url_with_no_query_string_always_has_empty_values(field):
    """
    Whatever field name is asked for, a URL without a query string has no
    values for it, so the lookup returns an empty list.
    """
    values = get_query_field('http://example.net', field)
    assert values == []
@given(
    text(min_size=1, alphabet=string.ascii_letters),
    text(min_size=1, alphabet=string.ascii_letters)
)
def test_adding_a_field_is_included_in_the_string(field, value):
    """
    If we add a (field, value) pair to a URL, then they are both present
    in the final URL, and the value can be read back for that field.
    """
    url = set_query_field('http://example.net', field, value)
    assert field in url
    assert value in url
    # The substring checks above pass trivially whenever the generated text
    # already occurs in the base URL (e.g. 'h', 'net'), so also confirm the
    # pair really landed in the query string by reading it back.
    assert get_query_field(url, field) == [value]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment