Skip to content

Instantly share code, notes, and snippets.

@alexwlchan
Created August 30, 2016 20:33
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save alexwlchan/1956efe1acb1f2947cbd575651a3d529 to your computer and use it in GitHub Desktop.
Save alexwlchan/1956efe1acb1f2947cbd575651a3d529 to your computer and use it in GitHub Desktop.
Helper functions for dealing with query strings in URLs
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
"""
Utility functions for dealing with URL query strings in Python,
i.e., URLs of the form
http://example.net?field1=value1&field2=value2
This module includes a pair of helper functions: one for getting the values
associated with a particular field; another for setting the query string
values within a URL.
"""
# The URL helpers live in different modules on Python 2 and Python 3;
# try the Python 3 location first and fall back to the Python 2 one.
try:  # Python 3+
    from urllib.parse import (
        parse_qs, parse_qsl, urlencode, urlparse, urlunparse
    )
except ImportError:  # Python 2
    from urllib import urlencode
    from urlparse import parse_qs, parse_qsl, urlparse, urlunparse
def get_query_field(url, field):
    """
    Given a URL, return a list of values for the given ``field`` in the
    URL's query string.

    The values are returned in the order they appear in the URL.  If the
    field does not occur in the query string, the result is an empty list.
    Blank values are dropped by ``parse_qs`` by default, so a field that
    only appears as ``field=`` is reported as having no values.

    >>> get_query_field('http://example.net', field='foo')
    []
    >>> get_query_field('http://example.net?foo=bar', field='foo')
    ['bar']
    >>> get_query_field('http://example.net?foo=bar&foo=baz', field='foo')
    ['bar', 'baz']
    """
    # parse_qs maps each field name to the list of its values.
    values_by_field = parse_qs(urlparse(url).query)
    return values_by_field.get(field, [])
def set_query_field(url, field, value, replace=False):
    """
    Given a URL and a new field/value pair, add the new field/value to
    the URL's query string and return the resulting URL.

    If ``replace`` is False (the default), the new pair is appended after
    any existing pairs, even if the field is already present.

    If ``replace`` is True, the first existing occurrence of ``field`` is
    overwritten in place (so the order of the other fields is preserved)
    and any further occurrences are removed.  If the field is not present
    at all, the pair is appended.

    Existing fields with empty values (``a=``) are kept as they are, and
    the other parts of the URL (scheme, host, path, params, fragment) are
    carried over unchanged.  e.g.

    >>> set_query_field('http://example.net', field='hello', value='world')
    'http://example.net?hello=world'
    >>> set_query_field('http://example.net?hello=world', 'hello', 'alex')
    'http://example.net?hello=world&hello=alex'
    >>> set_query_field('http://example.net?hello=world',
    ...                 field='hello', value='alex', replace=True)
    'http://example.net?hello=alex'
    >>> set_query_field('http://example.net?hello=world&foo=bar',
    ...                 field='hello', value='alex', replace=True)
    'http://example.net?hello=alex&foo=bar'
    """
    # Parse out the different parts of the URL.
    components = urlparse(url)

    # keep_blank_values=True: without it, unrelated fields such as ``a=``
    # would silently vanish from the rebuilt URL.
    query_pairs = parse_qsl(components.query, keep_blank_values=True)

    if replace:
        updated_pairs = []
        already_set = False
        for name, existing_value in query_pairs:
            if name != field:
                updated_pairs.append((name, existing_value))
            elif not already_set:
                # Overwrite the first occurrence where it stands so the
                # field keeps its position in the query string.
                updated_pairs.append((field, value))
                already_set = True
            # Later duplicates of the field are dropped.
        if not already_set:
            updated_pairs.append((field, value))
        query_pairs = updated_pairs
    else:
        query_pairs.append((field, value))

    # Finally, construct the new URL; only the query component changes.
    return urlunparse(components._replace(query=urlencode(query_pairs)))
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
"""
Tests for my utility functions for dealing with URL query strings.
"""
import string
from hypothesis import given
from hypothesis.strategies import text
import pytest
from myurlparse import get_query_field, set_query_field
@pytest.mark.parametrize(
    'url,field,expected', [
        # No query string at all.
        ('http://example.net', 'foo', []),
        # A single value for the field.
        ('http://example.net?foo=bar', 'foo', ['bar']),
        # Repeated fields yield every value, in URL order.
        ('http://example.net?foo=bar&foo=baz', 'foo', ['bar', 'baz']),
    ]
)
def test_simple_examples_of_query_get(url, field, expected):
    """
    Test some simple examples with get_query_field().
    """
    assert get_query_field(url, field) == expected
@pytest.mark.parametrize(
    'url,field,value,expected', [
        # Adding a field to a URL with no query string.
        (
            'http://example.net',
            'hello', 'world',
            'http://example.net?hello=world',
        ),
        # Adding a second value for a field that is already present.
        (
            'http://example.net?hello=world',
            'hello', 'alex',
            'http://example.net?hello=world&hello=alex',
        ),
    ]
)
def test_simple_examples_of_query_set(url, field, value, expected):
    """
    Test some simple examples with set_query_field() and replace=False.
    """
    assert set_query_field(url, field, value, replace=False) == expected
def test_replacing_url_component():
    """
    Test changing a query string with an existing field with replace=True.
    """
    url = 'http://example.net?hello=world'
    expected = 'http://example.net?hello=alex'
    assert set_query_field(url, 'hello', 'alex', replace=True) == expected
@given(text(min_size=1))
def test_a_url_with_no_query_string_always_has_empty_values(field):
    """
    If a URL doesn't have a query string, then retrieving field values
    always yields an empty list, whatever the field name is.
    """
    url = 'http://example.net'
    assert get_query_field(url, field) == []
@given(
    text(min_size=1, alphabet=string.ascii_letters),
    text(min_size=1, alphabet=string.ascii_letters)
)
def test_adding_a_field_is_included_in_the_string(field, value):
    """
    If we add a (field, value) pair to a URL, then they are both present
    in the final URL.
    """
    url = set_query_field('http://example.net', field, value)

    # Read the pair back out of the query string instead of checking for
    # substrings: a plain ``field in url`` test also passes when the text
    # merely occurs in the scheme or host (e.g. 'h', 'net').
    assert get_query_field(url, field) == [value]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment