Skip to content

Instantly share code, notes, and snippets.

@internaut
Last active November 7, 2019 15:24
Show Gist options
  • Save internaut/3ab8823856ff5d1f47bd24afe5eaac45 to your computer and use it in GitHub Desktop.
Save internaut/3ab8823856ff5d1f47bd24afe5eaac45 to your computer and use it in GitHub Desktop.
Split a string by multiple characters/strings. Test the function with pytest and hypothesis.
def str_multisplit(s, sep):
    """
    Split string `s` by all characters/strings in `sep`.

    The separators are applied one after another in the iteration order of
    `sep`: `s` is split by the first separator, every resulting part is split
    by the next one, and so on. Leading, trailing and adjacent separators
    produce empty parts, just like `str.split` with an explicit separator.

    :param s: a string (`str` or `bytes`) to split
    :param sep: list, tuple or set of characters/strings to use for splitting;
                for a `bytes` input the separators must be `bytes`, too
    :return: list of split string parts
    :raises ValueError: if `s` or `sep` is of an unsupported type or if `sep`
                        contains an empty separator
    """
    if not isinstance(s, (str, bytes)):
        raise ValueError('`s` must be of type `str` or `bytes`')

    if not isinstance(sep, (list, tuple, set)):
        raise ValueError('`sep` must be of type `list`, `tuple` or `set`')

    # An empty separator can never be split on. For `bytes` input the empty
    # separator is b'', which the plain '' check does not catch; test for it
    # only then, so that `str` inputs are validated exactly as before.
    if '' in sep or (isinstance(s, bytes) and b'' in sep):
        raise ValueError('`sep` must not contain an empty string')

    parts = [s]
    for c in sep:
        # re-split every part produced so far by the current separator
        parts = [piece for p in parts for piece in p.split(c)]

    return parts
import string
import pytest
from hypothesis import given
from hypothesis import strategies as st
from multisplit import str_multisplit
# Every ASCII punctuation character as its own single-character separator.
punct = [*string.punctuation]


@pytest.mark.parametrize(
    ('s', 'sep', 'res'),
    [
        ('Te;s,t', {';', ','}, ['Te', 's', 't']),
        ('US-Student', punct, ['US', 'Student']),
        ('-main_file.exe,', punct, ['', 'main', 'file', 'exe', '']),
    ],
)
def test_str_multisplit(s, sep, res):
    """Check `str_multisplit` against hand-written input/output examples."""
    actual = str_multisplit(s, sep)
    assert actual == res
#@given(s=st.text(), sep=st.lists(st.text(min_size=1, max_size=10)))  # <- try this and see how it fails
@given(s=st.text(), sep=st.lists(st.characters()))
def test_str_multisplit_hypothesis(s, sep):
    """Property-based test: arbitrary text split by arbitrary single characters."""
    parts = str_multisplit(s, sep)

    # 1. the result is always a plain list
    assert type(parts) is list

    # 2. splitting an empty string must give ['']
    if not s:
        assert parts == ['']

    # 3. without any separators the input comes back untouched, i.e. [s]
    if not sep:
        assert parts == [s]

    # 4. every returned part must ...
    for part in parts:
        # ... occur in the input string
        assert part in s
        # ... be free of every separator
        for c in sep:
            assert c not in part

    # 5. each occurrence of a *unique* separator adds exactly one part, so the
    #    number of parts is the total number of occurrences plus 1
    n_expected_splits = sum(s.count(c) for c in set(sep))
    assert len(parts) == n_expected_splits + 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment