Split a string by multiple characters/strings. Test the function with pytest and hypothesis.
def str_multisplit(s, sep):
    """
    Split string `s` by all characters/strings in `sep`.

    The separators are applied one after another: `s` is split by the first
    separator, every resulting part is split by the second one, and so on.
    For overlapping multi-character separators the result therefore depends
    on the order in which they are applied. A `list` or `tuple` is applied in
    the given order; a `set` of separators is applied longest first (ties in
    lexicographic order) so that the result does not depend on the set's
    iteration order.

    :param s: a string (`str` or `bytes`) to split
    :param sep: list, tuple or set of characters/strings to use for splitting;
                must be of the same kind (`str` or `bytes`) as `s`
    :return: list of split string parts
    :raises ValueError: if `s` or `sep` has an unsupported type or if `sep`
                        contains an empty separator
    """
    if not isinstance(s, (str, bytes)):
        raise ValueError('`s` must be of type `str` or `bytes`')
    if not isinstance(sep, (list, tuple, set)):
        raise ValueError('`sep` must be of type `list`, `tuple` or `set`')
    # Check both flavours: an empty bytes separator would otherwise only fail
    # later inside `bytes.split` with an unrelated message.
    if '' in sep or b'' in sep:
        raise ValueError('`sep` must not contain an empty string')

    # A set has no defined iteration order, so fix one. Sets holding anything
    # other than separators of the same kind as `s` are left untouched and
    # behave exactly as they would without this step.
    kind = str if isinstance(s, str) else bytes
    if isinstance(sep, set) and all(isinstance(c, kind) for c in sep):
        sep = sorted(sep, key=lambda c: (-len(c), c))

    parts = [s]
    for c in sep:
        # Re-split every part produced so far by the next separator.
        parts = [piece for p in parts for piece in p.split(c)]
    return parts
import string | |
import pytest | |
from hypothesis import given | |
from hypothesis import strategies as st | |
from multisplit import str_multisplit | |
# Every ASCII punctuation character, one list item each; shared by the
# parametrized cases below as a separator collection.
punct = list(string.punctuation)


@pytest.mark.parametrize(
    's, sep, res',
    [
        ('Te;s,t', {';', ','}, ['Te', 's', 't']),
        ('US-Student', punct, ['US', 'Student']),
        ('-main_file.exe,', punct, ['', 'main', 'file', 'exe', '']),
    ],
)
def test_str_multisplit(s, sep, res):
    """Check `str_multisplit` against hand-written input/expected pairs."""
    actual = str_multisplit(s, sep)
    assert actual == res
# @given(s=st.text(), sep=st.lists(st.text(min_size=1, max_size=10)))  # <- try this and see how it fails
@given(s=st.text(), sep=st.lists(st.characters()))
def test_str_multisplit_hypothesis(s, sep):
    """Property-based checks of `str_multisplit` with single-character separators."""
    res = str_multisplit(s, sep)

    # 1. the result is always a plain list
    assert type(res) is list

    # 2. an empty input string must produce ['']
    if not s:
        assert res == ['']

    # 3. with no separators at all, the input comes back unsplit as [s]
    if not sep:
        assert res == [s]

    # 4. every part must be a substring of s and must be free of all separators
    for part in res:
        assert part in s
        assert not any(c in part for c in sep)

    # 5. the number of parts is one more than the total number of occurrences
    #    of each *unique* separator in s
    n_expected_splits = sum(s.count(c) for c in set(sep))
    assert len(res) == n_expected_splits + 1
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment