internaut/multisplit.py

## multisplit.py
def str_multisplit(s, sep):
    """
    Break `s` apart at every occurrence of any separator listed in `sep`.

    The separators are applied one after another: the pieces produced by one
    separator are split again by the next one. Empty pieces are kept.

    :param s: the string (`str` or `bytes`) to break apart
    :param sep: list, tuple or set holding the separator characters/strings
    :return: list with the resulting pieces
    :raises ValueError: if `s` or `sep` has an unsupported type, or if `sep`
                        contains an empty string
    """
    if not isinstance(s, (str, bytes)):
        raise ValueError('`s` must be of type `str` or `bytes`')
    if not isinstance(sep, (list, tuple, set)):
        raise ValueError('`sep` must be of type `list`, `tuple` or `set`')
    if '' in sep:
        raise ValueError('`sep` must not contain an empty string')

    pieces = [s]
    for delimiter in sep:
        # re-split every piece obtained so far by the current separator
        pieces = [
            fragment
            for piece in pieces
            for fragment in piece.split(delimiter)
        ]
    return pieces

## test_multisplit.py
import string

import pytest
from hypothesis import given
from hypothesis import strategies as st

from multisplit import str_multisplit

# all ASCII punctuation marks, one single-character separator per list entry
punct = [*string.punctuation]

@pytest.mark.parametrize(
    's, sep, res',
    [
        ('Te;s,t', {';', ','}, ['Te', 's', 't']),
        ('US-Student', punct, ['US', 'Student']),
        ('-main_file.exe,', punct, ['', 'main', 'file', 'exe', '']),
    ],
)
def test_str_multisplit(s, sep, res):
    """Check hand-picked inputs against their expected split results."""
    actual = str_multisplit(s, sep)
    assert actual == res


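# Swap in the strategy below to see the test fail: multi-character separators
# can overlap one another, which breaks check 5 (the expected number of parts).
#@given(s=st.text(), sep=st.lists(st.text(min_size=1, max_size=10)))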
@given(s=st.text(), sep=st.lists(st.characters()))
def test_str_multisplit_hypothesis(s, sep):
    """Property-based check of `str_multisplit` with single-character separators."""
    parts = str_multisplit(s, sep)

    # 1. the result is always exactly a list
    assert type(parts) is list

    # 2. an empty input string always yields ['']
    if not s:
        assert parts == ['']

    # 3. without any separators the input comes back untouched, i.e. [s]
    if not sep:
        assert parts == [s]

    # 4. every part is a substring of s and is free of all separators
    for part in parts:
        assert part in s
        assert not any(c in part for c in sep)

    # 5. the number of parts is one more than the summed occurrences of each
    #    *unique* separator in s
    expected_n_parts = 1 + sum(s.count(c) for c in set(sep))
    assert len(parts) == expected_n_parts
	def str_multisplit(s, sep):
	    """
	    Split string `s` by all characters/strings in `sep`.

	    Each separator is applied in turn to the parts produced by the
	    separators before it; empty parts are kept in the result.

	    :param s: a string to split
	    :param sep: sequence or set of characters to use for splitting
	    :return: list of split string parts
	    :raises ValueError: if `s` is not `str`/`bytes`, if `sep` is not a
	                        `list`, `tuple` or `set`, or if `sep` contains an
	                        empty string
	    """
	    if not isinstance(s, (str, bytes)):
	        raise ValueError('`s` must be of type `str` or `bytes`')

	    if not isinstance(sep, (list, tuple, set)):
	        raise ValueError('`sep` must be of type `list`, `tuple` or `set`')

	    if '' in sep:
	        raise ValueError('`sep` must not contain an empty string')

	    parts = [s]
	    for c in sep:
	        # split every part collected so far by the current separator
	        parts_ = []
	        for p in parts:
	            parts_.extend(p.split(c))
	        parts = parts_

	    return parts
	import string

	import pytest
	from hypothesis import given
	from hypothesis import strategies as st

	from multisplit import str_multisplit

	# all ASCII punctuation marks, one single-character separator per list entry
	punct = [*string.punctuation]

	@pytest.mark.parametrize('s, sep, res', [
	    ('Te;s,t', {';', ','}, ['Te', 's', 't']),
	    ('US-Student', punct, ['US', 'Student']),
	    ('-main_file.exe,', punct, ['', 'main', 'file', 'exe', '']),
	])
	def test_str_multisplit(s, sep, res):
	    """Compare `str_multisplit` output with the expected parts for fixed examples."""
	    assert str_multisplit(s, sep) == res


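	# Swap in the strategy below to see the test fail: multi-character separators
	# can overlap one another, which breaks check 5 (the expected number of parts).
	#@given(s=st.text(), sep=st.lists(st.text(min_size=1, max_size=10)))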
	@given(s=st.text(), sep=st.lists(st.characters()))
	def test_str_multisplit_hypothesis(s, sep):
	    """Property-based test of `str_multisplit` using single-character separators."""
	    res = str_multisplit(s, sep)

	    # 1. always return a list
	    assert type(res) is list

	    # 2. if argument s is an empty string, result must be ['']
	    if len(s) == 0:
	        assert res == ['']

	    # 3. if sep is an empty sequence, result must be a list containing only the input s, i.e. [s]
	    if len(sep) == 0:
	        assert res == [s]

	    # 4. each substring must ...
	    for p in res:
	        assert p in s                          # ... be a substring of s, too
	        assert all(c not in p for c in sep)    # ... not contain any of separator strings sep

	    # 5. number of substrings in the result equals sum of the occurrences of each unique sep-item c in s plus 1
	    n_asserted_parts = 0
	    for c in set(sep):
	        n_asserted_parts += s.count(c)
	    assert len(res) == n_asserted_parts + 1