Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
python string split iterator
#! /usr/bin/env python3
import sys
import re
def isplit(string, delimiter = None):
"""Like string.split but returns an iterator (lazy)
Multiple character delimters are not handled.
"""
if delimiter is None:
# Handle whitespace by default
delim = r"\s"
elif len(delimiter) != 1:
raise ValueError("Can only handle single character delimiters", delimiter)
else:
# Escape, incase it's "\", "*" etc.
delim = re.escape(delimiter)
return (x.group(0) for x in re.finditer(r"[^{}]+".format(delim), string))
def main():
# Wrapper to make it a list
def helper(*args, **kwargs):
return list(isplit(*args, **kwargs))
# Normal delimiters
assert helper("1,2,3", ",") == ["1", "2", "3"]
assert helper("1;2;3,", ";") == ["1", "2", "3,"]
assert helper("1;2 ;3, ", ";") == ["1", "2 ", "3, "]
# Whitespace
assert helper("1 2 3") == ["1", "2", "3"]
assert helper("1\t2\t3") == ["1", "2", "3"]
assert helper("1\t2 \t3") == ["1", "2", "3"]
assert helper("1\n2\n3") == ["1", "2", "3"]
# Surrounding whitespace dropped
assert helper(" 1 2 3 ") == ["1", "2", "3"]
# Regex special characters
assert helper(r"1\2\3", "\\") == ["1", "2", "3"]
assert helper(r"1*2*3", "*") == ["1", "2", "3"]
# No multi-char delimiters allowed
try:
helper(r"1,.2,.3", ",.")
assert False
except ValueError:
pass
return 0
if __name__ == "__main__":
sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.