Created
March 27, 2020 14:24
-
-
Save cupdike/2d3ce5b3aa31a77f6b27d400d7c531b9 to your computer and use it in GitHub Desktop.
Python string.partition() example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Demonstrates str.partition() used to split a string by an ordered sequence of delimiters.
# Not terribly useful in practice: the same result is pretty easy to get with a regex.
s = "apple AND banana AND cherry AND date OR elderberry BUT fig"

# One entry per expected split, in the order the delimiters occur in `s`.
delims = [" AND "] * 3 + [" OR ", " BUT "]
# delims == [' AND ', ' AND ', ' AND ', ' OR ', ' BUT ']
def splitByDelimList(str, delimList):
    """Split a string on a sequence of delimiters, applied in order.

    Each delimiter is used at most once: the text is partitioned on the first
    delimiter, the remainder on the second, and so on. Every partition result
    is printed as a debugging aid.

    Args:
        str: The text to split. (The name shadows the builtin but is kept so
            existing keyword-argument callers keep working.)
        delimList: Delimiters in the order they are expected to occur. The
            iterable is only read, never modified.

    Returns:
        The list of tokens. The text left after the last delimiter that
        matched is always the final token, so a string ending in a delimiter
        produces a trailing ``''``. If a delimiter is not found, splitting
        stops there and the unsplit remainder becomes the final token. An
        empty ``delimList`` yields ``[str]``.

    Raises:
        ValueError: If a delimiter is the empty string (raised by
            ``str.partition``).
    """
    tokens = []
    remainder = str
    for delim in delimList:
        p = remainder.partition(delim)
        print("p:\t", p)
        head, sep, tail = p
        if not sep:
            # Delimiter absent: partition() returned (remainder, '', '').
            # Stop here so no spurious empty token is emitted.
            break
        tokens.append(head)
        remainder = tail
    # Whatever follows the last matched delimiter is the final token.
    tokens.append(remainder)
    return tokens
tokensByPartition = splitByDelimList(s, delims)
# Debug output printed by the call above (one line per partition step):
# p:  ('apple', ' AND ', 'banana AND cherry AND date OR elderberry BUT fig')
# p:  ('banana', ' AND ', 'cherry AND date OR elderberry BUT fig')
# p:  ('cherry', ' AND ', 'date OR elderberry BUT fig')
# p:  ('date', ' OR ', 'elderberry BUT fig')
# p:  ('elderberry', ' BUT ', 'fig')
assert tokensByPartition == ["apple", "banana", "cherry", "date", "elderberry", "fig"]
### Using regex
import re

# Build a single alternation from the delimiters. dict.fromkeys() drops the
# repeated " AND " entries (keeping first-seen order), since listing the same
# alternative more than once adds nothing to the pattern.
regexDelims = "|".join(re.escape(delim) for delim in dict.fromkeys(delims))
print(regexDelims)
# \ AND\ |\ OR\ |\ BUT\ 
# NOTE: unlike the partition approach, this splits on any of the delimiters
# wherever they occur, regardless of their order in `delims`; the two agree
# for this input.
tokensRegex = re.split(regexDelims, s)
assert tokensRegex == tokensByPartition
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment