Last active
September 27, 2019 22:57
-
-
Save TimSC/ded453db47598207d650d704492ccd45 to your computer and use it in GitHub Desktop.
Split a quoted string by a delimiter using Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import unicode_literals | |
from __future__ import print_function | |
def _escaped_positions(text):
    """Return the set of indices that immediately follow a backslash."""
    escaped = set()
    pos = text.find("\\")
    while pos != -1:
        escaped.add(pos + 1)
        # Resume after the escaped character so that a doubled backslash is
        # treated as one escaped backslash, not two separate escapes.
        pos = text.find("\\", pos + 2)
    return escaped


def _quoted_ranges(text, escaped):
    """Return (open, close) index pairs of double-quoted sections, in order."""
    ranges = []
    open_pos = None
    pos = text.find('"')
    while pos != -1:
        if pos not in escaped:
            if open_pos is None:
                open_pos = pos
            else:
                ranges.append((open_pos, pos))
                open_pos = None
        pos = text.find('"', pos + 1)
    if open_pos is not None:
        # A quote that is never closed extends to the end of the string.
        ranges.append((open_pos, len(text)))
    return ranges


def SplitQuoted(inStr, delim=','):
    """Split inStr on delim, ignoring quoted or escaped delimiters.

    A delimiter occurrence is not treated as a separator when it lies
    strictly inside a double-quoted section, or when it is immediately
    preceded by a backslash escape. A double quote preceded by a backslash
    escape does not open or close a quoted section. Quotes and backslashes
    are kept verbatim in the returned fields.

    Args:
        inStr: The string to split.
        delim: The separator string. It must be non-empty and must not
            contain a double quote or a backslash.

    Returns:
        A list of the fields, in order. The list always has at least one
        element; an empty input yields [''].

    Raises:
        ValueError: If delim is empty or contains a double quote or a
            backslash.
    """
    # An empty delimiter matches at every position without consuming any
    # input, so the scan below could never make progress.
    if not delim or '"' in delim or '\\' in delim:
        raise ValueError("Delimiter not supported")

    escaped = _escaped_positions(inStr)
    ranges = _quoted_ranges(inStr, escaped)

    out = []
    step = len(delim)
    last_range = len(ranges) - 1
    range_idx = 0
    field_start = 0
    pos = inStr.find(delim)
    while pos != -1:
        # Matches are visited left to right, so the index of the candidate
        # quoted range only ever needs to move forward.
        while range_idx < last_range and pos > ranges[range_idx][1]:
            range_idx += 1
        in_quotes = (bool(ranges)
                     and ranges[range_idx][0] < pos < ranges[range_idx][1])

        if pos in escaped:
            # Only the first character of this match is escaped; a real
            # delimiter may start on the very next character, so resume the
            # search one character on rather than a whole delimiter on.
            resume = pos + 1
        elif in_quotes:
            resume = pos + step
        else:
            out.append(inStr[field_start:pos])
            field_start = pos + step
            resume = field_start
        pos = inStr.find(delim, resume)

    out.append(inStr[field_start:])
    return out
if __name__=="__main__":
    # Self-check table: each entry pairs the positional arguments passed to
    # SplitQuoted with the list of fields the call is expected to return.
    cases = [
        (('a = foo, b = bar, c = "foo, bar", d = false, e = "false", f = "foo\\", bar", "xxx',),
         ['a = foo', ' b = bar', ' c = "foo, bar"', ' d = false', ' e = "false"', ' f = "foo\\", bar"', ' "xxx']),
        (('a = foo, b = bar, c = "foo, bar", d = false, e = "false", f = "foo\\", bar"',),
         ['a = foo', ' b = bar', ' c = "foo, bar"', ' d = false', ' e = "false"', ' f = "foo\\", bar"']),
        (('',), ['']),
        ((',',), ['', '']),
        (('",",""',), ['","', '""']),
        (('a=foo; b=bar; c="foo; bar"; d=false; e="false"; f="foo\\"; bar"', ';'),
         ['a=foo', ' b=bar', ' c="foo; bar"', ' d=false', ' e="false"', ' f="foo\\"; bar"']),
        (("\\,",), ["\\,"]),
        ((',"',), ['', '"']),
        (('",',), ['",']),
        (('a',), ['a']),
        (('gr<>rg', '<>'), ['gr', 'rg']),
        (('gr<>rg"<>"foo<>ed>ple<fwr', '<>'), ['gr', 'rg"<>"foo', 'ed>ple<fwr']),
    ]

    for call_args, expected in cases:
        # Prints True when the actual split equals the expected fields.
        print(SplitQuoted(*call_args) == expected)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment