Skip to content

Instantly share code, notes, and snippets.

@TimSC
Last active September 27, 2019 22:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save TimSC/ded453db47598207d650d704492ccd45 to your computer and use it in GitHub Desktop.
Save TimSC/ded453db47598207d650d704492ccd45 to your computer and use it in GitHub Desktop.
Split a quoted string by a delimiter using Python
from __future__ import unicode_literals
from __future__ import print_function
def SplitQuoted(inStr, delim=','):
    """Split a string on a delimiter, honouring double quotes and backslashes.

    A delimiter occurrence is NOT treated as a split point when it lies
    between a pair of unescaped double quotes, or when it is immediately
    preceded by an active backslash. Quotes and backslashes are kept in the
    returned pieces verbatim; nothing is unquoted or unescaped. An opening
    quote that is never closed is treated as running to the end of the
    string.

    Example: SplitQuoted('a,"b,c",d') returns ['a', '"b,c"', 'd'].

    Args:
        inStr: The text to split.
        delim: The separator; may be longer than one character.

    Returns:
        A list of substrings of inStr. There is always at least one element
        (an empty input gives ['']).

    Raises:
        ValueError: If delim is empty, or contains a double quote or a
            backslash.
    """
    if not delim:
        # str.find('') matches at every index, so the delimiter scan below
        # would never advance and the function would loop forever.
        raise ValueError("Delimiter must not be empty")
    if '"' in delim or '\\' in delim:
        raise ValueError("Delimiter not supported")

    length = len(inStr)

    # Pass 1: record the index of every character that follows an active
    # backslash. Jumping two characters past each backslash means that in a
    # doubled backslash the second one is escaped and escapes nothing itself.
    escaped_pos = set()
    cursor = 0
    while cursor < length:
        pos = inStr.find("\\", cursor)
        if pos == -1:
            break
        escaped_pos.add(pos + 1)
        cursor = pos + 2

    # Pass 2: pair up unescaped double quotes into (open, close) index ranges.
    quoted_ranges = []
    open_pos = None
    cursor = 0
    while cursor < length:
        pos = inStr.find('"', cursor)
        if pos == -1:
            break
        if pos not in escaped_pos:
            if open_pos is None:
                open_pos = pos
            else:
                quoted_ranges.append((open_pos, pos))
                open_pos = None
        cursor = pos + 1
    if open_pos is not None:
        # Quote not closed: everything up to the end of the string is quoted.
        quoted_ranges.append((open_pos, length))

    # Pass 3: walk the delimiter occurrences from left to right and cut at
    # those that are neither quoted nor escaped.
    out = []
    step = len(delim)
    field_start = 0
    range_idx = 0
    cursor = 0
    while cursor < length:
        pos = inStr.find(delim, cursor)
        if pos == -1:
            break

        # Matches arrive in increasing order, so the index of the relevant
        # quoted range only ever moves forward.
        while (range_idx < len(quoted_ranges) - 1
               and pos > quoted_ranges[range_idx][1]):
            range_idx += 1
        in_quotes = False
        if quoted_ranges:
            range_start, range_end = quoted_ranges[range_idx]
            in_quotes = range_start < pos < range_end

        if in_quotes or pos in escaped_pos:
            # Rejected match: advance by a single character only, so that a
            # genuine delimiter overlapping this one is still found.
            cursor = pos + 1
        else:
            out.append(inStr[field_start:pos])
            field_start = cursor = pos + step

    out.append(inStr[field_start:])
    return out
if __name__=="__main__":
    # Self-check table. Each entry pairs the positional arguments passed to
    # SplitQuoted with the list it is expected to return. One True/False
    # line is printed per entry, in order.
    cases = [
        (('a = foo, b = bar, c = "foo, bar", d = false, e = "false", f = "foo\\", bar", "xxx',),
         ['a = foo', ' b = bar', ' c = "foo, bar"', ' d = false', ' e = "false"', ' f = "foo\\", bar"', ' "xxx']),
        (('a = foo, b = bar, c = "foo, bar", d = false, e = "false", f = "foo\\", bar"',),
         ['a = foo', ' b = bar', ' c = "foo, bar"', ' d = false', ' e = "false"', ' f = "foo\\", bar"']),
        (('',),
         ['']),
        ((',',),
         ['', '']),
        (('",",""',),
         ['","', '""']),
        (('a=foo; b=bar; c="foo; bar"; d=false; e="false"; f="foo\\"; bar"', ';'),
         ['a=foo', ' b=bar', ' c="foo; bar"', ' d=false', ' e="false"', ' f="foo\\"; bar"']),
        (("\\,",),
         ["\\,"]),
        ((',"',),
         ['', '"']),
        (('",',),
         ['",']),
        (('a',),
         ['a']),
        (('gr<>rg', '<>'),
         ['gr', 'rg']),
        (('gr<>rg"<>"foo<>ed>ple<fwr', '<>'),
         ['gr', 'rg"<>"foo', 'ed>ple<fwr']),
    ]
    for call_args, expected in cases:
        print(SplitQuoted(*call_args) == expected)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment