Skip to content

Instantly share code, notes, and snippets.

@zain
Forked from anonymous/gist:4291721
Created December 15, 2012 06:29
Show Gist options
  • Save zain/4291724 to your computer and use it in GitHub Desktop.
Save zain/4291724 to your computer and use it in GitHub Desktop.
import re
import string
# Match the double-quoted string at the very start of the input and capture
# whatever follows it.
#   quote -- the leading "..." token, surrounding quotes included
#   rest  -- the remainder of the line with leading whitespace removed
#
# The first alternative treats a backslash as escaping exactly one following
# character, so an escaped backslash right before the closing quote (as in
# "\\" followed by another quoted token) ends the string where it should.
# The second alternative is a lenient fallback for input that leaves a
# backslash unescaped (e.g. "a\"): it simply stops at the next bare quote.
quotere = re.compile(
    r"""(?P<quote>
            "(?:[^"\\]|\\.)*"   # well-formed: backslash protects the next char
          |
            "[^"]*"             # fallback: run up to the next bare quote
        )
        \s*(?P<rest>.*)$        # optional whitespace, then the remainder""",
    re.VERBOSE)
def debug(s):
    """Write the diagnostic message *s* to standard output.

    Args:
        s: the object to print; it is printed followed by a newline.
    """
    # The parenthesised single-argument form prints the same thing whether
    # `print` is the Python 2 statement or the Python 3 function.
    print(s)
try:
    _STRING_TYPES = basestring  # Python 2: covers both str and unicode
except NameError:
    _STRING_TYPES = str  # Python 3 has no basestring


def _paren_group_end(text, honor_quotes):
    """Return the index just past the ')' that closes the '(' at text[0].

    When honor_quotes is true, parentheses inside a double-quoted string are
    not counted, and a backslash inside the quotes protects the character
    that follows it.  Returns None if the group is never closed.
    """
    depth = 0
    in_quote = False
    escaped = False
    for pos, char in enumerate(text):
        if in_quote:
            if escaped:
                escaped = False
            elif char == '\\':
                escaped = True
            elif char == '"':
                in_quote = False
        elif honor_quotes and char == '"':
            in_quote = True
        elif char == '(':
            depth += 1
        elif char == ')':
            depth -= 1
            if depth == 0:
                return pos + 1
    return None


def imapsplit(imapstring):
    """Split a string from an IMAP conversation into its components.

    Three kinds of component are recognised, each returned verbatim:
    parenthesized groups (nested groups stay inside their outer group and
    parentheses inside quoted strings are not counted), double-quoted
    strings (quotes included), and bare whitespace-delimited words.

    Args:
        imapstring: the response text, or a non-string sequence whose
            even-indexed items are text ending in a ``{N}`` literal size
            marker and whose odd-indexed items are the literal values.

    Returns:
        A list of component strings.  Literal values taken from a sequence
        input are returned backslash-escaped and wrapped in double quotes.

    Raises:
        ValueError: if a parenthesized group is never closed or a quoted
            string cannot be matched.
    """
    if not isinstance(imapstring, _STRING_TYPES):
        debug("imapsplit() got a non-string input; working around.")
        # Sometimes, imaplib will throw us a tuple if the input
        # contains a literal.  See Python bug
        # #619732 at https://sourceforge.net/tracker/index.php?func=detail&aid=619732&group_id=5470&atid=105470
        # One example is:
        # result[0] = '() "\\\\" Admin'
        # result[1] = ('() "\\\\" {19}', 'Folder\\2')
        #
        # This function will effectively get result[0] or result[1], so
        # if we get the result[1] version, we need to parse apart the tuple
        # and figure out what to do with it.  Each even-numbered
        # part of it should end with the {} number, and each odd-numbered
        # part should be directly a part of the result.  We'll
        # artificially quote it to help out.
        retval = []
        for i, arg in enumerate(imapstring):
            if i % 2:
                # Odd: a literal value.  Quote it (same escaping as imaplib)
                # and append it as a single component.
                arg = arg.replace('\\', '\\\\')
                arg = arg.replace('"', '\\"')
                arg = '"%s"' % arg
                debug("imapsplit() non-string [%d]: Appending %s" %
                      (i, arg))
                retval.append(arg)
            else:
                # Even: text that ends with a literal size specifier.
                # Strip the specifier, then run what remains through the
                # regular string parser.
                arg = re.sub(r'\{\d+\}$', '', arg)
                debug("imapsplit() non-string [%d]: Feeding %s to recursion" %
                      (i, arg))
                retval.extend(imapsplit(arg))
        debug("imapsplit() non-string: returning %s" % str(retval))
        return retval

    workstr = imapstring.strip()
    retval = []
    while workstr:
        if workstr[0] == '(':
            # Parenthesized fragment (...()...).  Prefer the quote-aware
            # scan so that a ')' inside a quoted string, as in
            # ("Smiley Label :)"), does not end the group early.  If that
            # scan finds no end (e.g. a stray unbalanced quote), fall back
            # to plain parenthesis counting.
            end = _paren_group_end(workstr, True)
            if end is None:
                end = _paren_group_end(workstr, False)
            if end is None:
                raise ValueError(
                    "imapsplit(): unbalanced parenthesis in %r" % (workstr,))
            retval.append(workstr[:end])
            workstr = workstr[end:].lstrip()
        elif workstr[0] == '"':
            # Quoted fragment '"...\"..."'
            m = quotere.match(workstr)
            if m is None:
                raise ValueError(
                    "imapsplit(): unterminated quoted string in %r"
                    % (workstr,))
            retval.append(m.group('quote'))
            workstr = m.group('rest')
        else:
            # Bare word: everything up to the next run of whitespace.
            splits = workstr.split(None, 1)
            if not splits:
                # There was not even an unquoted word.
                break
            retval.append(splits[0])
            # split() has already stripped the whitespace between the word
            # and whatever follows it; nothing following means we are done.
            workstr = splits[1] if len(splits) == 2 else ''
    return retval
# In: X-GM-THRID 1329675145120615090 X-GM-MSGID 1329675145120615090 X-GM-LABELS ("Smiley Label :)") UID 236563 FLAGS (\\Seen)
# Out: ['X-GM-THRID', '1329675145120615090', 'X-GM-MSGID', '1329675145120615090', 'X-GM-LABELS', '("Smiley Label :)")', 'UID', '236563', 'FLAGS', '(\\Seen)']
# In: (\\HasNoChildren) "." "INBOX.Sent"
# Out: ['(\\HasNoChildren)', '"."', '"INBOX.Sent"']
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment