Skip to content

Instantly share code, notes, and snippets.

@devdave
Forked from miohtama/gist:5389146
Created March 11, 2014 21:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save devdave/9495505 to your computer and use it in GitHub Desktop.
Save devdave/9495505 to your computer and use it in GitHub Desktop.
import email
def _payload_as_utf8(part):
    """Return the transfer-decoded payload of a leaf MIME part as UTF-8 bytes.

    When the part declares no charset, or declares one Python does not know,
    the transfer-decoded bytes are returned unchanged because they cannot be
    reliably transcoded.
    """
    payload = part.get_payload(decode=True)
    if payload is None:
        # get_payload(decode=True) yields None for multipart containers.
        return b""
    charset = part.get_content_charset()
    if charset is None:
        return payload
    try:
        return payload.decode(charset, "ignore").encode("utf8", "replace")
    except LookupError:
        # Unknown/misspelled charset name in the header: keep the raw bytes.
        return payload


def get_decoded_email_body(message_body):
    """Decode an email body.

    We try to get text/plain, but if there is not one then fall back to
    text/html. For multipart messages every leaf part is inspected (nested
    multiparts included); when several parts share a content type, the last
    one wins. Parts that are neither text/plain nor text/html (for example
    attachments) are ignored.

    If a part does not declare a character set, its transfer-decoded bytes
    are returned as they are.

    :param message_body: Raw message input, e.g. from imaplib (headers
        followed by a body, possibly quoted-printable or base64 encoded).
    :return: Message body as a UTF-8 encoded byte string with surrounding
        whitespace stripped; empty if a multipart message has no text part.
    """
    msg = email.message_from_string(message_body)

    if not msg.is_multipart():
        return _payload_as_utf8(msg).strip()

    # Both start as None so the HTML fallback is actually reachable.
    text = None
    html = None
    for part in msg.walk():
        if part.is_multipart():
            # Containers carry no body of their own; walk() yields their
            # children separately.
            continue
        content_type = part.get_content_type()
        if content_type == 'text/plain':
            text = _payload_as_utf8(part)
        elif content_type == 'text/html':
            html = _payload_as_utf8(part)

    if text is not None:
        return text.strip()
    if html is not None:
        return html.strip()
    return b""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment