# Gist by @xtao, forked from sdkn104/decode_email.py (created February 24, 2024 17:44).
#
# Python script to decode email
import imaplib, email, email.parser, email.policy
import html2text
import pprint, sys
#
# Utilities for Debug
#
# Shared pretty-printer used for debug dumps.
pp = pprint.PrettyPrinter(indent=4, width=80)


def myprint(str, encoding="cp932"):
    """Print text after replacing characters *encoding* cannot represent.

    The text is round-tripped through *encoding* with the ``replace`` error
    handler, so every unencodable character is printed as ``?``.

    Args:
        str: Text to print. The name shadows the builtin but is kept so
            existing keyword callers keep working.
        encoding: Target console encoding; defaults to ``cp932``, the value
            that was previously hard-coded.
    """
    print(str.encode(encoding, "replace").decode(encoding))
#
# search mails in IMAP
#
def search_imap(host="imap.mail.yahoo.co.jp", user="sdkn104@yahoo.co.jp",
                password="xxxxxx", mailbox="Inbox", criteria="(ALL)"):
    """Search a mailbox over IMAP, dump each hit to a file and print it.

    For every message number returned by the search, the raw RFC822 bytes
    are written to ``mail_<num>.txt`` in the current directory, decoded with
    ``email2Text`` and the subject, date, sender and body are printed with
    ``myprint``.

    Args:
        host: IMAP-over-SSL server name.
        user: Login name.
        password: Login password.
        mailbox: Mailbox (or label) to select.
        criteria: IMAP search criteria, e.g. ``"(ALL)"`` or ``"UNSEEN"``.
    """
    # NOTE(security): the default credentials are embedded in the source.
    # Prefer passing them in from the environment or a prompt.
    mail = imaplib.IMAP4_SSL(host)
    try:
        mail.login(user, password)
        mail.list()
        mail.select(mailbox)
        typ, [data] = mail.search(None, criteria)
        print("searched")
        print(typ)
        # Only trust the payload as a list of message numbers when the
        # server answered OK; data is bytes, so test truthiness rather than
        # comparing against a str literal.
        message_nums = data.split() if typ == "OK" and data else []
        pp.pprint(message_nums)
        if message_nums:
            print("New Mail")
        else:
            print("Non")

        # for each mail searched
        for num in message_nums:
            # Message numbers arrive as bytes; decode them so file names and
            # log lines do not contain the b'...' repr.
            num_text = num.decode("ascii")
            print("START " + num_text + "--------------------------------------------------")
            # fetch whole message as RFC822 format
            result, d = mail.fetch(num, "(RFC822)")
            if result != "OK" or not d or not isinstance(d[0], tuple):
                print("fetch failed for " + num_text)
                continue
            raw = d[0][1]
            # save to file
            with open("mail_" + num_text + ".txt", "wb") as f:
                f.write(raw)
            print("####################################")
            msg = email2Text(raw)
            print("------------------------------------")
            myprint(msg["subject"])
            myprint(msg["date"])
            myprint(msg["from"])
            myprint(msg["body"] or "")
    finally:
        # closing: close() fails when no mailbox ended up selected, so treat
        # it as best effort and always log out.
        try:
            mail.close()
        except (imaplib.IMAP4.error, OSError):
            pass
        mail.logout()
#
# Get subject, date, from and body as text from email RFC822 style string
#
def email2Text(rfc822mail):
    """Extract sender, date, subject and text body from a raw RFC822 message.

    Args:
        rfc822mail: The complete message as bytes.

    Returns:
        A dict with the keys ``"from"``, ``"date"``, ``"subject"`` and
        ``"body"``. ``"body"`` is the concatenation of the text of every
        text part and is an empty string when there is none.
    """
    # parse the message
    msg_data = email.message_from_bytes(rfc822mail, policy=email.policy.default)

    # Get From, Date, Subject; a missing header is passed on as "" rather
    # than None.
    mail_value = {
        "from": header_decode(msg_data.get("From", "")),
        "date": header_decode(msg_data.get("Date", "")),
        "subject": header_decode(msg_data.get("Subject", "")),
    }

    # Get Body: walk() yields the message itself and then any sub-parts, so
    # single-part and multipart messages share one code path. msg2bodyText
    # returns None for non-text parts; those are skipped.
    texts = (msg2bodyText(part) for part in msg_data.walk())
    mail_value["body"] = "".join(text for text in texts if text is not None)
    return mail_value
#
# get body text from a message (EmailMessage instance)
#
def msg2bodyText(msg):
    """Return the text of one message part, or None for a non-text part.

    A debug line with the part's content type, charset and transfer encoding
    is printed for every call. ``text/html`` content is converted with
    ``html2text``; if that conversion fails, the content is returned as it
    was before the conversion attempt.

    Args:
        msg: A message or message part providing ``get_content()``.

    Returns:
        The part's text as ``str``, or ``None`` when its main type is not
        ``text``.
    """
    ct = msg.get_content_type()
    cc = msg.get_content_charset()  # charset in Content-Type header
    cte = msg.get("Content-Transfer-Encoding")
    print("part: " + str(ct) + " " + str(cc) + " : " + str(cte))

    # skip non-text part/msg
    if msg.get_content_maintype() != "text":
        return None

    # get text
    try:
        ddd = msg.get_content()
    except (LookupError, UnicodeError):
        # The declared charset is unknown or unusable; decode the payload
        # bytes leniently instead of abandoning the whole message.
        payload = msg.get_payload(decode=True) or b""
        ddd = payload.decode("utf-8", errors="replace")

    # html to text
    if msg.get_content_subtype() == "html":
        try:
            ddd = html2text.html2text(ddd)
        except Exception:
            # Best effort: keep the unconverted content, but do not swallow
            # KeyboardInterrupt/SystemExit as a bare except would.
            print("error in html2text")
    return ddd
def header_decode(header):
    """Decode an RFC 2047 encoded header value into a plain string.

    Args:
        header: The raw header value, or ``None`` when the header is absent.

    Returns:
        The decoded text; an empty string when *header* is ``None``.
        Bytes that cannot be decoded are replaced rather than raising.
    """
    # Imported here so the function does not rely on email.header having
    # been loaded as a side effect of another email.* import.
    import email.header

    if header is None:
        return ""

    parts = []
    for text, encoding in email.header.decode_header(header):
        if isinstance(text, bytes):
            try:
                text = text.decode(encoding or "us-ascii", errors="replace")
            except LookupError:
                # Charset label Python has no codec for.
                text = text.decode("us-ascii", errors="replace")
        parts.append(text)
    return "".join(parts)
if __name__ == "__main__":
    # Run the IMAP search only when executed as a script, not on import.
    search_imap()
# Comments on this script can be posted on its GitHub gist page (sign-in required).