Skip to content

Instantly share code, notes, and snippets.

@bhochieng
Created July 17, 2017 11:36
Show Gist options
  • Save bhochieng/fc21a8e22bc46d47dc94d86472de1178 to your computer and use it in GitHub Desktop.
Save bhochieng/fc21a8e22bc46d47dc94d86472de1178 to your computer and use it in GitHub Desktop.
Fetch emails from Gmail using Python (modified code)
import os
import email
import cPickle
import getpass
import imaplib
import datetime
# Host name of the IMAP server that main() connects to over SSL.
IMAP_SERVER = 'imap.gmail.com'
# MIME types that are written to disk as attachments; more can be added.
_ATTACHMENT_MIME_TYPES = frozenset([
    'image/jpeg', 'image/png', 'image/bmp', 'application/msword',
    'image/gif', 'image/x-icon', 'video/x-mpeg',
    'application/mspowerpoint', 'application/pdf',
])

# Pickled list of the message ids whose attachments were already saved.
_ATTACHMENT_STATE_FILE = './pickled_attachments.pkl'


def _save_attachments(message, message_id, target_dir):
    """Write every part of ``message`` with a listed MIME type into ``target_dir``."""
    for index, part in enumerate(message.walk()):
        if part.get_content_type() not in _ATTACHMENT_MIME_TYPES:
            continue
        body = part.get_payload(decode=True)
        if body is None:
            continue
        # The file name is taken from the email, i.e. it is untrusted input:
        # keep only its last path component so it cannot escape target_dir.
        raw_name = part.get_filename() or ''
        file_name = os.path.basename(raw_name.replace('\\', '/'))
        if not file_name or file_name in ('.', '..'):
            # No usable name in the email: derive one instead of writing "None".
            file_name = "%s_part%d" % (message_id, index)
        with open(os.path.join(target_dir, file_name), 'wb') as handle:
            handle.write(body)


def process_inbox(mail, email_address):
    """Save the attachments of all not-yet-processed messages of the selected mailbox.

    Args:
        mail: a logged-in IMAP connection with a mailbox selected; only its
            ``search`` and ``fetch`` methods are used.
        email_address: used to build the output directory
            ``./<email_address>_attachments``, which must already exist.

    Returns:
        None. Attachments are written to the output directory and the ids of
        processed messages are stored in ``./pickled_attachments.pkl`` so that
        later runs skip them. Progress is saved even if the run stops early.
    """
    return_value, emails = mail.search(None, "ALL")
    if return_value != 'OK':
        print("No messages.")
        return

    # NOTE(review): SEARCH yields message sequence numbers, which IMAP
    # renumbers when messages are expunged, so ids remembered from an earlier
    # session may point at different messages. UID SEARCH/FETCH would be
    # stable, but would invalidate existing state files.
    target_dir = "./" + email_address + "_attachments"

    if os.path.exists(_ATTACHMENT_STATE_FILE):
        # The state file is local data written by this function; do not point
        # this at a pickle obtained from an untrusted source.
        with open(_ATTACHMENT_STATE_FILE, 'rb') as handle:
            processed = cPickle.load(handle)
    else:
        processed = []

    already_seen = set(processed)  # O(1) membership tests
    pending = [my_mail for my_mail in (emails[0] or '').split()
               if my_mail not in already_seen]

    try:
        for my_mail in pending:
            return_value, raw_data = mail.fetch(my_mail, '(RFC822)')
            if return_value != 'OK':
                print("Error getting message %s" % my_mail)
                return
            message = email.message_from_string(raw_data[0][1])
            try:
                _save_attachments(message, my_mail, target_dir)
            except EnvironmentError as error:
                # One unwritable file must not block every later message;
                # the message stays unprocessed and is retried next run.
                print("Could not save attachments of message %s: %s" % (my_mail, error))
                continue
            processed.append(my_mail)
    except (Exception, KeyboardInterrupt) as error:
        # Best effort, as before: stop processing but report why.
        print("Stopped processing early: %r" % (error,))
    finally:
        # Always persist progress, including on the early return above.
        with open(_ATTACHMENT_STATE_FILE, 'wb') as handle:
            cPickle.dump(processed, handle)
def get_inbox(mail):
    """Select the "INBOX" mailbox on ``mail`` and return the status of that call.

    ``mail.select`` is expected to return a ``(status, data)`` pair; only the
    status is handed back to the caller.
    """
    status, _mailbox_data = mail.select("INBOX")
    return status
def main():
    """Prompt for Gmail credentials and save the inbox attachments locally.

    Keeps asking for an email address and password until the IMAP login
    succeeds, makes sure ``./<email>_attachments`` exists, then lets
    ``process_inbox`` handle the INBOX. The connection is always logged out,
    even when something fails part-way through.
    """
    mail = imaplib.IMAP4_SSL(IMAP_SERVER)
    try:
        return_value = 0
        # login() raises on bad credentials, leaving return_value at 0,
        # so the prompt repeats until a login succeeds.
        while return_value == 0:
            email_address = raw_input('Email:')
            try:
                return_value, _ = mail.login(email_address, getpass.getpass())
            except imaplib.IMAP4.error:
                print("Login failed.")

        target_dir = "./" + email_address + "_attachments"
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)

        if get_inbox(mail) == "OK":
            try:
                process_inbox(mail, email_address)
            finally:
                # close() is only valid while a mailbox is selected.
                mail.close()
    finally:
        mail.logout()
# Run only when executed as a script, so importing this module does not
# prompt for credentials or open a network connection.
if __name__ == "__main__":
    main()
import os
import email
import cPickle
import getpass
import imaplib
import datetime
# Host name of the IMAP server that main() connects to over SSL.
IMAP_SERVER = 'imap.gmail.com'
# Pickled list of the message ids whose text bodies were already saved.
_EMAIL_STATE_FILE = './pickled_emails.pkl'


def _describe_message_date(message):
    """Return the Date header of ``message`` as local time, or "unknown date"."""
    # Function-scope import: the module itself only does ``import email``.
    from email import utils as email_utils

    raw_date = message['Date']
    date_tuple = email_utils.parsedate_tz(raw_date) if raw_date else None
    if date_tuple is None:
        # Missing or unparseable header; previously this aborted the whole run.
        return "unknown date"
    try:
        local_date = datetime.datetime.fromtimestamp(email_utils.mktime_tz(date_tuple))
    except (OverflowError, ValueError, OSError):
        return "unknown date"
    return local_date.strftime("%d %b %Y %H:%M:%S")


def _save_text_parts(message, message_id, target_dir):
    """Append every ``text/plain`` part of ``message`` to one file in ``target_dir``."""
    # "/" would be read as a directory separator, so it is replaced. The
    # first-run code path used to skip this and failed on such subjects.
    subject = str(message['Subject']).replace("/", "-")
    file_name = "%s.(%s)-%s.txt" % (message_id, _describe_message_date(message), subject)
    path = os.path.join(target_dir, file_name)
    for part in message.walk():
        if part.get_content_type() != "text/plain":
            continue
        body = part.get_payload(decode=True)
        if body is None:
            continue
        # Binary append: the decoded payload is written exactly as received.
        with open(path, 'ab') as handle:
            handle.write(body)


def process_inbox(mail, email_address):
    """Save the plain-text bodies of all not-yet-processed messages.

    Args:
        mail: a logged-in IMAP connection with a mailbox selected; only its
            ``search`` and ``fetch`` methods are used.
        email_address: used to build the output directory
            ``./<email_address>_data``, which must already exist.

    Returns:
        None. Each message becomes
        ``<id>.(<date>)-<subject>.txt`` in the output directory and the ids of
        processed messages are stored in ``./pickled_emails.pkl`` so that
        later runs skip them. Progress is saved even if the run stops early.
    """
    return_value, emails = mail.search(None, "ALL")
    if return_value != 'OK':
        print("No messages.")
        return

    # NOTE(review): SEARCH yields message sequence numbers, which IMAP
    # renumbers when messages are expunged, so ids remembered from an earlier
    # session may point at different messages. UID SEARCH/FETCH would be
    # stable, but would invalidate existing state files.
    target_dir = "./" + email_address + "_data"

    if os.path.exists(_EMAIL_STATE_FILE):
        # The state file is local data written by this function; do not point
        # this at a pickle obtained from an untrusted source.
        with open(_EMAIL_STATE_FILE, 'rb') as handle:
            processed = cPickle.load(handle)
    else:
        processed = []

    already_seen = set(processed)  # O(1) membership tests
    pending = [my_mail for my_mail in (emails[0] or '').split()
               if my_mail not in already_seen]

    try:
        for my_mail in pending:
            return_value, raw_data = mail.fetch(my_mail, '(RFC822)')
            if return_value != 'OK':
                print("Error getting message %s" % my_mail)
                return
            message = email.message_from_string(raw_data[0][1])
            try:
                _save_text_parts(message, my_mail, target_dir)
            except EnvironmentError as error:
                # One unwritable file must not block every later message;
                # the message stays unprocessed and is retried next run.
                print("Could not save message %s: %s" % (my_mail, error))
                continue
            processed.append(my_mail)
    except (Exception, KeyboardInterrupt) as error:
        # Best effort, as before: stop processing but report why.
        print("Stopped processing early: %r" % (error,))
    finally:
        # Always persist progress, including on the early return above.
        with open(_EMAIL_STATE_FILE, 'wb') as handle:
            cPickle.dump(processed, handle)
def get_inbox(mail):
    """Switch ``mail`` to the "INBOX" mailbox and report the resulting status.

    The second element of the ``(status, data)`` pair returned by
    ``mail.select`` is not used.
    """
    outcome, _ignored = mail.select("INBOX")
    return outcome
def main():
    """Prompt for Gmail credentials and save the inbox text bodies locally.

    Keeps asking for an email address and password until the IMAP login
    succeeds, makes sure ``./<email>_data`` exists, then lets
    ``process_inbox`` handle the INBOX. The connection is always logged out,
    even when something fails part-way through.
    """
    mail = imaplib.IMAP4_SSL(IMAP_SERVER)
    try:
        return_value = 0
        # login() raises on bad credentials, leaving return_value at 0,
        # so the prompt repeats until a login succeeds.
        while return_value == 0:
            email_address = raw_input('Email:')
            try:
                return_value, _ = mail.login(email_address, getpass.getpass())
            except imaplib.IMAP4.error:
                print("Login failed.")

        target_dir = "./" + email_address + "_data"
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)

        if get_inbox(mail) == "OK":
            try:
                process_inbox(mail, email_address)
            finally:
                # close() is only valid while a mailbox is selected.
                mail.close()
    finally:
        mail.logout()
# Run only when executed as a script, so importing this module does not
# prompt for credentials or open a network connection.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment