Skip to content

Instantly share code, notes, and snippets.

@PandaWhoCodes
Created April 1, 2019 06:01
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 4 You must be signed in to fork a gist
  • Save PandaWhoCodes/f7adce3bff9bb1f508b3ab42db05a6bf to your computer and use it in GitHub Desktop.
Save PandaWhoCodes/f7adce3bff9bb1f508b3ab42db05a6bf to your computer and use it in GitHub Desktop.
A script to download emails from Gmail - http://www.ashish.ch
import email
import imaplib
import os
import re
def create_folder(folder_name="backup"):
    """
    Creates a folder to store all mails

    Missing parent directories are created as well, and an already existing
    directory is left untouched.

    :param folder_name: path of the directory to create
    :raises FileExistsError: if the path exists but is not a directory
    """
    # exist_ok=True replaces the separate os.path.exists() check, which could
    # race with another process creating the directory in between.
    os.makedirs(folder_name, exist_ok=True)
def extract_body(payload):
    """
    Flattens an email payload into a single string

    A string payload is returned unchanged. Any other payload is treated as
    an iterable of message parts: each part's own payload is flattened
    recursively and the results are joined with newlines.

    :param payload: a string, or an iterable of objects with get_payload()
    :return: the body text
    """
    if not isinstance(payload, str):
        sections = []
        for section in payload:
            sections.append(extract_body(section.get_payload()))
        payload = '\n'.join(sections)
    return payload
def create_connection(user_name, password):
    """
    logs into the email service provider using the provided username and password

    Opens an SSL connection to imap.gmail.com on port 993 and authenticates.

    :param user_name: account name passed to IMAP LOGIN
    :param password: password passed to IMAP LOGIN
    :return: connection object for imaplib
    :raises imaplib.IMAP4.error: if the server rejects the login
    :raises OSError: if the connection fails while logging in
    """
    conn = imaplib.IMAP4_SSL("imap.gmail.com", 993)
    try:
        conn.login(user_name, password)
    except (imaplib.IMAP4.error, OSError) as e:
        print("Could not login", e)
        # Release the socket before propagating. Returning an unauthenticated
        # connection would only defer the failure to the first real command.
        try:
            conn.logout()
        except (imaplib.IMAP4.error, OSError):
            pass
        raise
    return conn
def show_mailBoxes(conn):
    """
    Displays the mail boxes in your email client

    Each LIST response line has the form ``(flags) delimiter name``. The name
    is everything after the delimiter and is kept exactly as the server sent
    it (including any surrounding double quotes). Every name is printed next
    to its index in the returned list.

    :param conn: imaplib object
    :return: list of mailbox names, in the order the server reported them
    :raises ValueError: if a LIST response line does not have the expected form
    """
    # The hierarchy delimiter is either a quoted string or NIL, so match it
    # generically instead of assuming it is always "/".
    list_line = r'\((?P<flags>[^)]*)\) (?P<delimiter>"(?:[^"\\]|\\.)*"|NIL) (?P<name>.*)'
    mailboxes = []
    for item in conn.list()[1]:
        if item is None:
            # imaplib reports an empty LIST result as [None].
            continue
        line = item.decode("utf-8")
        match = re.match(list_line, line)
        if match is None:
            raise ValueError("Unrecognised LIST response: {!r}".format(line))
        mailboxes.append(match.group("name"))
    for n, name in enumerate(mailboxes):
        print(n, name)
    return mailboxes
def select_mailbox(conn, mailbox):
    """
    select a mailbox to search through

    :param conn: imaplib object
    :param mailbox: name of the mailbox
    :return: the (status, data) pair returned by conn.select()
    :raises imaplib.IMAP4.error: if the server does not answer SELECT with OK
    """
    typ, data = conn.select(mailbox)
    if typ != "OK":
        # Fail here with a clear message rather than letting a later command
        # fail because no mailbox is selected.
        raise imaplib.IMAP4.error(
            "Could not select mailbox {}: {}".format(mailbox, data)
        )
    return typ, data
def save_email(num, subject, body, folder_name="backup"):
    """
    Writes one email to <folder_name>/<num>.txt

    The file contains the subject, a blank line, then the body. The folder
    must already exist.

    :param num: message number; bytes are decoded as UTF-8, anything else is
        converted with str()
    :param subject: subject line, or None when the message has no subject
    :param body: body text
    :param folder_name: directory the file is written to
    """
    message_id = num.decode("utf-8") if isinstance(num, bytes) else str(num)
    path = os.path.join(folder_name, message_id + ".txt")
    # An explicit encoding keeps non-ASCII mail from failing on platforms
    # whose default text encoding cannot represent it.
    with open(path, "w", encoding="utf-8") as f:
        # A message without a Subject header yields None, which write() rejects.
        f.write("" if subject is None else str(subject))
        f.write("\n\n")
        f.write(body)
def get_emails(conn, search_param):
    """
    Saves every message in the selected mailbox that matches a search

    Runs an IMAP SEARCH with the given criterion, fetches each matching
    message as RFC822, and hands its subject and flattened body to
    save_email(). A message that cannot be fetched, parsed or saved is
    reported and skipped. The selected mailbox is always closed afterwards;
    if the run is aborted by an unexpected error the session is also logged
    out.

    :param conn: imaplib object with a mailbox already selected
    :param search_param: IMAP search criterion, e.g. "ALL"
    :return: number of messages saved
    """
    count = 0
    failed = False
    # NOTE: on very large mailboxes SEARCH can fail with imaplib's
    # "got more than N bytes" error; that error is left to propagate.
    typ, data = conn.search(None, search_param)
    try:
        if typ != "OK":
            # On a non-OK status data holds the server's error text, not
            # message numbers, so there is nothing to fetch.
            print("EXCEPTION OCCURED:", typ, data)
            return count
        for num in data[0].split():
            try:
                _, msg_data = conn.fetch(num, '(RFC822)')
                for response_part in msg_data:
                    # Only tuple parts are parsed as messages; their second
                    # element holds the raw message bytes.
                    if not isinstance(response_part, tuple):
                        continue
                    # errors="replace" keeps one stray non-UTF-8 byte from
                    # discarding the whole message.
                    raw_text = response_part[1].decode("utf-8", errors="replace")
                    msg = email.message_from_string(raw_text)
                    body = extract_body(msg.get_payload())
                    save_email(num, msg['subject'], body)
                    count += 1
                    if count % 100 == 0:
                        print(count, "Messages saved")
            except Exception as e:
                # Best effort: report the message and carry on with the next.
                print("Couldnt Parse message:", num, e)
    except Exception as e:
        print("EXCEPTION OCCURED:", e)
        failed = True
    finally:
        # CLOSE must come before LOGOUT; doing it the other way round makes
        # close() raise and hide the original error.
        try:
            conn.close()
        except (imaplib.IMAP4.error, OSError) as e:
            print("Could not close mailbox:", e)
        if failed:
            conn.logout()
    return count
def main():
    """
    Interactive entry point: pick a mailbox, enter a filter, save the matches

    Credentials are read from the GMAIL_USER and GMAIL_PASSWORD environment
    variables, falling back to the placeholder values when they are unset.
    """
    conn = create_connection(
        os.environ.get("GMAIL_USER", "username@gmail.com"),
        os.environ.get("GMAIL_PASSWORD", "password"),
    )
    try:
        mailboxes = show_mailBoxes(conn)
        if not mailboxes:
            print("No mailboxes found")
            return
        # Re-prompt until the answer is a valid index into the printed list.
        while True:
            try:
                mailbox_choice = int(input("Select a mailbox: "))
            except ValueError:
                print("Please enter a number")
                continue
            if 0 <= mailbox_choice < len(mailboxes):
                break
            print("Please enter a number between 0 and", len(mailboxes) - 1)
        select_mailbox(conn, mailboxes[mailbox_choice])
        create_folder()
        search_param = input("Enter the filter- Leave blank for ALL").strip()
        if not search_param:
            search_param = "ALL"
        get_emails(conn, search_param)
    finally:
        # End the session even when something above failed. The connection
        # may already be logged out, so errors here are ignored.
        try:
            conn.logout()
        except (imaplib.IMAP4.error, OSError):
            pass


if __name__ == '__main__':
    main()
@kshitijcode
Copy link

What do you mean by filter over here?

@kshitijcode
Copy link

Also do you know how to move something from spam to inbox?

@hongyi-zhao
Copy link

hongyi-zhao commented Aug 21, 2021

I tried your script above, but I got the following error:

$ python download_emails.py 
0 "INBOX"
1 "Junk"
2 "[Gmail]"
3 "[Gmail]/All Mail"
4 "[Gmail]/Drafts"
5 "[Gmail]/Important"
6 "[Gmail]/Sent Mail"
7 "[Gmail]/Spam"
8 "[Gmail]/Starred"
9 "[Gmail]/Trash"
10 "&XeVPXJCuTvY-"
11 "&ZTZjbg-"
12 "&ZcWITHb4UXM-"
13 "&ecFOupCuTvY-"
Select a mailbox: 0
Enter the filter- Leave blank for ALL
Traceback (most recent call last):
  File "/home/werner/.pyenv/versions/3.9.1/lib/python3.9/imaplib.py", line 1047, in _command_complete
    typ, data = self._get_tagged_response(tag, expect_bye=logout)
  File "/home/werner/.pyenv/versions/3.9.1/lib/python3.9/imaplib.py", line 1173, in _get_tagged_response
    self._get_response()
  File "/home/werner/.pyenv/versions/3.9.1/lib/python3.9/imaplib.py", line 1075, in _get_response
    resp = self._get_line()
  File "/home/werner/.pyenv/versions/3.9.1/lib/python3.9/imaplib.py", line 1183, in _get_line
    line = self.readline()
  File "/home/werner/.pyenv/versions/3.9.1/lib/python3.9/imaplib.py", line 325, in readline
    raise self.error("got more than %d bytes" % _MAXLINE)
imaplib.error: got more than 1000000 bytes

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/werner/download_emails.py", line 108, in <module>
    get_emails(conn, search_param)
  File "/home/werner/download_emails.py", line 73, in get_emails
    typ, data = conn.search(None, search_param)
  File "/home/werner/.pyenv/versions/3.9.1/lib/python3.9/imaplib.py", line 734, in search
    typ, dat = self._simple_command(name, *criteria)
  File "/home/werner/.pyenv/versions/3.9.1/lib/python3.9/imaplib.py", line 1230, in _simple_command
    return self._command_complete(name, self._command(name, *args))
  File "/home/werner/.pyenv/versions/3.9.1/lib/python3.9/imaplib.py", line 1051, in _command_complete
    raise self.error('command: %s => %s' % (name, val))
imaplib.error: command: SEARCH => got more than 1000000 bytes

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment