Skip to content

Instantly share code, notes, and snippets.

@ikks
Last active August 29, 2015 14:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ikks/0075a7bcf8fe526ab4c3 to your computer and use it in GitHub Desktop.
Save ikks/0075a7bcf8fe526ab4c3 to your computer and use it in GitHub Desktop.
Download headers from a label and filter them
"""With the Gmail API is easy to download all your messages or filter them,
and it is good practice to set up filters and apply them to your own messages, so that
later you can get some statistics.
You need to go through the whole auth dance in order to connect to the API; for development
purposes you will be fine with the instructions provided by Gmail:
https://developers.google.com/gmail/api/v1/reference/users/messages
"""
# Path of the output file that receives the matching message headers.
OUT_FILENAME = '/tmp/flisol_emails.txt'
# Id of the Gmail label whose messages are fetched (passed to the API as
# ``labelIds``).
LABEL_ID = 'Label_17'
# Substring searched for in the To/Cc header values. The search is done
# against the upper-cased header value (see fetch_and_save_message_headers),
# so write this in upper case.
MAIL_PIECE = 'FLISOL'
# Path to the client_secret.json file downloaded from the Developer Console.
# NOTE(review): not referenced by the code shown here; presumably consumed by
# the auth setup that creates gmail_service -- confirm.
CLIENT_SECRET_FILE = 'client_secret.json'
def get_mail_headers_by_label(gmail_service, label_id):
    """Return every message reference that carries ``label_id``.

    Pages through ``users().messages().list`` by following ``nextPageToken``
    until no further page is announced.

    Args:
        gmail_service: Gmail API client; follow the instructions in
            https://developers.google.com/gmail/api/quickstart/quickstart-python
            to get one.
        label_id: Label id, forwarded to the API as ``labelIds``.

    Returns:
        A list with the entries found under the ``messages`` key of every
        page (fetch_and_save_message_headers reads the ``'id'`` of each
        entry). The list is empty when no page contains that key.
    """
    messages = []
    page_token = None
    while True:
        request_args = {'userId': 'me', 'labelIds': label_id}
        if page_token is not None:
            request_args['pageToken'] = page_token
        resp = gmail_service.users().messages().list(**request_args).execute()
        # A page without results may lack the 'messages' key altogether;
        # indexing it directly raised KeyError for labels with no messages.
        messages.extend(resp.get('messages', []))
        page_token = resp.get('nextPageToken')
        if not page_token:
            return messages
def fetch_and_save_message_headers(gmail_service, out_filename, messages, mail_piece):
    """Fetch message headers and keep those addressed to ``mail_piece``.

    For every entry of ``messages`` the payload is requested from the API.
    A message is kept when one of its To/Cc headers contains ``mail_piece``
    (compared case-insensitively). For each kept message the matching To/Cc
    headers followed by its From headers are stored.

    Args:
        gmail_service: Gmail API client used for ``users().messages().get``.
        out_filename: Path of the file to (over)write. Each kept message is
            written as one JSON array per line.
        messages: Iterable of dicts with an ``'id'`` key, as returned by
            get_mail_headers_by_label.
        mail_piece: Substring to look for in the To/Cc header values.

    Returns:
        A list with one list of header dicts (``{'name': ..., 'value': ...}``)
        per kept message, in the order the messages were processed.
    """
    # Local import: json is needed here and the visible file does not import
    # it at module level.
    import json

    wanted = mail_piece.upper()
    to_list = []
    # The context manager closes the file even if an API call raises.
    with open(out_filename, mode='w') as save_to:
        for done, id_msg in enumerate(messages, 1):
            msg = gmail_service.users().messages().get(
                userId='me', id=id_msg['id'], fields='payload').execute()
            headers = msg['payload']['headers']
            # Header names are case-insensitive in e-mail ("CC", "Cc", "cc"),
            # so compare them in lower case.
            resp = [obj for obj in headers
                    if obj['name'].lower() in ('to', 'cc')
                    and wanted in obj['value'].upper()]
            if resp:
                resp.extend(obj for obj in headers
                            if obj['name'].lower() == 'from')
                to_list.append(resp)
                # One record per line keeps the output parseable; without a
                # delimiter the JSON arrays ran into each other.
                save_to.write(json.dumps(resp) + '\n')
            # Progress indicator every 100 processed messages.
            if done % 100 == 0:
                print(done)
    return to_list
# Script driver: list every message carrying LABEL_ID, then store the headers
# of those whose To/Cc mention MAIL_PIECE in OUT_FILENAME.
# NOTE(review): gmail_service is not defined in the code shown here; it is
# expected to be a Gmail API client created beforehand -- confirm.
messages = get_mail_headers_by_label(gmail_service=gmail_service, label_id=LABEL_ID)
fetch_and_save_message_headers(
    gmail_service=gmail_service,
    out_filename=OUT_FILENAME,
    messages=messages,
    mail_piece=MAIL_PIECE,
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment