Skip to content

Instantly share code, notes, and snippets.

@tbbooher
Created April 26, 2023 14:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tbbooher/e00cd3ddc1d03462057400e7d59f67df to your computer and use it in GitHub Desktop.
Save tbbooher/e00cd3ddc1d03462057400e7d59f67df to your computer and use it in GitHub Desktop.
from __future__ import print_function
import base64
import os
import os.path
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
import sys
import email
import datetime
import pytz
from email import policy
from email.parser import BytesParser
from bs4 import BeautifulSoup
from weasyprint import HTML
# OAuth scopes requested when authorizing against the Gmail API; only
# read-only mailbox access is asked for.
# NOTE(review): token.json presumably caches a grant for exactly these scopes,
# so it likely needs deleting if this list changes -- confirm.
SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']
def get_email_body(email_message):
    """Return the decoded HTML body of *email_message*, or None.

    Args:
        email_message: A parsed ``email.message.Message`` (or subclass).

    Returns:
        The payload of the first ``text/html`` part as ``bytes`` (transfer
        encoding already decoded), or ``None`` when the message carries no
        HTML body.
    """
    if not email_message.is_multipart():
        if email_message.get_content_type() == 'text/html':
            return email_message.get_payload(decode=True)
        return None

    for part in email_message.walk():
        if part.get_content_type() != 'text/html':
            continue
        # An attached .html file is not the message body; skip it so that a
        # plain-text mail with an HTML attachment is not rendered by mistake.
        if part.get_content_disposition() == 'attachment':
            continue
        return part.get_payload(decode=True)
    return None
def _safe_filename_component(text):
    """Return *text* with characters that are unsafe in file names replaced by '_'."""
    unsafe = '<>:"/\\|?*'
    cleaned = ''.join(
        '_' if (ch in unsafe or ord(ch) < 32) else ch for ch in text
    )
    return cleaned.strip() or 'no_subject'


def _receipt_timestamp(date_header):
    """Format a mail ``Date`` header as ``YYYYMMDD_HHMM`` in US/Pacific.

    Returns None when the header is missing or cannot be parsed.
    """
    # Local import keeps this block self-contained; the file only does
    # ``import email`` at the top, which does not bind ``email.utils`` by name.
    from email.utils import parsedate_to_datetime

    if not date_header:
        return None
    try:
        # Mail dates are RFC 2822 ("Wed, 26 Apr 2023 14:01:00 +0000"), which
        # datetime.fromisoformat() rejects.
        received = parsedate_to_datetime(str(date_header))
    except (TypeError, ValueError):
        return None
    if received.tzinfo is None:
        # A naive result means the header used "-0000": the time is UTC.
        received = received.replace(tzinfo=pytz.utc)
    # Convert (rather than overwrite) the sender's offset to Pacific time.
    pacific = received.astimezone(pytz.timezone('US/Pacific'))
    return pacific.strftime('%Y%m%d_%H%M')


def main():
    """Save recent "Uber Receipts" emails from Gmail as PDF files.

    Loads OAuth credentials from ``token.json`` when present; otherwise (or
    when they are invalid) refreshes them or runs the local-server consent
    flow using ``credentials.json``, then writes the result back to
    ``token.json``. Up to 10 messages matching the query are downloaded, and
    the HTML body of each is rendered to
    ``./<YYYYMMDD_HHMM>_<subject>.pdf`` (US/Pacific time). Messages without an
    HTML body are reported and skipped. Gmail API errors are printed rather
    than raised.
    """
    creds = None
    if os.path.exists('token.json'):
        creds = Credentials.from_authorized_user_file('token.json', SCOPES)
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                'credentials.json', SCOPES)
            creds = flow.run_local_server(port=0)
        # Persist the new or refreshed credentials for the next run.
        with open('token.json', 'w') as token:
            token.write(creds.to_json())

    try:
        service = build('gmail', 'v1', credentials=creds)
        query = 'from:"Uber Receipts"'
        results = service.users().messages().list(
            userId='me', q=query, maxResults=10).execute()
        messages = results.get('messages', [])
        if not messages:
            print('No messages found.')
            return

        for message in messages:
            msg = service.users().messages().get(
                userId='me', id=message['id'], format='raw').execute()
            raw_bytes = base64.urlsafe_b64decode(msg['raw'].encode('ASCII'))
            email_message = BytesParser(policy=policy.default).parsebytes(raw_bytes)

            email_body = get_email_body(email_message)
            if not email_body:
                print("No HTML content found in the email")
                continue

            soup = BeautifulSoup(email_body, 'html.parser')
            # Fall back to the Gmail message id when the Date header is
            # missing or unparseable, so the receipt is still saved.
            stamp = _receipt_timestamp(email_message['Date']) or message['id']
            subject = _safe_filename_component(str(email_message['subject'] or ''))
            pdf_file_path = os.path.join('.', stamp + '_' + subject + '.pdf')
            HTML(string=str(soup)).write_pdf(pdf_file_path)
            print(f"PDF file saved as {pdf_file_path}")
    except HttpError as error:
        print(f'An error occurred: {error}')
# Run the receipt export only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment