from __future__ import print_function

import base64
import datetime
import email
import os
import os.path
import sys
from email import policy
from email.parser import BytesParser
from email.utils import parsedate_to_datetime

import pytz
from bs4 import BeautifulSoup
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from weasyprint import HTML

# OAuth scopes requested from the user. Read-only Gmail access is enough to
# list messages and download them in raw form. An empty scope string is not a
# valid OAuth scope. If this list changes, delete token.json so the consent
# flow runs again with the new scopes.
SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']
def get_email_body(email_message):
    """Return the decoded HTML body of *email_message*, or ``None``.

    For a multipart message the parts are walked in order and the payload of
    the first ``text/html`` part is returned. For a single-part message the
    payload is returned only when the message itself is ``text/html``.

    Args:
        email_message: A parsed ``email.message.Message``-like object.

    Returns:
        The transfer-decoded payload (as returned by
        ``get_payload(decode=True)``), or ``None`` when the message has no
        ``text/html`` content.
    """
    if email_message.is_multipart():
        for part in email_message.walk():
            if part.get_content_type() == 'text/html':
                return part.get_payload(decode=True)
    # Reached for single-part messages, and for multipart messages without an
    # HTML part (whose own content type is multipart/*, so this is False).
    if email_message.get_content_type() == 'text/html':
        return email_message.get_payload(decode=True)
    return None
def _load_credentials():
    """Return Gmail API credentials, refreshing or re-authorising as needed.

    Cached credentials are read from ``token.json`` when that file exists.
    If they are missing or invalid they are refreshed (when expired and a
    refresh token is available) or obtained through the local-server OAuth
    flow using ``credentials.json``. New or refreshed credentials are written
    back to ``token.json``.
    """
    creds = None
    if os.path.exists('token.json'):
        creds = Credentials.from_authorized_user_file('token.json', SCOPES)
    if creds and creds.valid:
        return creds

    if creds and creds.expired and creds.refresh_token:
        creds.refresh(Request())
    else:
        flow = InstalledAppFlow.from_client_secrets_file(
            'credentials.json', SCOPES)
        creds = flow.run_local_server(port=0)
    # Persist the credentials so the next run can skip the consent flow.
    with open('token.json', 'w') as token:
        token.write(creds.to_json())
    return creds


def _safe_filename(text):
    """Return *text* with characters that are unsafe in a file name replaced."""
    cleaned = ''.join(
        ch if ch.isalnum() or ch in ' ._-' else '_' for ch in text
    )
    return cleaned.strip(' .') or 'untitled'


def _build_pdf_path(email_message, message_id):
    """Return the output path ``./<YYYYmmdd_HHMM>_<subject>.pdf`` for a message.

    The timestamp is the ``Date`` header converted to US/Pacific. When the
    ``Date`` header is missing or cannot be parsed the path falls back to
    ``./<message_id>.pdf``; when only the subject is missing the message id
    is used in its place.
    """
    fallback = os.path.join('.', message_id + '.pdf')
    date_header = email_message['Date']
    if date_header is None:
        return fallback
    try:
        # The Date header is RFC 2822 formatted, not ISO 8601.
        received = parsedate_to_datetime(str(date_header))
    except (TypeError, ValueError):
        return fallback
    if received.tzinfo is None:
        # No usable UTC offset in the header; treat the time as UTC.
        received = received.replace(tzinfo=pytz.utc)
    stamp = received.astimezone(
        pytz.timezone('US/Pacific')).strftime('%Y%m%d_%H%M')

    subject = email_message['subject']
    label = _safe_filename(str(subject)) if subject else message_id
    return os.path.join('.', stamp + '_' + label + '.pdf')


def main():
    """Save the HTML body of recent "Uber Receipts" emails as PDF files.

    Authenticates against the Gmail API, fetches up to 10 messages matching
    ``from:"Uber Receipts"``, and renders each message's HTML body to a PDF
    in the current directory. Messages without an HTML body are reported and
    skipped. Gmail API errors (``HttpError``) are printed rather than raised.
    """
    creds = _load_credentials()

    try:
        service = build('gmail', 'v1', credentials=creds)
        query = 'from:"Uber Receipts"'
        results = service.users().messages().list(
            userId='me', q=query, maxResults=10).execute()
        messages = results.get('messages', [])
        if not messages:
            print('No messages found.')
            return

        for message in messages:
            # format='raw' returns the full RFC 2822 message, base64url-encoded.
            msg = service.users().messages().get(
                userId='me', id=message['id'], format='raw').execute()
            raw_bytes = base64.urlsafe_b64decode(msg['raw'].encode('ASCII'))
            email_message = BytesParser(
                policy=policy.default).parsebytes(raw_bytes)

            email_body = get_email_body(email_message)
            if not email_body:
                print("No HTML content found in the email")
                continue

            # BeautifulSoup decodes the body bytes and normalises the markup
            # before it is handed to the PDF renderer.
            soup = BeautifulSoup(email_body, 'html.parser')
            pdf_file_path = _build_pdf_path(email_message, message['id'])
            HTML(string=str(soup)).write_pdf(pdf_file_path)
            print(f"PDF file saved as {pdf_file_path}")
    except HttpError as error:
        print(f'An error occurred: {error}')


# Run the export only when executed as a script, not when imported.
if __name__ == '__main__':
    main()