# tbbooher/combined.py

## combined.py
from __future__ import print_function
import base64
import os
import os.path
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
import sys
import email
import datetime
import pytz
from email import policy
from email.parser import BytesParser
from bs4 import BeautifulSoup
from weasyprint import HTML

# OAuth scope list handed to the credential loaders in main(); the single
# entry requests the Gmail "readonly" scope.
# NOTE(review): a cached token.json is loaded with these scopes, so it
# presumably has to be regenerated if this list changes -- confirm.
SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']

def get_email_body(email_message):
    """Return the decoded payload of the first ``text/html`` part.

    Args:
        email_message: A parsed message object exposing ``walk()``,
            ``get_content_type()`` and ``get_payload()``.

    Returns:
        The result of ``get_payload(decode=True)`` for the first part whose
        content type is ``text/html``, or ``None`` when no such part exists.
    """
    # walk() yields the message itself first and then, for multipart
    # messages, each nested part, so one pass covers both layouts.
    html_candidates = (
        candidate
        for candidate in email_message.walk()
        if candidate.get_content_type() == 'text/html'
    )
    first_html = next(html_candidates, None)
    if first_html is None:
        return None
    return first_html.get_payload(decode=True)

def _safe_filename_part(text):
    """Return *text* with characters that are unsafe in file names replaced by '_'."""
    cleaned = ''.join(
        ch if ch.isalnum() or ch in ' ._-' else '_' for ch in text
    )
    # Leading/trailing dots and spaces produce hidden or awkward file names.
    return cleaned.strip(' .')


def _receipt_pdf_path(email_message, message_id):
    """Build the output PDF path for one receipt e-mail.

    The name is ``<YYYYmmdd_HHMM>_<subject>.pdf`` in the current directory,
    with the timestamp expressed in US/Pacific time. Pieces that are missing
    or unparseable are left out; if nothing usable remains, the Gmail
    message id is used so that a path is always produced.
    """
    # Function-scope import keeps this block self-contained.
    from email.utils import parsedate_to_datetime

    name_parts = []

    date_header = email_message['Date']
    if date_header:
        try:
            # Date headers are RFC 5322 formatted, not ISO 8601.
            received_time = parsedate_to_datetime(str(date_header))
        except (TypeError, ValueError):
            received_time = None
        if received_time is not None:
            if received_time.tzinfo is None:
                # No usable offset in the header: treat the time as UTC.
                received_time = received_time.replace(tzinfo=pytz.utc)
            # Convert (rather than relabel) so the sender's offset is honoured.
            received_time = received_time.astimezone(pytz.timezone('US/Pacific'))
            name_parts.append(received_time.strftime('%Y%m%d_%H%M'))

    subject = _safe_filename_part(str(email_message['subject'] or ''))
    if subject:
        name_parts.append(subject)

    if not name_parts:
        name_parts.append(message_id)

    return os.path.join('.', '_'.join(name_parts) + '.pdf')


def main():
    """Save recent "Uber Receipts" e-mails from Gmail as PDF files.

    Credentials are loaded from ``token.json`` when present; otherwise (or
    when they cannot be refreshed) the installed-app OAuth flow is run using
    ``credentials.json`` and the resulting token is written back to
    ``token.json``. Up to 10 messages matching ``from:"Uber Receipts"`` are
    fetched, and the HTML body of each is rendered to a PDF in the current
    directory. Gmail API errors (``HttpError``) are reported on stdout
    instead of being raised.
    """
    creds = None
    if os.path.exists('token.json'):
        creds = Credentials.from_authorized_user_file('token.json', SCOPES)

    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                'credentials.json', SCOPES)
            creds = flow.run_local_server(port=0)

        # Persist the new or refreshed credentials for the next run.
        with open('token.json', 'w') as token:
            token.write(creds.to_json())

    try:
        service = build('gmail', 'v1', credentials=creds)
        query = 'from:"Uber Receipts"'
        results = service.users().messages().list(userId='me', q=query, maxResults=10).execute()
        messages = results.get('messages', [])

        if not messages:
            print('No messages found.')
            return

        for message in messages:
            msg = service.users().messages().get(userId='me', id=message['id'], format='raw').execute()
            # The 'raw' field is the whole message, base64url-encoded.
            raw_bytes = base64.urlsafe_b64decode(msg['raw'].encode('ASCII'))

            email_message = BytesParser(policy=policy.default).parsebytes(raw_bytes)
            email_body = get_email_body(email_message)

            if not email_body:
                print("No HTML content found in the email")
                continue

            soup = BeautifulSoup(email_body, 'html.parser')
            pdf_file_path = _receipt_pdf_path(email_message, message['id'])

            HTML(string=str(soup)).write_pdf(pdf_file_path)

            print(f"PDF file saved as {pdf_file_path}")

    except HttpError as error:
        print(f'An error occurred: {error}')

# Run the export only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()