Skip to content

Instantly share code, notes, and snippets.

@dfeldman
Last active January 4, 2024 05:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dfeldman/cb790287621098fcd84731668404dde2 to your computer and use it in GitHub Desktop.
Save dfeldman/cb790287621098fcd84731668404dde2 to your computer and use it in GitHub Desktop.
Script to send a PDF file to ChatGPT page by page
import sys
import os
import base64
import requests
import io
import hashlib
from PyPDF2 import PdfFileReader
from pdf2image import convert_from_path
# OpenAI API Key
# Read the key from the OPENAI_API_KEY environment variable so the secret does
# not have to be written into this file. An unset or empty variable falls back
# to the original placeholder, which must then be edited by hand.
api_key = os.environ.get("OPENAI_API_KEY") or "API KEY HERE"
# Instruction text sent alongside every page image: asks for the document
# date, a comma-separated list of names, and a full transcription of the page.
prompt = (
    "Perform a detailed OCR (Optical Character Recognition) analysis on the provided image. "
    "Extract all readable text. Identify and list the date of the document at the beginning, if determinable. "
    "Also, provide a comma-separated list of any names mentioned in the text. "
    "Then, transcribe the entire content of the page.\n\n")
# Function to encode the image to base64
def encode_image(image):
    """Serialize *image* as JPEG in memory and return it as base64 text.

    ``image`` must provide a ``save(file_obj, format=...)`` method; the caller
    in this file passes the page images returned by ``convert_from_path``.

    Returns:
        The base64 encoding of the JPEG bytes, decoded to a ``str``.
    """
    with io.BytesIO() as jpeg_stream:
        image.save(jpeg_stream, format="JPEG")
        # Grab the bytes before the context manager closes the stream.
        jpeg_bytes = jpeg_stream.getvalue()
    encoded = base64.b64encode(jpeg_bytes)
    return encoded.decode('utf-8')
# Function to process each page
def process_page(page, output_file, page_number, progress_file,
                 timeout=120, max_tokens=300):
    """Send one page image to the chat completions endpoint and record the reply.

    The page is JPEG/base64-encoded with ``encode_image`` and posted together
    with the module-level ``prompt``. On an HTTP 200 response the returned text
    is appended to ``output_file`` and ``page_number`` is written to
    ``progress_file`` (overwriting its previous contents).

    Args:
        page: Page image; passed straight to ``encode_image``.
        output_file: Path of the text file the transcription is appended to.
        page_number: Page number used in the output, progress file and logs.
        progress_file: Path of the file that stores the last processed page.
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot hang the run indefinitely.
        max_tokens: Upper bound on the reply length sent in the request.

    Returns:
        True if the page was processed and recorded, False if the server
        returned a non-200 status or a body that is not valid JSON.

    Raises:
        requests.RequestException: on connection errors or timeouts.
        OSError: if the output or progress file cannot be written.
    """
    print("encode page ", page_number)
    base64_image = encode_image(page)
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    payload = {
        "model": "gpt-4-vision-preview",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": prompt,
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}"
                        }
                    }
                ]
            }
        ],
        # NOTE(review): the default of 300 tokens looks low for a full-page
        # transcription and may truncate the reply -- confirm and raise it.
        "max_tokens": max_tokens
    }
    print("post page ", page_number)
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=timeout,
    )
    if response.status_code != 200:
        print(f"Error processing page {page_number}: {response.status_code}")
        return False

    try:
        response_data = response.json()
    except ValueError:
        # A 200 response whose body is not JSON cannot be transcribed.
        print(f"Error processing page {page_number}: response was not valid JSON")
        return False

    # `or [{}]` also covers an explicit empty "choices" list, which would
    # otherwise raise IndexError on the [0] lookup.
    choices = response_data.get('choices') or [{}]
    content = choices[0].get('message', {}).get('content') or 'No content available'

    # OCR output routinely contains non-ASCII characters; pin the encoding so
    # the write does not depend on the platform default.
    with open(output_file, 'a', encoding='utf-8') as f:
        f.write(f"Page {page_number}:\n{content}\n\n")
        f.write("-"*60)
        f.write("\n\n")
    # Progress is only advanced after the page text is safely on disk.
    with open(progress_file, 'w', encoding='utf-8') as f:
        f.write(str(page_number))
    print(f"Processed page {page_number}")
    return True
# Function to compute file hash
def compute_hash(file_path, chunk_size=1024 * 1024):
    """Return the SHA-256 hex digest of the file at *file_path*.

    The file is read in binary mode in pieces of ``chunk_size`` bytes so that
    a large PDF is hashed without loading the whole file into memory.

    Args:
        file_path: Path of the file to hash.
        chunk_size: Number of bytes read per iteration.

    Returns:
        The digest as a lowercase hexadecimal string.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    hasher = hashlib.sha256()
    with open(file_path, 'rb') as f:
        # iter() with a sentinel keeps calling f.read until it returns b"" (EOF).
        for chunk in iter(lambda: f.read(chunk_size), b""):
            hasher.update(chunk)
    return hasher.hexdigest()
def main(pdf_path, start_page, end_page):
    """Process pages ``start_page``..``end_page`` (1-based, inclusive) of a PDF.

    Progress is stored in ``<sha256 of the PDF>_progress.txt`` in the current
    directory, and results are appended to ``<pdf path without extension>_output.txt``.
    A run resumes after the last page recorded in the progress file when that
    is later than ``start_page``.

    NOTE(review): the progress file is keyed by file hash only, not by page
    range, so requesting an earlier range after a later one has been processed
    is reported as already completed -- confirm this is intended.

    Args:
        pdf_path: Path of the PDF file to process.
        start_page: First page to process (1-based).
        end_page: Last page to process (1-based, inclusive).
    """
    if not os.path.exists(pdf_path):
        print("PDF file does not exist.")
        return

    file_hash = compute_hash(pdf_path)
    progress_file = f"{file_hash}_progress.txt"
    output_file = f"{os.path.splitext(pdf_path)[0]}_output.txt"

    last_processed_page = 0
    if os.path.exists(progress_file):
        with open(progress_file, 'r') as f:
            saved = f.read().strip()
        try:
            last_processed_page = int(saved)
        except ValueError:
            # An empty or corrupt progress file should not abort the run.
            print(f"Ignoring unreadable progress file {progress_file}.")

    if last_processed_page >= end_page:
        print("Processing already completed for this range.")
        return

    # Zero-based index of the first page still to be processed.
    first_index = max(start_page - 1, last_processed_page)
    if first_index >= end_page:
        print("Nothing to process: start page is after end page.")
        return

    try:
        # Rasterize only the requested range instead of the whole document.
        pages = convert_from_path(
            pdf_path, first_page=first_index + 1, last_page=end_page
        )
        if len(pages) < end_page - first_index:
            print(f"Only {len(pages)} page(s) available in the requested range.")
        for page_number, page in enumerate(pages, start=first_index + 1):
            print("processing page ", page_number)
            outcome = process_page(page, output_file, page_number, progress_file)
            # An explicit False is treated as a failure signal; any other
            # return value (including None) means carry on. Stopping here keeps
            # a later success from overwriting the progress file and causing
            # the failed page to be skipped on the next run.
            if outcome is False:
                print(f"Stopping at page {page_number}; re-run to resume.")
                break
    except Exception as e:
        # Top-level boundary of the script: report the problem and stop.
        print(f"An error occurred: {e}")
if __name__ == "__main__":
    # Expected arguments: <path_to_pdf> <start_page> <end_page>.
    if len(sys.argv) != 4:
        print("Usage: python script.py <path_to_pdf> <start_page> <end_page>")
    else:
        pdf_path = sys.argv[1]
        try:
            start_page = int(sys.argv[2])
            end_page = int(sys.argv[3])
        except ValueError:
            # Report bad page numbers readably instead of with a traceback;
            # the non-zero exit status matches what the uncaught error gave.
            print("start_page and end_page must be integers.")
            print("Usage: python script.py <path_to_pdf> <start_page> <end_page>")
            sys.exit(1)
        main(pdf_path, start_page, end_page)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment