Skip to content

Instantly share code, notes, and snippets.

@satish860
Last active March 22, 2024 10:58
Show Gist options
  • Save satish860/e1813cf5e227b64be50ba42ca0764c58 to your computer and use it in GitHub Desktop.
Save satish860/e1813cf5e227b64be50ba42ca0764c58 to your computer and use it in GitHub Desktop.
import json
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter
# Render OCR results (text boxes plus detected ruling lines) into a PDF.
#
# Input : results.json -- a mapping whose values are lists of page dicts; each
#         page dict carries a 'bboxes' list and, optionally, 'vertical_lines'
#         and 'horizontal_lines' lists (shape as read by the code below).
# Output: output.pdf   -- one PDF page per OCR page.

# Load the OCR results from the JSON file.
# JSON is UTF-8 by specification, so decode explicitly instead of relying on
# the platform's default locale encoding (which breaks non-ASCII OCR text on
# some systems).
with open('results.json', encoding='utf-8') as f:
    ocr_results = json.load(f)

# Create a new PDF document.
pdf_filename = 'output.pdf'
c = canvas.Canvas(pdf_filename, pagesize=letter)

# Iterate over each page in the OCR results. The per-file key is not needed,
# so only the page lists are walked.
for pages in ocr_results.values():
    for page_data in pages:
        # The font is set for every page because reportlab discards canvas
        # state (including the current font) when showPage() is called.
        c.setFont("Helvetica", 12)

        # Draw each recognised text fragment at the first two values of its
        # bounding box.
        # NOTE(review): coordinates are passed through unchanged. reportlab's
        # origin is the bottom-left corner in points; if the OCR tool reports
        # top-left-origin pixel coordinates the page will come out vertically
        # flipped / scaled -- confirm against the OCR output format.
        for bbox_data in page_data['bboxes']:
            bbox = bbox_data['bbox']
            text = bbox_data['text']
            c.drawString(bbox[0], bbox[1], text)

        # Optionally, draw vertical and horizontal lines. These keys are
        # treated as optional: a page without them simply gets no rules
        # instead of aborting the whole run with a KeyError.
        for line_data in page_data.get('vertical_lines', ()):
            line = line_data['bbox']
            # Vertical rule: x stays at line[0], y runs from line[1] to line[3].
            c.line(line[0], line[1], line[0], line[3])
        for line_data in page_data.get('horizontal_lines', ()):
            line = line_data['bbox']
            # Horizontal rule: y stays at line[1], x runs from line[0] to line[2].
            c.line(line[0], line[1], line[2], line[1])

        # Finish this page and start a new one.
        c.showPage()

# Write the PDF document to disk.
c.save()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment