# mermelstein/text_to_pdf.py

## text_to_pdf.py
from PIL import Image
import pytesseract
from pdf2image import convert_from_path

def pdf_to_text_files(pdf_path, lang='eng'):
    """OCR every page of a PDF and write each page's text to its own file.

    The PDF is rasterized with ``pdf2image.convert_from_path`` and each page
    image is passed to ``pytesseract.image_to_string``. The text of page N
    (1-based) is written to ``page_N.txt`` in the current working directory,
    overwriting any existing file of that name.

    Args:
        pdf_path: Path of the PDF file to read.
        lang: Language code handed to Tesseract (default ``'eng'``).

    Returns:
        The list of output file names, in page order.
    """
    # Convert the PDF to a list of images
    images = convert_from_path(pdf_path)

    written = []
    # Process each image with Tesseract
    for page_number, img in enumerate(images, start=1):
        text = pytesseract.image_to_string(img, lang=lang)
        out_name = f'page_{page_number}.txt'
        # OCR output can contain non-ASCII characters; write UTF-8 explicitly
        # so the result does not depend on the platform's default encoding.
        with open(out_name, 'w', encoding='utf-8') as f:
            f.write(text)
        written.append(out_name)
    return written


if __name__ == '__main__':
    # Same input path and language the original script used.
    pdf_to_text_files('path_to_pdf.pdf')