# felixmon/pdf_to_image.py

## pdf_to_image.py
import os
import subprocess

def extract_pages_from_pdf(source_file, page_range, output_file):
    """Copy selected pages of a PDF into a new PDF by running ``pdftk``.

    Args:
        source_file: Path of the PDF to read.
        page_range: One or more page ranges in a single string, separated
            by commas and/or whitespace (e.g. ``"1-3, 5, 7-9"``). An empty
            string adds no range arguments to the command.
        output_file: Path handed to pdftk as the output file.

    Raises:
        SystemExit: With exit status 1 when ``pdftk`` cannot be started or
            finishes with a non-zero exit status. The reason is printed
            before exiting.
    """
    # pdftk's `cat` operation takes each page range as a separate argument,
    # so split the user's string instead of forwarding it as one token.
    ranges = page_range.replace(',', ' ').split()
    command = ['pdftk', source_file, 'cat', *ranges, 'output', output_file]
    try:
        subprocess.run(command, check=True, capture_output=True, text=True)
    except FileNotFoundError as e:
        # The executable could not be found; report it the same way as a
        # failed run instead of surfacing a traceback.
        print(f"Error during 'pdftk' execution:\n{e}")
        raise SystemExit(1) from e
    except subprocess.CalledProcessError as e:
        print(f"Error during 'pdftk' execution:\n{e.stderr}")
        # Raise SystemExit directly: the exit() helper is injected by the
        # site module and is not intended for use in programs.
        raise SystemExit(1) from e

def convert_pdf_to_images(pdf_file, output_prefix,
                          convert_path='[absolute path of convert.exe]',
                          density=300):
    """Turn a PDF into PNG files by running an external ``convert`` command.

    Args:
        pdf_file: Path of the PDF to convert.
        output_prefix: Prefix of the output names; the command receives
            ``<output_prefix>_%03d.png`` as its output argument.
        convert_path: Executable to run. The default is a placeholder that
            has to be replaced with (or overridden by) the real absolute
            path: a bare ``convert`` is ambiguous because another
            ``convert`` exists in the system directory.
        density: Value passed to the command's ``-density`` option.

    Raises:
        SystemExit: With exit status 1 when the executable cannot be
            started or finishes with a non-zero exit status. The reason is
            printed before exiting.
    """
    command = [
        convert_path,
        '-density', str(density),
        pdf_file,
        f"{output_prefix}_%03d.png",
    ]
    try:
        subprocess.run(command, check=True, capture_output=True, text=True)
    except FileNotFoundError as e:
        # Typically means convert_path still holds the placeholder or
        # points at a file that does not exist.
        print(f"Error during 'convert' execution:\n{e}")
        raise SystemExit(1) from e
    except subprocess.CalledProcessError as e:
        print(f"Error during 'convert' execution:\n{e.stderr}")
        # Raise SystemExit directly: the exit() helper is injected by the
        # site module and is not intended for use in programs.
        raise SystemExit(1) from e

def main():
    """Interactively extract pages from a PDF and convert them to images.

    Asks for a PDF filename and a page range, writes the selected pages to
    ``<name>_extracted.pdf`` and then converts that file into images whose
    names start with ``<name>_image``, where ``<name>`` is the input
    filename without its extension.
    """
    source_pdf = input("Enter the filename of the input PDF (in the current directory): ").strip()
    page_spec = input("Enter the range of pages to extract (e.g., 1-3, 5, 7-9): ").strip()

    # Both derived names reuse the input name minus its extension.
    stem = os.path.splitext(source_pdf)[0]
    extracted_pdf = stem + "_extracted.pdf"
    image_prefix = stem + "_image"

    # Step 1: pull the requested pages into their own PDF.
    extract_pages_from_pdf(source_pdf, page_spec, extracted_pdf)

    # Step 2: turn that intermediate PDF into image files.
    convert_pdf_to_images(extracted_pdf, image_prefix)

    print("Done!")

# Run the interactive workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
	import os
	import subprocess

	def extract_pages_from_pdf(source_file, page_range, output_file):
		"""Copy selected pages of a PDF into a new PDF by running ``pdftk``.

		Args:
			source_file: Path of the PDF to read.
			page_range: One or more page ranges in a single string, separated
				by commas and/or whitespace (e.g. ``"1-3, 5, 7-9"``). An empty
				string adds no range arguments to the command.
			output_file: Path handed to pdftk as the output file.

		Raises:
			SystemExit: With exit status 1 when ``pdftk`` cannot be started or
				finishes with a non-zero exit status. The reason is printed
				before exiting.
		"""
		# pdftk's `cat` operation takes each page range as a separate argument,
		# so split the user's string instead of forwarding it as one token.
		ranges = page_range.replace(',', ' ').split()
		command = ['pdftk', source_file, 'cat', *ranges, 'output', output_file]
		try:
			subprocess.run(command, check=True, capture_output=True, text=True)
		except FileNotFoundError as e:
			# The executable could not be found; report it the same way as a
			# failed run instead of surfacing a traceback.
			print(f"Error during 'pdftk' execution:\n{e}")
			raise SystemExit(1) from e
		except subprocess.CalledProcessError as e:
			print(f"Error during 'pdftk' execution:\n{e.stderr}")
			# Raise SystemExit directly: the exit() helper is injected by the
			# site module and is not intended for use in programs.
			raise SystemExit(1) from e

	def convert_pdf_to_images(pdf_file, output_prefix,
			convert_path='[absolute path of convert.exe]',
			density=300):
		"""Turn a PDF into PNG files by running an external ``convert`` command.

		Args:
			pdf_file: Path of the PDF to convert.
			output_prefix: Prefix of the output names; the command receives
				``<output_prefix>_%03d.png`` as its output argument.
			convert_path: Executable to run. The default is a placeholder that
				has to be replaced with (or overridden by) the real absolute
				path: a bare ``convert`` is ambiguous because another
				``convert`` exists in the system directory.
			density: Value passed to the command's ``-density`` option.

		Raises:
			SystemExit: With exit status 1 when the executable cannot be
				started or finishes with a non-zero exit status. The reason is
				printed before exiting.
		"""
		command = [
			convert_path,
			'-density', str(density),
			pdf_file,
			f"{output_prefix}_%03d.png",
		]
		try:
			subprocess.run(command, check=True, capture_output=True, text=True)
		except FileNotFoundError as e:
			# Typically means convert_path still holds the placeholder or
			# points at a file that does not exist.
			print(f"Error during 'convert' execution:\n{e}")
			raise SystemExit(1) from e
		except subprocess.CalledProcessError as e:
			print(f"Error during 'convert' execution:\n{e.stderr}")
			# Raise SystemExit directly: the exit() helper is injected by the
			# site module and is not intended for use in programs.
			raise SystemExit(1) from e

	def main():
		"""Interactively extract pages from a PDF and convert them to images.

		Asks for a PDF filename and a page range, writes the selected pages to
		``<name>_extracted.pdf`` and then converts that file into images whose
		names start with ``<name>_image``, where ``<name>`` is the input
		filename without its extension.
		"""
		# Prompt user for inputs
		input_pdf_filename = input("Enter the filename of the input PDF (in the current directory): ").strip()
		pages_to_extract = input("Enter the range of pages to extract (e.g., 1-3, 5, 7-9): ").strip()

		# Determine the extracted PDF filename and image prefix
		base_filename, _ = os.path.splitext(input_pdf_filename)
		extracted_pdf_filename = f"{base_filename}_extracted.pdf"
		output_image_prefix = f"{base_filename}_image"

		# Extract pages from the source PDF
		extract_pages_from_pdf(input_pdf_filename, pages_to_extract, extracted_pdf_filename)

		# Convert the extracted pages to images
		convert_pdf_to_images(extracted_pdf_filename, output_image_prefix)

		print("Done!")

	# NOTE(review): this repeats the entry-point guard that already appears
	# earlier in the file, and the call below is no longer nested under the
	# `if`. Presumably paste residue -- confirm and remove rather than repair,
	# since a second working guard would run the workflow twice.
	if __name__ == "__main__":
	main()