Skip to content

Instantly share code, notes, and snippets.

@felixmon
Created October 18, 2023 14:12
Show Gist options
  • Save felixmon/92c52d868e8c956175b9ffcc1e273288 to your computer and use it in GitHub Desktop.
Save felixmon/92c52d868e8c956175b9ffcc1e273288 to your computer and use it in GitHub Desktop.
Use pdftk and the `convert` command to turn specified pages of a PDF file into PNG images
import os
import subprocess
def extract_pages_from_pdf(source_file, page_range, output_file):
    """Copy selected pages of a PDF into a new PDF by running ``pdftk``.

    Args:
        source_file: Path of the PDF to read.
        page_range: One or more page ranges separated by commas and/or
            whitespace, e.g. ``"1-3, 5, 7-9"``.
        output_file: Path of the PDF to write.

    Raises:
        SystemExit: With exit code 1 if no page range is given, if
            ``pdftk`` cannot be launched, or if it exits with a non-zero
            status. A diagnostic is printed before exiting.
    """
    # pdftk expects every range as its own command-line argument, so
    # "1-3, 5" has to be passed as ["1-3", "5"], not as a single string.
    ranges = page_range.replace(",", " ").split()
    if not ranges:
        print("Error during 'pdftk' execution:\nno page range was given")
        raise SystemExit(1)

    command = ['pdftk', source_file, 'cat', *ranges, 'output', output_file]
    try:
        subprocess.run(command, check=True, capture_output=True, text=True)
    except OSError as e:
        # Raised when the pdftk executable is missing or cannot be started.
        print(f"Error during 'pdftk' execution:\n{e}")
        raise SystemExit(1) from e
    except subprocess.CalledProcessError as e:
        print(f"Error during 'pdftk' execution:\n{e.stderr}")
        raise SystemExit(1) from e
def convert_pdf_to_images(pdf_file, output_prefix, *, density=300,
                          convert_cmd='[absolute path of convert.exe]'):
    """Convert a PDF into PNG images by running the ``convert`` command.

    Args:
        pdf_file: Path of the PDF to convert.
        output_prefix: Prefix of the output files; the output pattern
            handed to ``convert`` is ``<output_prefix>_%03d.png``.
        density: Value passed to ``convert``'s ``-density`` option.
        convert_cmd: Executable to run. The default is a placeholder and
            must be replaced by (or overridden with) the real path.

    Raises:
        SystemExit: With exit code 1 if ``convert_cmd`` cannot be launched
            or exits with a non-zero status. A diagnostic is printed
            before exiting.
    """
    # The full path of 'convert' must be given because there is another
    # 'convert' in the system directory that would otherwise be found.
    command = [
        convert_cmd,
        '-density', str(density),
        pdf_file,
        f"{output_prefix}_%03d.png",
    ]
    try:
        subprocess.run(command, check=True, capture_output=True, text=True)
    except OSError as e:
        # Raised when the executable is missing or cannot be started,
        # e.g. when the placeholder path was never filled in.
        print(f"Error during 'convert' execution:\n{e}")
        raise SystemExit(1) from e
    except subprocess.CalledProcessError as e:
        print(f"Error during 'convert' execution:\n{e.stderr}")
        raise SystemExit(1) from e
def main():
    """Ask for a PDF and a page range, then extract and convert those pages.

    The extracted pages are written to ``<name>_extracted.pdf`` and the
    images use the prefix ``<name>_image``, where ``<name>`` is the input
    filename without its extension.
    """
    # Collect the two inputs interactively.
    source_pdf = input("Enter the filename of the input PDF (in the current directory): ").strip()
    page_spec = input("Enter the range of pages to extract (e.g., 1-3, 5, 7-9): ").strip()

    # Derive both output names from the input name minus its extension.
    stem = os.path.splitext(source_pdf)[0]
    extracted_pdf = stem + "_extracted.pdf"
    image_prefix = stem + "_image"

    # Step 1: pull the requested pages into an intermediate PDF.
    extract_pages_from_pdf(source_pdf, page_spec, extracted_pdf)
    # Step 2: turn that intermediate PDF into images.
    convert_pdf_to_images(extracted_pdf, image_prefix)

    print("Done!")
# Run the interactive workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment