Created
April 3, 2014 03:31
-
-
Save jrsmith3/9947838 to your computer and use it in GitHub Desktop.
Convert specified pages from a PDF to png
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
This script was used to create the figures for http://jrsmith3.github.io/sample-logs-the-secret-to-managing-multi-person-projects.html from a PDF file containing some old CMU sample logs.
""" | |
import PyPDF2 | |
from wand.image import Image | |
import io | |
import os | |
def pdf_page_to_png(src_pdf, pagenum = 0, resolution = 72,):
    """
    Returns specified PDF page as wand.image.Image png.

    The requested page is copied into a new single-page PDF held in memory,
    which is then read by wand at the requested resolution and converted to
    png.

    :param PyPDF2.PdfFileReader src_pdf: PDF from which to take pages.
    :param int pagenum: Page number to take (passed unchanged to ``src_pdf.getPage``).
    :param int resolution: Resolution for resulting png in DPI.
    :returns: A new wand.image.Image whose format has been set to png.
    """
    dst_pdf = PyPDF2.PdfFileWriter()
    dst_pdf.addPage(src_pdf.getPage(pagenum))

    pdf_bytes = io.BytesIO()
    dst_pdf.write(pdf_bytes)
    # Rewind so wand reads the PDF from its first byte.
    pdf_bytes.seek(0)

    # Image.convert() does not modify the image in place; it returns a new,
    # converted image. Return that result (the original code discarded it)
    # and let the with-block release the intermediate PDF-format image.
    with Image(file = pdf_bytes, resolution = resolution) as pdf_img:
        return pdf_img.convert("png")
# Main
# ====
def _save_big_and_small(img, basename):
    """
    Save ``img`` as ``<basename>.png``, then shrink it and save it again as
    ``<basename>_small.png``.

    :param wand.image.Image img: Image to save. It is resized in place, so it holds the small version afterwards.
    :param str basename: Output filename without extension.
    """
    img.save(filename = basename + ".png")
    # Ensmallen
    img.transform("", "200")
    img.save(filename = basename + "_small" + ".png")


src_filename = "sample_log.pdf"

# What follows is a lookup table of page numbers within sample_log.pdf and the corresponding filenames.
pages = [{"pagenum": 22, "filename": "samplelog_jrs0019_p1"},
         {"pagenum": 23, "filename": "samplelog_jrs0019_p2"},
         {"pagenum": 124, "filename": "samplelog_jrs0075_p3_2011-02-05_18-55"},]

# JRS0070 is handled separately because it needs to be cropped before saving.
jrs0070 = {"pagenum": 109, "filename": "samplelog_jrs0070_p1"}

# open() replaces the Python 2-only file() builtin, which does not exist on
# Python 3. The with-block keeps the PDF open while its pages are read and
# closes it once all images have been written.
with open(src_filename, "rb") as src_file:
    src_pdf = PyPDF2.PdfFileReader(src_file)

    # Convert each page to a png image.
    for page in pages:
        img = pdf_page_to_png(src_pdf, pagenum = page["pagenum"], resolution = 300)
        _save_big_and_small(img, page["filename"])

    # Deal with the cropping for JRS0070.
    img = pdf_page_to_png(src_pdf, pagenum = jrs0070["pagenum"], resolution = 300)
    # Crop
    img.crop(bottom = 1000)
    # Save
    _save_big_and_small(img, jrs0070["filename"])
Helpful! Thanks.
Thanks.
Hi thank you for your work, but I get errors...
Traceback (most recent call last):
File "PDF_to_jpg.py", line 45, in <module>
img = pdf_page_to_png(src_pdf, pagenum = page["pagenum"], resolution = 300)
File "PDF_to_jpg.py", line 25, in pdf_page_to_png
img = Image(file = pdf_bytes, resolution = resolution)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/wand/image.py", line 2740, in __init__
self.read(file=file, resolution=resolution)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/wand/image.py", line 2822, in read
self.raise_exception()
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/wand/resource.py", line 222, in raise_exception
raise e
wand.exceptions.BlobError: zero-length blob not permitted `' @ error/blob.c/BlobToImage/344
Exception ignored in: <bound method Resource.__del__ of <wand.image.Image: (empty)>>
Traceback (most recent call last):
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/wand/resource.py", line 232, in __del__
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/wand/image.py", line 2767, in destroy
TypeError: object of type 'NoneType' has no len()
I'm using python3
I hope you know how to solve this problem....
I found a solution and wrote a script for extracting the cover as a PNG from a PDF... If you are interested: https://github.com/KoStard/PDF-cover-extractor
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Convert specified pages from a PDF to png
Nice share for converting PDF to image!