Last active
October 19, 2024 18:18
-
-
Save tshrinivasan/23d8e4986cbae49b8a8c to your computer and use it in GitHub Desktop.
Split a PDF vertically; used for scanned double-sided PDF pages
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Source http://stackoverflow.com/a/15741856/1301753 | |
import copy | |
import sys | |
import math | |
import pyPdf | |
def split_pages(src, dst):
    """Split every page of a PDF in half and write the halves to a new PDF.

    Each source page produces two output pages that share the page's
    content but carry different media boxes:

    * a page that is wider than it is tall is cut at its horizontal
      midpoint; the left half is emitted first, then the right half;
    * any other page is cut at its vertical midpoint; the half with the
      larger y coordinates is emitted first, then the other half.

    Media box coordinates are rounded down to whole units before the
    midpoint is computed.

    Args:
        src: Path of the PDF to read.
        dst: Path of the PDF to write. It is opened for writing, so an
            existing file at that path is overwritten.
    """
    # open() replaces the Python-2-only file() builtin, and the with-block
    # closes both handles even if pyPdf raises part-way through. The source
    # is only read, so it is opened read-only. Both files stay open until
    # after the final write, exactly as before.
    with open(src, 'rb') as src_f, open(dst, 'wb') as dst_f:
        reader = pyPdf.PdfFileReader(src_f)
        writer = pyPdf.PdfFileWriter()

        for i in range(reader.getNumPages()):
            p = reader.getPage(i)
            # Second page object for the other half; it gets its own media
            # box so the two halves can be cropped independently.
            q = copy.copy(p)
            q.mediaBox = copy.copy(p.mediaBox)

            x_min, y_min = p.mediaBox.lowerLeft
            x_max, y_max = p.mediaBox.upperRight
            left, bottom = math.floor(x_min), math.floor(y_min)
            right, top = math.floor(x_max), math.floor(y_max)

            # Measure the cut line from the lower-left corner. Halving the
            # upper-right coordinates alone is only correct when the media
            # box starts at (0, 0); for that common case the result here is
            # unchanged.
            mid_x = math.floor((left + right) / 2.0)
            mid_y = math.floor((bottom + top) / 2.0)

            if right - left > top - bottom:
                # horizontal: p keeps the left half, q the right half
                p.mediaBox.upperRight = (mid_x, top)
                p.mediaBox.lowerLeft = (left, bottom)
                q.mediaBox.upperRight = (right, top)
                q.mediaBox.lowerLeft = (mid_x, bottom)
            else:
                # vertical: p keeps the upper half, q the lower half
                p.mediaBox.upperRight = (right, top)
                p.mediaBox.lowerLeft = (left, mid_y)
                q.mediaBox.upperRight = (right, mid_y)
                q.mediaBox.lowerLeft = (left, bottom)

            writer.addPage(p)
            writer.addPage(q)

        writer.write(dst_f)
# Ask for the source path first and the destination path second, then
# hand both to split_pages.
input_file, output_file = (
    raw_input("Enter the original PDF file name :"),
    raw_input("Enter the splitted PDF file name :"),
)
split_pages(input_file, output_file)
Can anyone help me with how to do this on Linux? I have no idea how to use it. I have installed the pdf module and the pyPDF module.
Hacked up a 2024 update that uses python3 and newer library conventions:
https://gist.github.com/breedx2/60409d2179508dba28c4159b81f26fc2
Thanks for the original! Cheers.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
src_f = file(src, 'r+b')
Where do I get this "file" function from? What does "file" refer to?