-
-
Save tshrinivasan/23d8e4986cbae49b8a8c to your computer and use it in GitHub Desktop.
# Source http://stackoverflow.com/a/15741856/1301753 | |
import copy | |
import sys | |
import math | |
import pyPdf | |
def split_pages(src, dst):
    """Split every page of a PDF in half and write the halves to a new PDF.

    A page whose media box is wider than it is tall is cut into a left and
    a right half; any other page is cut into an upper and a lower half.
    For each source page the output receives two pages, in this order:
    left then right, or upper then lower.

    Args:
        src: Path of the PDF file to read.
        dst: Path of the PDF file to create (opened in 'wb' mode, so an
            existing file at that path is overwritten).
    """
    # The source is only read, so 'rb' is enough and also works on read-only
    # files. The `with` block closes both handles even if pyPdf raises, and
    # keeps the source open until after the writer has produced the output,
    # matching the order in which the handles were originally closed.
    with open(src, 'rb') as src_f, open(dst, 'wb') as dst_f:
        reader = pyPdf.PdfFileReader(src_f)
        writer = pyPdf.PdfFileWriter()

        for i in range(reader.getNumPages()):
            p = reader.getPage(i)
            # Shallow copy of the page; only the media box is duplicated so
            # that each half can be given its own rectangle.
            q = copy.copy(p)
            q.mediaBox = copy.copy(p.mediaBox)

            x1, x2 = p.mediaBox.lowerLeft
            x3, x4 = p.mediaBox.upperRight

            x1, x2 = math.floor(x1), math.floor(x2)
            x3, x4 = math.floor(x3), math.floor(x4)
            # Cut through the middle of the box. Measuring from the lower-left
            # corner keeps this correct when the box does not start at (0, 0).
            x5 = math.floor((x1 + x3) / 2.0)
            x6 = math.floor((x2 + x4) / 2.0)

            if (x3 - x1) > (x4 - x2):
                # horizontal: p becomes the left half, q the right half
                p.mediaBox.upperRight = (x5, x4)
                p.mediaBox.lowerLeft = (x1, x2)

                q.mediaBox.upperRight = (x3, x4)
                q.mediaBox.lowerLeft = (x5, x2)
            else:
                # vertical: p becomes the upper half, q the lower half
                p.mediaBox.upperRight = (x3, x4)
                p.mediaBox.lowerLeft = (x1, x6)

                q.mediaBox.upperRight = (x3, x6)
                q.mediaBox.lowerLeft = (x1, x2)

            writer.addPage(p)
            writer.addPage(q)

        writer.write(dst_f)
if __name__ == "__main__":
    # Prompt for the file names and run the split only when this file is
    # executed as a script, so importing the module has no side effects.
    input_file = raw_input("Enter the original PDF file name :")
    output_file = raw_input("Enter the splitted PDF file name :")
    split_pages(input_file, output_file)
This was very helpful!
The pages get added back out of order; it should be:
output.addPage(q)
output.addPage(p)
தேடுபொறி வழியே இதனை அடைந்தேன். செய்து பார்த்த பொழுது, கிடைமட்டமாக (horizontally) வெட்டுகிறது. செங்குத்தாக (vertically) வெட்ட என்ன செய்ய வேண்டும்?
This is the error I get every time:
Python 3.9.0 (v3.9.0:9cf6752276, Oct 5 2020, 11:29:23)
[Clang 6.0 (clang-600.0.57)] on darwin
Type "help", "copyright", "credits" or "license()" for more information.
= RESTART: /Users/leonardo/Desktop/23d8e4986cbae49b8a8c-a6c29d599bb96b5a84718ee732d330327afb46d0 2/split-page.py
Traceback (most recent call last):
File "/Users/leonardo/Desktop/23d8e4986cbae49b8a8c-a6c29d599bb96b5a84718ee732d330327afb46d0 2/split-page.py", line 6, in
import pyPdf
ModuleNotFoundError: No module named 'pyPdf'
src_f = file(src, 'r+b')
Where does this "file" function come from? What does "file" refer to?
Can anyone help me do this on Linux? I have no idea how to use it. I have installed the pdf module and the pyPdf module.
Hacked up a 2024 update that uses python3 and newer library conventions:
https://gist.github.com/breedx2/60409d2179508dba28c4159b81f26fc2
Thanks for the original! Cheers.
Awesome, thanks