Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
This program processes a large PDF containing documents that are marked and separated by QR codes, and splits it into individual PDFs.
#this app builds off of pdfsplitterv1.py in PDFsplitter folder
import subprocess, shlex
import qrtools, os
from pyPdf import PdfFileWriter, PdfFileReader
from natsort import natsorted
# Course name embedded in every student's assignment folder name.
course = "AS Physics"
# Scanned PDF that will be rasterized and then split into individual papers.
inputFile = "testscan.pdf"
# Ghostscript does not create the output directory, so make sure it exists.
if not os.path.isdir("./temp"):
    os.makedirs("./temp")
#read and split input file
# Build the argument vector directly instead of splitting a concatenated
# string, so an input filename containing spaces or quotes stays one argument.
args = ['gs', '-dNOPAUSE', '-dBATCH', '-sDEVICE=png16m', '-r300',
        '-sOutputFile=./temp/Pic-%d.png', inputFile]
# Human-readable form of the command, kept for logging/debugging only.
cmd = ' '.join(args)
p = subprocess.Popen(args, shell=False, stderr=subprocess.PIPE)
# communicate() drains stderr while waiting for the process to exit; calling
# wait() with stderr=PIPE can deadlock once the pipe buffer fills up.
gsErrors = p.communicate()[1]
if p.returncode != 0:
    # Without page images the later steps cannot work, so fail here with
    # Ghostscript's own diagnostics rather than an unrelated error later on.
    raise RuntimeError("Ghostscript failed with exit code %d: %s"
                       % (p.returncode, gsErrors))
# Scan every rasterized page image for a QR code. A QR code marks the first
# page of a paper; its payload is expected to look like
# "class&assignment&Firstname Lastname".
quizList = []  # one dict per paper: class, assignment, fname, lname, start
quizCount = 0
i = 0 #page count
qr = qrtools.QR()
# Work with explicit paths instead of os.chdir() so the working directory is
# never left pointing at the temp folder if something below raises.
tempDir = "./temp"
fileList = os.listdir(tempDir)
# Natural sort keeps Pic-2.png ahead of Pic-10.png, i.e. in page order.
for filename in natsorted(fileList):
    # Only page images count as pages. Skipping other entries keeps a stray
    # file from shifting every recorded start page (and from being deleted).
    if filename[0:3] != "Pic":
        continue
    pagePath = os.path.join(tempDir, filename)
    if qr.decode(pagePath):
        data = qr.data.split('&')
        # Guard against QR codes that do not carry the expected payload
        # instead of crashing with an IndexError halfway through the scan.
        fullName = data[2].split() if len(data) > 2 else []
        if len(fullName) > 1:
            print("QR code found in file " + filename)
            paper = {}
            paper['class'] = data[0]
            paper['assignment'] = data[1]
            paper['fname'] = fullName[0]
            paper['lname'] = fullName[1]
            paper['start'] = i  # zero-based index of the paper's first page
            quizList.append(paper)
            quizCount = quizCount + 1
        else:
            print("Skipping unrecognized QR code in file " + filename)
    # The page image is only needed for QR detection; remove it once scanned.
    os.remove(pagePath)
    i = i + 1
print("There are " + str(quizCount) + " papers")
def _savePaper(writer, paper):
    """Write the pages collected in *writer* as one student's PDF.

    The file is stored under
    ``save_path + "<lname>, <fname> - <course> - Assignment Folder"`` and is
    named ``"<assignment> <lname>, <fname>.pdf"``. The folder is created when
    it does not exist yet.

    writer -- PdfFileWriter holding the pages of this paper
    paper  -- dict with the keys 'assignment', 'fname' and 'lname'
    """
    studentName = paper["lname"] + ", " + paper["fname"]
    foldername = studentName + " - " + course + " - Assignment Folder"
    filename = paper["assignment"] + " " + studentName
    studentSavePath = save_path + foldername
    if not os.path.exists(studentSavePath):
        os.makedirs(studentSavePath)
    completeName = os.path.join(studentSavePath, filename + ".pdf")
    with open(completeName, "wb") as outputStream:
        writer.write(outputStream)


# Root folder that receives one assignment folder per student.
save_path = 'Section 1/'
print("quizCount is " + str(quizCount))
if not quizList:
    # Without any QR code there is no student to attribute pages to.
    print("No QR codes found; nothing to split")
else:
    # Keep the source PDF open until every paper is written (the writer is
    # handed page objects obtained from this reader), then close it.
    with open(inputFile, "rb") as inputStream:
        inputpdf = PdfFileReader(inputStream)
        if inputpdf.numPages == 1: #singlepage document
            print("single page document")
        else:
            print("splitting multipage document")
        # Page 0 always opens the first paper.
        output = PdfFileWriter()
        output.addPage(inputpdf.getPage(0))
        j = 0 #document count
        for i in range(1, inputpdf.numPages):
            print("test i = " + str(i) + " j= " + str(j))
            print("next start page is " + str(quizList[j]["start"]))
            # A page that starts the next paper closes the current one.
            if j < quizCount - 1 and i == quizList[j + 1]["start"]:
                #new quiz-save the old one
                print("writing quiz")
                _savePaper(output, quizList[j])
                j = j + 1
                output = PdfFileWriter()
            print("adding page" + str(i))
            output.addPage(inputpdf.getPage(i))
        # The last paper is saved only after its final page has been added;
        # saving it inside the loop on the last page index dropped that page.
        print("writing quiz")
        _savePaper(output, quizList[j])
#Be sure to delete the student.lst file
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment