Skip to content

Instantly share code, notes, and snippets.

@mohayonao
Created August 29, 2011 02:05
Show Gist options
  • Save mohayonao/1177605 to your computer and use it in GitHub Desktop.
Save mohayonao/1177605 to your computer and use it in GitHub Desktop.
PDFを分割する
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os, re, optparse
import yaml
import pyPdf
def getprofile(profile):
    """Load a split profile from a YAML file.

    Args:
        profile: Path of the YAML profile to read.

    Returns:
        A tuple ``(indexes, lastPage, (fname, fnum))``:

        * ``indexes`` -- the raw ``indexes`` list from the profile
          (``[]`` when absent).
        * ``lastPage`` -- the ``lastPage`` value, or ``None`` when absent.
        * ``fname`` -- output file name template: ``prefix``, ``%(name)s``
          and ``suffix`` joined by single spaces, then stripped.
        * ``fnum`` -- number assigned to the first output file (default 0).
    """
    # safe_load rather than yaml.load: the profile only needs plain scalars
    # and lists, and yaml.load may construct arbitrary Python objects from
    # the document.  The with-block also closes the handle that the bare
    # open() call used to leak.
    with open(profile) as stream:
        data = yaml.safe_load(stream) or {}  # empty document loads as None

    prefix = data.get('prefix', '')
    suffix = data.get('suffix', '')
    fnum = data.get('fnum', 0)
    lastPage = data.get('lastPage')
    indexes = data.get('indexes', [])

    # strip() removes the stray space left when prefix or suffix is empty.
    fname = ' '.join([prefix, '%(name)s', suffix]).strip()
    return indexes, lastPage, (fname, fnum)
def makeindexes(indexes):
    """Parse the profile's index entries into page ranges.

    Each entry has one of the forms ``(START) name``, ``(START, STOP) name``
    or ``(START, ) name``.

    Args:
        indexes: Sequence of entry strings.

    Returns:
        A list of ``(start, stop, name)`` tuples in input order.  ``stop``
        equals ``start`` for the single-page form and is ``None`` for the
        open-ended ``(START, )`` form; ``name`` is the stripped text that
        follows the closing parenthesis.

    Raises:
        SystemExit: With exit code 1, after printing the offending entry and
            its 1-based position, when an entry does not match the syntax.
    """
    pattern = re.compile(
        r'\(\s*([0-9]+)\s*(?:(,)\s*([0-9]+)?\s*)?\)\s*(.*)$')
    parsed = []
    for lineno, entry in enumerate(indexes, 1):
        try:
            m = pattern.match(entry)
        except TypeError:
            # Not a string at all (e.g. a bare number in the YAML list):
            # report it like any other malformed entry.
            m = None
        if not m:
            print('Syntax Error: line=%d\n - %s' % (lineno, entry))
            raise SystemExit(1)

        start = stop = int(m.group(1))
        if m.group(2):
            # A comma was given: either an explicit stop page or, when the
            # number is omitted, an open-ended range resolved by the caller.
            stop = int(m.group(3)) if m.group(3) else None
        parsed.append((start, stop, m.group(4).strip()))
    return parsed
def main():
    """Split a PDF into several files as described by a YAML profile.

    Command line: ``%prog [Options] src``.

    * ``src`` -- source PDF; ``.pdf`` is appended when the name lacks it.
    * ``-p/--profile`` -- YAML profile; defaults to ``src`` as typed, with
      ``.yaml`` appended when the name lacks it.
    * ``-o/--offset`` -- integer added to every page number from the
      profile to obtain the 1-based page number in the source PDF.

    One ``<filename>.pdf`` is written per profile entry, where the file
    name comes from the profile's name template.

    Raises:
        SystemExit: With code 0, after printing the help text, when no
            source argument is given.
    """
    parser = optparse.OptionParser(usage='%prog [Options] src')
    parser.add_option("-o", "--offset", type="int", default=0)
    parser.add_option("-p", "--profile")
    opts, args = parser.parse_args()
    if not args:
        parser.print_help()
        raise SystemExit(0)

    src_path = args[0]
    profile = opts.profile or src_path
    offset = opts.offset
    # Compare against the real extension (dot included) so a name that
    # merely ends in the letters "pdf"/"yaml" still gets its extension.
    if not src_path.lower().endswith('.pdf'):
        src_path += '.pdf'
    if not profile.lower().endswith('.yaml'):
        profile += '.yaml'

    indexes, lastPage, (fname, fnum) = getprofile(profile)
    indexes = makeindexes(indexes)
    print('Divide PDF: %s (using %s)' % (src_path, profile))

    # The reader is handed the open stream, so the source stays open until
    # every output has been written; the with-block closes it even on error.
    with open(src_path, 'rb') as src_file:
        reader = pyPdf.PdfFileReader(src_file)
        num_pages = reader.numPages
        if lastPage is None:
            lastPage = num_pages
        else:
            lastPage = min(lastPage, num_pages)

        for i, (start, stop, name) in enumerate(indexes):
            if stop is None:
                # Open-ended entry: run up to the page before the next
                # entry's start, or to the last page for the final entry.
                if i < len(indexes) - 1:
                    stop = indexes[i + 1][0] - 1
                else:
                    stop = lastPage

            filename = fname % dict(fnum=(fnum + i), name=name)
            print(' page(%4d-%4d) => %s.pdf' % (start, stop, filename))

            writer = pyPdf.PdfFileWriter()
            for j in range(start, stop + 1):
                page = j + offset  # 1-based page number in the source PDF
                # Inclusive upper bound: page == lastPage is the final page
                # (index lastPage - 1) and must be copied too.  Page numbers
                # below 1 do not exist in the source and are skipped.
                if 1 <= page <= lastPage:
                    writer.addPage(reader.getPage(page - 1))

            with open('%s.pdf' % filename, 'wb') as out:
                writer.write(out)
# Run the splitter only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == '__main__':
    main()
prefix: 'sub'
suffix: '%(fnum)02d'
fnum: 1
indexes:
- ( 1 ) foo # p. 1 only => "sub foo 01.pdf"
- ( 5, 8) bar # from p. 5 to p. 8 => "sub bar 02.pdf"
- ( 9, ) baz # from p. 9 to p.11 => "sub baz 03.pdf"
- ( 12, ) hoge # from p.12 to last => "sub hoge 04.pdf"
lastPage: 9999 # optional (default: last page of the source PDF)
Usage: dpdf.py [Options] src
Options:
  -h, --help            show this help message and exit
  -o OFFSET, --offset=OFFSET
  -p PROFILE, --profile=PROFILE
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment