Skip to content

Instantly share code, notes, and snippets.

@i-blis
Created March 27, 2011 23:34
Show Gist options
  • Save i-blis/889765 to your computer and use it in GitHub Desktop.
Save i-blis/889765 to your computer and use it in GitHub Desktop.
Dump PDF outline (ToC)
#!/usr/bin/env python
# encoding: utf-8
"""
pdfoutlinedump.py
"""
from optparse import OptionParser
from pyPdf import PdfFileReader
def flattenWithLevel(iterable, level=0):
    """Walk a nested list depth-first and yield each leaf with its depth.

    Every element of *iterable* that is a ``list`` is treated as a nested
    group and descended into with ``level + 1``; any other element is a
    leaf.

    Args:
        iterable: An iterable whose items are leaves or (recursively
            nested) lists of leaves.
        level: Depth reported for the direct items of *iterable*.

    Yields:
        dict: ``{"level": depth, "content": leaf}`` for every leaf, in
        document order.
    """
    for element in iterable:
        if isinstance(element, list):
            # A nested list holds the children of the preceding entry, so
            # its leaves sit one level deeper.
            for nested in flattenWithLevel(element, level + 1):
                yield nested
        else:
            yield {"level": level, "content": element}
# Command-line entry point: print the outline of every PDF named in argv.
parser = OptionParser()
parser.add_option(
    "-t", "--tabulator", dest="tab", default="\t",
    help="string repeated once per nesting level to indent each title "
         "(default: a tab character)")
(options, args) = parser.parse_args()
for fileName in args:
    # Keep the file open while the outline is walked (the reader may read
    # from the stream lazily), and make sure it is closed afterwards.
    with open(fileName, "rb") as stream:
        pdf = PdfFileReader(stream)
        for outline in flattenWithLevel(pdf.getOutlines()):
            # Build the line explicitly: the old ``print a, b`` form added
            # a separator space only when the prefix did not end in
            # whitespace, so top-level titles got a stray leading space.
            # The single parenthesised argument works on Python 2 and 3.
            print(options.tab * outline["level"] + outline["content"]["/Title"])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment