#!/usr/bin/env python
from pdfminer.pdfparser import PDFParser
from pdfminer.pdfdocument import PDFDocument
def _clean_title(title):
    """Return *title* with newlines removed and whitespace runs collapsed."""
    # Newlines are dropped (not turned into spaces) before splitting on
    # whitespace, so a title broken across lines is glued back together.
    return ' '.join(title.replace('\n', '').split())


def parse(filename, maxlevel):
    """Print the outline titles of a PDF, down to a maximum nesting level.

    Each printed line is indented by one space per outline level, followed
    by a separator space and the cleaned-up title.

    Args:
        filename: Path of the PDF file to read.
        maxlevel: Entries whose level is greater than this are skipped.

    Known limitation: a PDF with several thousand entries on one level has
    been reported to fail with "RuntimeError: maximum recursion depth
    exceeded"; splitting the PDF into smaller parts worked around it.
    """
    # Keep the file open for the whole loop and close it afterwards.
    # NOTE(review): the outline is presumably read lazily from the file
    # while iterating -- confirm before moving the loop out of the `with`.
    with open(filename, 'rb') as fp:
        parser = PDFParser(fp)
        doc = PDFDocument(parser)
        for (level, title, dest, a, se) in doc.get_outlines():
            if level > maxlevel:
                continue
            title = _clean_title(title)
            if not isinstance(title, str):
                # Python 2 only: a unicode title is encoded explicitly so
                # printing does not depend on the stdout encoding.
                title = title.encode('utf8')
            # Single-argument print() behaves the same on Python 2 and 3.
            print('%s %s' % (' ' * level, title))
if __name__ == '__main__':
    import sys

    # Expect exactly two arguments: the PDF path and the maximum level.
    if len(sys.argv) != 3:
        print('Usage: %s xxx.pdf level' % sys.argv[0])
        # Stop here: falling through would index arguments that are missing.
        sys.exit(2)
    parse(sys.argv[1], int(sys.argv[2]))
I had to parse a PDF that contains 3520 pages, each one being a level-one entry, so the script stopped with the following error: `RuntimeError: maximum recursion depth exceeded while calling a Python object`.

As a workaround, I split the PDF into 500-page parts, and everything went fine.

Thank you for your script.

