Skip to content

Instantly share code, notes, and snippets.

from PyPDF2 import PdfFileReader
class Extractor:
    """Load a PDF file and eagerly compute data derived from it.

    Constructing an instance opens the file with ``PdfFileReader`` and then
    calls three helper methods on ``self`` (``_extract_sections``,
    ``_find_pages_range`` and ``_extract_raw_text``), storing each result as
    an instance attribute.
    """

    def __init__(self, file_name: str) -> None:
        """Open ``file_name`` and populate the derived attributes.

        Args:
            file_name: Handed unchanged to ``PdfFileReader``; presumably the
                path of the PDF to process -- confirm against callers.
        """
        self.pdf_reader = PdfFileReader(file_name)
        # Sections are derived from the reader's outline entries.
        self.sections = self._extract_sections(self.pdf_reader.outlines)
        # NOTE(review): the helpers below take no arguments, so they
        # presumably read the attributes assigned above -- keep this order.
        self.pages_range = self._find_pages_range()
        # Useful for creating / testing new functionality.
        self.raw_text = self._extract_raw_text()