Skip to content

Instantly share code, notes, and snippets.

@MattEding
Created September 12, 2018 03:12
Show Gist options
  • Save MattEding/d508884261fd9cf7e012a9973ca2eedc to your computer and use it in GitHub Desktop.
Save MattEding/d508884261fd9cf7e012a9973ca2eedc to your computer and use it in GitHub Desktop.
Splits a Eureka Math PDF into separate lessons
import os
import re
import PyPDF2
def split_eurika_book(pdf_path, page_list, *, name_format='G{grade} - M{module} L{lesson}.pdf'):
    """Split a Eureka Math book PDF into one PDF file per lesson.

    The grade and module numbers are parsed from ``pdf_path``: a ``g`` or
    ``G`` followed by digits, any single character, then ``m`` or ``M``
    followed by digits (e.g. ``g6_m1``). The file name is searched first so
    that a similarly named directory cannot shadow it; the full path is used
    as a fallback.

    Args:
        pdf_path: Path of the source PDF (``str`` or ``os.PathLike``).
        page_list: 1-based page numbers on which each lesson starts, in
            ascending order. Lesson ``k`` covers the pages from its own start
            up to, but not including, the next lesson's start; the last
            lesson runs through the final page of the book. The sequence is
            not modified.
        name_format: ``str.format`` template for the output file names. It
            receives ``grade`` and ``module`` (the digit strings found in the
            path) and ``lesson`` (an int counting from 1). With the default
            template the files are created in the current working directory.

    Raises:
        ValueError: If no grade/module marker can be found in ``pdf_path``.
    """
    pattern = re.compile(r'[gG](\d+).[mM](\d+)')
    path_text = os.fspath(pdf_path)
    match = pattern.search(os.path.basename(path_text)) or pattern.search(path_text)
    if match is None:
        # Fail before touching any file, with a message that names the input.
        raise ValueError(
            'cannot find grade/module (e.g. "g6_m1") in path: {!r}'.format(path_text)
        )
    grade, module = match.groups()

    with open(pdf_path, 'rb') as pdf_orig_fp:
        pdf_reader = PyPDF2.PdfFileReader(pdf_orig_fp)
        # Work on a copy: appending the end-of-book sentinel to the caller's
        # list would corrupt it for any later reuse.
        boundaries = [*page_list, pdf_reader.numPages + 1]
        pairwise = zip(boundaries, boundaries[1:])
        for lesson, (start, stop) in enumerate(pairwise, start=1):
            filename = name_format.format(grade=grade, module=module, lesson=lesson)
            # Collect the pages before opening the output so that an invalid
            # page number does not leave an empty file behind.
            pdf_writer = PyPDF2.PdfFileWriter()
            for page_num in range(start, stop):
                # page_list is 1-based, getPage() is 0-based.
                pdf_writer.addPage(pdf_reader.getPage(page_num - 1))
            with open(filename, 'wb') as pdf_new_fp:
                pdf_writer.write(pdf_new_fp)
if __name__ == '__main__':
    # PyPDF2's text extraction yields an empty string for these books, so the
    # first page of every lesson cannot be detected automatically and has to
    # be listed by hand.
    workbook_path = '/Users/meding/Downloads/Eurika/Math 6/g6_m1_student_wkbook_v1_3_1.pdf'
    lesson_start_pages = [
        2, 6, 11, 14, 19, 26, 31, 36, 43, 49,
        55, 61, 65, 72, 78, 81, 86, 92, 98, 104,
    ]
    split_eurika_book(pdf_path=workbook_path, page_list=lesson_start_pages)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment