Skip to content

Instantly share code, notes, and snippets.

@Yuri-M-Dias
Created July 30, 2019 21:48
Show Gist options
  • Save Yuri-M-Dias/d82b3c3bf2635fdf8005a59425d68ece to your computer and use it in GitHub Desktop.
Save Yuri-M-Dias/d82b3c3bf2635fdf8005a59425d68ece to your computer and use it in GitHub Desktop.
Split a PDF into single pages and name each page using a CSV file
#!/usr/bin/env python
import os
import copy, sys
import pandas as pd
from PyPDF2 import PdfFileWriter, PdfFileReader
import slate3k as slate
def pdf_splitter(path, csvTargets):
    """Split a PDF into one-page files named after rows of a CSV file.

    Page ``i`` of the PDF is written to ``<Nome>.pdf``, where ``<Nome>`` is
    the value of the ``Nome`` column in row ``i`` of the CSV. Output names
    are used as given, so they resolve against the current working
    directory unless the CSV value itself contains a directory part.

    Args:
        path: Path of the PDF to split.
        csvTargets: CSV source accepted by ``pandas.read_csv``; it must
            provide a ``Nome`` column with at least one row per PDF page.

    Raises:
        IndexError: If the CSV has fewer rows than the PDF has pages.
    """
    pdf = PdfFileReader(path)
    csvNames = pd.read_csv(csvTargets)
    page_count = pdf.getNumPages()

    # Check up front: without this, the positional lookup below fails with
    # an opaque IndexError only after some pages were already written.
    if len(csvNames) < page_count:
        raise IndexError(
            'CSV has {} rows but the PDF has {} pages'.format(
                len(csvNames), page_count
            )
        )

    for page in range(page_count):
        # A fresh writer per page so each output holds exactly one page.
        pdf_writer = PdfFileWriter()
        pdf_writer.addPage(pdf.getPage(page))

        # Rows are matched to pages purely by position.
        nome = csvNames.iloc[page].Nome
        output_filename = '{}.pdf'.format(nome)
        with open(output_filename, 'wb') as out:
            pdf_writer.write(out)
        print('Created: {}'.format(output_filename))
def pdf_renamer(path):
    """Print the text extracted from every ``.pdf`` file in a directory.

    Despite the name, nothing is renamed: each PDF directly inside *path*
    is opened, passed to ``slate.PDF`` and the result is printed.

    Args:
        path: Directory to scan. Entries whose name does not end in
            ``.pdf`` are ignored.
    """
    for filename in os.listdir(path):
        if not filename.endswith(".pdf"):
            continue
        # Open the file from the directory that was listed; the previous
        # hard-coded './Results/' prefix ignored the ``path`` argument.
        with open(os.path.join(path, filename), 'rb') as f:
            extracted_text = slate.PDF(f)
            print(extracted_text)
if __name__ == '__main__':
    # Inputs for the split step, which is currently switched off below.
    path, csvFile = 'participantes.pdf', "./nomesDS.csv"
    # pdf_splitter(path, csvFile)

    # Only the text-extraction pass over the already-split pages runs.
    pdf_renamer("./Results/")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment