Skip to content

Instantly share code, notes, and snippets.

@fmasanori
Last active October 24, 2017 00:06
Show Gist options
  • Save fmasanori/859b67c597473ee5d4ca4bc5dd3beeb1 to your computer and use it in GitHub Desktop.
Save fmasanori/859b67c597473ee5d4ca4bc5dd3beeb1 to your computer and use it in GitHub Desktop.
import PyPDF2
import os
import os.path
def traduz(f):
    """Extract the text of the PDF at path *f* into a sibling ``.txt`` file.

    The output path is the input path with its final extension replaced by
    ``.txt``. Page texts are written in page order, each followed by a
    newline.

    This is a best-effort converter: failures are reported on stdout rather
    than raised.

    * If the PDF cannot be parsed, ``<f> corrompido`` is printed and no
      output file is created.
    * If text extraction or writing fails, ``<txt path> não conseguiu gravar
      o texto`` is printed; the (possibly empty) output file is left behind.

    Errors from opening either file (missing input, unwritable output)
    still propagate to the caller.

    Args:
        f: Path of the PDF file to convert.

    Returns:
        None.
    """
    # Swap only the trailing extension. A plain str.replace('.pdf', '.txt')
    # rewrites every occurrence in the path and, when there is no lowercase
    # '.pdf' at all (e.g. 'X.PDF'), yields the input path itself, so opening
    # it for writing would truncate the source PDF.
    base, ext = os.path.splitext(f)
    # If the input already ends in .txt, append instead of overwriting it.
    txt_path = (f if ext.lower() == '.txt' else base) + '.txt'

    # The reader pulls page data from the stream on demand, so the input
    # must stay open until extraction is finished; `with` guarantees it is
    # closed on every exit path, including the early return below.
    with open(f, 'rb') as pdf_file:
        # NOTE(review): PdfFileReader/getNumPages/getPage/extractText look
        # like the pre-3.0 PyPDF2 names - confirm against the installed
        # version before upgrading the dependency.
        try:
            pdf_reader = PyPDF2.PdfFileReader(pdf_file)
        except Exception:
            # Deliberately broad: the parser can fail in many ways on a
            # damaged file, but Ctrl-C / SystemExit must still get through.
            print(f, 'corrompido')
            return

        with open(txt_path, 'w') as out:
            try:
                num_pages = pdf_reader.getNumPages()
                content = ''.join(
                    pdf_reader.getPage(i).extractText() + '\n'
                    for i in range(num_pages)
                )
                out.write(content)
            except Exception:
                print(txt_path, 'não conseguiu gravar o texto')
# Batch driver: convert every PDF in the current working directory.
dirList = os.listdir(".")
for f in dirList:
    # Skip directories (or other non-file entries) that merely carry a .pdf
    # name; opening one would raise and abort the remaining conversions.
    if f.endswith('.pdf') and os.path.isfile(f):
        print(f)  # progress: name of the file being converted
        traduz(f)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment