# lingpri/pdf_to_text.py

## pdf_to_text.py
def convert_pdf_to_text(filename):
    """Extract the text of every page of a PDF and return it as one string.

    Pages are read in order, from the first to the last, and their extracted
    text is concatenated with no separator between pages.

    Args:
        filename: Path of the PDF file to read; it is opened in binary mode.

    Returns:
        The concatenated text of all pages, or "" when the document reports
        zero pages.

    Raises:
        OSError: If the file cannot be opened (for example, it does not
            exist).
    """
    # NOTE(review): PdfFileReader / numPages / getPage / extractText look like
    # the legacy PyPDF2 names; they are kept unchanged so behavior matches the
    # PyPDF2 release this project uses -- confirm the pinned version before
    # moving to a newer reader API. The `PyPDF2` import itself is not visible
    # in this part of the file; verify it exists at module level.

    # The context manager closes the file even when parsing raises; the
    # previous version never closed the handle.
    with open(filename, 'rb') as pdf_file:
        reader = PyPDF2.PdfFileReader(pdf_file)
        # Pages are read inside the `with` block so the file is guaranteed to
        # still be open while the reader uses it.
        return "".join(
            reader.getPage(index).extractText()
            for index in range(reader.numPages)
        )
	def convert_pdf_to_text(filename):
	    """Extract the text of every page of a PDF and return it as one string.

	    Pages are read in order, from the first to the last, and their
	    extracted text is concatenated with no separator between pages.

	    Args:
	        filename: Path of the PDF file to read; it is opened in binary
	            mode.

	    Returns:
	        The concatenated text of all pages, or "" when the document
	        reports zero pages.

	    Raises:
	        OSError: If the file cannot be opened (for example, it does not
	            exist).
	    """
	    # NOTE(review): this definition repeats the `convert_pdf_to_text`
	    # defined earlier in this file under the same name; consider keeping
	    # only one of them. The leading tab on this block is inherited from
	    # the original lines and should be normalized together with the rest
	    # of the file.
	    # NOTE(review): PdfFileReader / numPages / getPage / extractText look
	    # like the legacy PyPDF2 names; confirm the pinned PyPDF2 version
	    # before moving to a newer reader API.

	    # The context manager closes the file even when parsing raises; the
	    # previous version never closed the handle.
	    with open(filename, 'rb') as pdf_file:
	        reader = PyPDF2.PdfFileReader(pdf_file)
	        # Pages are read inside the `with` block so the file is
	        # guaranteed to still be open while the reader uses it.
	        return "".join(
	            reader.getPage(index).extractText()
	            for index in range(reader.numPages)
	        )