# mbstacy/checkFor2pages.py

## checkFor2pages.py
import PyPDF2
from sys import argv

def getTextPdf(filename):
    '''
    Read every page of a PDF and return the concatenated text.

    Parameters:
        filename: path of the PDF file; it is opened in binary mode.

    Returns:
        The result of extractText() for each page, joined in page order
        with no separator. A document with zero pages yields "".
    '''
    with open(filename, 'rb') as pdf_stream:
        # The reader is only used inside the with-block, while the file is
        # still open.
        # NOTE(review): PdfFileReader/numPages/getPage/extractText are the
        # older camelCase PyPDF2 names - confirm the installed PyPDF2 version
        # still provides them.
        reader = PyPDF2.PdfFileReader(pdf_stream)
        # Collect the per-page text and join once, instead of growing a
        # string with += on every page.
        page_texts = [
            reader.getPage(page_index).extractText()
            for page_index in range(reader.numPages)
        ]
    return "".join(page_texts)

def getFirstPageTextPdf(filename):
    '''
    Return the text extracted from the first page of a PDF.

    Parameters:
        filename: path of the PDF file; it is opened in binary mode.

    Returns:
        Whatever extractText() produces for the page at index 0.
    '''
    with open(filename, 'rb') as pdf_stream:
        # Only page index 0 is requested; no other page is touched here.
        first_page = PyPDF2.PdfFileReader(pdf_stream).getPage(0)
        return first_page.extractText()

if __name__ == "__main__":
    # Script entry point: takes the path of one PDF as the first argument.
    # Exit with a usage hint rather than a bare IndexError when it is missing.
    if len(argv) < 2:
        raise SystemExit("usage: {0} <pdf-file>".format(argv[0]))
    text = getFirstPageTextPdf(argv[1])
    # Report the file when "Authors" occurs within the first nine characters
    # of the first page's extracted text.
    if "Authors" in text[:9]:
        print("TRUE: {0}".format(argv[1]))