# steven0608/.py

## .py
import requests
# To make the get request
from bs4 import BeautifulSoup
# to pull data out of HTML
import pandas
# Use a pandas DataFrame to display the scraped data as a table in a Jupyter notebook.

class GetListOfBooks:
    """Scrape a paginated HTML book table into ``self.list``.

    Every table row that has exactly two ``<td>`` cells is stored as a dict
    holding the keys ``"Book Name"`` and/or ``"Author Name"``.
    """

    # Prefix for pages 2..N; the page number is appended to it.
    # NOTE(review): follow-up pages always come from this fixed address, even
    # if the instance was created with a different ``url`` -- confirm intent.
    PAGE_URL_PREFIX = "https://thawing-ridge-65567.herokuapp.com/books?page="

    # Seconds to wait on the server; without a timeout ``requests.get`` may
    # block indefinitely.
    REQUEST_TIMEOUT = 30

    def __init__(self, url):
        """Store the first-page ``url`` and start with an empty result list."""
        self.url = url
        self.list = []

    def getPageNum(self):
        """Fetch ``self.url`` and return the page count it advertises.

        Returns:
            int: text of the second-to-last pagination link, or 1 when the
            page has no pagination block or fewer than two links.

        Raises:
            ValueError: if that link's text is not an integer.
        """
        return self._countPages(self.parseUrl(self.url))

    def parseUrl(self, url):
        """Download ``url`` and return it parsed with ``html.parser``.

        Raises:
            requests.RequestException: on connection failure, timeout, or an
                HTTP error status.
        """
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        # Fail loudly on 4xx/5xx instead of scraping an error page.
        response.raise_for_status()
        return BeautifulSoup(response.content, "html.parser")

    def getPageContent(self, url):
        """Fetch ``url`` and append every book row on it to ``self.list``."""
        self._collectRows(self.parseUrl(url))

    def getData(self):
        """Scrape the first page and all follow-up pages into ``self.list``.

        The first page is downloaded once and reused for both its rows and
        the page count.
        """
        firstPage = self.parseUrl(self.url)
        self._collectRows(firstPage)
        for num in range(2, self._countPages(firstPage) + 1):
            self.getPageContent(self.PAGE_URL_PREFIX + str(num))

    def _countPages(self, soup):
        """Read the page count from the pagination links of a parsed page."""
        pagination = soup.find("div", {"class": "pagination"})
        if pagination is None:
            return 1
        links = pagination.find_all("a")
        if len(links) < 2:
            return 1
        # The second-to-last link is taken as the highest page number
        # (presumably the last link is a "next" control -- verify markup).
        return int(links[-2].text)

    def _collectRows(self, soup):
        """Append one dict per two-cell table row of ``soup`` to ``self.list``."""
        for row in soup.find_all("tr"):
            cells = row.find_all("td")
            if len(cells) != 2:
                # Only two-cell rows are treated as book entries.
                continue
            book = {}
            for cell in cells:
                # A cell wrapping a div.content holds the title; any other
                # cell is read as the author.
                title = cell.find("div", {"class": "content"})
                if title is not None:
                    book["Book Name"] = title.text
                else:
                    book["Author Name"] = cell.text
            self.list.append(book)
	import requests
	# To make the get request
	from bs4 import BeautifulSoup
	# to pull data out of HTML
	import pandas
	#use the pandas dataframe to show my data in table on my jupyter notebook

	class GetListOfBooks:
		"""Scrape a paginated HTML book table into ``self.list``.

		Every table row that has exactly two ``<td>`` cells is stored as a
		dict holding the keys ``"Book Name"`` and/or ``"Author Name"``.
		"""

		# Prefix for pages 2..N; the page number is appended to it.
		# NOTE(review): follow-up pages always come from this fixed address,
		# even if the instance was created with a different ``url`` --
		# confirm intent.
		PAGE_URL_PREFIX = "https://thawing-ridge-65567.herokuapp.com/books?page="

		# Seconds to wait on the server; without a timeout ``requests.get``
		# may block indefinitely.
		REQUEST_TIMEOUT = 30

		def __init__(self, url):
			"""Store the first-page ``url`` and start with an empty list."""
			self.url = url
			self.list = []

		def getPageNum(self):
			"""Fetch ``self.url`` and return the page count it advertises.

			Returns:
				int: text of the second-to-last pagination link, or 1 when
				the page has no pagination block or fewer than two links.

			Raises:
				ValueError: if that link's text is not an integer.
			"""
			return self._countPages(self.parseUrl(self.url))

		def parseUrl(self, url):
			"""Download ``url`` and return it parsed with ``html.parser``.

			Raises:
				requests.RequestException: on connection failure, timeout,
					or an HTTP error status.
			"""
			response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
			# Fail loudly on 4xx/5xx instead of scraping an error page.
			response.raise_for_status()
			return BeautifulSoup(response.content, "html.parser")

		def getPageContent(self, url):
			"""Fetch ``url`` and append every book row on it to ``self.list``."""
			self._collectRows(self.parseUrl(url))

		def getData(self):
			"""Scrape the first page and all follow-up pages into ``self.list``.

			The first page is downloaded once and reused for both its rows
			and the page count.
			"""
			firstPage = self.parseUrl(self.url)
			self._collectRows(firstPage)
			for num in range(2, self._countPages(firstPage) + 1):
				self.getPageContent(self.PAGE_URL_PREFIX + str(num))

		def _countPages(self, soup):
			"""Read the page count from the pagination links of a parsed page."""
			pagination = soup.find("div", {"class": "pagination"})
			if pagination is None:
				return 1
			links = pagination.find_all("a")
			if len(links) < 2:
				return 1
			# The second-to-last link is taken as the highest page number
			# (presumably the last link is a "next" control -- verify markup).
			return int(links[-2].text)

		def _collectRows(self, soup):
			"""Append one dict per two-cell table row of ``soup`` to ``self.list``."""
			for row in soup.find_all("tr"):
				cells = row.find_all("td")
				if len(cells) != 2:
					# Only two-cell rows are treated as book entries.
					continue
				book = {}
				for cell in cells:
					# A cell wrapping a div.content holds the title; any
					# other cell is read as the author.
					title = cell.find("div", {"class": "content"})
					if title is not None:
						book["Book Name"] = title.text
					else:
						book["Author Name"] = cell.text
				self.list.append(book)