# Python BeautifulSoup scraper that collects book covers, titles, descriptions, average ratings, rating counts and authors from a book list page (the source site is not named in this copy).
import requests
from bs4 import BeautifulSoup as bs
import pandas as pd
import re
# Scrape a book-list page: for every listed book, fetch its detail page and
# collect title, description, author, cover image URL, average rating and
# rating count, then write all rows to temp.csv.
#
# NOTE(review): both URL literals below are empty in this copy of the file, so
# requests.get() cannot succeed until they are filled in. The CSS class names
# look like Goodreads list markup -- confirm before setting the URLs.
url = ""
book_base_url = ""  # prefix that is joined with each title link's href

COLUMNS = ['title', 'description', 'author', 'image', 'avg_rating', 'rating_count']

# Compiled once, outside the per-book loop. Raw strings avoid the
# invalid-escape warnings that "\(" / "\d" trigger in normal string literals.
BRACKETED_RE = re.compile(r"[\(\[].*?[\)\]]")  # "(...)" or "[...]" chunks in a title
AVG_RATING_RE = re.compile(r'avg rating ([\d.]+)')
RATING_COUNT_RE = re.compile(r'([\d,]+) ratings')

page = requests.get(url)
soup = bs(page.content, 'html.parser')
titles = soup.find_all('a', class_='bookTitle')
authors = soup.find_all('a', class_='authorName')
ratings = soup.find_all('span', attrs={'class': 'greyText smallText'})

# Rows are accumulated in a plain list and turned into a DataFrame once at the
# end: DataFrame.append was removed in pandas 2.0 and copied the frame per row.
rows = []
for title_tag, author_tag, rating_tag in zip(titles, authors, ratings):
    book_page = requests.get(book_base_url + title_tag["href"])
    book_soup = bs(book_page.content, 'html.parser')

    # Reset per book so a page without a description can neither raise a
    # NameError nor silently reuse the previous book's text.
    description = None
    for item in book_soup.find_all(attrs={'data-text-id': True}):
        container_id = 'freeTextContainer' + item['data-text-id']
        desc = book_soup.find('span', id=container_id)
        if desc is not None:
            # As before, the last matching container wins.
            description = desc.get_text()

    # The cover image may be absent; record None instead of crashing.
    cover = book_soup.find('img', id='coverImage')
    image = cover.get('src') if cover is not None else None

    # Drop bracketed/parenthesised fragments from the title text.
    title = BRACKETED_RE.sub("", title_tag.get_text())
    author = author_tag.get_text()

    # Ratings are kept as the captured strings (e.g. "4.25", "1,234");
    # None is stored when the pattern is not present in the text.
    rating_text = rating_tag.text
    avg_match = AVG_RATING_RE.search(rating_text)
    count_match = RATING_COUNT_RE.search(rating_text)
    avg_rating = avg_match.group(1) if avg_match else None
    rating_count = count_match.group(1) if count_match else None

    rows.append({
        'title': title,
        'description': description,
        'author': author,
        'image': image,
        'avg_rating': avg_rating,
        'rating_count': rating_count,
    })

# An empty rows list still yields a frame with the expected header columns.
df = pd.DataFrame(rows, columns=COLUMNS)
df.to_csv('temp.csv', index=False, encoding='utf-8')
