Skip to content

Instantly share code, notes, and snippets.

Created August 15, 2020 20:34
Show Gist options
  • Save kipronokoech/cf752f917bf0230b152112c3ca7704c6 to your computer and use it in GitHub Desktop.
Save kipronokoech/cf752f917bf0230b152112c3ca7704c6 to your computer and use it in GitHub Desktop.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
import pandas as pd
import numpy as np
import re
# Rating words mapped to the number they spell out.
_STAR_VALUES = {"One": 1, "Two": 2, "Three": 3, "Four": 4, "Five": 5}


def StarConversion(value):
    """Convert a rating word into its integer value.

    Args:
        value: Rating word, one of "One", "Two", "Three", "Four" or "Five"
            (case-sensitive).

    Returns:
        The matching integer from 1 to 5, or None when ``value`` is not one
        of the five recognised words.
    """
    return _STAR_VALUES.get(value)
# Scrape a single category (Travel).
# Filesystem path to the local chromedriver executable.
PATH = "/home/kiprono/chromedriver"
# Start a Chrome session driven by that executable.
driver = webdriver.Chrome(PATH)
# NOTE(review): no driver.get(...) call is visible before the lookup below, so
# the category page does not appear to be loaded at this point -- confirm
# whether a navigation line is missing here.
# Collect every element on the current page whose class is "product_pod"
# (the book entries).
incategory = driver.find_elements_by_class_name("product_pod")
# Build the list of links, one per entry in `incategory`.
# Each href was previously computed but never stored, which left `links`
# empty; every href is now kept.
links = [
    # The link is the href of the <a> nested inside the entry's <h3>.
    item.find_element_by_tag_name("h3")
    .find_element_by_tag_name("a")
    .get_property("href")
    for item in incategory
]
all_details = []
# Loop through each link to access the page of each book.
for link in links:
    # Load the book's page; every lookup below reads whatever page is
    # currently open in the driver.
    driver.get(link)
    # Title of the book
    title = driver.find_element_by_xpath("//*[@id='content_inner']/article/div[1]/div[2]/h1")
    # Price of the book
    price = driver.find_element_by_xpath("//*[@id='content_inner']/article/div[1]/div[2]/p[1]")
    # Stock - number of copies available for the book
    stock = driver.find_element_by_xpath("//*[@id='content_inner']/article/div[1]/div[2]/p[2]")
    # Stock comes as a string; keep the first run of digits as an int.
    stock = int(re.findall(r"\d+", stock.text)[0])
    # Stars - the actual rating is in the tag's class attribute
    stars = driver.find_element_by_xpath("//*[@id='content_inner']/article/div[1]/div[2]/p[3]").get_attribute("class")
    # The second class token is a word (One, Two, Three, ...); convert it to
    # a number (1, 2, 3, ...).
    stars = StarConversion(stars.split()[1])
    # Description: the plural lookup returns an empty list instead of raising
    # when nothing matches, so a page without this paragraph yields None
    # rather than the value being discarded unconditionally.
    description_nodes = driver.find_elements_by_xpath("//*[@id='content_inner']/article/p")
    description = description_nodes[0].text if description_nodes else None
    # First cell of the details table (stored as upc)
    upc = driver.find_element_by_xpath("//*[@id='content_inner']/article/table/tbody/tr[1]/td")
    # Tax imposed on the book
    tax = driver.find_element_by_xpath("//*[@id='content_inner']/article/table/tbody/tr[5]/td")
    # Category of the book
    category_a = driver.find_element_by_xpath("//*[@id='default']/div/div/ul/li[3]/a")
    # Define a dictionary with the details we need.
    # NOTE(review): title, price, upc, tax and category_a are located above
    # but not stored here, and the numbered keys skip 1, 2, 5, 6 and 7 --
    # entries appear to be missing from this record; confirm and restore.
    r = {
        "3Stock": stock,
        "4Stars": stars,
        "8Description": description,
    }
    # Append r to all details
    all_details.append(r)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment