Skip to content

Instantly share code, notes, and snippets.

@kipronokoech
Created August 15, 2020 19:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kipronokoech/73e87ab9042ca76ce774842900f16115 to your computer and use it in GitHub Desktop.
Save kipronokoech/73e87ab9042ca76ce774842900f16115 to your computer and use it in GitHub Desktop.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
import pandas as pd
import numpy as np
import re
# Path to the chromedriver executable handed to Selenium to launch Chrome.
PATH = "/home/kiprono/chromedriver"
driver = webdriver.Chrome(PATH)
# Navigate the browser to the first page of the catalogue listing.
driver.get("http://books.toscrape.com/catalogue/category/books_1/page-1.html")
# Find every book card ("product_pod") on the page and keep only the first one.
incategory = driver.find_elements_by_class_name("product_pod")[0]
# The link inside the card's <h3> holds the URL of the book's detail page.
a = incategory.find_element_by_tag_name("h3").find_element_by_tag_name("a").get_property("href")
driver.get(a)
# Locate the elements of interest on the page containing the book details.
title = driver.find_element_by_xpath("//*[@id='content_inner']/article/div[1]/div[2]/h1")
price = driver.find_element_by_xpath("//*[@id='content_inner']/article/div[1]/div[2]/p[1]")
stock = driver.find_element_by_xpath("//*[@id='content_inner']/article/div[1]/div[2]/p[2]")
# The rating is taken from the element's class attribute rather than its text.
stars = driver.find_element_by_xpath("//*[@id='content_inner']/article/div[1]/div[2]/p[3]").get_attribute("class")
# Replace the stock element with the first run of digits found in its text.
# The pattern is a raw string so "\d" is not read as a string escape.
stock = int(re.findall(r"\d+", stock.text)[0])
# This function converts a star rating written as a word into an int.
def StarConversion(value):
    """Convert a star rating spelled as an English word into an integer.

    Args:
        value: The rating word: "One", "Two", "Three", "Four" or "Five".
            The comparison is case-sensitive.

    Returns:
        The rating as an int between 1 and 5, or None when ``value`` is
        not one of the recognised words.
    """
    words = ("One", "Two", "Three", "Four", "Five")
    # Compare with == in order so any input type (even an unhashable one)
    # is accepted and simply falls through to None when nothing matches.
    for number, word in enumerate(words, start=1):
        if value == word:
            return number
    return None
try:
    # The second whitespace-separated token of the class attribute is the
    # rating word; convert it to an int.
    stars = StarConversion(stars.split()[1])
    description = driver.find_element_by_xpath("//*[@id='content_inner']/article/p")
    upc = driver.find_element_by_xpath("//*[@id='content_inner']/article/table/tbody/tr[1]/td")
    tax = driver.find_element_by_xpath("//*[@id='content_inner']/article/table/tbody/tr[5]/td")
    # NOTE(review): category_a is located but never added to r below;
    # confirm whether a category field was meant to be included.
    category_a = driver.find_element_by_xpath("//*[@id='default']/div/div/ul/li[3]/a")
    # Gather the scraped values into a single dictionary r.
    r = {
        "Title": title.text,
        "Stock": stock,
        "Stars": stars,
        "Price": price.text,
        "Tax": tax.text,
        "UPC": upc.text,
        "Description": description.text,
    }
    # Print all contents of the dictionary.
    print(r)
    # Pause for three seconds before the browser is closed.
    time.sleep(3)
finally:
    # Close the browser even if one of the lookups in this section raised,
    # so a failed run does not leave the browser session open.
    driver.quit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment