Skip to content

Instantly share code, notes, and snippets.

@aflansburg
Created November 2, 2017 02:42
Show Gist options
  • Save aflansburg/353d5e3c24315a6bfa9d77ec726267a9 to your computer and use it in GitHub Desktop.
Save aflansburg/353d5e3c24315a6bfa9d77ec726267a9 to your computer and use it in GitHub Desktop.
Scrape an Amazon product review IFrame (returned from the Product Advertising API for a particular ASIN)
import re

from selenium import webdriver
from selenium.webdriver.common.by import By
# Python 3.6.3
# hold on to your butts
driver_path = '/usr/local/bin/chromedriver'
# NOTE(review): the query string carries an expiry (`exp`) and a signature
# (`sig`), so this exact URL presumably stops working after the expiry time and
# has to be regenerated. It also embeds an access key id (`akid`); consider
# loading the URL from configuration instead of committing it.
url = 'https://www.amazon.com/reviews/iframe?akid=AKIAIOWAH2MM2J3QSNPA&alinkCode=xm2&asin=B006R7AW6M&atag=AssociateTag%3Dsomeutility-20&exp=2017-11-02T21%3A59%3A44Z&v=2&sig=ljBbKJxQiq%252F90us8lfn1uDQ7VmXr%252BDknLvJ49jIsaHU%253D'

# (star rating, CSS class of the histogram row holding its percentage),
# listed in the order the report prints them.
STAR_ROWS = (
    (1, 'histoRowOne'),
    (2, 'histoRowTwo'),
    (3, 'histoRowThree'),
    (4, 'histoRowFour'),
    (5, 'histoRowFive'),
)

_COUNT_RE = re.compile(r'\d[\d,]*')
_PERCENT_RE = re.compile(r'(\d+(?:\.\d+)?)\s*%')


def parse_total_reviews(text: str) -> int:
    """Return the review count found in *text* (e.g. ``'1,234 Reviews'``).

    Thousands separators are accepted and the trailing word is ignored, so
    both ``'87 Reviews'`` and ``'1 Review'`` parse.

    Raises:
        ValueError: if *text* contains no number.
    """
    match = _COUNT_RE.search(text)
    if match is None:
        raise ValueError(f'no review count found in {text!r}')
    return int(match.group(0).replace(',', ''))


def parse_percentage(text: str) -> float:
    """Return the percentage found in a histogram row's text.

    The row text looks like ``'5 star\\n72%'``; only the number directly in
    front of the ``%`` sign is used, so the leading star label is ignored.
    The result is on the 0-100 scale (``72.0``, not ``0.72``).

    Raises:
        ValueError: if *text* contains no ``<number>%`` token.
    """
    match = _PERCENT_RE.search(text)
    if match is None:
        raise ValueError(f'no percentage found in {text!r}')
    return float(match.group(1))


def estimate_count(total_reviews: int, percent: float) -> int:
    """Return the estimated number of reviews that make up *percent* of the total."""
    # Multiply before dividing so whole-number percentages stay exact in
    # floating point for as long as possible.
    return int(round(total_reviews * percent / 100))


def build_report(total_reviews: int, percent_by_stars) -> str:
    """Return the printable summary of the review histogram.

    Args:
        total_reviews: total number of reviews.
        percent_by_stars: mapping of star rating (1-5) to its percentage on
            the 0-100 scale.
    """
    lines = [
        # round() rather than int(p * 100) on a fraction: the latter turns
        # 29% into 28 because 0.29 * 100 is 28.999... in floating point.
        f'Total number of {stars} star reviews: '
        f'{estimate_count(total_reviews, percent_by_stars[stars])} '
        f'({round(percent_by_stars[stars])}%)'
        for stars, _ in STAR_ROWS
    ]
    return f'Total reviews: {total_reviews}\n\n' + '\n'.join(lines)


def main():
    """Open the review iframe in Chrome, scrape the histogram and print it.

    Any failure is reported on stdout instead of raising, and the browser
    session is shut down whether or not scraping succeeded.
    """
    browser = None
    try:
        # NOTE(review): `executable_path` is deprecated in Selenium 4 and
        # dropped in later 4.x releases in favour of a Service object --
        # confirm against the installed Selenium version.
        browser = webdriver.Chrome(executable_path=driver_path)
        browser.get(url)
        # NOTE: product name will have to be gleaned elsewhere (product advertising api?)
        # the element with class 'tiny' holds the total number of reviews
        total_reviews = parse_total_reviews(
            browser.find_element(By.CLASS_NAME, 'tiny').text
        )
        percent_by_stars = {
            stars: parse_percentage(
                browser.find_element(By.CLASS_NAME, css_class).text
            )
            for stars, css_class in STAR_ROWS
        }
        print(build_report(total_reviews, percent_by_stars))
    except Exception as exc:
        # Best-effort script: report the cause rather than crash.
        print(f'Something died.... ({exc!r})')
    finally:
        # `browser` stays None when Chrome itself failed to start.
        if browser is not None:
            # quit() ends the whole driver session, not just the window.
            browser.quit()


if __name__ == '__main__':
    main()
@aflansburg
Copy link
Author

Requires chromedriver

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment