Last active
February 5, 2019 21:35
-
-
Save mottet-dev/99f3321cab8e24eaa543816922286442 to your computer and use it in GitHub Desktop.
Real-time Scraping - Simple Script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests
from bs4 import BeautifulSoup
# Search page fetched when the script is run.
SEARCH_URL = 'https://store.steampowered.com/search/?term=The+witcher'

# Characters trimmed from both ends of every scraped price string.
PRICE_PADDING = ' \t\n\r'

# Seconds to wait for the server before giving up; without a timeout
# requests.get() may block forever.
REQUEST_TIMEOUT = 10


def split_prices(price_text, struck_text=None):
    """Split the text of a price cell into ``(price, discountedPrice)``.

    The price is not always there, and when a discount applies the text of
    ``div.search_price`` is the full (struck-through) price immediately
    followed by the discounted price.  Removing the full price from that
    text leaves only the discounted one.

    Args:
        price_text: Full text of the ``div.search_price`` element, or
            ``None`` when the row has no such element.
        struck_text: Text of the ``<strike>`` element nested in the price
            cell, or ``None`` when the row is not discounted.

    Returns:
        A ``(price, discountedPrice)`` tuple.  Either item is ``None`` when
        the corresponding value is absent or empty.
    """
    full_price = struck_text.strip(PRICE_PADDING) if struck_text else None
    if price_text is None:
        return full_price, None

    remaining = price_text.strip(PRICE_PADDING)
    if not full_price:
        # No struck-through price: the cell holds just the regular price.
        # (The previous code called str.replace(None, '') here and crashed.)
        return (remaining or None), None

    # Drop only the first occurrence so that a discounted price that happens
    # to read the same as the full price is not erased as well.
    discounted = remaining.replace(full_price, '', 1).strip(PRICE_PADDING)
    return full_price, (discounted or None)


def _first_match(row, css_selector):
    """Return the first element of *row* matching *css_selector*, or None."""
    matches = row.select(css_selector)
    return matches[0] if matches else None


def parse_result_row(row):
    """Turn one ``a.search_result_row`` element into a result dict.

    Args:
        row: A BeautifulSoup tag for a single search result link.

    Returns:
        A dict with the keys ``gameURL``, ``title``, ``releaseDate``,
        ``imgURL``, ``price`` and ``discountedPrice``.  A value is ``None``
        when the matching element or attribute is missing from the row.
    """
    title_tag = row.find('span', {'class': 'title'})
    released_tag = row.find('div', {'class': 'search_released'})
    image_tag = _first_match(row, 'div.search_capsule img')
    price_tag = _first_match(row, 'div.search_price')
    struck_tag = _first_match(row, 'div.search_price span strike')

    # Compare against None explicitly: the check is about whether the
    # element was found, not about whether it has any content.
    price, discounted_price = split_prices(
        price_tag.text if price_tag is not None else None,
        struck_tag.text if struck_tag is not None else None,
    )

    return {
        'gameURL': row.get('href'),
        'title': title_tag.text if title_tag is not None else None,
        'releaseDate': released_tag.text if released_tag is not None else None,
        'imgURL': image_tag.get('src') if image_tag is not None else None,
        'price': price,
        'discountedPrice': discounted_price,
    }


def scrape_search_results(url=SEARCH_URL, timeout=REQUEST_TIMEOUT):
    """Download a search page and return one dict per result row.

    Args:
        url: Address of the search page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of dicts as produced by ``parse_result_row``, in page order.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page into [].
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    rows = soup.find_all('a', {'class': 'search_result_row'})
    return [parse_result_row(row) for row in rows]


if __name__ == '__main__':
    # Once formatted, the data are collected in the results list and printed.
    results = scrape_search_results()
    print(results)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment