Last active
December 24, 2019 21:55
-
-
Save lowweihong/d0f277bff4f2ba99eb99dc1c909a3047 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import gspread | |
from oauth2client.service_account import ServiceAccountCredentials | |
import pandas as pd | |
import requests | |
from bs4 import BeautifulSoup | |
# Access credentials for google sheet and access the google sheet | |
scope = ['https://spreadsheets.google.com/feeds', | |
'https://www.googleapis.com/auth/drive'] | |
# Copy your path to your credential JSON file. | |
PATH_TO_CREDENTIAL = '/Users/bla/credential/data science-4936da0d6b1f.json' | |
# Initiate your credential | |
credentials = ServiceAccountCredentials.from_json_keyfile_name(PATH_TO_CREDENTIAL, scope) | |
# Authorize your connection to your google sheet | |
gc = gspread.authorize(credentials) | |
# Access google sheet | |
sht1 = gc.open_by_key('11DU3qbPoXlbUXJ-o_zerndJdqQ8pNBMsAZpeBfbwyc0') | |
# Get the data from the google sheet | |
df = pd.DataFrame(sht1.worksheet("Sheet1").get_all_values()[1:]) | |
# Rename column name | |
df.columns =df.iloc[0] | |
df.drop(df.index[0], inplace=True) | |
# Make a request to the competitor's link | |
header = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36", | |
"upgrade-insecure-requests": "1", | |
"accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3", | |
"accept-encoding": "gzip, deflate, br", | |
"accept-language": "en-GB,en-US;q=0.9,en;q=0.8", | |
"cache-control": "max-age=0"} | |
res = requests.get(df['Competitor Link'].iloc[0], headers=header) | |
# Get the price from the response | |
soup = BeautifulSoup(res.text) | |
price = soup.find("span", {"id": "priceblock_ourprice"}).text.replace("$","") | |
# Update the crawled price to the google sheet with: | |
# sheet name: Sheet1 | |
# cell name: D3 | |
sht1.worksheet("Sheet1").update_acell('D3', price) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hello,
Thank you for your quick reply.
Your suggestion works a little bit better :)
I'm not sure what the data type is , nor if the price array is having the same length as the cell list.
How would you expand your own script for more items?