Created
June 21, 2020 19:24
-
-
Save ClaireSoftware/6ce111a2b927deb1d3684847bb17bd57 to your computer and use it in GitHub Desktop.
Scrape locations from pokemonDB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import print_function | |
import pickle | |
import os.path | |
from googleapiclient.discovery import build | |
from google_auth_oauthlib.flow import InstalledAppFlow | |
from google.auth.transport.requests import Request | |
from bs4 import BeautifulSoup | |
import requests | |
# OAuth scopes requested during authorization. The cached token in
# token.pickle was granted for these scopes, so delete that file after
# changing this list to force a fresh authorization.
SCOPES = ['https://www.googleapis.com/auth/spreadsheets']

# Spreadsheet to read from and write to. 'SPREADSHEET_ID' is a placeholder:
# replace it with the ID of your own spreadsheet before running.
SAMPLE_SPREADSHEET_ID = 'SPREADSHEET_ID'
# A1-notation range holding one Pokémon name per row; each name is appended
# to the pokemondb.net Pokédex URL.
SAMPLE_RANGE_NAME = 'Sheet1!A2:A252'
def _load_credentials():
    """Return Google API credentials, running the OAuth flow when needed.

    Credentials are cached in ``token.pickle`` in the working directory. If
    the cache is missing or no longer valid, the token is refreshed (when a
    refresh token exists) or the browser-based flow is started using
    ``credentials.json``; the result is then written back to the cache.
    """
    creds = None
    if os.path.exists('token.pickle'):
        # NOTE(security): unpickling executes arbitrary code from the file.
        # Only keep a token.pickle that this script itself produced.
        with open('token.pickle', 'rb') as token:
            creds = pickle.load(token)
    # If there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                'credentials.json', SCOPES)
            creds = flow.run_local_server(port=0)
        # Save the credentials for the next run.
        with open('token.pickle', 'wb') as token:
            pickle.dump(creds, token)
    return creds


def _find_game_location(soup, game="gold"):
    """Return the location text listed for ``game`` on a parsed Pokédex page.

    Args:
        soup: BeautifulSoup document of a pokemondb.net Pokédex page.
        game: CSS class naming the game on the row header's ``<span>``.
            Change this for your desired game/generation.

    Returns:
        The text of the matching row's ``<td>`` (several matches are joined
        with ``"; "``), or ``""`` when the page has no "Where to find"
        section or no row for ``game``.
    """
    section = None
    for div in soup.find_all("div"):
        heading = div.find("h2")
        if heading and heading.get_text().startswith('Where to find'):
            # find_all yields elements in document order; keep the last
            # matching container, as the original loop did.
            section = div
    if section is None:
        return ""

    matches = []
    for row in section.find_all("tr"):
        header = row.find("th")
        cell = row.find("td")
        if header is None or cell is None:
            continue  # not a game/location row
        # Check every game span in the header, not only the first one, and
        # do not assume the game name sits at a fixed index in the class list.
        if any(game in (span.get("class") or [])
               for span in header.find_all("span")):
            matches.append(cell.get_text())
    return "; ".join(matches)


def main():
    """Copy each Pokémon's location from pokemondb.net into the spreadsheet.

    Reads the names in ``SAMPLE_RANGE_NAME``, fetches each name's Pokédex
    page, extracts the location listed for the chosen game, and writes the
    results to ``Sheet1!h2:h252``. Exactly one output row is produced per
    input row (an empty string when nothing was found) so that the written
    column stays aligned with the names it was derived from.
    """
    creds = _load_credentials()
    service = build('sheets', 'v4', credentials=creds)

    # Call the Sheets API to read the names.
    sheet = service.spreadsheets()
    result = sheet.values().get(spreadsheetId=SAMPLE_SPREADSHEET_ID,
                                range=SAMPLE_RANGE_NAME).execute()
    values = result.get('values', [])

    locations = []
    for value in values:
        print(value)
        # The API returns [] for an empty row; keep a placeholder so the
        # following rows do not shift upwards.
        if not value or not value[0]:
            locations.append([""])
            continue
        url = "https://pokemondb.net/pokedex/" + value[0] + "#dex-locations"
        # A timeout keeps one stalled request from hanging the whole run.
        page = requests.get(url, timeout=30)
        if not page.ok:
            # Unknown name or server error: leave this row's cell empty.
            locations.append([""])
            continue
        soup = BeautifulSoup(page.content, 'html.parser')
        locations.append([_find_game_location(soup)])

    print(locations)
    if not locations:
        print('No names found in {0}; nothing to update.'.format(
            SAMPLE_RANGE_NAME))
        return

    body = {
        'values': locations
    }
    update_response = service.spreadsheets().values().update(
        spreadsheetId=SAMPLE_SPREADSHEET_ID, range="Sheet1!h2:h252",
        valueInputOption='RAW', body=body).execute()
    # Report the count from the update response; the read response has no
    # 'updatedCells' field.
    print('{0} cells updated.'.format(update_response.get('updatedCells')))
# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment