Skip to content

Instantly share code, notes, and snippets.

@ClaireSoftware
Created June 21, 2020 19:24
Show Gist options
  • Save ClaireSoftware/6ce111a2b927deb1d3684847bb17bd57 to your computer and use it in GitHub Desktop.
Save ClaireSoftware/6ce111a2b927deb1d3684847bb17bd57 to your computer and use it in GitHub Desktop.
Scrape locations from pokemonDB
from __future__ import print_function
import pickle
import os.path
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
from bs4 import BeautifulSoup
import requests
# OAuth scope requested from Google. The cached token.pickle was issued for
# this scope list, so delete that file whenever the list is edited.
SCOPES = [
    'https://www.googleapis.com/auth/spreadsheets',
]

# Spreadsheet to operate on (placeholder value; substitute a real ID) and the
# A1-notation range that is read from it.
SAMPLE_SPREADSHEET_ID = 'SPREADSHEET_ID'
SAMPLE_RANGE_NAME = 'Sheet1!A2:A252'
def _load_credentials():
    """Return Google API credentials, running the OAuth flow when needed.

    Cached credentials are read from ``token.pickle`` in the current
    directory. If they are missing or invalid they are refreshed (when a
    refresh token is available) or obtained through the local-server OAuth
    flow using ``credentials.json``, and then written back to
    ``token.pickle`` for the next run.
    """
    creds = None
    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file. Only run this where token.pickle cannot be written by others.
    if os.path.exists('token.pickle'):
        with open('token.pickle', 'rb') as token:
            creds = pickle.load(token)
    if creds and creds.valid:
        return creds

    if creds and creds.expired and creds.refresh_token:
        creds.refresh(Request())
    else:
        flow = InstalledAppFlow.from_client_secrets_file(
            'credentials.json', SCOPES)
        creds = flow.run_local_server(port=0)
    # Save the credentials for the next run.
    with open('token.pickle', 'wb') as token:
        pickle.dump(creds, token)
    return creds


def _find_location(name, game="gold"):
    """Return the location text listed for ``name`` in ``game``.

    Fetches the pokemondb.net pokedex page for ``name``, locates the
    "Where to find" section and returns the text of the table cell whose
    row header carries a span with CSS class ``game``. Returns an empty
    string when the page cannot be fetched or no such row exists, so the
    caller can always emit exactly one value per Pokemon.

    Change ``game`` to target a different game/generation.
    """
    url = "https://pokemondb.net/pokedex/" + name + "#dex-locations"
    try:
        page = requests.get(url, timeout=30)
    except requests.RequestException as err:
        print('Request for {0} failed: {1}'.format(url, err))
        return ''
    if not page.ok:
        print('Request for {0} returned HTTP {1}'.format(
            url, page.status_code))
        return ''

    soup = BeautifulSoup(page.content, 'html.parser')
    section = None
    for div in soup.find_all("div"):
        h2 = div.find("h2")
        # Ancestor divs also "contain" the heading, so keep overwriting and
        # use the last match in document order, as the original loop did.
        if h2 and h2.get_text().startswith('Where to find'):
            section = div
    if section is None:
        return ''

    for tr in section.find_all("tr"):
        th = tr.find("th")
        td = tr.find("td")
        if th is None or td is None:
            continue
        # A header cell may list several games; accept a match on any span
        # rather than relying on the first span's second class.
        for span in th.find_all("span"):
            if game in (span.get("class") or []):
                return td.get_text()
    return ''


def main():
    """Fill column H of the sheet with each Pokemon's scraped location.

    Reads Pokemon names from ``SAMPLE_RANGE_NAME`` of the spreadsheet
    ``SAMPLE_SPREADSHEET_ID``, looks each one up on pokemondb.net, and
    writes the results to ``Sheet1!h2:h252``. Exactly one output row is
    produced per input row (blank when nothing was found) so results stay
    aligned with the names they belong to.
    """
    creds = _load_credentials()
    service = build('sheets', 'v4', credentials=creds)

    # Call the Sheets API
    sheet = service.spreadsheets()
    result = sheet.values().get(spreadsheetId=SAMPLE_SPREADSHEET_ID,
                                range=SAMPLE_RANGE_NAME).execute()
    values = result.get('values', [])
    if not values:
        print('No data found.')
        return

    locations = []
    for value in values:
        print(value)
        # A blank row is expected to come back as an empty list (see the
        # Sheets API values.get docs); guard so value[0] cannot raise.
        name = value[0] if value else ''
        locations.append([_find_location(name) if name else ''])
    print(locations)

    body = {
        'values': locations
    }
    result2 = service.spreadsheets().values().update(
        spreadsheetId=SAMPLE_SPREADSHEET_ID, range="Sheet1!h2:h252",
        valueInputOption='RAW', body=body).execute()
    # Report the update response; the read response has no 'updatedCells'.
    print('{0} cells updated.'.format(result2.get('updatedCells')))
# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment