Skip to content

Instantly share code, notes, and snippets.

@philshem
Last active November 4, 2019 11:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save philshem/2d84ce8d3cc82f7a7364d867592e557c to your computer and use it in GitHub Desktop.
Save philshem/2d84ce8d3cc82f7a7364d867592e557c to your computer and use it in GitHub Desktop.
NYTimes Spelling Bee scraper 🐝☠️
#!/usr/bin/env python3
import requests
from bs4 import BeautifulSoup
import json
def _extract_game_data(html):
    """Return the game data object embedded in the Spelling Bee page HTML.

    The page assigns a JSON object to ``window.gameData`` inside a
    ``<script>`` tag nested in ``<div class="pz-game-screen">``.

    Args:
        html: The page source as text.

    Returns:
        The decoded JSON value that follows ``window.gameData = ``.

    Raises:
        RuntimeError: If the container div or the assignment cannot be found.
        json.JSONDecodeError: If the text after the assignment is not JSON.
    """
    marker = 'window.gameData = '
    soup = BeautifulSoup(html, 'html5lib')
    screen = soup.find('div', class_='pz-game-screen')
    if screen is None:
        raise RuntimeError("no <div class='pz-game-screen'> found in the page")

    # Look at every script in the container instead of assuming the first
    # one is the one carrying the game data.
    for script in screen.find_all('script'):
        source = script.string or script.text
        _, found, payload = source.partition(marker)
        if found:
            break
    else:
        raise RuntimeError("no script containing 'window.gameData = ' found")

    # raw_decode parses the first JSON value and ignores whatever follows it
    # (for example a trailing ';'), which a plain json.loads would reject.
    data, _end = json.JSONDecoder().raw_decode(payload.lstrip())
    return data


def main():
    """Download today's Spelling Bee game data, print it and save it to disk.

    The data is pretty-printed to stdout, written to ``<printDate>.json`` in
    the current working directory, and the print date is printed last.

    Raises:
        requests.RequestException: If the request fails, times out or
            returns an HTTP error status.
        RuntimeError: If the game data or its ``today.printDate`` field
            cannot be located, or the date is unusable as a file name.
    """
    # the answers are stored as a json inside the page source
    url = 'https://www.nytimes.com/puzzles/spelling-bee'
    r = requests.get(url, timeout=30)  # never hang forever on a stalled connection
    r.raise_for_status()  # do not try to parse an error page

    data = _extract_game_data(r.text)
    print(json.dumps(data, indent=4))

    today = data.get('today') if isinstance(data, dict) else None
    dt = today.get('printDate') if isinstance(today, dict) else None
    if not isinstance(dt, str) or not dt:
        raise RuntimeError("game data has no usable 'today.printDate' field")
    # The date comes from the remote page and becomes a file name, so refuse
    # anything that could point outside the current directory.
    if '/' in dt or '\\' in dt:
        raise RuntimeError(f'unexpected printDate value: {dt!r}')

    with open(f'{dt}.json', 'w', encoding='utf-8') as fp:
        json.dump(data, fp)
    print(dt)
if __name__ == "__main__":
    # Run the scraper only when this file is executed as a script,
    # not when it is imported as a module.
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment