Skip to content

Instantly share code, notes, and snippets.

@theriley106
Created January 21, 2018 23:13
Show Gist options
  • Save theriley106/ba40d28e3c317e38adba22d0bbe43a72 to your computer and use it in GitHub Desktop.
Save theriley106/ba40d28e3c317e38adba22d0bbe43a72 to your computer and use it in GitHub Desktop.
import requests
import bs4
import json
def grabSite(url, timeout=30):
    """Fetch ``url`` with an HTTP GET using a desktop-browser User-Agent.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get``; without it a stalled connection
            would block the script indefinitely.

    Returns:
        The ``requests.Response`` object. The HTTP status is not checked
        here; callers decide how to treat error responses.

    Raises:
        requests.exceptions.RequestException: On connection failure or when
            ``timeout`` elapses.
    """
    # Send a Chromium-style User-Agent instead of the library default
    # (presumably the site rejects the default agent -- not verifiable here).
    headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/60.0.3112.113 Chrome/60.0.3112.113 Safari/537.36'}
    return requests.get(url, headers=headers, timeout=timeout)
if __name__ == '__main__':
    # Script entry point: download result pages 1-16 of the US News
    # national-universities ranking, flatten every school into one dict,
    # and write the combined list to data.json.
    DB = []
    for i in range(1, 17):
        url = "https://www.usnews.com/best-colleges/rankings/national-universities?_mode=table&_page={}&format=json".format(i)
        # Parenthesised form prints the same text on Python 2 and Python 3.
        print(url)
        response = grabSite(url)
        # Stop with a clear HTTP error rather than an opaque JSON/KeyError
        # when the server answers with an error page.
        response.raise_for_status()
        res = response.json()
        for school in res['data']['items']:
            # Start from a shallow copy of the institution fields...
            information = dict(school['institution'])
            # ...then add each searchData entry's raw value; a searchData key
            # that matches an institution key overwrites it.
            for key, val in school['searchData'].items():
                information[key] = val['rawValue']
            DB.append(information)
    # Write once, after every page has been collected.
    with open('data.json', 'w') as fp:
        json.dump(DB, fp)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment