Skip to content

Instantly share code, notes, and snippets.

@lindsaycarbonell
Created November 21, 2017 19:48
Show Gist options
  • Save lindsaycarbonell/9a328c951d33d74467ed3e1ab44095f6 to your computer and use it in GitHub Desktop.
Save lindsaycarbonell/9a328c951d33d74467ed3e1ab44095f6 to your computer and use it in GitHub Desktop.
Race/Gender NCDPI Scraper
import requests
import http.cookiejar, urllib.request
import pandas as pd
import webbrowser
from bs4 import BeautifulSoup
# Fetch the NCDPI report page, locate the download link inside the "t17CVS"
# div, and save whatever that link returns to "file.xls" in the current
# working directory.
url = "http://apps.schools.nc.gov/ords/f?p=1:220:1214343525970901::NO::P220_SELECTLEA:920"
response = requests.get(url)
# Fail fast on an HTTP error instead of parsing an error page as the report.
response.raise_for_status()
html = response.content
soup = BeautifulSoup(html, "html.parser")

# The link lives in the first <a> of the div with class "t17CVS". Check each
# step so that a changed page layout yields a clear error rather than an
# AttributeError/TypeError on None.
export_div = soup.find("div", attrs={"class": "t17CVS"})
export_anchor = export_div.a if export_div is not None else None
if export_anchor is None or not export_anchor.get("href"):
    raise RuntimeError(
        "Could not find the download link (div.t17CVS > a[href]) on " + url
    )
link_param = export_anchor["href"]
# The href is joined onto the application's base path by plain concatenation.
link_to_click = "http://apps.schools.nc.gov/ords/" + link_param

# The download goes through a cookie-aware opener with a fresh, empty jar.
# NOTE(review): cookies from the requests.get() call above are not carried
# over into this jar -- confirm the server does not need them.
cj = http.cookiejar.CookieJar()
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))

# Download first and only then open the output file, so that a failed request
# does not leave behind an empty/truncated file.xls. Context managers make
# sure both the HTTP response and the file are closed even on error.
with opener.open(link_to_click) as res:
    payload = res.read()
with open("file.xls", "wb") as f:
    f.write(payload)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment