Skip to content

Instantly share code, notes, and snippets.

View theriley106's full-sized avatar

Chris Lambert theriley106

View GitHub Profile
@theriley106
theriley106 / congressEducation.py
Created December 9, 2017 19:17
Web Scraping congress education information
import requests
import bs4
import re
import json
listOfCongress = []
url = 'https://www.govtrack.us/congress/members/current?sort=sortname&page=1&faceting=false&allow_redirect=false&do_search=1'
congressCount = requests.get(url).json()['total']
information = {}
@theriley106
theriley106 / dataSetCreator.py
Created December 12, 2017 18:53
Net Neutrality Voting
import bs4
import requests
import csv
import json
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
res = requests.get('https://www.theverge.com/2017/12/11/16746230/net-neutrality-fcc-isp-congress-campaign-contribution', headers=headers)
page = bs4.BeautifulSoup(res.text, 'lxml')
dataBase = []
with open('example.csv', 'rb') as f:
reader = csv.reader(f)
@theriley106
theriley106 / scrapeCDS.py
Created January 20, 2018 21:51
Grabbing College Common Data Sets
import re
from selenium import webdriver
import bs4
def extractCollegeName(title):
    """Return the piece of a page title that names the school.

    The title is split on "-" and the first piece containing "university",
    "college" or "institute" (case-insensitive) is returned exactly as it
    appears in the title, including any surrounding whitespace. When no
    piece matches, the whole title is returned unchanged.
    """
    # Single-argument call form prints the same text whether it is parsed
    # as the Python 2 print statement or the Python 3 print function.
    print(title)
    keywords = ('university', 'college', 'institute')
    for parts in title.split("-"):
        # Lowercase once per piece; no str() coercion, which could raise on
        # non-ASCII unicode titles under Python 2.
        lowered = parts.lower()
        if any(word in lowered for word in keywords):
            return parts
    return title
import csv
stores = {}
with open('Database.csv', 'r') as f:
reader = csv.reader(f)
your_list = list(reader)
for line in your_list:
stores[line[0]] = line[1]
while True:
import requests
import bs4
import json
def grabSite(url, timeout=10):
    """Fetch *url* with a desktop-browser User-Agent and return the response.

    url: address to request with HTTP GET.
    timeout: seconds passed to requests.get as its timeout. Without one a
        stalled server would block the scraper indefinitely.

    Returns the object produced by requests.get.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/60.0.3112.113 Chrome/60.0.3112.113 Safari/537.36'}
    return requests.get(url, headers=headers, timeout=timeout)
if __name__ == '__main__':
DB = []
@theriley106
theriley106 / satDataGrab.py
Created January 24, 2018 00:07
Python program to grab SAT Information
import json
SAT_2017_TAKERS = 1715481
def inverse():
for key, value in satListz.items():
value = int(value)
key = int(key)
if value not in flipped:
flipped[value] = [key]
else:
@theriley106
theriley106 / scrapingGrammyAwards.py
Last active February 3, 2021 22:34
Python script to scrape Grammy Award winners and categories
import requests
import bs4
import json
DB = []
def grabSite(url):
    """Issue a GET request for *url* and hand back the response.

    The request carries a desktop Chrome User-Agent header and uses a
    10 second timeout.
    """
    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36',
    }
    response = requests.get(url, timeout=10, headers=browser_headers)
    return response
@theriley106
theriley106 / housingScrape.py
Created February 8, 2018 03:05
Scraping Valid Addresses from all US ZipCodes
import sys
# Python 2 workaround: reload sys before calling setdefaultencoding(), then
# switch the interpreter-wide default string encoding to UTF-8.
reload(sys)
sys.setdefaultencoding("utf-8")
import requests
import bs4
import zipcode
import threading
import re
import json
import time
@theriley106
theriley106 / patd.py
Created February 9, 2018 14:20
Script used to get the data in this dataset: https://www.kaggle.com/theriley106/panic-at-the-dataset/
import requests
import bs4
import re
import json
from textblob import TextBlob
def getLyricSentiment(lyrics):
    """Return the TextBlob sentiment polarity of *lyrics*.

    Every run of whitespace (spaces, tabs, newlines) is collapsed to a
    single space before the text is handed to TextBlob.
    """
    # Raw string so that \s reaches the regex engine as written instead of
    # relying on an invalid string escape being passed through.
    lyrics = re.sub(r'\s+', ' ', lyrics)
    return TextBlob(lyrics).sentiment.polarity
import sys
# Python 2 workaround: sys.setdefaultencoding() is not available on the
# sys module at this point.
reload(sys) # Reloading sys makes setdefaultencoding() available again.
sys.setdefaultencoding('UTF8')
import json
import glob
import traceback
DB = {"Total": 0}
finalList = {}
listF = []