Skip to content

Instantly share code, notes, and snippets.

@kcecireyes
Last active March 8, 2018 21:29
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kcecireyes/07db9ca13b8ab5cce83e4d567b2c1251 to your computer and use it in GitHub Desktop.
Save kcecireyes/07db9ca13b8ab5cce83e4d567b2c1251 to your computer and use it in GitHub Desktop.
# TODO: output something meaningful if the parser fails on caught exceptions
from bs4 import BeautifulSoup
import pandas as pd
import requests, time
# Load the list of cases to look up. read_csv already returns a DataFrame,
# so no extra pd.DataFrame(...) wrapper is needed.
# CaseNum is read as text: an all-digit column would otherwise be inferred as
# integers, dropping leading zeros and breaking the string slicing in parseCase.
e = pd.read_csv('ids.csv', dtype={'CaseNum': str})
# Assume the first 2 digits are from a year in the 2000s
# Assume that the DIVCODE is 2 digits long (MM)
# Assume the case number is the last 6 characters (even if that overlaps the DIVCODE)
def parseCase(case):
    """Split a raw case identifier into ``(year, divcode, number)``.

    The identifier is sliced purely by position:

    * characters 0-1 are a two-digit year, returned with "20" prepended;
    * characters 2-3 are the division code;
    * the trailing six characters are the case number (for short inputs
      this slice may overlap the earlier fields).

    No validation is performed; short or empty inputs simply yield short or
    empty pieces.
    """
    two_digit_year, division = case[:2], case[2:4]
    serial = case[-6:]
    return ("20" + two_digit_year, division, serial)
# Scraped results keyed by each row's 'id'; populated by get_info as a side effect.
new = {}


def get_info(case):
    """Fetch the page for one case row and record two cells from its table.

    ``case`` is a row (as passed by ``DataFrame.apply(..., axis=1)``) that
    provides at least the ``'CaseNum'`` and ``'id'`` keys.

    On success the two extracted cell texts are stored in the module-level
    ``new`` dict under ``case['id']`` and the row itself is returned.
    On a connection error/timeout, or when the page does not have the
    expected structure (``AttributeError`` / ``IndexError``), a short message
    is printed and ``None`` is returned; nothing is stored for that row.
    """
    # NOTE(review): these pieces are never used below -- presumably they are
    # meant to be interpolated into the lookup URL. Confirm and wire them in.
    year, divcode, no = parseCase(case['CaseNum'])
    try:
        # NOTE(review): the URL is blank in this copy (likely redacted).
        # requests rejects an empty URL with MissingSchema, which is not
        # handled here -- fill in the real endpoint before running.
        url = ''
        # A timeout keeps one unresponsive server from stalling the whole run.
        r = requests.get(url, timeout=30)
        page = BeautifulSoup(r.text, "lxml")
        # Rows of the third sibling element following the first <table>.
        trs = page.table.find_next_siblings()[2].find_all('tr')
        # If row 6 is the 'Defendant(s)' header, the data sits one row lower.
        if trs[6].td.text == 'Defendant(s)':
            d = [trs[7].td.text, trs[8].td.text]
        else:
            d = [trs[6].td.text, trs[7].td.text]
        # Single-argument print gives the same output on Python 2 and 3.
        print('%s %s' % (d, case['id']))
        new[case['id']] = d
        return case
    except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
        print('connection error')
        print('waiting...')
        # Back off briefly before the caller moves on to the next row.
        time.sleep(5)
    except AttributeError:
        # Some expected element (e.g. the table or a cell) was missing.
        print('none type, bro')
    except IndexError:
        # Fewer siblings or rows than the layout assumed above.
        print('index out of range, yo')
    return None
# Run the scraper over every row whose 'id' is greater than 1995; get_info
# fills the module-level `new` dict as a side effect.
e = e[e['id'] > 1995].apply(get_info, axis=1)
# Collect the scraped results into a two-column frame. keys()/values() are
# materialized as lists because on Python 3 they are views, not sequences;
# both iterate in the same order, so IDs stay aligned with their info.
m = pd.DataFrame({'ID': list(new.keys()), 'info': list(new.values())})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment