All material used for dgmde15
still dumping material in here
import requests
import time

from bs4 import BeautifulSoup

# We've now imported the two packages that will do the heavy lifting
# for us: requests and BeautifulSoup. The time module is here so we
# can pause between requests.

# This is the URL that lists the current inmates.
# Should this URL go away, an archive is available at
# http://perma.cc/2HZR-N38X
url_to_scrape = 'http://apps2.polkcountyiowa.gov/inmatesontheweb/'

# Wait a second before making the next request
time.sleep(1)
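The one-second pause matters most inside a loop that fetches many pages in a row. A minimal sketch of that idea, assuming inmates_links already holds the detail-page URLs collected further down:

pages = []
for inmate_link in inmates_links:
    r = requests.get(inmate_link)   # fetch one detail page
    pages.append(r.text)            # keep the raw HTML for later parsing
    time.sleep(1)                   # wait a second so we don't hammer the server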
$ python process.py
CRAIG ELTON GILLEN, 20
White Male from SPRING HILL, IA
Booked at 7/6/2015 11:51 AM

JEREMY MONTEZ AMERISON SMITH, 27
Black Male from CLIVE, IA
Booked at 7/6/2015 11:45 AM
...
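A hedged sketch of the loop that could print records like the ones above, assuming each entry in inmates is a dict whose keys include name, age, race, sex, city, state, and booking_time (the key names are assumptions, not taken from the snippets here):

for inmate in inmates:
    # Key names below are assumptions about what the scraper stored
    print(inmate['name'] + ', ' + inmate['age'])
    print(inmate['race'] + ' ' + inmate['sex'] + ' from ' + inmate['city'] + ', ' + inmate['state'])
    print('Booked at ' + inmate['booking_time'])
    print('')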
# Tally how many inmates list each city as home
inmate_cities = {}
for inmate in inmates:
    if inmate['city'] in inmate_cities:
        inmate_cities[inmate['city']] += 1
    else:
        inmate_cities[inmate['city']] = 1
print(inmate_cities)
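As an aside, the standard library's collections.Counter would do the same tally in one line; a minimal sketch:

from collections import Counter

inmate_cities = Counter(inmate['city'] for inmate in inmates)
print(inmate_cities.most_common(5))   # the five most common home cities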
# Visit the first ten detail pages and pull each inmate's profile
inmates = []
for inmate_link in inmates_links[:10]:
    r = requests.get(inmate_link)
    soup = BeautifulSoup(r.text)
    inmate_details = {}
    inmate_profile_rows = soup.select("#inmateProfile tr")
    inmate_details['age'] = inmate_profile_rows[0].findAll('td')[0].text.strip()
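    # The snippet above stops at the age field. A hedged continuation follows:
    # which profile row holds which field is an assumption and would need to be
    # checked against the live HTML before relying on these indexes.
    inmate_details['race'] = inmate_profile_rows[3].findAll('td')[0].text.strip()
    inmate_details['sex'] = inmate_profile_rows[4].findAll('td')[0].text.strip()
    inmate_details['city'] = inmate_profile_rows[8].findAll('td')[0].text.strip()
    inmates.append(inmate_details)
    time.sleep(1)   # pause between detail-page requests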
url_to_scrape = 'http://apps2.polkcountyiowa.gov/inmatesontheweb/'

# Fetch the listing page and parse it
r = requests.get(url_to_scrape)
soup = BeautifulSoup(r.text)

# Walk the inmates table and collect a link for each row
inmates_links = []
for table_row in soup.select(".inmatesList tr"):
    table_cells = table_row.findAll('td')
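    # The loop above is cut off before the link is pulled out of the row.
    # A hedged completion: the header row has no <td> cells, and the cell
    # index holding the detail-page anchor is an assumption about the page.
    if len(table_cells) > 0:
        relative_link = table_cells[1].find('a')['href']
        inmates_links.append(url_to_scrape + relative_link)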
import requests
import time

from bs4 import BeautifulSoup

# We've now imported the two packages that will do the heavy lifting
# for us: requests and BeautifulSoup.

# This is the URL that lists the current inmates.
# Should this URL go away, an archive is available at
# http://perma.cc/2HZR-N38X
import requests

from bs4 import BeautifulSoup

# We've now imported the two packages that will do the heavy lifting
# for us: requests and BeautifulSoup.

# Let's put the URL of the page we want to scrape in a variable
# so that our code down below can be a little cleaner
url_to_scrape = 'http://apps2.polkcountyiowa.gov/inmatesontheweb/'