Skip to content

Instantly share code, notes, and snippets.

Avatar

Matthew Phillips phillipsm

View GitHub Profile
@phillipsm
phillipsm / gist:8afdcf295b90691810e5
Created Jul 14, 2015
Python 3 version of Journalist's Resource Tip Sheet web scraping script
View gist:8afdcf295b90691810e5
import requests, time
from bs4 import BeautifulSoup
# We've now imported the two packages that will do the heavy lifting
# for us, requests and BeautifulSoup
# This is the URL that lists the current inmates
# Should this URL go away, an archive is available at
# http://perma.cc/2HZR-N38X
url_to_scrape = 'http://apps2.polkcountyiowa.gov/inmatesontheweb/'
View gist:3d61b7b38317a962ebd2
$ python process.py
CRAIG ELTON GILLEN, 20
White Male from SPRING HILL, IA
Booked at 7/6/2015 11:51 AM
JEREMY MONTEZ AMERISON SMITH, 27
Black Male from CLIVE, IA
Booked at 7/6/2015 11:45 AM
.
@phillipsm
phillipsm / gist:29d4cb4addb5c5a21ae7
Created Jun 24, 2015
Sum and print aggregations
View gist:29d4cb4addb5c5a21ae7
# Tally how many inmates come from each city.
# `inmates` is built elsewhere (not in this snippet); each entry is
# indexed by the key 'city'.
inmate_cities = {}
for inmate in inmates:
    city = inmate['city']
    # dict.get supplies 0 the first time a city is seen, so no
    # separate "already present?" branch is needed.
    inmate_cities[city] = inmate_cities.get(city, 0) + 1

# print() with a single argument behaves the same on Python 2 and
# Python 3; the bare `print x` statement is a SyntaxError on Python 3.
print(inmate_cities)
View gist:1f272a7caec08e44df2f
inmates = []
for inmate_link in inmates_links[:10]:
r = requests.get(inmate_link)
soup = BeautifulSoup(r.text)
inmate_details = {}
inmate_profile_rows = soup.select("#inmateProfile tr")
inmate_details['age'] = inmate_profile_rows[0].findAll('td')[0].text.strip()
View gist:7199f931a2de6787c0b6
url_to_scrape = 'http://apps2.polkcountyiowa.gov/inmatesontheweb/'
r = requests.get(url_to_scrape)
soup = BeautifulSoup(r.text)
inmates_links = []
for table_row in soup.select(".inmatesList tr"):
table_cells = table_row.findAll('td')
View gist:2bdb5f622cbabe107c5b
import requests
from bs4 import BeautifulSoup
View gist:404780e419c49a5b62a8
import requests
from bs4 import BeautifulSoup
import time
# We've now imported the two packages that will do the heavy lifting
# for us, requests and BeautifulSoup
# This is the URL that lists the current inmates
# Should this URL go away, an archive is available at
# http://perma.cc/2HZR-N38X
@phillipsm
phillipsm / gist:c832c825c994735b31fe
Last active Aug 29, 2015
All material for dgmde15
View gist:c832c825c994735b31fe

All material used for dgmde15

still dumping material in here

@phillipsm
phillipsm / gist:0ed98b2585f0ada5a769
Last active Apr 30, 2022
Example of parsing a table using BeautifulSoup and requests in Python
View gist:0ed98b2585f0ada5a769
import requests
from bs4 import BeautifulSoup
# We've now imported the two packages that will do the heavy lifting
# for us, requests and BeautifulSoup
# Let's put the URL of the page we want to scrape in a variable
# so that our code down below can be a little cleaner
url_to_scrape = 'http://apps2.polkcountyiowa.gov/inmatesontheweb/'