Last active
January 26, 2020 22:39
-
-
Save paralax/93d781621f54dcc87a6e to your computer and use it in GitHub Desktop.
quick and dirty skim over the list of CA breach notices
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# Quick and dirty skim over the California Attorney General's list of data
# breach notices: fetch the HTML table, count breaches per year, and rank the
# organizations by number of notices.
#
# Written for Python 2 (urllib.urlopen, BeautifulSoup 3).
from collections import Counter
from contextlib import closing
import itertools
import urllib

from BeautifulSoup import BeautifulSoup
import timestring

URL = "http://oag.ca.gov/ecrime/databreach/list"

# Close the HTTP response explicitly instead of leaking the socket.
with closing(urllib.urlopen(URL)) as response:
    html = response.read()

soup = BeautifulSoup(html)

# Keep only rows that carry attributes; rows with an empty attribute list are
# skipped (presumably the header row -- verify against the page markup).
trs = [row for row in soup.findAll('tr') if row.attrs != []]

# Column layout, counted from the right:
#   -1 is date reported
#   -2 is date breached, if known (or n/a)
# A single cell may hold several comma-separated dates, so flatten them.
raw_dates = itertools.chain.from_iterable(
    row.findAll('td')[-2].text.split(',') for row in trs
)
# Strip the padding left behind by the comma split, then drop empty tokens and
# the "n/a" placeholder so only parseable dates reach timestring.
dates = [token.strip() for token in raw_dates]
dates = [token for token in dates if token and token.lower() != 'n/a']

# counts by year
print(Counter(timestring.Date(token).year for token in dates))

# most common reporting orgs
print(Counter(row.findAll('td')[0].text for row in trs).most_common())

"""
breaches by year: {2014: 180, 2013: 155, 2012: 146, 2015: 32, 2011: 26, 2010: 3, 2007: 1, 2009: 1}
most common orgs breached:
(u'American Express Travel Related Services Company, Inc and /or its Affiliates (\u201cAXP\u201d)',
61),
(u'Discover Financial Services', 12),
(u'Massachusetts Mutual Life Insurance Company', 5),
(u'Yolo Federal Credit Union', 3),
(u'California Department of Public Health', 3),
(u'East West Bank', 3),
(u'California Department of Corrections and Rehabilitation', 3)
"""
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment