Skip to content

Instantly share code, notes, and snippets.

@policevideorequests
Created February 19, 2015 04:11
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save policevideorequests/fea0441cefa37c6b1800 to your computer and use it in GitHub Desktop.
Save policevideorequests/fea0441cefa37c6b1800 to your computer and use it in GitHub Desktop.
import glob
import os
import re
import subprocess

try:
    from html import escape  # Python 3
except ImportError:
    from cgi import escape  # Python 2
# Scrape the Seattle OPA closed-case-summary PDFs, pull the labelled fields
# out of each one, and publish them as a single HTML table.
#
# Pipeline: download PDFs -> pdf2txt.py -> regex field extraction -> HTML.
# External tools required on PATH: wget, pdf2txt.py (pdfminer).

SOURCE_URL = 'http://www.seattle.gov/opa/closed-case-summaries'
OUTPUT_PATH = '/var/www/html/opa_summaries.html'

# Column order of the generated table; also the keys of every parsed record.
ORDERED_HEADING = ['Complaint number', 'Issued date', 'OPA finding', 'Final discipline', 'Incident synopsis', 'Complaint', 'Investigation', 'Analysis and conclusion', 'Findings']

# Single-line header fields. The value is whatever follows the label on the
# same line, or (for the finding/discipline boxes) the first non-blank line
# after the label, terminated by an empty line, e.g.
#
#   OPA Finding
#
#   Not Sustained (Lawful & Proper)
_HEADER_FIELDS = [
    ('Complaint number', re.compile(r'Complaint Number(.*?)\n')),
    ('Issued date', re.compile(r'Issued Date(.*?)\n')),
    ('OPA finding', re.compile(r'OPA Finding\s*(.*?)\n\n')),
    ('Final discipline', re.compile(r'Final Discipline\s*(.*?)\n\n')),
]

# Multi-line narrative sections. Each runs from its upper-case heading to the
# next heading; "Complaint" is an alternative terminator in every pattern but
# the last, which stops at "NOTE".
_SECTION_FIELDS = [
    ('Incident synopsis', re.compile(r'INCIDENT SYNOPSIS(.*?)(Complaint|COMPLAINT)', re.DOTALL)),
    ('Complaint', re.compile(r'COMPLAINT(.*?)(Complaint|INVESTIGATION)', re.DOTALL)),
    ('Investigation', re.compile(r'INVESTIGATION(.*?)(Complaint|ANALYSIS AND CONCLUSION)', re.DOTALL)),
    ('Analysis and conclusion', re.compile(r'ANALYSIS AND CONCLUSION(.*?)(Complaint|FINDINGS)', re.DOTALL)),
    ('Findings', re.compile(r'FINDINGS(.*?)NOTE', re.DOTALL)),
]

# Page head, stylesheet and opening <table> tag, reproduced verbatim.
# NOTE(review): the sentence "The source code is at" has no link target in
# the original template; it is left untouched here -- confirm intended URL.
HTML_HEAD = """
<title>Seattle Police OPA Closed Case Summaries</title>
<h1>OPA Closed Case Summaries</h1>
<p>This table is automatically compiled from the PDFs at <a href="http://www.seattle.gov/opa/closed-case-summaries">http://www.seattle.gov/opa/closed-case-summaries</a>. The source code is at </p><style>
@import url('static/css/reset.css');
body {
font:.7em Arial;
}
p {
margin:10px 10px 0 10px;}
table {
margin:10px;
border-collapse:collapse;
}
th,td {
border:1px solid #000;
padding:5px;
text-align:left;
vertical-align:top;
}
th {
font-weight:bold;
}
h1 {
background:#000;
color:#FFF;
padding:10px;
font-size:2em;
font-weight:bold;
}
h2 {
font-weight:bold;
font-size:1.5em;
}
h3 {
font-size:1.25em;
}
#main {
padding:10px;
}
p, table, ol, h2, h3, h4, h5 {
margin-bottom:10px;
}
ol {
padding-left:20px;
}
ol li {
list-style-type: decimal;
}
</style>
<table>"""


def _files_with_suffix(suffix):
    """Return the sorted names in the working directory ending in *suffix*."""
    return sorted(name for name in os.listdir('.') if name.endswith(suffix))


def _run(argv, stdout=None):
    """Run *argv* without a shell and return its exit status.

    Returns None (after printing a message) when the program cannot be
    started, so a missing tool degrades to an empty table instead of a
    traceback -- the same best-effort behaviour the os.system calls had.
    """
    try:
        return subprocess.call(argv, stdout=stdout)
    except OSError as err:
        print('could not run %s: %s' % (argv[0], err))
        return None


def download_pdfs(url=SOURCE_URL):
    """Delete stale PDFs from the working directory and re-download them.

    wget follows links one level deep from *url* and keeps only ``.pdf``
    files, saving them flat into the working directory (``-nd``).
    """
    for stale in glob.glob('*.pdf'):
        if os.path.isfile(stale):
            os.remove(stale)
    _run(['wget', '-nd', '-r', '-l', '1', '-A', 'pdf', url])


def convert_pdfs_to_text():
    """Run pdf2txt.py on every PDF, writing ``<name>.txt`` next to it."""
    for pdf_name in _files_with_suffix('.pdf'):
        txt_name = pdf_name[:-4] + '.txt'
        with open(txt_name, 'wb') as out:
            # Argument list + "./" prefix: the name comes from a remote
            # server, so it must never be parsed as shell syntax or as a
            # command-line option.
            _run(['pdf2txt.py', os.path.join(os.curdir, pdf_name)], stdout=out)


def parse_summary(text):
    """Extract the table fields from one converted case summary.

    Args:
        text: full text of a summary as produced by pdf2txt.py.

    Returns:
        A dict keyed by every entry of ORDERED_HEADING, or None when any
        field cannot be located (the document does not follow the expected
        layout and is to be skipped).
    """
    record = {}
    for key, pattern in _HEADER_FIELDS:
        found = pattern.search(text)
        if found is None:
            return None
        record[key] = found.group(1).strip(' :')
    for key, pattern in _SECTION_FIELDS:
        found = pattern.search(text)
        if found is None:
            return None
        # Line breaks inside a section are PDF wrapping, not paragraphs.
        # NOTE(review): joining with '' fuses words when the extracted line
        # has no trailing space; kept as in the original -- confirm.
        record[key] = found.group(1).strip(' :\n').replace('\n', '')
    return record


def load_summaries():
    """Parse every ``.txt`` file in the working directory.

    Returns:
        List of record dicts (see parse_summary) in filename order. Files
        that do not match the expected layout are reported and skipped.
    """
    records = []
    for txt_name in _files_with_suffix('.txt'):
        # The context manager closes the handle on the skip path as well.
        with open(txt_name, 'r') as handle:
            text = handle.read()
        record = parse_summary(text)
        if record is None:
            print('skipping %s: expected fields not found' % txt_name)
            continue
        print(txt_name)
        records.append(record)
    return records


def render_html(records):
    """Build the complete HTML page for *records*.

    Args:
        records: iterable of dicts containing every key in ORDERED_HEADING.

    Returns:
        The page as one string: HTML_HEAD, a header row, one row per
        record, and the closing ``</table>`` tag. Cell text is HTML-escaped
        because it originates from the downloaded documents.
    """
    parts = [HTML_HEAD, '<tr>']
    parts.extend('<th>%s</th>' % escape(heading, quote=True) for heading in ORDERED_HEADING)
    parts.append('</tr>')
    for record in records:
        parts.append('<tr>')
        parts.extend('<td>%s</td>' % escape(record[heading], quote=True) for heading in ORDERED_HEADING)
        parts.append('</tr>')
    parts.append('</table>')
    return ''.join(parts)


def main():
    """Refresh the PDFs, rebuild the table and publish it to OUTPUT_PATH."""
    download_pdfs()
    convert_pdfs_to_text()
    page = render_html(load_summaries())
    with open(OUTPUT_PATH, 'w') as out:
        out.write(page)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment