Created
January 2, 2015 23:05
-
-
Save policevideorequests/e40ccf44e97f130de33b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# created by Tim Clemans and is open sourced under GPL | |
import os | |
import re | |
from datetime import datetime | |
from datetime import date | |
def calculate_age(born):
    """Return the whole-year age, as of today, of someone born on *born*."""
    today = date.today()
    try:
        this_years_birthday = born.replace(year=today.year)
    except ValueError:
        # Born on February 29 and the current year is not a leap year:
        # treat the birthday as March 1 of this year.
        this_years_birthday = born.replace(year=today.year, month=born.month + 1, day=1)
    # Subtract one year if this year's birthday has not happened yet.
    return today.year - born.year - (1 if this_years_birthday > today else 0)
def grab_names(go_report_data):
    """Collect first/last names of victims, witnesses and suspects.

    Each present name is appended twice: exactly as recorded (typically
    ALL CAPS) and in Capitalized form, so later substring replacement
    catches both spellings.
    """
    names = []
    for role in ('victims', 'witnesses', 'suspects'):
        people = go_report_data.get(role)
        if not isinstance(people, list):
            continue
        for person in people:
            if not isinstance(person, dict):
                continue
            for field in ('firstname', 'lastname'):
                value = person.get(field)
                if value:
                    names.append(value)
                    names.append(value.lower().capitalize())
    return names
def get_incident_type(go_report):
    """Return the incident type, which sits on the 8th line of the report."""
    lines = go_report.split('\n')
    return lines[7]
def get_go_report_number(go_report):
    """Return the first YYYY-NNNNNN general-offense number, or None."""
    match = re.search(r'\d{4}-\d{6}', go_report)
    return match.group() if match else None
def get_number_of_pages(go_report):
    """Return the page count from a 'Page X of Y' marker, or None if absent."""
    match = re.search(r'Page \d+ of (?P<pages>\d+)', go_report)
    return int(match.group('pages')) if match else None
def get_number_of_words(go_report):
    """Return the number of whitespace-separated words in the report.

    Uses str.split() with no argument so runs of spaces, tabs and newlines
    count as a single separator; the previous split(' ') produced empty
    strings for consecutive spaces and never split on newlines, inflating
    the count on OCR'd text.
    """
    return len(go_report.split())
def get_number_of_victims(go_report):
    """Count victim entries: 'VICTIM # n' headings plus combined
    victim/suspect ('VICT/SUSPECT') entries."""
    victims = re.findall(r'VICTIM # \d+', go_report)
    combined = re.findall(r'VICT/SUSPECT', go_report)
    return len(victims) + len(combined)
def get_number_of_witnesses(go_report):
    """Count 'WITNESS # n' headings in the report."""
    return sum(1 for _ in re.finditer(r'WITNESS # \d+', go_report))
def get_number_of_arrested(go_report):
    """Count juvenile arrest entries ('JUV - ARRES # n') in the report."""
    return sum(1 for _ in re.finditer(r'JUV - ARRES # \d+', go_report))
def get_number_of_suspects(go_report):
    """Count combined victim/suspect ('VICT/SUSPECT') entries."""
    # Counting a literal substring is equivalent to len(re.findall(...)).
    return go_report.count('VICT/SUSPECT')
def get_operational_status(go_report):
    """Return the text after 'Operational status: ' up to end of line, or None."""
    match = re.search(r'Operational status: (.*?)\n', go_report)
    return match.group(1) if match else None
def get_narrative(go_report):
    """Extract the officer narrative section as an HTML-ish string.

    Takes the lines between the '15 INITIAL INCIDENT DESCRIPTION /
    NARRATIVE:' heading and the perjury declaration, drops boilerplate
    (page headers, department banners, GO numbers, lines containing a
    digits-dash-digit case-number fragment), then joins what is left with
    <br/> tags. Returns '' when either boundary heading is missing.

    Fix over the original: the bare ``except: pass`` (which hid every
    error) is narrowed to the only exception the lookup can raise,
    ``ValueError`` from ``list.index``.
    """
    lines = [line.strip() for line in go_report.split('\n')]
    narrative = ''
    try:
        start = lines.index('15 INITIAL INCIDENT DESCRIPTION / NARRATIVE:') + 1
        stop = lines.index('I hereby declare (certify) under penalty of perjury under the laws of the')
    except ValueError:
        # One of the boundary headings is absent; nothing to extract.
        return narrative
    # All nine original filters are order-independent membership tests,
    # so they collapse into one prefix tuple plus one regex check.
    boilerplate_prefixes = ('Page', 'For: ', 'SEATTLE POLICE DEPARTMENT',
                            'GENERAL OFFENSE HARDCOPY',
                            'PUBLIC DISCLOSURE RELEASE COPY', 'GO#',
                            'LAW DEPT BY FOLLOW-UP UNIT', ']')
    kept = [line for line in lines[start:stop]
            if not line.startswith(boilerplate_prefixes)
            and not re.search(r'\d+-\d', line)]
    narrative = '\n'.join(kept)
    # Collapse blank lines, trim stray OCR brackets, then HTML-ize breaks.
    narrative = narrative.strip().replace('\n\n', '').strip('[]')
    return narrative.replace('\n', '<br/>')
def convert_personal_info_to_dict(personal_info):
    """Parse one person's section of a GO report into a dict.

    Extracts the 'LAST, FIRST M' name line, every 'Label: value' line
    (keys lower-cased with spaces turned into underscores), the entry
    type from the 'TYPE # n' heading, and a computed 'age' when a
    parseable 'date_of_birth' field is present.

    Fix over the original: the bare ``except: pass`` around the age
    computation is narrowed to ``(KeyError, ValueError)`` — the only
    expected failures (no date_of_birth captured, or a date not in
    Mmm-DD-YYYY form) — so genuine bugs are no longer swallowed.
    """
    personal_data = {}
    # Prefer the form with a middle initial; fall back to last/first only.
    m = re.search(r'(?P<lastname>[A-Z]+), (?P<firstname>[A-Z]+) (?P<middleinital>[A-Z])', personal_info)
    if not m:
        m = re.search(r'(?P<lastname>[A-Z]+), (?P<firstname>[A-Z]+)', personal_info)
    if m:
        personal_data = m.groupdict()
    # Fold every 'Label: value' line into snake_case keys.
    personal_data.update({label.strip().lower().replace(' ', '_'): value.strip()
                          for label, value in re.findall(r'(.*?):(.*?)\n', personal_info)})
    # The section heading looks like 'VICTIM # 1' / 'VICT/SUSPECT # 2';
    # callers only pass sections that start with such a heading.
    personal_data['type'] = re.search(r'(?P<type>[ A-Z\-/]+) # \d+', personal_info).group('type')
    try:
        born = datetime.strptime(personal_data['date_of_birth'], '%b-%d-%Y').date()
        personal_data['age'] = calculate_age(born)
    except (KeyError, ValueError):
        # No date of birth captured, or not in Mmm-DD-YYYY form.
        pass
    return personal_data
def extract_personal_info(go_report):
    """Locate every person section of the report and bucket the parsed
    records by role.

    A section starts at a 'TYPE # n' heading line and ends at the next
    line beginning with 'For:' (the page footer). Each section is parsed
    by convert_personal_info_to_dict and appended to the matching list
    in the returned dict ('witnesses', 'victims', 'victim_vehicles' for
    both VICTIM VEHICLE and STOLEN entries, everything else 'suspects';
    'stolens' is reserved but currently left empty).
    """
    lines = go_report.split('\n')
    spans = []            # (start, stop) line-index pairs, one per section
    start = None
    in_section = False    # True between a heading and its 'For:' footer
    for index, line in enumerate(lines):
        if not in_section:
            if re.search(r'[ A-Z\-/]+ # \d+', line):
                start = index
                in_section = True
        elif line.startswith('For:'):
            spans.append((start, index))
            in_section = False
    records = [convert_personal_info_to_dict('\n'.join(lines[a:b]))
               for a, b in spans]
    data = {'witnesses': [], 'suspects': [], 'victims': [],
            'victim_vehicles': [], 'stolens': []}
    for record in records:
        record['type'] = record['type'].strip()
        kind = record.get('type')
        if kind == 'WITNESS':
            data['witnesses'].append(record)
        elif kind == 'VICTIM':
            data['victims'].append(record)
        elif kind in ('VICTIM VEHICLE', 'STOLEN'):
            data['victim_vehicles'].append(record)
        else:
            data['suspects'].append(record)
    return data
# ---------------------------------------------------------------------------
# Ingest every OCR'd GO-report text file and parse each one into a dict.
# ---------------------------------------------------------------------------
go_reports_data = []
go_reports = [f for f in os.listdir('emailswsb/pdfs') if f.endswith('.txt')]
# Files whose names start with 'r_' or contain 'Redacted' were released redacted.
print 'Were redacted: %s/%s' % (len([i for i in go_reports if ('Redacted' in i or i.startswith('r_'))]), len(go_reports))
for go_report in go_reports:
    go_report_filename = go_report
    #print go_report
    # NOTE(review): the file handle is never closed and `go_report` is
    # rebound from filename to file contents here.
    f = open('emailswsb/pdfs/%s' % (go_report), 'r')
    go_report = f.read()
    # calculate ages
    # Date of birth: Oct-22-1954
    birthdays = re.findall('Date of birth: (?P<bd>[\w\d]+\-[\w\d]+\-[\w\d]+)', go_report)
    birthdays = [datetime.strptime(bd, '%b-%d-%Y').date() for bd in birthdays]
    ages = [calculate_age(bd) for bd in birthdays]
    # NOTE(review): `is_minor` is computed but never used below.
    is_minor = [age < 18 for age in ages]
    #print birthdays
    #print ages
    go_report_data = {}
    # Count how often each 'Label:' appears; only labels occurring exactly
    # once are promoted to top-level fields (repeated labels are ambiguous),
    # and labels starting with digits (section numbers) are skipped.
    questions = [i[0].strip() for i in re.findall('\n(.*?):(.*?)\n', go_report)]
    questions_count = {}
    for q in sorted(list(set(questions))):
        questions_count[q] = questions.count(q)
    go_report_data.update(dict([(i[0].strip().lower().replace(' ', '_'), i[1].strip()) for i in re.findall('\n(.*?):(.*?)\n', go_report) if questions_count[i[0].strip()] == 1 and not re.match('\d+(.*?)', i[0].strip())]))
    # Run every module-level get_* parser over the raw text; the result key
    # is the function name minus its 'get_' prefix. Renaming any get_*
    # function or adding a new one changes this dict's keys.
    parser_functions = sorted([variable for variable in globals().copy() if variable.startswith('get_')])
    for parser_function in parser_functions:
        go_report_data[parser_function[4:]] = globals()[parser_function](go_report)
    go_report_data['is_redacted'] = True if (go_report_filename.startswith('r_') or 'Redacted' in go_report_filename) else False
    # Merge in the per-person buckets (victims/witnesses/suspects/...).
    go_report_data.update(extract_personal_info(go_report))
    go_reports_data.append(go_report_data)
#print go_reports_data | |
# The union of all keys seen across reports becomes the CSV column set.
go_reports_data_keys = []
for go_report_data in go_reports_data:
    go_reports_data_keys.extend(go_report_data.keys())
headings = sorted(list(set(go_reports_data_keys)))
#print [variable for variable in globals().copy() if variable.startswith('get_')]
#print globals()['get_number_of_pages']
import csv
import sys
# Write one CSV row per parsed report; fields a report lacks come out as
# None via dict.get.
f = open('go_reports.csv', 'wt')
try:
    writer = csv.writer(f)
    writer.writerow( headings )
    for go_report_data in go_reports_data:
        writer.writerow( [go_report_data.get(heading) for heading in headings] )
finally:
    f.close()
# ---------------------------------------------------------------------------
# Build the "raw" HTML table (no redaction) and write it for the web server.
# NOTE(review): a fresh header row is emitted inside the per-report loop, so
# every report's data row is preceded by its own header row — confirm this is
# intentional rather than a loop-nesting slip.
# ---------------------------------------------------------------------------
html = """
<h1>Demonstration of early days of converting Seattle Police Go reports to structured data</h1>
<h2>This is very very early days.</h2>
<p>The personal info isn't showing up in the right columns for example. There's a ton of work left to do. I haven't even begun writing parsers for the narratives.</p>
<style>
@import url('static/css/reset.css');
body {
font:.7em Arial;
}
table {
border-collapse:collapse;
}
th,td {
border:1px solid #000;
padding:5px;
text-align:left;
vertical-align:top;
}
th {
font-weight:bold;
}
h1 {
background:#000;
color:#FFF;
padding:10px;
font-size:2em;
font-weight:bold;
}
h2 {
font-weight:bold;
font-size:1.5em;
}
h3 {
font-size:1.25em;
}
#main {
padding:10px;
}
p, table, ol, h2, h3, h4, h5 {
margin-bottom:10px;
}
ol {
padding-left:20px;
}
ol li {
list-style-type: decimal;
}
</style>
<table>"""
# Preferred columns shown first, then every discovered heading again.
ordered_heading = ['occurred_between', 'go_report_number','incident_type', 'narrative', 'victims', 'victim_vehicles', 'suspects', 'witnesses']
for go_report_data in go_reports_data:
    html += "<tr>"
    for header in ordered_heading+headings:
        html += "<th>%s</th>" % (header)
    html += "</tr>"
    html += "<tr>"
    for col in [go_report_data.get(heading) for heading in ordered_heading+headings]:
        # List-valued columns (people buckets) render as nested key/value tables.
        if isinstance(col, list):
            html += "<td><table>"
            for i, person in enumerate(col):
                html += '<tr><th colspan="2">#%s</th></tr>' % (i)
                for k in sorted(person.keys()):
                    html += '<tr><th>%s</th><td>%s</td></tr>' % (k, person[k])
            html += "</table></td>"
        else:
            html += "<td>%s</td>" % (col)
    html += "</tr>"
html += """
</table>
"""
f = open('/var/www/html/rawbigtable.html', 'w')
f.write(html)
f.close()
#print html | |
# ---------------------------------------------------------------------------
# Build a second HTML table whose narrative column is redacted: known names
# parsed from the report, hyphenated Capitalized-Capitalized surnames, and
# any capitalized word the Stanford NER tagger labels PERSON or ORGANIZATION
# are all replaced with a placeholder.
# ---------------------------------------------------------------------------
html = """
<h1>Demonstration of early days of converting Seattle Police Go reports to structured data</h1>
<h2>This is very very early days.</h2>
<p>The personal info isn't showing up in the right columns for example. There's a ton of work left to do. I haven't even begun writing parsers for the narratives.</p>
<style>
@import url('static/css/reset.css');
body {
font:.7em Arial;
}
table {
border-collapse:collapse;
}
th,td {
border:1px solid #000;
padding:5px;
text-align:left;
vertical-align:top;
}
th {
font-weight:bold;
}
h1 {
background:#000;
color:#FFF;
padding:10px;
font-size:2em;
font-weight:bold;
}
h2 {
font-weight:bold;
font-size:1.5em;
}
h3 {
font-size:1.25em;
}
#main {
padding:10px;
}
p, table, ol, h2, h3, h4, h5 {
margin-bottom:10px;
}
ol {
padding-left:20px;
}
ol li {
list-style-type: decimal;
}
</style>
<table>"""
ordered_heading = ['occurred_between', 'go_report_number','incident_type', 'narrative']
import nltk
from nltk.tag.stanford import NERTagger
# Stanford 3-class model (PERSON/ORGANIZATION/LOCATION); both the model file
# and stanford-ner.jar are expected in the working directory.
st = NERTagger('english.all.3class.distsim.crf.ser.gz', 'stanford-ner.jar')
for n, go_report_data in enumerate(go_reports_data):
    print 'Number:', n
    #print grab_names(go_report_data)
    # NOTE(review): as in the raw table above, a header row is emitted per
    # report — confirm intentional.
    html += "<tr>"
    for header in ordered_heading:
        html += "<th>%s</th>" % (header)
    html += "</tr>"
    html += "<tr>"
    for heading in ordered_heading:
        col = go_report_data.get(heading)
        if isinstance(col, list):
            html += "<td><table>"
            for i, person in enumerate(col):
                html += '<tr><th colspan="2">#%s</th></tr>' % (i)
                for k in sorted(person.keys()):
                    html += '<tr><th>%s</th><td>%s</td></tr>' % (k, person[k])
            html += "</table></td>"
        else:
            if heading == 'narrative':
                print grab_names(go_report_data)
                # Pass 1: redact names already parsed out of the report.
                for name in grab_names(go_report_data):
                    col = col.replace(name, '*redacted person/organization*')
                #find names with dashes in them
                names_with_dashes = re.findall('[A-Z][a-z]+-[A-Z][a-z]+', col)
                for name_with_dash in names_with_dashes:
                    col = col.replace(name_with_dash, '*redacted person/organization*')
                # Pass 2: NER-tag each distinct capitalized word and redact
                # those labeled PERSON or ORGANIZATION. Tagging words out of
                # sentence context is presumably a speed trade-off — the
                # sentence-level version is the commented experiment below.
                capitalized_words = sorted(list(set(re.findall('[A-Z][a-z]+', col))))
                tags = st.tag(capitalized_words)
                for tag in tags:
                    print tag
                    if tag[1]=='PERSON' or tag[1]=='ORGANIZATION':
                        col = col.replace(tag[0], '*redacted person/organization*')
                print capitalized_words
                # NOTE(review): `text` is assigned but never used afterwards.
                text = col
                #for sent in nltk.sent_tokenize(text):
                #    print 'working on', sent
                #    tokens = nltk.tokenize.word_tokenize(sent)
                #    tags = st.tag(tokens)
                #    for tag in tags:
                #        if tag[1]=='PERSON': print tag
                #
                #        col = col.replace(tag[0], '*R*')
                html += "<td>%s</td>" % (col)
            else:
                html += "<td>%s</td>" % (col)
    html += "</tr>"
html += """
</table>
"""
f = open('/var/www/html/bigtable.html', 'w')
f.write(html)
f.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment