Skip to content

Instantly share code, notes, and snippets.

@bhelx
Last active February 13, 2021 21:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bhelx/36be62f1ed50609ff6fdd802c86b684b to your computer and use it in GitHub Desktop.
Save bhelx/36be62f1ed50609ff6fdd802c86b684b to your computer and use it in GitHub Desktop.
Refactoring the [neighborhood annotation script](https://github.com/codefornola/nola-neighborhood-annotation) to utilize [the nolabase](https://github.com/codefornola/nolabase/) instead of using all the data and doing the computation locally. This demonstrates using the nolabase as a dependency to a community run application or tool. Run it by p…
#!/usr/local/bin/python
# -*- coding: utf-8 -*-
# Refactoring the [neighborhood annotation script](https://github.com/codefornola/nola-neighborhood-annotation) to
# utilize [the nolabase](https://github.com/codefornola/nolabase/) instead of using all the data
# and doing the computation locally.
# This demonstrates using the nolabase as a dependency to a community run application or tool.
# Run it by passing a connection string and an input and output csv file:
# ```
# # get 2021 calls for service data as a test
# curl https://data.nola.gov/resource/3pha-hum9.csv > calls.csv
# python3 annotate.py postgresql://nolabaseuser:nolabasepassword@nolabase.codeforneworleans.org/nolabase calls.csv output.csv
# ```
import sys
import csv
import re
import psycopg2
from urllib.parse import urlparse
lat_lng_rg = re.compile('.*?([+-]?\\d*\\.\\d+)(?![-+0-9\\.]).*?([+-]?\\d*\\.\\d+)(?![-+0-9\\.])')
def parse_lat_lng(lat_lng_string):
"""
Turns the Location column into (lat, lng) floats
May look like this "(29.98645605, -90.06910049)"
May have degree symbol "(29.98645605°,-90.06910049°)"
"""
m = lat_lng_rg.search(lat_lng_string)
if m:
return (float(m.group(1)), float(m.group(2)))
else:
return (None, None)
# If I were to use this in production, I'd change this to be a batch
# call to the database instead of doing one row at a time
def find_neighborhood(conn, lat, lng):
cur = conn.cursor()
sql = """
SELECT name FROM geometries.neighborhoods AS n WHERE
st_within(ST_GeomFromText('POINT(%f %f)', 4326), n.geom) LIMIT 1
""" % (lat, lng)
cur.execute(sql)
rows = cur.fetchone()
if rows is None or len(rows) <= 0:
return "N/A"
return rows[0]
def annotate_csv(conn, in_file, out_file):
"""
Goes row by row through the in_file and
writes out the row to the out_file with
the new Neighbhorhood column
"""
reader = csv.reader(in_file)
writer = csv.writer(out_file)
# Write headers first, add new neighborhood column
headers = next(reader)
headers.append('Neighborhood')
writer.writerow(headers)
for row in reader:
# WGS84 point, "Location" column, is last element
lat, lng = parse_lat_lng(row[-1])
if lat and lng:
neighborhood = find_neighborhood(conn, lat, lng)
else:
neighborhood = 'N/A'
row.append(neighborhood)
writer.writerow(row)
print("#%s lat: %s lng: %s -> %s" % (reader.line_num, lat, lng,
neighborhood))
def print_help():
help = """
Usage:
python annotate.py postgresql://username:password@nolabase.codefornola.org/nolabase input.csv output.csv
"""
print(help)
if __name__ == '__main__':
if len(sys.argv) < 3:
print_help()
sys.exit()
parts = urlparse(sys.argv[1])
conn = psycopg2.connect(
user=parts.username,
password=parts.password,
dbname=parts.path[1:],
host=parts.hostname,
port=parts.port
)
in_file_path = sys.argv[2]
out_file_path = sys.argv[3]
with open(in_file_path, 'r') as in_file:
with open(out_file_path, 'w') as out_file:
annotate_csv(conn, in_file, out_file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment