Skip to content

Instantly share code, notes, and snippets.

@bojanbabic
Last active September 22, 2023 16:03
Show Gist options
  • Save bojanbabic/11442faa5e82f1e7b791f22026da7425 to your computer and use it in GitHub Desktop.
Save bojanbabic/11442faa5e82f1e7b791f22026da7425 to your computer and use it in GitHub Desktop.
Address to Hood Id
import pandas as pd
from nextdoor.i18n import locale_api
from nextdoor.gql.geo.geo_tag_utils import get_neighborhood_from_lat_lon
from nextdoor.common.mapbox import mapbox_client
# Select the locale before any of the geo / mapbox helpers are called.
# NOTE(review): presumably required by the nextdoor helpers imported above — confirm.
locale_api.set_locales('en-us')
# Input CSV; get_lat_lon reads its "Street Address", "City", "State" and "ZIP" columns.
csv_file = "/app/CA_ncesdata_642B9065_cleaned.csv"
# Destination CSV: the input rows plus the derived lat, lon and hood_id columns.
output_file = "/app/CA_ncesdata_642B9065_hood_id.csv"
# Load the whole input file into memory as a DataFrame.
df = pd.read_csv(csv_file)
def get_lat_lon(row):
    """Geocode one DataFrame row's postal address via the mapbox client.

    Args:
        row: A row (as passed by ``DataFrame.apply(..., axis=1)``) providing
            the "Street Address", "City", "State" and "ZIP" fields. Its
            ``name`` (the index label) is used for progress output and is
            expected to be an integer.

    Returns:
        A ``(lat, lon)`` tuple taken from the first returned feature, or
        ``(None, None)`` when the lookup yields no usable result.
    """
    # Lightweight progress indicator: one line every 100 rows.
    if row.name % 100 == 0:
        print(f"Processing {row.name}")

    address = row["Street Address"]
    city = row["City"]
    state = row["State"]
    zip_code = row["ZIP"]  # named zip_code so the builtin zip() is not shadowed
    raw_address = f"{address} {city} {zip_code} {state}"

    response = mapbox_client.fetch_search_data_from_mapbox(
        raw_address, enable_autocomplete='false', proximity=None
    )

    # Guard against an empty/None response before probing it; a bare
    # `"features" in response` raises TypeError when response is None.
    features = (
        response["features"] if response and "features" in response else None
    )
    if not features or "center" not in features[0]:
        return None, None

    centroid = features[0]["center"]
    if not centroid:
        return None, None

    # The centre is unpacked as (lon, lat); callers of this function
    # receive the pair flipped to (lat, lon).
    lon, lat = centroid
    return lat, lon
def get_hood_id(row):
    """Look up the neighborhood id for a row's geocoded coordinates.

    Args:
        row: A mapping-like row providing "lat" and "lon" entries.

    Returns:
        The ``id`` attribute of the object returned by
        ``get_neighborhood_from_lat_lon``, or ``None`` when either coordinate
        is missing or no neighborhood is found.
    """
    lat = row["lat"]
    lon = row["lon"]
    # pd.isna catches both None and NaN. A plain truthiness test misses NaN
    # (NaN is truthy), which is how pandas usually stores a missing float,
    # and it wrongly rejects a legitimate 0.0 coordinate.
    if pd.isna(lat) or pd.isna(lon):
        return None
    hood = get_neighborhood_from_lat_lon(lat, lon)
    if not hood:
        return None
    return hood.id
# Geocode every row. get_lat_lon returns a (lat, lon) tuple per row, so each
# tuple is expanded into two columns before being assigned.
df[['lat', 'lon']] = df.apply(get_lat_lon, axis=1).apply(pd.Series)
# get_hood_id returns one scalar per row, so the resulting Series is assigned
# directly. Wrapping each scalar in pd.Series would build a DataFrame instead,
# and one with no columns at all when every lookup returns None.
df['hood_id'] = df.apply(get_hood_id, axis=1)
# Write the enriched table; with pandas' defaults the index is written as the
# first column.
df.to_csv(output_file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment