Created
October 4, 2022 00:38
-
-
Save mhubrich/da9b5cf085af7cc207ce101f54fc4b69 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Data sources: | |
* NYC neighborhoods: | |
https://data.cityofnewyork.us/City-Government/Neighborhood-Tabulation-Areas-NTA-/cpf4-rkhq | |
* NYC street trees: | |
https://data.cityofnewyork.us/Environment/2015-Street-Tree-Census-Tree-Data/pi5s-9p35 | |
""" | |
import geopandas as gpd | |
import shapely.ops | |
from rtree import index | |
df_neighborhoods = gpd.read_file('Neighborhood Tabulation Areas (NTA).geojson') | |
df_trees = gpd.read_file('2015 Street Tree Census - Tree Data.geojson') | |
# 1. First, we create a polygon representing the Upper East Side boundary | |
# Extract relevant neighborhoods | |
lenox_hill = df_neighborhoods.loc[df_neighborhoods.ntacode == 'MN31'].geometry | |
yorkville = df_neighborhoods.loc[df_neighborhoods.ntacode == 'MN32'].geometry | |
carnegie_hill = df_neighborhoods.loc[df_neighborhoods.ntacode == 'MN40'].geometry | |
# Both Lenox Hill and Yorkville have islands we want to exclude | |
lenox_hill = max(lenox_hill.to_numpy()[0], key=lambda x: x.area) | |
yorkville = max(yorkville.to_numpy()[0], key=lambda x: x.area) | |
carnegie_hill = max(carnegie_hill.to_numpy()[0], key=lambda x: x.area) | |
# Merge above neighborhoods to form Upper East Side | |
geom_upper_east_side = shapely.ops.unary_union([lenox_hill, yorkville, carnegie_hill]) | |
# 2. Next, we create an R-tree index holding all NYC trees | |
idx = index.Index() | |
for id, geom in zip(df_trees.index, df_trees.geometry): | |
idx.insert(id, geom.bounds) | |
# 3. Now, we generate a list of 'potential candidates', i.e. all trees that are within the bounding box of the Upper East Side | |
search_window = geom_upper_east_side.bounds | |
potential = list(idx.intersection(search_window)) | |
# 4. Finally, we iterate through all potential candidates to extract the ones that are fully within the Upper East Side | |
trees = [] | |
for p in df_trees.loc[potential, 'geometry']: | |
if geom_upper_east_side.contains(p): | |
trees.append(p) | |
# 5. Print results | |
print('Total number of trees in dataset: %d' % len(df_trees)) | |
print('Total number of trees tested: %d' % len(potential)) | |
print('Number of trees in Upper East Side: %d' % len(trees)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment