Skip to content

Instantly share code, notes, and snippets.

@melanieimfeld
Last active September 17, 2020 16:45
Show Gist options
  • Save melanieimfeld/b69869f838158f8dffa98a333c1e06a7 to your computer and use it in GitHub Desktop.
Save melanieimfeld/b69869f838158f8dffa98a333c1e06a7 to your computer and use it in GitHub Desktop.
Floodplain analysis NYC Geopandas
import geopandas as gpd
import pandas as pd
from shapely.geometry import Polygon, shape
from sodapy import Socrata
import os
#Run this file to get the data from open data NYC
#Below are the details, such as identifiers, for the datasets we want to download. Change limit to get a smaller subset
querylist = [
{'identifier':'myk6-g6eq','limit': 10000, 'queries' : [["The_geom"], [""]], 'tolerance': 0.0001, 'geom' : 'The_geom'}, #floodplains
{'identifier':'jp9i-3b7y','limit': 100, 'queries' : [["The_geom,boro_cd"],[" "]], 'tolerance': 0, 'geom' : 'The_geom'}, #community districts
{'identifier':'xi7c-iiu2','limit': 100, 'queries' : [["*"],[" "]], 'tolerance': None, 'geom' : None}, #population
{'identifier':'qjdb-bxqz','limit': 1000000, 'queries' : [["CNSTRCT_YR,the_geom"],["FEAT_CODE==2100"]], 'tolerance': 0.0001, 'geom' : 'the_geom'}, #buildings
]
outdir = "data"
# Create data folder if it does no exist
if not os.path.exists(outdir):
os.makedirs(outdir)
def getBasedata(identifier, limit, queries, tolerance, geom):
domain = 'data.cityofnewyork.us'
client = Socrata(domain, None)
results = client.get(identifier,limit=limit, select=queries[0], where=queries[1])
results = pd.DataFrame.from_dict(results)
if geom != None: #spatial files
gdf = createGdf(results,tolerance, geom)
gdf.to_file("{}/{}.geojson".format(outdir, identifier),driver='GeoJSON')
else: #non spatial
results.to_csv("{}/{}.csv".format(outdir, identifier))
print("download finished")
def createGdf(json,tolerance,name):
geometry = [shape(x).simplify(tolerance, preserve_topology=True) for x in json[name]]
json = json.drop(name, axis=1)
gdf = gpd.GeoDataFrame(json, crs='EPSG:4326', geometry=geometry)
gdf = gdf.to_crs(epsg=2263) #reproject to NAD83
return gdf
for i in querylist:
getBasedata(i["identifier"],i["limit"], i["queries"], i["tolerance"], i["geom"])
import geopandas as gpd
import pandas as pd
from shapely.geometry import Polygon, shape
#Functions for the basic analysis. Only works if data was downloaded with getdata.py
floodplains = gpd.read_file("data/myk6-g6eq.geojson")
cd = gpd.read_file("data/jp9i-3b7y.geojson")
buildings = gpd.read_file("data/qjdb-bxqz.geojson")
pop = pd.read_csv("data/xi7c-iiu2.csv", index_col = 0)
def checkFloodplain(cd,fp):
cd["boro_cd"] = cd["boro_cd"].astype(int)
intersection = gpd.overlay(cd, fp, how='intersection')
cd_subset = intersection.dissolve(by = 'boro_cd')
merged = pd.merge(cd["boro_cd"], cd_subset, how = "left", on = "boro_cd")
merged["contains_fldplain"] = merged.geometry.apply(lambda x: "No" if x==None else "Yes")
merged[["boro_cd", "contains_fldplain"]].to_csv('data/output.csv')
print("Files saved")
checkFloodplain(cd,floodplains)
#Functions for the extended analysis. It takes a bit of time, uncomment to run
def checkFloodplain2(cd,fp):
cd["boro_cd"] = cd["boro_cd"].astype(int)
intersection = gpd.overlay(cd, fp, how='intersection')
cd_subset = intersection.dissolve(by = 'boro_cd', as_index = False)
#get percentage data
cd_subset["fld_area_sqkm"] = cd_subset.geometry.area/10764000
cd["tot_area_sqkm"] = cd.geometry.area/10764000
merged = pd.merge(cd.loc[:,cd.columns != "geometry"], cd_subset, how = "left", on = "boro_cd")
merged["contains_fldplain"] = merged.geometry.apply(lambda x: "No" if x==None else "Yes")
merged["perc_fldplain"] = (merged["fld_area_sqkm"] / merged["tot_area_sqkm"] * 100).fillna(0).round(1)
print("percentage data added, wait...")
#get building data
merged = merged.merge(selectBuildings(cd_subset,buildings), on = "boro_cd", how = "left")
merged["building_density"] = merged.index_right / merged.fld_area_sqkm
print("building data added, wait...")
#get population data
merged = merged.merge(preparePopdata(pop), how="left", on="boro_cd")
print("pop growth rate data added")
export(merged)
def preparePopdata(pop):
pop["boro_cd"] = pop.apply(lambda x: boroCD(x. borough, x.cd_number), axis = 1)
pop["pop_growth_rate"] = (pop._2010_population - pop._1970_population) / pop._1970_population * 100
pop = pop[["boro_cd", "pop_growth_rate"]]
return pop
def boroCD(b, no):
if b == "Bronx":
return 200 + no
elif b == "Brooklyn":
return 300 + no
elif b == "Queens":
return 400 + no
elif b == "Manhattan":
return 100 + no
elif b == "Staten Island":
return 500 + no
else:
return 0
def selectBuildings(subset, buildings):
buildings.geometry = buildings.geometry.centroid
buildings.CNSTRCT_YR = pd.to_numeric(buildings.CNSTRCT_YR).fillna(0).astype(int)
b_intersection = gpd.sjoin(buildings, subset, op="within").dissolve(by = "boro_cd", aggfunc = {"CNSTRCT_YR":"median", "index_right": "count"})
return b_intersection[["CNSTRCT_YR", "index_right"]]
def export(merged):
merged.to_file("data/output.gpkg", layer='floodplains', driver="GPKG")
print("Files saved")
#checkFloodplain2(cd,floodplains)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment