Created
June 1, 2017 23:41
-
-
Save r-shekhar/7625fd524e9639933371da5560b6dccc to your computer and use it in GitHub Desktop.
Assign Taxi Zones Snippet
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def assign_taxi_zones(df, lon_var, lat_var, locid_var):
    """Spatially join points in a DataFrame with the NYC taxi zones shapefile.

    Longitudes are read from column `lon_var` and latitudes from column
    `lat_var` of `df`. Each (lon, lat) pair is turned into a point and joined
    (left join, "within" test) against the polygons read from
    ``../shapefiles/taxi_zones.shp``, reprojected to EPSG:4326.

    The shapefile path is hard coded. Missing coordinates (NaN) are replaced
    by 0.0 before the join, on the assumption that latitude=0, longitude=0
    lies outside every taxi zone. That is reasonable for a New York dataset,
    but bad for a global one.

    `df` itself is not modified; the function works on a copy of the two
    coordinate columns.

    Parameters
    ----------
    df : pandas.DataFrame
        DataFrame containing the longitude and latitude columns.
    lon_var : string
        Name of column in `df` containing longitude values. Invalid values
        should be np.nan.
    lat_var : string
        Name of column in `df` containing latitude values. Invalid values
        should be np.nan.
    locid_var : string
        Name given to the returned series.

    Returns
    -------
    pandas.Series
        The ``LocationID`` column of the join result, renamed to
        `locid_var`. Points that fall inside no zone get NaN. If the join
        raises ``ValueError``, the error and its traceback are printed and a
        series of NaN, indexed like `df` and named `locid_var`, is returned
        instead.
    """
    import traceback

    import geopandas
    import numpy as np
    import pandas as pd
    from shapely.geometry import Point

    # Work on a copy since the lat/lon columns are modified below.
    localdf = df[[lon_var, lat_var]].copy()

    # Missing lat/lon info is indicated by NaN. Fill with zero, which is
    # outside the New York shapefile, so those rows simply match no zone.
    localdf[lon_var] = localdf[lon_var].fillna(value=0.)
    localdf[lat_var] = localdf[lat_var].fillna(value=0.)

    shape_df = geopandas.read_file('../shapefiles/taxi_zones.shp')
    # Drop the attribute columns listed here; LocationID is what gets returned.
    shape_df.drop(['OBJECTID', "Shape_Area", "Shape_Leng", "borough", "zone"],
                  axis=1, inplace=True)
    shape_df = shape_df.to_crs({'init': 'epsg:4326'})

    try:
        points = [Point(xy) for xy in zip(localdf[lon_var], localdf[lat_var])]
        local_gdf = geopandas.GeoDataFrame(
            localdf, crs={'init': 'epsg:4326'}, geometry=points)
        # NOTE(review): newer geopandas releases spell this keyword
        # `predicate` instead of `op` -- confirm against the installed version.
        local_gdf = geopandas.sjoin(
            local_gdf, shape_df, how='left', op='within')
        return local_gdf.LocationID.rename(locid_var)
    except ValueError as ve:
        print(ve)
        # Exceptions have no `.stacktrace()` method; format the traceback of
        # the exception currently being handled instead.
        print(traceback.format_exc())
        # Return an all-NaN series aligned with the input rather than a bare
        # scalar, so callers can still assign it as a column.
        return pd.Series(np.nan, index=localdf.index, name=locid_var)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment