Skip to content

Instantly share code, notes, and snippets.

@ivopbernardo
Last active March 11, 2022 14:00
Show Gist options
  • Save ivopbernardo/bf1e385762eafa2e5ea9564fd4a95f9b to your computer and use it in GitHub Desktop.
Save ivopbernardo/bf1e385762eafa2e5ea9564fd4a95f9b to your computer and use it in GitHub Desktop.
Locate your Data and Boost it with Geo-Processing Post
# Getting Latitude and Longitude from Nominatim
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter
# Nominatim client; the user agent string identifies this application
# to the geocoding service.
geocoder = Nominatim(user_agent="FindAddress")

# Wrap the raw geocode call in a rate limiter: successive calls are spaced
# at least one second apart, and None is returned for a lookup that fails.
geocode = RateLimiter(
    geocoder.geocode,
    min_delay_seconds=1,
    return_value_on_exception=None,
)

# Look up a single address through the rate-limited wrapper
# (the result is only displayed, not stored).
geocode("Areeiro, Lisboa, Portugal")
# Using GeoPandas
import geopandas as gpd
# Build point geometries from the longitude/latitude columns of the house
# table; the coordinates are declared as WGS 84 (EPSG:4326).
house_data_gdf = gpd.GeoDataFrame(
    house_data,
    geometry=gpd.points_from_xy(
        house_data["longitude"],
        house_data["latitude"],
    ),
    crs="epsg:4326",
)

# Reproject in place to EPSG:3857 so that coordinates are expressed in
# metre-like units instead of degrees.
# NOTE(review): EPSG:3857 is the global Web Mercator projection, not a
# Portugal-specific CRS; lengths are stretched away from the equator, so
# buffers and distances computed below are not true ground metres.
# A national grid such as EPSG:3763 may be more accurate, but every later
# `to_crs` call would have to change with it -- confirm before switching.
house_data_gdf.to_crs(epsg=3857, inplace=True)
# Obtaining Shape File for Portugal
# The "zip+https://" prefix lets GeoPandas read the shapefile straight from
# the remote zip archive.
parishes_url = "zip+https://dados.gov.pt/s/resources/freguesias-de-portugal/20181112-195834/cont-aad-caop2017.zip"
parishes = gpd.read_file(parishes_url)
parishes.head()

# Joining House Data with Parish
# A spatial join compares raw coordinates, so the parish polygons are first
# reprojected to the houses' CRS (a no-op when they already match; assumes
# the shapefile declares its CRS).
# `predicate` replaces the deprecated `op` keyword of `sjoin`.
# The joined frame is only displayed here, not stored.
gpd.sjoin(
    house_data_gdf,
    parishes.to_crs(house_data_gdf.crs),
    how="left",
    predicate="within",
)
# Counting the Hospitals near Houses
# Work on a copy of the houses, add a column holding each geometry buffered
# by 1000 CRS units, and make that buffer column the active geometry so
# spatial operations use the buffers rather than the original points.
house_data_gdf_buffer = (
    house_data_gdf
    .copy()
    .assign(geometry_buffer=lambda houses: houses.buffer(1000))
    .set_geometry("geometry_buffer")
)
# To get a statistic of one geometry layer within another, spatially join
# the two layers and then aggregate the matches.
#
# The left join keeps every house and preserves the houses' index (repeated
# once per matching hospital). Grouping on that index and counting the
# non-null OBJECTID values therefore yields one count per house -- 0 when
# no hospital falls inside the buffer -- and the resulting Series is aligned
# back onto `house_data_gdf` by index rather than by row position.
# NOTE(review): assumes the house index is unique (one row per house).
# `predicate` replaces the deprecated `op` keyword of `sjoin`.
house_data_gdf["hospitals_in_1km"] = (
    gpd.sjoin(
        house_data_gdf_buffer,
        hospitals_gdf,
        how="left",
        predicate="contains",
    )
    .groupby(level=0)["OBJECTID"]
    .count()
)
# Calculating Distance from Metro to each House
# `pd` and `Path` are used below but were never imported in this script.
from pathlib import Path

import pandas as pd

# Read the data
metro = pd.read_csv(Path("data", "metro_stations.csv"))

# Convert to a GeoDataFrame: the station coordinates are declared as WGS 84
# and then reprojected to whatever CRS the houses use, so both layers stay
# comparable even if the houses' projection is changed later.
metro = gpd.GeoDataFrame(
    metro,
    geometry=gpd.points_from_xy(metro.longitude, metro.latitude),
    crs="epsg:4326",
).to_crs(house_data_gdf.crs)
# Get the closest metro station to each house.
# Both frames carry latitude/longitude columns, so the station's copies come
# back with the "_right" suffix.
closest_metros = gpd.sjoin_nearest(
    house_data_gdf,
    metro,
    how="left",
).loc[:, ["latitude_right", "longitude_right"]]

# `sjoin_nearest` returns one row per equally-near station, so a house with
# tied (or duplicated) stations would appear several times and the
# positional assignment below would fail with a length mismatch.
# Keep the first match per house.
# NOTE(review): assumes the house index is unique.
closest_metros = closest_metros[~closest_metros.index.duplicated(keep="first")]

# Convert the closest metro location to a geometry in the houses' CRS
house_data_gdf["closest_metro"] = gpd.points_from_xy(
    closest_metros.longitude_right,
    closest_metros.latitude_right,
    crs="epsg:4326",
).to_crs(house_data_gdf.crs)
# Distance from every house geometry to its closest metro station, matched
# row by row on the index. `assign` returns a new frame, so the result is
# only displayed here; `house_data_gdf` itself is left unchanged.
house_data_gdf.assign(
    metro_distance=house_data_gdf.distance(
        house_data_gdf["closest_metro"],
        align=True,
    )
)
# Obtaining Elevation
import rasterio
from rasterio.plot import show
# Path to the elevation raster: the GeoTIFF "mdt.tif" inside the local
# archive data/mdt.zip.
url = "zip+file:data/mdt.zip!mdt.tif"

# The dataset handle is kept open because it is sampled further below.
lisbon_elevation = rasterio.open(url)

# Plot the raster with the "terrain" colormap to get a sense of the data
show(lisbon_elevation, cmap="terrain")
# Get the elevation from the raster data.
# The houses are reprojected to the raster's CRS so their coordinates index
# the raster correctly.
house_points = house_data_gdf.to_crs(lisbon_elevation.crs).geometry

# Sample every point in a single `sample()` call instead of creating a new
# generator per house. `sample` yields one array of band values per
# coordinate pair, in input order; element 0 is the first band's value.
# The list is assigned positionally, which matches the row order of
# `house_data_gdf` because `to_crs` preserves it.
house_data_gdf["elevation"] = [
    band_values[0]
    for band_values in lisbon_elevation.sample(
        list(zip(house_points.x, house_points.y))
    )
]
# Performing statistics
# Variable under study: price per square metre of every house.
y = house_data_gdf["price_per_m2"].values

# Spatial weights built from each house's 1000 nearest neighbours.
# NOTE(review): `KNN` is not imported anywhere in this script; presumably
# it is `libpysal.weights.KNN` -- confirm and import it.
w = KNN.from_dataframe(house_data_gdf, k=1000)

# "r" selects the row-standardised form of the weights.
w.transform = "r"
from splot.esda import lisa_cluster
from esda.moran import Moran_Local
import contextily as ctx
# Local Moran statistics of the house prices under the KNN weights.
moran_loc = Moran_Local(y, w)

# Plot the LISA clusters on the same GeoDataFrame that `y` and `w` were
# built from; the original passed an undefined name `gdf` here.
fig, ax = lisa_cluster(
    moran_loc,
    house_data_gdf,
    p=0.05,
    figsize=(10, 10),
    markersize=500,
)

# Add a background tile layer underneath the clusters.
# NOTE(review): Stamen tiles are now served through Stadia Maps; confirm
# this provider still exists in the installed contextily/xyzservices.
ctx.add_basemap(ax, source=ctx.providers.Stamen.TonerLite)

# NOTE(review): `plt` is not imported anywhere in this script; presumably
# `import matplotlib.pyplot as plt` -- confirm and import it.
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment