Skip to content

Instantly share code, notes, and snippets.

@rossbernet
Last active April 19, 2017 14:19
Show Gist options
  • Save rossbernet/50627f725b68e882cd27a1df57ec9160 to your computer and use it in GitHub Desktop.
Save rossbernet/50627f725b68e882cd27a1df57ec9160 to your computer and use it in GitHub Desktop.
Recreating the libya weighted raster overlay app in python
from pyspark import RDD
from geopyspark.context import GeoPyContext
gsc = GeoPyContext(
master = "local[*]"
appName = "libya-demo"
)
## Load GeoJSON using Shapley
# TODO: use real shapley syntax
road_lines = open(roads.geojson)
libya_polygon = open(libya.geojson)
conflict_points = open(conflict.geojson)
population_points = open(population.geojson)
# TODO: should we use a Shapely extent here?
# Because the output of this worksheet will be a webmap we're going to work on the base level
project_tile_layout = TmsLayoutScheme(zoom = 12, crs = "EPSG:3857")
from geopyspark.geotrellis import rasterize, reclassify, costdistance
road_raster_rdd = rasterize(
geometry = road_lines,
value = 1,
layout = project_tile_layout)
# create friction layer from roads. 2 steps. rasterize then reclassify
road_friction_rdd = reclassify(
raster_rdd = road_raster_rdd,
classes = {
1: 10,
(2,5): 2,
None: 10
})
#Cost distance
conflict_cost_distance_rdd = costdistance(
source_geometry = conflict_points,
friction_layer = road_friction_rdd,
max_distance = 50000
)
# We need to make a layer that shows how likely we are to find people based on population and their ability to travel
population_cd_rdd = costdistance(
source_geomery = population_points,
firctoin_layer = road_friction_rdd,
max_distance = 50000
)
### Break for doodling ---
# Bronze:
# TODO: helper function hh that turns dictionary into a generator
magic_pop_rdd = pipulation_cd_rdd.map(
lambda x: { 'no_data': x['no_data'],
'data' : x['data'] / 50000 * -1 + 1 })
magic_pop_rdd = pipulation_cd_rdd.map(
lambda tiles: [x / 50000 * -1 + 1 for x in hh(tiles)])
# Silver:
magic_pop_rdd = 1 - (population_cd_rdd / 50000)
# this only on rdd of our format ...
# TODO: benchmark the serialzation
magic_pop_rdd.focal(Square(3), SUM)
magic_pop_rdd.focal(Circle(3), SUM)
magic_pop_rdd.focal(Square(3), lambda nn: sum(nn))
# Gold:
# we pay serialization cost only once
# TODO: Lets get jiggy with ASTs
class Add(Operation):
...
class Operation:
def toRDD(self):
return the goods
something_like_rdd = Add(Multiply(Divide(population_cd_rdd, 50000), -1), 1)
something_like_rdd.rdd
# Interface: (we pay for serialization once)
magic_pop = 1 - (population_cd_rdd / 50000)
type(magic_op) #type: MapAlgebra
magic_pop_rdd = magic_pop.rdd
# ---
def scaled_cost_distance(features, friction):
''' Implment something that does this per feature
>>> rdd = 1 - (population_cd_rdd / 50000) * feature.weight
'''
return result
populartion_scale_rdd = scaled_cost_distance(
population_points,
road_friction_rdd
).normalize((0, max_city_population), (0, 100))
conflict_cost_distance_rdd = costdistance(
source_geometry = conflict_points,
friction_layer = road_friction_rdd,
max_distance = 50000
).normalize(from = (0, 5000), to = (0,100))
# TODO: lets test normalize that runs a min/max on source layer
## Perform the weighted average
suitability_rdd = (population_scale_rdd * 0.4) + (conflict_cost_distance_rdd * 0.6)
# This will trigger the execution of everything above
arr = suitablity_rdd.clip(libya_polygon).stitch(5000, 5000)
matplotlib.display(arr, cmap=hot)
## Imagine: map of libya weighted overlay
# ross' crack at GPS libya ideal pseudo code
# Using a simplified version that only includes 2 factors (conflict, population centers)
# But will require a roads network friction layer to run cost distance on each of those.
# so there will be 4 total data sets required: roads (polyline), libya boundary (polygon), conflict (points), population (points)
$pip install geopyspark
import geopyspark as gps
import numpy as np
## open all the vector datasets
roads = open(roads.geojson)
boundary = open(libya.geojson)
conflict = open(conflict.geojson)
population = open(population.geojson)
# Set extent. My ideal would be to be able to draw this on a map. Otherwise something like:
extent = clipTo(boundary)
# create friction layer from roads. 2 steps. rasterize then reclassify
roadRaster = gps.rasterize(roads)
roadFriction = gps.reclassify(road_raster, "1 1; NODATA 10")
#Cost distance
conflictCostDistance = gps.costDistance(conflict, roadFriction)
populationCostDistance = gps.costDistance(population, roadFriction)
meanCostDistance = (conflictCostDistance + populationCostDistance)/2
finalReclassify = gps.reclassify(meanCostDistance, "some numbers thats make sense.... ; NODATA 10")
open('roads.geojson')
# ARCPY version
# -*- coding: utf-8 -*-
# ---------------------------------------------------------------------------
# CD_script.py
# Created on: 2017-03-14 12:56:35.00000
# (generated by ArcGIS/ModelBuilder)
# Description:
# ---------------------------------------------------------------------------
# Import arcpy module
import arcpy
# Local variables:
population_shp = "\\\\FILESHARE\\users\\rbernet\\Cost_Distance_Sample_Demo\\population.shp"
roads_shp = "\\\\FILESHARE\\users\\rbernet\\Cost_Distance_Sample_Demo\\roads.shp"
roads_PolylineToRaster2 = "C:\\Users\\rbernet\\Documents\\ArcGIS\\Default.gdb\\roads_PolylineToRaster2"
Reclass_road11 = "C:\\Users\\rbernet\\Documents\\ArcGIS\\Default.gdb\\Reclass_road11"
Extent = "14.3092600001489 27.3469200003578 17.5478600003388 30.0000000003997"
Output_backlink_raster = ""
conflict_shp = "\\\\FILESHARE\\users\\rbernet\\Cost_Distance_Sample_Demo\\conflict.shp"
Output_backlink_raster__2_ = ""
CostDis_shp25 = "C:\\Users\\rbernet\\Documents\\ArcGIS\\Default.gdb\\CostDis_shp25"
CostDis_shp26 = "C:\\Users\\rbernet\\Documents\\ArcGIS\\Default.gdb\\CostDis_shp26"
CellSta_Cost1 = "C:\\Users\\rbernet\\Documents\\ArcGIS\\Default.gdb\\CellSta_Cost1"
# Process: Polyline to Raster
arcpy.PolylineToRaster_conversion(roads_shp, "FID", roads_PolylineToRaster2, "MAXIMUM_LENGTH", "NONE", "0.011")
# Process: Reclassify
arcpy.gp.Reclassify_sa(roads_PolylineToRaster2, "Value", "0 169 1;NODATA 10", Reclass_road11, "DATA")
# Process: Cost Distance
tempEnvironment0 = arcpy.env.extent
arcpy.env.extent = Extent
arcpy.gp.CostDistance_sa(population_shp, Reclass_road11, CostDis_shp26, "", Output_backlink_raster, "", "", "", "")
arcpy.env.extent = tempEnvironment0
# Process: Cost Distance (2)
tempEnvironment0 = arcpy.env.extent
arcpy.env.extent = Extent
arcpy.gp.CostDistance_sa(conflict_shp, Reclass_road11, CostDis_shp25, "", Output_backlink_raster__2_, "", "", "", "")
arcpy.env.extent = tempEnvironment0
# Process: Cell Statistics
arcpy.gp.CellStatistics_sa("C:\\Users\\rbernet\\Documents\\ArcGIS\\Default.gdb\\CostDis_shp25;C:\\Users\\rbernet\\Documents\\ArcGIS\\Default.gdb\\CostDis_shp26", CellSta_Cost1, "MEAN", "DATA")
@rossbernet
Copy link
Author

rossbernet commented Mar 14, 2017

image

image

image

image

image

image

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment