Last active
August 29, 2015 14:16
-
-
Save mklaber/ff18df06486b11ad3f3e to your computer and use it in GitHub Desktop.
AWS EMR version of Esri's point-in-polygon-aggregation-hive sample
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- This is a AWS-ified version of https://github.com/Esri/gis-tools-for-hadoop/blob/master/samples/point-in-polygon-aggregation-hive/run-sample.sql | |
-- expects https://github.com/Esri/gis-tools-for-hadoop/tree/master/samples/lib and https://github.com/Esri/gis-tools-for-hadoop/tree/master/samples/data to be in the root of ${INPUT} | |
-- s3://elasticmapreduce/libs/hive/hive-script --run-hive-script --hive-versions 0.13.1 --args -f s3://my-s3-bucket/run-sample.sql -d INPUT=s3://my-s3-bucket/path/to/samples -d OUTPUT=s3://my-s3-bucket/output/ | |
add jar | |
${INPUT}/lib/esri-geometry-api.jar | |
${INPUT}/lib/spatial-sdk-hadoop.jar; | |
create temporary function ST_Point as 'com.esri.hadoop.hive.ST_Point'; | |
create temporary function ST_Contains as 'com.esri.hadoop.hive.ST_Contains'; | |
CREATE EXTERNAL TABLE IF NOT EXISTS earthquakes (earthquake_date STRING, latitude DOUBLE, longitude DOUBLE, magnitude DOUBLE) | |
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' | |
LOCATION '${INPUT}/data/earthquake-data'; | |
CREATE EXTERNAL TABLE IF NOT EXISTS counties (Area string, Perimeter string, State string, County string, Name string, BoundaryShape binary) | |
ROW FORMAT SERDE 'com.esri.hadoop.hive.serde.JsonSerde' | |
STORED AS INPUTFORMAT 'com.esri.json.hadoop.EnclosedJsonInputFormat' | |
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' | |
LOCATION '${INPUT}/data/counties-data'; | |
INSERT OVERWRITE DIRECTORY '${OUTPUT}/earthquakes/' | |
SELECT counties.name, count(*) cnt FROM counties | |
JOIN earthquakes | |
WHERE ST_Contains(counties.boundaryshape, ST_Point(earthquakes.longitude, earthquakes.latitude)) | |
GROUP BY counties.name | |
ORDER BY cnt desc; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment