Skip to content

Instantly share code, notes, and snippets.

@mklaber
Last active August 29, 2015 14:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mklaber/ff18df06486b11ad3f3e to your computer and use it in GitHub Desktop.
Save mklaber/ff18df06486b11ad3f3e to your computer and use it in GitHub Desktop.
AWS EMR version of Esri's point-in-polygon-aggregation-hive sample
-- This is a AWS-ified version of https://github.com/Esri/gis-tools-for-hadoop/blob/master/samples/point-in-polygon-aggregation-hive/run-sample.sql
-- expects https://github.com/Esri/gis-tools-for-hadoop/tree/master/samples/lib and https://github.com/Esri/gis-tools-for-hadoop/tree/master/samples/data to be in the root of ${INPUT}
-- s3://elasticmapreduce/libs/hive/hive-script --run-hive-script --hive-versions 0.13.1 --args -f s3://my-s3-bucket/run-sample.sql -d INPUT=s3://my-s3-bucket/path/to/samples -d OUTPUT=s3://my-s3-bucket/output/
add jar
${INPUT}/lib/esri-geometry-api.jar
${INPUT}/lib/spatial-sdk-hadoop.jar;
create temporary function ST_Point as 'com.esri.hadoop.hive.ST_Point';
create temporary function ST_Contains as 'com.esri.hadoop.hive.ST_Contains';
CREATE EXTERNAL TABLE IF NOT EXISTS earthquakes (earthquake_date STRING, latitude DOUBLE, longitude DOUBLE, magnitude DOUBLE)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
LOCATION '${INPUT}/data/earthquake-data';
CREATE EXTERNAL TABLE IF NOT EXISTS counties (Area string, Perimeter string, State string, County string, Name string, BoundaryShape binary)
ROW FORMAT SERDE 'com.esri.hadoop.hive.serde.JsonSerde'
STORED AS INPUTFORMAT 'com.esri.json.hadoop.EnclosedJsonInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION '${INPUT}/data/counties-data';
INSERT OVERWRITE DIRECTORY '${OUTPUT}/earthquakes/'
SELECT counties.name, count(*) cnt FROM counties
JOIN earthquakes
WHERE ST_Contains(counties.boundaryshape, ST_Point(earthquakes.longitude, earthquakes.latitude))
GROUP BY counties.name
ORDER BY cnt desc;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment