Skip to content

Instantly share code, notes, and snippets.

@timrobertson100
Last active October 10, 2022 14:45
Show Gist options
  • Save timrobertson100/3d996c0a8fc1bdbeb94feab639258abf to your computer and use it in GitHub Desktop.
Save timrobertson100/3d996c0a8fc1bdbeb94feab639258abf to your computer and use it in GitHub Desktop.
Export cluster example
-- wget https://repository.gbif.org/repository/gbif/org/gbif/occurrence/occurrence-hive/0.187/occurrence-hive-0.187-jar-with-dependencies.jar
-- hdfs dfs -put occurrence-hive-0.187-jar-with-dependencies.jar /tmp
-- wget https://repository.gbif.org/repository/central/com/klout/brickhouse/0.6.0/brickhouse-0.6.0.jar
-- hdfs dfs -put brickhouse-0.6.0.jar /tmp
ADD JAR hdfs:///tmp/occurrence-hive-0.187-jar-with-dependencies.jar;
ADD JAR hdfs:///tmp/brickhouse-0.6.0.jar;
CREATE TEMPORARY FUNCTION toLocalISO8601 AS 'org.gbif.occurrence.hive.udf.ToLocalISO8601UDF';
CREATE TEMPORARY FUNCTION joinArray AS 'brickhouse.udf.collect.JoinArrayUDF';
CREATE table tim.cluster_export
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
AS SELECT
reasons,
id1,
dataset1,
o1.occurrenceid AS occurrenceID1,
o1.scientificname AS scientificName1,
o1.countrycode AS countryCode1,
o1.locality AS locality1,
o1.stateprovince AS stateprovince1,
o1.occurrencestatus AS occurrencestatus1,
o1.publishingorgkey AS publishingorgkey1,
o1.decimallatitude AS decimalLatitude1,
o1.decimallongitude AS decimalLongitude1,
o1.coordinateuncertaintyinmeters AS coordinateUncertaintyInMeters1,
o1.coordinateprecision AS coordinatePrecision1,
toLocalISO8601(o1.eventdate) AS eventdate1,
o1.day AS day1,
o1.month AS month1,
o1.year AS year1,
o1.basisofrecord AS basisOfRecord1,
o1.institutioncode AS institutionCode1,
o1.collectioncode AS collectionCode1,
o1.catalognumber AS catalogNumber1,
o1.recordnumber AS recordNumber1,
if(o1.identifiedby IS NULL,'',joinArray(o1.identifiedby,'\\;')) AS identifiedBy1,
toLocalISO8601(o1.dateidentified) AS dateIdentified1,
if(o1.recordedby IS NULL,'',joinArray(o1.recordedby,'\\;')) AS recordedBy1,
if(o1.typestatus IS NULL,'',joinArray(o1.typestatus,'\\;')) AS typeStatus1,
o2.occurrenceid AS occurrenceID2,
o2.scientificname AS scientificName2,
o2.countrycode AS countryCode2,
o2.locality AS locality2,
o2.stateprovince AS stateprovince2,
o2.occurrencestatus AS occurrencestatus2,
o2.publishingorgkey AS publishingorgkey2,
o2.decimallatitude AS decimalLatitude2,
o2.decimallongitude AS decimalLongitude2,
o2.coordinateuncertaintyinmeters AS coordinateUncertaintyInMeters2,
o2.coordinateprecision AS coordinatePrecision2,
toLocalISO8601(o2.eventdate) AS eventdate2,
o2.day AS day2,
o2.month AS month2,
o2.year AS year2,
o2.basisofrecord AS basisOfRecord2,
o2.institutioncode AS institutionCode2,
o2.collectioncode AS collectionCode2,
o2.catalognumber AS catalogNumber2,
o2.recordnumber AS recordNumber2,
if(o2.identifiedby IS NULL,'',joinArray(o2.identifiedby,'\\;')) AS identifiedBy2,
toLocalISO8601(o2.dateidentified) AS dateIdentified2,
if(o2.recordedby IS NULL,'',joinArray(o2.recordedby,'\\;')) AS recordedBy2,
if(o2.typestatus IS NULL,'',joinArray(o2.typestatus,'\\;')) AS typeStatus2
from prod_h.occurrence_relationships r
JOIN prod_h.occurrence o1 on r.id1 = o1.gbifID
JOIN prod_h.occurrence o2 on r.id2 = o2.gbifID
WHERE
r.id1 < r.id2 AND
o1.genusKey=7989631 AND
o1.countryCode='BR';
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment