Skip to content

Instantly share code, notes, and snippets.

@timrobertson100
Created September 29, 2017 20:34
Show Gist options
  • Save timrobertson100/2fd42c60c7648f5ea0e12f0a49ad99cc to your computer and use it in GitHub Desktop.
Save timrobertson100/2fd42c60c7648f5ea0e12f0a49ad99cc to your computer and use it in GitHub Desktop.
Verbatim DwC Term Frequency (Occurrences, verbatim fields)
FROM prod_d.occurrence_hdfs
SELECT
sum(CASE WHEN v_type IS NULL THEN 0 ELSE 1 END) AS type ,
sum(CASE WHEN v_modified IS NULL THEN 0 ELSE 1 END) AS modified ,
sum(CASE WHEN v_language IS NULL THEN 0 ELSE 1 END) AS language ,
sum(CASE WHEN v_license IS NULL THEN 0 ELSE 1 END) AS license ,
sum(CASE WHEN v_rightsHolder IS NULL THEN 0 ELSE 1 END) AS rightsHolder ,
sum(CASE WHEN v_accessRights IS NULL THEN 0 ELSE 1 END) AS accessRights ,
sum(CASE WHEN v_bibliographicCitation IS NULL THEN 0 ELSE 1 END) AS bibliographicCitation ,
sum(CASE WHEN v_references IS NULL THEN 0 ELSE 1 END) AS references ,
sum(CASE WHEN v_institutionID IS NULL THEN 0 ELSE 1 END) AS institutionID ,
sum(CASE WHEN v_collectionID IS NULL THEN 0 ELSE 1 END) AS collectionID ,
sum(CASE WHEN v_datasetID IS NULL THEN 0 ELSE 1 END) AS datasetID ,
sum(CASE WHEN v_institutionCode IS NULL THEN 0 ELSE 1 END) AS institutionCode ,
sum(CASE WHEN v_collectionCode IS NULL THEN 0 ELSE 1 END) AS collectionCode ,
sum(CASE WHEN v_datasetName IS NULL THEN 0 ELSE 1 END) AS datasetName ,
sum(CASE WHEN v_ownerInstitutionCode IS NULL THEN 0 ELSE 1 END) AS ownerInstitutionCode ,
sum(CASE WHEN v_basisOfRecord IS NULL THEN 0 ELSE 1 END) AS basisOfRecord ,
sum(CASE WHEN v_informationWithheld IS NULL THEN 0 ELSE 1 END) AS informationWithheld ,
sum(CASE WHEN v_dataGeneralizations IS NULL THEN 0 ELSE 1 END) AS dataGeneralizations ,
sum(CASE WHEN v_dynamicProperties IS NULL THEN 0 ELSE 1 END) AS dynamicProperties ,
sum(CASE WHEN v_occurrenceID IS NULL THEN 0 ELSE 1 END) AS occurrenceID ,
sum(CASE WHEN v_catalogNumber IS NULL THEN 0 ELSE 1 END) AS catalogNumber ,
sum(CASE WHEN v_recordNumber IS NULL THEN 0 ELSE 1 END) AS recordNumber ,
sum(CASE WHEN v_recordedBy IS NULL THEN 0 ELSE 1 END) AS recordedBy ,
sum(CASE WHEN v_individualCount IS NULL THEN 0 ELSE 1 END) AS individualCount ,
sum(CASE WHEN v_organismQuantity IS NULL THEN 0 ELSE 1 END) AS organismQuantity ,
sum(CASE WHEN v_organismQuantityType IS NULL THEN 0 ELSE 1 END) AS organismQuantityType ,
sum(CASE WHEN v_sex IS NULL THEN 0 ELSE 1 END) AS sex ,
sum(CASE WHEN v_lifeStage IS NULL THEN 0 ELSE 1 END) AS lifeStage ,
sum(CASE WHEN v_reproductiveCondition IS NULL THEN 0 ELSE 1 END) AS reproductiveCondition ,
sum(CASE WHEN v_behavior IS NULL THEN 0 ELSE 1 END) AS behavior ,
sum(CASE WHEN v_establishmentMeans IS NULL THEN 0 ELSE 1 END) AS establishmentMeans ,
sum(CASE WHEN v_occurrenceStatus IS NULL THEN 0 ELSE 1 END) AS occurrenceStatus ,
sum(CASE WHEN v_preparations IS NULL THEN 0 ELSE 1 END) AS preparations ,
sum(CASE WHEN v_disposition IS NULL THEN 0 ELSE 1 END) AS disposition ,
sum(CASE WHEN v_associatedMedia IS NULL THEN 0 ELSE 1 END) AS associatedMedia ,
sum(CASE WHEN v_associatedReferences IS NULL THEN 0 ELSE 1 END) AS associatedReferences ,
sum(CASE WHEN v_associatedSequences IS NULL THEN 0 ELSE 1 END) AS associatedSequences ,
sum(CASE WHEN v_associatedTaxa IS NULL THEN 0 ELSE 1 END) AS associatedTaxa ,
sum(CASE WHEN v_otherCatalogNumbers IS NULL THEN 0 ELSE 1 END) AS otherCatalogNumbers ,
sum(CASE WHEN v_occurrenceRemarks IS NULL THEN 0 ELSE 1 END) AS occurrenceRemarks ,
sum(CASE WHEN v_organismID IS NULL THEN 0 ELSE 1 END) AS organismID ,
sum(CASE WHEN v_organismName IS NULL THEN 0 ELSE 1 END) AS organismName ,
sum(CASE WHEN v_organismScope IS NULL THEN 0 ELSE 1 END) AS organismScope ,
sum(CASE WHEN v_associatedOccurrences IS NULL THEN 0 ELSE 1 END) AS associatedOccurrences ,
sum(CASE WHEN v_associatedOrganisms IS NULL THEN 0 ELSE 1 END) AS associatedOrganisms ,
sum(CASE WHEN v_previousIdentifications IS NULL THEN 0 ELSE 1 END) AS previousIdentifications ,
sum(CASE WHEN v_organismRemarks IS NULL THEN 0 ELSE 1 END) AS organismRemarks ,
sum(CASE WHEN v_materialSampleID IS NULL THEN 0 ELSE 1 END) AS materialSampleID ,
sum(CASE WHEN v_eventID IS NULL THEN 0 ELSE 1 END) AS eventID ,
sum(CASE WHEN v_parentEventID IS NULL THEN 0 ELSE 1 END) AS parentEventID,
sum(CASE WHEN v_fieldNumber IS NULL THEN 0 ELSE 1 END) AS fieldNumber ,
sum(CASE WHEN v_eventDate IS NULL THEN 0 ELSE 1 END) AS eventDate ,
sum(CASE WHEN v_eventTime IS NULL THEN 0 ELSE 1 END) AS eventTime ,
sum(CASE WHEN v_startDayOfYear IS NULL THEN 0 ELSE 1 END) AS startDayOfYear ,
sum(CASE WHEN v_endDayOfYear IS NULL THEN 0 ELSE 1 END) AS endDayOfYear ,
sum(CASE WHEN v_year IS NULL THEN 0 ELSE 1 END) AS year ,
sum(CASE WHEN v_month IS NULL THEN 0 ELSE 1 END) AS month ,
sum(CASE WHEN v_day IS NULL THEN 0 ELSE 1 END) AS day ,
sum(CASE WHEN v_verbatimEventDate IS NULL THEN 0 ELSE 1 END) AS verbatimEventDate ,
sum(CASE WHEN v_habitat IS NULL THEN 0 ELSE 1 END) AS habitat ,
sum(CASE WHEN v_samplingProtocol IS NULL THEN 0 ELSE 1 END) AS samplingProtocol ,
sum(CASE WHEN v_samplingEffort IS NULL THEN 0 ELSE 1 END) AS samplingEffort ,
sum(CASE WHEN v_sampleSizeValue IS NULL THEN 0 ELSE 1 END) AS sampleSizeValue ,
sum(CASE WHEN v_sampleSizeUnit IS NULL THEN 0 ELSE 1 END) AS sampleSizeUnit ,
sum(CASE WHEN v_fieldNotes IS NULL THEN 0 ELSE 1 END) AS fieldNotes ,
sum(CASE WHEN v_eventRemarks IS NULL THEN 0 ELSE 1 END) AS eventRemarks ,
sum(CASE WHEN v_locationID IS NULL THEN 0 ELSE 1 END) AS locationID ,
sum(CASE WHEN v_higherGeographyID IS NULL THEN 0 ELSE 1 END) AS higherGeographyID ,
sum(CASE WHEN v_higherGeography IS NULL THEN 0 ELSE 1 END) AS higherGeography ,
sum(CASE WHEN v_continent IS NULL THEN 0 ELSE 1 END) AS continent ,
sum(CASE WHEN v_waterBody IS NULL THEN 0 ELSE 1 END) AS waterBody ,
sum(CASE WHEN v_islandGroup IS NULL THEN 0 ELSE 1 END) AS islandGroup ,
sum(CASE WHEN v_island IS NULL THEN 0 ELSE 1 END) AS island ,
sum(CASE WHEN v_country IS NULL THEN 0 ELSE 1 END) AS country ,
sum(CASE WHEN v_countryCode IS NULL THEN 0 ELSE 1 END) AS countryCode ,
sum(CASE WHEN v_stateProvince IS NULL THEN 0 ELSE 1 END) AS stateProvince ,
sum(CASE WHEN v_county IS NULL THEN 0 ELSE 1 END) AS county ,
sum(CASE WHEN v_municipality IS NULL THEN 0 ELSE 1 END) AS municipality ,
sum(CASE WHEN v_locality IS NULL THEN 0 ELSE 1 END) AS locality ,
sum(CASE WHEN v_verbatimLocality IS NULL THEN 0 ELSE 1 END) AS verbatimLocality ,
sum(CASE WHEN v_minimumElevationInMeters IS NULL THEN 0 ELSE 1 END) AS minimumElevationInMeters ,
sum(CASE WHEN v_maximumElevationInMeters IS NULL THEN 0 ELSE 1 END) AS maximumElevationInMeters ,
sum(CASE WHEN v_verbatimElevation IS NULL THEN 0 ELSE 1 END) AS verbatimElevation ,
sum(CASE WHEN v_minimumDepthInMeters IS NULL THEN 0 ELSE 1 END) AS minimumDepthInMeters ,
sum(CASE WHEN v_maximumDepthInMeters IS NULL THEN 0 ELSE 1 END) AS maximumDepthInMeters ,
sum(CASE WHEN v_verbatimDepth IS NULL THEN 0 ELSE 1 END) AS verbatimDepth ,
sum(CASE WHEN v_minimumDistanceAboveSurfaceInMeters IS NULL THEN 0 ELSE 1 END) AS minimumDistanceAboveSurfaceInMeters ,
sum(CASE WHEN v_maximumDistanceAboveSurfaceInMeters IS NULL THEN 0 ELSE 1 END) AS maximumDistanceAboveSurfaceInMeters ,
sum(CASE WHEN v_locationAccordingTo IS NULL THEN 0 ELSE 1 END) AS locationAccordingTo ,
sum(CASE WHEN v_locationRemarks IS NULL THEN 0 ELSE 1 END) AS locationRemarks ,
sum(CASE WHEN v_decimalLatitude IS NULL THEN 0 ELSE 1 END) AS decimalLatitude ,
sum(CASE WHEN v_decimalLongitude IS NULL THEN 0 ELSE 1 END) AS decimalLongitude ,
sum(CASE WHEN v_geodeticDatum IS NULL THEN 0 ELSE 1 END) AS geodeticDatum ,
sum(CASE WHEN v_coordinateUncertaintyInMeters IS NULL THEN 0 ELSE 1 END) AS coordinateUncertaintyInMeters ,
sum(CASE WHEN v_coordinatePrecision IS NULL THEN 0 ELSE 1 END) AS coordinatePrecision ,
sum(CASE WHEN v_pointRadiusSpatialFit IS NULL THEN 0 ELSE 1 END) AS pointRadiusSpatialFit ,
sum(CASE WHEN v_verbatimCoordinates IS NULL THEN 0 ELSE 1 END) AS verbatimCoordinates ,
sum(CASE WHEN v_verbatimLatitude IS NULL THEN 0 ELSE 1 END) AS verbatimLatitude ,
sum(CASE WHEN v_verbatimLongitude IS NULL THEN 0 ELSE 1 END) AS verbatimLongitude ,
sum(CASE WHEN v_verbatimCoordinateSystem IS NULL THEN 0 ELSE 1 END) AS verbatimCoordinateSystem ,
sum(CASE WHEN v_verbatimSRS IS NULL THEN 0 ELSE 1 END) AS verbatimSRS ,
sum(CASE WHEN v_footprintWKT IS NULL THEN 0 ELSE 1 END) AS footprintWKT ,
sum(CASE WHEN v_footprintSRS IS NULL THEN 0 ELSE 1 END) AS footprintSRS ,
sum(CASE WHEN v_footprintSpatialFit IS NULL THEN 0 ELSE 1 END) AS footprintSpatialFit ,
sum(CASE WHEN v_georeferencedBy IS NULL THEN 0 ELSE 1 END) AS georeferencedBy ,
sum(CASE WHEN v_georeferencedDate IS NULL THEN 0 ELSE 1 END) AS georeferencedDate ,
sum(CASE WHEN v_georeferenceProtocol IS NULL THEN 0 ELSE 1 END) AS georeferenceProtocol ,
sum(CASE WHEN v_georeferenceSources IS NULL THEN 0 ELSE 1 END) AS georeferenceSources ,
sum(CASE WHEN v_georeferenceVerificationStatus IS NULL THEN 0 ELSE 1 END) AS georeferenceVerificationStatus ,
sum(CASE WHEN v_georeferenceRemarks IS NULL THEN 0 ELSE 1 END) AS georeferenceRemarks ,
sum(CASE WHEN v_geologicalContextID IS NULL THEN 0 ELSE 1 END) AS geologicalContextID ,
sum(CASE WHEN v_earliestEonOrLowestEonothem IS NULL THEN 0 ELSE 1 END) AS earliestEonOrLowestEonothem ,
sum(CASE WHEN v_latestEonOrHighestEonothem IS NULL THEN 0 ELSE 1 END) AS latestEonOrHighestEonothem ,
sum(CASE WHEN v_earliestEraOrLowestErathem IS NULL THEN 0 ELSE 1 END) AS earliestEraOrLowestErathem ,
sum(CASE WHEN v_latestEraOrHighestErathem IS NULL THEN 0 ELSE 1 END) AS latestEraOrHighestErathem ,
sum(CASE WHEN v_earliestPeriodOrLowestSystem IS NULL THEN 0 ELSE 1 END) AS earliestPeriodOrLowestSystem ,
sum(CASE WHEN v_latestPeriodOrHighestSystem IS NULL THEN 0 ELSE 1 END) AS latestPeriodOrHighestSystem ,
sum(CASE WHEN v_earliestEpochOrLowestSeries IS NULL THEN 0 ELSE 1 END) AS earliestEpochOrLowestSeries ,
sum(CASE WHEN v_latestEpochOrHighestSeries IS NULL THEN 0 ELSE 1 END) AS latestEpochOrHighestSeries ,
sum(CASE WHEN v_earliestAgeOrLowestStage IS NULL THEN 0 ELSE 1 END) AS earliestAgeOrLowestStage ,
sum(CASE WHEN v_latestAgeOrHighestStage IS NULL THEN 0 ELSE 1 END) AS latestAgeOrHighestStage ,
sum(CASE WHEN v_lowestBiostratigraphicZone IS NULL THEN 0 ELSE 1 END) AS lowestBiostratigraphicZone ,
sum(CASE WHEN v_highestBiostratigraphicZone IS NULL THEN 0 ELSE 1 END) AS highestBiostratigraphicZone ,
sum(CASE WHEN v_lithostratigraphicTerms IS NULL THEN 0 ELSE 1 END) AS lithostratigraphicTerms ,
sum(CASE WHEN v_group IS NULL THEN 0 ELSE 1 END) AS group ,
sum(CASE WHEN v_formation IS NULL THEN 0 ELSE 1 END) AS formation ,
sum(CASE WHEN v_member IS NULL THEN 0 ELSE 1 END) AS member ,
sum(CASE WHEN v_bed IS NULL THEN 0 ELSE 1 END) AS bed ,
sum(CASE WHEN v_identificationID IS NULL THEN 0 ELSE 1 END) AS identificationID ,
sum(CASE WHEN v_identificationQualifier IS NULL THEN 0 ELSE 1 END) AS identificationQualifier ,
sum(CASE WHEN v_typeStatus IS NULL THEN 0 ELSE 1 END) AS typeStatus ,
sum(CASE WHEN v_identifiedBy IS NULL THEN 0 ELSE 1 END) AS identifiedBy ,
sum(CASE WHEN v_dateIdentified IS NULL THEN 0 ELSE 1 END) AS dateIdentified ,
sum(CASE WHEN v_identificationReferences IS NULL THEN 0 ELSE 1 END) AS identificationReferences ,
sum(CASE WHEN v_identificationVerificationStatus IS NULL THEN 0 ELSE 1 END) AS identificationVerificationStatus ,
sum(CASE WHEN v_identificationRemarks IS NULL THEN 0 ELSE 1 END) AS identificationRemarks ,
sum(CASE WHEN v_taxonID IS NULL THEN 0 ELSE 1 END) AS taxonID ,
sum(CASE WHEN v_scientificNameID IS NULL THEN 0 ELSE 1 END) AS scientificNameID ,
sum(CASE WHEN v_acceptedNameUsageID IS NULL THEN 0 ELSE 1 END) AS acceptedNameUsageID ,
sum(CASE WHEN v_parentNameUsageID IS NULL THEN 0 ELSE 1 END) AS parentNameUsageID ,
sum(CASE WHEN v_originalNameUsageID IS NULL THEN 0 ELSE 1 END) AS originalNameUsageID ,
sum(CASE WHEN v_nameAccordingToID IS NULL THEN 0 ELSE 1 END) AS nameAccordingToID ,
sum(CASE WHEN v_namePublishedInID IS NULL THEN 0 ELSE 1 END) AS namePublishedInID ,
sum(CASE WHEN v_taxonConceptID IS NULL THEN 0 ELSE 1 END) AS taxonConceptID ,
sum(CASE WHEN v_scientificName IS NULL THEN 0 ELSE 1 END) AS scientificName ,
sum(CASE WHEN v_acceptedNameUsage IS NULL THEN 0 ELSE 1 END) AS acceptedNameUsage ,
sum(CASE WHEN v_parentNameUsage IS NULL THEN 0 ELSE 1 END) AS parentNameUsage ,
sum(CASE WHEN v_originalNameUsage IS NULL THEN 0 ELSE 1 END) AS originalNameUsage ,
sum(CASE WHEN v_nameAccordingTo IS NULL THEN 0 ELSE 1 END) AS nameAccordingTo ,
sum(CASE WHEN v_namePublishedIn IS NULL THEN 0 ELSE 1 END) AS namePublishedIn ,
sum(CASE WHEN v_namePublishedInYear IS NULL THEN 0 ELSE 1 END) AS namePublishedInYear ,
sum(CASE WHEN v_higherClassification IS NULL THEN 0 ELSE 1 END) AS higherClassification ,
sum(CASE WHEN v_kingdom IS NULL THEN 0 ELSE 1 END) AS kingdom ,
sum(CASE WHEN v_phylum IS NULL THEN 0 ELSE 1 END) AS phylum ,
sum(CASE WHEN v_class IS NULL THEN 0 ELSE 1 END) AS class ,
sum(CASE WHEN v_order IS NULL THEN 0 ELSE 1 END) AS order ,
sum(CASE WHEN v_family IS NULL THEN 0 ELSE 1 END) AS family ,
sum(CASE WHEN v_genus IS NULL THEN 0 ELSE 1 END) AS genus ,
sum(CASE WHEN v_subgenus IS NULL THEN 0 ELSE 1 END) AS subgenus ,
sum(CASE WHEN v_specificEpithet IS NULL THEN 0 ELSE 1 END) AS specificEpithet ,
sum(CASE WHEN v_infraspecificEpithet IS NULL THEN 0 ELSE 1 END) AS infraspecificEpithet ,
sum(CASE WHEN v_taxonRank IS NULL THEN 0 ELSE 1 END) AS taxonRank ,
sum(CASE WHEN v_verbatimTaxonRank IS NULL THEN 0 ELSE 1 END) AS verbatimTaxonRank ,
sum(CASE WHEN v_scientificNameAuthorship IS NULL THEN 0 ELSE 1 END) AS scientificNameAuthorship ,
sum(CASE WHEN v_vernacularName IS NULL THEN 0 ELSE 1 END) AS vernacularName ,
sum(CASE WHEN v_nomenclaturalCode IS NULL THEN 0 ELSE 1 END) AS nomenclaturalCode ,
sum(CASE WHEN v_taxonomicStatus IS NULL THEN 0 ELSE 1 END) AS taxonomicStatus ,
sum(CASE WHEN v_nomenclaturalStatus IS NULL THEN 0 ELSE 1 END) AS nomenclaturalStatus ,
sum(CASE WHEN v_taxonRemarks IS NULL THEN 0 ELSE 1 END) AS taxonRemarks
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment