timrobertson100/snapshotSchema.sql

## snapshotSchema.sql
-- Hive DDL template for one dated occurrence snapshot table in the snapshot database.
--
-- Placeholders (presumably substituted textually before the statement is run):
--   ${date}         snapshot date suffix, used for the table name AND its HDFS directory
--   ${datasetType}  column type of dataset_id, see the note on that column
--
-- The HDFS directory is derived from the same date placeholder as the table name.
-- It was previously pinned to occurrence_20120713, which made every generated table
-- share one directory (dropping any of these managed tables would remove that data).
--
-- No statistics are declared in TBLPROPERTIES. The former values (numFiles, totalSize,
-- numRows, rawDataSize, numPartitions, transient_lastDdlTime) appear to have been copied
-- from a single existing table and would be wrong for any other snapshot.
-- NOTE(review): Hive is expected to maintain these itself - confirm on the target version.
CREATE TABLE snapshot.occurrence_${date} (
  id                 int,
  -- STRING (UUID) in newer versions (Post Sept. 2013) and INT in older
  dataset_id         ${datasetType},
  publisher_id       int,
  -- taxonomy as names
  kingdom            string,
  phylum             string,
  class_rank         string,
  order_rank         string,
  family             string,
  genus              string,
  species            string,
  scientific_name    string,
  -- taxonomy as integer keys
  kingdom_id         int,
  phylum_id          int,
  class_id           int,
  order_id           int,
  family_id          int,
  genus_id           int,
  species_id         int,
  taxon_id           int,
  basis_of_record    string,
  latitude           double,
  longitude          double,
  country            string,
  day                int,
  month              int,
  year               int,
  publisher_country  string
)
ROW FORMAT SERDE
  'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe'
STORED AS
  INPUTFORMAT  'org.apache.hadoop.hive.ql.io.RCFileInputFormat'
  OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.RCFileOutputFormat'
LOCATION
  'hdfs://c1n8.gbif.org:8020/user/hive/warehouse/snapshot.db/occurrence_${date}';