Skip to content

Instantly share code, notes, and snippets.

@nuria
Last active August 11, 2017 20:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nuria/acd67dc1d237c59a2dda9799e82da4c3 to your computer and use it in GitHub Desktop.
Save nuria/acd67dc1d237c59a2dda9799e82da4c3 to your computer and use it in GitHub Desktop.
Create an Avro-backed Hive table on top of existing Avro files
{
"type" : "record",
"name" : "AutoGeneratedSchema",
"doc" : "Sqoop import of QueryResult",
"fields" : [ {
"name" : "id",
"type" : [ "null", "int" ],
"default" : null,
"columnName" : "id",
"sqlType" : "4"
}, {
"name" : "uuid",
"type" : [ "null", "bytes" ],
"default" : null,
"columnName" : "uuid",
"sqlType" : "-2"
}, {
"name" : "timestamp",
"type" : [ "null", "string" ],
"default" : null,
"columnName" : "timestamp",
"sqlType" : "12"
}, {
"name" : "webhost",
"type" : [ "null", "string" ],
"default" : null,
"columnName" : "webhost",
"sqlType" : "12"
}, {
"name" : "wiki",
"type" : [ "null", "bytes" ],
"default" : null,
"columnName" : "wiki",
"sqlType" : "-3"
}, {
"name" : "event_isAPI",
"type" : [ "null", "int" ],
"default" : null,
"columnName" : "event_isAPI",
"sqlType" : "-5"
}, {
"name" : "event_isMobile",
"type" : [ "null", "int" ],
"default" : null,
"columnName" : "event_isMobile",
"sqlType" : "-5"
}, {
"name" : "event_revisionId",
"type" : [ "null", "int" ],
"default" : null,
"columnName" : "event_revisionId",
"sqlType" : "-5"
} ],
"tableName" : "QueryResult"
}
-- External Hive table over the Avro files under /tmp/PageContentSaveCompleteAvro.
-- No column list is given here: the AvroSerDe is pointed at a schema file
-- through the 'avro.schema.url' table property instead.
CREATE EXTERNAL TABLE `PageContentSaveComplete_5588433`
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
STORED AS
    INPUTFORMAT  'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
LOCATION 'hdfs://analytics-hadoop/tmp/PageContentSaveCompleteAvro'
TBLPROPERTIES (
    'avro.schema.url' = 'hdfs://analytics-hadoop/tmp/PageContentSaveCompleteAutoGeneratedSchema.avsc'
);
-- External Hive table over the same Avro files, created in database `nuria`
-- with an explicit column list rather than an 'avro.schema.url' property.
--
-- Column types follow the Avro schema of the files:
--   * `timestamp` and `webhost` are Avro ["null","string"], so they are
--     declared as string (previously binary, which exposed raw bytes).
--   * `uuid` and `wiki` are Avro ["null","bytes"], so they stay binary.
--
-- NOTE(review): Hive folds column identifiers to lower case while Avro field
-- names are case-sensitive; confirm that `event_isAPI`, `event_isMobile` and
-- `event_revisionId` resolve against the files and do not come back NULL.
-- NOTE(review): `event_revisionId` is bigint here but "int" in the Avro
-- schema (sqlType -5); presumably this relies on int-to-long promotion — confirm.
USE nuria;

CREATE EXTERNAL TABLE `PageContentSaveComplete_5588433` (
    `id`               int,
    `uuid`             binary,
    `timestamp`        string,
    `webhost`          string,
    `wiki`             binary,
    `event_isAPI`      int,
    `event_isMobile`   int,
    `event_revisionId` bigint
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
STORED AS
    INPUTFORMAT  'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
LOCATION 'hdfs://analytics-hadoop/tmp/PageContentSaveCompleteAvro'
;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment