Skip to content

Instantly share code, notes, and snippets.

@kuenishi
Created February 14, 2014 14:42
Show Gist options
  • Save kuenishi/9002135 to your computer and use it in GitHub Desktop.
Save kuenishi/9002135 to your computer and use it in GitHub Desktop.
-- columnar style +
create table if not exists col_fluentlog
(dt string, tag string, host string, user string, method string, path string, code int, size int, referer string, agent string, time string, tag2 string)
row format serde 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe'
stored as inputformat 'org.apache.hadoop.hive.ql.io.RCFileInputFormat'
outputformat 'org.apache.hadoop.hive.ql.io.RCFileOutputFormat';
create table if not exists ocol_fluentlog
(dt string, tag string, host string, user string, method string, path string, code int, size int, referer string, agent string, time string, tag2 string)
stored as orc;
-- row format serde 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe'
add jar lib/parquet-hive-bundle-1.3.2.jar;
create table if not exists parq_fluentlog
(dt string, tag string, host string, user string, method string, path string, code string, size string, referer string, agent string, time string, tag2 string)
row format serde 'parquet.hive.serde.ParquetHiveSerDe'
stored as inputformat 'parquet.hive.DeprecatedParquetInputFormat'
outputformat 'parquet.hive.DeprecatedParquetOutputFormat';
show tables;
-- memo
-- host user method path code size referer agent time tag id
-- select * from fluentlog f lateral view
-- json_tuple(f.json,"host","method","path","size","time" ,"tag") b
-- as id limit 10;
-- see create_fluent_log_table.hql for table schema
insert overwrite table col_fluentlog
select f.dt, f.tag, j.host, j.user, j.method, j.path, j.code, j.size, j.referer, j.agent, j.time, j.tag2
from fluentlog f lateral view
json_tuple(f.json, "host", "user", "method", "path", "code", "size", "referer", "agent", "time", "tag") j
as host, user, method, path, code, size, referer, agent, time, tag2;
insert overwrite table ocol_fluentlog
select f.dt, f.tag, j.host, j.user, j.method, j.path, j.code, j.size, j.referer, j.agent, j.time, j.tag2
from fluentlog f lateral view
json_tuple(f.json, "host", "user", "method", "path", "code", "size", "referer", "agent", "time", "tag") j
as host, user, method, path, code, size, referer, agent, time, tag2;
add jar lib/parquet-hive-bundle-1.3.2.jar;
insert overwrite table parq_fluentlog
select f.dt, f.tag, j.host, j.user, j.method, j.path, j.code, j.size, j.referer, j.agent, j.time, j.tag2
from fluentlog f lateral view
json_tuple(f.json, "host", "user", "method", "path", "code", "size", "referer", "agent", "time", "tag") j
as host, user, method, path, code, size, referer, agent, time, tag2;
-- memo
-- host user method path code size referer agent time tag id
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment