Last active
March 10, 2021 04:56
-
-
Save irajhedayati/d82772d59e48bb783832329465afdb97 to your computer and use it in GitHub Desktop.
A set of tutorials for Hive encodings
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CREATE DATABASE fall2019_iraj; | |
-- Encoding; CSV table | |
CREATE TABLE test ( | |
name STRING, | |
age INT | |
); | |
/* Check the structure */ | |
-- Note the location | |
DESCRIBE test; | |
DESCRIBE EXTENDED test; | |
DESCRIBE FORMATTED test; | |
-- Insert one record | |
INSERT into test values ('John', 30); | |
-- Check the file on HDFS | |
CREATE TABLE test_csv ( | |
name STRING, | |
age INT | |
) | |
ROW FORMAT DELIMITED | |
FIELDS TERMINATED BY ',' | |
STORED AS TEXTFILE; | |
INSERT into test_csv values ('John', 30); | |
-- Check the file on HDFS |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Most of the time, we work with already stored data on HDFS | |
-- and we'd like to run analysis and query data using Hive | |
/* Create an extrernal table */ | |
CREATE EXTERNAL TABLE ext_movie ( | |
id INT, | |
title STRING, | |
year INT, | |
director STRING | |
) | |
ROW FORMAT DELIMITED | |
FIELDS TERMINATED BY ',' | |
LOCATION '/user/bdss2019/iraj/movie'; | |
SELECT * FROM ext_movie; | |
-- What is the problem? header is the first row | |
CREATE EXTERNAL TABLE ext_movie_wo_header ( | |
id int, | |
title string, | |
year int, | |
director string | |
) | |
ROW FORMAT DELIMITED | |
FIELDS TERMINATED BY ',' | |
LOCATION '/user/bdsf2001/iraj/movie' | |
TBLPROPERTIES ('skip.header.line.count' = '1'); | |
SELECT * FROM ext_movie; | |
-- What is the problem? a null value is represented with empty string | |
CREATE EXTERNAL TABLE ext_movie_wo_header_w_null ( | |
id int, | |
title string, | |
year int, | |
director string | |
) | |
ROW FORMAT DELIMITED | |
FIELDS TERMINATED BY ',' | |
LOCATION '/user/bdsf2001/iraj/movie' | |
TBLPROPERTIES ( | |
'skip.header.line.count' = '1', | |
'serialization.null.format' = '' | |
); | |
/* We could have less columns compared with data */ | |
CREATE EXTERNAL TABLE ext_movie_wo_header_w_null_wo_director ( | |
id int, | |
title string, | |
year int | |
) | |
ROW FORMAT DELIMITED | |
FIELDS TERMINATED BY ',' | |
LOCATION '/user/bdsf2001/iraj/movie' | |
TBLPROPERTIES ( | |
'skip.header.line.count' = '1', | |
'serialization.null.format' = '' | |
); | |
SELECT * FROM ext_movie; | |
/* If schema doesn't match, returns null */ | |
DROP TABLE ext_movie; | |
CREATE EXTERNAL TABLE ext_movie ( | |
mID INT, | |
title STRING, | |
director STRING | |
) | |
ROW FORMAT DELIMITED | |
FIELDS TERMINATED BY ',' | |
STORED AS TEXTFILE | |
LOCATION '/user/bdss2019/iraj/movie' | |
TBLPROPERTIES ( | |
"skip.header.line.count" = "1", | |
"serialization.null.format" ="" | |
); | |
/* If schema doesn't match, returns null */ | |
CREATE EXTERNAL TABLE ext_movie_wo_header_w_null_wo_director_type_mismatch ( | |
id int, | |
title string, | |
director string, | |
year int | |
) | |
ROW FORMAT DELIMITED | |
FIELDS TERMINATED BY ',' | |
LOCATION '/user/bdsf2001/iraj/movie' | |
TBLPROPERTIES ( | |
'skip.header.line.count' = '1', | |
'serialization.null.format' = '' | |
); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CREATE TABLE movie_sequence | |
STORED AS SEQUENCEFILE | |
AS | |
SELECT * FROM ext_movie_wo_header_w_null; | |
CREATE TABLE movie_parquet | |
STORED AS parquet | |
AS | |
SELECT * FROM ext_movie_wo_header_w_null; | |
-- parquet-tools head hdfs://quickstart.cloudera:8020/user/hive/warehouse/bdss2001_iraj.db/movie_parquet/000000_0 | |
-- parquet-tools schema hdfs://quickstart.cloudera:8020/user/hive/warehouse/bdss2001_iraj.db/movie_parquet/000000_0 | |
create external table ext_movie_parquet ( | |
mid int, | |
title string, | |
year int, | |
director string | |
) | |
stored as parquet | |
location '/user/hive/warehouse/bdss2001_iraj.db/movie_parquet/'; | |
select * from ext_movie_parquet; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment