Created
January 7, 2017 00:29
-
-
Save Torenable/d713bd613d76ebb3e74c4b53202722ce to your computer and use it in GitHub Desktop.
Convert CSV to Parquet for use with Cloudera Impala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- From CSV to Parquet, for querying with Cloudera Impala.
--
-- Step 1: expose the raw CSV data as an external Hive table.
-- Placeholders to substitute before running: [from_table], the column list
-- (`schema DATA_TYPE, ...`) and the LOCATION path.
--
-- Notes:
--   * OpenCSVSerde reads every column as STRING regardless of the declared
--     type; cast to the real types when copying rows into the Parquet table.
--   * LOCATION must be an HDFS directory that contains the CSV file(s),
--     not the path of a single file.
--   * "skip.header.line.count" = "1" skips the header row of each file.
--   * NOTE(review): this statement is HiveQL; Impala is not expected to read
--     SerDe-backed tables, so run it in Hive -- confirm for your cluster.
CREATE EXTERNAL TABLE IF NOT EXISTS [from_table] (
    schema DATA_TYPE,
    ...
)
COMMENT 'A sample of vehicle information'
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
WITH SERDEPROPERTIES (
    "separatorChar" = ",",
    "quoteChar" = "\""
)
LOCATION '/user/cloudera/path/to/source_csv_dir'
TBLPROPERTIES ("skip.header.line.count" = "1");
-- Step 2: conversion -- create the Parquet-backed table and copy the CSV rows
-- into it.
-- Placeholders to substitute before running: [target_table], [from_table],
-- the column lists and the LOCATION directory.
CREATE EXTERNAL TABLE IF NOT EXISTS [target_table] (
    same_schema DATA_TYPE,
    ...
)
STORED AS PARQUET
LOCATION '/user/cloudera/path/to/target';

-- The CSV source table is read through OpenCSVSerde, which yields STRING
-- columns, so list the columns explicitly and CAST each one to the target
-- type instead of relying on SELECT * and positional, implicit conversion.
-- INSERT OVERWRITE replaces any data already present in the target table.
INSERT OVERWRITE TABLE [target_table]
SELECT
    CAST(schema AS DATA_TYPE) AS same_schema,
    ...
FROM [from_table];

-- NOTE(review): when these statements are run in Hive, Impala typically needs
--   INVALIDATE METADATA [target_table];
-- before it can see the new table -- confirm for your Impala version.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment