Created
March 18, 2021 04:51
-
-
Save irajhedayati/c595e349d68b7a5074da81f1b8c6eec5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Create a flat table | |
CREATE TABLE calls_nested ( | |
call_id STRING, | |
name STRING, | |
age INT | |
) | |
ROW FORMAT DELIMITED | |
FIELDS TERMINATED BY ',' | |
COLLECTION ITEMS TERMINATED BY '|' | |
MAP KEYS TERMINATED BY '#' | |
LINES TERMINATED BY '\n' | |
STORED AS TEXTFILE; | |
-- Insert one record | |
INSERT INTO TABLE calls VALUES ('1', 'John', 20); | |
-- Check the content | |
SELECT * FROM calls_nested; | |
-- Check the file output file | |
-- Add a new column with type of struct | |
ALTER TABLE calls_nested | |
ADD COLUMNS (account STRUCT < first_name: STRING, last_name: STRING >); | |
-- Insert a new record where 'account' has value as well. | |
-- For an struct type we use 'named_struct' function. | |
-- usage: named_struct(FIELD_1_NAME, FIELD_1_VALUE, FIELD_2_NAME, FIELD_2_VALUE, ...) | |
-- The INSERT with VALUES doesn't support 'named_struct' and we should use a different approach | |
INSERT INTO TABLE calls_nested | |
SELECT | |
'2' AS call_id, 'Joe' AS name, 20 AS age, named_struct('first_name', 'Joe', 'last_name', 'Doe') AS account; | |
-- Check the content | |
SELECT * FROM calls_nested; | |
-- Check the file output file and note for the delimiter for struct | |
-- Add a new column with type of array | |
ALTER TABLE calls_nested | |
ADD COLUMNS (phone_numbers ARRAY < STRING >); | |
-- Insert a new record | |
-- For an array we use 'array' function. | |
-- usage: array(VALUE1, VALUE2, ...) | |
INSERT INTO table calls_nested | |
SELECT | |
'3' as call_id, | |
'John' as name, | |
30 as age, | |
named_struct('first_name', 'John', 'last_name', 'Smith') AS account, | |
array('514-111-2222', '514-333-4444') AS phone_numbers; | |
-- Check the content | |
SELECT * FROM calls_nested; | |
-- Check the file output file and note for the delimiter for array | |
-- Add a new column with type of array of struct | |
ALTER TABLE calls_nested | |
ADD COLUMNS (addresses array< struct< street: string, city: string>>); | |
-- Insert a new record | |
INSERT into calls_nested | |
SELECT | |
'5' AS call_id, | |
'Jack' AS name, | |
45 AS age, | |
named_struct('first_name', 'Joe', 'last_name', 'Doe') AS account, | |
named_struct('home', '514-111-2222', 'work', '514-333-4444') AS phone_directory, | |
array( | |
named_struct('street', '1 Guy', 'city', 'Montreal'), | |
named_struct('street', '1 McGill', 'city', 'Montreal') | |
) AS addresses; | |
-- Check the content | |
SELECT * FROM calls_nested; | |
-- Check the file output file and note how it handles collection delimiters for array of struct | |
-- We know that Hive uses same delimiter for array and struct | |
-- And we know that Hive doesn't save the field names in the files | |
-- Let's leverage that and see what happens if we just change the schema | |
-- form array to struct without any modification on the underlying files | |
ALTER TABLE calls_nested | |
CHANGE phone_numbers phone_directory STRUCT< home: STRING, work: STRING> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment