Skip to content

Instantly share code, notes, and snippets.

@irajhedayati
Created March 18, 2021 04:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save irajhedayati/c595e349d68b7a5074da81f1b8c6eec5 to your computer and use it in GitHub Desktop.
Save irajhedayati/c595e349d68b7a5074da81f1b8c6eec5 to your computer and use it in GitHub Desktop.
-- Create a flat table
CREATE TABLE calls_nested (
call_id STRING,
name STRING,
age INT
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
COLLECTION ITEMS TERMINATED BY '|'
MAP KEYS TERMINATED BY '#'
LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
-- Insert one record
INSERT INTO TABLE calls VALUES ('1', 'John', 20);
-- Check the content
SELECT * FROM calls_nested;
-- Check the file output file
-- Add a new column with type of struct
ALTER TABLE calls_nested
ADD COLUMNS (account STRUCT < first_name: STRING, last_name: STRING >);
-- Insert a new record where 'account' has value as well.
-- For an struct type we use 'named_struct' function.
-- usage: named_struct(FIELD_1_NAME, FIELD_1_VALUE, FIELD_2_NAME, FIELD_2_VALUE, ...)
-- The INSERT with VALUES doesn't support 'named_struct' and we should use a different approach
INSERT INTO TABLE calls_nested
SELECT
'2' AS call_id, 'Joe' AS name, 20 AS age, named_struct('first_name', 'Joe', 'last_name', 'Doe') AS account;
-- Check the content
SELECT * FROM calls_nested;
-- Check the file output file and note for the delimiter for struct
-- Add a new column with type of array
ALTER TABLE calls_nested
ADD COLUMNS (phone_numbers ARRAY < STRING >);
-- Insert a new record
-- For an array we use 'array' function.
-- usage: array(VALUE1, VALUE2, ...)
INSERT INTO table calls_nested
SELECT
'3' as call_id,
'John' as name,
30 as age,
named_struct('first_name', 'John', 'last_name', 'Smith') AS account,
array('514-111-2222', '514-333-4444') AS phone_numbers;
-- Check the content
SELECT * FROM calls_nested;
-- Check the file output file and note for the delimiter for array
-- Add a new column with type of array of struct
ALTER TABLE calls_nested
ADD COLUMNS (addresses array< struct< street: string, city: string>>);
-- Insert a new record
INSERT into calls_nested
SELECT
'5' AS call_id,
'Jack' AS name,
45 AS age,
named_struct('first_name', 'Joe', 'last_name', 'Doe') AS account,
named_struct('home', '514-111-2222', 'work', '514-333-4444') AS phone_directory,
array(
named_struct('street', '1 Guy', 'city', 'Montreal'),
named_struct('street', '1 McGill', 'city', 'Montreal')
) AS addresses;
-- Check the content
SELECT * FROM calls_nested;
-- Check the file output file and note how it handles collection delimiters for array of struct
-- We know that Hive uses same delimiter for array and struct
-- And we know that Hive doesn't save the field names in the files
-- Let's leverage that and see what happens if we just change the schema
-- form array to struct without any modification on the underlying files
ALTER TABLE calls_nested
CHANGE phone_numbers phone_directory STRUCT< home: STRING, work: STRING>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment