Skip to content

Instantly share code, notes, and snippets.

@PeterAttardo
Last active November 16, 2022 08:14
Show Gist options
  • Star 12 You must be signed in to star a gist
  • Fork 4 You must be signed in to fork a gist
  • Save PeterAttardo/bc9cee647e1a8c9f5874b80a0cfd0231 to your computer and use it in GitHub Desktop.
Save PeterAttardo/bc9cee647e1a8c9f5874b80a0cfd0231 to your computer and use it in GitHub Desktop.
Create calendar dimension table in hive query language (HQL)
set hivevar:start_date=0000-01-01;
set hivevar:days=1000000;
set hivevar:table_name=[INSERT YOUR TABLE NAME HERE];
-- If you are running a version of HIVE prior to 1.2, comment out all uses of date_format() and uncomment the lines below for equivalent functionality
CREATE TABLE IF NOT EXISTS ${table_name} AS
WITH dates AS (
SELECT date_add("${start_date}", a.pos) as date
FROM (SELECT posexplode(split(repeat(",", ${days}), ","))) a
),
dates_expanded AS (
SELECT
date,
year(date) as year,
month(date) as month,
day(date) as day,
date_format(date, 'u') as day_of_week
-- from_unixtime(unix_timestamp(date, "yyyy-MM-dd"), "u") as day_of_week
FROM dates
)
SELECT
date,
year,
cast(month(date)/4 + 1 AS BIGINT) as quarter,
month,
date_format(date, 'W') as week_of_month,
-- from_unixtime(unix_timestamp(date, "yyyy-MM-dd"), "W") as week_of_month,
date_format(date, 'w') as week_of_year,
-- from_unixtime(unix_timestamp(date, "yyyy-MM-dd"), "w") as week_of_year,
day,
day_of_week,
date_format(date, 'EEE') as day_of_week_s,
-- from_unixtime(unix_timestamp(date, "yyyy-MM-dd"), "EEE") as day_of_week_s,
date_format(date, 'D') as day_of_year,
-- from_unixtime(unix_timestamp(date, "yyyy-MM-dd"), "D") as day_of_year,
datediff(date, "1970-01-01") as day_of_epoch,
if(day_of_week BETWEEN 6 AND 7, true, false) as weekend,
if(
((month = 1 AND day = 1 AND day_of_week between 1 AND 5) OR (day_of_week = 1 AND month = 1 AND day BETWEEN 1 AND 3)) -- New Year's Day
OR (month = 1 AND day_of_week = 1 AND day BETWEEN 15 AND 21) -- MLK Jr
OR (month = 2 AND day_of_week = 1 AND day BETWEEN 15 AND 21) -- President's Day
OR (month = 5 AND day_of_week = 1 AND day BETWEEN 25 AND 31) -- Memorial Day
OR ((month = 7 AND day = 4 AND day_of_week between 1 AND 5) OR (day_of_week = 1 AND month = 7 AND day BETWEEN 4 AND 6)) -- Independence Day
OR (month = 9 AND day_of_week = 1 AND day BETWEEN 1 AND 7) -- Labor Day
OR ((month = 11 AND day = 11 AND day_of_week between 1 AND 5) OR (day_of_week = 1 AND month = 11 AND day BETWEEN 11 AND 13)) -- Veteran's Day
OR (month = 11 AND day_of_week = 4 AND day BETWEEN 22 AND 28) -- Thanksgiving
OR ((month = 12 AND day = 25 AND day_of_week between 1 AND 5) OR (day_of_week = 1 AND month = 12 AND day BETWEEN 25 AND 27)) -- Christmas
,true, false) as us_holiday
FROM dates_expanded
SORT BY date
;
@Tonyli1991
Copy link

Thanks for sharing it.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment