Created
September 28, 2017 19:27
-
-
Save vitalibertas/4eff16e088aca0122d8c167c7977c4c4 to your computer and use it in GitHub Desktop.
Hive row_number() in place of aggregate to determine the maximum event time for each ID per day.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SET hive.execution.engine = mr; | |
SET hive.support.concurrency = false; | |
SET hive.exec.parallel = true; | |
SET hive.exec.dynamic.partition.mode=nonstrict; | |
USE hosting_stats; | |
WITH Rank AS ( | |
SELECT | |
cid | |
,event_time | |
,storage_bytes_consumed | |
,storage_device | |
,storage_type | |
,year | |
,month | |
,day | |
,row_number() OVER (PARTITION BY cid ORDER BY event_time DESC) AS row_num | |
FROM | |
hosting_storage | |
WHERE | |
year = ${year} | |
AND month = ${month} | |
AND day = ${day} | |
) | |
INSERT OVERWRITE TABLE mwp_storage PARTITION (year, month, day) | |
SELECT | |
cid | |
,event_time | |
,storage_bytes_consumed | |
,storage_device | |
,storage_type | |
,year | |
,month | |
,day | |
FROM | |
Rank | |
WHERE | |
row_num = 1 | |
; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment