Last active
March 8, 2019 10:43
-
-
Save qi-qi/c58e48a32521991eabdbbf7c9c1e14ff to your computer and use it in GitHub Desktop.
athena-join-parquet.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CREATE table test.stats_20190210_20190306 WITH ( | |
bucketed_by = ARRAY['event_ts'], | |
bucket_count = 10, | |
format = 'orc', | |
external_location = 's3://acast-data-dev/stats_20190210_20190306/') AS | |
with t2 as (select request_id, bytes from test.cf_stitch) | |
SELECT cast(from_iso8601_timestamp(t1.event_date) AS timestamp) AS event_ts, | |
t1.best_effort_user_id, | |
t1.ip, | |
t1.ua_source, | |
t1.episode_url, | |
t1.method, | |
t1.status, | |
t1.range_req, | |
t1.content_length, | |
t2.bytes, | |
t1.client_type, | |
t1.is_rt, | |
transform(t1.ads, ad -> ad.i) AS ad, | |
t1.request_id AS cdn_request_id | |
FROM data_raw.batch t1 JOIN t2 ON t1.request_id = t2.request_id | |
WHERE t1.dt >= '2019-02-09' AND t1.dt <= '2019-03-07' | |
AND cast(from_iso8601_timestamp(event_date) AS Date) >= Date('2019-02-10') | |
AND cast(from_iso8601_timestamp(event_date) AS Date) <= Date('2019-03-06') | |
-- ===================== | |
CREATE table test.stats_20190210_20190306_partition_orc WITH ( | |
partitioned_by = ARRAY['dt'], | |
bucketed_by = ARRAY['event_ts'], | |
bucket_count = 1, | |
format = 'orc', | |
external_location = 's3://acast-data-dev/stats_20190210_20190306_partition_orc/') AS | |
select *, cast(event_ts as Date) as dt | |
from test.stats_20190210_20190306 | |
-- =================================== | |
CREATE table test.stats_20190225_20190306 WITH ( | |
partitioned_by = ARRAY['event_dt'], | |
bucketed_by = ARRAY['event_ts'], | |
bucket_count = 1, | |
format = 'parquet', | |
external_location = 's3://acast-data-dev/stats_20190225_20190306/') AS | |
with t2 as (select request_id, bytes from test.cf_stitch) | |
SELECT cast(from_iso8601_timestamp(t1.event_date) AS timestamp) AS event_ts, | |
t1.best_effort_user_id, | |
t1.ip, | |
t1.ua_source, | |
t1.episode_url, | |
t1.method, | |
t1.status, | |
t1.range_req, | |
t1.content_length, | |
t2.bytes, | |
t1.client_type, | |
t1.is_rt, | |
transform(t1.ads, ad -> ad.i) AS ad, | |
t1.request_id AS cdn_request_id, | |
cast(from_iso8601_timestamp(t1.event_date) AS Date) AS event_dt | |
FROM data_raw.batch t1 | |
JOIN t2 ON t1.request_id = t2.request_id | |
WHERE t1.dt >= '2019-02-24' | |
AND t1.dt <= '2019-03-07' | |
AND cast(from_iso8601_timestamp(event_date) AS Date) >= Date('2019-02-25') | |
AND cast(from_iso8601_timestamp(event_date) AS Date) <= Date('2019-03-06'); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment