Last active
June 27, 2019 07:30
-
-
Save qi-qi/1b6b5516ea14c000f483ea066db1b984 to your computer and use it in GitHub Desktop.
Athena SQL Snippet
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
select dt, count(*), count(distinct batch_id), count(distinct request_id) | |
from data_extract.requests_batch_parquet | |
group by dt | |
order by dt; | |
select dt, count(*), count(distinct batch_id), count(distinct request_id) | |
from data_extract.requests_ad_parquet | |
group by dt | |
order by dt; | |
select dt, count(*), count(distinct batch_id), count(distinct request_id) | |
from data_extract.requests_batch_parquet | |
where cardinality(ads) > 0 | |
group by dt | |
order by dt; | |
select dt, count(*), count(distinct request_id) | |
from data_raw.batch | |
group by dt | |
order by dt; | |
select * | |
from data_transform.listen_group_parquet | |
limit 100; | |
SELECT dt, | |
sum(case when ad_id not like 'AdswizzAd%' then 1 else 0 end) as others_id, | |
sum(case when ad_id like 'AdswizzAd%' then 1 else 0 end) as adswizz_id, | |
(cast(sum(case when ad_id not like 'AdswizzAd%' then 1 else 0 end) as double) / | |
sum(case when ad_id like 'AdswizzAd%' then 1 else 0 end)) as ratio | |
FROM data_extract.requests_ad_parquet | |
WHERE dt >= '2019-03-01' | |
group by dt | |
order by dt; | |
select * | |
from data_extract.requests_batch_parquet | |
where dt = date('2019-04-09') | |
and batch_id in (select batch_id | |
from data_transform.listen_group_parquet | |
where listen_group_id = 'e824edb24ff7904e946b6f5b1fa675c1a841e34427fae19fd89fafd21c8e0448' | |
and dt = '2019-04-09'); | |
select * | |
from data_transform.listen_group_parquet | |
where listen_group_id = 'e824edb24ff7904e946b6f5b1fa675c1a841e34427fae19fd89fafd21c8e0448' | |
and dt = '2019-04-09' | |
order by event_ts; | |
select event_ts, sum(cast(is_listen_group_bytes_sum_1_min as integer)) | |
from data_transform.listen_group_parquet | |
where listen_group_id = '8714e48622385025077c953ea82f0055b697c4205d478a1fd1b1a644197cdeb6' | |
and dt = '2019-04-06' | |
group by event_ts | |
order by event_ts; | |
select network_id, count(*) as counting | |
from data_transform.index_iab_parquet | |
where is_iab_valid = true | |
group by network_id | |
order by 2 desc; | |
select Date(event_ts), count(*) | |
from data_transform.index_iab_parquet | |
where is_iab_valid = true | |
group by Date(event_ts); | |
select count(*) | |
from data_transform.index_podindex_parquet | |
where is_podindex_valid = true; | |
select count(*) | |
from data_transform.index_iab_parquet | |
where is_iab_daily_rank_valid = true | |
and is_iab_listen_group_bytes_sum_valid; | |
select count(*) | |
from data_transform.index_podindex_parquet | |
where is_podindex_hourly_rank_valid = true | |
and is_listen_group_bytes_sum_1_min = true; | |
select * | |
from data_transform.listen_group_parquet | |
where user_episode_hash = '0f14fc59f1c835f8f054a33736b15988147a189e91d768eefd15ba238910210b' | |
and dt = '2019-03-01' | |
order by event_ts; | |
select * | |
from data_transform.index_iab_parquet | |
where is_iab_valid = false | |
order by listen_group_bytes_sum desc | |
limit 100; | |
select show_id, count(*) as counting | |
from data_transform.index_podindex_parquet | |
where is_podindex_valid = true | |
group by show_id | |
order by 2 desc; | |
select * | |
from data_transform.listen_group_parquet | |
where dt = '2019-03-01' | |
order by listen_group_id, event_ts | |
limit 500; | |
select show_id, count(*) as counting | |
from data_transform.index_podindex_parquet | |
where is_podindex_hourly_rank_valid = true | |
and is_listen_group_bytes_sum_1_min = true | |
group by show_id | |
order by 2 desc; | |
select user_episode_hash, count(*) | |
from data_transform.index_iab_parquet | |
where is_listen_group_legit = true | |
and status = 206 | |
group by user_episode_hash | |
order by 2 desc; | |
// different stitch size | |
select * | |
from data_transform.listen_group_parquet | |
where user_episode_hash = '684a7aacaa5a38e8f7f51a75e3a06c4f4bb56d048d3958015e67259ae88ca157' | |
and dt = '2019-03-01' | |
order by event_ts; | |
select show_id, count(*) as counting | |
from data_transform.index_iab_parquet | |
where is_iab_valid = true | |
group by show_id | |
order by 2 desc; | |
select count(*) | |
from data_transform.index_podindex_parquet; | |
select count(*) | |
from data_transform.listen_group_parquet | |
where is_stitch_bytes_1_min is null; | |
select show_id, count(*) | |
from data_transform.index_podindex_parquet | |
where is_podindex_valid = true | |
and dt = '2019-03-01' | |
group by show_id | |
order by 2 desc; | |
select show_id, count(*) | |
from data_transform.index_iab_parquet | |
where is_iab_valid = true | |
and dt = '2019-03-01' | |
group by show_id | |
order by 2 desc; | |
select dt, show_id, count(*) | |
from data_transform.index_iab_parquet | |
where is_iab_valid = true | |
and show_id in ('656c9c4a-cf04-4730-9198-df9533c488e8', 'd556eb54-6160-4c85-95f4-47d9f5216c49', | |
'ee6fd316-9ce1-4f8a-ba2c-a4a4bf5643e4') | |
group by dt, show_id | |
order by 1, 3 desc, 2; | |
select * | |
from data_transform.index_podindex_parquet t1 | |
join data_transform.index_iab_parquet t2 on t1.batch_id = t2.batch_id | |
where t1.show_id = '656c9c4a-cf04-4730-9198-df9533c488e8' | |
and t1.is_podindex_valid = false | |
and t2.is_iab_valid = true | |
and t1.status = 200; | |
select t1.dt, | |
t1.show_id, | |
sum(cast(t2.is_iab_valid as tinyint)) as iab_counting, | |
sum(cast(t1.is_podindex_valid as tinyint)) as podindex_counting | |
from data_transform.index_podindex_parquet t1 | |
join data_transform.index_iab_parquet t2 on t1.batch_id = t2.batch_id and t1.dt = t2.dt and t1.h = t2.h | |
where t1.show_id in ('656c9c4a-cf04-4730-9198-df9533c488e8', 'd556eb54-6160-4c85-95f4-47d9f5216c49', | |
'ee6fd316-9ce1-4f8a-ba2c-a4a4bf5643e4', '307373f9-d0e5-46b1-8050-b4b2937b8281') | |
group by t1.dt, t1.show_id | |
order by 1, 2; | |
select * | |
from data_transform.listen_group_parquet | |
limit 100; | |
-- iab: listen | |
select dt, show_id, count(*) as iab_counting | |
from data_transform.index_iab_parquet | |
where is_iab_valid = true | |
group by 1, 2 | |
order by dt, 3 desc, 2; | |
-- podindex: listen | |
select dt, show_id, count(*) as iab_counting | |
from data_transform.index_podindex_parquet | |
where is_podindex_valid = true | |
and dt = '2019-03-01' | |
group by 1, 2 | |
order by dt, 3 desc, 2; | |
-- legacyindex: listen | |
select dt, show_id, count(*) as legacyindex_counting | |
from data_transform.index_legacyindex_parquet | |
where is_legacyindex_valid = true | |
group by 1, 2 | |
order by dt, 3 desc, 2; | |
-- realtimeindex: listen | |
select dt, show_id, count(*) as realtimeindex_counting | |
from data_extract.requests_batch_parquet | |
where is_rt = true | |
and dt >= '2019-02-04' | |
and dt <= '2019-04-12' | |
group by 1, 2 | |
order by dt, 3 desc, 2; | |
-- iab: ads | |
select t1.dt, t2.ad_id, t1.show_id, count(*) as ad_iab_counting | |
from data_transform.index_iab_parquet t1 | |
join data_extract.requests_ad_parquet t2 on (t1.batch_id = t2.batch_id and t1.dt = t2.dt and t1.h = t2.h) | |
where t1.is_iab_valid = true | |
group by 1, 2, 3 | |
order by 1, 4 desc, 2, 3; | |
-- podindex: ads | |
select t1.dt, t2.ad_id, t1.show_id, count(*) as ad_podindex_counting | |
from data_transform.index_podindex_parquet t1 | |
join data_extract.requests_ad_parquet t2 on (t1.batch_id = t2.batch_id and t1.dt = t2.dt and t1.h = t2.h) | |
where t1.is_podindex_valid = true | |
group by 1, 2, 3 | |
order by 1, 4 desc, 2, 3; | |
-- legacyindex: ads | |
select t1.dt, t2.ad_id, t1.show_id, count(*) as ad_legacyindex_counting | |
from data_transform.index_legacyindex_parquet t1 | |
join data_extract.requests_ad_parquet t2 on (t1.batch_id = t2.batch_id and t1.dt = t2.dt and t1.h = t2.h) | |
where t1.is_legacyindex_valid = true | |
group by 1, 2, 3 | |
order by 1, 4 desc, 2, 3; | |
-- realtimeindex: ads | |
select dt, ad_id, show_id, count(*) as ad_realtimeindex_counting | |
from data_extract.requests_ad_parquet | |
where is_rt = true | |
and dt >= '2019-02-04' | |
and dt <= '2019-04-12' | |
group by 1, 2, 3 | |
order by 1, 4 desc, 2, 3; | |
select Month(t1.dt) as month, t2.ad_id, t1.show_id, count(*) as ad_iab_counting | |
from data_transform.index_iab_parquet t1 | |
join data_extract.requests_ad_parquet t2 on (t1.batch_id = t2.batch_id and t1.dt = t2.dt and t1.h = t2.h) | |
where t1.is_iab_valid = true | |
group by 1, 2, 3 | |
order by 1, 4 desc, 2, 3; | |
-- select all | |
select t2.*, t1.is_legacyindex_valid as ad_legacyindex_counting | |
from data_transform.index_legacyindex_parquet t1 | |
join data_extract.requests_ad_parquet t2 on (t1.batch_id = t2.batch_id and t1.dt = t2.dt and t1.h = t2.h); | |
-- Join add: ads | |
select t1.*, t2.is_legacyindex_valid, t3.is_iab_valid, t4.is_podindex_valid | |
from data_extract.requests_ad_parquet t1 | |
join data_transform.index_legacyindex_parquet t2 | |
on (t1.batch_id = t2.batch_id and t1.dt = t2.dt and t1.h = t2.h) | |
join data_transform.index_iab_parquet t3 on (t1.batch_id = t3.batch_id and t1.dt = t3.dt and t1.h = t3.h) | |
join data_transform.index_podindex_parquet t4 on (t1.batch_id = t4.batch_id and t1.dt = t4.dt and t1.h = t4.h) | |
where t1.dt = Date('2019-04-04') | |
and t1.h = 0 | |
limit 300; | |
-- Join listen: all | |
select t1.*, t2.is_legacyindex_valid, t3.is_iab_valid, t4.is_podindex_valid | |
from data_extract.requests_batch_parquet t1 | |
left join data_transform.index_legacyindex_parquet t2 | |
on (t1.batch_id = t2.batch_id and t1.dt = t2.dt and t1.h = t2.h) | |
left join data_transform.index_iab_parquet t3 on (t1.batch_id = t3.batch_id and t1.dt = t3.dt and t1.h = t3.h) | |
left join data_transform.index_podindex_parquet t4 | |
on (t1.batch_id = t4.batch_id and t1.dt = t4.dt and t1.h = t4.h) | |
where t1.dt = Date('2019-04-04') | |
and t1.h = 0 | |
limit 300; | |
-- Listen All: group by month, country | |
select Month(t1.dt) as month, | |
t1.show_id, | |
t1.show_url, | |
t1.geo_country_iso, | |
sum(case when t2.is_legacyindex_valid = true then 1 else 0 end) as legacyindex_count, | |
sum(case when t3.is_iab_valid = true then 1 else 0 end) as iab_count, | |
sum(case when t4.is_podindex_valid = true then 1 else 0 end) as podindex_count, | |
sum(case when t1.is_rt = true then 1 else 0 end) as realtimeindex_count | |
from data_extract.requests_batch_parquet t1 | |
left join data_transform.index_legacyindex_parquet t2 | |
on (t1.batch_id = t2.batch_id and t1.dt = t2.dt and t1.h = t2.h) | |
left join data_transform.index_iab_parquet t3 on (t1.batch_id = t3.batch_id and t1.dt = t3.dt and t1.h = t3.h) | |
left join data_transform.index_podindex_parquet t4 | |
on (t1.batch_id = t4.batch_id and t1.dt = t4.dt and t1.h = t4.h) | |
where t1.dt = Date('2019-02-04') | |
and t1.dt <= Date('2019-04-12') | |
group by 1, 2, 3, 4 | |
order by 1, 5 desc, 2, 3, 4; | |
SELECT dt, show_url, geo_isp, geo_country, ua_source, count(*) as counting | |
FROM data_raw.batch | |
WHERE dt >= Date('2019-04-01') | |
and dt <= Date('2019-04-26') | |
and lower(ua_source) LIKE '%luminary%' | |
group by 1, 2, 3, 4, 5 | |
order by 1, 6 desc; | |
SELECT dt, | |
ip, | |
sum(case when status = 200 then 1 else 0 end) as http_200_count, | |
sum(case when status = 206 then 1 else 0 end) as http_206_count, | |
sum(case when status = 302 then 1 else 0 end) as http_302_count, | |
sum(case when status not in (200, 206, 302) then 1 else 0 end) as http_others_count, | |
count(*) as total_count | |
FROM data_raw.cf_stitch | |
WHERE dt >= Date('2019-04-15') | |
AND method = 'GET' | |
AND lower(user_agent) LIKE '%luminary%' | |
GROUP BY 1, 2 | |
ORDER BY 1, 7 desc; | |
select * | |
from data_extract.requests_batch_parquet | |
where dt = Date('2019-02-21') | |
and h = 0 | |
limit 100; | |
-- ads All: group by month, country | |
select Month(t1.dt) as month, | |
t1.ad_id, | |
t1.show_id, | |
t1.geo_country_iso, | |
sum(case when t2.is_legacyindex_valid = true then 1 else 0 end) as legacyindex_count, | |
sum(case when t3.is_iab_valid = true then 1 else 0 end) as iab_count, | |
sum(case when t4.is_podindex_valid = true then 1 else 0 end) as podindex_count, | |
sum(case when t1.is_rt = true then 1 else 0 end) as realtimeindex_count | |
from data_extract.requests_ad_parquet t1 | |
left join data_transform.index_legacyindex_parquet t2 | |
on (t1.batch_id = t2.batch_id and t1.dt = t2.dt and t1.h = t2.h) | |
left join data_transform.index_iab_parquet t3 on (t1.batch_id = t3.batch_id and t1.dt = t3.dt and t1.h = t3.h) | |
left join data_transform.index_podindex_parquet t4 | |
on (t1.batch_id = t4.batch_id and t1.dt = t4.dt and t1.h = t4.h) | |
where t1.dt >= Date('2019-03-01') | |
and t1.dt <= Date('2019-03-31') | |
group by 1, 2, 3, 4 | |
order by 1, 5 desc, 2, 3, 4; | |
-- ads All: group by month, country | |
select Month(t1.dt) as month, | |
t1.ad_id, | |
t1.show_id, | |
t1.geo_country_iso, | |
sum(case when t2.is_legacyindex_valid = true then 1 else 0 end) as legacyindex_count, | |
sum(case when t3.is_iab_valid = true then 1 else 0 end) as iab_count, | |
sum(case when t4.is_podindex_valid = true then 1 else 0 end) as podindex_count, | |
sum(case when t1.is_rt = true then 1 else 0 end) as realtimeindex_count | |
from data_extract.requests_ad_parquet t1 | |
left join data_transform.index_legacyindex_parquet t2 | |
on (t1.batch_id = t2.batch_id and t1.dt = t2.dt and t1.h = t2.h) | |
left join data_transform.index_iab_parquet t3 on (t1.batch_id = t3.batch_id and t1.dt = t3.dt and t1.h = t3.h) | |
left join data_transform.index_podindex_parquet t4 | |
on (t1.batch_id = t4.batch_id and t1.dt = t4.dt and t1.h = t4.h) | |
where t1.dt >= Date('2019-03-01') | |
and t1.dt <= Date('2019-03-31') | |
group by 1, 2, 3, 4 | |
order by 1, 5 desc, 2, 3, 4; | |
select ua_device_family, count(*) | |
from data_extract.requests_batch_parquet | |
where dt = Date('2019-03-26') | |
group by ua_device_family | |
order by 2 desc; | |
-- ads All: group by month | |
select Month(t1.dt) as month, | |
t1.ad_id, | |
t1.show_id, | |
sum(case when t2.is_legacyindex_valid = true then 1 else 0 end) as legacyindex_count, | |
sum(case when t3.is_iab_valid = true then 1 else 0 end) as iab_count, | |
sum(case when t4.is_podindex_valid = true then 1 else 0 end) as podindex_count, | |
sum(case when t1.is_rt = true then 1 else 0 end) as realtimeindex_count | |
from data_extract.requests_ad_parquet t1 | |
left join data_transform.index_legacyindex_parquet t2 | |
on (t1.batch_id = t2.batch_id and t1.dt = t2.dt and t1.h = t2.h) | |
left join data_transform.index_iab_parquet t3 on (t1.batch_id = t3.batch_id and t1.dt = t3.dt and t1.h = t3.h) | |
left join data_transform.index_podindex_parquet t4 | |
on (t1.batch_id = t4.batch_id and t1.dt = t4.dt and t1.h = t4.h) | |
where t1.dt >= Date('2019-03-01') | |
and t1.dt <= Date('2019-03-31') | |
group by 1, 2, 3 | |
order by 1, 4 desc, 2, 3; | |
select * | |
from data_extract.stitch_parquet | |
where dt = Date('2019-04-23') | |
limit 100; | |
select dt, count(*) as counting | |
from data_raw.create_hosted_episode | |
group by dt | |
order by dt; | |
select * | |
from data_raw.create_hosted_episode | |
where dt = '2019-05-06' | |
limit 100; | |
select dt, count(*) | |
from data_extract.stitch_parquet | |
group by dt | |
order by dt; | |
-- Join listen: all | |
select Hour(t1.event_ts) as hour, | |
t1.ua_device_family, | |
t1.is_rt as is_realtimeindex_valid, | |
t2.is_legacyindex_valid, | |
t3.is_iab_valid, | |
t4.is_podindex_valid | |
from data_extract.requests_batch_parquet t1 | |
left join data_transform.index_legacyindex_parquet t2 | |
on (t1.batch_id = t2.batch_id and t1.dt = t2.dt and t1.h = t2.h) | |
left join data_transform.index_iab_parquet t3 on (t1.batch_id = t3.batch_id and t1.dt = t3.dt and t1.h = t3.h) | |
left join data_transform.index_podindex_parquet t4 | |
on (t1.batch_id = t4.batch_id and t1.dt = t4.dt and t1.h = t4.h) | |
where t1.dt = Date('2019-03-26') | |
and (t1.is_rt = true or t2.is_legacyindex_valid = true or t3.is_iab_valid = true or t4.is_podindex_valid = true) | |
order by 1; | |
-- Listen All: group by month, country | |
select Month(t1.dt) as month, | |
t1.show_id, | |
t1.show_url, | |
t1.geo_country_iso, | |
sum(case when t2.is_legacyindex_valid = true then 1 else 0 end) as legacyindex_count, | |
sum(case when t3.is_iab_valid = true then 1 else 0 end) as iab_count, | |
sum(case when t4.is_podindex_valid = true then 1 else 0 end) as podindex_count, | |
sum(case when t1.is_rt = true then 1 else 0 end) as realtimeindex_count | |
from data_extract.requests_batch_parquet t1 | |
left join data_transform.index_legacyindex_parquet t2 | |
on (t1.batch_id = t2.batch_id and t1.dt = t2.dt and t1.h = t2.h) | |
left join data_transform.index_iab_parquet t3 on (t1.batch_id = t3.batch_id and t1.dt = t3.dt and t1.h = t3.h) | |
left join data_transform.index_podindex_parquet t4 | |
on (t1.batch_id = t4.batch_id and t1.dt = t4.dt and t1.h = t4.h) | |
where t1.dt >= Date('2019-05-01') | |
and t1.dt <= Date('2019-05-03') | |
group by 1, 2, 3, 4 | |
order by 1, 5 desc, 2, 3, 4; | |
-- ads | |
select Month(t1.dt) as month, | |
t1.ad_id, | |
t1.show_id, | |
sum(case when t2.is_legacyindex_valid = true then 1 else 0 end) as legacyindex_count, | |
sum(case when t3.is_iab_valid = true then 1 else 0 end) as iab_count, | |
sum(case when t4.is_podindex_valid = true then 1 else 0 end) as podindex_count, | |
sum(case when t1.is_rt = true then 1 else 0 end) as realtimeindex_count | |
from data_extract.requests_ad_parquet t1 | |
left join data_transform.index_legacyindex_parquet t2 | |
on (t1.batch_id = t2.batch_id and t1.dt = t2.dt and t1.h = t2.h) | |
left join data_transform.index_iab_parquet t3 on (t1.batch_id = t3.batch_id and t1.dt = t3.dt and t1.h = t3.h) | |
left join data_transform.index_podindex_parquet t4 | |
on (t1.batch_id = t4.batch_id and t1.dt = t4.dt and t1.h = t4.h) | |
where t1.dt = Date('2019-04-11') | |
group by 1, 2, 3 | |
order by 1, 4 desc, 2, 3; | |
-- raw: | |
select * | |
from data_raw.batch | |
where dt = Date('2019-05-14') | |
and h = 15 | |
limit 200; | |
select * | |
from data_raw.stitch_cdn | |
where dt = Date('2019-05-14') | |
and h = 15 | |
limit 200; | |
-- extract: | |
select * | |
from data_extract.requests_batch_parquet | |
where dt = Date('2019-05-14') | |
and h = 15 | |
limit 200; | |
select * | |
from data_extract.requests_ad_parquet | |
where dt = Date('2019-05-14') | |
and h = 15 | |
limit 200; | |
select * | |
from data_extract.stitch_parquet | |
where dt = Date('2019-05-14') | |
and h = 15 | |
limit 200; | |
-- transform: | |
select * | |
from data_transform.listen_group_parquet | |
where dt = Date('2019-05-14') | |
and h = 15 | |
limit 200; | |
select * | |
from data_transform.index_legacyindex_parquet | |
where dt = Date('2019-05-14') | |
and h = 15 | |
limit 200; | |
select * | |
from data_transform.index_podindex_parquet | |
where dt = Date('2019-05-14') | |
and h = 15 | |
limit 200; | |
select * | |
from data_transform.index_iab_parquet | |
where dt = Date('2019-05-14') | |
and h = 15 | |
limit 200; | |
select dt, count(*) | |
from data_extract.requests_batch_parquet | |
where dt >= Date('2019-05-01') | |
group by dt | |
order by 1; | |
select * | |
from data_transform.listen_group_parquet | |
limit 10; | |
select * | |
from data_transform.index_iab_parquet | |
limit 10; | |
select * | |
from data_transform.index_podindex_parquet | |
limit 10; | |
select * | |
from data_transform.index_legacyindex_parquet | |
limit 10; | |
select * | |
from data_transform.listen_batch_multiindex_parquet | |
where batch_id = 'f8b8822836f0d2b945e8dae880dde7548111b2e54cb5d9e878a23bbca4aab42a'; | |
-- Listen All: group by month, country | |
select Month(t1.dt) as month, | |
t1.show_id, | |
t1.show_url, | |
t1.geo_country_iso, | |
sum(case when t2.is_legacyindex_valid = true then 1 else 0 end) as legacyindex_count, | |
sum(case when t3.is_iab_valid = true then 1 else 0 end) as iab_count, | |
sum(case when t4.is_podindex_valid = true then 1 else 0 end) as podindex_count, | |
sum(case when t1.is_rt = true then 1 else 0 end) as realtimeindex_count | |
from data_extract.requests_batch_parquet t1 | |
left join data_transform.index_legacyindex_parquet t2 | |
on (t1.batch_id = t2.batch_id and t1.dt = t2.dt and t1.h = t2.h) | |
left join data_transform.index_iab_parquet t3 on (t1.batch_id = t3.batch_id and t1.dt = t3.dt and t1.h = t3.h) | |
left join data_transform.index_podindex_parquet t4 | |
on (t1.batch_id = t4.batch_id and t1.dt = t4.dt and t1.h = t4.h) | |
where t1.dt >= Date('2019-05-01') | |
and t1.dt <= Date('2019-05-03') | |
group by 1, 2, 3, 4 | |
order by 1, 5 desc, 2, 3, 4; | |
-- Listen All: group by month, country | |
select Month(dt) as month, | |
show_id, | |
show_url, | |
geo_country_iso, | |
sum(case when is_legacyindex_valid = true then 1 else 0 end) as legacyindex_count, | |
sum(case when is_iab_valid = true then 1 else 0 end) as iab_count, | |
sum(case when is_podindex_valid = true then 1 else 0 end) as podindex_count, | |
sum(case when is_rt = true then 1 else 0 end) as realtimeindex_count | |
from data_transform.listen_batch_multiindex_parquet | |
where dt >= Date('2019-05-01') | |
and dt <= Date('2019-05-03') | |
group by 1, 2, 3, 4 | |
order by 1, 5 desc, 2, 3, 4; | |
SELECT dt, | |
edge_location, | |
sum(case when x_edge_result_type = 'Error' and x_edge_response_result_type != 'Error' then 1 else 0 end) as distribution_error_count, | |
count(*) as total_count, | |
(sum(case when x_edge_result_type = 'Error' and x_edge_response_result_type != 'Error' then 1 else 0 end) / cast(count(*) as DOUBLE)) as error_ratio | |
FROM data_extract.stitch_parquet | |
WHERE dt >= Date('2019-05-07') and dt <= Date('2019-05-15') | |
group by 1,2 | |
order by 1,2; | |
SELECT dt, | |
sum(case when x_edge_result_type = 'Error' and x_edge_response_result_type != 'Error' then 1 else 0 end) as distribution_error_count, | |
count(*) as total_count, | |
(sum(case when x_edge_result_type = 'Error' and x_edge_response_result_type != 'Error' then 1 else 0 end) / cast(count(*) as DOUBLE)) as error_ratio | |
FROM data_extract.stitch_parquet | |
WHERE dt >= Date('2019-05-01') and dt <= Date('2019-05-20') | |
group by 1 | |
order by 1; | |
select event_ts, | |
show_id, | |
show_url, | |
episode_id, | |
episode_url, | |
method, | |
status, | |
ip, | |
ua_source, | |
uri, | |
bytes, | |
range_req | |
from data_transform.listen_batch_multiindex_parquet | |
where show_url = 'ingefaer' and dt = Date('2019-05-20'); | |
select ad.t, | |
ad.i, | |
ad.p, | |
ad.s, | |
ad.e, | |
ad.v, | |
ad.cid, | |
ad.impurls | |
from data_extract.requests_batch_parquet, unnest(ads) as t(ad) | |
where dt=Date ('2019-06-26') | |
limit 100; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment