Skip to content

Instantly share code, notes, and snippets.

@itochu0523
Last active October 26, 2016 08:23
Show Gist options
  • Save itochu0523/e40ecc8b988dbdd2a0bc48a2e2bd9bb7 to your computer and use it in GitHub Desktop.
Save itochu0523/e40ecc8b988dbdd2a0bc48a2e2bd9bb7 to your computer and use it in GitHub Desktop.
-- INSERT INTO id_mapping -- Presto
With ssuid_table as
(
select
session_id
,max(ssuid) as ssuid
from tmp_id_mapping
group by session_id
),
cluid_table as
(
select
session_id
,max(cluid) as cluid
from tmp_id_mapping
group by session_id
),
tmp_id_mapping as
(
SELECT
(CASE WHEN b.ssuid is NULL and b.referrer_ssuid is not NULL THEN b.referrer_ssuid ELSE b.ssuid END) as ssuid
,b.fp_id as fp_id
,b.cluid as cluid
,b.td_global_id as td_global_id
,b.referrer_ssuid as referrer_ssuid
,b.session_id as session_id
FROM
(
SELECT
a.ssuid as ssuid
,a.fp_id as fp_id
,a.cluid as cluid
,a.td_global_id as td_global_id
,a.referrer_ssuid as referrer_ssuid
,a.session_id as session_id
FROM
(
SELECT
t.ssuid as ssuid
,t.fp_id as fp_id
,t.cluid as cluid
,t.td_global_id as td_global_id
--,replace(regexp_extract(url_extract_query(td_referrer),'ssuid=\d+'),'ssuid=','') as referrer_ssuid -- presto
,parse_url(t.td_referrer, 'QUERY', 'ssuid') as referrer_ssuid -- Hive
,TD_SESSIONIZE(time, 3600, t.tag) as session_id
,count(1)
FROM
(
SELECT
time
,td_ip
,concat(concat(concat(td_ip,td_os),td_browser),td_screen) as tag
,td_referrer
,ssuid
,fp_id
,cluid
,td_global_id
FROM catalina_glico
distribute by td_ip
sort by td_ip,time
) t
GROUP BY
1,2,3,4,5,6
) as a
) as b
)
INSERT INTO TABLE id_mapping -- Hive
SELECT
c.session_id
,d.cluid
,c.ssuid
FROM
ssuid_table c
JOIN
cluid_table d
ON c.session_id = d.session_id
ORDER BY 1,2,3
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment