Last active
May 6, 2021 06:54
-
-
Save hamakn/152b9939b306fdec4ea70b9fe3f86eab to your computer and use it in GitHub Desktop.
BigQueryどう書く
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- 180sec以内の1つ前のAPI callを出す | |
-- precondition: t is not overwrapped by recoding milliseconds | |
with | |
Log as( | |
select | |
-- user_id:1 A=>B=>C | |
1 as user_id, "apiA" as api_name, TIMESTAMP '2021-05-06 12:00:00' as t | |
union all select 1 as user_id, "apiB" as api_name, TIMESTAMP '2021-05-06 12:01:00' as t | |
union all select 1 as user_id, "apiC" as api_name, TIMESTAMP '2021-05-06 12:02:00' as t | |
-- user_id:2 A=>C | |
union all select 2 as user_id, "apiA" as api_name, TIMESTAMP '2021-05-06 09:00:00' as t | |
union all select 2 as user_id, "apiC" as api_name, TIMESTAMP '2021-05-06 09:01:00' as t | |
-- user_id:3 D=(long time)=>C | |
union all select 3 as user_id, "apiD" as api_name, TIMESTAMP '2021-05-06 08:00:00' as t | |
union all select 3 as user_id, "apiC" as api_name, TIMESTAMP '2021-05-06 08:03:00.1' as t -- millisec is OK | |
-- user_id:4 A | |
union all select 4 as user_id, "apiA" as api_name, TIMESTAMP '2021-05-06 10:00:00' as t | |
), | |
LogWithUnixtime as( | |
select | |
*, | |
UNIX_MILLIS(TIMESTAMP(cast(t as datetime))) as logged_at | |
from | |
Log | |
), | |
LogWithSincePreviousAPICall as( | |
select | |
*, | |
min(logged_at) | |
over( | |
partition by user_id | |
order by logged_at asc | |
rows 1 preceding | |
) as previous_logged_at -- NOTE: when logged_at == previous_logged_at, it means no previous log | |
from | |
LogWithUnixtime | |
), | |
LogWithPreviousCall as( | |
select | |
*, | |
if( | |
logged_at = previous_logged_at or -- no previous log | |
logged_at - previous_logged_at >= 180000, -- or duration is too high | |
null, -- no previous log | |
lag(api_name) | |
over( | |
partition by user_id | |
order by logged_at asc | |
) | |
) as previuos_api_name | |
from | |
LogWithSincePreviousAPICall | |
) | |
select | |
* | |
from | |
LogWithPreviousCall |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- logからアクセス時間を計測 | |
with | |
-- create log | |
AccessLog as( | |
select | |
1 as account_id, TIMESTAMP '2017-06-20 00:00:00' as t | |
union all select 1 as account_id, TIMESTAMP '2017-06-20 00:00:10' as t | |
union all select 1 as account_id, TIMESTAMP '2017-06-20 00:01:00' as t | |
union all select 1 as account_id, TIMESTAMP '2017-06-20 00:05:00' as t | |
union all select 1 as account_id, TIMESTAMP '2017-06-20 00:05:00' as t | |
union all select 1 as account_id, TIMESTAMP '2017-06-20 00:06:20' as t | |
union all select 1 as account_id, TIMESTAMP '2017-06-20 00:09:00' as t | |
union all select 1 as account_id, TIMESTAMP '2017-06-20 00:15:00' as t | |
union all select 2 as account_id, TIMESTAMP '2017-06-20 00:05:00' as t | |
union all select 2 as account_id, TIMESTAMP '2017-06-20 00:05:30' as t | |
), | |
AccessLogWithUnixTime as( | |
select | |
*, | |
UNIX_SECONDS(TIMESTAMP(cast(t as datetime))) as logged_at | |
from | |
AccessLog | |
), | |
-- sessionのつながりを出す | |
AccessLogWithLinkedLog as( | |
select | |
account_id, | |
logged_at, | |
-- 180秒でsessionがつながっているとみなす | |
min(logged_at) over (partition by account_id order by logged_at range between 180 preceding and 180 following) as min_linked_logged_at, | |
max(logged_at) over (partition by account_id order by logged_at range between 180 preceding and 180 following) as max_linked_logged_at | |
from | |
AccessLogWithUnixTime | |
group by | |
-- NOTE: 同一時刻のlogがあるのでgroup byする | |
account_id, | |
logged_at | |
), | |
-- sessionの開始or開始のログを判定して抽出する | |
SessionStartOrEnd as( | |
select | |
account_id, | |
logged_at, | |
if( | |
logged_at = min_linked_logged_at, | |
true, | |
false | |
) as session_start, | |
if( | |
logged_at = max_linked_logged_at, | |
true, | |
false | |
) as session_end | |
from | |
AccessLogWithLinkedLog | |
where | |
-- sessionの開始or終了のログのみ残す | |
logged_at = min_linked_logged_at | |
or | |
logged_at = max_linked_logged_at | |
), | |
-- session時間を算出する | |
SessionSeconds as( | |
select | |
account_id, | |
logged_at, | |
if( | |
session_start = true and session_end = false, | |
max(logged_at) over (partition by account_id order by logged_at rows between current row and 1 following) - logged_at, | |
0 | |
) as session_seconds | |
from | |
SessionStartOrEnd | |
) | |
select | |
account_id, | |
sum(session_seconds) as total_session_seconds | |
from | |
SessionSeconds | |
where | |
session_seconds > 0 | |
group by | |
account_id | |
order by | |
account_id | |
-- result | |
-- account_id | total_session_seconds | |
-- -----------+----------------------- | |
-- 1 | 300 | |
-- 2 | 30 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment