Skip to content

Instantly share code, notes, and snippets.

@hamakn
Last active May 6, 2021 06:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hamakn/152b9939b306fdec4ea70b9fe3f86eab to your computer and use it in GitHub Desktop.
Save hamakn/152b9939b306fdec4ea70b9fe3f86eab to your computer and use it in GitHub Desktop.
BigQueryどう書く
-- 180sec以内の1つ前のAPI callを出す
-- precondition: t is not overwrapped by recoding milliseconds
with
Log as(
select
-- user_id:1 A=>B=>C
1 as user_id, "apiA" as api_name, TIMESTAMP '2021-05-06 12:00:00' as t
union all select 1 as user_id, "apiB" as api_name, TIMESTAMP '2021-05-06 12:01:00' as t
union all select 1 as user_id, "apiC" as api_name, TIMESTAMP '2021-05-06 12:02:00' as t
-- user_id:2 A=>C
union all select 2 as user_id, "apiA" as api_name, TIMESTAMP '2021-05-06 09:00:00' as t
union all select 2 as user_id, "apiC" as api_name, TIMESTAMP '2021-05-06 09:01:00' as t
-- user_id:3 D=(long time)=>C
union all select 3 as user_id, "apiD" as api_name, TIMESTAMP '2021-05-06 08:00:00' as t
union all select 3 as user_id, "apiC" as api_name, TIMESTAMP '2021-05-06 08:03:00.1' as t -- millisec is OK
-- user_id:4 A
union all select 4 as user_id, "apiA" as api_name, TIMESTAMP '2021-05-06 10:00:00' as t
),
LogWithUnixtime as(
select
*,
UNIX_MILLIS(TIMESTAMP(cast(t as datetime))) as logged_at
from
Log
),
LogWithSincePreviousAPICall as(
select
*,
min(logged_at)
over(
partition by user_id
order by logged_at asc
rows 1 preceding
) as previous_logged_at -- NOTE: when logged_at == previous_logged_at, it means no previous log
from
LogWithUnixtime
),
LogWithPreviousCall as(
select
*,
if(
logged_at = previous_logged_at or -- no previous log
logged_at - previous_logged_at >= 180000, -- or duration is too high
null, -- no previous log
lag(api_name)
over(
partition by user_id
order by logged_at asc
)
) as previuos_api_name
from
LogWithSincePreviousAPICall
)
select
*
from
LogWithPreviousCall
-- logからアクセス時間を計測
with
-- create log
AccessLog as(
select
1 as account_id, TIMESTAMP '2017-06-20 00:00:00' as t
union all select 1 as account_id, TIMESTAMP '2017-06-20 00:00:10' as t
union all select 1 as account_id, TIMESTAMP '2017-06-20 00:01:00' as t
union all select 1 as account_id, TIMESTAMP '2017-06-20 00:05:00' as t
union all select 1 as account_id, TIMESTAMP '2017-06-20 00:05:00' as t
union all select 1 as account_id, TIMESTAMP '2017-06-20 00:06:20' as t
union all select 1 as account_id, TIMESTAMP '2017-06-20 00:09:00' as t
union all select 1 as account_id, TIMESTAMP '2017-06-20 00:15:00' as t
union all select 2 as account_id, TIMESTAMP '2017-06-20 00:05:00' as t
union all select 2 as account_id, TIMESTAMP '2017-06-20 00:05:30' as t
),
AccessLogWithUnixTime as(
select
*,
UNIX_SECONDS(TIMESTAMP(cast(t as datetime))) as logged_at
from
AccessLog
),
-- sessionのつながりを出す
AccessLogWithLinkedLog as(
select
account_id,
logged_at,
-- 180秒でsessionがつながっているとみなす
min(logged_at) over (partition by account_id order by logged_at range between 180 preceding and 180 following) as min_linked_logged_at,
max(logged_at) over (partition by account_id order by logged_at range between 180 preceding and 180 following) as max_linked_logged_at
from
AccessLogWithUnixTime
group by
-- NOTE: 同一時刻のlogがあるのでgroup byする
account_id,
logged_at
),
-- sessionの開始or開始のログを判定して抽出する
SessionStartOrEnd as(
select
account_id,
logged_at,
if(
logged_at = min_linked_logged_at,
true,
false
) as session_start,
if(
logged_at = max_linked_logged_at,
true,
false
) as session_end
from
AccessLogWithLinkedLog
where
-- sessionの開始or終了のログのみ残す
logged_at = min_linked_logged_at
or
logged_at = max_linked_logged_at
),
-- session時間を算出する
SessionSeconds as(
select
account_id,
logged_at,
if(
session_start = true and session_end = false,
max(logged_at) over (partition by account_id order by logged_at rows between current row and 1 following) - logged_at,
0
) as session_seconds
from
SessionStartOrEnd
)
select
account_id,
sum(session_seconds) as total_session_seconds
from
SessionSeconds
where
session_seconds > 0
group by
account_id
order by
account_id
-- result
-- account_id | total_session_seconds
-- -----------+-----------------------
-- 1 | 300
-- 2 | 30
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment