Skip to content

Instantly share code, notes, and snippets.

@doryokujin
Last active May 13, 2021 12:38
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save doryokujin/c13a6111076612d6bc10ce0664e38ed0 to your computer and use it in GitHub Desktop.
Save doryokujin/c13a6111076612d6bc10ce0664e38ed0 to your computer and use it in GitHub Desktop.
『Treasure Data でアクセスログ分析の限界に挑む』その② 〜アクセスに基づいたユーザーセグメントの作成 月次編(アクティビティ)〜 ref: https://qiita.com/doryokujin/items/605158dc61e378685265
WHERE TD_INTERVAL(time, '-10y/-1M', 'JST') /* 1ヶ月前より過去 */
/* TD_SCHEDULED_TIME() = '2018-11-23 11:11:00' */
/*
SELECT target_month, segment, COUNT(1) AS cnt
FROM
(
*/
SELECT
TD_TIME_FORMAT(TD_DATE_TRUNC('month', TD_DATE_TRUNC('month', TD_SCHEDULED_TIME(),'JST')-1, 'JST'),'yyyy-MM-dd','JST') AS target_month,
IF(this.td_client_id IS NOT NULL, this.td_client_id, all_past.td_client_id) AS td_client_id,
CASE
WHEN this.td_client_id IS NOT NULL AND all_past.td_client_id IS NOT NULL THEN 'active'
WHEN this.td_client_id IS NULL AND all_past.td_client_id IS NOT NULL THEN 'non_active'
WHEN this.td_client_id IS NOT NULL AND all_past.td_client_id IS NULL THEN 'new'
END AS segment,
TD_TIME_FORMAT(min_time, 'yyyy-MM-dd', 'JST') AS min_time,
TD_TIME_FORMAT(max_time, 'yyyy-MM-dd', 'JST') AS max_time
FROM
(
SELECT td_client_id
FROM pageviews
WHERE TD_INTERVAL(time, '-10y/-1M', 'JST') /* 1ヶ月前より過去,厳密には以下を用いる */
/* WHERE TD_TIME_RANGE( time, NULL, TD_DATE_TRUNC('month', TD_DATE_TRUNC('month', TD_SCHEDULED_TIME(),'JST')-1, 'JST') ) */
GROUP BY td_client_id
) all_past
FULL OUTER JOIN
(
SELECT td_client_id, MIN(time) AS min_time, MAX(time) AS max_time
FROM pageviews
WHERE TD_INTERVAL(time, '-1M', 'JST')
GROUP BY td_client_id
) this
ON all_past.td_client_id = this.td_client_id
/*
)
GROUP BY target_month, segment
ORDER BY cnt DESC
*/
SELECT
target_month,
kv['active'] AS active,
kv['non_active'] AS non_active,
kv['new'] AS new,
kv['active']+kv['non_active']+kv['new'] AS total
FROM (
SELECT target_month, map_agg(segment, cnt) kv
FROM segment_monthly_acctivity
GROUP BY target_month
)
ORDER BY target_month
/* TD_SCHEDULED_TIME() = '2018-11-23 11:11:00' */
/*
SELECT target_month, activity_past_1, activity_past_2, COUNT(1) AS cnt,
MIN(min_time_past_1) AS min_time_past_1, MAX(max_time_past_1) AS max_time_past_1,
MIN(min_time_past_2) AS min_time_past_2, MAX(max_time_past_2) AS max_time_past_2
FROM
(
*/
SELECT
TD_TIME_FORMAT(TD_DATE_TRUNC('month', TD_DATE_TRUNC('month', TD_SCHEDULED_TIME(),'JST')-1, 'JST'),'yyyy-MM-dd','JST') AS target_month,
past_1month.td_client_id,
past_1month.segment AS activity_past_1,
past_2month.segment AS activity_past_2,
past_1month.min_time AS min_time_past_1, past_1month.max_time AS max_time_past_1,
past_2month.min_time AS min_time_past_2, past_2month.max_time AS max_time_past_2
FROM
(
SELECT
IF(past_month.td_client_id IS NOT NULL, past_month.td_client_id, past_month_ago.td_client_id) AS td_client_id,
CASE
WHEN past_month.td_client_id IS NOT NULL AND past_month_ago.td_client_id IS NOT NULL THEN 'active'
WHEN past_month.td_client_id IS NULL AND past_month_ago.td_client_id IS NOT NULL THEN 'non_active'
WHEN past_month.td_client_id IS NOT NULL AND past_month_ago.td_client_id IS NULL THEN 'welcome'
END AS segment,
TD_TIME_FORMAT(past_month.min_time, 'yyyy-MM-dd', 'JST') AS min_time,
TD_TIME_FORMAT(past_month.max_time, 'yyyy-MM-dd', 'JST') AS max_time
FROM
(
SELECT td_client_id
FROM pageviews
WHERE TD_INTERVAL(time, '-10y/-1M', 'JST') /* 1ヶ月前より過去 */
/* WHERE TD_TIME_RANGE( time, NULL, TD_DATE_TRUNC('month', TD_DATE_TRUNC('month', TD_SCHEDULED_TIME(),'JST')-1, 'JST') ) */
GROUP BY td_client_id
) past_month_ago
FULL OUTER JOIN
(
SELECT td_client_id, MIN(time) AS min_time, MAX(time) AS max_time
FROM pageviews
WHERE TD_INTERVAL(time, '-1M', 'JST') /* 前月1ヶ月間 */
GROUP BY td_client_id
) past_month
ON past_month_ago.td_client_id = past_month.td_client_id
) past_1month
LEFT OUTER JOIN /* 集合 past_1month は past_2month を内包する */
(
SELECT
IF(past_month.td_client_id IS NOT NULL, past_month.td_client_id, past_month_ago.td_client_id) AS td_client_id,
CASE
WHEN past_month.td_client_id IS NOT NULL AND past_month_ago.td_client_id IS NOT NULL THEN 'active'
WHEN past_month.td_client_id IS NULL AND past_month_ago.td_client_id IS NOT NULL THEN 'non_active'
WHEN past_month.td_client_id IS NOT NULL AND past_month_ago.td_client_id IS NULL THEN 'welcome'
END AS segment,
TD_TIME_FORMAT(past_month.min_time, 'yyyy-MM-dd', 'JST') AS min_time,
TD_TIME_FORMAT(past_month.max_time, 'yyyy-MM-dd', 'JST') AS max_time
FROM
(
SELECT td_client_id
FROM pageviews
WHERE TD_INTERVAL(time, '-10y/-2M', 'JST') /* 1ヶ月前より過去 */
/* WHERE TD_TIME_RANGE( time, NULL, TD_DATE_TRUNC('month', TD_DATE_TRUNC('month', TD_DATE_TRUNC('month', TD_SCHEDULED_TIME(),'JST')-1, 'JST')-1, 'JST' )) */
GROUP BY td_client_id
) past_month_ago
FULL OUTER JOIN
(
SELECT td_client_id, MIN(time) AS min_time, MAX(time) AS max_time
FROM pageviews
WHERE TD_INTERVAL(time, '-1M/-1M', 'JST') /* 前々月1ヶ月間 */
GROUP BY td_client_id
) past_month
ON past_month_ago.td_client_id = past_month.td_client_id
) past_2month
ON past_1month.td_client_id = past_2month.td_client_id
/*
)
GROUP BY target_month, activity_past_1, activity_past_2
*/
ORDER BY activity_past_1, activity_past_2
SELECT target_month, segment_name, cnt
FROM segment_monthly_activity s
JOIN
( SELECT activity_past_1, activity_past_2, segment_name FROM segment_monthly_activity_master ) m
ON s.activity_past_1 = m.activity_past_1 AND s.activity_past_2 = m.activity_past_2
ORDER BY target_month
LEFT OUTER JOIN
(
SELECT td_client_id
FROM pageviews
WHERE TD_INTERVAL(time, '-1M', 'JST') /* 前月1ヶ月間 */
AND td_title = 'お問い合わせ - Treasure Data'
GROUP BY td_client_id
) past_month_cv
ON past_month.td_client_id = past_month_cv.td_client_id
/* TD_SCHEDULED_TIME() = '2018-11-23 11:11:00' */
/**/
SELECT target_month, activity_past_1, activity_past_2, COUNT(1) AS cnt,
MIN(min_time_past_1) AS min_time_past_1, MAX(max_time_past_1) AS max_time_past_1,
MIN(min_time_past_2) AS min_time_past_2, MAX(max_time_past_2) AS max_time_past_2
FROM
(
/**/
SELECT
TD_TIME_FORMAT(TD_DATE_TRUNC('month', TD_DATE_TRUNC('month', TD_SCHEDULED_TIME(),'JST')-1, 'JST'),'yyyy-MM-dd','JST') AS target_month,
CASE
WHEN past_1month.segment = 'active' AND past_1month.with_cv = 1 THEN 'active_cv'
WHEN past_1month.segment = 'active' AND past_1month.with_cv = 0 THEN 'active'
WHEN past_1month.segment = 'welcome' AND past_1month.with_cv = 1 THEN 'welcome_cv'
WHEN past_1month.segment = 'welcome' AND past_1month.with_cv = 0 THEN 'welcome'
ELSE past_1month.segment
END AS activity_past_1,
CASE
WHEN past_2month.segment = 'active' AND past_2month.with_cv = 1 THEN 'active_cv'
WHEN past_2month.segment = 'active' AND past_2month.with_cv = 0 THEN 'active'
WHEN past_2month.segment = 'welcome' AND past_2month.with_cv = 1 THEN 'welcome_cv'
WHEN past_2month.segment = 'welcome' AND past_2month.with_cv = 0 THEN 'welcome'
ELSE past_2month.segment
END AS activity_past_2,
past_1month.min_time AS min_time_past_1, past_1month.max_time AS max_time_past_1,
past_2month.min_time AS min_time_past_2, past_2month.max_time AS max_time_past_2
FROM
(
SELECT
IF(past_month.td_client_id IS NOT NULL, past_month.td_client_id, past_month_ago.td_client_id) AS td_client_id,
CASE
WHEN past_month.td_client_id IS NOT NULL AND past_month_ago.td_client_id IS NOT NULL THEN 'active'
WHEN past_month.td_client_id IS NULL AND past_month_ago.td_client_id IS NOT NULL THEN 'non_active'
WHEN past_month.td_client_id IS NOT NULL AND past_month_ago.td_client_id IS NULL THEN 'welcome'
END AS segment,
IF(past_month_cv.td_client_id IS NOT NULL,1,0) AS with_cv,
TD_TIME_FORMAT(past_month.min_time, 'yyyy-MM-dd', 'JST') AS min_time,
TD_TIME_FORMAT(past_month.max_time, 'yyyy-MM-dd', 'JST') AS max_time
FROM
(
SELECT td_client_id
FROM pageviews
WHERE TD_INTERVAL(time, '-10y/-1M', 'JST') /* 1ヶ月前より過去 */
GROUP BY td_client_id
) past_month_ago
FULL OUTER JOIN
(
SELECT td_client_id, MIN(time) AS min_time, MAX(time) AS max_time
FROM pageviews
WHERE TD_INTERVAL(time, '-1M', 'JST') /* 前月1ヶ月間 */
GROUP BY td_client_id
) past_month
ON past_month_ago.td_client_id = past_month.td_client_id
LEFT OUTER JOIN
(
SELECT td_client_id
FROM pageviews
WHERE TD_INTERVAL(time, '-1M', 'JST') /* 前月1ヶ月間 */
AND td_title = 'お問い合わせ - Treasure Data'
GROUP BY td_client_id
) past_month_cv
ON past_month.td_client_id = past_month_cv.td_client_id
) past_1month
LEFT OUTER JOIN /* 集合 past_1month は past_2month を内包する */
(
SELECT
IF(past_month.td_client_id IS NOT NULL, past_month.td_client_id, past_month_ago.td_client_id) AS td_client_id,
CASE
WHEN past_month.td_client_id IS NOT NULL AND past_month_ago.td_client_id IS NOT NULL THEN 'active'
WHEN past_month.td_client_id IS NULL AND past_month_ago.td_client_id IS NOT NULL THEN 'non_active'
WHEN past_month.td_client_id IS NOT NULL AND past_month_ago.td_client_id IS NULL THEN 'welcome'
END AS segment,
IF(past_month_cv.td_client_id IS NOT NULL,1,0) AS with_cv,
TD_TIME_FORMAT(past_month.min_time, 'yyyy-MM-dd', 'JST') AS min_time,
TD_TIME_FORMAT(past_month.max_time, 'yyyy-MM-dd', 'JST') AS max_time
FROM
(
SELECT td_client_id
FROM pageviews
WHERE TD_INTERVAL(time, '-10y/-2M', 'JST') /* 1ヶ月前より過去 */
/* WHERE TD_TIME_RANGE( time, NULL, TD_DATE_TRUNC('month', TD_DATE_TRUNC('month', TD_DATE_TRUNC('month', TD_SCHEDULED_TIME(),'JST')-1, 'JST')-1, 'JST' )) */
GROUP BY td_client_id
) past_month_ago
FULL OUTER JOIN
(
SELECT td_client_id, MIN(time) AS min_time, MAX(time) AS max_time
FROM pageviews
WHERE TD_INTERVAL(time, '-1M/-1M', 'JST') /* 前々月1ヶ月間 */
GROUP BY td_client_id
) past_month
ON past_month_ago.td_client_id = past_month.td_client_id
LEFT OUTER JOIN
(
SELECT td_client_id
FROM pageviews
WHERE TD_INTERVAL(time, '-1M/-1M', 'JST') /* 前々月1ヶ月間 */
AND td_title = 'お問い合わせ - Treasure Data'
GROUP BY td_client_id
) past_month_cv
ON past_month.td_client_id = past_month_cv.td_client_id
) past_2month
ON past_1month.td_client_id = past_2month.td_client_id
/**/
)
GROUP BY target_month, activity_past_1, activity_past_2
/**/
ORDER BY activity_past_1, activity_past_2
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment