Skip to content

Instantly share code, notes, and snippets.

@doryokujin
Created July 22, 2012 09:04
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save doryokujin/3158973 to your computer and use it in GitHub Desktop.
Save doryokujin/3158973 to your computer and use it in GitHub Desktop.
退会に関する分析
-- 0.1 ログインインターバルの分布 --
-- 当日入会->辞めた人もインターバルは1となるので1回以上プレイしている人に限定 --
td query -w -d your_app -f csv -o dist_of_login_interval.csv "
SELECT ROUND((datediff(latest_login, registered_day)+1)/login_times) AS login_interval, COUNT(*) AS cnt
FROM
(
SELECT v['uid'] AS uid, from_unixtime(MAX(time),'yyyy-MM-dd' ) AS latest_login
FROM login
GROUP BY v['uid']
) t1
JOIN
(
SELECT v['uid'] AS uid, from_unixtime(MAX(time),'yyyy-MM-dd' ) AS registered_day
FROM register
GROUP BY v['uid']
HAVING from_unixtime(MAX(time),'yyyy-MM-dd' ) <= date_sub('2012-07-20', 7 )
) t2
ON
(t1.uid=t2.uid)
JOIN
(
SELECT uid, COUNT(*) AS login_times
FROM
(
SELECT v['uid'] AS uid, from_unixtime(time,'yyyy-MM-dd' ) AS login_day, COUNT(*) AS login_times_each_day
FROM login
GROUP BY v['uid'], from_unixtime(time,'yyyy-MM-dd' )
) tmp
GROUP BY uid
HAVING 1 < COUNT(*)
) t3
ON
(t2.uid=t3.uid)
GROUP BY ROUND((datediff(latest_login, registered_day)+1)/login_times)
ORDER BY login_interval
"
-- 0.2 退会ユーザー vs 生存ユーザー --
td query -w -d your_app "
SELECT dead.cnt AS dead_cnt, survaival.cnt AS survaival_cnt, ROUND(dead.cnt/(dead.cnt+survaival.cnt)*1000)/10 AS dead_rate
FROM
(
SELECT COUNT(*) AS cnt, 1 AS one
FROM
(
SELECT v['uid'] AS uid, from_unixtime(MAX(time),'yyyy-MM-dd' ) AS latest_login, datediff('2012-07-20',from_unixtime(MAX(time),'yyyy-MM-dd'))
FROM login
GROUP BY v['uid']
HAVING 7 <= datediff('2012-07-20',from_unixtime(MAX(time),'yyyy-MM-dd'))
) t1
JOIN
(
SELECT v['uid'] AS uid, from_unixtime(MAX(time),'yyyy-MM-dd' ) AS registered_day
FROM register
GROUP BY v['uid']
HAVING from_unixtime(MAX(time),'yyyy-MM-dd' ) <= date_sub('2012-07-20', 7 )
) t2
ON
(t1.uid=t2.uid)
) dead
JOIN
(
SELECT COUNT(*) AS cnt, 1 AS one
FROM
(
SELECT v['uid'] AS uid, from_unixtime(MAX(time),'yyyy-MM-dd' ) AS latest_login, datediff('2012-07-20',from_unixtime(MAX(time),'yyyy-MM-dd'))
FROM login
GROUP BY v['uid']
HAVING 7 > datediff('2012-07-20',from_unixtime(MAX(time),'yyyy-MM-dd'))
) t1
JOIN
(
SELECT v['uid'] AS uid, from_unixtime(MAX(time),'yyyy-MM-dd' ) AS registered_day
FROM register
GROUP BY v['uid']
HAVING from_unixtime(MAX(time),'yyyy-MM-dd' ) <= date_sub('2012-07-20', 7 )
) t2
ON
(t1.uid=t2.uid)
) survaival
ON
(dead.one=survaival.one)
"
-- 1.1 ログイン回数 segmented by {退会,生存} ユーザー --
td query -w -d your_app -f csv -o dist_of_login_times_segmented_by.csv "
SELECT
IF(left_group.cnt IS NULL,continue_group.login_times,left_group.login_times) AS login_times,
IF(left_group.cnt IS NULL,0,left_group.cnt) AS left_group,
IF(continue_group.cnt IS NULL,0,continue_group.cnt) AS continue_group,
IF(left_group_all.cnt IS NULL,0,left_group_all.cnt) AS left_group_all,
IF(continue_group_all.cnt IS NULL,0,continue_group_all.cnt) AS continue_group_all,
ROUND(IF(left_group.cnt IS NULL,0,left_group.cnt)/IF(left_group_all.cnt IS NULL,1,left_group_all.cnt)*1000)/100 AS left_group_rate,
ROUND(IF(continue_group.cnt IS NULL,0,continue_group.cnt)/IF(continue_group_all.cnt IS NULL,1,continue_group_all.cnt)*1000)/100 AS continue_group_rate
FROM
(
SELECT login_times, COUNT(*) AS cnt, 'left' AS type
FROM
(
SELECT v['uid'] AS uid, from_unixtime(MAX(time),'yyyy-MM-dd' ) AS latest_login, datediff('2012-07-20',from_unixtime(MAX(time),'yyyy-MM-dd'))
FROM login
GROUP BY v['uid']
HAVING 7 <= datediff('2012-07-20',from_unixtime(MAX(time),'yyyy-MM-dd'))
) t1
JOIN
(
SELECT v['uid'] AS uid, from_unixtime(MAX(time),'yyyy-MM-dd' ) AS registered_day
FROM register
GROUP BY v['uid']
HAVING from_unixtime(MAX(time),'yyyy-MM-dd' ) <= date_sub('2012-07-20', 7 )
) t2
ON
(t1.uid=t2.uid)
JOIN
(
SELECT uid, COUNT(*) AS login_times
FROM
(
SELECT v['uid'] AS uid, from_unixtime(time,'yyyy-MM-dd' ) AS login_day, COUNT(*) AS login_times_each_day
FROM login
GROUP BY v['uid'], from_unixtime(time,'yyyy-MM-dd' )
) tmp
GROUP BY uid
) t3
ON
(t2.uid=t3.uid)
GROUP BY login_times
) left_group
JOIN
(
SELECT COUNT(*) AS cnt, 'left' AS type
FROM
(
SELECT v['uid'] AS uid, from_unixtime(MAX(time),'yyyy-MM-dd' ) AS latest_login, datediff('2012-07-20',from_unixtime(MAX(time),'yyyy-MM-dd'))
FROM login
GROUP BY v['uid']
HAVING 7 <= datediff('2012-07-20',from_unixtime(MAX(time),'yyyy-MM-dd'))
) t1
JOIN
(
SELECT v['uid'] AS uid, from_unixtime(MAX(time),'yyyy-MM-dd' ) AS registered_day
FROM register
GROUP BY v['uid']
HAVING from_unixtime(MAX(time),'yyyy-MM-dd' ) <= date_sub('2012-07-20', 7 )
) t2
ON
(t1.uid=t2.uid)
) left_group_all
ON
(left_group.type=left_group_all.type)
FULL OUTER JOIN
(
SELECT login_times, COUNT(*) AS cnt, 'continue' AS type
FROM
(
SELECT v['uid'] AS uid, from_unixtime(MAX(time),'yyyy-MM-dd' ) AS latest_login, datediff('2012-07-20',from_unixtime(MAX(time),'yyyy-MM-dd'))
FROM login
GROUP BY v['uid']
HAVING 7 > datediff('2012-07-20',from_unixtime(MAX(time),'yyyy-MM-dd'))
) t1
JOIN
(
SELECT v['uid'] AS uid, from_unixtime(MAX(time),'yyyy-MM-dd' ) AS registered_day
FROM register
GROUP BY v['uid']
HAVING from_unixtime(MAX(time),'yyyy-MM-dd' ) <= date_sub('2012-07-20', 7 )
) t2
ON
(t1.uid=t2.uid)
JOIN
(
SELECT uid, COUNT(*) AS login_times
FROM
(
SELECT v['uid'] AS uid, from_unixtime(time,'yyyy-MM-dd' ) AS login_day, COUNT(*) AS login_times_each_day
FROM login
GROUP BY v['uid'], from_unixtime(time,'yyyy-MM-dd' )
) tmp
GROUP BY uid
) t3
ON
(t2.uid=t3.uid)
GROUP BY login_times
) continue_group
ON
(left_group.login_times=continue_group.login_times)
JOIN
(
SELECT COUNT(*) AS cnt, 'continue' AS type
FROM
(
SELECT v['uid'] AS uid, from_unixtime(MAX(time),'yyyy-MM-dd' ) AS latest_login, datediff('2012-07-20',from_unixtime(MAX(time),'yyyy-MM-dd'))
FROM login
GROUP BY v['uid']
HAVING 7 > datediff('2012-07-20',from_unixtime(MAX(time),'yyyy-MM-dd'))
) t1
JOIN
(
SELECT v['uid'] AS uid, from_unixtime(MAX(time),'yyyy-MM-dd' ) AS registered_day
FROM register
GROUP BY v['uid']
HAVING from_unixtime(MAX(time),'yyyy-MM-dd' ) <= date_sub('2012-07-20', 7 )
) t2
ON
(t1.uid=t2.uid)
) continue_group_all
ON
(continue_group.type=continue_group_all.type)
ORDER BY login_times
"
-- 2.1 ログインインターバル segmented by {退会,生存} ユーザー --
-- 当日入会->辞めた人もインターバルは1となるので1回以上プレイしている人に限定 --
td query -w -d your_app -f csv -o dist_of_login_interval_segmented_by.csv "
SELECT
IF(left_group.cnt IS NULL,continue_group.login_interval,left_group.login_interval) AS login_interval,
IF(left_group.cnt IS NULL,0,left_group.cnt) AS left_group,
IF(continue_group.cnt IS NULL,0,continue_group.cnt) AS continue_group,
IF(left_group_all.cnt IS NULL,0,left_group_all.cnt) AS left_group_all,
IF(continue_group_all.cnt IS NULL,0,continue_group_all.cnt) AS continue_group_all,
IF(left_group.cnt IS NULL,0,left_group.cnt)/IF(left_group_all.cnt IS NULL,1,left_group_all.cnt) AS left_group_rate,
IF(continue_group.cnt IS NULL,0,continue_group.cnt)/IF(continue_group_all.cnt IS NULL,1,continue_group_all.cnt) AS continue_group_rate
FROM
(
SELECT ROUND((datediff(latest_login, registered_day)+1)/login_times) AS login_interval, COUNT(*) AS cnt, 'left' AS type
FROM
(
SELECT v['uid'] AS uid, from_unixtime(MAX(time),'yyyy-MM-dd' ) AS latest_login, datediff('2012-07-20',from_unixtime(MAX(time),'yyyy-MM-dd'))
FROM login
GROUP BY v['uid']
HAVING 7 <= datediff('2012-07-20',from_unixtime(MAX(time),'yyyy-MM-dd'))
) t1
JOIN
(
SELECT v['uid'] AS uid, from_unixtime(MAX(time),'yyyy-MM-dd' ) AS registered_day
FROM register
GROUP BY v['uid']
HAVING from_unixtime(MAX(time),'yyyy-MM-dd' ) <= date_sub('2012-07-20', 7 )
) t2
ON
(t1.uid=t2.uid)
JOIN
(
SELECT uid, COUNT(*) AS login_times
FROM
(
SELECT v['uid'] AS uid, from_unixtime(time,'yyyy-MM-dd' ) AS login_day, COUNT(*) AS login_times_each_day
FROM login
GROUP BY v['uid'], from_unixtime(time,'yyyy-MM-dd' )
) tmp
GROUP BY uid
HAVING 1 < COUNT(*)
) t3
ON
(t2.uid=t3.uid)
GROUP BY ROUND((datediff(latest_login, registered_day)+1)/login_times)
) left_group
JOIN
(
SELECT COUNT(*) AS cnt, 'left' AS type
FROM
(
SELECT v['uid'] AS uid, from_unixtime(MAX(time),'yyyy-MM-dd' ) AS latest_login, datediff('2012-07-20',from_unixtime(MAX(time),'yyyy-MM-dd'))
FROM login
GROUP BY v['uid']
HAVING 7 <= datediff('2012-07-20',from_unixtime(MAX(time),'yyyy-MM-dd'))
) t1
JOIN
(
SELECT v['uid'] AS uid, from_unixtime(MAX(time),'yyyy-MM-dd' ) AS registered_day
FROM register
GROUP BY v['uid']
HAVING from_unixtime(MAX(time),'yyyy-MM-dd' ) <= date_sub('2012-07-20', 7 )
) t2
ON
(t1.uid=t2.uid)
JOIN
(
SELECT uid, COUNT(*) AS login_times
FROM
(
SELECT v['uid'] AS uid, from_unixtime(time,'yyyy-MM-dd' ) AS login_day, COUNT(*) AS login_times_each_day
FROM login
GROUP BY v['uid'], from_unixtime(time,'yyyy-MM-dd' )
) tmp
GROUP BY uid
HAVING 1 < COUNT(*)
) t3
ON
(t2.uid=t3.uid)
) left_group_all
ON
(left_group.type=left_group_all.type)
FULL OUTER JOIN
(
SELECT ROUND((datediff(latest_login, registered_day)+1)/login_times) AS login_interval, COUNT(*) AS cnt, 'continue' AS type
FROM
(
SELECT v['uid'] AS uid, from_unixtime(MAX(time),'yyyy-MM-dd' ) AS latest_login, datediff('2012-07-20',from_unixtime(MAX(time),'yyyy-MM-dd'))
FROM login
GROUP BY v['uid']
HAVING 7 > datediff('2012-07-20',from_unixtime(MAX(time),'yyyy-MM-dd'))
) t1
JOIN
(
SELECT v['uid'] AS uid, from_unixtime(MAX(time),'yyyy-MM-dd' ) AS registered_day
FROM register
GROUP BY v['uid']
HAVING from_unixtime(MAX(time),'yyyy-MM-dd' ) <= date_sub('2012-07-20', 7 )
) t2
ON
(t1.uid=t2.uid)
JOIN
(
SELECT uid, COUNT(*) AS login_times
FROM
(
SELECT v['uid'] AS uid, from_unixtime(time,'yyyy-MM-dd' ) AS login_day, COUNT(*) AS login_times_each_day
FROM login
GROUP BY v['uid'], from_unixtime(time,'yyyy-MM-dd' )
) tmp
GROUP BY uid
HAVING 1 < COUNT(*)
) t3
ON
(t2.uid=t3.uid)
GROUP BY ROUND((datediff(latest_login, registered_day)+1)/login_times)
) continue_group
ON
(left_group.login_interval=continue_group.login_interval)
JOIN
(
SELECT COUNT(*) AS cnt, 'continue' AS type
FROM
(
SELECT v['uid'] AS uid, from_unixtime(MAX(time),'yyyy-MM-dd' ) AS latest_login, datediff('2012-07-20',from_unixtime(MAX(time),'yyyy-MM-dd'))
FROM login
GROUP BY v['uid']
HAVING 7 > datediff('2012-07-20',from_unixtime(MAX(time),'yyyy-MM-dd'))
) t1
JOIN
(
SELECT v['uid'] AS uid, from_unixtime(MAX(time),'yyyy-MM-dd' ) AS registered_day
FROM register
GROUP BY v['uid']
HAVING from_unixtime(MAX(time),'yyyy-MM-dd' ) <= date_sub('2012-07-20', 7 )
) t2
ON
(t1.uid=t2.uid)
JOIN
(
SELECT uid, COUNT(*) AS login_times
FROM
(
SELECT v['uid'] AS uid, from_unixtime(time,'yyyy-MM-dd' ) AS login_day, COUNT(*) AS login_times_each_day
FROM login
GROUP BY v['uid'], from_unixtime(time,'yyyy-MM-dd' )
) tmp
GROUP BY uid
HAVING 1 < COUNT(*)
) t3
ON
(t2.uid=t3.uid)
) continue_group_all
ON
(continue_group.type=continue_group_all.type)
ORDER BY login_interval
"
-- 4.1 日付をセグメントにした退会ユーザーの分布 --
td query -w -d your_app -f csv -o dist_of_dead_cnt_segmented_by_date.csv "
SELECT IF(left_day IS NULL,registered_day,left_day) AS day, registered_cnt, left_cnt, 1-registered_cnt/left_cnt AS increasing_rate, registered_cnt-left_cnt AS diff
FROM
(
SELECT latest_login AS left_day, COUNT(*) AS left_cnt
FROM
(
SELECT v['uid'] AS uid, from_unixtime(MAX(time),'yyyy-MM-dd' ) AS latest_login, datediff('2012-07-20',from_unixtime(MAX(time),'yyyy-MM-dd'))
FROM login
GROUP BY v['uid']
HAVING 7 <= datediff('2012-07-20',from_unixtime(MAX(time),'yyyy-MM-dd'))
) t1
GROUP BY latest_login
) o1
FULL OUTER JOIN
(
SELECT registered_day, COUNT(*) AS registered_cnt
FROM
(
SELECT v['uid'] AS uid, from_unixtime(MAX(time),'yyyy-MM-dd' ) AS registered_day
FROM register
GROUP BY v['uid']
) t
GROUP BY registered_day
) o2
ON
(o1.left_day=o2.registered_day)
ORDER BY day
"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment