Skip to content

Instantly share code, notes, and snippets.

View mzumi's full-sized avatar
🏠
Working from home

mzumi

🏠
Working from home
View GitHub Profile
import pandas as pd
# Load the three section-3 CSV logs, parsing each file's date column as datetimes.
# The files are read in the order listed: dau, dpu, install.
dau, dpu, install = (
    pd.read_csv(csv_path, parse_dates=[date_column])
    for csv_path, date_column in (
        ("section3-dau.csv", "log_date"),
        ("section3-dpu.csv", "log_date"),
        ("section3-install.csv", "install_date"),
    )
)
# Join the install records onto the dau rows, matching on app and user.
dau_install = dau.merge(install, on=['app_name', 'user_id'])
# Left-join the payment records onto that result so rows without a matching
# payment record are kept; every missing value in the merged frame becomes 0.
user_info = dau_install.merge(
    dpu,
    how='left',
    on=['log_date', 'app_name', 'user_id'],
).fillna(0)
# Add 'YYYY-MM' month labels for the access date and the install date.
# The string form of each date starts with 'YYYY-MM', so the first seven
# characters are the year-month. Each column is derived once, vectorised.
user_info['log_month'] = user_info['log_date'].astype(str).str[:7]
user_info['install_month'] = user_info['install_date'].astype(str).str[:7]
# Flag rows whose access month equals the install month as new users.
user_info['new_user'] = user_info['log_month'] == user_info['install_month']
# Total payment per app and access month, with one column per new_user flag.
# Computed once and reused for the chart below.
monthly_payment = (
    user_info.groupby(['app_name', 'log_month', 'new_user'])['payment']
    .sum()
    .unstack()
)
# Stacked bar chart of the monthly totals, split by the new_user flag.
monthly_payment.plot(kind='bar', stacked=True)
# Keep only the rows on which a payment was recorded.
purchasers = user_info[user_info['payment'] > 0]
# Payment total per app, access month and user, as a flat DataFrame.
payment_info = purchasers.groupby(['app_name', 'log_month', 'user_id'], as_index=False)['payment'].sum()
# Round each total down to the lower edge of its 2000-wide band (0, 2000, 4000, ...).
# Floor division is required here: with true division, x / 2000 * 2000 simply
# returns x again and no banding takes place.
payment_info['payment_band'] = payment_info['payment'].astype(int) // 2000 * 2000