-
-
Save Evryjazz/c426f9c2ecf5be5708aa5186f9a671c9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#libraries | |
import sys | |
sys.path.append('/opt/insight-repositories/') | |
from pyLBC import * | |
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
from datetime import datetime, timedelta | |
# request set up | |
path_to_write = "/data/insight/jupyter/jupyter_notebooks/julienz/retention/extracts/" | |
ref_path = "/data/insight/jupyter/jupyter_notebooks/julienz/retention/" | |
os.chdir(ref_path) | |
# date | |
if len(sys.argv) > 1: | |
ref_date = toStrDateIso(sys.argv[1]) | |
else: | |
ref_date = date.today() | |
current_date = toStrDateIso(ref_date + timedelta(days = -2)) | |
previous_date = toStrDateIso(ref_date + timedelta(days = -3)) | |
# environment | |
responsive = "&space={s:562498}" | |
ios = "&space={s:562494}" | |
android = "&space={s:562490}" | |
env_list = [responsive, ios, android] | |
env_name = ["responsive", "ios", "android"] | |
env_custom_var = ["cl_364618", "cl_362815", "cl_362806"] | |
def uv_cat(df_row): | |
# user categorization as new column | |
if df_row.new_visitor_previous_date == "-" and pd.isnull(df_row.new_visitor_current_date): | |
return "lost" | |
if df_row.new_visitor_previous_date == "New Visitors" and pd.isnull(df_row.new_visitor_current_date): | |
return "lost" | |
if df_row.new_visitor_previous_date == "New Visitors" and df_row.new_visitor_current_date == "-": | |
return "loyal" | |
if df_row.new_visitor_previous_date == "-" and df_row.new_visitor_current_date == "-": | |
return "loyal" | |
if pd.isnull(df_row.new_visitor_previous_date) and df_row.new_visitor_current_date == "New Visitors": | |
return "new" | |
# xiti request for all platform | |
for idx, env in enumerate(env_list): | |
current_date_name = 'df_{}_{}'.format(env_name[idx], current_date.replace('-', '_')) | |
previous_date_name = 'df_{}_{}'.format(env_name[idx], previous_date.replace('-', '_')) | |
print ('getting {} information for {}'.format(env_name[idx], current_date)) | |
# xiti request execution | |
sub_request = "columns={d_uv_id," + env_custom_var[idx] + ",m_vu}"\ | |
"&sort={-m_vu}"\ | |
+ env\ | |
+ "&period={D:" + current_date + "}" | |
xiti_get_results_parallel_page(sub_request, path_to_write, current_date_name, "julien.zanni+databot@schibsted.com", "Mpd32chiloelbc$") | |
# file concatenation and folder cleaning | |
os.system("sh /opt/insight-repositories/pyLBC/reporting/concat_and_clean.sh " + current_date_name) | |
for idx, env in enumerate(env_list): | |
current_date_name = 'df_{}_{}'.format(env_name[idx], current_date.replace('-', '_')) | |
previous_date_name = 'df_{}_{}'.format(env_name[idx], previous_date.replace('-', '_')) | |
print ('analyzing {} information for {}'.format(env_name[idx], current_date)) | |
# make previous_date and current_date global csv as df | |
previous_date_name = 'global_' + previous_date_name + '.csv' | |
df_previous_date = pd.read_csv(path_to_write + previous_date_name) | |
current_date_name = 'global_' + current_date_name + '.csv' | |
df_current_date = pd.read_csv(path_to_write + current_date_name) | |
# outer join and rename columns | |
print ('merging {} and {}'.format(previous_date, current_date)) | |
df_merge = pd.merge(df_previous_date, df_current_date, how="outer", on="d_uv_id", suffixes=["_previous_date", "_current_date"]) | |
# rewriting columns | |
df_merge = df_merge.rename(columns={env_custom_var[idx] + '_previous_date': 'new_visitor_previous_date', | |
env_custom_var[idx] + '_current_date': 'new_visitor_current_date'}) | |
print ('renaming {} columns for {}'.format(env_name[idx], current_date)) | |
# apply function () | |
print ('applying uv_cat column to df_merge for {}'.format(current_date_name)) | |
df_merge['uv_cat'] = df_merge.apply(uv_cat, axis=1) | |
# retention calculation | |
print ('retention calculation') | |
active_user_previous_date = df_previous_date.d_uv_id.nunique() | |
active_user_current_date = df_current_date.d_uv_id.nunique() | |
lost = len(df_merge[df_merge.uv_cat == 'lost']) | |
loyal = len(df_merge[df_merge.uv_cat == 'loyal']) | |
new = len(df_merge[df_merge.uv_cat == 'new']) | |
retention = active_user_current_date - new / active_user_previous_date * 100 | |
# create tmp dataframe | |
data_tmp = {'retention': retention, 'platform' : env_name[idx]} | |
df_tmp = pd.DataFrame(index=[current_date], data=data_tmp) | |
# append date and retention value to the csv | |
df_tmp.to_csv('daily_retention.csv', mode='a', header=False) | |
print ('daily_retention.csv successfully updated with {} data'.format(current_date)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment