Skip to content

Instantly share code, notes, and snippets.

@karamanbk
Created May 3, 2019 15:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save karamanbk/e5ffa4f99ae8c49b4aaf9f0c1f941da9 to your computer and use it in GitHub Desktop.
Save karamanbk/e5ffa4f99ae8c49b4aaf9f0c1f941da9 to your computer and use it in GitHub Desktop.
# A customer counts as active in a month when they have a revenue row for it,
# so start from total revenue per (customer, month) pair.
revenue_by_customer_month = tx_uk.groupby(['CustomerID', 'InvoiceYearMonth'])['Revenue']
tx_user_purchase = revenue_by_customer_month.sum().reset_index()

# Retention matrix: one row per customer, one column per month. Each cell is
# the number of tx_user_purchase rows for that pair; because the groupby above
# leaves one row per pair, a cell is 1 if the customer purchased that month
# and 0 otherwise. reset_index() moves CustomerID into the first column.
tx_retention = pd.crosstab(
    index=tx_user_purchase['CustomerID'],
    columns=tx_user_purchase['InvoiceYearMonth'],
).reset_index()

# Preview of the first rows (only displayed when run as a notebook cell).
tx_retention.head()
# Build a list of dicts holding the Total and Retained user counts per month.
#
# NOTE(review): after reset_index() column 0 is CustomerID and column 1 is the
# first month, so slicing from 2 leaves the first month out of every
# comparison (the earliest pair compared is 2nd month -> 3rd month).
# Confirm this is intended before changing it; it alters the plotted range.
months = tx_retention.columns[2:]
retention_array = []

# Walk consecutive (previous month, current month) pairs.
for prev_month, selected_month in zip(months[:-1], months[1:]):
    active_now = tx_retention[selected_month] > 0
    active_before = tx_retention[prev_month] > 0

    retention_array.append({
        'InvoiceYearMonth': int(selected_month),
        # Everyone with activity in the current month.
        'TotalUserCount': tx_retention[selected_month].sum(),
        # Of those, the ones who were also active in the previous month.
        'RetainedUserCount': tx_retention.loc[
            active_now & active_before, selected_month
        ].sum(),
    })
# Turn the per-month records into a DataFrame. This rebinds tx_retention:
# from here on it holds the monthly summary, not the customer/month matrix.
tx_retention = pd.DataFrame(retention_array)

# Retention rate = users retained from the previous month / users active this month.
retained_users = tx_retention['RetainedUserCount']
total_users = tx_retention['TotalUserCount']
tx_retention['RetentionRate'] = retained_users / total_users
# Plot the monthly retention rate.
# Only months before 201112 are charted (presumably because the last month in
# the data is incomplete -- TODO confirm). Filter once and reuse for both axes.
retention_to_plot = tx_retention.query("InvoiceYearMonth<201112")

retention_trace = go.Scatter(
    x=retention_to_plot['InvoiceYearMonth'],
    y=retention_to_plot['RetentionRate'],
    name="organic",
)
plot_data = [retention_trace]

# A categorical x-axis keeps the YYYYMM values as evenly spaced labels rather
# than positions on a numeric scale.
plot_layout = go.Layout(
    title='Monthly Retention Rate',
    xaxis=dict(type="category"),
)

fig = go.Figure(data=plot_data, layout=plot_layout)
pyoff.iplot(fig)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment