This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#using pct_change() function to see monthly percentage change | |
tx_revenue['MonthlyGrowth'] = tx_revenue['Revenue'].pct_change() | |
#showing first 5 rows | |
tx_revenue.head() | |
#visualization - line graph | |
plot_data = [ | |
go.Scatter( | |
x=tx_revenue.query("InvoiceYearMonth < 201112")['InvoiceYearMonth'], |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#create a new dataframe for no. of order by using quantity field | |
tx_monthly_sales = tx_uk.groupby('InvoiceYearMonth')['Quantity'].sum().reset_index() | |
#print the dataframe | |
tx_monthly_sales | |
#plot | |
plot_data = [ | |
go.Bar( | |
x=tx_monthly_sales['InvoiceYearMonth'], |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# create a new dataframe for average revenue by taking the mean of it | |
tx_monthly_order_avg = tx_uk.groupby('InvoiceYearMonth')['Revenue'].mean().reset_index() | |
#print the dataframe | |
tx_monthly_order_avg | |
#plot the bar chart | |
plot_data = [ | |
go.Bar( | |
x=tx_monthly_order_avg['InvoiceYearMonth'], |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#creating a new dataframe with UK customers only | |
tx_uk = tx_data.query("Country=='United Kingdom'").reset_index(drop=True) | |
#creating monthly active customers dataframe by counting unique Customer IDs | |
tx_monthly_active = tx_uk.groupby('InvoiceYearMonth')['CustomerID'].nunique().reset_index() | |
#print the dataframe | |
tx_monthly_active | |
#plotting the output |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#create a dataframe contaning CustomerID and first purchase date | |
tx_min_purchase = tx_uk.groupby('CustomerID').InvoiceDate.min().reset_index() | |
tx_min_purchase.columns = ['CustomerID','MinPurchaseDate'] | |
tx_min_purchase['MinPurchaseYearMonth'] = tx_min_purchase['MinPurchaseDate'].map(lambda date: 100*date.year + date.month) | |
#merge first purchase date column to our main dataframe (tx_uk) | |
tx_uk = pd.merge(tx_uk, tx_min_purchase, on='CustomerID') | |
tx_uk.head() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#identify which users are active by looking at their revenue per month | |
tx_user_purchase = tx_uk.groupby(['CustomerID','InvoiceYearMonth'])['Revenue'].sum().reset_index() | |
#create retention matrix with crosstab | |
tx_retention = pd.crosstab(tx_user_purchase['CustomerID'], tx_user_purchase['InvoiceYearMonth']).reset_index() | |
tx_retention.head() | |
#create an array of dictionary which keeps Retained & Total User count for each month | |
months = tx_retention.columns[2:] |
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# import libraries | |
from datetime import datetime, timedelta | |
import pandas as pd | |
%matplotlib inline | |
import matplotlib.pyplot as plt | |
import numpy as np | |
import seaborn as sns | |
from __future__ import division | |
import plotly.plotly as py |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#create a generic user dataframe to keep CustomerID and new segmentation scores | |
tx_user = pd.DataFrame(tx_data['CustomerID'].unique()) | |
tx_user.columns = ['CustomerID'] | |
#get the max purchase date for each customer and create a dataframe with it | |
tx_max_purchase = tx_uk.groupby('CustomerID').InvoiceDate.max().reset_index() | |
tx_max_purchase.columns = ['CustomerID','MaxPurchaseDate'] | |
#we take our observation point as the max invoice date in our dataset | |
tx_max_purchase['Recency'] = (tx_max_purchase['MaxPurchaseDate'].max() - tx_max_purchase['MaxPurchaseDate']).dt.days |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#build 4 clusters for recency and add it to dataframe | |
kmeans = KMeans(n_clusters=4) | |
kmeans.fit(tx_user[['Recency']]) | |
tx_user['RecencyCluster'] = kmeans.predict(tx_user[['Recency']]) | |
#function for ordering cluster numbers | |
def order_cluster(cluster_field_name, target_field_name,df,ascending): | |
new_cluster_field_name = 'new_' + cluster_field_name | |
df_new = df.groupby(cluster_field_name)[target_field_name].mean().reset_index() | |
df_new = df_new.sort_values(by=target_field_name,ascending=ascending).reset_index(drop=True) |
OlderNewer