karamanbk

## g1_m_revenue_growth.py
#using pct_change() function to see monthly percentage change
tx_revenue['MonthlyGrowth'] = tx_revenue['Revenue'].pct_change()

#showing first 5 rows
tx_revenue.head()

#visualization - line graph
plot_data = [
    go.Scatter(
        x=tx_revenue.query("InvoiceYearMonth < 201112")['InvoiceYearMonth'],

## g1_m_order.py
#create a new dataframe for no. of order by using quantity field
tx_monthly_sales = tx_uk.groupby('InvoiceYearMonth')['Quantity'].sum().reset_index()

#print the dataframe
tx_monthly_sales

#plot
plot_data = [
    go.Bar(
        x=tx_monthly_sales['InvoiceYearMonth'],

## g1_m_avg_rev.py
# create a new dataframe for average revenue by taking the mean of it
tx_monthly_order_avg = tx_uk.groupby('InvoiceYearMonth')['Revenue'].mean().reset_index()

#print the dataframe
tx_monthly_order_avg

#plot the bar chart
plot_data = [
    go.Bar(
        x=tx_monthly_order_avg['InvoiceYearMonth'],

## g1_m_active_users.py
#creating a new dataframe with UK customers only
tx_uk = tx_data.query("Country=='United Kingdom'").reset_index(drop=True)

#creating monthly active customers dataframe by counting unique Customer IDs
tx_monthly_active = tx_uk.groupby('InvoiceYearMonth')['CustomerID'].nunique().reset_index()

#print the dataframe
tx_monthly_active

#plotting the output

## g1_new_user.py
#create a dataframe contaning CustomerID and first purchase date
tx_min_purchase = tx_uk.groupby('CustomerID').InvoiceDate.min().reset_index()
tx_min_purchase.columns = ['CustomerID','MinPurchaseDate']
tx_min_purchase['MinPurchaseYearMonth'] = tx_min_purchase['MinPurchaseDate'].map(lambda date: 100*date.year + date.month)

#merge first purchase date column to our main dataframe (tx_uk)
tx_uk = pd.merge(tx_uk, tx_min_purchase, on='CustomerID')

tx_uk.head()

## g1_m_retention.py
#identify which users are active by looking at their revenue per month
tx_user_purchase = tx_uk.groupby(['CustomerID','InvoiceYearMonth'])['Revenue'].sum().reset_index()

#create retention matrix with crosstab
tx_retention = pd.crosstab(tx_user_purchase['CustomerID'], tx_user_purchase['InvoiceYearMonth']).reset_index()

tx_retention.head()

#create an array of dictionary which keeps Retained & Total User count for each month
months = tx_retention.columns[2:]

## g1_jpn.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                karamanbk
                / g1_jpn.ipynb
            
            
              Created
              May 3, 2019 17:15
            
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## g2_g1_recap.py
# import libraries
from datetime import datetime, timedelta
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from __future__ import division

import plotly.plotly as py

## g2_calc_recency.py
#create a generic user dataframe to keep CustomerID and new segmentation scores
tx_user = pd.DataFrame(tx_data['CustomerID'].unique())
tx_user.columns = ['CustomerID']

#get the max purchase date for each customer and create a dataframe with it
tx_max_purchase = tx_uk.groupby('CustomerID').InvoiceDate.max().reset_index()
tx_max_purchase.columns = ['CustomerID','MaxPurchaseDate']

#we take our observation point as the max invoice date in our dataset
tx_max_purchase['Recency'] = (tx_max_purchase['MaxPurchaseDate'].max() - tx_max_purchase['MaxPurchaseDate']).dt.days

## g2_recency_cluster.py
#build 4 clusters for recency and add it to dataframe
kmeans = KMeans(n_clusters=4)
kmeans.fit(tx_user[['Recency']])
tx_user['RecencyCluster'] = kmeans.predict(tx_user[['Recency']])

#function for ordering cluster numbers
def order_cluster(cluster_field_name, target_field_name,df,ascending):
    new_cluster_field_name = 'new_' + cluster_field_name
    df_new = df.groupby(cluster_field_name)[target_field_name].mean().reset_index()
    df_new = df_new.sort_values(by=target_field_name,ascending=ascending).reset_index(drop=True)
	#using pct_change() function to see monthly percentage change
	tx_revenue['MonthlyGrowth'] = tx_revenue['Revenue'].pct_change()

	#showing first 5 rows
	tx_revenue.head()

	#visualization - line graph
	plot_data = [
	go.Scatter(
	x=tx_revenue.query("InvoiceYearMonth < 201112")['InvoiceYearMonth'],
	#create a new dataframe for no. of order by using quantity field
	tx_monthly_sales = tx_uk.groupby('InvoiceYearMonth')['Quantity'].sum().reset_index()

	#print the dataframe
	tx_monthly_sales

	#plot
	plot_data = [
	go.Bar(
	x=tx_monthly_sales['InvoiceYearMonth'],
	# create a new dataframe for average revenue by taking the mean of it
	tx_monthly_order_avg = tx_uk.groupby('InvoiceYearMonth')['Revenue'].mean().reset_index()

	#print the dataframe
	tx_monthly_order_avg

	#plot the bar chart
	plot_data = [
	go.Bar(
	x=tx_monthly_order_avg['InvoiceYearMonth'],
	#creating a new dataframe with UK customers only
	tx_uk = tx_data.query("Country=='United Kingdom'").reset_index(drop=True)

	#creating monthly active customers dataframe by counting unique Customer IDs
	tx_monthly_active = tx_uk.groupby('InvoiceYearMonth')['CustomerID'].nunique().reset_index()

	#print the dataframe
	tx_monthly_active

	#plotting the output
	#create a dataframe contaning CustomerID and first purchase date
	tx_min_purchase = tx_uk.groupby('CustomerID').InvoiceDate.min().reset_index()
	tx_min_purchase.columns = ['CustomerID','MinPurchaseDate']
	tx_min_purchase['MinPurchaseYearMonth'] = tx_min_purchase['MinPurchaseDate'].map(lambda date: 100*date.year + date.month)

	#merge first purchase date column to our main dataframe (tx_uk)
	tx_uk = pd.merge(tx_uk, tx_min_purchase, on='CustomerID')

	tx_uk.head()
	#identify which users are active by looking at their revenue per month
	tx_user_purchase = tx_uk.groupby(['CustomerID','InvoiceYearMonth'])['Revenue'].sum().reset_index()

	#create retention matrix with crosstab
	tx_retention = pd.crosstab(tx_user_purchase['CustomerID'], tx_user_purchase['InvoiceYearMonth']).reset_index()

	tx_retention.head()

	#create an array of dictionary which keeps Retained & Total User count for each month
	months = tx_retention.columns[2:]
	# import libraries
	from datetime import datetime, timedelta
	import pandas as pd
	%matplotlib inline
	import matplotlib.pyplot as plt
	import numpy as np
	import seaborn as sns
	from __future__ import division

	import plotly.plotly as py
	#create a generic user dataframe to keep CustomerID and new segmentation scores
	tx_user = pd.DataFrame(tx_data['CustomerID'].unique())
	tx_user.columns = ['CustomerID']

	#get the max purchase date for each customer and create a dataframe with it
	tx_max_purchase = tx_uk.groupby('CustomerID').InvoiceDate.max().reset_index()
	tx_max_purchase.columns = ['CustomerID','MaxPurchaseDate']

	#we take our observation point as the max invoice date in our dataset
	tx_max_purchase['Recency'] = (tx_max_purchase['MaxPurchaseDate'].max() - tx_max_purchase['MaxPurchaseDate']).dt.days
	#build 4 clusters for recency and add it to dataframe
	kmeans = KMeans(n_clusters=4)
	kmeans.fit(tx_user[['Recency']])
	tx_user['RecencyCluster'] = kmeans.predict(tx_user[['Recency']])

	#function for ordering cluster numbers
	def order_cluster(cluster_field_name, target_field_name,df,ascending):
	new_cluster_field_name = 'new_' + cluster_field_name
	df_new = df.groupby(cluster_field_name)[target_field_name].mean().reset_index()
	df_new = df_new.sort_values(by=target_field_name,ascending=ascending).reset_index(drop=True)