Skip to content

Instantly share code, notes, and snippets.

from sklearn.cluster import KMeans
sse={}
tx_recency = tx_user[['Recency']]
for k in range(1, 10):
kmeans = KMeans(n_clusters=k, max_iter=1000).fit(tx_recency)
tx_recency["clusters"] = kmeans.labels_
sse[k] = kmeans.inertia_
plt.figure()
plt.plot(list(sse.keys()), list(sse.values()))
#get order counts for each user and create a dataframe with it
tx_frequency = tx_uk.groupby('CustomerID').InvoiceDate.count().reset_index()
tx_frequency.columns = ['CustomerID','Frequency']
#add this data to our main dataframe
tx_user = pd.merge(tx_user, tx_frequency, on='CustomerID')
#plot the histogram
plot_data = [
go.Histogram(
#k-means
kmeans = KMeans(n_clusters=4)
kmeans.fit(tx_user[['Frequency']])
tx_user['FrequencyCluster'] = kmeans.predict(tx_user[['Frequency']])
#order the frequency cluster
tx_user = order_cluster('FrequencyCluster', 'Frequency',tx_user,True)
#see details of each cluster
tx_user.groupby('FrequencyCluster')['Frequency'].describe()
#calculate revenue for each customer
tx_uk['Revenue'] = tx_uk['UnitPrice'] * tx_uk['Quantity']
tx_revenue = tx_uk.groupby('CustomerID').Revenue.sum().reset_index()
#merge it with our main dataframe
tx_user = pd.merge(tx_user, tx_revenue, on='CustomerID')
#plot the histogram
plot_data = [
go.Histogram(
#apply clustering
kmeans = KMeans(n_clusters=4)
kmeans.fit(tx_user[['Revenue']])
tx_user['RevenueCluster'] = kmeans.predict(tx_user[['Revenue']])
#order the cluster numbers
tx_user = order_cluster('RevenueCluster', 'Revenue',tx_user,True)
#show details of the dataframe
#calculate overall score and use mean() to see details
tx_user['OverallScore'] = tx_user['RecencyCluster'] + tx_user['FrequencyCluster'] + tx_user['RevenueCluster']
tx_user.groupby('OverallScore')['Recency','Frequency','Revenue'].mean()
tx_user['Segment'] = 'Low-Value'
tx_user.loc[tx_user['OverallScore']>2,'Segment'] = 'Mid-Value'
tx_user.loc[tx_user['OverallScore']>4,'Segment'] = 'High-Value'
#import libraries
from datetime import datetime, timedelta,date
import pandas as pd
%matplotlib inline
from sklearn.metrics import classification_report,confusion_matrix
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from __future__ import division
#Partner
df_plot = df_data.groupby('Partner').Churn.mean().reset_index()
plot_data = [
go.Bar(
x=df_plot['Partner'],
y=df_plot['Churn'],
width = [0.5, 0.5],
marker=dict(
color=['green', 'blue'])
)
#plotting monthly charge
df_plot = df_data.copy()
df_plot['MonthlyCharges'] = df_plot['MonthlyCharges'].astype(int)
df_plot = df_plot.groupby('MonthlyCharges').Churn.mean().reset_index()
plot_data = [
go.Scatter(
x=df_plot['MonthlyCharges'],
y=df_plot['Churn'],