karamanbk

## g2_recency_elbow.py
from sklearn.cluster import KMeans

# Elbow method: fit k-means on Recency for k = 1..9 and record each model's
# inertia_ so the "elbow" can be read off the plotted curve.
sse = {}
# Work on an explicit copy so that adding the "clusters" column below does not
# write into a slice of tx_user.
tx_recency = tx_user[['Recency']].copy()
for k in range(1, 10):
    # Fit on the Recency column only. Fitting on the whole frame would pull
    # the "clusters" labels written by the previous iteration in as a second
    # feature and distort the inertia for every k > 1.
    kmeans = KMeans(n_clusters=k, max_iter=1000).fit(tx_recency[['Recency']])
    tx_recency["clusters"] = kmeans.labels_
    sse[k] = kmeans.inertia_
plt.figure()
plt.plot(list(sse.keys()), list(sse.values()))

## g2_frequency.py
# Frequency: count of non-null InvoiceDate entries per CustomerID.
tx_frequency = (
    tx_uk.groupby('CustomerID')['InvoiceDate']
    .count()
    .reset_index(name='Frequency')
)

# Attach the per-customer frequency to the main user dataframe.
tx_user = tx_user.merge(tx_frequency, on='CustomerID')

#plot the histogram
plot_data = [
    go.Histogram(

## g2_frequency_cluster.py
# Cluster the customers into 4 groups using Frequency as the only feature.
frequency_values = tx_user[['Frequency']]
kmeans = KMeans(n_clusters=4).fit(frequency_values)
tx_user['FrequencyCluster'] = kmeans.predict(frequency_values)

# Renumber the cluster labels with order_cluster (helper defined outside this
# snippet; presumably the trailing True requests ascending order -- confirm).
tx_user = order_cluster('FrequencyCluster', 'Frequency', tx_user, True)

# Summary statistics of Frequency within each cluster.
tx_user.groupby('FrequencyCluster')['Frequency'].describe()

## g2_revenue.py
# Revenue of each transaction row is UnitPrice multiplied by Quantity.
tx_uk['Revenue'] = tx_uk['UnitPrice'].mul(tx_uk['Quantity'])
# Total revenue per CustomerID.
tx_revenue = tx_uk.groupby('CustomerID')['Revenue'].sum().reset_index()

# Attach the per-customer revenue to the main user dataframe.
tx_user = tx_user.merge(tx_revenue, on='CustomerID')

#plot the histogram
plot_data = [
    go.Histogram(

## g2_revenue_clustering.py
# Cluster the customers into 4 groups using Revenue as the only feature.
revenue_values = tx_user[['Revenue']]
kmeans = KMeans(n_clusters=4).fit(revenue_values)
tx_user['RevenueCluster'] = kmeans.predict(revenue_values)

# Renumber the cluster labels with order_cluster (helper defined outside this
# snippet; presumably the trailing True requests ascending order -- confirm).
tx_user = order_cluster('RevenueCluster', 'Revenue', tx_user, True)

#show details of the dataframe

## g2_overall_score.py
# Overall score is the sum of the three per-metric cluster numbers.
tx_user['OverallScore'] = tx_user['RecencyCluster'] + tx_user['FrequencyCluster'] + tx_user['RevenueCluster']
# Show the mean Recency/Frequency/Revenue for each score. The columns are
# selected with a list: indexing a GroupBy with a bare tuple of names is
# deprecated and raises in pandas 2.x.
tx_user.groupby('OverallScore')[['Recency', 'Frequency', 'Revenue']].mean()

## g2_name_clusters.py
# Everyone starts as 'Low-Value'; scores above each threshold are then
# overwritten in increasing order, so the highest matching label wins.
tx_user['Segment'] = 'Low-Value'
for threshold, label in ((2, 'Mid-Value'), (4, 'High-Value')):
    tx_user.loc[tx_user['OverallScore'] > threshold, 'Segment'] = label

## g4_intro.py
#import libraries

from datetime import datetime, timedelta,date
import pandas as pd
%matplotlib inline
from sklearn.metrics import classification_report,confusion_matrix
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from __future__ import division

## g4_cat_eda.py
# Partner: mean of Churn for each distinct Partner value, as a flat dataframe.
df_plot = df_data.groupby('Partner')['Churn'].mean().reset_index()
plot_data = [
    go.Bar(
        x=df_plot['Partner'],
        y=df_plot['Churn'],
        width = [0.5, 0.5],
        marker=dict(
        color=['green', 'blue'])
    )

## g4_num_eda.py
# Monthly charge: cast MonthlyCharges to int on a copy of the data (df_data
# itself is left untouched), then take the mean of Churn per integer value.
df_plot = (
    df_data.assign(MonthlyCharges=df_data['MonthlyCharges'].astype(int))
    .groupby('MonthlyCharges')['Churn']
    .mean()
    .reset_index()
)


plot_data = [
    go.Scatter(
        x=df_plot['MonthlyCharges'],
        y=df_plot['Churn'],
	from sklearn.cluster import KMeans

	# Elbow method: fit k-means on Recency for k = 1..9 and record each model's
	# inertia_ so the "elbow" can be read off the plotted curve.
	sse = {}
	# Work on an explicit copy so that adding the "clusters" column below does
	# not write into a slice of tx_user.
	tx_recency = tx_user[['Recency']].copy()
	for k in range(1, 10):
	    # Loop body re-indented (it had been flattened to the level of the
	    # `for`). Fit on the Recency column only, so the "clusters" labels from
	    # the previous iteration are not used as a second feature.
	    kmeans = KMeans(n_clusters=k, max_iter=1000).fit(tx_recency[['Recency']])
	    tx_recency["clusters"] = kmeans.labels_
	    sse[k] = kmeans.inertia_
	plt.figure()
	plt.plot(list(sse.keys()), list(sse.values()))
	# Frequency: count of non-null InvoiceDate entries per CustomerID.
	tx_frequency = (
	    tx_uk.groupby('CustomerID')['InvoiceDate']
	    .count()
	    .reset_index(name='Frequency')
	)

	# Attach the per-customer frequency to the main user dataframe.
	tx_user = tx_user.merge(tx_frequency, on='CustomerID')

	#plot the histogram
	plot_data = [
	go.Histogram(
	# Cluster the customers into 4 groups using Frequency as the only feature.
	frequency_values = tx_user[['Frequency']]
	kmeans = KMeans(n_clusters=4).fit(frequency_values)
	tx_user['FrequencyCluster'] = kmeans.predict(frequency_values)

	# Renumber the cluster labels with order_cluster (helper defined outside
	# this snippet; presumably the trailing True requests ascending order --
	# confirm).
	tx_user = order_cluster('FrequencyCluster', 'Frequency', tx_user, True)

	# Summary statistics of Frequency within each cluster.
	tx_user.groupby('FrequencyCluster')['Frequency'].describe()
	# Revenue of each transaction row is UnitPrice multiplied by Quantity.
	tx_uk['Revenue'] = tx_uk['UnitPrice'].mul(tx_uk['Quantity'])
	# Total revenue per CustomerID.
	tx_revenue = tx_uk.groupby('CustomerID')['Revenue'].sum().reset_index()

	# Attach the per-customer revenue to the main user dataframe.
	tx_user = tx_user.merge(tx_revenue, on='CustomerID')

	#plot the histogram
	plot_data = [
	go.Histogram(
	# Cluster the customers into 4 groups using Revenue as the only feature.
	revenue_values = tx_user[['Revenue']]
	kmeans = KMeans(n_clusters=4).fit(revenue_values)
	tx_user['RevenueCluster'] = kmeans.predict(revenue_values)

	# Renumber the cluster labels with order_cluster (helper defined outside
	# this snippet; presumably the trailing True requests ascending order --
	# confirm).
	tx_user = order_cluster('RevenueCluster', 'Revenue', tx_user, True)

	#show details of the dataframe
	# Overall score is the sum of the three per-metric cluster numbers.
	tx_user['OverallScore'] = tx_user['RecencyCluster'] + tx_user['FrequencyCluster'] + tx_user['RevenueCluster']
	# Show the mean Recency/Frequency/Revenue for each score. The columns are
	# selected with a list: indexing a GroupBy with a bare tuple of names is
	# deprecated and raises in pandas 2.x.
	tx_user.groupby('OverallScore')[['Recency', 'Frequency', 'Revenue']].mean()
	# Everyone starts as 'Low-Value'; scores above each threshold are then
	# overwritten in increasing order, so the highest matching label wins.
	tx_user['Segment'] = 'Low-Value'
	for threshold, label in ((2, 'Mid-Value'), (4, 'High-Value')):
	    tx_user.loc[tx_user['OverallScore'] > threshold, 'Segment'] = label
	#import libraries

	from datetime import datetime, timedelta,date
	import pandas as pd
	%matplotlib inline
	from sklearn.metrics import classification_report,confusion_matrix
	import matplotlib.pyplot as plt
	import numpy as np
	import seaborn as sns
	from __future__ import division
	# Partner: mean of Churn for each distinct Partner value, as a flat dataframe.
	df_plot = df_data.groupby('Partner')['Churn'].mean().reset_index()
	plot_data = [
	go.Bar(
	x=df_plot['Partner'],
	y=df_plot['Churn'],
	width = [0.5, 0.5],
	marker=dict(
	color=['green', 'blue'])
	)
	# Monthly charge: cast MonthlyCharges to int on a copy of the data (df_data
	# itself is left untouched), then take the mean of Churn per integer value.
	df_plot = (
	    df_data.assign(MonthlyCharges=df_data['MonthlyCharges'].astype(int))
	    .groupby('MonthlyCharges')['Churn']
	    .mean()
	    .reset_index()
	)


	plot_data = [
	go.Scatter(
	x=df_plot['MonthlyCharges'],
	y=df_plot['Churn'],