Skip to content

Instantly share code, notes, and snippets.

View g_series_9.ipynb
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
View g9_twa_dataset.py
# Build a synthetic high-value segment of 20,000 customers: rows 0-9999 form
# the test group and rows 10000-19999 form the control group.
df_hv = pd.DataFrame()
df_hv['customer_id'] = np.arange(20000)
df_hv['segment'] = np.repeat('high-value', 20000)
df_hv['group'] = np.repeat(['test', 'control'], 10000)

# Purchase counts are simulated as Poisson draws, one per customer. Control is
# drawn before test so the random stream is consumed in the same order as before.
df_hv.loc[df_hv['group'] == 'control', 'purchase_count'] = np.random.poisson(lam=0.6, size=10000)
df_hv.loc[df_hv['group'] == 'test', 'purchase_count'] = np.random.poisson(lam=0.8, size=10000)
View g9_owa_dataset.py
# Build a synthetic high-value segment of 30,000 customers in three equal
# groups: A (rows 0-9999), B (rows 10000-19999) and C (rows 20000-29999).
df_hv = pd.DataFrame()
df_hv['customer_id'] = np.arange(30000)
df_hv['segment'] = np.repeat('high-value', 30000)
df_hv['group'] = np.repeat(['A', 'B', 'C'], 10000)

# Purchase counts are simulated as Poisson draws for groups A and B, in that order.
# Nothing in this snippet assigns group C, so its purchase_count stays NaN.
# NOTE(review): the snippet may be truncated - confirm whether a group C draw follows.
df_hv.loc[df_hv['group'] == 'A', 'purchase_count'] = np.random.poisson(lam=0.4, size=10000)
df_hv.loc[df_hv['group'] == 'B', 'purchase_count'] = np.random.poisson(lam=0.6, size=10000)
View g9_density_graph.py
# Pull the purchase counts for each experiment group out of df_hv.
test_results = df_hv.loc[df_hv['group'] == 'test', 'purchase_count']
control_results = df_hv.loc[df_hv['group'] == 'control', 'purchase_count']

# The data series and their legend labels are listed in the same order.
hist_data = [test_results, control_results]
group_labels = ['test', 'control']

# Create the distplot with curve_type set to 'normal' and the rug plot disabled.
# NOTE(review): `ff` is presumably plotly.figure_factory - confirm against the imports.
fig = ff.create_distplot(
    hist_data,
    group_labels,
    bin_size=0.5,
    curve_type='normal',
    show_rug=False,
)
View g_series_9_intro.py
#import libraries
from datetime import datetime, timedelta,date
import pandas as pd
%matplotlib inline
from sklearn.metrics import classification_report,confusion_matrix
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from __future__ import division
from sklearn.cluster import KMeans
View g_series_8.ipynb
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
View g_series_8_intro.py
from datetime import datetime, timedelta,date
import pandas as pd
%matplotlib inline
from sklearn.metrics import classification_report,confusion_matrix
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from __future__ import division
from sklearn.cluster import KMeans
View g_series_7.ipynb
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
View g_series_7_intro.py
from datetime import datetime, timedelta,date
import pandas as pd
%matplotlib inline
from sklearn.metrics import classification_report,confusion_matrix
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from __future__ import division  # required if you use Python 2
from sklearn.cluster import KMeans
View data.csv
We can't make this file beautiful and searchable because it's too large.
recency,history,used_discount,used_bogo,zip_code,is_referral,channel,offer,conversion
10,142.44,1,0,Surburban,0,Phone,Buy One Get One,0
6,329.08,1,1,Rural,1,Web,No Offer,0
7,180.65,0,1,Surburban,1,Web,Buy One Get One,0
9,675.83,1,0,Rural,1,Web,Discount,0
2,45.34,1,0,Urban,0,Web,Buy One Get One,0
6,134.83,0,1,Surburban,0,Phone,Buy One Get One,1
9,280.2,1,0,Surburban,1,Phone,Buy One Get One,0
9,46.42,0,1,Urban,0,Phone,Buy One Get One,0