This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
import datetime | |
import string | |
import names | |
import matplotlib.pyplot as plt | |
import seaborn as sns |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Adjectives used as the first half of generated dummy names.
# Every entry is listed exactly once: generate_dummy_names sizes its capacity
# check as len(adj) * len(sub), which overstates the number of distinct names
# when an adjective is repeated.
ADJECTIVES = [
    'cool', 'smart', 'beautiful', 'funky', 'dorky', 'babyish', 'back', 'bad',
    'baggy', 'bare', 'barren', 'calculating', 'calm', 'candid', 'canine',
    'capital', 'carefree', 'hairy', 'half', 'handmade', 'handsome', 'handy',
    'crazy', 'deliberate',
]
# Pool of unique, lower-cased first names drawn from the `names` package.
# 10000 random draws are de-duplicated, so the final length varies from run to
# run; sorting gives a stable order for any given set of drawn names.
PEOPLE = sorted({names.get_first_name().lower() for _ in range(10000)})
PRODUCTS = [ | |
'airplane','banana','train','bow','map','skull' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def generate_dummy_names(adj, sub, number_names=10):
    """
    Generate random name combinations of the provided adjectives and subjects.

    Names look like '<adjective>_<subject>', for example 'cool_harry' or
    'strong_kate' for:

        generate_dummy_names(adj=['cool', 'strong'], sub=['harry', 'kate'], number_names=3)

    Parameters:
        adj: sequence of adjective strings.
        sub: sequence of subject strings.
        number_names: how many names are requested (default 10).

    Raises:
        ValueError: if number_names exceeds len(adj) * len(sub).
    """
    max_names = len(adj) * len(sub)
    if number_names > max_names:
        # Report the same limit the guard enforces (requests equal to the
        # limit are accepted).
        raise ValueError(
            f"Can at most generate {max_names} names, "
            "increase adj or sub to allow for more names"
        )
    # NOTE(review): the name-generation logic that follows this guard is not
    # part of this excerpt.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def generate_dummy_dataframe(
        dummy_products,
        dummy_customers,
        dummy_customer_types=None,
        first_date=datetime.datetime(2014, 1, 1),
        last_date=datetime.datetime(2018, 12, 31),
        data_points=1000):
    """
    Set up the random lookups used to build the dummy data.

    Parameters:
        dummy_products: iterable of product names; each gets a random integer
            price drawn from [100, 10000).
        dummy_customers: iterable of customer names; each gets one randomly
            chosen customer type.
        dummy_customer_types: customer types to choose from. Defaults to
            ['company', 'private', 'government'] when None.
        first_date, last_date, data_points: not used in the part of the
            function visible here.
    """
    if dummy_customer_types is None:
        # Fresh list per call instead of a shared mutable default argument.
        dummy_customer_types = ['company', 'private', 'government']
    # Draw from the caller-supplied types rather than a hard-coded list, so the
    # dummy_customer_types argument actually takes effect.
    customer_type = {
        customer: np.random.choice(dummy_customer_types)
        for customer in dummy_customers
    }
    product_prices = {
        product: np.random.randint(100, 10000)
        for product in dummy_products
    }
    # NOTE(review): the code that consumes customer_type and product_prices is
    # not part of this excerpt.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Create 15000 fake customer names, e.g. ['smart_randy', 'canine_carol', 'carefree_cheryl', ...]
customers = generate_dummy_names(ADJECTIVES, PEOPLE, 15000)
# Create 10 fake product names, e.g. ['carefree_skull', 'bare_map', 'calculating_banana', 'funky_train', 'cool_train', ...]
products = generate_dummy_names(ADJECTIVES, PRODUCTS, 10)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Earliest order date per customer, as a one-column frame indexed by customer.
customer_data = pd.DataFrame(df.groupby('customer')['order_date'].min())
customer_data.columns = ['customer_first_order']
customer_data.head(2)
# Attach each customer's first order date to every one of their orders.
df = pd.merge(df, customer_data.reset_index(), on='customer')
df.head(2)
# Label each order: rows whose order_date equals the customer's first order
# date are 'first', all others are 'repeat'.
df['type_of_order'] = np.where(df['order_date'] != df['customer_first_order'], 'repeat', 'first')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def generate_cohort_analysis(df, metric, record_type='all', period_agg='quarterly', fig=True, size=10, save_fig=True):
    """
    Run a cohort analysis on a working copy of df.

    Parameters:
        df: orders frame; must have a customer_type column when record_type
            is not 'all'. The caller's frame is not modified by the visible
            code, which only works on a copy.
        metric: 'number_of_orders', 'number_of_items_bought' or
            'total_order_value'.
        record_type: 'all', or one specific customer_type
            ('private', 'company', 'government') to restrict the rows to.
        fig: controls the output of a figure, True by default.
            NOTE(review): the earlier docstring called this flag `no_fig` and
            described True as "no figure" - confirm the polarity against the
            part of the function not shown here.
        period_agg, size, save_fig: not used in the part of the function
            visible here.
    """
    if record_type == 'all':
        dataset = df.copy()
    else:
        # Copy only the filtered rows; a full copy beforehand would be thrown away.
        dataset = df[df.customer_type == record_type].copy()
    # NOTE(review): the analysis steps operating on dataset are not part of
    # this excerpt.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build the dummy data from the generated product and customer names.
df = generate_dummy_dataframe(products, customers)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def _generate_cohorts(dataset, metric):
    """
    Aggregate orders per (cohort, order_period) and pivot one metric by cohort.

    Parameters:
        dataset: frame with columns cohort, order_period, order_id,
            order_size and basket_size.
        metric: 'number_of_orders' (distinct order_id count),
            'number_of_items_bought' (sum of order_size) or
            'total_order_value' (sum of basket_size).
    """
    # String aggregation names avoid passing the builtin `sum` to pandas.
    cohorts = dataset.groupby(['cohort', 'order_period']).agg({
        'order_id': 'nunique',
        'order_size': 'sum',
        'basket_size': 'sum',
    })
    # Rename by source column so the mapping does not depend on column order.
    cohorts = cohorts.rename(columns={
        'order_id': 'number_of_orders',
        'order_size': 'number_of_items_bought',
        'basket_size': 'total_order_value',
    })
    # Move index level 0 (cohort) into the columns; rows stay keyed by order_period.
    cohorts = cohorts[metric].unstack(0)
    # NOTE(review): whatever is done with the pivoted cohorts afterwards is not
    # part of this excerpt.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Example runs: vary the metric, the period aggregation and the customer type.
generate_cohort_analysis(df=df, metric='number_of_orders')
generate_cohort_analysis(df=df, metric='number_of_orders', period_agg='monthly')
generate_cohort_analysis(df=df, metric='number_of_items_bought')
generate_cohort_analysis(df=df, metric='number_of_items_bought', record_type='private')
...
OlderNewer