# FBosler

## cohorts_imports.py
import pandas as pd
import numpy as np
import datetime
import string
import names
import matplotlib.pyplot as plt
import seaborn as sns

## seed_data.py
# Adjectives used as the first half of the generated dummy names.
# Every entry is listed exactly once: a repeated adjective adds no new
# adjective/subject combination but is still counted by len(ADJECTIVES),
# which would overstate how many distinct names can be built.
ADJECTIVES = [
    'cool', 'smart', 'beautiful', 'funky', 'dorky', 'babyish', 'back', 'bad',
    'baggy', 'bare', 'barren', 'calculating', 'calm', 'candid', 'canine',
    'capital', 'carefree', 'hairy', 'half', 'handmade', 'handsome', 'handy',
    'crazy', 'deliberate',
]

# Distinct lower-cased first names: 10000 draws collapsed through a set.
PEOPLE = list({names.get_first_name().lower() for _ in range(10000)})

# Subjects that are combined with ADJECTIVES to build dummy product names.
# NOTE(review): the list had no closing bracket in the source; it is closed
# here after the last visible entry - confirm no entries are missing.
PRODUCTS = [
    'airplane', 'banana', 'train', 'bow', 'map', 'skull',
]

## helper_functions.py
def generate_dummy_names(adj,sub,number_names=10):
    """
    Generate random name combinations of the provided adjectives and subjects.

    Names have the form '<adjective>_<subject>'. The result is random, so the
    call below is an illustration rather than a doctest:

        generate_dummy_names(adj=['cool','strong'], sub=['harry','kate'], number_names=3)
        # e.g. ['cool_harry', 'strong_kate', 'strong_harry']

    adj: sequence of adjectives
    sub: sequence of subjects
    number_names: number of names requested (default 10)

    Raises ValueError when more names are requested than there are
    adjective/subject pairs, i.e. number_names > len(adj) * len(sub).
    """
    max_names = len(adj) * len(sub)
    if number_names > max_names:
        # The limit is inclusive: exactly max_names names may be requested,
        # so that is the number reported to the caller.
        raise ValueError(f"""
            Can at most generate {max_names} names, increase adj or sub to allow for more names
            """)

## generate_dummy_data.py
def generate_dummy_dataframe(
    dummy_products,
    dummy_customers,
    dummy_customer_types=None,
    first_date=datetime.datetime(2014,1,1),
    last_date=datetime.datetime(2018,12,31),
    data_points=1000):
    """
    Draw the random per-customer and per-product attributes of the dummy data.

    dummy_products: iterable of product names; each gets a random integer
        price from np.random.randint(100, 10000)
    dummy_customers: iterable of customer names; each gets a randomly chosen
        customer type
    dummy_customer_types: customer types to choose from; None (default) means
        ['company','private','government']
    first_date, last_date, data_points: not used in the visible part of this
        function (NOTE(review): presumably the order date range and the
        number of rows to generate - confirm)

    Raises ValueError when dummy_customer_types is empty.
    """
    # None sentinel instead of a list default, so no list object is shared
    # between calls.
    if dummy_customer_types is None:
        dummy_customer_types = ['company','private','government']
    if len(dummy_customer_types) == 0:
        raise ValueError("dummy_customer_types must contain at least one customer type")

    # Sample from the caller-supplied types rather than a hard-coded list.
    customer_type = {customer:np.random.choice(dummy_customer_types) for customer in dummy_customers}
    product_prices = {product:np.random.randint(100,10000) for product in dummy_products}

## dummy_data.py
# 15000 fake customer names built from adjective/first-name pairs,
# e.g. ['smart_randy', 'canine_carol', 'carefree_cheryl',  ...]
customers = generate_dummy_names(adj=ADJECTIVES, sub=PEOPLE, number_names=15000)

# 10 fake product names built from adjective/product pairs,
# e.g. ['carefree_skull', 'bare_map', 'calculating_banana', 'funky_train', 'cool_train', ...]
products = generate_dummy_names(adj=ADJECTIVES, sub=PRODUCTS, number_names=10)

## cohorts_enrichment.py
# earliest order date per customer, as a one-column frame indexed by customer
customer_data = (
    df.groupby('customer')['order_date']
    .min()
    .to_frame(name='customer_first_order')
)
customer_data.head(2)

# attach each customer's first order date to every one of their orders
df = df.merge(customer_data.reset_index(), on='customer')
df.head(2)

# an order placed on the customer's first order date is 'first', any other is 'repeat'
df['type_of_order'] = np.where(df['order_date'] == df['customer_first_order'], 'first', 'repeat')

## generate_cohorts.py
def generate_cohort_analysis(df, metric, record_type='all', period_agg='quarterly', fig=True, size=10, save_fig=True):
    """
    Select the records for a cohort analysis of `metric`.

    For metric use 'number_of_orders', 'number_of_items_bought' or 'total_order_value'
    For record_type use 'all' or a specific customer_type ['private','company','government']
    For period_agg use e.g. 'quarterly' (default) or 'monthly'
    fig controls the output of a figure, by default True
    (NOTE(review): whether True shows or suppresses the figure is not visible here - confirm)
    size and save_fig are not used in the visible part of this function
    (NOTE(review): presumably figure size and whether the figure is saved - confirm)

    The visible part works on a copy, so the passed df is left untouched.
    """
    # Copy exactly once: the whole frame, or only the rows of the requested customer type.
    if record_type == 'all':
        dataset = df.copy()
    else:
        dataset = df[df.customer_type == record_type].copy()

## execute_create_dummy_data.py
# Build the dummy data from the generated product and customer names
# (products first, customers second, matching the generator's signature).
# NOTE(review): this previously called generate_dummy_data, which is not
# defined in the visible source; generate_dummy_dataframe is - confirm.
df = generate_dummy_dataframe(products, customers)

## cohort_analysis_helper.py
def _generate_cohorts(dataset, metric):
    cohorts = dataset.groupby(['cohort','order_period']).agg({
        'order_id':pd.Series.nunique,
        'order_size':sum,
        'basket_size':sum
    })
    cohorts.columns = ['number_of_orders','number_of_items_bought','total_order_value']

    cohorts = cohorts[metric].unstack(0)


## samples.py
# Example runs: each entry holds the keyword arguments of one cohort analysis.
for sample_kwargs in (
    {'metric': 'number_of_orders'},
    {'metric': 'number_of_orders', 'period_agg': 'monthly'},
    {'metric': 'number_of_items_bought'},
    {'metric': 'number_of_items_bought', 'record_type': 'private'},
):
    generate_cohort_analysis(df=df, **sample_kwargs)
...
	import pandas as pd
	import numpy as np
	import datetime
	import string
	import names
	import matplotlib.pyplot as plt
	import seaborn as sns
	# Adjectives used as the first half of the generated dummy names.
	# Every entry is listed exactly once: a repeated adjective adds no new
	# adjective/subject combination but is still counted by len(ADJECTIVES),
	# which would overstate how many distinct names can be built.
	ADJECTIVES = [
		'cool', 'smart', 'beautiful', 'funky', 'dorky', 'babyish', 'back', 'bad',
		'baggy', 'bare', 'barren', 'calculating', 'calm', 'candid', 'canine',
		'capital', 'carefree', 'hairy', 'half', 'handmade', 'handsome', 'handy',
		'crazy', 'deliberate',
	]

	# Distinct lower-cased first names: 10000 draws collapsed through a set.
	PEOPLE = list({names.get_first_name().lower() for _ in range(10000)})

	# Subjects that are combined with ADJECTIVES to build dummy product names.
	# NOTE(review): the list had no closing bracket in the source; it is closed
	# here after the last visible entry - confirm no entries are missing.
	PRODUCTS = [
		'airplane', 'banana', 'train', 'bow', 'map', 'skull',
	]
	def generate_dummy_names(adj,sub,number_names=10):
		"""
		Generate random name combinations of the provided adjectives and subjects.

		Names have the form '<adjective>_<subject>'. The result is random, so the
		call below is an illustration rather than a doctest:

			generate_dummy_names(adj=['cool','strong'], sub=['harry','kate'], number_names=3)
			# e.g. ['cool_harry', 'strong_kate', 'strong_harry']

		adj: sequence of adjectives
		sub: sequence of subjects
		number_names: number of names requested (default 10)

		Raises ValueError when more names are requested than there are
		adjective/subject pairs, i.e. number_names > len(adj) * len(sub).
		"""
		max_names = len(adj) * len(sub)
		if number_names > max_names:
			# The limit is inclusive: exactly max_names names may be requested,
			# so that is the number reported to the caller.
			raise ValueError(f"""
	Can at most generate {max_names} names, increase adj or sub to allow for more names
	""")
	def generate_dummy_dataframe(
		dummy_products,
		dummy_customers,
		dummy_customer_types=None,
		first_date=datetime.datetime(2014,1,1),
		last_date=datetime.datetime(2018,12,31),
		data_points=1000):
		"""
		Draw the random per-customer and per-product attributes of the dummy data.

		dummy_products: iterable of product names; each gets a random integer
			price from np.random.randint(100, 10000)
		dummy_customers: iterable of customer names; each gets a randomly chosen
			customer type
		dummy_customer_types: customer types to choose from; None (default) means
			['company','private','government']
		first_date, last_date, data_points: not used in the visible part of this
			function (NOTE(review): presumably the order date range and the
			number of rows to generate - confirm)

		Raises ValueError when dummy_customer_types is empty.
		"""
		# None sentinel instead of a list default, so no list object is shared
		# between calls.
		if dummy_customer_types is None:
			dummy_customer_types = ['company','private','government']
		if len(dummy_customer_types) == 0:
			raise ValueError("dummy_customer_types must contain at least one customer type")

		# Sample from the caller-supplied types rather than a hard-coded list.
		customer_type = {customer:np.random.choice(dummy_customer_types) for customer in dummy_customers}
		product_prices = {product:np.random.randint(100,10000) for product in dummy_products}
	# 15000 fake customer names built from adjective/first-name pairs,
	# e.g. ['smart_randy', 'canine_carol', 'carefree_cheryl', ...]
	customers = generate_dummy_names(adj=ADJECTIVES, sub=PEOPLE, number_names=15000)

	# 10 fake product names built from adjective/product pairs,
	# e.g. ['carefree_skull', 'bare_map', 'calculating_banana', 'funky_train', 'cool_train', ...]
	products = generate_dummy_names(adj=ADJECTIVES, sub=PRODUCTS, number_names=10)
	# earliest order date per customer, as a one-column frame indexed by customer
	customer_data = (
		df.groupby('customer')['order_date']
		.min()
		.to_frame(name='customer_first_order')
	)
	customer_data.head(2)

	# attach each customer's first order date to every one of their orders
	df = df.merge(customer_data.reset_index(), on='customer')
	df.head(2)

	# an order placed on the customer's first order date is 'first', any other is 'repeat'
	df['type_of_order'] = np.where(df['order_date'] == df['customer_first_order'], 'first', 'repeat')
	def generate_cohort_analysis(df, metric, record_type='all', period_agg='quarterly', fig=True, size=10, save_fig=True):
		"""
		Select the records for a cohort analysis of `metric`.

		For metric use 'number_of_orders', 'number_of_items_bought' or 'total_order_value'
		For record_type use 'all' or a specific customer_type ['private','company','government']
		For period_agg use e.g. 'quarterly' (default) or 'monthly'
		fig controls the output of a figure, by default True
		(NOTE(review): whether True shows or suppresses the figure is not visible here - confirm)
		size and save_fig are not used in the visible part of this function
		(NOTE(review): presumably figure size and whether the figure is saved - confirm)

		The visible part works on a copy, so the passed df is left untouched.
		"""
		# Copy exactly once: the whole frame, or only the rows of the requested customer type.
		if record_type == 'all':
			dataset = df.copy()
		else:
			dataset = df[df.customer_type == record_type].copy()
	def _generate_cohorts(dataset, metric):
	cohorts = dataset.groupby(['cohort','order_period']).agg({
	'order_id':pd.Series.nunique,
	'order_size':sum,
	'basket_size':sum
	})
	cohorts.columns = ['number_of_orders','number_of_items_bought','total_order_value']

	cohorts = cohorts[metric].unstack(0)
	# Example runs: each entry holds the keyword arguments of one cohort analysis.
	for sample_kwargs in (
		{'metric': 'number_of_orders'},
		{'metric': 'number_of_orders', 'period_agg': 'monthly'},
		{'metric': 'number_of_items_bought'},
		{'metric': 'number_of_items_bought', 'record_type': 'private'},
	):
		generate_cohort_analysis(df=df, **sample_kwargs)
	...