Skip to content

Instantly share code, notes, and snippets.

@FBosler
Last active August 19, 2019 05:11
Show Gist options
  • Save FBosler/24169adbc0cfba62381228fc6c8b4026 to your computer and use it in GitHub Desktop.
Save FBosler/24169adbc0cfba62381228fc6c8b4026 to your computer and use it in GitHub Desktop.
Generates dummy data for the cohorts project.
def generate_dummy_dataframe(
    dummy_products,
    dummy_customers,
    dummy_customer_types=('company', 'private', 'government'),
    first_date=datetime.datetime(2014, 1, 1),
    last_date=datetime.datetime(2018, 12, 31),
    data_points=1000,
):
    """Build a DataFrame of randomly generated orders.

    Every customer is assigned one random customer type and every product one
    random price; those assignments are then reused for all orders that
    reference the customer or product.

    Args:
        dummy_products: Collection of product identifiers to sample from.
        dummy_customers: Collection of customer identifiers to sample from.
        dummy_customer_types: Customer types to sample from when assigning a
            type to each customer.
        first_date: First calendar day an order may be dated.
        last_date: Last calendar day an order may be dated.
        data_points: Number of order rows to generate.

    Returns:
        A ``pandas.DataFrame`` with one row per order and the columns
        ``order_id``, ``order_date``, ``customer``, ``product``,
        ``order_size``, ``customer_type``, ``product_price`` and
        ``basket_size`` (``order_size * product_price``).

    Note:
        All randomness comes from NumPy's global random state
        (``np.random``), in the same draw order as before, so seeding with
        ``np.random.seed`` beforehand makes the NumPy draws repeatable.
        ``order_id`` values come from ``generate_dummy_order_id``, which is
        defined elsewhere in this file.
    """
    # Honour the caller-supplied types; previously a hard-coded list was used
    # here and the ``dummy_customer_types`` argument was silently ignored.
    customer_types = list(dummy_customer_types)
    customer_type = {
        customer: np.random.choice(customer_types)
        for customer in dummy_customers
    }
    # ``randint`` excludes the upper bound, so prices fall in 100..9999.
    product_prices = {
        product: np.random.randint(100, 10000)
        for product in dummy_products
    }

    # Build the candidate dates once instead of once per generated row.
    candidate_dates = pd.date_range(first_date, last_date)

    df = pd.DataFrame({
        'order_id': [generate_dummy_order_id() for _ in range(data_points)],
        'order_date': [
            np.random.choice(candidate_dates) for _ in range(data_points)
        ],
        'customer': [
            np.random.choice(dummy_customers) for _ in range(data_points)
        ],
        'product': [
            np.random.choice(dummy_products) for _ in range(data_points)
        ],
        # Upper bound is exclusive: order sizes are 1..4.
        'order_size': [np.random.randint(1, 5) for _ in range(data_points)],
    })

    # Derive per-order attributes from the per-customer / per-product lookups.
    df['customer_type'] = df['customer'].map(customer_type)
    df['product_price'] = df['product'].map(product_prices)
    df['basket_size'] = df['order_size'] * df['product_price']
    return df
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment