Last active
August 19, 2019 05:11
-
-
Save FBosler/24169adbc0cfba62381228fc6c8b4026 to your computer and use it in GitHub Desktop.
Generates dummy data for the cohorts project.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def generate_dummy_dataframe(
        dummy_products,
        dummy_customers,
        dummy_customer_types=('company', 'private', 'government'),
        first_date=datetime.datetime(2014, 1, 1),
        last_date=datetime.datetime(2018, 12, 31),
        data_points=1000):
    """Build a DataFrame of randomly generated orders.

    Each customer is assigned one random customer type and each product one
    random price up front; every order row then draws a random date,
    customer, product and order size.

    Args:
        dummy_products: Sequence of products to sample from.
        dummy_customers: Sequence of customers to sample from.
        dummy_customer_types: Customer types to assign to customers at
            random.
        first_date: First possible order date (inclusive).
        last_date: Last possible order date (inclusive).
        data_points: Number of order rows to generate.

    Returns:
        A ``pandas.DataFrame`` with the columns ``order_id``, ``order_date``,
        ``customer``, ``product``, ``order_size``, ``customer_type``,
        ``product_price`` and ``basket_size``
        (``order_size * product_price``).
    """
    # Honor the caller-supplied types instead of a hard-coded list.
    customer_types = list(dummy_customer_types)
    customer_type = {
        customer: np.random.choice(customer_types)
        for customer in dummy_customers
    }
    # randint's upper bound is exclusive: prices fall in 100..9999.
    product_prices = {
        product: np.random.randint(100, 10000)
        for product in dummy_products
    }

    # The candidate dates do not change between rows, so build them once
    # rather than once per data point.
    possible_dates = pd.date_range(first_date, last_date)

    # Columns are still filled one draw at a time, in the original order, so
    # results under a fixed np.random seed stay the same.
    df = pd.DataFrame({
        'order_id': [generate_dummy_order_id() for _ in range(data_points)],
        'order_date': [np.random.choice(possible_dates) for _ in range(data_points)],
        'customer': [np.random.choice(dummy_customers) for _ in range(data_points)],
        'product': [np.random.choice(dummy_products) for _ in range(data_points)],
        # Upper bound exclusive: order sizes fall in 1..4.
        'order_size': [np.random.randint(1, 5) for _ in range(data_points)],
    })

    df['customer_type'] = df['customer'].map(customer_type)
    df['product_price'] = df['product'].map(product_prices)
    df['basket_size'] = df['order_size'] * df['product_price']
    return df
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment