Skip to content

Instantly share code, notes, and snippets.

@broschke
Created July 6, 2020 18:23
Show Gist options
  • Save broschke/44241aa58f695eba5c4fe1d020b76f0e to your computer and use it in GitHub Desktop.
Save broschke/44241aa58f695eba5c4fe1d020b76f0e to your computer and use it in GitHub Desktop.
import numpy as np
import pandas as pd
import datetime
# Collect the last 7 days (today included) and, right after each one, the
# day exactly 52 weeks (364 days) earlier so weekdays line up year over year.
date = datetime.date.today()
date_list = []
for i in range(7):
    date_list += [
        date - datetime.timedelta(days=i),
        date - datetime.timedelta(days=364 + i),
    ]
# wrap the dates in a dict so they can seed a single-column dataframe
date_dict = dict(date=date_list)
# company brands; 'chain_scale' is paired positionally with 'brand'
brand = {
    'brand': ['Brand_' + str(n) for n in range(1, 11)],
    'chain_scale': [
        'Upper Midscale',
        'Midscale',
        'Midscale',
        'Economy',
        'Midscale',
        'Economy',
        'Upper Midscale',
        'Upscale',
        'Upscale',
        'Economy',
    ],
}
# market types a property can belong to
market_type = {
    'market_type': [
        'Resort',
        'Airport',
        'Urban',
        'Interstate',
        'Small Town',
        'Suburban',
    ],
}
# the 50 states plus the District of Columbia, as postal abbreviations
state = {
    'state': (
        'AK AL AR AZ CA CO CT DC DE FL '
        'GA HI IA ID IL IN KS KY LA MA '
        'MD ME MI MN MO MS MT NC ND NE '
        'NH NJ NM NV NY OH OK OR PA RI '
        'SC SD TN TX UT VA VT WA WI WV WY'
    ).split(),
}
# create chain scale dict for industry data
# labels use the same spelling as brand['chain_scale'] ('Upper Midscale',
# not 'Upper Middle') so industry rows line up with company/compset rows
chain_scale = {'chain_scale': ['Upscale', 'Upper Midscale', 'Midscale', 'Economy']}
def census(state):
    '''
    Look up the census division that a state abbreviation belongs to.
    Parameters:
    :param state: state abbreviation such as 'TX' (or 'DC')
    Returns:
    :returns: (str) census division name, or None when the value is not
        listed under any division
    '''
    divisions = (
        ('East North Central', ('IL', 'IN', 'MI', 'OH', 'WI')),
        ('East South Central', ('AL', 'KY', 'MS', 'TN')),
        ('Middle Atlantic', ('NJ', 'NY', 'PA')),
        ('Mountain', ('AZ', 'CO', 'ID', 'MT', 'NM', 'NV', 'UT', 'WY')),
        ('New England', ('CT', 'MA', 'ME', 'NH', 'RI', 'VT')),
        ('Pacific', ('AK', 'CA', 'HI', 'OR', 'WA')),
        ('South Atlantic', ('DC', 'DE', 'FL', 'GA', 'MD', 'NC', 'SC', 'VA', 'WV')),
        ('West North Central', ('IA', 'KS', 'MN', 'MO', 'ND', 'NE', 'SD')),
        ('West South Central', ('AR', 'LA', 'OK', 'TX')),
    )
    for division, members in divisions:
        if state in members:
            return division
    # no division lists this value
    return None
def merge_frames(df1, df2):
    '''
    Used to create a cartesian product of two dataframes.
    Every row of df1 is paired with every row of df2 by joining on a
    temporary constant key column. The key is added to copies of the
    inputs, so the caller's dataframes are not modified.
    Parameters:
    :param df1: (Pandas df) left dataframe
    :param df2: (Pandas df) right dataframe
    Returns:
    :returns: (Pandas df) dataframe with len(df1) * len(df2) rows holding
        the columns of both inputs (the temporary key is removed)
    '''
    # choose a key name neither frame already uses so that a real column
    # called 'tmp' is not overwritten and then dropped from the result
    key = 'tmp'
    while key in df1.columns or key in df2.columns:
        key = '_' + key
    # assign() returns new frames, leaving df1 and df2 untouched
    left = df1.assign(**{key: '1'})
    right = df2.assign(**{key: '1'})
    df = left.merge(right, on=key)
    return df.drop(key, axis=1)
def random_data(df, rand_type, min, max):
    """
    Draw one random value per row of a dataframe.
    Parameters:
    :param df: (Pandas df) only its length is used, to size the output
    :param rand_type: the string 'randint' draws integers with
        np.random.randint; any other value draws floats with np.random.uniform
    :param min: lower bound passed to the numpy generator
    :param max: upper bound passed to the numpy generator
    Returns:
    :returns: (numpy array) len(df) random values, ready to assign as a column
    """
    n_rows = len(df)
    # everything that is not explicitly 'randint' falls back to uniform floats
    if rand_type != 'randint':
        return np.random.uniform(low=min, high=max, size=n_rows)
    return np.random.randint(min, max, size=n_rows)
# build one small dataframe per dimension
df_ind = pd.DataFrame({'brand': ['Industry']})
df_ind_cs = pd.DataFrame(chain_scale)
df_brand = pd.DataFrame(brand)
df_state = pd.DataFrame(state)
df_date = pd.DataFrame(date_dict)
df_market = pd.DataFrame(market_type)

# industry frame: the single 'Industry' brand x chain scale x date
df_industry = merge_frames(merge_frames(df_ind, df_ind_cs), df_date)
df_industry['source'] = 'industry'

# company frame: brand x state x date x market type
df_company = merge_frames(
    merge_frames(merge_frames(df_brand, df_state), df_date),
    df_market,
)
df_company['source'] = 'choice'

# compset frame: same combinations as the company frame, tagged as compset
df_compset = merge_frames(
    merge_frames(merge_frames(df_brand, df_state), df_date),
    df_market,
)
df_compset['source'] = 'compset'

# census division is derived from the state column, which only the
# company and compset frames have
df_company['census_division'] = df_company['state'].apply(census)
df_compset['census_division'] = df_compset['state'].apply(census)

# fill in random revenue, supply and demand for each frame; only the upper
# revenue bound differs (industry uses 8000, company and compset use 10000)
for frame, revenue_cap in (
    (df_company, 10000),
    (df_compset, 10000),
    (df_industry, 8000),
):
    frame['revenue'] = random_data(frame, 'randint', 5000, revenue_cap)
    frame['supply'] = random_data(frame, 'randint', 50, 100)
    frame['demand_multiplier'] = random_data(frame, 'uniform', 0.3, 1)
    # demand is a random share of supply, rounded to whole units
    frame['demand'] = (frame['supply'] * frame['demand_multiplier']).round(0)

# stack the three sources into one frame and write it out
df = pd.concat([df_company, df_industry, df_compset], axis=0, ignore_index=True)
df.to_csv('df.csv', index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment