# carleen/airbnb_dummy_variable.py

## airbnb_dummy_variable.py
# Rename the room-type labels to identifier-style names (no spaces or
# slashes); they become the suffixes of the dummy columns created below.
# NOTE(review): Series.map with a dict yields NaN for any value that is not
# one of the dict's keys -- confirm these three labels cover the column.
room_type_labels = {
    'Private room': 'private_room',
    'Entire home/apt': 'entire_property',
    'Shared room': 'shared_room',
}
df['room_type'] = df['room_type'].map(room_type_labels)

# Same treatment for the host response-time labels.
response_time_labels = {
    'within an hour': 'lt_hour',
    'within a few hours': 'lt_few_hours',
    'within a day': 'lt_day',
    'a few days or more': 'gt_day',
}
df['host_response_time'] = df['host_response_time'].map(response_time_labels)

# Categorical columns that get expanded into one indicator column per value.
dummy_var_list = [
    'host_is_superhost',
    'room_type',
    'property_type',
    'instant_bookable',
    'neighborhood_label',
    'bathroom_type',
    'host_response_time',
]

# Encode every listed column in one call: each source column is removed and
# its indicator columns, named "<column>_<value>", are appended in list order.
df = pd.get_dummies(df, columns=dummy_var_list)

# Every remaining column except the target is a feature.
is_feature = df.columns != 'price'
feature_list = df.columns[is_feature]
correlations(df, 'price', feature_list)

# Assemble the formula "price ~ f1 + f2 + ..." from the feature names; the
# dangling " +" left after the last term is stripped.
formula_terms = ''.join(f''' {name} +''' for name in feature_list)
model1 = (f'''price ~''' + formula_terms).rstrip(' +')

result1 = models.bootstrap_linear_regression(model1, data=df)
models.describe_bootstrap_lr(result1)
	# Change some of the variables in room type, prevents errors
# NOTE(review): this section repeats the steps that appear earlier in this
# file. It was tab-indented at top level with the `for` bodies not indented,
# which is an IndentationError; the structure is restored here. If both
# sections run in the same session this one cannot succeed as-is, because the
# earlier pass has already dropped the columns listed in dummy_var_list --
# confirm whether this copy should be kept.
df.room_type = df.room_type.map({'Private room': 'private_room',
                                 'Entire home/apt': 'entire_property',
                                 'Shared room': 'shared_room'})

# Adjusts variables for host response times
df.host_response_time = df.host_response_time.map({'within an hour': 'lt_hour',
                                                   'within a few hours': 'lt_few_hours',
                                                   'within a day': 'lt_day',
                                                   'a few days or more': 'gt_day'})

# These are the variables that we need to create into dummy variables
dummy_var_list = ['host_is_superhost', 'room_type', 'property_type', 'instant_bookable',
                  'neighborhood_label', 'bathroom_type', 'host_response_time']

# Replace each categorical column with its indicator columns, which are
# named "<column>_<value>" and appended on the right.
for v in dummy_var_list:
    df = pd.concat([df, pd.get_dummies(df[v], prefix=v)], axis=1)
    df = df.drop(v, axis=1)

# Feature list, so you don't have to manually type all variables:
# every column except the target 'price'.
feature_list = df.columns[df.columns != 'price']
correlations(df, 'price', feature_list)

# Build the formula string "price ~ f1 + f2 + ..."; the trailing " +" left
# after the last feature is stripped off.
model1 = f'''price ~'''
for f in feature_list:
    model1 += f''' {f} +'''
model1 = model1.rstrip(' +')
result1 = models.bootstrap_linear_regression(model1, data=df)
models.describe_bootstrap_lr(result1)