# L-Lewis/airbnb-xgboost.py

## airbnb-xgboost.py
import time

import xgboost as xgb

# Fit an XGBoost regressor with default hyperparameters, report train/test
# error metrics, and plot the fitted model's per-feature importances.
# NOTE(review): X_train, X_test, y_train, y_test, mean_squared_error, r2_score,
# pd and plt are not defined in this file; they are assumed to come from
# earlier code (e.g. preceding notebook cells) - confirm before running alone.

# Fitting the model
# The timing report below reads xgb_reg_start / xgb_reg_end, so both must be
# set here; the measured span covers fitting plus both prediction passes.
xgb_reg_start = time.perf_counter()
xgb_reg = xgb.XGBRegressor()
xgb_reg.fit(X_train, y_train)
training_preds_xgb_reg = xgb_reg.predict(X_train)
val_preds_xgb_reg = xgb_reg.predict(X_test)
xgb_reg_end = time.perf_counter()

# Printing the results
# The "Validation" figures are computed on X_test / y_test.
print(f"Time taken to run: {round((xgb_reg_end - xgb_reg_start)/60,1)} minutes")
print("\nTraining MSE:", round(mean_squared_error(y_train, training_preds_xgb_reg),4))
print("Validation MSE:", round(mean_squared_error(y_test, val_preds_xgb_reg),4))
print("\nTraining r2:", round(r2_score(y_train, training_preds_xgb_reg),4))
print("Validation r2:", round(r2_score(y_test, val_preds_xgb_reg),4))

# Producing a dataframe of feature importances
# One row per column of X_train, holding that feature's importance score.
ft_weights_xgb_reg = pd.DataFrame(
    xgb_reg.feature_importances_,
    columns=['weight'],
    index=X_train.columns,
)
# Ascending order, so the largest weights are drawn last (top of the chart).
ft_weights_xgb_reg = ft_weights_xgb_reg.sort_values('weight')

# Plotting feature importances
# Tall figure so every feature label gets its own readable row.
plt.figure(figsize=(8,20))
plt.barh(ft_weights_xgb_reg.index, ft_weights_xgb_reg.weight, align='center')
plt.title("Feature importances in the XGBoost model", fontsize=14)
plt.xlabel("Feature importance")
plt.margins(y=0.01)
plt.show()
	# Fit an XGBoost regressor with default hyperparameters, report train/test
	# error metrics, and plot the fitted model's per-feature importances.
	# NOTE(review): X_train, X_test, y_train, y_test, mean_squared_error,
	# r2_score, pd and plt are not defined in this file; they are assumed to
	# come from earlier code (e.g. preceding notebook cells) - confirm.
	import time

	import xgboost as xgb

	# Fitting the model
	# The timing report below reads xgb_reg_start / xgb_reg_end, so both must
	# be set here; the measured span covers fitting plus both prediction passes.
	xgb_reg_start = time.perf_counter()
	xgb_reg = xgb.XGBRegressor()
	xgb_reg.fit(X_train, y_train)
	training_preds_xgb_reg = xgb_reg.predict(X_train)
	val_preds_xgb_reg = xgb_reg.predict(X_test)
	xgb_reg_end = time.perf_counter()

	# Printing the results
	# The "Validation" figures are computed on X_test / y_test.
	print(f"Time taken to run: {round((xgb_reg_end - xgb_reg_start)/60,1)} minutes")
	print("\nTraining MSE:", round(mean_squared_error(y_train, training_preds_xgb_reg),4))
	print("Validation MSE:", round(mean_squared_error(y_test, val_preds_xgb_reg),4))
	print("\nTraining r2:", round(r2_score(y_train, training_preds_xgb_reg),4))
	print("Validation r2:", round(r2_score(y_test, val_preds_xgb_reg),4))

	# Producing a dataframe of feature importances
	# One row per column of X_train, holding that feature's importance score.
	ft_weights_xgb_reg = pd.DataFrame(
		xgb_reg.feature_importances_,
		columns=['weight'],
		index=X_train.columns,
	)
	# Ascending order, so the largest weights are drawn last (top of the chart).
	ft_weights_xgb_reg = ft_weights_xgb_reg.sort_values('weight')

	# Plotting feature importances
	# Tall figure so every feature label gets its own readable row.
	plt.figure(figsize=(8,20))
	plt.barh(ft_weights_xgb_reg.index, ft_weights_xgb_reg.weight, align='center')
	plt.title("Feature importances in the XGBoost model", fontsize=14)
	plt.xlabel("Feature importance")
	plt.margins(y=0.01)
	plt.show()