@francoisstamant
Created January 22, 2022 18:59
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
df = pd.read_csv('house_prices.csv', sep=';')
# One-hot encode the Neighborhood column
neighborhoods = pd.get_dummies(df.Neighborhood, prefix='In_')
houses = pd.concat([df,neighborhoods], axis=1)
houses = houses.drop(['Neighborhood','House_Id'], axis=1)
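# Keep the first 196 houses for modelling and hold out the last 4 as "potential" houses to score later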
data_to_use = houses.head(196)
potential_houses = houses.tail(4)
# Train-test split of the data
x = data_to_use.loc[:, data_to_use.columns != 'Price']
y = data_to_use['Price']
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=123)
### Build models
# Build three simple models: a linear regression, a random forest, and a gradient boosting regressor
# ----------------------- Linear Regression --------------------------------------
from statsmodels.api import OLS
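# Note: statsmodels' OLS does not add an intercept by default; pass the design matrices
# through statsmodels.api.add_constant if an intercept term is wanted (left out here as in the original).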
reg = OLS(y_train, X_train).fit()
# Use the fitted linear regression to predict on the test data
predictions = reg.predict(X_test)
# Calculate the absolute errors
errors = abs(predictions - y_test)
# Print out the mean absolute error (mae)
print('Mean Absolute Error of Regression:', round(np.mean(errors), 2), '$.')
# --------------------- Random forest ------------------------------------------------------
from sklearn.ensemble import RandomForestRegressor
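# Random forest with trees capped at depth 5 and a fixed seed for reproducibility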
regr = RandomForestRegressor(max_depth=5, random_state=1234)
regr.fit(X_train, y_train)
# Use the forest's predict method on the test data
predictions = regr.predict(X_test)
# Calculate the absolute errors
errors = abs(predictions - y_test)
# Print out the mean absolute error (mae)
print('Mean Absolute Error of Random Forest:', round(np.mean(errors), 2), '$.')
# --------------------- Gradient Boosting ------------------------------------------------------
from sklearn.ensemble import GradientBoostingRegressor
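# Gradient boosting regressor with scikit-learn's default hyperparameters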
reg2 = GradientBoostingRegressor()
reg2.fit(X_train, y_train)
# Use the gradient boosting model to predict on the test data
predictions = reg2.predict(X_test)
# Calculate the absolute errors
errors = abs(predictions - y_test)
# Print out the mean absolute error (mae)
print('Mean Absolute Error of Gradient Boosting:', round(np.mean(errors), 2), '$.')
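# --------------------- Scoring the held-out houses (illustrative sketch) ----------------------
# `potential_houses` (the last 4 rows split off above) is never scored in this script.
# A minimal sketch, assuming the goal is to estimate prices for those listings:
# the random forest is used below, but any of the three fitted models could be swapped in.
potential_x = potential_houses.loc[:, potential_houses.columns != 'Price']
potential_predictions = regr.predict(potential_x)
print('Estimated prices for the 4 potential houses:', np.round(potential_predictions, 2))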