This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
from sklearn.model_selection import cross_val_score, train_test_split, GridSearchCV | |
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor | |
from sklearn.linear_model import LinearRegression | |
from sklearn.metrics import mean_squared_error, mean_absolute_error | |
from sklearn.decomposition import PCA | |
from sklearn.neighbors import KNeighborsRegressor |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Preview the first few rows of the loaded DataFrame.
df.head()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Show the DataFrame's column summary (info() writes its report to stdout).
df.info()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Compute descriptive statistics for the DataFrame's columns.
df.describe()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Feature engineering: one-hot encode the categorical columns, drop the raw
# text columns, turn the sale date into a number and discard incomplete rows.

# Append indicator (dummy) columns for each categorical feature.
df = pd.concat(
    [
        df,
        pd.get_dummies(df["Type"]),
        pd.get_dummies(df["Method"]),
        pd.get_dummies(df["Regionname"]),
    ],
    axis=1,
)

# Drop the text columns that are not used as features, together with the
# categoricals that were just encoded. The columns are named via `columns=`
# because pandas 2.0 no longer accepts `axis` as a positional argument.
df = df.drop(
    columns=[
        "Suburb",
        "Address",
        "SellerG",
        "CouncilArea",
        "Type",
        "Method",
        "Regionname",
    ]
)

# Replace each date with its POSIX timestamp (a float number of seconds) so
# the column is numeric.
# NOTE(review): pd.Timestamp guesses the format of ambiguous strings such as
# "3/12/2016" -- confirm the Date column's format is parsed as intended.
df["Date"] = [pd.Timestamp(x).timestamp() for x in df["Date"]]

# Keep only rows without any missing value.
df = df.dropna()
df.head()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Separate the predictors from the target column ("Price") and hold out 20%
# of the rows for testing.
# `columns=` is used because pandas 2.0 no longer accepts `axis` positionally.
X = df.drop(columns="Price")
Y = df["Price"]

# NOTE(review): no random_state is passed, so the split (and every score
# computed from it below) changes from run to run.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.20)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Baseline model: fit a linear regression on the training split and report
# its fit on both splits.
lr = LinearRegression()
lr.fit(X_train, Y_train)
print("Linear Regression R^2 Score: ", lr.score(X_train, Y_train))
print("Linear Regression Test R^2 Score: ", lr.score(X_test, Y_test))

# Error metrics on the held-out split. scikit-learn metrics are declared as
# metric(y_true, y_pred), so the ground truth is passed first.
y_pred = lr.predict(X_test)
print("Mean Squared Error: ", mean_squared_error(Y_test, y_pred))
print("Mean Absolute Error: ", mean_absolute_error(Y_test, y_pred))

# NOTE(review): this cross-validates on the test split only, so the scores
# come from models refit on test folds, not from the model fitted above --
# confirm whether the training data was intended here.
print("Cross Validation Score: ", cross_val_score(lr, X_test, Y_test, cv=5))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Random forest: 1000 trees of depth <= 5, trained on all available cores
# (n_jobs=-1) with a fixed seed.
rfr = RandomForestRegressor(
    n_estimators=1000,
    max_depth=5,
    n_jobs=-1,
    random_state=12,
)
rfr.fit(X_train, Y_train)
print("Random Forest R^2 Score: ", rfr.score(X_train, Y_train))
print("Random Forest Test R^2 Score: ", rfr.score(X_test, Y_test))

# Error metrics on the held-out split. scikit-learn metrics are declared as
# metric(y_true, y_pred), so the ground truth is passed first.
y_pred = rfr.predict(X_test)
print("Mean Squared Error: ", mean_squared_error(Y_test, y_pred))
print("Mean Absolute Error: ", mean_absolute_error(Y_test, y_pred))

# NOTE(review): this cross-validates on the test split only, so the scores
# come from forests refit on test folds, not from the model fitted above --
# confirm whether the training data was intended here.
print("Cross Validation Score: ", cross_val_score(rfr, X_test, Y_test, cv=5))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Gradient boosting: 1000 boosting stages with trees of depth <= 5 and a
# fixed seed.
gbr = GradientBoostingRegressor(
    n_estimators=1000,
    max_depth=5,
    random_state=22,
)
gbr.fit(X_train, Y_train)
print("Gradient Boosting R^2 Score: ", gbr.score(X_train, Y_train))
print("Gradient Boosting Test R^2 Score: ", gbr.score(X_test, Y_test))

# Error metrics on the held-out split. scikit-learn metrics are declared as
# metric(y_true, y_pred), so the ground truth is passed first.
y_pred = gbr.predict(X_test)
print("Mean Squared Error: ", mean_squared_error(Y_test, y_pred))
print("Mean Absolute Error: ", mean_absolute_error(Y_test, y_pred))

# NOTE(review): this cross-validates on the test split only, so the scores
# come from models refit on test folds, not from the model fitted above --
# confirm whether the training data was intended here.
print("Cross Validation Score: ", cross_val_score(gbr, X_test, Y_test, cv=5))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Hyper-parameter search for gradient boosting: try every combination of
# tree count and tree depth below, scoring each with 5-fold cross-validation
# on the training split.
params = dict(
    n_estimators=[500, 1000, 1500, 2000],
    max_depth=[3, 5, 8],
)

# Rebinds `gbr` to a fresh, default-configured estimator for the search.
gbr = GradientBoostingRegressor()
gbr_grid = GridSearchCV(estimator=gbr, param_grid=params, cv=5)
gbr_grid.fit(X_train, Y_train)

# Report how the search object scores on each split, then the winning
# parameter combination.
train_score = gbr_grid.score(X_train, Y_train)
print("Grid Search Gradient Boosting Score: ", train_score)

test_score = gbr_grid.score(X_test, Y_test)
print("Grid Search Gradient Boosting Test Score: ", test_score)

best = gbr_grid.best_params_
print("Grid Search Gradient Boosting Best Parameters: ", best)
OlderNewer