Skip to content

Instantly share code, notes, and snippets.

View vivekpadia70's full-sized avatar
🏠
Working from home

vivekpadia70 vivekpadia70

🏠
Working from home
View GitHub Profile
# --- Feature engineering ---------------------------------------------------
# Summary statistics of the raw frame; the result is not stored, so this is
# only useful when the cell is run interactively.
df.describe()
# One-hot encode the three categorical columns and append the indicator
# columns next to the existing ones.
df = pd.concat(
    [
        df,
        pd.get_dummies(df["Type"]),
        pd.get_dummies(df["Method"]),
        pd.get_dummies(df["Regionname"]),
    ],
    axis=1,
)
# Drop the remaining text columns plus the categoricals that were just encoded.
# `axis` is passed by keyword: the positional form `drop(labels, 1)` was
# deprecated in pandas 1.3 and raises TypeError on pandas >= 2.0.
df = df.drop(
    ["Suburb", "Address", "SellerG", "CouncilArea", "Type", "Method", "Regionname"],
    axis=1,
)
# Replace each date with its POSIX timestamp (float seconds) so the column is
# numeric for the estimators.
# NOTE(review): pd.Timestamp resolves ambiguous day/month strings month-first;
# confirm this matches the format of the source data.
df["Date"] = [pd.Timestamp(x).timestamp() for x in df["Date"]]
# Discard every row that still has a missing value.
df = df.dropna()
# Preview of the cleaned frame (interactive use only, result not stored).
df.head()
# --- Train / test split ----------------------------------------------------
# Features are every column except the target. `axis` is passed by keyword:
# the positional form `drop("Price", 1)` raises TypeError on pandas >= 2.0.
X = df.drop("Price", axis=1)
# Target column.
Y = df["Price"]
# Hold out 20% of the rows for evaluation.
# NOTE(review): no random_state is given, so the split (and every score
# printed afterwards) differs from run to run.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.20)
# --- Linear regression -----------------------------------------------------
lr = LinearRegression()
lr.fit(X_train, Y_train)
# Score on the data the model was fitted on, then on the held-out split.
print("Linear Regression R^2 Score: ", lr.score(X_train, Y_train))
print("Linear Regression Test R^2 Score: ", lr.score(X_test, Y_test))
y_pred = lr.predict(X_test)
# scikit-learn metrics are declared as metric(y_true, y_pred). These two are
# symmetric, so the printed values are unchanged, but the documented order is
# used so that adding an asymmetric metric later cannot be silently wrong.
print("Mean Squared Error: ", mean_squared_error(Y_test, y_pred))
print("Mean Absolute Error: ", mean_absolute_error(Y_test, y_pred))
# NOTE(review): cross-validation is run on the held-out test split rather
# than on the training data; confirm that this is intended.
print("Cross Validation Score: ", cross_val_score(lr, X_test, Y_test, cv=5))
# --- Random forest ---------------------------------------------------------
# 1000 trees limited to depth 5, trained using all available cores
# (n_jobs=-1), with a fixed seed for the forest itself.
rfr = RandomForestRegressor(n_estimators=1000, max_depth=5, n_jobs=-1, random_state=12)
rfr.fit(X_train, Y_train)
# Score on the data the model was fitted on, then on the held-out split.
print("Random Forest R^2 Score: ", rfr.score(X_train, Y_train))
print("Random Forest Test R^2 Score: ", rfr.score(X_test, Y_test))
y_pred = rfr.predict(X_test)
# scikit-learn metrics are declared as metric(y_true, y_pred). These two are
# symmetric, so the printed values are unchanged, but the documented order is
# used so that adding an asymmetric metric later cannot be silently wrong.
print("Mean Squared Error: ", mean_squared_error(Y_test, y_pred))
print("Mean Absolute Error: ", mean_absolute_error(Y_test, y_pred))
# NOTE(review): cross-validation is run on the held-out test split rather
# than on the training data; confirm that this is intended.
print("Cross Validation Score: ", cross_val_score(rfr, X_test, Y_test, cv=5))
# --- Gradient boosting -----------------------------------------------------
# 1000 boosting stages with trees of depth 5 and a fixed seed.
gbr = GradientBoostingRegressor(n_estimators=1000, max_depth=5, random_state=22)
gbr.fit(X_train, Y_train)
# Score on the data the model was fitted on, then on the held-out split.
print("Gradient Boosting R^2 Score: ", gbr.score(X_train, Y_train))
print("Gradient Boosting Test R^2 Score: ", gbr.score(X_test, Y_test))
y_pred = gbr.predict(X_test)
# scikit-learn metrics are declared as metric(y_true, y_pred). These two are
# symmetric, so the printed values are unchanged, but the documented order is
# used so that adding an asymmetric metric later cannot be silently wrong.
print("Mean Squared Error: ", mean_squared_error(Y_test, y_pred))
print("Mean Absolute Error: ", mean_absolute_error(Y_test, y_pred))
# NOTE(review): cross-validation is run on the held-out test split rather
# than on the training data; confirm that this is intended.
print("Cross Validation Score: ", cross_val_score(gbr, X_test, Y_test, cv=5))
# --- Grid search over gradient boosting ------------------------------------
# Candidate values: 4 ensemble sizes x 3 tree depths = 12 combinations, each
# evaluated with 5-fold cross-validation on the training split.
params = {
    'n_estimators': [500, 1000, 1500, 2000],
    'max_depth': [3, 5, 8],
}
# Rebinds `gbr` to a fresh, unfitted estimator with default settings; the
# search fits its own copies for every parameter combination.
gbr = GradientBoostingRegressor()
gbr_grid = GridSearchCV(estimator=gbr, param_grid=params, cv=5)
gbr_grid.fit(X_train, Y_train)
# Scores of the fitted search on the training and held-out splits, followed
# by the winning parameter combination.
print(
    "Grid Search Gradient Boosting Score: ",
    gbr_grid.score(X_train, Y_train),
)
print(
    "Grid Search Gradient Boosting Test Score: ",
    gbr_grid.score(X_test, Y_test),
)
print(
    "Grid Search Gradient Boosting Best Parameters: ",
    gbr_grid.best_params_,
)
# --- K-nearest neighbours --------------------------------------------------
# Each prediction uses the 200 nearest training rows, weighted by distance.
# NOTE(review): the features are not rescaled anywhere in the visible code, so
# large-magnitude columns will dominate the distance; confirm this is intended.
knr = KNeighborsRegressor(n_neighbors=200, weights='distance')
knr.fit(X_train, Y_train)
# Score on the data the model was fitted on, then on the held-out split.
print(
    "KNN Score: ",
    knr.score(X_train, Y_train),
)
print(
    "KNN Test Score: ",
    knr.score(X_test, Y_test),
)