Created
October 29, 2018 09:07
-
-
Save nithyadurai87/9ecfcbf04593d245e26316d52b0708e1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet | |
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, ExtraTreesRegressor, GradientBoostingRegressor | |
from sklearn.tree import DecisionTreeRegressor | |
from sklearn.neural_network import MLPRegressor | |
from sklearn.model_selection import train_test_split,cross_val_score | |
from sklearn.externals import joblib | |
from sklearn.metrics import mean_squared_error | |
from azure.storage.blob import BlockBlobService | |
import matplotlib.pyplot as plt | |
from math import sqrt | |
import numpy as np | |
import os | |
# Load the training data and rotate the target column ("SalePrice")
# into the last position so feature/target slicing below is trivial.
raw = pd.read_csv('./training_data.csv')
cols = list(raw.columns.values)
cols.pop(cols.index('SalePrice'))
reordered = raw[cols + ['SalePrice']]
# Keep numeric columns only; non-numeric (object/categorical) features
# are silently dropped rather than encoded.
numeric = reordered.select_dtypes(include=['integer', 'float'])
# Features = every numeric column except the target (last column).
X = numeric[list(numeric.columns)[:-1]]
y = numeric['SalePrice']
# One shared hold-out split (sklearn default 25% test) used by every
# model below.  NOTE(review): no random_state, so results vary per run.
X_train, X_test, y_train, y_test = train_test_split(X, y)
def linear():
    """Fit ordinary least squares on the shared train split.

    Returns:
        tuple: (R^2 score on the test set, test-set RMSE).
    """
    model = LinearRegression().fit(X_train, y_train)
    preds = model.predict(X_test)
    rmse = sqrt(mean_squared_error(y_test, preds))
    return model.score(X_test, y_test), rmse
def ridge():
    """Fit an L2-regularised linear model on the shared train split.

    Returns:
        tuple: (R^2 score on the test set, test-set RMSE).
    """
    # NOTE(review): ``normalize=True`` was deprecated in scikit-learn 1.0
    # and removed in 1.2; this script targets the older API (it also uses
    # ``sklearn.externals.joblib``) — confirm the pinned sklearn version.
    model = Ridge(alpha=0.3, normalize=True).fit(X_train, y_train)
    preds = model.predict(X_test)
    rmse = sqrt(mean_squared_error(y_test, preds))
    return model.score(X_test, y_test), rmse
def lasso():
    """Fit an L1-regularised linear model on the shared train split.

    Returns:
        tuple: (R^2 score on the test set, test-set RMSE).
    """
    # Very small alpha -> only mild coefficient shrinkage.
    # NOTE(review): ``normalize=True`` is removed in scikit-learn >= 1.2;
    # this script targets the older API.
    model = Lasso(alpha=0.00009, normalize=True).fit(X_train, y_train)
    preds = model.predict(X_test)
    rmse = sqrt(mean_squared_error(y_test, preds))
    return model.score(X_test, y_test), rmse
def elasticnet():
    """Fit an elastic-net model (even L1/L2 mix) on the shared split.

    Returns:
        tuple: (R^2 score on the test set, test-set RMSE).
    """
    model = ElasticNet(alpha=1, l1_ratio=0.5, normalize=False)
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    rmse = sqrt(mean_squared_error(y_test, preds))
    return model.score(X_test, y_test), rmse
def randomforest():
    """Fit a random-forest ensemble on the shared train split.

    Side effect: prints the fitted feature-importance vector.

    Returns:
        tuple: (R^2 score on the test set, test-set RMSE).
    """
    # NOTE(review): criterion='mse' was renamed 'squared_error' in
    # scikit-learn 1.0 and removed in 1.2; this script targets the old API.
    regressor = RandomForestRegressor(n_estimators=15, min_samples_split=15,
                                      criterion='mse', max_depth=None)
    regressor.fit(X_train, y_train)
    y_predictions = regressor.predict(X_test)
    # Fix: label was misspelled "RamdomForest" in the original output.
    print("Selected Features for RandomForest", regressor.feature_importances_)
    return (regressor.score(X_test, y_test),
            sqrt(mean_squared_error(y_test, y_predictions)))
def perceptron():
    """Fit a single-hidden-layer (5000 units, ReLU) MLP on the shared split.

    Side effect: prints the fitted weight matrices.

    Returns:
        tuple: (R^2 score on the test set, test-set RMSE).
    """
    net = MLPRegressor(hidden_layer_sizes=(5000,), activation='relu',
                       solver='adam', max_iter=1000)
    net.fit(X_train, y_train)
    preds = net.predict(X_test)
    print("Co-efficients of Perceptron", net.coefs_)
    rmse = sqrt(mean_squared_error(y_test, preds))
    return net.score(X_test, y_test), rmse
def decisiontree():
    """Fit a single regression tree on the shared train split.

    Side effect: prints the fitted feature-importance vector.

    Returns:
        tuple: (R^2 score on the test set, test-set RMSE).
    """
    tree = DecisionTreeRegressor(min_samples_split=30, max_depth=None)
    tree.fit(X_train, y_train)
    preds = tree.predict(X_test)
    print("Selected Features for DecisionTrees", tree.feature_importances_)
    rmse = sqrt(mean_squared_error(y_test, preds))
    return tree.score(X_test, y_test), rmse
def adaboost():
    """Fit an AdaBoost ensemble (exponential loss) on the shared split.

    Side effect: prints the fitted feature-importance vector.

    Returns:
        tuple: (R^2 score on the test set, test-set RMSE).
    """
    # Fix: the original chained .fit(...) onto the constructor AND called
    # .fit(...) again on the next line, training the ensemble twice on the
    # same data.  Fit exactly once.
    regressor = AdaBoostRegressor(random_state=8, loss='exponential')
    regressor.fit(X_train, y_train)
    y_predictions = regressor.predict(X_test)
    print("Selected Features for Adaboost", regressor.feature_importances_)
    return (regressor.score(X_test, y_test),
            sqrt(mean_squared_error(y_test, y_predictions)))
def extratrees():
    """Fit an extremely-randomised-trees ensemble on the shared split.

    Side effect: prints the fitted feature-importance vector.

    Returns:
        tuple: (R^2 score on the test set, test-set RMSE).
    """
    # Fix: the original fitted twice (constructor-chained .fit plus an
    # explicit .fit on the next line).  Fit exactly once.
    regressor = ExtraTreesRegressor(n_estimators=50)
    regressor.fit(X_train, y_train)
    y_predictions = regressor.predict(X_test)
    print("Selected Features for Extratrees", regressor.feature_importances_)
    return (regressor.score(X_test, y_test),
            sqrt(mean_squared_error(y_test, y_predictions)))
def gradientboosting():
    """Fit a gradient-boosted ensemble (least squares) on the shared split.

    Side effect: prints the fitted feature-importance vector.

    Returns:
        tuple: (R^2 score on the test set, test-set RMSE).
    """
    # Fix: the original fitted twice (constructor-chained .fit plus an
    # explicit .fit on the next line) — 500 estimators trained two times.
    # NOTE(review): loss='ls' was renamed 'squared_error' in newer
    # scikit-learn; this script targets the older API.
    regressor = GradientBoostingRegressor(loss='ls', n_estimators=500,
                                          min_samples_split=15)
    regressor.fit(X_train, y_train)
    y_predictions = regressor.predict(X_test)
    print("Selected Features for Gradientboosting", regressor.feature_importances_)
    return (regressor.score(X_test, y_test),
            sqrt(mean_squared_error(y_test, y_predictions)))
# Evaluate every model on the same train/test split and print its
# (R^2, RMSE) pair for side-by-side comparison.  The dispatch list keeps
# the labels and call order identical to the original one-print-per-model
# version, so the output is byte-for-byte the same.
print("Score, RMSE values")
evaluations = [
    ("Linear", linear),
    ("Ridge", ridge),
    ("Lasso", lasso),
    ("ElasticNet", elasticnet),
    ("RandomForest", randomforest),
    ("Perceptron", perceptron),
    ("DecisionTree", decisiontree),
    ("AdaBoost", adaboost),
    ("ExtraTrees", extratrees),
    ("GradientBoosting", gradientboosting),
]
for label, evaluate in evaluations:
    print(label + " = ", evaluate())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment