Skip to content

Instantly share code, notes, and snippets.

@gauravgola96
Last active November 28, 2017 15:07
Show Gist options
  • Save gauravgola96/bb118b1e3c6a0e7f0aced5d132e0c666 to your computer and use it in GitHub Desktop.
Save gauravgola96/bb118b1e3c6a0e7f0aced5d132e0c666 to your computer and use it in GitHub Desktop.
Ridge regression
# imputing missing values
# Zeros in Item_Visibility are treated as missing and replaced by the column mean
# (the mean is taken over the original column, zeros included).
train['Item_Visibility'] = train['Item_Visibility'].replace(0, np.mean(train['Item_Visibility']))
# Turn the establishment year into the number of years elapsed as of 2013.
train['Outlet_Establishment_Year'] = 2013 - train['Outlet_Establishment_Year']
# Assign the result back instead of calling fillna(inplace=True) on the selected
# column: the in-place form can act on a temporary object and leave `train` unchanged.
train['Outlet_Size'] = train['Outlet_Size'].fillna('Small')

# creating dummy variables to convert categorical into numeric values
# The object-dtype columns are looked up on `train`, the frame that is encoded below
# (the previous `train1` name is not defined anywhere in this script).
mylist = list(train.select_dtypes(include=['object']).columns)
dummies = pd.get_dummies(train[mylist], prefix=mylist)
# Drop the raw categorical columns in place, then append their one-hot encodings.
train.drop(mylist, axis=1, inplace=True)
X = pd.concat([train, dummies], axis=1)
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
import matplotlib.pyplot as plt
%matplotlib inline
# Load the training and testing datasets; both paths are relative file names.
train = pd.read_csv(filepath_or_buffer='training.csv')
test = pd.read_csv(filepath_or_buffer='testing.csv')
# importing linear regression
from sklearn.linear_model import LinearRegression
# for cross validation
from sklearn.model_selection import train_test_split

lreg = LinearRegression()

# Features are every column except the target; `axis` is passed by keyword
# because newer pandas versions no longer accept it positionally.
X = train.drop('Item_Outlet_Sales', axis=1)

# Hold out 30% of the rows for validation. No random_state is set, so the
# split (and every metric below) differs from run to run.
x_train, x_cv, y_train, y_cv = train_test_split(X, train.Item_Outlet_Sales, test_size=0.3)

# training a linear regression model on train
lreg.fit(x_train, y_train)

# predicting on cv
pred_cv = lreg.predict(x_cv)

# calculating mse
mse = np.mean((pred_cv - y_cv)**2)
mse

# evaluation using r-square
lreg.score(x_cv, y_cv)
from sklearn.linear_model import Ridge

## training the model
# NOTE(review): `normalize` was deprecated in scikit-learn 1.0 and removed in 1.2;
# on newer versions this call raises TypeError and the features must be scaled
# in a separate preprocessing step instead. Confirm the installed version.
ridgeReg = Ridge(alpha=0.05, normalize=True)
ridgeReg.fit(x_train, y_train)
pred = ridgeReg.predict(x_cv)

# calculating mse
# Uses the ridge predictions (`pred`), not the linear-regression `pred_cv`.
mse = np.mean((pred - y_cv)**2)
mse

## calculating score
ridgeReg.score(x_cv, y_cv)
# 0.5691  (value reported alongside the original snippet)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment