This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Taking care of missing data using Python.
# NOTE(review): sklearn.preprocessing.Imputer was deprecated in 0.20 and
# removed in 0.22; SimpleImputer is the supported replacement. It imputes
# column-wise by default (the old axis=0 behaviour), and NaN is spelled
# np.nan rather than the string 'NaN'.
from sklearn.impute import SimpleImputer
import numpy as np

# Replace NaNs in columns 1-2 with each column's mean.
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
imputer = imputer.fit(X[:, 1:3])
X[:, 1:3] = imputer.transform(X[:, 1:3])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Taking care of missing data using R:
# fill NA entries in Age and Salary with the column mean.
mean_impute <- function(column) {
  ifelse(is.na(column),
         ave(column, FUN = function(x) mean(x, na.rm = TRUE)),
         column)
}

dataset$Age    <- mean_impute(dataset$Age)
dataset$Salary <- mean_impute(dataset$Salary)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Encoding categorical data
# Encoding the Independent Variable.
# NOTE(review): OneHotEncoder's categorical_features argument was removed in
# scikit-learn 0.22; ColumnTransformer is the supported way to one-hot encode
# a single column while passing the remaining columns through. The separate
# LabelEncoder pass is no longer needed (OneHotEncoder handles strings).
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# One-hot encode column 0; drop='first' removes one dummy level, which
# replaces the manual X = X[:, 1:] dummy-variable-trap slice.
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(drop='first'), [0])],
    remainder='passthrough',
    sparse_threshold=0,  # always return a dense array, like .toarray() did
)
X = ct.fit_transform(X)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Splitting the dataset into train and test set.
# NOTE(review): sklearn.cross_validation was deprecated in 0.18 and removed
# in 0.20; train_test_split now lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows; random_state pins the shuffle for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Feature Scaling: standardise features to zero mean / unit variance.
from sklearn.preprocessing import StandardScaler

sc_X = StandardScaler()
# Learn the scaling parameters from the training data only, then apply the
# same transform to both sets so the test set never leaks into the fit.
sc_X.fit(X_train)
X_train = sc_X.transform(X_train)
X_test = sc_X.transform(X_test)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fitting Simple Linear Regression to the Training set.
# LinearRegression.fit returns the estimator itself, so construction and
# fitting can be chained into one expression.
from sklearn.linear_model import LinearRegression

regressor = LinearRegression().fit(X_train, y_train)

# Predicting the Test set results.
y_pred = regressor.predict(X_test)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Encoding categorical data.
# NOTE(review): OneHotEncoder's categorical_features argument was removed in
# scikit-learn 0.22; ColumnTransformer is the supported way to one-hot encode
# a single column while passing the remaining columns through. The separate
# LabelEncoder pass is no longer needed (OneHotEncoder handles strings).
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# One-hot encode column 3; drop='first' removes one dummy level, which
# replaces the manual X = X[:, 1:] dummy-variable-trap slice.
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(drop='first'), [3])],
    remainder='passthrough',
    sparse_threshold=0,  # always return a dense array, like .toarray() did
)
X = ct.fit_transform(X)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fitting Multiple Linear Regression to the Training set.
from sklearn.linear_model import LinearRegression

regressor = LinearRegression()
# fit() returns the (now fitted) estimator, so the re-assignment is a no-op
# on identity; kept for explicitness.
regressor = regressor.fit(X_train, y_train)

# Predicting the Test set results.
y_pred = regressor.predict(X_test)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Visualising the Training set results: observed points in red, the fitted
# regression line in blue.
POINT_COLOR = 'red'
LINE_COLOR = 'blue'

plt.scatter(X_train, y_train, color=POINT_COLOR)
plt.plot(X_train, regressor.predict(X_train), color=LINE_COLOR)
plt.title('Salary vs Experience (Training set)')
plt.xlabel('Years of Experience')
plt.ylabel('Salary')
plt.show()

# Visualising the Test set results.
plt.scatter(X_test, y_test, color=POINT_COLOR)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Building the optimal model using backward elimination.
# NOTE(review): OLS was removed from statsmodels.formula.api; the array-based
# class lives in statsmodels.api.
import statsmodels.api as sm

# Prepend a column of ones as the intercept term: sm.OLS does not add a
# constant itself. X.shape[0] replaces the hard-coded row count (50) so this
# works for any dataset size.
X = np.append(arr=np.ones((X.shape[0], 1)).astype(int), values=X, axis=1)

# Start with every feature; backward elimination then repeatedly drops the
# predictor with the highest p-value.
X_opt = X[:, [0, 1, 2, 3, 4, 5]]

# Fit ordinary least squares on the current candidate feature set.
regressor_OLS = sm.OLS(endog=y, exog=X_opt).fit()

# Print the summary (the original discarded the returned object, which only
# displays in an interactive session): any feature with a high p-value is a
# candidate for elimination.
print(regressor_OLS.summary())
OlderNewer