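# -*- coding: utf-8 -*-
"""
Polynomial regression demo: fits a linear model and a degree-4 polynomial
model to Position_Salaries.csv and plots both fits.

Created on Fri Dec 21 18:59:49 2018
@author: Nhan Tran
"""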
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset
# NOTE: the path is relative, so it is resolved against the current working
# directory rather than against the location of this script.
dataset = pd.read_csv('./Sample Data/PART 2. REGRESSION - Polynomial Regression - Polynomial_Regression/Polynomial_Regression/Position_Salaries.csv')
# Feature matrix: column 1 only. The 1:2 slice (instead of a plain index)
# keeps X two-dimensional, shape (n_samples, 1), as the estimators expect.
X = dataset.iloc[:, 1:2].values
# Target vector: column 2, as a 1-D array.
y = dataset.iloc[:, 2].values
# Splitting the dataset into the Training set and Test set
# NOTE: X_train/X_test/y_train/y_test are produced here but the models in this
# script are fitted on the full X and y.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Scaling (disabled: kept for reference only, this code is not executed)
# from sklearn.preprocessing import StandardScaler
# sc_X = StandardScaler()
# X_train = sc_X.fit_transform(X_train)
# X_test = sc_X.transform(X_test)
# Fitting Linear Regression to the dataset
from sklearn.linear_model import LinearRegression
# Baseline model: a straight-line fit trained on the full X and y.
lin_reg = LinearRegression()
lin_reg.fit(X, y)
# Visualizing the Linear Regression results
def viz_linear():
    """Plot the observed data together with the linear-regression fit.

    Reads the module-level ``X``, ``y`` and ``lin_reg``. The samples are drawn
    as red points and the model's predictions on ``X`` as a blue line; the
    figure is then displayed with ``plt.show()``.

    Returns:
        None.
    """
    plt.scatter(X, y, color='red')
    plt.plot(X, lin_reg.predict(X), color='blue')
    plt.title('Truth or Bluff (Linear Regression)')
    plt.xlabel('Position level')
    plt.ylabel('Salary')
    plt.show()


viz_linear()
# Fitting Polynomial Regression to the dataset
from sklearn.preprocessing import PolynomialFeatures
# Expand the single feature column into polynomial terms up to degree 4 ...
poly_reg = PolynomialFeatures(degree=4)
X_poly = poly_reg.fit_transform(X)
# ... then fit an ordinary linear model on the expanded features.
pol_reg = LinearRegression()
pol_reg.fit(X_poly, y)
# Visualizing the Polynomial Regression results
def viz_polymonial():
    """Plot the observed data together with the polynomial-regression fit.

    Reads the module-level ``X``, ``y``, ``poly_reg`` and ``pol_reg``. The
    samples are drawn as red points and the polynomial model's predictions on
    ``X`` as a blue line; the figure is then displayed with ``plt.show()``.

    Returns:
        None.
    """
    plt.scatter(X, y, color='red')
    # poly_reg was already fitted when X_poly was built, so only transform
    # here instead of refitting the transformer at prediction time.
    plt.plot(X, pol_reg.predict(poly_reg.transform(X)), color='blue')
    plt.title('Truth or Bluff (Polynomial Regression)')
    plt.xlabel('Position level')
    plt.ylabel('Salary')
    plt.show()


viz_polymonial()
# Additional feature
# Making the plot line (blue one) smoother
def viz_polymonial_smooth(step=0.1):
    """Plot the polynomial fit on a fine grid so the curve looks smooth.

    Reads the module-level ``X``, ``y``, ``poly_reg`` and ``pol_reg``. The
    model is evaluated on evenly spaced points between the smallest and
    largest value in ``X`` rather than only on the observed samples.

    Args:
        step: Spacing between consecutive grid points. Defaults to 0.1.

    Returns:
        None.
    """
    # X is two-dimensional, so the builtin min()/max() would return 1-element
    # arrays; use the array reductions to get plain scalars for np.arange.
    # np.arange is half-open, so the grid stops just short of X.max().
    X_grid = np.arange(X.min(), X.max(), step)
    # The transformer and estimator expect a 2-D (n_samples, n_features)
    # input, hence the reshape of the 1-D grid into a single column.
    X_grid = X_grid.reshape(-1, 1)
    # Visualizing the Polynomial Regression results
    plt.scatter(X, y, color='red')
    # poly_reg is already fitted; transform only, do not refit on the grid.
    plt.plot(X_grid, pol_reg.predict(poly_reg.transform(X_grid)), color='blue')
    plt.title('Truth or Bluff (Polynomial Regression)')
    plt.xlabel('Position level')
    plt.ylabel('Salary')
    plt.show()


viz_polymonial_smooth()
# Predicting a new result with Linear Regression
# The result is stored and printed so it is visible when the file is run as a
# script (a bare expression is only echoed in an interactive console).
lin_pred = lin_reg.predict([[5.5]])
print(lin_pred)
# output should be 249500

# Predicting a new result with Polynomial Regression
# poly_reg is already fitted on X; only transform the query sample instead of
# refitting the transformer on it.
poly_pred = pol_reg.predict(poly_reg.transform([[5.5]]))
print(poly_pred)
# output should be 132148.43750003
# (End of script. The GitHub page footer text that followed here was not part of the source.)