Last active
June 27, 2020 17:25
-
-
Save tharunpeddisetty/ec38725548d23ac52854d4025b8a5403 to your computer and use it in GitHub Desktop.
Baby Steps Towards Data Science: Linear Regression
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Simple linear regression: fit Salary against YearsExperience, plot the fitted
# line over the training and test points, then print the model parameters and
# one example prediction.
import matplotlib.pyplot as plt
import numpy as np  # not used below; kept because the original script imports it
import pandas as pd
import statsmodels.api as sm  # not used below; kept because the original script imports it
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Do not forget to change your file path. I haven't changed mine for your reference.
DATA_PATH = '/Users/tharunpeddisetty/Desktop/Machine Learning/Python/Salary_Data.csv'

dataset = pd.read_csv(DATA_PATH)
X = dataset.iloc[:, :-1].values  # every column except the last -> 2D feature matrix
Y = dataset.iloc[:, -1].values  # last column -> 1D target vector

# Splitting data into training and testing set (80% / 20%); random_state is
# fixed so the split is reproducible from run to run.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=1)

# Training the model
regressor = LinearRegression()
regressor.fit(X_train, Y_train)

# Predicting on test set
y_pred = regressor.predict(X_test)

# Fitted values on the training inputs; both figures draw the same regression
# line from these, so compute them once.
train_line = regressor.predict(X_train)

# Visualizing the training set results
plt.scatter(X_train, Y_train, color='red')
plt.plot(X_train, train_line, color='blue')  # plots the fitted regression line
plt.title('Salary Vs Experience (Training Set)')
plt.xlabel('Years of Experience')
plt.ylabel('Salary')
plt.show()

# Visualizing the test set results
plt.scatter(X_test, Y_test, color='red')
# The line's arguments do not change: the regression line comes from the single
# equation learned on the training set, so plotting (X_test, y_pred) would trace
# the same line.
plt.plot(X_train, train_line, color='blue')
plt.title('Salary Vs Experience (Testing Set)')
plt.xlabel('Years of Experience')
plt.ylabel('Salary')
plt.show()

# Finding the intercept and coefficient of the fitted line
print(regressor.coef_)
print(regressor.intercept_)

# Finding the prediction for 12 years of experience; predict expects a 2D array
# (n_samples, n_features), hence the nested list.
print(regressor.predict([[12]]))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
YearsExperience | Salary
---|---
1.1 | 39343.00
1.3 | 46205.00
1.5 | 37731.00
2.0 | 43525.00
2.2 | 39891.00
2.9 | 56642.00
3.0 | 60150.00
3.2 | 54445.00
3.2 | 64445.00
3.7 | 57189.00
3.9 | 63218.00
4.0 | 55794.00
4.0 | 56957.00
4.1 | 57081.00
4.5 | 61111.00
4.9 | 67938.00
5.1 | 66029.00
5.3 | 83088.00
5.9 | 81363.00
6.0 | 93940.00
6.8 | 91738.00
7.1 | 98273.00
7.9 | 101302.00
8.2 | 113812.00
8.7 | 109431.00
9.0 | 105582.00
9.5 | 116969.00
9.6 | 112635.00
10.3 | 122391.00
10.5 | 121872.00
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment