Skip to content

Instantly share code, notes, and snippets.

@sachinsdate
Created June 19, 2019 10:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sachinsdate/08a8cc11744cb230c7f040fe2801b877 to your computer and use it in GitHub Desktop.
Save sachinsdate/08a8cc11744cb230c7f040fe2801b877 to your computer and use it in GitHub Desktop.
Linear Regression
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
# Load the data set; row 0 of the CSV supplies the column names.
df = pd.read_csv('uciml_auto_city_highway_mpg.csv', header=0)

# Show the raw data as a scatter plot before fitting anything.
df.plot(kind='scatter', x='City MPG', y='Highway MPG')
plt.show()
# Build the inputs for the linear regression model.
# The first CSV column is the single feature and the second is the target;
# both are sliced (not indexed) so they stay 2-D arrays of shape (n_rows, 1).
# NOTE(review): presumably column 0 is 'City MPG' and column 1 is
# 'Highway MPG' -- confirm against the CSV's column order.
X, y = df.iloc[:, :1].values, df.iloc[:, 1:2].values

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)
# Use all the default params while creating the linear regressor
lin_reg = LinearRegression()
# Train the regressor on the training data set only; the test split stays
# untouched so it can serve as a holdout later.
lin_reg.fit(X_train, y_train)
# Print the goodness of fit on the training data set.
# LinearRegression.score() returns the coefficient of determination (R^2),
# not the Pearson correlation coefficient r, so the output is labelled R^2.
print('R^2=' + str(lin_reg.score(X_train, y_train)))
# Training data (blue points) with the fitted regression line (black)
# drawn on top of it.
plt.scatter(X_train, y_train, color='blue')
plt.plot(X_train, lin_reg.predict(X_train), color='black')

# Axis labels for the figure drawn above.
plt.xlabel('City MPG')
plt.ylabel('Highway MPG')
plt.show()
# Compare the model's predictions with the actual values on the holdout set.
holdout_predictions = lin_reg.predict(X_test)

# Actual values are light-blue circles, predicted values are black crosses.
actuals = plt.scatter(
    X_test, y_test, marker='o', color='lightblue', label='Actual values'
)
predicted = plt.scatter(
    X_test, holdout_predictions, marker='+', color='black', label='Predicted values'
)

plt.xlabel('City MPG')
plt.ylabel('Highway MPG')
# The legend lists the predicted series first, then the actuals.
plt.legend(handles=[predicted, actuals])
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment