Skip to content

Instantly share code, notes, and snippets.

View Pratik-Shukla-22's full-sized avatar

Pratik-Shukla-22

View GitHub Profile
#Downloading the data from GitHub:
#The leading "!" is IPython/Jupyter shell-escape syntax, so this line runs the
#wget command in a shell and is not valid in a plain Python script.
#wget stores the file under its URL basename (Fuel_Consumption.csv) in the
#current working directory.
!wget https://raw.githubusercontent.com/Pratik-Shukla-22/Simple-Linear-Regression/main/Fuel_Consumption.csv
#Import the required libraries:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#Load the downloaded fuel-consumption CSV into a DataFrame:
data = pd.read_csv(filepath_or_buffer="Fuel_Consumption.csv")
#Downloading the data from GitHub:
#The leading "!" is IPython/Jupyter shell-escape syntax, so this line runs the
#wget command in a shell and is not valid in a plain Python script.
#wget stores the file under its URL basename (Fuel_Consumption.csv) in the
#current working directory.
!wget https://raw.githubusercontent.com/Pratik-Shukla-22/Simple-Linear-Regression/main/Fuel_Consumption.csv
#Import the required libraries:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn.model_selection import train_test_split
#Bar chart comparing actual and predicted values for the first ten rows:
A_P_data.head(n=10).plot.bar(figsize=(12, 6))
plt.show()
#Error calculations:
#Residuals are prediction minus actual on the test split; RSS is the sum of
#their squares.
#NOTE(review): the printed result carries a CO2EMISSIONS label, so RSS looks
#like a one-element pandas Series rather than a plain float - confirm.
res = predicted_test - test_y
RSS = (res ** 2).sum()
print("Residual Sum of Squares: ", RSS)
#Output:
Residual Sum of Squares: CO2EMISSIONS 252847.165191
#Create a dataframe for Actual and Predicted values:
A_P_data = pd.DataFrame({"Actual":data["CO2EMISSIONS"],"Predicted":predicted_data[:][0][0]})
print(A_P_data.head())
#Output:
Actual Predicted
0 196 204.68597
1 221 204.68597
2 136 204.68597
#Regression line over the complete dataset:
#blue points are the observed (engine size, emission) pairs, the red line is
#the model's prediction for each engine size.
plt.xlabel("Engine_Size")
plt.ylabel("Emission")
plt.scatter(x=data[["ENGINESIZE"]], y=data[["CO2EMISSIONS"]])
plt.plot(data[["ENGINESIZE"]], predicted_data, color="red")
plt.show()
#Regression line over the testing split:
#scatter the held-out points, then overlay the predictions in red.
plt.xlabel("Engine_Size")
plt.ylabel("Emission")
plt.scatter(x=test_x, y=test_y)
plt.plot(test_x, predicted_test, color="red")
plt.show()
#Regression line over the training split:
#scatter the training points, then overlay the fitted values in red.
plt.xlabel("Engine_Size")
plt.ylabel("Emission")
plt.scatter(x=train_x, y=train_y)
plt.plot(train_x, predicted_train, color="red")
plt.show()
#Run the fitted model on the ENGINESIZE column of the whole dataset:
predicted_data = regr.predict(data.loc[:, ["ENGINESIZE"]])
#Show the first five predictions (notebook cell output):
predicted_data[:5]
#Output:
array([[204.68597017],
[220.3853239 ],
[185.06177802],
[263.55854664],
#Run the fitted model on the testing features:
predicted_test = regr.predict(test_x)
#Show the first five predictions (notebook cell output):
predicted_test[:5]
#Output:
array([[342.05531526],
[220.3853239 ],
[220.3853239 ],
[232.15983919],