# Skip to content
#
# Instantly share code, notes, and snippets.

# Creating DataFrame out of Advertising.csv
# NOTE(review): pd, LinearRegression, metrics, plt and sns are not imported in
# this snippet; presumably they are imported elsewhere -- confirm.
df = pd.read_csv("Advertising.csv")
# Remove the "Unnamed: 0" column so it does not end up among the predictors
# (every column except "sales" is used as a predictor below).
df.drop("Unnamed: 0", axis=1, inplace=True)

# Separating Independent and dependent variables
X = df.drop(["sales"], axis=1)
Y = df.sales

# Fit Linear Regression
lr = LinearRegression()
model = lr.fit(X, Y)

# In-sample predictions. Bound to ``ypred1`` because that is the name the
# R-squared line, the residual plot and the later comparison plots read.
ypred1 = model.predict(X)
print("R-squared: {0}".format(metrics.r2_score(Y, ypred1)))

# Residuals against fitted values
plt.scatter(ypred1, (Y - ypred1))
plt.xlabel("Fitted values")
plt.ylabel("Residuals")

# Pairwise scatter plots of every column in the data set
sns.pairplot(df)
from sklearn.preprocessing import PolynomialFeatures

# Expand the predictors into all polynomial terms up to degree 2.
poly = PolynomialFeatures(degree=2)
X_poly = poly.fit_transform(X)
# ``poly`` is deliberately not fitted a second time on X_poly: fit_transform
# above already fitted it on X, and refitting on the expanded matrix would
# only overwrite that state without affecting the regression below.

# NOTE(review): PolynomialFeatures emits a bias column by default and
# sm.add_constant skips adding another constant by default, so this call is
# expected to return X_poly unchanged -- confirm against the installed versions.
X_poly = sm.add_constant(X_poly)

# Ordinary least squares of Y on the polynomial terms, then the fit report.
results = sm.OLS(Y, X_poly).fit()
print(results.summary())
def calculate_vif(data):
    """Compute the variance inflation factor (VIF) of every column in *data*.

    Each column in turn is regressed on all the remaining columns with
    ``sm.OLS``; its VIF is ``1 / (1 - R^2)`` of that fit, rounded to two
    decimals. The regressors are handed to OLS exactly as given: no
    intercept column is added here.

    Args:
        data: DataFrame whose columns are the explanatory variables.

    Returns:
        DataFrame with the columns ``Var`` (column name) and ``Vif``, one row
        per column of *data*, sorted by ``Vif`` in descending order.
    """
    x_var_names = data.columns
    rows = []
    for name in x_var_names:
        y = data[name]
        # All other columns act as regressors for the current one.
        x = data[x_var_names.drop([name])]
        r_squared = sm.OLS(y, x).fit().rsquared
        rows.append([name, round(1 / (1 - r_squared), 2)])
    # Build the result in one go instead of growing a frame row by row.
    vif_df = pd.DataFrame(rows, columns=['Var', 'Vif'])
    return vif_df.sort_values(by='Vif', axis=0, ascending=False, inplace=False)
# Side-by-side residual distributions for two sets of fitted values.
# NOTE(review): Y, ypred1, ypred2 and norm are not defined in this snippet;
# norm is presumably scipy.stats.norm -- confirm.
# NOTE(review): sns.distplot is deprecated in recent seaborn releases; it is
# kept here because of its fit= argument -- confirm before migrating.

# plt.figure (not plt.subplots) sets the canvas size without creating a
# full-figure Axes underneath the two panels added by plt.subplot below.
plt.figure(figsize=(8, 4))

# Left panel: residuals of ypred1
plt.subplot(1, 2, 1)
plt.title("Before")
sns.distplot(Y - ypred1, fit=norm)
plt.xlabel('Residuals')

# Right panel: residuals of ypred2
plt.subplot(1, 2, 2)
plt.title("After")
sns.distplot(Y - ypred2, fit=norm)
plt.xlabel('Residuals')
# Side-by-side fitted-versus-actual scatter plots for two sets of fitted values.
# NOTE(review): Y, ypred1 and ypred3 are not defined in this snippet -- confirm
# they are in scope where it runs.

# plt.figure (not plt.subplots) sets the canvas size without creating a
# full-figure Axes underneath the two panels added by plt.subplot below.
plt.figure(figsize=(10, 5))

# Left panel: ypred1 against the actual values
plt.subplot(1, 2, 1)
plt.title("Before")
# Y plotted against itself: the line on which fitted equals actual
plt.plot(Y, Y, color="red")
plt.scatter(ypred1, Y)
plt.xlabel("Fitted values")
plt.ylabel("Actuals")

# Right panel: ypred3 against the actual values
plt.subplot(1, 2, 2)
plt.title("After")
plt.plot(Y, Y, color="red")
plt.scatter(ypred3, Y)
plt.xlabel("Fitted values")
plt.ylabel("Actuals")
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Read Position_Salaries.csv into a DataFrame.
dataset = pd.read_csv('Position_Salaries.csv')
# Feature matrix: column 1 only; the 1:2 slice (rather than a bare 1) keeps
# the result two-dimensional. Note this rebinds the module-level name X.
X = dataset.iloc[:, 1:2].values
# Target vector: column 2 as a one-dimensional array.
y = dataset.iloc[:, 2].values