Skip to content

Instantly share code, notes, and snippets.

@ShrashtiSinghal
Last active August 9, 2020 16:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ShrashtiSinghal/b3f9733c5744f905ad6c90d079b129d9 to your computer and use it in GitHub Desktop.
Save ShrashtiSinghal/b3f9733c5744f905ad6c90d079b129d9 to your computer and use it in GitHub Desktop.
#Cross Validation
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import cross_val_score
import seaborn as sns
from sklearn.linear_model import LinearRegression, Lasso,ElasticNet, Ridge, MultiTaskLasso, LassoLars, OrthogonalMatchingPursuit
from sklearn.model_selection import train_test_split
from sklearn import metrics
from collections import defaultdict
class predit:
    """Compare several linear regressors on a small size/price dataset."""

    def bestFitLine(self):
        """Cross-validate a set of linear models and collect their scores.

        A hard-coded table of "size" and "price" values is split 80/20
        (``random_state=0``); only the training part is used. Every model
        is then scored with ``cross_val_score`` (default scoring) on the
        same repeated 4-fold splitter, and a summary line plus the raw
        scores are printed per model.

        Returns:
            dict: model name -> array of cross-validation scores, one per
            split produced by the splitter (4 splits x 5 repeats).
        """
        datadict = {
            "size": [
                1300, 1491, 1526, 1533, 1680, 1680, 1869, 1890, 1920, 1936,
                1950, 1953, 2016, 2117, 3072, 3182, 3196, 3842, 2268, 2280,
                2628, 2645, 3000,
            ],
            "price": [
                124000, 75500, 86000, 97000, 85400, 100000, 106000, 113000,
                122500, 84500, 151000, 83000, 106000, 168500, 178740, 192500,
                215000, 275000, 173000, 179400, 175500, 172500, 173733,
            ],
        }
        df = pd.DataFrame.from_dict(datadict)

        # The held-out test rows are not used by this method; the split is
        # kept so the training subset stays the same as before.
        x_train, _, y_train, _ = train_test_split(
            df["size"], df["price"], test_size=0.2, random_state=0
        )
        # Estimators need a 2-D feature matrix (single feature column).
        x_train = x_train.values.reshape(-1, 1)
        # NOTE(review): the target is kept 2-D as in the original, presumably
        # because MultiTaskLasso expects (n_samples, n_targets) - confirm.
        y_train = y_train.values.reshape(-1, 1)

        models = [
            ('LR', LinearRegression()),
            ('LASSO', Lasso()),
            ('EN', ElasticNet()),
            ('Ridge', Ridge()),
            ('MultiTaskLasso', MultiTaskLasso()),
            ('LarsLasso', LassoLars()),
            ('OMP', OrthogonalMatchingPursuit()),
        ]

        # One seeded splitter shared by all models: every model is scored on
        # identical folds and the results are reproducible between runs.
        kfold = RepeatedKFold(n_splits=4, n_repeats=5, random_state=0)

        scoremap = {}
        for name, model in models:
            cv_results = cross_val_score(model, x_train, y_train, cv=kfold)
            msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
            print(msg)
            scoremap[name] = cv_results
            print('{}:{}'.format(name, cv_results))
        return scoremap
# House sizes, kept as a module-level list (same values as the "size"
# column used inside predit.bestFitLine).
size = [
    1300, 1491, 1526, 1533, 1680, 1680, 1869, 1890, 1920, 1936, 1950, 1953,
    2016, 2117, 3072, 3182, 3196, 3842, 2268, 2280, 2628, 2645, 3000,
]

# Run the model comparison and show the raw per-model score arrays.
Object = predit()
scoremap = Object.bestFitLine()
print(scoremap)

# One column per model, then one box per column on a wide figure.
scoremap = pd.DataFrame(scoremap)
plt.figure(figsize=(20, 10))
sns.boxplot(data=scoremap)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment