Created
May 5, 2016 00:01
-
-
Save dyerrington/a41dd8d7a1a9cea0c2ed58ce191a40a1 to your computer and use it in GitHub Desktop.
Basic end-to-end example of linear regression with scikit-learn, using k-fold cross-validation and pandas
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# k-fold regression
#
# End-to-end example: load the diabetes dataset, fit a linear regression on a
# chosen subset of predictor columns, score it with 6-fold cross-validation,
# and plot the cross-validated predictions against the true targets.

# we need our modules for this:
import pandas as pd
from matplotlib import pyplot as plt
from sklearn import metrics
from sklearn.datasets import load_diabetes
from sklearn.linear_model import LinearRegression

try:
    from sklearn.model_selection import cross_val_predict, cross_val_score
except ImportError:
    # Older scikit-learn releases only ship the legacy module, which newer
    # releases have removed.
    from sklearn.cross_validation import cross_val_score, cross_val_predict

# Make plots show up! The `%matplotlib inline` magic is IPython-only syntax,
# so call it through the IPython API and skip it under a plain interpreter.
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    pass  # not running under IPython/Jupyter

# Make the plots bigger. Set after enabling the inline backend, because
# activating it can reset rcParams on some IPython versions.
plt.rcParams['figure.figsize'] = 10, 10

# Load test data
data = load_diabetes()
df = pd.DataFrame(data['data'])

# Setup our X (predictors), y (response / prediction target)
predictor_variables = [0, 1, 3, 4, 5]  # Update these to the variables you want to use for linear regression
X = df[predictor_variables]
y = data['target']

# init our linear regression class / object
lm = LinearRegression()

# Fit our training data on the full set. The cross-validation helpers below
# fit their own per-fold copies, so this fit only serves later direct use of
# `model`.
model = lm.fit(X, y)

# Perform 6-fold cross validation
scores = cross_val_score(lm, X, y, cv=6)
print("Cross-validated scores: {}".format(scores))

# Make cross validated predictions. Use X (the selected predictors), not the
# full df, so the predictions come from the same model that was scored above.
predictions = cross_val_predict(model, X, y, cv=6)
plt.scatter(y, predictions)

# NOTE: despite the name, this is the R^2 of the cross-validated predictions,
# not a classification accuracy.
accuracy = metrics.r2_score(y, predictions)
print("Cross-Predicted Accuracy: {}".format(accuracy))

# Needed to display the figure outside a notebook; harmless with inline plots.
plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment