Created
August 12, 2018 13:30
-
-
Save bbengfort/87059f95c3bfa65feb81fa4bde6a838c to your computer and use it in GitHub Desktop.
Benchmark ResidualsPlot performance on a large dataset
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Benchmark ResidualsPlot performance on a large dataset.

Generates a synthetic regression problem, then separately times the three
stages of the Yellowbrick ``ResidualsPlot`` workflow -- ``fit``, ``score``
and ``poof`` (rendering to ``residuals.pdf``) -- and prints each duration.
"""
import time

import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split as tts
from sklearn.linear_model import LinearRegression
from yellowbrick.regressor import ResidualsPlot

# Synthetic regression data: 150,000 samples with 30 features each.
X, y = make_regression(
    n_samples=150000, n_features=30, noise=0.3, bias=0.32,
    n_informative=22, tail_strength=0.3, effective_rank=2
)

# Hold out 20% of the rows for the scoring stage.
X_train, X_test, y_train, y_test = tts(X, y, test_size=0.2)

# Create the axes up front and hand it to the visualizer explicitly rather
# than relying on matplotlib's implicit "current axes".
_, ax = plt.subplots()

# time.perf_counter() is used instead of time.time(): it is monotonic and has
# the highest available resolution, so the measured intervals are not affected
# by system clock adjustments.

# Stage 1: construct the visualizer and fit on the training split.
# The constructor call is deliberately inside the timed region.
start = time.perf_counter()
oz = ResidualsPlot(LinearRegression(), ax=ax)
oz.fit(X_train, y_train)
print("fit time was {:0.3f} seconds".format(time.perf_counter() - start))

# Stage 2: score on the held-out split.
start = time.perf_counter()
oz.score(X_test, y_test)
print("score time was {:0.3f} seconds".format(time.perf_counter() - start))

# Stage 3: finalize the figure and write it to disk as a PDF.
start = time.perf_counter()
oz.poof("residuals.pdf")
print("poof time was {:0.3f} seconds".format(time.perf_counter() - start))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I got the following output when rendering to PDF (the times are much shorter when rendering to PNG):