@koaning · Created January 16, 2020 21:19
Demonstrates the diminishing dependence between the estimated intercept and coefficient as the number of features increases.
import numpy as np
import matplotlib.pylab as plt
from sklearn.linear_model import LinearRegression


def run_with_samples(feats=1):
    # Simulate data: y = 1.5 + sum of the features + standard normal noise,
    # so the true intercept is 1.5 and every true coefficient is 1.
    n = 1000
    xs = np.random.uniform(0, 2, (n, feats))
    ys = 1.5 + xs.sum(axis=1) + np.random.normal(0, 1, (n,))
    # Refit a linear regression on many random subsets and record the
    # estimated intercept and first coefficient of each fit.
    size_subset = 500
    n_samples = 2000
    samples = np.zeros((n_samples, 2))
    for i in range(n_samples):
        idx = np.random.choice(np.arange(n), size=size_subset, replace=False)
        X = xs[idx]
        Y = ys[idx]
        sk_model = LinearRegression().fit(X, Y)
        samples[i, 0] = sk_model.intercept_
        samples[i, 1] = sk_model.coef_[0]
    return samples


# Scatter the sampled (intercept, coefficient) pairs for an
# increasing number of features, one panel per feature count.
plt.figure(figsize=(15, 3))
for idx, i in enumerate([1, 2, 4, 8, 16]):
    samples = run_with_samples(feats=i)
    plt.subplot(1, 5, idx + 1)
    plt.scatter(samples[:, 0], samples[:, 1], alpha=0.2)
    plt.title(f"feats={i}")
plt.show()
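
To turn the visual impression into a number, one could also compute the correlation between the sampled intercepts and first coefficients for each feature count. This is a small follow-up sketch, not part of the original gist; it reuses run_with_samples from above:

# Hypothetical follow-up: quantify the dependence seen in each scatter panel.
for i in [1, 2, 4, 8, 16]:
    samples = run_with_samples(feats=i)
    r = np.corrcoef(samples[:, 0], samples[:, 1])[0, 1]
    print(f"feats={i}: corr(intercept, coef[0]) = {r:.3f}")

If the gist's claim holds, the printed correlation should move toward zero as feats grows.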