# Source: GitHub gist krishpop/6e6903994b208e6f2e0c (last active December 25, 2015 19:12).
# Note: GitHub flagged this file as containing bidirectional Unicode text, which may be
# interpreted or compiled differently than it appears. Review it in an editor that
# reveals hidden Unicode characters.
import matplotlib.pyplot as plt
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import learning_curve
def basic_linear_reg(x, y):
    """Fit the least-squares line ``y = a * x + b`` through paired samples.

    Uses the closed-form simple linear regression solution built from the
    sums of ``x``, ``y``, ``x * x`` and ``x * y``.

    Args:
        x: Sized sequence (list, tuple, 1-D array) of predictor values.
        y: Sized sequence of response values, the same length as ``x``.

    Returns:
        A two-element list ``[a, b]`` holding the slope and the intercept.

    Raises:
        ValueError: If ``x`` and ``y`` do not have the same length.
        ZeroDivisionError: If the inputs are empty, or if plain Python
            numbers are passed and the slope denominator evaluates to zero
            (e.g. all ``x`` values identical).
    """
    if len(x) != len(y):
        # Pairing the samples would otherwise silently drop the surplus ones.
        raise ValueError("x and y must have the same number of samples")

    # A float count keeps every division below a true division, even for
    # integer inputs on interpreters where ``/`` truncates.
    n = float(len(x))
    sum_x = sum(x)
    sum_y = sum(y)
    sum_xsq = sum(v * v for v in x)
    sum_xy = sum(u * v for u, v in zip(x, y))

    # Slope: centred cross-product sum over centred sum of squares of x.
    a = (sum_xy - sum_x * sum_y / n) / (sum_xsq - sum_x ** 2 / n)
    # Intercept: forces the line through the point (mean(x), mean(y)).
    b = (sum_y - a * sum_x) / n
    return [a, b]
# Synthetic data set: 1000 x values drawn uniformly from [0, 100) and
# y = log(x) perturbed by Gaussian noise with standard deviation 0.3.
x = np.random.uniform(0, 100, 1000)
y = np.log(x) + np.random.normal(0, 0.3, 1000)

# Figure 1: the noisy samples together with the noise-free log curve.
true_x = np.arange(1, 100)
plt.figure(1)
plt.scatter(x, y, label="log(x) with some noise")
plt.plot(true_x, np.log(true_x), c="chartreuse", label="log(x) true function")
plt.xlabel("x")
plt.ylabel("f(x) = log(x)")
plt.legend(loc="best")
plt.title("A Basic Log Function")
# Figure 2: the fitted regression line over x = 0..99 and the residuals of
# the linear fit (observed y minus the line evaluated at each sample).
plt.figure(2)
reg = basic_linear_reg(x, y)
r_x = np.arange(100)
r_y = r_x * reg[0] + reg[1]
plt.plot(r_x, r_y, c='g', label="linear regression")
plt.scatter(x, y - (x * reg[0] + reg[1]), c='red', label="residuals")
plt.xlabel("x")
plt.ylabel("y")
plt.legend(loc="best")
# Figure 3: regression line over x = 0..99 plus the linear-fit residuals.
# NOTE(review): this draws exactly the same content as figure 2 - confirm
# whether a different plot was intended here.
plt.figure(3)
r_x = np.arange(100)
r_y = r_x * reg[0] + reg[1]
plt.plot(r_x, r_y, c='g', label="linear regression")
plt.scatter(x, y - (x * reg[0] + reg[1]), c='red', label="residuals")
plt.xlabel("x")
plt.ylabel("y")
plt.legend(loc="best")
# Fit a random forest (default hyper-parameters) to the same noisy data.
# scikit-learn expects a 2-D feature matrix, hence the reshape to one column.
rfg = RandomForestRegressor()
r_x = np.array(r_x)
rfg.fit(x.reshape(-1, 1), y)
grid_prediction = rfg.predict(r_x.reshape(-1, 1))

# Figure 4: forest prediction on the evaluation grid against the true log
# curve and the noisy samples.
true_x = np.arange(1, 100)
plt.figure(4)
plt.plot(r_x, grid_prediction, c="yellow", label="random forest fit")
plt.plot(true_x, np.log(true_x), c="chartreuse", label="log(x) true function")
plt.scatter(x, y, label="log(x) with some noise")
plt.xlabel("x")
plt.ylabel("y")
# The labels above are only rendered once a legend is requested.
plt.legend(loc="best")

# Figure 5: forest prediction on the grid plus the residuals of the forest
# on the training samples (observed y minus predicted y).
plt.figure(5)
plt.plot(r_x, grid_prediction, c="green", label="random forest fit")
plt.scatter(x, y - rfg.predict(x.reshape(-1, 1)), c="r", label="residual")
plt.legend(loc="best")
# (GitHub page footer: sign up for free or sign in on GitHub to comment on this gist.)