Skip to content

Instantly share code, notes, and snippets.

@krishpop
Last active December 25, 2015 19:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save krishpop/6e6903994b208e6f2e0c to your computer and use it in GitHub Desktop.
Save krishpop/6e6903994b208e6f2e0c to your computer and use it in GitHub Desktop.
import numpy as np
import matplotlib.pyplot as plt
def basic_linear_reg(x, y):
    """Fit a least-squares line ``y = a * x + b`` through paired samples.

    Uses the closed-form simple linear regression formulas, with ``n``
    the number of samples:

        a = (sum(x*y) - sum(x) * sum(y) / n) / (sum(x*x) - sum(x)**2 / n)
        b = (sum(y) - a * sum(x)) / n

    Args:
        x: Sized iterable of numeric x-coordinates (list, tuple, 1-D array).
        y: Sized iterable of numeric y-coordinates, same length as ``x``.

    Returns:
        A two-element list ``[a, b]``: the slope and the intercept.

    Raises:
        ValueError: If ``x`` is empty or ``x`` and ``y`` differ in length.
        ZeroDivisionError: If every ``x`` value is identical (zero variance)
            and the inputs are plain Python numbers. NumPy inputs produce
            nan/inf with a runtime warning instead.
    """
    count = len(x)
    # `count == 0` rather than `not x`: truth-testing a NumPy array raises.
    if count == 0:
        raise ValueError("basic_linear_reg requires at least one sample")
    if len(y) != count:
        # Pairing would otherwise stop at the shorter input while the other
        # sums still cover every element, silently yielding a wrong fit.
        raise ValueError(
            "x and y must have the same length (got %d and %d)"
            % (count, len(y))
        )

    # A float sample count forces true division even for integer inputs on
    # Python 2, where int / int would truncate.
    n = float(count)
    sum_x = sum(x)
    sum_y = sum(y)
    sum_xsq = sum(xi * xi for xi in x)
    sum_xy = sum(xi * yi for xi, yi in zip(x, y))

    slope = (sum_xy - sum_x * sum_y / n) / (sum_xsq - sum_x ** 2 / n)
    intercept = (sum_y - slope * sum_x) / n
    return [slope, intercept]
# Synthetic data set: 1000 points drawn uniformly from [0, 100), with
# y = log(x) plus Gaussian noise (mean 0, standard deviation 0.3).
x = np.random.uniform(0, 100, 1000)
noise = np.random.normal(0, 0.3, 1000)
y = np.log(x) + noise

# Figure 1: the noisy samples against the noise-free log curve.
true_x = np.arange(1, 100)
plt.figure(1)
plt.scatter(x, y, label="log(x) with some noise")
plt.plot(
    true_x,
    np.log(true_x),
    c="chartreuse",
    label="log(x) true function",
)
plt.xlabel("x")
plt.ylabel("f(x) = log(x)")
plt.legend(loc="best")
plt.title("A Basic Log Function")
# Fit the least-squares line once; figures 2 and 3 both draw from this fit.
reg = basic_linear_reg(x, y)
slope, intercept = reg

# Fitted line evaluated at the integers 0..99, kept as tuples.
r_x = tuple(range(100))
r_y = tuple(i * slope + intercept for i in r_x)

# Residuals of the noisy samples against the fitted line.
residuals = y - (x * slope + intercept)

# Figures 2 and 3 show the same content: the fitted line plus the residuals.
for fig_num in (2, 3):
    plt.figure(fig_num)
    plt.plot(r_x, r_y, c="g", label="linear regression")
    plt.scatter(x, residuals, c="red", label="residuals")
    plt.xlabel("x")
    plt.ylabel("y")
    plt.legend(loc="best")
from sklearn.ensemble import RandomForestRegressor
# `sklearn.learning_curve` was deprecated in scikit-learn 0.18 and removed in
# 0.20. Import from the current location first and fall back only for old
# installs, so the script does not die here on a modern scikit-learn.
try:
    from sklearn.model_selection import learning_curve
except ImportError:
    from sklearn.learning_curve import learning_curve

# Fit a random forest to the same noisy log data used for the linear fit.
rfg = RandomForestRegressor()
r_x = np.array(r_x)
# reshape(-1, 1) turns the 1-D samples into a single-feature column matrix.
rfg.fit(x.reshape(-1, 1), y)

# Predict on the evaluation grid once; both figures below reuse it.
grid_pred = rfg.predict(r_x.reshape(-1, 1))

# Figure 4: forest fit against the true log curve and the noisy samples.
plt.figure(4)
plt.plot(r_x, grid_pred, c="yellow", label="random forest fit")
plt.plot(np.arange(1, 100), np.log(np.arange(1, 100)), c="chartreuse", label="log(x) true function")
plt.scatter(x, y, label="log(x) with some noise")
plt.xlabel("x")
plt.ylabel("y")
# Without a legend call the label= arguments above are never displayed.
plt.legend(loc="best")

# Figure 5: forest fit together with its residuals on the training samples.
plt.figure(5)
plt.plot(r_x, grid_pred, c="green", label="random forest fit")
plt.scatter(x, y - rfg.predict(x.reshape(-1, 1)), c="r", label="residual")
# Axis labels and legend added to match the other figures.
plt.xlabel("x")
plt.ylabel("y")
plt.legend(loc="best")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment