Last active
August 29, 2015 14:10
-
-
Save jcrubino/347108b2541263fab6cc to your computer and use it in GitHub Desktop.
Basic Linear Regression Class and Function for Streaming and Static Data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Basic linear regression in pure Python (no numpy!).
# Class and functions usable for streams.
# Refactored from http://code.activestate.com/recipes/578914-simple-linear-regression-with-pure-python/
import math
def mean(series):
    """Return the arithmetic mean of ``series``.

    Args:
        series: A sized collection of numbers; it is passed to both
            ``sum`` and ``len``, so a one-shot generator will not work.

    Returns:
        The sum of the values divided by how many there are.

    Raises:
        ZeroDivisionError: If ``series`` is empty.
    """
    # float() forces true division even on Python 2, where int / int would
    # otherwise truncate (the mean of [1, 2] would come out as 1).
    return sum(series) / float(len(series))
def standard_deviation(series, ave):
    """Return the sample standard deviation of ``series``.

    Args:
        series: A sized collection of numbers.
        ave: The mean of ``series``; the caller supplies it so it is not
            recomputed here.

    Returns:
        The square root of the summed squared deviations from ``ave``
        divided by ``len(series) - 1`` (Bessel's correction).

    Raises:
        ZeroDivisionError: If ``series`` holds exactly one value, because
            the ``n - 1`` divisor is then zero.
    """
    degrees_of_freedom = len(series) - 1
    squared_deviations = sum((x - ave) ** 2 for x in series)
    # float() keeps the division exact for all-integer input on Python 2.
    return math.sqrt(squared_deviations / float(degrees_of_freedom))
def correllation_coefficient(Xseries, Yseries, ave_X, ave_Y):
    """Return the Pearson correlation coefficient of two paired series.

    Args:
        Xseries: Iterable of x values.
        Yseries: Iterable of y values, paired positionally with ``Xseries``.
        ave_X: Mean of ``Xseries``.
        ave_Y: Mean of ``Yseries``.

    Returns:
        The sum of the products of paired deviations divided by the square
        root of the product of the two sums of squared deviations.

    Raises:
        ZeroDivisionError: If either series has no spread around its mean,
            because the denominator is then zero.
    """
    cross_sum = 0
    x_sq_sum = 0
    y_sq_sum = 0
    # zip() stops at the shorter series, so surplus values in the longer
    # one are silently ignored.
    for x, y in zip(Xseries, Yseries):
        dx = x - ave_X
        dy = y - ave_Y
        cross_sum += dx * dy
        x_sq_sum += dx ** 2
        y_sq_sum += dy ** 2
    return cross_sum / math.sqrt(x_sq_sum * y_sq_sum)
class LinearRegression(object):
    """Straight-line model ``y = b * x + a`` composed from the module's
    regression primitives.

    The model fits itself to the data it is initialised with and is refit
    from scratch whenever new data is supplied; nothing is updated
    incrementally.

    Example::

        >> import numpy as np
        >> X = np.random.normal(100, size=1000)
        >> Y = np.array([(3*x+4) for x in X])
        >> linR = LinearRegression(X, Y)
        >> print(linR.predict(3.4))

        >> Y = np.array([(2*x-5) for x in X])
        >> linR.update(X, Y)
        >> print(linR.predict(4.5))

        >> X, Y = [], []
        >> for x, y in stream:  # must provide your own x,y streaming data
               X.append(x)
               Y.append(y)
               # lookahead = an x with an unknown y
               print(linR.streaming_prediction(X, Y, lookahead))

    Attributes set by ``fit``:
        mean_x, mean_y: Means of the stored X and Y data.
        std_x, std_y: Standard deviations of the stored X and Y data.
        rho: Correlation coefficient between X and Y.
        b: Slope of the fitted line.
        a: Intercept of the fitted line.
    """

    def __init__(self, X, Y):
        """Store the paired data ``X``/``Y`` and fit the line immediately."""
        self.X = X
        self.Y = Y
        self.fit()

    def fit(self):
        """Recompute every fitted attribute from ``self.X`` and ``self.Y``.

        Any error raised by the helper functions propagates unchanged, for
        example a division by zero when the X values have no spread.
        """
        self.mean_x = mean(self.X)
        self.mean_y = mean(self.Y)
        self.std_x = standard_deviation(self.X, self.mean_x)
        self.std_y = standard_deviation(self.Y, self.mean_y)
        self.rho = correllation_coefficient(
            self.X, self.Y, self.mean_x, self.mean_y
        )
        # Slope = rho * (std_y / std_x). Reuse the deviations stored just
        # above; the previous code recomputed them and read a bare ``X``
        # instead of ``self.X``.
        self.b = self.rho * (self.std_y / self.std_x)
        self.a = self.mean_y - self.b * self.mean_x

    def update(self, X, Y):
        """Replace the stored data with ``X``/``Y`` and refit the line."""
        self.X = X
        self.Y = Y
        self.fit()

    def predict(self, x):
        """Return the fitted line's value ``b * x + a`` at ``x``."""
        return self.b * x + self.a

    def streaming_prediction(self, X, Y, x):
        """Refit on ``X``/``Y`` and return the prediction at ``x``."""
        self.update(X, Y)
        return self.predict(x)
def linear_fit_func(X, Y):
    """Fit a line to ``X``/``Y`` once and return a predictor function.

    Args:
        X: Sized collection of x values.
        Y: Sized collection of y values paired positionally with ``X``.

    Returns:
        A one-argument callable mapping ``x`` to ``b * x + a``, where ``b``
        and ``a`` are the slope and intercept fitted here. The coefficients
        are captured at fit time, so later changes to ``X`` or ``Y`` do not
        affect the returned function.
    """
    mean_x = mean(X)
    mean_y = mean(Y)
    stdv_x = standard_deviation(X, mean_x)
    stdv_y = standard_deviation(Y, mean_y)
    rho = correllation_coefficient(X, Y, mean_x, mean_y)
    b = rho * stdv_y / stdv_x
    a = mean_y - b * mean_x
    return lambda x: b * x + a
# Shared history used by linear_fit_stream when the caller passes no ``mem``.
_STREAM_HISTORY = {"hX": [], "hY": []}


def _as_points(values):
    """Return ``values`` as a new list, wrapping a lone scalar in a list."""
    try:
        return list(values)
    except TypeError:
        # Not iterable, so treat it as a single observation.
        return [values]


def linear_fit_stream(X, Y, lead, mem=None):
    """Memoized linear fit for streams: add new data, refit, predict.

    Each call appends the new observations to the history held in ``mem``,
    fits a line to the whole history via ``linear_fit_func`` and returns
    the fitted value at ``lead``.

    Args:
        X: New x observation(s); a single value or an iterable of values.
        Y: New y observation(s), paired positionally with ``X``.
        lead: The x value to predict for.
        mem: Dict holding the history under the keys ``"hX"`` and ``"hY"``.
            It is updated in place. When omitted, a module-level history
            shared by all such calls is used.

    Returns:
        The prediction ``b * lead + a`` from the line fitted to all data
        seen so far.

    Raises:
        ValueError: If ``X`` and ``Y`` carry different numbers of points.

    Note:
        The history grows without bound; trimming it is the caller's job.
        The history is stored before the fit runs, so the new points are
        kept even if the fit itself raises.
    """
    if mem is None:
        mem = _STREAM_HISTORY

    new_x = _as_points(X)
    new_y = _as_points(Y)
    if len(new_x) != len(new_y):
        raise ValueError("X and Y must contain the same number of points")

    past_x = mem.get("hX")
    past_y = mem.get("hY")
    history_is_usable = (
        past_x is not None
        and past_y is not None
        and len(past_x) == len(past_y)
    )
    if history_is_usable:
        all_x = list(past_x) + new_x
        all_y = list(past_y) + new_y
    else:
        # Missing or inconsistent history: start afresh from the new data.
        all_x, all_y = new_x, new_y

    mem["hX"] = all_x
    mem["hY"] = all_y

    return linear_fit_func(all_x, all_y)(lead)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment