# Basic Linear Regression Class and Function for Streaming and Static Data
# basic linear regression in pure python (no numpy!)
# class and function usable for streams
# refactored from http://code.activestate.com/recipes/578914-simple-linear-regression-with-pure-python/
import math
def mean(series):
    """
    Return the arithmetic mean of ``series``.

    series -- a sized collection of numbers (``len`` is called on it, so a
              generator will not work)

    Raises ZeroDivisionError when ``series`` is empty.
    """
    # Divide by a float so integer input is not truncated by Python 2's
    # floor division (1 + 2 over 2 must be 1.5, not 1).
    return sum(series) / float(len(series))
def standard_deviation(series, ave):
    """
    Return the sample standard deviation of ``series`` around ``ave``.

    series -- a sized collection of numbers
    ave    -- the mean of ``series`` (the caller supplies it so it is not
              recomputed here)

    The sum of squared deviations is divided by ``len(series) - 1``, so a
    series with fewer than two values raises ZeroDivisionError.
    """
    normalization = len(series) - 1
    summation = sum((x - ave) ** 2 for x in series)
    # float() keeps integer input from being floor-divided under Python 2.
    return math.sqrt(summation / float(normalization))
def correllation_coefficient(Xseries, Yseries, ave_X, ave_Y):
    """
    Return the correlation coefficient of two paired series.

    Xseries, Yseries -- the paired observations; they are walked together
                        with ``zip``, so values beyond the shorter series
                        are ignored
    ave_X, ave_Y     -- the means of ``Xseries`` and ``Yseries``

    The result is the sum of the products of the deviations divided by the
    square root of the product of the two sums of squared deviations.
    With plain Python numbers a series that never deviates from its mean
    makes that divisor zero and raises ZeroDivisionError.
    """
    xy_sum, sum_sq_vx, sum_sq_vy = 0, 0, 0
    for X, Y in zip(Xseries, Yseries):
        X_var = X - ave_X
        Y_var = Y - ave_Y
        xy_sum += X_var * Y_var
        sum_sq_vx += X_var ** 2
        sum_sq_vy += Y_var ** 2
    return xy_sum / math.sqrt(sum_sq_vx * sum_sq_vy)
class LinearRegression(object):
    """
    Compositional Class of Linear Regression primitives
    Self Fits to Initialized Data

    The fitted line is ``y = a + b * x`` where ``b`` is the correlation
    coefficient times ``std_y / std_x`` and ``a`` is ``mean_y - b * mean_x``.

    Example (numpy is only used here to generate sample data)::

        import numpy as np
        X = np.random.normal(100, size=1000)
        Y = np.array([(3*x+4) for x in X])
        linR = LinearRegression(X,Y)
        print(linR.predict(3.4))
        Y = np.array([(2*x-5) for x in X])
        linR.update(X,Y)
        print(linR.predict(4.5))

        while 1:
            X = []
            Y = []
            for x,y in stream: # must provide your own x,y streaming data
                X.append(x)
                Y.append(y)
            print(linR.streaming_prediction(X,Y,lookahead)) # lookahead = an x with an unknown y
    """

    def __init__(self, X, Y):
        """Store the paired series ``X`` and ``Y`` and fit the line to them."""
        self.X = X
        self.Y = Y
        self.fit()

    def fit(self):
        """
        (Re)compute the line from ``self.X`` and ``self.Y``.

        Sets ``mean_x``, ``mean_y``, ``std_x``, ``std_y``, ``rho`` (the
        correlation coefficient), ``b`` (slope) and ``a`` (intercept).
        """
        self.mean_x = mean(self.X)
        self.mean_y = mean(self.Y)
        self.std_x = standard_deviation(self.X, self.mean_x)
        self.std_y = standard_deviation(self.Y, self.mean_y)
        self.rho = correllation_coefficient(self.X, self.Y, self.mean_x, self.mean_y)
        # Reuse the deviations stored just above instead of computing them a
        # second time.
        self.b = self.rho * (self.std_y / self.std_x)
        self.a = self.mean_y - self.b * self.mean_x

    def update(self, X, Y):
        """Replace the stored data with ``X`` and ``Y`` and refit."""
        self.X = X
        self.Y = Y
        self.fit()

    def predict(self, x):
        """Return the y value of the fitted line at ``x``."""
        return self.b * x + self.a

    def streaming_prediction(self, X, Y, x):
        """Refit on ``X`` and ``Y``, then return the prediction for ``x``."""
        self.update(X, Y)
        return self.predict(x)
def linear_fit_func(X, Y):
    """
    one time fit regression, returns predictor function

    X, Y -- the paired series to fit

    The returned function takes an ``x`` and returns ``b * x + a``, where
    the slope ``b`` is the correlation coefficient times
    ``stdv_y / stdv_x`` and the intercept ``a`` is ``mean_y - b * mean_x``.
    """
    mean_x = mean(X)
    mean_y = mean(Y)
    stdv_x = standard_deviation(X, mean_x)
    stdv_y = standard_deviation(Y, mean_y)
    rho = correllation_coefficient(X, Y, mean_x, mean_y)
    b = rho * stdv_y / stdv_x
    a = mean_y - b * mean_x
    return lambda x: b * x + a
def _as_observations(values):
    """Return ``values`` as a list; a single non-iterable value becomes a one-item list."""
    try:
        return list(values)
    except TypeError:
        return [values]


def linear_fit_stream(X, Y, lead, mem={"hX": [], "hY": []}):
    """
    memoized linear fit function for streams of data

    Adds the new observation(s) to the history kept in ``mem``, fits a line
    over the whole history with linear_fit_func and evaluates it at ``lead``.

    X, Y -- the new observation(s): either one number each or two sequences
            holding the same number of values
    lead -- the x value to predict for
    mem  -- dict holding the history under the keys "hX" and "hY".  The
            default dict is created once and deliberately shared between
            calls; that sharing is the memoization.  Pass your own dict to
            keep independent streams apart.

    Returns a ``(prediction, mem)`` tuple, where ``mem`` is the dict that
    was passed in, updated in place.

    Raises ValueError when X and Y hold a different number of observations.
    The history is stored before fitting, so a call whose fit fails still
    contributes its points to ``mem``.
    """
    new_x = _as_observations(X)
    new_y = _as_observations(Y)
    if len(new_x) != len(new_y):
        raise ValueError("X and Y must hold the same number of observations")

    hist_x = mem.get("hX")
    hist_y = mem.get("hY")
    # A missing or lopsided history cannot be paired up, so start over from
    # the observations of this call.
    if hist_x is None or hist_y is None or len(hist_x) != len(hist_y):
        hist_x, hist_y = [], []

    all_x = list(hist_x) + new_x
    all_y = list(hist_y) + new_y
    mem["hX"] = all_x
    mem["hY"] = all_y

    return linear_fit_func(all_x, all_y)(lead), mem
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment