Skip to content

Instantly share code, notes, and snippets.

@jcrubino
Last active August 29, 2015 14:10
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jcrubino/347108b2541263fab6cc to your computer and use it in GitHub Desktop.
Save jcrubino/347108b2541263fab6cc to your computer and use it in GitHub Desktop.
Basic Linear Regression Class and Function for Streaming and Static Data
# basic linear regression in pure python (no numpy!)
# class and function usable for streams
# refactored from http://code.activestate.com/recipes/578914-simple-linear-regression-with-pure-python/
import math
def mean(series):
    """Return the arithmetic mean of *series*.

    Args:
        series: A non-empty, sized iterable of numbers (it must support
            both ``sum()`` and ``len()``).

    Returns:
        The mean as a float.

    Raises:
        ZeroDivisionError: If *series* is empty.
    """
    # float() forces true division, so integer data is not silently
    # truncated by Python 2's floor-dividing ``/`` operator.
    return float(sum(series)) / len(series)
def standard_deviation(series, ave):
    """Return the sample standard deviation of *series* around *ave*.

    The sum of squared deviations is divided by ``len(series) - 1``
    (Bessel's correction) before the square root is taken.

    Args:
        series: A sized iterable of numbers.
        ave: The value deviations are measured from (normally the mean of
            *series*, computed once by the caller).

    Returns:
        The sample standard deviation as a float.

    Raises:
        ZeroDivisionError: If *series* holds exactly one value, because the
            normalisation term ``len(series) - 1`` is then zero.
    """
    degrees_of_freedom = len(series) - 1
    squared_deviations = sum((x - ave) ** 2 for x in series)
    # float() keeps the variance from being floor-divided when both the
    # data and the average are integers (Python 2 semantics).
    return math.sqrt(float(squared_deviations) / degrees_of_freedom)
def correllation_coefficient(Xseries, Yseries, ave_X, ave_Y):
    """Return the Pearson correlation coefficient of two paired series.

    Args:
        Xseries: Iterable of x observations.
        Yseries: Iterable of y observations, paired positionally with
            *Xseries*. Pairing uses ``zip``, so any surplus values in the
            longer series are ignored.
        ave_X: Mean of the x observations.
        ave_Y: Mean of the y observations.

    Returns:
        The co-deviation sum divided by the square root of the product of
        the two squared-deviation sums.

    Raises:
        ZeroDivisionError: For plain Python numbers, when either series
            has no spread around its mean (the denominator is zero).
    """
    co_deviation_sum = 0
    sq_dev_sum_x = 0
    sq_dev_sum_y = 0
    for x_value, y_value in zip(Xseries, Yseries):
        x_dev = x_value - ave_X
        y_dev = y_value - ave_Y
        co_deviation_sum += x_dev * y_dev
        sq_dev_sum_x += x_dev ** 2
        sq_dev_sum_y += y_dev ** 2
    return co_deviation_sum / math.sqrt(sq_dev_sum_x * sq_dev_sum_y)
class LinearRegression(object):
    """Simple linear regression ``y = b * x + a`` over paired data.

    The model fits itself to the data given to the constructor and is
    refitted from scratch by ``update`` or ``streaming_prediction``.

    Example (NumPy is used here only to generate sample data)::

        import numpy as np
        X = np.random.normal(100, size=1000)
        Y = np.array([(3 * x + 4) for x in X])
        linR = LinearRegression(X, Y)
        print(linR.predict(3.4))

        Y = np.array([(2 * x - 5) for x in X])
        linR.update(X, Y)
        print(linR.predict(4.5))

        while 1:
            X = []
            Y = []
            for x, y in stream:  # must provide your own x, y streaming data
                X.append(x)
                Y.append(y)
            # lookahead = an x with an unknown y
            print(linR.streaming_prediction(X, Y, lookahead))
    """

    def __init__(self, X, Y):
        """Store the paired observations *X* and *Y* and fit the line."""
        self.X = X
        self.Y = Y
        self.fit()

    def fit(self):
        """Recompute every model parameter from ``self.X`` and ``self.Y``.

        Sets ``mean_x``/``mean_y``, ``std_x``/``std_y``, the correlation
        ``rho``, the slope ``b`` and the intercept ``a``.
        """
        self.mean_x = mean(self.X)
        self.mean_y = mean(self.Y)
        self.std_x = standard_deviation(self.X, self.mean_x)
        self.std_y = standard_deviation(self.Y, self.mean_y)
        self.rho = correllation_coefficient(
            self.X, self.Y, self.mean_x, self.mean_y
        )
        # Slope = rho * (sigma_y / sigma_x). Reuse the deviations stored
        # above rather than recomputing them.
        self.b = self.rho * (self.std_y / self.std_x)
        self.a = self.mean_y - self.b * self.mean_x

    def update(self, X, Y):
        """Replace the stored data with *X* and *Y* and refit."""
        self.X = X
        self.Y = Y
        self.fit()

    def predict(self, x):
        """Return the fitted line's value ``b * x + a`` at *x*."""
        return self.b * x + self.a

    def streaming_prediction(self, X, Y, x):
        """Refit on *X* and *Y*, then return the prediction at *x*."""
        self.update(X, Y)
        return self.predict(x)
def linear_fit_func(X,Y):
    """
    Fit a line to X and Y once and return a predictor function.

    The returned callable maps an x value to ``b*x + a`` using the slope
    and intercept computed here; X and Y are not consulted again.
    """
    x_bar = mean(X)
    y_bar = mean(Y)
    x_spread = standard_deviation(X, x_bar)
    y_spread = standard_deviation(Y, y_bar)
    correlation = correllation_coefficient(X, Y, x_bar, y_bar)
    slope = correlation * y_spread / x_spread
    intercept = y_bar - slope * x_bar

    def predictor(x):
        return slope * x + intercept

    return predictor
# Shared history used by linear_fit_stream when the caller passes no ``mem``.
_STREAM_MEMORY = {"hX": [], "hY": []}


def _as_batch(values):
    """Return *values* as a list, wrapping a single scalar observation."""
    try:
        return list(values)
    except TypeError:
        return [values]


def linear_fit_stream(X, Y, lead, mem=None):
    """
    Memoized linear fit for streams of data.

    Each call adds the new observations to the history held in *mem*,
    refits a line to the whole history, and predicts the value at *lead*.

    Args:
        X: New x observation(s): one number or an iterable of numbers.
        Y: New y observation(s), paired positionally with *X*.
        lead: The x value to predict a y for.
        mem: Dict with list entries ``"hX"`` and ``"hY"`` holding the
            history. It is updated in place. When omitted, a module-level
            history shared by all such calls is used.

    Returns:
        The predicted y at *lead*, or ``None`` while fewer than two
        observations have been accumulated (no line can be fitted yet).

    Raises:
        ValueError: If *X* and *Y* carry different numbers of observations.
    """
    if mem is None:
        mem = _STREAM_MEMORY

    new_x = _as_batch(X)
    new_y = _as_batch(Y)
    if len(new_x) != len(new_y):
        raise ValueError(
            "X and Y must contain the same number of observations"
        )

    past_x = mem.get("hX")
    past_y = mem.get("hY")
    if past_x is not None and past_y is not None and len(past_x) == len(past_y):
        past_x.extend(new_x)
        past_y.extend(new_y)
    else:
        # Missing or inconsistent history: restart from the current batch.
        past_x = mem["hX"] = new_x
        past_y = mem["hY"] = new_y

    # The sample standard deviation divides by n - 1, so wait for two points.
    if len(past_x) < 2:
        return None

    mean_x = mean(past_x)
    mean_y = mean(past_y)
    stdv_x = standard_deviation(past_x, mean_x)
    stdv_y = standard_deviation(past_y, mean_y)
    rho = correllation_coefficient(past_x, past_y, mean_x, mean_y)
    b = rho * stdv_y / stdv_x
    a = mean_y - b * mean_x
    return b * lead + a
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment