Basic Linear Regression Class and Function for Streaming and Static Data
 # basic linear regression in pure python (no numpy!)
# class and function usable for streams
# refactored from http://code.activestate.com/recipes/578914-simple-linear-regression-with-pure-python/

import math


def mean(series):
    """Return the arithmetic mean of ``series``.

    Args:
        series: A sized collection of numbers (anything supporting ``len``
            and iteration).

    Returns:
        The mean as a float.

    Raises:
        ValueError: If ``series`` is empty.
    """
    count = len(series)
    if count == 0:
        raise ValueError("mean requires at least one data point")
    # Divide by a float so integer input is not floor-divided on Python 2.
    return sum(series) / float(count)


def standard_deviation(series, ave):
    """Return the sample standard deviation (n - 1 denominator) of ``series``.

    Args:
        series: A sized collection of numbers.
        ave: The mean of ``series``; passed in so callers can reuse it.

    Returns:
        The sample standard deviation as a float.

    Raises:
        ValueError: If ``series`` has fewer than two points, because the
            n - 1 normalization would be zero.
    """
    count = len(series)
    if count < 2:
        raise ValueError("standard deviation requires at least two data points")
    summation = sum(pow(x - ave, 2) for x in series)
    return math.sqrt(summation / float(count - 1))


def correllation_coefficient(Xseries, Yseries, ave_X, ave_Y):
    """Return the Pearson correlation coefficient of two series.

    The (misspelled) name is kept for backward compatibility; the alias
    ``correlation_coefficient`` refers to the same function.

    Args:
        Xseries: The x values.
        Yseries: The y values, paired positionally with ``Xseries``.
        ave_X: The mean of ``Xseries``.
        ave_Y: The mean of ``Yseries``.

    Returns:
        The correlation coefficient as a float.

    Raises:
        ValueError: If either series has zero variance, in which case the
            coefficient is undefined.
    """
    xy_sum = sum_sq_vx = sum_sq_vy = 0.0
    for X, Y in zip(Xseries, Yseries):
        X_var = X - ave_X
        Y_var = Y - ave_Y
        xy_sum += X_var * Y_var
        sum_sq_vx += X_var ** 2
        sum_sq_vy += Y_var ** 2
    denominator = math.sqrt(sum_sq_vx * sum_sq_vy)
    if denominator == 0:
        raise ValueError(
            "correlation is undefined when a series has zero variance")
    return xy_sum / denominator


# Correctly spelled alias; the original name above remains the primary one.
correlation_coefficient = correllation_coefficient


def _fit_line(X, Y):
    """Fit ``y = b * x + a`` and return every intermediate statistic.

    Returns:
        The tuple ``(mean_x, mean_y, std_x, std_y, rho, b, a)``.

    Raises:
        ValueError: If the series differ in length, hold fewer than two
            points, or every x value is identical.
    """
    if len(X) != len(Y):
        raise ValueError("X and Y must contain the same number of points")
    mean_x = mean(X)
    mean_y = mean(Y)
    std_x = standard_deviation(X, mean_x)
    std_y = standard_deviation(Y, mean_y)
    if std_x == 0:
        raise ValueError("cannot fit a line when every x value is identical")
    if std_y == 0:
        # A constant Y has no defined correlation, but the best-fit line is
        # still well defined (horizontal), so use rho = 0 to get slope 0.
        rho = 0.0
    else:
        rho = correllation_coefficient(X, Y, mean_x, mean_y)
    b = rho * std_y / std_x
    a = mean_y - b * mean_x
    return mean_x, mean_y, std_x, std_y, rho, b, a


class LinearRegression(object):
    """Simple (one-variable) linear regression that fits itself on creation.

    After construction, and after every ``update``, the instance exposes
    ``mean_x``, ``mean_y``, ``std_x``, ``std_y``, ``rho`` (correlation),
    ``b`` (slope) and ``a`` (intercept).

    Example::

        X = [1, 2, 3, 4]
        Y = [3 * x + 4 for x in X]
        linR = LinearRegression(X, Y)
        print(linR.predict(3.4))

        Y = [2 * x - 5 for x in X]
        linR.update(X, Y)
        print(linR.predict(4.5))

        # Streaming: collect the x, y pairs seen so far (you must provide
        # your own data source), then refit and predict in one call.
        # ``lookahead`` is an x whose y is unknown.
        print(linR.streaming_prediction(X, Y, lookahead))
    """

    def __init__(self, X, Y):
        """Store the data and fit immediately.

        Raises:
            ValueError: If the data cannot be fitted (see ``fit``).
        """
        self.X = X
        self.Y = Y
        self.fit()

    def fit(self):
        """Recompute all statistics and coefficients from ``self.X``/``self.Y``.

        Raises:
            ValueError: If the series differ in length, hold fewer than two
                points, or every x value is identical.
        """
        (self.mean_x, self.mean_y, self.std_x, self.std_y,
         self.rho, self.b, self.a) = _fit_line(self.X, self.Y)

    def update(self, X, Y):
        """Replace the stored data with ``X`` and ``Y`` and refit."""
        self.X = X
        self.Y = Y
        self.fit()

    def predict(self, x):
        """Return the fitted line's value ``b * x + a`` at ``x``."""
        return self.b * x + self.a

    def streaming_prediction(self, X, Y, x):
        """Refit on ``X`` and ``Y``, then return the prediction at ``x``."""
        self.update(X, Y)
        return self.predict(x)


def linear_fit_func(X, Y):
    """Fit once and return a predictor function.

    Args:
        X: The x values.
        Y: The y values, paired positionally with ``X``.

    Returns:
        A one-argument callable mapping ``x`` to ``b * x + a``.

    Raises:
        ValueError: If the series differ in length, hold fewer than two
            points, or every x value is identical.
    """
    b, a = _fit_line(X, Y)[-2:]
    return lambda x: b * x + a


# History used by linear_fit_stream when the caller supplies no ``mem``.
_STREAM_HISTORY = {"hX": [], "hY": []}


def _as_points(values):
    """Return ``values`` as a list, treating a lone number as one point."""
    try:
        return list(values)
    except TypeError:
        return [values]


def linear_fit_stream(X, Y, lead, mem=None):
    """Record new stream samples, refit on the full history and predict.

    Args:
        X: The new x sample, or a sequence of new x samples.
        Y: The new y sample(s), paired positionally with ``X``.
        lead: The x value to predict a y for.
        mem: Optional dict holding the history under the string keys
            ``"hX"`` and ``"hY"``. It is updated in place. When omitted, a
            module-level history shared by all such calls is used.

    Returns:
        The predicted y at ``lead``, i.e. ``b * lead + a``.

    Raises:
        ValueError: If ``X`` and ``Y`` hold different numbers of samples, or
            if the accumulated history cannot be fitted yet (fewer than two
            points, or every x identical). In the latter case the new
            samples have already been recorded, so later calls can succeed.
    """
    if mem is None:
        mem = _STREAM_HISTORY
    new_x = _as_points(X)
    new_y = _as_points(Y)
    if len(new_x) != len(new_y):
        raise ValueError("X and Y must contain the same number of samples")

    hist_x = mem.get("hX")
    hist_y = mem.get("hY")
    if hist_x is None or hist_y is None or len(hist_x) != len(hist_y):
        # An unusable stored history is discarded rather than fitted.
        hist_x, hist_y = [], []

    # list.append/extend return None, so build the new lists explicitly.
    all_x = list(hist_x) + new_x
    all_y = list(hist_y) + new_y
    mem["hX"] = all_x
    mem["hY"] = all_y

    b, a = _fit_line(all_x, all_y)[-2:]
    return b * lead + a