Created
June 27, 2014 06:16
-
-
Save leonaburime/4228c37a7214baf9c3d4 to your computer and use it in GitHub Desktop.
Linear regression for a single variable. For the multivariable case, see my gradientDescent.py.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import dataMunge, pdb | |
from sklearn import linear_model | |
# Default dataset of (x, y) score pairs consumed by LinearRegression.solve().
# NOTE(review): the name shadows the built-in `input`; it is kept unchanged
# because LinearRegression.solve() binds it as a default argument.
input = [
    [95, 85],
    [85, 95],
    [80, 70],
    [70, 65],
    [60, 70],
]
# Background reading:
# http://aimotion.blogspot.com/2011/10/machine-learning-with-python-linear.html
class LinearRegression:
    """Ordinary least-squares regression for a single explanatory variable.

    Fits the same data two ways -- by hand (solveCoefficients) and via
    sklearn (solve) -- so the two results can be compared.
    """

    # Results of the most recent fit (None until a fit has been run).
    equation, first_coefficient, second_coefficient, coefficient_of_determination = None, None, None, None

    def __init__(self, **kwargs):
        # Each instance gets its own estimator so fitted state is not shared
        # across instances (the original kept a single class-level estimator).
        self.clf = linear_model.LinearRegression()

    def solve(self, array=input, print_results=True):
        """Fit y on x for an array of [x, y] rows using sklearn.

        array         -- sequence of [x, y] pairs (defaults to module-level `input`)
        print_results -- when True, print the fitted equation and R^2
        """
        Array = np.array(array)
        if len(Array[0]) == 2:
            # Cross-check: compute slope/intercept by hand as well.
            self.solveCoefficients(Array[:, 0], Array[:, 1])
        # sklearn expects a 2-D X: one row per sample, one column per feature.
        X, Y = [[i] for i in Array[:, 0]], Array[:, 1]
        self.clf.fit(X, Y)
        self.coe = self.clf.score(X, Y)  # R^2 on the training data
        # One label per fitted coefficient (single variable -> just 'x').
        # The original prepended a spurious extra label and relied on zip()
        # truncation to discard it.
        self.data_labels = ['x' + (str(i) if i else '') for i in range(len(self.clf.coef_))]
        if print_results:
            message = "\nEquation via sklearn: %.3f + " % self.clf.intercept_
            message += ' + '.join([str(c) + label for c, label in
                                   zip(np.around(self.clf.coef_, decimals=3), self.data_labels)])
            message += ". Coefficient of Determination is %.3f." % self.coe
            print(message)

    # Solve for b0 (intercept) and b1 (slope) of y = b1*x + b0.
    def solveCoefficients(self, input, output):
        """Least-squares slope and intercept computed from first principles.

        Sets first_coefficient (slope b1), second_coefficient (intercept b0)
        and coefficient_of_determination (r^2).
        Returns (slope, intercept).
        """
        array_len = len(input)
        input_average, output_average = np.mean(input), np.mean(output)
        input_deviation = np.subtract(input, input_average)
        output_deviation = np.subtract(output, output_average)
        # b1 = S_xy / S_xx
        sum_of_squares_input = np.square(input_deviation)
        self.first_coefficient = (sum(np.multiply(input_deviation, output_deviation)) /
                                  sum(sum_of_squares_input))
        # b0 = mean(y) - b1 * mean(x)
        self.second_coefficient = np.subtract(output_average,
                                              np.multiply(self.first_coefficient, input_average))
        # Pearson correlation r; r^2 is the coefficient of determination.
        coe = ((1.0 / array_len) * sum(np.multiply(input_deviation, output_deviation)) /
               (np.std(input) * np.std(output)))
        self.coefficient_of_determination = coe * coe
        # BUG FIX: the original printed the intercept as the x-coefficient and
        # the slope as the constant term; slope belongs next to 'x'.
        print("Equation: %.3fx + %.3f. Coefficient of Determination is %.3f"
              % (self.first_coefficient, self.second_coefficient, self.coefficient_of_determination))
        return self.first_coefficient, self.second_coefficient
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment