Skip to content

Instantly share code, notes, and snippets.

@MartinBodocky
Created April 25, 2014 00:53
Show Gist options
  • Save MartinBodocky/11274564 to your computer and use it in GitHub Desktop.
Save MartinBodocky/11274564 to your computer and use it in GitHub Desktop.
First linear regression in Python
__author__ = 'martinbodocky'
from numpy import *
import matplotlib.pyplot as plt
import csv
def loadDataSet(filename):
    """Load a header-less, comma-separated numeric file.

    Every non-blank line is one sample: all columns except the last are
    feature values and the last column is the target value.

    Blank or whitespace-only lines (for example a trailing empty line at the
    end of the file) are skipped instead of raising IndexError.

    Returns a tuple ``(dataMat, targetMat)`` where ``dataMat`` is a list of
    per-row feature lists (floats) and ``targetMat`` is a list of floats.
    Raises ValueError if a field cannot be converted to float.
    """
    dataMat = []
    targetMat = []
    with open(filename) as csvfile:
        csvlines = csv.reader(csvfile, delimiter=',', quoting=csv.QUOTE_MINIMAL)
        for row in csvlines:
            # csv.reader yields [] for an empty line; also ignore lines that
            # hold only whitespace. The builtin any() is deliberately avoided
            # because this module star-imports numpy, which shadows it.
            if not "".join(row).strip():
                continue
            dataMat.append([float(num) for num in row[:-1]])
            targetMat.append(float(row[-1]))
    return dataMat, targetMat
def normalEquation(xArr, yArr):
    """Solve least-squares linear regression with the normal equation.

    Computes ``theta = (X^T X)^-1 X^T y``.

    xArr -- 2-D sequence of feature rows (one row per sample). No intercept
            column is added here; include a column of ones in xArr if an
            intercept term is wanted.
    yArr -- 1-D sequence of target values, one per row of xArr.

    Returns theta as an (n_features x 1) numpy matrix, or None (after
    printing a message) when X^T X has a determinant of exactly zero and
    therefore cannot be inverted.
    """
    # asmatrix is used rather than the `mat` alias, which was removed in
    # NumPy 2.0; asmatrix is available in old and new releases alike.
    xMat = asmatrix(xArr)
    yMat = asmatrix(yArr).T
    xTx = xMat.T * xMat
    if linalg.det(xTx) == 0.0:
        # Single-argument call form prints the same text on Python 2 and 3.
        print("This matrix is singular, cannot do inverse")
        return None
    ws = xTx.I * (xMat.T * yMat)
    return ws
def showData(filename):
    """Show a scatter plot of the first feature column against the targets.

    filename -- path of a CSV file in the format read by loadDataSet.

    Opens a matplotlib window and blocks in plt.show() until it is closed.
    """
    x, y = loadDataSet(filename)
    # asmatrix replaces the `mat` alias, which was removed in NumPy 2.0.
    xMat = asmatrix(x)
    yMat = asmatrix(y)
    # Flatten both series to plain 1-D arrays for matplotlib.
    firstFeature = asarray(xMat[:, 0]).ravel()
    targets = asarray(yMat).ravel()
    # Basic scatter which displays data
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(firstFeature, targets)
    plt.show()
def showDataWithRegression(filename):
    """Scatter-plot the data and overlay the normal-equation regression fit.

    filename -- path of a CSV file in the format read by loadDataSet.

    The first feature column is used as the horizontal axis. If
    normalEquation cannot fit the data (it returns None for a singular
    system), only the scatter plot is shown.

    Opens a matplotlib window and blocks in plt.show() until it is closed.
    """
    x, y = loadDataSet(filename)
    # asmatrix replaces the `mat` alias, which was removed in NumPy 2.0.
    xMat = asmatrix(x)
    firstFeature = asarray(xMat[:, 0]).ravel()
    targets = asarray(asmatrix(y)).ravel()

    # Linear regression by use of the Normal Equation
    theta = normalEquation(x, y)

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(firstFeature, targets)

    if theta is not None:
        # Reorder whole rows by the plotted feature so the line is drawn left
        # to right. Sorting each column independently would mix values from
        # different samples whenever there is more than one feature.
        order = argsort(firstFeature)
        xSorted = xMat[order, :]
        # compute target values predicted by the fitted model
        yTarget = xSorted * theta
        ax.plot(firstFeature[order], asarray(yTarget).ravel())
    plt.show()
# Demo run: plot the raw data set first, then the same data with the fitted
# regression line on top.
for _plotFunc in (showData, showDataWithRegression):
    _plotFunc("Data/ex1data1.txt")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment