Skip to content

Instantly share code, notes, and snippets.

@akshaydixi
Created July 28, 2013 18:12
Show Gist options
  • Save akshaydixi/6099491 to your computer and use it in GitHub Desktop.
Save akshaydixi/6099491 to your computer and use it in GitHub Desktop.
Program for the Applied Machine Learning Problem
from __future__ import division
import pylab as pl
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
# Paths of the training and test series; each file holds one value per line.
XFILETRAIN = 'x_train.txt'
YFILETRAIN = 'y_train.txt'
XFILETEST = 'x_test.txt'
YFILETEST = 'y_test.txt'

# Threshold compared against (relative change of y) - (relative change of x).
# float() rejects a non-numeric answer immediately instead of letting it
# reach the comparisons made later in the script.
# NOTE(review): under Python 2, input() evaluates the typed text as an
# expression before float() sees it -- only run this with trusted input.
EPSILON = float(input("Epsilon: "))

# Read every line of each file. The with-blocks close each handle as soon as
# its lines are in memory.
with open(XFILETRAIN, 'r') as trainFileX:
    trainFileXData = trainFileX.readlines()
with open(YFILETRAIN, 'r') as trainFileY:
    trainFileYData = trainFileY.readlines()
with open(XFILETEST, 'r') as testFileX:
    testFileXData = testFileX.readlines()
with open(YFILETEST, 'r') as testFileY:
    testFileYData = testFileY.readlines()

# SECURITY: eval() executes whatever expression a line contains, so the data
# files must come from a trusted source. If the lines are plain numbers,
# float(...) would be a safer parser -- confirm the file format before
# switching.
trainFileXData = [eval(element.strip()) for element in trainFileXData]
trainFileYData = [eval(element.strip()) for element in trainFileYData]
testFileXData = [eval(element.strip()) for element in testFileXData]
testFileYData = [eval(element.strip()) for element in testFileYData]

# Row index -> [x, y] pair for each data set.
trainData = {}
testData = {}
# Per-row condition flags and labels; populated further down the script.
labelData = {}
conditionData = {}
testConditionData = {}
testLabelData = {}

# The x file decides how many rows exist; a shorter y file raises IndexError.
for i, xValue in enumerate(trainFileXData):
    trainData[i] = [xValue, trainFileYData[i]]
for i, xValue in enumerate(testFileXData):
    testData[i] = [xValue, testFileYData[i]]

# Number of rows in each data set.
TMAX = len(trainData)
TESTMAX = len(testData)
def condition(t):
ydel = (trainData[t+1][1] - trainData[t][1]) / trainData[t][1]
xdel = (trainData[t+1][0] - trainData[t][0]) / trainData[t][0]
conditionData[t] = ydel-xdel > EPSILON
return conditionData[t]
def testCondition(t):
ydel = (testData[t+1][1] - testData[t][1]) / testData[t][1]
xdel = (testData[t+1][0] - testData[t][0]) / testData[t][0]
testConditionData[t] = ydel-xdel > EPSILON
return testConditionData[t]
def conditionsAfter(x,dataMax,conditionData):
    """Collect the condition flags stored for indices x .. dataMax-1.

    conditionData is looked up with conditionData[t] for each index in that
    range, in ascending order. Indices that have no entry are skipped, so
    the result may be shorter than the range. Returns a new list; an empty
    range yields [].

    Only "no entry for this index" errors are skipped; anything else (for
    example a wrong argument type) propagates to the caller.
    """
    values = []
    for t in range(x, dataMax):
        try:
            values.append(conditionData[t])
        except (KeyError, IndexError):
            # No flag recorded for this index -- leave it out.
            continue
    return values
def generateLabel(t,dataMax,conditionData,labelData):
    """Write a 0/1 label for index t into labelData, based on conditionData.

    Looks at conditionData[t] and at the flags that conditionsAfter() returns
    for indices t+1 .. dataMax-1. Nothing is returned; the only effect is the
    (possible) assignment to labelData[t].

    NOTE(review): the indentation of this function was lost in the copy this
    was restored from. The `else` is assumed to belong to the inner `if`
    (nearest-if reading): a label is written only when conditionData[t] is
    True -- 1 if some later flag is False, 0 otherwise. If the `else` was
    originally paired with the outer `if`, rows whose flag is not True get
    label 0 instead, and True rows with no later False get no label.
    Confirm against the original file.
    """
    if conditionData[t] == True:
        # `False in ...` is an equality test, so any later flag equal to
        # False (including 0) counts.
        if False in conditionsAfter(t+1,dataMax,conditionData):
            labelData[t] = 1
        else:
            labelData[t] = 0
def generateFVector(dataset,t):
    """Build the 7-element feature vector for row t of dataset.

    dataset[t] and dataset[t+1] are indexed at positions 0 (x) and 1 (y).
    The returned list is, in order: the constant 1, x, y, the relative
    change of x, the relative change of y, the absolute change of x and the
    absolute change of y, all measured from row t to row t+1.
    """
    current = dataset[t]
    following = dataset[t + 1]
    xNow, yNow = current[0], current[1]
    xStep = following[0] - xNow
    yStep = following[1] - yNow
    return [
        1,
        xNow,
        yNow,
        xStep / xNow,
        yStep / yNow,
        xStep,
        yStep,
    ]
# Every row except the last has a successor, so conditions and labels are
# computed for rows 0 .. MAX-2 of each data set. The condition flags of a
# data set are all filled in before its labels are generated, because a
# label looks at the flags of later rows.

# Training set.
for trainRow in range(TMAX - 1):
    condition(trainRow)
for trainRow in range(TMAX - 1):
    generateLabel(trainRow, TMAX, conditionData, labelData)

# Test set.
for testRow in range(TESTMAX - 1):
    testCondition(testRow)
for testRow in range(TESTMAX - 1):
    generateLabel(testRow, TESTMAX, testConditionData, testLabelData)
fVectors = []
newFVectors = []
target = []

# Training samples: one feature vector (and its label) per labelled row.
for t in labelData:
    fVectors.append(generateFVector(trainData, t))
    target.append(labelData[t])

# Test samples: a feature vector needs row t+1, so only rows 0 .. TESTMAX-2
# have one. Walking that range explicitly keeps newFVectors[t] aligned with
# row t (the predictions are indexed by row number later on) and lets any
# unexpected error surface instead of being silently skipped.
for t in range(TESTMAX - 1):
    newFVectors.append(generateFVector(testData, t))
# Fit a support vector classifier (default parameters) on the training
# samples and predict a label for every test feature vector.
clf = SVC()
clf = clf.fit(fVectors, target)
predict = clf.predict(newFVectors)

# Compare predictions with the known labels. Only labelled test rows are
# scored; predict is indexed by row number. The true and predicted labels
# are collected in the same pass so the two lists are paired by
# construction.
predictedValues = []
actualValues = []
hits = 0
for i in testLabelData:
    predictedValues.append(predict[i])
    actualValues.append(testLabelData[i])
    if testLabelData[i] == predict[i]:
        hits += 1
total = len(actualValues)

# Without any labelled test row there is nothing to score; stop with a clear
# message rather than a ZeroDivisionError from the percentage below.
if total == 0:
    raise SystemExit('No labelled test samples; cannot compute accuracy.')

# A single pre-joined argument prints the same text whether print is the
# Python 2 statement or the Python 3 function.
print(' '.join(['Accuracy : ', str(hits * 100 / total)]))

# Show the confusion matrix (true labels vs. predictions) as a colour map.
cm = confusion_matrix(actualValues, predictedValues)
pl.matshow(cm)
pl.title('Confusion Matrix')
pl.colorbar()
pl.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment