# Created July 28, 2013 18:12
# Gist: akshaydixi/6099491
# Program for the Applied Machine Learning Problem
from __future__ import division | |
import pylab as pl | |
from sklearn.svm import SVC | |
from sklearn.metrics import confusion_matrix | |
# Paths of the training and test series. Each file is read line by line
# and every line is evaluated as one Python literal.
XFILETRAIN = 'x_train.txt'
YFILETRAIN = 'y_train.txt'
XFILETEST = 'x_test.txt'
YFILETEST = 'y_test.txt'

# Threshold for (relative change in y) - (relative change in x).
# float() keeps the value numeric on both interpreters: Python 2's input()
# already evaluates what was typed, while Python 3's input() returns a str
# that could not be compared against a number later on.
EPSILON = float(input("Epsilon: "))
# Read the four data files, one value per line.
#
# NOTE(security): every line is passed to eval(), so these files must be
# trusted. If they only ever contain plain numbers, float() or
# ast.literal_eval() would be a safer parser -- confirm before switching.
#
# open() replaces the Python-2-only file() builtin, and the with-blocks make
# sure each handle is closed as soon as its contents have been parsed.
with open(XFILETRAIN, 'r') as trainFileX:
    trainFileXData = [eval(line.strip()) for line in trainFileX]

with open(YFILETRAIN, 'r') as trainFileY:
    trainFileYData = [eval(line.strip()) for line in trainFileY]

with open(XFILETEST, 'r') as testFileX:
    testFileXData = [eval(line.strip()) for line in testFileX]

with open(YFILETEST, 'r') as testFileY:
    testFileYData = [eval(line.strip()) for line in testFileY]
# Per-sample results, keyed by sample index and filled in later:
#   conditionData / testConditionData -- bool stored by condition() / testCondition()
#   labelData / testLabelData         -- 0/1 label stored by generateLabel()
labelData = {}
conditionData = {}
testConditionData = {}
testLabelData = {}

# Sample index -> [x, y]. The y value is looked up by index on purpose: a y
# file shorter than its x file raises IndexError instead of being silently
# truncated.
trainData = {
    i: [x, trainFileYData[i]] for i, x in enumerate(trainFileXData)
}
testData = {
    i: [x, testFileYData[i]] for i, x in enumerate(testFileXData)
}

# Number of samples in each set.
TMAX = len(trainData)
TESTMAX = len(testData)
def condition(t):
    """Evaluate and store the threshold condition for training sample ``t``.

    Compares the relative change of y against the relative change of x
    between samples ``t`` and ``t + 1`` of ``trainData``. The condition holds
    when the y change exceeds the x change by more than ``EPSILON``.

    The result is written to ``conditionData[t]`` and returned. Raises
    ``KeyError`` when sample ``t + 1`` does not exist and
    ``ZeroDivisionError`` when the x or y value at ``t`` is zero.
    """
    following = trainData[t + 1]
    current = trainData[t]
    # Index 1 holds y, index 0 holds x.
    relative_y = (following[1] - current[1]) / current[1]
    relative_x = (following[0] - current[0]) / current[0]
    outcome = relative_y - relative_x > EPSILON
    conditionData[t] = outcome
    return outcome
def testCondition(t):
    """Evaluate and store the threshold condition for test sample ``t``.

    Compares the relative change of y against the relative change of x
    between samples ``t`` and ``t + 1`` of ``testData``. The condition holds
    when the y change exceeds the x change by more than ``EPSILON``.

    The result is written to ``testConditionData[t]`` and returned. Raises
    ``KeyError`` when sample ``t + 1`` does not exist and
    ``ZeroDivisionError`` when the x or y value at ``t`` is zero.
    """
    following = testData[t + 1]
    current = testData[t]
    # Index 1 holds y, index 0 holds x.
    relative_y = (following[1] - current[1]) / current[1]
    relative_x = (following[0] - current[0]) / current[0]
    outcome = relative_y - relative_x > EPSILON
    testConditionData[t] = outcome
    return outcome
def conditionsAfter(x, dataMax, conditionData):
    """Collect the stored condition values for indices ``x`` .. ``dataMax - 1``.

    Args:
        x: First index to look at (inclusive).
        dataMax: End of the index range (exclusive).
        conditionData: Mapping (or sequence) from index to condition value.

    Returns:
        The values found, in ascending index order. Indices with no stored
        value are skipped, so the list may be shorter than the range.
    """
    values = []
    for t in range(x, dataMax):
        try:
            values.append(conditionData[t])
        except (KeyError, IndexError):
            # No condition recorded for this index. Only a missing entry is
            # ignored; any other error is a real bug and must surface.
            continue
    return values
def generateLabel(t, dataMax, conditionData, labelData):
    """Store a 0/1 label for index ``t`` if its condition held.

    Nothing is stored when ``conditionData[t]`` is not ``True``. Otherwise
    ``labelData[t]`` becomes 1 if at least one condition recorded for a later
    index (``t + 1`` up to ``dataMax - 1``) is ``False``, and 0 if none is.

    Raises ``KeyError`` when no condition was recorded for ``t``.
    """
    if conditionData[t] != True:
        return
    later = conditionsAfter(t + 1, dataMax, conditionData)
    labelData[t] = 1 if False in later else 0
def generateFVector(dataset, t):
    """Build the 7-element feature vector for sample ``t`` of ``dataset``.

    Args:
        dataset: Mapping (or sequence) from sample index to ``[x, y]``.
        t: Index of the sample; sample ``t + 1`` must exist as well.

    Returns:
        ``[1, x, y, dx / x, dy / y, dx, dy]`` where ``dx`` and ``dy`` are the
        changes of x and y from sample ``t`` to sample ``t + 1``. The leading
        1 is a constant term.

    Raises:
        KeyError / IndexError: if sample ``t`` or ``t + 1`` is missing.
        ZeroDivisionError: if x or y at ``t`` is zero.
    """
    x_now, y_now = dataset[t][0], dataset[t][1]
    # Each difference is computed once and reused for both the relative and
    # the absolute feature.
    dx = dataset[t + 1][0] - x_now
    dy = dataset[t + 1][1] - y_now
    return [1, x_now, y_now, dx / x_now, dy / y_now, dx, dy]
# Evaluate the threshold condition for every pair of consecutive samples.
# The last sample of each set has no successor, hence the "- 1".
# range() is used instead of the Python-2-only xrange(), matching
# conditionsAfter().
for t in range(TMAX - 1):
    condition(t)
for t in range(TESTMAX - 1):
    testCondition(t)

# Derive labels from the stored conditions. This must run after all
# conditions are stored, because a label depends on the conditions of later
# samples.
for t in range(TESTMAX - 1):
    generateLabel(t, TESTMAX, testConditionData, testLabelData)
for t in range(TMAX - 1):
    generateLabel(t, TMAX, conditionData, labelData)
# --- Training set: one feature vector per labelled training sample. ---
# sorted() makes the sample order deterministic regardless of dict ordering.
fVectors = []
target = []
for t in sorted(labelData):
    fVectors.append(generateFVector(trainData, t))
    target.append(labelData[t])

# --- Test set: a feature vector for every test sample that has a successor. ---
newFVectors = []
testIndices = []  # sample index of each row in newFVectors
for t in sorted(testData):
    try:
        vector = generateFVector(testData, t)
    except KeyError:
        # The final sample has no t + 1 to difference against; skip it.
        continue
    newFVectors.append(vector)
    testIndices.append(t)

clf = SVC()
clf = clf.fit(fVectors, target)
predict = clf.predict(newFVectors)

# Map each prediction back to its sample index explicitly, rather than
# assuming that row position in `predict` equals the sample index.
predictionFor = dict(zip(testIndices, predict))

# --- Score the predictions on the labelled test samples only. ---
predictedValues = []
actualValues = []
total = 0
hits = 0
for i in sorted(testLabelData):
    total += 1
    predictedValues.append(predictionFor[i])
    actualValues.append(testLabelData[i])
    if testLabelData[i] == predictionFor[i]:
        hits += 1

if not total:
    # Without labelled test samples there is nothing to score or plot.
    raise SystemExit('No labelled test samples; cannot compute accuracy.')

# A single parenthesised argument prints identically under the Python 2
# print statement and the Python 3 print function. The two spaces reproduce
# the output of the former "print 'Accuracy : ', value".
print('Accuracy :  %s' % (hits * 100 / total))

# actualValues and predictedValues are plain lists in matching order.
cm = confusion_matrix(actualValues, predictedValues)
pl.matshow(cm)
pl.title('Confusion Matrix')
pl.colorbar()
pl.show()
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.