Skip to content

Instantly share code, notes, and snippets.

@dubkov
Last active September 24, 2015 11:04
Show Gist options
  • Save dubkov/68f923c37510e69bc190 to your computer and use it in GitHub Desktop.
Save dubkov/68f923c37510e69bc190 to your computer and use it in GitHub Desktop.
decisiontree-ka4ki
#!/usr/bin/env python2
import numpy as np
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
# Train decision trees on growing prefixes of the data set and report how
# well each one predicts the remaining rows.
#
# The statements below are written so that they behave the same under the
# Python 2 interpreter named in the shebang and under Python 3: floor
# division is spelled `//`, rows are built with a list comprehension instead
# of map(), and every print() receives a single pre-formatted string.

# Load the comma-separated records, keeping only rows without a '?' field.
X = []
with open("mammographic_masses.data") as data_file:
    for line in data_file:
        # Strip the line ending before splitting so that a '?' in the last
        # field is recognised as well (unstripped it reads as '?\n', slips
        # past the check and makes int() raise).
        fields = line.strip().split(',')
        # Blank lines split into [''] and cannot be converted either.
        if fields == [''] or '?' in fields:
            continue
        X.append([int(value) for value in fields])
X = np.array(X)
print(X)

# Columns 0-4 are used as features, column 5 as the class label.
XX = X[:, :5]
YY = X[:, 5]

for size in range(6, 10):
    # The first size/10 of the rows is the training set, the rest is held
    # out for testing.
    split = len(XX) // 10 * size
    Xlearn, Xtest = XX[:split], XX[split:]
    Ylearn, Ytest = YY[:split], YY[split:]

    clf = DecisionTreeClassifier().fit(Xlearn, Ylearn)

    # Dump the fitted tree in Graphviz format, one file per split.
    filename = "dtree" + str(size) + ".dot"
    with open(filename, 'w') as f:
        tree.export_graphviz(clf, out_file=f)

    # Score over the whole held-out slice. Deriving the count from
    # len(XX) // 10 * (10 - size) would skip the trailing test rows whenever
    # the number of rows is not a multiple of 10.
    Ypredicted = clf.predict(Xtest)
    rP = int(np.sum(Ypredicted == Ytest))
    wP = len(Ytest) - rP

    # NOTE(review): the percentage printed here is the share of rows used
    # for training (size/10), although the label says "TEST SIZE". The
    # emitted text is kept unchanged for anyone parsing the output.
    print("TEST SIZE:  %d %%" % (size * 10))
    print("right:  %d" % rP)
    print("wrong:  %d" % wP)
    print("accuracy:  %s" % (float(rP) / (rP + wP)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment