Created
January 3, 2013 02:50
-
-
Save gr33ndata/4440339 to your computer and use it in GitHub Desktop.
Solution for Kaggle Digit Recognizer using Nearest Centroid, https://www.kaggle.com/c/digit-recognizer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Solution for Kaggle Digit Recognizer
# <https://www.kaggle.com/c/digit-recognizer>
#
# You need to install sklearn (scikit-learn).
# Solution done using Rocchio (Nearest Centroid).
#
# Get the train and test data files from here:
# <https://www.kaggle.com/c/digit-recognizer/data>
#
# Author: Tarek Amr (@gr33ndata)
import numpy as np

try:
    # Public import path for the classifier.
    from sklearn.neighbors import NearestCentroid
except ImportError:
    # Fallback to the original private module path, which newer
    # scikit-learn releases no longer provide.
    from sklearn.neighbors.nearest_centroid import NearestCentroid

# --- Training ---------------------------------------------------------------
# train.csv is read as: one header line (skipped), then one sample per line
# where the first column is the class label and the remaining columns are
# integer feature values.
x = []
y = []
with open('train.csv', 'r') as fd:
    next(fd, None)  # skip the header line
    for line in fd:
        if not line.strip():
            continue  # tolerate blank lines, e.g. a trailing newline at EOF
        data = line.split(',')
        # int() ignores surrounding whitespace, including the line's newline.
        x.append([int(i) for i in data[1:]])
        # Labels stay as strings, so predictions are written back verbatim.
        y.append(data[0].strip())

X = np.array(x)
Y = np.array(y)

rocchio = NearestCentroid()
rocchio.fit(X, Y)

# --- Prediction -------------------------------------------------------------
# test.csv is read as: one header line (skipped), then one unlabeled sample
# per line consisting only of integer feature values.
t = []
with open('test.csv', 'r') as fd:
    next(fd, None)  # skip the header line
    for line in fd:
        if not line.strip():
            continue
        t.append([int(i) for i in line.split(',')])

# Classify all test rows in a single call instead of one call per row.
# An empty test set yields no predictions (and an empty output file).
predictions = rocchio.predict(np.array(t)) if t else []

# The output file is opened only after prediction succeeded, so a failure
# above never leaves a truncated output.txt behind.
with open('output.txt', 'w') as fo:
    for label in predictions:
        fo.write('%s\n' % label)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment