Created March 17, 2016 12:40
-
-
Save gaebor/96b640c6d7d07d5c8062 to your computer and use it in GitHub Desktop.
MNIST digit recognition demo
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"import numpy\n", | |
"import theano\n", | |
"import theano.tensor as T\n", | |
"import time" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"data=numpy.loadtxt(\"train.csv\", skiprows=1, delimiter=\",\", dtype=numpy.float32)\n", | |
"test_data=numpy.loadtxt(\"test.csv\", skiprows=1, delimiter=\",\", dtype=numpy.float32)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"labels=numpy.zeros((data.shape[0],10), dtype=numpy.float32)\n", | |
"for i in range(data.shape[0]):\n", | |
" labels[i,int(data[i,0])]=1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"h=100\n", | |
"d=28*28\n", | |
"c=10\n", | |
"n=data.shape[0]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"data_shared = theano.shared(data[:,1:])\n", | |
"labels_shared = theano.shared(labels)\n", | |
"M1 = theano.shared(numpy.random.rand(d,h).astype(numpy.float32)-0.5)\n", | |
"b1 = theano.shared(numpy.random.rand(h).astype(numpy.float32)-0.5)\n", | |
"M2 = theano.shared(numpy.random.rand(h,h).astype(numpy.float32)-0.5)\n", | |
"b2 = theano.shared(numpy.random.rand(h).astype(numpy.float32)-0.5)\n", | |
"M3 = theano.shared(numpy.random.rand(h,c).astype(numpy.float32)-0.5)\n", | |
"b3 = theano.shared(numpy.random.rand(c).astype(numpy.float32)-0.5)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"hidden_layer = T.nnet.sigmoid(data_shared.dot(M1)+b1[None,:])\n", | |
"hidden_layer2 = T.nnet.sigmoid(hidden_layer.dot(M2)+b2[None,:])\n", | |
"prediction = T.nnet.softmax(hidden_layer2.dot(M3)+b3[None,:])\n", | |
"\n", | |
"error = T.sqr(prediction-labels_shared).sum(1).mean()\n", | |
"\n", | |
"grad_M1 = T.grad(error,M1)\n", | |
"grad_b1 = T.grad(error,b1)\n", | |
"grad_M2 = T.grad(error,M2)\n", | |
"grad_b2 = T.grad(error,b2)\n", | |
"grad_M3 = T.grad(error,M3)\n", | |
"grad_b3 = T.grad(error,b3)\n", | |
"\n", | |
"epsilon = T.scalar()\n", | |
"f = theano.function([epsilon],T.sqrt(error),updates=[(M1,M1-epsilon*grad_M1),\n", | |
" (M2,M2-epsilon*grad_M2),\n", | |
" (M3,M3-epsilon*grad_M3),\n", | |
" (b1,b1-epsilon*grad_b1), \n", | |
" (b2,b2-epsilon*grad_b2),\n", | |
" (b3,b3-epsilon*grad_b3)])\n", | |
"\n", | |
"input_data = T.matrix()\n", | |
"test_hidden_layer = T.nnet.sigmoid(input_data.dot(M1)+b1[None,:])\n", | |
"test_hidden_layer2 = T.nnet.sigmoid(test_hidden_layer.dot(M2)+b2[None,:])\n", | |
"test_prediction = T.nnet.softmax(test_hidden_layer2.dot(M3)+b3[None,:])\n", | |
"\n", | |
"predict = theano.function([input_data],test_prediction)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"f(0.1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": false | |
}, | |
"outputs": [], | |
"source": [ | |
"eps = 0.05\n", | |
"start = time.perf_counter()\n", | |
"for i in range(10):\n", | |
" for j in range(10):\n", | |
" f(eps)\n", | |
" print(f(0.0))\n", | |
"# print(time.perf_counter()-start)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"print(predict(data[:,1:]).argmax(1))\n", | |
"print(data[:,0].astype(int))\n", | |
"(predict(data[:,1:]).argmax(1)==data[:,0]).sum()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"numpy.savetxt(\"test_predictions.csv\",\n", | |
" numpy.array([range(1,test_data.shape[0]+1),predict(test_data).argmax(1)]).transpose(),\n", | |
" fmt='%d,%d',header='ImageId,Label', comments=\"\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.4.4" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment.