Created
November 21, 2014 16:37
-
-
Save peterroelants/207bac20abd2adf5205d to your computer and use it in GitHub Desktop.
Illustration of computing the gradient of a sparse sum
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "", | |
"signature": "sha256:4a13ffd7eefcebc41dd8407752bb82616665a8867e1e2e6e28d10191a393b233" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"import numpy as np\n", | |
"import scipy.sparse as sp\n", | |
"import theano\n", | |
"import theano.tensor as T\n", | |
"import theano.sparse\n", | |
"\n", | |
"# Test out sparse theano computations\n", | |
"W = theano.sparse.csr_matrix(name='W', dtype=theano.config.floatX)\n", | |
"b = theano.sparse.csr_matrix(name='b', dtype=theano.config.floatX)\n", | |
"x = theano.sparse.csr_matrix(name='x', dtype=theano.config.floatX)\n", | |
"b_prime = theano.sparse.csr_matrix(name='b_prime', dtype=theano.config.floatX)\n", | |
"\n", | |
"# Define some input variables\n", | |
"x_vals = sp.csr_matrix(np.matrix([[0,0,1.0]]))\n", | |
"W_vals = sp.csr_matrix(np.matrix([[1,0],[0,2],[0,3.0]]))\n", | |
"b_vals = sp.csr_matrix(np.matrix([[0.5,0.2]]))\n", | |
"b_prime_vals = sp.csr_matrix(np.matrix([[0.1,0.4, 0.7]]))\n", | |
"\n", | |
"h = theano.sparse.structured_sigmoid(theano.sparse.structured_dot(x,W) + b)\n", | |
"print 'h.type: ', h.type\n", | |
"z = theano.sparse.structured_sigmoid(theano.sparse.structured_dot(h,W.T) + b_prime)\n", | |
"print 'z.type: ', z.type\n", | |
"\n", | |
"# Try to do a gradient\n", | |
"L = x-z\n", | |
"print 'L.type: ', L.type\n", | |
"cost = theano.sparse.sp_sum(L, sparse_grad=True)\n", | |
"print 'cost.type: ', cost.type\n", | |
"\n", | |
"gc = T.grad(cost, W)\n", | |
"print 'gc.type: ', gc.type" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"h.type: Sparse[float64, csr]\n", | |
"z.type: Sparse[float64, csr]\n", | |
"L.type: Sparse[float64, csr]\n", | |
"cost.type: TensorType(float64, scalar)\n", | |
"x: SparseVariable{csc,float64}\n", | |
"y: SparseVariable{csr,float64}\n" | |
] | |
}, | |
{ | |
"ename": "NotImplementedError", | |
"evalue": "", | |
"output_type": "pyerr", | |
"traceback": [ | |
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", | |
"\u001b[0;32m<ipython-input-1-ad87f51dc949>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 28\u001b[0m \u001b[0;32mprint\u001b[0m \u001b[0;34m'cost.type: '\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcost\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtype\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 29\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 30\u001b[0;31m \u001b[0mgc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mT\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgrad\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcost\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mW\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 31\u001b[0m \u001b[0;32mprint\u001b[0m \u001b[0;34m'gc.type: '\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtype\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/Users/peterroelants/anaconda/lib/python2.7/site-packages/theano/gradient.pyc\u001b[0m in \u001b[0;36mgrad\u001b[0;34m(cost, wrt, consider_constant, disconnected_inputs, add_names, known_grads, return_disconnected)\u001b[0m\n\u001b[1;32m 540\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 541\u001b[0m rval = _populate_grad_dict(var_to_app_to_idx,\n\u001b[0;32m--> 542\u001b[0;31m grad_dict, wrt, cost_name)\n\u001b[0m\u001b[1;32m 543\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 544\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mxrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrval\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/Users/peterroelants/anaconda/lib/python2.7/site-packages/theano/gradient.pyc\u001b[0m in \u001b[0;36m_populate_grad_dict\u001b[0;34m(var_to_app_to_idx, grad_dict, wrt, cost_name)\u001b[0m\n\u001b[1;32m 1259\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mgrad_dict\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mvar\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1260\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1261\u001b[0;31m \u001b[0mrval\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0maccess_grad_cache\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0melem\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0melem\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mwrt\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1262\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1263\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mrval\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/Users/peterroelants/anaconda/lib/python2.7/site-packages/theano/gradient.pyc\u001b[0m in \u001b[0;36maccess_grad_cache\u001b[0;34m(var)\u001b[0m\n\u001b[1;32m 1246\u001b[0m \u001b[0;31m# the next line is like sum(terms) but doesn't add an\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1247\u001b[0m \u001b[0;31m# extraneous TensorConstant(0)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1248\u001b[0;31m \u001b[0mgrad_dict\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mvar\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mreduce\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mterms\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1249\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1250\u001b[0m \u001b[0mgrad_dict\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mvar\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mDisconnectedType\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/Users/peterroelants/anaconda/lib/python2.7/site-packages/theano/gradient.pyc\u001b[0m in \u001b[0;36m<lambda>\u001b[0;34m(x, y)\u001b[0m\n\u001b[1;32m 1246\u001b[0m \u001b[0;31m# the next line is like sum(terms) but doesn't add an\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1247\u001b[0m \u001b[0;31m# extraneous TensorConstant(0)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1248\u001b[0;31m \u001b[0mgrad_dict\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mvar\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mreduce\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mterms\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1249\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1250\u001b[0m \u001b[0mgrad_dict\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mvar\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mDisconnectedType\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/Users/peterroelants/anaconda/lib/python2.7/site-packages/theano/sparse/basic.py\u001b[0m in \u001b[0;36m__add__\u001b[0;34m(left, right)\u001b[0m\n\u001b[1;32m 262\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 263\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__add__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mleft\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mright\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 264\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0madd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mleft\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mright\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 265\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 266\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__radd__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mright\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mleft\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/Users/peterroelants/anaconda/lib/python2.7/site-packages/theano/sparse/basic.py\u001b[0m in \u001b[0;36madd\u001b[0;34m(x, y)\u001b[0m\n\u001b[1;32m 2088\u001b[0m \u001b[0;32massert\u001b[0m \u001b[0mx_is_sparse_variable\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0my_is_sparse_variable\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2089\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mx_is_sparse_variable\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0my_is_sparse_variable\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2090\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0madd_s_s\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2091\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mx_is_sparse_variable\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0my_is_sparse_variable\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2092\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0madd_s_d\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/Users/peterroelants/anaconda/lib/python2.7/site-packages/theano/gof/op.pyc\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *inputs, **kwargs)\u001b[0m\n\u001b[1;32m 491\u001b[0m \"\"\"\n\u001b[1;32m 492\u001b[0m \u001b[0mreturn_list\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'return_list'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 493\u001b[0;31m \u001b[0mnode\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmake_node\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 494\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0madd_stack_trace_on_call\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 495\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0madd_tag_trace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/Users/peterroelants/anaconda/lib/python2.7/site-packages/theano/sparse/basic.py\u001b[0m in \u001b[0;36mmake_node\u001b[0;34m(self, x, y)\u001b[0m\n\u001b[1;32m 1881\u001b[0m \u001b[0;32mprint\u001b[0m \u001b[0;34m'x: '\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1882\u001b[0m \u001b[0;32mprint\u001b[0m \u001b[0;34m'y: '\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1883\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mNotImplementedError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1884\u001b[0m return gof.Apply(self,\n\u001b[1;32m 1885\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;31mNotImplementedError\u001b[0m: " | |
] | |
} | |
], | |
"prompt_number": 1 | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment