Skip to content

Instantly share code, notes, and snippets.

@peterroelants
Created November 21, 2014 16:37
Show Gist options
  • Save peterroelants/207bac20abd2adf5205d to your computer and use it in GitHub Desktop.
Save peterroelants/207bac20abd2adf5205d to your computer and use it in GitHub Desktop.
Illustration of computing the gradient of a sparse sum
{
"metadata": {
"name": "",
"signature": "sha256:4a13ffd7eefcebc41dd8407752bb82616665a8867e1e2e6e28d10191a393b233"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"import numpy as np\n",
"import scipy.sparse as sp\n",
"import theano\n",
"import theano.tensor as T\n",
"import theano.sparse\n",
"\n",
"# Test out sparse theano computations\n",
"W = theano.sparse.csr_matrix(name='W', dtype=theano.config.floatX)\n",
"b = theano.sparse.csr_matrix(name='b', dtype=theano.config.floatX)\n",
"x = theano.sparse.csr_matrix(name='x', dtype=theano.config.floatX)\n",
"b_prime = theano.sparse.csr_matrix(name='b_prime', dtype=theano.config.floatX)\n",
"\n",
"# Define some input variables\n",
"x_vals = sp.csr_matrix(np.matrix([[0,0,1.0]]))\n",
"W_vals = sp.csr_matrix(np.matrix([[1,0],[0,2],[0,3.0]]))\n",
"b_vals = sp.csr_matrix(np.matrix([[0.5,0.2]]))\n",
"b_prime_vals = sp.csr_matrix(np.matrix([[0.1,0.4, 0.7]]))\n",
"\n",
"h = theano.sparse.structured_sigmoid(theano.sparse.structured_dot(x,W) + b)\n",
"print 'h.type: ', h.type\n",
"z = theano.sparse.structured_sigmoid(theano.sparse.structured_dot(h,W.T) + b_prime)\n",
"print 'z.type: ', z.type\n",
"\n",
"# Try to do a gradient\n",
"L = x-z\n",
"print 'L.type: ', L.type\n",
"cost = theano.sparse.sp_sum(L, sparse_grad=True)\n",
"print 'cost.type: ', cost.type\n",
"\n",
"gc = T.grad(cost, W)\n",
"print 'gc.type: ', gc.type"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"h.type: Sparse[float64, csr]\n",
"z.type: Sparse[float64, csr]\n",
"L.type: Sparse[float64, csr]\n",
"cost.type: TensorType(float64, scalar)\n",
"x: SparseVariable{csc,float64}\n",
"y: SparseVariable{csr,float64}\n"
]
},
{
"ename": "NotImplementedError",
"evalue": "",
"output_type": "pyerr",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-1-ad87f51dc949>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 28\u001b[0m \u001b[0;32mprint\u001b[0m \u001b[0;34m'cost.type: '\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcost\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtype\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 29\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 30\u001b[0;31m \u001b[0mgc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mT\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgrad\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcost\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mW\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 31\u001b[0m \u001b[0;32mprint\u001b[0m \u001b[0;34m'gc.type: '\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtype\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Users/peterroelants/anaconda/lib/python2.7/site-packages/theano/gradient.pyc\u001b[0m in \u001b[0;36mgrad\u001b[0;34m(cost, wrt, consider_constant, disconnected_inputs, add_names, known_grads, return_disconnected)\u001b[0m\n\u001b[1;32m 540\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 541\u001b[0m rval = _populate_grad_dict(var_to_app_to_idx,\n\u001b[0;32m--> 542\u001b[0;31m grad_dict, wrt, cost_name)\n\u001b[0m\u001b[1;32m 543\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 544\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mxrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrval\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Users/peterroelants/anaconda/lib/python2.7/site-packages/theano/gradient.pyc\u001b[0m in \u001b[0;36m_populate_grad_dict\u001b[0;34m(var_to_app_to_idx, grad_dict, wrt, cost_name)\u001b[0m\n\u001b[1;32m 1259\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mgrad_dict\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mvar\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1260\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1261\u001b[0;31m \u001b[0mrval\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0maccess_grad_cache\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0melem\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0melem\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mwrt\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1262\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1263\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mrval\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Users/peterroelants/anaconda/lib/python2.7/site-packages/theano/gradient.pyc\u001b[0m in \u001b[0;36maccess_grad_cache\u001b[0;34m(var)\u001b[0m\n\u001b[1;32m 1246\u001b[0m \u001b[0;31m# the next line is like sum(terms) but doesn't add an\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1247\u001b[0m \u001b[0;31m# extraneous TensorConstant(0)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1248\u001b[0;31m \u001b[0mgrad_dict\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mvar\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mreduce\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mterms\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1249\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1250\u001b[0m \u001b[0mgrad_dict\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mvar\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mDisconnectedType\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Users/peterroelants/anaconda/lib/python2.7/site-packages/theano/gradient.pyc\u001b[0m in \u001b[0;36m<lambda>\u001b[0;34m(x, y)\u001b[0m\n\u001b[1;32m 1246\u001b[0m \u001b[0;31m# the next line is like sum(terms) but doesn't add an\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1247\u001b[0m \u001b[0;31m# extraneous TensorConstant(0)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1248\u001b[0;31m \u001b[0mgrad_dict\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mvar\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mreduce\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mterms\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1249\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1250\u001b[0m \u001b[0mgrad_dict\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mvar\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mDisconnectedType\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Users/peterroelants/anaconda/lib/python2.7/site-packages/theano/sparse/basic.py\u001b[0m in \u001b[0;36m__add__\u001b[0;34m(left, right)\u001b[0m\n\u001b[1;32m 262\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 263\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__add__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mleft\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mright\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 264\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0madd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mleft\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mright\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 265\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 266\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__radd__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mright\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mleft\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Users/peterroelants/anaconda/lib/python2.7/site-packages/theano/sparse/basic.py\u001b[0m in \u001b[0;36madd\u001b[0;34m(x, y)\u001b[0m\n\u001b[1;32m 2088\u001b[0m \u001b[0;32massert\u001b[0m \u001b[0mx_is_sparse_variable\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0my_is_sparse_variable\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2089\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mx_is_sparse_variable\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0my_is_sparse_variable\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2090\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0madd_s_s\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2091\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mx_is_sparse_variable\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0my_is_sparse_variable\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2092\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0madd_s_d\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Users/peterroelants/anaconda/lib/python2.7/site-packages/theano/gof/op.pyc\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *inputs, **kwargs)\u001b[0m\n\u001b[1;32m 491\u001b[0m \"\"\"\n\u001b[1;32m 492\u001b[0m \u001b[0mreturn_list\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'return_list'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 493\u001b[0;31m \u001b[0mnode\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmake_node\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 494\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0madd_stack_trace_on_call\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 495\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0madd_tag_trace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Users/peterroelants/anaconda/lib/python2.7/site-packages/theano/sparse/basic.py\u001b[0m in \u001b[0;36mmake_node\u001b[0;34m(self, x, y)\u001b[0m\n\u001b[1;32m 1881\u001b[0m \u001b[0;32mprint\u001b[0m \u001b[0;34m'x: '\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1882\u001b[0m \u001b[0;32mprint\u001b[0m \u001b[0;34m'y: '\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1883\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mNotImplementedError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1884\u001b[0m return gof.Apply(self,\n\u001b[1;32m 1885\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mNotImplementedError\u001b[0m: "
]
}
],
"prompt_number": 1
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment