MInner/keras_seq2seq.ipynb Secret

## keras_seq2seq.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using Theano backend.\n",
      "/home/usman/anaconda2/envs/keras/lib/python2.7/site-packages/theano/tensor/signal/downsample.py:5: UserWarning: downsample module has been moved to the pool module.\n",
      "  warnings.warn(\"downsample module has been moved to the pool module.\")\n"
     ]
    }
   ],
   "source": [
    "from seq2seq.layers.encoders import LSTMEncoder\n",
    "from seq2seq.layers.decoders import LSTMDecoder, LSTMDecoder2, AttentionDecoder\n",
    "from seq2seq.layers.bidirectional import Bidirectional\n",
    "from keras.layers.recurrent import LSTM\n",
    "from keras.layers.core import RepeatVector, Dense, TimeDistributedDense, Dropout, Activation\n",
    "from keras.models import Sequential\n",
    "import theano.tensor as T\n",
    "\n",
    "'''\n",
    "Papers:\n",
    "[1] Sequence to Sequence Learning with Neural Networks (http://arxiv.org/abs/1409.3215)\n",
    "[2] Learning Phrase Representations using RNN\n",
    "Encoder-Decoder for Statistical Machine Translation (http://arxiv.org/abs/1406.1078)\n",
    "[3] Neural Machine Translation by Jointly Learning to Align and Translate (http://arxiv.org/abs/1409.0473)\n",
    "'''\n",
    "\n",
    "class Seq2seqBase(Sequential):\n",
    "    '''\n",
    "    Abstract class for all Seq2seq models.\n",
    "    '''\n",
    "    wait_for_shape = False\n",
    "\n",
    "    def add(self, layer):\n",
    "        '''\n",
    "        For automatic shape inference in nested models.\n",
    "        '''\n",
    "        self.layers.append(layer)\n",
    "        n = len(self.layers)\n",
    "        if self.wait_for_shape or (n == 1 and not hasattr(layer, '_input_shape')):\n",
    "            self.wait_for_shape = True\n",
    "        elif n > 1:\n",
    "            layer.set_previous(self.layers[-2])\n",
    "\n",
    "    def set_previous(self, layer):\n",
    "        '''\n",
    "        For automatic shape inference in nested models.\n",
    "        '''\n",
    "        self.layers[0].set_previous(layer)\n",
    "        if self.wait_for_shape:\n",
    "            self.wait_for_shape = False\n",
    "            for i in range(1, len(self.layers)):\n",
    "                self.layers[i].set_previous(self.layers[i - 1])\n",
    "\n",
    "    def reset_states(self):\n",
    "        for l in self.layers:\n",
    "            if  hasattr(l, 'stateful'):\n",
    "                if l.stateful:\n",
    "                    l.reset_states()\n",
    "\n",
    "class SimpleSeq2seq(Seq2seqBase):\n",
    "    '''\n",
    "    Simple model for sequence to sequence learning.\n",
    "    The encoder encodes the input sequence to vector (called context vector)\n",
    "    The decoder decoder the context vector in to a sequence of vectors.\n",
    "    There is no one on one relation between the input and output sequence elements.\n",
    "    The input sequence and output sequence may differ in length.\n",
    "    Arguments:\n",
    "    output_dim : Required output dimension.\n",
    "    hidden_dim : The dimension of the internal representations of the model.\n",
    "    output_length : Length of the required output sequence.\n",
    "    depth : Used to create a deep Seq2seq model. For example, if depth = 3, \n",
    "            there will be 3 LSTMs on the enoding side and 3 LSTMs on the \n",
    "            decoding side. You can also specify depth as a tuple. For example,\n",
    "            if depth = (4, 5), 4 LSTMs will be added to the encoding side and\n",
    "            5 LSTMs will be added to the decoding side.\n",
    "    dropout : Dropout probability in between layers.\n",
    "    '''\n",
    "    def __init__(self, output_dim, hidden_dim, output_length, depth=1, dropout=0.25, **kwargs):\n",
    "        super(SimpleSeq2seq, self).__init__()\n",
    "        if type(depth) not in [list, tuple]:\n",
    "            depth = (depth, depth)\n",
    "        self.encoder = LSTM(hidden_dim, **kwargs)\n",
    "        self.decoder = LSTM(hidden_dim if depth[1]>1 else output_dim, return_sequences=True, **kwargs)\n",
    "        for i in range(1, depth[0]):\n",
    "            self.add(LSTM(hidden_dim, return_sequences=True, **kwargs))\n",
    "            self.add(Dropout(dropout))\n",
    "        self.add(self.encoder)\n",
    "        self.add(Dropout(dropout))\n",
    "        self.add(RepeatVector(output_length))\n",
    "        self.add(self.decoder)\n",
    "        for i in range(1, depth[1]):\n",
    "            self.add(LSTM(hidden_dim, return_sequences=True, **kwargs))\n",
    "            self.add(Dropout(dropout))\n",
    "        if depth[1] > 1:\n",
    "            self.add(TimeDistributedDense(output_dim))\n",
    "\n",
    "class Seq2seq(Seq2seqBase):\n",
    "    '''\n",
    "    Seq2seq model based on [1] and [2].\n",
    "    This model has the ability to transfer the encoder hidden state to the decoder's\n",
    "    hidden state(specified by the broadcast_state argument). Also, in deep models \n",
    "    (depth > 1), the hidden state is propogated throughout the LSTM stack(specified by \n",
    "    the inner_broadcast_state argument. You can switch between [1] based model and [2] \n",
    "    based model using the peek argument.(peek = True for [2], peek = False for [1]).\n",
    "    When peek = True, the decoder gets a 'peek' at the context vector at every timestep.\n",
    "    [1] based model:\n",
    "        Encoder:\n",
    "        X = Input sequence\n",
    "        C = LSTM(X); The context vector\n",
    "        Decoder:\n",
    "        y(t) = LSTM(s(t-1), y(t-1)); Where s is the hidden state of the LSTM (h and c)\n",
    "        y(0) = LSTM(s0, C); C is the context vector from the encoder.\n",
    "    [2] based model:\n",
    "        Encoder:\n",
    "        X = Input sequence\n",
    "        C = LSTM(X); The context vector\n",
    "        Decoder:\n",
    "        y(t) = LSTM(s(t-1), y(t-1), C)\n",
    "        y(0) = LSTM(s0, C, C)\n",
    "        Where s is the hidden state of the LSTM (h and c), and C is the context vector \n",
    "        from the encoder.\n",
    "    Arguments:\n",
    "    output_dim : Required output dimension.\n",
    "    hidden_dim : The dimension of the internal representations of the model.\n",
    "    output_length : Length of the required output sequence.\n",
    "    depth : Used to create a deep Seq2seq model. For example, if depth = 3, \n",
    "            there will be 3 LSTMs on the enoding side and 3 LSTMs on the \n",
    "            decoding side. You can also specify depth as a tuple. For example,\n",
    "            if depth = (4, 5), 4 LSTMs will be added to the encoding side and\n",
    "            5 LSTMs will be added to the decoding side.\n",
    "    broadcast_state : Specifies whether the hidden state from encoder should be \n",
    "                      transfered to the deocder.\n",
    "    inner_broadcast_state : Specifies whether hidden states should be propogated \n",
    "                            throughout the LSTM stack in deep models.\n",
    "    peek : Specifies if the decoder should be able to peek at the context vector\n",
    "           at every timestep.\n",
    "    dropout : Dropout probability in between layers.\n",
    "    '''\n",
    "    def __init__(self, output_dim, hidden_dim, output_length, depth=1, \n",
    "                 broadcast_state=True, inner_broadcast_state=True, peek=False, \n",
    "                 dropout=0.25, **kwargs):\n",
    "        super(Seq2seq, self).__init__()\n",
    "        layers= []\n",
    "        if type(depth) not in [list, tuple]:\n",
    "            depth = (depth, depth)        \n",
    "        broadcast = (depth[0] > 1 and inner_broadcast_state) or broadcast_state\n",
    "\n",
    "        # layers process input few times (seq->seq) because of reture_seq = True\n",
    "        lstms = []\n",
    "        for i in range(1, depth[0]):\n",
    "            layer = LSTMEncoder(output_dim=hidden_dim, state_input=inner_broadcast_state and (i != 1), \n",
    "                                return_sequences=True, **kwargs)\n",
    "            layers.append(layer)\n",
    "            lstms.append(layer)\n",
    "            layers.append(Dropout(dropout))\n",
    "        \n",
    "        # ENCODER: layer encodes (seq->vec) everything into a single vector\n",
    "        encoder = LSTMEncoder(output_dim=hidden_dim, state_input=broadcast, **kwargs)\n",
    "        layers.append(encoder)\n",
    "        \n",
    "        # dense encoder-decoder (vec->vec)\n",
    "        layers.append(Dropout(dropout))\n",
    "        layers.append(Dense(hidden_dim if depth[1] > 1 else output_dim))\n",
    "        \n",
    "        # broadcast to decoder 0 [lstms] used here\n",
    "        lstms.append(encoder)\n",
    "        if inner_broadcast_state:\n",
    "            for i in range(len(lstms) - 1):\n",
    "                lstms[i].broadcast_state(lstms[i + 1])\n",
    "        \n",
    "        # DECODER: (vec->seq)\n",
    "        if peek:\n",
    "            decoder = LSTMDecoder2(hidden_dim=hidden_dim, output_length=output_length, \n",
    "                                   state_input=encoder if broadcast else False, **kwargs)\n",
    "        else:\n",
    "            decoder = LSTMDecoder(hidden_dim=hidden_dim, output_length=output_length, \n",
    "                                  state_input=encoder if broadcast else False, **kwargs)\n",
    "        layers.append(decoder)\n",
    "        if broadcast_state:\n",
    "            encoder.broadcast_state(decoder)\n",
    "        lstms = [decoder]\n",
    "        \n",
    "        # (seq->seq)\n",
    "        for i in range(1, depth[1]):\n",
    "            layer = LSTMEncoder(output_dim=hidden_dim, state_input=inner_broadcast_state and (i != 1), \n",
    "                                return_sequences=True, **kwargs)\n",
    "            layers.append(layer)\n",
    "            lstms.append(layer)\n",
    "            layers.append(Dropout(dropout))\n",
    "        \n",
    "        # boeadcast\n",
    "        if inner_broadcast_state:\n",
    "            for i in range(len(lstms) - 1):\n",
    "                lstms[i].broadcast_state(lstms[i + 1])\n",
    "        \n",
    "        # dense on output seq\n",
    "        if depth[1] > 1:\n",
    "            layers.append(TimeDistributedDense(output_dim))\n",
    "        \n",
    "        # \n",
    "        self.encoder = encoder\n",
    "        self.decoder = decoder\n",
    "        for l in layers:\n",
    "            self.add(l)\n",
    "        if depth[0] > 1:\n",
    "            self.layers[0].build()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The ipy_notify extension is already loaded. To reload it, use:\n",
      "  %reload_ext ipy_notify\n"
     ]
    }
   ],
   "source": [
    "%load_ext ipy_notify"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "<script id=\"notification-script\">\n",
       "if (Notification.permission == 'granted') {\n",
       "    var notification = new Notification(\"IPython command finished\", {\n",
       "        icon: \"http://icons.iconarchive.com/icons/cornmanthe3rd/plex/512/Other-python-icon.png\",\n",
       "        body: IPython.notebook.notebook_name\n",
       "    });\n",
       "    notification.onclick = function () {\n",
       "        window.focus();\n",
       "    }\n",
       "    // don't execute again if we open the window again\n",
       "    var script = document.getElementById(\"notification-script\");\n",
       "    script.parentElement.removeChild(script);\n",
       "}\n",
       "</script>\n"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import seq2seq\n",
    "\n",
    "model = Seq2seq(batch_input_shape=(16, 10, 5), hidden_dim=10, output_length=10, output_dim=5, depth=2)\n",
    "model.compile(loss='categorical_crossentropy', optimizer='rmsprop')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[<seq2seq.layers.encoders.LSTMEncoder at 0x7fb57c8c1810>,\n",
       " <keras.layers.core.Dropout at 0x7fb57c8c1990>,\n",
       " <seq2seq.layers.encoders.LSTMEncoder at 0x7fb57b29dc90>,\n",
       " <keras.layers.core.Dropout at 0x7fb57c7e1a50>,\n",
       " <keras.layers.core.Dense at 0x7fb57a3b5cd0>,\n",
       " <seq2seq.layers.decoders.LSTMDecoder at 0x7fb57b29dd50>,\n",
       " <seq2seq.layers.encoders.LSTMEncoder at 0x7fb57c80a790>,\n",
       " <keras.layers.core.Dropout at 0x7fb57c80a610>,\n",
       " <keras.layers.core.TimeDistributedDense at 0x7fb57c80a4d0>]"
      ]
     },
     "execution_count": 71,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.layers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[2 4 2 4 2 4 2 4 2 4 2 4 2 4 2 4 2 4 2 4 2 4 2 4 2 4 2 4 2 4 2 4 2 4 2 4 2\n",
      "  4 2 4 2 4 2 4 2 4 2 4 2 4 2 4 2 4 2 4 2 4 2 4]\n",
      " [4 8 7 7 4 8 7 7 4 8 7 7 4 8 7 7 4 8 7 7 4 8 7 7 4 8 7 7 4 8 7 7 4 8 7 7 4\n",
      "  8 7 7 4 8 7 7 4 8 7 7 4 8 7 7 4 8 7 7 4 8 7 7]\n",
      " [8 2 4 3 0 0 8 2 4 3 0 0 8 2 4 3 0 0 8 2 4 3 0 0 8 2 4 3 0 0 8 2 4 3 0 0 8\n",
      "  2 4 3 0 0 8 2 4 3 0 0 8 2 4 3 0 0 8 2 4 3 0 0]\n",
      " [8 0 4 8 0 4 8 0 4 8 0 4 8 0 4 8 0 4 8 0 4 8 0 4 8 0 4 8 0 4 8 0 4 8 0 4 8\n",
      "  0 4 8 0 4 8 0 4 8 0 4 8 0 4 8 0 4 8 0 4 8 0 4]\n",
      " [6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6\n",
      "  2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2]]\n",
      "---\n",
      "Testing if one_hot returns proper shape: True\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "def gendata(N = 100, L = 60, maxvalue=9):\n",
    "    sequences = []\n",
    "    for i in xrange(N):\n",
    "        l = np.random.randint(2, 7) # length of subseq\n",
    "        subseq = np.random.randint(0, maxvalue, size=l).tolist()\n",
    "        n_repeat = int( L / len(subseq)  ) + 1\n",
    "        seq = (subseq*n_repeat)[:L]\n",
    "        sequences.append(seq)\n",
    "    return np.vstack(sequences)\n",
    "\n",
    "print gendata(5)\n",
    "print '---'\n",
    "\n",
    "def onehot_transform(seqs):\n",
    "    \"\"\"\n",
    "    tansforms [[1, 2, 3]; [3, 2, 1]] -> [ [ [1 0 0], [0 1 0], [0 0 1] ]; [ [0 0 1], [0 1 0], [1, 0 0]] ]\n",
    "    \"\"\"\n",
    "    m = np.max(seqs)\n",
    "    T = np.zeros((seqs.shape[0], seqs.shape[1], m+1))\n",
    "    grid = np.mgrid[0:seqs.shape[0],0:seqs.shape[1]]\n",
    "    T[grid[0], grid[1], seqs] = 1\n",
    "    return T\n",
    "\n",
    "def test():\n",
    "    N=2\n",
    "    L=60\n",
    "    print 'Testing if one_hot returns proper shape:', abs( np.sum( onehot_transform(gendata(N=N, L=L)) ) - N*L ) < 0.1\n",
    "    \n",
    "test()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(16, 10, 5) (16, 10)\n"
     ]
    }
   ],
   "source": [
    "import numpy\n",
    "input_tensor = gendata(N=16, L=10, maxvalue=5)\n",
    "hot_input_tensor = onehot_transform(input_tensor)\n",
    "print hot_input_tensor.shape, input_tensor.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<keras.callbacks.History at 0x7fb56e51b810>"
      ]
     },
     "execution_count": 73,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.fit(hot_input_tensor, hot_input_tensor, nb_epoch=100, verbose=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[1 0 2 4 3 1 0 2 4 3]\n",
      " [3 2 3 2 3 2 3 2 3 2]\n",
      " [1 2 0 4 2 3 1 2 0 4]\n",
      " [4 3 1 0 1 2 4 3 1 0]\n",
      " [2 1 3 0 2 1 3 0 2 1]\n",
      " [0 2 0 0 0 0 2 0 0 0]\n",
      " [3 2 3 3 2 3 3 2 3 3]\n",
      " [1 2 2 1 1 2 2 1 1 2]\n",
      " [4 1 4 4 1 4 4 1 4 4]\n",
      " [3 2 1 3 2 1 3 2 1 3]\n",
      " [2 0 2 0 2 0 2 0 2 0]\n",
      " [3 1 2 1 3 1 2 1 3 1]\n",
      " [2 3 2 3 2 3 2 3 2 3]\n",
      " [4 4 0 4 4 0 4 4 0 4]\n",
      " [2 0 1 4 2 0 1 4 2 0]\n",
      " [0 0 0 3 0 0 0 3 0 0]]\n",
      "[[2 2 2 3 3 2 2 2 2 2]\n",
      " [4 4 4 4 4 4 4 4 4 2]\n",
      " [2 2 2 2 2 2 2 2 2 2]\n",
      " [2 2 2 2 2 2 2 2 2 2]\n",
      " [1 1 1 2 2 2 2 2 2 2]\n",
      " [2 2 2 2 2 2 2 2 2 4]\n",
      " [2 2 3 3 3 3 3 3 3 2]\n",
      " [0 0 0 1 1 1 1 1 1 1]\n",
      " [4 4 1 1 1 1 1 1 1 1]\n",
      " [2 0 1 1 1 1 1 1 1 1]\n",
      " [2 0 1 1 1 1 1 1 1 1]\n",
      " [2 2 3 3 2 2 2 2 2 2]\n",
      " [4 2 2 2 3 2 2 2 2 2]\n",
      " [2 2 2 2 2 2 2 2 2 2]\n",
      " [2 3 3 3 3 3 3 3 3 3]\n",
      " [2 2 2 2 2 2 2 2 2 2]]\n"
     ]
    }
   ],
   "source": [
    "print input_tensor\n",
    "print np.argmax(model.predict(hot_input_tensor), axis=2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[1 0 2 4 3 1 0 2 4 3]\n",
      " [3 2 3 2 3 2 3 2 3 2]\n",
      " [1 2 0 4 2 3 1 2 0 4]\n",
      " [4 3 1 0 1 2 4 3 1 0]\n",
      " [2 1 3 0 2 1 3 0 2 1]\n",
      " [0 2 0 0 0 0 2 0 0 0]\n",
      " [3 2 3 3 2 3 3 2 3 3]\n",
      " [1 2 2 1 1 2 2 1 1 2]\n",
      " [4 1 4 4 1 4 4 1 4 4]\n",
      " [3 2 1 3 2 1 3 2 1 3]\n",
      " [2 0 2 0 2 0 2 0 2 0]\n",
      " [3 1 2 1 3 1 2 1 3 1]\n",
      " [2 3 2 3 2 3 2 3 2 3]\n",
      " [4 4 0 4 4 0 4 4 0 4]\n",
      " [2 0 1 4 2 0 1 4 2 0]\n",
      " [0 0 0 3 0 0 0 3 0 0]]\n",
      "encoded into\n",
      "<keras.layers.core.Dense object at 0x7fb57a3b5cd0>\n",
      "(16, 10)\n",
      "and\n",
      "<seq2seq.layers.decoders.LSTMDecoder object at 0x7fb57b29dd50>\n",
      "[[ 0.04264491 -0.01881716 -0.02590882 -0.12370495  0.03260068  0.05061729\n",
      "   0.01201699  0.03020427 -0.01160898  0.01413456]\n",
      " [ 0.0690805   0.07048211  0.08011899  0.10983862  0.15186733  0.08080374\n",
      "   0.01396344 -0.02795714  0.0562647  -0.00180779]\n",
      " [-0.02284239 -0.02150108  0.00261095 -0.12820917  0.06299813  0.05633226\n",
      "   0.01172972  0.03888533  0.00121792  0.02643571]\n",
      " [ 0.00837865 -0.06794345 -0.01689796 -0.26407599  0.05620093  0.10595706\n",
      "  -0.02469149 -0.03178771 -0.15231736  0.0816633 ]\n",
      " [-0.00199749  0.01345279  0.03853586 -0.12070971  0.11767376  0.10153073\n",
      "  -0.00701311 -0.00540528 -0.05130403  0.06580654]\n",
      " [ 0.01124085 -0.0062993  -0.1877771  -0.19142954  0.01381196 -0.09842648\n",
      "   0.14795846  0.10811764 -0.02622064 -0.02105746]\n",
      " [ 0.14106213 -0.03308485 -0.08219486 -0.18088669  0.11233653  0.01444993\n",
      "   0.05371599 -0.01158028 -0.06524751  0.04055733]\n",
      " [-0.11882911  0.08696818  0.26738647  0.03102637  0.19891548  0.31485099\n",
      "  -0.1286688  -0.12843592 -0.13839638  0.121485  ]\n",
      " [ 0.05905736 -0.05767166 -0.11441714 -0.12242015 -0.09854279 -0.02599916\n",
      "   0.00869467 -0.02635502 -0.10908413  0.01614884]\n",
      " [ 0.04119926  0.04460258  0.06983641 -0.10506263  0.13153717  0.19829041\n",
      "  -0.03150085 -0.09591172 -0.18845084  0.10298534]\n",
      " [ 0.01120625  0.10376298 -0.09465919 -0.07943358  0.06117066  0.03235056\n",
      "   0.13035275  0.08542888 -0.0569892  -0.03162818]\n",
      " [ 0.0358665  -0.02569434  0.04516115 -0.24501607  0.11367137  0.19643421\n",
      "  -0.0580047  -0.08962525 -0.22091672  0.1329404 ]\n",
      " [ 0.01837818  0.05651388  0.14313778  0.13187358  0.17715909  0.11332844\n",
      "  -0.02543897 -0.03606117  0.0783357   0.00050336]\n",
      " [ 0.07393473 -0.02246912 -0.3048887   0.17911574 -0.18203561 -0.43948159\n",
      "   0.17820211  0.03284115  0.09179375 -0.09475127]\n",
      " [-0.09955024 -0.03791428  0.03033665 -0.07051268  0.02203141  0.02540016\n",
      "  -0.00583387  0.06445061  0.0829883   0.01351284]\n",
      " [ 0.04219577 -0.10621049 -0.24083367 -0.09842595 -0.04883322 -0.33776212\n",
      "   0.12281953  0.02523585  0.01433529 -0.01425093]]\n",
      "(16, 10)\n",
      "decoded into\n"
     ]
    },
    {
     "ename": "UnusedInputError",
     "evalue": "theano.function was asked to create a function computing outputs given certain inputs, but the provided input variable at index 0 is not part of the computational graph needed to compute the outputs: Elemwise{add,no_inplace}.0.\nTo make this error into a warning, you can pass the parameter on_unused_input='warn' to theano.function. To disable it completely, use on_unused_input='ignore'.",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mUnusedInputError\u001b[0m                          Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-84-5f6b6918fd54>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m     16\u001b[0m \u001b[1;32mprint\u001b[0m \u001b[1;34m'decoded into'\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     17\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 18\u001b[1;33m \u001b[1;32mprint\u001b[0m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mlayers\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m-\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_output\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtrain\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mFalse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0meval\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m{\u001b[0m\u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mlayers\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m5\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_input\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mto_decode\u001b[0m\u001b[1;33m}\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     19\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     20\u001b[0m \u001b[0mdecoder\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtheano\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfunction\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mlayers\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m5\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0minput\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_output\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtrain\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mFalse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mon_unused_input\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'warn'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m/home/usman/anaconda2/envs/keras/lib/python2.7/site-packages/theano/gof/graph.pyc\u001b[0m in \u001b[0;36meval\u001b[1;34m(self, inputs_to_values)\u001b[0m\n\u001b[0;32m    518\u001b[0m         \u001b[0minputs\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtuple\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msorted\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0minputs_to_values\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mkeys\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mid\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    519\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[0minputs\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_fn_cache\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 520\u001b[1;33m             \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_fn_cache\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0minputs\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtheano\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfunction\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    521\u001b[0m         \u001b[0margs\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[0minputs_to_values\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mparam\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mparam\u001b[0m \u001b[1;32min\u001b[0m \u001b[0minputs\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    522\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m/home/usman/anaconda2/envs/keras/lib/python2.7/site-packages/theano/compile/function.pyc\u001b[0m in \u001b[0;36mfunction\u001b[1;34m(inputs, outputs, mode, updates, givens, no_default_updates, accept_inplace, name, rebuild_strict, allow_input_downcast, profile, on_unused_input)\u001b[0m\n\u001b[0;32m    315\u001b[0m                    \u001b[0mon_unused_input\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mon_unused_input\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    316\u001b[0m                    \u001b[0mprofile\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mprofile\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 317\u001b[1;33m                    output_keys=output_keys)\n\u001b[0m\u001b[0;32m    318\u001b[0m     \u001b[1;31m# We need to add the flag check_aliased inputs if we have any mutable or\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    319\u001b[0m     \u001b[1;31m# borrowed used defined inputs\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m/home/usman/anaconda2/envs/keras/lib/python2.7/site-packages/theano/compile/pfunc.pyc\u001b[0m in \u001b[0;36mpfunc\u001b[1;34m(params, outputs, mode, updates, givens, no_default_updates, accept_inplace, name, rebuild_strict, allow_input_downcast, profile, on_unused_input, output_keys)\u001b[0m\n\u001b[0;32m    459\u001b[0m                          \u001b[0maccept_inplace\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0maccept_inplace\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    460\u001b[0m                          \u001b[0mprofile\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mprofile\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mon_unused_input\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mon_unused_input\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 461\u001b[1;33m                          output_keys=output_keys)\n\u001b[0m\u001b[0;32m    462\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    463\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m/home/usman/anaconda2/envs/keras/lib/python2.7/site-packages/theano/compile/function_module.pyc\u001b[0m in \u001b[0;36morig_function\u001b[1;34m(inputs, outputs, mode, accept_inplace, name, profile, on_unused_input, output_keys)\u001b[0m\n\u001b[0;32m   1769\u001b[0m                    \u001b[0mprofile\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mprofile\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1770\u001b[0m                    \u001b[0mon_unused_input\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mon_unused_input\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1771\u001b[1;33m                    \u001b[0moutput_keys\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0moutput_keys\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcreate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   1772\u001b[0m             defaults)\n\u001b[0;32m   1773\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m/home/usman/anaconda2/envs/keras/lib/python2.7/site-packages/theano/compile/function_module.pyc\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, inputs, outputs, mode, accept_inplace, function_builder, profile, on_unused_input, fgraph, output_keys)\u001b[0m\n\u001b[0;32m   1408\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1409\u001b[0m         \u001b[1;31m# Check if some input variables are unused\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1410\u001b[1;33m         \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_check_unused_inputs\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0moutputs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mon_unused_input\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   1411\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1412\u001b[0m         \u001b[1;31m# Make a list of (SymbolicInput|SymblicInputKits, indices,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m/home/usman/anaconda2/envs/keras/lib/python2.7/site-packages/theano/compile/function_module.pyc\u001b[0m in \u001b[0;36m_check_unused_inputs\u001b[1;34m(self, inputs, outputs, on_unused_input)\u001b[0m\n\u001b[0;32m   1546\u001b[0m                 \u001b[1;32melif\u001b[0m \u001b[0mon_unused_input\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;34m'raise'\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1547\u001b[0m                     raise UnusedInputError(msg % (inputs.index(i),\n\u001b[1;32m-> 1548\u001b[1;33m                                                   i.variable, err_msg))\n\u001b[0m\u001b[0;32m   1549\u001b[0m                 \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1550\u001b[0m                     raise ValueError(\"Invalid value for keyword \"\n",
      "\u001b[1;31mUnusedInputError\u001b[0m: theano.function was asked to create a function computing outputs given certain inputs, but the provided input variable at index 0 is not part of the computational graph needed to compute the outputs: Elemwise{add,no_inplace}.0.\nTo make this error into a warning, you can pass the parameter on_unused_input='warn' to theano.function. To disable it completely, use on_unused_input='ignore'."
     ]
    }
   ],
   "source": [
    "import theano\n",
    "\n",
    "to_encode = hot_input_tensor.astype(np.float32)\n",
    "encoding = model.layers[4].get_output(train=False).eval({model.layers[0].input: to_encode}) # works fine\n",
    "to_decode = encoding\n",
    "\n",
    "print model.layers[-1].get_output(train=False).eval({model.layers[5].get_input(): to_decode}) # fails\n",
    "\n",
    "decoder = theano.function([model.layers[5].input], model.get_output(train=False), on_unused_input='warn') # fails too\n",
    "print decoder([to_decode])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}