Skip to content

Instantly share code, notes, and snippets.

@msakai
Created December 4, 2019 07:28
Show Gist options
  • Save msakai/e2ed8eaa5ab9125b27f091ee284fab22 to your computer and use it in GitHub Desktop.
Save msakai/e2ed8eaa5ab9125b27f091ee284fab22 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Experiment of `chainer.functions.rnn.*` weight shapes\n",
"\n",
"I proposed to change `(I, N)` in the documentation to `(N, I)` in pull request \"[Fix several documentation errors in chainer.functions.rnn.* #8454](https://github.com/chainer/chainer/pull/8454)\", and this jupyter notebook is for confirming the behavior of those functions when given the arrays of shape `(I, N)` and `(N, I)`.\n",
"\n",
"In this notebook, `I` is denoted by `input_size` and `N` is denoted by `hidden_size`.\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"7.0.0rc1\n",
"c0a3764573407af1807f9bde2e63ea496518cb87\r\n"
]
}
],
"source": [
"import chainer\n",
"import chainer.functions as F\n",
"import numpy as np\n",
"print(chainer.__version__)\n",
"!git rev-parse HEAD"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## n_step_rnn"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"n_layers = 1\n",
"batch_size = 2\n",
"input_size = 3\n",
"hidden_size = 4\n",
"seq_length = 5\n",
"\n",
"hx = np.ones((n_layers, batch_size, hidden_size), np.float32)\n",
"bs = [[np.ones(hidden_size, np.float32) for _ in range(2)] for _ in range(n_layers)]\n",
"xs = [np.ones((batch_size, input_size), np.float32) for _ in range(seq_length)]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(variable([[[0.99999994, 0.99999994, 0.99999994, 0.99999994],\n",
" [0.99999994, 0.99999994, 0.99999994, 0.99999994]]]),\n",
" (variable([[0.99999994, 0.99999994, 0.99999994, 0.99999994],\n",
" [0.99999994, 0.99999994, 0.99999994, 0.99999994]]),\n",
" variable([[0.99999994, 0.99999994, 0.99999994, 0.99999994],\n",
" [0.99999994, 0.99999994, 0.99999994, 0.99999994]]),\n",
" variable([[0.99999994, 0.99999994, 0.99999994, 0.99999994],\n",
" [0.99999994, 0.99999994, 0.99999994, 0.99999994]]),\n",
" variable([[0.99999994, 0.99999994, 0.99999994, 0.99999994],\n",
" [0.99999994, 0.99999994, 0.99999994, 0.99999994]]),\n",
" variable([[0.99999994, 0.99999994, 0.99999994, 0.99999994],\n",
" [0.99999994, 0.99999994, 0.99999994, 0.99999994]])))"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ws_ok =[[np.ones((hidden_size, input_size), np.float32), np.ones((hidden_size, hidden_size), np.float32)] for _ in range(n_layers)]\n",
"F.n_step_rnn(n_layers, 0.0, hx, ws_ok, bs, xs)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "Inconsistent input size in input values and weight parameters: 3 != 4",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-4-762438e29efc>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mws_ng\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mones\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput_size\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhidden_size\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfloat32\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mones\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mhidden_size\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhidden_size\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfloat32\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0m_\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn_layers\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mF\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mn_step_rnn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn_layers\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0.0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mws_ng\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mxs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m~/src/chainer/chainer/functions/rnn/n_step_rnn.py\u001b[0m in \u001b[0;36mn_step_rnn\u001b[0;34m(n_layers, dropout_ratio, hx, ws, bs, xs, activation, **kwargs)\u001b[0m\n\u001b[1;32m 523\u001b[0m \"\"\"\n\u001b[1;32m 524\u001b[0m return n_step_rnn_base(n_layers, dropout_ratio, hx, ws, bs, xs,\n\u001b[0;32m--> 525\u001b[0;31m activation, use_bi_direction=False, **kwargs)\n\u001b[0m\u001b[1;32m 526\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 527\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/src/chainer/chainer/functions/rnn/n_step_rnn.py\u001b[0m in \u001b[0;36mn_step_rnn_base\u001b[0;34m(n_layers, dropout_ratio, hx, ws, bs, xs, activation, use_bi_direction, **kwargs)\u001b[0m\n\u001b[1;32m 715\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mx_in\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mw_in\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 716\u001b[0m raise ValueError('Inconsistent input size in input values and weight '\n\u001b[0;32m--> 717\u001b[0;31m 'parameters: {} != {}'.format(x_in, w_in))\n\u001b[0m\u001b[1;32m 718\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 719\u001b[0m \u001b[0mxp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbackend\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_array_module\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mhx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mValueError\u001b[0m: Inconsistent input size in input values and weight parameters: 3 != 4"
]
}
],
"source": [
"ws_ng = [[np.ones((input_size, hidden_size), np.float32), np.ones((hidden_size, hidden_size), np.float32)] for _ in range(n_layers)]\n",
"F.n_step_rnn(n_layers, 0.0, hx, ws_ng, bs, xs)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## n_step_birnn"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"n_layers = 1\n",
"batch_size = 2\n",
"input_size = 3\n",
"hidden_size = 4\n",
"seq_length = 5\n",
"\n",
"hx = np.ones((2*n_layers, batch_size, hidden_size), np.float32)\n",
"bs = [[np.ones(hidden_size, np.float32) for _ in range(2)]\n",
" for _ in range(2*n_layers)]\n",
"xs = [np.ones((batch_size, input_size), np.float32) for _ in range(seq_length)]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(variable([[[0.99999994, 0.99999994, 0.99999994, 0.99999994],\n",
" [0.99999994, 0.99999994, 0.99999994, 0.99999994]],\n",
" \n",
" [[0.99999994, 0.99999994, 0.99999994, 0.99999994],\n",
" [0.99999994, 0.99999994, 0.99999994, 0.99999994]]]),\n",
" (variable([[0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,\n",
" 0.99999994, 0.99999994, 0.99999994],\n",
" [0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,\n",
" 0.99999994, 0.99999994, 0.99999994]]),\n",
" variable([[0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,\n",
" 0.99999994, 0.99999994, 0.99999994],\n",
" [0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,\n",
" 0.99999994, 0.99999994, 0.99999994]]),\n",
" variable([[0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,\n",
" 0.99999994, 0.99999994, 0.99999994],\n",
" [0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,\n",
" 0.99999994, 0.99999994, 0.99999994]]),\n",
" variable([[0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,\n",
" 0.99999994, 0.99999994, 0.99999994],\n",
" [0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,\n",
" 0.99999994, 0.99999994, 0.99999994]]),\n",
" variable([[0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,\n",
" 0.99999994, 0.99999994, 0.99999994],\n",
" [0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,\n",
" 0.99999994, 0.99999994, 0.99999994]])))"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ws_ok = [[np.ones((hidden_size, input_size), np.float32), np.ones((hidden_size, hidden_size), np.float32)] for _ in range(2*n_layers)]\n",
"F.n_step_birnn(n_layers, 0.0, hx, ws_ok, bs, xs)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "Inconsistent input size in input values and weight parameters: 3 != 4",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-7-55fdef5cb689>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mws_ng\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mones\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput_size\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhidden_size\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfloat32\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mones\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mhidden_size\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhidden_size\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfloat32\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0m_\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mn_layers\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mF\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mn_step_birnn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn_layers\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0.0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mws_ng\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mxs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m~/src/chainer/chainer/functions/rnn/n_step_rnn.py\u001b[0m in \u001b[0;36mn_step_birnn\u001b[0;34m(n_layers, dropout_ratio, hx, ws, bs, xs, activation, **kwargs)\u001b[0m\n\u001b[1;32m 627\u001b[0m \"\"\"\n\u001b[1;32m 628\u001b[0m return n_step_rnn_base(n_layers, dropout_ratio, hx, ws, bs, xs,\n\u001b[0;32m--> 629\u001b[0;31m activation, use_bi_direction=True)\n\u001b[0m\u001b[1;32m 630\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 631\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/src/chainer/chainer/functions/rnn/n_step_rnn.py\u001b[0m in \u001b[0;36mn_step_rnn_base\u001b[0;34m(n_layers, dropout_ratio, hx, ws, bs, xs, activation, use_bi_direction, **kwargs)\u001b[0m\n\u001b[1;32m 715\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mx_in\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mw_in\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 716\u001b[0m raise ValueError('Inconsistent input size in input values and weight '\n\u001b[0;32m--> 717\u001b[0;31m 'parameters: {} != {}'.format(x_in, w_in))\n\u001b[0m\u001b[1;32m 718\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 719\u001b[0m \u001b[0mxp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbackend\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_array_module\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mhx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mValueError\u001b[0m: Inconsistent input size in input values and weight parameters: 3 != 4"
]
}
],
"source": [
"ws_ng = [[np.ones((input_size, hidden_size), np.float32), np.ones((hidden_size, hidden_size), np.float32)] for _ in range(2*n_layers)]\n",
"F.n_step_birnn(n_layers, 0.0, hx, ws_ng, bs, xs)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## n_step_lstm"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"n_layers = 1\n",
"batch_size = 2\n",
"input_size = 3\n",
"hidden_size = 4\n",
"seq_length = 5\n",
"\n",
"hx = np.ones((n_layers, batch_size, hidden_size), np.float32)\n",
"cx = np.ones((n_layers, batch_size, hidden_size), np.float32)\n",
"bs = [[np.ones(hidden_size, np.float32) for _ in range(8)]]\n",
"xs = [np.ones((batch_size, input_size), np.float32) for _ in range(seq_length)]"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(variable([[[0.99986416, 0.99986416, 0.99986416, 0.99986416],\n",
" [0.99986416, 0.99986416, 0.99986416, 0.99986416]]]),\n",
" variable([[[5.997462, 5.997462, 5.997462, 5.997462],\n",
" [5.997462, 5.997462, 5.997462, 5.997462]]]),\n",
" (variable([[0.96389115, 0.96389115, 0.96389115, 0.96389115],\n",
" [0.96389115, 0.96389115, 0.96389115, 0.96389115]]),\n",
" variable([[0.99490625, 0.99490625, 0.99490625, 0.99490625],\n",
" [0.99490625, 0.99490625, 0.99490625, 0.99490625]]),\n",
" variable([[0.99920183, 0.99920183, 0.99920183, 0.99920183],\n",
" [0.99920183, 0.99920183, 0.99920183, 0.99920183]]),\n",
" variable([[0.99978507, 0.99978507, 0.99978507, 0.99978507],\n",
" [0.99978507, 0.99978507, 0.99978507, 0.99978507]]),\n",
" variable([[0.99986416, 0.99986416, 0.99986416, 0.99986416],\n",
" [0.99986416, 0.99986416, 0.99986416, 0.99986416]])))"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ws_ok = [[np.ones((hidden_size, input_size), np.float32) for _ in range(4)] +\n",
" [np.ones((hidden_size, hidden_size), np.float32) for _ in range(4)]]\n",
"F.n_step_lstm(n_layers, 0.0, hx, cx, ws_ok, bs, xs)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "Inconsistent input size in input values and weight parameters: 3 != 4",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-10-218f191417b6>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m ws_ng = [[np.ones((input_size, hidden_size), np.float32) for _ in range(4)] +\n\u001b[1;32m 2\u001b[0m [np.ones((hidden_size, hidden_size), np.float32) for _ in range(4)]]\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mF\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mn_step_lstm\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn_layers\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0.0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mws_ng\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mxs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m~/src/chainer/chainer/functions/rnn/n_step_lstm.py\u001b[0m in \u001b[0;36mn_step_lstm\u001b[0;34m(n_layers, dropout_ratio, hx, cx, ws, bs, xs, **kwargs)\u001b[0m\n\u001b[1;32m 235\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 236\u001b[0m return n_step_lstm_base(n_layers, dropout_ratio, hx, cx, ws, bs, xs,\n\u001b[0;32m--> 237\u001b[0;31m use_bi_direction=False, **kwargs)\n\u001b[0m\u001b[1;32m 238\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 239\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/src/chainer/chainer/functions/rnn/n_step_lstm.py\u001b[0m in \u001b[0;36mn_step_lstm_base\u001b[0;34m(n_layers, dropout_ratio, hx, cx, ws, bs, xs, use_bi_direction, **kwargs)\u001b[0m\n\u001b[1;32m 478\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mx_in\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mw_in\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 479\u001b[0m raise ValueError('Inconsistent input size in input values and weight '\n\u001b[0;32m--> 480\u001b[0;31m 'parameters: {} != {}'.format(x_in, w_in))\n\u001b[0m\u001b[1;32m 481\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 482\u001b[0m \u001b[0mxp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbackend\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_array_module\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mhx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mValueError\u001b[0m: Inconsistent input size in input values and weight parameters: 3 != 4"
]
}
],
"source": [
"ws_ng = [[np.ones((input_size, hidden_size), np.float32) for _ in range(4)] +\n",
" [np.ones((hidden_size, hidden_size), np.float32) for _ in range(4)]]\n",
"F.n_step_lstm(n_layers, 0.0, hx, cx, ws_ng, bs, xs)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## n_step_bilstm"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"n_layers = 1\n",
"batch_size = 2\n",
"input_size = 3\n",
"hidden_size = 4\n",
"seq_length = 5\n",
"\n",
"hx = np.ones((2*n_layers, batch_size, hidden_size), np.float32)\n",
"cx = np.ones((2*n_layers, batch_size, hidden_size), np.float32)\n",
"bs = [[np.ones(hidden_size, np.float32) for _ in range(8)]\n",
" for _ in range(2*n_layers)]\n",
"xs = [np.ones((batch_size, input_size), np.float32) for _ in range(seq_length)]"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(variable([[[0.99986416, 0.99986416, 0.99986416, 0.99986416],\n",
" [0.99986416, 0.99986416, 0.99986416, 0.99986416]],\n",
" \n",
" [[0.99986416, 0.99986416, 0.99986416, 0.99986416],\n",
" [0.99986416, 0.99986416, 0.99986416, 0.99986416]]]),\n",
" variable([[[5.997462, 5.997462, 5.997462, 5.997462],\n",
" [5.997462, 5.997462, 5.997462, 5.997462]],\n",
" \n",
" [[5.997462, 5.997462, 5.997462, 5.997462],\n",
" [5.997462, 5.997462, 5.997462, 5.997462]]]),\n",
" (variable([[0.96389115, 0.96389115, 0.96389115, 0.96389115, 0.99986416,\n",
" 0.99986416, 0.99986416, 0.99986416],\n",
" [0.96389115, 0.96389115, 0.96389115, 0.96389115, 0.99986416,\n",
" 0.99986416, 0.99986416, 0.99986416]]),\n",
" variable([[0.99490625, 0.99490625, 0.99490625, 0.99490625, 0.99978507,\n",
" 0.99978507, 0.99978507, 0.99978507],\n",
" [0.99490625, 0.99490625, 0.99490625, 0.99490625, 0.99978507,\n",
" 0.99978507, 0.99978507, 0.99978507]]),\n",
" variable([[0.99920183, 0.99920183, 0.99920183, 0.99920183, 0.99920183,\n",
" 0.99920183, 0.99920183, 0.99920183],\n",
" [0.99920183, 0.99920183, 0.99920183, 0.99920183, 0.99920183,\n",
" 0.99920183, 0.99920183, 0.99920183]]),\n",
" variable([[0.99978507, 0.99978507, 0.99978507, 0.99978507, 0.99490625,\n",
" 0.99490625, 0.99490625, 0.99490625],\n",
" [0.99978507, 0.99978507, 0.99978507, 0.99978507, 0.99490625,\n",
" 0.99490625, 0.99490625, 0.99490625]]),\n",
" variable([[0.99986416, 0.99986416, 0.99986416, 0.99986416, 0.96389115,\n",
" 0.96389115, 0.96389115, 0.96389115],\n",
" [0.99986416, 0.99986416, 0.99986416, 0.99986416, 0.96389115,\n",
" 0.96389115, 0.96389115, 0.96389115]])))"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ws_ok = [[np.ones((hidden_size, input_size), np.float32) for _ in range(4)] +\n",
" [np.ones((hidden_size, hidden_size), np.float32) for _ in range(4)]\n",
" for _ in range(2*n_layers)]\n",
"F.n_step_bilstm(n_layers, 0.0, hx, cx, ws_ok, bs, xs)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "Inconsistent input size in input values and weight parameters: 3 != 4",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-13-f8f772191001>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mones\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mhidden_size\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhidden_size\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfloat32\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0m_\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m for _ in range(2*n_layers)]\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mF\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mn_step_bilstm\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn_layers\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0.0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mws_ng\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mxs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m~/src/chainer/chainer/functions/rnn/n_step_lstm.py\u001b[0m in \u001b[0;36mn_step_bilstm\u001b[0;34m(n_layers, dropout_ratio, hx, cx, ws, bs, xs, **kwargs)\u001b[0m\n\u001b[1;32m 394\u001b[0m \"\"\"\n\u001b[1;32m 395\u001b[0m return n_step_lstm_base(n_layers, dropout_ratio, hx, cx, ws, bs, xs,\n\u001b[0;32m--> 396\u001b[0;31m use_bi_direction=True, **kwargs)\n\u001b[0m\u001b[1;32m 397\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 398\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/src/chainer/chainer/functions/rnn/n_step_lstm.py\u001b[0m in \u001b[0;36mn_step_lstm_base\u001b[0;34m(n_layers, dropout_ratio, hx, cx, ws, bs, xs, use_bi_direction, **kwargs)\u001b[0m\n\u001b[1;32m 478\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mx_in\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mw_in\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 479\u001b[0m raise ValueError('Inconsistent input size in input values and weight '\n\u001b[0;32m--> 480\u001b[0;31m 'parameters: {} != {}'.format(x_in, w_in))\n\u001b[0m\u001b[1;32m 481\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 482\u001b[0m \u001b[0mxp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbackend\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_array_module\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mhx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mValueError\u001b[0m: Inconsistent input size in input values and weight parameters: 3 != 4"
]
}
],
"source": [
"ws_ng = [[np.ones((input_size, hidden_size), np.float32) for _ in range(4)] +\n",
" [np.ones((hidden_size, hidden_size), np.float32) for _ in range(4)]\n",
" for _ in range(2*n_layers)]\n",
"F.n_step_bilstm(n_layers, 0.0, hx, cx, ws_ng, bs, xs)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## n_step_gru"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"n_layers = 1\n",
"batch_size = 2\n",
"input_size = 3\n",
"hidden_size = 4\n",
"seq_length = 5\n",
"\n",
"hx = np.ones((n_layers, batch_size, hidden_size), np.float32)\n",
"bs = [[np.ones(hidden_size, np.float32) for _ in range(6)]]\n",
"xs = [np.ones((batch_size, input_size), np.float32) for _ in range(seq_length)]"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(variable([[[1., 1., 1., 1.],\n",
" [1., 1., 1., 1.]]]), (variable([[1., 1., 1., 1.],\n",
" [1., 1., 1., 1.]]), variable([[1., 1., 1., 1.],\n",
" [1., 1., 1., 1.]]), variable([[1., 1., 1., 1.],\n",
" [1., 1., 1., 1.]]), variable([[1., 1., 1., 1.],\n",
" [1., 1., 1., 1.]]), variable([[1., 1., 1., 1.],\n",
" [1., 1., 1., 1.]])))"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ws_ok = [[np.ones((hidden_size, input_size), np.float32) for _ in range(3)] +\n",
" [np.ones((hidden_size, hidden_size), np.float32) for _ in range(3)]]\n",
"F.n_step_gru(n_layers, 0.0, hx, ws_ok, bs, xs)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"ename": "InvalidType",
"evalue": "\nInvalid operation is performed in: LinearFunction (Forward)\n\nExpect: x.shape[1] == W.shape[1]\nActual: 3 != 4",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mInvalidType\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-16-8a80f63d09fe>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m ws_ng = [[np.ones((input_size, hidden_size), np.float32) for _ in range(3)] +\n\u001b[1;32m 2\u001b[0m [np.ones((hidden_size, hidden_size), np.float32) for _ in range(3)]]\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mF\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mn_step_gru\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn_layers\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0.0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mws_ng\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mxs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m~/src/chainer/chainer/functions/rnn/n_step_gru.py\u001b[0m in \u001b[0;36mn_step_gru\u001b[0;34m(n_layers, dropout_ratio, hx, ws, bs, xs, **kwargs)\u001b[0m\n\u001b[1;32m 173\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 174\u001b[0m return n_step_gru_base(n_layers, dropout_ratio, hx, ws, bs, xs,\n\u001b[0;32m--> 175\u001b[0;31m use_bi_direction=False, **kwargs)\n\u001b[0m\u001b[1;32m 176\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 177\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/src/chainer/chainer/functions/rnn/n_step_gru.py\u001b[0m in \u001b[0;36mn_step_gru_base\u001b[0;34m(n_layers, dropout_ratio, hx, ws, bs, xs, use_bi_direction, **kwargs)\u001b[0m\n\u001b[1;32m 386\u001b[0m hy, _, ys = n_step_rnn.n_step_rnn_impl(\n\u001b[1;32m 387\u001b[0m \u001b[0m_gru\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_layers\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdropout_ratio\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mws\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mxs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 388\u001b[0;31m use_bi_direction)\n\u001b[0m\u001b[1;32m 389\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mhy\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mys\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 390\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/src/chainer/chainer/functions/rnn/n_step_rnn.py\u001b[0m in \u001b[0;36mn_step_rnn_impl\u001b[0;34m(f, n_layers, dropout_ratio, hx, cx, ws, bs, xs, use_bi_direction)\u001b[0m\n\u001b[1;32m 809\u001b[0m \u001b[0midx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdirection\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mlayer\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 810\u001b[0m h, c, h_forward = _one_directional_loop(\n\u001b[0;32m--> 811\u001b[0;31m f, xs, hx[idx], cx[idx], ws[idx], bs[idx])\n\u001b[0m\u001b[1;32m 812\u001b[0m \u001b[0mhy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mh\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 813\u001b[0m \u001b[0mcy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/src/chainer/chainer/functions/rnn/n_step_rnn.py\u001b[0m in \u001b[0;36m_one_directional_loop\u001b[0;34m(f, xs, h, c, w, b)\u001b[0m\n\u001b[1;32m 851\u001b[0m \u001b[0mc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mc_rest\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msplit_axis\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplit_axis\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mbatch\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 852\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 853\u001b[0;31m \u001b[0mh\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mh\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mw\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 854\u001b[0m \u001b[0mh_list\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mh\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 855\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/src/chainer/chainer/functions/rnn/n_step_gru.py\u001b[0m in \u001b[0;36m_gru\u001b[0;34m(x, h, c, w, b)\u001b[0m\n\u001b[1;32m 396\u001b[0m \u001b[0mhb\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mconcat\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconcat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mb\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mb\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 397\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 398\u001b[0;31m \u001b[0mgru_x\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlinear\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlinear\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mxw\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mxb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 399\u001b[0m \u001b[0mgru_h\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlinear\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlinear\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mh\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhw\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 400\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/src/chainer/chainer/functions/connection/linear.py\u001b[0m in \u001b[0;36mlinear\u001b[0;34m(x, W, b, n_batch_axes)\u001b[0m\n\u001b[1;32m 306\u001b[0m \u001b[0margs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mW\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mb\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 307\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 308\u001b[0;31m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mLinearFunction\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 309\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mn_batch_axes\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 310\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbatch_shape\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/src/chainer/chainer/function_node.py\u001b[0m in \u001b[0;36mapply\u001b[0;34m(self, inputs)\u001b[0m\n\u001b[1;32m 305\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 306\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mconfiguration\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtype_check\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 307\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_check_data_type_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0min_data\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 308\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 309\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcheck_layout_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput_vars\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/src/chainer/chainer/function_node.py\u001b[0m in \u001b[0;36m_check_data_type_forward\u001b[0;34m(self, in_data)\u001b[0m\n\u001b[1;32m 453\u001b[0m in_data, 'in_types', False, shapes=in_shapes)\n\u001b[1;32m 454\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mtype_check\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_function_check_context\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 455\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcheck_type_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0min_type\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 456\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 457\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mcheck_type_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0min_types\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/src/chainer/chainer/functions/connection/linear.py\u001b[0m in \u001b[0;36mcheck_type_forward\u001b[0;34m(self, in_types)\u001b[0m\n\u001b[1;32m 27\u001b[0m \u001b[0mx_type\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndim\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 28\u001b[0m \u001b[0mw_type\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndim\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 29\u001b[0;31m \u001b[0mx_type\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mw_type\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 30\u001b[0m )\n\u001b[1;32m 31\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtype_check\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0meval\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn_in\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/src/chainer/chainer/utils/type_check.py\u001b[0m in \u001b[0;36mexpect\u001b[0;34m(*bool_exprs)\u001b[0m\n\u001b[1;32m 562\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mexpr\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mbool_exprs\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 563\u001b[0m \u001b[0;32massert\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mexpr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mTestable\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 564\u001b[0;31m \u001b[0mexpr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexpect\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 565\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 566\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/src/chainer/chainer/utils/type_check.py\u001b[0m in \u001b[0;36mexpect\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 495\u001b[0m raise InvalidType(\n\u001b[1;32m 496\u001b[0m \u001b[0;34m'{0} {1} {2}'\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlhs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexp\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrhs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 497\u001b[0;31m '{0} {1} {2}'.format(left, self.inv, right))\n\u001b[0m\u001b[1;32m 498\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 499\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mInvalidType\u001b[0m: \nInvalid operation is performed in: LinearFunction (Forward)\n\nExpect: x.shape[1] == W.shape[1]\nActual: 3 != 4"
]
}
],
"source": [
"ws_ng = [[np.ones((input_size, hidden_size), np.float32) for _ in range(3)] +\n",
" [np.ones((hidden_size, hidden_size), np.float32) for _ in range(3)]]\n",
"F.n_step_gru(n_layers, 0.0, hx, ws_ng, bs, xs)"
]
},
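{
"cell_type": "markdown",
"metadata": {},
"source": [
"## n_step_bigru\n",
"\n",
"`n_step_bigru` is the bidirectional variant, so `hx`, `ws` and `bs` below are built with `2*n_layers` entries. As the cells in this section show, the input-to-hidden weights are accepted when shaped `(hidden_size, input_size)`, i.e. `(N, I)`, and rejected with `InvalidType` when shaped `(input_size, hidden_size)`, i.e. `(I, N)`."
]
},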
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"# Problem sizes used by the n_step_bigru cells that follow.\n",
"n_layers, batch_size = 1, 2\n",
"input_size, hidden_size = 3, 4\n",
"seq_length = 5\n",
"\n",
"# Initial hidden states, stacked 2*n_layers deep.\n",
"hx = np.ones(shape=(2 * n_layers, batch_size, hidden_size), dtype=np.float32)\n",
"\n",
"# Six bias vectors of length hidden_size for each of the 2*n_layers groups.\n",
"bs = [\n",
"    [np.ones(shape=hidden_size, dtype=np.float32) for _ in range(6)]\n",
"    for _ in range(2 * n_layers)\n",
"]\n",
"\n",
"# Input sequence: seq_length arrays of shape (batch_size, input_size).\n",
"xs = [\n",
"    np.ones(shape=(batch_size, input_size), dtype=np.float32)\n",
"    for _ in range(seq_length)\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(variable([[[1., 1., 1., 1.],\n",
" [1., 1., 1., 1.]],\n",
" \n",
" [[1., 1., 1., 1.],\n",
" [1., 1., 1., 1.]]]), (variable([[1., 1., 1., 1., 1., 1., 1., 1.],\n",
" [1., 1., 1., 1., 1., 1., 1., 1.]]),\n",
" variable([[1., 1., 1., 1., 1., 1., 1., 1.],\n",
" [1., 1., 1., 1., 1., 1., 1., 1.]]),\n",
" variable([[1., 1., 1., 1., 1., 1., 1., 1.],\n",
" [1., 1., 1., 1., 1., 1., 1., 1.]]),\n",
" variable([[1., 1., 1., 1., 1., 1., 1., 1.],\n",
" [1., 1., 1., 1., 1., 1., 1., 1.]]),\n",
" variable([[1., 1., 1., 1., 1., 1., 1., 1.],\n",
" [1., 1., 1., 1., 1., 1., 1., 1.]])))"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Per weight group: three (hidden_size, input_size) matrices followed by\n",
"# three (hidden_size, hidden_size) matrices.\n",
"ws_ok = [\n",
"    [np.ones((hidden_size, n_cols), dtype=np.float32)\n",
"     for n_cols in (input_size,) * 3 + (hidden_size,) * 3]\n",
"    for _ in range(2 * n_layers)\n",
"]\n",
"F.n_step_bigru(n_layers, 0.0, hx, ws_ok, bs, xs)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"ename": "InvalidType",
"evalue": "\nInvalid operation is performed in: LinearFunction (Forward)\n\nExpect: x.shape[1] == W.shape[1]\nActual: 3 != 4",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mInvalidType\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-19-58e41de64281>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mones\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mhidden_size\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhidden_size\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfloat32\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0m_\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m for _ in range(2*n_layers)]\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mF\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mn_step_bigru\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn_layers\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0.0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mws_ng\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mxs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m~/src/chainer/chainer/functions/rnn/n_step_gru.py\u001b[0m in \u001b[0;36mn_step_bigru\u001b[0;34m(n_layers, dropout_ratio, hx, ws, bs, xs, **kwargs)\u001b[0m\n\u001b[1;32m 270\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 271\u001b[0m return n_step_gru_base(n_layers, dropout_ratio, hx, ws, bs, xs,\n\u001b[0;32m--> 272\u001b[0;31m use_bi_direction=True, **kwargs)\n\u001b[0m\u001b[1;32m 273\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 274\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/src/chainer/chainer/functions/rnn/n_step_gru.py\u001b[0m in \u001b[0;36mn_step_gru_base\u001b[0;34m(n_layers, dropout_ratio, hx, ws, bs, xs, use_bi_direction, **kwargs)\u001b[0m\n\u001b[1;32m 386\u001b[0m hy, _, ys = n_step_rnn.n_step_rnn_impl(\n\u001b[1;32m 387\u001b[0m \u001b[0m_gru\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_layers\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdropout_ratio\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mws\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mxs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 388\u001b[0;31m use_bi_direction)\n\u001b[0m\u001b[1;32m 389\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mhy\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mys\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 390\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/src/chainer/chainer/functions/rnn/n_step_rnn.py\u001b[0m in \u001b[0;36mn_step_rnn_impl\u001b[0;34m(f, n_layers, dropout_ratio, hx, cx, ws, bs, xs, use_bi_direction)\u001b[0m\n\u001b[1;32m 809\u001b[0m \u001b[0midx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdirection\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mlayer\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 810\u001b[0m h, c, h_forward = _one_directional_loop(\n\u001b[0;32m--> 811\u001b[0;31m f, xs, hx[idx], cx[idx], ws[idx], bs[idx])\n\u001b[0m\u001b[1;32m 812\u001b[0m \u001b[0mhy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mh\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 813\u001b[0m \u001b[0mcy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/src/chainer/chainer/functions/rnn/n_step_rnn.py\u001b[0m in \u001b[0;36m_one_directional_loop\u001b[0;34m(f, xs, h, c, w, b)\u001b[0m\n\u001b[1;32m 851\u001b[0m \u001b[0mc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mc_rest\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msplit_axis\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplit_axis\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mbatch\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 852\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 853\u001b[0;31m \u001b[0mh\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mh\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mw\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 854\u001b[0m \u001b[0mh_list\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mh\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 855\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/src/chainer/chainer/functions/rnn/n_step_gru.py\u001b[0m in \u001b[0;36m_gru\u001b[0;34m(x, h, c, w, b)\u001b[0m\n\u001b[1;32m 396\u001b[0m \u001b[0mhb\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mconcat\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconcat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mb\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mb\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 397\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 398\u001b[0;31m \u001b[0mgru_x\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlinear\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlinear\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mxw\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mxb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 399\u001b[0m \u001b[0mgru_h\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlinear\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlinear\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mh\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhw\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 400\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/src/chainer/chainer/functions/connection/linear.py\u001b[0m in \u001b[0;36mlinear\u001b[0;34m(x, W, b, n_batch_axes)\u001b[0m\n\u001b[1;32m 306\u001b[0m \u001b[0margs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mW\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mb\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 307\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 308\u001b[0;31m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mLinearFunction\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 309\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mn_batch_axes\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 310\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbatch_shape\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/src/chainer/chainer/function_node.py\u001b[0m in \u001b[0;36mapply\u001b[0;34m(self, inputs)\u001b[0m\n\u001b[1;32m 305\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 306\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mconfiguration\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtype_check\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 307\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_check_data_type_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0min_data\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 308\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 309\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcheck_layout_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput_vars\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/src/chainer/chainer/function_node.py\u001b[0m in \u001b[0;36m_check_data_type_forward\u001b[0;34m(self, in_data)\u001b[0m\n\u001b[1;32m 453\u001b[0m in_data, 'in_types', False, shapes=in_shapes)\n\u001b[1;32m 454\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mtype_check\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_function_check_context\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 455\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcheck_type_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0min_type\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 456\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 457\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mcheck_type_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0min_types\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/src/chainer/chainer/functions/connection/linear.py\u001b[0m in \u001b[0;36mcheck_type_forward\u001b[0;34m(self, in_types)\u001b[0m\n\u001b[1;32m 27\u001b[0m \u001b[0mx_type\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndim\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 28\u001b[0m \u001b[0mw_type\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndim\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 29\u001b[0;31m \u001b[0mx_type\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mw_type\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 30\u001b[0m )\n\u001b[1;32m 31\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtype_check\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0meval\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn_in\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/src/chainer/chainer/utils/type_check.py\u001b[0m in \u001b[0;36mexpect\u001b[0;34m(*bool_exprs)\u001b[0m\n\u001b[1;32m 562\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mexpr\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mbool_exprs\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 563\u001b[0m \u001b[0;32massert\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mexpr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mTestable\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 564\u001b[0;31m \u001b[0mexpr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexpect\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 565\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 566\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/src/chainer/chainer/utils/type_check.py\u001b[0m in \u001b[0;36mexpect\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 495\u001b[0m raise InvalidType(\n\u001b[1;32m 496\u001b[0m \u001b[0;34m'{0} {1} {2}'\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlhs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexp\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrhs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 497\u001b[0;31m '{0} {1} {2}'.format(left, self.inv, right))\n\u001b[0m\u001b[1;32m 498\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 499\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mInvalidType\u001b[0m: \nInvalid operation is performed in: LinearFunction (Forward)\n\nExpect: x.shape[1] == W.shape[1]\nActual: 3 != 4"
]
}
],
"source": [
"ws_ng = [[np.ones((input_size, hidden_size), np.float32) for _ in range(3)] +\n",
" [np.ones((hidden_size, hidden_size), np.float32) for _ in range(3)]\n",
" for _ in range(2*n_layers)]\n",
"F.n_step_bigru(n_layers, 0.0, hx, ws_ng, bs, xs)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment