Skip to content

Instantly share code, notes, and snippets.

@albertz
Created October 1, 2021 18:56
Show Gist options
  • Save albertz/21e00a500e41eb0c8d27a8519e763f0e to your computer and use it in GitHub Desktop.
Save albertz/21e00a500e41eb0c8d27a8519e763f0e to your computer and use it in GitHub Desktop.
from returnn.config import Config
from returnn.tf.engine import Engine
import sys
sys.path.append("tests")
from test_TFNetworkLayer import make_feed_dict
# Size of one axis of the extern_data "data" shape used in the configs below.
n_in = 40
# Filename the model is saved under, and later set as the "load" config entry.
model_filename = "test-703.model.001"
def make_net_dict():
    """Return the network dict used by this script.

    The network consists of a single layer named "output" of class "conv"
    that reads from "data", with a 3x3 filter, "same" padding, 32 outputs,
    no activation, and a bias.

    A new dict is built on every call, so callers may modify the result.
    """
    return {
        "output": {
            "class": "conv", "from": "data",
            "filter_size": [3, 3], "padding": "same",
            "n_out": 32, "activation": None, "with_bias": True,
        }
    }
# Step 1: build a config whose "data" input has shape (None, n_in, 1), set up
# an engine for training from it, and save the model to model_filename.
config = Config({
"extern_data": {"data": {"shape": (None, n_in, 1)}},
"task": "train",
"network": make_net_dict(),
})
engine = Engine(config=config)
engine.init_train_from_config()
engine.save_model(model_filename)
# Step 2: modify the same config object in place: the last two "data" axes
# become (None, 1, n_in) instead of (None, n_in, 1), the task becomes "eval",
# and "load" points at the model file saved above.
config.typed_dict["extern_data"]["data"]["shape"] = (None, 1, n_in)
config.typed_dict["task"] = "eval"
config.typed_dict["load"] = model_filename
# Step 3: create a fresh engine and network from the modified config, then run
# the "output" layer once using the feed dict built by the test helper
# make_feed_dict.
# NOTE(review): presumably this final run is what triggers the convolution
# error discussed in the comments further down -- confirm.
engine = Engine(config=config)
engine.init_network_from_config()
net = engine.network
out = net.get_layer("output").output
engine.tf_session.run(out.placeholder, feed_dict=make_feed_dict(net.extern_data))
@albertz
Copy link
Author

albertz commented Oct 1, 2021

Note, this TF code:

import tensorflow as tf
import numpy

# Graph-mode (TF1-style) execution is required for placeholders and sessions.
tf.compat.v1.disable_eager_execution()


# The filter is built as filter_size + [n_in, n_out]; note that n_in is 1
# while the placeholder below has 40 in its last axis.
n_in, n_out = 1, 32
filter_size = (3, 3)
filter_shape = [*filter_size, n_in, n_out]


graph = tf.Graph()
with graph.as_default(), tf.compat.v1.Session(graph=graph) as session:
    inputs = tf.compat.v1.placeholder(
        tf.float32, (None, None, 1, 40))  # [B,T,1,40]
    kernel = tf.compat.v1.get_variable(name="W", shape=filter_shape)
    conv_out = tf.compat.v1.nn.convolution(
        inputs, filter=kernel, padding="SAME")

    zeros_batch = numpy.zeros((3, 4, 1, 40))
    session.run(conv_out, feed_dict={inputs: zeros_batch})

Produces an error similar to the CPU error:

tensorflow.python.framework.errors_impl.InvalidArgumentError: Depth of output (32) is not a multiple of the number of groups (40) for '{{node convolution}} = Conv2D[T=DT_FLOAT, data_format="NHWC", dilations=[1, 1, 1, 1], explicit_paddings=[], padding="SAME", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true](Placeholder, convolution/ReadVariableOp)' with input shapes: [?,?,1,40], [3,3,1,32].

@albertz
Copy link
Author

albertz commented Oct 1, 2021

This code produces the same convolution error on GPU:



import tensorflow as tf
import numpy

# Graph-mode (TF1-style) execution is required for placeholders and sessions.
tf.compat.v1.disable_eager_execution()


graph = tf.Graph()
with graph.as_default(), tf.compat.v1.Session(graph=graph) as session:
    inputs = tf.compat.v1.placeholder(
        tf.float32, (None, None, 1, 40))  # [B,T,1,40]
    # The third filter axis is left unknown in the graph; it is only fixed
    # (to 1) by the value fed at run time.
    kernel = tf.compat.v1.placeholder(tf.float32, (3, 3, None, 32))
    conv_out = tf.compat.v1.nn.convolution(
        inputs, filter=kernel, padding="SAME")

    feed = {
        inputs: numpy.zeros((3, 4, 1, 40)),
        kernel: numpy.zeros((3, 3, 1, 32)),
    }
    session.run(conv_out, feed_dict=feed)

Error:

2021-10-01 23:05:27.951528: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1510] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 6173 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 2070, pci bus id:0000:09:00.0, compute capability: 7.5
2021-10-01 23:05:28.331213: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8204
2021-10-01 23:05:28.866860: W tensorflow/core/framework/op_kernel.cc:1692] OP_REQUIRES failed at conv_ops.cc:1276 : Not found: No algorithm worked!
Traceback (most recent call last):
  File "/home/az/.local/lib/python3.8/site-packages/tensorflow/python/client/session.py", line 1375, in _do_call
    return fn(*args)
  File "/home/az/.local/lib/python3.8/site-packages/tensorflow/python/client/session.py", line 1359, in _run_fn
    return self._call_tf_sessionrun(options, feed_dict, fetch_list,
  File "/home/az/.local/lib/python3.8/site-packages/tensorflow/python/client/session.py", line 1451, in _call_tf_sessionrun
    return tf_session.TF_SessionRun_wrapper(self._session, options, feed_dict,
tensorflow.python.framework.errors_impl.NotFoundError: 2 root error(s) found.
  (0) Not found: No algorithm worked!
         [[{{node convolution}}]]
  (1) Not found: No algorithm worked!
         [[{{node convolution}}]]
         [[convolution/_5]]
0 successful operations.
0 derived errors ignored.

@albertz
Copy link
Author

albertz commented Oct 1, 2021

So I reported this here: tensorflow/tensorflow#52223

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment