Last active
April 29, 2017 08:54
-
-
Save lispc/27c9f4fe935abade5de90c18276e1742 to your computer and use it in GitHub Desktop.
TensorFlow FIFOQueue bug
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import print_function | |
import os | |
os.environ["CUDA_VISIBLE_DEVICES"]='' | |
import time | |
import threading | |
import numpy as np | |
import tensorflow as tf | |
# Intended capacity of the shared FIFO queue.
queue_size = 10
# Shape of a single queued element (also used for the feed placeholder).
queue_shape = [6, 200, 200, 3]
# One job, 'train', with two tasks listening on local ports.
cluster_spec_config = {'train': ['localhost:22222', 'localhost:23333']}
# Device string under which the shared queue is constructed (task 0, CPU).
queue_loc = '/job:train/task:0/cpu:0'
def run_pop():
    """Dequeue from the shared queue forever via the server on port 22222.

    Builds five separate dequeue ops on the shared FIFO queue, wraps each in
    a ``tf.Print`` labelled ``data:``, groups them into a single op and runs
    that op in an endless loop. Never returns.
    """
    # NOTE(review): indentation was lost in the text this was recovered from;
    # the device scope is assumed to cover only the queue construction.
    with tf.device(queue_loc):
        # Same name/shared_name as the queue built in run_push().
        queue = tf.FIFOQueue(queue_size, tf.uint8, queue_shape,
                             name='queue', shared_name='shared_queue')
    sess = tf.Session('grpc://localhost:22222')

    printed = []
    for _ in range(5):
        data = queue.dequeue()
        # Print each dequeued tensor so the log shows what every op received.
        printed.append(tf.Print(data, [data], 'data:'))
    pop_op = tf.group(*printed)

    while True:
        print('running pop_op')
        sess.run(pop_op)
def run_push():
    """Enqueue batches into the shared queue forever via the server on port 23333.

    Every five seconds, enqueues four elements of shape ``queue_shape``; each
    element is filled with a counter that advances by one per enqueue, so
    consecutive elements are distinguishable in the consumer's output.
    Never returns.
    """
    # NOTE(review): indentation was lost in the text this was recovered from;
    # the device scope is assumed to cover only the queue construction.
    with tf.device(queue_loc):
        # Same name/shared_name as the queue built in run_pop().
        queue = tf.FIFOQueue(queue_size, tf.uint8, queue_shape,
                             name='queue', shared_name='shared_queue')
    sess = tf.Session('grpc://localhost:23333')
    sess.run(tf.global_variables_initializer())

    place_holder = tf.placeholder(tf.uint8, queue_shape, 'place_holder')
    enqueue_op = queue.enqueue(place_holder)

    idx = 0
    while True:
        time.sleep(5)
        print('pushing')
        for _ in range(4):
            payload = np.full(queue_shape, idx, np.uint8)
            sess.run(enqueue_op, feed_dict={place_holder: payload})
            # Wrap explicitly: the element dtype is uint8, and newer NumPy
            # releases reject out-of-range Python ints instead of wrapping.
            idx = (idx + 1) % 256
def test_dequeue():
    """Start a two-task local cluster, push in the background, then pop.

    Creates both in-process servers of the 'train' job, starts ``run_push``
    on a daemon thread, waits 20 seconds, and then calls ``run_pop`` on the
    calling thread. ``run_pop`` loops forever, so this does not return.
    """
    cluster_spec = tf.train.ClusterSpec(cluster_spec_config)
    # Both servers are bound to locals so they stay referenced during the run.
    server0 = tf.train.Server(cluster_spec, job_name='train', task_index=0)
    server1 = tf.train.Server(cluster_spec, job_name='train', task_index=1)

    # The queue is pinned to task 0 (see queue_loc); run_push() connects
    # through server1's endpoint and run_pop() through server0's.
    pusher = threading.Thread(target=run_push)
    # Daemon thread: the endless producer must not keep the process alive.
    pusher.daemon = True
    pusher.start()

    time.sleep(20)
    run_pop()
def main():
    """Script entry point: run the dequeue reproduction."""
    test_dequeue()
# Run the reproduction only when executed as a script, not on import.
if __name__ == '__main__':
    main()
'''
Log:
I tensorflow/stream_executor/dso_loader.cc:135] successfully opened CUDA library libcublas.so.8.0 locally
I tensorflow/stream_executor/dso_loader.cc:135] successfully opened CUDA library libcudnn.so.5 locally
I tensorflow/stream_executor/dso_loader.cc:135] successfully opened CUDA library libcufft.so.8.0 locally
I tensorflow/stream_executor/dso_loader.cc:135] successfully opened CUDA library libcuda.so.1 locally
I tensorflow/stream_executor/dso_loader.cc:135] successfully opened CUDA library libcurand.so.8.0 locally
W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE3 instructions, but these are available on your machine and could speed up CPU computations.
W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE4.1 instructions, but these are available on your machine and could speed up CPU computations.
W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE4.2 instructions, but these are available on your machine and could speed up CPU computations.
W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use AVX instructions, but these are available on your machine and could speed up CPU computations.
W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use AVX2 instructions, but these are available on your machine and could speed up CPU computations.
W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use FMA instructions, but these are available on your machine and could speed up CPU computations.
E tensorflow/stream_executor/cuda/cuda_driver.cc:509] failed call to cuInit: CUDA_ERROR_NO_DEVICE
I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:158] retrieving CUDA diagnostic information for host: ficusmexico
I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:165] hostname: ficusmexico
I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:189] libcuda reported version is: 367.57.0
I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:363] driver version file contents: """NVRM version: NVIDIA UNIX x86_64 Kernel Module 367.57 Mon Oct 3 20:37:01 PDT 2016
GCC version: gcc version 4.9.4 (Ubuntu 4.9.4-2ubuntu1~14.04.1)
"""
I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:193] kernel reported version is: 367.57.0
I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:300] kernel version seems to match DSO: 367.57.0
I tensorflow/core/distributed_runtime/rpc/grpc_channel.cc:200] Initialize GrpcChannelCache for job train -> {0 -> localhost:22222, 1 -> localhost:23333}
I tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc:221] Started server with target: grpc://localhost:22222
I tensorflow/core/distributed_runtime/rpc/grpc_channel.cc:200] Initialize GrpcChannelCache for job train -> {0 -> localhost:22222, 1 -> localhost:23333}
I tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc:221] Started server with target: grpc://localhost:23333
I tensorflow/core/distributed_runtime/master_session.cc:1012] Start master session 7796303ae7a69d4d with config:
pushing
pushing
pushing
running pop_op
I tensorflow/core/distributed_runtime/master_session.cc:1012] Start master session ec298cfbc9003ece with config:
I tensorflow/core/kernels/logging_ops.cc:79] data:[[[[0 0 0]]]...]
I tensorflow/core/kernels/logging_ops.cc:79] data:[[[[0 0 0]]]...]
I tensorflow/core/kernels/logging_ops.cc:79] data:[[[[0 0 0]]]...]
I tensorflow/core/kernels/logging_ops.cc:79] data:[[[[0 0 0]]]...]
I tensorflow/core/kernels/logging_ops.cc:79] data:[[[[0 0 0]]]...]
running pop_op
I tensorflow/core/kernels/logging_ops.cc:79] data:[[[[1 1 1]]]...]
I tensorflow/core/kernels/logging_ops.cc:79] data:[[[[1 1 1]]]...]
I tensorflow/core/kernels/logging_ops.cc:79] data:[[[[1 1 1]]]...]
I tensorflow/core/kernels/logging_ops.cc:79] data:[[[[1 1 1]]]...]
I tensorflow/core/kernels/logging_ops.cc:79] data:[[[[1 1 1]]]...]
running pop_op
I tensorflow/core/kernels/logging_ops.cc:79] data:[[[[2 2 2]]]...]
I tensorflow/core/kernels/logging_ops.cc:79] data:[[[[2 2 2]]]...]
I tensorflow/core/kernels/logging_ops.cc:79] data:[[[[2 2 2]]]...]
I tensorflow/core/kernels/logging_ops.cc:79] data:[[[[2 2 2]]]...]
I tensorflow/core/kernels/logging_ops.cc:79] data:[[[[2 2 2]]]...]
'''
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment