Last active
November 30, 2016 15:19
-
-
Save hailiang-wang/ede650487bcacc99d0b86c9488796e73 to your computer and use it in GitHub Desktop.
TensorFlow
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
I tensorflow/stream_executor/dso_loader.cc:111] successfully opened CUDA library libcublas.so.8.0 locally | |
I tensorflow/stream_executor/dso_loader.cc:111] successfully opened CUDA library libcudnn.so.5 locally | |
I tensorflow/stream_executor/dso_loader.cc:111] successfully opened CUDA library libcufft.so.8.0 locally | |
I tensorflow/stream_executor/dso_loader.cc:111] successfully opened CUDA library libcuda.so.1 locally | |
I tensorflow/stream_executor/dso_loader.cc:111] successfully opened CUDA library libcurand.so.8.0 locally | |
INFO:tensorflow:Using config: {'cluster_spec': None, 'master': '', '_job_name': None, 'tf_random_seed': None, 'task': 0, 'keep_checkpoint_max': 5, '_is_chief': True, 'tf_config': gpu_options { | |
per_process_gpu_memory_fraction: 1 | |
} | |
, 'save_checkpoints_secs': 600, 'evaluation_master': '', 'num_ps_replicas': 0, 'keep_checkpoint_every_n_hours': 10000, 'save_summary_steps': 100} | |
WARNING:tensorflow:parser_num_threads is deprecated, it will be removed onSept 3 2016 | |
INFO:tensorflow:Setting feature info to {'utterance': TensorSignature(dtype=tf.int64, shape=TensorShape([Dimension(128), Dimension(160)]), is_sparse=False), 'context_len': TensorSignature(dtype=tf.int64, shape=TensorShape([Dimension(128), Dimension(1)]), is_sparse=False), 'context': TensorSignature(dtype=tf.int64, shape=TensorShape([Dimension(128), Dimension(160)]), is_sparse=False), 'utterance_len': TensorSignature(dtype=tf.int64, shape=TensorShape([Dimension(128), Dimension(1)]), is_sparse=False)} | |
INFO:tensorflow:Setting targets info to TensorSignature(dtype=tf.int64, shape=TensorShape([Dimension(128), Dimension(1)]), is_sparse=False) | |
INFO:tensorflow:Loading Vocab embeddings... | |
INFO:tensorflow:Start to Load Vocab. | |
INFO:tensorflow:Load Vocab Successfully. | |
INFO:tensorflow:Loading Glove embeddings... | |
INFO:tensorflow:Start to Load Glove. | |
INFO:tensorflow:Found 0 out of 400000 vectors in Glove | |
INFO:tensorflow:Load Glove Successfully. | |
INFO:tensorflow:Create CheckpointSaverHook | |
I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:925] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero | |
I tensorflow/core/common_runtime/gpu/gpu_device.cc:951] Found device 0 with properties: | |
name: GRID K520 | |
major: 3 minor: 0 memoryClockRate (GHz) 0.797 | |
pciBusID 0000:00:03.0 | |
Total memory: 3.94GiB | |
Free memory: 3.91GiB | |
I tensorflow/core/common_runtime/gpu/gpu_device.cc:972] DMA: 0 | |
I tensorflow/core/common_runtime/gpu/gpu_device.cc:982] 0: Y | |
I tensorflow/core/common_runtime/gpu/gpu_device.cc:1041] Creating TensorFlow device (/gpu:0) -> (device: 0, name: GRID K520, pci bus id: 0000:00:03.0) | |
INFO:tensorflow:loss = 0.692882, step = 1 | |
INFO:tensorflow:Saving checkpoints for 1 into /home/ubuntu/git/deeplearning/network-model/runs/1479904178/model.ckpt. | |
I tensorflow/core/common_runtime/gpu/pool_allocator.cc:245] PoolAllocator: After 8187 get requests, put_count=7491 evicted_count=1000 eviction_rate=0.133494 and unsatisfied allocation rate=0.219372 | |
I tensorflow/core/common_runtime/gpu/pool_allocator.cc:257] Raising pool_size_limit_ from 100 to 110 | |
I tensorflow/core/common_runtime/gpu/pool_allocator.cc:245] PoolAllocator: After 9014 get requests, put_count=8651 evicted_count=1000 eviction_rate=0.115594 and unsatisfied allocation rate=0.153761 | |
I tensorflow/core/common_runtime/gpu/pool_allocator.cc:257] Raising pool_size_limit_ from 256 to 281 | |
I tensorflow/core/common_runtime/gpu/pool_allocator.cc:245] PoolAllocator: After 14660 get requests, put_count=14848 evicted_count=1000 eviction_rate=0.0673491 and unsatisfied allocation rate=0.0594134 | |
I tensorflow/core/common_runtime/gpu/pool_allocator.cc:257] Raising pool_size_limit_ from 655 to 720 | |
I tensorflow/stream_executor/dso_loader.cc:111] successfully opened CUDA library libcublas.so.8.0 locally | |
I tensorflow/stream_executor/dso_loader.cc:111] successfully opened CUDA library libcudnn.so.5 locally | |
I tensorflow/stream_executor/dso_loader.cc:111] successfully opened CUDA library libcufft.so.8.0 locally | |
I tensorflow/stream_executor/dso_loader.cc:111] successfully opened CUDA library libcuda.so.1 locally | |
I tensorflow/stream_executor/dso_loader.cc:111] successfully opened CUDA library libcurand.so.8.0 locally | |
INFO:tensorflow:Using config: {'keep_checkpoint_every_n_hours': 10000, '_job_name': None, '_is_chief': True, 'cluster_spec': None, 'save_checkpoints_secs': 600, 'num_ps_replicas': 0, 'tf_random_seed': None, 'save_summary_steps': 100, 'tf_config': gpu_options { | |
per_process_gpu_memory_fraction: 1 | |
} | |
, 'evaluation_master': '', 'task': 0, 'keep_checkpoint_max': 5, 'master': ''} | |
WARNING:tensorflow:parser_num_threads is deprecated, it will be removed onSept 3 2016 | |
INFO:tensorflow:Setting feature info to {'context_len': TensorSignature(dtype=tf.int64, shape=TensorShape([Dimension(128), Dimension(1)]), is_sparse=False), 'utterance': TensorSignature(dtype=tf.int64, shape=TensorShape([Dimension(128), Dimension(160)]), is_sparse=False), 'context': TensorSignature(dtype=tf.int64, shape=TensorShape([Dimension(128), Dimension(160)]), is_sparse=False), 'utterance_len': TensorSignature(dtype=tf.int64, shape=TensorShape([Dimension(128), Dimension(1)]), is_sparse=False)} | |
INFO:tensorflow:Setting targets info to TensorSignature(dtype=tf.int64, shape=TensorShape([Dimension(128), Dimension(1)]), is_sparse=False) | |
INFO:tensorflow:Loading Vocab embeddings... | |
INFO:tensorflow:Start to Load Vocab. | |
INFO:tensorflow:loss = 0.747786, step = 101 | |
INFO:tensorflow:Load Vocab Successfully. | |
INFO:tensorflow:Loading Glove embeddings... | |
INFO:tensorflow:Start to Load Glove. | |
INFO:tensorflow:Found 0 out of 400000 vectors in Glove | |
INFO:tensorflow:Load Glove Successfully. | |
INFO:tensorflow:Create CheckpointSaverHook | |
I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:925] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero | |
I tensorflow/core/common_runtime/gpu/gpu_device.cc:951] Found device 0 with properties: | |
name: GRID K520 | |
major: 3 minor: 0 memoryClockRate (GHz) 0.797 | |
pciBusID 0000:00:03.0 | |
Total memory: 3.94GiB | |
Free memory: 219.00MiB | |
I tensorflow/core/common_runtime/gpu/gpu_device.cc:972] DMA: 0 | |
I tensorflow/core/common_runtime/gpu/gpu_device.cc:982] 0: Y | |
I tensorflow/core/common_runtime/gpu/gpu_device.cc:1041] Creating TensorFlow device (/gpu:0) -> (device: 0, name: GRID K520, pci bus id: 0000:00:03.0) | |
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (256): Total Chunks: 0, Chunks in use: 0 0B allocated for chunks. 0B client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin. | |
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (512): Total Chunks: 0, Chunks in use: 0 0B allocated for chunks. 0B client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin. | |
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (1024): Total Chunks: 0, Chunks in use: 0 0B allocated for chunks. 0B client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin. | |
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (2048): Total Chunks: 0, Chunks in use: 0 0B allocated for chunks. 0B client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin. | |
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (4096): Total Chunks: 0, Chunks in use: 0 0B allocated for chunks. 0B client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin. | |
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (8192): Total Chunks: 0, Chunks in use: 0 0B allocated for chunks. 0B client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin. | |
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (16384): Total Chunks: 0, Chunks in use: 0 0B allocated for chunks. 0B client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin. | |
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (32768): Total Chunks: 0, Chunks in use: 0 0B allocated for chunks. 0B client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin. | |
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (65536): Total Chunks: 0, Chunks in use: 0 0B allocated for chunks. 0B client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin. | |
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (131072): Total Chunks: 0, Chunks in use: 0 0B allocated for chunks. 0B client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin. | |
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (262144): Total Chunks: 0, Chunks in use: 0 0B allocated for chunks. 0B client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin. | |
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (524288): Total Chunks: 0, Chunks in use: 0 0B allocated for chunks. 0B client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin. | |
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (1048576): Total Chunks: 0, Chunks in use: 0 0B allocated for chunks. 0B client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin. | |
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (2097152): Total Chunks: 0, Chunks in use: 0 0B allocated for chunks. 0B client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin. | |
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (4194304): Total Chunks: 0, Chunks in use: 0 0B allocated for chunks. 0B client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin. | |
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (8388608): Total Chunks: 0, Chunks in use: 0 0B allocated for chunks. 0B client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin. | |
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (16777216): Total Chunks: 1, Chunks in use: 0 18.99MiB allocated for chunks. 0B client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin. | |
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (33554432): Total Chunks: 0, Chunks in use: 0 0B allocated for chunks. 0B client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin. | |
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (67108864): Total Chunks: 0, Chunks in use: 0 0B allocated for chunks. 0B client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin. | |
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (134217728): Total Chunks: 0, Chunks in use: 0 0B allocated for chunks. 0B client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin. | |
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (268435456): Total Chunks: 0, Chunks in use: 0 0B allocated for chunks. 0B client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin. | |
I tensorflow/core/common_runtime/bfc_allocator.cc:656] Bin for 34.95MiB was 32.00MiB, Chunk State: | |
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x702480000 of size 1280 | |
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x702480500 of size 256 | |
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x702480600 of size 256 | |
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x702480700 of size 4096 | |
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x702481700 of size 256 | |
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x702481800 of size 256 | |
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x702481900 of size 256 | |
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x702481a00 of size 256 | |
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x702481b00 of size 256 | |
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x702481c00 of size 256 | |
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x702481d00 of size 256 | |
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x702481e00 of size 256 | |
I tensorflow/core/common_runtime/bfc_allocator.cc:683] Free at 0x702481f00 of size 19915008 | |
I tensorflow/core/common_runtime/bfc_allocator.cc:689] Summary of in-use Chunks by size: | |
I tensorflow/core/common_runtime/bfc_allocator.cc:692] 10 Chunks of size 256 totalling 2.5KiB | |
I tensorflow/core/common_runtime/bfc_allocator.cc:692] 1 Chunks of size 1280 totalling 1.2KiB | |
I tensorflow/core/common_runtime/bfc_allocator.cc:692] 1 Chunks of size 4096 totalling 4.0KiB | |
I tensorflow/core/common_runtime/bfc_allocator.cc:696] Sum Total of in-use chunks: 7.8KiB | |
I tensorflow/core/common_runtime/bfc_allocator.cc:698] Stats: | |
Limit: 19922944 | |
InUse: 7936 | |
MaxInUse: 7936 | |
NumAllocs: 12 | |
MaxAllocSize: 4096 | |
W tensorflow/core/common_runtime/bfc_allocator.cc:270] *___________________________________________________________________________________________________ | |
W tensorflow/core/common_runtime/bfc_allocator.cc:271] Ran out of memory trying to allocate 34.95MiB. See logs for memory state. | |
W tensorflow/core/framework/op_kernel.cc:958] Internal: Dst tensor is not initialized. | |
E tensorflow/core/common_runtime/executor.cc:334] Executor failed to create kernel. Internal: Dst tensor is not initialized. | |
[[Node: OptimizeLoss/zeros = Const[dtype=DT_FLOAT, value=Tensor<type: float shape: [91620,100] values: 0 0 0...>, _device="/job:localhost/replica:0/task:0/gpu:0"]()]] | |
Traceback (most recent call last): | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/client/session.py", line 972, in _do_call | |
return fn(*args) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/client/session.py", line 954, in _run_fn | |
status, run_metadata) | |
File "/usr/lib/python3.4/contextlib.py", line 66, in __exit__ | |
next(self.gen) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/framework/errors.py", line 463, in raise_exception_on_not_ok_status | |
pywrap_tensorflow.TF_GetCode(status)) | |
tensorflow.python.framework.errors.InternalError: Dst tensor is not initialized. | |
[[Node: OptimizeLoss/zeros = Const[dtype=DT_FLOAT, value=Tensor<type: float shape: [91620,100] values: 0 0 0...>, _device="/job:localhost/replica:0/task:0/gpu:0"]()]] | |
During handling of the above exception, another exception occurred: | |
Traceback (most recent call last): | |
File "udc_train.py", line 70, in <module> | |
tf.app.run() | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/platform/app.py", line 30, in run | |
sys.exit(main(sys.argv[:1] + flags_passthrough)) | |
File "udc_train.py", line 67, in main | |
estimator.fit(input_fn=input_fn_train, steps=None, monitors=[eval_monitor]) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 333, in fit | |
max_steps=max_steps) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 708, in _train_model | |
max_steps=max_steps) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/graph_actions.py", line 281, in _monitored_train | |
hooks=all_hooks) as super_sess: | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/monitored_session.py", line 342, in __init__ | |
self._sess = _RecoverableSession(self._coordinated_creator) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/monitored_session.py", line 511, in __init__ | |
_WrappedSession.__init__(self, self._sess_creator.create_session()) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/monitored_session.py", line 400, in create_session | |
self.tf_sess = self._session_creator.create_session() | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/monitored_session.py", line 253, in create_session | |
init_fn=self._scaffold.init_fn) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/training/session_manager.py", line 233, in prepare_session | |
sess.run(init_op, feed_dict=init_feed_dict) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/client/session.py", line 717, in run | |
run_metadata_ptr) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/client/session.py", line 915, in _run | |
feed_dict_string, options, run_metadata) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/client/session.py", line 965, in _do_run | |
target_list, options, run_metadata) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/client/session.py", line 985, in _do_call | |
raise type(e)(node_def, op, message) | |
tensorflow.python.framework.errors.InternalError: Dst tensor is not initialized. | |
[[Node: OptimizeLoss/zeros = Const[dtype=DT_FLOAT, value=Tensor<type: float shape: [91620,100] values: 0 0 0...>, _device="/job:localhost/replica:0/task:0/gpu:0"]()]] | |
Caused by op 'OptimizeLoss/zeros', defined at: | |
File "udc_train.py", line 70, in <module> | |
tf.app.run() | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/platform/app.py", line 30, in run | |
sys.exit(main(sys.argv[:1] + flags_passthrough)) | |
File "udc_train.py", line 67, in main | |
estimator.fit(input_fn=input_fn_train, steps=None, monitors=[eval_monitor]) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 333, in fit | |
max_steps=max_steps) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 662, in _train_model | |
train_op, loss_op = self._get_train_ops(features, targets) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 963, in _get_train_ops | |
_, loss, train_op = self._call_model_fn(features, targets, ModeKeys.TRAIN) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 946, in _call_model_fn | |
return self._model_fn(features, targets, mode=mode) | |
File "/home/ubuntu/git/deeplearning/network-model/udc_model.py", line 40, in model_fn | |
train_op = create_train_op(loss, hparams) | |
File "/home/ubuntu/git/deeplearning/network-model/udc_model.py", line 16, in create_train_op | |
optimizer=hparams.optimizer) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/layers/python/layers/optimizers.py", line 224, in optimize_loss | |
name="train") | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/training/optimizer.py", line 311, in apply_gradients | |
self._create_slots(var_list) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/training/adam.py", line 119, in _create_slots | |
self._zeros_slot(v, "m", self._name) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/training/optimizer.py", line 505, in _zeros_slot | |
named_slots[var] = slot_creator.create_zeros_slot(var, op_name) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/training/slot_creator.py", line 106, in create_zeros_slot | |
val = array_ops.zeros(primary.get_shape().as_list(), dtype=dtype) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/ops/array_ops.py", line 1182, in zeros | |
output = constant(zero, shape=shape, dtype=dtype, name=name) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/framework/constant_op.py", line 167, in constant | |
attrs={"value": tensor_value, "dtype": dtype_value}, name=name).outputs[0] | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/framework/ops.py", line 2380, in create_op | |
original_op=self._default_original_op, op_def=op_def) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/framework/ops.py", line 1298, in __init__ | |
self._traceback = _extract_stack() | |
InternalError (see above for traceback): Dst tensor is not initialized. | |
[[Node: OptimizeLoss/zeros = Const[dtype=DT_FLOAT, value=Tensor<type: float shape: [91620,100] values: 0 0 0...>, _device="/job:localhost/replica:0/task:0/gpu:0"]()]] | |
INFO:tensorflow:loss = 0.683966, step = 201 | |
INFO:tensorflow:loss = 0.683804, step = 301 | |
INFO:tensorflow:loss = 0.683264, step = 401 | |
INFO:tensorflow:loss = 0.785771, step = 501 | |
INFO:tensorflow:loss = 0.692336, step = 601 | |
INFO:tensorflow:loss = 0.646901, step = 701 | |
INFO:tensorflow:loss = 0.590936, step = 801 | |
INFO:tensorflow:loss = 0.578003, step = 901 | |
INFO:tensorflow:loss = 0.563656, step = 1001 | |
INFO:tensorflow:loss = 0.620312, step = 1101 | |
INFO:tensorflow:loss = 0.58938, step = 1201 | |
INFO:tensorflow:loss = 0.551057, step = 1301 | |
INFO:tensorflow:loss = 0.509638, step = 1401 | |
INFO:tensorflow:loss = 0.523488, step = 1501 | |
INFO:tensorflow:loss = 0.585533, step = 1601 | |
INFO:tensorflow:loss = 0.490285, step = 1701 | |
INFO:tensorflow:loss = 0.505317, step = 1801 | |
INFO:tensorflow:loss = 0.525044, step = 1901 | |
INFO:tensorflow:Saving checkpoints for 1974 into /home/ubuntu/git/deeplearning/network-model/runs/1479904178/model.ckpt. | |
WARNING:tensorflow:parser_num_threads is deprecated, it will be removed onSept 3 2016 | |
WARNING:tensorflow:Given features: {'distractor_4_len': <tf.Tensor 'read_batch_features_eval/fifo_queue_Dequeue:11' shape=(?, 1) dtype=int64>, 'distractor_8': <tf.Tensor 'read_batch_features_eval/fifo_queue_Dequeue:18' shape=(?, 160) dtype=int64>, 'distractor_0_len': <tf.Tensor 'read_batch_features_eval/fifo_queue_Dequeue:3' shape=(?, 1) dtype=int64>, 'distractor_2': <tf.Tensor 'read_batch_features_eval/fifo_queue_Dequeue:6' shape=(?, 160) dtype=int64>, 'utterance_len': <tf.Tensor 'read_batch_features_eval/fifo_queue_Dequeue:21' shape=(?, 1) dtype=int64>, 'utterance': <tf.Tensor 'read_batch_features_eval/fifo_queue_Dequeue:20' shape=(?, 160) dtype=int64>, 'distractor_1': <tf.Tensor 'read_batch_features_eval/fifo_queue_Dequeue:4' shape=(?, 160) dtype=int64>, 'distractor_0': <tf.Tensor 'read_batch_features_eval/fifo_queue_Dequeue:2' shape=(?, 160) dtype=int64>, 'context_len': <tf.Tensor 'read_batch_features_eval/fifo_queue_Dequeue:1' shape=(?, 1) dtype=int64>, 'context': <tf.Tensor 'read_batch_features_eval/fifo_queue_Dequeue:0' shape=(?, 160) dtype=int64>, 'distractor_6': <tf.Tensor 'read_batch_features_eval/fifo_queue_Dequeue:14' shape=(?, 160) dtype=int64>, 'distractor_6_len': <tf.Tensor 'read_batch_features_eval/fifo_queue_Dequeue:15' shape=(?, 1) dtype=int64>, 'distractor_3_len': <tf.Tensor 'read_batch_features_eval/fifo_queue_Dequeue:9' shape=(?, 1) dtype=int64>, 'distractor_3': <tf.Tensor 'read_batch_features_eval/fifo_queue_Dequeue:8' shape=(?, 160) dtype=int64>, 'distractor_5': <tf.Tensor 'read_batch_features_eval/fifo_queue_Dequeue:12' shape=(?, 160) dtype=int64>, 'distractor_7_len': <tf.Tensor 'read_batch_features_eval/fifo_queue_Dequeue:17' shape=(?, 1) dtype=int64>, 'distractor_2_len': <tf.Tensor 'read_batch_features_eval/fifo_queue_Dequeue:7' shape=(?, 1) dtype=int64>, 'distractor_1_len': <tf.Tensor 'read_batch_features_eval/fifo_queue_Dequeue:5' shape=(?, 1) dtype=int64>, 'distractor_7': <tf.Tensor 'read_batch_features_eval/fifo_queue_Dequeue:16' 
shape=(?, 160) dtype=int64>, 'distractor_8_len': <tf.Tensor 'read_batch_features_eval/fifo_queue_Dequeue:19' shape=(?, 1) dtype=int64>, 'distractor_4': <tf.Tensor 'read_batch_features_eval/fifo_queue_Dequeue:10' shape=(?, 160) dtype=int64>, 'distractor_5_len': <tf.Tensor 'read_batch_features_eval/fifo_queue_Dequeue:13' shape=(?, 1) dtype=int64>}, required signatures: {'utterance': TensorSignature(dtype=tf.int64, shape=TensorShape([Dimension(128), Dimension(160)]), is_sparse=False), 'context_len': TensorSignature(dtype=tf.int64, shape=TensorShape([Dimension(128), Dimension(1)]), is_sparse=False), 'context': TensorSignature(dtype=tf.int64, shape=TensorShape([Dimension(128), Dimension(160)]), is_sparse=False), 'utterance_len': TensorSignature(dtype=tf.int64, shape=TensorShape([Dimension(128), Dimension(1)]), is_sparse=False)}. | |
WARNING:tensorflow:Given targets: Tensor("zeros:0", shape=(16, 1), dtype=int64), required signatures: TensorSignature(dtype=tf.int64, shape=TensorShape([Dimension(128), Dimension(1)]), is_sparse=False). | |
INFO:tensorflow:Loading Vocab embeddings... | |
INFO:tensorflow:Start to Load Vocab. | |
INFO:tensorflow:Load Vocab Successfully. | |
INFO:tensorflow:Loading Glove embeddings... | |
INFO:tensorflow:Start to Load Glove. | |
INFO:tensorflow:Found 0 out of 400000 vectors in Glove | |
INFO:tensorflow:Load Glove Successfully. | |
WARNING:tensorflow:Please specify metrics using MetricSpec. Using bare functions or (key, fn) tuples is deprecated and support for it will be removed on Oct 1, 2016. | |
WARNING:tensorflow:Please specify metrics using MetricSpec. Using bare functions or (key, fn) tuples is deprecated and support for it will be removed on Oct 1, 2016. | |
WARNING:tensorflow:Please specify metrics using MetricSpec. Using bare functions or (key, fn) tuples is deprecated and support for it will be removed on Oct 1, 2016. | |
WARNING:tensorflow:Please specify metrics using MetricSpec. Using bare functions or (key, fn) tuples is deprecated and support for it will be removed on Oct 1, 2016. | |
I tensorflow/core/common_runtime/gpu/gpu_device.cc:1041] Creating TensorFlow device (/gpu:0) -> (device: 0, name: GRID K520, pci bus id: 0000:00:03.0) | |
INFO:tensorflow:Restored model from /home/ubuntu/git/deeplearning/network-model/runs/1479904178 | |
W tensorflow/core/framework/op_kernel.cc:968] Out of range: Reached limit of 1 | |
[[Node: read_batch_features_eval/file_name_queue/limit_epochs/CountUpTo = CountUpTo[T=DT_INT64, _class=["loc:@read_batch_features_eval/file_name_queue/limit_epochs/epochs"], limit=1, _device="/job:localhost/replica:0/task:0/cpu:0"](read_batch_features_eval/file_name_queue/limit_epochs/epochs)]] | |
W tensorflow/core/framework/op_kernel.cc:968] Out of range: Reached limit of 1 | |
[[Node: read_batch_features_eval/file_name_queue/limit_epochs/CountUpTo = CountUpTo[T=DT_INT64, _class=["loc:@read_batch_features_eval/file_name_queue/limit_epochs/epochs"], limit=1, _device="/job:localhost/replica:0/task:0/cpu:0"](read_batch_features_eval/file_name_queue/limit_epochs/epochs)]] | |
INFO:tensorflow:Eval steps [0,inf) for training step 1974. | |
INFO:tensorflow:Results after 10 steps (0.124 sec/batch): recall_at_5 = 0.58125, loss = 0.873905, recall_at_2 = 0.3, recall_at_1 = 0.15, recall_at_10 = 1.0. | |
INFO:tensorflow:Results after 20 steps (0.120 sec/batch): recall_at_5 = 0.575, loss = 0.87269, recall_at_2 = 0.284375, recall_at_1 = 0.171875, recall_at_10 = 1.0. | |
INFO:tensorflow:Results after 30 steps (0.120 sec/batch): recall_at_5 = 0.595833333333, loss = 0.86121, recall_at_2 = 0.2875, recall_at_1 = 0.185416666667, recall_at_10 = 1.0. | |
INFO:tensorflow:Results after 40 steps (0.121 sec/batch): recall_at_5 = 0.6171875, loss = 0.876507, recall_at_2 = 0.2984375, recall_at_1 = 0.18125, recall_at_10 = 1.0. | |
INFO:tensorflow:Results after 50 steps (0.121 sec/batch): recall_at_5 = 0.61375, loss = 0.869931, recall_at_2 = 0.3025, recall_at_1 = 0.175, recall_at_10 = 1.0. | |
INFO:tensorflow:Results after 60 steps (0.120 sec/batch): recall_at_5 = 0.613541666667, loss = 0.878188, recall_at_2 = 0.302083333333, recall_at_1 = 0.173958333333, recall_at_10 = 1.0. | |
INFO:tensorflow:Results after 70 steps (0.120 sec/batch): recall_at_5 = 0.610714285714, loss = 0.882178, recall_at_2 = 0.308928571429, recall_at_1 = 0.16875, recall_at_10 = 1.0. | |
INFO:tensorflow:Results after 80 steps (0.120 sec/batch): recall_at_5 = 0.615625, loss = 0.887192, recall_at_2 = 0.315625, recall_at_1 = 0.17421875, recall_at_10 = 1.0. | |
INFO:tensorflow:Results after 90 steps (0.120 sec/batch): recall_at_5 = 0.613194444444, loss = 0.885683, recall_at_2 = 0.314583333333, recall_at_1 = 0.175, recall_at_10 = 1.0. | |
INFO:tensorflow:Results after 100 steps (0.120 sec/batch): recall_at_5 = 0.61625, loss = 0.884637, recall_at_2 = 0.315625, recall_at_1 = 0.17625, recall_at_10 = 1.0. | |
INFO:tensorflow:Results after 110 steps (0.121 sec/batch): recall_at_5 = 0.617613636364, loss = 0.880329, recall_at_2 = 0.317045454545, recall_at_1 = 0.176136363636, recall_at_10 = 1.0. | |
INFO:tensorflow:Results after 120 steps (0.120 sec/batch): recall_at_5 = 0.622395833333, loss = 0.882829, recall_at_2 = 0.315625, recall_at_1 = 0.174479166667, recall_at_10 = 1.0. | |
INFO:tensorflow:Results after 130 steps (0.121 sec/batch): recall_at_5 = 0.625480769231, loss = 0.881882, recall_at_2 = 0.318269230769, recall_at_1 = 0.176442307692, recall_at_10 = 1.0. | |
INFO:tensorflow:Results after 140 steps (0.121 sec/batch): recall_at_5 = 0.623214285714, loss = 0.879824, recall_at_2 = 0.316071428571, recall_at_1 = 0.173214285714, recall_at_10 = 1.0. | |
INFO:tensorflow:Results after 150 steps (0.120 sec/batch): recall_at_5 = 0.622916666667, loss = 0.878012, recall_at_2 = 0.317083333333, recall_at_1 = 0.1725, recall_at_10 = 1.0. | |
INFO:tensorflow:Results after 160 steps (0.120 sec/batch): recall_at_5 = 0.621484375, loss = 0.876555, recall_at_2 = 0.316796875, recall_at_1 = 0.173828125, recall_at_10 = 1.0. | |
INFO:tensorflow:Results after 170 steps (0.120 sec/batch): recall_at_5 = 0.621323529412, loss = 0.87308, recall_at_2 = 0.319485294118, recall_at_1 = 0.175735294118, recall_at_10 = 1.0. | |
INFO:tensorflow:Results after 180 steps (0.121 sec/batch): recall_at_5 = 0.623958333333, loss = 0.876405, recall_at_2 = 0.321527777778, recall_at_1 = 0.178125, recall_at_10 = 1.0. | |
INFO:tensorflow:Results after 190 steps (0.121 sec/batch): recall_at_5 = 0.622039473684, loss = 0.877071, recall_at_2 = 0.321052631579, recall_at_1 = 0.178289473684, recall_at_10 = 1.0. | |
INFO:tensorflow:Results after 200 steps (0.120 sec/batch): recall_at_5 = 0.621875, loss = 0.878788, recall_at_2 = 0.3209375, recall_at_1 = 0.1790625, recall_at_10 = 1.0. | |
INFO:tensorflow:Results after 210 steps (0.120 sec/batch): recall_at_5 = 0.626488095238, loss = 0.877362, recall_at_2 = 0.324107142857, recall_at_1 = 0.184226190476, recall_at_10 = 1.0. | |
INFO:tensorflow:Results after 220 steps (0.121 sec/batch): recall_at_5 = 0.629261363636, loss = 0.877072, recall_at_2 = 0.324715909091, recall_at_1 = 0.186079545455, recall_at_10 = 1.0. | |
INFO:tensorflow:Results after 230 steps (0.120 sec/batch): recall_at_5 = 0.629891304348, loss = 0.877223, recall_at_2 = 0.325543478261, recall_at_1 = 0.186684782609, recall_at_10 = 1.0. | |
INFO:tensorflow:Results after 240 steps (0.121 sec/batch): recall_at_5 = 0.63203125, loss = 0.876842, recall_at_2 = 0.327864583333, recall_at_1 = 0.187760416667, recall_at_10 = 1.0. | |
INFO:tensorflow:Results after 250 steps (0.121 sec/batch): recall_at_5 = 0.6315, loss = 0.877677, recall_at_2 = 0.3245, recall_at_1 = 0.1865, recall_at_10 = 1.0. | |
INFO:tensorflow:Results after 260 steps (0.121 sec/batch): recall_at_5 = 0.633894230769, loss = 0.87864, recall_at_2 = 0.327884615385, recall_at_1 = 0.187740384615, recall_at_10 = 1.0. | |
INFO:tensorflow:Results after 270 steps (0.121 sec/batch): recall_at_5 = 0.635648148148, loss = 0.877764, recall_at_2 = 0.326157407407, recall_at_1 = 0.186342592593, recall_at_10 = 1.0. | |
INFO:tensorflow:Results after 280 steps (0.120 sec/batch): recall_at_5 = 0.636830357143, loss = 0.879556, recall_at_2 = 0.326785714286, recall_at_1 = 0.186830357143, recall_at_10 = 1.0. | |
INFO:tensorflow:Results after 290 steps (0.121 sec/batch): recall_at_5 = 0.635344827586, loss = 0.87969, recall_at_2 = 0.32650862069, recall_at_1 = 0.186206896552, recall_at_10 = 1.0. | |
INFO:tensorflow:Results after 300 steps (0.121 sec/batch): recall_at_5 = 0.634375, loss = 0.879269, recall_at_2 = 0.326041666667, recall_at_1 = 0.185208333333, recall_at_10 = 1.0. | |
INFO:tensorflow:Results after 310 steps (0.120 sec/batch): recall_at_5 = 0.635080645161, loss = 0.877863, recall_at_2 = 0.324596774194, recall_at_1 = 0.183870967742, recall_at_10 = 1.0. | |
INFO:tensorflow:Results after 320 steps (0.121 sec/batch): recall_at_5 = 0.6353515625, loss = 0.876179, recall_at_2 = 0.324609375, recall_at_1 = 0.1833984375, recall_at_10 = 1.0. | |
INFO:tensorflow:Results after 330 steps (0.120 sec/batch): recall_at_5 = 0.635606060606, loss = 0.877592, recall_at_2 = 0.325189393939, recall_at_1 = 0.183522727273, recall_at_10 = 1.0. | |
INFO:tensorflow:Results after 340 steps (0.121 sec/batch): recall_at_5 = 0.635661764706, loss = 0.877583, recall_at_2 = 0.325367647059, recall_at_1 = 0.183639705882, recall_at_10 = 1.0. | |
INFO:tensorflow:Results after 350 steps (0.120 sec/batch): recall_at_5 = 0.635892857143, loss = 0.876568, recall_at_2 = 0.324821428571, recall_at_1 = 0.183928571429, recall_at_10 = 1.0. | |
INFO:tensorflow:Results after 360 steps (0.122 sec/batch): recall_at_5 = 0.635243055556, loss = 0.876571, recall_at_2 = 0.324131944444, recall_at_1 = 0.183506944444, recall_at_10 = 1.0. | |
INFO:tensorflow:Results after 370 steps (0.121 sec/batch): recall_at_5 = 0.636655405405, loss = 0.875999, recall_at_2 = 0.32347972973, recall_at_1 = 0.183952702703, recall_at_10 = 1.0. | |
INFO:tensorflow:Results after 380 steps (0.120 sec/batch): recall_at_5 = 0.636677631579, loss = 0.875879, recall_at_2 = 0.323848684211, recall_at_1 = 0.184703947368, recall_at_10 = 1.0. | |
INFO:tensorflow:Results after 390 steps (0.120 sec/batch): recall_at_5 = 0.635897435897, loss = 0.875552, recall_at_2 = 0.324198717949, recall_at_1 = 0.184775641026, recall_at_10 = 1.0. | |
W tensorflow/core/framework/op_kernel.cc:968] Invalid argument: Incompatible shapes: [40,1] vs. [160,1] | |
[[Node: prediction/logistic_loss/mul = Mul[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/gpu:0"](prediction/Squeeze, prediction/ToFloat)]] | |
Traceback (most recent call last): | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/client/session.py", line 972, in _do_call | |
return fn(*args) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/client/session.py", line 954, in _run_fn | |
status, run_metadata) | |
File "/usr/lib/python3.4/contextlib.py", line 66, in __exit__ | |
next(self.gen) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/framework/errors.py", line 463, in raise_exception_on_not_ok_status | |
pywrap_tensorflow.TF_GetCode(status)) | |
tensorflow.python.framework.errors.InvalidArgumentError: Incompatible shapes: [40,1] vs. [160,1] | |
[[Node: prediction/logistic_loss/mul = Mul[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/gpu:0"](prediction/Squeeze, prediction/ToFloat)]] | |
[[Node: concat_5/_93 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_171_concat_5", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]] | |
During handling of the above exception, another exception occurred: | |
Traceback (most recent call last): | |
File "udc_train.py", line 70, in <module> | |
tf.app.run() | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/platform/app.py", line 30, in run | |
sys.exit(main(sys.argv[:1] + flags_passthrough)) | |
File "udc_train.py", line 67, in main | |
estimator.fit(input_fn=input_fn_train, steps=None, monitors=[eval_monitor]) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 333, in fit | |
max_steps=max_steps) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 708, in _train_model | |
max_steps=max_steps) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/graph_actions.py", line 285, in _monitored_train | |
None) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/monitored_session.py", line 368, in run | |
run_metadata=run_metadata) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/monitored_session.py", line 521, in run | |
run_metadata=run_metadata) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/monitored_session.py", line 488, in run | |
return self._sess.run(*args, **kwargs) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/monitored_session.py", line 625, in run | |
hook in outputs else None)) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/monitors.py", line 1215, in after_run | |
induce_stop = m.step_end(self._last_step, result) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/monitors.py", line 411, in step_end | |
return self.every_n_step_end(step, output) | |
File "udc_train.py", line 64, in every_n_step_end | |
steps=None) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 399, in evaluate | |
name=name) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 771, in _evaluate_model | |
max_steps=steps) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/graph_actions.py", line 738, in evaluate | |
session.run(update_op, feed_dict=feed_dict) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/client/session.py", line 717, in run | |
run_metadata_ptr) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/client/session.py", line 915, in _run | |
feed_dict_string, options, run_metadata) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/client/session.py", line 965, in _do_run | |
target_list, options, run_metadata) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/client/session.py", line 985, in _do_call | |
raise type(e)(node_def, op, message) | |
tensorflow.python.framework.errors.InvalidArgumentError: Incompatible shapes: [40,1] vs. [160,1] | |
[[Node: prediction/logistic_loss/mul = Mul[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/gpu:0"](prediction/Squeeze, prediction/ToFloat)]] | |
[[Node: concat_5/_93 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_171_concat_5", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]] | |
Caused by op 'prediction/logistic_loss/mul', defined at: | |
File "udc_train.py", line 70, in <module> | |
tf.app.run() | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/platform/app.py", line 30, in run | |
sys.exit(main(sys.argv[:1] + flags_passthrough)) | |
File "udc_train.py", line 67, in main | |
estimator.fit(input_fn=input_fn_train, steps=None, monitors=[eval_monitor]) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 333, in fit | |
max_steps=max_steps) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 708, in _train_model | |
max_steps=max_steps) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/graph_actions.py", line 285, in _monitored_train | |
None) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/monitored_session.py", line 368, in run | |
run_metadata=run_metadata) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/monitored_session.py", line 521, in run | |
run_metadata=run_metadata) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/monitored_session.py", line 488, in run | |
return self._sess.run(*args, **kwargs) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/monitored_session.py", line 625, in run | |
hook in outputs else None)) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/monitors.py", line 1215, in after_run | |
induce_stop = m.step_end(self._last_step, result) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/monitors.py", line 411, in step_end | |
return self.every_n_step_end(step, output) | |
File "udc_train.py", line 64, in every_n_step_end | |
steps=None) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 399, in evaluate | |
name=name) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 760, in _evaluate_model | |
eval_dict = self._get_eval_ops(features, targets, metrics) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 991, in _get_eval_ops | |
predictions, loss, _ = self._call_model_fn(features, targets, ModeKeys.EVAL) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 946, in _call_model_fn | |
return self._model_fn(features, targets, mode=mode) | |
File "/home/ubuntu/git/deeplearning/network-model/udc_model.py", line 83, in model_fn | |
tf.concat(0, all_targets)) | |
File "/home/ubuntu/git/deeplearning/network-model/models/dual_encoder.py", line 83, in dual_encoder_model | |
losses = tf.nn.sigmoid_cross_entropy_with_logits(logits, tf.to_float(targets)) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/ops/nn.py", line 448, in sigmoid_cross_entropy_with_logits | |
return math_ops.add(relu_logits - logits * targets, | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/ops/math_ops.py", line 751, in binary_op_wrapper | |
return func(x, y, name=name) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/ops/math_ops.py", line 910, in _mul_dispatch | |
return gen_math_ops.mul(x, y, name=name) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/ops/gen_math_ops.py", line 1519, in mul | |
result = _op_def_lib.apply_op("Mul", x=x, y=y, name=name) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/framework/op_def_library.py", line 749, in apply_op | |
op_def=op_def) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/framework/ops.py", line 2380, in create_op | |
original_op=self._default_original_op, op_def=op_def) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/framework/ops.py", line 1298, in __init__ | |
self._traceback = _extract_stack() | |
InvalidArgumentError (see above for traceback): Incompatible shapes: [40,1] vs. [160,1] | |
[[Node: prediction/logistic_loss/mul = Mul[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/gpu:0"](prediction/Squeeze, prediction/ToFloat)]] | |
[[Node: concat_5/_93 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_171_concat_5", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]] | |
I tensorflow/stream_executor/dso_loader.cc:111] successfully opened CUDA library libcublas.so.8.0 locally | |
I tensorflow/stream_executor/dso_loader.cc:111] successfully opened CUDA library libcudnn.so.5 locally | |
I tensorflow/stream_executor/dso_loader.cc:111] successfully opened CUDA library libcufft.so.8.0 locally | |
I tensorflow/stream_executor/dso_loader.cc:111] successfully opened CUDA library libcuda.so.1 locally | |
I tensorflow/stream_executor/dso_loader.cc:111] successfully opened CUDA library libcurand.so.8.0 locally | |
INFO:tensorflow:Using config: {'_is_chief': True, 'keep_checkpoint_max': 5, 'master': '', 'keep_checkpoint_every_n_hours': 10000, 'save_checkpoints_secs': 600, 'tf_config': gpu_options { | |
per_process_gpu_memory_fraction: 1 | |
} | |
, 'cluster_spec': None, 'evaluation_master': '', 'num_ps_replicas': 0, '_job_name': None, 'task': 0, 'save_summary_steps': 100, 'tf_random_seed': None} | |
WARNING:tensorflow:parser_num_threads is deprecated, it will be removed onSept 3 2016 | |
INFO:tensorflow:Setting feature info to {'utterance_len': TensorSignature(dtype=tf.int64, shape=TensorShape([Dimension(128), Dimension(1)]), is_sparse=False), 'context': TensorSignature(dtype=tf.int64, shape=TensorShape([Dimension(128), Dimension(160)]), is_sparse=False), 'context_len': TensorSignature(dtype=tf.int64, shape=TensorShape([Dimension(128), Dimension(1)]), is_sparse=False), 'utterance': TensorSignature(dtype=tf.int64, shape=TensorShape([Dimension(128), Dimension(160)]), is_sparse=False)} | |
INFO:tensorflow:Setting targets info to TensorSignature(dtype=tf.int64, shape=TensorShape([Dimension(128), Dimension(1)]), is_sparse=False) | |
INFO:tensorflow:Loading Vocab embeddings... | |
INFO:tensorflow:Start to Load Vocab. | |
INFO:tensorflow:Load Vocab Successfully. | |
INFO:tensorflow:Loading Glove embeddings... | |
INFO:tensorflow:Start to Load Glove. | |
INFO:tensorflow:Found 2169 out of 5501 vectors in Glove | |
INFO:tensorflow:Load Glove Successfully. | |
INFO:tensorflow:Create CheckpointSaverHook | |
I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:925] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero | |
I tensorflow/core/common_runtime/gpu/gpu_device.cc:951] Found device 0 with properties: | |
name: GRID K520 | |
major: 3 minor: 0 memoryClockRate (GHz) 0.797 | |
pciBusID 0000:00:03.0 | |
Total memory: 3.94GiB | |
Free memory: 3.91GiB | |
I tensorflow/core/common_runtime/gpu/gpu_device.cc:972] DMA: 0 | |
I tensorflow/core/common_runtime/gpu/gpu_device.cc:982] 0: Y | |
I tensorflow/core/common_runtime/gpu/gpu_device.cc:1041] Creating TensorFlow device (/gpu:0) -> (device: 0, name: GRID K520, pci bus id: 0000:00:03.0) | |
INFO:tensorflow:loss = 0.691986, step = 1 | |
INFO:tensorflow:Saving checkpoints for 1 into /home/ubuntu/git/deeplearning/network-model/runs/1480511520/model.ckpt. | |
I tensorflow/core/common_runtime/gpu/pool_allocator.cc:245] PoolAllocator: After 8167 get requests, put_count=7467 evicted_count=1000 eviction_rate=0.133923 and unsatisfied allocation rate=0.220399 | |
I tensorflow/core/common_runtime/gpu/pool_allocator.cc:257] Raising pool_size_limit_ from 100 to 110 | |
I tensorflow/core/common_runtime/gpu/pool_allocator.cc:245] PoolAllocator: After 9053 get requests, put_count=8696 evicted_count=1000 eviction_rate=0.114995 and unsatisfied allocation rate=0.152436 | |
I tensorflow/core/common_runtime/gpu/pool_allocator.cc:257] Raising pool_size_limit_ from 256 to 281 | |
I tensorflow/core/common_runtime/gpu/pool_allocator.cc:245] PoolAllocator: After 14656 get requests, put_count=14839 evicted_count=1000 eviction_rate=0.06739 and unsatisfied allocation rate=0.0597707 | |
I tensorflow/core/common_runtime/gpu/pool_allocator.cc:257] Raising pool_size_limit_ from 655 to 720 | |
INFO:tensorflow:loss = 1.02459, step = 101 | |
INFO:tensorflow:loss = 0.696416, step = 201 | |
INFO:tensorflow:loss = 0.794155, step = 301 | |
INFO:tensorflow:loss = 0.672896, step = 401 | |
INFO:tensorflow:loss = 0.744249, step = 501 | |
INFO:tensorflow:loss = 0.62826, step = 601 | |
INFO:tensorflow:loss = 0.603072, step = 701 | |
INFO:tensorflow:loss = 0.795948, step = 801 | |
INFO:tensorflow:loss = 0.582972, step = 901 | |
INFO:tensorflow:loss = 0.565721, step = 1001 | |
INFO:tensorflow:loss = 0.616399, step = 1101 | |
INFO:tensorflow:loss = 0.62225, step = 1201 | |
INFO:tensorflow:loss = 0.576449, step = 1301 | |
INFO:tensorflow:loss = 0.57228, step = 1401 | |
INFO:tensorflow:loss = 0.432333, step = 1501 | |
INFO:tensorflow:loss = 0.478497, step = 1601 | |
INFO:tensorflow:loss = 0.508757, step = 1701 | |
INFO:tensorflow:loss = 0.562799, step = 1801 | |
INFO:tensorflow:loss = 0.45738, step = 1901 | |
INFO:tensorflow:Saving checkpoints for 1985 into /home/ubuntu/git/deeplearning/network-model/runs/1480511520/model.ckpt. | |
WARNING:tensorflow:parser_num_threads is deprecated, it will be removed onSept 3 2016 | |
WARNING:tensorflow:Given features: {'distractor_0_len': <tf.Tensor 'read_batch_features_eval/fifo_queue_Dequeue:3' shape=(?, 1) dtype=int64>, 'distractor_0': <tf.Tensor 'read_batch_features_eval/fifo_queue_Dequeue:2' shape=(?, 160) dtype=int64>, 'utterance': <tf.Tensor 'read_batch_features_eval/fifo_queue_Dequeue:20' shape=(?, 160) dtype=int64>, 'distractor_1_len': <tf.Tensor 'read_batch_features_eval/fifo_queue_Dequeue:5' shape=(?, 1) dtype=int64>, 'distractor_4_len': <tf.Tensor 'read_batch_features_eval/fifo_queue_Dequeue:11' shape=(?, 1) dtype=int64>, 'utterance_len': <tf.Tensor 'read_batch_features_eval/fifo_queue_Dequeue:21' shape=(?, 1) dtype=int64>, 'distractor_7_len': <tf.Tensor 'read_batch_features_eval/fifo_queue_Dequeue:17' shape=(?, 1) dtype=int64>, 'context': <tf.Tensor 'read_batch_features_eval/fifo_queue_Dequeue:0' shape=(?, 160) dtype=int64>, 'distractor_4': <tf.Tensor 'read_batch_features_eval/fifo_queue_Dequeue:10' shape=(?, 160) dtype=int64>, 'distractor_6': <tf.Tensor 'read_batch_features_eval/fifo_queue_Dequeue:14' shape=(?, 160) dtype=int64>, 'distractor_3': <tf.Tensor 'read_batch_features_eval/fifo_queue_Dequeue:8' shape=(?, 160) dtype=int64>, 'context_len': <tf.Tensor 'read_batch_features_eval/fifo_queue_Dequeue:1' shape=(?, 1) dtype=int64>, 'distractor_1': <tf.Tensor 'read_batch_features_eval/fifo_queue_Dequeue:4' shape=(?, 160) dtype=int64>, 'distractor_5_len': <tf.Tensor 'read_batch_features_eval/fifo_queue_Dequeue:13' shape=(?, 1) dtype=int64>, 'distractor_5': <tf.Tensor 'read_batch_features_eval/fifo_queue_Dequeue:12' shape=(?, 160) dtype=int64>, 'distractor_6_len': <tf.Tensor 'read_batch_features_eval/fifo_queue_Dequeue:15' shape=(?, 1) dtype=int64>, 'distractor_8': <tf.Tensor 'read_batch_features_eval/fifo_queue_Dequeue:18' shape=(?, 160) dtype=int64>, 'distractor_2_len': <tf.Tensor 'read_batch_features_eval/fifo_queue_Dequeue:7' shape=(?, 1) dtype=int64>, 'distractor_8_len': <tf.Tensor 'read_batch_features_eval/fifo_queue_Dequeue:19' 
shape=(?, 1) dtype=int64>, 'distractor_3_len': <tf.Tensor 'read_batch_features_eval/fifo_queue_Dequeue:9' shape=(?, 1) dtype=int64>, 'distractor_2': <tf.Tensor 'read_batch_features_eval/fifo_queue_Dequeue:6' shape=(?, 160) dtype=int64>, 'distractor_7': <tf.Tensor 'read_batch_features_eval/fifo_queue_Dequeue:16' shape=(?, 160) dtype=int64>}, required signatures: {'utterance_len': TensorSignature(dtype=tf.int64, shape=TensorShape([Dimension(128), Dimension(1)]), is_sparse=False), 'context': TensorSignature(dtype=tf.int64, shape=TensorShape([Dimension(128), Dimension(160)]), is_sparse=False), 'context_len': TensorSignature(dtype=tf.int64, shape=TensorShape([Dimension(128), Dimension(1)]), is_sparse=False), 'utterance': TensorSignature(dtype=tf.int64, shape=TensorShape([Dimension(128), Dimension(160)]), is_sparse=False)}. | |
WARNING:tensorflow:Given targets: Tensor("zeros:0", shape=(16, 1), dtype=int64), required signatures: TensorSignature(dtype=tf.int64, shape=TensorShape([Dimension(128), Dimension(1)]), is_sparse=False). | |
INFO:tensorflow:Loading Vocab embeddings... | |
INFO:tensorflow:Start to Load Vocab. | |
INFO:tensorflow:Load Vocab Successfully. | |
INFO:tensorflow:Loading Glove embeddings... | |
INFO:tensorflow:Start to Load Glove. | |
INFO:tensorflow:Found 2169 out of 5501 vectors in Glove | |
INFO:tensorflow:Load Glove Successfully. | |
context_embedded Tensor("embed_context:0", shape=(128, 160, 100), dtype=float32) | |
utterance_embedded Tensor("embed_utterance:0", shape=(128, 160, 100), dtype=float32) | |
encoding_context Tensor("rnn/split:0", shape=(?, 256), dtype=float32) | |
encoding_utterance Tensor("rnn/split:1", shape=(?, 256), dtype=float32) | |
generated_response Tensor("prediction/ExpandDims:0", shape=(?, 256, 1), dtype=float32) | |
context_embedded Tensor("embed_context:0", shape=(?, 160, 100), dtype=float32) | |
utterance_embedded Tensor("embed_utterance:0", shape=(?, 160, 100), dtype=float32) | |
encoding_context Tensor("rnn/split:0", shape=(?, 256), dtype=float32) | |
encoding_utterance Tensor("rnn/split:1", shape=(?, 256), dtype=float32) | |
generated_response Tensor("prediction/ExpandDims:0", shape=(?, 256, 1), dtype=float32) | |
Traceback (most recent call last): | |
File "snap_train.py", line 82, in <module> | |
tf.app.run() | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/platform/app.py", line 30, in run | |
sys.exit(main(sys.argv[:1] + flags_passthrough)) | |
File "snap_train.py", line 79, in main | |
estimator.fit(input_fn=input_fn_train, steps=None, monitors=[eval_monitor]) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 333, in fit | |
max_steps=max_steps) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 708, in _train_model | |
max_steps=max_steps) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/graph_actions.py", line 285, in _monitored_train | |
None) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/monitored_session.py", line 368, in run | |
run_metadata=run_metadata) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/monitored_session.py", line 521, in run | |
run_metadata=run_metadata) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/monitored_session.py", line 488, in run | |
return self._sess.run(*args, **kwargs) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/monitored_session.py", line 625, in run | |
hook in outputs else None)) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/monitors.py", line 1215, in after_run | |
induce_stop = m.step_end(self._last_step, result) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/monitors.py", line 411, in step_end | |
return self.every_n_step_end(step, output) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/monitors.py", line 712, in every_n_step_end | |
steps=self.eval_steps, metrics=self.metrics, name=self.name) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 399, in evaluate | |
name=name) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 760, in _evaluate_model | |
eval_dict = self._get_eval_ops(features, targets, metrics) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 993, in _get_eval_ops | |
result.update(_make_metrics_ops(metrics, features, targets, predictions)) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 223, in _make_metrics_ops | |
result[name] = metric.create_metric_ops(features, targets, predictions) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/metric_spec.py", line 173, in create_metric_ops | |
prediction = _get_dict('prediction', predictions, self.prediction_key) | |
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/learn/python/learn/metric_spec.py", line 160, in _get_dict | |
name + 's dict, got %s' % dict_or_tensor) | |
ValueError: MetricSpec with prediction_key specified requires predictions dict, got Tensor("concat_5:0", shape=(?, 10), dtype=float32) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Train a single-layer softmax classifier on MNIST and log to TensorBoard.

The script builds the graph (placeholders, weights, softmax, cross-entropy
cost, gradient-descent train op, accuracy), records the cost and accuracy as
scalar summaries under ``logs_path`` for every training batch, and finally
prints the accuracy on the MNIST test set.
"""
# Makes print() a function on Python 2 as well, so the script runs unchanged
# on both Python 2 and Python 3 (the bare ``print x`` statement form is a
# SyntaxError on Python 3).
from __future__ import print_function

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# reset everything to rerun in jupyter
tf.reset_default_graph()

# config
batch_size = 100
learning_rate = 0.5
training_epochs = 5
logs_path = "/tmp/mnist/2"

# load mnist data set (labels are one-hot encoded)
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

# input images
with tf.name_scope('input'):
    # None -> batch size can be any size, 784 -> flattened mnist image
    x = tf.placeholder(tf.float32, shape=[None, 784], name="x-input")
    # target 10 output classes
    y_ = tf.placeholder(tf.float32, shape=[None, 10], name="y-input")

# model parameters will change during training so we use tf.Variable
with tf.name_scope("weights"):
    W = tf.Variable(tf.zeros([784, 10]))

# bias
with tf.name_scope("biases"):
    b = tf.Variable(tf.zeros([10]))

# implement model
with tf.name_scope("softmax"):
    # y is our prediction
    y = tf.nn.softmax(tf.matmul(x, W) + b)

# specify cost function
with tf.name_scope('cross_entropy'):
    # Clip the probabilities away from 0 before taking the log: a saturated
    # softmax output of exactly 0 would give log(0) = -inf, and 0 * -inf = NaN
    # would then poison the cost and every gradient derived from it.
    log_y = tf.log(tf.clip_by_value(y, 1e-10, 1.0))
    # this is our cost: mean over the batch of the per-example cross entropy
    cross_entropy = tf.reduce_mean(
        -tf.reduce_sum(y_ * log_y, reduction_indices=[1]))

# specify optimizer
with tf.name_scope('train'):
    # optimizer is an "operation" which we can execute in a session
    train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(cross_entropy)

with tf.name_scope('Accuracy'):
    # fraction of examples whose arg-max prediction matches the arg-max label
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# create a summary for our cost and accuracy
tf.scalar_summary("cost", cross_entropy)
tf.scalar_summary("accuracy", accuracy)

# merge all summaries into a single "operation" which we can execute in a session
summary_op = tf.merge_all_summaries()

# number of batches in one epoch; constant for the whole run, so compute it
# once, using integer division so the result is an int on Python 2 and 3
batch_count = mnist.train.num_examples // batch_size

with tf.Session() as sess:
    # variables need to be initialized before we can use them
    sess.run(tf.initialize_all_variables())

    # create log writer object
    writer = tf.train.SummaryWriter(logs_path, graph=tf.get_default_graph())
    try:
        # perform training cycles
        for epoch in range(training_epochs):
            for i in range(batch_count):
                batch_x, batch_y = mnist.train.next_batch(batch_size)

                # perform the operations we defined earlier on batch
                _, summary = sess.run([train_op, summary_op],
                                      feed_dict={x: batch_x, y_: batch_y})

                # write log; the second argument is the global step, i.e. the
                # index of this batch counted across all epochs
                writer.add_summary(summary, epoch * batch_count + i)

            # progress report on every fifth epoch (epoch 0, 5, 10, ...)
            if epoch % 5 == 0:
                print("Epoch: ", epoch)

        # accuracy.eval() uses the default session installed by the
        # enclosing `with tf.Session()` block, so it must stay inside it
        print("Accuracy: ", accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels}))
    finally:
        # flush buffered summary events to disk and release the event file,
        # even if training is interrupted by an exception
        writer.close()
    print("done")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment