Skip to content

Instantly share code, notes, and snippets.

@mvsusp
Last active September 28, 2018 22:00
Show Gist options
  • Save mvsusp/562433f8182076528e0292ca3c69785c to your computer and use it in GitHub Desktop.
Save mvsusp/562433f8182076528e0292ca3c69785c to your computer and use it in GitHub Desktop.
How to compile and debug TensorFlow

How to compile and debug TensorFlow

Main reference

Building TensorFlow from source

Differences

On the AWS Deep Learning AMI

source activate base

bazel clean --expunge

CI_BUILD_PYTHON=python tensorflow/tools/ci_build/builds/configured CPU \
  bazel build -c opt --copt=-mavx -c dbg --strip=never --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" \
  tensorflow/tools/pip_package:build_pip_package

bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg

.tf_configure.bazelrc file

build --action_env PYTHON_BIN_PATH="/Users/mvs/miniconda3/envs/py27/bin/python"
build --action_env PYTHON_LIB_PATH="/Users/mvs/miniconda3/envs/py27/lib/python2.7/site-packages"
build --force_python=py2
build --host_force_python=py2
build --python_path="/usr/bin/python"
build:gcp --define with_gcp_support=true
build:hdfs --define with_hdfs_support=true
build --define with_s3_support=true
build --define with_kafka_support=true
build:xla --define with_xla_support=true
build:gdr --define with_gdr_support=true
build:verbs --define with_verbs_support=true
build --action_env TF_NEED_OPENCL_SYCL="0"
build --action_env TF_NEED_CUDA="0"
build --action_env TF_DOWNLOAD_CLANG="0"
build --define grpc_no_ares=true
build:opt --copt=-mavx
build:opt --host_copt=-march=native
build:opt --define with_default_optimizations=true
build --copt=-DGEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK
build --host_copt=-DGEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK

Building the S3 file system

conda activate base
CI_BUILD_PYTHON=python tensorflow/tools/ci_build/builds/configured CPU   bazel build -c opt --copt=-mavx -c dbg --strip=never --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0"   tensorflow/core/platform/s3:s3_file_system

Building TF

https://gist.github.com/Mistobaan/738e76c3a5bb1f9bcc52e2809a23a7a1

https://gist.github.com/Mistobaan/738e76c3a5bb1f9bcc52e2809a23a7a1

bazel clean --expunge
bazel build -c dbg  --strip=never --config=opt //tensorflow/tools/pip_package:build_pip_package

bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg

sudo pip install /tmp/tensorflow_pkg/tensorflow-1.6.0-py2-none-any.whl

Verbose C++ logging

export TF_CPP_MIN_VLOG_LEVEL=2
export S3_REGION=us-west-2

Debug

sudo gdb --directory /home/ubuntu/tensorflow -p 56032
break tensorflow/core/platform/s3/s3_file_system.cc:206
break tensorflow/core/platform/s3/s3_file_system.cc:387
break tensorflow/core/platform/s3/aws_logging.cc:60
break tensorflow/core/platform/s3/aws_logging.cc:57

break TF_NewDeprecatedSession
break tensorflow/core/platform/s3/s3_file_system.cc:379
break tensorflow/core/platform/s3/s3_file_system.cc:175

break tensorflow/core/platform/s3/s3_file_system.cc:310

File system registry

#0  tensorflow::(anonymous namespace)::GetDefaultClientConfig () at tensorflow/core/platform/s3/s3_file_system.cc:48
#1  0x00007f50c294239a in tensorflow::S3FileSystem::GetS3Client (this=0xd89230) at tensorflow/core/platform/s3/s3_file_system.cc:322
#2  0x00007f50c2942569 in tensorflow::S3FileSystem::NewRandomAccessFile (this=0xd89230,
    fname="s3://sagemaker-us-west-2-369233609183/datasets/sagemaker-tf-1mb-images-tf-records/a/train-00148-of-01024",
    result=0x7f4bbc002258) at tensorflow/core/platform/s3/s3_file_system.cc:334
#3  0x00007f50b70176d9 in tensorflow::Env::NewRandomAccessFile (this=0x1161770,
    fname="s3://sagemaker-us-west-2-369233609183/datasets/sagemaker-tf-1mb-images-tf-records/a/train-00148-of-01024",
    result=0x7f4bbc002258) at tensorflow/core/platform/env.cc:134
#4  0x00007f50bf6376f3 in tensorflow::(anonymous namespace)::TFRecordDatasetOp::Dataset::Iterator::SetupStreamsLocked (
    this=0x7f4bbc002210, env=0x1161770) at tensorflow/core/kernels/data/reader_dataset_ops.cc:660
#5  0x00007f50bf636ea1 in tensorflow::(anonymous namespace)::TFRecordDatasetOp::Dataset::Iterator::GetNextInternal (this=0x7f4bbc002210,
    ctx=0x7f4e64025eb0, out_tensors=0x7f4bd97f9d00, end_of_sequence=0x7f4bd97f99ff)
    at tensorflow/core/kernels/data/reader_dataset_ops.cc:614
#6  0x00007f50bf63859a in tensorflow::DatasetIterator<tensorflow::(anonymous namespace)::TFRecordDatasetOp::Dataset>::GetNext (
    this=0x7f4bbc002210, ctx=0x7f4e64025eb0, out_tensors=0x7f4bd97f9d00, end_of_sequence=0x7f4bd97f99ff)
    at ./tensorflow/core/framework/dataset.h:525
#7  0x00007f50bf5caed4 in tensorflow::(anonymous namespace)::FlatMapDatasetOp::Dataset::Iterator::GetNextInternal (this=0x7f4bbc0028a0,
    ctx=0x7f4e64025eb0, out_tensors=0x7f4bd97f9d00, end_of_sequence=0x7f4bd97f9c7f)
    at tensorflow/core/kernels/data/flat_map_dataset_op.cc:144
#8  0x00007f50bf5cc77c in tensorflow::DatasetIterator<tensorflow::(anonymous namespace)::FlatMapDatasetOp::Dataset>::GetNext (
    this=0x7f4bbc0028a0, ctx=0x7f4e64025eb0, out_tensors=0x7f4bd97f9d00, end_of_sequence=0x7f4bd97f9c7f)
    at ./tensorflow/core/framework/dataset.h:525
#9  0x00007f50bf61f490 in tensorflow::(anonymous namespace)::ParallelInterleaveDatasetOp::Dataset::Iterator::WorkerThread (
    this=0x7f4e7009ab40, ctx_ptr=0x7f4e64025eb0, thread_index=148) at tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc:417
#10 0x00007f50bf622d1c in std::_Mem_fn_base<void (tensorflow::(anonymous namespace)::ParallelInterleaveDatasetOp::Dataset::Iterator::*)(tensorflow::IteratorContext*, long long), true>::operator()<tensorflow::IteratorContext*&, long long&, void> (this=0x7f4e64025fc0,
    __object=0x7f4e7009ab40) at /usr/include/c++/5/functional:600
#11 0x00007f50bf622692 in std::_Bind<std::_Mem_fn<void (tensorflow::(anonymous namespace)::ParallelInterleaveDatasetOp::Dataset::Iterator::*)(tensorflow::IteratorContext*, long long int)>(tensorflow::(anonymous namespace)::ParallelInterleaveDatasetOp::Dataset::Iterator*, tensorflow::IteratorContext*, long long int)>::__call<void, 0ul, 1ul, 2ul>(<unknown type in /usr/local/lib/python2.7/dist-packages/tensorflow/python/_pywrap_tensorflow_internal.so, CU 0xab2bfce, DIE 0xabad791>, std::_Index_tuple<0ul, 1ul, 2ul>) (this=0x7f4e64025fc0,
    __args=<unknown type in /usr/local/lib/python2.7/dist-packages/tensorflow/python/_pywrap_tensorflow_internal.so, CU 0xab2bfce, DIE 0xabad791>) at /usr/include/c++/5/functional:1074
#12 0x00007f50bf621aa2 in std::_Bind<std::_Mem_fn<void (tensorflow::(anonymous namespace)::ParallelInterleaveDatasetOp::Dataset::Iterator::*)(tensorflow::IteratorContext*, long long int)>(tensorflow::(anonymous namespace)::ParallelInterleaveDatasetOp::Dataset::Iterator*, tensorflow::IteratorContext*, long long int)>::operator()<, void>(void) (this=0x7f4e64025fc0) at /usr/include/c++/5/functional:1133
#13 0x00007f50bf620cbe in std::_Function_handler<void(), std::_Bind<std::_Mem_fn<void (tensorflow::(anonymous namespace)::ParallelInterleaveDatasetOp::Dataset::Iterator::*)(tensorflow::IteratorContext*, long long int)>(tensorflow::(anonymous namespace)::ParallelInterleaveDatasetOp::Dataset::Iterator*, tensorflow::IteratorContext*, long long int)> >::_M_invoke(const std::_Any_data &) (__functor=...)
    at /usr/include/c++/5/functional:1871
#14 0x00007f50b6d9c616 in std::function<void ()>::operator()() const (this=0x7f4e64026018) at /usr/include/c++/5/functional:2267
#15 0x00007f50b7028ec6 in std::_Bind_simple<std::function<void ()> ()>::_M_invoke<>(std::_Index_tuple<>) (this=0x7f4e64026018)
    at /usr/include/c++/5/functional:1531
#16 0x00007f50b7028e2f in std::_Bind_simple<std::function<void ()> ()>::operator()() (this=0x7f4e64026018)
    at /usr/include/c++/5/functional:1520
#17 0x00007f50b7028dce in std::thread::_Impl<std::_Bind_simple<std::function<void ()> ()> >::_M_run() (this=0x7f4e64026000)
    at /usr/include/c++/5/thread:115
#18 0x00007f516b07fc80 in ?? () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
#19 0x00007f516c31a6ba in start_thread (arg=0x7f4bd97fa700) at pthread_create.c:333
#20 0x00007f516c05041d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:109



cfg
$19 = {userAgent = "aws-sdk-cpp/1.3.15 Linux/4.4.0-1052-aws x86_64", scheme = Aws::Http::Scheme::HTTPS, region = "us-east-1",
  useDualStack = false, maxConnections = 25, requestTimeoutMs = 3000, connectTimeoutMs = 1000,
  retryStrategy = std::shared_ptr (count 1, weak 0) 0x7f4bbc002860, endpointOverride = "", proxyScheme = Aws::Http::Scheme::HTTP,
  proxyHost = "", proxyPort = 0, proxyUserName = "", proxyPassword = "", executor = std::shared_ptr (count 1, weak 0) 0x7f4bbc004690,
  verifySSL = true, caPath = "", caFile = "", writeRateLimiter = std::shared_ptr (empty) 0x0,
  readRateLimiter = std::shared_ptr (empty) 0x0, httpLibOverride = Aws::Http::TransferLibType::DEFAULT_CLIENT, followRedirects = true,
  enableClockSkewAdjustment = true}

S3_USE_HTTPS=0

S3_VERIFY_SSL=0

S3_CONNECT_TIMEOUT_MSEC

S3_REQUEST_TIMEOUT_MSEC

You can toggle the gdb HUD (TUI mode) dynamically by pressing Ctrl+X followed by Ctrl+A.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment