Skip to content

Instantly share code, notes, and snippets.

@lissyx
Created August 7, 2017 19:35
Show Gist options
  • Save lissyx/c008b43fd808d132989ec4d238d664fb to your computer and use it in GitHub Desktop.
Save lissyx/c008b43fd808d132989ec4d238d664fb to your computer and use it in GitHub Desktop.
RPi3 ARMv8
diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl
index 48ef8dfa8..be831d5c0 100644
--- a/tensorflow/core/platform/default/build_config.bzl
+++ b/tensorflow/core/platform/default/build_config.bzl
@@ -8,7 +8,7 @@ load("//tensorflow:tensorflow.bzl", "if_not_mobile")
WITH_GCP_SUPPORT = False
WITH_HDFS_SUPPORT = False
WITH_XLA_SUPPORT = False
-WITH_JEMALLOC = True
+WITH_JEMALLOC = False
# Appends a suffix to a list of deps.
def tf_deps(deps, suffix):
diff --git a/tensorflow/tools/graph_transforms/quantize_nodes.cc b/tensorflow/tools/graph_transforms/quantize_nodes.cc
index f460f31d3..7e7b4abe0 100644
--- a/tensorflow/tools/graph_transforms/quantize_nodes.cc
+++ b/tensorflow/tools/graph_transforms/quantize_nodes.cc
@@ -684,6 +684,11 @@ Status QuantizeNodes(const GraphDef& input_graph_def,
const NodeDef& float_node = match.node;
const QuantizedOpInfo& op_info = op_map[float_node.op()];
+ bool are_under_while_if = false;
+ if ( (float_node.name().find("/while/") != string::npos) || (float_node.name().find("/if/") != string::npos) ) {
+ are_under_while_if = true;
+ }
+
DataTypeVector input_types;
DataTypeVector output_types;
TF_RETURN_IF_ERROR(
@@ -723,10 +728,15 @@ Status QuantizeNodes(const GraphDef& input_graph_def,
string unique_input_name =
namespace_prefix + "/" + UniqueNodeNameFromInput(input_name);
+ fprintf(stderr, "float_node.op()=%s input_name=%s are_under_while_if=%d\n", float_node.op().c_str(), input_name.c_str(), are_under_while_if);
+
// Add some common constants we need for reshaping inputs.
NodeDef reshape_dims;
reshape_dims.set_op("Const");
reshape_dims.set_name(unique_input_name + "/reshape_dims");
+ if (are_under_while_if) {
+ AddNodeInput("^" + input_name, &reshape_dims);
+ }
SetNodeAttr("dtype", DT_INT32, &reshape_dims);
Tensor reshape_dims_tensor(DT_INT32, {1});
reshape_dims_tensor.flat<int32>()(0) = -1;
@@ -736,6 +746,9 @@ Status QuantizeNodes(const GraphDef& input_graph_def,
NodeDef reduction_dims;
reduction_dims.set_op("Const");
reduction_dims.set_name(unique_input_name + "/reduction_dims");
+ if (are_under_while_if) {
+ AddNodeInput("^" + input_name, &reduction_dims);
+ }
SetNodeAttr("dtype", DT_INT32, &reduction_dims);
Tensor reduction_dims_tensor(DT_INT32, {1});
reduction_dims_tensor.flat<int32>()(0) = 0;
diff --git a/tools/arm_compiler/BUILD b/tools/arm_compiler/BUILD
index 92699753f..58fa200b4 100644
--- a/tools/arm_compiler/BUILD
+++ b/tools/arm_compiler/BUILD
@@ -13,6 +13,7 @@ cc_toolchain_suite(
"linaro-armeabi|gcc": ":cc-compiler-linaro-armeabi",
"linaro64-armeabi|gcc": ":cc-compiler-linaro64-armeabi",
"rpi-armeabi|gcc": ":cc-compiler-rpi-armeabi",
+ "rpi3-aarch64|gcc": ":cc-compiler-rpi3-aarch64",
},
)
@@ -146,3 +147,18 @@ cc_toolchain(
supports_param_files = 1,
visibility = ["//visibility:public"],
)
+
+cc_toolchain(  # Toolchain target that the cc_toolchain_suite entry "rpi3-aarch64|gcc" resolves to.
+ name = "cc-compiler-rpi3-aarch64",
+ all_files = ":gcc_linux_all_files",
+ compiler_files = ":gcc_linux_compiler_files",
+ cpu = "gcc-aarch64",  # NOTE(review): differs from the "rpi3-aarch64" cpu key used by cc_toolchain_suite and CROSSTOOL - confirm this is intended.
+ dwp_files = ":empty",
+ dynamic_runtime_libs = [":empty"],
+ linker_files = ":gcc_linux_linker_files",
+ objcopy_files = "//tools/arm_compiler/gcc_arm_rpi:objcopy",  # NOTE(review): gcc_arm_rpi package, while the CROSSTOOL toolchain declares /usr/bin/aarch64-linux-gnu-objcopy - confirm.
+ static_runtime_libs = [":empty"],
+ strip_files = "//tools/arm_compiler/gcc_arm_rpi:strip",  # NOTE(review): same gcc_arm_rpi vs. /usr/bin/aarch64-linux-gnu-strip mismatch - confirm.
+ supports_param_files = 1,
+ visibility = ["//visibility:public"],
+)
diff --git a/tools/arm_compiler/CROSSTOOL b/tools/arm_compiler/CROSSTOOL
index e8f855b8a..c09182065 100644
--- a/tools/arm_compiler/CROSSTOOL
+++ b/tools/arm_compiler/CROSSTOOL
@@ -17,6 +17,11 @@ default_toolchain {
toolchain_identifier: "gcc_rpi_linux_armhf"
}
+default_toolchain {  # Maps cpu "rpi3-aarch64" to the toolchain whose toolchain_identifier is "gcc_rpi3_linux_aarch64" (declared further down in this file).
+ cpu: "rpi3-aarch64"
+ toolchain_identifier: "gcc_rpi3_linux_aarch64"
+}
+
toolchain {
abi_version: "armeabi"
abi_libc_version: "glibc_2.13"
@@ -352,8 +357,6 @@ toolchain {
linker_flag: "-Wl,--gc-sections"
}
}
-
-
toolchain {
abi_version: "armeabi"
abi_libc_version: "glibc_2.19"
@@ -412,6 +415,8 @@ toolchain {
compiler_flag: "external/GccArmRpi/arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf/arm-linux-gnueabihf/sysroot/usr/include/arm-linux-gnueabihf"
compiler_flag: "-isystem"
compiler_flag: "external/GccArmRpi/arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf/lib/gcc/arm-linux-gnueabihf/4.9.3/include"
+ compiler_flag: "-isystem"
+ compiler_flag: "DEEPSPEECH_ROOT/multistrap-raspbian-jessie/usr/include/"
cxx_flag: "-std=c++11"
cxx_flag: "-isystem"
@@ -433,6 +438,7 @@ toolchain {
cxx_builtin_include_directory: "%package(@GccArmRpi//arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf/lib/gcc/arm-linux-gnueabihf/4.9.3/include)%"
cxx_builtin_include_directory: "%package(@GccArmRpi//arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf/lib/gcc/arm-linux-gnueabihf/4.9.3/include-fixed)%"
cxx_builtin_include_directory: "%package(@GccArmRpi//arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf/arm-linux-gnueabihf/include)%/c++/4.9.3"
+ cxx_builtin_include_directory: "DEEPSPEECH_ROOT/multistrap-raspbian-jessie/usr/include/"
# Anticipated future default.
# This makes GCC and Clang do what we want when called through symlinks.
@@ -473,6 +479,7 @@ toolchain {
linker_flag: "-Lexternal/GccArmRpi/arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf/arm-linux-gnueabihf/sysroot/lib"
linker_flag: "-Lexternal/GccArmRpi/arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf/arm-linux-gnueabihf/sysroot/usr/lib"
linker_flag: "-Bexternal/GccArmRpi/arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf/arm-linux-gnueabihf/bin"
+ linker_flag: "-LDEEPSPEECH_ROOT/multistrap-raspbian-jessie/usr/lib"
linker_flag: "-pie"
linker_flag: "-lstdc++"
# linker_flag: "-lm"
@@ -497,7 +504,7 @@ toolchain {
# Maybe we should enable https://gcc.gnu.org/wiki/DebugFission for opt or
# even generally? However, that can't happen here, as it requires special
# handling in Bazel.
- compiler_flag: "-g0"
+ compiler_flag: "-g"
# Disable assertions
compiler_flag: "-DNDEBUG"
@@ -530,3 +537,155 @@ toolchain {
}
linking_mode_flags { mode: DYNAMIC }
}
+
+toolchain {  # aarch64 cross toolchain for the RPi3: host-installed /usr/bin/aarch64-linux-gnu-* tools, sysroot under DEEPSPEECH_ROOT/multistrap-debian_arm64-sid.
+ abi_version: "aarch64"
+ abi_libc_version: "glibc_2.19"
+ builtin_sysroot: ""
+ compiler: "gcc"
+ host_system_name: "aarch64"
+ needsPic: true
+ supports_gold_linker: false
+ supports_incremental_linker: false
+ supports_fission: false
+ supports_interface_shared_objects: false
+ supports_normalizing_ar: false
+ supports_start_end_lib: false
+ supports_thin_archives: false
+ target_libc: "glibc_2.19"
+ target_cpu: "rpi3-aarch64"
+ target_system_name: "arm64-v8a"
+ toolchain_identifier: "gcc_rpi3_linux_aarch64"
+
+ tool_path { name: "ar" path: "/usr/bin/aarch64-linux-gnu-ar" }
+ tool_path { name: "compat-ld" path: "/usr/bin/aarch64-linux-gnu-ld" }
+ tool_path { name: "cpp" path: "/usr/bin/aarch64-linux-gnu-cpp" }
+ tool_path { name: "dwp" path: "/usr/bin/aarch64-linux-gnu-dwp" }
+ tool_path { name: "gcc" path: "/usr/bin/aarch64-linux-gnu-gcc" }
+ tool_path { name: "gcov" path: "/usr/bin/aarch64-linux-gnu-gcov" }
+ # C(++) compiles invoke the compiler (as that is the one knowing where
+ # to find libraries), but we provide LD so other rules can invoke the linker.
+ tool_path { name: "ld" path: "/usr/bin/aarch64-linux-gnu-ld" }
+ tool_path { name: "nm" path: "/usr/bin/aarch64-linux-gnu-nm" }
+ tool_path { name: "objcopy" path: "/usr/bin/aarch64-linux-gnu-objcopy" }
+ objcopy_embed_flag: "-I"
+ objcopy_embed_flag: "binary"
+ tool_path { name: "objdump" path: "/usr/bin/aarch64-linux-gnu-objdump" }
+ tool_path { name: "strip" path: "/usr/bin/aarch64-linux-gnu-strip" }
+
+ compiler_flag: "-march=armv8-a+crc"
+ #compiler_flag: "--sysroot=external/GccArmRpi/arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf/arm-linux-gnueabihf/sysroot"
+ compiler_flag: "--sysroot=DEEPSPEECH_ROOT/multistrap-debian_arm64-sid/"
+ #compiler_flag: "-Wl,--sysroot=external/GccArmRpi/arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf/arm-linux-gnueabihf/sysroot"
+ #compiler_flag: "-mfloat-abi=hard"
+ # Local change to disable IS_MOBILE_PLATFORM
+ compiler_flag: "-D__ARM_RPI__"
+ # compiler_flag: "-nostdinc"
+ # compiler_flag: "-isystem"
+ cxx_flag: "-std=c++11"
+ cxx_builtin_include_directory: "DEEPSPEECH_ROOT/multistrap-debian_arm64-sid/usr/include/"
+
+ # Anticipated future default.
+ # This makes GCC and Clang do what we want when called through symlinks.
+ unfiltered_cxx_flag: "-no-canonical-prefixes"
+
+ # Make C++ compilation deterministic. Use linkstamping instead of these
+ # compiler symbols.
+ unfiltered_cxx_flag: "-Wno-builtin-macro-redefined"
+ unfiltered_cxx_flag: "-D__DATE__=\"redacted\""
+ unfiltered_cxx_flag: "-D__TIMESTAMP__=\"redacted\""
+ unfiltered_cxx_flag: "-D__TIME__=\"redacted\""
+
+ # Security hardening on by default.
+ compiler_flag: "-fstack-protector"
+ compiler_flag: "-fPIE"
+ # All warnings are enabled. Maybe enable -Werror as well?
+ compiler_flag: "-Wall"
+ # Enable a few more warnings that aren't part of -Wall.
+ compiler_flag: "-Wunused-but-set-parameter"
+ # But disable some that are problematic.
+ compiler_flag: "-Wno-free-nonheap-object" # has false positives
+ # Keep stack frames for debugging, even in opt mode.
+ compiler_flag: "-fno-omit-frame-pointer"
+ # Enable coloring even if there's no attached terminal. Bazel removes the
+ # escape sequences if --nocolor is specified.
+ compiler_flag: "-fdiagnostics-color=always"
+
+ # compiler_flag: "-D__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1"
+ # compiler_flag: "-D__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2"
+ # compiler_flag: "-D__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4"
+ # compiler_flag: "-D__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8"
+
+ cxx_builtin_include_directory: "/usr/aarch64-linux-gnu/include/"
+ cxx_builtin_include_directory: "/usr/lib/gcc-cross/aarch64-linux-gnu/6/include/"
+ cxx_builtin_include_directory: "/usr/lib/gcc-cross/aarch64-linux-gnu/6/include-fixed/"
+
+ # linker_flag: "-target"
+ # linker_flag: "arm-linux-gnueabihf"
+ linker_flag: "--sysroot=DEEPSPEECH_ROOT/multistrap-debian_arm64-sid"
+ linker_flag: "-pass-exit-codes"
+ #linker_flag: "-Lexternal/GccArmRpi/arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf/arm-linux-gnueabihf/lib"
+ #linker_flag: "-Lexternal/GccArmRpi/arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf/arm-linux-gnueabihf/sysroot/lib"
+ #linker_flag: "-Lexternal/GccArmRpi/arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf/arm-linux-gnueabihf/sysroot/usr/lib"
+ #linker_flag: "-LDEEPSPEECH_ROOT/multistrap-debian_arm64-sid/lib"
+ #linker_flag: "-LDEEPSPEECH_ROOT/multistrap-debian_arm64-sid/usr/lib"
+ #linker_flag: "-Bexternal/GccArmRpi/arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf/arm-linux-gnueabihf/bin"
+ linker_flag: "-pie"
+ linker_flag: "-lstdc++"
+ # linker_flag: "-lm"
+ # linker_flag: "-lpthread"
+ linker_flag: "-Wl,--dynamic-linker=/lib/ld-linux-aarch64.so.1" # glibc ELF interpreter for AArch64; ld-linux-armhf.so.3 is the 32-bit hard-float loader
+ linker_flag: "-Wl,-no-as-needed"
+ linker_flag: "-Wl,-z,relro,-z,now"
+ linker_flag: "-no-canonical-prefixes"
+ # Stamp the binary with a unique identifier.
+ linker_flag: "-Wl,--build-id=md5"
+ linker_flag: "-Wl,--hash-style=gnu"
+
+ compilation_mode_flags {
+ mode: DBG
+ # Enable debug symbols.
+ compiler_flag: "-g"
+ }
+ compilation_mode_flags {
+ mode: OPT
+
+ # Debug symbols are kept in OPT builds as well (-g rather than -g0).
+ # Maybe we should enable https://gcc.gnu.org/wiki/DebugFission for opt or
+ # even generally? However, that can't happen here, as it requires special
+ # handling in Bazel.
+ compiler_flag: "-g"
+
+ # Disable assertions
+ compiler_flag: "-DNDEBUG"
+
+ # Removal of unused code and data at link time (can this increase binary size in some cases?).
+ compiler_flag: "-ffunction-sections"
+ compiler_flag: "-fdata-sections"
+ linker_flag: "-Wl,--gc-sections"
+
+ # Conservative choice for -O
+ # -O3 can increase binary size and even slow down the resulting binaries.
+ # Profile first and / or use FDO if you need better performance than this.
+ compiler_flag: "-O2"
+
+ # Conservative choice; -D_FORTIFY_SOURCE=2 may be unsafe in some cases.
+ # We need to undef it before redefining it as some distributions now have
+ # it enabled by default.
+ # Also depends on -O.
+ compiler_flag: "-U_FORTIFY_SOURCE"
+ compiler_flag: "-D_FORTIFY_SOURCE=1"
+
+ # Basic optims for RPi3, from Gentoo/ArchLinux Wiki
+ compiler_flag: "-mtune=cortex-a53"
+ #compiler_flag: "-mfpu=crypto-neon-fp-armv8"
+ #compiler_flag: "-mfloat-abi=hard"
+ # Inference time on small frozen LDC93S1 model goes from 30s to 20s with
+ # that flag, but generates instructions that valgrind chokes on.
+ #compiler_flag: "-mfpu=neon-fp-armv8"
+ #compiler_flag: "-funsafe-math-optimizations"
+ compiler_flag: "-ftree-vectorize"
+ compiler_flag: "-pipe"
+ }
+ linking_mode_flags { mode: DYNAMIC }
+}
@elpimous
Copy link

elpimous commented Aug 7, 2017

thanks for help !

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment