Time for each load+add: 0.291310 ns for START=0
576460752303423488.000000
Time for each load+add: 0.290989 ns for START=1
inf
Time for each load+add: 0.291260 ns for START=2
nan
Time for each load+add: 0.292583 ns for START=3
nan
Time for each load+add: 0.290434 ns for START=4
nan
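
The source of this micro-benchmark is not part of the gist, only its output. For orientation, here is a minimal C sketch of a timing loop with the same output format; the names, buffer size, repetition count, and the meaning of START are all assumptions, and it will not reproduce the inf/nan sums above, which depend on how the data is initialized:

#include <stdio.h>
#include <time.h>

#define N    (1 << 20)
#define REPS 1000

static float buf[N];

int main(void) {
  for (int start = 0; start < 5; ++start) {
    /* How the buffer depends on START is not recoverable from the log. */
    for (int i = 0; i < N; ++i)
      buf[i] = (float)(i + start);

    struct timespec t0, t1;
    clock_gettime(CLOCK_MONOTONIC, &t0);
    double acc = 0.0;
    for (int r = 0; r < REPS; ++r)
      for (int i = 0; i < N; ++i)
        acc += buf[i];                 /* the timed load+add */
    clock_gettime(CLOCK_MONOTONIC, &t1);

    double ns = (t1.tv_sec - t0.tv_sec) * 1e9
              + (double)(t1.tv_nsec - t0.tv_nsec);
    printf("Time for each load+add: %f ns for START=%d\n",
           ns / ((double)N * REPS), start);
    printf("%f\n", acc);               /* printing the sum keeps the loop live */
  }
  return 0;
}

At ~0.29 ns per load+add the loop is retiring roughly one load+add per cycle on a multi-GHz core, which is why the timing barely moves across START values even as the sums saturate to inf and nan.
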
muralivi@gogeta:~$ /usr/local/google/home/muralivi/projects/iree-build/tools/iree-compile --output-format=vm-bytecode --mlir-print-op-on-diagnostic=false --iree-hal-target-backends=vmvx /usr/local/google/home/muralivi/projects/iree/tests/e2e/tensor_ops/pack.mlir -o check_vmvx_local-task_pack.mlir_module.vmfb --iree-hal-executable-object-search-path=\"/usr/local/google/home/muralivi/projects/iree-build\" --iree-llvm-embedded-linker-path=\"/usr/local/google/home/muralivi/projects/iree-build/llvm-project/bin/lld\" --iree-llvm-wasm-linker-path=\"/usr/local/google/home/muralivi/projects/iree-build/llvm-project/bin/lld\" >& ~/crap2
Aborted
muralivi@gogeta:~$ vim ~/crap2
muralivi@gogeta:~$ /usr/local/google/home/muralivi/projects/iree-build/tools/iree-compile --output-format=vm-bytecode --mlir-print-op-on-diagnostic=false --iree-hal-target-backends=vmvx /usr/local/google/home/muralivi/projects/iree/tests/e2e/tensor_ops/pack.mlir -o check_vmvx_local-task_pack.mlir_module.vmfb --iree-hal-executable-object-search-path
// -----// IR Dump Before TosaToSCF (tosa-to-scf) //----- //
func.func @tensor_float() {
  %0 = util.unfoldable_constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
  %1 = util.unfoldable_constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
  %2 = util.unfoldable_constant dense<1.000000e+00> : tensor<1xf32>
  %3 = "tosa.fully_connected"(%0, %1, %2) : (tensor<2x3xf32>, tensor<1x3xf32>, tensor<1xf32>) -> tensor<2x1xf32>
  check.expect_eq_const(%3, dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>) : tensor<2x1xf32>
  return
}
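
For reference, tosa.fully_connected computes out[i][j] = bias[j] + sum_k input[i][k] * weight[j][k], with the weight laid out [out_channels, in_channels]. A few lines of C confirm the constants the check expects (51 = 1*7 + 2*8 + 3*9 + 1 and 123 = 4*7 + 5*8 + 6*9 + 1):

#include <stdio.h>

int main(void) {
  const float in[2][3] = {{1, 2, 3}, {4, 5, 6}};
  const float w[1][3]  = {{7, 8, 9}};
  const float bias[1]  = {1};
  for (int i = 0; i < 2; ++i)
    for (int j = 0; j < 1; ++j) {
      float acc = bias[j];
      for (int k = 0; k < 3; ++k)
        acc += in[i][k] * w[j][k];    /* dot product against weight row j */
      printf("out[%d][%d] = %g\n", i, j, acc);  /* prints 51 and 123 */
    }
  return 0;
}
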
General Purpose Registers:
x0 = 0x0000600003e00038
x1 = 0x000000016feb2d00
x2 = 0x000000016feb2cc0
x3 = 0x000000016feb2cc0
x4 = 0x0000000000000000
x5 = 0x000000016feb2f10
x6 = 0x00000001c1c1d07d
x7 = 0x000000000d84bc77
x8 = 0x000000010019c3e0
-> 0x10019c3e0: stp x29, x30, [sp, #-0x10]!  ; prologue: push frame pointer (x29) and link register (x30)
0x10019c3e4: mov x29, sp                     ; establish the new frame pointer
0x10019c3e8: ldp x8, x9, [x1, #0x18]         ; load a pair of pointers from x1+0x18
0x10019c3ec: mov x13, xzr                    ; x13 = 0
0x10019c3f0: mov w17, #0x1                   ; w17 = 1
0x10019c3f4: mov w12, #0xd                   ; w12 = 13
0x10019c3f8: mov w14, #0x34                  ; w14 = 52 (13 * 4)
0x10019c3fc: ldr w10, [x8]                   ; 32-bit load through x8
0x10019c400: ldp x11, x8, [x9]               ; load two more 64-bit values from x9
0x10019c404: add x9, x11, x10                ; x9 = x11 + x10 (base + offset)
module attributes {hal.device.targets = [#hal.device.target<"llvm-cpu", {executable_targets = [#hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", native_vector_size = 16 : index, target_triple = "arm64-unknown-unknown-eabi-elf"}>]}>]} {
  hal.executable private @encode_dispatch_285 {
    hal.executable.variant public @embedded_elf_arm_64, target = <"llvm-cpu", "embedded-elf-arm_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", native_vector_size = 16 : index, target_triple = "arm64-unknown-unknown-eabi-elf"}> {
      hal.executable.export public @encode_dispatch_285_generic_8x13 ordinal(0) layout(#hal.pipeline.layout<push_constants = 1, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>) {
      ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index):
        %x, %y, %z = flow.dispatch.workgroup_count_from_dag_roo
module attributes {hal.device.targets = [#hal.device.target<"llvm-cpu", {executable_targets = [#hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", native_vector_size = 16 : index, target_triple = "arm64-unknown-unknown-eabi-elf"}>]}>]} {
  hal.executable private @encode_dispatch_285 {
    hal.executable.variant public @embedded_elf_arm_64, target = <"llvm-cpu", "embedded-elf-arm_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", native_vector_size = 16 : index, target_triple = "arm64-unknown-unknown-eabi-elf"}> {
      hal.executable.export public @encode_dispatch_285_generic_768x50272 ordinal(0) layout(#hal.pipeline.layout<push_constants = 1, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>) {
      ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index):
        %x, %y, %z = flow.dispatch.workgroup_count_from_da
// -----// IR Dump After LegalizeControlFlowPass (mhlo-legalize-control-flow) //----- //
func.func @lbeta__2x2x2x2x2x2x2__f32__uniform(%arg0: !iree_input.buffer_view) -> !iree_input.buffer_view attributes {iree.abi = "{\22a\22:[[\22ndarray\22,\22f32\22,7,null,null,null,null,null,null,null]],\22r\22:[[\22ndarray\22,\22f32\22,6,null,null,null,null,null,null]],\22v\22:1}"} {
  %0 = mhlo.constant dense<-0.000000e+00> : tensor<f32>
  %1 = iree_input.cast.buffer_view_to_tensor %arg0 : !iree_input.buffer_view -> tensor<?x?x?x?x?x?x?xf32>
  %2 = chlo.lgamma %1 : tensor<?x?x?x?x?x?x?xf32> -> tensor<?x?x?x?x?x?x?xf32>
  %3 = mhlo.reduce(%2 init: %0) applies mhlo.add across dimensions = [6] : (tensor<?x?x?x?x?x?x?xf32>, tensor<f32>) -> tensor<?x?x?x?x?x?xf32>
  %4 = mhlo.reduce(%1 init: %0) applies mhlo.add across dimensions = [6] : (tensor<?x?x?x?x?x?x?xf32>, tensor<f32>) -> tensor<?x?x?x?x?x?xf32>
  %5 = chlo.lgamma %4 : tensor<?x?x?x?x?x?xf32> -> tensor<?x?x?x?x?x?xf32>
  %6 = chlo.broadcast_subtract %3, %5 : (tensor
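
The dump above is the standard multivariate log-beta identity applied along the trailing dimension: %3 sums lgamma over each element, %5 takes lgamma of the element sum, and the truncated %6 subtracts the two:

  lbeta(x_1, ..., x_n) = sum_i lgamma(x_i) - lgamma(sum_i x_i)
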
muralivi@gogeta:~/projects/iree$ cmake --build ../iree-build
[0/2] Re-checking globbed directories...
[347/1202] Building CXX object third_party/llvm-project/llvm/tools/torch-mlir-dialects/lib/Dialect/TMTensor/IR/CMakeFiles/obj.TorchMLIRTMTensorDialect.dir/TMTensorInterfaces.cpp.o
FAILED: third_party/llvm-project/llvm/tools/torch-mlir-dialects/lib/Dialect/TMTensor/IR/CMakeFiles/obj.TorchMLIRTMTensorDialect.dir/TMTensorInterfaces.cpp.o
/usr/bin/clang++ -DGTEST_HAS_RTTI=0 -D_DEBUG -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/local/google/home/muralivi/projects/iree-build/third_party/llvm-project/llvm/tools/torch-mlir-dialects/lib/Dialect/TMTensor/IR -I/usr/local/google/home/muralivi/projects/iree/third_party/torch-mlir-dialects/lib/Dialect/TMTensor/IR -I/usr/local/google/home/muralivi/projects/iree-build/third_party/llvm-project/llvm/include -I/usr/local/google/home/muralivi/projects/iree/third_party/llvm-project/llvm/include -I/usr/local/google/home/muralivi/proj