This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#loc5 = "<XLA_MLIR_DEBUGINFO_BEGIN>torchvision.models.shufflenetv2.ShuffleNetV2/torch.nn.modules.pooling.MaxPool2d_maxpool;<XLA_MLIR_DEBUGINFO_END>aten__max_pool2d" | |
module @IrToHlo.1937 attributes {mhlo.cross_program_prefetches = [], mhlo.is_dynamic = false, mhlo.use_auto_spmd_partitioning = false} { | |
func.func @main(%arg0: tensor<1000xf32> , %arg1: tensor<1000x1024xf32> , %arg2: tensor<1024xf32> , %arg3: tensor<1024xf32> , %arg4: tensor<1024xf32> , %arg5: tensor<1024xf32> , %arg6: tensor<1024x464x1x1xf32> , %arg7: tensor<232xf32> , %arg8: tensor<232xf32> , %arg9: tensor<232xf32> , %arg10: tensor<232xf32> , %arg11: tensor<232x232x1x1xf32> , %arg12: tensor<232xf32> , %arg13: tensor<232xf32> , %arg14: tensor<232xf32> , %arg15: tensor<232xf32> , %arg16: tensor<232x1x3x3xf32> , %arg17: tensor<232xf32> , %arg18: tensor<232xf32> , %arg19: tensor<232xf32> , %arg20: tensor<232xf32> , %arg21: tensor<232x232x1x1xf32> , %arg22: tensor<232xf32> , %arg23: tensor<232xf32> , %arg24: tensor<232xf32> , %arg25: tensor<232xf32 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#loc5 = "<XLA_MLIR_DEBUGINFO_BEGIN>torchvision.models.shufflenetv2.ShuffleNetV2/torch.nn.modules.pooling.MaxPool2d_maxpool;<XLA_MLIR_DEBUGINFO_END>aten__max_pool2d" | |
module @IrToHlo.1937 attributes {mhlo.cross_program_prefetches = [], mhlo.is_dynamic = false, mhlo.use_auto_spmd_partitioning = false} { | |
func.func @main(%arg0: tensor<1000xf32> , %arg1: tensor<1000x1024xf32> , %arg2: tensor<1024xf32> , %arg3: tensor<1024xf32> , %arg4: tensor<1024xf32> , %arg5: tensor<1024xf32> , %arg6: tensor<1024x464x1x1xf32> , %arg7: tensor<232xf32> , %arg8: tensor<232xf32> , %arg9: tensor<232xf32> , %arg10: tensor<232xf32> , %arg11: tensor<232x232x1x1xf32> , %arg12: tensor<232xf32> , %arg13: tensor<232xf32> , %arg14: tensor<232xf32> , %arg15: tensor<232xf32> , %arg16: tensor<232x1x3x3xf32> , %arg17: tensor<232xf32> , %arg18: tensor<232xf32> , %arg19: tensor<232xf32> , %arg20: tensor<232xf32> , %arg21: tensor<232x232x1x1xf32> , %arg22: tensor<232xf32> , %arg23: tensor<232xf32> , %arg24: tensor<232xf32> , %arg25: tensor<232xf32 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// -----// IR Dump After TileAndDistributeToWorkgroups (iree-codegen-tile-and-distribute-to-workgroups) //----- // | |
hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf-x86_64", {cpu = "skylake-avx512", cpu_features = "+prfchw,-cldemote,+avx,+aes,+sahf,+pclmul,-xop,+crc32,+xsaves,-avx512fp16,-usermsr,-sm4,-egpr,+sse4.1,-avx512ifma,+xsave,-avx512pf,+sse4.2,-tsxldtrk,-ptwrite,-widekl,-sm3,+invpcid,+64bit,+xsavec,-avx10.1-512,-avx512vpopcntdq,+cmov,-avx512vp2intersect,+avx512cd,+movbe,-avxvnniint8,-avx512er,-ccmp,-amx-int8,-kl,-avx10.1-256,-sha512,-avxvnni,+rtm,+adx,+avx2,-hreset,-movdiri,-serialize,-vpclmulqdq,+avx512vl,-uintr,-cf,+clflushopt,-raoint,-cmpccxadd,+bmi,-amx-tile,+sse,-gfni,-avxvnniint16,-amx-fp16,-ndd,+xsaveopt,+rdrnd,+avx512f,-amx-bf16,-avx512bf16,-avx512vnni,-push2pop2,+cx8,+avx512bw,+sse3,-pku,+fsgsbase,-clzero,-mwaitx,-lwp,+lzcnt,-sha,-movdir64b,-ppx,-wbnoinvd,-enqcmd,-prefetchwt1,-avxneconvert,-tbm,-pconfig,-amx-complex,+ssse3,+cx16,+bmi2,+fma,+popcnt,-avxifma,+ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// -----// IR Dump After TileAndDistributeToWorkgroups (iree-codegen-tile-and-distribute-to-workgroups) //----- // | |
hal.executable.variant public @vmvx_bytecode_fb target(<"vmvx", "vmvx-bytecode-fb", {ukernels = "all"}>) { | |
hal.executable.export public @tensor_pack_dispatch_0_pack_f32 ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>) attributes {hal.interface.bindings = [#hal.interface.binding<0, 0>, #hal.interface.binding<0, 1>, #hal.interface.binding<0, 2>], translation_info = #iree_codegen.translation_info<VMVXDefault>} { | |
^bb0(%arg0: !hal.device): | |
%c1 = arith.constant 1 : index | |
hal.return %c1, %c1, %c1 : index, index, index | |
} | |
builtin.module { | |
func.func @tensor_pack_dispatch_0_pack_f32() { | |
%c256 = arith.constant 256 : index |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// -----// IR Dump After mlir::iree_compiler::IREE::VM::OrdinalAllocationPass (iree-vm-ordinal-allocation) //----- // | |
vm.module public @module attributes {ordinal_counts = #vm.ordinal_counts<import_funcs = 19, export_funcs = 2, internal_funcs = 2, global_bytes = 4, global_refs = 2, rodatas = 6, rwdatas = 0>} { | |
vm.global.i32 private mutable @_device_query_0 {ordinal = 0 : i32} : i32 | |
vm.global.ref private mutable @_pipeline_layout_0 {ordinal = 0 : i32} : !vm.ref<!hal.pipeline_layout> | |
vm.global.ref private mutable @_executable_tensor_pack_dispatch_0 {ordinal = 1 : i32} : !vm.ref<!hal.executable> | |
vm.rodata private @tensor_pack_dispatch_0_vmvx_bytecode_fb {alignment = 16 : i64, mime_type = "application/x-flatbuffers", ordinal = 0 : i32} dense<"0x504B03042D0000000000000021007860132CFFFFFFFFFFFFFFFF090019006D6F64756C652E6662CAFE01000001001000FC07000000000000FC07000000000000FC070000080000004952454568F8FFFF2C0000000100000024030000380300003C03000040030000440300004C04000000000F005C04000010000000060000006D6F64756 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[TRACY ] Capture Name: iree-run-module @ 2024-04-03 20:33:10 | |
[TRACY ] Cpu Arch: x86_64 | |
[TRACY ] | |
[TRACY-CPU] CPU Threads: 29 | |
[TRACY-CPU] CPU Zones: 1695 | |
[TRACY-CPU] Zone Stats: 2 | |
[TRACY-CPU] Zone Count Total Main thread iree-poller iree-worker-0 iree-worker-1 iree-worker-10 iree-worker-11 iree-worker-12 iree-worker-13 iree-worker-14 iree-worker-15 iree-worker-16 iree-worker-17 iree-worker-18 iree-worker-19 iree-worker-2 iree-worker-20 iree-worker-21 iree-worker-22 iree-worker-23 iree-worker-3 iree-worker-4 iree-worker-5 iree-worker-6 iree-worker-7 iree-worker-8 iree-worker-9 | |
[TRACY-CPU] Duration 327.9ms(100%) 72.4844ms(100%) 18.6893ms(100%) 17.7719ms(100%) 17.1578ms(100%) 11.073ms(100%) 7.05754ms(100%) 7.04673ms(100%) 7.65951ms(100%) 7.24205ms(100%) 7.59424ms(100%) 7.02599ms(100%) 6.95218ms(100%) 6.14334ms(100%) 5.46324ms(100%) 15.8939ms(100%) 4.9626ms(100%) 4.45126ms(100%) 3 |
This file has been truncated, but you can view the full file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// -----// IR Dump After AssignTargetDevicesPass (iree-hal-assign-target-devices) //----- // | |
#executable_target_vmvx_bytecode_fb = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {ukernels = "all"}> | |
#device_target_vmvx = #hal.device.target<"vmvx", [#executable_target_vmvx_bytecode_fb]> | |
// Top-level module; its hal.device.targets attribute lists the single
// #device_target_vmvx alias.
module attributes {hal.device.targets = [#device_target_vmvx]} { | |
// @tensor_pack: packs the 127x256 f32 source %arg0 into the 16x32x8x8
// destination %arg1 and returns the packed tensor.
func.func @tensor_pack(%arg0: tensor<127x256xf32>, %arg1: tensor<16x32x8x8xf32>) -> tensor<16x32x8x8xf32> { | |
// Padding value (0.0) handed to tensor.pack below.
%cst = arith.constant 0.000000e+00 : f32 | |
// Source dims 0 and 1 are each tiled by 8 (inner_tiles = [8, 8]) with the
// identity outer permutation. 256 / 8 = 32 exactly, but 127 is not a
// multiple of 8, so the 16th tile along dim 0 is only partially covered by
// source data; %cst is supplied as the padding value for that remainder.
%pack = tensor.pack %arg0 padding_value(%cst : f32) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %arg1 : tensor<127x256xf32> -> tensor<16x32x8x8xf32> | |
return %pack : tensor<16x32x8x8xf32> | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Only handles nchw * fchw -> nfhw. | |
// For nhwc * fhwc -> nhwf: | |
// permutation_input = [0,3,1,2] | |
// permutation_output = [0,3,1,2] | |
// permutation_window = [0,2,3,1] | |
// If feature_group_count != 1 or batch_group_count != 1, the caller | |
// needs to pass reshaped %input, %window, and %output with expanded dims: | |
// %input and %window are rank 5 and %output is rank 7. | |
#map0 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d3)> | |
#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d1, d4, d5, d6)> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// -----// IR Dump After AssignTargetDevicesPass (iree-hal-assign-target-devices) //----- // | |
#executable_target_vmvx_bytecode_fb = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {ukernels = "none"}> | |
#device_target_vmvx = #hal.device.target<"vmvx", [#executable_target_vmvx_bytecode_fb]> | |
// Top-level module; its hal.device.targets attribute lists the single
// #device_target_vmvx alias.
module attributes {hal.device.targets = [#device_target_vmvx]} { | |
// @abs_4d_f16: applies stablehlo.abs to a 5x5x5x5 f16 tensor and returns a
// tensor of the same type.
func.func @abs_4d_f16(%arg0: tensor<5x5x5x5xf16>) -> tensor<5x5x5x5xf16> { | |
// Single-type form of the op: operand and result are both tensor<5x5x5x5xf16>.
%0 = stablehlo.abs %arg0 : tensor<5x5x5x5xf16> | |
return %0 : tensor<5x5x5x5xf16> | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// @abs_4d_f16: takes a 5x5x5x5 f16 tensor %a, applies stablehlo.abs to it,
// and returns the result, which has the same tensor type as the input.
func.func @abs_4d_f16(%a : tensor<5x5x5x5xf16>) -> tensor<5x5x5x5xf16> { | |
// %r is the stablehlo.abs result for %a.
%r = stablehlo.abs %a : tensor<5x5x5x5xf16> | |
return %r : tensor<5x5x5x5xf16> | |
} |
NewerOlder