Skip to content

Instantly share code, notes, and snippets.

@benvanik
Created June 5, 2020 21:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save benvanik/321bf0f993cbf80819cd65e89425d86f to your computer and use it in GitHub Desktop.
Save benvanik/321bf0f993cbf80819cd65e89425d86f to your computer and use it in GitHub Desktop.
%25 = xla_hlo.minimum %23, %24 : tensor<1x10xf32>
%26 = "xla_hlo.broadcast_in_dim"(%cst_0) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
%27 = xla_hlo.maximum %25, %26 : tensor<1x10xf32>
%28 = "xla_hlo.slice"(%8) {limit_indices = dense<[1, 40]> : tensor<2xi64>, start_indices = dense<[0, 30]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} : (tensor<1x40xf32>) -> tensor<1x10xf32>
%29 = xla_hlo.multiply %6, %28 : tensor<1x10xf32>
%30 = "xla_hlo.tanh"(%29) : (tensor<1x10xf32>) -> tensor<1x10xf32>
%31 = xla_hlo.multiply %6, %30 : tensor<1x10xf32>
%32 = xla_hlo.add %6, %31 : tensor<1x10xf32>
%33 = "xla_hlo.tanh"(%27) : (tensor<1x10xf32>) -> tensor<1x10xf32>
%34 = xla_hlo.multiply %32, %33 : tensor<1x10xf32>
%35 = "xla_hlo.select"(%5, %arg0, %34) : (tensor<1x10xi1>, tensor<1x10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32>
%36 = "xla_hlo.reshape"(%35) : (tensor<1x10xf32>) -> tensor<1x1x10xf32>
return %35, %36 : tensor<1x10xf32>, tensor<1x1x10xf32>
}
// Private I/O interface for the enclosing dispatch: nine StorageBuffer
// bindings, all in set 0.
hal.interface @legacy_io attributes {sym_visibility = "private"} {
// Bindings 0-6: the seven inputs @arg0..@arg6, each declared access="Read".
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read"
hal.interface.binding @arg2, set=0, binding=2, type="StorageBuffer", access="Read"
hal.interface.binding @arg3, set=0, binding=3, type="StorageBuffer", access="Read"
hal.interface.binding @arg4, set=0, binding=4, type="StorageBuffer", access="Read"
hal.interface.binding @arg5, set=0, binding=5, type="StorageBuffer", access="Read"
hal.interface.binding @arg6, set=0, binding=6, type="StorageBuffer", access="Read"
// Bindings 7-8: the two results @ret0/@ret1, each declared access="Write|Discard".
hal.interface.binding @ret0, set=0, binding=7, type="StorageBuffer", access="Write|Discard"
hal.interface.binding @ret1, set=0, binding=8, type="StorageBuffer", access="Write|Discard"
}
}
}
}
// Exported entry point (iree.module.export). Takes a tensor<1x5xf32> and a
// tensor<1x5x2x2xf32> and returns a tensor<5x1x10xf32>.
// Control flow is a loop over basic blocks:
//   entry -> ^bb1 (condition) -> ^bb2 (body, branches back to ^bb1)
//                             -> ^bb3 (exit, returns the accumulated tensor).
func @main(%arg0: tensor<1x5xf32> {iree.reflection = {}}, %arg1: tensor<1x5x2x2xf32> {iree.reflection = {}}) -> (tensor<5x1x10xf32> {iree.reflection = {}}) attributes {iree.module.export, iree.reflection = {f = "I19!B5!d1d5B9!d1d5d2d2R11!B8!d5d1d10", fv = "1"}} {
// Index constants captured by the stream fragments below. In every dispatch
// in this function the bracketed index operand equals the element count of
// that dispatch's result tensor (presumably the dispatch workload size --
// confirm against the flow dialect definition).
%c0 = constant 0 : index
%c64 = constant 64 : index
%c74 = constant 74 : index
// 74x40 splat constant (every element 0.42); loop-carried unchanged and fed
// to dispatch 17 in the loop body.
%cst = constant dense<4.200000e-01> : tensor<74x40xf32>
// The integer vector [1, 2, 3, 4, 5]; second operand of dispatch 6.
%cst_0 = constant dense<[1, 2, 3, 4, 5]> : tensor<5xi32>
%c1 = constant 1 : index
%c10 = constant 10 : index
%c20 = constant 20 : index
%c320 = constant 320 : index
%c5 = constant 5 : index
%c40 = constant 40 : index
%c50 = constant 50 : index
// Pre-loop stream fragment: runs dispatches 0-12 and yields seven results
// that seed the loop-carried values of ^bb1.
%0:7 = flow.ex.stream.fragment(%arg2 = %c10 : index, %arg3 = %c20 : index, %arg4 = %arg1 : tensor<1x5x2x2xf32>, %arg5 = %c320 : index, %arg6 = %c5 : index, %arg7 = %arg0 : tensor<1x5xf32>, %arg8 = %cst_0 : tensor<5xi32>, %arg9 = %c1 : index, %arg10 = %c40 : index, %arg11 = %c50 : index) -> (tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<i32>, tensor<i32>, tensor<40xf32>, tensor<5x1x10xf32>) {
// Dispatch 0 takes no tensor operands and produces a 1x10 tensor.
%25 = flow.dispatch @main_ex_dispatch_0::@main_ex_dispatch_0[%arg2 : index]() : () -> tensor<1x10xf32>
// Dispatches 1-3: chain on the 1x5x2x2 input -> 1x5x4 -> 1x5x64 -> 5x1x64.
%26 = flow.dispatch @main_ex_dispatch_1::@main_ex_dispatch_1[%arg3 : index](%arg4) : (tensor<1x5x2x2xf32>) -> tensor<1x5x4xf32>
%27 = flow.dispatch @main_ex_dispatch_2::@main_ex_dispatch_2[%arg5 : index](%26) : (tensor<1x5x4xf32>) -> tensor<1x5x64xf32>
%28 = flow.dispatch @main_ex_dispatch_3::@main_ex_dispatch_3[%arg5 : index](%27) : (tensor<1x5x64xf32>) -> tensor<5x1x64xf32>
// Dispatches 4-10: chain on the 1x5 input that ends in the two scalar i32
// results %33 and %35.
%29 = flow.dispatch @main_ex_dispatch_4::@main_ex_dispatch_4[%arg6 : index](%arg7) : (tensor<1x5xf32>) -> tensor<5x1x1xf32>
%30 = flow.dispatch @main_ex_dispatch_5::@main_ex_dispatch_5[%arg6 : index](%29) : (tensor<5x1x1xf32>) -> tensor<5xf32>
%31:2 = flow.dispatch @main_ex_dispatch_6::@main_ex_dispatch_6[%arg6 : index](%30, %arg8) : (tensor<5xf32>, tensor<5xi32>) -> (tensor<5xi32>, tensor<5xi32>)
%32 = flow.dispatch @main_ex_dispatch_7::@main_ex_dispatch_7[%arg9 : index](%31#0) : (tensor<5xi32>) -> tensor<i32>
%33 = flow.dispatch @main_ex_dispatch_8::@main_ex_dispatch_8[%arg9 : index](%32) : (tensor<i32>) -> tensor<i32>
%34 = flow.dispatch @main_ex_dispatch_9::@main_ex_dispatch_9[%arg9 : index](%31#1) : (tensor<5xi32>) -> tensor<i32>
%35 = flow.dispatch @main_ex_dispatch_10::@main_ex_dispatch_10[%arg9 : index](%34, %32) : (tensor<i32>, tensor<i32>) -> tensor<i32>
// Dispatches 11 and 12 take no tensor operands; they produce a 40-element
// tensor and the initial 5x1x10 result tensor.
%36 = flow.dispatch @main_ex_dispatch_11::@main_ex_dispatch_11[%arg10 : index]() : () -> tensor<40xf32>
%37 = flow.dispatch @main_ex_dispatch_12::@main_ex_dispatch_12[%arg11 : index]() : () -> tensor<5x1x10xf32>
flow.return %25, %28, %29, %33, %35, %36, %37 : tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<i32>, tensor<i32>, tensor<40xf32>, tensor<5x1x10xf32>
}
// Enter the loop. Note %0#0 (the dispatch 0 result) seeds both 1x10 state
// values %5 and %6.
br ^bb1(%0#4, %0#3, %0#5, %cst, %0#0, %0#0, %0#1, %0#2, %0#6 : tensor<i32>, tensor<i32>, tensor<40xf32>, tensor<74x40xf32>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x10xf32>)
// Loop header: nine loop-carried values. Dispatch 13 maps the two scalar i32
// values %1 and %2 to an i1 that decides whether to run another iteration.
^bb1(%1: tensor<i32>, %2: tensor<i32>, %3: tensor<40xf32>, %4: tensor<74x40xf32>, %5: tensor<1x10xf32>, %6: tensor<1x10xf32>, %7: tensor<5x1x64xf32>, %8: tensor<5x1x1xf32>, %9: tensor<5x1x10xf32>): // 2 preds: ^bb0, ^bb2
%10 = flow.ex.stream.fragment(%arg2 = %c1 : index, %arg3 = %1 : tensor<i32>, %arg4 = %2 : tensor<i32>) -> tensor<i1> {
%25 = flow.dispatch @main_ex_dispatch_13::@main_ex_dispatch_13[%arg2 : index](%arg3, %arg4) : (tensor<i32>, tensor<i32>) -> tensor<i1>
flow.return %25 : tensor<i1>
}
// Read the i1 out of the tensor so it can drive the conditional branch:
// true -> loop body ^bb2, false -> exit ^bb3 carrying only %9.
%11 = flow.tensor.load %10 : tensor<i1>
cond_br %11, ^bb2(%1, %2, %3, %4, %5, %6, %7, %8, %9 : tensor<i32>, tensor<i32>, tensor<40xf32>, tensor<74x40xf32>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x10xf32>), ^bb3(%9 : tensor<5x1x10xf32>)
// Loop body.
^bb2(%12: tensor<i32>, %13: tensor<i32>, %14: tensor<40xf32>, %15: tensor<74x40xf32>, %16: tensor<1x10xf32>, %17: tensor<1x10xf32>, %18: tensor<5x1x64xf32>, %19: tensor<5x1x1xf32>, %20: tensor<5x1x10xf32>): // pred: ^bb1
// The scalar %12 is loaded and cast to an index; it becomes %arg16, the
// leading index of the flow.tensor.update below.
%21 = flow.tensor.load %12 : tensor<i32>
%22 = index_cast %21 : i32 to index
// Per-iteration stream fragment (dispatches 14-19). %arg13 captures %0#0
// from the entry block rather than a loop-carried value.
%23:4 = flow.ex.stream.fragment(%arg2 = %c1 : index, %arg3 = %12 : tensor<i32>, %arg4 = %c64 : index, %arg5 = %18 : tensor<5x1x64xf32>, %arg6 = %c74 : index, %arg7 = %17 : tensor<1x10xf32>, %arg8 = %c40 : index, %arg9 = %15 : tensor<74x40xf32>, %arg10 = %c10 : index, %arg11 = %16 : tensor<1x10xf32>, %arg12 = %14 : tensor<40xf32>, %arg13 = %0#0 : tensor<1x10xf32>, %arg14 = %19 : tensor<5x1x1xf32>, %arg15 = %20 : tensor<5x1x10xf32>, %arg16 = %22 : index, %arg17 = %c0 : index) -> (tensor<i32>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x10xf32>) {
// Dispatch 14: produces the next value of the first loop-carried scalar.
%25 = flow.dispatch @main_ex_dispatch_14::@main_ex_dispatch_14[%arg2 : index](%arg3) : (tensor<i32>) -> tensor<i32>
// Dispatches 15-17: (5x1x64, scalar) -> 1x64; (1x64, 1x10) -> 1x74;
// (1x74, 74x40) -> 1x40.
%26 = flow.dispatch @main_ex_dispatch_15::@main_ex_dispatch_15[%arg4 : index](%arg5, %arg3) : (tensor<5x1x64xf32>, tensor<i32>) -> tensor<1x64xf32>
%27 = flow.dispatch @main_ex_dispatch_16::@main_ex_dispatch_16[%arg6 : index](%26, %arg7) : (tensor<1x64xf32>, tensor<1x10xf32>) -> tensor<1x74xf32>
%28 = flow.dispatch @main_ex_dispatch_17::@main_ex_dispatch_17[%arg8 : index](%27, %arg9) : (tensor<1x74xf32>, tensor<74x40xf32>) -> tensor<1x40xf32>
// Dispatches 18 and 19 produce the next two 1x10 state tensors; dispatch 19
// also produces a 1x1x10 tensor.
%29 = flow.dispatch @main_ex_dispatch_18::@main_ex_dispatch_18[%arg10 : index](%arg11, %28, %arg12, %arg13, %arg14, %arg3) : (tensor<1x10xf32>, tensor<1x40xf32>, tensor<40xf32>, tensor<1x10xf32>, tensor<5x1x1xf32>, tensor<i32>) -> tensor<1x10xf32>
%30:2 = flow.dispatch @main_ex_dispatch_19::@main_ex_dispatch_19[%arg10 : index](%arg7, %arg11, %28, %arg12, %arg13, %arg14, %arg3) : (tensor<1x10xf32>, tensor<1x10xf32>, tensor<1x40xf32>, tensor<40xf32>, tensor<1x10xf32>, tensor<5x1x1xf32>, tensor<i32>) -> (tensor<1x10xf32>, tensor<1x1x10xf32>)
// Write the 1x1x10 tensor into the 5x1x10 accumulator at [%arg16, 0, 0].
%31 = flow.tensor.update %30#1, %arg15[%arg16, %arg17, %arg17] : tensor<1x1x10xf32> -> tensor<5x1x10xf32>
flow.return %25, %29, %30#0, %31 : tensor<i32>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x10xf32>
}
// Back edge: only the first scalar, the two 1x10 tensors and the 5x1x10
// accumulator are replaced; %13, %14, %15, %18 and %19 pass through unchanged.
br ^bb1(%23#0, %13, %14, %15, %23#1, %23#2, %18, %19, %23#3 : tensor<i32>, tensor<i32>, tensor<40xf32>, tensor<74x40xf32>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x10xf32>)
// Loop exit: return the accumulated 5x1x10 tensor.
^bb3(%24: tensor<5x1x10xf32>): // pred: ^bb1
return %24 : tensor<5x1x10xf32>
}
}
// *** IR Dump After Inliner ***
// Module for dispatch 0. The spv.target_env attribute declares SPIR-V v1.3
// with the Shader capability and the SPV_KHR_storage_buffer_storage_class
// extension, plus limits of 128 workgroup invocations and a maximum
// workgroup size of [128, 128, 64].
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
// Dispatch body: broadcasts the scalar 0.0 to a 1x10 tensor and stores it to
// binding @ret0 at offset 0. Takes no arguments and returns nothing.
func @main_ex_dispatch_0() {
%c0 = constant 0 : index
%cst = constant dense<0.000000e+00> : tensor<f32>
// Empty broadcast_dimensions: the operand is rank 0, so no operand dimension
// is mapped onto the result.
%0 = "xla_hlo.broadcast_in_dim"(%cst) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
hal.interface.store.tensor %0, @legacy_io::@ret0, offset = %c0 : tensor<1x10xf32>
return
}
// Single output binding: set 0, binding 0, StorageBuffer, "Write|Discard".
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @ret0, set=0, binding=0, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After mlir::iree_compiler::Shape::`anonymous-namespace'::TieDynamicShapesPass ***
// Dispatch 0 as printed after TieDynamicShapesPass: broadcasts the scalar 0.0
// to a 1x10 tensor and stores it to @legacy_io::@ret0 at offset 0.
// All shapes here are static.
func @main_ex_dispatch_0() {
%c0 = constant 0 : index
%cst = constant dense<0.000000e+00> : tensor<f32>
// Rank-0 operand, hence the empty broadcast_dimensions attribute.
%0 = "xla_hlo.broadcast_in_dim"(%cst) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
hal.interface.store.tensor %0, @legacy_io::@ret0, offset = %c0 : tensor<1x10xf32>
return
}
// *** IR Dump After mlir::iree_compiler::Shape::`anonymous-namespace'::MaterializeShapeCalculationsPass ***
// Dispatch 0 as printed after MaterializeShapeCalculationsPass: broadcasts
// the scalar 0.0 to a 1x10 tensor and stores it to @legacy_io::@ret0 at
// offset 0. No shape-calculation ops appear in this function.
func @main_ex_dispatch_0() {
%c0 = constant 0 : index
%cst = constant dense<0.000000e+00> : tensor<f32>
// Rank-0 operand, hence the empty broadcast_dimensions attribute.
%0 = "xla_hlo.broadcast_in_dim"(%cst) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
hal.interface.store.tensor %0, @legacy_io::@ret0, offset = %c0 : tensor<1x10xf32>
return
}
// *** IR Dump After mlir::iree_compiler::Shape::`anonymous-namespace'::HoistShapeCalculations ***
// Dispatch 0 as printed after HoistShapeCalculations: broadcasts the scalar
// 0.0 to a 1x10 tensor and stores it to @legacy_io::@ret0 at offset 0.
func @main_ex_dispatch_0() {
%c0 = constant 0 : index
%cst = constant dense<0.000000e+00> : tensor<f32>
// Rank-0 operand, hence the empty broadcast_dimensions attribute.
%0 = "xla_hlo.broadcast_in_dim"(%cst) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
hal.interface.store.tensor %0, @legacy_io::@ret0, offset = %c0 : tensor<1x10xf32>
return
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::DecomposeHLOClampPass ***
// Dispatch 0 as printed after DecomposeHLOClampPass: broadcasts the scalar
// 0.0 to a 1x10 tensor and stores it to @legacy_io::@ret0 at offset 0.
// The function contains no clamp op.
func @main_ex_dispatch_0() {
%c0 = constant 0 : index
%cst = constant dense<0.000000e+00> : tensor<f32>
// Rank-0 operand, hence the empty broadcast_dimensions attribute.
%0 = "xla_hlo.broadcast_in_dim"(%cst) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
hal.interface.store.tensor %0, @legacy_io::@ret0, offset = %c0 : tensor<1x10xf32>
return
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::ConvertHLOToLinalgOnTensorsPass ***
// Dispatch 0 as printed after ConvertHLOToLinalgOnTensorsPass: the broadcast
// is now a linalg.generic on tensors whose 1x10 result is stored to
// @legacy_io::@ret0 at offset 0.
func @main_ex_dispatch_0() {
%c0 = constant 0 : index
%cst = constant dense<0.000000e+00> : tensor<f32>
// One input, one output, two parallel iterators. The input map
// (d0, d1) -> () reads the rank-0 operand at every iteration point; the
// output map is the identity.
%0 = linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} %cst {
^bb0(%arg0: f32): // no predecessors
// Forward the input element unchanged.
linalg.yield %arg0 : f32
}: tensor<f32> -> tensor<1x10xf32>
hal.interface.store.tensor %0, @legacy_io::@ret0, offset = %c0 : tensor<1x10xf32>
return
}
// *** IR Dump After LinalgFusionOfTensorOps ***
// Module for dispatch 0 as printed after LinalgFusionOfTensorOps. Target
// environment: SPIR-V v1.3, Shader capability,
// SPV_KHR_storage_buffer_storage_class extension, at most 128 invocations per
// workgroup and a maximum workgroup size of [128, 128, 64].
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
// Fills a 1x10 tensor with 0.0 and stores it to @ret0 at offset 0.
func @main_ex_dispatch_0() {
%c0 = constant 0 : index
// The constant is a plain f32 scalar here, not a rank-0 tensor.
%cst = constant 0.000000e+00 : f32
// The generic op has no inputs (args_in = 0) and yields %cst directly for
// every element of the identity-mapped 1x10 output.
%0 = linalg.generic {args_in = 0 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} {
linalg.yield %cst : f32
}: -> tensor<1x10xf32>
hal.interface.store.tensor %0, @legacy_io::@ret0, offset = %c0 : tensor<1x10xf32>
return
}
// Single output binding: set 0, binding 0, StorageBuffer, "Write|Discard".
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @ret0, set=0, binding=0, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::ConvertHLOToLinalgOnBuffersPass ***
// Dispatch 0 as printed after ConvertHLOToLinalgOnBuffersPass: the function
// operates on a memref instead of producing a tensor, and there is no
// hal.interface.store.tensor op.
func @main_ex_dispatch_0() {
// 1x10 memref standing in for the buffer bound to @legacy_io::@ret0.
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<1x10xf32>
// NOTE(review): %c0 has no visible use in this function.
%c0 = constant 0 : index
%cst = constant 0.000000e+00 : f32
// %0 is the generic op's only (output) operand; every element gets %cst.
linalg.generic {args_in = 0 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} %0 {
// %arg0 is the output element's block argument; it is not read.
^bb0(%arg0: f32): // no predecessors
linalg.yield %cst : f32
}: memref<1x10xf32>
return
}
// *** IR Dump After Canonicalizer ***
// Module for dispatch 0 as printed after the Canonicalizer. Target
// environment: SPIR-V v1.3, Shader capability,
// SPV_KHR_storage_buffer_storage_class extension, at most 128 invocations per
// workgroup and a maximum workgroup size of [128, 128, 64].
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
// Fills the 1x10 buffer bound to @ret0 with 0.0. The only constant in the
// function is %cst.
func @main_ex_dispatch_0() {
%cst = constant 0.000000e+00 : f32
// 1x10 memref standing in for the buffer bound to @legacy_io::@ret0.
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<1x10xf32>
// Output-only generic op over two parallel dimensions, identity indexing.
linalg.generic {args_in = 0 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} %0 {
// %arg0 (the output element's block argument) is not read.
^bb0(%arg0: f32): // no predecessors
linalg.yield %cst : f32
}: memref<1x10xf32>
return
}
// Single output binding: set 0, binding 0, StorageBuffer, "Write|Discard".
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @ret0, set=0, binding=0, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After CSE ***
// Module for dispatch 0 as printed after CSE. Target environment: SPIR-V
// v1.3, Shader capability, SPV_KHR_storage_buffer_storage_class extension, at
// most 128 invocations per workgroup and a maximum workgroup size of
// [128, 128, 64].
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
// Fills the 1x10 buffer bound to @ret0 with 0.0.
func @main_ex_dispatch_0() {
%cst = constant 0.000000e+00 : f32
// 1x10 memref standing in for the buffer bound to @legacy_io::@ret0.
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<1x10xf32>
// Output-only generic op over two parallel dimensions, identity indexing.
linalg.generic {args_in = 0 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} %0 {
// %arg0 (the output element's block argument) is not read.
^bb0(%arg0: f32): // no predecessors
linalg.yield %cst : f32
}: memref<1x10xf32>
return
}
// Single output binding: set 0, binding 0, StorageBuffer, "Write|Discard".
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @ret0, set=0, binding=0, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::LinalgTileAndFusePass ***
// Dispatch 0 as printed after LinalgTileAndFusePass: the fill is tiled with
// tile sizes (4, 32) and the function carries a spv.entry_point_abi
// local_size of [32, 4, 1].
func @main_ex_dispatch_0() attributes {spv.entry_point_abi = {local_size = dense<[32, 4, 1]> : vector<3xi32>}} {
%cst = constant 0.000000e+00 : f32
%c0 = constant 0 : index
%c4 = constant 4 : index
%c32 = constant 32 : index
%c10 = constant 10 : index
%c1 = constant 1 : index
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<1x10xf32>
// Tile loop over the 1x10 buffer: (0, 0) to (1, 10) in steps of (4, 32).
// Both steps exceed the extents, so each dimension has one iteration.
scf.parallel (%arg0, %arg1) = (%c0, %c0) to (%c1, %c10) step (%c4, %c32) {
// Tile extents clamped to what remains: min(4, 1 - %arg0) and
// min(32, 10 - %arg1). d0 receives the tile size but the map does not use it.
%1 = affine.min affine_map<(d0, d1, d2) -> (4, d1 - d2)>(%c4, %c1, %arg0)
%2 = affine.min affine_map<(d0, d1, d2) -> (32, d1 - d2)>(%c32, %c10, %arg1)
// Dynamically sized view of the current tile, unit strides.
%3 = subview %0[%arg0, %arg1] [%1, %2] [%c1, %c1] : memref<1x10xf32> to memref<?x?xf32, affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>>
// Fill of the tile; tagged __internal_linalg_transform__ = "workitem".
linalg.generic {args_in = 0 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} {__internal_linalg_transform__ = "workitem"} %3 {
^bb0(%arg2: f32): // no predecessors
linalg.yield %cst : f32
}: memref<?x?xf32, affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>>
scf.yield
}
return
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::SplitDispatchFunctionPass ***
// Module for dispatch 0 as printed after SplitDispatchFunctionPass. It holds
// a single function plus the I/O interface. Target environment: SPIR-V v1.3,
// Shader capability, SPV_KHR_storage_buffer_storage_class extension, at most
// 128 invocations per workgroup and a maximum workgroup size of
// [128, 128, 64].
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
// Tiled fill of the 1x10 @ret0 buffer with 0.0; local_size is [32, 4, 1].
func @main_ex_dispatch_0() attributes {spv.entry_point_abi = {local_size = dense<[32, 4, 1]> : vector<3xi32>}} {
%cst = constant 0.000000e+00 : f32
%c0 = constant 0 : index
%c4 = constant 4 : index
%c32 = constant 32 : index
%c10 = constant 10 : index
%c1 = constant 1 : index
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<1x10xf32>
// Tile loop: (0, 0) to (1, 10) in steps of (4, 32).
scf.parallel (%arg0, %arg1) = (%c0, %c0) to (%c1, %c10) step (%c4, %c32) {
// Tile extents: min(4, 1 - %arg0) and min(32, 10 - %arg1).
%1 = affine.min affine_map<(d0, d1, d2) -> (4, d1 - d2)>(%c4, %c1, %arg0)
%2 = affine.min affine_map<(d0, d1, d2) -> (32, d1 - d2)>(%c32, %c10, %arg1)
// Dynamically sized view of the current tile, unit strides.
%3 = subview %0[%arg0, %arg1] [%1, %2] [%c1, %c1] : memref<1x10xf32> to memref<?x?xf32, affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>>
// Fill of the tile; tagged __internal_linalg_transform__ = "workitem".
linalg.generic {args_in = 0 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} {__internal_linalg_transform__ = "workitem"} %3 {
^bb0(%arg2: f32): // no predecessors
linalg.yield %cst : f32
}: memref<?x?xf32, affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>>
scf.yield
}
return
}
// Single output binding: set 0, binding 0, StorageBuffer, "Write|Discard".
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @ret0, set=0, binding=0, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::LinalgTileAndFusePass ***
// Dispatch 0 as printed after the second LinalgTileAndFusePass banner in this
// dump: a tiled fill of the 1x10 @ret0 buffer with 0.0, tile sizes (4, 32),
// local_size [32, 4, 1].
func @main_ex_dispatch_0() attributes {spv.entry_point_abi = {local_size = dense<[32, 4, 1]> : vector<3xi32>}} {
%cst = constant 0.000000e+00 : f32
%c0 = constant 0 : index
%c4 = constant 4 : index
%c32 = constant 32 : index
%c10 = constant 10 : index
%c1 = constant 1 : index
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<1x10xf32>
// Tile loop: (0, 0) to (1, 10) in steps of (4, 32).
scf.parallel (%arg0, %arg1) = (%c0, %c0) to (%c1, %c10) step (%c4, %c32) {
// Tile extents: min(4, 1 - %arg0) and min(32, 10 - %arg1).
%1 = affine.min affine_map<(d0, d1, d2) -> (4, d1 - d2)>(%c4, %c1, %arg0)
%2 = affine.min affine_map<(d0, d1, d2) -> (32, d1 - d2)>(%c32, %c10, %arg1)
// Dynamically sized view of the current tile, unit strides.
%3 = subview %0[%arg0, %arg1] [%1, %2] [%c1, %c1] : memref<1x10xf32> to memref<?x?xf32, affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>>
// Fill of the tile; tagged __internal_linalg_transform__ = "workitem".
linalg.generic {args_in = 0 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} {__internal_linalg_transform__ = "workitem"} %3 {
^bb0(%arg2: f32): // no predecessors
linalg.yield %cst : f32
}: memref<?x?xf32, affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>>
scf.yield
}
return
}
// *** IR Dump After Canonicalizer ***
// Module for dispatch 0 as printed after the Canonicalizer that follows
// tiling. Target environment: SPIR-V v1.3, Shader capability,
// SPV_KHR_storage_buffer_storage_class extension, at most 128 invocations per
// workgroup and a maximum workgroup size of [128, 128, 64].
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
// Tiled fill of the 1x10 @ret0 buffer with 0.0; local_size is [32, 4, 1].
func @main_ex_dispatch_0() attributes {spv.entry_point_abi = {local_size = dense<[32, 4, 1]> : vector<3xi32>}} {
%cst = constant 0.000000e+00 : f32
%c0 = constant 0 : index
%c4 = constant 4 : index
%c32 = constant 32 : index
%c10 = constant 10 : index
%c1 = constant 1 : index
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<1x10xf32>
// Tile loop: (0, 0) to (1, 10) in steps of (4, 32).
scf.parallel (%arg0, %arg1) = (%c0, %c0) to (%c1, %c10) step (%c4, %c32) {
// The affine.min maps take only the induction variable; the buffer extents
// (1 and 10) appear as literals in the maps.
%1 = affine.min affine_map<(d0) -> (4, -d0 + 1)>(%arg0)
%2 = affine.min affine_map<(d0) -> (32, -d0 + 10)>(%arg1)
// The subview strides are the static literals [1, 1], and the result layout
// has a fixed row stride of 10 with only the offset s0 left symbolic.
%3 = subview %0[%arg0, %arg1] [%1, %2] [1, 1] : memref<1x10xf32> to memref<?x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 10 + s0 + d1)>>
// Fill of the tile; tagged __internal_linalg_transform__ = "workitem".
linalg.generic {args_in = 0 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} {__internal_linalg_transform__ = "workitem"} %3 {
^bb0(%arg2: f32): // no predecessors
linalg.yield %cst : f32
}: memref<?x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 10 + s0 + d1)>>
scf.yield
}
return
}
// Single output binding: set 0, binding 0, StorageBuffer, "Write|Discard".
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @ret0, set=0, binding=0, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::ConvertToGPUPass ***
// Dispatch 0 as printed after ConvertToGPUPass: the tile loop and the fill
// are explicit scf.for nests whose bounds come from gpu.block_id/grid_dim
// (outer, per tile) and gpu.thread_id/block_dim (inner, per element).
func @main_ex_dispatch_0() attributes {spv.entry_point_abi = {local_size = dense<[32, 4, 1]> : vector<3xi32>}} {
%cst = constant 0.000000e+00 : f32
%c0 = constant 0 : index
%c4 = constant 4 : index
%c32 = constant 32 : index
%c10 = constant 10 : index
%c1 = constant 1 : index
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<1x10xf32>
%1 = "gpu.block_id"() {dimension = "x"} : () -> index
%2 = "gpu.grid_dim"() {dimension = "x"} : () -> index
%3 = "gpu.block_id"() {dimension = "y"} : () -> index
%4 = "gpu.grid_dim"() {dimension = "y"} : () -> index
// Dimension 0 (mapped to y): start = 0 + 4 * block_id.y, step = 4 * grid_dim.y.
%5 = muli %c4, %3 : index
%6 = addi %c0, %5 : index
%7 = muli %c4, %4 : index
// Dimension 1 (mapped to x): start = 0 + 32 * block_id.x, step = 32 * grid_dim.x.
%8 = muli %c32, %1 : index
%9 = addi %c0, %8 : index
%10 = muli %c32, %2 : index
scf.for %arg0 = %6 to %c1 step %7 {
scf.for %arg1 = %9 to %c10 step %10 {
// Tile extents: min(4, 1 - %arg0) and min(32, 10 - %arg1).
%11 = affine.min affine_map<(d0) -> (4, -d0 + 1)>(%arg0)
%12 = affine.min affine_map<(d0) -> (32, -d0 + 10)>(%arg1)
%13 = subview %0[%arg0, %arg1] [%11, %12] [1, 1] : memref<1x10xf32> to memref<?x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 10 + s0 + d1)>>
// Upper bounds of the per-element loops: the two dimensions of the tile
// view, passed through identity affine maps.
%14 = dim %13, 0 : memref<?x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 10 + s0 + d1)>>
%15 = dim %13, 1 : memref<?x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 10 + s0 + d1)>>
%16 = affine.apply affine_map<(d0) -> (d0)>(%14)
%17 = affine.apply affine_map<(d0) -> (d0)>(%15)
// Lower bound (0) and step (1) constants for the per-element loops, defined
// once per dimension.
%c0_0 = constant 0 : index
%c1_1 = constant 1 : index
%c0_2 = constant 0 : index
%c1_3 = constant 1 : index
%18 = "gpu.thread_id"() {dimension = "x"} : () -> index
%19 = "gpu.block_dim"() {dimension = "x"} : () -> index
%20 = "gpu.thread_id"() {dimension = "y"} : () -> index
%21 = "gpu.block_dim"() {dimension = "y"} : () -> index
// Row loop: start = 0 + 1 * thread_id.y, step = 1 * block_dim.y.
%22 = muli %c1_1, %20 : index
%23 = addi %c0_0, %22 : index
%24 = muli %c1_1, %21 : index
// Column loop: start = 0 + 1 * thread_id.x, step = 1 * block_dim.x.
%25 = muli %c1_3, %18 : index
%26 = addi %c0_2, %25 : index
%27 = muli %c1_3, %19 : index
scf.for %arg2 = %23 to %16 step %24 {
scf.for %arg3 = %26 to %17 step %27 {
%28 = affine.apply affine_map<(d0) -> (d0)>(%arg2)
%29 = affine.apply affine_map<(d0) -> (d0)>(%arg3)
// NOTE(review): the loaded value %30 has no visible use; only the store of
// %cst below writes to the tile.
%30 = load %13[%28, %29] : memref<?x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 10 + s0 + d1)>>
%31 = affine.apply affine_map<(d0) -> (d0)>(%arg2)
%32 = affine.apply affine_map<(d0) -> (d0)>(%arg3)
store %cst, %13[%31, %32] : memref<?x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 10 + s0 + d1)>>
}
}
}
}
return
}
// *** IR Dump After ConvertAffineToStandard ***
// Dispatch 0 as printed after ConvertAffineToStandard: no affine.min or
// affine.apply ops are present; the tile-extent minimums are written as
// muli/addi/cmpi/select sequences.
func @main_ex_dispatch_0() attributes {spv.entry_point_abi = {local_size = dense<[32, 4, 1]> : vector<3xi32>}} {
%cst = constant 0.000000e+00 : f32
%c0 = constant 0 : index
%c4 = constant 4 : index
%c32 = constant 32 : index
%c10 = constant 10 : index
%c1 = constant 1 : index
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<1x10xf32>
%1 = "gpu.block_id"() {dimension = "x"} : () -> index
%2 = "gpu.grid_dim"() {dimension = "x"} : () -> index
%3 = "gpu.block_id"() {dimension = "y"} : () -> index
%4 = "gpu.grid_dim"() {dimension = "y"} : () -> index
// Dimension 0 (mapped to y): start = 0 + 4 * block_id.y, step = 4 * grid_dim.y.
%5 = muli %c4, %3 : index
%6 = addi %c0, %5 : index
%7 = muli %c4, %4 : index
// Dimension 1 (mapped to x): start = 0 + 32 * block_id.x, step = 32 * grid_dim.x.
%8 = muli %c32, %1 : index
%9 = addi %c0, %8 : index
%10 = muli %c32, %2 : index
scf.for %arg0 = %6 to %c1 step %7 {
scf.for %arg1 = %9 to %c10 step %10 {
// %14 = min(4, 1 - %arg0): negate via multiply by -1, add 1, then select the
// smaller value with a signed less-than compare.
%c4_0 = constant 4 : index
%c-1 = constant -1 : index
%11 = muli %arg0, %c-1 : index
%c1_1 = constant 1 : index
%12 = addi %11, %c1_1 : index
%13 = cmpi "slt", %c4_0, %12 : index
%14 = select %13, %c4_0, %12 : index
// %18 = min(32, 10 - %arg1), same pattern.
%c32_2 = constant 32 : index
%c-1_3 = constant -1 : index
%15 = muli %arg1, %c-1_3 : index
%c10_4 = constant 10 : index
%16 = addi %15, %c10_4 : index
%17 = cmpi "slt", %c32_2, %16 : index
%18 = select %17, %c32_2, %16 : index
%19 = subview %0[%arg0, %arg1] [%14, %18] [1, 1] : memref<1x10xf32> to memref<?x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 10 + s0 + d1)>>
// Upper bounds of the per-element loops: the two dimensions of the tile view.
%20 = dim %19, 0 : memref<?x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 10 + s0 + d1)>>
%21 = dim %19, 1 : memref<?x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 10 + s0 + d1)>>
%c0_5 = constant 0 : index
%c1_6 = constant 1 : index
%c0_7 = constant 0 : index
%c1_8 = constant 1 : index
%22 = "gpu.thread_id"() {dimension = "x"} : () -> index
%23 = "gpu.block_dim"() {dimension = "x"} : () -> index
%24 = "gpu.thread_id"() {dimension = "y"} : () -> index
%25 = "gpu.block_dim"() {dimension = "y"} : () -> index
// Row loop: start = 0 + 1 * thread_id.y, step = 1 * block_dim.y.
%26 = muli %c1_6, %24 : index
%27 = addi %c0_5, %26 : index
%28 = muli %c1_6, %25 : index
// Column loop: start = 0 + 1 * thread_id.x, step = 1 * block_dim.x.
%29 = muli %c1_8, %22 : index
%30 = addi %c0_7, %29 : index
%31 = muli %c1_8, %23 : index
scf.for %arg2 = %27 to %20 step %28 {
scf.for %arg3 = %30 to %21 step %31 {
// NOTE(review): the loaded value %32 has no visible use; only the store of
// %cst below writes to the tile.
%32 = load %19[%arg2, %arg3] : memref<?x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 10 + s0 + d1)>>
store %cst, %19[%arg2, %arg3] : memref<?x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 10 + s0 + d1)>>
}
}
}
}
return
}
// *** IR Dump After Canonicalizer ***
// Module for dispatch 0 as printed after the Canonicalizer that follows GPU
// conversion. Target environment: SPIR-V v1.3, Shader capability,
// SPV_KHR_storage_buffer_storage_class extension, at most 128 invocations per
// workgroup and a maximum workgroup size of [128, 128, 64].
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
// Fills the 1x10 @ret0 buffer with 0.0 using block-level tile loops and
// thread-level element loops. Each constant is defined once at function
// scope, and the loop body contains a store but no load.
func @main_ex_dispatch_0() attributes {spv.entry_point_abi = {local_size = dense<[32, 4, 1]> : vector<3xi32>}} {
%cst = constant 0.000000e+00 : f32
%c4 = constant 4 : index
%c1 = constant 1 : index
%c32 = constant 32 : index
%c-1 = constant -1 : index
%c10 = constant 10 : index
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<1x10xf32>
%1 = "gpu.block_id"() {dimension = "x"} : () -> index
%2 = "gpu.grid_dim"() {dimension = "x"} : () -> index
%3 = "gpu.block_id"() {dimension = "y"} : () -> index
%4 = "gpu.grid_dim"() {dimension = "y"} : () -> index
// Tile loop bounds: dimension 0 starts at block_id.y * 4 and steps by
// grid_dim.y * 4; dimension 1 starts at block_id.x * 32 and steps by
// grid_dim.x * 32.
%5 = muli %3, %c4 : index
%6 = muli %4, %c4 : index
%7 = muli %1, %c32 : index
%8 = muli %2, %c32 : index
scf.for %arg0 = %5 to %c1 step %6 {
scf.for %arg1 = %7 to %c10 step %8 {
// %12 = min(4, 1 - %arg0).
%9 = muli %arg0, %c-1 : index
%10 = addi %9, %c1 : index
%11 = cmpi "slt", %c4, %10 : index
%12 = select %11, %c4, %10 : index
// %16 = min(32, 10 - %arg1).
%13 = muli %arg1, %c-1 : index
%14 = addi %13, %c10 : index
%15 = cmpi "slt", %c32, %14 : index
%16 = select %15, %c32, %14 : index
%17 = subview %0[%arg0, %arg1] [%12, %16] [1, 1] : memref<1x10xf32> to memref<?x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 10 + s0 + d1)>>
%18 = "gpu.thread_id"() {dimension = "x"} : () -> index
%19 = "gpu.block_dim"() {dimension = "x"} : () -> index
%20 = "gpu.thread_id"() {dimension = "y"} : () -> index
%21 = "gpu.block_dim"() {dimension = "y"} : () -> index
// Element loops run from thread_id to the tile extent (%12 / %16) in steps of
// block_dim.
scf.for %arg2 = %20 to %12 step %21 {
scf.for %arg3 = %18 to %16 step %19 {
store %cst, %17[%arg2, %arg3] : memref<?x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 10 + s0 + d1)>>
}
}
}
}
return
}
// Single output binding: set 0, binding 0, StorageBuffer, "Write|Discard".
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @ret0, set=0, binding=0, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After CSE ***
// Module for dispatch 0 as printed after CSE. Target environment: SPIR-V
// v1.3, Shader capability, SPV_KHR_storage_buffer_storage_class extension, at
// most 128 invocations per workgroup and a maximum workgroup size of
// [128, 128, 64].
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
// Fills the 1x10 @ret0 buffer with 0.0 using block-level tile loops and
// thread-level element loops.
func @main_ex_dispatch_0() attributes {spv.entry_point_abi = {local_size = dense<[32, 4, 1]> : vector<3xi32>}} {
%cst = constant 0.000000e+00 : f32
%c4 = constant 4 : index
%c1 = constant 1 : index
%c32 = constant 32 : index
%c-1 = constant -1 : index
%c10 = constant 10 : index
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<1x10xf32>
%1 = "gpu.block_id"() {dimension = "x"} : () -> index
%2 = "gpu.grid_dim"() {dimension = "x"} : () -> index
%3 = "gpu.block_id"() {dimension = "y"} : () -> index
%4 = "gpu.grid_dim"() {dimension = "y"} : () -> index
// Tile loop bounds: dimension 0 starts at block_id.y * 4 and steps by
// grid_dim.y * 4; dimension 1 starts at block_id.x * 32 and steps by
// grid_dim.x * 32.
%5 = muli %3, %c4 : index
%6 = muli %4, %c4 : index
%7 = muli %1, %c32 : index
%8 = muli %2, %c32 : index
scf.for %arg0 = %5 to %c1 step %6 {
scf.for %arg1 = %7 to %c10 step %8 {
// %12 = min(4, 1 - %arg0).
%9 = muli %arg0, %c-1 : index
%10 = addi %9, %c1 : index
%11 = cmpi "slt", %c4, %10 : index
%12 = select %11, %c4, %10 : index
// %16 = min(32, 10 - %arg1).
%13 = muli %arg1, %c-1 : index
%14 = addi %13, %c10 : index
%15 = cmpi "slt", %c32, %14 : index
%16 = select %15, %c32, %14 : index
%17 = subview %0[%arg0, %arg1] [%12, %16] [1, 1] : memref<1x10xf32> to memref<?x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 10 + s0 + d1)>>
%18 = "gpu.thread_id"() {dimension = "x"} : () -> index
%19 = "gpu.block_dim"() {dimension = "x"} : () -> index
%20 = "gpu.thread_id"() {dimension = "y"} : () -> index
%21 = "gpu.block_dim"() {dimension = "y"} : () -> index
// Element loops run from thread_id to the tile extent (%12 / %16) in steps of
// block_dim.
scf.for %arg2 = %20 to %12 step %21 {
scf.for %arg3 = %18 to %16 step %19 {
store %cst, %17[%arg2, %arg3] : memref<?x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 10 + s0 + d1)>>
}
}
}
}
return
}
// Single output binding: set 0, binding 0, StorageBuffer, "Write|Discard".
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @ret0, set=0, binding=0, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::ResolveShapeOpsPass ***
// Dispatch 0 as printed after ResolveShapeOpsPass: fills the 1x10 @ret0
// buffer with 0.0 using block-level tile loops and thread-level element
// loops. No shape ops appear in the function.
func @main_ex_dispatch_0() attributes {spv.entry_point_abi = {local_size = dense<[32, 4, 1]> : vector<3xi32>}} {
%cst = constant 0.000000e+00 : f32
%c4 = constant 4 : index
%c1 = constant 1 : index
%c32 = constant 32 : index
%c-1 = constant -1 : index
%c10 = constant 10 : index
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<1x10xf32>
%1 = "gpu.block_id"() {dimension = "x"} : () -> index
%2 = "gpu.grid_dim"() {dimension = "x"} : () -> index
%3 = "gpu.block_id"() {dimension = "y"} : () -> index
%4 = "gpu.grid_dim"() {dimension = "y"} : () -> index
// Tile loop bounds: dimension 0 starts at block_id.y * 4 and steps by
// grid_dim.y * 4; dimension 1 starts at block_id.x * 32 and steps by
// grid_dim.x * 32.
%5 = muli %3, %c4 : index
%6 = muli %4, %c4 : index
%7 = muli %1, %c32 : index
%8 = muli %2, %c32 : index
scf.for %arg0 = %5 to %c1 step %6 {
scf.for %arg1 = %7 to %c10 step %8 {
// %12 = min(4, 1 - %arg0).
%9 = muli %arg0, %c-1 : index
%10 = addi %9, %c1 : index
%11 = cmpi "slt", %c4, %10 : index
%12 = select %11, %c4, %10 : index
// %16 = min(32, 10 - %arg1).
%13 = muli %arg1, %c-1 : index
%14 = addi %13, %c10 : index
%15 = cmpi "slt", %c32, %14 : index
%16 = select %15, %c32, %14 : index
%17 = subview %0[%arg0, %arg1] [%12, %16] [1, 1] : memref<1x10xf32> to memref<?x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 10 + s0 + d1)>>
%18 = "gpu.thread_id"() {dimension = "x"} : () -> index
%19 = "gpu.block_dim"() {dimension = "x"} : () -> index
%20 = "gpu.thread_id"() {dimension = "y"} : () -> index
%21 = "gpu.block_dim"() {dimension = "y"} : () -> index
// Element loops run from thread_id to the tile extent (%12 / %16) in steps of
// block_dim.
scf.for %arg2 = %20 to %12 step %21 {
scf.for %arg3 = %18 to %16 step %19 {
store %cst, %17[%arg2, %arg3] : memref<?x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 10 + s0 + d1)>>
}
}
}
}
return
}
// *** IR Dump After LegalizeStandardForSPIRV ***
// Module for dispatch 0 as printed after LegalizeStandardForSPIRV. Target
// environment: SPIR-V v1.3, Shader capability,
// SPV_KHR_storage_buffer_storage_class extension, at most 128 invocations per
// workgroup and a maximum workgroup size of [128, 128, 64].
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
// Fills the 1x10 @ret0 buffer with 0.0. There is no subview op: the store
// addresses the placeholder memref %0 directly.
func @main_ex_dispatch_0() attributes {spv.entry_point_abi = {local_size = dense<[32, 4, 1]> : vector<3xi32>}} {
%cst = constant 0.000000e+00 : f32
%c4 = constant 4 : index
%c1 = constant 1 : index
%c32 = constant 32 : index
%c-1 = constant -1 : index
%c10 = constant 10 : index
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<1x10xf32>
%1 = "gpu.block_id"() {dimension = "x"} : () -> index
%2 = "gpu.grid_dim"() {dimension = "x"} : () -> index
%3 = "gpu.block_id"() {dimension = "y"} : () -> index
%4 = "gpu.grid_dim"() {dimension = "y"} : () -> index
// Tile loop bounds: dimension 0 starts at block_id.y * 4 and steps by
// grid_dim.y * 4; dimension 1 starts at block_id.x * 32 and steps by
// grid_dim.x * 32.
%5 = muli %3, %c4 : index
%6 = muli %4, %c4 : index
%7 = muli %1, %c32 : index
%8 = muli %2, %c32 : index
scf.for %arg0 = %5 to %c1 step %6 {
scf.for %arg1 = %7 to %c10 step %8 {
// %12 = min(4, 1 - %arg0).
%9 = muli %arg0, %c-1 : index
%10 = addi %9, %c1 : index
%11 = cmpi "slt", %c4, %10 : index
%12 = select %11, %c4, %10 : index
// %16 = min(32, 10 - %arg1).
%13 = muli %arg1, %c-1 : index
%14 = addi %13, %c10 : index
%15 = cmpi "slt", %c32, %14 : index
%16 = select %15, %c32, %14 : index
%17 = "gpu.thread_id"() {dimension = "x"} : () -> index
%18 = "gpu.block_dim"() {dimension = "x"} : () -> index
%19 = "gpu.thread_id"() {dimension = "y"} : () -> index
%20 = "gpu.block_dim"() {dimension = "y"} : () -> index
// Element loops run from thread_id to the tile extent (%12 / %16) in steps of
// block_dim.
scf.for %arg2 = %19 to %12 step %20 {
scf.for %arg3 = %17 to %16 step %18 {
// Buffer indices = tile origin + offset within the tile.
%21 = addi %arg0, %arg2 : index
%22 = addi %arg1, %arg3 : index
store %cst, %0[%21, %22] : memref<1x10xf32>
}
}
}
}
return
}
// Single output binding: set 0, binding 0, StorageBuffer, "Write|Discard".
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @ret0, set=0, binding=0, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After Canonicalizer ***
// @main_ex_dispatch_0 after canonicalization, still in std/scf/gpu form.
// The op sequence matches the dump printed immediately before this one.
// The function writes 0.0 into the memref<1x10xf32> placeholder for @ret0.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
func @main_ex_dispatch_0() attributes {spv.entry_point_abi = {local_size = dense<[32, 4, 1]> : vector<3xi32>}} {
%cst = constant 0.000000e+00 : f32
%c4 = constant 4 : index
%c1 = constant 1 : index
%c32 = constant 32 : index
%c-1 = constant -1 : index
%c10 = constant 10 : index
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<1x10xf32>
%1 = "gpu.block_id"() {dimension = "x"} : () -> index
%2 = "gpu.grid_dim"() {dimension = "x"} : () -> index
%3 = "gpu.block_id"() {dimension = "y"} : () -> index
%4 = "gpu.grid_dim"() {dimension = "y"} : () -> index
// %5/%6: start and step of the loop over dim 0 (block_id.y * 4, grid_dim.y * 4).
%5 = muli %3, %c4 : index
%6 = muli %4, %c4 : index
// %7/%8: start and step of the loop over dim 1 (block_id.x * 32, grid_dim.x * 32).
%7 = muli %1, %c32 : index
%8 = muli %2, %c32 : index
scf.for %arg0 = %5 to %c1 step %6 {
scf.for %arg1 = %7 to %c10 step %8 {
// Clamp the inner extents: %12 = min(4, 1 - %arg0), %16 = min(32, 10 - %arg1).
%9 = muli %arg0, %c-1 : index
%10 = addi %9, %c1 : index
%11 = cmpi "slt", %c4, %10 : index
%12 = select %11, %c4, %10 : index
%13 = muli %arg1, %c-1 : index
%14 = addi %13, %c10 : index
%15 = cmpi "slt", %c32, %14 : index
%16 = select %15, %c32, %14 : index
%17 = "gpu.thread_id"() {dimension = "x"} : () -> index
%18 = "gpu.block_dim"() {dimension = "x"} : () -> index
%19 = "gpu.thread_id"() {dimension = "y"} : () -> index
%20 = "gpu.block_dim"() {dimension = "y"} : () -> index
// Per-thread loops: thread_id.y over [.., %12), thread_id.x over [.., %16).
scf.for %arg2 = %19 to %12 step %20 {
scf.for %arg3 = %17 to %16 step %18 {
%21 = addi %arg0, %arg2 : index
%22 = addi %arg1, %arg3 : index
store %cst, %0[%21, %22] : memref<1x10xf32>
}
}
}
}
return
}
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @ret0, set=0, binding=0, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After CSE ***
// @main_ex_dispatch_0 after CSE; this is the last std/scf/gpu snapshot in this
// section, and the next dump is printed after ConvertToSPIRVPass.
// The ops are the same as in the two dumps printed before this one.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
// local_size [32, 4, 1] is carried as an attribute on the function here.
func @main_ex_dispatch_0() attributes {spv.entry_point_abi = {local_size = dense<[32, 4, 1]> : vector<3xi32>}} {
%cst = constant 0.000000e+00 : f32
%c4 = constant 4 : index
%c1 = constant 1 : index
%c32 = constant 32 : index
%c-1 = constant -1 : index
%c10 = constant 10 : index
// Output buffer; the loop nest below is its only user.
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<1x10xf32>
%1 = "gpu.block_id"() {dimension = "x"} : () -> index
%2 = "gpu.grid_dim"() {dimension = "x"} : () -> index
%3 = "gpu.block_id"() {dimension = "y"} : () -> index
%4 = "gpu.grid_dim"() {dimension = "y"} : () -> index
%5 = muli %3, %c4 : index
%6 = muli %4, %c4 : index
%7 = muli %1, %c32 : index
%8 = muli %2, %c32 : index
// Workgroup-level loops over the 1 x 10 iteration space.
scf.for %arg0 = %5 to %c1 step %6 {
scf.for %arg1 = %7 to %c10 step %8 {
// 1 - %arg0, then the smaller of that and 4.
%9 = muli %arg0, %c-1 : index
%10 = addi %9, %c1 : index
%11 = cmpi "slt", %c4, %10 : index
%12 = select %11, %c4, %10 : index
// 10 - %arg1, then the smaller of that and 32.
%13 = muli %arg1, %c-1 : index
%14 = addi %13, %c10 : index
%15 = cmpi "slt", %c32, %14 : index
%16 = select %15, %c32, %14 : index
%17 = "gpu.thread_id"() {dimension = "x"} : () -> index
%18 = "gpu.block_dim"() {dimension = "x"} : () -> index
%19 = "gpu.thread_id"() {dimension = "y"} : () -> index
%20 = "gpu.block_dim"() {dimension = "y"} : () -> index
// Thread-level loops bounded by the clamped extents %12 and %16.
scf.for %arg2 = %19 to %12 step %20 {
scf.for %arg3 = %17 to %16 step %18 {
%21 = addi %arg0, %arg2 : index
%22 = addi %arg1, %arg3 : index
store %cst, %0[%21, %22] : memref<1x10xf32>
}
}
}
}
return
}
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @ret0, set=0, binding=0, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::ConvertToSPIRVPass ***
// First snapshot of @main_ex_dispatch_0 in the spv dialect.
// Compared with the std/scf/gpu dump printed before it:
//   - index values are i32;
//   - the output buffer is a global bound at (set 0, binding 0), typed as a
//     struct wrapping a 10-element f32 array with stride 4;
//   - block id, grid dim and thread id are loads from the WorkgroupId,
//     NumWorkgroups and LocalInvocationId built-in variables;
//   - each scf.for is an spv.loop region with ^bb1 as header (induction value
//     as block argument), ^bb2 as body and ^bb3 as merge block.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
spv.module Logical GLSL450 {
spv.globalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_NumWorkgroups__ built_in("NumWorkgroups") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__resource_var_0_0__ bind(0, 0) : !spv.ptr<!spv.struct<!spv.array<10 x f32, stride=4> [0]>, StorageBuffer>
spv.func @main_ex_dispatch_0() "None" attributes {spv.entry_point_abi = {local_size = dense<[32, 4, 1]> : vector<3xi32>}} {
%0 = spv.constant 0.000000e+00 : f32
%1 = spv.constant 4 : i32
%2 = spv.constant 1 : i32
%3 = spv.constant 32 : i32
%4 = spv.constant -1 : i32
%5 = spv.constant 10 : i32
%6 = spv._address_of @__resource_var_0_0__ : !spv.ptr<!spv.struct<!spv.array<10 x f32, stride=4> [0]>, StorageBuffer>
// %9 = WorkgroupId.x, %12 = NumWorkgroups.x.
%7 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%8 = spv.Load "Input" %7 : vector<3xi32>
%9 = spv.CompositeExtract %8[0 : i32] : vector<3xi32>
%10 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%11 = spv.Load "Input" %10 : vector<3xi32>
%12 = spv.CompositeExtract %11[0 : i32] : vector<3xi32>
// %15 = WorkgroupId.y, %18 = NumWorkgroups.y.
%13 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%14 = spv.Load "Input" %13 : vector<3xi32>
%15 = spv.CompositeExtract %14[1 : i32] : vector<3xi32>
%16 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%17 = spv.Load "Input" %16 : vector<3xi32>
%18 = spv.CompositeExtract %17[1 : i32] : vector<3xi32>
// Start/step pairs for the two outer loops (y scaled by 4, x scaled by 32).
%19 = spv.IMul %15, %1 : i32
%20 = spv.IMul %18, %1 : i32
%21 = spv.IMul %9, %3 : i32
%22 = spv.IMul %12, %3 : i32
// Outermost loop: %23 runs from %19 while %23 < 1, stepping by %20.
spv.loop {
spv.Branch ^bb1(%19 : i32)
^bb1(%23: i32): // 2 preds: ^bb0, ^bb2
%24 = spv.SLessThan %23, %2 : i32
spv.BranchConditional %24, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Second loop: %26 runs from %21 while %26 < 10, stepping by %22.
spv.loop {
spv.Branch ^bb1(%21 : i32)
^bb1(%26: i32): // 2 preds: ^bb0, ^bb2
%27 = spv.SLessThan %26, %5 : i32
spv.BranchConditional %27, ^bb2, ^bb3
^bb2: // pred: ^bb1
// %31 = min(4, 1 - %23); %35 = min(32, 10 - %26).
%28 = spv.IMul %23, %4 : i32
%29 = spv.IAdd %28, %2 : i32
%30 = spv.SLessThan %1, %29 : i32
%31 = spv.Select %30, %1, %29 : i1, i32
%32 = spv.IMul %26, %4 : i32
%33 = spv.IAdd %32, %5 : i32
%34 = spv.SLessThan %3, %33 : i32
%35 = spv.Select %34, %3, %33 : i1, i32
// %38 = LocalInvocationId.x; %39 = 32 is the step of the innermost loop.
%36 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%37 = spv.Load "Input" %36 : vector<3xi32>
%38 = spv.CompositeExtract %37[0 : i32] : vector<3xi32>
%39 = spv.constant 32 : i32
// %42 = LocalInvocationId.y; %43 = 4 is the step of the third loop.
%40 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%41 = spv.Load "Input" %40 : vector<3xi32>
%42 = spv.CompositeExtract %41[1 : i32] : vector<3xi32>
%43 = spv.constant 4 : i32
// Third loop: %45 runs from %42 while %45 < %31, stepping by 4.
spv.loop {
spv.Branch ^bb1(%42 : i32)
^bb1(%45: i32): // 2 preds: ^bb0, ^bb2
%46 = spv.SLessThan %45, %31 : i32
spv.BranchConditional %46, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Innermost loop: %48 runs from %38 while %48 < %35, stepping by 32.
spv.loop {
spv.Branch ^bb1(%38 : i32)
^bb1(%48: i32): // 2 preds: ^bb0, ^bb2
%49 = spv.SLessThan %48, %35 : i32
spv.BranchConditional %49, ^bb2, ^bb3
^bb2: // pred: ^bb1
// %50 and %51 are the two element indices of the former 2-D store.
%50 = spv.IAdd %23, %45 : i32
%51 = spv.IAdd %26, %48 : i32
// Linearized array index: %59 = 0 + 10 * %50 + 1 * %51.
%52 = spv.constant 0 : i32
%53 = spv.constant 0 : i32
%54 = spv.constant 10 : i32
%55 = spv.IMul %54, %50 : i32
%56 = spv.IAdd %53, %55 : i32
%57 = spv.constant 1 : i32
%58 = spv.IMul %57, %51 : i32
%59 = spv.IAdd %56, %58 : i32
// Struct member %52 (= 0), array element %59; store 0.0 there.
%60 = spv.AccessChain %6[%52, %59] : !spv.ptr<!spv.struct<!spv.array<10 x f32, stride=4> [0]>, StorageBuffer>
spv.Store "StorageBuffer" %60, %0 : f32
%61 = spv.IAdd %48, %39 : i32
spv.Branch ^bb1(%61 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%47 = spv.IAdd %45, %43 : i32
spv.Branch ^bb1(%47 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%44 = spv.IAdd %26, %22 : i32
spv.Branch ^bb1(%44 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%25 = spv.IAdd %23, %20 : i32
spv.Branch ^bb1(%25 : i32)
^bb3: // pred: ^bb1
spv._merge
}
spv.Return
}
}
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @ret0, set=0, binding=0, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After SPIRVLowerABIAttributes ***
// spv.module for @main_ex_dispatch_0 after the ABI attributes were lowered.
// Differences from the dump printed before it: spv.func no longer carries the
// spv.entry_point_abi attribute, and the module now ends with spv.EntryPoint
// and spv.ExecutionMode ops that hold the same 32 x 4 x 1 local size.
// This dump prints only the spv.module; no outer module or hal.interface.
spv.module Logical GLSL450 {
spv.globalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_NumWorkgroups__ built_in("NumWorkgroups") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__resource_var_0_0__ bind(0, 0) : !spv.ptr<!spv.struct<!spv.array<10 x f32, stride=4> [0]>, StorageBuffer>
spv.func @main_ex_dispatch_0() "None" {
%0 = spv.constant 0.000000e+00 : f32
%1 = spv.constant 4 : i32
%2 = spv.constant 1 : i32
%3 = spv.constant 32 : i32
%4 = spv.constant -1 : i32
%5 = spv.constant 10 : i32
%6 = spv._address_of @__resource_var_0_0__ : !spv.ptr<!spv.struct<!spv.array<10 x f32, stride=4> [0]>, StorageBuffer>
// x components of WorkgroupId (%9) and NumWorkgroups (%12).
%7 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%8 = spv.Load "Input" %7 : vector<3xi32>
%9 = spv.CompositeExtract %8[0 : i32] : vector<3xi32>
%10 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%11 = spv.Load "Input" %10 : vector<3xi32>
%12 = spv.CompositeExtract %11[0 : i32] : vector<3xi32>
// y components of WorkgroupId (%15) and NumWorkgroups (%18).
%13 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%14 = spv.Load "Input" %13 : vector<3xi32>
%15 = spv.CompositeExtract %14[1 : i32] : vector<3xi32>
%16 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%17 = spv.Load "Input" %16 : vector<3xi32>
%18 = spv.CompositeExtract %17[1 : i32] : vector<3xi32>
%19 = spv.IMul %15, %1 : i32
%20 = spv.IMul %18, %1 : i32
%21 = spv.IMul %9, %3 : i32
%22 = spv.IMul %12, %3 : i32
// Loop on %23: starts at %19, continues while %23 < 1, increments by %20.
spv.loop {
spv.Branch ^bb1(%19 : i32)
^bb1(%23: i32): // 2 preds: ^bb0, ^bb2
%24 = spv.SLessThan %23, %2 : i32
spv.BranchConditional %24, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Loop on %26: starts at %21, continues while %26 < 10, increments by %22.
spv.loop {
spv.Branch ^bb1(%21 : i32)
^bb1(%26: i32): // 2 preds: ^bb0, ^bb2
%27 = spv.SLessThan %26, %5 : i32
spv.BranchConditional %27, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner loop bounds: %31 = min(4, 1 - %23), %35 = min(32, 10 - %26).
%28 = spv.IMul %23, %4 : i32
%29 = spv.IAdd %28, %2 : i32
%30 = spv.SLessThan %1, %29 : i32
%31 = spv.Select %30, %1, %29 : i1, i32
%32 = spv.IMul %26, %4 : i32
%33 = spv.IAdd %32, %5 : i32
%34 = spv.SLessThan %3, %33 : i32
%35 = spv.Select %34, %3, %33 : i1, i32
%36 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%37 = spv.Load "Input" %36 : vector<3xi32>
%38 = spv.CompositeExtract %37[0 : i32] : vector<3xi32>
%39 = spv.constant 32 : i32
%40 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%41 = spv.Load "Input" %40 : vector<3xi32>
%42 = spv.CompositeExtract %41[1 : i32] : vector<3xi32>
%43 = spv.constant 4 : i32
// Loop on %45: starts at LocalInvocationId.y, bound %31, increment %43.
spv.loop {
spv.Branch ^bb1(%42 : i32)
^bb1(%45: i32): // 2 preds: ^bb0, ^bb2
%46 = spv.SLessThan %45, %31 : i32
spv.BranchConditional %46, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Loop on %48: starts at LocalInvocationId.x, bound %35, increment %39.
spv.loop {
spv.Branch ^bb1(%38 : i32)
^bb1(%48: i32): // 2 preds: ^bb0, ^bb2
%49 = spv.SLessThan %48, %35 : i32
spv.BranchConditional %49, ^bb2, ^bb3
^bb2: // pred: ^bb1
%50 = spv.IAdd %23, %45 : i32
%51 = spv.IAdd %26, %48 : i32
// Element offset into the 10-element array: 0 + 10 * %50 + 1 * %51.
%52 = spv.constant 0 : i32
%53 = spv.constant 0 : i32
%54 = spv.constant 10 : i32
%55 = spv.IMul %54, %50 : i32
%56 = spv.IAdd %53, %55 : i32
%57 = spv.constant 1 : i32
%58 = spv.IMul %57, %51 : i32
%59 = spv.IAdd %56, %58 : i32
%60 = spv.AccessChain %6[%52, %59] : !spv.ptr<!spv.struct<!spv.array<10 x f32, stride=4> [0]>, StorageBuffer>
spv.Store "StorageBuffer" %60, %0 : f32
%61 = spv.IAdd %48, %39 : i32
spv.Branch ^bb1(%61 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%47 = spv.IAdd %45, %43 : i32
spv.Branch ^bb1(%47 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%44 = spv.IAdd %26, %22 : i32
spv.Branch ^bb1(%44 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%25 = spv.IAdd %23, %20 : i32
spv.Branch ^bb1(%25 : i32)
^bb3: // pred: ^bb1
spv._merge
}
spv.Return
}
// Entry point declaration listing the three built-in variables it uses,
// followed by the workgroup size.
spv.EntryPoint "GLCompute" @main_ex_dispatch_0, @__builtin_var_WorkgroupId__, @__builtin_var_NumWorkgroups__, @__builtin_var_LocalInvocationId__
spv.ExecutionMode @main_ex_dispatch_0 "LocalSize", 32, 4, 1
}
// *** IR Dump After Canonicalizer ***
// spv.module for @main_ex_dispatch_0 after canonicalization.
// Visible differences from the dump printed before it:
//   - all constants sit at the top of the function; 32 and 4 each appear once
//     (%3, %4) and also serve as the inner-loop increments;
//   - the element index is computed as %49 * 10 + %50, without the previous
//     "0 +" and "1 *" operations.
spv.module Logical GLSL450 {
spv.globalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_NumWorkgroups__ built_in("NumWorkgroups") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__resource_var_0_0__ bind(0, 0) : !spv.ptr<!spv.struct<!spv.array<10 x f32, stride=4> [0]>, StorageBuffer>
spv.func @main_ex_dispatch_0() "None" {
%0 = spv.constant 0.000000e+00 : f32
%1 = spv.constant 1 : i32
%2 = spv.constant -1 : i32
%3 = spv.constant 32 : i32
%4 = spv.constant 4 : i32
%5 = spv.constant 0 : i32
%6 = spv.constant 10 : i32
%7 = spv._address_of @__resource_var_0_0__ : !spv.ptr<!spv.struct<!spv.array<10 x f32, stride=4> [0]>, StorageBuffer>
// The address of each built-in is still taken once per use at this point.
%8 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%9 = spv.Load "Input" %8 : vector<3xi32>
%10 = spv.CompositeExtract %9[0 : i32] : vector<3xi32>
%11 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%12 = spv.Load "Input" %11 : vector<3xi32>
%13 = spv.CompositeExtract %12[0 : i32] : vector<3xi32>
%14 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%15 = spv.Load "Input" %14 : vector<3xi32>
%16 = spv.CompositeExtract %15[1 : i32] : vector<3xi32>
%17 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%18 = spv.Load "Input" %17 : vector<3xi32>
%19 = spv.CompositeExtract %18[1 : i32] : vector<3xi32>
// %20/%21: start and increment of the loop on %24 (y values times 4).
%20 = spv.IMul %16, %4 : i32
%21 = spv.IMul %19, %4 : i32
// %22/%23: start and increment of the loop on %27 (x values times 32).
%22 = spv.IMul %10, %3 : i32
%23 = spv.IMul %13, %3 : i32
spv.loop {
spv.Branch ^bb1(%20 : i32)
^bb1(%24: i32): // 2 preds: ^bb0, ^bb2
%25 = spv.SLessThan %24, %1 : i32
spv.BranchConditional %25, ^bb2, ^bb3
^bb2: // pred: ^bb1
spv.loop {
spv.Branch ^bb1(%22 : i32)
^bb1(%27: i32): // 2 preds: ^bb0, ^bb2
%28 = spv.SLessThan %27, %6 : i32
spv.BranchConditional %28, ^bb2, ^bb3
^bb2: // pred: ^bb1
// %32 = min(4, 1 - %24); %36 = min(32, 10 - %27).
%29 = spv.IMul %24, %2 : i32
%30 = spv.IAdd %29, %1 : i32
%31 = spv.SLessThan %4, %30 : i32
%32 = spv.Select %31, %4, %30 : i1, i32
%33 = spv.IMul %27, %2 : i32
%34 = spv.IAdd %33, %6 : i32
%35 = spv.SLessThan %3, %34 : i32
%36 = spv.Select %35, %3, %34 : i1, i32
// %39 = LocalInvocationId.x, %42 = LocalInvocationId.y.
%37 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%38 = spv.Load "Input" %37 : vector<3xi32>
%39 = spv.CompositeExtract %38[0 : i32] : vector<3xi32>
%40 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%41 = spv.Load "Input" %40 : vector<3xi32>
%42 = spv.CompositeExtract %41[1 : i32] : vector<3xi32>
spv.loop {
spv.Branch ^bb1(%42 : i32)
^bb1(%44: i32): // 2 preds: ^bb0, ^bb2
%45 = spv.SLessThan %44, %32 : i32
spv.BranchConditional %45, ^bb2, ^bb3
^bb2: // pred: ^bb1
spv.loop {
spv.Branch ^bb1(%39 : i32)
^bb1(%47: i32): // 2 preds: ^bb0, ^bb2
%48 = spv.SLessThan %47, %36 : i32
spv.BranchConditional %48, ^bb2, ^bb3
^bb2: // pred: ^bb1
%49 = spv.IAdd %24, %44 : i32
%50 = spv.IAdd %27, %47 : i32
// Array element index %52 = %49 * 10 + %50; %5 (= 0) selects the struct member.
%51 = spv.IMul %49, %6 : i32
%52 = spv.IAdd %51, %50 : i32
%53 = spv.AccessChain %7[%5, %52] : !spv.ptr<!spv.struct<!spv.array<10 x f32, stride=4> [0]>, StorageBuffer>
spv.Store "StorageBuffer" %53, %0 : f32
// Advance the innermost induction value by 32.
%54 = spv.IAdd %47, %3 : i32
spv.Branch ^bb1(%54 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance the third loop's induction value by 4.
%46 = spv.IAdd %44, %4 : i32
spv.Branch ^bb1(%46 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%43 = spv.IAdd %27, %23 : i32
spv.Branch ^bb1(%43 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%26 = spv.IAdd %24, %21 : i32
spv.Branch ^bb1(%26 : i32)
^bb3: // pred: ^bb1
spv._merge
}
spv.Return
}
spv.EntryPoint "GLCompute" @main_ex_dispatch_0, @__builtin_var_WorkgroupId__, @__builtin_var_NumWorkgroups__, @__builtin_var_LocalInvocationId__
spv.ExecutionMode @main_ex_dispatch_0 "LocalSize", 32, 4, 1
}
// *** IR Dump After CSE ***
// spv.module for @main_ex_dispatch_0 after CSE.
// Visible difference from the dump printed before it: each built-in variable's
// address is taken once per region (%8, %11 at function level, %35 inside the
// second loop) and then loaded twice, instead of one spv._address_of per load.
spv.module Logical GLSL450 {
spv.globalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_NumWorkgroups__ built_in("NumWorkgroups") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__resource_var_0_0__ bind(0, 0) : !spv.ptr<!spv.struct<!spv.array<10 x f32, stride=4> [0]>, StorageBuffer>
spv.func @main_ex_dispatch_0() "None" {
%0 = spv.constant 0.000000e+00 : f32
%1 = spv.constant 1 : i32
%2 = spv.constant -1 : i32
%3 = spv.constant 32 : i32
%4 = spv.constant 4 : i32
%5 = spv.constant 0 : i32
%6 = spv.constant 10 : i32
%7 = spv._address_of @__resource_var_0_0__ : !spv.ptr<!spv.struct<!spv.array<10 x f32, stride=4> [0]>, StorageBuffer>
%8 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%9 = spv.Load "Input" %8 : vector<3xi32>
%10 = spv.CompositeExtract %9[0 : i32] : vector<3xi32>
%11 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%12 = spv.Load "Input" %11 : vector<3xi32>
%13 = spv.CompositeExtract %12[0 : i32] : vector<3xi32>
// Second loads reuse the addresses %8 and %11 to read the y components.
%14 = spv.Load "Input" %8 : vector<3xi32>
%15 = spv.CompositeExtract %14[1 : i32] : vector<3xi32>
%16 = spv.Load "Input" %11 : vector<3xi32>
%17 = spv.CompositeExtract %16[1 : i32] : vector<3xi32>
%18 = spv.IMul %15, %4 : i32
%19 = spv.IMul %17, %4 : i32
%20 = spv.IMul %10, %3 : i32
%21 = spv.IMul %13, %3 : i32
// Loop on %22 over the size-1 extent (start %18, increment %19).
spv.loop {
spv.Branch ^bb1(%18 : i32)
^bb1(%22: i32): // 2 preds: ^bb0, ^bb2
%23 = spv.SLessThan %22, %1 : i32
spv.BranchConditional %23, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Loop on %25 over the size-10 extent (start %20, increment %21).
spv.loop {
spv.Branch ^bb1(%20 : i32)
^bb1(%25: i32): // 2 preds: ^bb0, ^bb2
%26 = spv.SLessThan %25, %6 : i32
spv.BranchConditional %26, ^bb2, ^bb3
^bb2: // pred: ^bb1
// %30 = min(4, 1 - %22); %34 = min(32, 10 - %25).
%27 = spv.IMul %22, %2 : i32
%28 = spv.IAdd %27, %1 : i32
%29 = spv.SLessThan %4, %28 : i32
%30 = spv.Select %29, %4, %28 : i1, i32
%31 = spv.IMul %25, %2 : i32
%32 = spv.IAdd %31, %6 : i32
%33 = spv.SLessThan %3, %32 : i32
%34 = spv.Select %33, %3, %32 : i1, i32
// One address for LocalInvocationId, loaded twice for its x and y components.
%35 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%36 = spv.Load "Input" %35 : vector<3xi32>
%37 = spv.CompositeExtract %36[0 : i32] : vector<3xi32>
%38 = spv.Load "Input" %35 : vector<3xi32>
%39 = spv.CompositeExtract %38[1 : i32] : vector<3xi32>
// Loop on %41 from LocalInvocationId.y up to %30, increment 4.
spv.loop {
spv.Branch ^bb1(%39 : i32)
^bb1(%41: i32): // 2 preds: ^bb0, ^bb2
%42 = spv.SLessThan %41, %30 : i32
spv.BranchConditional %42, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Loop on %44 from LocalInvocationId.x up to %34, increment 32.
spv.loop {
spv.Branch ^bb1(%37 : i32)
^bb1(%44: i32): // 2 preds: ^bb0, ^bb2
%45 = spv.SLessThan %44, %34 : i32
spv.BranchConditional %45, ^bb2, ^bb3
^bb2: // pred: ^bb1
%46 = spv.IAdd %22, %41 : i32
%47 = spv.IAdd %25, %44 : i32
// Store 0.0 at array element %46 * 10 + %47 of struct member 0.
%48 = spv.IMul %46, %6 : i32
%49 = spv.IAdd %48, %47 : i32
%50 = spv.AccessChain %7[%5, %49] : !spv.ptr<!spv.struct<!spv.array<10 x f32, stride=4> [0]>, StorageBuffer>
spv.Store "StorageBuffer" %50, %0 : f32
%51 = spv.IAdd %44, %3 : i32
spv.Branch ^bb1(%51 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%43 = spv.IAdd %41, %4 : i32
spv.Branch ^bb1(%43 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%40 = spv.IAdd %25, %21 : i32
spv.Branch ^bb1(%40 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%24 = spv.IAdd %22, %19 : i32
spv.Branch ^bb1(%24 : i32)
^bb3: // pred: ^bb1
spv._merge
}
spv.Return
}
spv.EntryPoint "GLCompute" @main_ex_dispatch_0, @__builtin_var_WorkgroupId__, @__builtin_var_NumWorkgroups__, @__builtin_var_LocalInvocationId__
spv.ExecutionMode @main_ex_dispatch_0 "LocalSize", 32, 4, 1
}
// *** IR Dump After SPIRVUpdateVCE ***
// spv.module for @main_ex_dispatch_0 after SPIRVUpdateVCE.
// Visible difference from the dump printed before it: the spv.module op now
// carries a `requires #spv.vce<v1.0, [Shader],
// [SPV_KHR_storage_buffer_storage_class]>` clause. The function body,
// entry point and execution mode are the same.
spv.module Logical GLSL450 requires #spv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class]> {
spv.globalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_NumWorkgroups__ built_in("NumWorkgroups") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__resource_var_0_0__ bind(0, 0) : !spv.ptr<!spv.struct<!spv.array<10 x f32, stride=4> [0]>, StorageBuffer>
spv.func @main_ex_dispatch_0() "None" {
%0 = spv.constant 0.000000e+00 : f32
%1 = spv.constant 1 : i32
%2 = spv.constant -1 : i32
%3 = spv.constant 32 : i32
%4 = spv.constant 4 : i32
%5 = spv.constant 0 : i32
%6 = spv.constant 10 : i32
%7 = spv._address_of @__resource_var_0_0__ : !spv.ptr<!spv.struct<!spv.array<10 x f32, stride=4> [0]>, StorageBuffer>
// WorkgroupId (%8) and NumWorkgroups (%11): x components in %10/%13,
// y components in %15/%17.
%8 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%9 = spv.Load "Input" %8 : vector<3xi32>
%10 = spv.CompositeExtract %9[0 : i32] : vector<3xi32>
%11 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%12 = spv.Load "Input" %11 : vector<3xi32>
%13 = spv.CompositeExtract %12[0 : i32] : vector<3xi32>
%14 = spv.Load "Input" %8 : vector<3xi32>
%15 = spv.CompositeExtract %14[1 : i32] : vector<3xi32>
%16 = spv.Load "Input" %11 : vector<3xi32>
%17 = spv.CompositeExtract %16[1 : i32] : vector<3xi32>
// Outer-loop starts (%18, %20) and increments (%19, %21).
%18 = spv.IMul %15, %4 : i32
%19 = spv.IMul %17, %4 : i32
%20 = spv.IMul %10, %3 : i32
%21 = spv.IMul %13, %3 : i32
spv.loop {
spv.Branch ^bb1(%18 : i32)
^bb1(%22: i32): // 2 preds: ^bb0, ^bb2
%23 = spv.SLessThan %22, %1 : i32
spv.BranchConditional %23, ^bb2, ^bb3
^bb2: // pred: ^bb1
spv.loop {
spv.Branch ^bb1(%20 : i32)
^bb1(%25: i32): // 2 preds: ^bb0, ^bb2
%26 = spv.SLessThan %25, %6 : i32
spv.BranchConditional %26, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Bounds for the two inner loops: %30 = min(4, 1 - %22), %34 = min(32, 10 - %25).
%27 = spv.IMul %22, %2 : i32
%28 = spv.IAdd %27, %1 : i32
%29 = spv.SLessThan %4, %28 : i32
%30 = spv.Select %29, %4, %28 : i1, i32
%31 = spv.IMul %25, %2 : i32
%32 = spv.IAdd %31, %6 : i32
%33 = spv.SLessThan %3, %32 : i32
%34 = spv.Select %33, %3, %32 : i1, i32
// Inner-loop starts: %37 = LocalInvocationId.x, %39 = LocalInvocationId.y.
%35 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%36 = spv.Load "Input" %35 : vector<3xi32>
%37 = spv.CompositeExtract %36[0 : i32] : vector<3xi32>
%38 = spv.Load "Input" %35 : vector<3xi32>
%39 = spv.CompositeExtract %38[1 : i32] : vector<3xi32>
spv.loop {
spv.Branch ^bb1(%39 : i32)
^bb1(%41: i32): // 2 preds: ^bb0, ^bb2
%42 = spv.SLessThan %41, %30 : i32
spv.BranchConditional %42, ^bb2, ^bb3
^bb2: // pred: ^bb1
spv.loop {
spv.Branch ^bb1(%37 : i32)
^bb1(%44: i32): // 2 preds: ^bb0, ^bb2
%45 = spv.SLessThan %44, %34 : i32
spv.BranchConditional %45, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Loop body: compute element index %49 = (%22 + %41) * 10 + (%25 + %44)
// and store 0.0 there.
%46 = spv.IAdd %22, %41 : i32
%47 = spv.IAdd %25, %44 : i32
%48 = spv.IMul %46, %6 : i32
%49 = spv.IAdd %48, %47 : i32
%50 = spv.AccessChain %7[%5, %49] : !spv.ptr<!spv.struct<!spv.array<10 x f32, stride=4> [0]>, StorageBuffer>
spv.Store "StorageBuffer" %50, %0 : f32
%51 = spv.IAdd %44, %3 : i32
spv.Branch ^bb1(%51 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%43 = spv.IAdd %41, %4 : i32
spv.Branch ^bb1(%43 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%40 = spv.IAdd %25, %21 : i32
spv.Branch ^bb1(%40 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%24 = spv.IAdd %22, %19 : i32
spv.Branch ^bb1(%24 : i32)
^bb3: // pred: ^bb1
spv._merge
}
spv.Return
}
spv.EntryPoint "GLCompute" @main_ex_dispatch_0, @__builtin_var_WorkgroupId__, @__builtin_var_NumWorkgroups__, @__builtin_var_LocalInvocationId__
spv.ExecutionMode @main_ex_dispatch_0 "LocalSize", 32, 4, 1
}
// *** IR Dump After mlir::iree_compiler::IREE::HAL::TranslateExecutablesPass ***
// Full hal.executable for @main_ex_dispatch_0 after executable translation.
// Structure, outermost to innermost:
//   hal.executable
//     hal.interface @legacy_io         - one StorageBuffer binding, @ret0
//     hal.executable.entry_point       - ordinal 0, signature () -> tensor<1x10xf32>
//     hal.executable.target "vulkan*"
//       module {spv.target_env}        - wraps the spv.module and a second copy
//                                        of the hal.interface
// The spv.module is the same as in the dump printed after SPIRVUpdateVCE.
hal.executable @main_ex_dispatch_0 attributes {sym_visibility = "private"} {
hal.interface @legacy_io {
hal.interface.binding @ret0, set=0, binding=0, type="StorageBuffer", access="Write|Discard"
}
hal.executable.entry_point @main_ex_dispatch_0 attributes {interface = @legacy_io, ordinal = 0 : i32, signature = () -> tensor<1x10xf32>}
hal.executable.target "vulkan*" {
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
spv.module Logical GLSL450 requires #spv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class]> {
spv.globalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_NumWorkgroups__ built_in("NumWorkgroups") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input>
// Output storage: bound at (0, 0), matching @ret0's set=0, binding=0.
spv.globalVariable @__resource_var_0_0__ bind(0, 0) : !spv.ptr<!spv.struct<!spv.array<10 x f32, stride=4> [0]>, StorageBuffer>
spv.func @main_ex_dispatch_0() "None" {
%0 = spv.constant 0.000000e+00 : f32
%1 = spv.constant 1 : i32
%2 = spv.constant -1 : i32
%3 = spv.constant 32 : i32
%4 = spv.constant 4 : i32
%5 = spv.constant 0 : i32
%6 = spv.constant 10 : i32
%7 = spv._address_of @__resource_var_0_0__ : !spv.ptr<!spv.struct<!spv.array<10 x f32, stride=4> [0]>, StorageBuffer>
%8 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%9 = spv.Load "Input" %8 : vector<3xi32>
%10 = spv.CompositeExtract %9[0 : i32] : vector<3xi32>
%11 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%12 = spv.Load "Input" %11 : vector<3xi32>
%13 = spv.CompositeExtract %12[0 : i32] : vector<3xi32>
%14 = spv.Load "Input" %8 : vector<3xi32>
%15 = spv.CompositeExtract %14[1 : i32] : vector<3xi32>
%16 = spv.Load "Input" %11 : vector<3xi32>
%17 = spv.CompositeExtract %16[1 : i32] : vector<3xi32>
// WorkgroupId.y * 4, NumWorkgroups.y * 4, WorkgroupId.x * 32, NumWorkgroups.x * 32.
%18 = spv.IMul %15, %4 : i32
%19 = spv.IMul %17, %4 : i32
%20 = spv.IMul %10, %3 : i32
%21 = spv.IMul %13, %3 : i32
// Four nested spv.loop regions; each uses ^bb1 as header, ^bb2 as body and
// ^bb3 as merge block.
spv.loop {
spv.Branch ^bb1(%18 : i32)
^bb1(%22: i32): // 2 preds: ^bb0, ^bb2
%23 = spv.SLessThan %22, %1 : i32
spv.BranchConditional %23, ^bb2, ^bb3
^bb2: // pred: ^bb1
spv.loop {
spv.Branch ^bb1(%20 : i32)
^bb1(%25: i32): // 2 preds: ^bb0, ^bb2
%26 = spv.SLessThan %25, %6 : i32
spv.BranchConditional %26, ^bb2, ^bb3
^bb2: // pred: ^bb1
// %30 = min(4, 1 - %22); %34 = min(32, 10 - %25).
%27 = spv.IMul %22, %2 : i32
%28 = spv.IAdd %27, %1 : i32
%29 = spv.SLessThan %4, %28 : i32
%30 = spv.Select %29, %4, %28 : i1, i32
%31 = spv.IMul %25, %2 : i32
%32 = spv.IAdd %31, %6 : i32
%33 = spv.SLessThan %3, %32 : i32
%34 = spv.Select %33, %3, %32 : i1, i32
%35 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%36 = spv.Load "Input" %35 : vector<3xi32>
%37 = spv.CompositeExtract %36[0 : i32] : vector<3xi32>
%38 = spv.Load "Input" %35 : vector<3xi32>
%39 = spv.CompositeExtract %38[1 : i32] : vector<3xi32>
spv.loop {
spv.Branch ^bb1(%39 : i32)
^bb1(%41: i32): // 2 preds: ^bb0, ^bb2
%42 = spv.SLessThan %41, %30 : i32
spv.BranchConditional %42, ^bb2, ^bb3
^bb2: // pred: ^bb1
spv.loop {
spv.Branch ^bb1(%37 : i32)
^bb1(%44: i32): // 2 preds: ^bb0, ^bb2
%45 = spv.SLessThan %44, %34 : i32
spv.BranchConditional %45, ^bb2, ^bb3
^bb2: // pred: ^bb1
%46 = spv.IAdd %22, %41 : i32
%47 = spv.IAdd %25, %44 : i32
%48 = spv.IMul %46, %6 : i32
%49 = spv.IAdd %48, %47 : i32
// Write 0.0 to array element %49 of struct member 0.
%50 = spv.AccessChain %7[%5, %49] : !spv.ptr<!spv.struct<!spv.array<10 x f32, stride=4> [0]>, StorageBuffer>
spv.Store "StorageBuffer" %50, %0 : f32
%51 = spv.IAdd %44, %3 : i32
spv.Branch ^bb1(%51 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%43 = spv.IAdd %41, %4 : i32
spv.Branch ^bb1(%43 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%40 = spv.IAdd %25, %21 : i32
spv.Branch ^bb1(%40 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%24 = spv.IAdd %22, %19 : i32
spv.Branch ^bb1(%24 : i32)
^bb3: // pred: ^bb1
spv._merge
}
spv.Return
}
spv.EntryPoint "GLCompute" @main_ex_dispatch_0, @__builtin_var_WorkgroupId__, @__builtin_var_NumWorkgroups__, @__builtin_var_LocalInvocationId__
spv.ExecutionMode @main_ex_dispatch_0 "LocalSize", 32, 4, 1
}
// Copy of the interface nested inside the target module; this one keeps the
// sym_visibility attribute, unlike the one directly under hal.executable.
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @ret0, set=0, binding=0, type="StorageBuffer", access="Write|Discard"
}
}
}
}
// *** IR Dump After Inliner ***
// First snapshot of @main_ex_dispatch_1 in this section (printed after the
// Inliner). The dispatch loads a 1x5x2x2 f32 tensor from binding @arg0,
// reshapes it to 1x5x4, and stores the result to binding @ret0.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
func @main_ex_dispatch_1() {
// Offset 0 is used for both the load and the store.
%c0 = constant 0 : index
%0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<1x5x2x2xf32>
%1 = "xla_hlo.reshape"(%0) : (tensor<1x5x2x2xf32>) -> tensor<1x5x4xf32>
hal.interface.store.tensor %1, @legacy_io::@ret0, offset = %c0 : tensor<1x5x4xf32>
return
}
// Two storage-buffer bindings in set 0: the input at binding 0 (read) and the
// output at binding 1 (write/discard).
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After mlir::iree_compiler::Shape::`anonymous-namespace'::TieDynamicShapesPass ***
// @main_ex_dispatch_1 after TieDynamicShapesPass. Only the function is
// printed, and its body matches the function in the dump printed before it.
// Every tensor type here has fully static dimensions.
func @main_ex_dispatch_1() {
%c0 = constant 0 : index
%0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<1x5x2x2xf32>
%1 = "xla_hlo.reshape"(%0) : (tensor<1x5x2x2xf32>) -> tensor<1x5x4xf32>
hal.interface.store.tensor %1, @legacy_io::@ret0, offset = %c0 : tensor<1x5x4xf32>
return
}
// *** IR Dump After mlir::iree_compiler::Shape::`anonymous-namespace'::MaterializeShapeCalculationsPass ***
// @main_ex_dispatch_1 after MaterializeShapeCalculationsPass. The body is the
// same load / xla_hlo.reshape / store sequence as in the dump printed before
// it; no shape-related ops appear.
func @main_ex_dispatch_1() {
%c0 = constant 0 : index
%0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<1x5x2x2xf32>
%1 = "xla_hlo.reshape"(%0) : (tensor<1x5x2x2xf32>) -> tensor<1x5x4xf32>
hal.interface.store.tensor %1, @legacy_io::@ret0, offset = %c0 : tensor<1x5x4xf32>
return
}
// *** IR Dump After mlir::iree_compiler::Shape::`anonymous-namespace'::HoistShapeCalculations ***
// @main_ex_dispatch_1 after HoistShapeCalculations. The op order is the same
// as in the dump printed before it: constant, load, reshape, store, return.
func @main_ex_dispatch_1() {
%c0 = constant 0 : index
%0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<1x5x2x2xf32>
%1 = "xla_hlo.reshape"(%0) : (tensor<1x5x2x2xf32>) -> tensor<1x5x4xf32>
hal.interface.store.tensor %1, @legacy_io::@ret0, offset = %c0 : tensor<1x5x4xf32>
return
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::DecomposeHLOClampPass ***
// @main_ex_dispatch_1 after DecomposeHLOClampPass. The function contains no
// clamp op; its single xla_hlo op is the reshape, and the body matches the
// dump printed before it.
func @main_ex_dispatch_1() {
%c0 = constant 0 : index
%0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<1x5x2x2xf32>
%1 = "xla_hlo.reshape"(%0) : (tensor<1x5x2x2xf32>) -> tensor<1x5x4xf32>
hal.interface.store.tensor %1, @legacy_io::@ret0, offset = %c0 : tensor<1x5x4xf32>
return
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::ConvertHLOToLinalgOnTensorsPass ***
// @main_ex_dispatch_1 after HLO was converted to linalg on tensors.
// The xla_hlo.reshape of the dump printed before it is now a
// linalg.tensor_reshape.
func @main_ex_dispatch_1() {
%c0 = constant 0 : index
%0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<1x5x2x2xf32>
// The three maps group the source dims as (d0), (d1), (d2, d3): the last two
// source dims (2 x 2) form the last result dim (4).
%1 = linalg.tensor_reshape %0 [affine_map<(d0, d1, d2, d3) -> (d0)>, affine_map<(d0, d1, d2, d3) -> (d1)>, affine_map<(d0, d1, d2, d3) -> (d2, d3)>] : tensor<1x5x2x2xf32> into tensor<1x5x4xf32>
hal.interface.store.tensor %1, @legacy_io::@ret0, offset = %c0 : tensor<1x5x4xf32>
return
}
// *** IR Dump After LinalgFusionOfTensorOps ***
// @main_ex_dispatch_1 after LinalgFusionOfTensorOps, printed with its
// enclosing module and hal.interface. The function holds a single linalg op
// (the tensor_reshape) and matches the function in the dump printed before it.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
func @main_ex_dispatch_1() {
%c0 = constant 0 : index
%0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<1x5x2x2xf32>
// Collapses the trailing 2 x 2 dims into one dim of size 4.
%1 = linalg.tensor_reshape %0 [affine_map<(d0, d1, d2, d3) -> (d0)>, affine_map<(d0, d1, d2, d3) -> (d1)>, affine_map<(d0, d1, d2, d3) -> (d2, d3)>] : tensor<1x5x2x2xf32> into tensor<1x5x4xf32>
hal.interface.store.tensor %1, @legacy_io::@ret0, offset = %c0 : tensor<1x5x4xf32>
return
}
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::ConvertHLOToLinalgOnBuffersPass ***
// Buffer form of the dispatch: the tensor load/store ops are gone and the
// function now works on memrefs obtained from iree.placeholder ops bound to
// @legacy_io::@ret0 and @legacy_io::@arg0.
func @main_ex_dispatch_1() {
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<1x5x4xf32>
// The reshape is applied to the 1x5x4 output buffer, giving it the 1x5x2x2
// shape of the input so the two can be copied element for element.
%1 = linalg.reshape %0 [affine_map<(d0, d1, d2, d3) -> (d0)>, affine_map<(d0, d1, d2, d3) -> (d1)>, affine_map<(d0, d1, d2, d3) -> (d2, d3)>] : memref<1x5x4xf32> into memref<1x5x2x2xf32>
// %c0 is defined here but no op in this function uses it.
%c0 = constant 0 : index
%2 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5x2x2xf32>
// Copy the input buffer (%2) into the reshaped output buffer (%1).
linalg.copy(%2, %1) : memref<1x5x2x2xf32>, memref<1x5x2x2xf32>
return
}
// *** IR Dump After Canonicalizer ***
// Module-level view after canonicalization. Compared with the preceding
// function dump, the unused `%c0 = constant 0 : index` is no longer present;
// the remaining ops are unchanged.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
// Copies the @arg0 buffer into the @ret0 buffer reshaped to 1x5x2x2.
func @main_ex_dispatch_1() {
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<1x5x4xf32>
%1 = linalg.reshape %0 [affine_map<(d0, d1, d2, d3) -> (d0)>, affine_map<(d0, d1, d2, d3) -> (d1)>, affine_map<(d0, d1, d2, d3) -> (d2, d3)>] : memref<1x5x4xf32> into memref<1x5x2x2xf32>
%2 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5x2x2xf32>
linalg.copy(%2, %1) : memref<1x5x2x2xf32>, memref<1x5x2x2xf32>
return
}
// Binding declarations: @arg0 is set 0 / binding 0 (Read), @ret0 is
// set 0 / binding 1 (Write|Discard).
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After CSE ***
// Module after CSE. The contents are identical to the preceding Canonicalizer
// dump: two placeholders, one reshape of the output buffer, and one copy.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
// Copies the @arg0 buffer into the @ret0 buffer reshaped to 1x5x2x2.
func @main_ex_dispatch_1() {
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<1x5x4xf32>
%1 = linalg.reshape %0 [affine_map<(d0, d1, d2, d3) -> (d0)>, affine_map<(d0, d1, d2, d3) -> (d1)>, affine_map<(d0, d1, d2, d3) -> (d2, d3)>] : memref<1x5x4xf32> into memref<1x5x2x2xf32>
%2 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5x2x2xf32>
linalg.copy(%2, %1) : memref<1x5x2x2xf32>, memref<1x5x2x2xf32>
return
}
// Binding declarations: @arg0 is set 0 / binding 0 (Read), @ret0 is
// set 0 / binding 1 (Write|Discard).
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::LinalgTileAndFusePass ***
// Tiled form of the copy. The function now carries an spv.entry_point_abi
// attribute with local_size = [32, 2, 2], and the single linalg.copy of the
// earlier dumps is wrapped in a 3-d scf.parallel loop.
func @main_ex_dispatch_1() attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
%c5 = constant 5 : index
%c32 = constant 32 : index
%c0 = constant 0 : index
%c2 = constant 2 : index
%c1 = constant 1 : index
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<1x5x4xf32>
%1 = linalg.reshape %0 [affine_map<(d0, d1, d2, d3) -> (d0)>, affine_map<(d0, d1, d2, d3) -> (d1)>, affine_map<(d0, d1, d2, d3) -> (d2, d3)>] : memref<1x5x4xf32> into memref<1x5x2x2xf32>
%2 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5x2x2xf32>
// Iterates (%arg0, %arg1, %arg2) from (0, 0, 0) to (1, 5, 2) in steps of
// (2, 2, 32), i.e. over the first three dimensions of the 1x5x2x2 buffers.
scf.parallel (%arg0, %arg1, %arg2) = (%c0, %c0, %c0) to (%c1, %c5, %c2) step (%c2, %c2, %c32) {
// Each affine.min yields min(step, bound - induction variable); the results
// are used as the subview sizes below. The maps take three operands but
// their result expressions reference only d1 and d2.
%3 = affine.min affine_map<(d0, d1, d2) -> (2, d1 - d2)>(%c2, %c1, %arg0)
%4 = affine.min affine_map<(d0, d1, d2) -> (2, d1 - d2)>(%c2, %c5, %arg1)
%5 = affine.min affine_map<(d0, d1, d2) -> (32, d1 - d2)>(%c32, %c2, %arg2)
// Subview of the source buffer (%2) at the current offsets; the last
// dimension is taken whole (offset %c0, size %c2).
%6 = subview %2[%arg0, %arg1, %arg2, %c0] [%3, %4, %5, %c2] [%c1, %c1, %c1, %c1] : memref<1x5x2x2xf32> to memref<?x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>
// %7-%9 repeat the computations of %3-%5 with the same operands.
%7 = affine.min affine_map<(d0, d1, d2) -> (2, d1 - d2)>(%c2, %c1, %arg0)
%8 = affine.min affine_map<(d0, d1, d2) -> (2, d1 - d2)>(%c2, %c5, %arg1)
%9 = affine.min affine_map<(d0, d1, d2) -> (32, d1 - d2)>(%c32, %c2, %arg2)
// Matching subview of the reshaped destination buffer (%1).
%10 = subview %1[%arg0, %arg1, %arg2, %c0] [%7, %8, %9, %c2] [%c1, %c1, %c1, %c1] : memref<1x5x2x2xf32> to memref<?x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>
// Per-tile copy, tagged with the "workitem" transform marker.
linalg.copy(%6, %10) {__internal_linalg_transform__ = "workitem"} : memref<?x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>, memref<?x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>
scf.yield
}
return
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::SplitDispatchFunctionPass ***
// Module after SplitDispatchFunctionPass. The function inside is identical to
// the preceding LinalgTileAndFusePass dump and the module still holds this
// one function plus the @legacy_io interface.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
// Tiled copy from the @arg0 buffer into the reshaped @ret0 buffer.
func @main_ex_dispatch_1() attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
%c5 = constant 5 : index
%c32 = constant 32 : index
%c0 = constant 0 : index
%c2 = constant 2 : index
%c1 = constant 1 : index
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<1x5x4xf32>
%1 = linalg.reshape %0 [affine_map<(d0, d1, d2, d3) -> (d0)>, affine_map<(d0, d1, d2, d3) -> (d1)>, affine_map<(d0, d1, d2, d3) -> (d2, d3)>] : memref<1x5x4xf32> into memref<1x5x2x2xf32>
%2 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5x2x2xf32>
// Loop bounds (1, 5, 2) with steps (2, 2, 32) over the first three dimensions.
scf.parallel (%arg0, %arg1, %arg2) = (%c0, %c0, %c0) to (%c1, %c5, %c2) step (%c2, %c2, %c32) {
// Subview sizes: min(step, bound - induction variable) per dimension.
%3 = affine.min affine_map<(d0, d1, d2) -> (2, d1 - d2)>(%c2, %c1, %arg0)
%4 = affine.min affine_map<(d0, d1, d2) -> (2, d1 - d2)>(%c2, %c5, %arg1)
%5 = affine.min affine_map<(d0, d1, d2) -> (32, d1 - d2)>(%c32, %c2, %arg2)
%6 = subview %2[%arg0, %arg1, %arg2, %c0] [%3, %4, %5, %c2] [%c1, %c1, %c1, %c1] : memref<1x5x2x2xf32> to memref<?x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>
// Same three values recomputed for the destination subview.
%7 = affine.min affine_map<(d0, d1, d2) -> (2, d1 - d2)>(%c2, %c1, %arg0)
%8 = affine.min affine_map<(d0, d1, d2) -> (2, d1 - d2)>(%c2, %c5, %arg1)
%9 = affine.min affine_map<(d0, d1, d2) -> (32, d1 - d2)>(%c32, %c2, %arg2)
%10 = subview %1[%arg0, %arg1, %arg2, %c0] [%7, %8, %9, %c2] [%c1, %c1, %c1, %c1] : memref<1x5x2x2xf32> to memref<?x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>
linalg.copy(%6, %10) {__internal_linalg_transform__ = "workitem"} : memref<?x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>, memref<?x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>
scf.yield
}
return
}
// Binding declarations: @arg0 is set 0 / binding 0 (Read), @ret0 is
// set 0 / binding 1 (Write|Discard).
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::LinalgTileAndFusePass ***
// Second LinalgTileAndFusePass dump of this function. The text is identical
// to the first one: a 3-d scf.parallel over bounds (1, 5, 2) with steps
// (2, 2, 32) that copies a subview of the @arg0 buffer into the matching
// subview of the reshaped @ret0 buffer.
func @main_ex_dispatch_1() attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
%c5 = constant 5 : index
%c32 = constant 32 : index
%c0 = constant 0 : index
%c2 = constant 2 : index
%c1 = constant 1 : index
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<1x5x4xf32>
%1 = linalg.reshape %0 [affine_map<(d0, d1, d2, d3) -> (d0)>, affine_map<(d0, d1, d2, d3) -> (d1)>, affine_map<(d0, d1, d2, d3) -> (d2, d3)>] : memref<1x5x4xf32> into memref<1x5x2x2xf32>
%2 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5x2x2xf32>
scf.parallel (%arg0, %arg1, %arg2) = (%c0, %c0, %c0) to (%c1, %c5, %c2) step (%c2, %c2, %c32) {
// Sizes for the source subview: min(step, bound - induction variable).
%3 = affine.min affine_map<(d0, d1, d2) -> (2, d1 - d2)>(%c2, %c1, %arg0)
%4 = affine.min affine_map<(d0, d1, d2) -> (2, d1 - d2)>(%c2, %c5, %arg1)
%5 = affine.min affine_map<(d0, d1, d2) -> (32, d1 - d2)>(%c32, %c2, %arg2)
%6 = subview %2[%arg0, %arg1, %arg2, %c0] [%3, %4, %5, %c2] [%c1, %c1, %c1, %c1] : memref<1x5x2x2xf32> to memref<?x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>
// Sizes for the destination subview, computed again from the same operands.
%7 = affine.min affine_map<(d0, d1, d2) -> (2, d1 - d2)>(%c2, %c1, %arg0)
%8 = affine.min affine_map<(d0, d1, d2) -> (2, d1 - d2)>(%c2, %c5, %arg1)
%9 = affine.min affine_map<(d0, d1, d2) -> (32, d1 - d2)>(%c32, %c2, %arg2)
%10 = subview %1[%arg0, %arg1, %arg2, %c0] [%7, %8, %9, %c2] [%c1, %c1, %c1, %c1] : memref<1x5x2x2xf32> to memref<?x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>
linalg.copy(%6, %10) {__internal_linalg_transform__ = "workitem"} : memref<?x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>, memref<?x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>
scf.yield
}
return
}
// *** IR Dump After Canonicalizer ***
// Module after canonicalizing the tiled copy. Relative to the preceding dump:
//  * each affine.min takes only the induction variable, with the constants
//    written into the map, e.g. (d0) -> (2, -d0 + 1);
//  * the subview offset 0, size 2 and unit strides are written inline instead
//    of as SSA constants;
//  * the subview result type has a static last dimension of 2 and static
//    strides 20, 4, 2, 1, with only the offset s0 left symbolic.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
func @main_ex_dispatch_1() attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
// These constants are still used as the scf.parallel bounds and steps.
%c5 = constant 5 : index
%c32 = constant 32 : index
%c0 = constant 0 : index
%c2 = constant 2 : index
%c1 = constant 1 : index
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<1x5x4xf32>
%1 = linalg.reshape %0 [affine_map<(d0, d1, d2, d3) -> (d0)>, affine_map<(d0, d1, d2, d3) -> (d1)>, affine_map<(d0, d1, d2, d3) -> (d2, d3)>] : memref<1x5x4xf32> into memref<1x5x2x2xf32>
%2 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5x2x2xf32>
scf.parallel (%arg0, %arg1, %arg2) = (%c0, %c0, %c0) to (%c1, %c5, %c2) step (%c2, %c2, %c32) {
// Source subview sizes: min(2, 1 - %arg0), min(2, 5 - %arg1), min(32, 2 - %arg2).
%3 = affine.min affine_map<(d0) -> (2, -d0 + 1)>(%arg0)
%4 = affine.min affine_map<(d0) -> (2, -d0 + 5)>(%arg1)
%5 = affine.min affine_map<(d0) -> (32, -d0 + 2)>(%arg2)
%6 = subview %2[%arg0, %arg1, %arg2, 0] [%3, %4, %5, 2] [1, 1, 1, 1] : memref<1x5x2x2xf32> to memref<?x?x?x2xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 20 + s0 + d1 * 4 + d2 * 2 + d3)>>
// The same three sizes are still computed a second time for the destination.
%7 = affine.min affine_map<(d0) -> (2, -d0 + 1)>(%arg0)
%8 = affine.min affine_map<(d0) -> (2, -d0 + 5)>(%arg1)
%9 = affine.min affine_map<(d0) -> (32, -d0 + 2)>(%arg2)
%10 = subview %1[%arg0, %arg1, %arg2, 0] [%7, %8, %9, 2] [1, 1, 1, 1] : memref<1x5x2x2xf32> to memref<?x?x?x2xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 20 + s0 + d1 * 4 + d2 * 2 + d3)>>
linalg.copy(%6, %10) {__internal_linalg_transform__ = "workitem"} : memref<?x?x?x2xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 20 + s0 + d1 * 4 + d2 * 2 + d3)>>, memref<?x?x?x2xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 20 + s0 + d1 * 4 + d2 * 2 + d3)>>
scf.yield
}
return
}
// Binding declarations: @arg0 is set 0 / binding 0 (Read), @ret0 is
// set 0 / binding 1 (Write|Discard).
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::ConvertToGPUPass ***
// GPU-mapped form of the tiled copy. The scf.parallel loop of the preceding
// dump is replaced by three nested scf.for loops whose lower bounds and steps
// are derived from gpu.block_id / gpu.grid_dim, and the linalg.copy is
// replaced by four nested scf.for loops (three driven by gpu.thread_id /
// gpu.block_dim, one plain 0..N loop) around a scalar load/store pair.
func @main_ex_dispatch_1() attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
%c5 = constant 5 : index
%c32 = constant 32 : index
%c0 = constant 0 : index
%c2 = constant 2 : index
%c1 = constant 1 : index
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<1x5x4xf32>
%1 = linalg.reshape %0 [affine_map<(d0, d1, d2, d3) -> (d0)>, affine_map<(d0, d1, d2, d3) -> (d1)>, affine_map<(d0, d1, d2, d3) -> (d2, d3)>] : memref<1x5x4xf32> into memref<1x5x2x2xf32>
%2 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5x2x2xf32>
%3 = "gpu.block_id"() {dimension = "x"} : () -> index
%4 = "gpu.grid_dim"() {dimension = "x"} : () -> index
%5 = "gpu.block_id"() {dimension = "y"} : () -> index
%6 = "gpu.grid_dim"() {dimension = "y"} : () -> index
%7 = "gpu.block_id"() {dimension = "z"} : () -> index
%8 = "gpu.grid_dim"() {dimension = "z"} : () -> index
// Outer loop (%arg0): lower bound 0 + 2 * block_id.z, step 2 * grid_dim.z.
%9 = muli %c2, %7 : index
%10 = addi %c0, %9 : index
%11 = muli %c2, %8 : index
// Middle loop (%arg1): lower bound 0 + 2 * block_id.y, step 2 * grid_dim.y.
%12 = muli %c2, %5 : index
%13 = addi %c0, %12 : index
%14 = muli %c2, %6 : index
// Inner loop (%arg2): lower bound 0 + 32 * block_id.x, step 32 * grid_dim.x.
%15 = muli %c32, %3 : index
%16 = addi %c0, %15 : index
%17 = muli %c32, %4 : index
scf.for %arg0 = %10 to %c1 step %11 {
scf.for %arg1 = %13 to %c5 step %14 {
scf.for %arg2 = %16 to %c2 step %17 {
// Subview sizes and the source (%21) / destination (%25) subviews, as in the
// preceding Canonicalizer dump.
%18 = affine.min affine_map<(d0) -> (2, -d0 + 1)>(%arg0)
%19 = affine.min affine_map<(d0) -> (2, -d0 + 5)>(%arg1)
%20 = affine.min affine_map<(d0) -> (32, -d0 + 2)>(%arg2)
%21 = subview %2[%arg0, %arg1, %arg2, 0] [%18, %19, %20, 2] [1, 1, 1, 1] : memref<1x5x2x2xf32> to memref<?x?x?x2xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 20 + s0 + d1 * 4 + d2 * 2 + d3)>>
%22 = affine.min affine_map<(d0) -> (2, -d0 + 1)>(%arg0)
%23 = affine.min affine_map<(d0) -> (2, -d0 + 5)>(%arg1)
%24 = affine.min affine_map<(d0) -> (32, -d0 + 2)>(%arg2)
%25 = subview %1[%arg0, %arg1, %arg2, 0] [%22, %23, %24, 2] [1, 1, 1, 1] : memref<1x5x2x2xf32> to memref<?x?x?x2xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 20 + s0 + d1 * 4 + d2 * 2 + d3)>>
// Dimensions of the source subview; these feed the inner loop upper bounds.
%26 = dim %21, 0 : memref<?x?x?x2xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 20 + s0 + d1 * 4 + d2 * 2 + d3)>>
%27 = dim %21, 1 : memref<?x?x?x2xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 20 + s0 + d1 * 4 + d2 * 2 + d3)>>
%28 = dim %21, 2 : memref<?x?x?x2xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 20 + s0 + d1 * 4 + d2 * 2 + d3)>>
%29 = dim %21, 3 : memref<?x?x?x2xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 20 + s0 + d1 * 4 + d2 * 2 + d3)>>
// Dimensions of the destination subview; %30-%33 have no uses in this function.
%30 = dim %25, 0 : memref<?x?x?x2xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 20 + s0 + d1 * 4 + d2 * 2 + d3)>>
%31 = dim %25, 1 : memref<?x?x?x2xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 20 + s0 + d1 * 4 + d2 * 2 + d3)>>
%32 = dim %25, 2 : memref<?x?x?x2xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 20 + s0 + d1 * 4 + d2 * 2 + d3)>>
%33 = dim %25, 3 : memref<?x?x?x2xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 20 + s0 + d1 * 4 + d2 * 2 + d3)>>
// Identity affine.apply ops forwarding %26-%29 as the loop upper bounds.
%34 = affine.apply affine_map<(d0) -> (d0)>(%26)
%35 = affine.apply affine_map<(d0) -> (d0)>(%27)
%36 = affine.apply affine_map<(d0) -> (d0)>(%28)
%37 = affine.apply affine_map<()[s0] -> (s0)>()[%29]
// One (lower bound 0, step 1) constant pair per inner loop dimension.
%c0_0 = constant 0 : index
%c1_1 = constant 1 : index
%c0_2 = constant 0 : index
%c1_3 = constant 1 : index
%c0_4 = constant 0 : index
%c1_5 = constant 1 : index
%c0_6 = constant 0 : index
%c1_7 = constant 1 : index
%38 = "gpu.thread_id"() {dimension = "x"} : () -> index
%39 = "gpu.block_dim"() {dimension = "x"} : () -> index
%40 = "gpu.thread_id"() {dimension = "y"} : () -> index
%41 = "gpu.block_dim"() {dimension = "y"} : () -> index
%42 = "gpu.thread_id"() {dimension = "z"} : () -> index
%43 = "gpu.block_dim"() {dimension = "z"} : () -> index
// Loop %arg3: lower bound 0 + 1 * thread_id.z, step 1 * block_dim.z.
%44 = muli %c1_1, %42 : index
%45 = addi %c0_0, %44 : index
%46 = muli %c1_1, %43 : index
// Loop %arg4: lower bound 0 + 1 * thread_id.y, step 1 * block_dim.y.
%47 = muli %c1_3, %40 : index
%48 = addi %c0_2, %47 : index
%49 = muli %c1_3, %41 : index
// Loop %arg5: lower bound 0 + 1 * thread_id.x, step 1 * block_dim.x.
%50 = muli %c1_5, %38 : index
%51 = addi %c0_4, %50 : index
%52 = muli %c1_5, %39 : index
scf.for %arg3 = %45 to %34 step %46 {
scf.for %arg4 = %48 to %35 step %49 {
scf.for %arg5 = %51 to %36 step %52 {
// The last dimension is walked from 0 to %37 in steps of 1, with no GPU id involved.
scf.for %arg6 = %c0_6 to %37 step %c1_7 {
// Element-wise copy from the source subview to the destination subview.
%53 = load %21[%arg3, %arg4, %arg5, %arg6] : memref<?x?x?x2xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 20 + s0 + d1 * 4 + d2 * 2 + d3)>>
store %53, %25[%arg3, %arg4, %arg5, %arg6] : memref<?x?x?x2xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 20 + s0 + d1 * 4 + d2 * 2 + d3)>>
}
}
}
}
}
}
}
return
}
// *** IR Dump After ConvertAffineToStandard ***
// Same GPU-mapped copy with the affine ops lowered to standard ops. Each
// affine.min of the preceding dump is now a muli by -1, an addi of the bound,
// a cmpi "slt" and a select. The identity affine.apply ops are gone, so the
// inner loop upper bounds use the dim results directly.
func @main_ex_dispatch_1() attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
%c5 = constant 5 : index
%c32 = constant 32 : index
%c0 = constant 0 : index
%c2 = constant 2 : index
%c1 = constant 1 : index
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<1x5x4xf32>
%1 = linalg.reshape %0 [affine_map<(d0, d1, d2, d3) -> (d0)>, affine_map<(d0, d1, d2, d3) -> (d1)>, affine_map<(d0, d1, d2, d3) -> (d2, d3)>] : memref<1x5x4xf32> into memref<1x5x2x2xf32>
%2 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5x2x2xf32>
%3 = "gpu.block_id"() {dimension = "x"} : () -> index
%4 = "gpu.grid_dim"() {dimension = "x"} : () -> index
%5 = "gpu.block_id"() {dimension = "y"} : () -> index
%6 = "gpu.grid_dim"() {dimension = "y"} : () -> index
%7 = "gpu.block_id"() {dimension = "z"} : () -> index
%8 = "gpu.grid_dim"() {dimension = "z"} : () -> index
// Lower bounds and steps of the three outer loops, built from the block id
// and grid dim of z, y and x respectively.
%9 = muli %c2, %7 : index
%10 = addi %c0, %9 : index
%11 = muli %c2, %8 : index
%12 = muli %c2, %5 : index
%13 = addi %c0, %12 : index
%14 = muli %c2, %6 : index
%15 = muli %c32, %3 : index
%16 = addi %c0, %15 : index
%17 = muli %c32, %4 : index
scf.for %arg0 = %10 to %c1 step %11 {
scf.for %arg1 = %13 to %c5 step %14 {
scf.for %arg2 = %16 to %c2 step %17 {
// %21 = min(2, 1 - %arg0): select picks 2 when 2 < (1 - %arg0).
%c2_0 = constant 2 : index
%c-1 = constant -1 : index
%18 = muli %arg0, %c-1 : index
%c1_1 = constant 1 : index
%19 = addi %18, %c1_1 : index
%20 = cmpi "slt", %c2_0, %19 : index
%21 = select %20, %c2_0, %19 : index
// %25 = min(2, 5 - %arg1).
%c2_2 = constant 2 : index
%c-1_3 = constant -1 : index
%22 = muli %arg1, %c-1_3 : index
%c5_4 = constant 5 : index
%23 = addi %22, %c5_4 : index
%24 = cmpi "slt", %c2_2, %23 : index
%25 = select %24, %c2_2, %23 : index
// %29 = min(32, 2 - %arg2).
%c32_5 = constant 32 : index
%c-1_6 = constant -1 : index
%26 = muli %arg2, %c-1_6 : index
%c2_7 = constant 2 : index
%27 = addi %26, %c2_7 : index
%28 = cmpi "slt", %c32_5, %27 : index
%29 = select %28, %c32_5, %27 : index
// Source subview of the @arg0 buffer.
%30 = subview %2[%arg0, %arg1, %arg2, 0] [%21, %25, %29, 2] [1, 1, 1, 1] : memref<1x5x2x2xf32> to memref<?x?x?x2xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 20 + s0 + d1 * 4 + d2 * 2 + d3)>>
// The same three minima are computed again (%34, %38, %42) for the destination.
%c2_8 = constant 2 : index
%c-1_9 = constant -1 : index
%31 = muli %arg0, %c-1_9 : index
%c1_10 = constant 1 : index
%32 = addi %31, %c1_10 : index
%33 = cmpi "slt", %c2_8, %32 : index
%34 = select %33, %c2_8, %32 : index
%c2_11 = constant 2 : index
%c-1_12 = constant -1 : index
%35 = muli %arg1, %c-1_12 : index
%c5_13 = constant 5 : index
%36 = addi %35, %c5_13 : index
%37 = cmpi "slt", %c2_11, %36 : index
%38 = select %37, %c2_11, %36 : index
%c32_14 = constant 32 : index
%c-1_15 = constant -1 : index
%39 = muli %arg2, %c-1_15 : index
%c2_16 = constant 2 : index
%40 = addi %39, %c2_16 : index
%41 = cmpi "slt", %c32_14, %40 : index
%42 = select %41, %c32_14, %40 : index
// Destination subview of the reshaped @ret0 buffer.
%43 = subview %1[%arg0, %arg1, %arg2, 0] [%34, %38, %42, 2] [1, 1, 1, 1] : memref<1x5x2x2xf32> to memref<?x?x?x2xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 20 + s0 + d1 * 4 + d2 * 2 + d3)>>
// Source subview dimensions, used as the inner loop upper bounds.
%44 = dim %30, 0 : memref<?x?x?x2xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 20 + s0 + d1 * 4 + d2 * 2 + d3)>>
%45 = dim %30, 1 : memref<?x?x?x2xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 20 + s0 + d1 * 4 + d2 * 2 + d3)>>
%46 = dim %30, 2 : memref<?x?x?x2xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 20 + s0 + d1 * 4 + d2 * 2 + d3)>>
%47 = dim %30, 3 : memref<?x?x?x2xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 20 + s0 + d1 * 4 + d2 * 2 + d3)>>
// Destination subview dimensions; %48-%51 have no uses in this function.
%48 = dim %43, 0 : memref<?x?x?x2xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 20 + s0 + d1 * 4 + d2 * 2 + d3)>>
%49 = dim %43, 1 : memref<?x?x?x2xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 20 + s0 + d1 * 4 + d2 * 2 + d3)>>
%50 = dim %43, 2 : memref<?x?x?x2xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 20 + s0 + d1 * 4 + d2 * 2 + d3)>>
%51 = dim %43, 3 : memref<?x?x?x2xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 20 + s0 + d1 * 4 + d2 * 2 + d3)>>
// One (lower bound 0, step 1) constant pair per inner loop dimension.
%c0_17 = constant 0 : index
%c1_18 = constant 1 : index
%c0_19 = constant 0 : index
%c1_20 = constant 1 : index
%c0_21 = constant 0 : index
%c1_22 = constant 1 : index
%c0_23 = constant 0 : index
%c1_24 = constant 1 : index
%52 = "gpu.thread_id"() {dimension = "x"} : () -> index
%53 = "gpu.block_dim"() {dimension = "x"} : () -> index
%54 = "gpu.thread_id"() {dimension = "y"} : () -> index
%55 = "gpu.block_dim"() {dimension = "y"} : () -> index
%56 = "gpu.thread_id"() {dimension = "z"} : () -> index
%57 = "gpu.block_dim"() {dimension = "z"} : () -> index
// Inner loop lower bounds (0 + 1 * thread_id) and steps (1 * block_dim) for
// z, y and x respectively.
%58 = muli %c1_18, %56 : index
%59 = addi %c0_17, %58 : index
%60 = muli %c1_18, %57 : index
%61 = muli %c1_20, %54 : index
%62 = addi %c0_19, %61 : index
%63 = muli %c1_20, %55 : index
%64 = muli %c1_22, %52 : index
%65 = addi %c0_21, %64 : index
%66 = muli %c1_22, %53 : index
scf.for %arg3 = %59 to %44 step %60 {
scf.for %arg4 = %62 to %45 step %63 {
scf.for %arg5 = %65 to %46 step %66 {
scf.for %arg6 = %c0_23 to %47 step %c1_24 {
// Element-wise copy from the source subview to the destination subview.
%67 = load %30[%arg3, %arg4, %arg5, %arg6] : memref<?x?x?x2xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 20 + s0 + d1 * 4 + d2 * 2 + d3)>>
store %67, %43[%arg3, %arg4, %arg5, %arg6] : memref<?x?x?x2xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 20 + s0 + d1 * 4 + d2 * 2 + d3)>>
}
}
}
}
}
}
}
return
}
// *** IR Dump After Canonicalizer ***
// Module after canonicalizing the lowered loops. Relative to the preceding
// ConvertAffineToStandard dump:
//  * all constants are defined once at the top of the function (including
//    %c-1) instead of repeatedly inside the loop nest;
//  * the `addi %c0, x` and `muli %c1, x` ops are gone, so loop bounds use the
//    block/thread ids and their scaled values directly;
//  * the dim ops are gone; the inner loops use the select results
//    (%18, %22, %26) and %c2 as upper bounds;
//  * the duplicated minimum computation (%28-%39) is still present.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
func @main_ex_dispatch_1() attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
%c5 = constant 5 : index
%c32 = constant 32 : index
%c-1 = constant -1 : index
%c2 = constant 2 : index
%c0 = constant 0 : index
%c1 = constant 1 : index
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<1x5x4xf32>
%1 = linalg.reshape %0 [affine_map<(d0, d1, d2, d3) -> (d0)>, affine_map<(d0, d1, d2, d3) -> (d1)>, affine_map<(d0, d1, d2, d3) -> (d2, d3)>] : memref<1x5x4xf32> into memref<1x5x2x2xf32>
%2 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5x2x2xf32>
%3 = "gpu.block_id"() {dimension = "x"} : () -> index
%4 = "gpu.grid_dim"() {dimension = "x"} : () -> index
%5 = "gpu.block_id"() {dimension = "y"} : () -> index
%6 = "gpu.grid_dim"() {dimension = "y"} : () -> index
%7 = "gpu.block_id"() {dimension = "z"} : () -> index
%8 = "gpu.grid_dim"() {dimension = "z"} : () -> index
// Outer loop lower bounds / steps: block_id * tile step and grid_dim * tile
// step, with steps 2 (z), 2 (y) and 32 (x).
%9 = muli %7, %c2 : index
%10 = muli %8, %c2 : index
%11 = muli %5, %c2 : index
%12 = muli %6, %c2 : index
%13 = muli %3, %c32 : index
%14 = muli %4, %c32 : index
scf.for %arg0 = %9 to %c1 step %10 {
scf.for %arg1 = %11 to %c5 step %12 {
scf.for %arg2 = %13 to %c2 step %14 {
// %18 = min(2, 1 - %arg0), %22 = min(2, 5 - %arg1), %26 = min(32, 2 - %arg2).
%15 = muli %arg0, %c-1 : index
%16 = addi %15, %c1 : index
%17 = cmpi "slt", %c2, %16 : index
%18 = select %17, %c2, %16 : index
%19 = muli %arg1, %c-1 : index
%20 = addi %19, %c5 : index
%21 = cmpi "slt", %c2, %20 : index
%22 = select %21, %c2, %20 : index
%23 = muli %arg2, %c-1 : index
%24 = addi %23, %c2 : index
%25 = cmpi "slt", %c32, %24 : index
%26 = select %25, %c32, %24 : index
%27 = subview %2[%arg0, %arg1, %arg2, 0] [%18, %22, %26, 2] [1, 1, 1, 1] : memref<1x5x2x2xf32> to memref<?x?x?x2xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 20 + s0 + d1 * 4 + d2 * 2 + d3)>>
// Second copy of the same three minima (%31, %35, %39) for the destination subview.
%28 = muli %arg0, %c-1 : index
%29 = addi %28, %c1 : index
%30 = cmpi "slt", %c2, %29 : index
%31 = select %30, %c2, %29 : index
%32 = muli %arg1, %c-1 : index
%33 = addi %32, %c5 : index
%34 = cmpi "slt", %c2, %33 : index
%35 = select %34, %c2, %33 : index
%36 = muli %arg2, %c-1 : index
%37 = addi %36, %c2 : index
%38 = cmpi "slt", %c32, %37 : index
%39 = select %38, %c32, %37 : index
%40 = subview %1[%arg0, %arg1, %arg2, 0] [%31, %35, %39, 2] [1, 1, 1, 1] : memref<1x5x2x2xf32> to memref<?x?x?x2xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 20 + s0 + d1 * 4 + d2 * 2 + d3)>>
%41 = "gpu.thread_id"() {dimension = "x"} : () -> index
%42 = "gpu.block_dim"() {dimension = "x"} : () -> index
%43 = "gpu.thread_id"() {dimension = "y"} : () -> index
%44 = "gpu.block_dim"() {dimension = "y"} : () -> index
%45 = "gpu.thread_id"() {dimension = "z"} : () -> index
%46 = "gpu.block_dim"() {dimension = "z"} : () -> index
// Inner loops start at thread_id and step by block_dim for z, y, x; the
// innermost loop covers the static last dimension 0..2 with step 1.
scf.for %arg3 = %45 to %18 step %46 {
scf.for %arg4 = %43 to %22 step %44 {
scf.for %arg5 = %41 to %26 step %42 {
scf.for %arg6 = %c0 to %c2 step %c1 {
%47 = load %27[%arg3, %arg4, %arg5, %arg6] : memref<?x?x?x2xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 20 + s0 + d1 * 4 + d2 * 2 + d3)>>
store %47, %40[%arg3, %arg4, %arg5, %arg6] : memref<?x?x?x2xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 20 + s0 + d1 * 4 + d2 * 2 + d3)>>
}
}
}
}
}
}
}
return
}
// Binding declarations: @arg0 is set 0 / binding 0 (Read), @ret0 is
// set 0 / binding 1 (Write|Discard).
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After CSE ***
// Module after CSE. The second set of minimum computations from the preceding
// dump is gone: both subviews (%27 on the @arg0 buffer, %28 on the reshaped
// @ret0 buffer) now share the sizes %18, %22 and %26.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
func @main_ex_dispatch_1() attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
%c5 = constant 5 : index
%c32 = constant 32 : index
%c-1 = constant -1 : index
%c2 = constant 2 : index
%c0 = constant 0 : index
%c1 = constant 1 : index
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<1x5x4xf32>
%1 = linalg.reshape %0 [affine_map<(d0, d1, d2, d3) -> (d0)>, affine_map<(d0, d1, d2, d3) -> (d1)>, affine_map<(d0, d1, d2, d3) -> (d2, d3)>] : memref<1x5x4xf32> into memref<1x5x2x2xf32>
%2 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5x2x2xf32>
%3 = "gpu.block_id"() {dimension = "x"} : () -> index
%4 = "gpu.grid_dim"() {dimension = "x"} : () -> index
%5 = "gpu.block_id"() {dimension = "y"} : () -> index
%6 = "gpu.grid_dim"() {dimension = "y"} : () -> index
%7 = "gpu.block_id"() {dimension = "z"} : () -> index
%8 = "gpu.grid_dim"() {dimension = "z"} : () -> index
// Outer loop lower bounds / steps for z (x2), y (x2) and x (x32).
%9 = muli %7, %c2 : index
%10 = muli %8, %c2 : index
%11 = muli %5, %c2 : index
%12 = muli %6, %c2 : index
%13 = muli %3, %c32 : index
%14 = muli %4, %c32 : index
scf.for %arg0 = %9 to %c1 step %10 {
scf.for %arg1 = %11 to %c5 step %12 {
scf.for %arg2 = %13 to %c2 step %14 {
// %18 = min(2, 1 - %arg0), %22 = min(2, 5 - %arg1), %26 = min(32, 2 - %arg2).
%15 = muli %arg0, %c-1 : index
%16 = addi %15, %c1 : index
%17 = cmpi "slt", %c2, %16 : index
%18 = select %17, %c2, %16 : index
%19 = muli %arg1, %c-1 : index
%20 = addi %19, %c5 : index
%21 = cmpi "slt", %c2, %20 : index
%22 = select %21, %c2, %20 : index
%23 = muli %arg2, %c-1 : index
%24 = addi %23, %c2 : index
%25 = cmpi "slt", %c32, %24 : index
%26 = select %25, %c32, %24 : index
// Source and destination subviews with identical offsets, sizes and strides.
%27 = subview %2[%arg0, %arg1, %arg2, 0] [%18, %22, %26, 2] [1, 1, 1, 1] : memref<1x5x2x2xf32> to memref<?x?x?x2xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 20 + s0 + d1 * 4 + d2 * 2 + d3)>>
%28 = subview %1[%arg0, %arg1, %arg2, 0] [%18, %22, %26, 2] [1, 1, 1, 1] : memref<1x5x2x2xf32> to memref<?x?x?x2xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 20 + s0 + d1 * 4 + d2 * 2 + d3)>>
%29 = "gpu.thread_id"() {dimension = "x"} : () -> index
%30 = "gpu.block_dim"() {dimension = "x"} : () -> index
%31 = "gpu.thread_id"() {dimension = "y"} : () -> index
%32 = "gpu.block_dim"() {dimension = "y"} : () -> index
%33 = "gpu.thread_id"() {dimension = "z"} : () -> index
%34 = "gpu.block_dim"() {dimension = "z"} : () -> index
// Inner loops start at thread_id and step by block_dim for z, y, x; the
// innermost loop covers the static last dimension 0..2 with step 1.
scf.for %arg3 = %33 to %18 step %34 {
scf.for %arg4 = %31 to %22 step %32 {
scf.for %arg5 = %29 to %26 step %30 {
scf.for %arg6 = %c0 to %c2 step %c1 {
%35 = load %27[%arg3, %arg4, %arg5, %arg6] : memref<?x?x?x2xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 20 + s0 + d1 * 4 + d2 * 2 + d3)>>
store %35, %28[%arg3, %arg4, %arg5, %arg6] : memref<?x?x?x2xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 20 + s0 + d1 * 4 + d2 * 2 + d3)>>
}
}
}
}
}
}
}
return
}
// Binding declarations: @arg0 is set 0 / binding 0 (Read), @ret0 is
// set 0 / binding 1 (Write|Discard).
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::ResolveShapeOpsPass ***
// Function dump after ResolveShapeOpsPass. The text matches the function in
// the preceding CSE dump: three outer loops driven by block id / grid dim,
// shared subview sizes %18/%22/%26, and four inner loops doing a scalar
// load/store copy from the @arg0 subview to the @ret0 subview.
func @main_ex_dispatch_1() attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
%c5 = constant 5 : index
%c32 = constant 32 : index
%c-1 = constant -1 : index
%c2 = constant 2 : index
%c0 = constant 0 : index
%c1 = constant 1 : index
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<1x5x4xf32>
%1 = linalg.reshape %0 [affine_map<(d0, d1, d2, d3) -> (d0)>, affine_map<(d0, d1, d2, d3) -> (d1)>, affine_map<(d0, d1, d2, d3) -> (d2, d3)>] : memref<1x5x4xf32> into memref<1x5x2x2xf32>
%2 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5x2x2xf32>
%3 = "gpu.block_id"() {dimension = "x"} : () -> index
%4 = "gpu.grid_dim"() {dimension = "x"} : () -> index
%5 = "gpu.block_id"() {dimension = "y"} : () -> index
%6 = "gpu.grid_dim"() {dimension = "y"} : () -> index
%7 = "gpu.block_id"() {dimension = "z"} : () -> index
%8 = "gpu.grid_dim"() {dimension = "z"} : () -> index
// Outer loop lower bounds / steps for z (x2), y (x2) and x (x32).
%9 = muli %7, %c2 : index
%10 = muli %8, %c2 : index
%11 = muli %5, %c2 : index
%12 = muli %6, %c2 : index
%13 = muli %3, %c32 : index
%14 = muli %4, %c32 : index
scf.for %arg0 = %9 to %c1 step %10 {
scf.for %arg1 = %11 to %c5 step %12 {
scf.for %arg2 = %13 to %c2 step %14 {
// %18 = min(2, 1 - %arg0), %22 = min(2, 5 - %arg1), %26 = min(32, 2 - %arg2).
%15 = muli %arg0, %c-1 : index
%16 = addi %15, %c1 : index
%17 = cmpi "slt", %c2, %16 : index
%18 = select %17, %c2, %16 : index
%19 = muli %arg1, %c-1 : index
%20 = addi %19, %c5 : index
%21 = cmpi "slt", %c2, %20 : index
%22 = select %21, %c2, %20 : index
%23 = muli %arg2, %c-1 : index
%24 = addi %23, %c2 : index
%25 = cmpi "slt", %c32, %24 : index
%26 = select %25, %c32, %24 : index
// Source (%27) and destination (%28) subviews sharing the same sizes.
%27 = subview %2[%arg0, %arg1, %arg2, 0] [%18, %22, %26, 2] [1, 1, 1, 1] : memref<1x5x2x2xf32> to memref<?x?x?x2xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 20 + s0 + d1 * 4 + d2 * 2 + d3)>>
%28 = subview %1[%arg0, %arg1, %arg2, 0] [%18, %22, %26, 2] [1, 1, 1, 1] : memref<1x5x2x2xf32> to memref<?x?x?x2xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 20 + s0 + d1 * 4 + d2 * 2 + d3)>>
%29 = "gpu.thread_id"() {dimension = "x"} : () -> index
%30 = "gpu.block_dim"() {dimension = "x"} : () -> index
%31 = "gpu.thread_id"() {dimension = "y"} : () -> index
%32 = "gpu.block_dim"() {dimension = "y"} : () -> index
%33 = "gpu.thread_id"() {dimension = "z"} : () -> index
%34 = "gpu.block_dim"() {dimension = "z"} : () -> index
// Inner loops start at thread_id and step by block_dim for z, y, x; the
// innermost loop covers the static last dimension 0..2 with step 1.
scf.for %arg3 = %33 to %18 step %34 {
scf.for %arg4 = %31 to %22 step %32 {
scf.for %arg5 = %29 to %26 step %30 {
scf.for %arg6 = %c0 to %c2 step %c1 {
%35 = load %27[%arg3, %arg4, %arg5, %arg6] : memref<?x?x?x2xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 20 + s0 + d1 * 4 + d2 * 2 + d3)>>
store %35, %28[%arg3, %arg4, %arg5, %arg6] : memref<?x?x?x2xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 20 + s0 + d1 * 4 + d2 * 2 + d3)>>
}
}
}
}
}
}
}
return
}
// *** IR Dump After LegalizeStandardForSPIRV ***
// Module holding @main_ex_dispatch_1, which copies each element of the @arg0
// buffer into the @ret0 buffer (viewed as 1x5x2x2) through two nests of
// scf.for loops driven by the GPU block ids and thread ids.
// At this stage the load/store index the full memrefs directly with
// (outer induction variable + inner induction variable).
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
func @main_ex_dispatch_1() attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
%c5 = constant 5 : index
%c32 = constant 32 : index
%c-1 = constant -1 : index
%c2 = constant 2 : index
%c0 = constant 0 : index
%c1 = constant 1 : index
// %0 is the output binding (1x5x4); %1 reinterprets it as 1x5x2x2 by
// splitting the last dimension. %2 is the input binding, already 1x5x2x2.
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<1x5x4xf32>
%1 = linalg.reshape %0 [affine_map<(d0, d1, d2, d3) -> (d0)>, affine_map<(d0, d1, d2, d3) -> (d1)>, affine_map<(d0, d1, d2, d3) -> (d2, d3)>] : memref<1x5x4xf32> into memref<1x5x2x2xf32>
%2 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5x2x2xf32>
%3 = "gpu.block_id"() {dimension = "x"} : () -> index
%4 = "gpu.grid_dim"() {dimension = "x"} : () -> index
%5 = "gpu.block_id"() {dimension = "y"} : () -> index
%6 = "gpu.grid_dim"() {dimension = "y"} : () -> index
%7 = "gpu.block_id"() {dimension = "z"} : () -> index
%8 = "gpu.grid_dim"() {dimension = "z"} : () -> index
// Outer-loop lower bounds (block_id * tile) and steps (grid_dim * tile);
// the tile factors are 2 for z, 2 for y and 32 for x.
%9 = muli %7, %c2 : index
%10 = muli %8, %c2 : index
%11 = muli %5, %c2 : index
%12 = muli %6, %c2 : index
%13 = muli %3, %c32 : index
%14 = muli %4, %c32 : index
// Outer loops: %arg0 < 1 (z), %arg1 < 5 (y), %arg2 < 2 (x).
scf.for %arg0 = %9 to %c1 step %10 {
scf.for %arg1 = %11 to %c5 step %12 {
scf.for %arg2 = %13 to %c2 step %14 {
// %18 = min(2, 1 - %arg0): inner trip bound for dimension 0.
%15 = muli %arg0, %c-1 : index
%16 = addi %15, %c1 : index
%17 = cmpi "slt", %c2, %16 : index
%18 = select %17, %c2, %16 : index
// %22 = min(2, 5 - %arg1): inner trip bound for dimension 1.
%19 = muli %arg1, %c-1 : index
%20 = addi %19, %c5 : index
%21 = cmpi "slt", %c2, %20 : index
%22 = select %21, %c2, %20 : index
// %26 = min(32, 2 - %arg2): inner trip bound for dimension 2.
%23 = muli %arg2, %c-1 : index
%24 = addi %23, %c2 : index
%25 = cmpi "slt", %c32, %24 : index
%26 = select %25, %c32, %24 : index
%27 = "gpu.thread_id"() {dimension = "x"} : () -> index
%28 = "gpu.block_dim"() {dimension = "x"} : () -> index
%29 = "gpu.thread_id"() {dimension = "y"} : () -> index
%30 = "gpu.block_dim"() {dimension = "y"} : () -> index
%31 = "gpu.thread_id"() {dimension = "z"} : () -> index
%32 = "gpu.block_dim"() {dimension = "z"} : () -> index
// Inner loops start at thread_id and step by block_dim (z, y, x order);
// the innermost loop walks the last dimension (size 2) sequentially.
scf.for %arg3 = %31 to %18 step %32 {
scf.for %arg4 = %29 to %22 step %30 {
scf.for %arg5 = %27 to %26 step %28 {
scf.for %arg6 = %c0 to %c2 step %c1 {
// Element indices for the load: outer offset + inner offset.
%33 = addi %arg0, %arg3 : index
%34 = addi %arg1, %arg4 : index
%35 = addi %arg2, %arg5 : index
%36 = load %2[%33, %34, %35, %arg6] : memref<1x5x2x2xf32>
// Same three sums recomputed for the store (duplicates of %33-%35).
%37 = addi %arg0, %arg3 : index
%38 = addi %arg1, %arg4 : index
%39 = addi %arg2, %arg5 : index
store %36, %1[%37, %38, %39, %arg6] : memref<1x5x2x2xf32>
}
}
}
}
}
}
}
return
}
// Binding table referenced by the placeholders above: @arg0 is read-only at
// (set 0, binding 0); @ret0 is write/discard at (set 0, binding 1).
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After Canonicalizer ***
// Canonicalizer output for @main_ex_dispatch_1: an element-wise copy from the
// @arg0 buffer to the @ret0 buffer (viewed as 1x5x2x2), expressed as an outer
// loop nest keyed on block ids and an inner loop nest keyed on thread ids.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
func @main_ex_dispatch_1() attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
%c5 = constant 5 : index
%c32 = constant 32 : index
%c-1 = constant -1 : index
%c2 = constant 2 : index
%c0 = constant 0 : index
%c1 = constant 1 : index
// Output binding (1x5x4) reshaped to 1x5x2x2, and the 1x5x2x2 input binding.
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<1x5x4xf32>
%1 = linalg.reshape %0 [affine_map<(d0, d1, d2, d3) -> (d0)>, affine_map<(d0, d1, d2, d3) -> (d1)>, affine_map<(d0, d1, d2, d3) -> (d2, d3)>] : memref<1x5x4xf32> into memref<1x5x2x2xf32>
%2 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5x2x2xf32>
%3 = "gpu.block_id"() {dimension = "x"} : () -> index
%4 = "gpu.grid_dim"() {dimension = "x"} : () -> index
%5 = "gpu.block_id"() {dimension = "y"} : () -> index
%6 = "gpu.grid_dim"() {dimension = "y"} : () -> index
%7 = "gpu.block_id"() {dimension = "z"} : () -> index
%8 = "gpu.grid_dim"() {dimension = "z"} : () -> index
// Per-dimension (lower bound, step) pairs for the outer loops:
// z -> (%9, %10), y -> (%11, %12), x -> (%13, %14).
%9 = muli %7, %c2 : index
%10 = muli %8, %c2 : index
%11 = muli %5, %c2 : index
%12 = muli %6, %c2 : index
%13 = muli %3, %c32 : index
%14 = muli %4, %c32 : index
scf.for %arg0 = %9 to %c1 step %10 {
scf.for %arg1 = %11 to %c5 step %12 {
scf.for %arg2 = %13 to %c2 step %14 {
// Inner upper bounds, each a signed min of the tile factor and the
// remaining extent: %18 = min(2, 1 - %arg0), %22 = min(2, 5 - %arg1),
// %26 = min(32, 2 - %arg2).
%15 = muli %arg0, %c-1 : index
%16 = addi %15, %c1 : index
%17 = cmpi "slt", %c2, %16 : index
%18 = select %17, %c2, %16 : index
%19 = muli %arg1, %c-1 : index
%20 = addi %19, %c5 : index
%21 = cmpi "slt", %c2, %20 : index
%22 = select %21, %c2, %20 : index
%23 = muli %arg2, %c-1 : index
%24 = addi %23, %c2 : index
%25 = cmpi "slt", %c32, %24 : index
%26 = select %25, %c32, %24 : index
%27 = "gpu.thread_id"() {dimension = "x"} : () -> index
%28 = "gpu.block_dim"() {dimension = "x"} : () -> index
%29 = "gpu.thread_id"() {dimension = "y"} : () -> index
%30 = "gpu.block_dim"() {dimension = "y"} : () -> index
%31 = "gpu.thread_id"() {dimension = "z"} : () -> index
%32 = "gpu.block_dim"() {dimension = "z"} : () -> index
// Inner loops: lower bound = thread_id, step = block_dim, for z, y, x.
scf.for %arg3 = %31 to %18 step %32 {
scf.for %arg4 = %29 to %22 step %30 {
scf.for %arg5 = %27 to %26 step %28 {
// Sequential walk over the last dimension (extent 2).
scf.for %arg6 = %c0 to %c2 step %c1 {
%33 = addi %arg0, %arg3 : index
%34 = addi %arg1, %arg4 : index
%35 = addi %arg2, %arg5 : index
%36 = load %2[%33, %34, %35, %arg6] : memref<1x5x2x2xf32>
// %37-%39 repeat the sums %33-%35; they are still present at this stage.
%37 = addi %arg0, %arg3 : index
%38 = addi %arg1, %arg4 : index
%39 = addi %arg2, %arg5 : index
store %36, %1[%37, %38, %39, %arg6] : memref<1x5x2x2xf32>
}
}
}
}
}
}
}
return
}
// Descriptor bindings: @arg0 (set 0, binding 0, read) and @ret0 (set 0,
// binding 1, write/discard).
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After CSE ***
// CSE output for @main_ex_dispatch_1: element-wise copy from the @arg0 buffer
// to the @ret0 buffer (viewed as 1x5x2x2). The load and the store in the
// innermost loop share a single set of index sums (%33, %34, %35).
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
func @main_ex_dispatch_1() attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
%c5 = constant 5 : index
%c32 = constant 32 : index
%c-1 = constant -1 : index
%c2 = constant 2 : index
%c0 = constant 0 : index
%c1 = constant 1 : index
// %1: output buffer reshaped from 1x5x4 to 1x5x2x2; %2: input buffer.
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<1x5x4xf32>
%1 = linalg.reshape %0 [affine_map<(d0, d1, d2, d3) -> (d0)>, affine_map<(d0, d1, d2, d3) -> (d1)>, affine_map<(d0, d1, d2, d3) -> (d2, d3)>] : memref<1x5x4xf32> into memref<1x5x2x2xf32>
%2 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5x2x2xf32>
%3 = "gpu.block_id"() {dimension = "x"} : () -> index
%4 = "gpu.grid_dim"() {dimension = "x"} : () -> index
%5 = "gpu.block_id"() {dimension = "y"} : () -> index
%6 = "gpu.grid_dim"() {dimension = "y"} : () -> index
%7 = "gpu.block_id"() {dimension = "z"} : () -> index
%8 = "gpu.grid_dim"() {dimension = "z"} : () -> index
// Outer-loop start = block_id * factor, step = grid_dim * factor
// (factor 2 for z and y, 32 for x).
%9 = muli %7, %c2 : index
%10 = muli %8, %c2 : index
%11 = muli %5, %c2 : index
%12 = muli %6, %c2 : index
%13 = muli %3, %c32 : index
%14 = muli %4, %c32 : index
scf.for %arg0 = %9 to %c1 step %10 {
scf.for %arg1 = %11 to %c5 step %12 {
scf.for %arg2 = %13 to %c2 step %14 {
// %18 = min(2, 1 - %arg0)
%15 = muli %arg0, %c-1 : index
%16 = addi %15, %c1 : index
%17 = cmpi "slt", %c2, %16 : index
%18 = select %17, %c2, %16 : index
// %22 = min(2, 5 - %arg1)
%19 = muli %arg1, %c-1 : index
%20 = addi %19, %c5 : index
%21 = cmpi "slt", %c2, %20 : index
%22 = select %21, %c2, %20 : index
// %26 = min(32, 2 - %arg2)
%23 = muli %arg2, %c-1 : index
%24 = addi %23, %c2 : index
%25 = cmpi "slt", %c32, %24 : index
%26 = select %25, %c32, %24 : index
%27 = "gpu.thread_id"() {dimension = "x"} : () -> index
%28 = "gpu.block_dim"() {dimension = "x"} : () -> index
%29 = "gpu.thread_id"() {dimension = "y"} : () -> index
%30 = "gpu.block_dim"() {dimension = "y"} : () -> index
%31 = "gpu.thread_id"() {dimension = "z"} : () -> index
%32 = "gpu.block_dim"() {dimension = "z"} : () -> index
// Inner loops run from thread_id up to the clamped bound in steps of
// block_dim; %arg6 covers the last dimension (0..2, step 1).
scf.for %arg3 = %31 to %18 step %32 {
scf.for %arg4 = %29 to %22 step %30 {
scf.for %arg5 = %27 to %26 step %28 {
scf.for %arg6 = %c0 to %c2 step %c1 {
// Full-buffer indices = outer offset + inner offset; used by both
// the load from the input and the store to the output.
%33 = addi %arg0, %arg3 : index
%34 = addi %arg1, %arg4 : index
%35 = addi %arg2, %arg5 : index
%36 = load %2[%33, %34, %35, %arg6] : memref<1x5x2x2xf32>
store %36, %1[%33, %34, %35, %arg6] : memref<1x5x2x2xf32>
}
}
}
}
}
}
}
return
}
// Bindings: @arg0 = (set 0, binding 0), read; @ret0 = (set 0, binding 1),
// write/discard.
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::ConvertToSPIRVPass ***
// SPIR-V dialect form of @main_ex_dispatch_1. The kernel copies f32 elements
// from the buffer at bind(0, 0) to the buffer at bind(0, 1), each modelled as a
// struct wrapping a flat array of 20 floats. Seven nested spv.loop regions
// walk the index space; every spv.loop uses ^bb1 as the header (bound check),
// ^bb2 as the body plus increment, and ^bb3 as the merge block.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
spv.module Logical GLSL450 {
// Built-in inputs read by the kernel, followed by the two storage buffers.
spv.globalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_NumWorkgroups__ built_in("NumWorkgroups") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__resource_var_0_0__ bind(0, 0) : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
spv.globalVariable @__resource_var_0_1__ bind(0, 1) : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
spv.func @main_ex_dispatch_1() "None" attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
%0 = spv.constant 5 : i32
%1 = spv.constant 32 : i32
%2 = spv.constant -1 : i32
%3 = spv.constant 2 : i32
%4 = spv.constant 0 : i32
%5 = spv.constant 1 : i32
// %6: store destination (binding 1); %7: load source (binding 0).
%6 = spv._address_of @__resource_var_0_1__ : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
%7 = spv._address_of @__resource_var_0_0__ : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
// WorkgroupId / NumWorkgroups components: x -> (%10, %13), y -> (%16, %19),
// z -> (%22, %25). The address-of and load are repeated per component.
%8 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%9 = spv.Load "Input" %8 : vector<3xi32>
%10 = spv.CompositeExtract %9[0 : i32] : vector<3xi32>
%11 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%12 = spv.Load "Input" %11 : vector<3xi32>
%13 = spv.CompositeExtract %12[0 : i32] : vector<3xi32>
%14 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%15 = spv.Load "Input" %14 : vector<3xi32>
%16 = spv.CompositeExtract %15[1 : i32] : vector<3xi32>
%17 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%18 = spv.Load "Input" %17 : vector<3xi32>
%19 = spv.CompositeExtract %18[1 : i32] : vector<3xi32>
%20 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%21 = spv.Load "Input" %20 : vector<3xi32>
%22 = spv.CompositeExtract %21[2 : i32] : vector<3xi32>
%23 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%24 = spv.Load "Input" %23 : vector<3xi32>
%25 = spv.CompositeExtract %24[2 : i32] : vector<3xi32>
// Outer-loop (start, step) pairs: z -> (%26, %27), y -> (%28, %29),
// x -> (%30, %31); factors are 2, 2 and 32 respectively.
%26 = spv.IMul %22, %3 : i32
%27 = spv.IMul %25, %3 : i32
%28 = spv.IMul %16, %3 : i32
%29 = spv.IMul %19, %3 : i32
%30 = spv.IMul %10, %1 : i32
%31 = spv.IMul %13, %1 : i32
// Outer loop 1: %32 from %26 while %32 < 1, step %27.
spv.loop {
spv.Branch ^bb1(%26 : i32)
^bb1(%32: i32): // 2 preds: ^bb0, ^bb2
%33 = spv.SLessThan %32, %5 : i32
spv.BranchConditional %33, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Outer loop 2: %35 from %28 while %35 < 5, step %29.
spv.loop {
spv.Branch ^bb1(%28 : i32)
^bb1(%35: i32): // 2 preds: ^bb0, ^bb2
%36 = spv.SLessThan %35, %0 : i32
spv.BranchConditional %36, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Outer loop 3: %38 from %30 while %38 < 2, step %31.
spv.loop {
spv.Branch ^bb1(%30 : i32)
^bb1(%38: i32): // 2 preds: ^bb0, ^bb2
%39 = spv.SLessThan %38, %3 : i32
spv.BranchConditional %39, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner bounds: %43 = min(2, 1 - %32), %47 = min(2, 5 - %35),
// %51 = min(32, 2 - %38).
%40 = spv.IMul %32, %2 : i32
%41 = spv.IAdd %40, %5 : i32
%42 = spv.SLessThan %3, %41 : i32
%43 = spv.Select %42, %3, %41 : i1, i32
%44 = spv.IMul %35, %2 : i32
%45 = spv.IAdd %44, %0 : i32
%46 = spv.SLessThan %3, %45 : i32
%47 = spv.Select %46, %3, %45 : i1, i32
%48 = spv.IMul %38, %2 : i32
%49 = spv.IAdd %48, %3 : i32
%50 = spv.SLessThan %1, %49 : i32
%51 = spv.Select %50, %1, %49 : i1, i32
// LocalInvocationId components x/y/z (%54, %58, %62) are the inner-loop
// starts. The constants %55, %59, %63 are the inner-loop steps; their
// values (32, 2, 2) equal the local_size entries of the entry-point ABI.
%52 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%53 = spv.Load "Input" %52 : vector<3xi32>
%54 = spv.CompositeExtract %53[0 : i32] : vector<3xi32>
%55 = spv.constant 32 : i32
%56 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%57 = spv.Load "Input" %56 : vector<3xi32>
%58 = spv.CompositeExtract %57[1 : i32] : vector<3xi32>
%59 = spv.constant 2 : i32
%60 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%61 = spv.Load "Input" %60 : vector<3xi32>
%62 = spv.CompositeExtract %61[2 : i32] : vector<3xi32>
%63 = spv.constant 2 : i32
// Inner loop z: %65 from %62 while %65 < %43, step %63.
spv.loop {
spv.Branch ^bb1(%62 : i32)
^bb1(%65: i32): // 2 preds: ^bb0, ^bb2
%66 = spv.SLessThan %65, %43 : i32
spv.BranchConditional %66, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner loop y: %68 from %58 while %68 < %47, step %59.
spv.loop {
spv.Branch ^bb1(%58 : i32)
^bb1(%68: i32): // 2 preds: ^bb0, ^bb2
%69 = spv.SLessThan %68, %47 : i32
spv.BranchConditional %69, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner loop x: %71 from %54 while %71 < %51, step %55.
spv.loop {
spv.Branch ^bb1(%54 : i32)
^bb1(%71: i32): // 2 preds: ^bb0, ^bb2
%72 = spv.SLessThan %71, %51 : i32
spv.BranchConditional %72, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Innermost loop: %74 from 0 while %74 < 2, step 1.
spv.loop {
spv.Branch ^bb1(%4 : i32)
^bb1(%74: i32): // 2 preds: ^bb0, ^bb2
%75 = spv.SLessThan %74, %3 : i32
spv.BranchConditional %75, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Per-dimension indices: outer offset + inner offset.
%76 = spv.IAdd %32, %65 : i32
%77 = spv.IAdd %35, %68 : i32
%78 = spv.IAdd %38, %71 : i32
// Flattened load offset %92 = 0 + 20*%76 + 4*%77 + 2*%78 + 1*%74;
// %79 (0) selects struct member 0 in the access chain.
%79 = spv.constant 0 : i32
%80 = spv.constant 0 : i32
%81 = spv.constant 20 : i32
%82 = spv.IMul %81, %76 : i32
%83 = spv.IAdd %80, %82 : i32
%84 = spv.constant 4 : i32
%85 = spv.IMul %84, %77 : i32
%86 = spv.IAdd %83, %85 : i32
%87 = spv.constant 2 : i32
%88 = spv.IMul %87, %78 : i32
%89 = spv.IAdd %86, %88 : i32
%90 = spv.constant 1 : i32
%91 = spv.IMul %90, %74 : i32
%92 = spv.IAdd %89, %91 : i32
%93 = spv.AccessChain %7[%79, %92] : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
%94 = spv.Load "StorageBuffer" %93 : f32
// Flattened store offset %108, computed by the same formula as %92.
%95 = spv.constant 0 : i32
%96 = spv.constant 0 : i32
%97 = spv.constant 20 : i32
%98 = spv.IMul %97, %76 : i32
%99 = spv.IAdd %96, %98 : i32
%100 = spv.constant 4 : i32
%101 = spv.IMul %100, %77 : i32
%102 = spv.IAdd %99, %101 : i32
%103 = spv.constant 2 : i32
%104 = spv.IMul %103, %78 : i32
%105 = spv.IAdd %102, %104 : i32
%106 = spv.constant 1 : i32
%107 = spv.IMul %106, %74 : i32
%108 = spv.IAdd %105, %107 : i32
%109 = spv.AccessChain %6[%95, %108] : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
spv.Store "StorageBuffer" %109, %94 : f32
// Innermost increment (+1) and back-edge.
%110 = spv.IAdd %74, %5 : i32
spv.Branch ^bb1(%110 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Inner x increment (+%55) and back-edge.
%73 = spv.IAdd %71, %55 : i32
spv.Branch ^bb1(%73 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Inner y increment (+%59) and back-edge.
%70 = spv.IAdd %68, %59 : i32
spv.Branch ^bb1(%70 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Inner z increment (+%63) and back-edge.
%67 = spv.IAdd %65, %63 : i32
spv.Branch ^bb1(%67 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Outer loop 3 increment (+%31) and back-edge.
%64 = spv.IAdd %38, %31 : i32
spv.Branch ^bb1(%64 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Outer loop 2 increment (+%29) and back-edge.
%37 = spv.IAdd %35, %29 : i32
spv.Branch ^bb1(%37 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Outer loop 1 increment (+%27) and back-edge.
%34 = spv.IAdd %32, %27 : i32
spv.Branch ^bb1(%34 : i32)
^bb3: // pred: ^bb1
spv._merge
}
spv.Return
}
}
// Binding table: @arg0 = (set 0, binding 0), read; @ret0 = (set 0, binding 1),
// write/discard.
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After SPIRVLowerABIAttributes ***
// Stand-alone spv.module for @main_ex_dispatch_1. The function carries no ABI
// attribute here; the entry point and workgroup size are instead declared by
// the spv.EntryPoint / spv.ExecutionMode ops at the end of the module.
// The kernel copies f32 elements from the bind(0, 0) buffer to the bind(0, 1)
// buffer through seven nested spv.loop regions (^bb1 = header/bound check,
// ^bb2 = body + increment, ^bb3 = merge).
spv.module Logical GLSL450 {
spv.globalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_NumWorkgroups__ built_in("NumWorkgroups") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__resource_var_0_0__ bind(0, 0) : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
spv.globalVariable @__resource_var_0_1__ bind(0, 1) : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
spv.func @main_ex_dispatch_1() "None" {
%0 = spv.constant 5 : i32
%1 = spv.constant 32 : i32
%2 = spv.constant -1 : i32
%3 = spv.constant 2 : i32
%4 = spv.constant 0 : i32
%5 = spv.constant 1 : i32
// %6 addresses the buffer that is stored to; %7 the buffer that is loaded from.
%6 = spv._address_of @__resource_var_0_1__ : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
%7 = spv._address_of @__resource_var_0_0__ : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
// Extract x, y, z of WorkgroupId (%10, %16, %22) and NumWorkgroups
// (%13, %19, %25).
%8 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%9 = spv.Load "Input" %8 : vector<3xi32>
%10 = spv.CompositeExtract %9[0 : i32] : vector<3xi32>
%11 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%12 = spv.Load "Input" %11 : vector<3xi32>
%13 = spv.CompositeExtract %12[0 : i32] : vector<3xi32>
%14 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%15 = spv.Load "Input" %14 : vector<3xi32>
%16 = spv.CompositeExtract %15[1 : i32] : vector<3xi32>
%17 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%18 = spv.Load "Input" %17 : vector<3xi32>
%19 = spv.CompositeExtract %18[1 : i32] : vector<3xi32>
%20 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%21 = spv.Load "Input" %20 : vector<3xi32>
%22 = spv.CompositeExtract %21[2 : i32] : vector<3xi32>
%23 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%24 = spv.Load "Input" %23 : vector<3xi32>
%25 = spv.CompositeExtract %24[2 : i32] : vector<3xi32>
// Outer-loop starts (%26, %28, %30) and steps (%27, %29, %31):
// id or count scaled by 2 (z), 2 (y) and 32 (x).
%26 = spv.IMul %22, %3 : i32
%27 = spv.IMul %25, %3 : i32
%28 = spv.IMul %16, %3 : i32
%29 = spv.IMul %19, %3 : i32
%30 = spv.IMul %10, %1 : i32
%31 = spv.IMul %13, %1 : i32
// Outer loop over %32, bound 1.
spv.loop {
spv.Branch ^bb1(%26 : i32)
^bb1(%32: i32): // 2 preds: ^bb0, ^bb2
%33 = spv.SLessThan %32, %5 : i32
spv.BranchConditional %33, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Outer loop over %35, bound 5.
spv.loop {
spv.Branch ^bb1(%28 : i32)
^bb1(%35: i32): // 2 preds: ^bb0, ^bb2
%36 = spv.SLessThan %35, %0 : i32
spv.BranchConditional %36, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Outer loop over %38, bound 2.
spv.loop {
spv.Branch ^bb1(%30 : i32)
^bb1(%38: i32): // 2 preds: ^bb0, ^bb2
%39 = spv.SLessThan %38, %3 : i32
spv.BranchConditional %39, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Clamped inner bounds via signed compare + select:
// %43 = min(2, 1 - %32), %47 = min(2, 5 - %35), %51 = min(32, 2 - %38).
%40 = spv.IMul %32, %2 : i32
%41 = spv.IAdd %40, %5 : i32
%42 = spv.SLessThan %3, %41 : i32
%43 = spv.Select %42, %3, %41 : i1, i32
%44 = spv.IMul %35, %2 : i32
%45 = spv.IAdd %44, %0 : i32
%46 = spv.SLessThan %3, %45 : i32
%47 = spv.Select %46, %3, %45 : i1, i32
%48 = spv.IMul %38, %2 : i32
%49 = spv.IAdd %48, %3 : i32
%50 = spv.SLessThan %1, %49 : i32
%51 = spv.Select %50, %1, %49 : i1, i32
// Inner-loop starts come from LocalInvocationId x/y/z (%54, %58, %62);
// inner-loop steps are the constants %55 = 32, %59 = 2, %63 = 2, which
// match the LocalSize declared at the end of the module.
%52 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%53 = spv.Load "Input" %52 : vector<3xi32>
%54 = spv.CompositeExtract %53[0 : i32] : vector<3xi32>
%55 = spv.constant 32 : i32
%56 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%57 = spv.Load "Input" %56 : vector<3xi32>
%58 = spv.CompositeExtract %57[1 : i32] : vector<3xi32>
%59 = spv.constant 2 : i32
%60 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%61 = spv.Load "Input" %60 : vector<3xi32>
%62 = spv.CompositeExtract %61[2 : i32] : vector<3xi32>
%63 = spv.constant 2 : i32
// Inner loop over %65 (starts at local id z, bound %43).
spv.loop {
spv.Branch ^bb1(%62 : i32)
^bb1(%65: i32): // 2 preds: ^bb0, ^bb2
%66 = spv.SLessThan %65, %43 : i32
spv.BranchConditional %66, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner loop over %68 (starts at local id y, bound %47).
spv.loop {
spv.Branch ^bb1(%58 : i32)
^bb1(%68: i32): // 2 preds: ^bb0, ^bb2
%69 = spv.SLessThan %68, %47 : i32
spv.BranchConditional %69, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner loop over %71 (starts at local id x, bound %51).
spv.loop {
spv.Branch ^bb1(%54 : i32)
^bb1(%71: i32): // 2 preds: ^bb0, ^bb2
%72 = spv.SLessThan %71, %51 : i32
spv.BranchConditional %72, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Innermost loop over %74 in [0, 2), step 1.
spv.loop {
spv.Branch ^bb1(%4 : i32)
^bb1(%74: i32): // 2 preds: ^bb0, ^bb2
%75 = spv.SLessThan %74, %3 : i32
spv.BranchConditional %75, ^bb2, ^bb3
^bb2: // pred: ^bb1
%76 = spv.IAdd %32, %65 : i32
%77 = spv.IAdd %35, %68 : i32
%78 = spv.IAdd %38, %71 : i32
// Load address: member 0, element 0 + 20*%76 + 4*%77 + 2*%78 + 1*%74.
%79 = spv.constant 0 : i32
%80 = spv.constant 0 : i32
%81 = spv.constant 20 : i32
%82 = spv.IMul %81, %76 : i32
%83 = spv.IAdd %80, %82 : i32
%84 = spv.constant 4 : i32
%85 = spv.IMul %84, %77 : i32
%86 = spv.IAdd %83, %85 : i32
%87 = spv.constant 2 : i32
%88 = spv.IMul %87, %78 : i32
%89 = spv.IAdd %86, %88 : i32
%90 = spv.constant 1 : i32
%91 = spv.IMul %90, %74 : i32
%92 = spv.IAdd %89, %91 : i32
%93 = spv.AccessChain %7[%79, %92] : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
%94 = spv.Load "StorageBuffer" %93 : f32
// Store address: the same offset formula, recomputed from scratch.
%95 = spv.constant 0 : i32
%96 = spv.constant 0 : i32
%97 = spv.constant 20 : i32
%98 = spv.IMul %97, %76 : i32
%99 = spv.IAdd %96, %98 : i32
%100 = spv.constant 4 : i32
%101 = spv.IMul %100, %77 : i32
%102 = spv.IAdd %99, %101 : i32
%103 = spv.constant 2 : i32
%104 = spv.IMul %103, %78 : i32
%105 = spv.IAdd %102, %104 : i32
%106 = spv.constant 1 : i32
%107 = spv.IMul %106, %74 : i32
%108 = spv.IAdd %105, %107 : i32
%109 = spv.AccessChain %6[%95, %108] : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
spv.Store "StorageBuffer" %109, %94 : f32
%110 = spv.IAdd %74, %5 : i32
spv.Branch ^bb1(%110 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance %71 by %55 (32).
%73 = spv.IAdd %71, %55 : i32
spv.Branch ^bb1(%73 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance %68 by %59 (2).
%70 = spv.IAdd %68, %59 : i32
spv.Branch ^bb1(%70 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance %65 by %63 (2).
%67 = spv.IAdd %65, %63 : i32
spv.Branch ^bb1(%67 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance %38 by %31.
%64 = spv.IAdd %38, %31 : i32
spv.Branch ^bb1(%64 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance %35 by %29.
%37 = spv.IAdd %35, %29 : i32
spv.Branch ^bb1(%37 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance %32 by %27.
%34 = spv.IAdd %32, %27 : i32
spv.Branch ^bb1(%34 : i32)
^bb3: // pred: ^bb1
spv._merge
}
spv.Return
}
// GLCompute entry point listing the three built-in interface variables, and
// its fixed workgroup size of 32 x 2 x 2.
spv.EntryPoint "GLCompute" @main_ex_dispatch_1, @__builtin_var_WorkgroupId__, @__builtin_var_NumWorkgroups__, @__builtin_var_LocalInvocationId__
spv.ExecutionMode @main_ex_dispatch_1 "LocalSize", 32, 2, 2
}
// *** IR Dump After Canonicalizer ***
// Canonicalized spv.module for @main_ex_dispatch_1. All constants are defined
// once at the top of the function, and the flattened buffer offset is computed
// without explicit "+ 0" or "* 1" terms. The kernel copies f32 elements from
// the bind(0, 0) buffer to the bind(0, 1) buffer through seven nested spv.loop
// regions (^bb1 = header/bound check, ^bb2 = body + increment, ^bb3 = merge).
spv.module Logical GLSL450 {
spv.globalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_NumWorkgroups__ built_in("NumWorkgroups") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__resource_var_0_0__ bind(0, 0) : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
spv.globalVariable @__resource_var_0_1__ bind(0, 1) : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
spv.func @main_ex_dispatch_1() "None" {
// Constants: %5 (20), %6 (4) and %7 (2) serve as the offset multipliers in
// the innermost loop; %7 and %3 (32) also serve as scale factors, clamp
// limits and inner-loop steps.
%0 = spv.constant 5 : i32
%1 = spv.constant -1 : i32
%2 = spv.constant 1 : i32
%3 = spv.constant 32 : i32
%4 = spv.constant 0 : i32
%5 = spv.constant 20 : i32
%6 = spv.constant 4 : i32
%7 = spv.constant 2 : i32
// %8: destination buffer (binding 1); %9: source buffer (binding 0).
%8 = spv._address_of @__resource_var_0_1__ : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
%9 = spv._address_of @__resource_var_0_0__ : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
// WorkgroupId x/y/z -> %12, %18, %24; NumWorkgroups x/y/z -> %15, %21, %27.
%10 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%11 = spv.Load "Input" %10 : vector<3xi32>
%12 = spv.CompositeExtract %11[0 : i32] : vector<3xi32>
%13 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%14 = spv.Load "Input" %13 : vector<3xi32>
%15 = spv.CompositeExtract %14[0 : i32] : vector<3xi32>
%16 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%17 = spv.Load "Input" %16 : vector<3xi32>
%18 = spv.CompositeExtract %17[1 : i32] : vector<3xi32>
%19 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%20 = spv.Load "Input" %19 : vector<3xi32>
%21 = spv.CompositeExtract %20[1 : i32] : vector<3xi32>
%22 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%23 = spv.Load "Input" %22 : vector<3xi32>
%24 = spv.CompositeExtract %23[2 : i32] : vector<3xi32>
%25 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%26 = spv.Load "Input" %25 : vector<3xi32>
%27 = spv.CompositeExtract %26[2 : i32] : vector<3xi32>
// Outer-loop (start, step): z -> (%28, %29), y -> (%30, %31), x -> (%32, %33).
%28 = spv.IMul %24, %7 : i32
%29 = spv.IMul %27, %7 : i32
%30 = spv.IMul %18, %7 : i32
%31 = spv.IMul %21, %7 : i32
%32 = spv.IMul %12, %3 : i32
%33 = spv.IMul %15, %3 : i32
// Outer loop over %34 while %34 < 1.
spv.loop {
spv.Branch ^bb1(%28 : i32)
^bb1(%34: i32): // 2 preds: ^bb0, ^bb2
%35 = spv.SLessThan %34, %2 : i32
spv.BranchConditional %35, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Outer loop over %37 while %37 < 5.
spv.loop {
spv.Branch ^bb1(%30 : i32)
^bb1(%37: i32): // 2 preds: ^bb0, ^bb2
%38 = spv.SLessThan %37, %0 : i32
spv.BranchConditional %38, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Outer loop over %40 while %40 < 2.
spv.loop {
spv.Branch ^bb1(%32 : i32)
^bb1(%40: i32): // 2 preds: ^bb0, ^bb2
%41 = spv.SLessThan %40, %7 : i32
spv.BranchConditional %41, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner bounds: %45 = min(2, 1 - %34), %49 = min(2, 5 - %37),
// %53 = min(32, 2 - %40).
%42 = spv.IMul %34, %1 : i32
%43 = spv.IAdd %42, %2 : i32
%44 = spv.SLessThan %7, %43 : i32
%45 = spv.Select %44, %7, %43 : i1, i32
%46 = spv.IMul %37, %1 : i32
%47 = spv.IAdd %46, %0 : i32
%48 = spv.SLessThan %7, %47 : i32
%49 = spv.Select %48, %7, %47 : i1, i32
%50 = spv.IMul %40, %1 : i32
%51 = spv.IAdd %50, %7 : i32
%52 = spv.SLessThan %3, %51 : i32
%53 = spv.Select %52, %3, %51 : i1, i32
// LocalInvocationId x/y/z -> %56, %59, %62 (inner-loop starts).
%54 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%55 = spv.Load "Input" %54 : vector<3xi32>
%56 = spv.CompositeExtract %55[0 : i32] : vector<3xi32>
%57 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%58 = spv.Load "Input" %57 : vector<3xi32>
%59 = spv.CompositeExtract %58[1 : i32] : vector<3xi32>
%60 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%61 = spv.Load "Input" %60 : vector<3xi32>
%62 = spv.CompositeExtract %61[2 : i32] : vector<3xi32>
// Inner loop over %64 from local id z while %64 < %45, step 2.
spv.loop {
spv.Branch ^bb1(%62 : i32)
^bb1(%64: i32): // 2 preds: ^bb0, ^bb2
%65 = spv.SLessThan %64, %45 : i32
spv.BranchConditional %65, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner loop over %67 from local id y while %67 < %49, step 2.
spv.loop {
spv.Branch ^bb1(%59 : i32)
^bb1(%67: i32): // 2 preds: ^bb0, ^bb2
%68 = spv.SLessThan %67, %49 : i32
spv.BranchConditional %68, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner loop over %70 from local id x while %70 < %53, step 32.
spv.loop {
spv.Branch ^bb1(%56 : i32)
^bb1(%70: i32): // 2 preds: ^bb0, ^bb2
%71 = spv.SLessThan %70, %53 : i32
spv.BranchConditional %71, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Innermost loop over %73 in [0, 2), step 1.
spv.loop {
spv.Branch ^bb1(%4 : i32)
^bb1(%73: i32): // 2 preds: ^bb0, ^bb2
%74 = spv.SLessThan %73, %7 : i32
spv.BranchConditional %74, ^bb2, ^bb3
^bb2: // pred: ^bb1
%75 = spv.IAdd %34, %64 : i32
%76 = spv.IAdd %37, %67 : i32
%77 = spv.IAdd %40, %70 : i32
// Load offset %83 = %75*20 + %76*4 + %77*2 + %73.
%78 = spv.IMul %75, %5 : i32
%79 = spv.IMul %76, %6 : i32
%80 = spv.IAdd %78, %79 : i32
%81 = spv.IMul %77, %7 : i32
%82 = spv.IAdd %80, %81 : i32
%83 = spv.IAdd %82, %73 : i32
%84 = spv.AccessChain %9[%4, %83] : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
%85 = spv.Load "StorageBuffer" %84 : f32
// Store offset %91: same expression as %83, still computed separately.
%86 = spv.IMul %75, %5 : i32
%87 = spv.IMul %76, %6 : i32
%88 = spv.IAdd %86, %87 : i32
%89 = spv.IMul %77, %7 : i32
%90 = spv.IAdd %88, %89 : i32
%91 = spv.IAdd %90, %73 : i32
%92 = spv.AccessChain %8[%4, %91] : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
spv.Store "StorageBuffer" %92, %85 : f32
%93 = spv.IAdd %73, %2 : i32
spv.Branch ^bb1(%93 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance %70 by 32.
%72 = spv.IAdd %70, %3 : i32
spv.Branch ^bb1(%72 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance %67 by 2.
%69 = spv.IAdd %67, %7 : i32
spv.Branch ^bb1(%69 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance %64 by 2.
%66 = spv.IAdd %64, %7 : i32
spv.Branch ^bb1(%66 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance %40 by %33.
%63 = spv.IAdd %40, %33 : i32
spv.Branch ^bb1(%63 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance %37 by %31.
%39 = spv.IAdd %37, %31 : i32
spv.Branch ^bb1(%39 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance %34 by %29.
%36 = spv.IAdd %34, %29 : i32
spv.Branch ^bb1(%36 : i32)
^bb3: // pred: ^bb1
spv._merge
}
spv.Return
}
// GLCompute entry point with its interface variables, and a 32 x 2 x 2
// workgroup size.
spv.EntryPoint "GLCompute" @main_ex_dispatch_1, @__builtin_var_WorkgroupId__, @__builtin_var_NumWorkgroups__, @__builtin_var_LocalInvocationId__
spv.ExecutionMode @main_ex_dispatch_1 "LocalSize", 32, 2, 2
}
// *** IR Dump After CSE ***
// spv.module for @main_ex_dispatch_1 after CSE. Each built-in variable is
// addressed once (%10, %13, %50) although it is still loaded once per extracted
// component, and a single flattened offset (%77) feeds both the load and the
// store. The kernel copies f32 elements from the bind(0, 0) buffer to the
// bind(0, 1) buffer through seven nested spv.loop regions
// (^bb1 = header/bound check, ^bb2 = body + increment, ^bb3 = merge).
spv.module Logical GLSL450 {
spv.globalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_NumWorkgroups__ built_in("NumWorkgroups") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__resource_var_0_0__ bind(0, 0) : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
spv.globalVariable @__resource_var_0_1__ bind(0, 1) : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
spv.func @main_ex_dispatch_1() "None" {
%0 = spv.constant 5 : i32
%1 = spv.constant -1 : i32
%2 = spv.constant 1 : i32
%3 = spv.constant 32 : i32
%4 = spv.constant 0 : i32
%5 = spv.constant 20 : i32
%6 = spv.constant 4 : i32
%7 = spv.constant 2 : i32
// %8: destination buffer (binding 1); %9: source buffer (binding 0).
%8 = spv._address_of @__resource_var_0_1__ : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
%9 = spv._address_of @__resource_var_0_0__ : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
// WorkgroupId (via %10): x/y/z -> %12, %17, %21.
// NumWorkgroups (via %13): x/y/z -> %15, %19, %23.
%10 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%11 = spv.Load "Input" %10 : vector<3xi32>
%12 = spv.CompositeExtract %11[0 : i32] : vector<3xi32>
%13 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%14 = spv.Load "Input" %13 : vector<3xi32>
%15 = spv.CompositeExtract %14[0 : i32] : vector<3xi32>
%16 = spv.Load "Input" %10 : vector<3xi32>
%17 = spv.CompositeExtract %16[1 : i32] : vector<3xi32>
%18 = spv.Load "Input" %13 : vector<3xi32>
%19 = spv.CompositeExtract %18[1 : i32] : vector<3xi32>
%20 = spv.Load "Input" %10 : vector<3xi32>
%21 = spv.CompositeExtract %20[2 : i32] : vector<3xi32>
%22 = spv.Load "Input" %13 : vector<3xi32>
%23 = spv.CompositeExtract %22[2 : i32] : vector<3xi32>
// Outer-loop (start, step): z -> (%24, %25), y -> (%26, %27), x -> (%28, %29).
%24 = spv.IMul %21, %7 : i32
%25 = spv.IMul %23, %7 : i32
%26 = spv.IMul %17, %7 : i32
%27 = spv.IMul %19, %7 : i32
%28 = spv.IMul %12, %3 : i32
%29 = spv.IMul %15, %3 : i32
// Outer loop over %30 while %30 < 1.
spv.loop {
spv.Branch ^bb1(%24 : i32)
^bb1(%30: i32): // 2 preds: ^bb0, ^bb2
%31 = spv.SLessThan %30, %2 : i32
spv.BranchConditional %31, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Outer loop over %33 while %33 < 5.
spv.loop {
spv.Branch ^bb1(%26 : i32)
^bb1(%33: i32): // 2 preds: ^bb0, ^bb2
%34 = spv.SLessThan %33, %0 : i32
spv.BranchConditional %34, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Outer loop over %36 while %36 < 2.
spv.loop {
spv.Branch ^bb1(%28 : i32)
^bb1(%36: i32): // 2 preds: ^bb0, ^bb2
%37 = spv.SLessThan %36, %7 : i32
spv.BranchConditional %37, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner bounds: %41 = min(2, 1 - %30), %45 = min(2, 5 - %33),
// %49 = min(32, 2 - %36).
%38 = spv.IMul %30, %1 : i32
%39 = spv.IAdd %38, %2 : i32
%40 = spv.SLessThan %7, %39 : i32
%41 = spv.Select %40, %7, %39 : i1, i32
%42 = spv.IMul %33, %1 : i32
%43 = spv.IAdd %42, %0 : i32
%44 = spv.SLessThan %7, %43 : i32
%45 = spv.Select %44, %7, %43 : i1, i32
%46 = spv.IMul %36, %1 : i32
%47 = spv.IAdd %46, %7 : i32
%48 = spv.SLessThan %3, %47 : i32
%49 = spv.Select %48, %3, %47 : i1, i32
// LocalInvocationId (via %50): x/y/z -> %52, %54, %56 (inner-loop starts).
%50 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%51 = spv.Load "Input" %50 : vector<3xi32>
%52 = spv.CompositeExtract %51[0 : i32] : vector<3xi32>
%53 = spv.Load "Input" %50 : vector<3xi32>
%54 = spv.CompositeExtract %53[1 : i32] : vector<3xi32>
%55 = spv.Load "Input" %50 : vector<3xi32>
%56 = spv.CompositeExtract %55[2 : i32] : vector<3xi32>
// Inner loop over %58 from local id z while %58 < %41, step 2.
spv.loop {
spv.Branch ^bb1(%56 : i32)
^bb1(%58: i32): // 2 preds: ^bb0, ^bb2
%59 = spv.SLessThan %58, %41 : i32
spv.BranchConditional %59, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner loop over %61 from local id y while %61 < %45, step 2.
spv.loop {
spv.Branch ^bb1(%54 : i32)
^bb1(%61: i32): // 2 preds: ^bb0, ^bb2
%62 = spv.SLessThan %61, %45 : i32
spv.BranchConditional %62, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner loop over %64 from local id x while %64 < %49, step 32.
spv.loop {
spv.Branch ^bb1(%52 : i32)
^bb1(%64: i32): // 2 preds: ^bb0, ^bb2
%65 = spv.SLessThan %64, %49 : i32
spv.BranchConditional %65, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Innermost loop over %67 in [0, 2), step 1.
spv.loop {
spv.Branch ^bb1(%4 : i32)
^bb1(%67: i32): // 2 preds: ^bb0, ^bb2
%68 = spv.SLessThan %67, %7 : i32
spv.BranchConditional %68, ^bb2, ^bb3
^bb2: // pred: ^bb1
%69 = spv.IAdd %30, %58 : i32
%70 = spv.IAdd %33, %61 : i32
%71 = spv.IAdd %36, %64 : i32
// Shared element offset %77 = %69*20 + %70*4 + %71*2 + %67.
%72 = spv.IMul %69, %5 : i32
%73 = spv.IMul %70, %6 : i32
%74 = spv.IAdd %72, %73 : i32
%75 = spv.IMul %71, %7 : i32
%76 = spv.IAdd %74, %75 : i32
%77 = spv.IAdd %76, %67 : i32
// Copy one f32: load from the source at %77, store to the destination at %77.
%78 = spv.AccessChain %9[%4, %77] : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
%79 = spv.Load "StorageBuffer" %78 : f32
%80 = spv.AccessChain %8[%4, %77] : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
spv.Store "StorageBuffer" %80, %79 : f32
%81 = spv.IAdd %67, %2 : i32
spv.Branch ^bb1(%81 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance %64 by 32.
%66 = spv.IAdd %64, %3 : i32
spv.Branch ^bb1(%66 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance %61 by 2.
%63 = spv.IAdd %61, %7 : i32
spv.Branch ^bb1(%63 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance %58 by 2.
%60 = spv.IAdd %58, %7 : i32
spv.Branch ^bb1(%60 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance %36 by %29.
%57 = spv.IAdd %36, %29 : i32
spv.Branch ^bb1(%57 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance %33 by %27.
%35 = spv.IAdd %33, %27 : i32
spv.Branch ^bb1(%35 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance %30 by %25.
%32 = spv.IAdd %30, %25 : i32
spv.Branch ^bb1(%32 : i32)
^bb3: // pred: ^bb1
spv._merge
}
spv.Return
}
// GLCompute entry point with its interface variables, and a 32 x 2 x 2
// workgroup size.
spv.EntryPoint "GLCompute" @main_ex_dispatch_1, @__builtin_var_WorkgroupId__, @__builtin_var_NumWorkgroups__, @__builtin_var_LocalInvocationId__
spv.ExecutionMode @main_ex_dispatch_1 "LocalSize", 32, 2, 2
}
// *** IR Dump After SPIRVUpdateVCE ***
// SPIR-V module (Logical addressing, GLSL450 memory model) holding the compute
// entry point @main_ex_dispatch_1. It requires version v1.0, the Shader
// capability and the SPV_KHR_storage_buffer_storage_class extension.
// The kernel copies f32 elements from the buffer at bind(0, 0) to the same
// element index of the buffer at bind(0, 1); both are structs wrapping a
// 20 x f32 array with stride 4.
spv.module Logical GLSL450 requires #spv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class]> {
spv.globalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_NumWorkgroups__ built_in("NumWorkgroups") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__resource_var_0_0__ bind(0, 0) : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
spv.globalVariable @__resource_var_0_1__ bind(0, 1) : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
spv.func @main_ex_dispatch_1() "None" {
// Integer constants shared by the loop bounds, steps and index arithmetic below.
%0 = spv.constant 5 : i32
%1 = spv.constant -1 : i32
%2 = spv.constant 1 : i32
%3 = spv.constant 32 : i32
%4 = spv.constant 0 : i32
%5 = spv.constant 20 : i32
%6 = spv.constant 4 : i32
%7 = spv.constant 2 : i32
// %8 is the destination buffer (binding 1), %9 the source buffer (binding 0).
%8 = spv._address_of @__resource_var_0_1__ : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
%9 = spv._address_of @__resource_var_0_0__ : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
// Read the x (%12/%15), y (%17/%19) and z (%21/%23) components of WorkgroupId
// and NumWorkgroups; the vector is re-loaded for every component.
%10 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%11 = spv.Load "Input" %10 : vector<3xi32>
%12 = spv.CompositeExtract %11[0 : i32] : vector<3xi32>
%13 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%14 = spv.Load "Input" %13 : vector<3xi32>
%15 = spv.CompositeExtract %14[0 : i32] : vector<3xi32>
%16 = spv.Load "Input" %10 : vector<3xi32>
%17 = spv.CompositeExtract %16[1 : i32] : vector<3xi32>
%18 = spv.Load "Input" %13 : vector<3xi32>
%19 = spv.CompositeExtract %18[1 : i32] : vector<3xi32>
%20 = spv.Load "Input" %10 : vector<3xi32>
%21 = spv.CompositeExtract %20[2 : i32] : vector<3xi32>
%22 = spv.Load "Input" %13 : vector<3xi32>
%23 = spv.CompositeExtract %22[2 : i32] : vector<3xi32>
// Outer-loop start (WorkgroupId * factor) and step (NumWorkgroups * factor);
// the factor is 2 for z and y and 32 for x.
%24 = spv.IMul %21, %7 : i32
%25 = spv.IMul %23, %7 : i32
%26 = spv.IMul %17, %7 : i32
%27 = spv.IMul %19, %7 : i32
%28 = spv.IMul %12, %3 : i32
%29 = spv.IMul %15, %3 : i32
// Outer loop 1: %30 runs from %24 while %30 < 1, stepping by %25.
spv.loop {
spv.Branch ^bb1(%24 : i32)
^bb1(%30: i32): // 2 preds: ^bb0, ^bb2
%31 = spv.SLessThan %30, %2 : i32
spv.BranchConditional %31, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Outer loop 2: %33 runs from %26 while %33 < 5, stepping by %27.
spv.loop {
spv.Branch ^bb1(%26 : i32)
^bb1(%33: i32): // 2 preds: ^bb0, ^bb2
%34 = spv.SLessThan %33, %0 : i32
spv.BranchConditional %34, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Outer loop 3: %36 runs from %28 while %36 < 2, stepping by %29.
spv.loop {
spv.Branch ^bb1(%28 : i32)
^bb1(%36: i32): // 2 preds: ^bb0, ^bb2
%37 = spv.SLessThan %36, %7 : i32
spv.BranchConditional %37, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner-loop upper bounds, each a signed min of a constant and the remaining extent:
// %41 = min(2, 1 - %30), %45 = min(2, 5 - %33), %49 = min(32, 2 - %36).
%38 = spv.IMul %30, %1 : i32
%39 = spv.IAdd %38, %2 : i32
%40 = spv.SLessThan %7, %39 : i32
%41 = spv.Select %40, %7, %39 : i1, i32
%42 = spv.IMul %33, %1 : i32
%43 = spv.IAdd %42, %0 : i32
%44 = spv.SLessThan %7, %43 : i32
%45 = spv.Select %44, %7, %43 : i1, i32
%46 = spv.IMul %36, %1 : i32
%47 = spv.IAdd %46, %7 : i32
%48 = spv.SLessThan %3, %47 : i32
%49 = spv.Select %48, %3, %47 : i1, i32
// LocalInvocationId components: %52 = x, %54 = y, %56 = z.
%50 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%51 = spv.Load "Input" %50 : vector<3xi32>
%52 = spv.CompositeExtract %51[0 : i32] : vector<3xi32>
%53 = spv.Load "Input" %50 : vector<3xi32>
%54 = spv.CompositeExtract %53[1 : i32] : vector<3xi32>
%55 = spv.Load "Input" %50 : vector<3xi32>
%56 = spv.CompositeExtract %55[2 : i32] : vector<3xi32>
// Inner loop 1: %58 runs from LocalInvocationId.z while %58 < %41, stepping by 2.
spv.loop {
spv.Branch ^bb1(%56 : i32)
^bb1(%58: i32): // 2 preds: ^bb0, ^bb2
%59 = spv.SLessThan %58, %41 : i32
spv.BranchConditional %59, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner loop 2: %61 runs from LocalInvocationId.y while %61 < %45, stepping by 2.
spv.loop {
spv.Branch ^bb1(%54 : i32)
^bb1(%61: i32): // 2 preds: ^bb0, ^bb2
%62 = spv.SLessThan %61, %45 : i32
spv.BranchConditional %62, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner loop 3: %64 runs from LocalInvocationId.x while %64 < %49, stepping by 32.
spv.loop {
spv.Branch ^bb1(%52 : i32)
^bb1(%64: i32): // 2 preds: ^bb0, ^bb2
%65 = spv.SLessThan %64, %49 : i32
spv.BranchConditional %65, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Innermost loop: %67 runs from 0 while %67 < 2, stepping by 1.
spv.loop {
spv.Branch ^bb1(%4 : i32)
^bb1(%67: i32): // 2 preds: ^bb0, ^bb2
%68 = spv.SLessThan %67, %7 : i32
spv.BranchConditional %68, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Linear element index: %77 = (%30 + %58) * 20 + (%33 + %61) * 4 + (%36 + %64) * 2 + %67.
%69 = spv.IAdd %30, %58 : i32
%70 = spv.IAdd %33, %61 : i32
%71 = spv.IAdd %36, %64 : i32
%72 = spv.IMul %69, %5 : i32
%73 = spv.IMul %70, %6 : i32
%74 = spv.IAdd %72, %73 : i32
%75 = spv.IMul %71, %7 : i32
%76 = spv.IAdd %74, %75 : i32
%77 = spv.IAdd %76, %67 : i32
// Copy element %77 of the source array (struct member 0) into the same slot of the destination array.
%78 = spv.AccessChain %9[%4, %77] : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
%79 = spv.Load "StorageBuffer" %78 : f32
%80 = spv.AccessChain %8[%4, %77] : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
spv.Store "StorageBuffer" %80, %79 : f32
%81 = spv.IAdd %67, %2 : i32
spv.Branch ^bb1(%81 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%66 = spv.IAdd %64, %3 : i32
spv.Branch ^bb1(%66 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%63 = spv.IAdd %61, %7 : i32
spv.Branch ^bb1(%63 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%60 = spv.IAdd %58, %7 : i32
spv.Branch ^bb1(%60 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%57 = spv.IAdd %36, %29 : i32
spv.Branch ^bb1(%57 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%35 = spv.IAdd %33, %27 : i32
spv.Branch ^bb1(%35 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%32 = spv.IAdd %30, %25 : i32
spv.Branch ^bb1(%32 : i32)
^bb3: // pred: ^bb1
spv._merge
}
spv.Return
}
// GLCompute entry point whose interface lists the three built-in variables; local size is 32 x 2 x 2,
// matching the inner-loop steps of 32 (x), 2 (y) and 2 (z) above.
spv.EntryPoint "GLCompute" @main_ex_dispatch_1, @__builtin_var_WorkgroupId__, @__builtin_var_NumWorkgroups__, @__builtin_var_LocalInvocationId__
spv.ExecutionMode @main_ex_dispatch_1 "LocalSize", 32, 2, 2
}
// *** IR Dump After mlir::iree_compiler::IREE::HAL::TranslateExecutablesPass ***
// HAL executable @main_ex_dispatch_1. Its interface @legacy_io has two storage
// buffers in set 0: @arg0 (binding 0, Read) and @ret0 (binding 1,
// Write|Discard). The entry point signature is
// (tensor<1x5x2x2xf32>) -> tensor<1x5x4xf32>, and the "vulkan*" target carries
// the SPIR-V module that implements it as an element-wise buffer copy.
hal.executable @main_ex_dispatch_1 attributes {sym_visibility = "private"} {
hal.interface @legacy_io {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
hal.executable.entry_point @main_ex_dispatch_1 attributes {interface = @legacy_io, ordinal = 0 : i32, signature = (tensor<1x5x2x2xf32>) -> tensor<1x5x4xf32>}
hal.executable.target "vulkan*" {
// The target environment advertises v1.3, while the nested spv.module itself requires only v1.0.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
spv.module Logical GLSL450 requires #spv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class]> {
spv.globalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_NumWorkgroups__ built_in("NumWorkgroups") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__resource_var_0_0__ bind(0, 0) : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
spv.globalVariable @__resource_var_0_1__ bind(0, 1) : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
spv.func @main_ex_dispatch_1() "None" {
// Integer constants shared by the loop bounds, steps and index arithmetic below.
%0 = spv.constant 5 : i32
%1 = spv.constant -1 : i32
%2 = spv.constant 1 : i32
%3 = spv.constant 32 : i32
%4 = spv.constant 0 : i32
%5 = spv.constant 20 : i32
%6 = spv.constant 4 : i32
%7 = spv.constant 2 : i32
// %8 is the destination buffer (binding 1), %9 the source buffer (binding 0).
%8 = spv._address_of @__resource_var_0_1__ : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
%9 = spv._address_of @__resource_var_0_0__ : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
// Read the x (%12/%15), y (%17/%19) and z (%21/%23) components of WorkgroupId and NumWorkgroups.
%10 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%11 = spv.Load "Input" %10 : vector<3xi32>
%12 = spv.CompositeExtract %11[0 : i32] : vector<3xi32>
%13 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%14 = spv.Load "Input" %13 : vector<3xi32>
%15 = spv.CompositeExtract %14[0 : i32] : vector<3xi32>
%16 = spv.Load "Input" %10 : vector<3xi32>
%17 = spv.CompositeExtract %16[1 : i32] : vector<3xi32>
%18 = spv.Load "Input" %13 : vector<3xi32>
%19 = spv.CompositeExtract %18[1 : i32] : vector<3xi32>
%20 = spv.Load "Input" %10 : vector<3xi32>
%21 = spv.CompositeExtract %20[2 : i32] : vector<3xi32>
%22 = spv.Load "Input" %13 : vector<3xi32>
%23 = spv.CompositeExtract %22[2 : i32] : vector<3xi32>
// Outer-loop start (WorkgroupId * factor) and step (NumWorkgroups * factor); factor is 2 for z and y, 32 for x.
%24 = spv.IMul %21, %7 : i32
%25 = spv.IMul %23, %7 : i32
%26 = spv.IMul %17, %7 : i32
%27 = spv.IMul %19, %7 : i32
%28 = spv.IMul %12, %3 : i32
%29 = spv.IMul %15, %3 : i32
// Outer loop 1: %30 runs from %24 while %30 < 1, stepping by %25.
spv.loop {
spv.Branch ^bb1(%24 : i32)
^bb1(%30: i32): // 2 preds: ^bb0, ^bb2
%31 = spv.SLessThan %30, %2 : i32
spv.BranchConditional %31, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Outer loop 2: %33 runs from %26 while %33 < 5, stepping by %27.
spv.loop {
spv.Branch ^bb1(%26 : i32)
^bb1(%33: i32): // 2 preds: ^bb0, ^bb2
%34 = spv.SLessThan %33, %0 : i32
spv.BranchConditional %34, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Outer loop 3: %36 runs from %28 while %36 < 2, stepping by %29.
spv.loop {
spv.Branch ^bb1(%28 : i32)
^bb1(%36: i32): // 2 preds: ^bb0, ^bb2
%37 = spv.SLessThan %36, %7 : i32
spv.BranchConditional %37, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner-loop upper bounds (signed min): %41 = min(2, 1 - %30), %45 = min(2, 5 - %33), %49 = min(32, 2 - %36).
%38 = spv.IMul %30, %1 : i32
%39 = spv.IAdd %38, %2 : i32
%40 = spv.SLessThan %7, %39 : i32
%41 = spv.Select %40, %7, %39 : i1, i32
%42 = spv.IMul %33, %1 : i32
%43 = spv.IAdd %42, %0 : i32
%44 = spv.SLessThan %7, %43 : i32
%45 = spv.Select %44, %7, %43 : i1, i32
%46 = spv.IMul %36, %1 : i32
%47 = spv.IAdd %46, %7 : i32
%48 = spv.SLessThan %3, %47 : i32
%49 = spv.Select %48, %3, %47 : i1, i32
// LocalInvocationId components: %52 = x, %54 = y, %56 = z.
%50 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%51 = spv.Load "Input" %50 : vector<3xi32>
%52 = spv.CompositeExtract %51[0 : i32] : vector<3xi32>
%53 = spv.Load "Input" %50 : vector<3xi32>
%54 = spv.CompositeExtract %53[1 : i32] : vector<3xi32>
%55 = spv.Load "Input" %50 : vector<3xi32>
%56 = spv.CompositeExtract %55[2 : i32] : vector<3xi32>
// Inner loop 1: %58 runs from LocalInvocationId.z while %58 < %41, stepping by 2.
spv.loop {
spv.Branch ^bb1(%56 : i32)
^bb1(%58: i32): // 2 preds: ^bb0, ^bb2
%59 = spv.SLessThan %58, %41 : i32
spv.BranchConditional %59, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner loop 2: %61 runs from LocalInvocationId.y while %61 < %45, stepping by 2.
spv.loop {
spv.Branch ^bb1(%54 : i32)
^bb1(%61: i32): // 2 preds: ^bb0, ^bb2
%62 = spv.SLessThan %61, %45 : i32
spv.BranchConditional %62, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner loop 3: %64 runs from LocalInvocationId.x while %64 < %49, stepping by 32.
spv.loop {
spv.Branch ^bb1(%52 : i32)
^bb1(%64: i32): // 2 preds: ^bb0, ^bb2
%65 = spv.SLessThan %64, %49 : i32
spv.BranchConditional %65, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Innermost loop: %67 runs from 0 while %67 < 2, stepping by 1.
spv.loop {
spv.Branch ^bb1(%4 : i32)
^bb1(%67: i32): // 2 preds: ^bb0, ^bb2
%68 = spv.SLessThan %67, %7 : i32
spv.BranchConditional %68, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Linear element index: %77 = (%30 + %58) * 20 + (%33 + %61) * 4 + (%36 + %64) * 2 + %67.
%69 = spv.IAdd %30, %58 : i32
%70 = spv.IAdd %33, %61 : i32
%71 = spv.IAdd %36, %64 : i32
%72 = spv.IMul %69, %5 : i32
%73 = spv.IMul %70, %6 : i32
%74 = spv.IAdd %72, %73 : i32
%75 = spv.IMul %71, %7 : i32
%76 = spv.IAdd %74, %75 : i32
%77 = spv.IAdd %76, %67 : i32
// Copy element %77 of the source array into the same slot of the destination array.
%78 = spv.AccessChain %9[%4, %77] : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
%79 = spv.Load "StorageBuffer" %78 : f32
%80 = spv.AccessChain %8[%4, %77] : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
spv.Store "StorageBuffer" %80, %79 : f32
%81 = spv.IAdd %67, %2 : i32
spv.Branch ^bb1(%81 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%66 = spv.IAdd %64, %3 : i32
spv.Branch ^bb1(%66 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%63 = spv.IAdd %61, %7 : i32
spv.Branch ^bb1(%63 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%60 = spv.IAdd %58, %7 : i32
spv.Branch ^bb1(%60 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%57 = spv.IAdd %36, %29 : i32
spv.Branch ^bb1(%57 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%35 = spv.IAdd %33, %27 : i32
spv.Branch ^bb1(%35 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%32 = spv.IAdd %30, %25 : i32
spv.Branch ^bb1(%32 : i32)
^bb3: // pred: ^bb1
spv._merge
}
spv.Return
}
// GLCompute entry point with local size 32 x 2 x 2.
spv.EntryPoint "GLCompute" @main_ex_dispatch_1, @__builtin_var_WorkgroupId__, @__builtin_var_NumWorkgroups__, @__builtin_var_LocalInvocationId__
spv.ExecutionMode @main_ex_dispatch_1 "LocalSize", 32, 2, 2
}
// A second copy of the @legacy_io interface sits inside the target's inner module, with the same two bindings.
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
}
}
// *** IR Dump After Inliner ***
// Module for dispatch function @main_ex_dispatch_2, tagged with a SPIR-V target
// environment (v1.3, Shader, at most 128 invocations per workgroup).
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
// Loads a 1x5x4 f32 tensor from @arg0, pads the last dimension with 60
// trailing copies of %cst (0.0) to get 1x5x64, and stores the result to @ret0.
func @main_ex_dispatch_2() {
%c0 = constant 0 : index
%cst = constant dense<0.000000e+00> : tensor<f32>
%0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<1x5x4xf32>
%1 = "xla_hlo.pad"(%0, %cst) {edge_padding_high = dense<[0, 0, 60]> : tensor<3xi64>, edge_padding_low = dense<0> : tensor<3xi64>, interior_padding = dense<0> : tensor<3xi64>} : (tensor<1x5x4xf32>, tensor<f32>) -> tensor<1x5x64xf32>
hal.interface.store.tensor %1, @legacy_io::@ret0, offset = %c0 : tensor<1x5x64xf32>
return
}
// I/O interface: @arg0 is read-only at binding 0, @ret0 is Write|Discard at binding 1.
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After mlir::iree_compiler::Shape::`anonymous-namespace'::TieDynamicShapesPass ***
// Tensor-level form of the dispatch: load 1x5x4 from @arg0, pad the last
// dimension high by 60 with the scalar %cst (0.0), store 1x5x64 to @ret0.
// All shapes are static.
func @main_ex_dispatch_2() {
%c0 = constant 0 : index
%cst = constant dense<0.000000e+00> : tensor<f32>
%0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<1x5x4xf32>
%1 = "xla_hlo.pad"(%0, %cst) {edge_padding_high = dense<[0, 0, 60]> : tensor<3xi64>, edge_padding_low = dense<0> : tensor<3xi64>, interior_padding = dense<0> : tensor<3xi64>} : (tensor<1x5x4xf32>, tensor<f32>) -> tensor<1x5x64xf32>
hal.interface.store.tensor %1, @legacy_io::@ret0, offset = %c0 : tensor<1x5x64xf32>
return
}
// *** IR Dump After mlir::iree_compiler::Shape::`anonymous-namespace'::MaterializeShapeCalculationsPass ***
// Dispatch body: a single xla_hlo.pad between an interface load and store.
// No shape-calculation ops appear; every tensor type here is fully static.
func @main_ex_dispatch_2() {
%c0 = constant 0 : index
%cst = constant dense<0.000000e+00> : tensor<f32>
%0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<1x5x4xf32>
// Only the last dimension is padded: 0 low, 60 high, no interior padding (4 + 60 = 64).
%1 = "xla_hlo.pad"(%0, %cst) {edge_padding_high = dense<[0, 0, 60]> : tensor<3xi64>, edge_padding_low = dense<0> : tensor<3xi64>, interior_padding = dense<0> : tensor<3xi64>} : (tensor<1x5x4xf32>, tensor<f32>) -> tensor<1x5x64xf32>
hal.interface.store.tensor %1, @legacy_io::@ret0, offset = %c0 : tensor<1x5x64xf32>
return
}
// *** IR Dump After mlir::iree_compiler::Shape::`anonymous-namespace'::HoistShapeCalculations ***
// Pads the 1x5x4 input from @arg0 to 1x5x64 with trailing zeros in the last
// dimension and writes the padded tensor to @ret0.
func @main_ex_dispatch_2() {
// Byte/element offset operand shared by the interface load and store below.
%c0 = constant 0 : index
// Rank-0 padding value.
%cst = constant dense<0.000000e+00> : tensor<f32>
%0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<1x5x4xf32>
%1 = "xla_hlo.pad"(%0, %cst) {edge_padding_high = dense<[0, 0, 60]> : tensor<3xi64>, edge_padding_low = dense<0> : tensor<3xi64>, interior_padding = dense<0> : tensor<3xi64>} : (tensor<1x5x4xf32>, tensor<f32>) -> tensor<1x5x64xf32>
hal.interface.store.tensor %1, @legacy_io::@ret0, offset = %c0 : tensor<1x5x64xf32>
return
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::DecomposeHLOClampPass ***
// Dispatch body with one xla_hlo.pad; the function contains no clamp op.
// Input 1x5x4 (from @arg0) becomes 1x5x64 (to @ret0), padded with %cst = 0.0.
func @main_ex_dispatch_2() {
%c0 = constant 0 : index
%cst = constant dense<0.000000e+00> : tensor<f32>
%0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<1x5x4xf32>
%1 = "xla_hlo.pad"(%0, %cst) {edge_padding_high = dense<[0, 0, 60]> : tensor<3xi64>, edge_padding_low = dense<0> : tensor<3xi64>, interior_padding = dense<0> : tensor<3xi64>} : (tensor<1x5x4xf32>, tensor<f32>) -> tensor<1x5x64xf32>
hal.interface.store.tensor %1, @legacy_io::@ret0, offset = %c0 : tensor<1x5x64xf32>
return
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::ConvertHLOToLinalgOnTensorsPass ***
// Dispatch body in which the pad is still expressed as "xla_hlo.pad" on
// tensors (no linalg op is present in this function).
func @main_ex_dispatch_2() {
%c0 = constant 0 : index
%cst = constant dense<0.000000e+00> : tensor<f32>
%0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<1x5x4xf32>
// Pads dimension 2 from 4 to 64 elements using %cst as the fill value.
%1 = "xla_hlo.pad"(%0, %cst) {edge_padding_high = dense<[0, 0, 60]> : tensor<3xi64>, edge_padding_low = dense<0> : tensor<3xi64>, interior_padding = dense<0> : tensor<3xi64>} : (tensor<1x5x4xf32>, tensor<f32>) -> tensor<1x5x64xf32>
hal.interface.store.tensor %1, @legacy_io::@ret0, offset = %c0 : tensor<1x5x64xf32>
return
}
// *** IR Dump After LinalgFusionOfTensorOps ***
// Module-level view of @main_ex_dispatch_2 plus its @legacy_io interface.
// The function holds a single pad op, so it has no producer/consumer pair.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
// Load 1x5x4 from @arg0, pad to 1x5x64 with 0.0, store to @ret0.
func @main_ex_dispatch_2() {
%c0 = constant 0 : index
%cst = constant dense<0.000000e+00> : tensor<f32>
%0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<1x5x4xf32>
%1 = "xla_hlo.pad"(%0, %cst) {edge_padding_high = dense<[0, 0, 60]> : tensor<3xi64>, edge_padding_low = dense<0> : tensor<3xi64>, interior_padding = dense<0> : tensor<3xi64>} : (tensor<1x5x4xf32>, tensor<f32>) -> tensor<1x5x64xf32>
hal.interface.store.tensor %1, @legacy_io::@ret0, offset = %c0 : tensor<1x5x64xf32>
return
}
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::ConvertHLOToLinalgOnBuffersPass ***
// Buffer-level form of the dispatch: the interface tensors are now memref
// placeholders (%0 = @ret0, 1x5x64; %1 = @arg0, 1x5x4) and the computation is
// a linalg.indexed_generic with three parallel iterators.
// NOTE(review): the region only yields the input element %arg3; neither %cst
// nor %cst_0 (the 0.0 padding value) is used by any op in this function, so it
// is not visible here how elements of %0 outside the 1x5x4 input extent get
// their padding value -- confirm against the pad lowering.
func @main_ex_dispatch_2() {
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<1x5x64xf32>
%c0 = constant 0 : index
%cst = constant dense<0.000000e+00> : tensor<f32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5x4xf32>
%cst_0 = constant 0.000000e+00 : f32
// Both indexing maps are the identity; the op is marked "no-tile".
linalg.indexed_generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} {__internal_linalg_transform__ = "no-tile"} %1, %0 {
// %arg0..%arg2 are the iteration indices, %arg3 the input element, %arg4 the current output element.
^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: f32, %arg4: f32): // no predecessors
// These three zero constants have no users within the region.
%c0_1 = constant 0 : index
%c0_2 = constant 0 : index
%c0_3 = constant 0 : index
linalg.yield %arg3 : f32
}: memref<1x5x4xf32>, memref<1x5x64xf32>
return
}
// *** IR Dump After Canonicalizer ***
// Module holding the buffer-level @main_ex_dispatch_2 and its interface.
// The function consists only of the two buffer placeholders and the
// linalg.indexed_generic; no constant ops remain in it.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
func @main_ex_dispatch_2() {
// %0 is the output buffer (@ret0, 1x5x64); %1 is the input buffer (@arg0, 1x5x4).
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<1x5x64xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5x4xf32>
// Identity-mapped, all-parallel generic whose body forwards the input element %arg3.
// NOTE(review): no 0.0 padding value appears anywhere in this function -- confirm how the padded region of %0 is filled.
linalg.indexed_generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} {__internal_linalg_transform__ = "no-tile"} %1, %0 {
^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: f32, %arg4: f32): // no predecessors
linalg.yield %arg3 : f32
}: memref<1x5x4xf32>, memref<1x5x64xf32>
return
}
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After CSE ***
// Module with @main_ex_dispatch_2 in buffer form: two interface-buffer
// placeholders feeding one linalg.indexed_generic, plus the @legacy_io interface.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
func @main_ex_dispatch_2() {
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<1x5x64xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5x4xf32>
// Reads %1 (input) and writes %0 (output) under identity indexing maps.
linalg.indexed_generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} {__internal_linalg_transform__ = "no-tile"} %1, %0 {
^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: f32, %arg4: f32): // no predecessors
// The yielded value is the input element; the index arguments and %arg4 are unused.
linalg.yield %arg3 : f32
}: memref<1x5x4xf32>, memref<1x5x64xf32>
return
}
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::LinalgTileAndFusePass ***
// The function now carries spv.entry_point_abi with local_size = [32, 2, 2].
// The linalg.indexed_generic is tagged "no-tile" and appears here as a single
// untiled op over the whole buffers.
func @main_ex_dispatch_2() attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<1x5x64xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5x4xf32>
linalg.indexed_generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} {__internal_linalg_transform__ = "no-tile"} %1, %0 {
^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: f32, %arg4: f32): // no predecessors
linalg.yield %arg3 : f32
}: memref<1x5x4xf32>, memref<1x5x64xf32>
return
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::SplitDispatchFunctionPass ***
// Module containing exactly one dispatch function, @main_ex_dispatch_2, and
// the @legacy_io interface it binds to.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
// Entry point with local size 32 x 2 x 2; body is one "no-tile" linalg.indexed_generic from %1 (input) to %0 (output).
func @main_ex_dispatch_2() attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<1x5x64xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5x4xf32>
linalg.indexed_generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} {__internal_linalg_transform__ = "no-tile"} %1, %0 {
^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: f32, %arg4: f32): // no predecessors
linalg.yield %arg3 : f32
}: memref<1x5x4xf32>, memref<1x5x64xf32>
return
}
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::LinalgTileAndFusePass ***
// Entry point (local size 32 x 2 x 2) whose body is a single
// linalg.indexed_generic marked "no-tile", copying elements of the 1x5x4
// input buffer %1 to the same indices of the 1x5x64 output buffer %0.
func @main_ex_dispatch_2() attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<1x5x64xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5x4xf32>
linalg.indexed_generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} {__internal_linalg_transform__ = "no-tile"} %1, %0 {
^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: f32, %arg4: f32): // no predecessors
// Forward the input element unchanged.
linalg.yield %arg3 : f32
}: memref<1x5x4xf32>, memref<1x5x64xf32>
return
}
// *** IR Dump After Canonicalizer ***
// Module with the linalg-on-buffers form of @main_ex_dispatch_2 (local size
// 32 x 2 x 2) and its @legacy_io interface.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
func @main_ex_dispatch_2() attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
// Output (@ret0) and input (@arg0) buffers.
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<1x5x64xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5x4xf32>
// All-parallel, identity-mapped generic that yields the input element.
linalg.indexed_generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} {__internal_linalg_transform__ = "no-tile"} %1, %0 {
^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: f32, %arg4: f32): // no predecessors
linalg.yield %arg3 : f32
}: memref<1x5x4xf32>, memref<1x5x64xf32>
return
}
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::ConvertToGPUPass ***
// GPU-mapped form of the dispatch: the linalg op has become three nested
// scf.for loops whose lower bounds and steps come from gpu.* id/dim ops.
// Each loop runs up to a dimension of the INPUT memref %1 (1x5x4), and the
// body copies %1[i, j, k] into %0[i, j, k].
// NOTE(review): nothing here writes elements of %0 outside the input's
// extent, and no padding value is present -- confirm how the padded region
// of the 1x5x64 output is initialized.
func @main_ex_dispatch_2() attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<1x5x64xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5x4xf32>
// Dimensions of the input (%2..%4) and of the output (%5..%7); the output dims have no users below.
%2 = dim %1, 0 : memref<1x5x4xf32>
%3 = dim %1, 1 : memref<1x5x4xf32>
%4 = dim %1, 2 : memref<1x5x4xf32>
%5 = dim %0, 0 : memref<1x5x64xf32>
%6 = dim %0, 1 : memref<1x5x64xf32>
%7 = dim %0, 2 : memref<1x5x64xf32>
// Loop upper bounds: identity maps applied to the input dimensions.
%8 = affine.apply affine_map<()[s0] -> (s0)>()[%2]
%9 = affine.apply affine_map<()[s0] -> (s0)>()[%3]
%10 = affine.apply affine_map<()[s0] -> (s0)>()[%4]
// One (lower bound 0, step 1) constant pair per loop dimension.
%c0 = constant 0 : index
%c1 = constant 1 : index
%c0_0 = constant 0 : index
%c1_1 = constant 1 : index
%c0_2 = constant 0 : index
%c1_3 = constant 1 : index
// x: %16 = block_id * block_dim + thread_id, %17 = block_dim * grid_dim.
%11 = "gpu.grid_dim"() {dimension = "x"} : () -> index
%12 = "gpu.block_id"() {dimension = "x"} : () -> index
%13 = "gpu.block_dim"() {dimension = "x"} : () -> index
%14 = "gpu.thread_id"() {dimension = "x"} : () -> index
%15 = muli %12, %13 : index
%16 = addi %15, %14 : index
%17 = muli %13, %11 : index
// y: %23 = block_id * block_dim + thread_id, %24 = block_dim * grid_dim.
%18 = "gpu.grid_dim"() {dimension = "y"} : () -> index
%19 = "gpu.block_id"() {dimension = "y"} : () -> index
%20 = "gpu.block_dim"() {dimension = "y"} : () -> index
%21 = "gpu.thread_id"() {dimension = "y"} : () -> index
%22 = muli %19, %20 : index
%23 = addi %22, %21 : index
%24 = muli %20, %18 : index
// z: %30 = block_id * block_dim + thread_id, %31 = block_dim * grid_dim.
%25 = "gpu.grid_dim"() {dimension = "z"} : () -> index
%26 = "gpu.block_id"() {dimension = "z"} : () -> index
%27 = "gpu.block_dim"() {dimension = "z"} : () -> index
%28 = "gpu.thread_id"() {dimension = "z"} : () -> index
%29 = muli %26, %27 : index
%30 = addi %29, %28 : index
%31 = muli %27, %25 : index
// Per-loop start = 0 + 1 * id and step = 1 * (block_dim * grid_dim):
// z drives the outermost loop, y the middle loop, x the innermost loop.
%32 = muli %c1, %30 : index
%33 = addi %c0, %32 : index
%34 = muli %c1, %31 : index
%35 = muli %c1_1, %23 : index
%36 = addi %c0_0, %35 : index
%37 = muli %c1_1, %24 : index
%38 = muli %c1_3, %16 : index
%39 = addi %c0_2, %38 : index
%40 = muli %c1_3, %17 : index
scf.for %arg0 = %33 to %8 step %34 {
scf.for %arg1 = %36 to %9 step %37 {
scf.for %arg2 = %39 to %10 step %40 {
// All affine.apply ops in the body use the identity map on a loop index.
%41 = affine.apply affine_map<(d0) -> (d0)>(%arg0)
%42 = affine.apply affine_map<(d0) -> (d0)>(%arg1)
%43 = affine.apply affine_map<(d0) -> (d0)>(%arg2)
%44 = load %1[%41, %42, %43] : memref<1x5x4xf32>
%45 = affine.apply affine_map<(d0) -> (d0)>(%arg0)
%46 = affine.apply affine_map<(d0) -> (d0)>(%arg1)
%47 = affine.apply affine_map<(d0) -> (d0)>(%arg2)
// %48 (the previous output element) has no users.
%48 = load %0[%45, %46, %47] : memref<1x5x64xf32>
%49 = affine.apply affine_map<(d0) -> (d0)>(%arg0)
%50 = affine.apply affine_map<(d0) -> (d0)>(%arg1)
%51 = affine.apply affine_map<(d0) -> (d0)>(%arg2)
store %44, %0[%49, %50, %51] : memref<1x5x64xf32>
}
}
}
return
}
// *** IR Dump After ConvertAffineToStandard ***
// Same GPU-mapped copy loop nest with every affine.apply removed: the loop
// bounds are the input dims %2..%4 directly, and the loads/stores index with
// the loop induction variables.
func @main_ex_dispatch_2() attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<1x5x64xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5x4xf32>
// Input dims (%2..%4) bound the loops; output dims (%5..%7) have no users below.
%2 = dim %1, 0 : memref<1x5x4xf32>
%3 = dim %1, 1 : memref<1x5x4xf32>
%4 = dim %1, 2 : memref<1x5x4xf32>
%5 = dim %0, 0 : memref<1x5x64xf32>
%6 = dim %0, 1 : memref<1x5x64xf32>
%7 = dim %0, 2 : memref<1x5x64xf32>
%c0 = constant 0 : index
%c1 = constant 1 : index
%c0_0 = constant 0 : index
%c1_1 = constant 1 : index
%c0_2 = constant 0 : index
%c1_3 = constant 1 : index
// x: %13 = block_id * block_dim + thread_id, %14 = block_dim * grid_dim.
%8 = "gpu.grid_dim"() {dimension = "x"} : () -> index
%9 = "gpu.block_id"() {dimension = "x"} : () -> index
%10 = "gpu.block_dim"() {dimension = "x"} : () -> index
%11 = "gpu.thread_id"() {dimension = "x"} : () -> index
%12 = muli %9, %10 : index
%13 = addi %12, %11 : index
%14 = muli %10, %8 : index
// y: %20 = block_id * block_dim + thread_id, %21 = block_dim * grid_dim.
%15 = "gpu.grid_dim"() {dimension = "y"} : () -> index
%16 = "gpu.block_id"() {dimension = "y"} : () -> index
%17 = "gpu.block_dim"() {dimension = "y"} : () -> index
%18 = "gpu.thread_id"() {dimension = "y"} : () -> index
%19 = muli %16, %17 : index
%20 = addi %19, %18 : index
%21 = muli %17, %15 : index
// z: %27 = block_id * block_dim + thread_id, %28 = block_dim * grid_dim.
%22 = "gpu.grid_dim"() {dimension = "z"} : () -> index
%23 = "gpu.block_id"() {dimension = "z"} : () -> index
%24 = "gpu.block_dim"() {dimension = "z"} : () -> index
%25 = "gpu.thread_id"() {dimension = "z"} : () -> index
%26 = muli %23, %24 : index
%27 = addi %26, %25 : index
%28 = muli %24, %22 : index
// Start = 0 + 1 * id, step = 1 * (block_dim * grid_dim) for z (outer), y (middle), x (inner).
%29 = muli %c1, %27 : index
%30 = addi %c0, %29 : index
%31 = muli %c1, %28 : index
%32 = muli %c1_1, %20 : index
%33 = addi %c0_0, %32 : index
%34 = muli %c1_1, %21 : index
%35 = muli %c1_3, %13 : index
%36 = addi %c0_2, %35 : index
%37 = muli %c1_3, %14 : index
scf.for %arg0 = %30 to %2 step %31 {
scf.for %arg1 = %33 to %3 step %34 {
scf.for %arg2 = %36 to %4 step %37 {
%38 = load %1[%arg0, %arg1, %arg2] : memref<1x5x4xf32>
// %39 (the previous output element) has no users.
%39 = load %0[%arg0, %arg1, %arg2] : memref<1x5x64xf32>
store %38, %0[%arg0, %arg1, %arg2] : memref<1x5x64xf32>
}
}
}
return
}
// *** IR Dump After Canonicalizer ***
// Module with the simplified GPU-mapped copy loop nest for
// @main_ex_dispatch_2: the loop upper bounds are the constants 1, 5 and 4
// (the input shape), each loop starts at the invocation's global id and steps
// by block_dim * grid_dim in its dimension.
// NOTE(review): the store only touches %0[i, j, k] with k < 4; elements
// 4..63 of the last dimension of the 1x5x64 output are not written in this
// function and no 0.0 fill value is present -- confirm whether the pad
// semantics are preserved elsewhere.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
func @main_ex_dispatch_2() attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
// Loop upper bounds for the outer (1), middle (5) and inner (4) loops.
%c1 = constant 1 : index
%c5 = constant 5 : index
%c4 = constant 4 : index
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<1x5x64xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5x4xf32>
// x: start %7 = block_id * block_dim + thread_id, step %8 = block_dim * grid_dim.
%2 = "gpu.grid_dim"() {dimension = "x"} : () -> index
%3 = "gpu.block_id"() {dimension = "x"} : () -> index
%4 = "gpu.block_dim"() {dimension = "x"} : () -> index
%5 = "gpu.thread_id"() {dimension = "x"} : () -> index
%6 = muli %3, %4 : index
%7 = addi %6, %5 : index
%8 = muli %4, %2 : index
// y: start %14, step %15.
%9 = "gpu.grid_dim"() {dimension = "y"} : () -> index
%10 = "gpu.block_id"() {dimension = "y"} : () -> index
%11 = "gpu.block_dim"() {dimension = "y"} : () -> index
%12 = "gpu.thread_id"() {dimension = "y"} : () -> index
%13 = muli %10, %11 : index
%14 = addi %13, %12 : index
%15 = muli %11, %9 : index
// z: start %21, step %22.
%16 = "gpu.grid_dim"() {dimension = "z"} : () -> index
%17 = "gpu.block_id"() {dimension = "z"} : () -> index
%18 = "gpu.block_dim"() {dimension = "z"} : () -> index
%19 = "gpu.thread_id"() {dimension = "z"} : () -> index
%20 = muli %17, %18 : index
%21 = addi %20, %19 : index
%22 = muli %18, %16 : index
scf.for %arg0 = %21 to %c1 step %22 {
scf.for %arg1 = %14 to %c5 step %15 {
scf.for %arg2 = %7 to %c4 step %8 {
// Copy one element from the input buffer to the same index of the output buffer.
%23 = load %1[%arg0, %arg1, %arg2] : memref<1x5x4xf32>
store %23, %0[%arg0, %arg1, %arg2] : memref<1x5x64xf32>
}
}
}
return
}
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After CSE ***
// Module with @main_ex_dispatch_2 as a three-deep scf.for nest that copies
// the 1x5x4 input buffer into the leading 1x5x4 region of the 1x5x64 output
// buffer. Loop starts are per-invocation global ids; steps are
// block_dim * grid_dim.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
func @main_ex_dispatch_2() attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
%c1 = constant 1 : index
%c5 = constant 5 : index
%c4 = constant 4 : index
// %0 = output (@ret0), %1 = input (@arg0).
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<1x5x64xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5x4xf32>
// x dimension -> innermost loop (%arg2): start %7, step %8.
%2 = "gpu.grid_dim"() {dimension = "x"} : () -> index
%3 = "gpu.block_id"() {dimension = "x"} : () -> index
%4 = "gpu.block_dim"() {dimension = "x"} : () -> index
%5 = "gpu.thread_id"() {dimension = "x"} : () -> index
%6 = muli %3, %4 : index
%7 = addi %6, %5 : index
%8 = muli %4, %2 : index
// y dimension -> middle loop (%arg1): start %14, step %15.
%9 = "gpu.grid_dim"() {dimension = "y"} : () -> index
%10 = "gpu.block_id"() {dimension = "y"} : () -> index
%11 = "gpu.block_dim"() {dimension = "y"} : () -> index
%12 = "gpu.thread_id"() {dimension = "y"} : () -> index
%13 = muli %10, %11 : index
%14 = addi %13, %12 : index
%15 = muli %11, %9 : index
// z dimension -> outermost loop (%arg0): start %21, step %22.
%16 = "gpu.grid_dim"() {dimension = "z"} : () -> index
%17 = "gpu.block_id"() {dimension = "z"} : () -> index
%18 = "gpu.block_dim"() {dimension = "z"} : () -> index
%19 = "gpu.thread_id"() {dimension = "z"} : () -> index
%20 = muli %17, %18 : index
%21 = addi %20, %19 : index
%22 = muli %18, %16 : index
scf.for %arg0 = %21 to %c1 step %22 {
scf.for %arg1 = %14 to %c5 step %15 {
scf.for %arg2 = %7 to %c4 step %8 {
%23 = load %1[%arg0, %arg1, %arg2] : memref<1x5x4xf32>
store %23, %0[%arg0, %arg1, %arg2] : memref<1x5x64xf32>
}
}
}
return
}
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::ResolveShapeOpsPass ***
// Function-only snapshot of the dispatch entry point. It copies the 1x5x4
// source buffer (@arg0) into the 1x5x64 destination buffer (@ret0) at matching
// [i, j, k] indices.
// NOTE(review): because the inner bound is %c4, last-dimension indices
// [4, 64) of the destination are never stored by this function.
func @main_ex_dispatch_2() attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
// Trip-count bounds for the outer, middle and inner loops.
%c1 = constant 1 : index
%c5 = constant 5 : index
%c4 = constant 4 : index
// Destination (%0) and source (%1) buffers.
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<1x5x64xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5x4xf32>
// Per-axis start index (block_id * block_dim + thread_id) and step
// (block_dim * grid_dim). x axis: start %7, step %8.
%2 = "gpu.grid_dim"() {dimension = "x"} : () -> index
%3 = "gpu.block_id"() {dimension = "x"} : () -> index
%4 = "gpu.block_dim"() {dimension = "x"} : () -> index
%5 = "gpu.thread_id"() {dimension = "x"} : () -> index
%6 = muli %3, %4 : index
%7 = addi %6, %5 : index
%8 = muli %4, %2 : index
// y axis: start %14, step %15.
%9 = "gpu.grid_dim"() {dimension = "y"} : () -> index
%10 = "gpu.block_id"() {dimension = "y"} : () -> index
%11 = "gpu.block_dim"() {dimension = "y"} : () -> index
%12 = "gpu.thread_id"() {dimension = "y"} : () -> index
%13 = muli %10, %11 : index
%14 = addi %13, %12 : index
%15 = muli %11, %9 : index
// z axis: start %21, step %22.
%16 = "gpu.grid_dim"() {dimension = "z"} : () -> index
%17 = "gpu.block_id"() {dimension = "z"} : () -> index
%18 = "gpu.block_dim"() {dimension = "z"} : () -> index
%19 = "gpu.thread_id"() {dimension = "z"} : () -> index
%20 = muli %17, %18 : index
%21 = addi %20, %19 : index
%22 = muli %18, %16 : index
// Outer loop uses the z values, middle loop the y values, inner loop the x values.
scf.for %arg0 = %21 to %c1 step %22 {
scf.for %arg1 = %14 to %c5 step %15 {
scf.for %arg2 = %7 to %c4 step %8 {
// One f32 element is moved per iteration.
%23 = load %1[%arg0, %arg1, %arg2] : memref<1x5x4xf32>
store %23, %0[%arg0, %arg1, %arg2] : memref<1x5x64xf32>
}
}
}
return
}
// *** IR Dump After LegalizeStandardForSPIRV ***
// Module for dispatch 2, targeting the environment given in spv.target_env
// (v1.3, Shader, SPV_KHR_storage_buffer_storage_class; at most 128 invocations
// per workgroup and a maximum workgroup size of [128, 128, 64]).
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
// Element-wise copy from @arg0 (1x5x4) into @ret0 (1x5x64) with a
// [32, 2, 2] local size.
// NOTE(review): stores cover only [0, 4) of the destination's last dimension.
func @main_ex_dispatch_2() attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
// Bounds for the three loops (dimension 0, 1, 2 respectively).
%c1 = constant 1 : index
%c5 = constant 5 : index
%c4 = constant 4 : index
// %0: @ret0 (written). %1: @arg0 (read).
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<1x5x64xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5x4xf32>
// x: start %7 = block_id * block_dim + thread_id; step %8 = block_dim * grid_dim.
%2 = "gpu.grid_dim"() {dimension = "x"} : () -> index
%3 = "gpu.block_id"() {dimension = "x"} : () -> index
%4 = "gpu.block_dim"() {dimension = "x"} : () -> index
%5 = "gpu.thread_id"() {dimension = "x"} : () -> index
%6 = muli %3, %4 : index
%7 = addi %6, %5 : index
%8 = muli %4, %2 : index
// y: start %14, step %15.
%9 = "gpu.grid_dim"() {dimension = "y"} : () -> index
%10 = "gpu.block_id"() {dimension = "y"} : () -> index
%11 = "gpu.block_dim"() {dimension = "y"} : () -> index
%12 = "gpu.thread_id"() {dimension = "y"} : () -> index
%13 = muli %10, %11 : index
%14 = addi %13, %12 : index
%15 = muli %11, %9 : index
// z: start %21, step %22.
%16 = "gpu.grid_dim"() {dimension = "z"} : () -> index
%17 = "gpu.block_id"() {dimension = "z"} : () -> index
%18 = "gpu.block_dim"() {dimension = "z"} : () -> index
%19 = "gpu.thread_id"() {dimension = "z"} : () -> index
%20 = muli %17, %18 : index
%21 = addi %20, %19 : index
%22 = muli %18, %16 : index
// Loop nest ordered z (outer), y (middle), x (inner).
scf.for %arg0 = %21 to %c1 step %22 {
scf.for %arg1 = %14 to %c5 step %15 {
scf.for %arg2 = %7 to %c4 step %8 {
// Copy a single element using the same indices on both sides.
%23 = load %1[%arg0, %arg1, %arg2] : memref<1x5x4xf32>
store %23, %0[%arg0, %arg1, %arg2] : memref<1x5x64xf32>
}
}
}
return
}
// Bindings resolved by the iree.placeholder ops above.
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After Canonicalizer ***
// Dispatch 2 module. Target environment: SPIR-V v1.3 with the Shader
// capability and SPV_KHR_storage_buffer_storage_class; resource limits are
// listed in the trailing dictionary of spv.target_env.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
// Reads @arg0 (memref<1x5x4xf32>) and writes @ret0 (memref<1x5x64xf32>),
// one element per innermost iteration.
// NOTE(review): the destination's last dimension is 64 but the inner loop
// stops at 4, so columns [4, 64) are left untouched by this function.
func @main_ex_dispatch_2() attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
// Loop limits.
%c1 = constant 1 : index
%c5 = constant 5 : index
%c4 = constant 4 : index
// Destination buffer, then source buffer.
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<1x5x64xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5x4xf32>
// x axis values: %7 (start) and %8 (step).
%2 = "gpu.grid_dim"() {dimension = "x"} : () -> index
%3 = "gpu.block_id"() {dimension = "x"} : () -> index
%4 = "gpu.block_dim"() {dimension = "x"} : () -> index
%5 = "gpu.thread_id"() {dimension = "x"} : () -> index
%6 = muli %3, %4 : index
%7 = addi %6, %5 : index
%8 = muli %4, %2 : index
// y axis values: %14 (start) and %15 (step).
%9 = "gpu.grid_dim"() {dimension = "y"} : () -> index
%10 = "gpu.block_id"() {dimension = "y"} : () -> index
%11 = "gpu.block_dim"() {dimension = "y"} : () -> index
%12 = "gpu.thread_id"() {dimension = "y"} : () -> index
%13 = muli %10, %11 : index
%14 = addi %13, %12 : index
%15 = muli %11, %9 : index
// z axis values: %21 (start) and %22 (step).
%16 = "gpu.grid_dim"() {dimension = "z"} : () -> index
%17 = "gpu.block_id"() {dimension = "z"} : () -> index
%18 = "gpu.block_dim"() {dimension = "z"} : () -> index
%19 = "gpu.thread_id"() {dimension = "z"} : () -> index
%20 = muli %17, %18 : index
%21 = addi %20, %19 : index
%22 = muli %18, %16 : index
// Each loop begins at its axis start value and advances by its axis step.
scf.for %arg0 = %21 to %c1 step %22 {
scf.for %arg1 = %14 to %c5 step %15 {
scf.for %arg2 = %7 to %c4 step %8 {
// Source and destination share the index triple [%arg0, %arg1, %arg2].
%23 = load %1[%arg0, %arg1, %arg2] : memref<1x5x4xf32>
store %23, %0[%arg0, %arg1, %arg2] : memref<1x5x64xf32>
}
}
}
return
}
// @arg0 -> (set 0, binding 0), @ret0 -> (set 0, binding 1).
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After CSE ***
// Dispatch 2 module with its SPIR-V target environment attached as a module
// attribute (v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]).
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
// Copy kernel: every element of the 1x5x4 input is stored at the same
// index triple in the 1x5x64 output.
// NOTE(review): output elements whose last index is >= 4 are not written
// by this function.
func @main_ex_dispatch_2() attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
// Exclusive upper bounds for the loop nest.
%c1 = constant 1 : index
%c5 = constant 5 : index
%c4 = constant 4 : index
// Output placeholder (%0) and input placeholder (%1).
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<1x5x64xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5x4xf32>
// x: %6 = block_id * block_dim, %7 = %6 + thread_id, %8 = block_dim * grid_dim.
%2 = "gpu.grid_dim"() {dimension = "x"} : () -> index
%3 = "gpu.block_id"() {dimension = "x"} : () -> index
%4 = "gpu.block_dim"() {dimension = "x"} : () -> index
%5 = "gpu.thread_id"() {dimension = "x"} : () -> index
%6 = muli %3, %4 : index
%7 = addi %6, %5 : index
%8 = muli %4, %2 : index
// y: %13 = block_id * block_dim, %14 = %13 + thread_id, %15 = block_dim * grid_dim.
%9 = "gpu.grid_dim"() {dimension = "y"} : () -> index
%10 = "gpu.block_id"() {dimension = "y"} : () -> index
%11 = "gpu.block_dim"() {dimension = "y"} : () -> index
%12 = "gpu.thread_id"() {dimension = "y"} : () -> index
%13 = muli %10, %11 : index
%14 = addi %13, %12 : index
%15 = muli %11, %9 : index
// z: %20 = block_id * block_dim, %21 = %20 + thread_id, %22 = block_dim * grid_dim.
%16 = "gpu.grid_dim"() {dimension = "z"} : () -> index
%17 = "gpu.block_id"() {dimension = "z"} : () -> index
%18 = "gpu.block_dim"() {dimension = "z"} : () -> index
%19 = "gpu.thread_id"() {dimension = "z"} : () -> index
%20 = muli %17, %18 : index
%21 = addi %20, %19 : index
%22 = muli %18, %16 : index
// Nest order: z-derived loop, then y-derived loop, then x-derived loop.
scf.for %arg0 = %21 to %c1 step %22 {
scf.for %arg1 = %14 to %c5 step %15 {
scf.for %arg2 = %7 to %c4 step %8 {
// Move one f32 from input to output.
%23 = load %1[%arg0, %arg1, %arg2] : memref<1x5x4xf32>
store %23, %0[%arg0, %arg1, %arg2] : memref<1x5x64xf32>
}
}
}
return
}
// Interface describing the two storage buffers used above.
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::ConvertToSPIRVPass ***
// Dispatch 2 expressed in the SPIR-V dialect: the function is an spv.func inside
// an spv.module, and the loop nest is three nested spv.loop regions. Buffers
// are flat f32 arrays addressed with explicitly linearized offsets.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
spv.module Logical GLSL450 {
// Built-in input variables read by the function.
spv.globalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_NumWorkgroups__ built_in("NumWorkgroups") : !spv.ptr<vector<3xi32>, Input>
// bind(0, 0): 20 x f32, same set/binding as @arg0 in the hal.interface below.
spv.globalVariable @__resource_var_0_0__ bind(0, 0) : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
// bind(0, 1): 320 x f32, same set/binding as @ret0 in the hal.interface below.
spv.globalVariable @__resource_var_0_1__ bind(0, 1) : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
spv.func @main_ex_dispatch_2() "None" attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
// Loop upper bounds: %0 = 1 (outer), %1 = 5 (middle), %2 = 4 (inner).
%0 = spv.constant 1 : i32
%1 = spv.constant 5 : i32
%2 = spv.constant 4 : i32
// %3 points at the binding-1 buffer (stored to); %4 at the binding-0 buffer (loaded from).
%3 = spv._address_of @__resource_var_0_1__ : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
%4 = spv._address_of @__resource_var_0_0__ : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
// Component 0 (x): %16 = WorkgroupId.x * 32 + LocalInvocationId.x (start),
// %17 = 32 * NumWorkgroups.x (step). 32 equals local_size[0].
%5 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%6 = spv.Load "Input" %5 : vector<3xi32>
%7 = spv.CompositeExtract %6[0 : i32] : vector<3xi32>
%8 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%9 = spv.Load "Input" %8 : vector<3xi32>
%10 = spv.CompositeExtract %9[0 : i32] : vector<3xi32>
%11 = spv.constant 32 : i32
%12 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%13 = spv.Load "Input" %12 : vector<3xi32>
%14 = spv.CompositeExtract %13[0 : i32] : vector<3xi32>
%15 = spv.IMul %10, %11 : i32
%16 = spv.IAdd %15, %14 : i32
%17 = spv.IMul %11, %7 : i32
// Component 1 (y): %29 = start, %30 = step; the constant 2 equals local_size[1].
%18 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%19 = spv.Load "Input" %18 : vector<3xi32>
%20 = spv.CompositeExtract %19[1 : i32] : vector<3xi32>
%21 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%22 = spv.Load "Input" %21 : vector<3xi32>
%23 = spv.CompositeExtract %22[1 : i32] : vector<3xi32>
%24 = spv.constant 2 : i32
%25 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%26 = spv.Load "Input" %25 : vector<3xi32>
%27 = spv.CompositeExtract %26[1 : i32] : vector<3xi32>
%28 = spv.IMul %23, %24 : i32
%29 = spv.IAdd %28, %27 : i32
%30 = spv.IMul %24, %20 : i32
// Component 2 (z): %42 = start, %43 = step; the constant 2 equals local_size[2].
%31 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%32 = spv.Load "Input" %31 : vector<3xi32>
%33 = spv.CompositeExtract %32[2 : i32] : vector<3xi32>
%34 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%35 = spv.Load "Input" %34 : vector<3xi32>
%36 = spv.CompositeExtract %35[2 : i32] : vector<3xi32>
%37 = spv.constant 2 : i32
%38 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%39 = spv.Load "Input" %38 : vector<3xi32>
%40 = spv.CompositeExtract %39[2 : i32] : vector<3xi32>
%41 = spv.IMul %36, %37 : i32
%42 = spv.IAdd %41, %40 : i32
%43 = spv.IMul %37, %33 : i32
// Outer loop: %44 starts at %42, runs while %44 < 1, advances by %43.
spv.loop {
spv.Branch ^bb1(%42 : i32)
^bb1(%44: i32): // 2 preds: ^bb0, ^bb2
%45 = spv.SLessThan %44, %0 : i32
spv.BranchConditional %45, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Middle loop: %47 starts at %29, runs while %47 < 5, advances by %30.
spv.loop {
spv.Branch ^bb1(%29 : i32)
^bb1(%47: i32): // 2 preds: ^bb0, ^bb2
%48 = spv.SLessThan %47, %1 : i32
spv.BranchConditional %48, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner loop: %50 starts at %16, runs while %50 < 4, advances by %17.
spv.loop {
spv.Branch ^bb1(%16 : i32)
^bb1(%50: i32): // 2 preds: ^bb0, ^bb2
%51 = spv.SLessThan %50, %2 : i32
spv.BranchConditional %51, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Source offset: %62 = 0 + 20 * %44 + 4 * %47 + 1 * %50.
%52 = spv.constant 0 : i32
%53 = spv.constant 0 : i32
%54 = spv.constant 20 : i32
%55 = spv.IMul %54, %44 : i32
%56 = spv.IAdd %53, %55 : i32
%57 = spv.constant 4 : i32
%58 = spv.IMul %57, %47 : i32
%59 = spv.IAdd %56, %58 : i32
%60 = spv.constant 1 : i32
%61 = spv.IMul %60, %50 : i32
%62 = spv.IAdd %59, %61 : i32
%63 = spv.AccessChain %4[%52, %62] : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
%64 = spv.Load "StorageBuffer" %63 : f32
// Destination offset: %75 = 0 + 320 * %44 + 64 * %47 + 1 * %50.
%65 = spv.constant 0 : i32
%66 = spv.constant 0 : i32
%67 = spv.constant 320 : i32
%68 = spv.IMul %67, %44 : i32
%69 = spv.IAdd %66, %68 : i32
%70 = spv.constant 64 : i32
%71 = spv.IMul %70, %47 : i32
%72 = spv.IAdd %69, %71 : i32
%73 = spv.constant 1 : i32
%74 = spv.IMul %73, %50 : i32
%75 = spv.IAdd %72, %74 : i32
%76 = spv.AccessChain %3[%65, %75] : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
spv.Store "StorageBuffer" %76, %64 : f32
// Inner induction-variable update.
%77 = spv.IAdd %50, %17 : i32
spv.Branch ^bb1(%77 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Middle induction-variable update.
%49 = spv.IAdd %47, %30 : i32
spv.Branch ^bb1(%49 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Outer induction-variable update.
%46 = spv.IAdd %44, %43 : i32
spv.Branch ^bb1(%46 : i32)
^bb3: // pred: ^bb1
spv._merge
}
spv.Return
}
}
// Binding table: @arg0 is (set 0, binding 0), @ret0 is (set 0, binding 1).
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After SPIRVLowerABIAttributes ***
// SPIR-V module for dispatch 2. In this form the function carries no
// spv.entry_point_abi attribute; the entry point and its LocalSize (32, 2, 2)
// are declared by the spv.EntryPoint / spv.ExecutionMode ops at the end.
spv.module Logical GLSL450 {
// Built-in inputs, then the two storage buffers (20 x f32 at bind(0, 0),
// 320 x f32 at bind(0, 1)).
spv.globalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_NumWorkgroups__ built_in("NumWorkgroups") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__resource_var_0_0__ bind(0, 0) : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
spv.globalVariable @__resource_var_0_1__ bind(0, 1) : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
spv.func @main_ex_dispatch_2() "None" {
// Loop upper bounds: 1 (outer), 5 (middle), 4 (inner).
%0 = spv.constant 1 : i32
%1 = spv.constant 5 : i32
%2 = spv.constant 4 : i32
// %3: buffer that is stored to (bind(0, 1)); %4: buffer that is loaded from (bind(0, 0)).
%3 = spv._address_of @__resource_var_0_1__ : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
%4 = spv._address_of @__resource_var_0_0__ : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
// Component 0: start %16 = WorkgroupId[0] * 32 + LocalInvocationId[0],
// step %17 = 32 * NumWorkgroups[0].
%5 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%6 = spv.Load "Input" %5 : vector<3xi32>
%7 = spv.CompositeExtract %6[0 : i32] : vector<3xi32>
%8 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%9 = spv.Load "Input" %8 : vector<3xi32>
%10 = spv.CompositeExtract %9[0 : i32] : vector<3xi32>
%11 = spv.constant 32 : i32
%12 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%13 = spv.Load "Input" %12 : vector<3xi32>
%14 = spv.CompositeExtract %13[0 : i32] : vector<3xi32>
%15 = spv.IMul %10, %11 : i32
%16 = spv.IAdd %15, %14 : i32
%17 = spv.IMul %11, %7 : i32
// Component 1: start %29 = WorkgroupId[1] * 2 + LocalInvocationId[1],
// step %30 = 2 * NumWorkgroups[1].
%18 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%19 = spv.Load "Input" %18 : vector<3xi32>
%20 = spv.CompositeExtract %19[1 : i32] : vector<3xi32>
%21 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%22 = spv.Load "Input" %21 : vector<3xi32>
%23 = spv.CompositeExtract %22[1 : i32] : vector<3xi32>
%24 = spv.constant 2 : i32
%25 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%26 = spv.Load "Input" %25 : vector<3xi32>
%27 = spv.CompositeExtract %26[1 : i32] : vector<3xi32>
%28 = spv.IMul %23, %24 : i32
%29 = spv.IAdd %28, %27 : i32
%30 = spv.IMul %24, %20 : i32
// Component 2: start %42 = WorkgroupId[2] * 2 + LocalInvocationId[2],
// step %43 = 2 * NumWorkgroups[2].
%31 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%32 = spv.Load "Input" %31 : vector<3xi32>
%33 = spv.CompositeExtract %32[2 : i32] : vector<3xi32>
%34 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%35 = spv.Load "Input" %34 : vector<3xi32>
%36 = spv.CompositeExtract %35[2 : i32] : vector<3xi32>
%37 = spv.constant 2 : i32
%38 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%39 = spv.Load "Input" %38 : vector<3xi32>
%40 = spv.CompositeExtract %39[2 : i32] : vector<3xi32>
%41 = spv.IMul %36, %37 : i32
%42 = spv.IAdd %41, %40 : i32
%43 = spv.IMul %37, %33 : i32
// Outer loop over %44: from %42 while %44 < %0, step %43.
spv.loop {
spv.Branch ^bb1(%42 : i32)
^bb1(%44: i32): // 2 preds: ^bb0, ^bb2
%45 = spv.SLessThan %44, %0 : i32
spv.BranchConditional %45, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Middle loop over %47: from %29 while %47 < %1, step %30.
spv.loop {
spv.Branch ^bb1(%29 : i32)
^bb1(%47: i32): // 2 preds: ^bb0, ^bb2
%48 = spv.SLessThan %47, %1 : i32
spv.BranchConditional %48, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner loop over %50: from %16 while %50 < %2, step %17.
spv.loop {
spv.Branch ^bb1(%16 : i32)
^bb1(%50: i32): // 2 preds: ^bb0, ^bb2
%51 = spv.SLessThan %50, %2 : i32
spv.BranchConditional %51, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Load address: element 0 + 20 * %44 + 4 * %47 + 1 * %50 of the 20-element array.
%52 = spv.constant 0 : i32
%53 = spv.constant 0 : i32
%54 = spv.constant 20 : i32
%55 = spv.IMul %54, %44 : i32
%56 = spv.IAdd %53, %55 : i32
%57 = spv.constant 4 : i32
%58 = spv.IMul %57, %47 : i32
%59 = spv.IAdd %56, %58 : i32
%60 = spv.constant 1 : i32
%61 = spv.IMul %60, %50 : i32
%62 = spv.IAdd %59, %61 : i32
%63 = spv.AccessChain %4[%52, %62] : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
%64 = spv.Load "StorageBuffer" %63 : f32
// Store address: element 0 + 320 * %44 + 64 * %47 + 1 * %50 of the 320-element array.
%65 = spv.constant 0 : i32
%66 = spv.constant 0 : i32
%67 = spv.constant 320 : i32
%68 = spv.IMul %67, %44 : i32
%69 = spv.IAdd %66, %68 : i32
%70 = spv.constant 64 : i32
%71 = spv.IMul %70, %47 : i32
%72 = spv.IAdd %69, %71 : i32
%73 = spv.constant 1 : i32
%74 = spv.IMul %73, %50 : i32
%75 = spv.IAdd %72, %74 : i32
%76 = spv.AccessChain %3[%65, %75] : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
spv.Store "StorageBuffer" %76, %64 : f32
// Advance the inner induction variable.
%77 = spv.IAdd %50, %17 : i32
spv.Branch ^bb1(%77 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance the middle induction variable.
%49 = spv.IAdd %47, %30 : i32
spv.Branch ^bb1(%49 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance the outer induction variable.
%46 = spv.IAdd %44, %43 : i32
spv.Branch ^bb1(%46 : i32)
^bb3: // pred: ^bb1
spv._merge
}
spv.Return
}
// GLCompute entry point; its interface lists the three built-in variables.
spv.EntryPoint "GLCompute" @main_ex_dispatch_2, @__builtin_var_NumWorkgroups__, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__
spv.ExecutionMode @main_ex_dispatch_2 "LocalSize", 32, 2, 2
}
// *** IR Dump After Canonicalizer ***
// SPIR-V module for dispatch 2 with all constants defined once at the top of
// the function. The offset arithmetic in the inner loop body contains no
// multiply-by-1 or add-0 operations.
spv.module Logical GLSL450 {
// Built-in inputs and the two storage buffers (20 x f32 and 320 x f32).
spv.globalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_NumWorkgroups__ built_in("NumWorkgroups") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__resource_var_0_0__ bind(0, 0) : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
spv.globalVariable @__resource_var_0_1__ bind(0, 1) : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
spv.func @main_ex_dispatch_2() "None" {
// Constant pool. Uses below: %0 and %1 are the outer/middle loop bounds;
// %2 and %3 scale the workgroup ids and counts; %4/%5 and %7/%8 are the
// offset multipliers for the load and store; %5 is also the inner loop
// bound; %6 is the struct member index in both AccessChains.
%0 = spv.constant 1 : i32
%1 = spv.constant 5 : i32
%2 = spv.constant 32 : i32
%3 = spv.constant 2 : i32
%4 = spv.constant 20 : i32
%5 = spv.constant 4 : i32
%6 = spv.constant 0 : i32
%7 = spv.constant 320 : i32
%8 = spv.constant 64 : i32
// %9: stored-to buffer (bind(0, 1)); %10: loaded-from buffer (bind(0, 0)).
%9 = spv._address_of @__resource_var_0_1__ : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
%10 = spv._address_of @__resource_var_0_0__ : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
// Component 0: start %21 = WorkgroupId[0] * 32 + LocalInvocationId[0],
// step %22 = NumWorkgroups[0] * 32.
%11 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%12 = spv.Load "Input" %11 : vector<3xi32>
%13 = spv.CompositeExtract %12[0 : i32] : vector<3xi32>
%14 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%15 = spv.Load "Input" %14 : vector<3xi32>
%16 = spv.CompositeExtract %15[0 : i32] : vector<3xi32>
%17 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%18 = spv.Load "Input" %17 : vector<3xi32>
%19 = spv.CompositeExtract %18[0 : i32] : vector<3xi32>
%20 = spv.IMul %16, %2 : i32
%21 = spv.IAdd %20, %19 : i32
%22 = spv.IMul %13, %2 : i32
// Component 1: start %33, step %34 (scaled by 2).
%23 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%24 = spv.Load "Input" %23 : vector<3xi32>
%25 = spv.CompositeExtract %24[1 : i32] : vector<3xi32>
%26 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%27 = spv.Load "Input" %26 : vector<3xi32>
%28 = spv.CompositeExtract %27[1 : i32] : vector<3xi32>
%29 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%30 = spv.Load "Input" %29 : vector<3xi32>
%31 = spv.CompositeExtract %30[1 : i32] : vector<3xi32>
%32 = spv.IMul %28, %3 : i32
%33 = spv.IAdd %32, %31 : i32
%34 = spv.IMul %25, %3 : i32
// Component 2: start %45, step %46 (scaled by 2).
%35 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%36 = spv.Load "Input" %35 : vector<3xi32>
%37 = spv.CompositeExtract %36[2 : i32] : vector<3xi32>
%38 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%39 = spv.Load "Input" %38 : vector<3xi32>
%40 = spv.CompositeExtract %39[2 : i32] : vector<3xi32>
%41 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%42 = spv.Load "Input" %41 : vector<3xi32>
%43 = spv.CompositeExtract %42[2 : i32] : vector<3xi32>
%44 = spv.IMul %40, %3 : i32
%45 = spv.IAdd %44, %43 : i32
%46 = spv.IMul %37, %3 : i32
// Outer loop over %47: from %45 while %47 < 1, step %46.
spv.loop {
spv.Branch ^bb1(%45 : i32)
^bb1(%47: i32): // 2 preds: ^bb0, ^bb2
%48 = spv.SLessThan %47, %0 : i32
spv.BranchConditional %48, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Middle loop over %50: from %33 while %50 < 5, step %34.
spv.loop {
spv.Branch ^bb1(%33 : i32)
^bb1(%50: i32): // 2 preds: ^bb0, ^bb2
%51 = spv.SLessThan %50, %1 : i32
spv.BranchConditional %51, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner loop over %53: from %21 while %53 < 4, step %22.
spv.loop {
spv.Branch ^bb1(%21 : i32)
^bb1(%53: i32): // 2 preds: ^bb0, ^bb2
%54 = spv.SLessThan %53, %5 : i32
spv.BranchConditional %54, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Load offset %58 = %47 * 20 + %50 * 4 + %53.
%55 = spv.IMul %47, %4 : i32
%56 = spv.IMul %50, %5 : i32
%57 = spv.IAdd %55, %56 : i32
%58 = spv.IAdd %57, %53 : i32
%59 = spv.AccessChain %10[%6, %58] : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
%60 = spv.Load "StorageBuffer" %59 : f32
// Store offset %64 = %47 * 320 + %50 * 64 + %53.
%61 = spv.IMul %47, %7 : i32
%62 = spv.IMul %50, %8 : i32
%63 = spv.IAdd %61, %62 : i32
%64 = spv.IAdd %63, %53 : i32
%65 = spv.AccessChain %9[%6, %64] : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
spv.Store "StorageBuffer" %65, %60 : f32
// Advance the inner induction variable.
%66 = spv.IAdd %53, %22 : i32
spv.Branch ^bb1(%66 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance the middle induction variable.
%52 = spv.IAdd %50, %34 : i32
spv.Branch ^bb1(%52 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance the outer induction variable.
%49 = spv.IAdd %47, %46 : i32
spv.Branch ^bb1(%49 : i32)
^bb3: // pred: ^bb1
spv._merge
}
spv.Return
}
// GLCompute entry point with LocalSize 32 x 2 x 2.
spv.EntryPoint "GLCompute" @main_ex_dispatch_2, @__builtin_var_NumWorkgroups__, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__
spv.ExecutionMode @main_ex_dispatch_2 "LocalSize", 32, 2, 2
}
// *** IR Dump After CSE ***
// SPIR-V module for dispatch 2 in which each built-in variable's address is
// taken once (%11, %14, %17) and reused for all three components. The
// spv.Load of each built-in is still issued once per component.
spv.module Logical GLSL450 {
// Built-in inputs and the two storage buffers (20 x f32 and 320 x f32).
spv.globalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_NumWorkgroups__ built_in("NumWorkgroups") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__resource_var_0_0__ bind(0, 0) : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
spv.globalVariable @__resource_var_0_1__ bind(0, 1) : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
spv.func @main_ex_dispatch_2() "None" {
// Constants: loop bounds (%0 = 1, %1 = 5, %5 = 4), workgroup scale
// factors (%2 = 32, %3 = 2), offset multipliers (%4 = 20, %5 = 4,
// %7 = 320, %8 = 64) and the struct member index (%6 = 0).
%0 = spv.constant 1 : i32
%1 = spv.constant 5 : i32
%2 = spv.constant 32 : i32
%3 = spv.constant 2 : i32
%4 = spv.constant 20 : i32
%5 = spv.constant 4 : i32
%6 = spv.constant 0 : i32
%7 = spv.constant 320 : i32
%8 = spv.constant 64 : i32
// %9: stored-to buffer (bind(0, 1)); %10: loaded-from buffer (bind(0, 0)).
%9 = spv._address_of @__resource_var_0_1__ : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
%10 = spv._address_of @__resource_var_0_0__ : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
// Component 0: start %21 = WorkgroupId[0] * 32 + LocalInvocationId[0],
// step %22 = NumWorkgroups[0] * 32.
%11 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%12 = spv.Load "Input" %11 : vector<3xi32>
%13 = spv.CompositeExtract %12[0 : i32] : vector<3xi32>
%14 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%15 = spv.Load "Input" %14 : vector<3xi32>
%16 = spv.CompositeExtract %15[0 : i32] : vector<3xi32>
%17 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%18 = spv.Load "Input" %17 : vector<3xi32>
%19 = spv.CompositeExtract %18[0 : i32] : vector<3xi32>
%20 = spv.IMul %16, %2 : i32
%21 = spv.IAdd %20, %19 : i32
%22 = spv.IMul %13, %2 : i32
// Component 1: start %30, step %31 (scaled by 2).
%23 = spv.Load "Input" %11 : vector<3xi32>
%24 = spv.CompositeExtract %23[1 : i32] : vector<3xi32>
%25 = spv.Load "Input" %14 : vector<3xi32>
%26 = spv.CompositeExtract %25[1 : i32] : vector<3xi32>
%27 = spv.Load "Input" %17 : vector<3xi32>
%28 = spv.CompositeExtract %27[1 : i32] : vector<3xi32>
%29 = spv.IMul %26, %3 : i32
%30 = spv.IAdd %29, %28 : i32
%31 = spv.IMul %24, %3 : i32
// Component 2: start %39, step %40 (scaled by 2).
%32 = spv.Load "Input" %11 : vector<3xi32>
%33 = spv.CompositeExtract %32[2 : i32] : vector<3xi32>
%34 = spv.Load "Input" %14 : vector<3xi32>
%35 = spv.CompositeExtract %34[2 : i32] : vector<3xi32>
%36 = spv.Load "Input" %17 : vector<3xi32>
%37 = spv.CompositeExtract %36[2 : i32] : vector<3xi32>
%38 = spv.IMul %35, %3 : i32
%39 = spv.IAdd %38, %37 : i32
%40 = spv.IMul %33, %3 : i32
// Outer loop over %41: from %39 while %41 < 1, step %40.
spv.loop {
spv.Branch ^bb1(%39 : i32)
^bb1(%41: i32): // 2 preds: ^bb0, ^bb2
%42 = spv.SLessThan %41, %0 : i32
spv.BranchConditional %42, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Middle loop over %44: from %30 while %44 < 5, step %31.
spv.loop {
spv.Branch ^bb1(%30 : i32)
^bb1(%44: i32): // 2 preds: ^bb0, ^bb2
%45 = spv.SLessThan %44, %1 : i32
spv.BranchConditional %45, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner loop over %47: from %21 while %47 < 4, step %22.
spv.loop {
spv.Branch ^bb1(%21 : i32)
^bb1(%47: i32): // 2 preds: ^bb0, ^bb2
%48 = spv.SLessThan %47, %5 : i32
spv.BranchConditional %48, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Load offset %52 = %41 * 20 + %44 * 4 + %47.
%49 = spv.IMul %41, %4 : i32
%50 = spv.IMul %44, %5 : i32
%51 = spv.IAdd %49, %50 : i32
%52 = spv.IAdd %51, %47 : i32
%53 = spv.AccessChain %10[%6, %52] : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
%54 = spv.Load "StorageBuffer" %53 : f32
// Store offset %58 = %41 * 320 + %44 * 64 + %47.
%55 = spv.IMul %41, %7 : i32
%56 = spv.IMul %44, %8 : i32
%57 = spv.IAdd %55, %56 : i32
%58 = spv.IAdd %57, %47 : i32
%59 = spv.AccessChain %9[%6, %58] : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
spv.Store "StorageBuffer" %59, %54 : f32
// Advance the inner induction variable.
%60 = spv.IAdd %47, %22 : i32
spv.Branch ^bb1(%60 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance the middle induction variable.
%46 = spv.IAdd %44, %31 : i32
spv.Branch ^bb1(%46 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance the outer induction variable.
%43 = spv.IAdd %41, %40 : i32
spv.Branch ^bb1(%43 : i32)
^bb3: // pred: ^bb1
spv._merge
}
spv.Return
}
// GLCompute entry point with LocalSize 32 x 2 x 2.
spv.EntryPoint "GLCompute" @main_ex_dispatch_2, @__builtin_var_NumWorkgroups__, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__
spv.ExecutionMode @main_ex_dispatch_2 "LocalSize", 32, 2, 2
}
// *** IR Dump After SPIRVUpdateVCE ***
// SPIR-V module for dispatch 2 carrying an explicit `requires` triple:
// version v1.0, capability Shader, extension
// SPV_KHR_storage_buffer_storage_class.
spv.module Logical GLSL450 requires #spv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class]> {
// Built-in inputs and the two storage buffers (20 x f32 and 320 x f32).
spv.globalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_NumWorkgroups__ built_in("NumWorkgroups") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__resource_var_0_0__ bind(0, 0) : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
spv.globalVariable @__resource_var_0_1__ bind(0, 1) : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
spv.func @main_ex_dispatch_2() "None" {
// Constants: loop bounds (%0 = 1, %1 = 5, %5 = 4), workgroup scale
// factors (%2 = 32, %3 = 2), offset multipliers (%4 = 20, %5 = 4,
// %7 = 320, %8 = 64) and the struct member index (%6 = 0).
%0 = spv.constant 1 : i32
%1 = spv.constant 5 : i32
%2 = spv.constant 32 : i32
%3 = spv.constant 2 : i32
%4 = spv.constant 20 : i32
%5 = spv.constant 4 : i32
%6 = spv.constant 0 : i32
%7 = spv.constant 320 : i32
%8 = spv.constant 64 : i32
// %9: stored-to buffer (bind(0, 1)); %10: loaded-from buffer (bind(0, 0)).
%9 = spv._address_of @__resource_var_0_1__ : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
%10 = spv._address_of @__resource_var_0_0__ : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
// Component 0: start %21 = WorkgroupId[0] * 32 + LocalInvocationId[0],
// step %22 = NumWorkgroups[0] * 32.
%11 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%12 = spv.Load "Input" %11 : vector<3xi32>
%13 = spv.CompositeExtract %12[0 : i32] : vector<3xi32>
%14 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%15 = spv.Load "Input" %14 : vector<3xi32>
%16 = spv.CompositeExtract %15[0 : i32] : vector<3xi32>
%17 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%18 = spv.Load "Input" %17 : vector<3xi32>
%19 = spv.CompositeExtract %18[0 : i32] : vector<3xi32>
%20 = spv.IMul %16, %2 : i32
%21 = spv.IAdd %20, %19 : i32
%22 = spv.IMul %13, %2 : i32
// Component 1: start %30, step %31 (scaled by 2).
%23 = spv.Load "Input" %11 : vector<3xi32>
%24 = spv.CompositeExtract %23[1 : i32] : vector<3xi32>
%25 = spv.Load "Input" %14 : vector<3xi32>
%26 = spv.CompositeExtract %25[1 : i32] : vector<3xi32>
%27 = spv.Load "Input" %17 : vector<3xi32>
%28 = spv.CompositeExtract %27[1 : i32] : vector<3xi32>
%29 = spv.IMul %26, %3 : i32
%30 = spv.IAdd %29, %28 : i32
%31 = spv.IMul %24, %3 : i32
// Component 2: start %39, step %40 (scaled by 2).
%32 = spv.Load "Input" %11 : vector<3xi32>
%33 = spv.CompositeExtract %32[2 : i32] : vector<3xi32>
%34 = spv.Load "Input" %14 : vector<3xi32>
%35 = spv.CompositeExtract %34[2 : i32] : vector<3xi32>
%36 = spv.Load "Input" %17 : vector<3xi32>
%37 = spv.CompositeExtract %36[2 : i32] : vector<3xi32>
%38 = spv.IMul %35, %3 : i32
%39 = spv.IAdd %38, %37 : i32
%40 = spv.IMul %33, %3 : i32
// Outer loop over %41: from %39 while %41 < 1, step %40.
spv.loop {
spv.Branch ^bb1(%39 : i32)
^bb1(%41: i32): // 2 preds: ^bb0, ^bb2
%42 = spv.SLessThan %41, %0 : i32
spv.BranchConditional %42, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Middle loop over %44: from %30 while %44 < 5, step %31.
spv.loop {
spv.Branch ^bb1(%30 : i32)
^bb1(%44: i32): // 2 preds: ^bb0, ^bb2
%45 = spv.SLessThan %44, %1 : i32
spv.BranchConditional %45, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner loop over %47: from %21 while %47 < 4, step %22.
spv.loop {
spv.Branch ^bb1(%21 : i32)
^bb1(%47: i32): // 2 preds: ^bb0, ^bb2
%48 = spv.SLessThan %47, %5 : i32
spv.BranchConditional %48, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Load offset %52 = %41 * 20 + %44 * 4 + %47.
%49 = spv.IMul %41, %4 : i32
%50 = spv.IMul %44, %5 : i32
%51 = spv.IAdd %49, %50 : i32
%52 = spv.IAdd %51, %47 : i32
%53 = spv.AccessChain %10[%6, %52] : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
%54 = spv.Load "StorageBuffer" %53 : f32
// Store offset %58 = %41 * 320 + %44 * 64 + %47.
%55 = spv.IMul %41, %7 : i32
%56 = spv.IMul %44, %8 : i32
%57 = spv.IAdd %55, %56 : i32
%58 = spv.IAdd %57, %47 : i32
%59 = spv.AccessChain %9[%6, %58] : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
spv.Store "StorageBuffer" %59, %54 : f32
// Advance the inner induction variable.
%60 = spv.IAdd %47, %22 : i32
spv.Branch ^bb1(%60 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance the middle induction variable.
%46 = spv.IAdd %44, %31 : i32
spv.Branch ^bb1(%46 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance the outer induction variable.
%43 = spv.IAdd %41, %40 : i32
spv.Branch ^bb1(%43 : i32)
^bb3: // pred: ^bb1
spv._merge
}
spv.Return
}
// GLCompute entry point with LocalSize 32 x 2 x 2.
spv.EntryPoint "GLCompute" @main_ex_dispatch_2, @__builtin_var_NumWorkgroups__, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__
spv.ExecutionMode @main_ex_dispatch_2 "LocalSize", 32, 2, 2
}
// *** IR Dump After mlir::iree_compiler::IREE::HAL::TranslateExecutablesPass ***
// HAL executable for dispatch 2 after translation to SPIR-V for the "vulkan*"
// target. The entry point's recorded signature is
// (tensor<1x5x4xf32>) -> tensor<1x5x64xf32>; the kernel below copies each of
// the 20 source elements into the destination buffer at a re-linearized index.
hal.executable @main_ex_dispatch_2 attributes {sym_visibility = "private"} {
// Binding layout: @arg0 is read-only at (set 0, binding 0); @ret0 is
// write/discard at (set 0, binding 1).
hal.interface @legacy_io {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
hal.executable.entry_point @main_ex_dispatch_2 attributes {interface = @legacy_io, ordinal = 0 : i32, signature = (tensor<1x5x4xf32>) -> tensor<1x5x64xf32>}
hal.executable.target "vulkan*" {
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
spv.module Logical GLSL450 requires #spv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class]> {
// Built-in inputs used to derive each invocation's loop start and step.
spv.globalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_NumWorkgroups__ built_in("NumWorkgroups") : !spv.ptr<vector<3xi32>, Input>
// Storage buffers: binding 0 holds 20 f32 values (source), binding 1 holds
// 320 f32 values (destination).
spv.globalVariable @__resource_var_0_0__ bind(0, 0) : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
spv.globalVariable @__resource_var_0_1__ bind(0, 1) : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
spv.func @main_ex_dispatch_2() "None" {
// Constants: loop upper bounds (%0 = 1, %1 = 5, %5 = 4), per-workgroup
// multipliers (%2 = 32 for x, %3 = 2 for y and z), source linearization
// factors (%4 = 20, %5 = 4), destination linearization factors (%7 = 320,
// %8 = 64), and the struct member index %6 = 0.
%0 = spv.constant 1 : i32
%1 = spv.constant 5 : i32
%2 = spv.constant 32 : i32
%3 = spv.constant 2 : i32
%4 = spv.constant 20 : i32
%5 = spv.constant 4 : i32
%6 = spv.constant 0 : i32
%7 = spv.constant 320 : i32
%8 = spv.constant 64 : i32
%9 = spv._address_of @__resource_var_0_1__ : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
%10 = spv._address_of @__resource_var_0_0__ : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
// x dimension: start %21 = WorkgroupId.x * 32 + LocalInvocationId.x,
// step %22 = NumWorkgroups.x * 32.
%11 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%12 = spv.Load "Input" %11 : vector<3xi32>
%13 = spv.CompositeExtract %12[0 : i32] : vector<3xi32>
%14 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%15 = spv.Load "Input" %14 : vector<3xi32>
%16 = spv.CompositeExtract %15[0 : i32] : vector<3xi32>
%17 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%18 = spv.Load "Input" %17 : vector<3xi32>
%19 = spv.CompositeExtract %18[0 : i32] : vector<3xi32>
%20 = spv.IMul %16, %2 : i32
%21 = spv.IAdd %20, %19 : i32
%22 = spv.IMul %13, %2 : i32
// y dimension: start %30 = WorkgroupId.y * 2 + LocalInvocationId.y,
// step %31 = NumWorkgroups.y * 2.
%23 = spv.Load "Input" %11 : vector<3xi32>
%24 = spv.CompositeExtract %23[1 : i32] : vector<3xi32>
%25 = spv.Load "Input" %14 : vector<3xi32>
%26 = spv.CompositeExtract %25[1 : i32] : vector<3xi32>
%27 = spv.Load "Input" %17 : vector<3xi32>
%28 = spv.CompositeExtract %27[1 : i32] : vector<3xi32>
%29 = spv.IMul %26, %3 : i32
%30 = spv.IAdd %29, %28 : i32
%31 = spv.IMul %24, %3 : i32
// z dimension: start %39 = WorkgroupId.z * 2 + LocalInvocationId.z,
// step %40 = NumWorkgroups.z * 2.
%32 = spv.Load "Input" %11 : vector<3xi32>
%33 = spv.CompositeExtract %32[2 : i32] : vector<3xi32>
%34 = spv.Load "Input" %14 : vector<3xi32>
%35 = spv.CompositeExtract %34[2 : i32] : vector<3xi32>
%36 = spv.Load "Input" %17 : vector<3xi32>
%37 = spv.CompositeExtract %36[2 : i32] : vector<3xi32>
%38 = spv.IMul %35, %3 : i32
%39 = spv.IAdd %38, %37 : i32
%40 = spv.IMul %33, %3 : i32
// Outer loop: %41 starts at %39, continues while %41 < 1, advances by %40.
spv.loop {
spv.Branch ^bb1(%39 : i32)
^bb1(%41: i32): // 2 preds: ^bb0, ^bb2
%42 = spv.SLessThan %41, %0 : i32
spv.BranchConditional %42, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Middle loop: %44 starts at %30, continues while %44 < 5, advances by %31.
spv.loop {
spv.Branch ^bb1(%30 : i32)
^bb1(%44: i32): // 2 preds: ^bb0, ^bb2
%45 = spv.SLessThan %44, %1 : i32
spv.BranchConditional %45, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner loop: %47 starts at %21, continues while %47 < 4, advances by %22.
spv.loop {
spv.Branch ^bb1(%21 : i32)
^bb1(%47: i32): // 2 preds: ^bb0, ^bb2
%48 = spv.SLessThan %47, %5 : i32
spv.BranchConditional %48, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Source element index: %41 * 20 + %44 * 4 + %47 into the 20-element buffer.
%49 = spv.IMul %41, %4 : i32
%50 = spv.IMul %44, %5 : i32
%51 = spv.IAdd %49, %50 : i32
%52 = spv.IAdd %51, %47 : i32
%53 = spv.AccessChain %10[%6, %52] : !spv.ptr<!spv.struct<!spv.array<20 x f32, stride=4> [0]>, StorageBuffer>
%54 = spv.Load "StorageBuffer" %53 : f32
// Destination element index: %41 * 320 + %44 * 64 + %47 into the 320-element
// buffer; the loaded value is stored there unchanged.
%55 = spv.IMul %41, %7 : i32
%56 = spv.IMul %44, %8 : i32
%57 = spv.IAdd %55, %56 : i32
%58 = spv.IAdd %57, %47 : i32
%59 = spv.AccessChain %9[%6, %58] : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
spv.Store "StorageBuffer" %59, %54 : f32
%60 = spv.IAdd %47, %22 : i32
spv.Branch ^bb1(%60 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%46 = spv.IAdd %44, %31 : i32
spv.Branch ^bb1(%46 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%43 = spv.IAdd %41, %40 : i32
spv.Branch ^bb1(%43 : i32)
^bb3: // pred: ^bb1
spv._merge
}
spv.Return
}
// Compute entry point with a fixed workgroup size of 32 x 2 x 2, matching the
// multipliers used for the x, y and z loop starts above.
spv.EntryPoint "GLCompute" @main_ex_dispatch_2, @__builtin_var_NumWorkgroups__, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__
spv.ExecutionMode @main_ex_dispatch_2 "LocalSize", 32, 2, 2
}
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
}
}
// *** IR Dump After Inliner ***
// Dispatch 3 as it enters the SPIR-V codegen pipeline: a module carrying the
// SPIR-V target environment, one dispatch function, and its HAL interface.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
// Loads the 1x5x64 tensor bound to @arg0, transposes it with permutation
// [1, 0, 2] into a 5x1x64 tensor, and stores the result to @ret0.
func @main_ex_dispatch_3() {
%c0 = constant 0 : index
%0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<1x5x64xf32>
%1 = "xla_hlo.transpose"(%0) {permutation = dense<[1, 0, 2]> : tensor<3xi64>} : (tensor<1x5x64xf32>) -> tensor<5x1x64xf32>
hal.interface.store.tensor %1, @legacy_io::@ret0, offset = %c0 : tensor<5x1x64xf32>
return
}
// @arg0 is read-only at (set 0, binding 0); @ret0 is write/discard at
// (set 0, binding 1).
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After mlir::iree_compiler::Shape::`anonymous-namespace'::TieDynamicShapesPass ***
// Dispatch 3 after TieDynamicShapesPass. All shapes here are static; the body
// is a load of the 1x5x64 input, an xla_hlo.transpose with permutation
// [1, 0, 2], and a store of the 5x1x64 result.
func @main_ex_dispatch_3() {
%c0 = constant 0 : index
%0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<1x5x64xf32>
%1 = "xla_hlo.transpose"(%0) {permutation = dense<[1, 0, 2]> : tensor<3xi64>} : (tensor<1x5x64xf32>) -> tensor<5x1x64xf32>
hal.interface.store.tensor %1, @legacy_io::@ret0, offset = %c0 : tensor<5x1x64xf32>
return
}
// *** IR Dump After mlir::iree_compiler::Shape::`anonymous-namespace'::MaterializeShapeCalculationsPass ***
// Dispatch 3 after MaterializeShapeCalculationsPass. The function contains no
// shape-calculation ops: it loads the 1x5x64 input, transposes it with
// permutation [1, 0, 2], and stores the 5x1x64 result.
func @main_ex_dispatch_3() {
%c0 = constant 0 : index
%0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<1x5x64xf32>
%1 = "xla_hlo.transpose"(%0) {permutation = dense<[1, 0, 2]> : tensor<3xi64>} : (tensor<1x5x64xf32>) -> tensor<5x1x64xf32>
hal.interface.store.tensor %1, @legacy_io::@ret0, offset = %c0 : tensor<5x1x64xf32>
return
}
// *** IR Dump After mlir::iree_compiler::Shape::`anonymous-namespace'::HoistShapeCalculations ***
// Dispatch 3 after HoistShapeCalculations. The body is a single straight-line
// sequence: load the 1x5x64 input, transpose with permutation [1, 0, 2], store
// the 5x1x64 result.
func @main_ex_dispatch_3() {
%c0 = constant 0 : index
%0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<1x5x64xf32>
%1 = "xla_hlo.transpose"(%0) {permutation = dense<[1, 0, 2]> : tensor<3xi64>} : (tensor<1x5x64xf32>) -> tensor<5x1x64xf32>
hal.interface.store.tensor %1, @legacy_io::@ret0, offset = %c0 : tensor<5x1x64xf32>
return
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::DecomposeHLOClampPass ***
// Dispatch 3 after DecomposeHLOClampPass. The function holds no clamp op; it
// still consists of the load, the xla_hlo.transpose with permutation
// [1, 0, 2], and the store.
func @main_ex_dispatch_3() {
%c0 = constant 0 : index
%0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<1x5x64xf32>
%1 = "xla_hlo.transpose"(%0) {permutation = dense<[1, 0, 2]> : tensor<3xi64>} : (tensor<1x5x64xf32>) -> tensor<5x1x64xf32>
hal.interface.store.tensor %1, @legacy_io::@ret0, offset = %c0 : tensor<5x1x64xf32>
return
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::ConvertHLOToLinalgOnTensorsPass ***
// Dispatch 3 after ConvertHLOToLinalgOnTensorsPass: the transpose is now a
// linalg.generic over three parallel iterators.
func @main_ex_dispatch_3() {
%c0 = constant 0 : index
%0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<1x5x64xf32>
// The input is indexed as (d1, d0, d2) and the result as (d0, d1, d2); the
// region yields the input element unchanged, so only the index order differs.
%1 = linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} %0 {
^bb0(%arg0: f32): // no predecessors
linalg.yield %arg0 : f32
}: tensor<1x5x64xf32> -> tensor<5x1x64xf32>
hal.interface.store.tensor %1, @legacy_io::@ret0, offset = %c0 : tensor<5x1x64xf32>
return
}
// *** IR Dump After LinalgFusionOfTensorOps ***
// Dispatch 3 module after LinalgFusionOfTensorOps. The function contains a
// single linalg.generic, surrounded by the interface load and store.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
func @main_ex_dispatch_3() {
%c0 = constant 0 : index
%0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<1x5x64xf32>
// Transpose expressed through indexing maps: input read at (d1, d0, d2),
// result written at (d0, d1, d2); the region forwards the element as-is.
%1 = linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} %0 {
^bb0(%arg0: f32): // no predecessors
linalg.yield %arg0 : f32
}: tensor<1x5x64xf32> -> tensor<5x1x64xf32>
hal.interface.store.tensor %1, @legacy_io::@ret0, offset = %c0 : tensor<5x1x64xf32>
return
}
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::ConvertHLOToLinalgOnBuffersPass ***
// Dispatch 3 after ConvertHLOToLinalgOnBuffersPass: tensors are replaced by
// memrefs obtained from iree.placeholder ops tied to the interface bindings.
func @main_ex_dispatch_3() {
// %0 is the 5x1x64 output buffer (@ret0); %1 is the 1x5x64 input buffer (@arg0).
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5x1x64xf32>
// %c0 has no use inside this function after the conversion.
%c0 = constant 0 : index
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5x64xf32>
// Buffer form of the transpose: reads %1 at (d1, d0, d2), writes %0 at
// (d0, d1, d2). %arg1 (the current output element) is not used by the region.
linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} %1, %0 {
^bb0(%arg0: f32, %arg1: f32): // no predecessors
linalg.yield %arg0 : f32
}: memref<1x5x64xf32>, memref<5x1x64xf32>
return
}
// *** IR Dump After Canonicalizer ***
// Dispatch 3 module after Canonicalizer: the function now holds only the two
// buffer placeholders and the linalg.generic; no index constant remains.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
func @main_ex_dispatch_3() {
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5x1x64xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5x64xf32>
// Copies %1 (input, read at (d1, d0, d2)) into %0 (output, written at
// (d0, d1, d2)).
linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} %1, %0 {
^bb0(%arg0: f32, %arg1: f32): // no predecessors
linalg.yield %arg0 : f32
}: memref<1x5x64xf32>, memref<5x1x64xf32>
return
}
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After CSE ***
// Dispatch 3 module after CSE. The function body is two placeholders (output
// %0, input %1) followed by the transposing linalg.generic.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
func @main_ex_dispatch_3() {
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5x1x64xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5x64xf32>
// Input map (d1, d0, d2) against output map (d0, d1, d2) swaps the first two
// dimensions; the region yields the input element unchanged.
linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} %1, %0 {
^bb0(%arg0: f32, %arg1: f32): // no predecessors
linalg.yield %arg0 : f32
}: memref<1x5x64xf32>, memref<5x1x64xf32>
return
}
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::LinalgTileAndFusePass ***
// Dispatch 3 after LinalgTileAndFusePass: the linalg.generic is wrapped in an
// scf.parallel tile loop, and the function gains an spv.entry_point_abi
// attribute with local_size [32, 2, 2].
func @main_ex_dispatch_3() attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
%c0 = constant 0 : index
%c5 = constant 5 : index
%c2 = constant 2 : index
%c32 = constant 32 : index
%c64 = constant 64 : index
%c1 = constant 1 : index
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5x1x64xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5x64xf32>
// Tile loop over the output iteration space (5, 1, 64) with steps (2, 2, 32).
scf.parallel (%arg0, %arg1, %arg2) = (%c0, %c0, %c0) to (%c5, %c1, %c64) step (%c2, %c2, %c32) {
// Input tile sizes: min(2, 1 - %arg1), min(2, 5 - %arg0), min(32, 64 - %arg2).
// The min clamps the last tile along each dimension to the remaining extent.
%2 = affine.min affine_map<(d0, d1, d2) -> (2, d1 - d2)>(%c2, %c1, %arg1)
%3 = affine.min affine_map<(d0, d1, d2) -> (2, d1 - d2)>(%c2, %c5, %arg0)
%4 = affine.min affine_map<(d0, d1, d2) -> (32, d1 - d2)>(%c32, %c64, %arg2)
// Input tile: offsets are (%arg1, %arg0, %arg2), i.e. the first two loop
// indices swapped to match the input's (d1, d0, d2) indexing map.
%5 = subview %1[%arg1, %arg0, %arg2] [%2, %3, %4] [%c1, %c1, %c1] : memref<1x5x64xf32> to memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3)>>
// Output tile sizes: min(2, 5 - %arg0), min(2, 1 - %arg1), min(32, 64 - %arg2).
%6 = affine.min affine_map<(d0, d1, d2) -> (2, d1 - d2)>(%c2, %c5, %arg0)
%7 = affine.min affine_map<(d0, d1, d2) -> (2, d1 - d2)>(%c2, %c1, %arg1)
%8 = affine.min affine_map<(d0, d1, d2) -> (32, d1 - d2)>(%c32, %c64, %arg2)
%9 = subview %0[%arg0, %arg1, %arg2] [%6, %7, %8] [%c1, %c1, %c1] : memref<5x1x64xf32> to memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3)>>
// Per-tile transpose; tagged with the "workitem" transform marker.
linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} {__internal_linalg_transform__ = "workitem"} %5, %9 {
^bb0(%arg3: f32, %arg4: f32): // no predecessors
linalg.yield %arg3 : f32
}: memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3)>>, memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3)>>
scf.yield
}
return
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::SplitDispatchFunctionPass ***
// Dispatch 3 module after SplitDispatchFunctionPass. The module still holds a
// single function, containing one tiled linalg.generic.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
func @main_ex_dispatch_3() attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
%c0 = constant 0 : index
%c5 = constant 5 : index
%c2 = constant 2 : index
%c32 = constant 32 : index
%c64 = constant 64 : index
%c1 = constant 1 : index
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5x1x64xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5x64xf32>
// Tile loop over (5, 1, 64) with steps (2, 2, 32).
scf.parallel (%arg0, %arg1, %arg2) = (%c0, %c0, %c0) to (%c5, %c1, %c64) step (%c2, %c2, %c32) {
// Input tile: sizes clamped by affine.min, offsets (%arg1, %arg0, %arg2).
%2 = affine.min affine_map<(d0, d1, d2) -> (2, d1 - d2)>(%c2, %c1, %arg1)
%3 = affine.min affine_map<(d0, d1, d2) -> (2, d1 - d2)>(%c2, %c5, %arg0)
%4 = affine.min affine_map<(d0, d1, d2) -> (32, d1 - d2)>(%c32, %c64, %arg2)
%5 = subview %1[%arg1, %arg0, %arg2] [%2, %3, %4] [%c1, %c1, %c1] : memref<1x5x64xf32> to memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3)>>
// Output tile: sizes clamped by affine.min, offsets (%arg0, %arg1, %arg2).
%6 = affine.min affine_map<(d0, d1, d2) -> (2, d1 - d2)>(%c2, %c5, %arg0)
%7 = affine.min affine_map<(d0, d1, d2) -> (2, d1 - d2)>(%c2, %c1, %arg1)
%8 = affine.min affine_map<(d0, d1, d2) -> (32, d1 - d2)>(%c32, %c64, %arg2)
%9 = subview %0[%arg0, %arg1, %arg2] [%6, %7, %8] [%c1, %c1, %c1] : memref<5x1x64xf32> to memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3)>>
// Per-tile transpose from %5 into %9.
linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} {__internal_linalg_transform__ = "workitem"} %5, %9 {
^bb0(%arg3: f32, %arg4: f32): // no predecessors
linalg.yield %arg3 : f32
}: memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3)>>, memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3)>>
scf.yield
}
return
}
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::LinalgTileAndFusePass ***
// Dispatch 3 after a second LinalgTileAndFusePass run. The linalg.generic
// already carries the "workitem" marker and sits inside one scf.parallel tile
// loop with steps (2, 2, 32).
func @main_ex_dispatch_3() attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
%c0 = constant 0 : index
%c5 = constant 5 : index
%c2 = constant 2 : index
%c32 = constant 32 : index
%c64 = constant 64 : index
%c1 = constant 1 : index
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5x1x64xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5x64xf32>
scf.parallel (%arg0, %arg1, %arg2) = (%c0, %c0, %c0) to (%c5, %c1, %c64) step (%c2, %c2, %c32) {
// Input tile of %1 at offsets (%arg1, %arg0, %arg2) with clamped sizes.
%2 = affine.min affine_map<(d0, d1, d2) -> (2, d1 - d2)>(%c2, %c1, %arg1)
%3 = affine.min affine_map<(d0, d1, d2) -> (2, d1 - d2)>(%c2, %c5, %arg0)
%4 = affine.min affine_map<(d0, d1, d2) -> (32, d1 - d2)>(%c32, %c64, %arg2)
%5 = subview %1[%arg1, %arg0, %arg2] [%2, %3, %4] [%c1, %c1, %c1] : memref<1x5x64xf32> to memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3)>>
// Output tile of %0 at offsets (%arg0, %arg1, %arg2) with clamped sizes.
%6 = affine.min affine_map<(d0, d1, d2) -> (2, d1 - d2)>(%c2, %c5, %arg0)
%7 = affine.min affine_map<(d0, d1, d2) -> (2, d1 - d2)>(%c2, %c1, %arg1)
%8 = affine.min affine_map<(d0, d1, d2) -> (32, d1 - d2)>(%c32, %c64, %arg2)
%9 = subview %0[%arg0, %arg1, %arg2] [%6, %7, %8] [%c1, %c1, %c1] : memref<5x1x64xf32> to memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3)>>
// Element-wise copy from the input tile to the output tile with the first two
// indices swapped on the input side.
linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} {__internal_linalg_transform__ = "workitem"} %5, %9 {
^bb0(%arg3: f32, %arg4: f32): // no predecessors
linalg.yield %arg3 : f32
}: memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3)>>, memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3)>>
scf.yield
}
return
}
// *** IR Dump After Canonicalizer ***
// Dispatch 3 module after Canonicalizer: constant operands are folded into the
// affine.min maps, and the subview result layouts now carry static strides.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
func @main_ex_dispatch_3() attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
%c0 = constant 0 : index
%c5 = constant 5 : index
%c2 = constant 2 : index
%c32 = constant 32 : index
%c64 = constant 64 : index
%c1 = constant 1 : index
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5x1x64xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5x64xf32>
scf.parallel (%arg0, %arg1, %arg2) = (%c0, %c0, %c0) to (%c5, %c1, %c64) step (%c2, %c2, %c32) {
// Input tile sizes: min(2, 1 - %arg1), min(2, 5 - %arg0), min(32, 64 - %arg2).
%2 = affine.min affine_map<(d0) -> (2, -d0 + 1)>(%arg1)
%3 = affine.min affine_map<(d0) -> (2, -d0 + 5)>(%arg0)
%4 = affine.min affine_map<(d0) -> (32, -d0 + 64)>(%arg2)
// Input tile layout: element strides 320, 64, 1 plus a symbolic offset s0.
%5 = subview %1[%arg1, %arg0, %arg2] [%2, %3, %4] [1, 1, 1] : memref<1x5x64xf32> to memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 320 + s0 + d1 * 64 + d2)>>
// Output tile sizes: min(2, 5 - %arg0), min(2, 1 - %arg1), min(32, 64 - %arg2).
%6 = affine.min affine_map<(d0) -> (2, -d0 + 5)>(%arg0)
%7 = affine.min affine_map<(d0) -> (2, -d0 + 1)>(%arg1)
%8 = affine.min affine_map<(d0) -> (32, -d0 + 64)>(%arg2)
// Output tile layout: element strides 64, 64, 1 plus a symbolic offset s0.
%9 = subview %0[%arg0, %arg1, %arg2] [%6, %7, %8] [1, 1, 1] : memref<5x1x64xf32> to memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 64 + s0 + d1 * 64 + d2)>>
linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} {__internal_linalg_transform__ = "workitem"} %5, %9 {
^bb0(%arg3: f32, %arg4: f32): // no predecessors
linalg.yield %arg3 : f32
}: memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 320 + s0 + d1 * 64 + d2)>>, memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 64 + s0 + d1 * 64 + d2)>>
scf.yield
}
return
}
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::ConvertToGPUPass ***
// Dispatch 3 after ConvertToGPUPass: the scf.parallel tile loop becomes three
// scf.for loops driven by gpu.block_id / gpu.grid_dim, and the linalg.generic
// becomes three scf.for loops driven by gpu.thread_id / gpu.block_dim with an
// explicit load and store.
func @main_ex_dispatch_3() attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
%c0 = constant 0 : index
%c5 = constant 5 : index
%c2 = constant 2 : index
%c32 = constant 32 : index
%c64 = constant 64 : index
%c1 = constant 1 : index
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5x1x64xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5x64xf32>
%2 = "gpu.block_id"() {dimension = "x"} : () -> index
%3 = "gpu.grid_dim"() {dimension = "x"} : () -> index
%4 = "gpu.block_id"() {dimension = "y"} : () -> index
%5 = "gpu.grid_dim"() {dimension = "y"} : () -> index
%6 = "gpu.block_id"() {dimension = "z"} : () -> index
%7 = "gpu.grid_dim"() {dimension = "z"} : () -> index
// Tile loop bounds per dimension: start = tile_step * block_id,
// step = tile_step * grid_dim. z uses step 2 (%9, %10), y uses step 2
// (%12, %13), x uses step 32 (%15, %16).
%8 = muli %c2, %6 : index
%9 = addi %c0, %8 : index
%10 = muli %c2, %7 : index
%11 = muli %c2, %4 : index
%12 = addi %c0, %11 : index
%13 = muli %c2, %5 : index
%14 = muli %c32, %2 : index
%15 = addi %c0, %14 : index
%16 = muli %c32, %3 : index
// %arg0 (extent 5) is distributed over z, %arg1 (extent 1) over y, and
// %arg2 (extent 64) over x.
scf.for %arg0 = %9 to %c5 step %10 {
scf.for %arg1 = %12 to %c1 step %13 {
scf.for %arg2 = %15 to %c64 step %16 {
// Input tile %20 of %1 at offsets (%arg1, %arg0, %arg2).
%17 = affine.min affine_map<(d0) -> (2, -d0 + 1)>(%arg1)
%18 = affine.min affine_map<(d0) -> (2, -d0 + 5)>(%arg0)
%19 = affine.min affine_map<(d0) -> (32, -d0 + 64)>(%arg2)
%20 = subview %1[%arg1, %arg0, %arg2] [%17, %18, %19] [1, 1, 1] : memref<1x5x64xf32> to memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 320 + s0 + d1 * 64 + d2)>>
// Output tile %24 of %0 at offsets (%arg0, %arg1, %arg2).
%21 = affine.min affine_map<(d0) -> (2, -d0 + 5)>(%arg0)
%22 = affine.min affine_map<(d0) -> (2, -d0 + 1)>(%arg1)
%23 = affine.min affine_map<(d0) -> (32, -d0 + 64)>(%arg2)
%24 = subview %0[%arg0, %arg1, %arg2] [%21, %22, %23] [1, 1, 1] : memref<5x1x64xf32> to memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 64 + s0 + d1 * 64 + d2)>>
%25 = dim %20, 0 : memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 320 + s0 + d1 * 64 + d2)>>
%26 = dim %20, 1 : memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 320 + s0 + d1 * 64 + d2)>>
%27 = dim %20, 2 : memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 320 + s0 + d1 * 64 + d2)>>
// %28, %29 and %30 (output tile dims) have no use in the rest of this function.
%28 = dim %24, 0 : memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 64 + s0 + d1 * 64 + d2)>>
%29 = dim %24, 1 : memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 64 + s0 + d1 * 64 + d2)>>
%30 = dim %24, 2 : memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 64 + s0 + d1 * 64 + d2)>>
// Inner loop upper bounds come from the input tile: %31 = input dim 1,
// %32 = input dim 0, %33 = input dim 2 (first two swapped, as in the input
// indexing map).
%31 = affine.apply affine_map<(d0) -> (d0)>(%26)
%32 = affine.apply affine_map<(d0) -> (d0)>(%25)
%33 = affine.apply affine_map<(d0) -> (d0)>(%27)
%c0_0 = constant 0 : index
%c1_1 = constant 1 : index
%c0_2 = constant 0 : index
%c1_3 = constant 1 : index
%c0_4 = constant 0 : index
%c1_5 = constant 1 : index
%34 = "gpu.thread_id"() {dimension = "x"} : () -> index
%35 = "gpu.block_dim"() {dimension = "x"} : () -> index
%36 = "gpu.thread_id"() {dimension = "y"} : () -> index
%37 = "gpu.block_dim"() {dimension = "y"} : () -> index
%38 = "gpu.thread_id"() {dimension = "z"} : () -> index
%39 = "gpu.block_dim"() {dimension = "z"} : () -> index
// Within a tile: start = thread_id, step = block_dim for z (%41, %42),
// y (%44, %45) and x (%47, %48).
%40 = muli %c1_1, %38 : index
%41 = addi %c0_0, %40 : index
%42 = muli %c1_1, %39 : index
%43 = muli %c1_3, %36 : index
%44 = addi %c0_2, %43 : index
%45 = muli %c1_3, %37 : index
%46 = muli %c1_5, %34 : index
%47 = addi %c0_4, %46 : index
%48 = muli %c1_5, %35 : index
scf.for %arg3 = %41 to %31 step %42 {
scf.for %arg4 = %44 to %32 step %45 {
scf.for %arg5 = %47 to %33 step %48 {
// Read the input tile at (%arg4, %arg3, %arg5).
%49 = affine.apply affine_map<(d0) -> (d0)>(%arg4)
%50 = affine.apply affine_map<(d0) -> (d0)>(%arg3)
%51 = affine.apply affine_map<(d0) -> (d0)>(%arg5)
%52 = load %20[%49, %50, %51] : memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 320 + s0 + d1 * 64 + d2)>>
// %56 loads the current output element; it has no use in this function.
%53 = affine.apply affine_map<(d0) -> (d0)>(%arg3)
%54 = affine.apply affine_map<(d0) -> (d0)>(%arg4)
%55 = affine.apply affine_map<(d0) -> (d0)>(%arg5)
%56 = load %24[%53, %54, %55] : memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 64 + s0 + d1 * 64 + d2)>>
// Write the input value to the output tile at (%arg3, %arg4, %arg5).
%57 = affine.apply affine_map<(d0) -> (d0)>(%arg3)
%58 = affine.apply affine_map<(d0) -> (d0)>(%arg4)
%59 = affine.apply affine_map<(d0) -> (d0)>(%arg5)
store %52, %24[%57, %58, %59] : memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 64 + s0 + d1 * 64 + d2)>>
}
}
}
}
}
}
return
}
// *** IR Dump After ConvertAffineToStandard ***
// Dispatch 3 after ConvertAffineToStandard: each affine.min is expanded into
// muli by -1, addi, cmpi "slt" and select, and the identity affine.apply ops
// are gone, so loads and stores index with the loop variables directly.
func @main_ex_dispatch_3() attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
%c0 = constant 0 : index
%c5 = constant 5 : index
%c2 = constant 2 : index
%c32 = constant 32 : index
%c64 = constant 64 : index
%c1 = constant 1 : index
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5x1x64xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5x64xf32>
%2 = "gpu.block_id"() {dimension = "x"} : () -> index
%3 = "gpu.grid_dim"() {dimension = "x"} : () -> index
%4 = "gpu.block_id"() {dimension = "y"} : () -> index
%5 = "gpu.grid_dim"() {dimension = "y"} : () -> index
%6 = "gpu.block_id"() {dimension = "z"} : () -> index
%7 = "gpu.grid_dim"() {dimension = "z"} : () -> index
// Tile loop start/step per dimension: z -> (%9, %10), y -> (%12, %13),
// x -> (%15, %16), each computed as tile_step * block_id and
// tile_step * grid_dim.
%8 = muli %c2, %6 : index
%9 = addi %c0, %8 : index
%10 = muli %c2, %7 : index
%11 = muli %c2, %4 : index
%12 = addi %c0, %11 : index
%13 = muli %c2, %5 : index
%14 = muli %c32, %2 : index
%15 = addi %c0, %14 : index
%16 = muli %c32, %3 : index
scf.for %arg0 = %9 to %c5 step %10 {
scf.for %arg1 = %12 to %c1 step %13 {
scf.for %arg2 = %15 to %c64 step %16 {
// %20 = min(2, 1 - %arg1): input tile size along dim 0.
%c2_0 = constant 2 : index
%c-1 = constant -1 : index
%17 = muli %arg1, %c-1 : index
%c1_1 = constant 1 : index
%18 = addi %17, %c1_1 : index
%19 = cmpi "slt", %c2_0, %18 : index
%20 = select %19, %c2_0, %18 : index
// %24 = min(2, 5 - %arg0): input tile size along dim 1.
%c2_2 = constant 2 : index
%c-1_3 = constant -1 : index
%21 = muli %arg0, %c-1_3 : index
%c5_4 = constant 5 : index
%22 = addi %21, %c5_4 : index
%23 = cmpi "slt", %c2_2, %22 : index
%24 = select %23, %c2_2, %22 : index
// %28 = min(32, 64 - %arg2): input tile size along dim 2.
%c32_5 = constant 32 : index
%c-1_6 = constant -1 : index
%25 = muli %arg2, %c-1_6 : index
%c64_7 = constant 64 : index
%26 = addi %25, %c64_7 : index
%27 = cmpi "slt", %c32_5, %26 : index
%28 = select %27, %c32_5, %26 : index
// Input tile of %1 at offsets (%arg1, %arg0, %arg2).
%29 = subview %1[%arg1, %arg0, %arg2] [%20, %24, %28] [1, 1, 1] : memref<1x5x64xf32> to memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 320 + s0 + d1 * 64 + d2)>>
// %33 = min(2, 5 - %arg0): output tile size along dim 0.
%c2_8 = constant 2 : index
%c-1_9 = constant -1 : index
%30 = muli %arg0, %c-1_9 : index
%c5_10 = constant 5 : index
%31 = addi %30, %c5_10 : index
%32 = cmpi "slt", %c2_8, %31 : index
%33 = select %32, %c2_8, %31 : index
// %37 = min(2, 1 - %arg1): output tile size along dim 1.
%c2_11 = constant 2 : index
%c-1_12 = constant -1 : index
%34 = muli %arg1, %c-1_12 : index
%c1_13 = constant 1 : index
%35 = addi %34, %c1_13 : index
%36 = cmpi "slt", %c2_11, %35 : index
%37 = select %36, %c2_11, %35 : index
// %41 = min(32, 64 - %arg2): output tile size along dim 2.
%c32_14 = constant 32 : index
%c-1_15 = constant -1 : index
%38 = muli %arg2, %c-1_15 : index
%c64_16 = constant 64 : index
%39 = addi %38, %c64_16 : index
%40 = cmpi "slt", %c32_14, %39 : index
%41 = select %40, %c32_14, %39 : index
// Output tile of %0 at offsets (%arg0, %arg1, %arg2).
%42 = subview %0[%arg0, %arg1, %arg2] [%33, %37, %41] [1, 1, 1] : memref<5x1x64xf32> to memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 64 + s0 + d1 * 64 + d2)>>
// Input tile dims %43, %44, %45 bound the inner loops below.
%43 = dim %29, 0 : memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 320 + s0 + d1 * 64 + d2)>>
%44 = dim %29, 1 : memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 320 + s0 + d1 * 64 + d2)>>
%45 = dim %29, 2 : memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 320 + s0 + d1 * 64 + d2)>>
// Output tile dims %46, %47, %48 have no use in the rest of this function.
%46 = dim %42, 0 : memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 64 + s0 + d1 * 64 + d2)>>
%47 = dim %42, 1 : memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 64 + s0 + d1 * 64 + d2)>>
%48 = dim %42, 2 : memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 64 + s0 + d1 * 64 + d2)>>
%c0_17 = constant 0 : index
%c1_18 = constant 1 : index
%c0_19 = constant 0 : index
%c1_20 = constant 1 : index
%c0_21 = constant 0 : index
%c1_22 = constant 1 : index
%49 = "gpu.thread_id"() {dimension = "x"} : () -> index
%50 = "gpu.block_dim"() {dimension = "x"} : () -> index
%51 = "gpu.thread_id"() {dimension = "y"} : () -> index
%52 = "gpu.block_dim"() {dimension = "y"} : () -> index
%53 = "gpu.thread_id"() {dimension = "z"} : () -> index
%54 = "gpu.block_dim"() {dimension = "z"} : () -> index
// Within a tile: start = thread_id, step = block_dim for z (%56, %57),
// y (%59, %60) and x (%62, %63).
%55 = muli %c1_18, %53 : index
%56 = addi %c0_17, %55 : index
%57 = muli %c1_18, %54 : index
%58 = muli %c1_20, %51 : index
%59 = addi %c0_19, %58 : index
%60 = muli %c1_20, %52 : index
%61 = muli %c1_22, %49 : index
%62 = addi %c0_21, %61 : index
%63 = muli %c1_22, %50 : index
// %arg3 ranges over input dim 1 (%44), %arg4 over input dim 0 (%43), and
// %arg5 over input dim 2 (%45).
scf.for %arg3 = %56 to %44 step %57 {
scf.for %arg4 = %59 to %43 step %60 {
scf.for %arg5 = %62 to %45 step %63 {
// Read input[%arg4, %arg3, %arg5] and write it to output[%arg3, %arg4, %arg5].
// %65 loads the current output element and has no use in this function.
%64 = load %29[%arg4, %arg3, %arg5] : memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 320 + s0 + d1 * 64 + d2)>>
%65 = load %42[%arg3, %arg4, %arg5] : memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 64 + s0 + d1 * 64 + d2)>>
store %64, %42[%arg3, %arg4, %arg5] : memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 64 + s0 + d1 * 64 + d2)>>
}
}
}
}
}
}
return
}
// *** IR Dump After Canonicalizer ***
// Dispatch module for @main_ex_dispatch_3 (std/scf/gpu level, subviews still present).
// The function copies every element of the 1x5x64 f32 buffer bound to @arg0 into the
// 5x1x64 f32 buffer bound to @ret0, with the two leading indices swapped.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
func @main_ex_dispatch_3() attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
// Index constants: buffer extents (5, 1, 64), per-iteration tile extents (2, 2, 32),
// and -1, which is used below to negate induction variables.
%c5 = constant 5 : index
%c2 = constant 2 : index
%c1 = constant 1 : index
%c32 = constant 32 : index
%c-1 = constant -1 : index
%c64 = constant 64 : index
// %0 is the output buffer (@ret0), %1 is the input buffer (@arg0).
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5x1x64xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5x64xf32>
%2 = "gpu.block_id"() {dimension = "x"} : () -> index
%3 = "gpu.grid_dim"() {dimension = "x"} : () -> index
%4 = "gpu.block_id"() {dimension = "y"} : () -> index
%5 = "gpu.grid_dim"() {dimension = "y"} : () -> index
%6 = "gpu.block_id"() {dimension = "z"} : () -> index
%7 = "gpu.grid_dim"() {dimension = "z"} : () -> index
// Outer-loop lower bounds (block_id * tile) and steps (grid_dim * tile):
// z uses tile 2, y uses tile 2, x uses tile 32.
%8 = muli %6, %c2 : index
%9 = muli %7, %c2 : index
%10 = muli %4, %c2 : index
%11 = muli %5, %c2 : index
%12 = muli %2, %c32 : index
%13 = muli %3, %c32 : index
// Outer loops: %arg0 in [.., 5) from z, %arg1 in [.., 1) from y, %arg2 in [.., 64) from x.
scf.for %arg0 = %8 to %c5 step %9 {
scf.for %arg1 = %10 to %c1 step %11 {
scf.for %arg2 = %12 to %c64 step %13 {
// %17 = min(2, 1 - %arg1)
%14 = muli %arg1, %c-1 : index
%15 = addi %14, %c1 : index
%16 = cmpi "slt", %c2, %15 : index
%17 = select %16, %c2, %15 : index
// %21 = min(2, 5 - %arg0)
%18 = muli %arg0, %c-1 : index
%19 = addi %18, %c5 : index
%20 = cmpi "slt", %c2, %19 : index
%21 = select %20, %c2, %19 : index
// %25 = min(32, 64 - %arg2)
%22 = muli %arg2, %c-1 : index
%23 = addi %22, %c64 : index
%24 = cmpi "slt", %c32, %23 : index
%25 = select %24, %c32, %23 : index
// Input tile: offsets [%arg1, %arg0, %arg2], sizes [%17, %21, %25], unit strides.
%26 = subview %1[%arg1, %arg0, %arg2] [%17, %21, %25] [1, 1, 1] : memref<1x5x64xf32> to memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 320 + s0 + d1 * 64 + d2)>>
// The same three extents are recomputed for the output tile:
// %30 matches %21, %34 matches %17, %38 matches %25.
%27 = muli %arg0, %c-1 : index
%28 = addi %27, %c5 : index
%29 = cmpi "slt", %c2, %28 : index
%30 = select %29, %c2, %28 : index
%31 = muli %arg1, %c-1 : index
%32 = addi %31, %c1 : index
%33 = cmpi "slt", %c2, %32 : index
%34 = select %33, %c2, %32 : index
%35 = muli %arg2, %c-1 : index
%36 = addi %35, %c64 : index
%37 = cmpi "slt", %c32, %36 : index
%38 = select %37, %c32, %36 : index
// Output tile: offsets [%arg0, %arg1, %arg2], sizes [%30, %34, %38], unit strides.
%39 = subview %0[%arg0, %arg1, %arg2] [%30, %34, %38] [1, 1, 1] : memref<5x1x64xf32> to memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 64 + s0 + d1 * 64 + d2)>>
%40 = "gpu.thread_id"() {dimension = "x"} : () -> index
%41 = "gpu.block_dim"() {dimension = "x"} : () -> index
%42 = "gpu.thread_id"() {dimension = "y"} : () -> index
%43 = "gpu.block_dim"() {dimension = "y"} : () -> index
%44 = "gpu.thread_id"() {dimension = "z"} : () -> index
%45 = "gpu.block_dim"() {dimension = "z"} : () -> index
// Inner loops over the tile: each starts at thread_id and steps by block_dim
// (z -> %arg3, y -> %arg4, x -> %arg5), bounded by the tile extents above.
scf.for %arg3 = %44 to %21 step %45 {
scf.for %arg4 = %42 to %17 step %43 {
scf.for %arg5 = %40 to %25 step %41 {
// Element copy; the first two indices are swapped between load and store.
%46 = load %26[%arg4, %arg3, %arg5] : memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 320 + s0 + d1 * 64 + d2)>>
store %46, %39[%arg3, %arg4, %arg5] : memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 64 + s0 + d1 * 64 + d2)>>
}
}
}
}
}
}
return
}
// Buffer bindings: @arg0 is set 0 / binding 0 (read), @ret0 is set 0 / binding 1 (write, discard).
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After CSE ***
// Dispatch module for @main_ex_dispatch_3 (std/scf/gpu level, subviews still present).
// The function copies the 1x5x64 f32 @arg0 buffer into the 5x1x64 f32 @ret0 buffer with the
// two leading indices swapped. In this form one set of tile extents (%17, %21, %25) feeds
// both subviews.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
func @main_ex_dispatch_3() attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
// Index constants: buffer extents (5, 1, 64), tile extents (2, 2, 32), and -1 for negation.
%c5 = constant 5 : index
%c2 = constant 2 : index
%c1 = constant 1 : index
%c32 = constant 32 : index
%c-1 = constant -1 : index
%c64 = constant 64 : index
// %0 is the output buffer (@ret0), %1 is the input buffer (@arg0).
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5x1x64xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5x64xf32>
%2 = "gpu.block_id"() {dimension = "x"} : () -> index
%3 = "gpu.grid_dim"() {dimension = "x"} : () -> index
%4 = "gpu.block_id"() {dimension = "y"} : () -> index
%5 = "gpu.grid_dim"() {dimension = "y"} : () -> index
%6 = "gpu.block_id"() {dimension = "z"} : () -> index
%7 = "gpu.grid_dim"() {dimension = "z"} : () -> index
// Outer-loop lower bounds (block_id * tile) and steps (grid_dim * tile) for z, y, x.
%8 = muli %6, %c2 : index
%9 = muli %7, %c2 : index
%10 = muli %4, %c2 : index
%11 = muli %5, %c2 : index
%12 = muli %2, %c32 : index
%13 = muli %3, %c32 : index
scf.for %arg0 = %8 to %c5 step %9 {
scf.for %arg1 = %10 to %c1 step %11 {
scf.for %arg2 = %12 to %c64 step %13 {
// %17 = min(2, 1 - %arg1)
%14 = muli %arg1, %c-1 : index
%15 = addi %14, %c1 : index
%16 = cmpi "slt", %c2, %15 : index
%17 = select %16, %c2, %15 : index
// %21 = min(2, 5 - %arg0)
%18 = muli %arg0, %c-1 : index
%19 = addi %18, %c5 : index
%20 = cmpi "slt", %c2, %19 : index
%21 = select %20, %c2, %19 : index
// %25 = min(32, 64 - %arg2)
%22 = muli %arg2, %c-1 : index
%23 = addi %22, %c64 : index
%24 = cmpi "slt", %c32, %23 : index
%25 = select %24, %c32, %23 : index
// Input tile at [%arg1, %arg0, %arg2] and output tile at [%arg0, %arg1, %arg2];
// the size operands are the same values in swapped order for the first two dims.
%26 = subview %1[%arg1, %arg0, %arg2] [%17, %21, %25] [1, 1, 1] : memref<1x5x64xf32> to memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 320 + s0 + d1 * 64 + d2)>>
%27 = subview %0[%arg0, %arg1, %arg2] [%21, %17, %25] [1, 1, 1] : memref<5x1x64xf32> to memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 64 + s0 + d1 * 64 + d2)>>
%28 = "gpu.thread_id"() {dimension = "x"} : () -> index
%29 = "gpu.block_dim"() {dimension = "x"} : () -> index
%30 = "gpu.thread_id"() {dimension = "y"} : () -> index
%31 = "gpu.block_dim"() {dimension = "y"} : () -> index
%32 = "gpu.thread_id"() {dimension = "z"} : () -> index
%33 = "gpu.block_dim"() {dimension = "z"} : () -> index
// Inner loops over the tile: start at thread_id, step by block_dim (z, y, x).
scf.for %arg3 = %32 to %21 step %33 {
scf.for %arg4 = %30 to %17 step %31 {
scf.for %arg5 = %28 to %25 step %29 {
// Element copy; the first two indices are swapped between load and store.
%34 = load %26[%arg4, %arg3, %arg5] : memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 320 + s0 + d1 * 64 + d2)>>
store %34, %27[%arg3, %arg4, %arg5] : memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 64 + s0 + d1 * 64 + d2)>>
}
}
}
}
}
}
return
}
// Buffer bindings: @arg0 is set 0 / binding 0 (read), @ret0 is set 0 / binding 1 (write, discard).
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::ResolveShapeOpsPass ***
// Function-level dump of @main_ex_dispatch_3: copies the 1x5x64 f32 @arg0 buffer into the
// 5x1x64 f32 @ret0 buffer with the two leading indices swapped. Only the func is printed
// here (no enclosing module or hal.interface).
func @main_ex_dispatch_3() attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
// Index constants: buffer extents (5, 1, 64), tile extents (2, 2, 32), and -1 for negation.
%c5 = constant 5 : index
%c2 = constant 2 : index
%c1 = constant 1 : index
%c32 = constant 32 : index
%c-1 = constant -1 : index
%c64 = constant 64 : index
// %0 is the output buffer (@ret0), %1 is the input buffer (@arg0).
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5x1x64xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5x64xf32>
%2 = "gpu.block_id"() {dimension = "x"} : () -> index
%3 = "gpu.grid_dim"() {dimension = "x"} : () -> index
%4 = "gpu.block_id"() {dimension = "y"} : () -> index
%5 = "gpu.grid_dim"() {dimension = "y"} : () -> index
%6 = "gpu.block_id"() {dimension = "z"} : () -> index
%7 = "gpu.grid_dim"() {dimension = "z"} : () -> index
// Outer-loop lower bounds (block_id * tile) and steps (grid_dim * tile) for z, y, x.
%8 = muli %6, %c2 : index
%9 = muli %7, %c2 : index
%10 = muli %4, %c2 : index
%11 = muli %5, %c2 : index
%12 = muli %2, %c32 : index
%13 = muli %3, %c32 : index
scf.for %arg0 = %8 to %c5 step %9 {
scf.for %arg1 = %10 to %c1 step %11 {
scf.for %arg2 = %12 to %c64 step %13 {
// %17 = min(2, 1 - %arg1)
%14 = muli %arg1, %c-1 : index
%15 = addi %14, %c1 : index
%16 = cmpi "slt", %c2, %15 : index
%17 = select %16, %c2, %15 : index
// %21 = min(2, 5 - %arg0)
%18 = muli %arg0, %c-1 : index
%19 = addi %18, %c5 : index
%20 = cmpi "slt", %c2, %19 : index
%21 = select %20, %c2, %19 : index
// %25 = min(32, 64 - %arg2)
%22 = muli %arg2, %c-1 : index
%23 = addi %22, %c64 : index
%24 = cmpi "slt", %c32, %23 : index
%25 = select %24, %c32, %23 : index
// Input tile at [%arg1, %arg0, %arg2]; output tile at [%arg0, %arg1, %arg2].
%26 = subview %1[%arg1, %arg0, %arg2] [%17, %21, %25] [1, 1, 1] : memref<1x5x64xf32> to memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 320 + s0 + d1 * 64 + d2)>>
%27 = subview %0[%arg0, %arg1, %arg2] [%21, %17, %25] [1, 1, 1] : memref<5x1x64xf32> to memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 64 + s0 + d1 * 64 + d2)>>
%28 = "gpu.thread_id"() {dimension = "x"} : () -> index
%29 = "gpu.block_dim"() {dimension = "x"} : () -> index
%30 = "gpu.thread_id"() {dimension = "y"} : () -> index
%31 = "gpu.block_dim"() {dimension = "y"} : () -> index
%32 = "gpu.thread_id"() {dimension = "z"} : () -> index
%33 = "gpu.block_dim"() {dimension = "z"} : () -> index
// Inner loops over the tile: start at thread_id, step by block_dim (z, y, x).
scf.for %arg3 = %32 to %21 step %33 {
scf.for %arg4 = %30 to %17 step %31 {
scf.for %arg5 = %28 to %25 step %29 {
// Element copy; the first two indices are swapped between load and store.
%34 = load %26[%arg4, %arg3, %arg5] : memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 320 + s0 + d1 * 64 + d2)>>
store %34, %27[%arg3, %arg4, %arg5] : memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 64 + s0 + d1 * 64 + d2)>>
}
}
}
}
}
}
return
}
// *** IR Dump After LegalizeStandardForSPIRV ***
// Dispatch module for @main_ex_dispatch_3 with no subview ops: loads and stores address
// the statically shaped interface buffers directly, using (outer offset + inner index)
// for each dimension. The function copies the 1x5x64 f32 @arg0 buffer into the 5x1x64 f32
// @ret0 buffer with the two leading indices swapped.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
func @main_ex_dispatch_3() attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
// Index constants: buffer extents (5, 1, 64), tile extents (2, 2, 32), and -1 for negation.
%c5 = constant 5 : index
%c2 = constant 2 : index
%c1 = constant 1 : index
%c32 = constant 32 : index
%c-1 = constant -1 : index
%c64 = constant 64 : index
// %0 is the output buffer (@ret0), %1 is the input buffer (@arg0).
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5x1x64xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5x64xf32>
%2 = "gpu.block_id"() {dimension = "x"} : () -> index
%3 = "gpu.grid_dim"() {dimension = "x"} : () -> index
%4 = "gpu.block_id"() {dimension = "y"} : () -> index
%5 = "gpu.grid_dim"() {dimension = "y"} : () -> index
%6 = "gpu.block_id"() {dimension = "z"} : () -> index
%7 = "gpu.grid_dim"() {dimension = "z"} : () -> index
// Outer-loop lower bounds (block_id * tile) and steps (grid_dim * tile) for z, y, x.
%8 = muli %6, %c2 : index
%9 = muli %7, %c2 : index
%10 = muli %4, %c2 : index
%11 = muli %5, %c2 : index
%12 = muli %2, %c32 : index
%13 = muli %3, %c32 : index
scf.for %arg0 = %8 to %c5 step %9 {
scf.for %arg1 = %10 to %c1 step %11 {
scf.for %arg2 = %12 to %c64 step %13 {
// %17 = min(2, 1 - %arg1)
%14 = muli %arg1, %c-1 : index
%15 = addi %14, %c1 : index
%16 = cmpi "slt", %c2, %15 : index
%17 = select %16, %c2, %15 : index
// %21 = min(2, 5 - %arg0)
%18 = muli %arg0, %c-1 : index
%19 = addi %18, %c5 : index
%20 = cmpi "slt", %c2, %19 : index
%21 = select %20, %c2, %19 : index
// %25 = min(32, 64 - %arg2)
%22 = muli %arg2, %c-1 : index
%23 = addi %22, %c64 : index
%24 = cmpi "slt", %c32, %23 : index
%25 = select %24, %c32, %23 : index
%26 = "gpu.thread_id"() {dimension = "x"} : () -> index
%27 = "gpu.block_dim"() {dimension = "x"} : () -> index
%28 = "gpu.thread_id"() {dimension = "y"} : () -> index
%29 = "gpu.block_dim"() {dimension = "y"} : () -> index
%30 = "gpu.thread_id"() {dimension = "z"} : () -> index
%31 = "gpu.block_dim"() {dimension = "z"} : () -> index
// Inner loops over the tile: start at thread_id, step by block_dim (z, y, x).
scf.for %arg3 = %30 to %21 step %31 {
scf.for %arg4 = %28 to %17 step %29 {
scf.for %arg5 = %26 to %25 step %27 {
// Input indices: [%arg1 + %arg4, %arg0 + %arg3, %arg2 + %arg5].
%32 = addi %arg1, %arg4 : index
%33 = addi %arg0, %arg3 : index
%34 = addi %arg2, %arg5 : index
%35 = load %1[%32, %33, %34] : memref<1x5x64xf32>
// Output indices: the same three sums, recomputed, with the first two swapped.
%36 = addi %arg0, %arg3 : index
%37 = addi %arg1, %arg4 : index
%38 = addi %arg2, %arg5 : index
store %35, %0[%36, %37, %38] : memref<5x1x64xf32>
}
}
}
}
}
}
return
}
// Buffer bindings: @arg0 is set 0 / binding 0 (read), @ret0 is set 0 / binding 1 (write, discard).
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After Canonicalizer ***
// Dispatch module for @main_ex_dispatch_3 (std/scf/gpu level, direct buffer accesses).
// The function copies the 1x5x64 f32 @arg0 buffer into the 5x1x64 f32 @ret0 buffer with the
// two leading indices swapped. The load and store index sums are still computed separately.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
func @main_ex_dispatch_3() attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
// Index constants: buffer extents (5, 1, 64), tile extents (2, 2, 32), and -1 for negation.
%c5 = constant 5 : index
%c2 = constant 2 : index
%c1 = constant 1 : index
%c32 = constant 32 : index
%c-1 = constant -1 : index
%c64 = constant 64 : index
// %0 is the output buffer (@ret0), %1 is the input buffer (@arg0).
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5x1x64xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5x64xf32>
%2 = "gpu.block_id"() {dimension = "x"} : () -> index
%3 = "gpu.grid_dim"() {dimension = "x"} : () -> index
%4 = "gpu.block_id"() {dimension = "y"} : () -> index
%5 = "gpu.grid_dim"() {dimension = "y"} : () -> index
%6 = "gpu.block_id"() {dimension = "z"} : () -> index
%7 = "gpu.grid_dim"() {dimension = "z"} : () -> index
// Outer-loop lower bounds (block_id * tile) and steps (grid_dim * tile) for z, y, x.
%8 = muli %6, %c2 : index
%9 = muli %7, %c2 : index
%10 = muli %4, %c2 : index
%11 = muli %5, %c2 : index
%12 = muli %2, %c32 : index
%13 = muli %3, %c32 : index
scf.for %arg0 = %8 to %c5 step %9 {
scf.for %arg1 = %10 to %c1 step %11 {
scf.for %arg2 = %12 to %c64 step %13 {
// %17 = min(2, 1 - %arg1)
%14 = muli %arg1, %c-1 : index
%15 = addi %14, %c1 : index
%16 = cmpi "slt", %c2, %15 : index
%17 = select %16, %c2, %15 : index
// %21 = min(2, 5 - %arg0)
%18 = muli %arg0, %c-1 : index
%19 = addi %18, %c5 : index
%20 = cmpi "slt", %c2, %19 : index
%21 = select %20, %c2, %19 : index
// %25 = min(32, 64 - %arg2)
%22 = muli %arg2, %c-1 : index
%23 = addi %22, %c64 : index
%24 = cmpi "slt", %c32, %23 : index
%25 = select %24, %c32, %23 : index
%26 = "gpu.thread_id"() {dimension = "x"} : () -> index
%27 = "gpu.block_dim"() {dimension = "x"} : () -> index
%28 = "gpu.thread_id"() {dimension = "y"} : () -> index
%29 = "gpu.block_dim"() {dimension = "y"} : () -> index
%30 = "gpu.thread_id"() {dimension = "z"} : () -> index
%31 = "gpu.block_dim"() {dimension = "z"} : () -> index
// Inner loops over the tile: start at thread_id, step by block_dim (z, y, x).
scf.for %arg3 = %30 to %21 step %31 {
scf.for %arg4 = %28 to %17 step %29 {
scf.for %arg5 = %26 to %25 step %27 {
// Input indices: [%arg1 + %arg4, %arg0 + %arg3, %arg2 + %arg5].
%32 = addi %arg1, %arg4 : index
%33 = addi %arg0, %arg3 : index
%34 = addi %arg2, %arg5 : index
%35 = load %1[%32, %33, %34] : memref<1x5x64xf32>
// Output indices: the same three sums, recomputed, with the first two swapped.
%36 = addi %arg0, %arg3 : index
%37 = addi %arg1, %arg4 : index
%38 = addi %arg2, %arg5 : index
store %35, %0[%36, %37, %38] : memref<5x1x64xf32>
}
}
}
}
}
}
return
}
// Buffer bindings: @arg0 is set 0 / binding 0 (read), @ret0 is set 0 / binding 1 (write, discard).
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After CSE ***
// Dispatch module for @main_ex_dispatch_3 (std/scf/gpu level, direct buffer accesses).
// The function copies the 1x5x64 f32 @arg0 buffer into the 5x1x64 f32 @ret0 buffer with the
// two leading indices swapped. Here a single set of index sums (%32, %33, %34) is shared
// by the load and the store.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
func @main_ex_dispatch_3() attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
// Index constants: buffer extents (5, 1, 64), tile extents (2, 2, 32), and -1 for negation.
%c5 = constant 5 : index
%c2 = constant 2 : index
%c1 = constant 1 : index
%c32 = constant 32 : index
%c-1 = constant -1 : index
%c64 = constant 64 : index
// %0 is the output buffer (@ret0), %1 is the input buffer (@arg0).
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5x1x64xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5x64xf32>
%2 = "gpu.block_id"() {dimension = "x"} : () -> index
%3 = "gpu.grid_dim"() {dimension = "x"} : () -> index
%4 = "gpu.block_id"() {dimension = "y"} : () -> index
%5 = "gpu.grid_dim"() {dimension = "y"} : () -> index
%6 = "gpu.block_id"() {dimension = "z"} : () -> index
%7 = "gpu.grid_dim"() {dimension = "z"} : () -> index
// Outer-loop lower bounds (block_id * tile) and steps (grid_dim * tile) for z, y, x.
%8 = muli %6, %c2 : index
%9 = muli %7, %c2 : index
%10 = muli %4, %c2 : index
%11 = muli %5, %c2 : index
%12 = muli %2, %c32 : index
%13 = muli %3, %c32 : index
scf.for %arg0 = %8 to %c5 step %9 {
scf.for %arg1 = %10 to %c1 step %11 {
scf.for %arg2 = %12 to %c64 step %13 {
// %17 = min(2, 1 - %arg1)
%14 = muli %arg1, %c-1 : index
%15 = addi %14, %c1 : index
%16 = cmpi "slt", %c2, %15 : index
%17 = select %16, %c2, %15 : index
// %21 = min(2, 5 - %arg0)
%18 = muli %arg0, %c-1 : index
%19 = addi %18, %c5 : index
%20 = cmpi "slt", %c2, %19 : index
%21 = select %20, %c2, %19 : index
// %25 = min(32, 64 - %arg2)
%22 = muli %arg2, %c-1 : index
%23 = addi %22, %c64 : index
%24 = cmpi "slt", %c32, %23 : index
%25 = select %24, %c32, %23 : index
%26 = "gpu.thread_id"() {dimension = "x"} : () -> index
%27 = "gpu.block_dim"() {dimension = "x"} : () -> index
%28 = "gpu.thread_id"() {dimension = "y"} : () -> index
%29 = "gpu.block_dim"() {dimension = "y"} : () -> index
%30 = "gpu.thread_id"() {dimension = "z"} : () -> index
%31 = "gpu.block_dim"() {dimension = "z"} : () -> index
// Inner loops over the tile: start at thread_id, step by block_dim (z, y, x).
scf.for %arg3 = %30 to %21 step %31 {
scf.for %arg4 = %28 to %17 step %29 {
scf.for %arg5 = %26 to %25 step %27 {
// %32 = y-derived index, %33 = z-derived index, %34 = x-derived index.
%32 = addi %arg1, %arg4 : index
%33 = addi %arg0, %arg3 : index
%34 = addi %arg2, %arg5 : index
// Read input[%32, %33, %34] and write it to output[%33, %32, %34].
%35 = load %1[%32, %33, %34] : memref<1x5x64xf32>
store %35, %0[%33, %32, %34] : memref<5x1x64xf32>
}
}
}
}
}
}
return
}
// Buffer bindings: @arg0 is set 0 / binding 0 (read), @ret0 is set 0 / binding 1 (write, discard).
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::ConvertToSPIRVPass ***
// Dispatch module whose body is now an spv.module: @main_ex_dispatch_3 expressed in the
// SPIR-V dialect. Index values are i32, each loop is an spv.loop region, and buffer
// accesses are spv.AccessChain ops with a linearized element index into a 320 x f32 array.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
spv.module Logical GLSL450 {
// Built-in inputs read by the function, followed by the two storage buffers
// at (set 0, binding 0) and (set 0, binding 1).
spv.globalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_NumWorkgroups__ built_in("NumWorkgroups") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__resource_var_0_0__ bind(0, 0) : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
spv.globalVariable @__resource_var_0_1__ bind(0, 1) : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
spv.func @main_ex_dispatch_3() "None" attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
// Constants: extents 5 / 1 / 64, tile extents 2 / 32, and -1 for negation.
%0 = spv.constant 5 : i32
%1 = spv.constant 2 : i32
%2 = spv.constant 1 : i32
%3 = spv.constant 32 : i32
%4 = spv.constant -1 : i32
%5 = spv.constant 64 : i32
// %6 addresses the binding-1 buffer (stored to below); %7 addresses the binding-0 buffer (loaded from below).
%6 = spv._address_of @__resource_var_0_1__ : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
%7 = spv._address_of @__resource_var_0_0__ : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
// WorkgroupId and NumWorkgroups components: x -> %10 / %13, y -> %16 / %19, z -> %22 / %25.
%8 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%9 = spv.Load "Input" %8 : vector<3xi32>
%10 = spv.CompositeExtract %9[0 : i32] : vector<3xi32>
%11 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%12 = spv.Load "Input" %11 : vector<3xi32>
%13 = spv.CompositeExtract %12[0 : i32] : vector<3xi32>
%14 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%15 = spv.Load "Input" %14 : vector<3xi32>
%16 = spv.CompositeExtract %15[1 : i32] : vector<3xi32>
%17 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%18 = spv.Load "Input" %17 : vector<3xi32>
%19 = spv.CompositeExtract %18[1 : i32] : vector<3xi32>
%20 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%21 = spv.Load "Input" %20 : vector<3xi32>
%22 = spv.CompositeExtract %21[2 : i32] : vector<3xi32>
%23 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%24 = spv.Load "Input" %23 : vector<3xi32>
%25 = spv.CompositeExtract %24[2 : i32] : vector<3xi32>
// Outer-loop start (WorkgroupId * tile) and step (NumWorkgroups * tile) for z, y, x.
%26 = spv.IMul %22, %1 : i32
%27 = spv.IMul %25, %1 : i32
%28 = spv.IMul %16, %1 : i32
%29 = spv.IMul %19, %1 : i32
%30 = spv.IMul %10, %3 : i32
%31 = spv.IMul %13, %3 : i32
// Outer z loop: %32 runs from %26 while %32 < 5, advancing by %27.
spv.loop {
spv.Branch ^bb1(%26 : i32)
^bb1(%32: i32): // 2 preds: ^bb0, ^bb2
%33 = spv.SLessThan %32, %0 : i32
spv.BranchConditional %33, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Outer y loop: %35 runs from %28 while %35 < 1, advancing by %29.
spv.loop {
spv.Branch ^bb1(%28 : i32)
^bb1(%35: i32): // 2 preds: ^bb0, ^bb2
%36 = spv.SLessThan %35, %2 : i32
spv.BranchConditional %36, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Outer x loop: %38 runs from %30 while %38 < 64, advancing by %31.
spv.loop {
spv.Branch ^bb1(%30 : i32)
^bb1(%38: i32): // 2 preds: ^bb0, ^bb2
%39 = spv.SLessThan %38, %5 : i32
spv.BranchConditional %39, ^bb2, ^bb3
^bb2: // pred: ^bb1
// %43 = min(2, 1 - %35)
%40 = spv.IMul %35, %4 : i32
%41 = spv.IAdd %40, %2 : i32
%42 = spv.SLessThan %1, %41 : i32
%43 = spv.Select %42, %1, %41 : i1, i32
// %47 = min(2, 5 - %32)
%44 = spv.IMul %32, %4 : i32
%45 = spv.IAdd %44, %0 : i32
%46 = spv.SLessThan %1, %45 : i32
%47 = spv.Select %46, %1, %45 : i1, i32
// %51 = min(32, 64 - %38)
%48 = spv.IMul %38, %4 : i32
%49 = spv.IAdd %48, %5 : i32
%50 = spv.SLessThan %3, %49 : i32
%51 = spv.Select %50, %3, %49 : i1, i32
// LocalInvocationId components x / y / z (%54 / %58 / %62); the inner-loop steps are the
// constants 32 / 2 / 2 (%55 / %59 / %63), the same values as local_size on the function.
%52 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%53 = spv.Load "Input" %52 : vector<3xi32>
%54 = spv.CompositeExtract %53[0 : i32] : vector<3xi32>
%55 = spv.constant 32 : i32
%56 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%57 = spv.Load "Input" %56 : vector<3xi32>
%58 = spv.CompositeExtract %57[1 : i32] : vector<3xi32>
%59 = spv.constant 2 : i32
%60 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%61 = spv.Load "Input" %60 : vector<3xi32>
%62 = spv.CompositeExtract %61[2 : i32] : vector<3xi32>
%63 = spv.constant 2 : i32
// Inner z loop: %65 runs from %62 while %65 < %47, advancing by %63.
spv.loop {
spv.Branch ^bb1(%62 : i32)
^bb1(%65: i32): // 2 preds: ^bb0, ^bb2
%66 = spv.SLessThan %65, %47 : i32
spv.BranchConditional %66, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner y loop: %68 runs from %58 while %68 < %43, advancing by %59.
spv.loop {
spv.Branch ^bb1(%58 : i32)
^bb1(%68: i32): // 2 preds: ^bb0, ^bb2
%69 = spv.SLessThan %68, %43 : i32
spv.BranchConditional %69, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner x loop: %71 runs from %54 while %71 < %51, advancing by %55.
spv.loop {
spv.Branch ^bb1(%54 : i32)
^bb1(%71: i32): // 2 preds: ^bb0, ^bb2
%72 = spv.SLessThan %71, %51 : i32
spv.BranchConditional %72, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Per-dimension element indices: %73 (y), %74 (z), %75 (x).
%73 = spv.IAdd %35, %68 : i32
%74 = spv.IAdd %32, %65 : i32
%75 = spv.IAdd %38, %71 : i32
// Load element index: %86 = 0 + 320 * %73 + 64 * %74 + 1 * %75.
%76 = spv.constant 0 : i32
%77 = spv.constant 0 : i32
%78 = spv.constant 320 : i32
%79 = spv.IMul %78, %73 : i32
%80 = spv.IAdd %77, %79 : i32
%81 = spv.constant 64 : i32
%82 = spv.IMul %81, %74 : i32
%83 = spv.IAdd %80, %82 : i32
%84 = spv.constant 1 : i32
%85 = spv.IMul %84, %75 : i32
%86 = spv.IAdd %83, %85 : i32
%87 = spv.AccessChain %7[%76, %86] : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
%88 = spv.Load "StorageBuffer" %87 : f32
// Store element index: %99 = 0 + 64 * %74 + 64 * %73 + 1 * %75.
%89 = spv.constant 0 : i32
%90 = spv.constant 0 : i32
%91 = spv.constant 64 : i32
%92 = spv.IMul %91, %74 : i32
%93 = spv.IAdd %90, %92 : i32
%94 = spv.constant 64 : i32
%95 = spv.IMul %94, %73 : i32
%96 = spv.IAdd %93, %95 : i32
%97 = spv.constant 1 : i32
%98 = spv.IMul %97, %75 : i32
%99 = spv.IAdd %96, %98 : i32
%100 = spv.AccessChain %6[%89, %99] : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
spv.Store "StorageBuffer" %100, %88 : f32
// Back-edges: each loop adds its step to its induction value and branches to its header.
%101 = spv.IAdd %71, %55 : i32
spv.Branch ^bb1(%101 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%70 = spv.IAdd %68, %59 : i32
spv.Branch ^bb1(%70 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%67 = spv.IAdd %65, %63 : i32
spv.Branch ^bb1(%67 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%64 = spv.IAdd %38, %31 : i32
spv.Branch ^bb1(%64 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%37 = spv.IAdd %35, %29 : i32
spv.Branch ^bb1(%37 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%34 = spv.IAdd %32, %27 : i32
spv.Branch ^bb1(%34 : i32)
^bb3: // pred: ^bb1
spv._merge
}
spv.Return
}
}
// Buffer bindings: @arg0 is set 0 / binding 0 (read), @ret0 is set 0 / binding 1 (write, discard).
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After SPIRVLowerABIAttributes ***
// SPIR-V module for @main_ex_dispatch_3. The function carries no entry-point ABI attribute;
// the entry point and its LocalSize (32, 2, 2) are declared by the spv.EntryPoint and
// spv.ExecutionMode ops at the end of the module. The function body reads one f32 at a time
// from the binding-0 buffer and writes it to the binding-1 buffer at a differently
// linearized index.
spv.module Logical GLSL450 {
// Built-in inputs read by the function, followed by the two storage buffers
// at (set 0, binding 0) and (set 0, binding 1).
spv.globalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_NumWorkgroups__ built_in("NumWorkgroups") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__resource_var_0_0__ bind(0, 0) : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
spv.globalVariable @__resource_var_0_1__ bind(0, 1) : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
spv.func @main_ex_dispatch_3() "None" {
// Constants: extents 5 / 1 / 64, tile extents 2 / 32, and -1 for negation.
%0 = spv.constant 5 : i32
%1 = spv.constant 2 : i32
%2 = spv.constant 1 : i32
%3 = spv.constant 32 : i32
%4 = spv.constant -1 : i32
%5 = spv.constant 64 : i32
// %6 addresses the binding-1 buffer (stored to below); %7 addresses the binding-0 buffer (loaded from below).
%6 = spv._address_of @__resource_var_0_1__ : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
%7 = spv._address_of @__resource_var_0_0__ : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
// WorkgroupId and NumWorkgroups components: x -> %10 / %13, y -> %16 / %19, z -> %22 / %25.
%8 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%9 = spv.Load "Input" %8 : vector<3xi32>
%10 = spv.CompositeExtract %9[0 : i32] : vector<3xi32>
%11 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%12 = spv.Load "Input" %11 : vector<3xi32>
%13 = spv.CompositeExtract %12[0 : i32] : vector<3xi32>
%14 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%15 = spv.Load "Input" %14 : vector<3xi32>
%16 = spv.CompositeExtract %15[1 : i32] : vector<3xi32>
%17 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%18 = spv.Load "Input" %17 : vector<3xi32>
%19 = spv.CompositeExtract %18[1 : i32] : vector<3xi32>
%20 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%21 = spv.Load "Input" %20 : vector<3xi32>
%22 = spv.CompositeExtract %21[2 : i32] : vector<3xi32>
%23 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%24 = spv.Load "Input" %23 : vector<3xi32>
%25 = spv.CompositeExtract %24[2 : i32] : vector<3xi32>
// Outer-loop start (WorkgroupId * tile) and step (NumWorkgroups * tile) for z, y, x.
%26 = spv.IMul %22, %1 : i32
%27 = spv.IMul %25, %1 : i32
%28 = spv.IMul %16, %1 : i32
%29 = spv.IMul %19, %1 : i32
%30 = spv.IMul %10, %3 : i32
%31 = spv.IMul %13, %3 : i32
// Outer z loop: %32 runs from %26 while %32 < 5, advancing by %27.
spv.loop {
spv.Branch ^bb1(%26 : i32)
^bb1(%32: i32): // 2 preds: ^bb0, ^bb2
%33 = spv.SLessThan %32, %0 : i32
spv.BranchConditional %33, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Outer y loop: %35 runs from %28 while %35 < 1, advancing by %29.
spv.loop {
spv.Branch ^bb1(%28 : i32)
^bb1(%35: i32): // 2 preds: ^bb0, ^bb2
%36 = spv.SLessThan %35, %2 : i32
spv.BranchConditional %36, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Outer x loop: %38 runs from %30 while %38 < 64, advancing by %31.
spv.loop {
spv.Branch ^bb1(%30 : i32)
^bb1(%38: i32): // 2 preds: ^bb0, ^bb2
%39 = spv.SLessThan %38, %5 : i32
spv.BranchConditional %39, ^bb2, ^bb3
^bb2: // pred: ^bb1
// %43 = min(2, 1 - %35)
%40 = spv.IMul %35, %4 : i32
%41 = spv.IAdd %40, %2 : i32
%42 = spv.SLessThan %1, %41 : i32
%43 = spv.Select %42, %1, %41 : i1, i32
// %47 = min(2, 5 - %32)
%44 = spv.IMul %32, %4 : i32
%45 = spv.IAdd %44, %0 : i32
%46 = spv.SLessThan %1, %45 : i32
%47 = spv.Select %46, %1, %45 : i1, i32
// %51 = min(32, 64 - %38)
%48 = spv.IMul %38, %4 : i32
%49 = spv.IAdd %48, %5 : i32
%50 = spv.SLessThan %3, %49 : i32
%51 = spv.Select %50, %3, %49 : i1, i32
// LocalInvocationId components x / y / z (%54 / %58 / %62); the inner-loop steps are the
// constants 32 / 2 / 2 (%55 / %59 / %63), the same values as the LocalSize execution mode.
%52 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%53 = spv.Load "Input" %52 : vector<3xi32>
%54 = spv.CompositeExtract %53[0 : i32] : vector<3xi32>
%55 = spv.constant 32 : i32
%56 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%57 = spv.Load "Input" %56 : vector<3xi32>
%58 = spv.CompositeExtract %57[1 : i32] : vector<3xi32>
%59 = spv.constant 2 : i32
%60 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%61 = spv.Load "Input" %60 : vector<3xi32>
%62 = spv.CompositeExtract %61[2 : i32] : vector<3xi32>
%63 = spv.constant 2 : i32
// Inner z loop: %65 runs from %62 while %65 < %47, advancing by %63.
spv.loop {
spv.Branch ^bb1(%62 : i32)
^bb1(%65: i32): // 2 preds: ^bb0, ^bb2
%66 = spv.SLessThan %65, %47 : i32
spv.BranchConditional %66, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner y loop: %68 runs from %58 while %68 < %43, advancing by %59.
spv.loop {
spv.Branch ^bb1(%58 : i32)
^bb1(%68: i32): // 2 preds: ^bb0, ^bb2
%69 = spv.SLessThan %68, %43 : i32
spv.BranchConditional %69, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner x loop: %71 runs from %54 while %71 < %51, advancing by %55.
spv.loop {
spv.Branch ^bb1(%54 : i32)
^bb1(%71: i32): // 2 preds: ^bb0, ^bb2
%72 = spv.SLessThan %71, %51 : i32
spv.BranchConditional %72, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Per-dimension element indices: %73 (y), %74 (z), %75 (x).
%73 = spv.IAdd %35, %68 : i32
%74 = spv.IAdd %32, %65 : i32
%75 = spv.IAdd %38, %71 : i32
// Load element index: %86 = 0 + 320 * %73 + 64 * %74 + 1 * %75.
%76 = spv.constant 0 : i32
%77 = spv.constant 0 : i32
%78 = spv.constant 320 : i32
%79 = spv.IMul %78, %73 : i32
%80 = spv.IAdd %77, %79 : i32
%81 = spv.constant 64 : i32
%82 = spv.IMul %81, %74 : i32
%83 = spv.IAdd %80, %82 : i32
%84 = spv.constant 1 : i32
%85 = spv.IMul %84, %75 : i32
%86 = spv.IAdd %83, %85 : i32
%87 = spv.AccessChain %7[%76, %86] : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
%88 = spv.Load "StorageBuffer" %87 : f32
// Store element index: %99 = 0 + 64 * %74 + 64 * %73 + 1 * %75.
%89 = spv.constant 0 : i32
%90 = spv.constant 0 : i32
%91 = spv.constant 64 : i32
%92 = spv.IMul %91, %74 : i32
%93 = spv.IAdd %90, %92 : i32
%94 = spv.constant 64 : i32
%95 = spv.IMul %94, %73 : i32
%96 = spv.IAdd %93, %95 : i32
%97 = spv.constant 1 : i32
%98 = spv.IMul %97, %75 : i32
%99 = spv.IAdd %96, %98 : i32
%100 = spv.AccessChain %6[%89, %99] : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
spv.Store "StorageBuffer" %100, %88 : f32
// Back-edges: each loop adds its step to its induction value and branches to its header.
%101 = spv.IAdd %71, %55 : i32
spv.Branch ^bb1(%101 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%70 = spv.IAdd %68, %59 : i32
spv.Branch ^bb1(%70 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%67 = spv.IAdd %65, %63 : i32
spv.Branch ^bb1(%67 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%64 = spv.IAdd %38, %31 : i32
spv.Branch ^bb1(%64 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%37 = spv.IAdd %35, %29 : i32
spv.Branch ^bb1(%37 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%34 = spv.IAdd %32, %27 : i32
spv.Branch ^bb1(%34 : i32)
^bb3: // pred: ^bb1
spv._merge
}
spv.Return
}
// GLCompute entry point listing the three built-in interface variables, with a fixed
// local size of 32 x 2 x 2.
spv.EntryPoint "GLCompute" @main_ex_dispatch_3, @__builtin_var_WorkgroupId__, @__builtin_var_NumWorkgroups__, @__builtin_var_LocalInvocationId__
spv.ExecutionMode @main_ex_dispatch_3 "LocalSize", 32, 2, 2
}
// *** IR Dump After Canonicalizer ***
// SPIR-V module containing the compute entry point @main_ex_dispatch_3.
// The function reads f32 elements from the buffer at bind(0, 0) and writes them to the
// buffer at bind(0, 1), using six nested spv.loop regions to produce the element indices.
spv.module Logical GLSL450 {
// Builtin inputs read by the function, followed by the two 320-element f32 storage buffers.
spv.globalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_NumWorkgroups__ built_in("NumWorkgroups") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__resource_var_0_0__ bind(0, 0) : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
spv.globalVariable @__resource_var_0_1__ bind(0, 1) : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
spv.func @main_ex_dispatch_3() "None" {
// Constants used as loop bounds (5, 1, 64), loop steps (2, 32), index strides (320, 64),
// the negation factor (-1) and the struct member index (0).
%0 = spv.constant 5 : i32
%1 = spv.constant 1 : i32
%2 = spv.constant -1 : i32
%3 = spv.constant 32 : i32
%4 = spv.constant 2 : i32
%5 = spv.constant 320 : i32
%6 = spv.constant 0 : i32
%7 = spv.constant 64 : i32
// %8 is the store destination (binding 1); %9 is the load source (binding 0).
%8 = spv._address_of @__resource_var_0_1__ : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
%9 = spv._address_of @__resource_var_0_0__ : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
// Components 0, 1 and 2 of WorkgroupId and NumWorkgroups; each extraction re-takes the
// address and re-loads the whole vector.
%10 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%11 = spv.Load "Input" %10 : vector<3xi32>
%12 = spv.CompositeExtract %11[0 : i32] : vector<3xi32>
%13 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%14 = spv.Load "Input" %13 : vector<3xi32>
%15 = spv.CompositeExtract %14[0 : i32] : vector<3xi32>
%16 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%17 = spv.Load "Input" %16 : vector<3xi32>
%18 = spv.CompositeExtract %17[1 : i32] : vector<3xi32>
%19 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%20 = spv.Load "Input" %19 : vector<3xi32>
%21 = spv.CompositeExtract %20[1 : i32] : vector<3xi32>
%22 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%23 = spv.Load "Input" %22 : vector<3xi32>
%24 = spv.CompositeExtract %23[2 : i32] : vector<3xi32>
%25 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%26 = spv.Load "Input" %25 : vector<3xi32>
%27 = spv.CompositeExtract %26[2 : i32] : vector<3xi32>
// Outer-loop starts (WorkgroupId component * constant) and steps (NumWorkgroups component * constant):
// component 2 -> %28 / %29 (x2), component 1 -> %30 / %31 (x2), component 0 -> %32 / %33 (x32).
%28 = spv.IMul %24, %4 : i32
%29 = spv.IMul %27, %4 : i32
%30 = spv.IMul %18, %4 : i32
%31 = spv.IMul %21, %4 : i32
%32 = spv.IMul %12, %3 : i32
%33 = spv.IMul %15, %3 : i32
// Outer loop 1: %34 starts at %28, runs while %34 < 5, advances by %29.
spv.loop {
spv.Branch ^bb1(%28 : i32)
^bb1(%34: i32): // 2 preds: ^bb0, ^bb2
%35 = spv.SLessThan %34, %0 : i32
spv.BranchConditional %35, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Outer loop 2: %37 starts at %30, runs while %37 < 1, advances by %31.
spv.loop {
spv.Branch ^bb1(%30 : i32)
^bb1(%37: i32): // 2 preds: ^bb0, ^bb2
%38 = spv.SLessThan %37, %1 : i32
spv.BranchConditional %38, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Outer loop 3: %40 starts at %32, runs while %40 < 64, advances by %33.
spv.loop {
spv.Branch ^bb1(%32 : i32)
^bb1(%40: i32): // 2 preds: ^bb0, ^bb2
%41 = spv.SLessThan %40, %7 : i32
spv.BranchConditional %41, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner-loop upper bounds, each computed as a signed minimum via SLessThan + Select:
// %45 = min(2, 1 - %37), %49 = min(2, 5 - %34), %53 = min(32, 64 - %40).
%42 = spv.IMul %37, %2 : i32
%43 = spv.IAdd %42, %1 : i32
%44 = spv.SLessThan %4, %43 : i32
%45 = spv.Select %44, %4, %43 : i1, i32
%46 = spv.IMul %34, %2 : i32
%47 = spv.IAdd %46, %0 : i32
%48 = spv.SLessThan %4, %47 : i32
%49 = spv.Select %48, %4, %47 : i1, i32
%50 = spv.IMul %40, %2 : i32
%51 = spv.IAdd %50, %7 : i32
%52 = spv.SLessThan %3, %51 : i32
%53 = spv.Select %52, %3, %51 : i1, i32
// LocalInvocationId components 0, 1, 2 (%56, %59, %62) are the inner-loop start values.
%54 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%55 = spv.Load "Input" %54 : vector<3xi32>
%56 = spv.CompositeExtract %55[0 : i32] : vector<3xi32>
%57 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%58 = spv.Load "Input" %57 : vector<3xi32>
%59 = spv.CompositeExtract %58[1 : i32] : vector<3xi32>
%60 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%61 = spv.Load "Input" %60 : vector<3xi32>
%62 = spv.CompositeExtract %61[2 : i32] : vector<3xi32>
// Inner loop 1: %64 starts at %62, runs while %64 < %49, advances by 2.
spv.loop {
spv.Branch ^bb1(%62 : i32)
^bb1(%64: i32): // 2 preds: ^bb0, ^bb2
%65 = spv.SLessThan %64, %49 : i32
spv.BranchConditional %65, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner loop 2: %67 starts at %59, runs while %67 < %45, advances by 2.
spv.loop {
spv.Branch ^bb1(%59 : i32)
^bb1(%67: i32): // 2 preds: ^bb0, ^bb2
%68 = spv.SLessThan %67, %45 : i32
spv.BranchConditional %68, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner loop 3: %70 starts at %56, runs while %70 < %53, advances by 32.
spv.loop {
spv.Branch ^bb1(%56 : i32)
^bb1(%70: i32): // 2 preds: ^bb0, ^bb2
%71 = spv.SLessThan %70, %53 : i32
spv.BranchConditional %71, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Combine each outer induction value with its inner counterpart.
%72 = spv.IAdd %37, %67 : i32
%73 = spv.IAdd %34, %64 : i32
%74 = spv.IAdd %40, %70 : i32
// Source element index: %72 * 320 + %73 * 64 + %74.
%75 = spv.IMul %72, %5 : i32
%76 = spv.IMul %73, %7 : i32
%77 = spv.IAdd %75, %76 : i32
%78 = spv.IAdd %77, %74 : i32
%79 = spv.AccessChain %9[%6, %78] : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
%80 = spv.Load "StorageBuffer" %79 : f32
// Destination element index: %73 * 64 + %72 * 64 + %74 (%81 recomputes the same product as %76).
%81 = spv.IMul %73, %7 : i32
%82 = spv.IMul %72, %7 : i32
%83 = spv.IAdd %81, %82 : i32
%84 = spv.IAdd %83, %74 : i32
%85 = spv.AccessChain %8[%6, %84] : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
spv.Store "StorageBuffer" %85, %80 : f32
%86 = spv.IAdd %70, %3 : i32
spv.Branch ^bb1(%86 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%69 = spv.IAdd %67, %4 : i32
spv.Branch ^bb1(%69 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%66 = spv.IAdd %64, %4 : i32
spv.Branch ^bb1(%66 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%63 = spv.IAdd %40, %33 : i32
spv.Branch ^bb1(%63 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%39 = spv.IAdd %37, %31 : i32
spv.Branch ^bb1(%39 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%36 = spv.IAdd %34, %29 : i32
spv.Branch ^bb1(%36 : i32)
^bb3: // pred: ^bb1
spv._merge
}
spv.Return
}
// Entry point declaration; the LocalSize values 32, 2, 2 equal the inner-loop step constants above.
spv.EntryPoint "GLCompute" @main_ex_dispatch_3, @__builtin_var_WorkgroupId__, @__builtin_var_NumWorkgroups__, @__builtin_var_LocalInvocationId__
spv.ExecutionMode @main_ex_dispatch_3 "LocalSize", 32, 2, 2
}
// *** IR Dump After CSE ***
// SPIR-V module containing the compute entry point @main_ex_dispatch_3.
// The function reads f32 elements from the buffer at bind(0, 0) and writes them to the
// buffer at bind(0, 1), using six nested spv.loop regions to produce the element indices.
spv.module Logical GLSL450 {
// Builtin inputs read by the function, followed by the two 320-element f32 storage buffers.
spv.globalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_NumWorkgroups__ built_in("NumWorkgroups") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__resource_var_0_0__ bind(0, 0) : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
spv.globalVariable @__resource_var_0_1__ bind(0, 1) : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
spv.func @main_ex_dispatch_3() "None" {
// Constants used as loop bounds (5, 1, 64), loop steps (2, 32), index strides (320, 64),
// the negation factor (-1) and the struct member index (0).
%0 = spv.constant 5 : i32
%1 = spv.constant 1 : i32
%2 = spv.constant -1 : i32
%3 = spv.constant 32 : i32
%4 = spv.constant 2 : i32
%5 = spv.constant 320 : i32
%6 = spv.constant 0 : i32
%7 = spv.constant 64 : i32
// %8 is the store destination (binding 1); %9 is the load source (binding 0).
%8 = spv._address_of @__resource_var_0_1__ : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
%9 = spv._address_of @__resource_var_0_0__ : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
// The WorkgroupId pointer (%10) and NumWorkgroups pointer (%13) are each taken once
// and loaded three times, once per extracted component.
%10 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%11 = spv.Load "Input" %10 : vector<3xi32>
%12 = spv.CompositeExtract %11[0 : i32] : vector<3xi32>
%13 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%14 = spv.Load "Input" %13 : vector<3xi32>
%15 = spv.CompositeExtract %14[0 : i32] : vector<3xi32>
%16 = spv.Load "Input" %10 : vector<3xi32>
%17 = spv.CompositeExtract %16[1 : i32] : vector<3xi32>
%18 = spv.Load "Input" %13 : vector<3xi32>
%19 = spv.CompositeExtract %18[1 : i32] : vector<3xi32>
%20 = spv.Load "Input" %10 : vector<3xi32>
%21 = spv.CompositeExtract %20[2 : i32] : vector<3xi32>
%22 = spv.Load "Input" %13 : vector<3xi32>
%23 = spv.CompositeExtract %22[2 : i32] : vector<3xi32>
// Outer-loop starts (WorkgroupId component * constant) and steps (NumWorkgroups component * constant):
// component 2 -> %24 / %25 (x2), component 1 -> %26 / %27 (x2), component 0 -> %28 / %29 (x32).
%24 = spv.IMul %21, %4 : i32
%25 = spv.IMul %23, %4 : i32
%26 = spv.IMul %17, %4 : i32
%27 = spv.IMul %19, %4 : i32
%28 = spv.IMul %12, %3 : i32
%29 = spv.IMul %15, %3 : i32
// Outer loop 1: %30 starts at %24, runs while %30 < 5, advances by %25.
spv.loop {
spv.Branch ^bb1(%24 : i32)
^bb1(%30: i32): // 2 preds: ^bb0, ^bb2
%31 = spv.SLessThan %30, %0 : i32
spv.BranchConditional %31, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Outer loop 2: %33 starts at %26, runs while %33 < 1, advances by %27.
spv.loop {
spv.Branch ^bb1(%26 : i32)
^bb1(%33: i32): // 2 preds: ^bb0, ^bb2
%34 = spv.SLessThan %33, %1 : i32
spv.BranchConditional %34, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Outer loop 3: %36 starts at %28, runs while %36 < 64, advances by %29.
spv.loop {
spv.Branch ^bb1(%28 : i32)
^bb1(%36: i32): // 2 preds: ^bb0, ^bb2
%37 = spv.SLessThan %36, %7 : i32
spv.BranchConditional %37, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner-loop upper bounds, each computed as a signed minimum via SLessThan + Select:
// %41 = min(2, 1 - %33), %45 = min(2, 5 - %30), %49 = min(32, 64 - %36).
%38 = spv.IMul %33, %2 : i32
%39 = spv.IAdd %38, %1 : i32
%40 = spv.SLessThan %4, %39 : i32
%41 = spv.Select %40, %4, %39 : i1, i32
%42 = spv.IMul %30, %2 : i32
%43 = spv.IAdd %42, %0 : i32
%44 = spv.SLessThan %4, %43 : i32
%45 = spv.Select %44, %4, %43 : i1, i32
%46 = spv.IMul %36, %2 : i32
%47 = spv.IAdd %46, %7 : i32
%48 = spv.SLessThan %3, %47 : i32
%49 = spv.Select %48, %3, %47 : i1, i32
// LocalInvocationId components 0, 1, 2 (%52, %54, %56) are the inner-loop start values.
%50 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%51 = spv.Load "Input" %50 : vector<3xi32>
%52 = spv.CompositeExtract %51[0 : i32] : vector<3xi32>
%53 = spv.Load "Input" %50 : vector<3xi32>
%54 = spv.CompositeExtract %53[1 : i32] : vector<3xi32>
%55 = spv.Load "Input" %50 : vector<3xi32>
%56 = spv.CompositeExtract %55[2 : i32] : vector<3xi32>
// Inner loop 1: %58 starts at %56, runs while %58 < %45, advances by 2.
spv.loop {
spv.Branch ^bb1(%56 : i32)
^bb1(%58: i32): // 2 preds: ^bb0, ^bb2
%59 = spv.SLessThan %58, %45 : i32
spv.BranchConditional %59, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner loop 2: %61 starts at %54, runs while %61 < %41, advances by 2.
spv.loop {
spv.Branch ^bb1(%54 : i32)
^bb1(%61: i32): // 2 preds: ^bb0, ^bb2
%62 = spv.SLessThan %61, %41 : i32
spv.BranchConditional %62, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner loop 3: %64 starts at %52, runs while %64 < %49, advances by 32.
spv.loop {
spv.Branch ^bb1(%52 : i32)
^bb1(%64: i32): // 2 preds: ^bb0, ^bb2
%65 = spv.SLessThan %64, %49 : i32
spv.BranchConditional %65, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Combine each outer induction value with its inner counterpart.
%66 = spv.IAdd %33, %61 : i32
%67 = spv.IAdd %30, %58 : i32
%68 = spv.IAdd %36, %64 : i32
// Source element index: %66 * 320 + %67 * 64 + %68.
%69 = spv.IMul %66, %5 : i32
%70 = spv.IMul %67, %7 : i32
%71 = spv.IAdd %69, %70 : i32
%72 = spv.IAdd %71, %68 : i32
%73 = spv.AccessChain %9[%6, %72] : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
%74 = spv.Load "StorageBuffer" %73 : f32
// Destination element index: %67 * 64 + %66 * 64 + %68 (the product %70 is reused here).
%75 = spv.IMul %66, %7 : i32
%76 = spv.IAdd %70, %75 : i32
%77 = spv.IAdd %76, %68 : i32
%78 = spv.AccessChain %8[%6, %77] : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
spv.Store "StorageBuffer" %78, %74 : f32
%79 = spv.IAdd %64, %3 : i32
spv.Branch ^bb1(%79 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%63 = spv.IAdd %61, %4 : i32
spv.Branch ^bb1(%63 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%60 = spv.IAdd %58, %4 : i32
spv.Branch ^bb1(%60 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%57 = spv.IAdd %36, %29 : i32
spv.Branch ^bb1(%57 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%35 = spv.IAdd %33, %27 : i32
spv.Branch ^bb1(%35 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%32 = spv.IAdd %30, %25 : i32
spv.Branch ^bb1(%32 : i32)
^bb3: // pred: ^bb1
spv._merge
}
spv.Return
}
// Entry point declaration; the LocalSize values 32, 2, 2 equal the inner-loop step constants above.
spv.EntryPoint "GLCompute" @main_ex_dispatch_3, @__builtin_var_WorkgroupId__, @__builtin_var_NumWorkgroups__, @__builtin_var_LocalInvocationId__
spv.ExecutionMode @main_ex_dispatch_3 "LocalSize", 32, 2, 2
}
// *** IR Dump After SPIRVUpdateVCE ***
// SPIR-V module containing the compute entry point @main_ex_dispatch_3.
// The module header carries a `requires #spv.vce<...>` clause listing version v1.0, the Shader
// capability and the SPV_KHR_storage_buffer_storage_class extension.
// The function reads f32 elements from the buffer at bind(0, 0) and writes them to the
// buffer at bind(0, 1), using six nested spv.loop regions to produce the element indices.
spv.module Logical GLSL450 requires #spv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class]> {
// Builtin inputs read by the function, followed by the two 320-element f32 storage buffers.
spv.globalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_NumWorkgroups__ built_in("NumWorkgroups") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__resource_var_0_0__ bind(0, 0) : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
spv.globalVariable @__resource_var_0_1__ bind(0, 1) : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
spv.func @main_ex_dispatch_3() "None" {
// Constants used as loop bounds (5, 1, 64), loop steps (2, 32), index strides (320, 64),
// the negation factor (-1) and the struct member index (0).
%0 = spv.constant 5 : i32
%1 = spv.constant 1 : i32
%2 = spv.constant -1 : i32
%3 = spv.constant 32 : i32
%4 = spv.constant 2 : i32
%5 = spv.constant 320 : i32
%6 = spv.constant 0 : i32
%7 = spv.constant 64 : i32
// %8 is the store destination (binding 1); %9 is the load source (binding 0).
%8 = spv._address_of @__resource_var_0_1__ : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
%9 = spv._address_of @__resource_var_0_0__ : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
// The WorkgroupId pointer (%10) and NumWorkgroups pointer (%13) are each taken once
// and loaded three times, once per extracted component.
%10 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%11 = spv.Load "Input" %10 : vector<3xi32>
%12 = spv.CompositeExtract %11[0 : i32] : vector<3xi32>
%13 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%14 = spv.Load "Input" %13 : vector<3xi32>
%15 = spv.CompositeExtract %14[0 : i32] : vector<3xi32>
%16 = spv.Load "Input" %10 : vector<3xi32>
%17 = spv.CompositeExtract %16[1 : i32] : vector<3xi32>
%18 = spv.Load "Input" %13 : vector<3xi32>
%19 = spv.CompositeExtract %18[1 : i32] : vector<3xi32>
%20 = spv.Load "Input" %10 : vector<3xi32>
%21 = spv.CompositeExtract %20[2 : i32] : vector<3xi32>
%22 = spv.Load "Input" %13 : vector<3xi32>
%23 = spv.CompositeExtract %22[2 : i32] : vector<3xi32>
// Outer-loop starts (WorkgroupId component * constant) and steps (NumWorkgroups component * constant):
// component 2 -> %24 / %25 (x2), component 1 -> %26 / %27 (x2), component 0 -> %28 / %29 (x32).
%24 = spv.IMul %21, %4 : i32
%25 = spv.IMul %23, %4 : i32
%26 = spv.IMul %17, %4 : i32
%27 = spv.IMul %19, %4 : i32
%28 = spv.IMul %12, %3 : i32
%29 = spv.IMul %15, %3 : i32
// Outer loop 1: %30 starts at %24, runs while %30 < 5, advances by %25.
spv.loop {
spv.Branch ^bb1(%24 : i32)
^bb1(%30: i32): // 2 preds: ^bb0, ^bb2
%31 = spv.SLessThan %30, %0 : i32
spv.BranchConditional %31, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Outer loop 2: %33 starts at %26, runs while %33 < 1, advances by %27.
spv.loop {
spv.Branch ^bb1(%26 : i32)
^bb1(%33: i32): // 2 preds: ^bb0, ^bb2
%34 = spv.SLessThan %33, %1 : i32
spv.BranchConditional %34, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Outer loop 3: %36 starts at %28, runs while %36 < 64, advances by %29.
spv.loop {
spv.Branch ^bb1(%28 : i32)
^bb1(%36: i32): // 2 preds: ^bb0, ^bb2
%37 = spv.SLessThan %36, %7 : i32
spv.BranchConditional %37, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner-loop upper bounds, each computed as a signed minimum via SLessThan + Select:
// %41 = min(2, 1 - %33), %45 = min(2, 5 - %30), %49 = min(32, 64 - %36).
%38 = spv.IMul %33, %2 : i32
%39 = spv.IAdd %38, %1 : i32
%40 = spv.SLessThan %4, %39 : i32
%41 = spv.Select %40, %4, %39 : i1, i32
%42 = spv.IMul %30, %2 : i32
%43 = spv.IAdd %42, %0 : i32
%44 = spv.SLessThan %4, %43 : i32
%45 = spv.Select %44, %4, %43 : i1, i32
%46 = spv.IMul %36, %2 : i32
%47 = spv.IAdd %46, %7 : i32
%48 = spv.SLessThan %3, %47 : i32
%49 = spv.Select %48, %3, %47 : i1, i32
// LocalInvocationId components 0, 1, 2 (%52, %54, %56) are the inner-loop start values.
%50 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%51 = spv.Load "Input" %50 : vector<3xi32>
%52 = spv.CompositeExtract %51[0 : i32] : vector<3xi32>
%53 = spv.Load "Input" %50 : vector<3xi32>
%54 = spv.CompositeExtract %53[1 : i32] : vector<3xi32>
%55 = spv.Load "Input" %50 : vector<3xi32>
%56 = spv.CompositeExtract %55[2 : i32] : vector<3xi32>
// Inner loop 1: %58 starts at %56, runs while %58 < %45, advances by 2.
spv.loop {
spv.Branch ^bb1(%56 : i32)
^bb1(%58: i32): // 2 preds: ^bb0, ^bb2
%59 = spv.SLessThan %58, %45 : i32
spv.BranchConditional %59, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner loop 2: %61 starts at %54, runs while %61 < %41, advances by 2.
spv.loop {
spv.Branch ^bb1(%54 : i32)
^bb1(%61: i32): // 2 preds: ^bb0, ^bb2
%62 = spv.SLessThan %61, %41 : i32
spv.BranchConditional %62, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner loop 3: %64 starts at %52, runs while %64 < %49, advances by 32.
spv.loop {
spv.Branch ^bb1(%52 : i32)
^bb1(%64: i32): // 2 preds: ^bb0, ^bb2
%65 = spv.SLessThan %64, %49 : i32
spv.BranchConditional %65, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Combine each outer induction value with its inner counterpart.
%66 = spv.IAdd %33, %61 : i32
%67 = spv.IAdd %30, %58 : i32
%68 = spv.IAdd %36, %64 : i32
// Source element index: %66 * 320 + %67 * 64 + %68.
%69 = spv.IMul %66, %5 : i32
%70 = spv.IMul %67, %7 : i32
%71 = spv.IAdd %69, %70 : i32
%72 = spv.IAdd %71, %68 : i32
%73 = spv.AccessChain %9[%6, %72] : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
%74 = spv.Load "StorageBuffer" %73 : f32
// Destination element index: %67 * 64 + %66 * 64 + %68 (the product %70 is reused here).
%75 = spv.IMul %66, %7 : i32
%76 = spv.IAdd %70, %75 : i32
%77 = spv.IAdd %76, %68 : i32
%78 = spv.AccessChain %8[%6, %77] : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
spv.Store "StorageBuffer" %78, %74 : f32
%79 = spv.IAdd %64, %3 : i32
spv.Branch ^bb1(%79 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%63 = spv.IAdd %61, %4 : i32
spv.Branch ^bb1(%63 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%60 = spv.IAdd %58, %4 : i32
spv.Branch ^bb1(%60 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%57 = spv.IAdd %36, %29 : i32
spv.Branch ^bb1(%57 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%35 = spv.IAdd %33, %27 : i32
spv.Branch ^bb1(%35 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%32 = spv.IAdd %30, %25 : i32
spv.Branch ^bb1(%32 : i32)
^bb3: // pred: ^bb1
spv._merge
}
spv.Return
}
// Entry point declaration; the LocalSize values 32, 2, 2 equal the inner-loop step constants above.
spv.EntryPoint "GLCompute" @main_ex_dispatch_3, @__builtin_var_WorkgroupId__, @__builtin_var_NumWorkgroups__, @__builtin_var_LocalInvocationId__
spv.ExecutionMode @main_ex_dispatch_3 "LocalSize", 32, 2, 2
}
// *** IR Dump After mlir::iree_compiler::IREE::HAL::TranslateExecutablesPass ***
// HAL executable wrapping dispatch 3. It declares the @legacy_io interface (one read binding,
// one write binding), an entry point with signature (tensor<1x5x64xf32>) -> tensor<5x1x64xf32>,
// and a "vulkan*" target whose payload is the SPIR-V module below.
hal.executable @main_ex_dispatch_3 attributes {sym_visibility = "private"} {
hal.interface @legacy_io {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
hal.executable.entry_point @main_ex_dispatch_3 attributes {interface = @legacy_io, ordinal = 0 : i32, signature = (tensor<1x5x64xf32>) -> tensor<5x1x64xf32>}
hal.executable.target "vulkan*" {
// The wrapping module's spv.target_env names v1.3, while the nested spv.module requires v1.0.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
spv.module Logical GLSL450 requires #spv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class]> {
// Builtin inputs read by the function, followed by the two 320-element f32 storage buffers.
spv.globalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_NumWorkgroups__ built_in("NumWorkgroups") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__resource_var_0_0__ bind(0, 0) : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
spv.globalVariable @__resource_var_0_1__ bind(0, 1) : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
spv.func @main_ex_dispatch_3() "None" {
// Constants used as loop bounds (5, 1, 64), loop steps (2, 32), index strides (320, 64),
// the negation factor (-1) and the struct member index (0).
%0 = spv.constant 5 : i32
%1 = spv.constant 1 : i32
%2 = spv.constant -1 : i32
%3 = spv.constant 32 : i32
%4 = spv.constant 2 : i32
%5 = spv.constant 320 : i32
%6 = spv.constant 0 : i32
%7 = spv.constant 64 : i32
// %8 is the store destination (binding 1); %9 is the load source (binding 0).
%8 = spv._address_of @__resource_var_0_1__ : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
%9 = spv._address_of @__resource_var_0_0__ : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
// The WorkgroupId pointer (%10) and NumWorkgroups pointer (%13) are each taken once
// and loaded three times, once per extracted component.
%10 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%11 = spv.Load "Input" %10 : vector<3xi32>
%12 = spv.CompositeExtract %11[0 : i32] : vector<3xi32>
%13 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%14 = spv.Load "Input" %13 : vector<3xi32>
%15 = spv.CompositeExtract %14[0 : i32] : vector<3xi32>
%16 = spv.Load "Input" %10 : vector<3xi32>
%17 = spv.CompositeExtract %16[1 : i32] : vector<3xi32>
%18 = spv.Load "Input" %13 : vector<3xi32>
%19 = spv.CompositeExtract %18[1 : i32] : vector<3xi32>
%20 = spv.Load "Input" %10 : vector<3xi32>
%21 = spv.CompositeExtract %20[2 : i32] : vector<3xi32>
%22 = spv.Load "Input" %13 : vector<3xi32>
%23 = spv.CompositeExtract %22[2 : i32] : vector<3xi32>
// Outer-loop starts (WorkgroupId component * constant) and steps (NumWorkgroups component * constant):
// component 2 -> %24 / %25 (x2), component 1 -> %26 / %27 (x2), component 0 -> %28 / %29 (x32).
%24 = spv.IMul %21, %4 : i32
%25 = spv.IMul %23, %4 : i32
%26 = spv.IMul %17, %4 : i32
%27 = spv.IMul %19, %4 : i32
%28 = spv.IMul %12, %3 : i32
%29 = spv.IMul %15, %3 : i32
// Outer loop 1: %30 starts at %24, runs while %30 < 5, advances by %25.
spv.loop {
spv.Branch ^bb1(%24 : i32)
^bb1(%30: i32): // 2 preds: ^bb0, ^bb2
%31 = spv.SLessThan %30, %0 : i32
spv.BranchConditional %31, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Outer loop 2: %33 starts at %26, runs while %33 < 1, advances by %27.
spv.loop {
spv.Branch ^bb1(%26 : i32)
^bb1(%33: i32): // 2 preds: ^bb0, ^bb2
%34 = spv.SLessThan %33, %1 : i32
spv.BranchConditional %34, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Outer loop 3: %36 starts at %28, runs while %36 < 64, advances by %29.
spv.loop {
spv.Branch ^bb1(%28 : i32)
^bb1(%36: i32): // 2 preds: ^bb0, ^bb2
%37 = spv.SLessThan %36, %7 : i32
spv.BranchConditional %37, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner-loop upper bounds, each computed as a signed minimum via SLessThan + Select:
// %41 = min(2, 1 - %33), %45 = min(2, 5 - %30), %49 = min(32, 64 - %36).
%38 = spv.IMul %33, %2 : i32
%39 = spv.IAdd %38, %1 : i32
%40 = spv.SLessThan %4, %39 : i32
%41 = spv.Select %40, %4, %39 : i1, i32
%42 = spv.IMul %30, %2 : i32
%43 = spv.IAdd %42, %0 : i32
%44 = spv.SLessThan %4, %43 : i32
%45 = spv.Select %44, %4, %43 : i1, i32
%46 = spv.IMul %36, %2 : i32
%47 = spv.IAdd %46, %7 : i32
%48 = spv.SLessThan %3, %47 : i32
%49 = spv.Select %48, %3, %47 : i1, i32
// LocalInvocationId components 0, 1, 2 (%52, %54, %56) are the inner-loop start values.
%50 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%51 = spv.Load "Input" %50 : vector<3xi32>
%52 = spv.CompositeExtract %51[0 : i32] : vector<3xi32>
%53 = spv.Load "Input" %50 : vector<3xi32>
%54 = spv.CompositeExtract %53[1 : i32] : vector<3xi32>
%55 = spv.Load "Input" %50 : vector<3xi32>
%56 = spv.CompositeExtract %55[2 : i32] : vector<3xi32>
// Inner loop 1: %58 starts at %56, runs while %58 < %45, advances by 2.
spv.loop {
spv.Branch ^bb1(%56 : i32)
^bb1(%58: i32): // 2 preds: ^bb0, ^bb2
%59 = spv.SLessThan %58, %45 : i32
spv.BranchConditional %59, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner loop 2: %61 starts at %54, runs while %61 < %41, advances by 2.
spv.loop {
spv.Branch ^bb1(%54 : i32)
^bb1(%61: i32): // 2 preds: ^bb0, ^bb2
%62 = spv.SLessThan %61, %41 : i32
spv.BranchConditional %62, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner loop 3: %64 starts at %52, runs while %64 < %49, advances by 32.
spv.loop {
spv.Branch ^bb1(%52 : i32)
^bb1(%64: i32): // 2 preds: ^bb0, ^bb2
%65 = spv.SLessThan %64, %49 : i32
spv.BranchConditional %65, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Combine each outer induction value with its inner counterpart.
%66 = spv.IAdd %33, %61 : i32
%67 = spv.IAdd %30, %58 : i32
%68 = spv.IAdd %36, %64 : i32
// Source element index: %66 * 320 + %67 * 64 + %68.
%69 = spv.IMul %66, %5 : i32
%70 = spv.IMul %67, %7 : i32
%71 = spv.IAdd %69, %70 : i32
%72 = spv.IAdd %71, %68 : i32
%73 = spv.AccessChain %9[%6, %72] : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
%74 = spv.Load "StorageBuffer" %73 : f32
// Destination element index: %67 * 64 + %66 * 64 + %68 (the product %70 is reused here).
%75 = spv.IMul %66, %7 : i32
%76 = spv.IAdd %70, %75 : i32
%77 = spv.IAdd %76, %68 : i32
%78 = spv.AccessChain %8[%6, %77] : !spv.ptr<!spv.struct<!spv.array<320 x f32, stride=4> [0]>, StorageBuffer>
spv.Store "StorageBuffer" %78, %74 : f32
%79 = spv.IAdd %64, %3 : i32
spv.Branch ^bb1(%79 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%63 = spv.IAdd %61, %4 : i32
spv.Branch ^bb1(%63 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%60 = spv.IAdd %58, %4 : i32
spv.Branch ^bb1(%60 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%57 = spv.IAdd %36, %29 : i32
spv.Branch ^bb1(%57 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%35 = spv.IAdd %33, %27 : i32
spv.Branch ^bb1(%35 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%32 = spv.IAdd %30, %25 : i32
spv.Branch ^bb1(%32 : i32)
^bb3: // pred: ^bb1
spv._merge
}
spv.Return
}
// Entry point declaration; the LocalSize values 32, 2, 2 equal the inner-loop step constants above.
spv.EntryPoint "GLCompute" @main_ex_dispatch_3, @__builtin_var_WorkgroupId__, @__builtin_var_NumWorkgroups__, @__builtin_var_LocalInvocationId__
spv.ExecutionMode @main_ex_dispatch_3 "LocalSize", 32, 2, 2
}
// A second copy of the @legacy_io interface, nested in the target module and marked private.
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
}
}
// *** IR Dump After Inliner ***
// Module for dispatch 4, annotated with the SPIR-V target environment.
// @main_ex_dispatch_4 loads a 1x5 f32 tensor from @arg0, transposes it to 5x1,
// reshapes it to 5x1x1 and stores the result to @ret0.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
func @main_ex_dispatch_4() {
// %c0 is the offset used for both the tensor load and the tensor store.
%c0 = constant 0 : index
%0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<1x5xf32>
// permutation = [1, 0] swaps the two dimensions.
%1 = "xla_hlo.transpose"(%0) {permutation = dense<[1, 0]> : tensor<2xi64>} : (tensor<1x5xf32>) -> tensor<5x1xf32>
%2 = "xla_hlo.reshape"(%1) : (tensor<5x1xf32>) -> tensor<5x1x1xf32>
hal.interface.store.tensor %2, @legacy_io::@ret0, offset = %c0 : tensor<5x1x1xf32>
return
}
// I/O interface: @arg0 is a read-only storage buffer, @ret0 is written with "Write|Discard" access.
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After mlir::iree_compiler::Shape::`anonymous-namespace'::TieDynamicShapesPass ***
// Dispatch function: loads a 1x5 f32 tensor from @legacy_io::@arg0, transposes it to 5x1,
// reshapes it to 5x1x1 and stores it to @legacy_io::@ret0. All shapes here are static.
func @main_ex_dispatch_4() {
// %c0 is the offset used for both the tensor load and the tensor store.
%c0 = constant 0 : index
%0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<1x5xf32>
// permutation = [1, 0] swaps the two dimensions.
%1 = "xla_hlo.transpose"(%0) {permutation = dense<[1, 0]> : tensor<2xi64>} : (tensor<1x5xf32>) -> tensor<5x1xf32>
%2 = "xla_hlo.reshape"(%1) : (tensor<5x1xf32>) -> tensor<5x1x1xf32>
hal.interface.store.tensor %2, @legacy_io::@ret0, offset = %c0 : tensor<5x1x1xf32>
return
}
// *** IR Dump After mlir::iree_compiler::Shape::`anonymous-namespace'::MaterializeShapeCalculationsPass ***
// Dispatch function: loads a 1x5 f32 tensor from @legacy_io::@arg0, transposes it to 5x1,
// reshapes it to 5x1x1 and stores it to @legacy_io::@ret0. No shape-calculation ops appear
// in the body; every tensor type is static.
func @main_ex_dispatch_4() {
// %c0 is the offset used for both the tensor load and the tensor store.
%c0 = constant 0 : index
%0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<1x5xf32>
// permutation = [1, 0] swaps the two dimensions.
%1 = "xla_hlo.transpose"(%0) {permutation = dense<[1, 0]> : tensor<2xi64>} : (tensor<1x5xf32>) -> tensor<5x1xf32>
%2 = "xla_hlo.reshape"(%1) : (tensor<5x1xf32>) -> tensor<5x1x1xf32>
hal.interface.store.tensor %2, @legacy_io::@ret0, offset = %c0 : tensor<5x1x1xf32>
return
}
// *** IR Dump After mlir::iree_compiler::Shape::`anonymous-namespace'::HoistShapeCalculations ***
// Dispatch function: loads a 1x5 f32 tensor from @legacy_io::@arg0, transposes it to 5x1,
// reshapes it to 5x1x1 and stores it to @legacy_io::@ret0.
func @main_ex_dispatch_4() {
// %c0 is the offset used for both the tensor load and the tensor store.
%c0 = constant 0 : index
%0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<1x5xf32>
// permutation = [1, 0] swaps the two dimensions.
%1 = "xla_hlo.transpose"(%0) {permutation = dense<[1, 0]> : tensor<2xi64>} : (tensor<1x5xf32>) -> tensor<5x1xf32>
%2 = "xla_hlo.reshape"(%1) : (tensor<5x1xf32>) -> tensor<5x1x1xf32>
hal.interface.store.tensor %2, @legacy_io::@ret0, offset = %c0 : tensor<5x1x1xf32>
return
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::DecomposeHLOClampPass ***
// Dispatch function: loads a 1x5 f32 tensor from @legacy_io::@arg0, transposes it to 5x1,
// reshapes it to 5x1x1 and stores it to @legacy_io::@ret0. The body contains no clamp op.
func @main_ex_dispatch_4() {
// %c0 is the offset used for both the tensor load and the tensor store.
%c0 = constant 0 : index
%0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<1x5xf32>
// permutation = [1, 0] swaps the two dimensions.
%1 = "xla_hlo.transpose"(%0) {permutation = dense<[1, 0]> : tensor<2xi64>} : (tensor<1x5xf32>) -> tensor<5x1xf32>
%2 = "xla_hlo.reshape"(%1) : (tensor<5x1xf32>) -> tensor<5x1x1xf32>
hal.interface.store.tensor %2, @legacy_io::@ret0, offset = %c0 : tensor<5x1x1xf32>
return
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::ConvertHLOToLinalgOnTensorsPass ***
// Dispatch function expressed with linalg.generic ops on tensors: a 1x5 f32 tensor is loaded
// from @arg0, passed through two element-copying generics (1x5 -> 5x1 -> 5x1x1) and stored to @ret0.
func @main_ex_dispatch_4() {
%c0 = constant 0 : index
%0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<1x5xf32>
// First generic: the input map (d0, d1) -> (d1, d0) reads the operand with swapped indices;
// the output map is the identity. The body yields the input element unchanged.
%1 = linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} %0 {
^bb0(%arg0: f32): // no predecessors
linalg.yield %arg0 : f32
}: tensor<1x5xf32> -> tensor<5x1xf32>
// Second generic: the input map (d0, d1, d2) -> (d0, d1) ignores d2, producing the rank-3 result.
%2 = linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} %1 {
^bb0(%arg0: f32): // no predecessors
linalg.yield %arg0 : f32
}: tensor<5x1xf32> -> tensor<5x1x1xf32>
hal.interface.store.tensor %2, @legacy_io::@ret0, offset = %c0 : tensor<5x1x1xf32>
return
}
// *** IR Dump After LinalgFusionOfTensorOps ***
// Module for dispatch 4, annotated with the SPIR-V target environment.
// @main_ex_dispatch_4 contains a single linalg.generic that maps the 1x5 input
// directly to the 5x1x1 output.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
func @main_ex_dispatch_4() {
%c0 = constant 0 : index
%0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<1x5xf32>
// Input map (d0, d1, d2) -> (d1, d0): the operand is read with d0/d1 swapped and d2 ignored.
// Output map is the identity over three parallel dimensions; the body copies the element.
%1 = linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} %0 {
^bb0(%arg0: f32): // no predecessors
linalg.yield %arg0 : f32
}: tensor<1x5xf32> -> tensor<5x1x1xf32>
hal.interface.store.tensor %1, @legacy_io::@ret0, offset = %c0 : tensor<5x1x1xf32>
return
}
// I/O interface: @arg0 is a read-only storage buffer, @ret0 is written with "Write|Discard" access.
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::ConvertHLOToLinalgOnBuffersPass ***
// Dispatch function in buffer form: the interface bindings appear as iree.placeholder memrefs
// and the linalg.generic takes both the input (%1) and the output (%0) buffer as operands.
func @main_ex_dispatch_4() {
// %0 is the output buffer bound to @ret0.
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5x1x1xf32>
// %c0 has no uses in this function as printed.
%c0 = constant 0 : index
// %1 is the input buffer bound to @arg0.
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5xf32>
// Input map (d0, d1, d2) -> (d1, d0), identity output map; the body yields the input element (%arg0)
// and does not use the output block argument (%arg1).
linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} %1, %0 {
^bb0(%arg0: f32, %arg1: f32): // no predecessors
linalg.yield %arg0 : f32
}: memref<1x5xf32>, memref<5x1x1xf32>
return
}
// *** IR Dump After Canonicalizer ***
// Module for dispatch 4, annotated with the SPIR-V target environment.
// @main_ex_dispatch_4 copies elements from the @arg0 buffer (1x5) into the @ret0 buffer (5x1x1)
// with one linalg.generic; the function body holds only the two placeholders and that op.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
func @main_ex_dispatch_4() {
// %0 is the output buffer (@ret0); %1 is the input buffer (@arg0).
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5x1x1xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5xf32>
// Input map (d0, d1, d2) -> (d1, d0), identity output map; the body yields the input element.
linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} %1, %0 {
^bb0(%arg0: f32, %arg1: f32): // no predecessors
linalg.yield %arg0 : f32
}: memref<1x5xf32>, memref<5x1x1xf32>
return
}
// I/O interface: @arg0 is a read-only storage buffer, @ret0 is written with "Write|Discard" access.
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After CSE ***
// Module for dispatch 4, annotated with the SPIR-V target environment.
// @main_ex_dispatch_4 copies elements from the @arg0 buffer (1x5) into the @ret0 buffer (5x1x1)
// with one linalg.generic.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
func @main_ex_dispatch_4() {
// %0 is the output buffer (@ret0); %1 is the input buffer (@arg0).
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5x1x1xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5xf32>
// Input map (d0, d1, d2) -> (d1, d0), identity output map; the body yields the input element.
linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} %1, %0 {
^bb0(%arg0: f32, %arg1: f32): // no predecessors
linalg.yield %arg0 : f32
}: memref<1x5xf32>, memref<5x1x1xf32>
return
}
// I/O interface: @arg0 is a read-only storage buffer, @ret0 is written with "Write|Discard" access.
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::LinalgTileAndFusePass ***
// Dispatch function with the copy wrapped in an scf.parallel loop nest. The function carries
// an spv.entry_point_abi attribute with local_size = [32, 2, 2], and each iteration runs the
// linalg.generic on subviews of the input and output buffers.
func @main_ex_dispatch_4() attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
// Index constants used as loop bounds (0, 5, 1), loop steps (2, 32) and subview strides (1).
%c0 = constant 0 : index
%c5 = constant 5 : index
%c2 = constant 2 : index
%c32 = constant 32 : index
%c1 = constant 1 : index
// %0 is the output buffer (@ret0); %1 is the input buffer (@arg0).
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5x1x1xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5xf32>
// Iterate (%arg0, %arg1, %arg2) from (0, 0, 0) up to (5, 1, 1) in steps of (2, 2, 32).
scf.parallel (%arg0, %arg1, %arg2) = (%c0, %c0, %c0) to (%c5, %c1, %c1) step (%c2, %c2, %c32) {
// Input subview sizes: %2 = min(2, 1 - %arg1), %3 = min(2, 5 - %arg0).
%2 = affine.min affine_map<(d0, d1, d2) -> (2, d1 - d2)>(%c2, %c1, %arg1)
%3 = affine.min affine_map<(d0, d1, d2) -> (2, d1 - d2)>(%c2, %c5, %arg0)
// Input subview is offset by [%arg1, %arg0], i.e. indexed in swapped order relative to the output.
%4 = subview %1[%arg1, %arg0] [%2, %3] [%c1, %c1] : memref<1x5xf32> to memref<?x?xf32, affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>>
// Output subview sizes: %5 = min(2, 5 - %arg0), %6 = min(2, 1 - %arg1), %7 = min(32, 1 - %arg2).
// %5 and %6 repeat the computations of %3 and %2 respectively.
%5 = affine.min affine_map<(d0, d1, d2) -> (2, d1 - d2)>(%c2, %c5, %arg0)
%6 = affine.min affine_map<(d0, d1, d2) -> (2, d1 - d2)>(%c2, %c1, %arg1)
%7 = affine.min affine_map<(d0, d1, d2) -> (32, d1 - d2)>(%c32, %c1, %arg2)
%8 = subview %0[%arg0, %arg1, %arg2] [%5, %6, %7] [%c1, %c1, %c1] : memref<5x1x1xf32> to memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3)>>
// Element copy on the two subviews; the op is tagged with __internal_linalg_transform__ = "workitem".
linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} {__internal_linalg_transform__ = "workitem"} %4, %8 {
^bb0(%arg3: f32, %arg4: f32): // no predecessors
linalg.yield %arg3 : f32
}: memref<?x?xf32, affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>>, memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3)>>
scf.yield
}
return
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::SplitDispatchFunctionPass ***
// Module-level view of dispatch 4: the tiled copy function plus its @legacy_io binding table.
// The module carries a SPIR-V target environment (v1.3, Shader capability,
// SPV_KHR_storage_buffer_storage_class) with the resource limits listed in the attribute.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
// Copies @arg0 (memref<1x5xf32>) into @ret0 (memref<5x1x1xf32>); output element
// (d0, d1, d2) receives input element (d1, d0) per the linalg.generic indexing maps.
func @main_ex_dispatch_4() attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
%c0 = constant 0 : index
%c5 = constant 5 : index
%c2 = constant 2 : index
%c32 = constant 32 : index
%c1 = constant 1 : index
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5x1x1xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5xf32>
// Tile loop nest: upper bounds (5, 1, 1), steps (2, 2, 32).
scf.parallel (%arg0, %arg1, %arg2) = (%c0, %c0, %c0) to (%c5, %c1, %c1) step (%c2, %c2, %c32) {
// Tile extents are clamped as min(tile size, upper bound - induction variable).
%2 = affine.min affine_map<(d0, d1, d2) -> (2, d1 - d2)>(%c2, %c1, %arg1)
%3 = affine.min affine_map<(d0, d1, d2) -> (2, d1 - d2)>(%c2, %c5, %arg0)
// Input tile, offsets (%arg1, %arg0).
%4 = subview %1[%arg1, %arg0] [%2, %3] [%c1, %c1] : memref<1x5xf32> to memref<?x?xf32, affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>>
%5 = affine.min affine_map<(d0, d1, d2) -> (2, d1 - d2)>(%c2, %c5, %arg0)
%6 = affine.min affine_map<(d0, d1, d2) -> (2, d1 - d2)>(%c2, %c1, %arg1)
%7 = affine.min affine_map<(d0, d1, d2) -> (32, d1 - d2)>(%c32, %c1, %arg2)
// Output tile, offsets (%arg0, %arg1, %arg2).
%8 = subview %0[%arg0, %arg1, %arg2] [%5, %6, %7] [%c1, %c1, %c1] : memref<5x1x1xf32> to memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3)>>
// Per-tile copy; the region yields %arg3 (input value) and does not use %arg4.
linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} {__internal_linalg_transform__ = "workitem"} %4, %8 {
^bb0(%arg3: f32, %arg4: f32): // no predecessors
linalg.yield %arg3 : f32
}: memref<?x?xf32, affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>>, memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3)>>
scf.yield
}
return
}
// Binding table named by the iree.placeholder ops: @arg0 is Read at binding 0,
// @ret0 is Write|Discard at binding 1, both in set 0.
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::LinalgTileAndFusePass ***
// Dispatch function 4 in tiled form (function-level dump).
// Copies @legacy_io::@arg0 (memref<1x5xf32>) into @legacy_io::@ret0 (memref<5x1x1xf32>);
// output element (d0, d1, d2) receives input element (d1, d0) per the indexing maps below.
func @main_ex_dispatch_4() attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
%c0 = constant 0 : index
%c5 = constant 5 : index
%c2 = constant 2 : index
%c32 = constant 32 : index
%c1 = constant 1 : index
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5x1x1xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5xf32>
// Tile loop nest: upper bounds (5, 1, 1), steps (2, 2, 32).
scf.parallel (%arg0, %arg1, %arg2) = (%c0, %c0, %c0) to (%c5, %c1, %c1) step (%c2, %c2, %c32) {
// Sizes of the input tile: min(2, 1 - %arg1) and min(2, 5 - %arg0).
%2 = affine.min affine_map<(d0, d1, d2) -> (2, d1 - d2)>(%c2, %c1, %arg1)
%3 = affine.min affine_map<(d0, d1, d2) -> (2, d1 - d2)>(%c2, %c5, %arg0)
%4 = subview %1[%arg1, %arg0] [%2, %3] [%c1, %c1] : memref<1x5xf32> to memref<?x?xf32, affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>>
// Sizes of the output tile: min(2, 5 - %arg0), min(2, 1 - %arg1), min(32, 1 - %arg2).
%5 = affine.min affine_map<(d0, d1, d2) -> (2, d1 - d2)>(%c2, %c5, %arg0)
%6 = affine.min affine_map<(d0, d1, d2) -> (2, d1 - d2)>(%c2, %c1, %arg1)
%7 = affine.min affine_map<(d0, d1, d2) -> (32, d1 - d2)>(%c32, %c1, %arg2)
%8 = subview %0[%arg0, %arg1, %arg2] [%5, %6, %7] [%c1, %c1, %c1] : memref<5x1x1xf32> to memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3)>>
// Per-tile copy from %4 into %8; only the input block argument %arg3 is yielded.
linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} {__internal_linalg_transform__ = "workitem"} %4, %8 {
^bb0(%arg3: f32, %arg4: f32): // no predecessors
linalg.yield %arg3 : f32
}: memref<?x?xf32, affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>>, memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3)>>
scf.yield
}
return
}
// *** IR Dump After Canonicalizer ***
// Canonicalized module for dispatch 4. In this form the affine.min maps carry their
// constants inline, the subview strides are the static list [1, 1] / [1, 1, 1], and the
// subview result layouts use static strides with only the offset s0 left symbolic.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
// Copies @arg0 (memref<1x5xf32>) into @ret0 (memref<5x1x1xf32>); output element
// (d0, d1, d2) receives input element (d1, d0).
func @main_ex_dispatch_4() attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
%c0 = constant 0 : index
%c5 = constant 5 : index
%c2 = constant 2 : index
%c32 = constant 32 : index
%c1 = constant 1 : index
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5x1x1xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5xf32>
// Tile loop nest: upper bounds (5, 1, 1), steps (2, 2, 32).
scf.parallel (%arg0, %arg1, %arg2) = (%c0, %c0, %c0) to (%c5, %c1, %c1) step (%c2, %c2, %c32) {
// Input tile sizes: min(2, 1 - %arg1), min(2, 5 - %arg0).
%2 = affine.min affine_map<(d0) -> (2, -d0 + 1)>(%arg1)
%3 = affine.min affine_map<(d0) -> (2, -d0 + 5)>(%arg0)
%4 = subview %1[%arg1, %arg0] [%2, %3] [1, 1] : memref<1x5xf32> to memref<?x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 5 + s0 + d1)>>
// Output tile sizes: %5 and %6 repeat the expressions of %3 and %2; %7 = min(32, 1 - %arg2).
%5 = affine.min affine_map<(d0) -> (2, -d0 + 5)>(%arg0)
%6 = affine.min affine_map<(d0) -> (2, -d0 + 1)>(%arg1)
%7 = affine.min affine_map<(d0) -> (32, -d0 + 1)>(%arg2)
%8 = subview %0[%arg0, %arg1, %arg2] [%5, %6, %7] [1, 1, 1] : memref<5x1x1xf32> to memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 + s0 + d1 + d2)>>
// Per-tile copy from %4 into %8; the region yields %arg3 and does not use %arg4.
linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} {__internal_linalg_transform__ = "workitem"} %4, %8 {
^bb0(%arg3: f32, %arg4: f32): // no predecessors
linalg.yield %arg3 : f32
}: memref<?x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 5 + s0 + d1)>>, memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 + s0 + d1 + d2)>>
scf.yield
}
return
}
// Binding table named by the iree.placeholder ops: @arg0 is Read at binding 0,
// @ret0 is Write|Discard at binding 1, both in set 0.
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::ConvertToGPUPass ***
// Dispatch function 4 expressed with GPU ids. The tile loop nest is three scf.for loops
// whose bounds come from gpu.block_id / gpu.grid_dim, and the per-tile copy is three
// scf.for loops whose bounds come from gpu.thread_id / gpu.block_dim, ending in a
// scalar load from the input tile and a scalar store to the output tile.
func @main_ex_dispatch_4() attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
%c0 = constant 0 : index
%c5 = constant 5 : index
%c2 = constant 2 : index
%c32 = constant 32 : index
%c1 = constant 1 : index
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5x1x1xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5xf32>
%2 = "gpu.block_id"() {dimension = "x"} : () -> index
%3 = "gpu.grid_dim"() {dimension = "x"} : () -> index
%4 = "gpu.block_id"() {dimension = "y"} : () -> index
%5 = "gpu.grid_dim"() {dimension = "y"} : () -> index
%6 = "gpu.block_id"() {dimension = "z"} : () -> index
%7 = "gpu.grid_dim"() {dimension = "z"} : () -> index
// Tile loop bounds: lower bound = 0 + tile step * block_id, loop step = tile step * grid_dim.
// z feeds %arg0 (tile step 2), y feeds %arg1 (tile step 2), x feeds %arg2 (tile step 32).
%8 = muli %c2, %6 : index
%9 = addi %c0, %8 : index
%10 = muli %c2, %7 : index
%11 = muli %c2, %4 : index
%12 = addi %c0, %11 : index
%13 = muli %c2, %5 : index
%14 = muli %c32, %2 : index
%15 = addi %c0, %14 : index
%16 = muli %c32, %3 : index
scf.for %arg0 = %9 to %c5 step %10 {
scf.for %arg1 = %12 to %c1 step %13 {
scf.for %arg2 = %15 to %c1 step %16 {
// Input tile %19 with sizes min(2, 1 - %arg1) x min(2, 5 - %arg0).
%17 = affine.min affine_map<(d0) -> (2, -d0 + 1)>(%arg1)
%18 = affine.min affine_map<(d0) -> (2, -d0 + 5)>(%arg0)
%19 = subview %1[%arg1, %arg0] [%17, %18] [1, 1] : memref<1x5xf32> to memref<?x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 5 + s0 + d1)>>
// Output tile %23 with sizes min(2, 5 - %arg0) x min(2, 1 - %arg1) x min(32, 1 - %arg2).
%20 = affine.min affine_map<(d0) -> (2, -d0 + 5)>(%arg0)
%21 = affine.min affine_map<(d0) -> (2, -d0 + 1)>(%arg1)
%22 = affine.min affine_map<(d0) -> (32, -d0 + 1)>(%arg2)
%23 = subview %0[%arg0, %arg1, %arg2] [%20, %21, %22] [1, 1, 1] : memref<5x1x1xf32> to memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 + s0 + d1 + d2)>>
// Tile extents read back from the subviews; %26 and %27 have no uses in this function.
%24 = dim %19, 0 : memref<?x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 5 + s0 + d1)>>
%25 = dim %19, 1 : memref<?x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 5 + s0 + d1)>>
%26 = dim %23, 0 : memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 + s0 + d1 + d2)>>
%27 = dim %23, 1 : memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 + s0 + d1 + d2)>>
%28 = dim %23, 2 : memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 + s0 + d1 + d2)>>
// Inner loop upper bounds via identity maps: %29 = input dim 1, %30 = input dim 0, %31 = output dim 2.
%29 = affine.apply affine_map<(d0) -> (d0)>(%25)
%30 = affine.apply affine_map<(d0) -> (d0)>(%24)
%31 = affine.apply affine_map<(d0) -> (d0)>(%28)
%c0_0 = constant 0 : index
%c1_1 = constant 1 : index
%c0_2 = constant 0 : index
%c1_3 = constant 1 : index
%c0_4 = constant 0 : index
%c1_5 = constant 1 : index
%32 = "gpu.thread_id"() {dimension = "x"} : () -> index
%33 = "gpu.block_dim"() {dimension = "x"} : () -> index
%34 = "gpu.thread_id"() {dimension = "y"} : () -> index
%35 = "gpu.block_dim"() {dimension = "y"} : () -> index
%36 = "gpu.thread_id"() {dimension = "z"} : () -> index
%37 = "gpu.block_dim"() {dimension = "z"} : () -> index
// Element loop bounds: lower bound = 0 + 1 * thread_id, loop step = 1 * block_dim.
// z feeds %arg3, y feeds %arg4, x feeds %arg5.
%38 = muli %c1_1, %36 : index
%39 = addi %c0_0, %38 : index
%40 = muli %c1_1, %37 : index
%41 = muli %c1_3, %34 : index
%42 = addi %c0_2, %41 : index
%43 = muli %c1_3, %35 : index
%44 = muli %c1_5, %32 : index
%45 = addi %c0_4, %44 : index
%46 = muli %c1_5, %33 : index
scf.for %arg3 = %39 to %29 step %40 {
scf.for %arg4 = %42 to %30 step %43 {
scf.for %arg5 = %45 to %31 step %46 {
// Input is read at (%arg4, %arg3); output is written at (%arg3, %arg4, %arg5).
%47 = affine.apply affine_map<(d0) -> (d0)>(%arg4)
%48 = affine.apply affine_map<(d0) -> (d0)>(%arg3)
%49 = load %19[%47, %48] : memref<?x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 5 + s0 + d1)>>
%50 = affine.apply affine_map<(d0) -> (d0)>(%arg3)
%51 = affine.apply affine_map<(d0) -> (d0)>(%arg4)
%52 = affine.apply affine_map<(d0) -> (d0)>(%arg5)
// %53 loads the current output element; its value has no uses.
%53 = load %23[%50, %51, %52] : memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 + s0 + d1 + d2)>>
%54 = affine.apply affine_map<(d0) -> (d0)>(%arg3)
%55 = affine.apply affine_map<(d0) -> (d0)>(%arg4)
%56 = affine.apply affine_map<(d0) -> (d0)>(%arg5)
store %49, %23[%54, %55, %56] : memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 + s0 + d1 + d2)>>
}
}
}
}
}
}
return
}
// *** IR Dump After ConvertAffineToStandard ***
// Dispatch function 4 with the affine ops expressed in standard arithmetic. Each clamp
// min(c, bound - iv) appears as the sequence: muli iv by -1, addi bound, cmpi "slt", select.
// The innermost load/store index directly with the loop induction variables.
func @main_ex_dispatch_4() attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
%c0 = constant 0 : index
%c5 = constant 5 : index
%c2 = constant 2 : index
%c32 = constant 32 : index
%c1 = constant 1 : index
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5x1x1xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5xf32>
%2 = "gpu.block_id"() {dimension = "x"} : () -> index
%3 = "gpu.grid_dim"() {dimension = "x"} : () -> index
%4 = "gpu.block_id"() {dimension = "y"} : () -> index
%5 = "gpu.grid_dim"() {dimension = "y"} : () -> index
%6 = "gpu.block_id"() {dimension = "z"} : () -> index
%7 = "gpu.grid_dim"() {dimension = "z"} : () -> index
// Tile loop bounds: lower bound = 0 + tile step * block_id, loop step = tile step * grid_dim.
%8 = muli %c2, %6 : index
%9 = addi %c0, %8 : index
%10 = muli %c2, %7 : index
%11 = muli %c2, %4 : index
%12 = addi %c0, %11 : index
%13 = muli %c2, %5 : index
%14 = muli %c32, %2 : index
%15 = addi %c0, %14 : index
%16 = muli %c32, %3 : index
scf.for %arg0 = %9 to %c5 step %10 {
scf.for %arg1 = %12 to %c1 step %13 {
scf.for %arg2 = %15 to %c1 step %16 {
// %20 = min(2, 1 - %arg1)
%c2_0 = constant 2 : index
%c-1 = constant -1 : index
%17 = muli %arg1, %c-1 : index
%c1_1 = constant 1 : index
%18 = addi %17, %c1_1 : index
%19 = cmpi "slt", %c2_0, %18 : index
%20 = select %19, %c2_0, %18 : index
// %24 = min(2, 5 - %arg0)
%c2_2 = constant 2 : index
%c-1_3 = constant -1 : index
%21 = muli %arg0, %c-1_3 : index
%c5_4 = constant 5 : index
%22 = addi %21, %c5_4 : index
%23 = cmpi "slt", %c2_2, %22 : index
%24 = select %23, %c2_2, %22 : index
// Input tile, offsets (%arg1, %arg0), sizes (%20, %24).
%25 = subview %1[%arg1, %arg0] [%20, %24] [1, 1] : memref<1x5xf32> to memref<?x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 5 + s0 + d1)>>
// %29 = min(2, 5 - %arg0), same expression as %24.
%c2_5 = constant 2 : index
%c-1_6 = constant -1 : index
%26 = muli %arg0, %c-1_6 : index
%c5_7 = constant 5 : index
%27 = addi %26, %c5_7 : index
%28 = cmpi "slt", %c2_5, %27 : index
%29 = select %28, %c2_5, %27 : index
// %33 = min(2, 1 - %arg1), same expression as %20.
%c2_8 = constant 2 : index
%c-1_9 = constant -1 : index
%30 = muli %arg1, %c-1_9 : index
%c1_10 = constant 1 : index
%31 = addi %30, %c1_10 : index
%32 = cmpi "slt", %c2_8, %31 : index
%33 = select %32, %c2_8, %31 : index
// %37 = min(32, 1 - %arg2)
%c32_11 = constant 32 : index
%c-1_12 = constant -1 : index
%34 = muli %arg2, %c-1_12 : index
%c1_13 = constant 1 : index
%35 = addi %34, %c1_13 : index
%36 = cmpi "slt", %c32_11, %35 : index
%37 = select %36, %c32_11, %35 : index
// Output tile, offsets (%arg0, %arg1, %arg2), sizes (%29, %33, %37).
%38 = subview %0[%arg0, %arg1, %arg2] [%29, %33, %37] [1, 1, 1] : memref<5x1x1xf32> to memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 + s0 + d1 + d2)>>
// Tile extents read back from the subviews; %41 and %42 have no uses in this function.
%39 = dim %25, 0 : memref<?x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 5 + s0 + d1)>>
%40 = dim %25, 1 : memref<?x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 5 + s0 + d1)>>
%41 = dim %38, 0 : memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 + s0 + d1 + d2)>>
%42 = dim %38, 1 : memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 + s0 + d1 + d2)>>
%43 = dim %38, 2 : memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 + s0 + d1 + d2)>>
%c0_14 = constant 0 : index
%c1_15 = constant 1 : index
%c0_16 = constant 0 : index
%c1_17 = constant 1 : index
%c0_18 = constant 0 : index
%c1_19 = constant 1 : index
%44 = "gpu.thread_id"() {dimension = "x"} : () -> index
%45 = "gpu.block_dim"() {dimension = "x"} : () -> index
%46 = "gpu.thread_id"() {dimension = "y"} : () -> index
%47 = "gpu.block_dim"() {dimension = "y"} : () -> index
%48 = "gpu.thread_id"() {dimension = "z"} : () -> index
%49 = "gpu.block_dim"() {dimension = "z"} : () -> index
// Element loop bounds: lower bound = 0 + 1 * thread_id, loop step = 1 * block_dim.
// z feeds %arg3, y feeds %arg4, x feeds %arg5.
%50 = muli %c1_15, %48 : index
%51 = addi %c0_14, %50 : index
%52 = muli %c1_15, %49 : index
%53 = muli %c1_17, %46 : index
%54 = addi %c0_16, %53 : index
%55 = muli %c1_17, %47 : index
%56 = muli %c1_19, %44 : index
%57 = addi %c0_18, %56 : index
%58 = muli %c1_19, %45 : index
scf.for %arg3 = %51 to %40 step %52 {
scf.for %arg4 = %54 to %39 step %55 {
scf.for %arg5 = %57 to %43 step %58 {
// Input read at (%arg4, %arg3); output written at (%arg3, %arg4, %arg5).
%59 = load %25[%arg4, %arg3] : memref<?x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 5 + s0 + d1)>>
// %60 loads the current output element; its value has no uses.
%60 = load %38[%arg3, %arg4, %arg5] : memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 + s0 + d1 + d2)>>
store %59, %38[%arg3, %arg4, %arg5] : memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 + s0 + d1 + d2)>>
}
}
}
}
}
}
return
}
// *** IR Dump After Canonicalizer ***
// Canonicalized module for dispatch 4 in GPU-id form. All constants sit at the top of the
// function, the tile loops start at block_id * tile step, the element loops use thread_id
// and block_dim directly, and the element loop upper bounds are the select results that
// also serve as subview sizes.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
func @main_ex_dispatch_4() attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
%c5 = constant 5 : index
%c2 = constant 2 : index
%c32 = constant 32 : index
%c-1 = constant -1 : index
%c1 = constant 1 : index
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5x1x1xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5xf32>
%2 = "gpu.block_id"() {dimension = "x"} : () -> index
%3 = "gpu.grid_dim"() {dimension = "x"} : () -> index
%4 = "gpu.block_id"() {dimension = "y"} : () -> index
%5 = "gpu.grid_dim"() {dimension = "y"} : () -> index
%6 = "gpu.block_id"() {dimension = "z"} : () -> index
%7 = "gpu.grid_dim"() {dimension = "z"} : () -> index
// Tile loop lower bound / step pairs: (%8, %9) from z, (%10, %11) from y, (%12, %13) from x.
%8 = muli %6, %c2 : index
%9 = muli %7, %c2 : index
%10 = muli %4, %c2 : index
%11 = muli %5, %c2 : index
%12 = muli %2, %c32 : index
%13 = muli %3, %c32 : index
scf.for %arg0 = %8 to %c5 step %9 {
scf.for %arg1 = %10 to %c1 step %11 {
scf.for %arg2 = %12 to %c1 step %13 {
// %17 = min(2, 1 - %arg1)
%14 = muli %arg1, %c-1 : index
%15 = addi %14, %c1 : index
%16 = cmpi "slt", %c2, %15 : index
%17 = select %16, %c2, %15 : index
// %21 = min(2, 5 - %arg0)
%18 = muli %arg0, %c-1 : index
%19 = addi %18, %c5 : index
%20 = cmpi "slt", %c2, %19 : index
%21 = select %20, %c2, %19 : index
%22 = subview %1[%arg1, %arg0] [%17, %21] [1, 1] : memref<1x5xf32> to memref<?x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 5 + s0 + d1)>>
// %26 and %30 recompute the same expressions as %21 and %17.
%23 = muli %arg0, %c-1 : index
%24 = addi %23, %c5 : index
%25 = cmpi "slt", %c2, %24 : index
%26 = select %25, %c2, %24 : index
%27 = muli %arg1, %c-1 : index
%28 = addi %27, %c1 : index
%29 = cmpi "slt", %c2, %28 : index
%30 = select %29, %c2, %28 : index
// %34 = min(32, 1 - %arg2)
%31 = muli %arg2, %c-1 : index
%32 = addi %31, %c1 : index
%33 = cmpi "slt", %c32, %32 : index
%34 = select %33, %c32, %32 : index
%35 = subview %0[%arg0, %arg1, %arg2] [%26, %30, %34] [1, 1, 1] : memref<5x1x1xf32> to memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 + s0 + d1 + d2)>>
%36 = "gpu.thread_id"() {dimension = "x"} : () -> index
%37 = "gpu.block_dim"() {dimension = "x"} : () -> index
%38 = "gpu.thread_id"() {dimension = "y"} : () -> index
%39 = "gpu.block_dim"() {dimension = "y"} : () -> index
%40 = "gpu.thread_id"() {dimension = "z"} : () -> index
%41 = "gpu.block_dim"() {dimension = "z"} : () -> index
// Element loops: thread_id is the lower bound and block_dim the step (z, then y, then x).
scf.for %arg3 = %40 to %21 step %41 {
scf.for %arg4 = %38 to %17 step %39 {
scf.for %arg5 = %36 to %34 step %37 {
// Input read at (%arg4, %arg3); output written at (%arg3, %arg4, %arg5).
%42 = load %22[%arg4, %arg3] : memref<?x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 5 + s0 + d1)>>
store %42, %35[%arg3, %arg4, %arg5] : memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 + s0 + d1 + d2)>>
}
}
}
}
}
}
return
}
// Binding table named by the iree.placeholder ops: @arg0 is Read at binding 0,
// @ret0 is Write|Discard at binding 1, both in set 0.
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After CSE ***
// Module for dispatch 4 in GPU-id form with each clamped tile extent computed once:
// %17 = min(2, 1 - %arg1), %21 = min(2, 5 - %arg0), %26 = min(32, 1 - %arg2).
// Both subviews take their sizes from these shared values.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
func @main_ex_dispatch_4() attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
%c5 = constant 5 : index
%c2 = constant 2 : index
%c32 = constant 32 : index
%c-1 = constant -1 : index
%c1 = constant 1 : index
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5x1x1xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5xf32>
%2 = "gpu.block_id"() {dimension = "x"} : () -> index
%3 = "gpu.grid_dim"() {dimension = "x"} : () -> index
%4 = "gpu.block_id"() {dimension = "y"} : () -> index
%5 = "gpu.grid_dim"() {dimension = "y"} : () -> index
%6 = "gpu.block_id"() {dimension = "z"} : () -> index
%7 = "gpu.grid_dim"() {dimension = "z"} : () -> index
// Tile loop lower bound / step pairs: (%8, %9) from z, (%10, %11) from y, (%12, %13) from x.
%8 = muli %6, %c2 : index
%9 = muli %7, %c2 : index
%10 = muli %4, %c2 : index
%11 = muli %5, %c2 : index
%12 = muli %2, %c32 : index
%13 = muli %3, %c32 : index
scf.for %arg0 = %8 to %c5 step %9 {
scf.for %arg1 = %10 to %c1 step %11 {
scf.for %arg2 = %12 to %c1 step %13 {
// %17 = min(2, 1 - %arg1)
%14 = muli %arg1, %c-1 : index
%15 = addi %14, %c1 : index
%16 = cmpi "slt", %c2, %15 : index
%17 = select %16, %c2, %15 : index
// %21 = min(2, 5 - %arg0)
%18 = muli %arg0, %c-1 : index
%19 = addi %18, %c5 : index
%20 = cmpi "slt", %c2, %19 : index
%21 = select %20, %c2, %19 : index
// Input tile, offsets (%arg1, %arg0), sizes (%17, %21).
%22 = subview %1[%arg1, %arg0] [%17, %21] [1, 1] : memref<1x5xf32> to memref<?x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 5 + s0 + d1)>>
// %26 = min(32, 1 - %arg2)
%23 = muli %arg2, %c-1 : index
%24 = addi %23, %c1 : index
%25 = cmpi "slt", %c32, %24 : index
%26 = select %25, %c32, %24 : index
// Output tile, offsets (%arg0, %arg1, %arg2), sizes (%21, %17, %26).
%27 = subview %0[%arg0, %arg1, %arg2] [%21, %17, %26] [1, 1, 1] : memref<5x1x1xf32> to memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 + s0 + d1 + d2)>>
%28 = "gpu.thread_id"() {dimension = "x"} : () -> index
%29 = "gpu.block_dim"() {dimension = "x"} : () -> index
%30 = "gpu.thread_id"() {dimension = "y"} : () -> index
%31 = "gpu.block_dim"() {dimension = "y"} : () -> index
%32 = "gpu.thread_id"() {dimension = "z"} : () -> index
%33 = "gpu.block_dim"() {dimension = "z"} : () -> index
// Element loops: thread_id is the lower bound and block_dim the step (z, then y, then x).
scf.for %arg3 = %32 to %21 step %33 {
scf.for %arg4 = %30 to %17 step %31 {
scf.for %arg5 = %28 to %26 step %29 {
// Input read at (%arg4, %arg3); output written at (%arg3, %arg4, %arg5).
%34 = load %22[%arg4, %arg3] : memref<?x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 5 + s0 + d1)>>
store %34, %27[%arg3, %arg4, %arg5] : memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 + s0 + d1 + d2)>>
}
}
}
}
}
}
return
}
// Binding table named by the iree.placeholder ops: @arg0 is Read at binding 0,
// @ret0 is Write|Discard at binding 1, both in set 0.
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::ResolveShapeOpsPass ***
// Dispatch function 4 in GPU-id form (function-level dump). Tile loops are driven by
// gpu.block_id / gpu.grid_dim, element loops by gpu.thread_id / gpu.block_dim; each
// element is loaded from the input subview at (%arg4, %arg3) and stored to the output
// subview at (%arg3, %arg4, %arg5).
func @main_ex_dispatch_4() attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
%c5 = constant 5 : index
%c2 = constant 2 : index
%c32 = constant 32 : index
%c-1 = constant -1 : index
%c1 = constant 1 : index
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5x1x1xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5xf32>
%2 = "gpu.block_id"() {dimension = "x"} : () -> index
%3 = "gpu.grid_dim"() {dimension = "x"} : () -> index
%4 = "gpu.block_id"() {dimension = "y"} : () -> index
%5 = "gpu.grid_dim"() {dimension = "y"} : () -> index
%6 = "gpu.block_id"() {dimension = "z"} : () -> index
%7 = "gpu.grid_dim"() {dimension = "z"} : () -> index
// Tile loop lower bound / step pairs: (%8, %9) from z, (%10, %11) from y, (%12, %13) from x.
%8 = muli %6, %c2 : index
%9 = muli %7, %c2 : index
%10 = muli %4, %c2 : index
%11 = muli %5, %c2 : index
%12 = muli %2, %c32 : index
%13 = muli %3, %c32 : index
scf.for %arg0 = %8 to %c5 step %9 {
scf.for %arg1 = %10 to %c1 step %11 {
scf.for %arg2 = %12 to %c1 step %13 {
// %17 = min(2, 1 - %arg1)
%14 = muli %arg1, %c-1 : index
%15 = addi %14, %c1 : index
%16 = cmpi "slt", %c2, %15 : index
%17 = select %16, %c2, %15 : index
// %21 = min(2, 5 - %arg0)
%18 = muli %arg0, %c-1 : index
%19 = addi %18, %c5 : index
%20 = cmpi "slt", %c2, %19 : index
%21 = select %20, %c2, %19 : index
%22 = subview %1[%arg1, %arg0] [%17, %21] [1, 1] : memref<1x5xf32> to memref<?x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 5 + s0 + d1)>>
// %26 = min(32, 1 - %arg2)
%23 = muli %arg2, %c-1 : index
%24 = addi %23, %c1 : index
%25 = cmpi "slt", %c32, %24 : index
%26 = select %25, %c32, %24 : index
%27 = subview %0[%arg0, %arg1, %arg2] [%21, %17, %26] [1, 1, 1] : memref<5x1x1xf32> to memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 + s0 + d1 + d2)>>
%28 = "gpu.thread_id"() {dimension = "x"} : () -> index
%29 = "gpu.block_dim"() {dimension = "x"} : () -> index
%30 = "gpu.thread_id"() {dimension = "y"} : () -> index
%31 = "gpu.block_dim"() {dimension = "y"} : () -> index
%32 = "gpu.thread_id"() {dimension = "z"} : () -> index
%33 = "gpu.block_dim"() {dimension = "z"} : () -> index
// Element loops: thread_id is the lower bound and block_dim the step (z, then y, then x).
scf.for %arg3 = %32 to %21 step %33 {
scf.for %arg4 = %30 to %17 step %31 {
scf.for %arg5 = %28 to %26 step %29 {
%34 = load %22[%arg4, %arg3] : memref<?x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 5 + s0 + d1)>>
store %34, %27[%arg3, %arg4, %arg5] : memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 + s0 + d1 + d2)>>
}
}
}
}
}
}
return
}
// *** IR Dump After LegalizeStandardForSPIRV ***
// Module for dispatch 4 with no subview ops: the load and store address the interface
// buffers %1 and %0 directly, and each index is the tile offset plus the element loop
// induction variable (for example %arg1 + %arg4).
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
func @main_ex_dispatch_4() attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
%c5 = constant 5 : index
%c2 = constant 2 : index
%c32 = constant 32 : index
%c-1 = constant -1 : index
%c1 = constant 1 : index
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5x1x1xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5xf32>
%2 = "gpu.block_id"() {dimension = "x"} : () -> index
%3 = "gpu.grid_dim"() {dimension = "x"} : () -> index
%4 = "gpu.block_id"() {dimension = "y"} : () -> index
%5 = "gpu.grid_dim"() {dimension = "y"} : () -> index
%6 = "gpu.block_id"() {dimension = "z"} : () -> index
%7 = "gpu.grid_dim"() {dimension = "z"} : () -> index
// Tile loop lower bound / step pairs: (%8, %9) from z, (%10, %11) from y, (%12, %13) from x.
%8 = muli %6, %c2 : index
%9 = muli %7, %c2 : index
%10 = muli %4, %c2 : index
%11 = muli %5, %c2 : index
%12 = muli %2, %c32 : index
%13 = muli %3, %c32 : index
scf.for %arg0 = %8 to %c5 step %9 {
scf.for %arg1 = %10 to %c1 step %11 {
scf.for %arg2 = %12 to %c1 step %13 {
// Element loop upper bounds: %17 = min(2, 1 - %arg1), %21 = min(2, 5 - %arg0), %25 = min(32, 1 - %arg2).
%14 = muli %arg1, %c-1 : index
%15 = addi %14, %c1 : index
%16 = cmpi "slt", %c2, %15 : index
%17 = select %16, %c2, %15 : index
%18 = muli %arg0, %c-1 : index
%19 = addi %18, %c5 : index
%20 = cmpi "slt", %c2, %19 : index
%21 = select %20, %c2, %19 : index
%22 = muli %arg2, %c-1 : index
%23 = addi %22, %c1 : index
%24 = cmpi "slt", %c32, %23 : index
%25 = select %24, %c32, %23 : index
%26 = "gpu.thread_id"() {dimension = "x"} : () -> index
%27 = "gpu.block_dim"() {dimension = "x"} : () -> index
%28 = "gpu.thread_id"() {dimension = "y"} : () -> index
%29 = "gpu.block_dim"() {dimension = "y"} : () -> index
%30 = "gpu.thread_id"() {dimension = "z"} : () -> index
%31 = "gpu.block_dim"() {dimension = "z"} : () -> index
// Element loops: thread_id is the lower bound and block_dim the step (z, then y, then x).
scf.for %arg3 = %30 to %21 step %31 {
scf.for %arg4 = %28 to %17 step %29 {
scf.for %arg5 = %26 to %25 step %27 {
// Load indices into the 1x5 input: (%arg1 + %arg4, %arg0 + %arg3).
%32 = addi %arg1, %arg4 : index
%33 = addi %arg0, %arg3 : index
%34 = load %1[%32, %33] : memref<1x5xf32>
// Store indices into the 5x1x1 output; %35 and %36 repeat the sums %33 and %32.
%35 = addi %arg0, %arg3 : index
%36 = addi %arg1, %arg4 : index
%37 = addi %arg2, %arg5 : index
store %34, %0[%35, %36, %37] : memref<5x1x1xf32>
}
}
}
}
}
}
return
}
// Binding table named by the iree.placeholder ops: @arg0 is Read at binding 0,
// @ret0 is Write|Discard at binding 1, both in set 0.
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After Canonicalizer ***
// Canonicalized module for dispatch 4 with direct buffer addressing: every thread-loop
// iteration loads %1[%arg1 + %arg4, %arg0 + %arg3] and stores it to
// %0[%arg0 + %arg3, %arg1 + %arg4, %arg2 + %arg5].
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
func @main_ex_dispatch_4() attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
%c5 = constant 5 : index
%c2 = constant 2 : index
%c32 = constant 32 : index
%c-1 = constant -1 : index
%c1 = constant 1 : index
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5x1x1xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5xf32>
%2 = "gpu.block_id"() {dimension = "x"} : () -> index
%3 = "gpu.grid_dim"() {dimension = "x"} : () -> index
%4 = "gpu.block_id"() {dimension = "y"} : () -> index
%5 = "gpu.grid_dim"() {dimension = "y"} : () -> index
%6 = "gpu.block_id"() {dimension = "z"} : () -> index
%7 = "gpu.grid_dim"() {dimension = "z"} : () -> index
// Tile loop lower bound / step pairs: (%8, %9) from z, (%10, %11) from y, (%12, %13) from x.
%8 = muli %6, %c2 : index
%9 = muli %7, %c2 : index
%10 = muli %4, %c2 : index
%11 = muli %5, %c2 : index
%12 = muli %2, %c32 : index
%13 = muli %3, %c32 : index
scf.for %arg0 = %8 to %c5 step %9 {
scf.for %arg1 = %10 to %c1 step %11 {
scf.for %arg2 = %12 to %c1 step %13 {
// Element loop upper bounds: %17 = min(2, 1 - %arg1), %21 = min(2, 5 - %arg0), %25 = min(32, 1 - %arg2).
%14 = muli %arg1, %c-1 : index
%15 = addi %14, %c1 : index
%16 = cmpi "slt", %c2, %15 : index
%17 = select %16, %c2, %15 : index
%18 = muli %arg0, %c-1 : index
%19 = addi %18, %c5 : index
%20 = cmpi "slt", %c2, %19 : index
%21 = select %20, %c2, %19 : index
%22 = muli %arg2, %c-1 : index
%23 = addi %22, %c1 : index
%24 = cmpi "slt", %c32, %23 : index
%25 = select %24, %c32, %23 : index
%26 = "gpu.thread_id"() {dimension = "x"} : () -> index
%27 = "gpu.block_dim"() {dimension = "x"} : () -> index
%28 = "gpu.thread_id"() {dimension = "y"} : () -> index
%29 = "gpu.block_dim"() {dimension = "y"} : () -> index
%30 = "gpu.thread_id"() {dimension = "z"} : () -> index
%31 = "gpu.block_dim"() {dimension = "z"} : () -> index
// Element loops: thread_id is the lower bound and block_dim the step (z, then y, then x).
scf.for %arg3 = %30 to %21 step %31 {
scf.for %arg4 = %28 to %17 step %29 {
scf.for %arg5 = %26 to %25 step %27 {
%32 = addi %arg1, %arg4 : index
%33 = addi %arg0, %arg3 : index
%34 = load %1[%32, %33] : memref<1x5xf32>
// %35 and %36 repeat the sums already held in %33 and %32.
%35 = addi %arg0, %arg3 : index
%36 = addi %arg1, %arg4 : index
%37 = addi %arg2, %arg5 : index
store %34, %0[%35, %36, %37] : memref<5x1x1xf32>
}
}
}
}
}
}
return
}
// Binding table named by the iree.placeholder ops: @arg0 is Read at binding 0,
// @ret0 is Write|Discard at binding 1, both in set 0.
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After CSE ***
// Module for dispatch 4 with direct buffer addressing and shared index sums: the store
// reuses %33 (= %arg0 + %arg3) and %32 (= %arg1 + %arg4) that were computed for the load,
// and adds only %35 (= %arg2 + %arg5) for the third output index.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
func @main_ex_dispatch_4() attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
%c5 = constant 5 : index
%c2 = constant 2 : index
%c32 = constant 32 : index
%c-1 = constant -1 : index
%c1 = constant 1 : index
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5x1x1xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x5xf32>
%2 = "gpu.block_id"() {dimension = "x"} : () -> index
%3 = "gpu.grid_dim"() {dimension = "x"} : () -> index
%4 = "gpu.block_id"() {dimension = "y"} : () -> index
%5 = "gpu.grid_dim"() {dimension = "y"} : () -> index
%6 = "gpu.block_id"() {dimension = "z"} : () -> index
%7 = "gpu.grid_dim"() {dimension = "z"} : () -> index
// Tile loop lower bound / step pairs: (%8, %9) from z, (%10, %11) from y, (%12, %13) from x.
%8 = muli %6, %c2 : index
%9 = muli %7, %c2 : index
%10 = muli %4, %c2 : index
%11 = muli %5, %c2 : index
%12 = muli %2, %c32 : index
%13 = muli %3, %c32 : index
scf.for %arg0 = %8 to %c5 step %9 {
scf.for %arg1 = %10 to %c1 step %11 {
scf.for %arg2 = %12 to %c1 step %13 {
// Element loop upper bounds: %17 = min(2, 1 - %arg1), %21 = min(2, 5 - %arg0), %25 = min(32, 1 - %arg2).
%14 = muli %arg1, %c-1 : index
%15 = addi %14, %c1 : index
%16 = cmpi "slt", %c2, %15 : index
%17 = select %16, %c2, %15 : index
%18 = muli %arg0, %c-1 : index
%19 = addi %18, %c5 : index
%20 = cmpi "slt", %c2, %19 : index
%21 = select %20, %c2, %19 : index
%22 = muli %arg2, %c-1 : index
%23 = addi %22, %c1 : index
%24 = cmpi "slt", %c32, %23 : index
%25 = select %24, %c32, %23 : index
%26 = "gpu.thread_id"() {dimension = "x"} : () -> index
%27 = "gpu.block_dim"() {dimension = "x"} : () -> index
%28 = "gpu.thread_id"() {dimension = "y"} : () -> index
%29 = "gpu.block_dim"() {dimension = "y"} : () -> index
%30 = "gpu.thread_id"() {dimension = "z"} : () -> index
%31 = "gpu.block_dim"() {dimension = "z"} : () -> index
// Element loops: thread_id is the lower bound and block_dim the step (z, then y, then x).
scf.for %arg3 = %30 to %21 step %31 {
scf.for %arg4 = %28 to %17 step %29 {
scf.for %arg5 = %26 to %25 step %27 {
// Load from input (%32, %33); store to output (%33, %32, %35).
%32 = addi %arg1, %arg4 : index
%33 = addi %arg0, %arg3 : index
%34 = load %1[%32, %33] : memref<1x5xf32>
%35 = addi %arg2, %arg5 : index
store %34, %0[%33, %32, %35] : memref<5x1x1xf32>
}
}
}
}
}
}
return
}
// Binding table named by the iree.placeholder ops: @arg0 is Read at binding 0,
// @ret0 is Write|Discard at binding 1, both in set 0.
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::ConvertToSPIRVPass ***
// SPIR-V dialect form of dispatch @main_ex_dispatch_4. The function loads f32
// elements from the buffer bound at (set 0, binding 0) and stores them to the
// buffer bound at (set 0, binding 1) inside six nested spv.loop regions.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
spv.module Logical GLSL450 {
// Built-in input variables read by the function below.
spv.globalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_NumWorkgroups__ built_in("NumWorkgroups") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input>
// Buffer resources: each is a struct wrapping a 5-element f32 array.
spv.globalVariable @__resource_var_0_0__ bind(0, 0) : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
spv.globalVariable @__resource_var_0_1__ bind(0, 1) : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
// The entry-point ABI attribute on the function carries local_size = [32, 2, 2].
spv.func @main_ex_dispatch_4() "None" attributes {spv.entry_point_abi = {local_size = dense<[32, 2, 2]> : vector<3xi32>}} {
%0 = spv.constant 5 : i32
%1 = spv.constant 2 : i32
%2 = spv.constant 32 : i32
%3 = spv.constant -1 : i32
%4 = spv.constant 1 : i32
// %5 (binding 1) is the store destination; %6 (binding 0) is the load source.
%5 = spv._address_of @__resource_var_0_1__ : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
%6 = spv._address_of @__resource_var_0_0__ : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
// Components 0, 1 and 2 of WorkgroupId and NumWorkgroups; every component is
// extracted from its own address_of + load of the builtin variable.
%7 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%8 = spv.Load "Input" %7 : vector<3xi32>
%9 = spv.CompositeExtract %8[0 : i32] : vector<3xi32>
%10 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%11 = spv.Load "Input" %10 : vector<3xi32>
%12 = spv.CompositeExtract %11[0 : i32] : vector<3xi32>
%13 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%14 = spv.Load "Input" %13 : vector<3xi32>
%15 = spv.CompositeExtract %14[1 : i32] : vector<3xi32>
%16 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%17 = spv.Load "Input" %16 : vector<3xi32>
%18 = spv.CompositeExtract %17[1 : i32] : vector<3xi32>
%19 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%20 = spv.Load "Input" %19 : vector<3xi32>
%21 = spv.CompositeExtract %20[2 : i32] : vector<3xi32>
%22 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%23 = spv.Load "Input" %22 : vector<3xi32>
%24 = spv.CompositeExtract %23[2 : i32] : vector<3xi32>
// Start value and step of the three outer loops:
//   %25 / %26 = WorkgroupId[2] * 2  / NumWorkgroups[2] * 2
//   %27 / %28 = WorkgroupId[1] * 2  / NumWorkgroups[1] * 2
//   %29 / %30 = WorkgroupId[0] * 32 / NumWorkgroups[0] * 32
%25 = spv.IMul %21, %1 : i32
%26 = spv.IMul %24, %1 : i32
%27 = spv.IMul %15, %1 : i32
%28 = spv.IMul %18, %1 : i32
%29 = spv.IMul %9, %2 : i32
%30 = spv.IMul %12, %2 : i32
// Outer loop 1: %31 starts at %25, runs while %31 < 5, steps by %26.
spv.loop {
spv.Branch ^bb1(%25 : i32)
^bb1(%31: i32): // 2 preds: ^bb0, ^bb2
%32 = spv.SLessThan %31, %0 : i32
spv.BranchConditional %32, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Outer loop 2: %34 starts at %27, runs while %34 < 1, steps by %28.
spv.loop {
spv.Branch ^bb1(%27 : i32)
^bb1(%34: i32): // 2 preds: ^bb0, ^bb2
%35 = spv.SLessThan %34, %4 : i32
spv.BranchConditional %35, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Outer loop 3: %37 starts at %29, runs while %37 < 1, steps by %30.
spv.loop {
spv.Branch ^bb1(%29 : i32)
^bb1(%37: i32): // 2 preds: ^bb0, ^bb2
%38 = spv.SLessThan %37, %4 : i32
spv.BranchConditional %38, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Upper bounds of the three inner loops (signed min via compare + select):
//   %42 = min(2, 1 - %34), %46 = min(2, 5 - %31), %50 = min(32, 1 - %37)
%39 = spv.IMul %34, %3 : i32
%40 = spv.IAdd %39, %4 : i32
%41 = spv.SLessThan %1, %40 : i32
%42 = spv.Select %41, %1, %40 : i1, i32
%43 = spv.IMul %31, %3 : i32
%44 = spv.IAdd %43, %0 : i32
%45 = spv.SLessThan %1, %44 : i32
%46 = spv.Select %45, %1, %44 : i1, i32
%47 = spv.IMul %37, %3 : i32
%48 = spv.IAdd %47, %4 : i32
%49 = spv.SLessThan %2, %48 : i32
%50 = spv.Select %49, %2, %48 : i1, i32
// Inner-loop start values are LocalInvocationId[0..2] (%53, %57, %61); the
// constants 32, 2, 2 (%54, %58, %62) are the corresponding inner-loop steps.
%51 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%52 = spv.Load "Input" %51 : vector<3xi32>
%53 = spv.CompositeExtract %52[0 : i32] : vector<3xi32>
%54 = spv.constant 32 : i32
%55 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%56 = spv.Load "Input" %55 : vector<3xi32>
%57 = spv.CompositeExtract %56[1 : i32] : vector<3xi32>
%58 = spv.constant 2 : i32
%59 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%60 = spv.Load "Input" %59 : vector<3xi32>
%61 = spv.CompositeExtract %60[2 : i32] : vector<3xi32>
%62 = spv.constant 2 : i32
// Inner loop 1: %64 from LocalInvocationId[2] while %64 < %46, step 2.
spv.loop {
spv.Branch ^bb1(%61 : i32)
^bb1(%64: i32): // 2 preds: ^bb0, ^bb2
%65 = spv.SLessThan %64, %46 : i32
spv.BranchConditional %65, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner loop 2: %67 from LocalInvocationId[1] while %67 < %42, step 2.
spv.loop {
spv.Branch ^bb1(%57 : i32)
^bb1(%67: i32): // 2 preds: ^bb0, ^bb2
%68 = spv.SLessThan %67, %42 : i32
spv.BranchConditional %68, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner loop 3: %70 from LocalInvocationId[0] while %70 < %50, step 32.
spv.loop {
spv.Branch ^bb1(%53 : i32)
^bb1(%70: i32): // 2 preds: ^bb0, ^bb2
%71 = spv.SLessThan %70, %50 : i32
spv.BranchConditional %71, ^bb2, ^bb3
^bb2: // pred: ^bb1
// %72 and %73 combine an outer and an inner induction variable each.
%72 = spv.IAdd %34, %67 : i32
%73 = spv.IAdd %31, %64 : i32
// Load offset into binding 0: %81 = 0 + 5 * %72 + 1 * %73.
%74 = spv.constant 0 : i32
%75 = spv.constant 0 : i32
%76 = spv.constant 5 : i32
%77 = spv.IMul %76, %72 : i32
%78 = spv.IAdd %75, %77 : i32
%79 = spv.constant 1 : i32
%80 = spv.IMul %79, %73 : i32
%81 = spv.IAdd %78, %80 : i32
%82 = spv.AccessChain %6[%74, %81] : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
%83 = spv.Load "StorageBuffer" %82 : f32
// Store offset into binding 1: %95 = 0 + 1 * %73 + 1 * %72 + 1 * %84.
%84 = spv.IAdd %37, %70 : i32
%85 = spv.constant 0 : i32
%86 = spv.constant 0 : i32
%87 = spv.constant 1 : i32
%88 = spv.IMul %87, %73 : i32
%89 = spv.IAdd %86, %88 : i32
%90 = spv.constant 1 : i32
%91 = spv.IMul %90, %72 : i32
%92 = spv.IAdd %89, %91 : i32
%93 = spv.constant 1 : i32
%94 = spv.IMul %93, %84 : i32
%95 = spv.IAdd %92, %94 : i32
%96 = spv.AccessChain %5[%85, %95] : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
spv.Store "StorageBuffer" %96, %83 : f32
// Advance inner loop 3 and branch back to its header.
%97 = spv.IAdd %70, %54 : i32
spv.Branch ^bb1(%97 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance inner loop 2.
%69 = spv.IAdd %67, %58 : i32
spv.Branch ^bb1(%69 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance inner loop 1.
%66 = spv.IAdd %64, %62 : i32
spv.Branch ^bb1(%66 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance outer loop 3.
%63 = spv.IAdd %37, %30 : i32
spv.Branch ^bb1(%63 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance outer loop 2.
%36 = spv.IAdd %34, %28 : i32
spv.Branch ^bb1(%36 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance outer loop 1.
%33 = spv.IAdd %31, %26 : i32
spv.Branch ^bb1(%33 : i32)
^bb3: // pred: ^bb1
spv._merge
}
spv.Return
}
}
// Binding table: @arg0 at (set 0, binding 0) is read; @ret0 at (set 0,
// binding 1) is written with discard.
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After SPIRVLowerABIAttributes ***
// SPIR-V module for dispatch @main_ex_dispatch_4. The function copies f32
// elements from binding (0, 0) to binding (0, 1) inside six nested spv.loop
// regions; the entry point and its LocalSize execution mode are declared as
// module-level ops at the end of the module.
spv.module Logical GLSL450 {
// Built-in input variables read by the function below.
spv.globalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_NumWorkgroups__ built_in("NumWorkgroups") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input>
// Buffer resources: each is a struct wrapping a 5-element f32 array.
spv.globalVariable @__resource_var_0_0__ bind(0, 0) : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
spv.globalVariable @__resource_var_0_1__ bind(0, 1) : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
spv.func @main_ex_dispatch_4() "None" {
%0 = spv.constant 5 : i32
%1 = spv.constant 2 : i32
%2 = spv.constant 32 : i32
%3 = spv.constant -1 : i32
%4 = spv.constant 1 : i32
// %5 (binding 1) is the store destination; %6 (binding 0) is the load source.
%5 = spv._address_of @__resource_var_0_1__ : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
%6 = spv._address_of @__resource_var_0_0__ : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
// Components 0, 1 and 2 of WorkgroupId and NumWorkgroups; every component is
// extracted from its own address_of + load of the builtin variable.
%7 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%8 = spv.Load "Input" %7 : vector<3xi32>
%9 = spv.CompositeExtract %8[0 : i32] : vector<3xi32>
%10 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%11 = spv.Load "Input" %10 : vector<3xi32>
%12 = spv.CompositeExtract %11[0 : i32] : vector<3xi32>
%13 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%14 = spv.Load "Input" %13 : vector<3xi32>
%15 = spv.CompositeExtract %14[1 : i32] : vector<3xi32>
%16 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%17 = spv.Load "Input" %16 : vector<3xi32>
%18 = spv.CompositeExtract %17[1 : i32] : vector<3xi32>
%19 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%20 = spv.Load "Input" %19 : vector<3xi32>
%21 = spv.CompositeExtract %20[2 : i32] : vector<3xi32>
%22 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%23 = spv.Load "Input" %22 : vector<3xi32>
%24 = spv.CompositeExtract %23[2 : i32] : vector<3xi32>
// Start value and step of the three outer loops:
//   %25 / %26 = WorkgroupId[2] * 2  / NumWorkgroups[2] * 2
//   %27 / %28 = WorkgroupId[1] * 2  / NumWorkgroups[1] * 2
//   %29 / %30 = WorkgroupId[0] * 32 / NumWorkgroups[0] * 32
%25 = spv.IMul %21, %1 : i32
%26 = spv.IMul %24, %1 : i32
%27 = spv.IMul %15, %1 : i32
%28 = spv.IMul %18, %1 : i32
%29 = spv.IMul %9, %2 : i32
%30 = spv.IMul %12, %2 : i32
// Outer loop 1: %31 starts at %25, runs while %31 < 5, steps by %26.
spv.loop {
spv.Branch ^bb1(%25 : i32)
^bb1(%31: i32): // 2 preds: ^bb0, ^bb2
%32 = spv.SLessThan %31, %0 : i32
spv.BranchConditional %32, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Outer loop 2: %34 starts at %27, runs while %34 < 1, steps by %28.
spv.loop {
spv.Branch ^bb1(%27 : i32)
^bb1(%34: i32): // 2 preds: ^bb0, ^bb2
%35 = spv.SLessThan %34, %4 : i32
spv.BranchConditional %35, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Outer loop 3: %37 starts at %29, runs while %37 < 1, steps by %30.
spv.loop {
spv.Branch ^bb1(%29 : i32)
^bb1(%37: i32): // 2 preds: ^bb0, ^bb2
%38 = spv.SLessThan %37, %4 : i32
spv.BranchConditional %38, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Upper bounds of the three inner loops (signed min via compare + select):
//   %42 = min(2, 1 - %34), %46 = min(2, 5 - %31), %50 = min(32, 1 - %37)
%39 = spv.IMul %34, %3 : i32
%40 = spv.IAdd %39, %4 : i32
%41 = spv.SLessThan %1, %40 : i32
%42 = spv.Select %41, %1, %40 : i1, i32
%43 = spv.IMul %31, %3 : i32
%44 = spv.IAdd %43, %0 : i32
%45 = spv.SLessThan %1, %44 : i32
%46 = spv.Select %45, %1, %44 : i1, i32
%47 = spv.IMul %37, %3 : i32
%48 = spv.IAdd %47, %4 : i32
%49 = spv.SLessThan %2, %48 : i32
%50 = spv.Select %49, %2, %48 : i1, i32
// Inner-loop start values are LocalInvocationId[0..2] (%53, %57, %61); the
// constants 32, 2, 2 (%54, %58, %62) are the corresponding inner-loop steps.
%51 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%52 = spv.Load "Input" %51 : vector<3xi32>
%53 = spv.CompositeExtract %52[0 : i32] : vector<3xi32>
%54 = spv.constant 32 : i32
%55 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%56 = spv.Load "Input" %55 : vector<3xi32>
%57 = spv.CompositeExtract %56[1 : i32] : vector<3xi32>
%58 = spv.constant 2 : i32
%59 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%60 = spv.Load "Input" %59 : vector<3xi32>
%61 = spv.CompositeExtract %60[2 : i32] : vector<3xi32>
%62 = spv.constant 2 : i32
// Inner loop 1: %64 from LocalInvocationId[2] while %64 < %46, step 2.
spv.loop {
spv.Branch ^bb1(%61 : i32)
^bb1(%64: i32): // 2 preds: ^bb0, ^bb2
%65 = spv.SLessThan %64, %46 : i32
spv.BranchConditional %65, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner loop 2: %67 from LocalInvocationId[1] while %67 < %42, step 2.
spv.loop {
spv.Branch ^bb1(%57 : i32)
^bb1(%67: i32): // 2 preds: ^bb0, ^bb2
%68 = spv.SLessThan %67, %42 : i32
spv.BranchConditional %68, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner loop 3: %70 from LocalInvocationId[0] while %70 < %50, step 32.
spv.loop {
spv.Branch ^bb1(%53 : i32)
^bb1(%70: i32): // 2 preds: ^bb0, ^bb2
%71 = spv.SLessThan %70, %50 : i32
spv.BranchConditional %71, ^bb2, ^bb3
^bb2: // pred: ^bb1
// %72 and %73 combine an outer and an inner induction variable each.
%72 = spv.IAdd %34, %67 : i32
%73 = spv.IAdd %31, %64 : i32
// Load offset into binding 0: %81 = 0 + 5 * %72 + 1 * %73.
%74 = spv.constant 0 : i32
%75 = spv.constant 0 : i32
%76 = spv.constant 5 : i32
%77 = spv.IMul %76, %72 : i32
%78 = spv.IAdd %75, %77 : i32
%79 = spv.constant 1 : i32
%80 = spv.IMul %79, %73 : i32
%81 = spv.IAdd %78, %80 : i32
%82 = spv.AccessChain %6[%74, %81] : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
%83 = spv.Load "StorageBuffer" %82 : f32
// Store offset into binding 1: %95 = 0 + 1 * %73 + 1 * %72 + 1 * %84.
%84 = spv.IAdd %37, %70 : i32
%85 = spv.constant 0 : i32
%86 = spv.constant 0 : i32
%87 = spv.constant 1 : i32
%88 = spv.IMul %87, %73 : i32
%89 = spv.IAdd %86, %88 : i32
%90 = spv.constant 1 : i32
%91 = spv.IMul %90, %72 : i32
%92 = spv.IAdd %89, %91 : i32
%93 = spv.constant 1 : i32
%94 = spv.IMul %93, %84 : i32
%95 = spv.IAdd %92, %94 : i32
%96 = spv.AccessChain %5[%85, %95] : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
spv.Store "StorageBuffer" %96, %83 : f32
// Advance inner loop 3 and branch back to its header.
%97 = spv.IAdd %70, %54 : i32
spv.Branch ^bb1(%97 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance inner loop 2.
%69 = spv.IAdd %67, %58 : i32
spv.Branch ^bb1(%69 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance inner loop 1.
%66 = spv.IAdd %64, %62 : i32
spv.Branch ^bb1(%66 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance outer loop 3.
%63 = spv.IAdd %37, %30 : i32
spv.Branch ^bb1(%63 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance outer loop 2.
%36 = spv.IAdd %34, %28 : i32
spv.Branch ^bb1(%36 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance outer loop 1.
%33 = spv.IAdd %31, %26 : i32
spv.Branch ^bb1(%33 : i32)
^bb3: // pred: ^bb1
spv._merge
}
spv.Return
}
// GLCompute entry point whose interface lists the three builtin variables,
// followed by its LocalSize execution mode of 32 x 2 x 2.
spv.EntryPoint "GLCompute" @main_ex_dispatch_4, @__builtin_var_WorkgroupId__, @__builtin_var_NumWorkgroups__, @__builtin_var_LocalInvocationId__
spv.ExecutionMode @main_ex_dispatch_4 "LocalSize", 32, 2, 2
}
// *** IR Dump After Canonicalizer ***
// SPIR-V module for dispatch @main_ex_dispatch_4. The function copies f32
// elements from binding (0, 0) to binding (0, 1) inside six nested spv.loop
// regions. All integer constants are defined once at the top of the function.
spv.module Logical GLSL450 {
// Built-in input variables read by the function below.
spv.globalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_NumWorkgroups__ built_in("NumWorkgroups") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input>
// Buffer resources: each is a struct wrapping a 5-element f32 array.
spv.globalVariable @__resource_var_0_0__ bind(0, 0) : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
spv.globalVariable @__resource_var_0_1__ bind(0, 1) : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
spv.func @main_ex_dispatch_4() "None" {
// Shared constants: %0 = -1, %1 = 1, %2 = 32, %3 = 2, %4 = 5, %5 = 0.
%0 = spv.constant -1 : i32
%1 = spv.constant 1 : i32
%2 = spv.constant 32 : i32
%3 = spv.constant 2 : i32
%4 = spv.constant 5 : i32
%5 = spv.constant 0 : i32
// %6 (binding 1) is the store destination; %7 (binding 0) is the load source.
%6 = spv._address_of @__resource_var_0_1__ : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
%7 = spv._address_of @__resource_var_0_0__ : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
// Components 0, 1 and 2 of WorkgroupId and NumWorkgroups; every component is
// extracted from its own address_of + load of the builtin variable.
%8 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%9 = spv.Load "Input" %8 : vector<3xi32>
%10 = spv.CompositeExtract %9[0 : i32] : vector<3xi32>
%11 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%12 = spv.Load "Input" %11 : vector<3xi32>
%13 = spv.CompositeExtract %12[0 : i32] : vector<3xi32>
%14 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%15 = spv.Load "Input" %14 : vector<3xi32>
%16 = spv.CompositeExtract %15[1 : i32] : vector<3xi32>
%17 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%18 = spv.Load "Input" %17 : vector<3xi32>
%19 = spv.CompositeExtract %18[1 : i32] : vector<3xi32>
%20 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%21 = spv.Load "Input" %20 : vector<3xi32>
%22 = spv.CompositeExtract %21[2 : i32] : vector<3xi32>
%23 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%24 = spv.Load "Input" %23 : vector<3xi32>
%25 = spv.CompositeExtract %24[2 : i32] : vector<3xi32>
// Start value and step of the three outer loops:
//   %26 / %27 = WorkgroupId[2] * 2  / NumWorkgroups[2] * 2
//   %28 / %29 = WorkgroupId[1] * 2  / NumWorkgroups[1] * 2
//   %30 / %31 = WorkgroupId[0] * 32 / NumWorkgroups[0] * 32
%26 = spv.IMul %22, %3 : i32
%27 = spv.IMul %25, %3 : i32
%28 = spv.IMul %16, %3 : i32
%29 = spv.IMul %19, %3 : i32
%30 = spv.IMul %10, %2 : i32
%31 = spv.IMul %13, %2 : i32
// Outer loop 1: %32 starts at %26, runs while %32 < 5, steps by %27.
spv.loop {
spv.Branch ^bb1(%26 : i32)
^bb1(%32: i32): // 2 preds: ^bb0, ^bb2
%33 = spv.SLessThan %32, %4 : i32
spv.BranchConditional %33, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Outer loop 2: %35 starts at %28, runs while %35 < 1, steps by %29.
spv.loop {
spv.Branch ^bb1(%28 : i32)
^bb1(%35: i32): // 2 preds: ^bb0, ^bb2
%36 = spv.SLessThan %35, %1 : i32
spv.BranchConditional %36, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Outer loop 3: %38 starts at %30, runs while %38 < 1, steps by %31.
spv.loop {
spv.Branch ^bb1(%30 : i32)
^bb1(%38: i32): // 2 preds: ^bb0, ^bb2
%39 = spv.SLessThan %38, %1 : i32
spv.BranchConditional %39, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Upper bounds of the three inner loops (signed min via compare + select):
//   %43 = min(2, 1 - %35), %47 = min(2, 5 - %32), %51 = min(32, 1 - %38)
%40 = spv.IMul %35, %0 : i32
%41 = spv.IAdd %40, %1 : i32
%42 = spv.SLessThan %3, %41 : i32
%43 = spv.Select %42, %3, %41 : i1, i32
%44 = spv.IMul %32, %0 : i32
%45 = spv.IAdd %44, %4 : i32
%46 = spv.SLessThan %3, %45 : i32
%47 = spv.Select %46, %3, %45 : i1, i32
%48 = spv.IMul %38, %0 : i32
%49 = spv.IAdd %48, %1 : i32
%50 = spv.SLessThan %2, %49 : i32
%51 = spv.Select %50, %2, %49 : i1, i32
// Inner-loop start values: LocalInvocationId[0..2] (%54, %57, %60).
%52 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%53 = spv.Load "Input" %52 : vector<3xi32>
%54 = spv.CompositeExtract %53[0 : i32] : vector<3xi32>
%55 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%56 = spv.Load "Input" %55 : vector<3xi32>
%57 = spv.CompositeExtract %56[1 : i32] : vector<3xi32>
%58 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%59 = spv.Load "Input" %58 : vector<3xi32>
%60 = spv.CompositeExtract %59[2 : i32] : vector<3xi32>
// Inner loop 1: %62 from LocalInvocationId[2] while %62 < %47, step 2.
spv.loop {
spv.Branch ^bb1(%60 : i32)
^bb1(%62: i32): // 2 preds: ^bb0, ^bb2
%63 = spv.SLessThan %62, %47 : i32
spv.BranchConditional %63, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner loop 2: %65 from LocalInvocationId[1] while %65 < %43, step 2.
spv.loop {
spv.Branch ^bb1(%57 : i32)
^bb1(%65: i32): // 2 preds: ^bb0, ^bb2
%66 = spv.SLessThan %65, %43 : i32
spv.BranchConditional %66, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner loop 3: %68 from LocalInvocationId[0] while %68 < %51, step 32.
spv.loop {
spv.Branch ^bb1(%54 : i32)
^bb1(%68: i32): // 2 preds: ^bb0, ^bb2
%69 = spv.SLessThan %68, %51 : i32
spv.BranchConditional %69, ^bb2, ^bb3
^bb2: // pred: ^bb1
// %70 and %71 combine an outer and an inner induction variable each.
%70 = spv.IAdd %35, %65 : i32
%71 = spv.IAdd %32, %62 : i32
// Load offset into binding 0: %73 = %70 * 5 + %71.
%72 = spv.IMul %70, %4 : i32
%73 = spv.IAdd %72, %71 : i32
%74 = spv.AccessChain %7[%5, %73] : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
%75 = spv.Load "StorageBuffer" %74 : f32
// Store offset into binding 1: %78 = %71 + %70 + %76.
%76 = spv.IAdd %38, %68 : i32
%77 = spv.IAdd %71, %70 : i32
%78 = spv.IAdd %77, %76 : i32
%79 = spv.AccessChain %6[%5, %78] : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
spv.Store "StorageBuffer" %79, %75 : f32
// Advance inner loop 3 and branch back to its header.
%80 = spv.IAdd %68, %2 : i32
spv.Branch ^bb1(%80 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance inner loop 2.
%67 = spv.IAdd %65, %3 : i32
spv.Branch ^bb1(%67 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance inner loop 1.
%64 = spv.IAdd %62, %3 : i32
spv.Branch ^bb1(%64 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance outer loop 3.
%61 = spv.IAdd %38, %31 : i32
spv.Branch ^bb1(%61 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance outer loop 2.
%37 = spv.IAdd %35, %29 : i32
spv.Branch ^bb1(%37 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance outer loop 1.
%34 = spv.IAdd %32, %27 : i32
spv.Branch ^bb1(%34 : i32)
^bb3: // pred: ^bb1
spv._merge
}
spv.Return
}
// GLCompute entry point whose interface lists the three builtin variables,
// followed by its LocalSize execution mode of 32 x 2 x 2.
spv.EntryPoint "GLCompute" @main_ex_dispatch_4, @__builtin_var_WorkgroupId__, @__builtin_var_NumWorkgroups__, @__builtin_var_LocalInvocationId__
spv.ExecutionMode @main_ex_dispatch_4 "LocalSize", 32, 2, 2
}
// *** IR Dump After CSE ***
// SPIR-V module for dispatch @main_ex_dispatch_4. The function copies f32
// elements from binding (0, 0) to binding (0, 1) inside six nested spv.loop
// regions. Each builtin variable's address is taken once and then loaded once
// per extracted component.
spv.module Logical GLSL450 {
// Built-in input variables read by the function below.
spv.globalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_NumWorkgroups__ built_in("NumWorkgroups") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input>
// Buffer resources: each is a struct wrapping a 5-element f32 array.
spv.globalVariable @__resource_var_0_0__ bind(0, 0) : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
spv.globalVariable @__resource_var_0_1__ bind(0, 1) : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
spv.func @main_ex_dispatch_4() "None" {
// Shared constants: %0 = -1, %1 = 1, %2 = 32, %3 = 2, %4 = 5, %5 = 0.
%0 = spv.constant -1 : i32
%1 = spv.constant 1 : i32
%2 = spv.constant 32 : i32
%3 = spv.constant 2 : i32
%4 = spv.constant 5 : i32
%5 = spv.constant 0 : i32
// %6 (binding 1) is the store destination; %7 (binding 0) is the load source.
%6 = spv._address_of @__resource_var_0_1__ : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
%7 = spv._address_of @__resource_var_0_0__ : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
// %8 addresses WorkgroupId and %11 addresses NumWorkgroups; each pointer is
// loaded three times, once for every component extracted below.
%8 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%9 = spv.Load "Input" %8 : vector<3xi32>
%10 = spv.CompositeExtract %9[0 : i32] : vector<3xi32>
%11 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%12 = spv.Load "Input" %11 : vector<3xi32>
%13 = spv.CompositeExtract %12[0 : i32] : vector<3xi32>
%14 = spv.Load "Input" %8 : vector<3xi32>
%15 = spv.CompositeExtract %14[1 : i32] : vector<3xi32>
%16 = spv.Load "Input" %11 : vector<3xi32>
%17 = spv.CompositeExtract %16[1 : i32] : vector<3xi32>
%18 = spv.Load "Input" %8 : vector<3xi32>
%19 = spv.CompositeExtract %18[2 : i32] : vector<3xi32>
%20 = spv.Load "Input" %11 : vector<3xi32>
%21 = spv.CompositeExtract %20[2 : i32] : vector<3xi32>
// Start value and step of the three outer loops:
//   %22 / %23 = WorkgroupId[2] * 2  / NumWorkgroups[2] * 2
//   %24 / %25 = WorkgroupId[1] * 2  / NumWorkgroups[1] * 2
//   %26 / %27 = WorkgroupId[0] * 32 / NumWorkgroups[0] * 32
%22 = spv.IMul %19, %3 : i32
%23 = spv.IMul %21, %3 : i32
%24 = spv.IMul %15, %3 : i32
%25 = spv.IMul %17, %3 : i32
%26 = spv.IMul %10, %2 : i32
%27 = spv.IMul %13, %2 : i32
// Outer loop 1: %28 starts at %22, runs while %28 < 5, steps by %23.
spv.loop {
spv.Branch ^bb1(%22 : i32)
^bb1(%28: i32): // 2 preds: ^bb0, ^bb2
%29 = spv.SLessThan %28, %4 : i32
spv.BranchConditional %29, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Outer loop 2: %31 starts at %24, runs while %31 < 1, steps by %25.
spv.loop {
spv.Branch ^bb1(%24 : i32)
^bb1(%31: i32): // 2 preds: ^bb0, ^bb2
%32 = spv.SLessThan %31, %1 : i32
spv.BranchConditional %32, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Outer loop 3: %34 starts at %26, runs while %34 < 1, steps by %27.
spv.loop {
spv.Branch ^bb1(%26 : i32)
^bb1(%34: i32): // 2 preds: ^bb0, ^bb2
%35 = spv.SLessThan %34, %1 : i32
spv.BranchConditional %35, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Upper bounds of the three inner loops (signed min via compare + select):
//   %39 = min(2, 1 - %31), %43 = min(2, 5 - %28), %47 = min(32, 1 - %34)
%36 = spv.IMul %31, %0 : i32
%37 = spv.IAdd %36, %1 : i32
%38 = spv.SLessThan %3, %37 : i32
%39 = spv.Select %38, %3, %37 : i1, i32
%40 = spv.IMul %28, %0 : i32
%41 = spv.IAdd %40, %4 : i32
%42 = spv.SLessThan %3, %41 : i32
%43 = spv.Select %42, %3, %41 : i1, i32
%44 = spv.IMul %34, %0 : i32
%45 = spv.IAdd %44, %1 : i32
%46 = spv.SLessThan %2, %45 : i32
%47 = spv.Select %46, %2, %45 : i1, i32
// Inner-loop start values: LocalInvocationId[0..2] (%50, %52, %54), each
// extracted from a separate load of the same pointer %48.
%48 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%49 = spv.Load "Input" %48 : vector<3xi32>
%50 = spv.CompositeExtract %49[0 : i32] : vector<3xi32>
%51 = spv.Load "Input" %48 : vector<3xi32>
%52 = spv.CompositeExtract %51[1 : i32] : vector<3xi32>
%53 = spv.Load "Input" %48 : vector<3xi32>
%54 = spv.CompositeExtract %53[2 : i32] : vector<3xi32>
// Inner loop 1: %56 from LocalInvocationId[2] while %56 < %43, step 2.
spv.loop {
spv.Branch ^bb1(%54 : i32)
^bb1(%56: i32): // 2 preds: ^bb0, ^bb2
%57 = spv.SLessThan %56, %43 : i32
spv.BranchConditional %57, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner loop 2: %59 from LocalInvocationId[1] while %59 < %39, step 2.
spv.loop {
spv.Branch ^bb1(%52 : i32)
^bb1(%59: i32): // 2 preds: ^bb0, ^bb2
%60 = spv.SLessThan %59, %39 : i32
spv.BranchConditional %60, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner loop 3: %62 from LocalInvocationId[0] while %62 < %47, step 32.
spv.loop {
spv.Branch ^bb1(%50 : i32)
^bb1(%62: i32): // 2 preds: ^bb0, ^bb2
%63 = spv.SLessThan %62, %47 : i32
spv.BranchConditional %63, ^bb2, ^bb3
^bb2: // pred: ^bb1
// %64 and %65 combine an outer and an inner induction variable each.
%64 = spv.IAdd %31, %59 : i32
%65 = spv.IAdd %28, %56 : i32
// Load offset into binding 0: %67 = %64 * 5 + %65.
%66 = spv.IMul %64, %4 : i32
%67 = spv.IAdd %66, %65 : i32
%68 = spv.AccessChain %7[%5, %67] : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
%69 = spv.Load "StorageBuffer" %68 : f32
// Store offset into binding 1: %72 = %65 + %64 + %70.
%70 = spv.IAdd %34, %62 : i32
%71 = spv.IAdd %65, %64 : i32
%72 = spv.IAdd %71, %70 : i32
%73 = spv.AccessChain %6[%5, %72] : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
spv.Store "StorageBuffer" %73, %69 : f32
// Advance inner loop 3 and branch back to its header.
%74 = spv.IAdd %62, %2 : i32
spv.Branch ^bb1(%74 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance inner loop 2.
%61 = spv.IAdd %59, %3 : i32
spv.Branch ^bb1(%61 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance inner loop 1.
%58 = spv.IAdd %56, %3 : i32
spv.Branch ^bb1(%58 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance outer loop 3.
%55 = spv.IAdd %34, %27 : i32
spv.Branch ^bb1(%55 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance outer loop 2.
%33 = spv.IAdd %31, %25 : i32
spv.Branch ^bb1(%33 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance outer loop 1.
%30 = spv.IAdd %28, %23 : i32
spv.Branch ^bb1(%30 : i32)
^bb3: // pred: ^bb1
spv._merge
}
spv.Return
}
// GLCompute entry point whose interface lists the three builtin variables,
// followed by its LocalSize execution mode of 32 x 2 x 2.
spv.EntryPoint "GLCompute" @main_ex_dispatch_4, @__builtin_var_WorkgroupId__, @__builtin_var_NumWorkgroups__, @__builtin_var_LocalInvocationId__
spv.ExecutionMode @main_ex_dispatch_4 "LocalSize", 32, 2, 2
}
// *** IR Dump After SPIRVUpdateVCE ***
// SPIR-V module for dispatch @main_ex_dispatch_4. The module header carries a
// `requires` triple: version v1.0, the Shader capability and the
// SPV_KHR_storage_buffer_storage_class extension. The function copies f32
// elements from binding (0, 0) to binding (0, 1) inside six nested spv.loop
// regions.
spv.module Logical GLSL450 requires #spv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class]> {
// Built-in input variables read by the function below.
spv.globalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_NumWorkgroups__ built_in("NumWorkgroups") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input>
// Buffer resources: each is a struct wrapping a 5-element f32 array.
spv.globalVariable @__resource_var_0_0__ bind(0, 0) : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
spv.globalVariable @__resource_var_0_1__ bind(0, 1) : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
spv.func @main_ex_dispatch_4() "None" {
// Shared constants: %0 = -1, %1 = 1, %2 = 32, %3 = 2, %4 = 5, %5 = 0.
%0 = spv.constant -1 : i32
%1 = spv.constant 1 : i32
%2 = spv.constant 32 : i32
%3 = spv.constant 2 : i32
%4 = spv.constant 5 : i32
%5 = spv.constant 0 : i32
// %6 (binding 1) is the store destination; %7 (binding 0) is the load source.
%6 = spv._address_of @__resource_var_0_1__ : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
%7 = spv._address_of @__resource_var_0_0__ : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
// %8 addresses WorkgroupId and %11 addresses NumWorkgroups; each pointer is
// loaded three times, once for every component extracted below.
%8 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%9 = spv.Load "Input" %8 : vector<3xi32>
%10 = spv.CompositeExtract %9[0 : i32] : vector<3xi32>
%11 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%12 = spv.Load "Input" %11 : vector<3xi32>
%13 = spv.CompositeExtract %12[0 : i32] : vector<3xi32>
%14 = spv.Load "Input" %8 : vector<3xi32>
%15 = spv.CompositeExtract %14[1 : i32] : vector<3xi32>
%16 = spv.Load "Input" %11 : vector<3xi32>
%17 = spv.CompositeExtract %16[1 : i32] : vector<3xi32>
%18 = spv.Load "Input" %8 : vector<3xi32>
%19 = spv.CompositeExtract %18[2 : i32] : vector<3xi32>
%20 = spv.Load "Input" %11 : vector<3xi32>
%21 = spv.CompositeExtract %20[2 : i32] : vector<3xi32>
// Start value and step of the three outer loops:
//   %22 / %23 = WorkgroupId[2] * 2  / NumWorkgroups[2] * 2
//   %24 / %25 = WorkgroupId[1] * 2  / NumWorkgroups[1] * 2
//   %26 / %27 = WorkgroupId[0] * 32 / NumWorkgroups[0] * 32
%22 = spv.IMul %19, %3 : i32
%23 = spv.IMul %21, %3 : i32
%24 = spv.IMul %15, %3 : i32
%25 = spv.IMul %17, %3 : i32
%26 = spv.IMul %10, %2 : i32
%27 = spv.IMul %13, %2 : i32
// Outer loop 1: %28 starts at %22, runs while %28 < 5, steps by %23.
spv.loop {
spv.Branch ^bb1(%22 : i32)
^bb1(%28: i32): // 2 preds: ^bb0, ^bb2
%29 = spv.SLessThan %28, %4 : i32
spv.BranchConditional %29, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Outer loop 2: %31 starts at %24, runs while %31 < 1, steps by %25.
spv.loop {
spv.Branch ^bb1(%24 : i32)
^bb1(%31: i32): // 2 preds: ^bb0, ^bb2
%32 = spv.SLessThan %31, %1 : i32
spv.BranchConditional %32, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Outer loop 3: %34 starts at %26, runs while %34 < 1, steps by %27.
spv.loop {
spv.Branch ^bb1(%26 : i32)
^bb1(%34: i32): // 2 preds: ^bb0, ^bb2
%35 = spv.SLessThan %34, %1 : i32
spv.BranchConditional %35, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Upper bounds of the three inner loops (signed min via compare + select):
//   %39 = min(2, 1 - %31), %43 = min(2, 5 - %28), %47 = min(32, 1 - %34)
%36 = spv.IMul %31, %0 : i32
%37 = spv.IAdd %36, %1 : i32
%38 = spv.SLessThan %3, %37 : i32
%39 = spv.Select %38, %3, %37 : i1, i32
%40 = spv.IMul %28, %0 : i32
%41 = spv.IAdd %40, %4 : i32
%42 = spv.SLessThan %3, %41 : i32
%43 = spv.Select %42, %3, %41 : i1, i32
%44 = spv.IMul %34, %0 : i32
%45 = spv.IAdd %44, %1 : i32
%46 = spv.SLessThan %2, %45 : i32
%47 = spv.Select %46, %2, %45 : i1, i32
// Inner-loop start values: LocalInvocationId[0..2] (%50, %52, %54), each
// extracted from a separate load of the same pointer %48.
%48 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%49 = spv.Load "Input" %48 : vector<3xi32>
%50 = spv.CompositeExtract %49[0 : i32] : vector<3xi32>
%51 = spv.Load "Input" %48 : vector<3xi32>
%52 = spv.CompositeExtract %51[1 : i32] : vector<3xi32>
%53 = spv.Load "Input" %48 : vector<3xi32>
%54 = spv.CompositeExtract %53[2 : i32] : vector<3xi32>
// Inner loop 1: %56 from LocalInvocationId[2] while %56 < %43, step 2.
spv.loop {
spv.Branch ^bb1(%54 : i32)
^bb1(%56: i32): // 2 preds: ^bb0, ^bb2
%57 = spv.SLessThan %56, %43 : i32
spv.BranchConditional %57, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner loop 2: %59 from LocalInvocationId[1] while %59 < %39, step 2.
spv.loop {
spv.Branch ^bb1(%52 : i32)
^bb1(%59: i32): // 2 preds: ^bb0, ^bb2
%60 = spv.SLessThan %59, %39 : i32
spv.BranchConditional %60, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner loop 3: %62 from LocalInvocationId[0] while %62 < %47, step 32.
spv.loop {
spv.Branch ^bb1(%50 : i32)
^bb1(%62: i32): // 2 preds: ^bb0, ^bb2
%63 = spv.SLessThan %62, %47 : i32
spv.BranchConditional %63, ^bb2, ^bb3
^bb2: // pred: ^bb1
// %64 and %65 combine an outer and an inner induction variable each.
%64 = spv.IAdd %31, %59 : i32
%65 = spv.IAdd %28, %56 : i32
// Load offset into binding 0: %67 = %64 * 5 + %65.
%66 = spv.IMul %64, %4 : i32
%67 = spv.IAdd %66, %65 : i32
%68 = spv.AccessChain %7[%5, %67] : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
%69 = spv.Load "StorageBuffer" %68 : f32
// Store offset into binding 1: %72 = %65 + %64 + %70.
%70 = spv.IAdd %34, %62 : i32
%71 = spv.IAdd %65, %64 : i32
%72 = spv.IAdd %71, %70 : i32
%73 = spv.AccessChain %6[%5, %72] : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
spv.Store "StorageBuffer" %73, %69 : f32
// Advance inner loop 3 and branch back to its header.
%74 = spv.IAdd %62, %2 : i32
spv.Branch ^bb1(%74 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance inner loop 2.
%61 = spv.IAdd %59, %3 : i32
spv.Branch ^bb1(%61 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance inner loop 1.
%58 = spv.IAdd %56, %3 : i32
spv.Branch ^bb1(%58 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance outer loop 3.
%55 = spv.IAdd %34, %27 : i32
spv.Branch ^bb1(%55 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance outer loop 2.
%33 = spv.IAdd %31, %25 : i32
spv.Branch ^bb1(%33 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Advance outer loop 1.
%30 = spv.IAdd %28, %23 : i32
spv.Branch ^bb1(%30 : i32)
^bb3: // pred: ^bb1
spv._merge
}
spv.Return
}
// GLCompute entry point whose interface lists the three builtin variables,
// followed by its LocalSize execution mode of 32 x 2 x 2.
spv.EntryPoint "GLCompute" @main_ex_dispatch_4, @__builtin_var_WorkgroupId__, @__builtin_var_NumWorkgroups__, @__builtin_var_LocalInvocationId__
spv.ExecutionMode @main_ex_dispatch_4 "LocalSize", 32, 2, 2
}
// *** IR Dump After mlir::iree_compiler::IREE::HAL::TranslateExecutablesPass ***
// HAL executable for dispatch 4: an interface declaration, one entry point and
// a "vulkan*" target that wraps the SPIR-V module implementing the entry point.
hal.executable @main_ex_dispatch_4 attributes {sym_visibility = "private"} {
// Two storage buffers in descriptor set 0: @arg0 is read, @ret0 is written.
hal.interface @legacy_io {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
// Entry point signature: tensor<1x5xf32> in, tensor<5x1x1xf32> out.
hal.executable.entry_point @main_ex_dispatch_4 attributes {interface = @legacy_io, ordinal = 0 : i32, signature = (tensor<1x5xf32>) -> tensor<5x1x1xf32>}
hal.executable.target "vulkan*" {
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
spv.module Logical GLSL450 requires #spv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class]> {
// Built-in inputs read by the function, followed by the two 5-element f32
// storage buffers bound at (set 0, binding 0) and (set 0, binding 1).
spv.globalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_NumWorkgroups__ built_in("NumWorkgroups") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__resource_var_0_0__ bind(0, 0) : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
spv.globalVariable @__resource_var_0_1__ bind(0, 1) : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
// Copies elements from the binding-0 buffer to the binding-1 buffer using six
// nested loops: three stepped by workgroup counts, three by the local size.
spv.func @main_ex_dispatch_4() "None" {
// Integer constants reused below as loop bounds, steps and multipliers.
%0 = spv.constant -1 : i32
%1 = spv.constant 1 : i32
%2 = spv.constant 32 : i32
%3 = spv.constant 2 : i32
%4 = spv.constant 5 : i32
%5 = spv.constant 0 : i32
// %6 points at the binding-1 buffer (store target), %7 at binding 0 (load source).
%6 = spv._address_of @__resource_var_0_1__ : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
%7 = spv._address_of @__resource_var_0_0__ : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
// Extract components 0, 1 and 2 of WorkgroupId (%10, %15, %19) and of
// NumWorkgroups (%13, %17, %21); each component is re-loaded separately.
%8 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%9 = spv.Load "Input" %8 : vector<3xi32>
%10 = spv.CompositeExtract %9[0 : i32] : vector<3xi32>
%11 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%12 = spv.Load "Input" %11 : vector<3xi32>
%13 = spv.CompositeExtract %12[0 : i32] : vector<3xi32>
%14 = spv.Load "Input" %8 : vector<3xi32>
%15 = spv.CompositeExtract %14[1 : i32] : vector<3xi32>
%16 = spv.Load "Input" %11 : vector<3xi32>
%17 = spv.CompositeExtract %16[1 : i32] : vector<3xi32>
%18 = spv.Load "Input" %8 : vector<3xi32>
%19 = spv.CompositeExtract %18[2 : i32] : vector<3xi32>
%20 = spv.Load "Input" %11 : vector<3xi32>
%21 = spv.CompositeExtract %20[2 : i32] : vector<3xi32>
// Outer-loop start = WorkgroupId component * tile, step = NumWorkgroups
// component * tile; the tile is 2 for components 2 and 1, and 32 for component 0.
%22 = spv.IMul %19, %3 : i32
%23 = spv.IMul %21, %3 : i32
%24 = spv.IMul %15, %3 : i32
%25 = spv.IMul %17, %3 : i32
%26 = spv.IMul %10, %2 : i32
%27 = spv.IMul %13, %2 : i32
// Outer loop 1: %28 starts at %22, runs while %28 < 5, advanced by %23.
spv.loop {
spv.Branch ^bb1(%22 : i32)
^bb1(%28: i32): // 2 preds: ^bb0, ^bb2
%29 = spv.SLessThan %28, %4 : i32
spv.BranchConditional %29, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Outer loop 2: %31 starts at %24, runs while %31 < 1, advanced by %25.
spv.loop {
spv.Branch ^bb1(%24 : i32)
^bb1(%31: i32): // 2 preds: ^bb0, ^bb2
%32 = spv.SLessThan %31, %1 : i32
spv.BranchConditional %32, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Outer loop 3: %34 starts at %26, runs while %34 < 1, advanced by %27.
spv.loop {
spv.Branch ^bb1(%26 : i32)
^bb1(%34: i32): // 2 preds: ^bb0, ^bb2
%35 = spv.SLessThan %34, %1 : i32
spv.BranchConditional %35, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner-loop upper bounds, each the smaller of the tile size and what is left:
// %39 = min(2, 1 - %31), %43 = min(2, 5 - %28), %47 = min(32, 1 - %34).
%36 = spv.IMul %31, %0 : i32
%37 = spv.IAdd %36, %1 : i32
%38 = spv.SLessThan %3, %37 : i32
%39 = spv.Select %38, %3, %37 : i1, i32
%40 = spv.IMul %28, %0 : i32
%41 = spv.IAdd %40, %4 : i32
%42 = spv.SLessThan %3, %41 : i32
%43 = spv.Select %42, %3, %41 : i1, i32
%44 = spv.IMul %34, %0 : i32
%45 = spv.IAdd %44, %1 : i32
%46 = spv.SLessThan %2, %45 : i32
%47 = spv.Select %46, %2, %45 : i1, i32
// Components 0, 1, 2 of LocalInvocationId (%50, %52, %54) seed the inner loops.
%48 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%49 = spv.Load "Input" %48 : vector<3xi32>
%50 = spv.CompositeExtract %49[0 : i32] : vector<3xi32>
%51 = spv.Load "Input" %48 : vector<3xi32>
%52 = spv.CompositeExtract %51[1 : i32] : vector<3xi32>
%53 = spv.Load "Input" %48 : vector<3xi32>
%54 = spv.CompositeExtract %53[2 : i32] : vector<3xi32>
// Inner loop 1: %56 starts at %54, runs while %56 < %43, advanced by 2.
spv.loop {
spv.Branch ^bb1(%54 : i32)
^bb1(%56: i32): // 2 preds: ^bb0, ^bb2
%57 = spv.SLessThan %56, %43 : i32
spv.BranchConditional %57, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner loop 2: %59 starts at %52, runs while %59 < %39, advanced by 2.
spv.loop {
spv.Branch ^bb1(%52 : i32)
^bb1(%59: i32): // 2 preds: ^bb0, ^bb2
%60 = spv.SLessThan %59, %39 : i32
spv.BranchConditional %60, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Inner loop 3: %62 starts at %50, runs while %62 < %47, advanced by 32.
spv.loop {
spv.Branch ^bb1(%50 : i32)
^bb1(%62: i32): // 2 preds: ^bb0, ^bb2
%63 = spv.SLessThan %62, %47 : i32
spv.BranchConditional %63, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Source index %67 = (%31 + %59) * 5 + (%28 + %56); load from the binding-0 buffer.
%64 = spv.IAdd %31, %59 : i32
%65 = spv.IAdd %28, %56 : i32
%66 = spv.IMul %64, %4 : i32
%67 = spv.IAdd %66, %65 : i32
%68 = spv.AccessChain %7[%5, %67] : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
%69 = spv.Load "StorageBuffer" %68 : f32
// Destination index %72 = (%28 + %56) + (%31 + %59) + (%34 + %62); store the
// loaded value unchanged into the binding-1 buffer.
%70 = spv.IAdd %34, %62 : i32
%71 = spv.IAdd %65, %64 : i32
%72 = spv.IAdd %71, %70 : i32
%73 = spv.AccessChain %6[%5, %72] : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
spv.Store "StorageBuffer" %73, %69 : f32
%74 = spv.IAdd %62, %2 : i32
spv.Branch ^bb1(%74 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%61 = spv.IAdd %59, %3 : i32
spv.Branch ^bb1(%61 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%58 = spv.IAdd %56, %3 : i32
spv.Branch ^bb1(%58 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%55 = spv.IAdd %34, %27 : i32
spv.Branch ^bb1(%55 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%33 = spv.IAdd %31, %25 : i32
spv.Branch ^bb1(%33 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%30 = spv.IAdd %28, %23 : i32
spv.Branch ^bb1(%30 : i32)
^bb3: // pred: ^bb1
spv._merge
}
spv.Return
}
// Compute entry point using the three built-ins above; local size is 32 x 2 x 2,
// matching the inner-loop steps of 32, 2 and 2.
spv.EntryPoint "GLCompute" @main_ex_dispatch_4, @__builtin_var_WorkgroupId__, @__builtin_var_NumWorkgroups__, @__builtin_var_LocalInvocationId__
spv.ExecutionMode @main_ex_dispatch_4 "LocalSize", 32, 2, 2
}
// Second copy of the interface, nested inside the target module.
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
}
}
// *** IR Dump After Inliner ***
// Dispatch 5 as a tensor-level function plus the HAL interface it reads and
// writes through. The module carries the SPIR-V target environment attribute.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
// Min-reduces a 5x1x1 f32 tensor over dimensions 1 and 2 into a 5-element tensor.
func @main_ex_dispatch_5() {
%c0 = constant 0 : index
// Reduction init value; 0x7F800000 is the IEEE-754 bit pattern of f32 +infinity.
%cst = constant dense<0x7F800000> : tensor<f32>
%0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<5x1x1xf32>
%1 = "xla_hlo.reduce"(%0, %cst) ( {
^bb0(%arg0: tensor<f32>, %arg1: tensor<f32>): // no predecessors
// Reducer body: element-wise minimum of the two scalar operands.
%2 = xla_hlo.minimum %arg0, %arg1 : tensor<f32>
"xla_hlo.return"(%2) : (tensor<f32>) -> ()
}) {dimensions = dense<[1, 2]> : tensor<2xi64>} : (tensor<5x1x1xf32>, tensor<f32>) -> tensor<5xf32>
hal.interface.store.tensor %1, @legacy_io::@ret0, offset = %c0 : tensor<5xf32>
return
}
// Two storage buffers in descriptor set 0: @arg0 is read, @ret0 is written.
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After mlir::iree_compiler::Shape::`anonymous-namespace'::TieDynamicShapesPass ***
// Loads tensor<5x1x1xf32> from @arg0, min-reduces it over dimensions [1, 2],
// and stores the tensor<5xf32> result to @ret0. All shapes here are static.
func @main_ex_dispatch_5() {
%c0 = constant 0 : index
// Reduction init value; 0x7F800000 is the IEEE-754 bit pattern of f32 +infinity.
%cst = constant dense<0x7F800000> : tensor<f32>
%0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<5x1x1xf32>
%1 = "xla_hlo.reduce"(%0, %cst) ( {
^bb0(%arg0: tensor<f32>, %arg1: tensor<f32>): // no predecessors
%2 = xla_hlo.minimum %arg0, %arg1 : tensor<f32>
"xla_hlo.return"(%2) : (tensor<f32>) -> ()
}) {dimensions = dense<[1, 2]> : tensor<2xi64>} : (tensor<5x1x1xf32>, tensor<f32>) -> tensor<5xf32>
hal.interface.store.tensor %1, @legacy_io::@ret0, offset = %c0 : tensor<5xf32>
return
}
// *** IR Dump After mlir::iree_compiler::Shape::`anonymous-namespace'::MaterializeShapeCalculationsPass ***
// Min-reduction of the @arg0 tensor over dimensions [1, 2], written to @ret0.
// No shape-calculation ops appear in this snapshot; every type is static.
func @main_ex_dispatch_5() {
%c0 = constant 0 : index
// f32 +infinity (bit pattern 0x7F800000), used as the reduction init value.
%cst = constant dense<0x7F800000> : tensor<f32>
%0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<5x1x1xf32>
%1 = "xla_hlo.reduce"(%0, %cst) ( {
^bb0(%arg0: tensor<f32>, %arg1: tensor<f32>): // no predecessors
%2 = xla_hlo.minimum %arg0, %arg1 : tensor<f32>
"xla_hlo.return"(%2) : (tensor<f32>) -> ()
}) {dimensions = dense<[1, 2]> : tensor<2xi64>} : (tensor<5x1x1xf32>, tensor<f32>) -> tensor<5xf32>
hal.interface.store.tensor %1, @legacy_io::@ret0, offset = %c0 : tensor<5xf32>
return
}
// *** IR Dump After mlir::iree_compiler::Shape::`anonymous-namespace'::HoistShapeCalculations ***
// Reads a 5x1x1 f32 tensor from @arg0, takes the minimum across dimensions 1
// and 2, and writes the resulting 5-element tensor to @ret0.
func @main_ex_dispatch_5() {
%c0 = constant 0 : index
// f32 +infinity (bit pattern 0x7F800000), used as the reduction init value.
%cst = constant dense<0x7F800000> : tensor<f32>
%0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<5x1x1xf32>
%1 = "xla_hlo.reduce"(%0, %cst) ( {
^bb0(%arg0: tensor<f32>, %arg1: tensor<f32>): // no predecessors
%2 = xla_hlo.minimum %arg0, %arg1 : tensor<f32>
"xla_hlo.return"(%2) : (tensor<f32>) -> ()
}) {dimensions = dense<[1, 2]> : tensor<2xi64>} : (tensor<5x1x1xf32>, tensor<f32>) -> tensor<5xf32>
hal.interface.store.tensor %1, @legacy_io::@ret0, offset = %c0 : tensor<5xf32>
return
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::DecomposeHLOClampPass ***
// Min-reduction dispatch: @arg0 (5x1x1 f32) reduced over dimensions [1, 2] into
// @ret0 (5 f32). The function contains no clamp op; only minimum is used.
func @main_ex_dispatch_5() {
%c0 = constant 0 : index
// f32 +infinity (bit pattern 0x7F800000), used as the reduction init value.
%cst = constant dense<0x7F800000> : tensor<f32>
%0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<5x1x1xf32>
%1 = "xla_hlo.reduce"(%0, %cst) ( {
^bb0(%arg0: tensor<f32>, %arg1: tensor<f32>): // no predecessors
%2 = xla_hlo.minimum %arg0, %arg1 : tensor<f32>
"xla_hlo.return"(%2) : (tensor<f32>) -> ()
}) {dimensions = dense<[1, 2]> : tensor<2xi64>} : (tensor<5x1x1xf32>, tensor<f32>) -> tensor<5xf32>
hal.interface.store.tensor %1, @legacy_io::@ret0, offset = %c0 : tensor<5xf32>
return
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::ConvertHLOToLinalgOnTensorsPass ***
// Min-reduction dispatch over dimensions [1, 2] of the @arg0 tensor.
// The reduce is still expressed as "xla_hlo.reduce" in this snapshot; no linalg
// op appears in the function.
func @main_ex_dispatch_5() {
%c0 = constant 0 : index
// f32 +infinity (bit pattern 0x7F800000), used as the reduction init value.
%cst = constant dense<0x7F800000> : tensor<f32>
%0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<5x1x1xf32>
%1 = "xla_hlo.reduce"(%0, %cst) ( {
^bb0(%arg0: tensor<f32>, %arg1: tensor<f32>): // no predecessors
%2 = xla_hlo.minimum %arg0, %arg1 : tensor<f32>
"xla_hlo.return"(%2) : (tensor<f32>) -> ()
}) {dimensions = dense<[1, 2]> : tensor<2xi64>} : (tensor<5x1x1xf32>, tensor<f32>) -> tensor<5xf32>
hal.interface.store.tensor %1, @legacy_io::@ret0, offset = %c0 : tensor<5xf32>
return
}
// *** IR Dump After LinalgFusionOfTensorOps ***
// Module for dispatch 5: the tensor-level reduce function and its HAL interface.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
// Single reduce op between one tensor load and one tensor store.
func @main_ex_dispatch_5() {
%c0 = constant 0 : index
// f32 +infinity (bit pattern 0x7F800000), used as the reduction init value.
%cst = constant dense<0x7F800000> : tensor<f32>
%0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<5x1x1xf32>
%1 = "xla_hlo.reduce"(%0, %cst) ( {
^bb0(%arg0: tensor<f32>, %arg1: tensor<f32>): // no predecessors
%2 = xla_hlo.minimum %arg0, %arg1 : tensor<f32>
"xla_hlo.return"(%2) : (tensor<f32>) -> ()
}) {dimensions = dense<[1, 2]> : tensor<2xi64>} : (tensor<5x1x1xf32>, tensor<f32>) -> tensor<5xf32>
hal.interface.store.tensor %1, @legacy_io::@ret0, offset = %c0 : tensor<5xf32>
return
}
// Two storage buffers in descriptor set 0: @arg0 is read, @ret0 is written.
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::ConvertHLOToLinalgOnBuffersPass ***
// Buffer-level form of the min-reduction: the tensor load/store and the
// xla_hlo.reduce are replaced by memref placeholders, a fill, and a
// linalg.indexed_generic that updates the output buffer in place.
func @main_ex_dispatch_5() {
// %0 is the @ret0 output buffer, %1 the @arg0 input buffer.
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5xf32>
%c0 = constant 0 : index
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<5x1x1xf32>
// 0x7F800000 is the IEEE-754 bit pattern of f32 +infinity.
%cst = constant 0x7F800000 : f32
%cst_0 = constant 0.000000e+00 : f32
// The output buffer is first filled with 0.0.
// NOTE(review): with a 5x1x1 input the reduction indices can only be (0, 0),
// so the select on %5 below always takes %cst and this 0.0 looks unused — confirm.
linalg.fill(%0, %cst_0) : memref<5xf32>, f32
// d0 is parallel; d1 and d2 are reductions. The output is indexed by d0 only.
linalg.indexed_generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0)>], iterator_types = ["parallel", "reduction", "reduction"]} %1, %0 {
^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: f32, %arg4: f32): // no predecessors
%c0_1 = constant 0 : index
%true = constant true
// %5 is true when both reduction indices (%arg1, %arg2) are zero.
%2 = cmpi "eq", %arg1, %c0_1 : index
%3 = and %true, %2 : i1
%4 = cmpi "eq", %arg2, %c0_1 : index
%5 = and %3, %4 : i1
// On that first step the accumulator is %cst instead of the current %arg4.
%6 = select %5, %cst, %arg4 : f32
// Yield the smaller of %arg3 and the accumulator (ordered less-than compare).
%7 = cmpf "olt", %arg3, %6 : f32
%8 = select %7, %arg3, %6 : f32
linalg.yield %8 : f32
}: memref<5x1x1xf32>, memref<5xf32>
return
}
// *** IR Dump After Canonicalizer ***
// Module for dispatch 5 in buffer form: the reduction function and its HAL interface.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
// Min-reduction over d1 and d2 of the input buffer, accumulated in the output buffer.
func @main_ex_dispatch_5() {
// All constants sit at the top of the function; the region below refers to
// %c0 and %true from here instead of defining its own.
// 0x7F800000 is the IEEE-754 bit pattern of f32 +infinity.
%cst = constant 0x7F800000 : f32
%cst_0 = constant 0.000000e+00 : f32
%c0 = constant 0 : index
%true = constant true
// %0 is the @ret0 output buffer, %1 the @arg0 input buffer.
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<5x1x1xf32>
linalg.fill(%0, %cst_0) : memref<5xf32>, f32
linalg.indexed_generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0)>], iterator_types = ["parallel", "reduction", "reduction"]} %1, %0 {
^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: f32, %arg4: f32): // no predecessors
// %5 is true when both reduction indices are zero; the `and` with %true is
// still present in this snapshot.
%2 = cmpi "eq", %arg1, %c0 : index
%3 = and %2, %true : i1
%4 = cmpi "eq", %arg2, %c0 : index
%5 = and %3, %4 : i1
// Accumulator is %cst on the first reduction step, otherwise %arg4.
%6 = select %5, %cst, %arg4 : f32
// Yield the smaller of %arg3 and the accumulator.
%7 = cmpf "olt", %arg3, %6 : f32
%8 = select %7, %arg3, %6 : f32
linalg.yield %8 : f32
}: memref<5x1x1xf32>, memref<5xf32>
return
}
// Two storage buffers in descriptor set 0: @arg0 is read, @ret0 is written.
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After CSE ***
// Module for dispatch 5 in buffer form: the reduction function and its HAL interface.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
// Fills the output buffer, then min-reduces the input buffer into it.
func @main_ex_dispatch_5() {
// 0x7F800000 is the IEEE-754 bit pattern of f32 +infinity.
%cst = constant 0x7F800000 : f32
%cst_0 = constant 0.000000e+00 : f32
%c0 = constant 0 : index
%true = constant true
// %0 is the @ret0 output buffer, %1 the @arg0 input buffer.
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<5x1x1xf32>
linalg.fill(%0, %cst_0) : memref<5xf32>, f32
// d0 is parallel; d1 and d2 are reductions. The output is indexed by d0 only.
linalg.indexed_generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0)>], iterator_types = ["parallel", "reduction", "reduction"]} %1, %0 {
^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: f32, %arg4: f32): // no predecessors
// %5 is true when both reduction indices (%arg1, %arg2) are zero.
%2 = cmpi "eq", %arg1, %c0 : index
%3 = and %2, %true : i1
%4 = cmpi "eq", %arg2, %c0 : index
%5 = and %3, %4 : i1
// Accumulator is %cst on the first reduction step, otherwise %arg4.
%6 = select %5, %cst, %arg4 : f32
// Yield the smaller of %arg3 and the accumulator.
%7 = cmpf "olt", %arg3, %6 : f32
%8 = select %7, %arg3, %6 : f32
linalg.yield %8 : f32
}: memref<5x1x1xf32>, memref<5xf32>
return
}
// Two storage buffers in descriptor set 0: @arg0 is read, @ret0 is written.
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::LinalgTileAndFusePass ***
// Tiled form of the min-reduction. The fill and the indexed_generic are each
// wrapped in an scf.parallel over d0 with step 32, and the function now carries
// a local_size of [32, 1, 1] in its spv.entry_point_abi attribute.
func @main_ex_dispatch_5() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} {
// 0x7F800000 is the IEEE-754 bit pattern of f32 +infinity.
%cst = constant 0x7F800000 : f32
%cst_0 = constant 0.000000e+00 : f32
%true = constant true
%c0 = constant 0 : index
%c32 = constant 32 : index
%c5 = constant 5 : index
%c1 = constant 1 : index
// %0 is the @ret0 output buffer, %1 the @arg0 input buffer.
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<5x1x1xf32>
// Tile loop 1: fill a tile of the output buffer with 0.0.
scf.parallel (%arg0) = (%c0) to (%c5) step (%c32) {
// Tile extent %2 = min(32, 5 - %arg0).
%2 = affine.min affine_map<(d0, d1, d2) -> (32, d1 - d2)>(%c32, %c5, %arg0)
%3 = subview %0[%arg0] [%2] [%c1] : memref<5xf32> to memref<?xf32, affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>>
linalg.fill(%3, %cst_0) {__internal_linalg_transform__ = "workitem"} : memref<?xf32, affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>>, f32
scf.yield
}
// Tile loop 2: min-reduce a tile of the input into the matching output tile.
scf.parallel (%arg0) = (%c0) to (%c5) step (%c32) {
// %3 is the input tile (extent %2 x 1 x 1), %5 the output tile (extent %4);
// %2 and %4 are the same min(32, 5 - %arg0) computed twice.
%2 = affine.min affine_map<(d0, d1, d2) -> (32, d1 - d2)>(%c32, %c5, %arg0)
%3 = subview %1[%arg0, %c0, %c0] [%2, %c1, %c1] [%c1, %c1, %c1] : memref<5x1x1xf32> to memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3)>>
%4 = affine.min affine_map<(d0, d1, d2) -> (32, d1 - d2)>(%c32, %c5, %arg0)
%5 = subview %0[%arg0] [%4] [%c1] : memref<5xf32> to memref<?xf32, affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>>
linalg.indexed_generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0)>], iterator_types = ["parallel", "reduction", "reduction"]} {__internal_linalg_transform__ = "workitem"} %3, %5 {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: f32, %arg5: f32): // no predecessors
// %9 is true when both reduction indices (%arg2, %arg3) are zero.
%6 = cmpi "eq", %arg2, %c0 : index
%7 = and %6, %true : i1
%8 = cmpi "eq", %arg3, %c0 : index
%9 = and %7, %8 : i1
// Accumulator is %cst on the first reduction step, otherwise %arg5.
%10 = select %9, %cst, %arg5 : f32
// Yield the smaller of %arg4 and the accumulator.
%11 = cmpf "olt", %arg4, %10 : f32
%12 = select %11, %arg4, %10 : f32
linalg.yield %12 : f32
}: memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3)>>, memref<?xf32, affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>>
scf.yield
}
return
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::SplitDispatchFunctionPass ***
// Module for dispatch 5 in tiled buffer form. It still holds one function
// containing both tile loops, plus the HAL interface.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
// Fill tile loop followed by min-reduction tile loop; local_size is [32, 1, 1].
func @main_ex_dispatch_5() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} {
// 0x7F800000 is the IEEE-754 bit pattern of f32 +infinity.
%cst = constant 0x7F800000 : f32
%cst_0 = constant 0.000000e+00 : f32
%true = constant true
%c0 = constant 0 : index
%c32 = constant 32 : index
%c5 = constant 5 : index
%c1 = constant 1 : index
// %0 is the @ret0 output buffer, %1 the @arg0 input buffer.
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<5x1x1xf32>
// Tile loop 1: fill an output tile of extent min(32, 5 - %arg0) with 0.0.
scf.parallel (%arg0) = (%c0) to (%c5) step (%c32) {
%2 = affine.min affine_map<(d0, d1, d2) -> (32, d1 - d2)>(%c32, %c5, %arg0)
%3 = subview %0[%arg0] [%2] [%c1] : memref<5xf32> to memref<?xf32, affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>>
linalg.fill(%3, %cst_0) {__internal_linalg_transform__ = "workitem"} : memref<?xf32, affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>>, f32
scf.yield
}
// Tile loop 2: min-reduce the input tile %3 into the output tile %5.
scf.parallel (%arg0) = (%c0) to (%c5) step (%c32) {
%2 = affine.min affine_map<(d0, d1, d2) -> (32, d1 - d2)>(%c32, %c5, %arg0)
%3 = subview %1[%arg0, %c0, %c0] [%2, %c1, %c1] [%c1, %c1, %c1] : memref<5x1x1xf32> to memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3)>>
%4 = affine.min affine_map<(d0, d1, d2) -> (32, d1 - d2)>(%c32, %c5, %arg0)
%5 = subview %0[%arg0] [%4] [%c1] : memref<5xf32> to memref<?xf32, affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>>
linalg.indexed_generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0)>], iterator_types = ["parallel", "reduction", "reduction"]} {__internal_linalg_transform__ = "workitem"} %3, %5 {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: f32, %arg5: f32): // no predecessors
// %9 is true when both reduction indices (%arg2, %arg3) are zero.
%6 = cmpi "eq", %arg2, %c0 : index
%7 = and %6, %true : i1
%8 = cmpi "eq", %arg3, %c0 : index
%9 = and %7, %8 : i1
// Accumulator is %cst on the first reduction step, otherwise %arg5.
%10 = select %9, %cst, %arg5 : f32
// Yield the smaller of %arg4 and the accumulator.
%11 = cmpf "olt", %arg4, %10 : f32
%12 = select %11, %arg4, %10 : f32
linalg.yield %12 : f32
}: memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3)>>, memref<?xf32, affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>>
scf.yield
}
return
}
// Two storage buffers in descriptor set 0: @arg0 is read, @ret0 is written.
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::LinalgTileAndFusePass ***
// Tiled min-reduction: one scf.parallel fills the output tile with 0.0, a second
// one reduces the input tile into it. Both linalg ops carry the "workitem"
// marker and the function declares a local_size of [32, 1, 1].
func @main_ex_dispatch_5() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} {
// 0x7F800000 is the IEEE-754 bit pattern of f32 +infinity.
%cst = constant 0x7F800000 : f32
%cst_0 = constant 0.000000e+00 : f32
%true = constant true
%c0 = constant 0 : index
%c32 = constant 32 : index
%c5 = constant 5 : index
%c1 = constant 1 : index
// %0 is the @ret0 output buffer, %1 the @arg0 input buffer.
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<5x1x1xf32>
// Tile loop 1: %arg0 goes from 0 to 5 in steps of 32; tile extent is
// min(32, 5 - %arg0).
scf.parallel (%arg0) = (%c0) to (%c5) step (%c32) {
%2 = affine.min affine_map<(d0, d1, d2) -> (32, d1 - d2)>(%c32, %c5, %arg0)
%3 = subview %0[%arg0] [%2] [%c1] : memref<5xf32> to memref<?xf32, affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>>
linalg.fill(%3, %cst_0) {__internal_linalg_transform__ = "workitem"} : memref<?xf32, affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>>, f32
scf.yield
}
// Tile loop 2: same iteration space; %3 views the input, %5 the output.
scf.parallel (%arg0) = (%c0) to (%c5) step (%c32) {
%2 = affine.min affine_map<(d0, d1, d2) -> (32, d1 - d2)>(%c32, %c5, %arg0)
%3 = subview %1[%arg0, %c0, %c0] [%2, %c1, %c1] [%c1, %c1, %c1] : memref<5x1x1xf32> to memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3)>>
%4 = affine.min affine_map<(d0, d1, d2) -> (32, d1 - d2)>(%c32, %c5, %arg0)
%5 = subview %0[%arg0] [%4] [%c1] : memref<5xf32> to memref<?xf32, affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>>
linalg.indexed_generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0)>], iterator_types = ["parallel", "reduction", "reduction"]} {__internal_linalg_transform__ = "workitem"} %3, %5 {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: f32, %arg5: f32): // no predecessors
// %9 is true when both reduction indices (%arg2, %arg3) are zero.
%6 = cmpi "eq", %arg2, %c0 : index
%7 = and %6, %true : i1
%8 = cmpi "eq", %arg3, %c0 : index
%9 = and %7, %8 : i1
// Accumulator is %cst on the first reduction step, otherwise %arg5.
%10 = select %9, %cst, %arg5 : f32
// Yield the smaller of %arg4 and the accumulator.
%11 = cmpf "olt", %arg4, %10 : f32
%12 = select %11, %arg4, %10 : f32
linalg.yield %12 : f32
}: memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3)>>, memref<?xf32, affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>>
scf.yield
}
return
}
// *** IR Dump After Canonicalizer ***
// Module for dispatch 5 in tiled buffer form with constants folded into the
// affine maps and subview operands.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
// Fill tile loop followed by min-reduction tile loop; local_size is [32, 1, 1].
func @main_ex_dispatch_5() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} {
// 0x7F800000 is the IEEE-754 bit pattern of f32 +infinity.
%cst = constant 0x7F800000 : f32
%cst_0 = constant 0.000000e+00 : f32
%true = constant true
%c0 = constant 0 : index
%c32 = constant 32 : index
%c5 = constant 5 : index
// %0 is the @ret0 output buffer, %1 the @arg0 input buffer.
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<5x1x1xf32>
// Tile loop 1: fill an output tile of extent min(32, 5 - %arg0) with 0.0.
scf.parallel (%arg0) = (%c0) to (%c5) step (%c32) {
// The min now takes only %arg0; 32 and 5 are literals in the map.
%2 = affine.min affine_map<(d0) -> (32, -d0 + 5)>(%arg0)
// Stride is the literal 1, so the result layout is simply d0 + offset.
%3 = subview %0[%arg0] [%2] [1] : memref<5xf32> to memref<?xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
linalg.fill(%3, %cst_0) {__internal_linalg_transform__ = "workitem"} : memref<?xf32, affine_map<(d0)[s0] -> (d0 + s0)>>, f32
scf.yield
}
// Tile loop 2: min-reduce the input tile %3 into the output tile %5.
scf.parallel (%arg0) = (%c0) to (%c5) step (%c32) {
%2 = affine.min affine_map<(d0) -> (32, -d0 + 5)>(%arg0)
// Only the leading extent of the input tile is dynamic; the other two are 1.
%3 = subview %1[%arg0, 0, 0] [%2, 1, 1] [1, 1, 1] : memref<5x1x1xf32> to memref<?x1x1xf32, affine_map<(d0, d1, d2)[s0] -> (d0 + s0 + d1 + d2)>>
%4 = affine.min affine_map<(d0) -> (32, -d0 + 5)>(%arg0)
%5 = subview %0[%arg0] [%4] [1] : memref<5xf32> to memref<?xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
linalg.indexed_generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0)>], iterator_types = ["parallel", "reduction", "reduction"]} {__internal_linalg_transform__ = "workitem"} %3, %5 {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: f32, %arg5: f32): // no predecessors
// %9 is true when both reduction indices (%arg2, %arg3) are zero.
%6 = cmpi "eq", %arg2, %c0 : index
%7 = and %6, %true : i1
%8 = cmpi "eq", %arg3, %c0 : index
%9 = and %7, %8 : i1
// Accumulator is %cst on the first reduction step, otherwise %arg5.
%10 = select %9, %cst, %arg5 : f32
// Yield the smaller of %arg4 and the accumulator.
%11 = cmpf "olt", %arg4, %10 : f32
%12 = select %11, %arg4, %10 : f32
linalg.yield %12 : f32
}: memref<?x1x1xf32, affine_map<(d0, d1, d2)[s0] -> (d0 + s0 + d1 + d2)>>, memref<?xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
scf.yield
}
return
}
// Two storage buffers in descriptor set 0: @arg0 is read, @ret0 is written.
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::ConvertToGPUPass ***
// GPU-mapped form of the min-reduction. Each scf.parallel has become an scf.for
// driven by gpu.block_id / gpu.grid_dim, and each linalg op has become explicit
// scf.for loops with loads and stores, driven by gpu.thread_id / gpu.block_dim.
func @main_ex_dispatch_5() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} {
// 0x7F800000 is the IEEE-754 bit pattern of f32 +infinity.
%cst = constant 0x7F800000 : f32
%cst_0 = constant 0.000000e+00 : f32
%true = constant true
%c0 = constant 0 : index
%c32 = constant 32 : index
%c5 = constant 5 : index
// %0 is the @ret0 output buffer, %1 the @arg0 input buffer.
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<5x1x1xf32>
// Loop nest 1 (fill). Outer loop starts at 32 * block_id.x and steps by
// 32 * grid_dim.x, up to 5.
%2 = "gpu.block_id"() {dimension = "x"} : () -> index
%3 = "gpu.grid_dim"() {dimension = "x"} : () -> index
%4 = muli %c32, %2 : index
%5 = addi %c0, %4 : index
%6 = muli %c32, %3 : index
scf.for %arg0 = %5 to %c5 step %6 {
// Output tile of extent min(32, 5 - %arg0); %15 is that extent read back via dim.
%12 = affine.min affine_map<(d0) -> (32, -d0 + 5)>(%arg0)
%13 = subview %0[%arg0] [%12] [1] : memref<5xf32> to memref<?xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
%14 = dim %13, 0 : memref<?xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
%15 = affine.apply affine_map<(d0) -> (d0)>(%14)
%c0_1 = constant 0 : index
%c1 = constant 1 : index
// Inner loop starts at thread_id.x and steps by block_dim.x.
%16 = "gpu.thread_id"() {dimension = "x"} : () -> index
%17 = "gpu.block_dim"() {dimension = "x"} : () -> index
%18 = muli %c1, %16 : index
%19 = addi %c0_1, %18 : index
%20 = muli %c1, %17 : index
scf.for %arg1 = %19 to %15 step %20 {
// Write 0.0 into the output tile element.
store %cst_0, %13[%arg1] : memref<?xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
}
}
// Loop nest 2 (reduction). Same block-level start and step as loop nest 1.
%7 = "gpu.block_id"() {dimension = "x"} : () -> index
%8 = "gpu.grid_dim"() {dimension = "x"} : () -> index
%9 = muli %c32, %7 : index
%10 = addi %c0, %9 : index
%11 = muli %c32, %8 : index
scf.for %arg0 = %10 to %c5 step %11 {
// %13 is the input tile, %15 the output tile, both of extent min(32, 5 - %arg0).
%12 = affine.min affine_map<(d0) -> (32, -d0 + 5)>(%arg0)
%13 = subview %1[%arg0, 0, 0] [%12, 1, 1] [1, 1, 1] : memref<5x1x1xf32> to memref<?x1x1xf32, affine_map<(d0, d1, d2)[s0] -> (d0 + s0 + d1 + d2)>>
%14 = affine.min affine_map<(d0) -> (32, -d0 + 5)>(%arg0)
%15 = subview %0[%arg0] [%14] [1] : memref<5xf32> to memref<?xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
// Loop bounds are taken from the input tile's three dimensions; %19 (the
// output tile's extent) is computed but not used below.
%16 = dim %13, 0 : memref<?x1x1xf32, affine_map<(d0, d1, d2)[s0] -> (d0 + s0 + d1 + d2)>>
%17 = dim %13, 1 : memref<?x1x1xf32, affine_map<(d0, d1, d2)[s0] -> (d0 + s0 + d1 + d2)>>
%18 = dim %13, 2 : memref<?x1x1xf32, affine_map<(d0, d1, d2)[s0] -> (d0 + s0 + d1 + d2)>>
%19 = dim %15, 0 : memref<?xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
%20 = affine.apply affine_map<(d0) -> (d0)>(%16)
%21 = affine.apply affine_map<()[s0] -> (s0)>()[%17]
%22 = affine.apply affine_map<()[s0] -> (s0)>()[%18]
%c0_1 = constant 0 : index
%c1 = constant 1 : index
%c0_2 = constant 0 : index
%c1_3 = constant 1 : index
%c0_4 = constant 0 : index
%c1_5 = constant 1 : index
// Only the first (parallel) dimension is distributed: it starts at thread_id.x
// and steps by block_dim.x. The two reduction loops run 0..extent with step 1.
%23 = "gpu.thread_id"() {dimension = "x"} : () -> index
%24 = "gpu.block_dim"() {dimension = "x"} : () -> index
%25 = muli %c1, %23 : index
%26 = addi %c0_1, %25 : index
%27 = muli %c1, %24 : index
scf.for %arg1 = %26 to %20 step %27 {
scf.for %arg2 = %c0_2 to %21 step %c1_3 {
scf.for %arg3 = %c0_4 to %22 step %c1_5 {
// %31 is the input element, %33 the current value of the output element.
%28 = affine.apply affine_map<(d0) -> (d0)>(%arg1)
%29 = affine.apply affine_map<(d0) -> (d0)>(%arg2)
%30 = affine.apply affine_map<(d0) -> (d0)>(%arg3)
%31 = load %13[%28, %29, %30] : memref<?x1x1xf32, affine_map<(d0, d1, d2)[s0] -> (d0 + s0 + d1 + d2)>>
%32 = affine.apply affine_map<(d0) -> (d0)>(%arg1)
%33 = load %15[%32] : memref<?xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
%34 = affine.apply affine_map<(d0) -> (d0)>(%arg1)
// %38 is true when both reduction indices are zero; then the accumulator is
// %cst rather than the loaded output value.
%35 = cmpi "eq", %arg2, %c0 : index
%36 = and %35, %true : i1
%37 = cmpi "eq", %arg3, %c0 : index
%38 = and %36, %37 : i1
%39 = select %38, %cst, %33 : f32
// Store the smaller of the input element and the accumulator back to the output.
%40 = cmpf "olt", %31, %39 : f32
%41 = select %40, %31, %39 : f32
store %41, %15[%34] : memref<?xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
}
}
}
}
return
}
// *** IR Dump After ConvertAffineToStandard ***
// GPU-mapped min-reduction with the affine ops expanded into standard ops:
// each affine.min is now a muli/addi/cmpi/select sequence and the identity
// affine.apply ops are gone.
func @main_ex_dispatch_5() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} {
// 0x7F800000 is the IEEE-754 bit pattern of f32 +infinity.
%cst = constant 0x7F800000 : f32
%cst_0 = constant 0.000000e+00 : f32
%true = constant true
%c0 = constant 0 : index
%c32 = constant 32 : index
%c5 = constant 5 : index
// %0 is the @ret0 output buffer, %1 the @arg0 input buffer.
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<5x1x1xf32>
// Loop nest 1 (fill). Outer loop starts at 32 * block_id.x and steps by
// 32 * grid_dim.x, up to 5.
%2 = "gpu.block_id"() {dimension = "x"} : () -> index
%3 = "gpu.grid_dim"() {dimension = "x"} : () -> index
%4 = muli %c32, %2 : index
%5 = addi %c0, %4 : index
%6 = muli %c32, %3 : index
scf.for %arg0 = %5 to %c5 step %6 {
// %15 = min(32, 5 - %arg0): compute 5 - %arg0 as %arg0 * -1 + 5, then select.
%c32_1 = constant 32 : index
%c-1 = constant -1 : index
%12 = muli %arg0, %c-1 : index
%c5_2 = constant 5 : index
%13 = addi %12, %c5_2 : index
%14 = cmpi "slt", %c32_1, %13 : index
%15 = select %14, %c32_1, %13 : index
%16 = subview %0[%arg0] [%15] [1] : memref<5xf32> to memref<?xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
%17 = dim %16, 0 : memref<?xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
%c0_3 = constant 0 : index
%c1 = constant 1 : index
// Inner loop starts at thread_id.x and steps by block_dim.x.
%18 = "gpu.thread_id"() {dimension = "x"} : () -> index
%19 = "gpu.block_dim"() {dimension = "x"} : () -> index
%20 = muli %c1, %18 : index
%21 = addi %c0_3, %20 : index
%22 = muli %c1, %19 : index
scf.for %arg1 = %21 to %17 step %22 {
// Write 0.0 into the output tile element.
store %cst_0, %16[%arg1] : memref<?xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
}
}
// Loop nest 2 (reduction). Same block-level start and step as loop nest 1.
%7 = "gpu.block_id"() {dimension = "x"} : () -> index
%8 = "gpu.grid_dim"() {dimension = "x"} : () -> index
%9 = muli %c32, %7 : index
%10 = addi %c0, %9 : index
%11 = muli %c32, %8 : index
scf.for %arg0 = %10 to %c5 step %11 {
// %15 = min(32, 5 - %arg0), the leading extent of the input tile %16.
%c32_1 = constant 32 : index
%c-1 = constant -1 : index
%12 = muli %arg0, %c-1 : index
%c5_2 = constant 5 : index
%13 = addi %12, %c5_2 : index
%14 = cmpi "slt", %c32_1, %13 : index
%15 = select %14, %c32_1, %13 : index
%16 = subview %1[%arg0, 0, 0] [%15, 1, 1] [1, 1, 1] : memref<5x1x1xf32> to memref<?x1x1xf32, affine_map<(d0, d1, d2)[s0] -> (d0 + s0 + d1 + d2)>>
// %20 recomputes the same min(32, 5 - %arg0) as the extent of the output tile %21.
%c32_3 = constant 32 : index
%c-1_4 = constant -1 : index
%17 = muli %arg0, %c-1_4 : index
%c5_5 = constant 5 : index
%18 = addi %17, %c5_5 : index
%19 = cmpi "slt", %c32_3, %18 : index
%20 = select %19, %c32_3, %18 : index
%21 = subview %0[%arg0] [%20] [1] : memref<5xf32> to memref<?xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
// Loop bounds come from the input tile's dimensions; %25 is not used below.
%22 = dim %16, 0 : memref<?x1x1xf32, affine_map<(d0, d1, d2)[s0] -> (d0 + s0 + d1 + d2)>>
%23 = dim %16, 1 : memref<?x1x1xf32, affine_map<(d0, d1, d2)[s0] -> (d0 + s0 + d1 + d2)>>
%24 = dim %16, 2 : memref<?x1x1xf32, affine_map<(d0, d1, d2)[s0] -> (d0 + s0 + d1 + d2)>>
%25 = dim %21, 0 : memref<?xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
%c0_6 = constant 0 : index
%c1 = constant 1 : index
%c0_7 = constant 0 : index
%c1_8 = constant 1 : index
%c0_9 = constant 0 : index
%c1_10 = constant 1 : index
// Only the first dimension is distributed (start thread_id.x, step block_dim.x);
// the two reduction loops run from 0 to their extent with step 1.
%26 = "gpu.thread_id"() {dimension = "x"} : () -> index
%27 = "gpu.block_dim"() {dimension = "x"} : () -> index
%28 = muli %c1, %26 : index
%29 = addi %c0_6, %28 : index
%30 = muli %c1, %27 : index
scf.for %arg1 = %29 to %22 step %30 {
scf.for %arg2 = %c0_7 to %23 step %c1_8 {
scf.for %arg3 = %c0_9 to %24 step %c1_10 {
// %31 is the input element, %32 the current value of the output element.
%31 = load %16[%arg1, %arg2, %arg3] : memref<?x1x1xf32, affine_map<(d0, d1, d2)[s0] -> (d0 + s0 + d1 + d2)>>
%32 = load %21[%arg1] : memref<?xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
// %36 is true when both reduction indices are zero; then the accumulator is
// %cst rather than the loaded output value.
%33 = cmpi "eq", %arg2, %c0 : index
%34 = and %33, %true : i1
%35 = cmpi "eq", %arg3, %c0 : index
%36 = and %34, %35 : i1
%37 = select %36, %cst, %32 : f32
// Store the smaller of the input element and the accumulator back to the output.
%38 = cmpf "olt", %31, %37 : f32
%39 = select %38, %31, %37 : f32
store %39, %21[%arg1] : memref<?xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
}
}
}
}
return
}
// *** IR Dump After Canonicalizer ***
// Target environment: SPIR-V 1.3 with the Shader capability and the
// SPV_KHR_storage_buffer_storage_class extension, plus workgroup resource limits.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
// Entry point with a 32x1x1 local size. It contains two loop nests over the
// 5-element @ret0 buffer: a fill with 0.0, then a per-element minimum against @arg0.
func @main_ex_dispatch_5() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} {
// 0x7F800000 is the IEEE-754 single-precision bit pattern of +infinity.
%cst = constant 0x7F800000 : f32
%cst_0 = constant 0.000000e+00 : f32
%true = constant true
%c32 = constant 32 : index
%c-1 = constant -1 : index
%c5 = constant 5 : index
%c0 = constant 0 : index
%c1 = constant 1 : index
// %0 is the output buffer (@ret0), %1 is the input buffer (@arg0).
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<5x1x1xf32>
%2 = "gpu.block_id"() {dimension = "x"} : () -> index
%3 = "gpu.grid_dim"() {dimension = "x"} : () -> index
// Outer-loop start and step: block_id.x * 32 and grid_dim.x * 32.
%4 = muli %2, %c32 : index
%5 = muli %3, %c32 : index
// Loop nest 1: store 0.0 into the elements of @ret0.
scf.for %arg0 = %4 to %c5 step %5 {
// %13 = min(32, 5 - %arg0): size of the subview taken at offset %arg0.
%10 = muli %arg0, %c-1 : index
%11 = addi %10, %c5 : index
%12 = cmpi "slt", %c32, %11 : index
%13 = select %12, %c32, %11 : index
%14 = subview %0[%arg0] [%13] [1] : memref<5xf32> to memref<?xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
%15 = "gpu.thread_id"() {dimension = "x"} : () -> index
%16 = "gpu.block_dim"() {dimension = "x"} : () -> index
// Inner loop starts at thread_id.x and advances by block_dim.x.
scf.for %arg1 = %15 to %13 step %16 {
store %cst_0, %14[%arg1] : memref<?xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
}
}
// The block id / grid dim queries and the start/step products are repeated for the second nest.
%6 = "gpu.block_id"() {dimension = "x"} : () -> index
%7 = "gpu.grid_dim"() {dimension = "x"} : () -> index
%8 = muli %6, %c32 : index
%9 = muli %7, %c32 : index
// Loop nest 2: per element, write the smaller of the @arg0 value and %28 into @ret0.
scf.for %arg0 = %8 to %c5 step %9 {
%10 = muli %arg0, %c-1 : index
%11 = addi %10, %c5 : index
%12 = cmpi "slt", %c32, %11 : index
%13 = select %12, %c32, %11 : index
%14 = subview %1[%arg0, 0, 0] [%13, 1, 1] [1, 1, 1] : memref<5x1x1xf32> to memref<?x1x1xf32, affine_map<(d0, d1, d2)[s0] -> (d0 + s0 + d1 + d2)>>
// %15..%18 recompute the same min(32, 5 - %arg0) value as %10..%13.
%15 = muli %arg0, %c-1 : index
%16 = addi %15, %c5 : index
%17 = cmpi "slt", %c32, %16 : index
%18 = select %17, %c32, %16 : index
%19 = subview %0[%arg0] [%18] [1] : memref<5xf32> to memref<?xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
%20 = "gpu.thread_id"() {dimension = "x"} : () -> index
%21 = "gpu.block_dim"() {dimension = "x"} : () -> index
scf.for %arg1 = %20 to %13 step %21 {
// The next two loops run from 0 to 1 with step 1, i.e. one iteration each.
scf.for %arg2 = %c0 to %c1 step %c1 {
scf.for %arg3 = %c0 to %c1 step %c1 {
%22 = load %14[%arg1, %arg2, %arg3] : memref<?x1x1xf32, affine_map<(d0, d1, d2)[s0] -> (d0 + s0 + d1 + d2)>>
%23 = load %19[%arg1] : memref<?xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
// %27 is true when %arg2 == 0 and %arg3 == 0; then +inf (%cst) is used instead of the value loaded from @ret0.
%24 = cmpi "eq", %arg2, %c0 : index
%25 = and %24, %true : i1
%26 = cmpi "eq", %arg3, %c0 : index
%27 = and %25, %26 : i1
%28 = select %27, %cst, %23 : f32
// Ordered less-than compare plus select keeps %22 when it is smaller, otherwise %28.
// NOTE(review): %arg2 and %arg3 only take the value 0 here, so %28 looks like it is always %cst
// and the 0.0 written by loop nest 1 never reaches the result - confirm.
%29 = cmpf "olt", %22, %28 : f32
%30 = select %29, %22, %28 : f32
store %30, %19[%arg1] : memref<?xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
}
}
}
}
return
}
// Buffer bindings: @arg0 is Read at (set 0, binding 0); @ret0 is Write|Discard at (set 0, binding 1).
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After CSE ***
// Target environment: SPIR-V 1.3 with the Shader capability and the
// SPV_KHR_storage_buffer_storage_class extension, plus workgroup resource limits.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
// Entry point with a 32x1x1 local size. Both loop nests below share the same
// start (%4) and step (%5) values.
func @main_ex_dispatch_5() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} {
// 0x7F800000 is the IEEE-754 single-precision bit pattern of +infinity.
%cst = constant 0x7F800000 : f32
%cst_0 = constant 0.000000e+00 : f32
%true = constant true
%c32 = constant 32 : index
%c-1 = constant -1 : index
%c5 = constant 5 : index
%c0 = constant 0 : index
%c1 = constant 1 : index
// %0 is the output buffer (@ret0), %1 is the input buffer (@arg0).
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<5x1x1xf32>
%2 = "gpu.block_id"() {dimension = "x"} : () -> index
%3 = "gpu.grid_dim"() {dimension = "x"} : () -> index
// Outer-loop start and step: block_id.x * 32 and grid_dim.x * 32.
%4 = muli %2, %c32 : index
%5 = muli %3, %c32 : index
// Loop nest 1: store 0.0 into the elements of @ret0.
scf.for %arg0 = %4 to %c5 step %5 {
// %9 = min(32, 5 - %arg0): size of the subview taken at offset %arg0.
%6 = muli %arg0, %c-1 : index
%7 = addi %6, %c5 : index
%8 = cmpi "slt", %c32, %7 : index
%9 = select %8, %c32, %7 : index
%10 = subview %0[%arg0] [%9] [1] : memref<5xf32> to memref<?xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
%11 = "gpu.thread_id"() {dimension = "x"} : () -> index
%12 = "gpu.block_dim"() {dimension = "x"} : () -> index
// Inner loop starts at thread_id.x and advances by block_dim.x.
scf.for %arg1 = %11 to %9 step %12 {
store %cst_0, %10[%arg1] : memref<?xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
}
}
// Loop nest 2: per element, write the smaller of the @arg0 value and %20 into @ret0.
scf.for %arg0 = %4 to %c5 step %5 {
%6 = muli %arg0, %c-1 : index
%7 = addi %6, %c5 : index
%8 = cmpi "slt", %c32, %7 : index
%9 = select %8, %c32, %7 : index
// One size value (%9) is used for both the input and the output subview.
%10 = subview %1[%arg0, 0, 0] [%9, 1, 1] [1, 1, 1] : memref<5x1x1xf32> to memref<?x1x1xf32, affine_map<(d0, d1, d2)[s0] -> (d0 + s0 + d1 + d2)>>
%11 = subview %0[%arg0] [%9] [1] : memref<5xf32> to memref<?xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
%12 = "gpu.thread_id"() {dimension = "x"} : () -> index
%13 = "gpu.block_dim"() {dimension = "x"} : () -> index
scf.for %arg1 = %12 to %9 step %13 {
// The next two loops run from 0 to 1 with step 1, i.e. one iteration each.
scf.for %arg2 = %c0 to %c1 step %c1 {
scf.for %arg3 = %c0 to %c1 step %c1 {
%14 = load %10[%arg1, %arg2, %arg3] : memref<?x1x1xf32, affine_map<(d0, d1, d2)[s0] -> (d0 + s0 + d1 + d2)>>
%15 = load %11[%arg1] : memref<?xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
// %19 is true when %arg2 == 0 and %arg3 == 0; then +inf (%cst) is used instead of the value loaded from @ret0.
%16 = cmpi "eq", %arg2, %c0 : index
%17 = and %16, %true : i1
%18 = cmpi "eq", %arg3, %c0 : index
%19 = and %17, %18 : i1
%20 = select %19, %cst, %15 : f32
// Ordered less-than compare plus select keeps %14 when it is smaller, otherwise %20.
// NOTE(review): %arg2 and %arg3 only take the value 0 here, so %20 looks like it is always %cst
// and the 0.0 written by loop nest 1 never reaches the result - confirm.
%21 = cmpf "olt", %14, %20 : f32
%22 = select %21, %14, %20 : f32
store %22, %11[%arg1] : memref<?xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
}
}
}
}
return
}
// Buffer bindings: @arg0 is Read at (set 0, binding 0); @ret0 is Write|Discard at (set 0, binding 1).
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::ResolveShapeOpsPass ***
// Entry point with a 32x1x1 local size. It contains two loop nests over the
// 5-element @ret0 buffer: a fill with 0.0, then a per-element minimum against @arg0.
func @main_ex_dispatch_5() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} {
// 0x7F800000 is the IEEE-754 single-precision bit pattern of +infinity.
%cst = constant 0x7F800000 : f32
%cst_0 = constant 0.000000e+00 : f32
%true = constant true
%c32 = constant 32 : index
%c-1 = constant -1 : index
%c5 = constant 5 : index
%c0 = constant 0 : index
%c1 = constant 1 : index
// %0 is the output buffer (@ret0), %1 is the input buffer (@arg0).
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<5x1x1xf32>
%2 = "gpu.block_id"() {dimension = "x"} : () -> index
%3 = "gpu.grid_dim"() {dimension = "x"} : () -> index
// Outer-loop start and step: block_id.x * 32 and grid_dim.x * 32.
%4 = muli %2, %c32 : index
%5 = muli %3, %c32 : index
// Loop nest 1: store 0.0 into the elements of @ret0.
scf.for %arg0 = %4 to %c5 step %5 {
// %9 = min(32, 5 - %arg0): size of the subview taken at offset %arg0.
%6 = muli %arg0, %c-1 : index
%7 = addi %6, %c5 : index
%8 = cmpi "slt", %c32, %7 : index
%9 = select %8, %c32, %7 : index
%10 = subview %0[%arg0] [%9] [1] : memref<5xf32> to memref<?xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
%11 = "gpu.thread_id"() {dimension = "x"} : () -> index
%12 = "gpu.block_dim"() {dimension = "x"} : () -> index
// Inner loop starts at thread_id.x and advances by block_dim.x.
scf.for %arg1 = %11 to %9 step %12 {
store %cst_0, %10[%arg1] : memref<?xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
}
}
// Loop nest 2: per element, write the smaller of the @arg0 value and %20 into @ret0.
scf.for %arg0 = %4 to %c5 step %5 {
%6 = muli %arg0, %c-1 : index
%7 = addi %6, %c5 : index
%8 = cmpi "slt", %c32, %7 : index
%9 = select %8, %c32, %7 : index
%10 = subview %1[%arg0, 0, 0] [%9, 1, 1] [1, 1, 1] : memref<5x1x1xf32> to memref<?x1x1xf32, affine_map<(d0, d1, d2)[s0] -> (d0 + s0 + d1 + d2)>>
%11 = subview %0[%arg0] [%9] [1] : memref<5xf32> to memref<?xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
%12 = "gpu.thread_id"() {dimension = "x"} : () -> index
%13 = "gpu.block_dim"() {dimension = "x"} : () -> index
scf.for %arg1 = %12 to %9 step %13 {
// The next two loops run from 0 to 1 with step 1, i.e. one iteration each.
scf.for %arg2 = %c0 to %c1 step %c1 {
scf.for %arg3 = %c0 to %c1 step %c1 {
%14 = load %10[%arg1, %arg2, %arg3] : memref<?x1x1xf32, affine_map<(d0, d1, d2)[s0] -> (d0 + s0 + d1 + d2)>>
%15 = load %11[%arg1] : memref<?xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
// %19 is true when %arg2 == 0 and %arg3 == 0; then +inf (%cst) is used instead of the value loaded from @ret0.
%16 = cmpi "eq", %arg2, %c0 : index
%17 = and %16, %true : i1
%18 = cmpi "eq", %arg3, %c0 : index
%19 = and %17, %18 : i1
%20 = select %19, %cst, %15 : f32
// Ordered less-than compare plus select keeps %14 when it is smaller, otherwise %20.
// NOTE(review): %arg2 and %arg3 only take the value 0 here, so %20 looks like it is always %cst
// and the 0.0 written by loop nest 1 never reaches the result - confirm.
%21 = cmpf "olt", %14, %20 : f32
%22 = select %21, %14, %20 : f32
store %22, %11[%arg1] : memref<?xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
}
}
}
}
return
}
// *** IR Dump After LegalizeStandardForSPIRV ***
// Target environment: SPIR-V 1.3 with the Shader capability and the
// SPV_KHR_storage_buffer_storage_class extension, plus workgroup resource limits.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
// Entry point with a 32x1x1 local size. Loads and stores in this form index the
// placeholder buffers %0 / %1 directly, using %arg0 + %arg1 as the leading index.
func @main_ex_dispatch_5() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} {
// 0x7F800000 is the IEEE-754 single-precision bit pattern of +infinity.
%cst = constant 0x7F800000 : f32
%cst_0 = constant 0.000000e+00 : f32
%true = constant true
%c32 = constant 32 : index
%c-1 = constant -1 : index
%c5 = constant 5 : index
%c0 = constant 0 : index
%c1 = constant 1 : index
// %0 is the output buffer (@ret0), %1 is the input buffer (@arg0).
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<5x1x1xf32>
%2 = "gpu.block_id"() {dimension = "x"} : () -> index
%3 = "gpu.grid_dim"() {dimension = "x"} : () -> index
// Outer-loop start and step: block_id.x * 32 and grid_dim.x * 32.
%4 = muli %2, %c32 : index
%5 = muli %3, %c32 : index
// Loop nest 1: store 0.0 into the elements of @ret0.
scf.for %arg0 = %4 to %c5 step %5 {
// %9 = min(32, 5 - %arg0): upper bound of the inner loop.
%6 = muli %arg0, %c-1 : index
%7 = addi %6, %c5 : index
%8 = cmpi "slt", %c32, %7 : index
%9 = select %8, %c32, %7 : index
%10 = "gpu.thread_id"() {dimension = "x"} : () -> index
%11 = "gpu.block_dim"() {dimension = "x"} : () -> index
// Inner loop starts at thread_id.x and advances by block_dim.x.
scf.for %arg1 = %10 to %9 step %11 {
// Element index into @ret0 is outer offset + inner offset.
%12 = addi %arg0, %arg1 : index
store %cst_0, %0[%12] : memref<5xf32>
}
}
// Loop nest 2: per element, write the smaller of the @arg0 value and %20 into @ret0.
scf.for %arg0 = %4 to %c5 step %5 {
%6 = muli %arg0, %c-1 : index
%7 = addi %6, %c5 : index
%8 = cmpi "slt", %c32, %7 : index
%9 = select %8, %c32, %7 : index
%10 = "gpu.thread_id"() {dimension = "x"} : () -> index
%11 = "gpu.block_dim"() {dimension = "x"} : () -> index
scf.for %arg1 = %10 to %9 step %11 {
// The next two loops run from 0 to 1 with step 1, i.e. one iteration each.
scf.for %arg2 = %c0 to %c1 step %c1 {
scf.for %arg3 = %c0 to %c1 step %c1 {
// %12, %14 and %23 all compute the same %arg0 + %arg1 index.
%12 = addi %arg0, %arg1 : index
%13 = load %1[%12, %arg2, %arg3] : memref<5x1x1xf32>
%14 = addi %arg0, %arg1 : index
%15 = load %0[%14] : memref<5xf32>
// %19 is true when %arg2 == 0 and %arg3 == 0; then +inf (%cst) is used instead of the value loaded from @ret0.
%16 = cmpi "eq", %arg2, %c0 : index
%17 = and %16, %true : i1
%18 = cmpi "eq", %arg3, %c0 : index
%19 = and %17, %18 : i1
%20 = select %19, %cst, %15 : f32
// Ordered less-than compare plus select keeps %13 when it is smaller, otherwise %20.
// NOTE(review): %arg2 and %arg3 only take the value 0 here, so %20 looks like it is always %cst
// and the 0.0 written by loop nest 1 never reaches the result - confirm.
%21 = cmpf "olt", %13, %20 : f32
%22 = select %21, %13, %20 : f32
%23 = addi %arg0, %arg1 : index
store %22, %0[%23] : memref<5xf32>
}
}
}
}
return
}
// Buffer bindings: @arg0 is Read at (set 0, binding 0); @ret0 is Write|Discard at (set 0, binding 1).
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After Canonicalizer ***
// Target environment: SPIR-V 1.3 with the Shader capability and the
// SPV_KHR_storage_buffer_storage_class extension, plus workgroup resource limits.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
// Entry point with a 32x1x1 local size. Loads and stores index the placeholder
// buffers %0 / %1 directly, using %arg0 + %arg1 as the leading index.
func @main_ex_dispatch_5() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} {
// 0x7F800000 is the IEEE-754 single-precision bit pattern of +infinity.
%cst = constant 0x7F800000 : f32
%cst_0 = constant 0.000000e+00 : f32
%true = constant true
%c32 = constant 32 : index
%c-1 = constant -1 : index
%c5 = constant 5 : index
%c0 = constant 0 : index
%c1 = constant 1 : index
// %0 is the output buffer (@ret0), %1 is the input buffer (@arg0).
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<5x1x1xf32>
%2 = "gpu.block_id"() {dimension = "x"} : () -> index
%3 = "gpu.grid_dim"() {dimension = "x"} : () -> index
// Outer-loop start and step: block_id.x * 32 and grid_dim.x * 32.
%4 = muli %2, %c32 : index
%5 = muli %3, %c32 : index
// Loop nest 1: store 0.0 into the elements of @ret0.
scf.for %arg0 = %4 to %c5 step %5 {
// %9 = min(32, 5 - %arg0): upper bound of the inner loop.
%6 = muli %arg0, %c-1 : index
%7 = addi %6, %c5 : index
%8 = cmpi "slt", %c32, %7 : index
%9 = select %8, %c32, %7 : index
%10 = "gpu.thread_id"() {dimension = "x"} : () -> index
%11 = "gpu.block_dim"() {dimension = "x"} : () -> index
// Inner loop starts at thread_id.x and advances by block_dim.x.
scf.for %arg1 = %10 to %9 step %11 {
// Element index into @ret0 is outer offset + inner offset.
%12 = addi %arg0, %arg1 : index
store %cst_0, %0[%12] : memref<5xf32>
}
}
// Loop nest 2: per element, write the smaller of the @arg0 value and %20 into @ret0.
scf.for %arg0 = %4 to %c5 step %5 {
%6 = muli %arg0, %c-1 : index
%7 = addi %6, %c5 : index
%8 = cmpi "slt", %c32, %7 : index
%9 = select %8, %c32, %7 : index
%10 = "gpu.thread_id"() {dimension = "x"} : () -> index
%11 = "gpu.block_dim"() {dimension = "x"} : () -> index
scf.for %arg1 = %10 to %9 step %11 {
// The next two loops run from 0 to 1 with step 1, i.e. one iteration each.
scf.for %arg2 = %c0 to %c1 step %c1 {
scf.for %arg3 = %c0 to %c1 step %c1 {
// %12, %14 and %23 all compute the same %arg0 + %arg1 index.
%12 = addi %arg0, %arg1 : index
%13 = load %1[%12, %arg2, %arg3] : memref<5x1x1xf32>
%14 = addi %arg0, %arg1 : index
%15 = load %0[%14] : memref<5xf32>
// %19 is true when %arg2 == 0 and %arg3 == 0; then +inf (%cst) is used instead of the value loaded from @ret0.
%16 = cmpi "eq", %arg2, %c0 : index
%17 = and %16, %true : i1
%18 = cmpi "eq", %arg3, %c0 : index
%19 = and %17, %18 : i1
%20 = select %19, %cst, %15 : f32
// Ordered less-than compare plus select keeps %13 when it is smaller, otherwise %20.
// NOTE(review): %arg2 and %arg3 only take the value 0 here, so %20 looks like it is always %cst
// and the 0.0 written by loop nest 1 never reaches the result - confirm.
%21 = cmpf "olt", %13, %20 : f32
%22 = select %21, %13, %20 : f32
%23 = addi %arg0, %arg1 : index
store %22, %0[%23] : memref<5xf32>
}
}
}
}
return
}
// Buffer bindings: @arg0 is Read at (set 0, binding 0); @ret0 is Write|Discard at (set 0, binding 1).
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After CSE ***
// Target environment: SPIR-V 1.3 with the Shader capability and the
// SPV_KHR_storage_buffer_storage_class extension, plus workgroup resource limits.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
// Entry point with a 32x1x1 local size. Loads and stores index the placeholder
// buffers %0 / %1 directly; one %arg0 + %arg1 index (%12) serves all three accesses
// in the second loop nest.
func @main_ex_dispatch_5() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} {
// 0x7F800000 is the IEEE-754 single-precision bit pattern of +infinity.
%cst = constant 0x7F800000 : f32
%cst_0 = constant 0.000000e+00 : f32
%true = constant true
%c32 = constant 32 : index
%c-1 = constant -1 : index
%c5 = constant 5 : index
%c0 = constant 0 : index
%c1 = constant 1 : index
// %0 is the output buffer (@ret0), %1 is the input buffer (@arg0).
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5xf32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<5x1x1xf32>
%2 = "gpu.block_id"() {dimension = "x"} : () -> index
%3 = "gpu.grid_dim"() {dimension = "x"} : () -> index
// Outer-loop start and step: block_id.x * 32 and grid_dim.x * 32.
%4 = muli %2, %c32 : index
%5 = muli %3, %c32 : index
// Loop nest 1: store 0.0 into the elements of @ret0.
scf.for %arg0 = %4 to %c5 step %5 {
// %9 = min(32, 5 - %arg0): upper bound of the inner loop.
%6 = muli %arg0, %c-1 : index
%7 = addi %6, %c5 : index
%8 = cmpi "slt", %c32, %7 : index
%9 = select %8, %c32, %7 : index
%10 = "gpu.thread_id"() {dimension = "x"} : () -> index
%11 = "gpu.block_dim"() {dimension = "x"} : () -> index
// Inner loop starts at thread_id.x and advances by block_dim.x.
scf.for %arg1 = %10 to %9 step %11 {
// Element index into @ret0 is outer offset + inner offset.
%12 = addi %arg0, %arg1 : index
store %cst_0, %0[%12] : memref<5xf32>
}
}
// Loop nest 2: per element, write the smaller of the @arg0 value and %19 into @ret0.
scf.for %arg0 = %4 to %c5 step %5 {
%6 = muli %arg0, %c-1 : index
%7 = addi %6, %c5 : index
%8 = cmpi "slt", %c32, %7 : index
%9 = select %8, %c32, %7 : index
%10 = "gpu.thread_id"() {dimension = "x"} : () -> index
%11 = "gpu.block_dim"() {dimension = "x"} : () -> index
scf.for %arg1 = %10 to %9 step %11 {
// The next two loops run from 0 to 1 with step 1, i.e. one iteration each.
scf.for %arg2 = %c0 to %c1 step %c1 {
scf.for %arg3 = %c0 to %c1 step %c1 {
%12 = addi %arg0, %arg1 : index
%13 = load %1[%12, %arg2, %arg3] : memref<5x1x1xf32>
%14 = load %0[%12] : memref<5xf32>
// %18 is true when %arg2 == 0 and %arg3 == 0; then +inf (%cst) is used instead of the value loaded from @ret0.
%15 = cmpi "eq", %arg2, %c0 : index
%16 = and %15, %true : i1
%17 = cmpi "eq", %arg3, %c0 : index
%18 = and %16, %17 : i1
%19 = select %18, %cst, %14 : f32
// Ordered less-than compare plus select keeps %13 when it is smaller, otherwise %19.
// NOTE(review): %arg2 and %arg3 only take the value 0 here, so %19 looks like it is always %cst
// and the 0.0 written by loop nest 1 never reaches the result - confirm.
%20 = cmpf "olt", %13, %19 : f32
%21 = select %20, %13, %19 : f32
store %21, %0[%12] : memref<5xf32>
}
}
}
}
return
}
// Buffer bindings: @arg0 is Read at (set 0, binding 0); @ret0 is Write|Discard at (set 0, binding 1).
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::ConvertToSPIRVPass ***
// Outer module carrying the SPIR-V target environment; it wraps a nested spv.module
// holding the kernel and the hal.interface describing the buffer bindings.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
spv.module Logical GLSL450 {
// Built-in input variables read by the kernel.
spv.globalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_NumWorkgroups__ built_in("NumWorkgroups") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input>
// Storage buffers at (set 0, binding 0) and (set 0, binding 1); each is a struct
// wrapping an array of 5 f32 with stride 4. These (set, binding) pairs match @arg0
// and @ret0 in the hal.interface at the end of this module.
spv.globalVariable @__resource_var_0_0__ bind(0, 0) : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
spv.globalVariable @__resource_var_0_1__ bind(0, 1) : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
// Kernel function; the 32x1x1 local size is still carried as an attribute here.
spv.func @main_ex_dispatch_5() "None" attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} {
// 0x7F800000 is the IEEE-754 single-precision bit pattern of +infinity.
%0 = spv.constant 0x7F800000 : f32
%1 = spv.constant 0.000000e+00 : f32
%2 = spv.constant true
%3 = spv.constant 32 : i32
%4 = spv.constant -1 : i32
%5 = spv.constant 5 : i32
%6 = spv.constant 0 : i32
%7 = spv.constant 1 : i32
// %8 addresses the bind(0, 1) buffer (loaded from and stored to below);
// %9 addresses the bind(0, 0) buffer (only loaded from below).
%8 = spv._address_of @__resource_var_0_1__ : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
%9 = spv._address_of @__resource_var_0_0__ : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
// %16 = WorkgroupId.x * 32 and %17 = NumWorkgroups.x * 32: start and step of both outer loops.
%10 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%11 = spv.Load "Input" %10 : vector<3xi32>
%12 = spv.CompositeExtract %11[0 : i32] : vector<3xi32>
%13 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%14 = spv.Load "Input" %13 : vector<3xi32>
%15 = spv.CompositeExtract %14[0 : i32] : vector<3xi32>
%16 = spv.IMul %12, %3 : i32
%17 = spv.IMul %15, %3 : i32
// Loop 1: store 0.0 (%1) into the bind(0, 1) buffer. Runs while %18 < 5.
spv.loop {
spv.Branch ^bb1(%16 : i32)
^bb1(%18: i32): // 2 preds: ^bb0, ^bb2
%19 = spv.SLessThan %18, %5 : i32
spv.BranchConditional %19, ^bb2, ^bb3
^bb2: // pred: ^bb1
// %23 = min(32, 5 - %18): upper bound of the inner loop.
%20 = spv.IMul %18, %4 : i32
%21 = spv.IAdd %20, %5 : i32
%22 = spv.SLessThan %3, %21 : i32
%23 = spv.Select %22, %3, %21 : i1, i32
%24 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%25 = spv.Load "Input" %24 : vector<3xi32>
%26 = spv.CompositeExtract %25[0 : i32] : vector<3xi32>
%27 = spv.constant 32 : i32
// Inner loop starts at LocalInvocationId.x and advances by the constant 32 (%27).
spv.loop {
spv.Branch ^bb1(%26 : i32)
^bb1(%29: i32): // 2 preds: ^bb0, ^bb2
%30 = spv.SLessThan %29, %23 : i32
spv.BranchConditional %30, ^bb2, ^bb3
^bb2: // pred: ^bb1
%31 = spv.IAdd %18, %29 : i32
// Array index %36 = 0 + 1 * %31; %32 selects struct member 0.
%32 = spv.constant 0 : i32
%33 = spv.constant 0 : i32
%34 = spv.constant 1 : i32
%35 = spv.IMul %34, %31 : i32
%36 = spv.IAdd %33, %35 : i32
%37 = spv.AccessChain %8[%32, %36] : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
spv.Store "StorageBuffer" %37, %1 : f32
%38 = spv.IAdd %29, %27 : i32
spv.Branch ^bb1(%38 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%28 = spv.IAdd %18, %17 : i32
spv.Branch ^bb1(%28 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Loop 2: per element, load from bind(0, 0), take the smaller of it and %62,
// and store the result to bind(0, 1).
spv.loop {
spv.Branch ^bb1(%16 : i32)
^bb1(%18: i32): // 2 preds: ^bb0, ^bb2
%19 = spv.SLessThan %18, %5 : i32
spv.BranchConditional %19, ^bb2, ^bb3
^bb2: // pred: ^bb1
%20 = spv.IMul %18, %4 : i32
%21 = spv.IAdd %20, %5 : i32
%22 = spv.SLessThan %3, %21 : i32
%23 = spv.Select %22, %3, %21 : i1, i32
%24 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%25 = spv.Load "Input" %24 : vector<3xi32>
%26 = spv.CompositeExtract %25[0 : i32] : vector<3xi32>
%27 = spv.constant 32 : i32
spv.loop {
spv.Branch ^bb1(%26 : i32)
^bb1(%29: i32): // 2 preds: ^bb0, ^bb2
%30 = spv.SLessThan %29, %23 : i32
spv.BranchConditional %30, ^bb2, ^bb3
^bb2: // pred: ^bb1
// The two loops below count from 0 (%6) while the counter is < 1 (%7), stepping by 1.
spv.loop {
spv.Branch ^bb1(%6 : i32)
^bb1(%32: i32): // 2 preds: ^bb0, ^bb2
%33 = spv.SLessThan %32, %7 : i32
spv.BranchConditional %33, ^bb2, ^bb3
^bb2: // pred: ^bb1
spv.loop {
spv.Branch ^bb1(%6 : i32)
^bb1(%35: i32): // 2 preds: ^bb0, ^bb2
%36 = spv.SLessThan %35, %7 : i32
spv.BranchConditional %36, ^bb2, ^bb3
^bb2: // pred: ^bb1
%37 = spv.IAdd %18, %29 : i32
// Index into bind(0, 0): %48 = 0 + 1 * %37 + 1 * %32 + 1 * %35.
%38 = spv.constant 0 : i32
%39 = spv.constant 0 : i32
%40 = spv.constant 1 : i32
%41 = spv.IMul %40, %37 : i32
%42 = spv.IAdd %39, %41 : i32
%43 = spv.constant 1 : i32
%44 = spv.IMul %43, %32 : i32
%45 = spv.IAdd %42, %44 : i32
%46 = spv.constant 1 : i32
%47 = spv.IMul %46, %35 : i32
%48 = spv.IAdd %45, %47 : i32
%49 = spv.AccessChain %9[%38, %48] : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
%50 = spv.Load "StorageBuffer" %49 : f32
// Index into bind(0, 1): %55 = 0 + 1 * %37.
%51 = spv.constant 0 : i32
%52 = spv.constant 0 : i32
%53 = spv.constant 1 : i32
%54 = spv.IMul %53, %37 : i32
%55 = spv.IAdd %52, %54 : i32
%56 = spv.AccessChain %8[%51, %55] : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
%57 = spv.Load "StorageBuffer" %56 : f32
// %61 is true when %32 == 0 and %35 == 0; then +inf (%0) is used instead of %57.
%58 = spv.IEqual %32, %6 : i32
%59 = spv.LogicalAnd %58, %2 : i1
%60 = spv.IEqual %35, %6 : i32
%61 = spv.LogicalAnd %59, %60 : i1
%62 = spv.Select %61, %0, %57 : i1, f32
// Ordered less-than compare plus select keeps %50 when it is smaller, otherwise %62.
%63 = spv.FOrdLessThan %50, %62 : f32
%64 = spv.Select %63, %50, %62 : i1, f32
// The store index %69 is recomputed the same way as %55.
%65 = spv.constant 0 : i32
%66 = spv.constant 0 : i32
%67 = spv.constant 1 : i32
%68 = spv.IMul %67, %37 : i32
%69 = spv.IAdd %66, %68 : i32
%70 = spv.AccessChain %8[%65, %69] : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
spv.Store "StorageBuffer" %70, %64 : f32
%71 = spv.IAdd %35, %7 : i32
spv.Branch ^bb1(%71 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%34 = spv.IAdd %32, %7 : i32
spv.Branch ^bb1(%34 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%31 = spv.IAdd %29, %27 : i32
spv.Branch ^bb1(%31 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%28 = spv.IAdd %18, %17 : i32
spv.Branch ^bb1(%28 : i32)
^bb3: // pred: ^bb1
spv._merge
}
spv.Return
}
}
// Buffer bindings: @arg0 is Read at (set 0, binding 0); @ret0 is Write|Discard at (set 0, binding 1).
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After SPIRVLowerABIAttributes ***
// Stand-alone SPIR-V module for the kernel. The entry point and its LocalSize
// execution mode are declared by the spv.EntryPoint / spv.ExecutionMode ops at the end.
spv.module Logical GLSL450 {
// Built-in input variables read by the kernel.
spv.globalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_NumWorkgroups__ built_in("NumWorkgroups") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input>
// Storage buffers at (set 0, binding 0) and (set 0, binding 1); each is a struct
// wrapping an array of 5 f32 with stride 4.
spv.globalVariable @__resource_var_0_0__ bind(0, 0) : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
spv.globalVariable @__resource_var_0_1__ bind(0, 1) : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
spv.func @main_ex_dispatch_5() "None" {
// 0x7F800000 is the IEEE-754 single-precision bit pattern of +infinity.
%0 = spv.constant 0x7F800000 : f32
%1 = spv.constant 0.000000e+00 : f32
%2 = spv.constant true
%3 = spv.constant 32 : i32
%4 = spv.constant -1 : i32
%5 = spv.constant 5 : i32
%6 = spv.constant 0 : i32
%7 = spv.constant 1 : i32
// %8 addresses the bind(0, 1) buffer (loaded from and stored to below);
// %9 addresses the bind(0, 0) buffer (only loaded from below).
%8 = spv._address_of @__resource_var_0_1__ : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
%9 = spv._address_of @__resource_var_0_0__ : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
// %16 = WorkgroupId.x * 32 and %17 = NumWorkgroups.x * 32: start and step of both outer loops.
%10 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%11 = spv.Load "Input" %10 : vector<3xi32>
%12 = spv.CompositeExtract %11[0 : i32] : vector<3xi32>
%13 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%14 = spv.Load "Input" %13 : vector<3xi32>
%15 = spv.CompositeExtract %14[0 : i32] : vector<3xi32>
%16 = spv.IMul %12, %3 : i32
%17 = spv.IMul %15, %3 : i32
// Loop 1: store 0.0 (%1) into the bind(0, 1) buffer. Runs while %18 < 5.
spv.loop {
spv.Branch ^bb1(%16 : i32)
^bb1(%18: i32): // 2 preds: ^bb0, ^bb2
%19 = spv.SLessThan %18, %5 : i32
spv.BranchConditional %19, ^bb2, ^bb3
^bb2: // pred: ^bb1
// %23 = min(32, 5 - %18): upper bound of the inner loop.
%20 = spv.IMul %18, %4 : i32
%21 = spv.IAdd %20, %5 : i32
%22 = spv.SLessThan %3, %21 : i32
%23 = spv.Select %22, %3, %21 : i1, i32
%24 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%25 = spv.Load "Input" %24 : vector<3xi32>
%26 = spv.CompositeExtract %25[0 : i32] : vector<3xi32>
%27 = spv.constant 32 : i32
// Inner loop starts at LocalInvocationId.x and advances by the constant 32 (%27).
spv.loop {
spv.Branch ^bb1(%26 : i32)
^bb1(%29: i32): // 2 preds: ^bb0, ^bb2
%30 = spv.SLessThan %29, %23 : i32
spv.BranchConditional %30, ^bb2, ^bb3
^bb2: // pred: ^bb1
%31 = spv.IAdd %18, %29 : i32
// Array index %36 = 0 + 1 * %31; %32 selects struct member 0.
%32 = spv.constant 0 : i32
%33 = spv.constant 0 : i32
%34 = spv.constant 1 : i32
%35 = spv.IMul %34, %31 : i32
%36 = spv.IAdd %33, %35 : i32
%37 = spv.AccessChain %8[%32, %36] : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
spv.Store "StorageBuffer" %37, %1 : f32
%38 = spv.IAdd %29, %27 : i32
spv.Branch ^bb1(%38 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%28 = spv.IAdd %18, %17 : i32
spv.Branch ^bb1(%28 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Loop 2: per element, load from bind(0, 0), take the smaller of it and %62,
// and store the result to bind(0, 1).
spv.loop {
spv.Branch ^bb1(%16 : i32)
^bb1(%18: i32): // 2 preds: ^bb0, ^bb2
%19 = spv.SLessThan %18, %5 : i32
spv.BranchConditional %19, ^bb2, ^bb3
^bb2: // pred: ^bb1
%20 = spv.IMul %18, %4 : i32
%21 = spv.IAdd %20, %5 : i32
%22 = spv.SLessThan %3, %21 : i32
%23 = spv.Select %22, %3, %21 : i1, i32
%24 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%25 = spv.Load "Input" %24 : vector<3xi32>
%26 = spv.CompositeExtract %25[0 : i32] : vector<3xi32>
%27 = spv.constant 32 : i32
spv.loop {
spv.Branch ^bb1(%26 : i32)
^bb1(%29: i32): // 2 preds: ^bb0, ^bb2
%30 = spv.SLessThan %29, %23 : i32
spv.BranchConditional %30, ^bb2, ^bb3
^bb2: // pred: ^bb1
// The two loops below count from 0 (%6) while the counter is < 1 (%7), stepping by 1.
spv.loop {
spv.Branch ^bb1(%6 : i32)
^bb1(%32: i32): // 2 preds: ^bb0, ^bb2
%33 = spv.SLessThan %32, %7 : i32
spv.BranchConditional %33, ^bb2, ^bb3
^bb2: // pred: ^bb1
spv.loop {
spv.Branch ^bb1(%6 : i32)
^bb1(%35: i32): // 2 preds: ^bb0, ^bb2
%36 = spv.SLessThan %35, %7 : i32
spv.BranchConditional %36, ^bb2, ^bb3
^bb2: // pred: ^bb1
%37 = spv.IAdd %18, %29 : i32
// Index into bind(0, 0): %48 = 0 + 1 * %37 + 1 * %32 + 1 * %35.
%38 = spv.constant 0 : i32
%39 = spv.constant 0 : i32
%40 = spv.constant 1 : i32
%41 = spv.IMul %40, %37 : i32
%42 = spv.IAdd %39, %41 : i32
%43 = spv.constant 1 : i32
%44 = spv.IMul %43, %32 : i32
%45 = spv.IAdd %42, %44 : i32
%46 = spv.constant 1 : i32
%47 = spv.IMul %46, %35 : i32
%48 = spv.IAdd %45, %47 : i32
%49 = spv.AccessChain %9[%38, %48] : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
%50 = spv.Load "StorageBuffer" %49 : f32
// Index into bind(0, 1): %55 = 0 + 1 * %37.
%51 = spv.constant 0 : i32
%52 = spv.constant 0 : i32
%53 = spv.constant 1 : i32
%54 = spv.IMul %53, %37 : i32
%55 = spv.IAdd %52, %54 : i32
%56 = spv.AccessChain %8[%51, %55] : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
%57 = spv.Load "StorageBuffer" %56 : f32
// %61 is true when %32 == 0 and %35 == 0; then +inf (%0) is used instead of %57.
%58 = spv.IEqual %32, %6 : i32
%59 = spv.LogicalAnd %58, %2 : i1
%60 = spv.IEqual %35, %6 : i32
%61 = spv.LogicalAnd %59, %60 : i1
%62 = spv.Select %61, %0, %57 : i1, f32
// Ordered less-than compare plus select keeps %50 when it is smaller, otherwise %62.
%63 = spv.FOrdLessThan %50, %62 : f32
%64 = spv.Select %63, %50, %62 : i1, f32
// The store index %69 is recomputed the same way as %55.
%65 = spv.constant 0 : i32
%66 = spv.constant 0 : i32
%67 = spv.constant 1 : i32
%68 = spv.IMul %67, %37 : i32
%69 = spv.IAdd %66, %68 : i32
%70 = spv.AccessChain %8[%65, %69] : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
spv.Store "StorageBuffer" %70, %64 : f32
%71 = spv.IAdd %35, %7 : i32
spv.Branch ^bb1(%71 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%34 = spv.IAdd %32, %7 : i32
spv.Branch ^bb1(%34 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%31 = spv.IAdd %29, %27 : i32
spv.Branch ^bb1(%31 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%28 = spv.IAdd %18, %17 : i32
spv.Branch ^bb1(%28 : i32)
^bb3: // pred: ^bb1
spv._merge
}
spv.Return
}
// GLCompute entry point listing the three built-in variables it uses, with a 32x1x1 local size.
spv.EntryPoint "GLCompute" @main_ex_dispatch_5, @__builtin_var_WorkgroupId__, @__builtin_var_NumWorkgroups__, @__builtin_var_LocalInvocationId__
spv.ExecutionMode @main_ex_dispatch_5 "LocalSize", 32, 1, 1
}
// *** IR Dump After Canonicalizer ***
// Stand-alone SPIR-V module for the kernel. Index arithmetic in this form is plain
// additions of the loop counters, and a single constant 32 (%5) serves as both the
// workgroup multiplier and the inner-loop step.
spv.module Logical GLSL450 {
// Built-in input variables read by the kernel.
spv.globalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_NumWorkgroups__ built_in("NumWorkgroups") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input>
// Storage buffers at (set 0, binding 0) and (set 0, binding 1); each is a struct
// wrapping an array of 5 f32 with stride 4.
spv.globalVariable @__resource_var_0_0__ bind(0, 0) : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
spv.globalVariable @__resource_var_0_1__ bind(0, 1) : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
spv.func @main_ex_dispatch_5() "None" {
// 0x7F800000 is the IEEE-754 single-precision bit pattern of +infinity.
%0 = spv.constant 0x7F800000 : f32
%1 = spv.constant 0.000000e+00 : f32
%2 = spv.constant -1 : i32
%3 = spv.constant 5 : i32
%4 = spv.constant 1 : i32
%5 = spv.constant 32 : i32
%6 = spv.constant 0 : i32
// %7 addresses the bind(0, 1) buffer (loaded from and stored to below);
// %8 addresses the bind(0, 0) buffer (only loaded from below).
%7 = spv._address_of @__resource_var_0_1__ : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
%8 = spv._address_of @__resource_var_0_0__ : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
// %15 = WorkgroupId.x * 32 and %16 = NumWorkgroups.x * 32: start and step of both outer loops.
%9 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%10 = spv.Load "Input" %9 : vector<3xi32>
%11 = spv.CompositeExtract %10[0 : i32] : vector<3xi32>
%12 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%13 = spv.Load "Input" %12 : vector<3xi32>
%14 = spv.CompositeExtract %13[0 : i32] : vector<3xi32>
%15 = spv.IMul %11, %5 : i32
%16 = spv.IMul %14, %5 : i32
// Loop 1: store 0.0 (%1) into the bind(0, 1) buffer. Runs while %17 < 5.
spv.loop {
spv.Branch ^bb1(%15 : i32)
^bb1(%17: i32): // 2 preds: ^bb0, ^bb2
%18 = spv.SLessThan %17, %3 : i32
spv.BranchConditional %18, ^bb2, ^bb3
^bb2: // pred: ^bb1
// %22 = min(32, 5 - %17): upper bound of the inner loop.
%19 = spv.IMul %17, %2 : i32
%20 = spv.IAdd %19, %3 : i32
%21 = spv.SLessThan %5, %20 : i32
%22 = spv.Select %21, %5, %20 : i1, i32
%23 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%24 = spv.Load "Input" %23 : vector<3xi32>
%25 = spv.CompositeExtract %24[0 : i32] : vector<3xi32>
// Inner loop starts at LocalInvocationId.x and advances by 32 (%5).
spv.loop {
spv.Branch ^bb1(%25 : i32)
^bb1(%27: i32): // 2 preds: ^bb0, ^bb2
%28 = spv.SLessThan %27, %22 : i32
spv.BranchConditional %28, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Element index is outer offset + inner offset; %6 selects struct member 0.
%29 = spv.IAdd %17, %27 : i32
%30 = spv.AccessChain %7[%6, %29] : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
spv.Store "StorageBuffer" %30, %1 : f32
%31 = spv.IAdd %27, %5 : i32
spv.Branch ^bb1(%31 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%26 = spv.IAdd %17, %16 : i32
spv.Branch ^bb1(%26 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Loop 2: per element, load from bind(0, 0), take the smaller of it and %45,
// and store the result to bind(0, 1).
spv.loop {
spv.Branch ^bb1(%15 : i32)
^bb1(%17: i32): // 2 preds: ^bb0, ^bb2
%18 = spv.SLessThan %17, %3 : i32
spv.BranchConditional %18, ^bb2, ^bb3
^bb2: // pred: ^bb1
%19 = spv.IMul %17, %2 : i32
%20 = spv.IAdd %19, %3 : i32
%21 = spv.SLessThan %5, %20 : i32
%22 = spv.Select %21, %5, %20 : i1, i32
%23 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%24 = spv.Load "Input" %23 : vector<3xi32>
%25 = spv.CompositeExtract %24[0 : i32] : vector<3xi32>
spv.loop {
spv.Branch ^bb1(%25 : i32)
^bb1(%27: i32): // 2 preds: ^bb0, ^bb2
%28 = spv.SLessThan %27, %22 : i32
spv.BranchConditional %28, ^bb2, ^bb3
^bb2: // pred: ^bb1
// The two loops below count from 0 (%6) while the counter is < 1 (%4), stepping by 1.
spv.loop {
spv.Branch ^bb1(%6 : i32)
^bb1(%30: i32): // 2 preds: ^bb0, ^bb2
%31 = spv.SLessThan %30, %4 : i32
spv.BranchConditional %31, ^bb2, ^bb3
^bb2: // pred: ^bb1
spv.loop {
spv.Branch ^bb1(%6 : i32)
^bb1(%33: i32): // 2 preds: ^bb0, ^bb2
%34 = spv.SLessThan %33, %4 : i32
spv.BranchConditional %34, ^bb2, ^bb3
^bb2: // pred: ^bb1
// %35 indexes the bind(0, 1) buffer; %37 = %35 + %30 + %33 indexes the bind(0, 0) buffer.
%35 = spv.IAdd %17, %27 : i32
%36 = spv.IAdd %35, %30 : i32
%37 = spv.IAdd %36, %33 : i32
%38 = spv.AccessChain %8[%6, %37] : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
%39 = spv.Load "StorageBuffer" %38 : f32
%40 = spv.AccessChain %7[%6, %35] : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
%41 = spv.Load "StorageBuffer" %40 : f32
// %44 is true when %30 == 0 and %33 == 0; then +inf (%0) is used instead of %41.
%42 = spv.IEqual %30, %6 : i32
%43 = spv.IEqual %33, %6 : i32
%44 = spv.LogicalAnd %42, %43 : i1
%45 = spv.Select %44, %0, %41 : i1, f32
// Ordered less-than compare plus select keeps %39 when it is smaller, otherwise %45.
%46 = spv.FOrdLessThan %39, %45 : f32
%47 = spv.Select %46, %39, %45 : i1, f32
// %48 is the same access chain as %40, formed again for the store.
%48 = spv.AccessChain %7[%6, %35] : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
spv.Store "StorageBuffer" %48, %47 : f32
%49 = spv.IAdd %33, %4 : i32
spv.Branch ^bb1(%49 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%32 = spv.IAdd %30, %4 : i32
spv.Branch ^bb1(%32 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%29 = spv.IAdd %27, %5 : i32
spv.Branch ^bb1(%29 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%26 = spv.IAdd %17, %16 : i32
spv.Branch ^bb1(%26 : i32)
^bb3: // pred: ^bb1
spv._merge
}
spv.Return
}
// GLCompute entry point listing the three built-in variables it uses, with a 32x1x1 local size.
spv.EntryPoint "GLCompute" @main_ex_dispatch_5, @__builtin_var_WorkgroupId__, @__builtin_var_NumWorkgroups__, @__builtin_var_LocalInvocationId__
spv.ExecutionMode @main_ex_dispatch_5 "LocalSize", 32, 1, 1
}
// *** IR Dump After CSE ***
// SPIR-V module for @main_ex_dispatch_5 as printed after the CSE pass.
// Two storage buffers are declared: bind(0, 0) is only loaded from, while
// bind(0, 1) is both stored to and loaded from. Each is a struct wrapping a
// 5-element f32 array with stride 4.
spv.module Logical GLSL450 {
spv.globalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_NumWorkgroups__ built_in("NumWorkgroups") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__resource_var_0_0__ bind(0, 0) : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
spv.globalVariable @__resource_var_0_1__ bind(0, 1) : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
spv.func @main_ex_dispatch_5() "None" {
// 0x7F800000 is the f32 bit pattern of +infinity; it seeds the running minimum in the second loop nest.
%0 = spv.constant 0x7F800000 : f32
%1 = spv.constant 0.000000e+00 : f32
%2 = spv.constant -1 : i32
%3 = spv.constant 5 : i32
%4 = spv.constant 1 : i32
%5 = spv.constant 32 : i32
%6 = spv.constant 0 : i32
%7 = spv._address_of @__resource_var_0_1__ : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
%8 = spv._address_of @__resource_var_0_0__ : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
%9 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%10 = spv.Load "Input" %9 : vector<3xi32>
%11 = spv.CompositeExtract %10[0 : i32] : vector<3xi32>
%12 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%13 = spv.Load "Input" %12 : vector<3xi32>
%14 = spv.CompositeExtract %13[0 : i32] : vector<3xi32>
// %15 = WorkgroupId[0] * 32 is the outer loop start; %16 = NumWorkgroups[0] * 32 is the outer loop step.
%15 = spv.IMul %11, %5 : i32
%16 = spv.IMul %14, %5 : i32
// Loop nest 1: store 0.0 (%1) into the bind(0, 1) buffer at index %17 + %27.
spv.loop {
spv.Branch ^bb1(%15 : i32)
^bb1(%17: i32): // 2 preds: ^bb0, ^bb2
%18 = spv.SLessThan %17, %3 : i32
spv.BranchConditional %18, ^bb2, ^bb3
^bb2: // pred: ^bb1
// %20 = 5 - %17 and %22 = min(32, %20): the inner loop's upper bound.
%19 = spv.IMul %17, %2 : i32
%20 = spv.IAdd %19, %3 : i32
%21 = spv.SLessThan %5, %20 : i32
%22 = spv.Select %21, %5, %20 : i1, i32
%23 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%24 = spv.Load "Input" %23 : vector<3xi32>
%25 = spv.CompositeExtract %24[0 : i32] : vector<3xi32>
// Inner loop starts at LocalInvocationId[0] and advances by 32 (%5).
spv.loop {
spv.Branch ^bb1(%25 : i32)
^bb1(%27: i32): // 2 preds: ^bb0, ^bb2
%28 = spv.SLessThan %27, %22 : i32
spv.BranchConditional %28, ^bb2, ^bb3
^bb2: // pred: ^bb1
%29 = spv.IAdd %17, %27 : i32
%30 = spv.AccessChain %7[%6, %29] : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
spv.Store "StorageBuffer" %30, %1 : f32
%31 = spv.IAdd %27, %5 : i32
spv.Branch ^bb1(%31 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%26 = spv.IAdd %17, %16 : i32
spv.Branch ^bb1(%26 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Loop nest 2: same two outer loops as above, plus two innermost loops that
// each start at 0 (%6), continue while < 1 (%4) and step by 1 (%4).
spv.loop {
spv.Branch ^bb1(%15 : i32)
^bb1(%17: i32): // 2 preds: ^bb0, ^bb2
%18 = spv.SLessThan %17, %3 : i32
spv.BranchConditional %18, ^bb2, ^bb3
^bb2: // pred: ^bb1
%19 = spv.IMul %17, %2 : i32
%20 = spv.IAdd %19, %3 : i32
%21 = spv.SLessThan %5, %20 : i32
%22 = spv.Select %21, %5, %20 : i1, i32
%23 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%24 = spv.Load "Input" %23 : vector<3xi32>
%25 = spv.CompositeExtract %24[0 : i32] : vector<3xi32>
spv.loop {
spv.Branch ^bb1(%25 : i32)
^bb1(%27: i32): // 2 preds: ^bb0, ^bb2
%28 = spv.SLessThan %27, %22 : i32
spv.BranchConditional %28, ^bb2, ^bb3
^bb2: // pred: ^bb1
spv.loop {
spv.Branch ^bb1(%6 : i32)
^bb1(%30: i32): // 2 preds: ^bb0, ^bb2
%31 = spv.SLessThan %30, %4 : i32
spv.BranchConditional %31, ^bb2, ^bb3
^bb2: // pred: ^bb1
spv.loop {
spv.Branch ^bb1(%6 : i32)
^bb1(%33: i32): // 2 preds: ^bb0, ^bb2
%34 = spv.SLessThan %33, %4 : i32
spv.BranchConditional %34, ^bb2, ^bb3
^bb2: // pred: ^bb1
// %39 is read from bind(0, 0) at %17 + %27 + %30 + %33; %41 is the current value in bind(0, 1) at %17 + %27.
%35 = spv.IAdd %17, %27 : i32
%36 = spv.IAdd %35, %30 : i32
%37 = spv.IAdd %36, %33 : i32
%38 = spv.AccessChain %8[%6, %37] : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
%39 = spv.Load "StorageBuffer" %38 : f32
%40 = spv.AccessChain %7[%6, %35] : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
%41 = spv.Load "StorageBuffer" %40 : f32
// When both innermost induction values are 0, use +inf (%0) instead of the loaded %41.
%42 = spv.IEqual %30, %6 : i32
%43 = spv.IEqual %33, %6 : i32
%44 = spv.LogicalAnd %42, %43 : i1
%45 = spv.Select %44, %0, %41 : i1, f32
// %47 = (%39 < %45) ? %39 : %45.
%46 = spv.FOrdLessThan %39, %45 : f32
%47 = spv.Select %46, %39, %45 : i1, f32
// The store reuses pointer %40; the preceding dump in this log computed an identical second AccessChain here.
spv.Store "StorageBuffer" %40, %47 : f32
%48 = spv.IAdd %33, %4 : i32
spv.Branch ^bb1(%48 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%32 = spv.IAdd %30, %4 : i32
spv.Branch ^bb1(%32 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%29 = spv.IAdd %27, %5 : i32
spv.Branch ^bb1(%29 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%26 = spv.IAdd %17, %16 : i32
spv.Branch ^bb1(%26 : i32)
^bb3: // pred: ^bb1
spv._merge
}
spv.Return
}
// GLCompute entry point with a 32 x 1 x 1 local size, matching the constant 32 (%5) used as tile width above.
spv.EntryPoint "GLCompute" @main_ex_dispatch_5, @__builtin_var_WorkgroupId__, @__builtin_var_NumWorkgroups__, @__builtin_var_LocalInvocationId__
spv.ExecutionMode @main_ex_dispatch_5 "LocalSize", 32, 1, 1
}
// *** IR Dump After SPIRVUpdateVCE ***
// SPIR-V module for @main_ex_dispatch_5 as printed after SPIRVUpdateVCE.
// Compared with the previous dump in this log, the module header now carries
// a `requires #spv.vce<...>` clause (version v1.0, capability Shader,
// extension SPV_KHR_storage_buffer_storage_class); the function body is unchanged.
spv.module Logical GLSL450 requires #spv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class]> {
spv.globalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_NumWorkgroups__ built_in("NumWorkgroups") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__resource_var_0_0__ bind(0, 0) : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
spv.globalVariable @__resource_var_0_1__ bind(0, 1) : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
spv.func @main_ex_dispatch_5() "None" {
// 0x7F800000 is the f32 bit pattern of +infinity.
%0 = spv.constant 0x7F800000 : f32
%1 = spv.constant 0.000000e+00 : f32
%2 = spv.constant -1 : i32
%3 = spv.constant 5 : i32
%4 = spv.constant 1 : i32
%5 = spv.constant 32 : i32
%6 = spv.constant 0 : i32
%7 = spv._address_of @__resource_var_0_1__ : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
%8 = spv._address_of @__resource_var_0_0__ : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
%9 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%10 = spv.Load "Input" %9 : vector<3xi32>
%11 = spv.CompositeExtract %10[0 : i32] : vector<3xi32>
%12 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%13 = spv.Load "Input" %12 : vector<3xi32>
%14 = spv.CompositeExtract %13[0 : i32] : vector<3xi32>
// Outer loop start (%15) and step (%16), both scaled by 32.
%15 = spv.IMul %11, %5 : i32
%16 = spv.IMul %14, %5 : i32
// Loop nest 1: writes 0.0 (%1) to the bind(0, 1) buffer.
spv.loop {
spv.Branch ^bb1(%15 : i32)
^bb1(%17: i32): // 2 preds: ^bb0, ^bb2
%18 = spv.SLessThan %17, %3 : i32
spv.BranchConditional %18, ^bb2, ^bb3
^bb2: // pred: ^bb1
// %22 = min(32, 5 - %17).
%19 = spv.IMul %17, %2 : i32
%20 = spv.IAdd %19, %3 : i32
%21 = spv.SLessThan %5, %20 : i32
%22 = spv.Select %21, %5, %20 : i1, i32
%23 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%24 = spv.Load "Input" %23 : vector<3xi32>
%25 = spv.CompositeExtract %24[0 : i32] : vector<3xi32>
spv.loop {
spv.Branch ^bb1(%25 : i32)
^bb1(%27: i32): // 2 preds: ^bb0, ^bb2
%28 = spv.SLessThan %27, %22 : i32
spv.BranchConditional %28, ^bb2, ^bb3
^bb2: // pred: ^bb1
%29 = spv.IAdd %17, %27 : i32
%30 = spv.AccessChain %7[%6, %29] : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
spv.Store "StorageBuffer" %30, %1 : f32
%31 = spv.IAdd %27, %5 : i32
spv.Branch ^bb1(%31 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%26 = spv.IAdd %17, %16 : i32
spv.Branch ^bb1(%26 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Loop nest 2: reads bind(0, 0), combines with bind(0, 1) via a less-than select, and writes bind(0, 1).
spv.loop {
spv.Branch ^bb1(%15 : i32)
^bb1(%17: i32): // 2 preds: ^bb0, ^bb2
%18 = spv.SLessThan %17, %3 : i32
spv.BranchConditional %18, ^bb2, ^bb3
^bb2: // pred: ^bb1
%19 = spv.IMul %17, %2 : i32
%20 = spv.IAdd %19, %3 : i32
%21 = spv.SLessThan %5, %20 : i32
%22 = spv.Select %21, %5, %20 : i1, i32
%23 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%24 = spv.Load "Input" %23 : vector<3xi32>
%25 = spv.CompositeExtract %24[0 : i32] : vector<3xi32>
spv.loop {
spv.Branch ^bb1(%25 : i32)
^bb1(%27: i32): // 2 preds: ^bb0, ^bb2
%28 = spv.SLessThan %27, %22 : i32
spv.BranchConditional %28, ^bb2, ^bb3
^bb2: // pred: ^bb1
// The two loops below each run from 0 while < 1 with step 1.
spv.loop {
spv.Branch ^bb1(%6 : i32)
^bb1(%30: i32): // 2 preds: ^bb0, ^bb2
%31 = spv.SLessThan %30, %4 : i32
spv.BranchConditional %31, ^bb2, ^bb3
^bb2: // pred: ^bb1
spv.loop {
spv.Branch ^bb1(%6 : i32)
^bb1(%33: i32): // 2 preds: ^bb0, ^bb2
%34 = spv.SLessThan %33, %4 : i32
spv.BranchConditional %34, ^bb2, ^bb3
^bb2: // pred: ^bb1
%35 = spv.IAdd %17, %27 : i32
%36 = spv.IAdd %35, %30 : i32
%37 = spv.IAdd %36, %33 : i32
%38 = spv.AccessChain %8[%6, %37] : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
%39 = spv.Load "StorageBuffer" %38 : f32
%40 = spv.AccessChain %7[%6, %35] : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
%41 = spv.Load "StorageBuffer" %40 : f32
// %45 is +inf (%0) when %30 == 0 and %33 == 0, otherwise the previously stored value %41.
%42 = spv.IEqual %30, %6 : i32
%43 = spv.IEqual %33, %6 : i32
%44 = spv.LogicalAnd %42, %43 : i1
%45 = spv.Select %44, %0, %41 : i1, f32
// %47 is the smaller of %39 and %45 under an ordered less-than compare.
%46 = spv.FOrdLessThan %39, %45 : f32
%47 = spv.Select %46, %39, %45 : i1, f32
spv.Store "StorageBuffer" %40, %47 : f32
%48 = spv.IAdd %33, %4 : i32
spv.Branch ^bb1(%48 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%32 = spv.IAdd %30, %4 : i32
spv.Branch ^bb1(%32 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%29 = spv.IAdd %27, %5 : i32
spv.Branch ^bb1(%29 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%26 = spv.IAdd %17, %16 : i32
spv.Branch ^bb1(%26 : i32)
^bb3: // pred: ^bb1
spv._merge
}
spv.Return
}
spv.EntryPoint "GLCompute" @main_ex_dispatch_5, @__builtin_var_WorkgroupId__, @__builtin_var_NumWorkgroups__, @__builtin_var_LocalInvocationId__
spv.ExecutionMode @main_ex_dispatch_5 "LocalSize", 32, 1, 1
}
// *** IR Dump After mlir::iree_compiler::IREE::HAL::TranslateExecutablesPass ***
// HAL executable for @main_ex_dispatch_5 as printed after TranslateExecutablesPass.
// The interface binds @arg0 at (set 0, binding 0) with access "Read" and @ret0 at
// (set 0, binding 1) with access "Write|Discard"; the entry point's recorded
// signature is (tensor<5x1x1xf32>) -> tensor<5xf32>.
hal.executable @main_ex_dispatch_5 attributes {sym_visibility = "private"} {
hal.interface @legacy_io {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
hal.executable.entry_point @main_ex_dispatch_5 attributes {interface = @legacy_io, ordinal = 0 : i32, signature = (tensor<5x1x1xf32>) -> tensor<5xf32>}
// Target section for backends matching "vulkan*". The enclosing module's target
// environment names SPIR-V v1.3 while the spv.module itself requires v1.0.
hal.executable.target "vulkan*" {
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
spv.module Logical GLSL450 requires #spv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class]> {
spv.globalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_NumWorkgroups__ built_in("NumWorkgroups") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input>
spv.globalVariable @__resource_var_0_0__ bind(0, 0) : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
spv.globalVariable @__resource_var_0_1__ bind(0, 1) : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
spv.func @main_ex_dispatch_5() "None" {
// 0x7F800000 is the f32 bit pattern of +infinity.
%0 = spv.constant 0x7F800000 : f32
%1 = spv.constant 0.000000e+00 : f32
%2 = spv.constant -1 : i32
%3 = spv.constant 5 : i32
%4 = spv.constant 1 : i32
%5 = spv.constant 32 : i32
%6 = spv.constant 0 : i32
%7 = spv._address_of @__resource_var_0_1__ : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
%8 = spv._address_of @__resource_var_0_0__ : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
%9 = spv._address_of @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input>
%10 = spv.Load "Input" %9 : vector<3xi32>
%11 = spv.CompositeExtract %10[0 : i32] : vector<3xi32>
%12 = spv._address_of @__builtin_var_NumWorkgroups__ : !spv.ptr<vector<3xi32>, Input>
%13 = spv.Load "Input" %12 : vector<3xi32>
%14 = spv.CompositeExtract %13[0 : i32] : vector<3xi32>
// Outer loop start (%15 = WorkgroupId[0] * 32) and step (%16 = NumWorkgroups[0] * 32).
%15 = spv.IMul %11, %5 : i32
%16 = spv.IMul %14, %5 : i32
// Loop nest 1: stores 0.0 (%1) into the binding-1 buffer at index %17 + %27.
spv.loop {
spv.Branch ^bb1(%15 : i32)
^bb1(%17: i32): // 2 preds: ^bb0, ^bb2
%18 = spv.SLessThan %17, %3 : i32
spv.BranchConditional %18, ^bb2, ^bb3
^bb2: // pred: ^bb1
// %22 = min(32, 5 - %17).
%19 = spv.IMul %17, %2 : i32
%20 = spv.IAdd %19, %3 : i32
%21 = spv.SLessThan %5, %20 : i32
%22 = spv.Select %21, %5, %20 : i1, i32
%23 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%24 = spv.Load "Input" %23 : vector<3xi32>
%25 = spv.CompositeExtract %24[0 : i32] : vector<3xi32>
spv.loop {
spv.Branch ^bb1(%25 : i32)
^bb1(%27: i32): // 2 preds: ^bb0, ^bb2
%28 = spv.SLessThan %27, %22 : i32
spv.BranchConditional %28, ^bb2, ^bb3
^bb2: // pred: ^bb1
%29 = spv.IAdd %17, %27 : i32
%30 = spv.AccessChain %7[%6, %29] : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
spv.Store "StorageBuffer" %30, %1 : f32
%31 = spv.IAdd %27, %5 : i32
spv.Branch ^bb1(%31 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%26 = spv.IAdd %17, %16 : i32
spv.Branch ^bb1(%26 : i32)
^bb3: // pred: ^bb1
spv._merge
}
// Loop nest 2: loads from binding 0, takes the smaller of that value and either
// +inf or the value already in binding 1, and stores the result to binding 1.
spv.loop {
spv.Branch ^bb1(%15 : i32)
^bb1(%17: i32): // 2 preds: ^bb0, ^bb2
%18 = spv.SLessThan %17, %3 : i32
spv.BranchConditional %18, ^bb2, ^bb3
^bb2: // pred: ^bb1
%19 = spv.IMul %17, %2 : i32
%20 = spv.IAdd %19, %3 : i32
%21 = spv.SLessThan %5, %20 : i32
%22 = spv.Select %21, %5, %20 : i1, i32
%23 = spv._address_of @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input>
%24 = spv.Load "Input" %23 : vector<3xi32>
%25 = spv.CompositeExtract %24[0 : i32] : vector<3xi32>
spv.loop {
spv.Branch ^bb1(%25 : i32)
^bb1(%27: i32): // 2 preds: ^bb0, ^bb2
%28 = spv.SLessThan %27, %22 : i32
spv.BranchConditional %28, ^bb2, ^bb3
^bb2: // pred: ^bb1
// The two loops below each run from 0 while < 1 with step 1.
spv.loop {
spv.Branch ^bb1(%6 : i32)
^bb1(%30: i32): // 2 preds: ^bb0, ^bb2
%31 = spv.SLessThan %30, %4 : i32
spv.BranchConditional %31, ^bb2, ^bb3
^bb2: // pred: ^bb1
spv.loop {
spv.Branch ^bb1(%6 : i32)
^bb1(%33: i32): // 2 preds: ^bb0, ^bb2
%34 = spv.SLessThan %33, %4 : i32
spv.BranchConditional %34, ^bb2, ^bb3
^bb2: // pred: ^bb1
%35 = spv.IAdd %17, %27 : i32
%36 = spv.IAdd %35, %30 : i32
%37 = spv.IAdd %36, %33 : i32
%38 = spv.AccessChain %8[%6, %37] : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
%39 = spv.Load "StorageBuffer" %38 : f32
%40 = spv.AccessChain %7[%6, %35] : !spv.ptr<!spv.struct<!spv.array<5 x f32, stride=4> [0]>, StorageBuffer>
%41 = spv.Load "StorageBuffer" %40 : f32
// %45 is +inf (%0) when %30 == 0 and %33 == 0, otherwise the loaded %41.
%42 = spv.IEqual %30, %6 : i32
%43 = spv.IEqual %33, %6 : i32
%44 = spv.LogicalAnd %42, %43 : i1
%45 = spv.Select %44, %0, %41 : i1, f32
%46 = spv.FOrdLessThan %39, %45 : f32
%47 = spv.Select %46, %39, %45 : i1, f32
spv.Store "StorageBuffer" %40, %47 : f32
%48 = spv.IAdd %33, %4 : i32
spv.Branch ^bb1(%48 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%32 = spv.IAdd %30, %4 : i32
spv.Branch ^bb1(%32 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%29 = spv.IAdd %27, %5 : i32
spv.Branch ^bb1(%29 : i32)
^bb3: // pred: ^bb1
spv._merge
}
%26 = spv.IAdd %17, %16 : i32
spv.Branch ^bb1(%26 : i32)
^bb3: // pred: ^bb1
spv._merge
}
spv.Return
}
spv.EntryPoint "GLCompute" @main_ex_dispatch_5, @__builtin_var_WorkgroupId__, @__builtin_var_NumWorkgroups__, @__builtin_var_LocalInvocationId__
spv.ExecutionMode @main_ex_dispatch_5 "LocalSize", 32, 1, 1
}
// Second copy of the @legacy_io interface, nested in the target module, with the
// same two bindings as the one at the top of this executable.
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
}
}
}
}
// *** IR Dump After Inliner ***
// Module for dispatch region @main_ex_dispatch_6 as printed after the Inliner.
// The function reads two 5-element inputs (@arg0: f32, @arg1: i32) and writes
// two 5-element i32 outputs (@ret0, @ret1) through the @legacy_io interface.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
func @main_ex_dispatch_6() {
%c0 = constant 0 : index
%cst = constant dense<0.000000e+00> : tensor<f32>
%0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<5xf32>
%1 = hal.interface.load.tensor @legacy_io::@arg1, offset = %c0 : tensor<5xi32>
// ret0 chain: (arg0 == 0.0) converted to i32, multiplied elementwise by arg1.
%2 = "xla_hlo.broadcast_in_dim"(%cst) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<5xf32>
%3 = "xla_hlo.compare"(%0, %2) {comparison_direction = "EQ"} : (tensor<5xf32>, tensor<5xf32>) -> tensor<5xi1>
%4 = "xla_hlo.convert"(%3) : (tensor<5xi1>) -> tensor<5xi32>
%5 = xla_hlo.multiply %4, %1 : tensor<5xi32>
// ret1 chain: the same computation applied to arg0 reversed along dimension 0 (arg1 is not reversed).
%6 = "xla_hlo.reverse"(%0) {dimensions = dense<0> : tensor<1xi64>} : (tensor<5xf32>) -> tensor<5xf32>
%7 = "xla_hlo.broadcast_in_dim"(%cst) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<5xf32>
%8 = "xla_hlo.compare"(%6, %7) {comparison_direction = "EQ"} : (tensor<5xf32>, tensor<5xf32>) -> tensor<5xi1>
%9 = "xla_hlo.convert"(%8) : (tensor<5xi1>) -> tensor<5xi32>
%10 = xla_hlo.multiply %9, %1 : tensor<5xi32>
hal.interface.store.tensor %5, @legacy_io::@ret0, offset = %c0 : tensor<5xi32>
hal.interface.store.tensor %10, @legacy_io::@ret1, offset = %c0 : tensor<5xi32>
return
}
// Bindings 0-1 are read-only inputs; bindings 2-3 are write/discard outputs.
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard"
hal.interface.binding @ret1, set=0, binding=3, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After mlir::iree_compiler::Shape::`anonymous-namespace'::TieDynamicShapesPass ***
// @main_ex_dispatch_6 as printed after TieDynamicShapesPass. The function text
// matches the one in the preceding Inliner dump; all tensor types are static (5 elements).
func @main_ex_dispatch_6() {
%c0 = constant 0 : index
%cst = constant dense<0.000000e+00> : tensor<f32>
%0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<5xf32>
%1 = hal.interface.load.tensor @legacy_io::@arg1, offset = %c0 : tensor<5xi32>
// ret0 = i32(arg0 == 0.0) * arg1, elementwise.
%2 = "xla_hlo.broadcast_in_dim"(%cst) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<5xf32>
%3 = "xla_hlo.compare"(%0, %2) {comparison_direction = "EQ"} : (tensor<5xf32>, tensor<5xf32>) -> tensor<5xi1>
%4 = "xla_hlo.convert"(%3) : (tensor<5xi1>) -> tensor<5xi32>
%5 = xla_hlo.multiply %4, %1 : tensor<5xi32>
// ret1 = i32(reverse(arg0) == 0.0) * arg1, elementwise.
%6 = "xla_hlo.reverse"(%0) {dimensions = dense<0> : tensor<1xi64>} : (tensor<5xf32>) -> tensor<5xf32>
%7 = "xla_hlo.broadcast_in_dim"(%cst) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<5xf32>
%8 = "xla_hlo.compare"(%6, %7) {comparison_direction = "EQ"} : (tensor<5xf32>, tensor<5xf32>) -> tensor<5xi1>
%9 = "xla_hlo.convert"(%8) : (tensor<5xi1>) -> tensor<5xi32>
%10 = xla_hlo.multiply %9, %1 : tensor<5xi32>
hal.interface.store.tensor %5, @legacy_io::@ret0, offset = %c0 : tensor<5xi32>
hal.interface.store.tensor %10, @legacy_io::@ret1, offset = %c0 : tensor<5xi32>
return
}
// *** IR Dump After mlir::iree_compiler::Shape::`anonymous-namespace'::MaterializeShapeCalculationsPass ***
// @main_ex_dispatch_6 as printed after MaterializeShapeCalculationsPass. The
// function text matches the preceding dump; no shape-calculation ops appear.
func @main_ex_dispatch_6() {
%c0 = constant 0 : index
%cst = constant dense<0.000000e+00> : tensor<f32>
%0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<5xf32>
%1 = hal.interface.load.tensor @legacy_io::@arg1, offset = %c0 : tensor<5xi32>
// ret0 = i32(arg0 == 0.0) * arg1, elementwise.
%2 = "xla_hlo.broadcast_in_dim"(%cst) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<5xf32>
%3 = "xla_hlo.compare"(%0, %2) {comparison_direction = "EQ"} : (tensor<5xf32>, tensor<5xf32>) -> tensor<5xi1>
%4 = "xla_hlo.convert"(%3) : (tensor<5xi1>) -> tensor<5xi32>
%5 = xla_hlo.multiply %4, %1 : tensor<5xi32>
// ret1 = i32(reverse(arg0) == 0.0) * arg1, elementwise.
%6 = "xla_hlo.reverse"(%0) {dimensions = dense<0> : tensor<1xi64>} : (tensor<5xf32>) -> tensor<5xf32>
%7 = "xla_hlo.broadcast_in_dim"(%cst) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<5xf32>
%8 = "xla_hlo.compare"(%6, %7) {comparison_direction = "EQ"} : (tensor<5xf32>, tensor<5xf32>) -> tensor<5xi1>
%9 = "xla_hlo.convert"(%8) : (tensor<5xi1>) -> tensor<5xi32>
%10 = xla_hlo.multiply %9, %1 : tensor<5xi32>
hal.interface.store.tensor %5, @legacy_io::@ret0, offset = %c0 : tensor<5xi32>
hal.interface.store.tensor %10, @legacy_io::@ret1, offset = %c0 : tensor<5xi32>
return
}
// *** IR Dump After mlir::iree_compiler::Shape::`anonymous-namespace'::HoistShapeCalculations ***
// @main_ex_dispatch_6 as printed after HoistShapeCalculations. The function
// text matches the preceding dump.
func @main_ex_dispatch_6() {
%c0 = constant 0 : index
%cst = constant dense<0.000000e+00> : tensor<f32>
%0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<5xf32>
%1 = hal.interface.load.tensor @legacy_io::@arg1, offset = %c0 : tensor<5xi32>
// ret0 = i32(arg0 == 0.0) * arg1, elementwise.
%2 = "xla_hlo.broadcast_in_dim"(%cst) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<5xf32>
%3 = "xla_hlo.compare"(%0, %2) {comparison_direction = "EQ"} : (tensor<5xf32>, tensor<5xf32>) -> tensor<5xi1>
%4 = "xla_hlo.convert"(%3) : (tensor<5xi1>) -> tensor<5xi32>
%5 = xla_hlo.multiply %4, %1 : tensor<5xi32>
// ret1 = i32(reverse(arg0) == 0.0) * arg1, elementwise.
%6 = "xla_hlo.reverse"(%0) {dimensions = dense<0> : tensor<1xi64>} : (tensor<5xf32>) -> tensor<5xf32>
%7 = "xla_hlo.broadcast_in_dim"(%cst) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<5xf32>
%8 = "xla_hlo.compare"(%6, %7) {comparison_direction = "EQ"} : (tensor<5xf32>, tensor<5xf32>) -> tensor<5xi1>
%9 = "xla_hlo.convert"(%8) : (tensor<5xi1>) -> tensor<5xi32>
%10 = xla_hlo.multiply %9, %1 : tensor<5xi32>
hal.interface.store.tensor %5, @legacy_io::@ret0, offset = %c0 : tensor<5xi32>
hal.interface.store.tensor %10, @legacy_io::@ret1, offset = %c0 : tensor<5xi32>
return
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::DecomposeHLOClampPass ***
// @main_ex_dispatch_6 as printed after DecomposeHLOClampPass. The function
// contains no clamp op, and its text matches the preceding dump.
func @main_ex_dispatch_6() {
%c0 = constant 0 : index
%cst = constant dense<0.000000e+00> : tensor<f32>
%0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<5xf32>
%1 = hal.interface.load.tensor @legacy_io::@arg1, offset = %c0 : tensor<5xi32>
// ret0 = i32(arg0 == 0.0) * arg1, elementwise.
%2 = "xla_hlo.broadcast_in_dim"(%cst) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<5xf32>
%3 = "xla_hlo.compare"(%0, %2) {comparison_direction = "EQ"} : (tensor<5xf32>, tensor<5xf32>) -> tensor<5xi1>
%4 = "xla_hlo.convert"(%3) : (tensor<5xi1>) -> tensor<5xi32>
%5 = xla_hlo.multiply %4, %1 : tensor<5xi32>
// ret1 = i32(reverse(arg0) == 0.0) * arg1, elementwise.
%6 = "xla_hlo.reverse"(%0) {dimensions = dense<0> : tensor<1xi64>} : (tensor<5xf32>) -> tensor<5xf32>
%7 = "xla_hlo.broadcast_in_dim"(%cst) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<5xf32>
%8 = "xla_hlo.compare"(%6, %7) {comparison_direction = "EQ"} : (tensor<5xf32>, tensor<5xf32>) -> tensor<5xi1>
%9 = "xla_hlo.convert"(%8) : (tensor<5xi1>) -> tensor<5xi32>
%10 = xla_hlo.multiply %9, %1 : tensor<5xi32>
hal.interface.store.tensor %5, @legacy_io::@ret0, offset = %c0 : tensor<5xi32>
hal.interface.store.tensor %10, @legacy_io::@ret1, offset = %c0 : tensor<5xi32>
return
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::ConvertHLOToLinalgOnTensorsPass ***
// @main_ex_dispatch_6 as printed after ConvertHLOToLinalgOnTensorsPass.
// Each xla_hlo op from the preceding dump now appears as one linalg.generic on
// tensors with a single "parallel" iterator over d0.
func @main_ex_dispatch_6() {
%c0 = constant 0 : index
%cst = constant dense<0.000000e+00> : tensor<f32>
%0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<5xf32>
%1 = hal.interface.load.tensor @legacy_io::@arg1, offset = %c0 : tensor<5xi32>
// Broadcast: the scalar input uses the empty map (d0) -> () and is yielded for every d0.
%2 = linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0) -> ()>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} %cst {
^bb0(%arg0: f32): // no predecessors
linalg.yield %arg0 : f32
}: tensor<f32> -> tensor<5xf32>
// Compare: arg0 "oeq" broadcast zero, producing i1.
%3 = linalg.generic {args_in = 2 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} %0, %2 {
^bb0(%arg0: f32, %arg1: f32): // no predecessors
%11 = cmpf "oeq", %arg0, %arg1 : f32
linalg.yield %11 : i1
}: tensor<5xf32>, tensor<5xf32> -> tensor<5xi1>
// Convert: zero-extend i1 to i32.
%4 = linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} %3 {
^bb0(%arg0: i1): // no predecessors
%11 = zexti %arg0 : i1 to i32
linalg.yield %11 : i32
}: tensor<5xi1> -> tensor<5xi32>
// Multiply by arg1; %5 is what gets stored to @ret0.
%5 = linalg.generic {args_in = 2 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} %4, %1 {
^bb0(%arg0: i32, %arg1: i32): // no predecessors
%11 = muli %arg0, %arg1 : i32
linalg.yield %11 : i32
}: tensor<5xi32>, tensor<5xi32> -> tensor<5xi32>
// Reverse: the input is read through (d0) -> (-d0 + 4), i.e. element 4 - d0 of arg0,
// so the indexing map has a coefficient of -1 on d0.
%6 = linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0) -> (-d0 + 4)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} %0 {
^bb0(%arg0: f32): // no predecessors
linalg.yield %arg0 : f32
}: tensor<5xf32> -> tensor<5xf32>
// The remaining four generics repeat broadcast / compare / convert / multiply on the reversed tensor %6.
%7 = linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0) -> ()>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} %cst {
^bb0(%arg0: f32): // no predecessors
linalg.yield %arg0 : f32
}: tensor<f32> -> tensor<5xf32>
%8 = linalg.generic {args_in = 2 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} %6, %7 {
^bb0(%arg0: f32, %arg1: f32): // no predecessors
%11 = cmpf "oeq", %arg0, %arg1 : f32
linalg.yield %11 : i1
}: tensor<5xf32>, tensor<5xf32> -> tensor<5xi1>
%9 = linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} %8 {
^bb0(%arg0: i1): // no predecessors
%11 = zexti %arg0 : i1 to i32
linalg.yield %11 : i32
}: tensor<5xi1> -> tensor<5xi32>
%10 = linalg.generic {args_in = 2 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} %9, %1 {
^bb0(%arg0: i32, %arg1: i32): // no predecessors
%11 = muli %arg0, %arg1 : i32
linalg.yield %11 : i32
}: tensor<5xi32>, tensor<5xi32> -> tensor<5xi32>
hal.interface.store.tensor %5, @legacy_io::@ret0, offset = %c0 : tensor<5xi32>
hal.interface.store.tensor %10, @legacy_io::@ret1, offset = %c0 : tensor<5xi32>
return
}
// *** IR Dump After LinalgFusionOfTensorOps ***
// Module for @main_ex_dispatch_6 as printed after LinalgFusionOfTensorOps.
// The nine generics of the preceding dump are now two, one per output, and the
// zero constant is a scalar f32 referenced directly inside each region.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
func @main_ex_dispatch_6() {
%c0 = constant 0 : index
%cst = constant 0.000000e+00 : f32
%0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<5xf32>
%1 = hal.interface.load.tensor @legacy_io::@arg1, offset = %c0 : tensor<5xi32>
// ret0: both inputs are read with the identity map.
%2 = linalg.generic {args_in = 2 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} %0, %1 {
^bb0(%arg0: f32, %arg1: i32): // no predecessors
%4 = cmpf "oeq", %arg0, %cst : f32
%5 = zexti %4 : i1 to i32
%6 = muli %5, %arg1 : i32
linalg.yield %6 : i32
}: tensor<5xf32>, tensor<5xi32> -> tensor<5xi32>
// ret1: same region, but the first input (%0) is read through (d0) -> (-d0 + 4);
// the second input (%1) and the result keep the identity map.
%3 = linalg.generic {args_in = 2 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0) -> (-d0 + 4)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} %0, %1 {
^bb0(%arg0: f32, %arg1: i32): // no predecessors
%4 = cmpf "oeq", %arg0, %cst : f32
%5 = zexti %4 : i1 to i32
%6 = muli %5, %arg1 : i32
linalg.yield %6 : i32
}: tensor<5xf32>, tensor<5xi32> -> tensor<5xi32>
hal.interface.store.tensor %2, @legacy_io::@ret0, offset = %c0 : tensor<5xi32>
hal.interface.store.tensor %3, @legacy_io::@ret1, offset = %c0 : tensor<5xi32>
return
}
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard"
hal.interface.binding @ret1, set=0, binding=3, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After mlir::iree_compiler::`anonymous-namespace'::ConvertHLOToLinalgOnBuffersPass ***
// @main_ex_dispatch_6 as printed after ConvertHLOToLinalgOnBuffersPass.
// The hal.interface load/store tensor ops are gone: every binding is now an
// iree.placeholder memref, and each linalg.generic takes its output buffer as
// a third operand instead of returning a tensor.
func @main_ex_dispatch_6() {
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5xi32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret1} : memref<5xi32>
// %c0 has no remaining use in this function.
%c0 = constant 0 : index
%cst = constant 0.000000e+00 : f32
%2 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<5xf32>
%3 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg1} : memref<5xi32>
// Writes @ret0 (%0); %arg2 is the output element's block argument and is not read.
linalg.generic {args_in = 2 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} %2, %3, %0 {
^bb0(%arg0: f32, %arg1: i32, %arg2: i32): // no predecessors
%4 = cmpf "oeq", %arg0, %cst : f32
%5 = zexti %4 : i1 to i32
%6 = muli %5, %arg1 : i32
linalg.yield %6 : i32
}: memref<5xf32>, memref<5xi32>, memref<5xi32>
// Writes @ret1 (%1); @arg0 (%2) is read through (d0) -> (-d0 + 4).
linalg.generic {args_in = 2 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0) -> (-d0 + 4)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} %2, %3, %1 {
^bb0(%arg0: f32, %arg1: i32, %arg2: i32): // no predecessors
%4 = cmpf "oeq", %arg0, %cst : f32
%5 = zexti %4 : i1 to i32
%6 = muli %5, %arg1 : i32
linalg.yield %6 : i32
}: memref<5xf32>, memref<5xi32>, memref<5xi32>
return
}
// *** IR Dump After Canonicalizer ***
// Module for @main_ex_dispatch_6 as printed after the Canonicalizer.
// Relative to the preceding dump, the unused `%c0` index constant is gone and
// `%cst` is printed first; the two linalg.generic ops are unchanged.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
func @main_ex_dispatch_6() {
%cst = constant 0.000000e+00 : f32
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5xi32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret1} : memref<5xi32>
%2 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<5xf32>
%3 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg1} : memref<5xi32>
// Writes @ret0 (%0) from @arg0 and @arg1, all with the identity map.
linalg.generic {args_in = 2 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} %2, %3, %0 {
^bb0(%arg0: f32, %arg1: i32, %arg2: i32): // no predecessors
%4 = cmpf "oeq", %arg0, %cst : f32
%5 = zexti %4 : i1 to i32
%6 = muli %5, %arg1 : i32
linalg.yield %6 : i32
}: memref<5xf32>, memref<5xi32>, memref<5xi32>
// Writes @ret1 (%1); @arg0 (%2) is read through (d0) -> (-d0 + 4).
linalg.generic {args_in = 2 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0) -> (-d0 + 4)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} %2, %3, %1 {
^bb0(%arg0: f32, %arg1: i32, %arg2: i32): // no predecessors
%4 = cmpf "oeq", %arg0, %cst : f32
%5 = zexti %4 : i1 to i32
%6 = muli %5, %arg1 : i32
linalg.yield %6 : i32
}: memref<5xf32>, memref<5xi32>, memref<5xi32>
return
}
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard"
hal.interface.binding @ret1, set=0, binding=3, type="StorageBuffer", access="Write|Discard"
}
}
// *** IR Dump After CSE ***
// Module for @main_ex_dispatch_6 as printed after CSE; this is the last IR dump
// in the log and its text matches the preceding Canonicalizer dump.
// The log line that follows this module is an assertion failure from Linalg's
// Tiling.cpp with the message "nonpositive multiplying coefficient".
// NOTE(review): presumably triggered by the (d0) -> (-d0 + 4) indexing map on the
// second linalg.generic below, whose d0 coefficient is -1 -- confirm against Tiling.cpp.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
func @main_ex_dispatch_6() {
%cst = constant 0.000000e+00 : f32
%0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5xi32>
%1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret1} : memref<5xi32>
%2 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<5xf32>
%3 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg1} : memref<5xi32>
// Writes @ret0 (%0): i32(arg0 "oeq" 0.0) * arg1, all operands with the identity map.
linalg.generic {args_in = 2 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} %2, %3, %0 {
^bb0(%arg0: f32, %arg1: i32, %arg2: i32): // no predecessors
%4 = cmpf "oeq", %arg0, %cst : f32
%5 = zexti %4 : i1 to i32
%6 = muli %5, %arg1 : i32
linalg.yield %6 : i32
}: memref<5xf32>, memref<5xi32>, memref<5xi32>
// Writes @ret1 (%1): same region, with @arg0 (%2) read at index 4 - d0.
linalg.generic {args_in = 2 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0) -> (-d0 + 4)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} %2, %3, %1 {
^bb0(%arg0: f32, %arg1: i32, %arg2: i32): // no predecessors
%4 = cmpf "oeq", %arg0, %cst : f32
%5 = zexti %4 : i1 to i32
%6 = muli %5, %arg1 : i32
linalg.yield %6 : i32
}: memref<5xf32>, memref<5xi32>, memref<5xi32>
return
}
hal.interface @legacy_io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard"
hal.interface.binding @ret1, set=0, binding=3, type="StorageBuffer", access="Write|Discard"
}
}
Assertion failed: expr.getRHS().cast<AffineConstantExpr>().getValue() > 0 && "nonpositive multiplying coefficient", file D:\Dev\iree\third_party\llvm-project\mlir\lib\Dialect\Linalg\Transforms\Tiling.cpp, line 107
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment