Skip to content

Instantly share code, notes, and snippets.

@antiagainst
Created August 31, 2022 21:25
Show Gist options
  • Save antiagainst/45be45b8ee91bbd1d6cb69f60aab5756 to your computer and use it in GitHub Desktop.
Save antiagainst/45be45b8ee91bbd1d6cb69f60aab5756 to your computer and use it in GitHub Desktop.
// Dispatch function: tiled and vectorized 2-D convolution followed by an
// element-wise subtraction.
//
// Going by the function name (conv_2d_nhwc_hwcf) and the binding types below:
//   binding 0: 1x225x225x3  input image (read-only)
//   binding 1: 3x3x3x16     filter (read-only)
//   binding 2: 1x112x112x16 tensor subtracted from the convolution result (read-only)
//   binding 3: 1x112x112x16 result (write-only)
// Input offsets are always the output offsets multiplied by 2, i.e. the
// convolution has stride 2 in both spatial dimensions.
// NOTE(review): the "conv_pad" prefix suggests the input was padded by an
// earlier dispatch; that is not visible in this function -- confirm.
//
// Tiling structure visible in the loop nest:
//   1. Workgroup level: 1x4x4x16 output tiles, distributed over workgroup ids z/y/x.
//   2. Inner level: 1x2x2x4 output tiles (loops tagged iree.spirv.distribute_dim).
//   3. Reduction level: one filter tap (kh, kw) and one output row at a time,
//      computed with vector.contract over the 3 input channels.
func.func @conv_pad_dispatch_1_conv_2d_nhwc_hwcf_1x112x112x16x3x3x3() {
// Zero vector used to initialize each 1x2x2x4 accumulator tile.
%cst = arith.constant dense<0.000000e+00> : vector<1x2x2x4xf32>
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c3 = arith.constant 3 : index
%c2 = arith.constant 2 : index
%c4 = arith.constant 4 : index
%c112 = arith.constant 112 : index
%c16 = arith.constant 16 : index
// Padding value for the vector.transfer_read ops; every read below is marked
// fully in_bounds.
%cst_0 = arith.constant 0.000000e+00 : f32
// Buffer bindings: %0 = input, %1 = filter, %2 = subtrahend, %3 = output.
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:1x225x225x3xf32>
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:3x3x3x16xf32>
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:1x112x112x16xf32>
%3 = hal.interface.binding.subspan set(0) binding(3) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<writeonly:1x112x112x16xf32>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
%workgroup_count_y = hal.interface.workgroup.count[1] : index
%workgroup_id_z = hal.interface.workgroup.id[2] : index
%workgroup_count_z = hal.interface.workgroup.count[2] : index
// Workgroup loop over output dim 1 (extent 112): starts at id_z * 4 and
// advances by count_z * 4, so each workgroup visits every count_z-th tile of 4.
%4 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_z]
%5 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_z]
scf.for %arg0 = %4 to %c112 step %5 {
// Workgroup loop over output dim 2 (extent 112), tiles of 4, distributed over y.
%6 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_y]
%7 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_y]
scf.for %arg1 = %6 to %c112 step %7 {
// Workgroup loop over output dim 3 (extent 16), tiles of 16, distributed over x.
%8 = affine.apply affine_map<()[s0] -> (s0 * 16)>()[%workgroup_id_x]
%9 = affine.apply affine_map<()[s0] -> (s0 * 16)>()[%workgroup_count_x]
scf.for %arg2 = %8 to %c16 step %9 {
// Output tile for this workgroup iteration; used as the initial value that
// the inner loops update slice by slice.
%10 = flow.dispatch.tensor.load %3, offsets = [0, %arg0, %arg1, %arg2], sizes = [1, 4, 4, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<writeonly:1x112x112x16xf32> -> tensor<1x4x4x16xf32>
// Input offsets are twice the output offsets (stride 2).
%11 = affine.apply affine_map<(d0) -> (d0 * 2)>(%arg0)
%12 = affine.apply affine_map<(d0) -> (d0 * 2)>(%arg1)
// 9x9 input window: 4 outputs at stride 2 with a 3-wide filter need
// (4 - 1) * 2 + 3 = 9 input elements per spatial dimension.
%13 = flow.dispatch.tensor.load %0, offsets = [0, %11, %12, 0], sizes = [1, 9, 9, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:1x225x225x3xf32> -> tensor<1x9x9x3xf32>
// Filter slice covering the 16 output channels of this tile.
%14 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, %arg2], sizes = [3, 3, 3, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:3x3x3x16xf32> -> tensor<3x3x3x16xf32>
// Matching tile of the tensor that is subtracted from the convolution result.
%15 = flow.dispatch.tensor.load %2, offsets = [0, %arg0, %arg1, %arg2], sizes = [1, 4, 4, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:1x112x112x16xf32> -> tensor<1x4x4x16xf32>
// Inner tiling of the 1x4x4x16 workgroup tile into 1x2x2x4 tiles. The three
// loops carry the partially updated workgroup tile through iter_args.
// NOTE(review): the iree.spirv.distribute_dim attributes on these loops
// presumably mark them for later distribution to invocations -- confirm
// against the IREE SPIR-V pipeline.
%16 = scf.for %arg3 = %c0 to %c4 step %c2 iter_args(%arg4 = %10) -> (tensor<1x4x4x16xf32>) {
%17 = scf.for %arg5 = %c0 to %c4 step %c2 iter_args(%arg6 = %arg4) -> (tensor<1x4x4x16xf32>) {
%18 = scf.for %arg7 = %c0 to %c16 step %c4 iter_args(%arg8 = %arg6) -> (tensor<1x4x4x16xf32>) {
// %19: 1x2x2x4 slice of the subtrahend tile, consumed after the reduction.
%19 = tensor.extract_slice %15[0, %arg3, %arg5, %arg7] [1, 2, 2, 4] [1, 1, 1, 1] : tensor<1x4x4x16xf32> to tensor<1x2x2x4xf32>
// %20/%21: 1x2x2x4 slice of the output tile, overwritten with zeros to serve
// as the convolution accumulator.
%20 = tensor.extract_slice %arg8[0, %arg3, %arg5, %arg7] [1, 2, 2, 4] [1, 1, 1, 1] : tensor<1x4x4x16xf32> to tensor<1x2x2x4xf32>
%21 = vector.transfer_write %cst, %20[%c0, %c0, %c0, %c0] {in_bounds = [true, true, true, true]} : vector<1x2x2x4xf32>, tensor<1x2x2x4xf32>
// Input offsets inside the 9x9 window are again twice the output offsets.
%22 = affine.apply affine_map<(d0) -> (d0 * 2)>(%arg3)
%23 = affine.apply affine_map<(d0) -> (d0 * 2)>(%arg5)
// 5x5 input window for a 2x2 output tile: (2 - 1) * 2 + 3 = 5.
%24 = tensor.extract_slice %13[0, %22, %23, 0] [1, 5, 5, 3] [1, 1, 1, 1] : tensor<1x9x9x3xf32> to tensor<1x5x5x3xf32>
// Filter slice for the 4 output channels of this inner tile.
%25 = tensor.extract_slice %14[0, 0, 0, %arg7] [3, 3, 3, 4] [1, 1, 1, 1] : tensor<3x3x3x16xf32> to tensor<3x3x3x4xf32>
// Reduction over the 3x3 filter taps: %arg9 indexes filter dim 0 and %arg11
// filter dim 1. The accumulator tile is threaded through iter_args.
%26 = scf.for %arg9 = %c0 to %c3 step %c1 iter_args(%arg10 = %21) -> (tensor<1x2x2x4xf32>) {
%32 = scf.for %arg11 = %c0 to %c3 step %c1 iter_args(%arg12 = %arg10) -> (tensor<1x2x2x4xf32>) {
// 3x3 input window whose origin is shifted by the current filter tap; it
// holds the inputs for the 2x2 outputs at stride 2 ((2 - 1) * 2 + 1 = 3).
%33 = tensor.extract_slice %24[0, %arg9, %arg11, 0] [1, 3, 3, 3] [1, 1, 1, 1] : tensor<1x5x5x3xf32> to tensor<1x3x3x3xf32>
// Filter values for this tap: 3 input channels x 4 output channels.
%34 = tensor.extract_slice %25[%arg9, %arg11, 0, 0] [1, 1, 3, 4] [1, 1, 1, 1] : tensor<3x3x3x4xf32> to tensor<1x1x3x4xf32>
// Loop over the 2 output rows of the inner tile (tagged spirv.unroll).
%35 = scf.for %arg13 = %c0 to %c2 step %c1 iter_args(%arg14 = %arg12) -> (tensor<1x2x2x4xf32>) {
// Input row for output row %arg13 is %arg13 * 2 (stride 2).
%36 = affine.apply affine_map<(d0) -> (d0 * 2)>(%arg13)
%37 = tensor.extract_slice %33[0, %36, 0, 0] [1, 1, 3, 3] [1, 1, 1, 1] : tensor<1x3x3x3xf32> to tensor<1x1x3x3xf32>
// Accumulator row that this iteration updates.
%38 = tensor.extract_slice %arg14[0, %arg13, 0, 0] [1, 1, 2, 4] [1, 1, 1, 1] : tensor<1x2x2x4xf32> to tensor<1x1x2x4xf32>
// Rank-reducing slices: drop one unit dimension from input, filter and
// accumulator so the vector ops below work on 3-D shapes.
%39 = tensor.extract_slice %37[0, 0, 0, 0] [1, 1, 3, 3] [1, 1, 1, 1] : tensor<1x1x3x3xf32> to tensor<1x3x3xf32>
%40 = tensor.extract_slice %34[0, 0, 0, 0] [1, 1, 3, 4] [1, 1, 1, 1] : tensor<1x1x3x4xf32> to tensor<1x3x4xf32>
%41 = tensor.extract_slice %38[0, 0, 0, 0] [1, 1, 2, 4] [1, 1, 1, 1] : tensor<1x1x2x4xf32> to tensor<1x2x4xf32>
// Load input row (%42), filter tap (%43) and current accumulator row (%44).
%42 = vector.transfer_read %39[%c0, %c0, %c0], %cst_0 {in_bounds = [true, true, true]} : tensor<1x3x3xf32>, vector<1x3x3xf32>
%43 = vector.transfer_read %40[%c0, %c0, %c0], %cst_0 {in_bounds = [true, true, true]} : tensor<1x3x4xf32>, vector<1x3x4xf32>
%44 = vector.transfer_read %41[%c0, %c0, %c0], %cst_0 {in_bounds = [true, true, true]} : tensor<1x2x4xf32>, vector<1x2x4xf32>
// Input positions 0 and 2 of the row feed output columns 0 and 1 (stride 2);
// position 1 is not used for this filter tap.
%45 = vector.extract_strided_slice %42 {offsets = [0, 0, 0], sizes = [1, 1, 3], strides = [1, 1, 1]} : vector<1x3x3xf32> to vector<1x1x3xf32>
%46 = vector.extract_strided_slice %42 {offsets = [0, 2, 0], sizes = [1, 1, 3], strides = [1, 1, 1]} : vector<1x3x3xf32> to vector<1x1x3xf32>
// 3x4 filter matrix for this tap (leading unit dimension dropped).
%47 = vector.extract %43[0] : vector<1x3x4xf32>
// Current accumulator values for output columns 0 and 1.
%48 = vector.extract_strided_slice %44 {offsets = [0, 0, 0], sizes = [1, 1, 4], strides = [1, 1, 1]} : vector<1x2x4xf32> to vector<1x1x4xf32>
%49 = vector.extract_strided_slice %44 {offsets = [0, 1, 0], sizes = [1, 1, 4], strides = [1, 1, 1]} : vector<1x2x4xf32> to vector<1x1x4xf32>
// acc[d0, d1, d2] += input[d0, d1, d3] * filter[d3, d2], reducing over d3
// (extent 3), once per output column.
%50 = vector.contract {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>, affine_map<(d0, d1, d2, d3) -> (d3, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel", "reduction"], kind = #vector.kind<add>} %45, %47, %48 : vector<1x1x3xf32>, vector<3x4xf32> into vector<1x1x4xf32>
%51 = vector.contract {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>, affine_map<(d0, d1, d2, d3) -> (d3, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel", "reduction"], kind = #vector.kind<add>} %46, %47, %49 : vector<1x1x3xf32>, vector<3x4xf32> into vector<1x1x4xf32>
// Reassemble the two updated columns into the 1x2x4 accumulator row.
%52 = vector.insert_strided_slice %50, %44 {offsets = [0, 0, 0], strides = [1, 1, 1]} : vector<1x1x4xf32> into vector<1x2x4xf32>
%53 = vector.insert_strided_slice %51, %52 {offsets = [0, 1, 0], strides = [1, 1, 1]} : vector<1x1x4xf32> into vector<1x2x4xf32>
// Write the row back and re-insert it through the rank-reduced slices into
// the loop-carried 1x2x2x4 accumulator tile.
%54 = vector.transfer_write %53, %41[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x2x4xf32>, tensor<1x2x4xf32>
%55 = tensor.insert_slice %54 into %38[0, 0, 0, 0] [1, 1, 2, 4] [1, 1, 1, 1] : tensor<1x2x4xf32> into tensor<1x1x2x4xf32>
%56 = tensor.insert_slice %55 into %arg14[0, %arg13, 0, 0] [1, 1, 2, 4] [1, 1, 1, 1] : tensor<1x1x2x4xf32> into tensor<1x2x2x4xf32>
scf.yield %56 : tensor<1x2x2x4xf32>
} {spirv.unroll}
scf.yield %35 : tensor<1x2x2x4xf32>
}
scf.yield %32 : tensor<1x2x2x4xf32>
}
// Epilogue for the inner tile: result = convolution (%28) - subtrahend (%27).
%27 = vector.transfer_read %19[%c0, %c0, %c0, %c0], %cst_0 {in_bounds = [true, true, true, true]} : tensor<1x2x2x4xf32>, vector<1x2x2x4xf32>
%28 = vector.transfer_read %26[%c0, %c0, %c0, %c0], %cst_0 {in_bounds = [true, true, true, true]} : tensor<1x2x2x4xf32>, vector<1x2x2x4xf32>
%29 = arith.subf %28, %27 : vector<1x2x2x4xf32>
%30 = vector.transfer_write %29, %26[%c0, %c0, %c0, %c0] {in_bounds = [true, true, true, true]} : vector<1x2x2x4xf32>, tensor<1x2x2x4xf32>
// Place the finished 1x2x2x4 tile back into the loop-carried workgroup tile.
%31 = tensor.insert_slice %30 into %arg8[0, %arg3, %arg5, %arg7] [1, 2, 2, 4] [1, 1, 1, 1] : tensor<1x2x2x4xf32> into tensor<1x4x4x16xf32>
scf.yield %31 : tensor<1x4x4x16xf32>
} {iree.spirv.distribute_dim = 0 : index}
scf.yield %18 : tensor<1x4x4x16xf32>
} {iree.spirv.distribute_dim = 1 : index}
scf.yield %17 : tensor<1x4x4x16xf32>
} {iree.spirv.distribute_dim = 2 : index}
// Store the completed 1x4x4x16 workgroup tile to the output binding.
flow.dispatch.tensor.store %16, %3, offsets = [0, %arg0, %arg1, %arg2], sizes = [1, 4, 4, 16], strides = [1, 1, 1, 1] : tensor<1x4x4x16xf32> -> !flow.dispatch.tensor<writeonly:1x112x112x16xf32>
}
}
}
return
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment