Created
August 31, 2022 21:25
-
-
Save antiagainst/45be45b8ee91bbd1d6cb69f60aab5756 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Dispatch function that computes a 3x3 convolution tile by tile, subtracts a
// second 1x112x112x16 operand from the result, and stores it to the output
// binding. It takes no arguments and returns nothing; all data flows through
// the four HAL bindings declared below.
// NOTE(review): input offsets are always 2x the output offsets, which looks
// like a stride-2 convolution over a pre-padded 225x225 input -- confirm
// against the producing pipeline.
func.func @conv_pad_dispatch_1_conv_2d_nhwc_hwcf_1x112x112x16x3x3x3() { | |
// Zero vector used to reset each 1x2x2x4 accumulator slice before reduction.
%cst = arith.constant dense<0.000000e+00> : vector<1x2x2x4xf32> | |
// Index constants used as loop bounds, steps and transfer indices.
%c0 = arith.constant 0 : index | |
%c1 = arith.constant 1 : index | |
%c3 = arith.constant 3 : index | |
%c2 = arith.constant 2 : index | |
%c4 = arith.constant 4 : index | |
%c112 = arith.constant 112 : index | |
%c16 = arith.constant 16 : index | |
// Scalar zero used as the padding value of every vector.transfer_read below.
%cst_0 = arith.constant 0.000000e+00 : f32 | |
// Binding 0: read-only 1x225x225x3 tensor (the convolution input).
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:1x225x225x3xf32> | |
// Binding 1: read-only 3x3x3x16 tensor (the convolution filter).
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:3x3x3x16xf32> | |
// Binding 2: read-only 1x112x112x16 tensor that is subtracted from the
// convolution result further down.
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:1x112x112x16xf32> | |
// Binding 3: write-only 1x112x112x16 tensor that receives the final result.
%3 = hal.interface.binding.subspan set(0) binding(3) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<writeonly:1x112x112x16xf32> | |
// Workgroup ids and counts along the three dimensions; they drive the cyclic
// distribution of the three outer loops.
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
// Loop over output dimension 1: start at id_z * 4, advance by count_z * 4,
// up to 112.
%4 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_z] | |
%5 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_z] | |
scf.for %arg0 = %4 to %c112 step %5 { | |
// Loop over output dimension 2: start at id_y * 4, advance by count_y * 4,
// up to 112.
%6 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_y] | |
%7 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_y] | |
scf.for %arg1 = %6 to %c112 step %7 { | |
// Loop over output dimension 3: start at id_x * 16, advance by count_x * 16,
// up to 16.
%8 = affine.apply affine_map<()[s0] -> (s0 * 16)>()[%workgroup_id_x] | |
%9 = affine.apply affine_map<()[s0] -> (s0 * 16)>()[%workgroup_count_x] | |
scf.for %arg2 = %8 to %c16 step %9 { | |
// 1x4x4x16 tile loaded from the output binding; it seeds the iter_args of the
// tile loops and every 1x2x2x4 slice of it is overwritten below.
%10 = flow.dispatch.tensor.load %3, offsets = [0, %arg0, %arg1, %arg2], sizes = [1, 4, 4, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<writeonly:1x112x112x16xf32> -> tensor<1x4x4x16xf32> | |
// Input window offsets are the output tile offsets multiplied by 2.
%11 = affine.apply affine_map<(d0) -> (d0 * 2)>(%arg0) | |
%12 = affine.apply affine_map<(d0) -> (d0 * 2)>(%arg1) | |
// 1x9x9x3 input window feeding the current 4x4 output tile.
%13 = flow.dispatch.tensor.load %0, offsets = [0, %11, %12, 0], sizes = [1, 9, 9, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:1x225x225x3xf32> -> tensor<1x9x9x3xf32> | |
// Full 3x3x3x16 filter, loaded at last-dimension offset %arg2.
%14 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, %arg2], sizes = [3, 3, 3, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:3x3x3x16xf32> -> tensor<3x3x3x16xf32> | |
// Matching 1x4x4x16 tile of binding 2 (the operand subtracted later).
%15 = flow.dispatch.tensor.load %2, offsets = [0, %arg0, %arg1, %arg2], sizes = [1, 4, 4, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:1x112x112x16xf32> -> tensor<1x4x4x16xf32> | |
// Three nested loops walk the 1x4x4x16 tile in 1x2x2x4 slices (steps 2, 2, 4),
// threading the tile through iter_args. They carry iree.spirv.distribute_dim
// attributes 2, 1 and 0 respectively.
%16 = scf.for %arg3 = %c0 to %c4 step %c2 iter_args(%arg4 = %10) -> (tensor<1x4x4x16xf32>) { | |
%17 = scf.for %arg5 = %c0 to %c4 step %c2 iter_args(%arg6 = %arg4) -> (tensor<1x4x4x16xf32>) { | |
%18 = scf.for %arg7 = %c0 to %c16 step %c4 iter_args(%arg8 = %arg6) -> (tensor<1x4x4x16xf32>) { | |
// 1x2x2x4 slice of the binding-2 tile at the current position.
%19 = tensor.extract_slice %15[0, %arg3, %arg5, %arg7] [1, 2, 2, 4] [1, 1, 1, 1] : tensor<1x4x4x16xf32> to tensor<1x2x2x4xf32> | |
// 1x2x2x4 slice of the running output tile, zero-filled to act as accumulator.
%20 = tensor.extract_slice %arg8[0, %arg3, %arg5, %arg7] [1, 2, 2, 4] [1, 1, 1, 1] : tensor<1x4x4x16xf32> to tensor<1x2x2x4xf32> | |
%21 = vector.transfer_write %cst, %20[%c0, %c0, %c0, %c0] {in_bounds = [true, true, true, true]} : vector<1x2x2x4xf32>, tensor<1x2x2x4xf32> | |
// Offsets into the 9x9 input window are again 2x the in-tile output offsets.
%22 = affine.apply affine_map<(d0) -> (d0 * 2)>(%arg3) | |
%23 = affine.apply affine_map<(d0) -> (d0 * 2)>(%arg5) | |
// 1x5x5x3 input sub-window and 3x3x3x4 filter slice for this 1x2x2x4 slice.
%24 = tensor.extract_slice %13[0, %22, %23, 0] [1, 5, 5, 3] [1, 1, 1, 1] : tensor<1x9x9x3xf32> to tensor<1x5x5x3xf32> | |
%25 = tensor.extract_slice %14[0, 0, 0, %arg7] [3, 3, 3, 4] [1, 1, 1, 1] : tensor<3x3x3x16xf32> to tensor<3x3x3x4xf32> | |
// Reduction over the 3x3 filter positions (%arg9, %arg11), carrying the
// 1x2x2x4 accumulator through iter_args.
%26 = scf.for %arg9 = %c0 to %c3 step %c1 iter_args(%arg10 = %21) -> (tensor<1x2x2x4xf32>) { | |
%32 = scf.for %arg11 = %c0 to %c3 step %c1 iter_args(%arg12 = %arg10) -> (tensor<1x2x2x4xf32>) { | |
// Input patch (1x3x3x3) and single filter tap (1x1x3x4) for this position.
%33 = tensor.extract_slice %24[0, %arg9, %arg11, 0] [1, 3, 3, 3] [1, 1, 1, 1] : tensor<1x5x5x3xf32> to tensor<1x3x3x3xf32> | |
%34 = tensor.extract_slice %25[%arg9, %arg11, 0, 0] [1, 1, 3, 4] [1, 1, 1, 1] : tensor<3x3x3x4xf32> to tensor<1x1x3x4xf32> | |
// Loop over the 2 rows of the accumulator slice; tagged {spirv.unroll}.
%35 = scf.for %arg13 = %c0 to %c2 step %c1 iter_args(%arg14 = %arg12) -> (tensor<1x2x2x4xf32>) { | |
// Input row offset is 2x the accumulator row index.
%36 = affine.apply affine_map<(d0) -> (d0 * 2)>(%arg13) | |
// Select one input row (1x1x3x3) and one accumulator row (1x1x2x4).
%37 = tensor.extract_slice %33[0, %36, 0, 0] [1, 1, 3, 3] [1, 1, 1, 1] : tensor<1x3x3x3xf32> to tensor<1x1x3x3xf32> | |
%38 = tensor.extract_slice %arg14[0, %arg13, 0, 0] [1, 1, 2, 4] [1, 1, 1, 1] : tensor<1x2x2x4xf32> to tensor<1x1x2x4xf32> | |
// Rank-reducing slices that drop one unit dimension from input, filter and
// accumulator before the vector reads.
%39 = tensor.extract_slice %37[0, 0, 0, 0] [1, 1, 3, 3] [1, 1, 1, 1] : tensor<1x1x3x3xf32> to tensor<1x3x3xf32> | |
%40 = tensor.extract_slice %34[0, 0, 0, 0] [1, 1, 3, 4] [1, 1, 1, 1] : tensor<1x1x3x4xf32> to tensor<1x3x4xf32> | |
%41 = tensor.extract_slice %38[0, 0, 0, 0] [1, 1, 2, 4] [1, 1, 1, 1] : tensor<1x1x2x4xf32> to tensor<1x2x4xf32> | |
// Read input (1x3x3), filter (1x3x4) and accumulator (1x2x4) as vectors.
%42 = vector.transfer_read %39[%c0, %c0, %c0], %cst_0 {in_bounds = [true, true, true]} : tensor<1x3x3xf32>, vector<1x3x3xf32> | |
%43 = vector.transfer_read %40[%c0, %c0, %c0], %cst_0 {in_bounds = [true, true, true]} : tensor<1x3x4xf32>, vector<1x3x4xf32> | |
%44 = vector.transfer_read %41[%c0, %c0, %c0], %cst_0 {in_bounds = [true, true, true]} : tensor<1x2x4xf32>, vector<1x2x4xf32> | |
// Input vectors at positions 0 and 2 of the middle dimension: one per output
// column of the accumulator row.
%45 = vector.extract_strided_slice %42 {offsets = [0, 0, 0], sizes = [1, 1, 3], strides = [1, 1, 1]} : vector<1x3x3xf32> to vector<1x1x3xf32> | |
%46 = vector.extract_strided_slice %42 {offsets = [0, 2, 0], sizes = [1, 1, 3], strides = [1, 1, 1]} : vector<1x3x3xf32> to vector<1x1x3xf32> | |
// 3x4 filter matrix shared by both contractions.
%47 = vector.extract %43[0] : vector<1x3x4xf32> | |
// Current accumulator values for output columns 0 and 1.
%48 = vector.extract_strided_slice %44 {offsets = [0, 0, 0], sizes = [1, 1, 4], strides = [1, 1, 1]} : vector<1x2x4xf32> to vector<1x1x4xf32> | |
%49 = vector.extract_strided_slice %44 {offsets = [0, 1, 0], sizes = [1, 1, 4], strides = [1, 1, 1]} : vector<1x2x4xf32> to vector<1x1x4xf32> | |
// Two contractions, each reducing over d3 (the size-3 dimension shared by the
// input vector and the filter rows) and adding into its accumulator.
%50 = vector.contract {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>, affine_map<(d0, d1, d2, d3) -> (d3, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel", "reduction"], kind = #vector.kind<add>} %45, %47, %48 : vector<1x1x3xf32>, vector<3x4xf32> into vector<1x1x4xf32> | |
%51 = vector.contract {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>, affine_map<(d0, d1, d2, d3) -> (d3, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel", "reduction"], kind = #vector.kind<add>} %46, %47, %49 : vector<1x1x3xf32>, vector<3x4xf32> into vector<1x1x4xf32> | |
// Reassemble the 1x2x4 accumulator row from the two updated columns.
%52 = vector.insert_strided_slice %50, %44 {offsets = [0, 0, 0], strides = [1, 1, 1]} : vector<1x1x4xf32> into vector<1x2x4xf32> | |
%53 = vector.insert_strided_slice %51, %52 {offsets = [0, 1, 0], strides = [1, 1, 1]} : vector<1x1x4xf32> into vector<1x2x4xf32> | |
// Write the row back and re-insert it into the 1x2x2x4 accumulator, undoing
// the slicing done at the top of this loop body.
%54 = vector.transfer_write %53, %41[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x2x4xf32>, tensor<1x2x4xf32> | |
%55 = tensor.insert_slice %54 into %38[0, 0, 0, 0] [1, 1, 2, 4] [1, 1, 1, 1] : tensor<1x2x4xf32> into tensor<1x1x2x4xf32> | |
%56 = tensor.insert_slice %55 into %arg14[0, %arg13, 0, 0] [1, 1, 2, 4] [1, 1, 1, 1] : tensor<1x1x2x4xf32> into tensor<1x2x2x4xf32> | |
scf.yield %56 : tensor<1x2x2x4xf32> | |
} {spirv.unroll} | |
scf.yield %35 : tensor<1x2x2x4xf32> | |
} | |
scf.yield %32 : tensor<1x2x2x4xf32> | |
} | |
// Elementwise epilogue: (accumulated convolution result) - (binding-2 slice).
%27 = vector.transfer_read %19[%c0, %c0, %c0, %c0], %cst_0 {in_bounds = [true, true, true, true]} : tensor<1x2x2x4xf32>, vector<1x2x2x4xf32> | |
%28 = vector.transfer_read %26[%c0, %c0, %c0, %c0], %cst_0 {in_bounds = [true, true, true, true]} : tensor<1x2x2x4xf32>, vector<1x2x2x4xf32> | |
%29 = arith.subf %28, %27 : vector<1x2x2x4xf32> | |
// Write the difference back and insert the slice into the 1x4x4x16 tile.
%30 = vector.transfer_write %29, %26[%c0, %c0, %c0, %c0] {in_bounds = [true, true, true, true]} : vector<1x2x2x4xf32>, tensor<1x2x2x4xf32> | |
%31 = tensor.insert_slice %30 into %arg8[0, %arg3, %arg5, %arg7] [1, 2, 2, 4] [1, 1, 1, 1] : tensor<1x2x2x4xf32> into tensor<1x4x4x16xf32> | |
scf.yield %31 : tensor<1x4x4x16xf32> | |
} {iree.spirv.distribute_dim = 0 : index} | |
scf.yield %18 : tensor<1x4x4x16xf32> | |
} {iree.spirv.distribute_dim = 1 : index} | |
scf.yield %17 : tensor<1x4x4x16xf32> | |
} {iree.spirv.distribute_dim = 2 : index} | |
// Store the finished 1x4x4x16 tile to the output binding at the same offsets
// it was loaded from.
flow.dispatch.tensor.store %16, %3, offsets = [0, %arg0, %arg1, %arg2], sizes = [1, 4, 4, 16], strides = [1, 1, 1, 1] : tensor<1x4x4x16xf32> -> !flow.dispatch.tensor<writeonly:1x112x112x16xf32> | |
} | |
} | |
} | |
return | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment