Created
May 30, 2021 20:19
-
-
Save stellaraccident/a960d2ac3226e9bcf041f3fd11eeeaed to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// dynamic_update_slice lowered to two IREE flow dispatches (name suggests the
// XLA-style op — the clamping below matches that contract):
//   1. insert the 1x1 update tile %arg1 into %arg0 at clamped indices
//      (%arg2 = row, %arg3 = column), tied in-place to %arg0;
//   2. tiled elementwise add of the original %arg0 and the updated tensor.
func @dynamicUpdateSlice(%arg0: tensor<2x4xi32>, %arg1: tensor<1x1xi32>, %arg2: tensor<i32>, %arg3: tensor<i32>) -> tensor<2x4xi32> {
  %c2 = constant 2 : index
  %c4 = constant 4 : index
  %c1 = constant 1 : index
  // Collapse the 1x1 update tile to a rank-0 tensor for the scalar insert.
  %0 = flow.tensor.reshape %arg1 : tensor<1x1xi32> -> tensor<i32>
  // Dispatch 1: scatter the scalar into %arg0. The `-> %arg0` ties the result
  // to the input so the update happens in the readwrite binding %arg5.
  %1 = flow.dispatch.workgroups[%c4, %c2, %c1](%0, %arg0, %arg2, %arg3) : (tensor<i32>, tensor<2x4xi32>, tensor<i32>, tensor<i32>) -> %arg0 =
      (%arg4: !flow.dispatch.tensor<readonly:i32>, %arg5: !flow.dispatch.tensor<readwrite:2x4xi32>, %arg6: !flow.dispatch.tensor<readonly:i32>, %arg7: !flow.dispatch.tensor<readonly:i32>) {
    %c0_i32 = constant 0 : i32
    %c3_i32 = constant 3 : i32
    %c1_i32 = constant 1 : i32
    %3 = flow.dispatch.tensor.load %arg4, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:i32> -> tensor<i32>
    %4 = flow.dispatch.tensor.load %arg5, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readwrite:2x4xi32> -> tensor<2x4xi32>
    %5 = flow.dispatch.tensor.load %arg6, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:i32> -> tensor<i32>
    %6 = flow.dispatch.tensor.load %arg7, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:i32> -> tensor<i32>
    %7 = tensor.extract %6[] : tensor<i32>
    %8 = tensor.extract %5[] : tensor<i32>
    // Clamp the start indices so the 1x1 insert stays in bounds of the 2x4
    // destination: %7 (column, from %arg3) is clamped to [0, 3] and
    // %8 (row, from %arg2) to [0, 1] via min-then-max select chains.
    %9 = cmpi slt, %7, %c3_i32 : i32
    %10 = select %9, %7, %c3_i32 : i32
    %11 = cmpi slt, %8, %c1_i32 : i32
    %12 = select %11, %8, %c1_i32 : i32
    %13 = cmpi sgt, %10, %c0_i32 : i32
    %14 = cmpi sgt, %12, %c0_i32 : i32
    %15 = select %13, %10, %c0_i32 : i32
    %16 = select %14, %12, %c0_i32 : i32
    %17 = index_cast %15 : i32 to index
    %18 = index_cast %16 : i32 to index
    // Write the scalar at [row %18, col %17] and store the whole tensor back.
    %19 = subtensor_insert %3 into %4[%18, %17] [1, 1] [1, 1] : tensor<i32> into tensor<2x4xi32>
    flow.dispatch.tensor.store %19, %arg5, offsets = [], sizes = [], strides = [] : tensor<2x4xi32> -> !flow.dispatch.tensor<readwrite:2x4xi32>
    flow.return
  }
  // Dispatch 2: elementwise add of the original %arg0 and the updated %1,
  // tiled over workgroups (id/count/size symbols drive the 2-D tile loops).
  %2 = flow.dispatch.workgroups[%c4, %c2, %c1](%arg0, %1) : (tensor<2x4xi32>, tensor<2x4xi32>) -> tensor<2x4xi32> =
      (%arg4: !flow.dispatch.tensor<readonly:2x4xi32>, %arg5: !flow.dispatch.tensor<readonly:2x4xi32>, %arg6: !flow.dispatch.tensor<writeonly:2x4xi32>) {
    %c2_0 = constant 2 : index
    %c4_1 = constant 4 : index
    %3 = linalg.init_tensor [2, 4] : tensor<2x4xi32>
    %workgroup_size_0 = flow.dispatch.workgroup.size[0] : index
    %workgroup_size_1 = flow.dispatch.workgroup.size[1] : index
    %workgroup_id_0 = flow.dispatch.workgroup.id[0] : index
    %workgroup_count_0 = flow.dispatch.workgroup.count[0] : index
    %workgroup_id_1 = flow.dispatch.workgroup.id[1] : index
    %workgroup_count_1 = flow.dispatch.workgroup.count[1] : index
    // Distributed loop over rows: start = id_1 * size_1, step = count_1 * size_1.
    %4 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_1, %workgroup_size_1]
    %5 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_1, %workgroup_size_1]
    scf.for %arg7 = %4 to %c2_0 step %5 {
      // Distributed loop over columns, same id/count/size pattern on dim 0.
      %6 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_0, %workgroup_size_0]
      %7 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_0, %workgroup_size_0]
      scf.for %arg8 = %6 to %c4_1 step %7 {
        // Tile extents clamped at the tensor boundary: min(size, 2 - row),
        // min(size, 4 - col). Recomputed per operand below (CSE-able).
        %8 = affine.min affine_map<(d0, d1) -> (d1, -d0 + 2)>(%arg7, %workgroup_size_1)
        %9 = affine.min affine_map<(d0, d1) -> (d1, -d0 + 4)>(%arg8, %workgroup_size_0)
        %10 = flow.dispatch.tensor.load %arg4, offsets = [%arg7, %arg8], sizes = [%8, %9], strides = [1, 1] : !flow.dispatch.tensor<readonly:2x4xi32> -> tensor<?x?xi32>
        %11 = affine.min affine_map<(d0, d1) -> (d1, -d0 + 2)>(%arg7, %workgroup_size_1)
        %12 = affine.min affine_map<(d0, d1) -> (d1, -d0 + 4)>(%arg8, %workgroup_size_0)
        %13 = flow.dispatch.tensor.load %arg5, offsets = [%arg7, %arg8], sizes = [%11, %12], strides = [1, 1] : !flow.dispatch.tensor<readonly:2x4xi32> -> tensor<?x?xi32>
        %14 = affine.min affine_map<(d0, d1) -> (d1, -d0 + 2)>(%arg7, %workgroup_size_1)
        %15 = affine.min affine_map<(d0, d1) -> (d1, -d0 + 4)>(%arg8, %workgroup_size_0)
        %16 = subtensor %3[%arg7, %arg8] [%14, %15] [1, 1] : tensor<2x4xi32> to tensor<?x?xi32>
        // Per-tile elementwise add: out = lhs + rhs.
        %17 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%10, %13 : tensor<?x?xi32>, tensor<?x?xi32>) outs(%16 : tensor<?x?xi32>) attrs = {__internal_linalg_transform__ = "workgroup"} {
        ^bb0(%arg9: i32, %arg10: i32, %arg11: i32):  // no predecessors
          %18 = addi %arg9, %arg10 : i32
          linalg.yield %18 : i32
        } -> tensor<?x?xi32>
        flow.dispatch.tensor.store %17, %arg6, offsets = [%arg7, %arg8], sizes = [%14, %15], strides = [1, 1] : tensor<?x?xi32> -> !flow.dispatch.tensor<writeonly:2x4xi32>
      }
    }
    flow.return
  }
  return %2 : tensor<2x4xi32>
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment