Skip to content

Instantly share code, notes, and snippets.

@stellaraccident
Created May 30, 2021 20:19
Show Gist options
  • Save stellaraccident/a960d2ac3226e9bcf041f3fd11eeeaed to your computer and use it in GitHub Desktop.
// IREE flow-dialect IR for a dynamic update-slice followed by an elementwise add.
// %arg0: 2x4 destination tensor; %arg1: 1x1 update patch; %arg2, %arg3: scalar
// start indices. (Naming suggests this was lowered from an HLO/MHLO
// dynamic_update_slice — TODO confirm against the frontend op.)
func @dynamicUpdateSlice(%arg0: tensor<2x4xi32>, %arg1: tensor<1x1xi32>, %arg2: tensor<i32>, %arg3: tensor<i32>) -> tensor<2x4xi32> {
// Workgroup counts shared by both dispatches below: [x=4, y=2, z=1].
%c2 = constant 2 : index
%c4 = constant 4 : index
%c1 = constant 1 : index
// Collapse the 1x1 patch to a rank-0 scalar so it can be inserted with
// subtensor_insert of a tensor<i32> below.
%0 = flow.tensor.reshape %arg1 : tensor<1x1xi32> -> tensor<i32>
// Dispatch 1: clamp the start indices and write the scalar patch into %arg0.
// Note the result is tied to %arg0's storage (`-> %arg0`), i.e. the update is
// performed in-place through the readwrite binding %arg5.
%1 = flow.dispatch.workgroups[%c4, %c2, %c1](%0, %arg0, %arg2, %arg3) : (tensor<i32>, tensor<2x4xi32>, tensor<i32>, tensor<i32>) -> %arg0 =
(%arg4: !flow.dispatch.tensor<readonly:i32>, %arg5: !flow.dispatch.tensor<readwrite:2x4xi32>, %arg6: !flow.dispatch.tensor<readonly:i32>, %arg7: !flow.dispatch.tensor<readonly:i32>) {
// Clamp bounds: max valid starts are 3 (cols, dim size 4 - patch 1) and
// 1 (rows, dim size 2 - patch 1); 0 is the lower bound for both.
%c0_i32 = constant 0 : i32
%c3_i32 = constant 3 : i32
%c1_i32 = constant 1 : i32
%3 = flow.dispatch.tensor.load %arg4, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:i32> -> tensor<i32>
%4 = flow.dispatch.tensor.load %arg5, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readwrite:2x4xi32> -> tensor<2x4xi32>
%5 = flow.dispatch.tensor.load %arg6, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:i32> -> tensor<i32>
%6 = flow.dispatch.tensor.load %arg7, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:i32> -> tensor<i32>
// %7 <- %arg3 (column start), %8 <- %arg2 (row start).
%7 = tensor.extract %6[] : tensor<i32>
%8 = tensor.extract %5[] : tensor<i32>
// %10 = min(%7, 3): clamp column start to the upper bound.
%9 = cmpi slt, %7, %c3_i32 : i32
%10 = select %9, %7, %c3_i32 : i32
// %12 = min(%8, 1): clamp row start to the upper bound.
%11 = cmpi slt, %8, %c1_i32 : i32
%12 = select %11, %8, %c1_i32 : i32
// %15 = max(%10, 0), %16 = max(%12, 0): clamp both starts to be non-negative.
%13 = cmpi sgt, %10, %c0_i32 : i32
%14 = cmpi sgt, %12, %c0_i32 : i32
%15 = select %13, %10, %c0_i32 : i32
%16 = select %14, %12, %c0_i32 : i32
%17 = index_cast %15 : i32 to index
%18 = index_cast %16 : i32 to index
// Insert the scalar patch at [row=%18, col=%17] and store the whole 2x4
// result back through the readwrite binding.
%19 = subtensor_insert %3 into %4[%18, %17] [1, 1] [1, 1] : tensor<i32> into tensor<2x4xi32>
flow.dispatch.tensor.store %19, %arg5, offsets = [], sizes = [], strides = [] : tensor<2x4xi32> -> !flow.dispatch.tensor<readwrite:2x4xi32>
flow.return
}
// Dispatch 2: tiled elementwise add of %arg0 and %1.
// NOTE(review): %1 is tied to %arg0's buffer via the `-> %arg0` result above,
// so both operands may alias the already-updated storage unless a copy is
// materialized later in the pipeline — verify downstream allocation behavior.
%2 = flow.dispatch.workgroups[%c4, %c2, %c1](%arg0, %1) : (tensor<2x4xi32>, tensor<2x4xi32>) -> tensor<2x4xi32> =
(%arg4: !flow.dispatch.tensor<readonly:2x4xi32>, %arg5: !flow.dispatch.tensor<readonly:2x4xi32>, %arg6: !flow.dispatch.tensor<writeonly:2x4xi32>) {
%c2_0 = constant 2 : index
%c4_1 = constant 4 : index
// Destination-passing-style init tensor for the linalg.generic output tile.
%3 = linalg.init_tensor [2, 4] : tensor<2x4xi32>
%workgroup_size_0 = flow.dispatch.workgroup.size[0] : index
%workgroup_size_1 = flow.dispatch.workgroup.size[1] : index
%workgroup_id_0 = flow.dispatch.workgroup.id[0] : index
%workgroup_count_0 = flow.dispatch.workgroup.count[0] : index
%workgroup_id_1 = flow.dispatch.workgroup.id[1] : index
%workgroup_count_1 = flow.dispatch.workgroup.count[1] : index
// Workgroup-distributed tiling: each workgroup starts at id*size and
// strides by count*size over the iteration space.
%4 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_1, %workgroup_size_1]
%5 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_1, %workgroup_size_1]
// Outer loop over rows (extent 2), inner loop over columns (extent 4).
scf.for %arg7 = %4 to %c2_0 step %5 {
%6 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_0, %workgroup_size_0]
%7 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_0, %workgroup_size_0]
scf.for %arg8 = %6 to %c4_1 step %7 {
// Tile sizes clipped at the tensor boundary: min(workgroup_size, remaining).
%8 = affine.min affine_map<(d0, d1) -> (d1, -d0 + 2)>(%arg7, %workgroup_size_1)
%9 = affine.min affine_map<(d0, d1) -> (d1, -d0 + 4)>(%arg8, %workgroup_size_0)
%10 = flow.dispatch.tensor.load %arg4, offsets = [%arg7, %arg8], sizes = [%8, %9], strides = [1, 1] : !flow.dispatch.tensor<readonly:2x4xi32> -> tensor<?x?xi32>
%11 = affine.min affine_map<(d0, d1) -> (d1, -d0 + 2)>(%arg7, %workgroup_size_1)
%12 = affine.min affine_map<(d0, d1) -> (d1, -d0 + 4)>(%arg8, %workgroup_size_0)
%13 = flow.dispatch.tensor.load %arg5, offsets = [%arg7, %arg8], sizes = [%11, %12], strides = [1, 1] : !flow.dispatch.tensor<readonly:2x4xi32> -> tensor<?x?xi32>
%14 = affine.min affine_map<(d0, d1) -> (d1, -d0 + 2)>(%arg7, %workgroup_size_1)
%15 = affine.min affine_map<(d0, d1) -> (d1, -d0 + 4)>(%arg8, %workgroup_size_0)
%16 = subtensor %3[%arg7, %arg8] [%14, %15] [1, 1] : tensor<2x4xi32> to tensor<?x?xi32>
// Per-tile elementwise i32 add; the marker attr records that workgroup
// tiling has already been applied to this op.
%17 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%10, %13 : tensor<?x?xi32>, tensor<?x?xi32>) outs(%16 : tensor<?x?xi32>) attrs = {__internal_linalg_transform__ = "workgroup"} {
^bb0(%arg9: i32, %arg10: i32, %arg11: i32): // no predecessors
%18 = addi %arg9, %arg10 : i32
linalg.yield %18 : i32
} -> tensor<?x?xi32>
flow.dispatch.tensor.store %17, %arg6, offsets = [%arg7, %arg8], sizes = [%14, %15], strides = [1, 1] : tensor<?x?xi32> -> !flow.dispatch.tensor<writeonly:2x4xi32>
}
}
flow.return
}
return %2 : tensor<2x4xi32>
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment