Skip to content

Instantly share code, notes, and snippets.

@Max191
Created May 30, 2024 17:10
Show Gist options
  • Save Max191/92f02f5f13212c48a8c49078d7e2e9d9 to your computer and use it in GitHub Desktop.
Save Max191/92f02f5f13212c48a8c49078d7e2e9d9 to your computer and use it in GitHub Desktop.
Pad + winograd input transform
module {
func.func @main$async_dispatch_1_winograd_input_transform_11x11x16x8x8xf32() attributes {translation_info = #iree_codegen.translation_info<CPULinalgExtTileAndVectorize>} {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%c262144 = arith.constant 262144 : index
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<16x64x64xf32>>
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%c262144) : !flow.dispatch.tensor<writeonly:tensor<11x11x16x8x8xbf16>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [16, 64, 64], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<16x64x64xf32>> -> tensor<16x64x64xf32>
%3 = tensor.empty() : tensor<11x11x16x8x8xbf16>
%4 = tensor.empty() : tensor<11x11x16x8x8xf32>
%padded = tensor.pad %2 low[0, 1, 1] high[0, 1, 1] {
^bb0(%arg0: index, %arg1: index, %arg2: index):
tensor.yield %cst : f32
} {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[2, 0, 0], [1, 8, 8], [0, 0, 0], [0, 0, 0]]>} : tensor<16x64x64xf32> to tensor<16x66x66xf32>
%5 = iree_linalg_ext.winograd.input_transform {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 2, 0, 0], [1, 1, 1, 8, 8]]>} output_tile_size(6) kernel_size(3) image_dimensions([1, 2]) input_tile_dimensions([3, 4]) ins(%padded : tensor<16x66x66xf32>) outs(%4 : tensor<11x11x16x8x8xf32>) -> tensor<11x11x16x8x8xf32>
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)>, affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)>], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"]} ins(%5 : tensor<11x11x16x8x8xf32>) outs(%3 : tensor<11x11x16x8x8xbf16>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 2, 0, 0], [1, 1, 1, 8, 8], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]>} {
^bb0(%in: f32, %out: bf16):
%7 = arith.truncf %in : f32 to bf16
linalg.yield %7 : bf16
} -> tensor<11x11x16x8x8xbf16>
flow.dispatch.tensor.store %6, %1, offsets = [0, 0, 0, 0, 0], sizes = [11, 11, 16, 8, 8], strides = [1, 1, 1, 1, 1] : tensor<11x11x16x8x8xbf16> -> !flow.dispatch.tensor<writeonly:tensor<11x11x16x8x8xbf16>>
return
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment