Created
December 10, 2021 15:25
-
-
Save gysit/c56deb5d5ab0004d906f888d343f1fe9 to your computer and use it in GitHub Desktop.
conv2d effect of canonicalization
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// IR after applyPatternsAndFoldGreedily in LinalgStrategyVectorizePass
// (after the first execution).
//
// This is an excerpt: the enclosing function and the definitions of
// %arg0, %arg1, %arg2, the %cN index values, %cst and %cst_0 are not shown.
// NOTE(review): the comments below assume each %cN is the index constant N
// and that %arg0 / %arg1 / %arg2 are the conv2d input / filter / output --
// confirm against the full function.

// Write the vector %cst_0 over the whole 8x16x15x64 tensor %arg2; the result
// seeds the iter_args of the loop nest below.
%0 = vector.transfer_write %cst_0, %arg2[%c0, %c0, %c0, %c0] {in_bounds = [true, true, true, true]} : vector<8x16x15x64xf32>, tensor<8x16x15x64xf32>
%1 = scf.for %arg3 = %c0 to %c8 step %c1 iter_args(%arg4 = %0) -> (tensor<8x16x15x64xf32>) {
  %2 = scf.for %arg5 = %c0 to %c16 step %c1 iter_args(%arg6 = %arg4) -> (tensor<8x16x15x64xf32>) {
    // Dimension 2 of the 8x16x15x64 tensor is walked in steps of %c8.
    %3 = scf.for %arg7 = %c0 to %c15 step %c8 iter_args(%arg8 = %arg6) -> (tensor<8x16x15x64xf32>) {
      // %4 = min(8, 15 - %arg7): dynamic size of the current tile along dimension 2.
      %4 = affine.min affine_map<(d0) -> (8, -d0 + 15)>(%arg7)
      // %5 = 8 - %4: high padding that brings the tile back to the static size 8.
      %5 = affine.apply affine_map<(d0) -> (-d0 + 8)>(%4)
      %6 = scf.for %arg9 = %c0 to %c64 step %c32 iter_args(%arg10 = %arg8) -> (tensor<8x16x15x64xf32>) {
        %7 = scf.for %arg11 = %c0 to %c3 step %c1 iter_args(%arg12 = %arg10) -> (tensor<8x16x15x64xf32>) {
          // %8 = %arg5 + %arg11: offset into dimension 1 of %arg0.
          %8 = affine.apply affine_map<(d0, d1) -> (d0 + d1)>(%arg5, %arg11)
          %9 = scf.for %arg13 = %c0 to %c3 step %c1 iter_args(%arg14 = %arg12) -> (tensor<8x16x15x64xf32>) {
            // %10 = %arg7 + %arg13: offset into dimension 2 of %arg0.
            %10 = affine.apply affine_map<(d0, d1) -> (d0 + d1)>(%arg7, %arg13)
            // %11 = min(8, 17 - %arg7 - %arg13): dynamic size of the %arg0 slice;
            // %12 = 8 - %11: its high padding.
            %11 = affine.min affine_map<(d0, d1) -> (8, -d0 - d1 + 17)>(%arg7, %arg13)
            %12 = affine.apply affine_map<(d0) -> (-d0 + 8)>(%11)
            %13 = scf.for %arg15 = %c0 to %c32 step %c8 iter_args(%arg16 = %arg14) -> (tensor<8x16x15x64xf32>) {
              // Slices of %arg0 (1x1x?x8), %arg1 (1x1x8x32) and the running result
              // %arg16 (1x1x?x32) for this iteration.
              %14 = tensor.extract_slice %arg0[%arg3, %8, %10, %arg15] [1, 1, %11, 8] [1, 1, 1, 1] : tensor<8x18x17x32xf32> to tensor<1x1x?x8xf32>
              %15 = tensor.extract_slice %arg1[%arg11, %arg13, %arg15, %arg9] [1, 1, 8, 32] [1, 1, 1, 1] : tensor<3x3x32x64xf32> to tensor<1x1x8x32xf32>
              %16 = tensor.extract_slice %arg16[%arg3, %arg5, %arg7, %arg9] [1, 1, %4, 32] [1, 1, 1, 1] : tensor<8x16x15x64xf32> to tensor<1x1x?x32xf32>
              // Pad the dynamic dimension of both slices up to the static size 8,
              // yielding %cst as the padding value.
              %17 = linalg.pad_tensor %14 low[%c0, %c0, %c0, %c0] high[%c0, %c0, %12, %c0] {
              ^bb0(%arg17: index, %arg18: index, %arg19: index, %arg20: index): // no predecessors
                linalg.yield %cst : f32
              } : tensor<1x1x?x8xf32> to tensor<1x1x8x8xf32>
              %18 = linalg.pad_tensor %16 low[%c0, %c0, %c0, %c0] high[%c0, %c0, %5, %c0] {
              ^bb0(%arg17: index, %arg18: index, %arg19: index, %arg20: index): // no predecessors
                linalg.yield %cst : f32
              } : tensor<1x1x?x32xf32> to tensor<1x1x8x32xf32>
              // Rank-reducing extract_slice ops: each drops one unit dimension
              // (rank-4 tensor -> rank-3 tensor). These are the ops that
              // canonicalization later removes.
              %19 = tensor.extract_slice %17[0, 0, 0, 0] [1, 1, 8, 8] [1, 1, 1, 1] : tensor<1x1x8x8xf32> to tensor<1x8x8xf32>
              %20 = tensor.extract_slice %15[0, 0, 0, 0] [1, 1, 8, 32] [1, 1, 1, 1] : tensor<1x1x8x32xf32> to tensor<1x8x32xf32>
              %21 = tensor.extract_slice %18[0, 0, 0, 0] [1, 1, 8, 32] [1, 1, 1, 1] : tensor<1x1x8x32xf32> to tensor<1x8x32xf32>
              // Load the three rank-3 tensors into rank-3 vectors.
              %22 = vector.transfer_read %19[%c0, %c0, %c0], %cst {in_bounds = [true, true, true]} : tensor<1x8x8xf32>, vector<1x8x8xf32>
              %23 = vector.transfer_read %20[%c0, %c0, %c0], %cst {in_bounds = [true, true, true]} : tensor<1x8x32xf32>, vector<1x8x32xf32>
              %24 = vector.transfer_read %21[%c0, %c0, %c0], %cst {in_bounds = [true, true, true]} : tensor<1x8x32xf32>, vector<1x8x32xf32>
              // Drop the leading unit dimension of the %arg1 vector: 1x8x32 -> 8x32.
              %25 = vector.extract %23[0] : vector<1x8x32xf32>
              // Contraction with d3 as the reduction dimension:
              // lhs %22 is indexed (d0, d1, d3), rhs %25 is (d3, d2), and the
              // accumulator/result %24 is (d0, d1, d2).
              %26 = vector.contract {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>, affine_map<(d0, d1, d2, d3) -> (d3, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel", "reduction"]} %22, %25, %24 : vector<1x8x8xf32>, vector<8x32xf32> into vector<1x8x32xf32>
              // Store the result into the rank-3 slice %21, then re-insert that
              // slice into the rank-4 padded tensor %18.
              %27 = vector.transfer_write %26, %21[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x8x32xf32>, tensor<1x8x32xf32>
              %28 = tensor.insert_slice %27 into %18[0, 0, 0, 0] [1, 1, 8, 32] [1, 1, 1, 1] : tensor<1x8x32xf32> into tensor<1x1x8x32xf32>
              // Take the first %4 entries along dimension 2 (the unpadded part)
              // and write them back into the running result %arg16.
              %29 = tensor.extract_slice %28[0, 0, 0, 0] [1, 1, %4, 32] [1, 1, 1, 1] : tensor<1x1x8x32xf32> to tensor<1x1x?x32xf32>
              %30 = tensor.insert_slice %29 into %arg16[%arg3, %arg5, %arg7, %arg9] [1, 1, %4, 32] [1, 1, 1, 1] : tensor<1x1x?x32xf32> into tensor<8x16x15x64xf32>
              scf.yield %30 : tensor<8x16x15x64xf32>
            }
            scf.yield %13 : tensor<8x16x15x64xf32>
          }
          scf.yield %9 : tensor<8x16x15x64xf32>
        }
        scf.yield %7 : tensor<8x16x15x64xf32>
      }
      scf.yield %6 : tensor<8x16x15x64xf32>
    }
    scf.yield %3 : tensor<8x16x15x64xf32>
  }
  scf.yield %2 : tensor<8x16x15x64xf32>
}
// After canonicalization the IR changes to the following.
//
// This is an excerpt: the enclosing function and the definitions of %0,
// %arg0, %arg1, the %cN index values and %cst_0 are not shown.
// Here %cst_0 is used as an f32 scalar (see the linalg.yield ops), whereas the
// pre-canonicalization dump above uses %cst_0 as a vector<8x16x15x64xf32> and
// %cst as the scalar.
// NOTE(review): presumably the constants were only renumbered -- confirm
// against the full function.
%1 = scf.for %arg3 = %c0 to %c8 step %c1 iter_args(%arg4 = %0) -> (tensor<8x16x15x64xf32>) {
  %2 = scf.for %arg5 = %c0 to %c16 step %c1 iter_args(%arg6 = %arg4) -> (tensor<8x16x15x64xf32>) {
    %3 = scf.for %arg7 = %c0 to %c15 step %c8 iter_args(%arg8 = %arg6) -> (tensor<8x16x15x64xf32>) {
      // %4 = min(8, 15 - %arg7): dynamic tile size; %5 = 8 - %4: its high padding.
      %4 = affine.min affine_map<(d0) -> (8, -d0 + 15)>(%arg7)
      %5 = affine.apply affine_map<(d0) -> (-d0 + 8)>(%4)
      %6 = scf.for %arg9 = %c0 to %c64 step %c32 iter_args(%arg10 = %arg8) -> (tensor<8x16x15x64xf32>) {
        %7 = scf.for %arg11 = %c0 to %c3 step %c1 iter_args(%arg12 = %arg10) -> (tensor<8x16x15x64xf32>) {
          // %8 = %arg5 + %arg11: offset into dimension 1 of %arg0.
          %8 = affine.apply affine_map<(d0, d1) -> (d0 + d1)>(%arg5, %arg11)
          %9 = scf.for %arg13 = %c0 to %c3 step %c1 iter_args(%arg14 = %arg12) -> (tensor<8x16x15x64xf32>) {
            // %10 = %arg7 + %arg13: offset into dimension 2 of %arg0.
            // %11 = min(8, 17 - %arg7 - %arg13): dynamic slice size; %12 = 8 - %11.
            %10 = affine.apply affine_map<(d0, d1) -> (d0 + d1)>(%arg7, %arg13)
            %11 = affine.min affine_map<(d0, d1) -> (8, -d0 - d1 + 17)>(%arg7, %arg13)
            %12 = affine.apply affine_map<(d0) -> (-d0 + 8)>(%11)
            %13 = scf.for %arg15 = %c0 to %c32 step %c8 iter_args(%arg16 = %arg14) -> (tensor<8x16x15x64xf32>) {
              // Slices of %arg0 (1x1x?x8), %arg1 (1x1x8x32) and the running result
              // %arg16 (1x1x?x32) for this iteration.
              %14 = tensor.extract_slice %arg0[%arg3, %8, %10, %arg15] [1, 1, %11, 8] [1, 1, 1, 1] : tensor<8x18x17x32xf32> to tensor<1x1x?x8xf32>
              %15 = tensor.extract_slice %arg1[%arg11, %arg13, %arg15, %arg9] [1, 1, 8, 32] [1, 1, 1, 1] : tensor<3x3x32x64xf32> to tensor<1x1x8x32xf32>
              %16 = tensor.extract_slice %arg16[%arg3, %arg5, %arg7, %arg9] [1, 1, %4, 32] [1, 1, 1, 1] : tensor<8x16x15x64xf32> to tensor<1x1x?x32xf32>
              // Pad the dynamic dimension of both slices up to the static size 8,
              // yielding %cst_0 as the padding value.
              %17 = linalg.pad_tensor %14 low[%c0, %c0, %c0, %c0] high[%c0, %c0, %12, %c0] {
              ^bb0(%arg17: index, %arg18: index, %arg19: index, %arg20: index): // no predecessors
                linalg.yield %cst_0 : f32
              } : tensor<1x1x?x8xf32> to tensor<1x1x8x8xf32>
              %18 = linalg.pad_tensor %16 low[%c0, %c0, %c0, %c0] high[%c0, %c0, %5, %c0] {
              ^bb0(%arg17: index, %arg18: index, %arg19: index, %arg20: index): // no predecessors
                linalg.yield %cst_0 : f32
              } : tensor<1x1x?x32xf32> to tensor<1x1x8x32xf32>
              // The transfer_reads now take four indices and read the rank-4
              // tensors %17 / %15 / %18 directly while still producing rank-3
              // vectors; no intermediate rank-3 tensors are created.
              %19 = vector.transfer_read %17[%c0, %c0, %c0, %c0], %cst_0 {in_bounds = [true, true, true]} : tensor<1x1x8x8xf32>, vector<1x8x8xf32>
              %20 = vector.transfer_read %15[%c0, %c0, %c0, %c0], %cst_0 {in_bounds = [true, true, true]} : tensor<1x1x8x32xf32>, vector<1x8x32xf32>
              %21 = vector.transfer_read %18[%c0, %c0, %c0, %c0], %cst_0 {in_bounds = [true, true, true]} : tensor<1x1x8x32xf32>, vector<1x8x32xf32>
              // Drop the leading unit dimension of the %arg1 vector: 1x8x32 -> 8x32.
              %22 = vector.extract %20[0] : vector<1x8x32xf32>
              // Contraction with d3 as the reduction dimension:
              // lhs %19 is indexed (d0, d1, d3), rhs %22 is (d3, d2), and the
              // accumulator/result %21 is (d0, d1, d2).
              %23 = vector.contract {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>, affine_map<(d0, d1, d2, d3) -> (d3, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel", "reduction"]} %19, %22, %21 : vector<1x8x8xf32>, vector<8x32xf32> into vector<1x8x32xf32>
              // The rank-3 vector is written straight into the rank-4 padded
              // tensor %18, so no tensor.insert_slice is needed afterwards.
              %24 = vector.transfer_write %23, %18[%c0, %c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x8x32xf32>, tensor<1x1x8x32xf32>
              // Take the first %4 entries along dimension 2 (the unpadded part)
              // and write them back into the running result %arg16.
              %25 = tensor.extract_slice %24[0, 0, 0, 0] [1, 1, %4, 32] [1, 1, 1, 1] : tensor<1x1x8x32xf32> to tensor<1x1x?x32xf32>
              %26 = tensor.insert_slice %25 into %arg16[%arg3, %arg5, %arg7, %arg9] [1, 1, %4, 32] [1, 1, 1, 1] : tensor<1x1x?x32xf32> into tensor<8x16x15x64xf32>
              scf.yield %26 : tensor<8x16x15x64xf32>
            }
            scf.yield %13 : tensor<8x16x15x64xf32>
          }
          scf.yield %9 : tensor<8x16x15x64xf32>
        }
        scf.yield %7 : tensor<8x16x15x64xf32>
      }
      scf.yield %6 : tensor<8x16x15x64xf32>
    }
    scf.yield %3 : tensor<8x16x15x64xf32>
  }
  scf.yield %2 : tensor<8x16x15x64xf32>
}
// Note how the rank-reducing extract slice operations are canonicalized away:
// the three rank-reducing tensor.extract_slice ops (%19, %20, %21 in the first
// dump) and the matching tensor.insert_slice (%28 in the first dump) no longer
// appear. The vector.transfer_read / vector.transfer_write ops now index the
// rank-4 tensors directly with four indices while transferring rank-3 vectors.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment