// -----// IR Dump After mlir::iree_compiler::IREE::ABI::WrapEntryPointsPass //----- //
module {
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = call @_conv(%0, %1) : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
%3 = hal.tensor.cast %2 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %3 : !hal.buffer_view
}
func private @_conv(%arg0: tensor<1x225x225x3xf32>, %arg1: tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32> {
%0 = mhlo.convolution(%arg0, %arg1) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
return %0 : tensor<1x112x112x32xf32>
}
}
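// NOTE: WrapEntryPointsPass splits the entry point in two: the public
// @conv owns the ABI, casting the !hal.buffer_view arguments to ranked
// tensors before calling the private @_conv, which holds the original
// mhlo.convolution. The static shapes check out for a stride-2, 3x3,
// unpadded convolution: floor((225 - 3) / 2) + 1 = 112, so the
// 1x225x225x3 input and 3x3x3x32 filter yield a 1x112x112x32 result.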
// -----// IR Dump After Canonicalizer //----- //
func private @_conv(%arg0: tensor<1x225x225x3xf32>, %arg1: tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32> {
%0 = mhlo.convolution(%arg0, %arg1) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
return %0 : tensor<1x112x112x32xf32>
}
// -----// IR Dump After Canonicalizer //----- //
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = call @_conv(%0, %1) : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
%3 = hal.tensor.cast %2 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %3 : !hal.buffer_view
}
// -----// IR Dump After Canonicalizer //----- //
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = mhlo.convolution(%0, %1) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
%3 = hal.tensor.cast %2 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %3 : !hal.buffer_view
}
// -----// IR Dump After Inliner //----- //
module {
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = mhlo.convolution(%0, %1) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
%3 = hal.tensor.cast %2 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %3 : !hal.buffer_view
}
}
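// NOTE: the Inliner folds the body of the private @_conv into the public
// @conv; the now-dead callee is already gone from the module in this
// dump, and the SymbolDCE dump below confirms no unused symbols remain.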
// -----// IR Dump After Canonicalizer //----- //
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = mhlo.convolution(%0, %1) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
%3 = hal.tensor.cast %2 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %3 : !hal.buffer_view
}
// -----// IR Dump After CSE //----- //
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = mhlo.convolution(%0, %1) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
%3 = hal.tensor.cast %2 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %3 : !hal.buffer_view
}
// -----// IR Dump After SymbolDCE //----- //
module {
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = mhlo.convolution(%0, %1) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
%3 = hal.tensor.cast %2 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %3 : !hal.buffer_view
}
}
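// NOTE: the next stretch of dumps (TopLevelSCFToCFG through
// ConvertMHLOToLinalgExt) repeats identical IR after each pass: none of
// those passes has anything to rewrite in this small module, so only the
// dump headers change.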
// -----// IR Dump After TopLevelSCFToCFG //----- //
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = mhlo.convolution(%0, %1) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
%3 = hal.tensor.cast %2 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %3 : !hal.buffer_view
}
// -----// IR Dump After ShapeToShapeLowering //----- //
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = mhlo.convolution(%0, %1) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
%3 = hal.tensor.cast %2 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %3 : !hal.buffer_view
}
// -----// IR Dump After ConvertShapeToStandard //----- //
module {
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = mhlo.convolution(%0, %1) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
%3 = hal.tensor.cast %2 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %3 : !hal.buffer_view
}
}
// -----// IR Dump After Canonicalizer //----- //
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = mhlo.convolution(%0, %1) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
%3 = hal.tensor.cast %2 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %3 : !hal.buffer_view
}
// -----// IR Dump After Canonicalizer //----- //
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = mhlo.convolution(%0, %1) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
%3 = hal.tensor.cast %2 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %3 : !hal.buffer_view
}
// -----// IR Dump After Inliner //----- //
module {
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = mhlo.convolution(%0, %1) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
%3 = hal.tensor.cast %2 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %3 : !hal.buffer_view
}
}
// -----// IR Dump After MHLOToMHLOPreprocessing //----- //
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = mhlo.convolution(%0, %1) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
%3 = hal.tensor.cast %2 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %3 : !hal.buffer_view
}
// -----// IR Dump After LegalizeInputTypes //----- //
module {
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = mhlo.convolution(%0, %1) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
%3 = hal.tensor.cast %2 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %3 : !hal.buffer_view
}
}
// -----// IR Dump After Canonicalizer //----- //
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = mhlo.convolution(%0, %1) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
%3 = hal.tensor.cast %2 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %3 : !hal.buffer_view
}
// -----// IR Dump After CSE //----- //
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = mhlo.convolution(%0, %1) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
%3 = hal.tensor.cast %2 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %3 : !hal.buffer_view
}
// -----// IR Dump After ConvertMHLOToLinalgExt //----- //
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = mhlo.convolution(%0, %1) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
%3 = hal.tensor.cast %2 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %3 : !hal.buffer_view
}
// -----// IR Dump After ConvertMHLOToLinalgOnTensors //----- //
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
%cst = constant 0.000000e+00 : f32
%3 = linalg.fill(%cst, %2) : f32, tensor<1x112x112x32xf32> -> tensor<1x112x112x32xf32>
%4 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%0, %1 : tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) outs(%3 : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
%5 = hal.tensor.cast %4 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %5 : !hal.buffer_view
}
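// NOTE: ConvertMHLOToLinalgOnTensors is the first structural change since
// inlining: the mhlo.convolution becomes the destination-passing idiom of
// linalg on tensors, roughly
//   %init = linalg.init_tensor [1, 112, 112, 32]   // result-shaped tensor
//   %acc  = linalg.fill(%cst, %init)               // zero the accumulator
//   %out  = linalg.conv_2d_nhwc_hwcf ins(%input, %filter) outs(%acc)
// The NHWC input / HWCF filter layouts and the {strides = 2, dilations = 1}
// attributes carry over from the mhlo dim_numbers and window attributes.
// The dumps that follow repeat this form unchanged until
// DispatchLinalgOnTensors tiles it.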
// -----// IR Dump After Canonicalizer //----- //
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%cst = constant 0.000000e+00 : f32
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
%3 = linalg.fill(%cst, %2) : f32, tensor<1x112x112x32xf32> -> tensor<1x112x112x32xf32>
%4 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%0, %1 : tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) outs(%3 : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
%5 = hal.tensor.cast %4 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %5 : !hal.buffer_view
}
// -----// IR Dump After VerifyCompilerMHLOInputLegality //----- //
module {
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%cst = constant 0.000000e+00 : f32
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
%3 = linalg.fill(%cst, %2) : f32, tensor<1x112x112x32xf32> -> tensor<1x112x112x32xf32>
%4 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%0, %1 : tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) outs(%3 : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
%5 = hal.tensor.cast %4 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %5 : !hal.buffer_view
}
}
// -----// IR Dump After IREEImportPublic //----- //
module {
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%cst = constant 0.000000e+00 : f32
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
%3 = linalg.fill(%cst, %2) : f32, tensor<1x112x112x32xf32> -> tensor<1x112x112x32xf32>
%4 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%0, %1 : tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) outs(%3 : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
%5 = hal.tensor.cast %4 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %5 : !hal.buffer_view
}
}
// -----// IR Dump After ConvertConv2D1x1ConvToMatmul //----- //
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%cst = constant 0.000000e+00 : f32
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
%3 = linalg.fill(%cst, %2) : f32, tensor<1x112x112x32xf32> -> tensor<1x112x112x32xf32>
%4 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%0, %1 : tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) outs(%3 : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
%5 = hal.tensor.cast %4 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %5 : !hal.buffer_view
}
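// NOTE: ConvertConv2D1x1ConvToMatmul rewrites only 1x1 convolutions as
// matmuls; this kernel is 3x3, so the dump above is unchanged.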
// -----// IR Dump After VerifyInputLegality //----- //
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%cst = constant 0.000000e+00 : f32
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
%3 = linalg.fill(%cst, %2) : f32, tensor<1x112x112x32xf32> -> tensor<1x112x112x32xf32>
%4 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%0, %1 : tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) outs(%3 : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
%5 = hal.tensor.cast %4 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %5 : !hal.buffer_view
}
// -----// IR Dump After mlir::iree_compiler::IREE::Util::(anonymous namespace)::SimplifyGlobalAccessesPass //----- //
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%cst = constant 0.000000e+00 : f32
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
%3 = linalg.fill(%cst, %2) : f32, tensor<1x112x112x32xf32> -> tensor<1x112x112x32xf32>
%4 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%0, %1 : tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) outs(%3 : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
%5 = hal.tensor.cast %4 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %5 : !hal.buffer_view
}
// -----// IR Dump After Canonicalizer //----- //
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%cst = constant 0.000000e+00 : f32
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
%3 = linalg.fill(%cst, %2) : f32, tensor<1x112x112x32xf32> -> tensor<1x112x112x32xf32>
%4 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%0, %1 : tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) outs(%3 : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
%5 = hal.tensor.cast %4 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %5 : !hal.buffer_view
}
// -----// IR Dump After CSE //----- //
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%cst = constant 0.000000e+00 : f32
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
%3 = linalg.fill(%cst, %2) : f32, tensor<1x112x112x32xf32> -> tensor<1x112x112x32xf32>
%4 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%0, %1 : tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) outs(%3 : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
%5 = hal.tensor.cast %4 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %5 : !hal.buffer_view
}
// -----// IR Dump After ExpandGlobalDynamicDims //----- //
module {
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%cst = constant 0.000000e+00 : f32
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
%3 = linalg.fill(%cst, %2) : f32, tensor<1x112x112x32xf32> -> tensor<1x112x112x32xf32>
%4 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%0, %1 : tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) outs(%3 : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
%5 = hal.tensor.cast %4 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %5 : !hal.buffer_view
}
}
// -----// IR Dump After mlir::iree_compiler::Shape::(anonymous namespace)::ExpandFunctionDynamicDimsPass //----- //
module {
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%cst = constant 0.000000e+00 : f32
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
%3 = linalg.fill(%cst, %2) : f32, tensor<1x112x112x32xf32> -> tensor<1x112x112x32xf32>
%4 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%0, %1 : tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) outs(%3 : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
%5 = hal.tensor.cast %4 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %5 : !hal.buffer_view
}
}
// -----// IR Dump After PadTensorToSubTensorInsert //----- //
module {
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%cst = constant 0.000000e+00 : f32
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
%3 = linalg.fill(%cst, %2) : f32, tensor<1x112x112x32xf32> -> tensor<1x112x112x32xf32>
%4 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%0, %1 : tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) outs(%3 : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
%5 = hal.tensor.cast %4 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %5 : !hal.buffer_view
}
}
// -----// IR Dump After ConvertElementwiseToLinalg //----- //
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%cst = constant 0.000000e+00 : f32
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
%3 = linalg.fill(%cst, %2) : f32, tensor<1x112x112x32xf32> -> tensor<1x112x112x32xf32>
%4 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%0, %1 : tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) outs(%3 : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
%5 = hal.tensor.cast %4 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %5 : !hal.buffer_view
}
// -----// IR Dump After LinalgFoldUnitExtentDims //----- //
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%cst = constant 0.000000e+00 : f32
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
%3 = linalg.fill(%cst, %2) : f32, tensor<1x112x112x32xf32> -> tensor<1x112x112x32xf32>
%4 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%0, %1 : tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) outs(%3 : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
%5 = hal.tensor.cast %4 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %5 : !hal.buffer_view
}
// -----// IR Dump After InterchangeGenericOps //----- //
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%cst = constant 0.000000e+00 : f32
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
%3 = linalg.fill(%cst, %2) : f32, tensor<1x112x112x32xf32> -> tensor<1x112x112x32xf32>
%4 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%0, %1 : tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) outs(%3 : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
%5 = hal.tensor.cast %4 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %5 : !hal.buffer_view
}
// -----// IR Dump After Canonicalizer //----- //
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%cst = constant 0.000000e+00 : f32
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
%3 = linalg.fill(%cst, %2) : f32, tensor<1x112x112x32xf32> -> tensor<1x112x112x32xf32>
%4 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%0, %1 : tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) outs(%3 : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
%5 = hal.tensor.cast %4 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %5 : !hal.buffer_view
}
// -----// IR Dump After FusionOfTensorOps //----- //
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%cst = constant 0.000000e+00 : f32
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
%3 = linalg.fill(%cst, %2) : f32, tensor<1x112x112x32xf32> -> tensor<1x112x112x32xf32>
%4 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%0, %1 : tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) outs(%3 : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
%5 = hal.tensor.cast %4 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %5 : !hal.buffer_view
}
// -----// IR Dump After CSE //----- //
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%cst = constant 0.000000e+00 : f32
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
%3 = linalg.fill(%cst, %2) : f32, tensor<1x112x112x32xf32> -> tensor<1x112x112x32xf32>
%4 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%0, %1 : tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) outs(%3 : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
%5 = hal.tensor.cast %4 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %5 : !hal.buffer_view
}
// -----// IR Dump After ResolveShapedTypeResultDims //----- //
module {
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%cst = constant 0.000000e+00 : f32
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
%3 = linalg.fill(%cst, %2) : f32, tensor<1x112x112x32xf32> -> tensor<1x112x112x32xf32>
%4 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%0, %1 : tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) outs(%3 : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
%5 = hal.tensor.cast %4 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %5 : !hal.buffer_view
}
}
// -----// IR Dump After ConvertToFlowBeforeDispatchFormation //----- //
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%cst = constant 0.000000e+00 : f32
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
%3 = linalg.fill(%cst, %2) : f32, tensor<1x112x112x32xf32> -> tensor<1x112x112x32xf32>
%4 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%0, %1 : tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) outs(%3 : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
%5 = hal.tensor.cast %4 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %5 : !hal.buffer_view
}
// -----// IR Dump After Canonicalizer //----- //
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%cst = constant 0.000000e+00 : f32
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
%3 = linalg.fill(%cst, %2) : f32, tensor<1x112x112x32xf32> -> tensor<1x112x112x32xf32>
%4 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%0, %1 : tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) outs(%3 : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
%5 = hal.tensor.cast %4 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %5 : !hal.buffer_view
}
// -----// IR Dump After DispatchLinalgOnTensors //----- //
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%c32 = constant 32 : index
%c112 = constant 112 : index
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = flow.dispatch.workgroups[%c32, %c112, %c112](%0, %1) : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32> =
(%arg2: !flow.dispatch.tensor<readonly:1x225x225x3xf32>, %arg3: !flow.dispatch.tensor<readonly:3x3x3x32xf32>, %arg4: !flow.dispatch.tensor<writeonly:1x112x112x32xf32>) {
%cst = constant 0.000000e+00 : f32
%c112_0 = constant 112 : index
%c32_1 = constant 32 : index
%4 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
%workgroup_size_0 = flow.dispatch.workgroup.size[0] : index
%workgroup_size_1 = flow.dispatch.workgroup.size[1] : index
%workgroup_size_2 = flow.dispatch.workgroup.size[2] : index
%workgroup_id_0 = flow.dispatch.workgroup.id[0] : index
%workgroup_count_0 = flow.dispatch.workgroup.count[0] : index
%workgroup_id_1 = flow.dispatch.workgroup.id[1] : index
%workgroup_count_1 = flow.dispatch.workgroup.count[1] : index
%workgroup_id_2 = flow.dispatch.workgroup.id[2] : index
%workgroup_count_2 = flow.dispatch.workgroup.count[2] : index
%5 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_2, %workgroup_size_2]
%6 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_2, %workgroup_size_2]
scf.for %arg5 = %5 to %c112_0 step %6 {
%7 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_1, %workgroup_size_1]
%8 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_1, %workgroup_size_1]
scf.for %arg6 = %7 to %c112_0 step %8 {
%9 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_0, %workgroup_size_0]
%10 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_0, %workgroup_size_0]
scf.for %arg7 = %9 to %c32_1 step %10 {
%11 = affine.apply affine_map<(d0) -> (d0 * 2)>(%arg5)
%12 = affine.min affine_map<(d0, d1) -> (d0 * 2 + 1, d1 * -2 + 227)>(%workgroup_size_2, %arg5)
%13 = affine.apply affine_map<(d0) -> (d0 * 2)>(%arg6)
%14 = affine.min affine_map<(d0, d1) -> (d0 * 2 + 1, d1 * -2 + 227)>(%workgroup_size_1, %arg6)
%15 = flow.dispatch.tensor.load %arg2, offsets = [0, %11, %13, 0], sizes = [1, %12, %14, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:1x225x225x3xf32> -> tensor<1x?x?x3xf32>
%16 = affine.min affine_map<(d0, d1) -> (d0, -d1 + 32)>(%workgroup_size_0, %arg7)
%17 = flow.dispatch.tensor.load %arg3, offsets = [0, 0, 0, %arg7], sizes = [3, 3, 3, %16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:3x3x3x32xf32> -> tensor<3x3x3x?xf32>
%18 = affine.min affine_map<(d0, d1) -> (d0, -d1 + 112)>(%workgroup_size_2, %arg5)
%19 = affine.min affine_map<(d0, d1) -> (d0, -d1 + 112)>(%workgroup_size_1, %arg6)
%20 = affine.min affine_map<(d0, d1) -> (d0, -d1 + 32)>(%workgroup_size_0, %arg7)
%21 = affine.min affine_map<(d0, d1) -> (-d0 + 112, d1)>(%arg5, %workgroup_size_2)
%22 = affine.min affine_map<(d0, d1) -> (-d0 + 112, d1)>(%arg6, %workgroup_size_1)
%23 = affine.min affine_map<(d0, d1) -> (-d0 + 32, d1)>(%arg7, %workgroup_size_0)
%24 = tensor.extract_slice %4[0, %arg5, %arg6, %arg7] [1, %21, %22, %23] [1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x?x?x?xf32>
%25 = linalg.fill(%cst, %24) : f32, tensor<1x?x?x?xf32> -> tensor<1x?x?x?xf32>
%26 = linalg.conv_2d_nhwc_hwcf {__internal_linalg_transform__ = "workgroup", dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%15, %17 : tensor<1x?x?x3xf32>, tensor<3x3x3x?xf32>) outs(%25 : tensor<1x?x?x?xf32>) -> tensor<1x?x?x?xf32>
flow.dispatch.tensor.store %26, %arg4, offsets = [0, %arg5, %arg6, %arg7], sizes = [1, %18, %19, %20], strides = [1, 1, 1, 1] : tensor<1x?x?x?xf32> -> !flow.dispatch.tensor<writeonly:1x112x112x32xf32>
}
}
}
flow.return
}
%3 = hal.tensor.cast %2 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %3 : !hal.buffer_view
}
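// NOTE: DispatchLinalgOnTensors tiles the convolution and wraps it in a
// flow.dispatch.workgroups region with workload [%c32, %c112, %c112]
// (output channels x output rows x output columns). Inside the region,
// three nested scf.for loops distribute tiles across workgroups using
//   lower bound = workgroup_id * workgroup_size
//   step        = workgroup_count * workgroup_size
// and the affine.min ops clamp each output tile (and its stride-2 input
// window) at the tensor boundaries, which is why the tiled linalg ops now
// operate on dynamic tensor<1x?x?x?xf32> slices.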
// -----// IR Dump After ResolveShapedTypeResultDims //----- //
module {
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%c32 = constant 32 : index
%c112 = constant 112 : index
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = flow.dispatch.workgroups[%c32, %c112, %c112](%0, %1) : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32> =
(%arg2: !flow.dispatch.tensor<readonly:1x225x225x3xf32>, %arg3: !flow.dispatch.tensor<readonly:3x3x3x32xf32>, %arg4: !flow.dispatch.tensor<writeonly:1x112x112x32xf32>) {
%cst = constant 0.000000e+00 : f32
%c112_0 = constant 112 : index
%c32_1 = constant 32 : index
%4 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
%workgroup_size_0 = flow.dispatch.workgroup.size[0] : index
%workgroup_size_1 = flow.dispatch.workgroup.size[1] : index
%workgroup_size_2 = flow.dispatch.workgroup.size[2] : index
%workgroup_id_0 = flow.dispatch.workgroup.id[0] : index
%workgroup_count_0 = flow.dispatch.workgroup.count[0] : index
%workgroup_id_1 = flow.dispatch.workgroup.id[1] : index
%workgroup_count_1 = flow.dispatch.workgroup.count[1] : index
%workgroup_id_2 = flow.dispatch.workgroup.id[2] : index
%workgroup_count_2 = flow.dispatch.workgroup.count[2] : index
%5 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_2, %workgroup_size_2]
%6 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_2, %workgroup_size_2]
scf.for %arg5 = %5 to %c112_0 step %6 {
%7 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_1, %workgroup_size_1]
%8 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_1, %workgroup_size_1]
scf.for %arg6 = %7 to %c112_0 step %8 {
%9 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_0, %workgroup_size_0]
%10 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_0, %workgroup_size_0]
scf.for %arg7 = %9 to %c32_1 step %10 {
%11 = affine.apply affine_map<(d0) -> (d0 * 2)>(%arg5)
%12 = affine.min affine_map<(d0, d1) -> (d0 * 2 + 1, d1 * -2 + 227)>(%workgroup_size_2, %arg5)
%13 = affine.apply affine_map<(d0) -> (d0 * 2)>(%arg6)
%14 = affine.min affine_map<(d0, d1) -> (d0 * 2 + 1, d1 * -2 + 227)>(%workgroup_size_1, %arg6)
%15 = flow.dispatch.tensor.load %arg2, offsets = [0, %11, %13, 0], sizes = [1, %12, %14, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:1x225x225x3xf32> -> tensor<1x?x?x3xf32>
%16 = affine.min affine_map<(d0, d1) -> (d0, -d1 + 32)>(%workgroup_size_0, %arg7)
%17 = flow.dispatch.tensor.load %arg3, offsets = [0, 0, 0, %arg7], sizes = [3, 3, 3, %16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:3x3x3x32xf32> -> tensor<3x3x3x?xf32>
%18 = affine.min affine_map<(d0, d1) -> (d0, -d1 + 112)>(%workgroup_size_2, %arg5)
%19 = affine.min affine_map<(d0, d1) -> (d0, -d1 + 112)>(%workgroup_size_1, %arg6)
%20 = affine.min affine_map<(d0, d1) -> (d0, -d1 + 32)>(%workgroup_size_0, %arg7)
%21 = affine.min affine_map<(d0, d1) -> (-d0 + 112, d1)>(%arg5, %workgroup_size_2)
%22 = affine.min affine_map<(d0, d1) -> (-d0 + 112, d1)>(%arg6, %workgroup_size_1)
%23 = affine.min affine_map<(d0, d1) -> (-d0 + 32, d1)>(%arg7, %workgroup_size_0)
%24 = tensor.extract_slice %4[0, %arg5, %arg6, %arg7] [1, %21, %22, %23] [1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x?x?x?xf32>
%25 = linalg.fill(%cst, %24) : f32, tensor<1x?x?x?xf32> -> tensor<1x?x?x?xf32>
%26 = linalg.conv_2d_nhwc_hwcf {__internal_linalg_transform__ = "workgroup", dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%15, %17 : tensor<1x?x?x3xf32>, tensor<3x3x3x?xf32>) outs(%25 : tensor<1x?x?x?xf32>) -> tensor<1x?x?x?xf32>
flow.dispatch.tensor.store %26, %arg4, offsets = [0, %arg5, %arg6, %arg7], sizes = [1, %18, %19, %20], strides = [1, 1, 1, 1] : tensor<1x?x?x?xf32> -> !flow.dispatch.tensor<writeonly:1x112x112x32xf32>
}
}
}
flow.return
}
%3 = hal.tensor.cast %2 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %3 : !hal.buffer_view
}
}
// -----// IR Dump After ConvertToFlowAfterDispatchFormation //----- //
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%c32 = constant 32 : index
%c112 = constant 112 : index
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = flow.dispatch.workgroups[%c32, %c112, %c112](%0, %1) : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32> =
(%arg2: !flow.dispatch.tensor<readonly:1x225x225x3xf32>, %arg3: !flow.dispatch.tensor<readonly:3x3x3x32xf32>, %arg4: !flow.dispatch.tensor<writeonly:1x112x112x32xf32>) {
%cst = constant 0.000000e+00 : f32
%c112_0 = constant 112 : index
%c32_1 = constant 32 : index
%4 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
%workgroup_size_0 = flow.dispatch.workgroup.size[0] : index
%workgroup_size_1 = flow.dispatch.workgroup.size[1] : index
%workgroup_size_2 = flow.dispatch.workgroup.size[2] : index
%workgroup_id_0 = flow.dispatch.workgroup.id[0] : index
%workgroup_count_0 = flow.dispatch.workgroup.count[0] : index
%workgroup_id_1 = flow.dispatch.workgroup.id[1] : index
%workgroup_count_1 = flow.dispatch.workgroup.count[1] : index
%workgroup_id_2 = flow.dispatch.workgroup.id[2] : index
%workgroup_count_2 = flow.dispatch.workgroup.count[2] : index
%5 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_2, %workgroup_size_2]
%6 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_2, %workgroup_size_2]
scf.for %arg5 = %5 to %c112_0 step %6 {
%7 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_1, %workgroup_size_1]
%8 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_1, %workgroup_size_1]
scf.for %arg6 = %7 to %c112_0 step %8 {
%9 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_0, %workgroup_size_0]
%10 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_0, %workgroup_size_0]
scf.for %arg7 = %9 to %c32_1 step %10 {
%11 = affine.apply affine_map<(d0) -> (d0 * 2)>(%arg5)
%12 = affine.min affine_map<(d0, d1) -> (d0 * 2 + 1, d1 * -2 + 227)>(%workgroup_size_2, %arg5)
%13 = affine.apply affine_map<(d0) -> (d0 * 2)>(%arg6)
%14 = affine.min affine_map<(d0, d1) -> (d0 * 2 + 1, d1 * -2 + 227)>(%workgroup_size_1, %arg6)
%15 = flow.dispatch.tensor.load %arg2, offsets = [0, %11, %13, 0], sizes = [1, %12, %14, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:1x225x225x3xf32> -> tensor<1x?x?x3xf32>
%16 = affine.min affine_map<(d0, d1) -> (d0, -d1 + 32)>(%workgroup_size_0, %arg7)
%17 = flow.dispatch.tensor.load %arg3, offsets = [0, 0, 0, %arg7], sizes = [3, 3, 3, %16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:3x3x3x32xf32> -> tensor<3x3x3x?xf32>
%18 = affine.min affine_map<(d0, d1) -> (d0, -d1 + 112)>(%workgroup_size_2, %arg5)
%19 = affine.min affine_map<(d0, d1) -> (d0, -d1 + 112)>(%workgroup_size_1, %arg6)
%20 = affine.min affine_map<(d0, d1) -> (d0, -d1 + 32)>(%workgroup_size_0, %arg7)
%21 = affine.min affine_map<(d0, d1) -> (-d0 + 112, d1)>(%arg5, %workgroup_size_2)
%22 = affine.min affine_map<(d0, d1) -> (-d0 + 112, d1)>(%arg6, %workgroup_size_1)
%23 = affine.min affine_map<(d0, d1) -> (-d0 + 32, d1)>(%arg7, %workgroup_size_0)
%24 = tensor.extract_slice %4[0, %arg5, %arg6, %arg7] [1, %21, %22, %23] [1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x?x?x?xf32>
%25 = linalg.fill(%cst, %24) : f32, tensor<1x?x?x?xf32> -> tensor<1x?x?x?xf32>
%26 = linalg.conv_2d_nhwc_hwcf {__internal_linalg_transform__ = "workgroup", dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%15, %17 : tensor<1x?x?x3xf32>, tensor<3x3x3x?xf32>) outs(%25 : tensor<1x?x?x?xf32>) -> tensor<1x?x?x?xf32>
flow.dispatch.tensor.store %26, %arg4, offsets = [0, %arg5, %arg6, %arg7], sizes = [1, %18, %19, %20], strides = [1, 1, 1, 1] : tensor<1x?x?x?xf32> -> !flow.dispatch.tensor<writeonly:1x112x112x32xf32>
}
}
}
flow.return
}
%3 = hal.tensor.cast %2 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %3 : !hal.buffer_view
}
// -----// IR Dump After Canonicalizer //----- //
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%c112 = constant 112 : index
%c32 = constant 32 : index
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = flow.dispatch.workgroups[%c32, %c112, %c112](%0, %1) : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32> =
(%arg2: !flow.dispatch.tensor<readonly:1x225x225x3xf32>, %arg3: !flow.dispatch.tensor<readonly:3x3x3x32xf32>, %arg4: !flow.dispatch.tensor<writeonly:1x112x112x32xf32>) {
%c32_0 = constant 32 : index
%c112_1 = constant 112 : index
%cst = constant 0.000000e+00 : f32
%workgroup_size_0 = flow.dispatch.workgroup.size[0] : index
%workgroup_size_1 = flow.dispatch.workgroup.size[1] : index
%workgroup_size_2 = flow.dispatch.workgroup.size[2] : index
%workgroup_id_0 = flow.dispatch.workgroup.id[0] : index
%workgroup_count_0 = flow.dispatch.workgroup.count[0] : index
%workgroup_id_1 = flow.dispatch.workgroup.id[1] : index
%workgroup_count_1 = flow.dispatch.workgroup.count[1] : index
%workgroup_id_2 = flow.dispatch.workgroup.id[2] : index
%workgroup_count_2 = flow.dispatch.workgroup.count[2] : index
%4 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_2, %workgroup_size_2]
%5 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_2, %workgroup_size_2]
scf.for %arg5 = %4 to %c112_1 step %5 {
%6 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_1, %workgroup_size_1]
%7 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_1, %workgroup_size_1]
scf.for %arg6 = %6 to %c112_1 step %7 {
%8 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_0, %workgroup_size_0]
%9 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_0, %workgroup_size_0]
scf.for %arg7 = %8 to %c32_0 step %9 {
%10 = affine.apply affine_map<(d0) -> (d0 * 2)>(%arg5)
%11 = affine.min affine_map<(d0, d1) -> (d0 * 2 + 1, d1 * -2 + 227)>(%workgroup_size_2, %arg5)
%12 = affine.apply affine_map<(d0) -> (d0 * 2)>(%arg6)
%13 = affine.min affine_map<(d0, d1) -> (d0 * 2 + 1, d1 * -2 + 227)>(%workgroup_size_1, %arg6)
%14 = flow.dispatch.tensor.load %arg2, offsets = [0, %10, %12, 0], sizes = [1, %11, %13, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:1x225x225x3xf32> -> tensor<1x?x?x3xf32>
%15 = affine.min affine_map<(d0, d1) -> (d0, -d1 + 32)>(%workgroup_size_0, %arg7)
%16 = flow.dispatch.tensor.load %arg3, offsets = [0, 0, 0, %arg7], sizes = [3, 3, 3, %15], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:3x3x3x32xf32> -> tensor<3x3x3x?xf32>
%17 = affine.min affine_map<(d0, d1) -> (d0, -d1 + 112)>(%workgroup_size_2, %arg5)
%18 = affine.min affine_map<(d0, d1) -> (d0, -d1 + 112)>(%workgroup_size_1, %arg6)
%19 = affine.min affine_map<(d0, d1) -> (d0, -d1 + 32)>(%workgroup_size_0, %arg7)
%20 = affine.min affine_map<(d0, d1) -> (-d0 + 112, d1)>(%arg5, %workgroup_size_2)
%21 = affine.min affine_map<(d0, d1) -> (-d0 + 112, d1)>(%arg6, %workgroup_size_1)
%22 = affine.min affine_map<(d0, d1) -> (-d0 + 32, d1)>(%arg7, %workgroup_size_0)
%23 = linalg.init_tensor [1, %20, %21, %22] : tensor<1x?x?x?xf32>
%24 = linalg.fill(%cst, %23) : f32, tensor<1x?x?x?xf32> -> tensor<1x?x?x?xf32>
%25 = linalg.conv_2d_nhwc_hwcf {__internal_linalg_transform__ = "workgroup", dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%14, %16 : tensor<1x?x?x3xf32>, tensor<3x3x3x?xf32>) outs(%24 : tensor<1x?x?x?xf32>) -> tensor<1x?x?x?xf32>
flow.dispatch.tensor.store %25, %arg4, offsets = [0, %arg5, %arg6, %arg7], sizes = [1, %17, %18, %19], strides = [1, 1, 1, 1] : tensor<1x?x?x?xf32> -> !flow.dispatch.tensor<writeonly:1x112x112x32xf32>
}
}
}
flow.return
}
%3 = hal.tensor.cast %2 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %3 : !hal.buffer_view
}
// -----// IR Dump After OutlineDispatchRegions //----- //
module {
flow.executable private @conv_dispatch_0 {
flow.dispatch.entry public @conv_dispatch_0 attributes {workgroup_rank = 3 : index}
builtin.module {
func @conv_dispatch_0(%arg0: !flow.dispatch.tensor<readonly:1x225x225x3xf32>, %arg1: !flow.dispatch.tensor<readonly:3x3x3x32xf32>, %arg2: !flow.dispatch.tensor<writeonly:1x112x112x32xf32>) {
%c32 = constant 32 : index
%c112 = constant 112 : index
%cst = constant 0.000000e+00 : f32
%workgroup_size_0 = flow.dispatch.workgroup.size[0] : index
%workgroup_size_1 = flow.dispatch.workgroup.size[1] : index
%workgroup_size_2 = flow.dispatch.workgroup.size[2] : index
%workgroup_id_0 = flow.dispatch.workgroup.id[0] : index
%workgroup_count_0 = flow.dispatch.workgroup.count[0] : index
%workgroup_id_1 = flow.dispatch.workgroup.id[1] : index
%workgroup_count_1 = flow.dispatch.workgroup.count[1] : index
%workgroup_id_2 = flow.dispatch.workgroup.id[2] : index
%workgroup_count_2 = flow.dispatch.workgroup.count[2] : index
%0 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_2, %workgroup_size_2]
%1 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_2, %workgroup_size_2]
scf.for %arg3 = %0 to %c112 step %1 {
%2 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_1, %workgroup_size_1]
%3 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_1, %workgroup_size_1]
scf.for %arg4 = %2 to %c112 step %3 {
%4 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_0, %workgroup_size_0]
%5 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_0, %workgroup_size_0]
scf.for %arg5 = %4 to %c32 step %5 {
%6 = affine.apply affine_map<(d0) -> (d0 * 2)>(%arg3)
%7 = affine.min affine_map<(d0, d1) -> (d0 * 2 + 1, d1 * -2 + 227)>(%workgroup_size_2, %arg3)
%8 = affine.apply affine_map<(d0) -> (d0 * 2)>(%arg4)
%9 = affine.min affine_map<(d0, d1) -> (d0 * 2 + 1, d1 * -2 + 227)>(%workgroup_size_1, %arg4)
%10 = flow.dispatch.tensor.load %arg0, offsets = [0, %6, %8, 0], sizes = [1, %7, %9, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:1x225x225x3xf32> -> tensor<1x?x?x3xf32>
%11 = affine.min affine_map<(d0, d1) -> (d0, -d1 + 32)>(%workgroup_size_0, %arg5)
%12 = flow.dispatch.tensor.load %arg1, offsets = [0, 0, 0, %arg5], sizes = [3, 3, 3, %11], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:3x3x3x32xf32> -> tensor<3x3x3x?xf32>
%13 = affine.min affine_map<(d0, d1) -> (d0, -d1 + 112)>(%workgroup_size_2, %arg3)
%14 = affine.min affine_map<(d0, d1) -> (d0, -d1 + 112)>(%workgroup_size_1, %arg4)
%15 = affine.min affine_map<(d0, d1) -> (d0, -d1 + 32)>(%workgroup_size_0, %arg5)
%16 = affine.min affine_map<(d0, d1) -> (-d0 + 112, d1)>(%arg3, %workgroup_size_2)
%17 = affine.min affine_map<(d0, d1) -> (-d0 + 112, d1)>(%arg4, %workgroup_size_1)
%18 = affine.min affine_map<(d0, d1) -> (-d0 + 32, d1)>(%arg5, %workgroup_size_0)
%19 = linalg.init_tensor [1, %16, %17, %18] : tensor<1x?x?x?xf32>
%20 = linalg.fill(%cst, %19) : f32, tensor<1x?x?x?xf32> -> tensor<1x?x?x?xf32>
%21 = linalg.conv_2d_nhwc_hwcf {__internal_linalg_transform__ = "workgroup", dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%10, %12 : tensor<1x?x?x3xf32>, tensor<3x3x3x?xf32>) outs(%20 : tensor<1x?x?x?xf32>) -> tensor<1x?x?x?xf32>
flow.dispatch.tensor.store %21, %arg2, offsets = [0, %arg3, %arg4, %arg5], sizes = [1, %13, %14, %15], strides = [1, 1, 1, 1] : tensor<1x?x?x?xf32> -> !flow.dispatch.tensor<writeonly:1x112x112x32xf32>
}
}
}
return
}
}
}
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%c112 = constant 112 : index
%c32 = constant 32 : index
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = flow.dispatch @conv_dispatch_0::@conv_dispatch_0[%c32, %c112, %c112](%0, %1) : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
%3 = hal.tensor.cast %2 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %3 : !hal.buffer_view
}
}
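// NOTE: OutlineDispatchRegions hoists the workgroups body into a
// standalone flow.executable @conv_dispatch_0 with a public
// flow.dispatch.entry; the host-side @conv now launches it with
//   flow.dispatch @conv_dispatch_0::@conv_dispatch_0[%c32, %c112, %c112](%0, %1)
// so the device code can be compiled separately from the host program.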
// -----// IR Dump After Canonicalizer //----- //
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%c32 = constant 32 : index
%c112 = constant 112 : index
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = flow.dispatch @conv_dispatch_0::@conv_dispatch_0[%c32, %c112, %c112](%0, %1) : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
%3 = hal.tensor.cast %2 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %3 : !hal.buffer_view
}
// -----// IR Dump After DeduplicateExecutables //----- //
module {
flow.executable private @conv_dispatch_0 {
flow.dispatch.entry public @conv_dispatch_0 attributes {workgroup_rank = 3 : index}
builtin.module {
func @conv_dispatch_0(%arg0: !flow.dispatch.tensor<readonly:1x225x225x3xf32>, %arg1: !flow.dispatch.tensor<readonly:3x3x3x32xf32>, %arg2: !flow.dispatch.tensor<writeonly:1x112x112x32xf32>) {
%c32 = constant 32 : index
%c112 = constant 112 : index
%cst = constant 0.000000e+00 : f32
%workgroup_size_0 = flow.dispatch.workgroup.size[0] : index
%workgroup_size_1 = flow.dispatch.workgroup.size[1] : index
%workgroup_size_2 = flow.dispatch.workgroup.size[2] : index
%workgroup_id_0 = flow.dispatch.workgroup.id[0] : index
%workgroup_count_0 = flow.dispatch.workgroup.count[0] : index
%workgroup_id_1 = flow.dispatch.workgroup.id[1] : index
%workgroup_count_1 = flow.dispatch.workgroup.count[1] : index
%workgroup_id_2 = flow.dispatch.workgroup.id[2] : index
%workgroup_count_2 = flow.dispatch.workgroup.count[2] : index
%0 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_2, %workgroup_size_2]
%1 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_2, %workgroup_size_2]
scf.for %arg3 = %0 to %c112 step %1 {
%2 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_1, %workgroup_size_1]
%3 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_1, %workgroup_size_1]
scf.for %arg4 = %2 to %c112 step %3 {
%4 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_0, %workgroup_size_0]
%5 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_0, %workgroup_size_0]
scf.for %arg5 = %4 to %c32 step %5 {
%6 = affine.apply affine_map<(d0) -> (d0 * 2)>(%arg3)
%7 = affine.min affine_map<(d0, d1) -> (d0 * 2 + 1, d1 * -2 + 227)>(%workgroup_size_2, %arg3)
%8 = affine.apply affine_map<(d0) -> (d0 * 2)>(%arg4)
%9 = affine.min affine_map<(d0, d1) -> (d0 * 2 + 1, d1 * -2 + 227)>(%workgroup_size_1, %arg4)
%10 = flow.dispatch.tensor.load %arg0, offsets = [0, %6, %8, 0], sizes = [1, %7, %9, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:1x225x225x3xf32> -> tensor<1x?x?x3xf32>
%11 = affine.min affine_map<(d0, d1) -> (d0, -d1 + 32)>(%workgroup_size_0, %arg5)
%12 = flow.dispatch.tensor.load %arg1, offsets = [0, 0, 0, %arg5], sizes = [3, 3, 3, %11], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:3x3x3x32xf32> -> tensor<3x3x3x?xf32>
%13 = affine.min affine_map<(d0, d1) -> (d0, -d1 + 112)>(%workgroup_size_2, %arg3)
%14 = affine.min affine_map<(d0, d1) -> (d0, -d1 + 112)>(%workgroup_size_1, %arg4)
%15 = affine.min affine_map<(d0, d1) -> (d0, -d1 + 32)>(%workgroup_size_0, %arg5)
%16 = affine.min affine_map<(d0, d1) -> (-d0 + 112, d1)>(%arg3, %workgroup_size_2)
%17 = affine.min affine_map<(d0, d1) -> (-d0 + 112, d1)>(%arg4, %workgroup_size_1)
%18 = affine.min affine_map<(d0, d1) -> (-d0 + 32, d1)>(%arg5, %workgroup_size_0)
%19 = linalg.init_tensor [1, %16, %17, %18] : tensor<1x?x?x?xf32>
%20 = linalg.fill(%cst, %19) : f32, tensor<1x?x?x?xf32> -> tensor<1x?x?x?xf32>
%21 = linalg.conv_2d_nhwc_hwcf {__internal_linalg_transform__ = "workgroup", dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%10, %12 : tensor<1x?x?x3xf32>, tensor<3x3x3x?xf32>) outs(%20 : tensor<1x?x?x?xf32>) -> tensor<1x?x?x?xf32>
flow.dispatch.tensor.store %21, %arg2, offsets = [0, %arg3, %arg4, %arg5], sizes = [1, %13, %14, %15], strides = [1, 1, 1, 1] : tensor<1x?x?x?xf32> -> !flow.dispatch.tensor<writeonly:1x112x112x32xf32>
}
}
}
return
}
}
}
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%c32 = constant 32 : index
%c112 = constant 112 : index
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = flow.dispatch @conv_dispatch_0::@conv_dispatch_0[%c32, %c112, %c112](%0, %1) : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
%3 = hal.tensor.cast %2 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %3 : !hal.buffer_view
}
}
// -----// IR Dump After Canonicalizer //----- //
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%c112 = constant 112 : index
%c32 = constant 32 : index
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = flow.dispatch @conv_dispatch_0::@conv_dispatch_0[%c32, %c112, %c112](%0, %1) : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
%3 = hal.tensor.cast %2 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %3 : !hal.buffer_view
}
// -----// IR Dump After CSE //----- //
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%c112 = constant 112 : index
%c32 = constant 32 : index
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = flow.dispatch @conv_dispatch_0::@conv_dispatch_0[%c32, %c112, %c112](%0, %1) : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
%3 = hal.tensor.cast %2 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %3 : !hal.buffer_view
}
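// NOTE: The Canonicalizer and CSE dumps above are identical to the preceding host function apart
// from the order of the two index constants; neither pass found anything else to rewrite here.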
// -----// IR Dump After HoistUnstreamableOps //----- //
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%0 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%1 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%c32 = constant 32 : index
%c112 = constant 112 : index
%2 = flow.dispatch @conv_dispatch_0::@conv_dispatch_0[%c32, %c112, %c112](%1, %0) : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
%3 = hal.tensor.cast %2 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %3 : !hal.buffer_view
}
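// NOTE: HoistUnstreamableOps appears to lift ops that cannot live inside a stream, here the two
// hal.tensor.cast imports, above the streamable flow.dispatch so that stream formation can later
// capture a clean region. The casts now precede the constants, and the dispatch operand order
// (%1, %0) reflects the renumbering.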
// -----// IR Dump After Canonicalizer //----- //
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%c112 = constant 112 : index
%c32 = constant 32 : index
%0 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%1 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%2 = flow.dispatch @conv_dispatch_0::@conv_dispatch_0[%c32, %c112, %c112](%1, %0) : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
%3 = hal.tensor.cast %2 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %3 : !hal.buffer_view
}
// -----// IR Dump After CSE //----- //
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%c112 = constant 112 : index
%c32 = constant 32 : index
%0 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%1 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%2 = flow.dispatch @conv_dispatch_0::@conv_dispatch_0[%c32, %c112, %c112](%1, %0) : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
%3 = hal.tensor.cast %2 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %3 : !hal.buffer_view
}
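// NOTE: The Canonicalizer and CSE runs above only move the constants back to the top of the
// function; the dataflow is unchanged from the HoistUnstreamableOps dump.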
// -----// IR Dump After InsertConstantClones //----- //
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%c112 = constant 112 : index
%c32 = constant 32 : index
%0 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%1 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%2 = flow.dispatch @conv_dispatch_0::@conv_dispatch_0[%c32, %c112, %c112](%1, %0) : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
%3 = hal.tensor.cast %2 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %3 : !hal.buffer_view
}
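// NOTE: InsertConstantClones appears to be a no-op here, presumably because no constant tensors
// escape into a stream and need cloning; the IR matches the previous dump exactly.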
// -----// IR Dump After FormStreams //----- //
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%c112 = constant 112 : index
%c32 = constant 32 : index
%0 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%1 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%2 = flow.ex.stream.fragment(%c32, %c112, %1, %0) : (index, index, tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32> =
(%arg2: index, %arg3: index, %arg4: tensor<1x225x225x3xf32>, %arg5: tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32> {
%4 = flow.dispatch @conv_dispatch_0::@conv_dispatch_0[%arg2, %arg3, %arg3](%arg4, %arg5) : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
flow.return %4 : tensor<1x112x112x32xf32>
}
%3 = hal.tensor.cast %2 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %3 : !hal.buffer_view
}
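// NOTE: FormStreams wraps the lone dispatch in a flow.ex.stream.fragment. The workload constants
// and the two imported tensors become fragment operands (%c32, %c112, %1, %0), and the dispatch
// inside refers to the region arguments instead. With a single dispatch the fragment is trivial,
// but it is the unit that later maps onto one HAL command-buffer recording.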
// -----// IR Dump After OutlineLargeConstants //----- //
module {
flow.executable private @conv_dispatch_0 {
flow.dispatch.entry public @conv_dispatch_0 attributes {workgroup_rank = 3 : index}
builtin.module {
func @conv_dispatch_0(%arg0: !flow.dispatch.tensor<readonly:1x225x225x3xf32>, %arg1: !flow.dispatch.tensor<readonly:3x3x3x32xf32>, %arg2: !flow.dispatch.tensor<writeonly:1x112x112x32xf32>) {
%c32 = constant 32 : index
%c112 = constant 112 : index
%cst = constant 0.000000e+00 : f32
%workgroup_size_0 = flow.dispatch.workgroup.size[0] : index
%workgroup_size_1 = flow.dispatch.workgroup.size[1] : index
%workgroup_size_2 = flow.dispatch.workgroup.size[2] : index
%workgroup_id_0 = flow.dispatch.workgroup.id[0] : index
%workgroup_count_0 = flow.dispatch.workgroup.count[0] : index
%workgroup_id_1 = flow.dispatch.workgroup.id[1] : index
%workgroup_count_1 = flow.dispatch.workgroup.count[1] : index
%workgroup_id_2 = flow.dispatch.workgroup.id[2] : index
%workgroup_count_2 = flow.dispatch.workgroup.count[2] : index
%0 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_2, %workgroup_size_2]
%1 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_2, %workgroup_size_2]
scf.for %arg3 = %0 to %c112 step %1 {
%2 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_1, %workgroup_size_1]
%3 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_1, %workgroup_size_1]
scf.for %arg4 = %2 to %c112 step %3 {
%4 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_0, %workgroup_size_0]
%5 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_0, %workgroup_size_0]
scf.for %arg5 = %4 to %c32 step %5 {
%6 = affine.apply affine_map<(d0) -> (d0 * 2)>(%arg3)
%7 = affine.min affine_map<(d0, d1) -> (d0 * 2 + 1, d1 * -2 + 227)>(%workgroup_size_2, %arg3)
%8 = affine.apply affine_map<(d0) -> (d0 * 2)>(%arg4)
%9 = affine.min affine_map<(d0, d1) -> (d0 * 2 + 1, d1 * -2 + 227)>(%workgroup_size_1, %arg4)
%10 = flow.dispatch.tensor.load %arg0, offsets = [0, %6, %8, 0], sizes = [1, %7, %9, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:1x225x225x3xf32> -> tensor<1x?x?x3xf32>
%11 = affine.min affine_map<(d0, d1) -> (d0, -d1 + 32)>(%workgroup_size_0, %arg5)
%12 = flow.dispatch.tensor.load %arg1, offsets = [0, 0, 0, %arg5], sizes = [3, 3, 3, %11], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:3x3x3x32xf32> -> tensor<3x3x3x?xf32>
%13 = affine.min affine_map<(d0, d1) -> (d0, -d1 + 112)>(%workgroup_size_2, %arg3)
%14 = affine.min affine_map<(d0, d1) -> (d0, -d1 + 112)>(%workgroup_size_1, %arg4)
%15 = affine.min affine_map<(d0, d1) -> (d0, -d1 + 32)>(%workgroup_size_0, %arg5)
%16 = affine.min affine_map<(d0, d1) -> (-d0 + 112, d1)>(%arg3, %workgroup_size_2)
%17 = affine.min affine_map<(d0, d1) -> (-d0 + 112, d1)>(%arg4, %workgroup_size_1)
%18 = affine.min affine_map<(d0, d1) -> (-d0 + 32, d1)>(%arg5, %workgroup_size_0)
%19 = linalg.init_tensor [1, %16, %17, %18] : tensor<1x?x?x?xf32>
%20 = linalg.fill(%cst, %19) : f32, tensor<1x?x?x?xf32> -> tensor<1x?x?x?xf32>
%21 = linalg.conv_2d_nhwc_hwcf {__internal_linalg_transform__ = "workgroup", dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%10, %12 : tensor<1x?x?x3xf32>, tensor<3x3x3x?xf32>) outs(%20 : tensor<1x?x?x?xf32>) -> tensor<1x?x?x?xf32>
flow.dispatch.tensor.store %21, %arg2, offsets = [0, %arg3, %arg4, %arg5], sizes = [1, %13, %14, %15], strides = [1, 1, 1, 1] : tensor<1x?x?x?xf32> -> !flow.dispatch.tensor<writeonly:1x112x112x32xf32>
}
}
}
return
}
}
}
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%c112 = constant 112 : index
%c32 = constant 32 : index
%0 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%1 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%2 = flow.ex.stream.fragment(%c32, %c112, %1, %0) : (index, index, tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32> =
(%arg2: index, %arg3: index, %arg4: tensor<1x225x225x3xf32>, %arg5: tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32> {
%4 = flow.dispatch @conv_dispatch_0::@conv_dispatch_0[%arg2, %arg3, %arg3](%arg4, %arg5) : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
flow.return %4 : tensor<1x112x112x32xf32>
}
%3 = hal.tensor.cast %2 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %3 : !hal.buffer_view
}
}
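// NOTE: OutlineLargeConstants would hoist large constant tensors to module-level storage; this
// module has none, so the dump simply re-prints the full module (executable plus host function)
// unchanged.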
// -----// IR Dump After Canonicalizer //----- //
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%0 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%1 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%2 = flow.ex.stream.fragment(%1, %0) : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32> =
(%arg2: tensor<1x225x225x3xf32>, %arg3: tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32> {
%c32 = constant 32 : index
%c112 = constant 112 : index
%4 = flow.dispatch @conv_dispatch_0::@conv_dispatch_0[%c32, %c112, %c112](%arg2, %arg3) : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
flow.return %4 : tensor<1x112x112x32xf32>
}
%3 = hal.tensor.cast %2 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %3 : !hal.buffer_view
}
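// NOTE: Canonicalization sinks the workload constants into the stream fragment body, shrinking
// the fragment operand list from four values to the two tensors. The CSE dump that follows is
// identical.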
// -----// IR Dump After CSE //----- //
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%0 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%1 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%2 = flow.ex.stream.fragment(%1, %0) : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32> =
(%arg2: tensor<1x225x225x3xf32>, %arg3: tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32> {
%c32 = constant 32 : index
%c112 = constant 112 : index
%4 = flow.dispatch @conv_dispatch_0::@conv_dispatch_0[%c32, %c112, %c112](%arg2, %arg3) : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
flow.return %4 : tensor<1x112x112x32xf32>
}
%3 = hal.tensor.cast %2 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %3 : !hal.buffer_view
}
// -----// IR Dump After SymbolDCE //----- //
module {
flow.executable private @conv_dispatch_0 {
flow.dispatch.entry public @conv_dispatch_0 attributes {workgroup_rank = 3 : index}
builtin.module {
func @conv_dispatch_0(%arg0: !flow.dispatch.tensor<readonly:1x225x225x3xf32>, %arg1: !flow.dispatch.tensor<readonly:3x3x3x32xf32>, %arg2: !flow.dispatch.tensor<writeonly:1x112x112x32xf32>) {
%c32 = constant 32 : index
%c112 = constant 112 : index
%cst = constant 0.000000e+00 : f32
%workgroup_size_0 = flow.dispatch.workgroup.size[0] : index
%workgroup_size_1 = flow.dispatch.workgroup.size[1] : index
%workgroup_size_2 = flow.dispatch.workgroup.size[2] : index
%workgroup_id_0 = flow.dispatch.workgroup.id[0] : index
%workgroup_count_0 = flow.dispatch.workgroup.count[0] : index
%workgroup_id_1 = flow.dispatch.workgroup.id[1] : index
%workgroup_count_1 = flow.dispatch.workgroup.count[1] : index
%workgroup_id_2 = flow.dispatch.workgroup.id[2] : index
%workgroup_count_2 = flow.dispatch.workgroup.count[2] : index
%0 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_2, %workgroup_size_2]
%1 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_2, %workgroup_size_2]
scf.for %arg3 = %0 to %c112 step %1 {
%2 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_1, %workgroup_size_1]
%3 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_1, %workgroup_size_1]
scf.for %arg4 = %2 to %c112 step %3 {
%4 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_0, %workgroup_size_0]
%5 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_0, %workgroup_size_0]
scf.for %arg5 = %4 to %c32 step %5 {
%6 = affine.apply affine_map<(d0) -> (d0 * 2)>(%arg3)
%7 = affine.min affine_map<(d0, d1) -> (d0 * 2 + 1, d1 * -2 + 227)>(%workgroup_size_2, %arg3)
%8 = affine.apply affine_map<(d0) -> (d0 * 2)>(%arg4)
%9 = affine.min affine_map<(d0, d1) -> (d0 * 2 + 1, d1 * -2 + 227)>(%workgroup_size_1, %arg4)
%10 = flow.dispatch.tensor.load %arg0, offsets = [0, %6, %8, 0], sizes = [1, %7, %9, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:1x225x225x3xf32> -> tensor<1x?x?x3xf32>
%11 = affine.min affine_map<(d0, d1) -> (d0, -d1 + 32)>(%workgroup_size_0, %arg5)
%12 = flow.dispatch.tensor.load %arg1, offsets = [0, 0, 0, %arg5], sizes = [3, 3, 3, %11], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:3x3x3x32xf32> -> tensor<3x3x3x?xf32>
%13 = affine.min affine_map<(d0, d1) -> (d0, -d1 + 112)>(%workgroup_size_2, %arg3)
%14 = affine.min affine_map<(d0, d1) -> (d0, -d1 + 112)>(%workgroup_size_1, %arg4)
%15 = affine.min affine_map<(d0, d1) -> (d0, -d1 + 32)>(%workgroup_size_0, %arg5)
%16 = affine.min affine_map<(d0, d1) -> (-d0 + 112, d1)>(%arg3, %workgroup_size_2)
%17 = affine.min affine_map<(d0, d1) -> (-d0 + 112, d1)>(%arg4, %workgroup_size_1)
%18 = affine.min affine_map<(d0, d1) -> (-d0 + 32, d1)>(%arg5, %workgroup_size_0)
%19 = linalg.init_tensor [1, %16, %17, %18] : tensor<1x?x?x?xf32>
%20 = linalg.fill(%cst, %19) : f32, tensor<1x?x?x?xf32> -> tensor<1x?x?x?xf32>
%21 = linalg.conv_2d_nhwc_hwcf {__internal_linalg_transform__ = "workgroup", dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%10, %12 : tensor<1x?x?x3xf32>, tensor<3x3x3x?xf32>) outs(%20 : tensor<1x?x?x?xf32>) -> tensor<1x?x?x?xf32>
flow.dispatch.tensor.store %21, %arg2, offsets = [0, %arg3, %arg4, %arg5], sizes = [1, %13, %14, %15], strides = [1, 1, 1, 1] : tensor<1x?x?x?xf32> -> !flow.dispatch.tensor<writeonly:1x112x112x32xf32>
}
}
}
return
}
}
}
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%0 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%1 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%2 = flow.ex.stream.fragment(%1, %0) : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32> =
(%arg2: tensor<1x225x225x3xf32>, %arg3: tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32> {
%c32 = constant 32 : index
%c112 = constant 112 : index
%4 = flow.dispatch @conv_dispatch_0::@conv_dispatch_0[%c32, %c112, %c112](%arg2, %arg3) : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
flow.return %4 : tensor<1x112x112x32xf32>
}
%3 = hal.tensor.cast %2 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %3 : !hal.buffer_view
}
}
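// NOTE: SymbolDCE keeps both symbols: @conv_dispatch_0 is referenced by the flow.dispatch and
// @conv is a public ABI entry point, so nothing is deleted.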
// -----// IR Dump After Canonicalizer //----- //
module {
flow.executable private @conv_dispatch_0 {
flow.dispatch.entry public @conv_dispatch_0 attributes {workgroup_rank = 3 : index}
builtin.module {
func @conv_dispatch_0(%arg0: !flow.dispatch.tensor<readonly:1x225x225x3xf32>, %arg1: !flow.dispatch.tensor<readonly:3x3x3x32xf32>, %arg2: !flow.dispatch.tensor<writeonly:1x112x112x32xf32>) {
%cst = constant 0.000000e+00 : f32
%c112 = constant 112 : index
%c32 = constant 32 : index
%workgroup_size_0 = flow.dispatch.workgroup.size[0] : index
%workgroup_size_1 = flow.dispatch.workgroup.size[1] : index
%workgroup_size_2 = flow.dispatch.workgroup.size[2] : index
%workgroup_id_0 = flow.dispatch.workgroup.id[0] : index
%workgroup_count_0 = flow.dispatch.workgroup.count[0] : index
%workgroup_id_1 = flow.dispatch.workgroup.id[1] : index
%workgroup_count_1 = flow.dispatch.workgroup.count[1] : index
%workgroup_id_2 = flow.dispatch.workgroup.id[2] : index
%workgroup_count_2 = flow.dispatch.workgroup.count[2] : index
%0 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_2, %workgroup_size_2]
%1 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_2, %workgroup_size_2]
scf.for %arg3 = %0 to %c112 step %1 {
%2 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_1, %workgroup_size_1]
%3 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_1, %workgroup_size_1]
scf.for %arg4 = %2 to %c112 step %3 {
%4 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_0, %workgroup_size_0]
%5 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_0, %workgroup_size_0]
scf.for %arg5 = %4 to %c32 step %5 {
%6 = affine.apply affine_map<(d0) -> (d0 * 2)>(%arg3)
%7 = affine.min affine_map<(d0)[s0] -> (s0 * 2 + 1, d0 * -2 + 227)>(%arg3)[%workgroup_size_2]
%8 = affine.apply affine_map<(d0) -> (d0 * 2)>(%arg4)
%9 = affine.min affine_map<(d0)[s0] -> (s0 * 2 + 1, d0 * -2 + 227)>(%arg4)[%workgroup_size_1]
%10 = flow.dispatch.tensor.load %arg0, offsets = [0, %6, %8, 0], sizes = [1, %7, %9, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:1x225x225x3xf32> -> tensor<1x?x?x3xf32>
%11 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 32)>(%arg5)[%workgroup_size_0]
%12 = flow.dispatch.tensor.load %arg1, offsets = [0, 0, 0, %arg5], sizes = [3, 3, 3, %11], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:3x3x3x32xf32> -> tensor<3x3x3x?xf32>
%13 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 112)>(%arg3)[%workgroup_size_2]
%14 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 112)>(%arg4)[%workgroup_size_1]
%15 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 32)>(%arg5)[%workgroup_size_0]
%16 = affine.min affine_map<(d0)[s0] -> (-d0 + 112, s0)>(%arg3)[%workgroup_size_2]
%17 = affine.min affine_map<(d0)[s0] -> (-d0 + 112, s0)>(%arg4)[%workgroup_size_1]
%18 = affine.min affine_map<(d0)[s0] -> (-d0 + 32, s0)>(%arg5)[%workgroup_size_0]
%19 = linalg.init_tensor [1, %16, %17, %18] : tensor<1x?x?x?xf32>
%20 = linalg.fill(%cst, %19) : f32, tensor<1x?x?x?xf32> -> tensor<1x?x?x?xf32>
%21 = linalg.conv_2d_nhwc_hwcf {__internal_linalg_transform__ = "workgroup", dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%10, %12 : tensor<1x?x?x3xf32>, tensor<3x3x3x?xf32>) outs(%20 : tensor<1x?x?x?xf32>) -> tensor<1x?x?x?xf32>
flow.dispatch.tensor.store %21, %arg2, offsets = [0, %arg3, %arg4, %arg5], sizes = [1, %13, %14, %15], strides = [1, 1, 1, 1] : tensor<1x?x?x?xf32> -> !flow.dispatch.tensor<writeonly:1x112x112x32xf32>
}
}
}
return
}
}
}
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%0 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%1 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%2 = flow.ex.stream.fragment(%1, %0) : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32> =
(%arg2: tensor<1x225x225x3xf32>, %arg3: tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32> {
%c112 = constant 112 : index
%c32 = constant 32 : index
%4 = flow.dispatch @conv_dispatch_0::@conv_dispatch_0[%c32, %c112, %c112](%arg2, %arg3) : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
flow.return %4 : tensor<1x112x112x32xf32>
}
%3 = hal.tensor.cast %2 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %3 : !hal.buffer_view
}
}
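// NOTE: This Canonicalizer run rewrites the affine maps so the loop-invariant workgroup sizes
// are passed as affine symbols rather than dimensions, e.g.
//   affine.min affine_map<(d0, d1) -> (d0, -d1 + 32)>(%workgroup_size_0, %arg5)
// becomes
//   affine.min affine_map<(d0)[s0] -> (s0, -d0 + 32)>(%arg5)[%workgroup_size_0]
// The computed values are identical; symbol form is canonical for loop-invariant operands and
// folds away once concrete workgroup sizes are chosen.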
// -----// IR Dump After mlir::iree_compiler::IREE::HAL::AssignTargetDevicesPass //----- //
module attributes {hal.device.targets = [#hal.device.target<"vulkan", {buffer_constraints = #hal.buffer_constraints<max_allocation_size = 1073741824, min_buffer_offset_alignment = 256, max_buffer_range = 134217728, min_buffer_range_alignment = 16>, executable_targets = [#hal.executable.target<"vulkan", "vulkan-spirv-fb", {spv.target_env = #spv.target_env<#spv.vce<v1.4, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>}>]}>]} {
flow.executable private @conv_dispatch_0 {
flow.dispatch.entry public @conv_dispatch_0 attributes {workgroup_rank = 3 : index}
builtin.module {
func @conv_dispatch_0(%arg0: !flow.dispatch.tensor<readonly:1x225x225x3xf32>, %arg1: !flow.dispatch.tensor<readonly:3x3x3x32xf32>, %arg2: !flow.dispatch.tensor<writeonly:1x112x112x32xf32>) {
%cst = constant 0.000000e+00 : f32
%c112 = constant 112 : index
%c32 = constant 32 : index
%workgroup_size_0 = flow.dispatch.workgroup.size[0] : index
%workgroup_size_1 = flow.dispatch.workgroup.size[1] : index
%workgroup_size_2 = flow.dispatch.workgroup.size[2] : index
%workgroup_id_0 = flow.dispatch.workgroup.id[0] : index
%workgroup_count_0 = flow.dispatch.workgroup.count[0] : index
%workgroup_id_1 = flow.dispatch.workgroup.id[1] : index
%workgroup_count_1 = flow.dispatch.workgroup.count[1] : index
%workgroup_id_2 = flow.dispatch.workgroup.id[2] : index
%workgroup_count_2 = flow.dispatch.workgroup.count[2] : index
%0 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_2, %workgroup_size_2]
%1 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_2, %workgroup_size_2]
scf.for %arg3 = %0 to %c112 step %1 {
%2 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_1, %workgroup_size_1]
%3 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_1, %workgroup_size_1]
scf.for %arg4 = %2 to %c112 step %3 {
%4 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_0, %workgroup_size_0]
%5 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_0, %workgroup_size_0]
scf.for %arg5 = %4 to %c32 step %5 {
%6 = affine.apply affine_map<(d0) -> (d0 * 2)>(%arg3)
%7 = affine.min affine_map<(d0)[s0] -> (s0 * 2 + 1, d0 * -2 + 227)>(%arg3)[%workgroup_size_2]
%8 = affine.apply affine_map<(d0) -> (d0 * 2)>(%arg4)
%9 = affine.min affine_map<(d0)[s0] -> (s0 * 2 + 1, d0 * -2 + 227)>(%arg4)[%workgroup_size_1]
%10 = flow.dispatch.tensor.load %arg0, offsets = [0, %6, %8, 0], sizes = [1, %7, %9, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:1x225x225x3xf32> -> tensor<1x?x?x3xf32>
%11 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 32)>(%arg5)[%workgroup_size_0]
%12 = flow.dispatch.tensor.load %arg1, offsets = [0, 0, 0, %arg5], sizes = [3, 3, 3, %11], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:3x3x3x32xf32> -> tensor<3x3x3x?xf32>
%13 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 112)>(%arg3)[%workgroup_size_2]
%14 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 112)>(%arg4)[%workgroup_size_1]
%15 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 32)>(%arg5)[%workgroup_size_0]
%16 = affine.min affine_map<(d0)[s0] -> (-d0 + 112, s0)>(%arg3)[%workgroup_size_2]
%17 = affine.min affine_map<(d0)[s0] -> (-d0 + 112, s0)>(%arg4)[%workgroup_size_1]
%18 = affine.min affine_map<(d0)[s0] -> (-d0 + 32, s0)>(%arg5)[%workgroup_size_0]
%19 = linalg.init_tensor [1, %16, %17, %18] : tensor<1x?x?x?xf32>
%20 = linalg.fill(%cst, %19) : f32, tensor<1x?x?x?xf32> -> tensor<1x?x?x?xf32>
%21 = linalg.conv_2d_nhwc_hwcf {__internal_linalg_transform__ = "workgroup", dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%10, %12 : tensor<1x?x?x3xf32>, tensor<3x3x3x?xf32>) outs(%20 : tensor<1x?x?x?xf32>) -> tensor<1x?x?x?xf32>
flow.dispatch.tensor.store %21, %arg2, offsets = [0, %arg3, %arg4, %arg5], sizes = [1, %13, %14, %15], strides = [1, 1, 1, 1] : tensor<1x?x?x?xf32> -> !flow.dispatch.tensor<writeonly:1x112x112x32xf32>
}
}
}
return
}
}
}
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%0 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%1 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%2 = flow.ex.stream.fragment(%1, %0) : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32> =
(%arg2: tensor<1x225x225x3xf32>, %arg3: tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32> {
%c112 = constant 112 : index
%c32 = constant 32 : index
%4 = flow.dispatch @conv_dispatch_0::@conv_dispatch_0[%c32, %c112, %c112](%arg2, %arg3) : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
flow.return %4 : tensor<1x112x112x32xf32>
}
%3 = hal.tensor.cast %2 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %3 : !hal.buffer_view
}
}
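// NOTE: AssignTargetDevicesPass attaches the hal.device.targets module attribute: one Vulkan
// device compiling to the vulkan-spirv-fb format, with an spv.target_env describing an ARM
// integrated GPU (SPIR-V 1.4, subgroup size 16, 32 KiB shared memory, at most 512 workgroup
// invocations). The module body itself is untouched.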
// -----// IR Dump After mlir::iree_compiler::IREE::HAL::VerifyTargetEnvironmentPass //----- //
module attributes {hal.device.targets = [#hal.device.target<"vulkan", {buffer_constraints = #hal.buffer_constraints<max_allocation_size = 1073741824, min_buffer_offset_alignment = 256, max_buffer_range = 134217728, min_buffer_range_alignment = 16>, executable_targets = [#hal.executable.target<"vulkan", "vulkan-spirv-fb", {spv.target_env = #spv.target_env<#spv.vce<v1.4, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>}>]}>]} {
flow.executable private @conv_dispatch_0 {
flow.dispatch.entry public @conv_dispatch_0 attributes {workgroup_rank = 3 : index}
builtin.module {
func @conv_dispatch_0(%arg0: !flow.dispatch.tensor<readonly:1x225x225x3xf32>, %arg1: !flow.dispatch.tensor<readonly:3x3x3x32xf32>, %arg2: !flow.dispatch.tensor<writeonly:1x112x112x32xf32>) {
%cst = constant 0.000000e+00 : f32
%c112 = constant 112 : index
%c32 = constant 32 : index
%workgroup_size_0 = flow.dispatch.workgroup.size[0] : index
%workgroup_size_1 = flow.dispatch.workgroup.size[1] : index
%workgroup_size_2 = flow.dispatch.workgroup.size[2] : index
%workgroup_id_0 = flow.dispatch.workgroup.id[0] : index
%workgroup_count_0 = flow.dispatch.workgroup.count[0] : index
%workgroup_id_1 = flow.dispatch.workgroup.id[1] : index
%workgroup_count_1 = flow.dispatch.workgroup.count[1] : index
%workgroup_id_2 = flow.dispatch.workgroup.id[2] : index
%workgroup_count_2 = flow.dispatch.workgroup.count[2] : index
%0 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_2, %workgroup_size_2]
%1 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_2, %workgroup_size_2]
scf.for %arg3 = %0 to %c112 step %1 {
%2 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_1, %workgroup_size_1]
%3 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_1, %workgroup_size_1]
scf.for %arg4 = %2 to %c112 step %3 {
%4 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_0, %workgroup_size_0]
%5 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_0, %workgroup_size_0]
scf.for %arg5 = %4 to %c32 step %5 {
%6 = affine.apply affine_map<(d0) -> (d0 * 2)>(%arg3)
%7 = affine.min affine_map<(d0)[s0] -> (s0 * 2 + 1, d0 * -2 + 227)>(%arg3)[%workgroup_size_2]
%8 = affine.apply affine_map<(d0) -> (d0 * 2)>(%arg4)
%9 = affine.min affine_map<(d0)[s0] -> (s0 * 2 + 1, d0 * -2 + 227)>(%arg4)[%workgroup_size_1]
%10 = flow.dispatch.tensor.load %arg0, offsets = [0, %6, %8, 0], sizes = [1, %7, %9, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:1x225x225x3xf32> -> tensor<1x?x?x3xf32>
%11 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 32)>(%arg5)[%workgroup_size_0]
%12 = flow.dispatch.tensor.load %arg1, offsets = [0, 0, 0, %arg5], sizes = [3, 3, 3, %11], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:3x3x3x32xf32> -> tensor<3x3x3x?xf32>
%13 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 112)>(%arg3)[%workgroup_size_2]
%14 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 112)>(%arg4)[%workgroup_size_1]
%15 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 32)>(%arg5)[%workgroup_size_0]
%16 = affine.min affine_map<(d0)[s0] -> (-d0 + 112, s0)>(%arg3)[%workgroup_size_2]
%17 = affine.min affine_map<(d0)[s0] -> (-d0 + 112, s0)>(%arg4)[%workgroup_size_1]
%18 = affine.min affine_map<(d0)[s0] -> (-d0 + 32, s0)>(%arg5)[%workgroup_size_0]
%19 = linalg.init_tensor [1, %16, %17, %18] : tensor<1x?x?x?xf32>
%20 = linalg.fill(%cst, %19) : f32, tensor<1x?x?x?xf32> -> tensor<1x?x?x?xf32>
%21 = linalg.conv_2d_nhwc_hwcf {__internal_linalg_transform__ = "workgroup", dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%10, %12 : tensor<1x?x?x3xf32>, tensor<3x3x3x?xf32>) outs(%20 : tensor<1x?x?x?xf32>) -> tensor<1x?x?x?xf32>
flow.dispatch.tensor.store %21, %arg2, offsets = [0, %arg3, %arg4, %arg5], sizes = [1, %13, %14, %15], strides = [1, 1, 1, 1] : tensor<1x?x?x?xf32> -> !flow.dispatch.tensor<writeonly:1x112x112x32xf32>
}
}
}
return
}
}
}
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%0 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%1 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%2 = flow.ex.stream.fragment(%1, %0) : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32> =
(%arg2: tensor<1x225x225x3xf32>, %arg3: tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32> {
%c112 = constant 112 : index
%c32 = constant 32 : index
%4 = flow.dispatch @conv_dispatch_0::@conv_dispatch_0[%c32, %c112, %c112](%arg2, %arg3) : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
flow.return %4 : tensor<1x112x112x32xf32>
}
%3 = hal.tensor.cast %2 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %3 : !hal.buffer_view
}
}
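// NOTE: VerifyTargetEnvironmentPass appears to be verification-only, checking that registered
// target backends can service the requested executable formats; the dump repeats the module
// without changes.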
// -----// IR Dump After mlir::iree_compiler::IREE::HAL::IdentifyConstantPoolsPass //----- //
module attributes {hal.device.targets = [#hal.device.target<"vulkan", {buffer_constraints = #hal.buffer_constraints<max_allocation_size = 1073741824, min_buffer_offset_alignment = 256, max_buffer_range = 134217728, min_buffer_range_alignment = 16>, executable_targets = [#hal.executable.target<"vulkan", "vulkan-spirv-fb", {spv.target_env = #spv.target_env<#spv.vce<v1.4, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>}>]}>]} {
flow.executable private @conv_dispatch_0 {
flow.dispatch.entry public @conv_dispatch_0 attributes {workgroup_rank = 3 : index}
builtin.module {
func @conv_dispatch_0(%arg0: !flow.dispatch.tensor<readonly:1x225x225x3xf32>, %arg1: !flow.dispatch.tensor<readonly:3x3x3x32xf32>, %arg2: !flow.dispatch.tensor<writeonly:1x112x112x32xf32>) {
%cst = constant 0.000000e+00 : f32
%c112 = constant 112 : index
%c32 = constant 32 : index
%workgroup_size_0 = flow.dispatch.workgroup.size[0] : index
%workgroup_size_1 = flow.dispatch.workgroup.size[1] : index
%workgroup_size_2 = flow.dispatch.workgroup.size[2] : index
%workgroup_id_0 = flow.dispatch.workgroup.id[0] : index
%workgroup_count_0 = flow.dispatch.workgroup.count[0] : index
%workgroup_id_1 = flow.dispatch.workgroup.id[1] : index
%workgroup_count_1 = flow.dispatch.workgroup.count[1] : index
%workgroup_id_2 = flow.dispatch.workgroup.id[2] : index
%workgroup_count_2 = flow.dispatch.workgroup.count[2] : index
%0 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_2, %workgroup_size_2]
%1 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_2, %workgroup_size_2]
scf.for %arg3 = %0 to %c112 step %1 {
%2 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_1, %workgroup_size_1]
%3 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_1, %workgroup_size_1]
scf.for %arg4 = %2 to %c112 step %3 {
%4 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_0, %workgroup_size_0]
%5 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_0, %workgroup_size_0]
scf.for %arg5 = %4 to %c32 step %5 {
%6 = affine.apply affine_map<(d0) -> (d0 * 2)>(%arg3)
%7 = affine.min affine_map<(d0)[s0] -> (s0 * 2 + 1, d0 * -2 + 227)>(%arg3)[%workgroup_size_2]
%8 = affine.apply affine_map<(d0) -> (d0 * 2)>(%arg4)
%9 = affine.min affine_map<(d0)[s0] -> (s0 * 2 + 1, d0 * -2 + 227)>(%arg4)[%workgroup_size_1]
%10 = flow.dispatch.tensor.load %arg0, offsets = [0, %6, %8, 0], sizes = [1, %7, %9, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:1x225x225x3xf32> -> tensor<1x?x?x3xf32>
%11 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 32)>(%arg5)[%workgroup_size_0]
%12 = flow.dispatch.tensor.load %arg1, offsets = [0, 0, 0, %arg5], sizes = [3, 3, 3, %11], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:3x3x3x32xf32> -> tensor<3x3x3x?xf32>
%13 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 112)>(%arg3)[%workgroup_size_2]
%14 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 112)>(%arg4)[%workgroup_size_1]
%15 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 32)>(%arg5)[%workgroup_size_0]
%16 = affine.min affine_map<(d0)[s0] -> (-d0 + 112, s0)>(%arg3)[%workgroup_size_2]
%17 = affine.min affine_map<(d0)[s0] -> (-d0 + 112, s0)>(%arg4)[%workgroup_size_1]
%18 = affine.min affine_map<(d0)[s0] -> (-d0 + 32, s0)>(%arg5)[%workgroup_size_0]
%19 = linalg.init_tensor [1, %16, %17, %18] : tensor<1x?x?x?xf32>
%20 = linalg.fill(%cst, %19) : f32, tensor<1x?x?x?xf32> -> tensor<1x?x?x?xf32>
%21 = linalg.conv_2d_nhwc_hwcf {__internal_linalg_transform__ = "workgroup", dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%10, %12 : tensor<1x?x?x3xf32>, tensor<3x3x3x?xf32>) outs(%20 : tensor<1x?x?x?xf32>) -> tensor<1x?x?x?xf32>
flow.dispatch.tensor.store %21, %arg2, offsets = [0, %arg3, %arg4, %arg5], sizes = [1, %13, %14, %15], strides = [1, 1, 1, 1] : tensor<1x?x?x?xf32> -> !flow.dispatch.tensor<writeonly:1x112x112x32xf32>
}
}
}
return
}
}
}
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%0 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%1 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%2 = flow.ex.stream.fragment(%1, %0) : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32> =
(%arg2: tensor<1x225x225x3xf32>, %arg3: tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32> {
%c112 = constant 112 : index
%c32 = constant 32 : index
%4 = flow.dispatch @conv_dispatch_0::@conv_dispatch_0[%c32, %c112, %c112](%arg2, %arg3) : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
flow.return %4 : tensor<1x112x112x32xf32>
}
%3 = hal.tensor.cast %2 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %3 : !hal.buffer_view
}
}
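// NOTE: IdentifyConstantPoolsPass (above) and MaterializeConstantPoolBuffersPass (next dump)
// pool large constants into device buffers; with no large constants in this module, both leave
// the IR as-is.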
// -----// IR Dump After mlir::iree_compiler::IREE::HAL::MaterializeConstantPoolBuffersPass //----- //
module attributes {hal.device.targets = [#hal.device.target<"vulkan", {buffer_constraints = #hal.buffer_constraints<max_allocation_size = 1073741824, min_buffer_offset_alignment = 256, max_buffer_range = 134217728, min_buffer_range_alignment = 16>, executable_targets = [#hal.executable.target<"vulkan", "vulkan-spirv-fb", {spv.target_env = #spv.target_env<#spv.vce<v1.4, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>}>]}>]} {
flow.executable private @conv_dispatch_0 {
flow.dispatch.entry public @conv_dispatch_0 attributes {workgroup_rank = 3 : index}
builtin.module {
func @conv_dispatch_0(%arg0: !flow.dispatch.tensor<readonly:1x225x225x3xf32>, %arg1: !flow.dispatch.tensor<readonly:3x3x3x32xf32>, %arg2: !flow.dispatch.tensor<writeonly:1x112x112x32xf32>) {
%cst = constant 0.000000e+00 : f32
%c112 = constant 112 : index
%c32 = constant 32 : index
%workgroup_size_0 = flow.dispatch.workgroup.size[0] : index
%workgroup_size_1 = flow.dispatch.workgroup.size[1] : index
%workgroup_size_2 = flow.dispatch.workgroup.size[2] : index
%workgroup_id_0 = flow.dispatch.workgroup.id[0] : index
%workgroup_count_0 = flow.dispatch.workgroup.count[0] : index
%workgroup_id_1 = flow.dispatch.workgroup.id[1] : index
%workgroup_count_1 = flow.dispatch.workgroup.count[1] : index
%workgroup_id_2 = flow.dispatch.workgroup.id[2] : index
%workgroup_count_2 = flow.dispatch.workgroup.count[2] : index
%0 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_2, %workgroup_size_2]
%1 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_2, %workgroup_size_2]
scf.for %arg3 = %0 to %c112 step %1 {
%2 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_1, %workgroup_size_1]
%3 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_1, %workgroup_size_1]
scf.for %arg4 = %2 to %c112 step %3 {
%4 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_0, %workgroup_size_0]
%5 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_0, %workgroup_size_0]
scf.for %arg5 = %4 to %c32 step %5 {
%6 = affine.apply affine_map<(d0) -> (d0 * 2)>(%arg3)
%7 = affine.min affine_map<(d0)[s0] -> (s0 * 2 + 1, d0 * -2 + 227)>(%arg3)[%workgroup_size_2]
%8 = affine.apply affine_map<(d0) -> (d0 * 2)>(%arg4)
%9 = affine.min affine_map<(d0)[s0] -> (s0 * 2 + 1, d0 * -2 + 227)>(%arg4)[%workgroup_size_1]
%10 = flow.dispatch.tensor.load %arg0, offsets = [0, %6, %8, 0], sizes = [1, %7, %9, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:1x225x225x3xf32> -> tensor<1x?x?x3xf32>
%11 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 32)>(%arg5)[%workgroup_size_0]
%12 = flow.dispatch.tensor.load %arg1, offsets = [0, 0, 0, %arg5], sizes = [3, 3, 3, %11], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:3x3x3x32xf32> -> tensor<3x3x3x?xf32>
%13 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 112)>(%arg3)[%workgroup_size_2]
%14 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 112)>(%arg4)[%workgroup_size_1]
%15 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 32)>(%arg5)[%workgroup_size_0]
%16 = affine.min affine_map<(d0)[s0] -> (-d0 + 112, s0)>(%arg3)[%workgroup_size_2]
%17 = affine.min affine_map<(d0)[s0] -> (-d0 + 112, s0)>(%arg4)[%workgroup_size_1]
%18 = affine.min affine_map<(d0)[s0] -> (-d0 + 32, s0)>(%arg5)[%workgroup_size_0]
%19 = linalg.init_tensor [1, %16, %17, %18] : tensor<1x?x?x?xf32>
%20 = linalg.fill(%cst, %19) : f32, tensor<1x?x?x?xf32> -> tensor<1x?x?x?xf32>
%21 = linalg.conv_2d_nhwc_hwcf {__internal_linalg_transform__ = "workgroup", dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%10, %12 : tensor<1x?x?x3xf32>, tensor<3x3x3x?xf32>) outs(%20 : tensor<1x?x?x?xf32>) -> tensor<1x?x?x?xf32>
flow.dispatch.tensor.store %21, %arg2, offsets = [0, %arg3, %arg4, %arg5], sizes = [1, %13, %14, %15], strides = [1, 1, 1, 1] : tensor<1x?x?x?xf32> -> !flow.dispatch.tensor<writeonly:1x112x112x32xf32>
}
}
}
return
}
}
}
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%0 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%1 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%2 = flow.ex.stream.fragment(%1, %0) : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32> =
(%arg2: tensor<1x225x225x3xf32>, %arg3: tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32> {
%c112 = constant 112 : index
%c32 = constant 32 : index
%4 = flow.dispatch @conv_dispatch_0::@conv_dispatch_0[%c32, %c112, %c112](%arg2, %arg3) : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
flow.return %4 : tensor<1x112x112x32xf32>
}
%3 = hal.tensor.cast %2 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %3 : !hal.buffer_view
}
}
// -----// IR Dump After Canonicalizer //----- //
module attributes {hal.device.targets = [#hal.device.target<"vulkan", {buffer_constraints = #hal.buffer_constraints<max_allocation_size = 1073741824, min_buffer_offset_alignment = 256, max_buffer_range = 134217728, min_buffer_range_alignment = 16>, executable_targets = [#hal.executable.target<"vulkan", "vulkan-spirv-fb", {spv.target_env = #spv.target_env<#spv.vce<v1.4, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>}>]}>]} {
flow.executable private @conv_dispatch_0 {
flow.dispatch.entry public @conv_dispatch_0 attributes {workgroup_rank = 3 : index}
builtin.module {
func @conv_dispatch_0(%arg0: !flow.dispatch.tensor<readonly:1x225x225x3xf32>, %arg1: !flow.dispatch.tensor<readonly:3x3x3x32xf32>, %arg2: !flow.dispatch.tensor<writeonly:1x112x112x32xf32>) {
%c32 = constant 32 : index
%c112 = constant 112 : index
%cst = constant 0.000000e+00 : f32
%workgroup_size_0 = flow.dispatch.workgroup.size[0] : index
%workgroup_size_1 = flow.dispatch.workgroup.size[1] : index
%workgroup_size_2 = flow.dispatch.workgroup.size[2] : index
%workgroup_id_0 = flow.dispatch.workgroup.id[0] : index
%workgroup_count_0 = flow.dispatch.workgroup.count[0] : index
%workgroup_id_1 = flow.dispatch.workgroup.id[1] : index
%workgroup_count_1 = flow.dispatch.workgroup.count[1] : index
%workgroup_id_2 = flow.dispatch.workgroup.id[2] : index
%workgroup_count_2 = flow.dispatch.workgroup.count[2] : index
%0 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_2, %workgroup_size_2]
%1 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_2, %workgroup_size_2]
scf.for %arg3 = %0 to %c112 step %1 {
%2 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_1, %workgroup_size_1]
%3 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_1, %workgroup_size_1]
scf.for %arg4 = %2 to %c112 step %3 {
%4 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_0, %workgroup_size_0]
%5 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_0, %workgroup_size_0]
scf.for %arg5 = %4 to %c32 step %5 {
%6 = affine.apply affine_map<(d0) -> (d0 * 2)>(%arg3)
%7 = affine.min affine_map<(d0)[s0] -> (s0 * 2 + 1, d0 * -2 + 227)>(%arg3)[%workgroup_size_2]
%8 = affine.apply affine_map<(d0) -> (d0 * 2)>(%arg4)
%9 = affine.min affine_map<(d0)[s0] -> (s0 * 2 + 1, d0 * -2 + 227)>(%arg4)[%workgroup_size_1]
%10 = flow.dispatch.tensor.load %arg0, offsets = [0, %6, %8, 0], sizes = [1, %7, %9, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:1x225x225x3xf32> -> tensor<1x?x?x3xf32>
%11 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 32)>(%arg5)[%workgroup_size_0]
%12 = flow.dispatch.tensor.load %arg1, offsets = [0, 0, 0, %arg5], sizes = [3, 3, 3, %11], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:3x3x3x32xf32> -> tensor<3x3x3x?xf32>
%13 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 112)>(%arg3)[%workgroup_size_2]
%14 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 112)>(%arg4)[%workgroup_size_1]
%15 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 32)>(%arg5)[%workgroup_size_0]
%16 = affine.min affine_map<(d0)[s0] -> (-d0 + 112, s0)>(%arg3)[%workgroup_size_2]
%17 = affine.min affine_map<(d0)[s0] -> (-d0 + 112, s0)>(%arg4)[%workgroup_size_1]
%18 = affine.min affine_map<(d0)[s0] -> (-d0 + 32, s0)>(%arg5)[%workgroup_size_0]
%19 = linalg.init_tensor [1, %16, %17, %18] : tensor<1x?x?x?xf32>
%20 = linalg.fill(%cst, %19) : f32, tensor<1x?x?x?xf32> -> tensor<1x?x?x?xf32>
%21 = linalg.conv_2d_nhwc_hwcf {__internal_linalg_transform__ = "workgroup", dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%10, %12 : tensor<1x?x?x3xf32>, tensor<3x3x3x?xf32>) outs(%20 : tensor<1x?x?x?xf32>) -> tensor<1x?x?x?xf32>
flow.dispatch.tensor.store %21, %arg2, offsets = [0, %arg3, %arg4, %arg5], sizes = [1, %13, %14, %15], strides = [1, 1, 1, 1] : tensor<1x?x?x?xf32> -> !flow.dispatch.tensor<writeonly:1x112x112x32xf32>
}
}
}
return
}
}
}
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%0 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%1 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%2 = flow.ex.stream.fragment(%1, %0) : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32> =
(%arg2: tensor<1x225x225x3xf32>, %arg3: tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32> {
%c32 = constant 32 : index
%c112 = constant 112 : index
%4 = flow.dispatch @conv_dispatch_0::@conv_dispatch_0[%c32, %c112, %c112](%arg2, %arg3) : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
flow.return %4 : tensor<1x112x112x32xf32>
}
%3 = hal.tensor.cast %2 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %3 : !hal.buffer_view
}
}
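// NOTE: Another cleanup round; the only visible difference from the previous dump is the
// ordering of the constants inside the executable and the stream fragment.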
// -----// IR Dump After SymbolDCE //----- //
module attributes {hal.device.targets = [#hal.device.target<"vulkan", {buffer_constraints = #hal.buffer_constraints<max_allocation_size = 1073741824, min_buffer_offset_alignment = 256, max_buffer_range = 134217728, min_buffer_range_alignment = 16>, executable_targets = [#hal.executable.target<"vulkan", "vulkan-spirv-fb", {spv.target_env = #spv.target_env<#spv.vce<v1.4, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>}>]}>]} {
flow.executable private @conv_dispatch_0 {
flow.dispatch.entry public @conv_dispatch_0 attributes {workgroup_rank = 3 : index}
builtin.module {
func @conv_dispatch_0(%arg0: !flow.dispatch.tensor<readonly:1x225x225x3xf32>, %arg1: !flow.dispatch.tensor<readonly:3x3x3x32xf32>, %arg2: !flow.dispatch.tensor<writeonly:1x112x112x32xf32>) {
%c32 = constant 32 : index
%c112 = constant 112 : index
%cst = constant 0.000000e+00 : f32
%workgroup_size_0 = flow.dispatch.workgroup.size[0] : index
%workgroup_size_1 = flow.dispatch.workgroup.size[1] : index
%workgroup_size_2 = flow.dispatch.workgroup.size[2] : index
%workgroup_id_0 = flow.dispatch.workgroup.id[0] : index
%workgroup_count_0 = flow.dispatch.workgroup.count[0] : index
%workgroup_id_1 = flow.dispatch.workgroup.id[1] : index
%workgroup_count_1 = flow.dispatch.workgroup.count[1] : index
%workgroup_id_2 = flow.dispatch.workgroup.id[2] : index
%workgroup_count_2 = flow.dispatch.workgroup.count[2] : index
%0 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_2, %workgroup_size_2]
%1 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_2, %workgroup_size_2]
scf.for %arg3 = %0 to %c112 step %1 {
%2 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_1, %workgroup_size_1]
%3 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_1, %workgroup_size_1]
scf.for %arg4 = %2 to %c112 step %3 {
%4 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_0, %workgroup_size_0]
%5 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_0, %workgroup_size_0]
scf.for %arg5 = %4 to %c32 step %5 {
%6 = affine.apply affine_map<(d0) -> (d0 * 2)>(%arg3)
%7 = affine.min affine_map<(d0)[s0] -> (s0 * 2 + 1, d0 * -2 + 227)>(%arg3)[%workgroup_size_2]
%8 = affine.apply affine_map<(d0) -> (d0 * 2)>(%arg4)
%9 = affine.min affine_map<(d0)[s0] -> (s0 * 2 + 1, d0 * -2 + 227)>(%arg4)[%workgroup_size_1]
%10 = flow.dispatch.tensor.load %arg0, offsets = [0, %6, %8, 0], sizes = [1, %7, %9, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:1x225x225x3xf32> -> tensor<1x?x?x3xf32>
%11 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 32)>(%arg5)[%workgroup_size_0]
%12 = flow.dispatch.tensor.load %arg1, offsets = [0, 0, 0, %arg5], sizes = [3, 3, 3, %11], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:3x3x3x32xf32> -> tensor<3x3x3x?xf32>
%13 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 112)>(%arg3)[%workgroup_size_2]
%14 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 112)>(%arg4)[%workgroup_size_1]
%15 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 32)>(%arg5)[%workgroup_size_0]
%16 = affine.min affine_map<(d0)[s0] -> (-d0 + 112, s0)>(%arg3)[%workgroup_size_2]
%17 = affine.min affine_map<(d0)[s0] -> (-d0 + 112, s0)>(%arg4)[%workgroup_size_1]
%18 = affine.min affine_map<(d0)[s0] -> (-d0 + 32, s0)>(%arg5)[%workgroup_size_0]
%19 = linalg.init_tensor [1, %16, %17, %18] : tensor<1x?x?x?xf32>
%20 = linalg.fill(%cst, %19) : f32, tensor<1x?x?x?xf32> -> tensor<1x?x?x?xf32>
%21 = linalg.conv_2d_nhwc_hwcf {__internal_linalg_transform__ = "workgroup", dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%10, %12 : tensor<1x?x?x3xf32>, tensor<3x3x3x?xf32>) outs(%20 : tensor<1x?x?x?xf32>) -> tensor<1x?x?x?xf32>
flow.dispatch.tensor.store %21, %arg2, offsets = [0, %arg3, %arg4, %arg5], sizes = [1, %13, %14, %15], strides = [1, 1, 1, 1] : tensor<1x?x?x?xf32> -> !flow.dispatch.tensor<writeonly:1x112x112x32xf32>
}
}
}
return
}
}
}
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%0 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%1 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%2 = flow.ex.stream.fragment(%1, %0) : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32> =
(%arg2: tensor<1x225x225x3xf32>, %arg3: tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32> {
%c32 = constant 32 : index
%c112 = constant 112 : index
%4 = flow.dispatch @conv_dispatch_0::@conv_dispatch_0[%c32, %c112, %c112](%arg2, %arg3) : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
flow.return %4 : tensor<1x112x112x32xf32>
}
%3 = hal.tensor.cast %2 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %3 : !hal.buffer_view
}
}
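// NOTE: SymbolDCE again finds no dead symbols; the executable is still referenced by the
// dispatch inside @conv, and @conv itself is public.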
// -----// IR Dump After mlir::iree_compiler::IREE::HAL::MaterializeInterfacesPass //----- //
module attributes {hal.device.targets = [#hal.device.target<"vulkan", {buffer_constraints = #hal.buffer_constraints<max_allocation_size = 1073741824, min_buffer_offset_alignment = 256, max_buffer_range = 134217728, min_buffer_range_alignment = 16>, executable_targets = [#hal.executable.target<"vulkan", "vulkan-spirv-fb", {spv.target_env = #spv.target_env<#spv.vce<v1.4, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>}>]}>]} {
hal.executable private @conv_dispatch_0 {
hal.interface public @io {
hal.interface.binding public @s0b0_ro_external, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding public @s0b1_ro_external, set=0, binding=1, type="StorageBuffer", access="Read"
hal.interface.binding public @s0b2_xw_external, set=0, binding=2, type="StorageBuffer", access="Write|Discard"
}
hal.executable.variant public @vulkan_spirv_fb, target = #hal.executable.target<"vulkan", "vulkan-spirv-fb", {spv.target_env = #spv.target_env<#spv.vce<v1.4, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>}> {
hal.executable.entry_point public @conv_dispatch_0 attributes {interface = @io, ordinal = 0 : index}
builtin.module {
func @conv_dispatch_0() {
%c0 = constant 0 : index
%c32 = constant 32 : index
%c112 = constant 112 : index
%cst = constant 0.000000e+00 : f32
%0 = hal.interface.binding.subspan @io::@s0b0_ro_external[%c0] : !flow.dispatch.tensor<readonly:1x225x225x3xf32>
%1 = hal.interface.binding.subspan @io::@s0b1_ro_external[%c0] : !flow.dispatch.tensor<readonly:3x3x3x32xf32>
%2 = hal.interface.binding.subspan @io::@s0b2_xw_external[%c0] : !flow.dispatch.tensor<writeonly:1x112x112x32xf32>
%workgroup_size_x = hal.interface.workgroup.size[0] : index
%workgroup_size_y = hal.interface.workgroup.size[1] : index
%workgroup_size_z = hal.interface.workgroup.size[2] : index
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
%workgroup_count_y = hal.interface.workgroup.count[1] : index
%workgroup_id_z = hal.interface.workgroup.id[2] : index
%workgroup_count_z = hal.interface.workgroup.count[2] : index
%3 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_z, %workgroup_size_z]
%4 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_z, %workgroup_size_z]
scf.for %arg0 = %3 to %c112 step %4 {
%5 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_y, %workgroup_size_y]
%6 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_y, %workgroup_size_y]
scf.for %arg1 = %5 to %c112 step %6 {
%7 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_x, %workgroup_size_x]
%8 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_x, %workgroup_size_x]
scf.for %arg2 = %7 to %c32 step %8 {
%9 = affine.apply affine_map<(d0) -> (d0 * 2)>(%arg0)
%10 = affine.min affine_map<(d0)[s0] -> (s0 * 2 + 1, d0 * -2 + 227)>(%arg0)[%workgroup_size_z]
%11 = affine.apply affine_map<(d0) -> (d0 * 2)>(%arg1)
%12 = affine.min affine_map<(d0)[s0] -> (s0 * 2 + 1, d0 * -2 + 227)>(%arg1)[%workgroup_size_y]
%13 = flow.dispatch.tensor.load %0, offsets = [0, %9, %11, 0], sizes = [1, %10, %12, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:1x225x225x3xf32> -> tensor<1x?x?x3xf32>
%14 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 32)>(%arg2)[%workgroup_size_x]
%15 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, %arg2], sizes = [3, 3, 3, %14], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:3x3x3x32xf32> -> tensor<3x3x3x?xf32>
%16 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 112)>(%arg0)[%workgroup_size_z]
%17 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 112)>(%arg1)[%workgroup_size_y]
%18 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 32)>(%arg2)[%workgroup_size_x]
%19 = affine.min affine_map<(d0)[s0] -> (-d0 + 112, s0)>(%arg0)[%workgroup_size_z]
%20 = affine.min affine_map<(d0)[s0] -> (-d0 + 112, s0)>(%arg1)[%workgroup_size_y]
%21 = affine.min affine_map<(d0)[s0] -> (-d0 + 32, s0)>(%arg2)[%workgroup_size_x]
%22 = linalg.init_tensor [1, %19, %20, %21] : tensor<1x?x?x?xf32>
%23 = linalg.fill(%cst, %22) : f32, tensor<1x?x?x?xf32> -> tensor<1x?x?x?xf32>
%24 = linalg.conv_2d_nhwc_hwcf {__internal_linalg_transform__ = "workgroup", dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%13, %15 : tensor<1x?x?x3xf32>, tensor<3x3x3x?xf32>) outs(%23 : tensor<1x?x?x?xf32>) -> tensor<1x?x?x?xf32>
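            // NOTE: the output tile is zero-filled, then the tiled
            // linalg.conv_2d_nhwc_hwcf (3x3 kernel, stride 2) accumulates into
            // it; the __internal_linalg_transform__ = "workgroup" marker
            // appears to tag the op for the next tiling/vectorization stage.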
flow.dispatch.tensor.store %24, %2, offsets = [0, %arg0, %arg1, %arg2], sizes = [1, %16, %17, %18], strides = [1, 1, 1, 1] : tensor<1x?x?x?xf32> -> !flow.dispatch.tensor<writeonly:1x112x112x32xf32>
}
}
}
return
}
hal.interface private @io {
hal.interface.binding public @s0b0_ro_external, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding public @s0b1_ro_external, set=0, binding=1, type="StorageBuffer", access="Read"
hal.interface.binding public @s0b2_xw_external, set=0, binding=2, type="StorageBuffer", access="Write|Discard"
}
}
}
}
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%0 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%1 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
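    // NOTE: on the host side, the two buffer views are imported as tensors
    // and the dispatch is wrapped in a flow.ex.stream.fragment, grouping work
    // that can presumably be recorded together as one submission.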
%2 = flow.ex.stream.fragment(%1, %0) : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32> =
(%arg2: tensor<1x225x225x3xf32>, %arg3: tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32> {
%c32 = constant 32 : index
%c112 = constant 112 : index
%4 = flow.dispatch @conv_dispatch_0::@conv_dispatch_0[%c32, %c112, %c112](%arg2, %arg3) {hal.bindings = [#hal.ex.operand_buffer<"s0b0_ro_external", 0 : index>, #hal.ex.operand_buffer<"s0b1_ro_external", 1 : index>, #hal.ex.result_buffer<"s0b2_xw_external", 0 : index>]} : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
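      // NOTE: the workload [%c32, %c112, %c112] is ordered [x, y, z] and
      // matches the loop bounds inside the dispatch; hal.bindings maps the two
      // operands and the result onto the @io interface bindings.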
flow.return %4 : tensor<1x112x112x32xf32>
}
%3 = hal.tensor.cast %2 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %3 : !hal.buffer_view
}
}
// -----// IR Dump After Canonicalizer //----- //
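// NOTE: in the dump below, the only visible change from the previous dump is
// that the canonicalizer has re-ordered the constants at the top of the
// dispatch function; the loop nest and tile logic are otherwise unchanged.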
module attributes {hal.device.targets = [#hal.device.target<"vulkan", {buffer_constraints = #hal.buffer_constraints<max_allocation_size = 1073741824, min_buffer_offset_alignment = 256, max_buffer_range = 134217728, min_buffer_range_alignment = 16>, executable_targets = [#hal.executable.target<"vulkan", "vulkan-spirv-fb", {spv.target_env = #spv.target_env<#spv.vce<v1.4, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>}>]}>]} {
hal.executable private @conv_dispatch_0 {
hal.interface public @io {
hal.interface.binding public @s0b0_ro_external, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding public @s0b1_ro_external, set=0, binding=1, type="StorageBuffer", access="Read"
hal.interface.binding public @s0b2_xw_external, set=0, binding=2, type="StorageBuffer", access="Write|Discard"
}
hal.executable.variant public @vulkan_spirv_fb, target = #hal.executable.target<"vulkan", "vulkan-spirv-fb", {spv.target_env = #spv.target_env<#spv.vce<v1.4, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>}> {
hal.executable.entry_point public @conv_dispatch_0 attributes {interface = @io, ordinal = 0 : index}
builtin.module {
func @conv_dispatch_0() {
%cst = constant 0.000000e+00 : f32
%c112 = constant 112 : index
%c32 = constant 32 : index
%c0 = constant 0 : index
%0 = hal.interface.binding.subspan @io::@s0b0_ro_external[%c0] : !flow.dispatch.tensor<readonly:1x225x225x3xf32>
%1 = hal.interface.binding.subspan @io::@s0b1_ro_external[%c0] : !flow.dispatch.tensor<readonly:3x3x3x32xf32>
%2 = hal.interface.binding.subspan @io::@s0b2_xw_external[%c0] : !flow.dispatch.tensor<writeonly:1x112x112x32xf32>
%workgroup_size_x = hal.interface.workgroup.size[0] : index
%workgroup_size_y = hal.interface.workgroup.size[1] : index
%workgroup_size_z = hal.interface.workgroup.size[2] : index
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
%workgroup_count_y = hal.interface.workgroup.count[1] : index
%workgroup_id_z = hal.interface.workgroup.id[2] : index
%workgroup_count_z = hal.interface.workgroup.count[2] : index
%3 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_z, %workgroup_size_z]
%4 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_z, %workgroup_size_z]
scf.for %arg0 = %3 to %c112 step %4 {
%5 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_y, %workgroup_size_y]
%6 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_y, %workgroup_size_y]
scf.for %arg1 = %5 to %c112 step %6 {
%7 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_x, %workgroup_size_x]
%8 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_x, %workgroup_size_x]
scf.for %arg2 = %7 to %c32 step %8 {
%9 = affine.apply affine_map<(d0) -> (d0 * 2)>(%arg0)
%10 = affine.min affine_map<(d0)[s0] -> (s0 * 2 + 1, d0 * -2 + 227)>(%arg0)[%workgroup_size_z]
%11 = affine.apply affine_map<(d0) -> (d0 * 2)>(%arg1)
%12 = affine.min affine_map<(d0)[s0] -> (s0 * 2 + 1, d0 * -2 + 227)>(%arg1)[%workgroup_size_y]
%13 = flow.dispatch.tensor.load %0, offsets = [0, %9, %11, 0], sizes = [1, %10, %12, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:1x225x225x3xf32> -> tensor<1x?x?x3xf32>
%14 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 32)>(%arg2)[%workgroup_size_x]
%15 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, %arg2], sizes = [3, 3, 3, %14], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:3x3x3x32xf32> -> tensor<3x3x3x?xf32>
%16 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 112)>(%arg0)[%workgroup_size_z]
%17 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 112)>(%arg1)[%workgroup_size_y]
%18 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 32)>(%arg2)[%workgroup_size_x]
%19 = affine.min affine_map<(d0)[s0] -> (-d0 + 112, s0)>(%arg0)[%workgroup_size_z]
%20 = affine.min affine_map<(d0)[s0] -> (-d0 + 112, s0)>(%arg1)[%workgroup_size_y]