@vmurali
Created March 8, 2023 22:13
This file has been truncated.
// -----// IR Dump Before TosaToSCF (tosa-to-scf) //----- //
func.func @tensor_float() {
%0 = util.unfoldable_constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%1 = util.unfoldable_constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%2 = util.unfoldable_constant dense<1.000000e+00> : tensor<1xf32>
%3 = "tosa.fully_connected"(%0, %1, %2) : (tensor<2x3xf32>, tensor<1x3xf32>, tensor<1xf32>) -> tensor<2x1xf32>
check.expect_eq_const(%3, dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>) : tensor<2x1xf32>
return
}
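// Note: tosa.fully_connected computes out[n, oc] = sum_k(input[n, k] * weight[oc, k]) + bias[oc],
// i.e. a matmul against the transposed weight plus a broadcast bias; the per-pass lowering below
// makes that structure explicit. The expected constant checks out by hand:
//   row 0: 1*7 + 2*8 + 3*9 + 1 = 51
//   row 1: 4*7 + 5*8 + 6*9 + 1 = 123
// A minimal plain-Python sketch of the same computation (names inp/wgt/bias are illustrative only,
// not part of the dump):
//   inp  = [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]
//   wgt  = [[7.0, 8.0, 9.0]]   # one output channel
//   bias = [1.0]
//   out  = [[sum(i * w for i, w in zip(row, wgt[0])) + bias[0]] for row in inp]
//   assert out == [[51.0], [123.0]]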
// -----// IR Dump After TosaToSCF (tosa-to-scf) //----- //
func.func @tensor_float() {
%0 = util.unfoldable_constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%1 = util.unfoldable_constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%2 = util.unfoldable_constant dense<1.000000e+00> : tensor<1xf32>
%3 = "tosa.fully_connected"(%0, %1, %2) : (tensor<2x3xf32>, tensor<1x3xf32>, tensor<1xf32>) -> tensor<2x1xf32>
check.expect_eq_const(%3, dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>) : tensor<2x1xf32>
return
}
// -----// IR Dump Before TopLevelSCFToCFG (iree-top-level-scf-to-cfg) //----- //
func.func @tensor_float() {
%0 = util.unfoldable_constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%1 = util.unfoldable_constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%2 = util.unfoldable_constant dense<1.000000e+00> : tensor<1xf32>
%3 = "tosa.fully_connected"(%0, %1, %2) : (tensor<2x3xf32>, tensor<1x3xf32>, tensor<1xf32>) -> tensor<2x1xf32>
check.expect_eq_const(%3, dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>) : tensor<2x1xf32>
return
}
// -----// IR Dump After TopLevelSCFToCFG (iree-top-level-scf-to-cfg) //----- //
func.func @tensor_float() {
%0 = util.unfoldable_constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%1 = util.unfoldable_constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%2 = util.unfoldable_constant dense<1.000000e+00> : tensor<1xf32>
%3 = "tosa.fully_connected"(%0, %1, %2) : (tensor<2x3xf32>, tensor<1x3xf32>, tensor<1xf32>) -> tensor<2x1xf32>
check.expect_eq_const(%3, dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>) : tensor<2x1xf32>
return
}
// -----// IR Dump Before Inliner (inline) //----- //
module {
func.func @tensor_float() {
%0 = util.unfoldable_constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%1 = util.unfoldable_constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%2 = util.unfoldable_constant dense<1.000000e+00> : tensor<1xf32>
%3 = "tosa.fully_connected"(%0, %1, %2) : (tensor<2x3xf32>, tensor<1x3xf32>, tensor<1xf32>) -> tensor<2x1xf32>
check.expect_eq_const(%3, dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>) : tensor<2x1xf32>
return
}
}
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
func.func @tensor_float() {
%0 = util.unfoldable_constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%1 = util.unfoldable_constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%2 = util.unfoldable_constant dense<1.000000e+00> : tensor<1xf32>
%3 = "tosa.fully_connected"(%0, %1, %2) : (tensor<2x3xf32>, tensor<1x3xf32>, tensor<1xf32>) -> tensor<2x1xf32>
check.expect_eq_const(%3, dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>) : tensor<2x1xf32>
return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
func.func @tensor_float() {
%cst = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_1 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_2 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_2 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_1 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_0 : tensor<1xf32>
%3 = "tosa.fully_connected"(%0, %1, %2) : (tensor<2x3xf32>, tensor<1x3xf32>, tensor<1xf32>) -> tensor<2x1xf32>
check.expect_eq(%3, %cst) : tensor<2x1xf32>
return
}
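// Note: canonicalization rewrote each util.unfoldable_constant into a plain arith.constant fed
// through util.optimization_barrier, which keeps the test inputs opaque to constant folding, and
// turned check.expect_eq_const into check.expect_eq against a hoisted constant. The computation
// itself is unchanged.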
// -----// IR Dump After Inliner (inline) //----- //
module {
func.func @tensor_float() {
%cst = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_1 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_2 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_2 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_1 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_0 : tensor<1xf32>
%3 = "tosa.fully_connected"(%0, %1, %2) : (tensor<2x3xf32>, tensor<1x3xf32>, tensor<1xf32>) -> tensor<2x1xf32>
check.expect_eq(%3, %cst) : tensor<2x1xf32>
return
}
}
// -----// IR Dump Before TosaMakeBroadcastable (tosa-make-broadcastable) //----- //
func.func @tensor_float() {
%cst = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_1 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_2 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_2 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_1 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_0 : tensor<1xf32>
%3 = "tosa.fully_connected"(%0, %1, %2) : (tensor<2x3xf32>, tensor<1x3xf32>, tensor<1xf32>) -> tensor<2x1xf32>
check.expect_eq(%3, %cst) : tensor<2x1xf32>
return
}
// -----// IR Dump After TosaMakeBroadcastable (tosa-make-broadcastable) //----- //
func.func @tensor_float() {
%cst = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_1 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_2 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_2 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_1 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_0 : tensor<1xf32>
%3 = "tosa.fully_connected"(%0, %1, %2) : (tensor<2x3xf32>, tensor<1x3xf32>, tensor<1xf32>) -> tensor<2x1xf32>
check.expect_eq(%3, %cst) : tensor<2x1xf32>
return
}
// -----// IR Dump Before TosaToArith (tosa-to-arith) //----- //
func.func @tensor_float() {
%cst = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_1 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_2 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_2 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_1 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_0 : tensor<1xf32>
%3 = "tosa.fully_connected"(%0, %1, %2) : (tensor<2x3xf32>, tensor<1x3xf32>, tensor<1xf32>) -> tensor<2x1xf32>
check.expect_eq(%3, %cst) : tensor<2x1xf32>
return
}
// -----// IR Dump After TosaToArith (tosa-to-arith) //----- //
func.func @tensor_float() {
%cst = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_1 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_2 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_2 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_1 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_0 : tensor<1xf32>
%3 = "tosa.fully_connected"(%0, %1, %2) : (tensor<2x3xf32>, tensor<1x3xf32>, tensor<1xf32>) -> tensor<2x1xf32>
check.expect_eq(%3, %cst) : tensor<2x1xf32>
return
}
// -----// IR Dump Before TosaToTensor (tosa-to-tensor) //----- //
func.func @tensor_float() {
%cst = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_1 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_2 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_2 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_1 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_0 : tensor<1xf32>
%3 = "tosa.fully_connected"(%0, %1, %2) : (tensor<2x3xf32>, tensor<1x3xf32>, tensor<1xf32>) -> tensor<2x1xf32>
check.expect_eq(%3, %cst) : tensor<2x1xf32>
return
}
// -----// IR Dump After TosaToTensor (tosa-to-tensor) //----- //
func.func @tensor_float() {
%cst = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_1 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_2 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_2 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_1 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_0 : tensor<1xf32>
%3 = "tosa.fully_connected"(%0, %1, %2) : (tensor<2x3xf32>, tensor<1x3xf32>, tensor<1xf32>) -> tensor<2x1xf32>
check.expect_eq(%3, %cst) : tensor<2x1xf32>
return
}
// -----// IR Dump Before TosaToLinalgExt (iree-tosa-to-linalg-ext) //----- //
func.func @tensor_float() {
%cst = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_1 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_2 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_2 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_1 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_0 : tensor<1xf32>
%3 = "tosa.fully_connected"(%0, %1, %2) : (tensor<2x3xf32>, tensor<1x3xf32>, tensor<1xf32>) -> tensor<2x1xf32>
check.expect_eq(%3, %cst) : tensor<2x1xf32>
return
}
// -----// IR Dump After TosaToLinalgExt (iree-tosa-to-linalg-ext) //----- //
func.func @tensor_float() {
%cst = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_1 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_2 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_2 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_1 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_0 : tensor<1xf32>
%3 = "tosa.fully_connected"(%0, %1, %2) : (tensor<2x3xf32>, tensor<1x3xf32>, tensor<1xf32>) -> tensor<2x1xf32>
check.expect_eq(%3, %cst) : tensor<2x1xf32>
return
}
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
func.func @tensor_float() {
%cst = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_1 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_2 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_2 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_1 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_0 : tensor<1xf32>
%3 = "tosa.fully_connected"(%0, %1, %2) : (tensor<2x3xf32>, tensor<1x3xf32>, tensor<1xf32>) -> tensor<2x1xf32>
check.expect_eq(%3, %cst) : tensor<2x1xf32>
return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
func.func @tensor_float() {
%cst = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_1 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_2 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_2 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_1 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_0 : tensor<1xf32>
%3 = "tosa.fully_connected"(%0, %1, %2) : (tensor<2x3xf32>, tensor<1x3xf32>, tensor<1xf32>) -> tensor<2x1xf32>
check.expect_eq(%3, %cst) : tensor<2x1xf32>
return
}
// -----// IR Dump Before TosaOptionalDecompositions (tosa-optional-decompositions) //----- //
func.func @tensor_float() {
%cst = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_1 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_2 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_2 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_1 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_0 : tensor<1xf32>
%3 = "tosa.fully_connected"(%0, %1, %2) : (tensor<2x3xf32>, tensor<1x3xf32>, tensor<1xf32>) -> tensor<2x1xf32>
check.expect_eq(%3, %cst) : tensor<2x1xf32>
return
}
// -----// IR Dump After TosaOptionalDecompositions (tosa-optional-decompositions) //----- //
func.func @tensor_float() {
%cst = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_1 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_2 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_2 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_1 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_0 : tensor<1xf32>
%3 = "tosa.fully_connected"(%0, %1, %2) : (tensor<2x3xf32>, tensor<1x3xf32>, tensor<1xf32>) -> tensor<2x1xf32>
check.expect_eq(%3, %cst) : tensor<2x1xf32>
return
}
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
func.func @tensor_float() {
%cst = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_1 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_2 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_2 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_1 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_0 : tensor<1xf32>
%3 = "tosa.fully_connected"(%0, %1, %2) : (tensor<2x3xf32>, tensor<1x3xf32>, tensor<1xf32>) -> tensor<2x1xf32>
check.expect_eq(%3, %cst) : tensor<2x1xf32>
return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
func.func @tensor_float() {
%cst = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_1 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_2 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_2 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_1 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_0 : tensor<1xf32>
%3 = "tosa.fully_connected"(%0, %1, %2) : (tensor<2x3xf32>, tensor<1x3xf32>, tensor<1xf32>) -> tensor<2x1xf32>
check.expect_eq(%3, %cst) : tensor<2x1xf32>
return
}
// -----// IR Dump Before TosaMakeBroadcastable (tosa-make-broadcastable) //----- //
func.func @tensor_float() {
%cst = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_1 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_2 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_2 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_1 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_0 : tensor<1xf32>
%3 = "tosa.fully_connected"(%0, %1, %2) : (tensor<2x3xf32>, tensor<1x3xf32>, tensor<1xf32>) -> tensor<2x1xf32>
check.expect_eq(%3, %cst) : tensor<2x1xf32>
return
}
// -----// IR Dump After TosaMakeBroadcastable (tosa-make-broadcastable) //----- //
func.func @tensor_float() {
%cst = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_1 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_2 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_2 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_1 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_0 : tensor<1xf32>
%3 = "tosa.fully_connected"(%0, %1, %2) : (tensor<2x3xf32>, tensor<1x3xf32>, tensor<1xf32>) -> tensor<2x1xf32>
check.expect_eq(%3, %cst) : tensor<2x1xf32>
return
}
// -----// IR Dump Before TosaToLinalgNamed (tosa-to-linalg-named) //----- //
func.func @tensor_float() {
%cst = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_1 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_2 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_2 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_1 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_0 : tensor<1xf32>
%3 = "tosa.fully_connected"(%0, %1, %2) : (tensor<2x3xf32>, tensor<1x3xf32>, tensor<1xf32>) -> tensor<2x1xf32>
check.expect_eq(%3, %cst) : tensor<2x1xf32>
return
}
// -----// IR Dump After TosaToLinalgNamed (tosa-to-linalg-named) //----- //
func.func @tensor_float() {
%cst = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_1 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_2 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_2 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_1 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_0 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%cst_3 = arith.constant 0.000000e+00 : f32
%4 = linalg.fill ins(%cst_3 : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%cst_4 = arith.constant dense<[1, 0]> : tensor<2xi64>
%5 = "tosa.transpose"(%1, %cst_4) : (tensor<1x3xf32>, tensor<2xi64>) -> tensor<3x1xf32>
%6 = tensor.empty() : tensor<2x1xf32>
%7 = linalg.matmul ins(%0, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%8 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %7 : tensor<1xf32>, tensor<2x1xf32>) outs(%6 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_5: f32, %out: f32):
%9 = arith.addf %in, %in_5 : f32
linalg.yield %9 : f32
} -> tensor<2x1xf32>
check.expect_eq(%8, %cst) : tensor<2x1xf32>
return
}
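// Note: TosaToLinalgNamed lowers tosa.fully_connected into named linalg ops: a zero-filled 2x1
// init (tensor.empty + linalg.fill), a tosa.transpose of the 1x3 weight to 3x1, a linalg.matmul
// accumulating into that init, and a linalg.generic that broadcast-adds the 1-element bias
// (indexing map (d0, d1) -> (d1)) to every row. A plain-Python sketch of the same step sequence
// (illustrative names only):
//   inp, bias = [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], [1.0]
//   acc = [[0.0], [0.0]]                                   # tensor.empty + linalg.fill with 0.0
//   w_t = [[7.0], [8.0], [9.0]]                            # transposed 1x3 weight
//   mm  = [[acc[n][0] + sum(inp[n][k] * w_t[k][0] for k in range(3))] for n in range(2)]  # matmul
//   out = [[bias[0] + mm[n][0]] for n in range(2)]         # broadcast bias add
//   assert out == [[51.0], [123.0]]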
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
func.func @tensor_float() {
%cst = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_1 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_2 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_2 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_1 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_0 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%cst_3 = arith.constant 0.000000e+00 : f32
%4 = linalg.fill ins(%cst_3 : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%cst_4 = arith.constant dense<[1, 0]> : tensor<2xi64>
%5 = "tosa.transpose"(%1, %cst_4) : (tensor<1x3xf32>, tensor<2xi64>) -> tensor<3x1xf32>
%6 = tensor.empty() : tensor<2x1xf32>
%7 = linalg.matmul ins(%0, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%8 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %7 : tensor<1xf32>, tensor<2x1xf32>) outs(%6 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_5: f32, %out: f32):
%9 = arith.addf %in, %in_5 : f32
linalg.yield %9 : f32
} -> tensor<2x1xf32>
check.expect_eq(%8, %cst) : tensor<2x1xf32>
return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
func.func @tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%5 = "tosa.reshape"(%1) {new_shape = array<i64: 3, 1>} : (tensor<1x3xf32>) -> tensor<3x1xf32>
%6 = tensor.empty() : tensor<2x1xf32>
%7 = linalg.matmul ins(%0, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%8 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %7 : tensor<1xf32>, tensor<2x1xf32>) outs(%6 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%9 = arith.addf %in, %in_4 : f32
linalg.yield %9 : f32
} -> tensor<2x1xf32>
check.expect_eq(%8, %cst_0) : tensor<2x1xf32>
return
}
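// Note: the canonicalizer folded the tosa.transpose with constant permutation [1, 0] into a
// tosa.reshape to 3x1. Because one of the two dimensions is 1, transposing a 1x3 tensor does not
// reorder elements in row-major layout ([[7, 8, 9]] and [[7], [8], [9]] flatten to the same
// [7, 8, 9]), so a plain reshape is sufficient.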
// -----// IR Dump Before TosaLayerwiseConstantFoldPass (tosa-layerwise-constant-fold) //----- //
func.func @tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%5 = "tosa.reshape"(%1) {new_shape = array<i64: 3, 1>} : (tensor<1x3xf32>) -> tensor<3x1xf32>
%6 = tensor.empty() : tensor<2x1xf32>
%7 = linalg.matmul ins(%0, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%8 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %7 : tensor<1xf32>, tensor<2x1xf32>) outs(%6 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%9 = arith.addf %in, %in_4 : f32
linalg.yield %9 : f32
} -> tensor<2x1xf32>
check.expect_eq(%8, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump After TosaLayerwiseConstantFoldPass (tosa-layerwise-constant-fold) //----- //
func.func @tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%5 = "tosa.reshape"(%1) {new_shape = array<i64: 3, 1>} : (tensor<1x3xf32>) -> tensor<3x1xf32>
%6 = tensor.empty() : tensor<2x1xf32>
%7 = linalg.matmul ins(%0, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%8 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %7 : tensor<1xf32>, tensor<2x1xf32>) outs(%6 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%9 = arith.addf %in, %in_4 : f32
linalg.yield %9 : f32
} -> tensor<2x1xf32>
check.expect_eq(%8, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump Before TosaMakeBroadcastable (tosa-make-broadcastable) //----- //
func.func @tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%5 = "tosa.reshape"(%1) {new_shape = array<i64: 3, 1>} : (tensor<1x3xf32>) -> tensor<3x1xf32>
%6 = tensor.empty() : tensor<2x1xf32>
%7 = linalg.matmul ins(%0, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%8 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %7 : tensor<1xf32>, tensor<2x1xf32>) outs(%6 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%9 = arith.addf %in, %in_4 : f32
linalg.yield %9 : f32
} -> tensor<2x1xf32>
check.expect_eq(%8, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump After TosaMakeBroadcastable (tosa-make-broadcastable) //----- //
func.func @tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%5 = "tosa.reshape"(%1) {new_shape = array<i64: 3, 1>} : (tensor<1x3xf32>) -> tensor<3x1xf32>
%6 = tensor.empty() : tensor<2x1xf32>
%7 = linalg.matmul ins(%0, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%8 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %7 : tensor<1xf32>, tensor<2x1xf32>) outs(%6 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%9 = arith.addf %in, %in_4 : f32
linalg.yield %9 : f32
} -> tensor<2x1xf32>
check.expect_eq(%8, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump Before TosaValidation (tosa-validate) //----- //
func.func @tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%5 = "tosa.reshape"(%1) {new_shape = array<i64: 3, 1>} : (tensor<1x3xf32>) -> tensor<3x1xf32>
%6 = tensor.empty() : tensor<2x1xf32>
%7 = linalg.matmul ins(%0, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%8 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %7 : tensor<1xf32>, tensor<2x1xf32>) outs(%6 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%9 = arith.addf %in, %in_4 : f32
linalg.yield %9 : f32
} -> tensor<2x1xf32>
check.expect_eq(%8, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump After TosaValidation (tosa-validate) //----- //
func.func @tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%5 = "tosa.reshape"(%1) {new_shape = array<i64: 3, 1>} : (tensor<1x3xf32>) -> tensor<3x1xf32>
%6 = tensor.empty() : tensor<2x1xf32>
%7 = linalg.matmul ins(%0, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%8 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %7 : tensor<1xf32>, tensor<2x1xf32>) outs(%6 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%9 = arith.addf %in, %in_4 : f32
linalg.yield %9 : f32
} -> tensor<2x1xf32>
check.expect_eq(%8, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump Before TosaToLinalg (tosa-to-linalg) //----- //
func.func @tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%5 = "tosa.reshape"(%1) {new_shape = array<i64: 3, 1>} : (tensor<1x3xf32>) -> tensor<3x1xf32>
%6 = tensor.empty() : tensor<2x1xf32>
%7 = linalg.matmul ins(%0, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%8 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %7 : tensor<1xf32>, tensor<2x1xf32>) outs(%6 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%9 = arith.addf %in, %in_4 : f32
linalg.yield %9 : f32
} -> tensor<2x1xf32>
check.expect_eq(%8, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump After TosaToLinalg (tosa-to-linalg) //----- //
func.func @tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%5 = "tosa.reshape"(%1) {new_shape = array<i64: 3, 1>} : (tensor<1x3xf32>) -> tensor<3x1xf32>
%6 = tensor.empty() : tensor<2x1xf32>
%7 = linalg.matmul ins(%0, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%8 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %7 : tensor<1xf32>, tensor<2x1xf32>) outs(%6 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%9 = arith.addf %in, %in_4 : f32
linalg.yield %9 : f32
} -> tensor<2x1xf32>
check.expect_eq(%8, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump Before TosaToArith (tosa-to-arith) //----- //
func.func @tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%5 = "tosa.reshape"(%1) {new_shape = array<i64: 3, 1>} : (tensor<1x3xf32>) -> tensor<3x1xf32>
%6 = tensor.empty() : tensor<2x1xf32>
%7 = linalg.matmul ins(%0, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%8 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %7 : tensor<1xf32>, tensor<2x1xf32>) outs(%6 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%9 = arith.addf %in, %in_4 : f32
linalg.yield %9 : f32
} -> tensor<2x1xf32>
check.expect_eq(%8, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump After TosaToArith (tosa-to-arith) //----- //
func.func @tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%5 = "tosa.reshape"(%1) {new_shape = array<i64: 3, 1>} : (tensor<1x3xf32>) -> tensor<3x1xf32>
%6 = tensor.empty() : tensor<2x1xf32>
%7 = linalg.matmul ins(%0, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%8 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %7 : tensor<1xf32>, tensor<2x1xf32>) outs(%6 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%9 = arith.addf %in, %in_4 : f32
linalg.yield %9 : f32
} -> tensor<2x1xf32>
check.expect_eq(%8, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump Before TosaToTensor (tosa-to-tensor) //----- //
func.func @tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%5 = "tosa.reshape"(%1) {new_shape = array<i64: 3, 1>} : (tensor<1x3xf32>) -> tensor<3x1xf32>
%6 = tensor.empty() : tensor<2x1xf32>
%7 = linalg.matmul ins(%0, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%8 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %7 : tensor<1xf32>, tensor<2x1xf32>) outs(%6 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%9 = arith.addf %in, %in_4 : f32
linalg.yield %9 : f32
} -> tensor<2x1xf32>
check.expect_eq(%8, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump After TosaToTensor (tosa-to-tensor) //----- //
func.func @tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = tensor.empty() : tensor<2x1xf32>
%6 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%7 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %6 : tensor<1xf32>, tensor<2x1xf32>) outs(%5 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%8 = arith.addf %in, %in_4 : f32
linalg.yield %8 : f32
} -> tensor<2x1xf32>
check.expect_eq(%7, %cst_0) : tensor<2x1xf32>
return
}
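// Note: TosaToTensor lowers the 1x3 -> 3x1 tosa.reshape into a tensor.collapse_shape down to a
// rank-1 tensor<3xf32> followed by a tensor.expand_shape back up to tensor<3x1xf32>; both simply
// regroup the same elements without reordering them.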
// -----// IR Dump Before StripSignedness (iree-flow-strip-signedness) //----- //
func.func @tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = tensor.empty() : tensor<2x1xf32>
%6 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%7 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %6 : tensor<1xf32>, tensor<2x1xf32>) outs(%5 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%8 = arith.addf %in, %in_4 : f32
linalg.yield %8 : f32
} -> tensor<2x1xf32>
check.expect_eq(%7, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump After StripSignedness (iree-flow-strip-signedness) //----- //
func.func @tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = tensor.empty() : tensor<2x1xf32>
%6 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%7 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %6 : tensor<1xf32>, tensor<2x1xf32>) outs(%5 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%8 = arith.addf %in, %in_4 : f32
linalg.yield %8 : f32
} -> tensor<2x1xf32>
check.expect_eq(%7, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
func.func @tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = tensor.empty() : tensor<2x1xf32>
%6 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%7 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %6 : tensor<1xf32>, tensor<2x1xf32>) outs(%5 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%8 = arith.addf %in, %in_4 : f32
linalg.yield %8 : f32
} -> tensor<2x1xf32>
check.expect_eq(%7, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
func.func @tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = tensor.empty() : tensor<2x1xf32>
%6 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%7 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %6 : tensor<1xf32>, tensor<2x1xf32>) outs(%5 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%8 = arith.addf %in, %in_4 : f32
linalg.yield %8 : f32
} -> tensor<2x1xf32>
check.expect_eq(%7, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump Before LinalgQuantizedMatmulToMatmulPass (iree-linalg-quantized-matmul-to-matmul) //----- //
func.func @tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = tensor.empty() : tensor<2x1xf32>
%6 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%7 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %6 : tensor<1xf32>, tensor<2x1xf32>) outs(%5 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%8 = arith.addf %in, %in_4 : f32
linalg.yield %8 : f32
} -> tensor<2x1xf32>
check.expect_eq(%7, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump After LinalgQuantizedMatmulToMatmulPass (iree-linalg-quantized-matmul-to-matmul) //----- //
func.func @tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = tensor.empty() : tensor<2x1xf32>
%6 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%7 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %6 : tensor<1xf32>, tensor<2x1xf32>) outs(%5 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%8 = arith.addf %in, %in_4 : f32
linalg.yield %8 : f32
} -> tensor<2x1xf32>
check.expect_eq(%7, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump Before LinalgQuantizedConvToConvPass (iree-linalg-quantized-conv-to-conv) //----- //
func.func @tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = tensor.empty() : tensor<2x1xf32>
%6 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%7 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %6 : tensor<1xf32>, tensor<2x1xf32>) outs(%5 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%8 = arith.addf %in, %in_4 : f32
linalg.yield %8 : f32
} -> tensor<2x1xf32>
check.expect_eq(%7, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump After LinalgQuantizedConvToConvPass (iree-linalg-quantized-conv-to-conv) //----- //
func.func @tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = tensor.empty() : tensor<2x1xf32>
%6 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%7 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %6 : tensor<1xf32>, tensor<2x1xf32>) outs(%5 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%8 = arith.addf %in, %in_4 : f32
linalg.yield %8 : f32
} -> tensor<2x1xf32>
check.expect_eq(%7, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
func.func @tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = tensor.empty() : tensor<2x1xf32>
%6 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%7 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %6 : tensor<1xf32>, tensor<2x1xf32>) outs(%5 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%8 = arith.addf %in, %in_4 : f32
linalg.yield %8 : f32
} -> tensor<2x1xf32>
check.expect_eq(%7, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
func.func @tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = tensor.empty() : tensor<2x1xf32>
%6 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%7 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %6 : tensor<1xf32>, tensor<2x1xf32>) outs(%5 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%8 = arith.addf %in, %in_4 : f32
linalg.yield %8 : f32
} -> tensor<2x1xf32>
check.expect_eq(%7, %cst_0) : tensor<2x1xf32>
return
}
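// Note (annotation): this canonicalizer run leaves the function body unchanged; the "Before" and
// "After" dumps above are identical.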
// -----// IR Dump Before VerifyCompilerTOSAInputLegality (iree-tosa-verify-compiler-input-legality) //----- //
#map = affine_map<(d0, d1) -> (d1)>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
func.func @tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = tensor.empty() : tensor<2x1xf32>
%6 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%7 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %6 : tensor<1xf32>, tensor<2x1xf32>) outs(%5 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%8 = arith.addf %in, %in_4 : f32
linalg.yield %8 : f32
} -> tensor<2x1xf32>
check.expect_eq(%7, %cst_0) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After VerifyCompilerTOSAInputLegality (iree-tosa-verify-compiler-input-legality) //----- //
#map = affine_map<(d0, d1) -> (d1)>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
func.func @tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = tensor.empty() : tensor<2x1xf32>
%6 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%7 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %6 : tensor<1xf32>, tensor<2x1xf32>) outs(%5 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%8 = arith.addf %in, %in_4 : f32
linalg.yield %8 : f32
} -> tensor<2x1xf32>
check.expect_eq(%7, %cst_0) : tensor<2x1xf32>
return
}
}
// -----// IR Dump Before IREEImportPublic (iree-import-public) //----- //
#map = affine_map<(d0, d1) -> (d1)>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
func.func @tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = tensor.empty() : tensor<2x1xf32>
%6 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%7 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %6 : tensor<1xf32>, tensor<2x1xf32>) outs(%5 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%8 = arith.addf %in, %in_4 : f32
linalg.yield %8 : f32
} -> tensor<2x1xf32>
check.expect_eq(%7, %cst_0) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After IREEImportPublic (iree-import-public) //----- //
#map = affine_map<(d0, d1) -> (d1)>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
func.func @tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = tensor.empty() : tensor<2x1xf32>
%6 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%7 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %6 : tensor<1xf32>, tensor<2x1xf32>) outs(%5 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%8 = arith.addf %in, %in_4 : f32
linalg.yield %8 : f32
} -> tensor<2x1xf32>
check.expect_eq(%7, %cst_0) : tensor<2x1xf32>
return
}
}
// -----// IR Dump Before ImportMLProgram (iree-import-ml-program) //----- //
#map = affine_map<(d0, d1) -> (d1)>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
func.func @tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = tensor.empty() : tensor<2x1xf32>
%6 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%7 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %6 : tensor<1xf32>, tensor<2x1xf32>) outs(%5 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%8 = arith.addf %in, %in_4 : f32
linalg.yield %8 : f32
} -> tensor<2x1xf32>
check.expect_eq(%7, %cst_0) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After ImportMLProgram (iree-import-ml-program) //----- //
#map = affine_map<(d0, d1) -> (d1)>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
func.func @tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = tensor.empty() : tensor<2x1xf32>
%6 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%7 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %6 : tensor<1xf32>, tensor<2x1xf32>) outs(%5 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%8 = arith.addf %in, %in_4 : f32
linalg.yield %8 : f32
} -> tensor<2x1xf32>
check.expect_eq(%7, %cst_0) : tensor<2x1xf32>
return
}
}
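// Note (annotation): ImportMLProgram appears to be a no-op for this module; the before/after dumps
// are identical and the input contains no ml_program ops to convert.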
// -----// IR Dump Before SanitizeModuleNames (iree-sanitize-module-names) //----- //
#map = affine_map<(d0, d1) -> (d1)>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
func.func @tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = tensor.empty() : tensor<2x1xf32>
%6 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%7 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %6 : tensor<1xf32>, tensor<2x1xf32>) outs(%5 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%8 = arith.addf %in, %in_4 : f32
linalg.yield %8 : f32
} -> tensor<2x1xf32>
check.expect_eq(%7, %cst_0) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After SanitizeModuleNames (iree-sanitize-module-names) //----- //
#map = affine_map<(d0, d1) -> (d1)>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
func.func @tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = tensor.empty() : tensor<2x1xf32>
%6 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%7 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %6 : tensor<1xf32>, tensor<2x1xf32>) outs(%5 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%8 = arith.addf %in, %in_4 : f32
linalg.yield %8 : f32
} -> tensor<2x1xf32>
check.expect_eq(%7, %cst_0) : tensor<2x1xf32>
return
}
}
// -----// IR Dump Before mlir::iree_compiler::IREE::ABI::WrapEntryPointsPass (iree-abi-wrap-entry-points) //----- //
#map = affine_map<(d0, d1) -> (d1)>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
func.func @tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = tensor.empty() : tensor<2x1xf32>
%6 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%7 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %6 : tensor<1xf32>, tensor<2x1xf32>) outs(%5 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%8 = arith.addf %in, %in_4 : f32
linalg.yield %8 : f32
} -> tensor<2x1xf32>
check.expect_eq(%7, %cst_0) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After mlir::iree_compiler::IREE::ABI::WrapEntryPointsPass (iree-abi-wrap-entry-points) //----- //
#map = affine_map<(d0, d1) -> (d1)>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = tensor.empty() : tensor<2x1xf32>
%6 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%7 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %6 : tensor<1xf32>, tensor<2x1xf32>) outs(%5 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%8 = arith.addf %in, %in_4 : f32
linalg.yield %8 : f32
} -> tensor<2x1xf32>
check.expect_eq(%7, %cst_0) : tensor<2x1xf32>
return
}
}
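// Note (annotation): WrapEntryPointsPass moved the original body into the new private
// @_tensor_float and reduced the public @tensor_float to an ABI stub (tagged iree.abi.stub)
// that just calls it.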
// -----// IR Dump Before Inliner (inline) //----- //
#map = affine_map<(d0, d1) -> (d1)>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = tensor.empty() : tensor<2x1xf32>
%6 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%7 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %6 : tensor<1xf32>, tensor<2x1xf32>) outs(%5 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%8 = arith.addf %in, %in_4 : f32
linalg.yield %8 : f32
} -> tensor<2x1xf32>
check.expect_eq(%7, %cst_0) : tensor<2x1xf32>
return
}
}
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = tensor.empty() : tensor<2x1xf32>
%6 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%7 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %6 : tensor<1xf32>, tensor<2x1xf32>) outs(%5 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%8 = arith.addf %in, %in_4 : f32
linalg.yield %8 : f32
} -> tensor<2x1xf32>
check.expect_eq(%7, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = tensor.empty() : tensor<2x1xf32>
%6 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%7 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %6 : tensor<1xf32>, tensor<2x1xf32>) outs(%5 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%8 = arith.addf %in, %in_4 : f32
linalg.yield %8 : f32
} -> tensor<2x1xf32>
check.expect_eq(%7, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After Inliner (inline) //----- //
#map = affine_map<(d0, d1) -> (d1)>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = tensor.empty() : tensor<2x1xf32>
%6 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%7 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %6 : tensor<1xf32>, tensor<2x1xf32>) outs(%5 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%8 = arith.addf %in, %in_4 : f32
linalg.yield %8 : f32
} -> tensor<2x1xf32>
check.expect_eq(%7, %cst_0) : tensor<2x1xf32>
return
}
}
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = tensor.empty() : tensor<2x1xf32>
%6 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%7 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %6 : tensor<1xf32>, tensor<2x1xf32>) outs(%5 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%8 = arith.addf %in, %in_4 : f32
linalg.yield %8 : f32
} -> tensor<2x1xf32>
check.expect_eq(%7, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before CSE (cse) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After CSE (cse) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = tensor.empty() : tensor<2x1xf32>
%6 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%7 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %6 : tensor<1xf32>, tensor<2x1xf32>) outs(%5 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%8 = arith.addf %in, %in_4 : f32
linalg.yield %8 : f32
} -> tensor<2x1xf32>
check.expect_eq(%7, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump Before CSE (cse) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = tensor.empty() : tensor<2x1xf32>
%6 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%7 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %6 : tensor<1xf32>, tensor<2x1xf32>) outs(%5 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%8 = arith.addf %in, %in_4 : f32
linalg.yield %8 : f32
} -> tensor<2x1xf32>
check.expect_eq(%7, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump After CSE (cse) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<1xf32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%7 = arith.addf %in, %in_4 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
check.expect_eq(%6, %cst_0) : tensor<2x1xf32>
return
}
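// Note (annotation): CSE deduplicated the two identical `tensor.empty() : tensor<2x1xf32>` ops;
// the linalg.generic now uses %3 as its init operand, and the remaining values are renumbered
// (the matmul result becomes %5 instead of %6, the generic result %6 instead of %7).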
// -----// IR Dump Before SymbolDCE (symbol-dce) //----- //
#map = affine_map<(d0, d1) -> (d1)>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<1xf32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%7 = arith.addf %in, %in_4 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
check.expect_eq(%6, %cst_0) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After SymbolDCE (symbol-dce) //----- //
#map = affine_map<(d0, d1) -> (d1)>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<1xf32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%7 = arith.addf %in, %in_4 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
check.expect_eq(%6, %cst_0) : tensor<2x1xf32>
return
}
}
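// Note (annotation): SymbolDCE removes nothing here; the private @_tensor_float is still
// referenced by the call inside @tensor_float, so both symbols survive.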
// -----// IR Dump Before DemoteF64ToF32 (iree-util-demote-f64-to-f32) //----- //
#map = affine_map<(d0, d1) -> (d1)>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<1xf32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%7 = arith.addf %in, %in_4 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
check.expect_eq(%6, %cst_0) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After DemoteF64ToF32 (iree-util-demote-f64-to-f32) //----- //
#map = affine_map<(d0, d1) -> (d1)>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<1xf32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%7 = arith.addf %in, %in_4 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
check.expect_eq(%6, %cst_0) : tensor<2x1xf32>
return
}
}
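// Note (annotation): DemoteF64ToF32 makes no changes; every value in this module is already f32,
// so there are no f64 types to demote.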
// -----// IR Dump Before DetachElementwiseFromNamedOps (iree-flow-detach-elementwise-from-named-ops) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before DetachElementwiseFromNamedOps (iree-flow-detach-elementwise-from-named-ops) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<1xf32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%7 = arith.addf %in, %in_4 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
check.expect_eq(%6, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump After DetachElementwiseFromNamedOps (iree-flow-detach-elementwise-from-named-ops) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before LinalgNamedOpConversion (linalg-named-op-conversion) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After LinalgNamedOpConversion (linalg-named-op-conversion) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before Convert1X1FilterConv2DToMatmul (iree-flow-convert-1x1-filter-conv2d-to-matmul) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After Convert1X1FilterConv2DToMatmul (iree-flow-convert-1x1-filter-conv2d-to-matmul) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After DetachElementwiseFromNamedOps (iree-flow-detach-elementwise-from-named-ops) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<1xf32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%7 = arith.addf %in, %in_4 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
check.expect_eq(%6, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump Before LinalgNamedOpConversion (linalg-named-op-conversion) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<1xf32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%7 = arith.addf %in, %in_4 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
check.expect_eq(%6, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump After LinalgNamedOpConversion (linalg-named-op-conversion) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<1xf32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%7 = arith.addf %in, %in_4 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
check.expect_eq(%6, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump Before Convert1X1FilterConv2DToMatmul (iree-flow-convert-1x1-filter-conv2d-to-matmul) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<1xf32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%7 = arith.addf %in, %in_4 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
check.expect_eq(%6, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump After Convert1X1FilterConv2DToMatmul (iree-flow-convert-1x1-filter-conv2d-to-matmul) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<1xf32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%7 = arith.addf %in, %in_4 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
check.expect_eq(%6, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump Before EraseUnusedLinalgOperands (iree-flow-erase-unused-linalg-operands) //----- //
#map = affine_map<(d0, d1) -> (d1)>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<1xf32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%7 = arith.addf %in, %in_4 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
check.expect_eq(%6, %cst_0) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After EraseUnusedLinalgOperands (iree-flow-erase-unused-linalg-operands) //----- //
#map = affine_map<(d0, d1) -> (d1)>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<1xf32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%7 = arith.addf %in, %in_4 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
check.expect_eq(%6, %cst_0) : tensor<2x1xf32>
return
}
}
// -----// IR Dump Before VerifyInputLegality (iree-verify-input-legality) //----- //
#map = affine_map<(d0, d1) -> (d1)>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<1xf32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%7 = arith.addf %in, %in_4 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
check.expect_eq(%6, %cst_0) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After VerifyInputLegality (iree-verify-input-legality) //----- //
#map = affine_map<(d0, d1) -> (d1)>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<1xf32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%7 = arith.addf %in, %in_4 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
check.expect_eq(%6, %cst_0) : tensor<2x1xf32>
return
}
}
// -----// IR Dump Before ExpandTensorShapes (iree-flow-expand-tensor-shapes) //----- //
#map = affine_map<(d0, d1) -> (d1)>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<1xf32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%7 = arith.addf %in, %in_4 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
check.expect_eq(%6, %cst_0) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After ExpandTensorShapes (iree-flow-expand-tensor-shapes) //----- //
#map = affine_map<(d0, d1) -> (d1)>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<1xf32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%7 = arith.addf %in, %in_4 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
check.expect_eq(%6, %cst_0) : tensor<2x1xf32>
return
}
}
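// Note (annotation): ExpandTensorShapes appears to be a no-op here; the before/after dumps are
// identical and all tensor shapes in this test are static.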
// -----// IR Dump Before FixedPointIterator (iree-util-fixed-point-iterator) //----- //
#map = affine_map<(d0, d1) -> (d1)>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<1xf32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%7 = arith.addf %in, %in_4 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
check.expect_eq(%6, %cst_0) : tensor<2x1xf32>
return
}
}
// -----// IR Dump Before SimplifyGlobalAccesses (iree-util-simplify-global-accesses) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before SimplifyGlobalAccesses (iree-util-simplify-global-accesses) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<1xf32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%7 = arith.addf %in, %in_4 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
check.expect_eq(%6, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump After SimplifyGlobalAccesses (iree-util-simplify-global-accesses) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After SimplifyGlobalAccesses (iree-util-simplify-global-accesses) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<1xf32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%7 = arith.addf %in, %in_4 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
check.expect_eq(%6, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump Before ApplyPatterns (iree-util-apply-patterns) //----- //
#map = affine_map<(d0, d1) -> (d1)>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module attributes {iree.fixedpoint.iteration = 0 : index} {
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<1xf32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%7 = arith.addf %in, %in_4 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
check.expect_eq(%6, %cst_0) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After ApplyPatterns (iree-util-apply-patterns) //----- //
#map = affine_map<(d0, d1) -> (d1)>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module attributes {iree.fixedpoint.iteration = 0 : index} {
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<1xf32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%7 = arith.addf %in, %in_4 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
check.expect_eq(%6, %cst_0) : tensor<2x1xf32>
return
}
}
// -----// IR Dump Before FoldGlobals (iree-util-fold-globals) //----- //
#map = affine_map<(d0, d1) -> (d1)>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module attributes {iree.fixedpoint.iteration = 0 : index} {
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<1xf32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%7 = arith.addf %in, %in_4 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
check.expect_eq(%6, %cst_0) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After FoldGlobals (iree-util-fold-globals) //----- //
#map = affine_map<(d0, d1) -> (d1)>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module attributes {iree.fixedpoint.iteration = 0 : index} {
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<1xf32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%7 = arith.addf %in, %in_4 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
check.expect_eq(%6, %cst_0) : tensor<2x1xf32>
return
}
}
// -----// IR Dump Before IPO (iree-util-ipo) //----- //
#map = affine_map<(d0, d1) -> (d1)>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module attributes {iree.fixedpoint.iteration = 0 : index} {
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<1xf32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%7 = arith.addf %in, %in_4 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
check.expect_eq(%6, %cst_0) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After IPO (iree-util-ipo) //----- //
#map = affine_map<(d0, d1) -> (d1)>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module attributes {iree.fixedpoint.iteration = 0 : index} {
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<1xf32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%7 = arith.addf %in, %in_4 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
check.expect_eq(%6, %cst_0) : tensor<2x1xf32>
return
}
}
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<1xf32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%7 = arith.addf %in, %in_4 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
check.expect_eq(%6, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before CSE (cse) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After CSE (cse) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<1xf32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%7 = arith.addf %in, %in_4 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
check.expect_eq(%6, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump Before CSE (cse) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<1xf32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%7 = arith.addf %in, %in_4 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
check.expect_eq(%6, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump After CSE (cse) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<1xf32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%7 = arith.addf %in, %in_4 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
check.expect_eq(%6, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump After FixedPointIterator (iree-util-fixed-point-iterator) //----- //
#map = affine_map<(d0, d1) -> (d1)>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<1xf32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%7 = arith.addf %in, %in_4 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
check.expect_eq(%6, %cst_0) : tensor<2x1xf32>
return
}
}
// -----// IR Dump Before TensorPadToTensorInsertSlice (iree-flow-tensor-pad-to-tensor-insert-slice) //----- //
#map = affine_map<(d0, d1) -> (d1)>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<1xf32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%7 = arith.addf %in, %in_4 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
check.expect_eq(%6, %cst_0) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After TensorPadToTensorInsertSlice (iree-flow-tensor-pad-to-tensor-insert-slice) //----- //
#map = affine_map<(d0, d1) -> (d1)>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<1xf32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%7 = arith.addf %in, %in_4 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
check.expect_eq(%6, %cst_0) : tensor<2x1xf32>
return
}
}
// -----// IR Dump Before ConvertElementwiseToLinalg (convert-elementwise-to-linalg) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before ConvertElementwiseToLinalg (convert-elementwise-to-linalg) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<1xf32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%7 = arith.addf %in, %in_4 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
check.expect_eq(%6, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump After ConvertElementwiseToLinalg (convert-elementwise-to-linalg) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before LinalgFoldUnitExtentDims (linalg-fold-unit-extent-dims) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After LinalgFoldUnitExtentDims (linalg-fold-unit-extent-dims) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before RaiseSpecialOps (iree-flow-raise-special-ops) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After ConvertElementwiseToLinalg (convert-elementwise-to-linalg) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<1xf32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%7 = arith.addf %in, %in_4 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
check.expect_eq(%6, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump Before LinalgFoldUnitExtentDims (linalg-fold-unit-extent-dims) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<1xf32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%7 = arith.addf %in, %in_4 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
check.expect_eq(%6, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump After RaiseSpecialOps (iree-flow-raise-special-ops) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before InterchangeGenericOps (iree-flow-interchange-generic-ops) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After InterchangeGenericOps (iree-flow-interchange-generic-ops) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before ResolveShapedTypeResultDims (resolve-shaped-type-result-dims) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After ResolveShapedTypeResultDims (resolve-shaped-type-result-dims) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before CSE (cse) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After CSE (cse) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before FusionOfTensorOps (iree-flow-fusion-of-tensor-ops) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After LinalgFoldUnitExtentDims (linalg-fold-unit-extent-dims) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed_4 = tensor.collapse_shape %2 [] : tensor<1xf32> into tensor<f32>
%collapsed_5 = tensor.collapse_shape %5 [[0, 1]] : tensor<2x1xf32> into tensor<2xf32>
%6 = tensor.empty() : tensor<2xf32>
%7 = linalg.generic {indexing_maps = [affine_map<(d0) -> ()>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%collapsed_4, %collapsed_5 : tensor<f32>, tensor<2xf32>) outs(%6 : tensor<2xf32>) {
^bb0(%in: f32, %in_7: f32, %out: f32):
%8 = arith.addf %in, %in_7 : f32
linalg.yield %8 : f32
} -> tensor<2xf32>
%expanded_6 = tensor.expand_shape %7 [[0, 1]] : tensor<2xf32> into tensor<2x1xf32>
check.expect_eq(%expanded_6, %cst_0) : tensor<2x1xf32>
return
}
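Relative to the preceding dumps, LinalgFoldUnitExtentDims is the first pass in this stretch that actually rewrites @_tensor_float: the bias-add linalg.generic no longer operates on the 2x1 shape directly. The tensor<1xf32> bias is collapsed to a rank-0 tensor<f32>, the 2x1 matmul result is collapsed to tensor<2xf32>, the add runs as a 1-D generic, and the tensor<2xf32> sum is expanded back to tensor<2x1xf32> for the check. A rough NumPy analogue of that rewrite (illustrative only, values stand in for the SSA results in the dump):

import numpy as np

matmul_2x1 = np.array([[50.], [122.]], dtype=np.float32)  # stands in for %5
bias_1 = np.array([1.], dtype=np.float32)                 # stands in for %2

# Before the pass: 2-D elementwise add, bias broadcast along the row dimension.
before = matmul_2x1 + bias_1.reshape(1, 1)

# After the pass: unit dims folded away -- add over tensor<2xf32> with a scalar
# tensor<f32> bias, then expand_shape back to 2x1.
after = (matmul_2x1.reshape(2) + bias_1.reshape(())).reshape(2, 1)

assert np.array_equal(before, after)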
// -----// IR Dump Before RaiseSpecialOps (iree-flow-raise-special-ops) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed_4 = tensor.collapse_shape %2 [] : tensor<1xf32> into tensor<f32>
%collapsed_5 = tensor.collapse_shape %5 [[0, 1]] : tensor<2x1xf32> into tensor<2xf32>
%6 = tensor.empty() : tensor<2xf32>
%7 = linalg.generic {indexing_maps = [affine_map<(d0) -> ()>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%collapsed_4, %collapsed_5 : tensor<f32>, tensor<2xf32>) outs(%6 : tensor<2xf32>) {
^bb0(%in: f32, %in_7: f32, %out: f32):
%8 = arith.addf %in, %in_7 : f32
linalg.yield %8 : f32
} -> tensor<2xf32>
%expanded_6 = tensor.expand_shape %7 [[0, 1]] : tensor<2xf32> into tensor<2x1xf32>
check.expect_eq(%expanded_6, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump After FusionOfTensorOps (iree-flow-fusion-of-tensor-ops) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before LinalgDetensorize (linalg-detensorize) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After RaiseSpecialOps (iree-flow-raise-special-ops) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed_4 = tensor.collapse_shape %2 [] : tensor<1xf32> into tensor<f32>
%collapsed_5 = tensor.collapse_shape %5 [[0, 1]] : tensor<2x1xf32> into tensor<2xf32>
%6 = tensor.empty() : tensor<2xf32>
%7 = linalg.generic {indexing_maps = [affine_map<(d0) -> ()>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%collapsed_4, %collapsed_5 : tensor<f32>, tensor<2xf32>) outs(%6 : tensor<2xf32>) {
^bb0(%in: f32, %in_7: f32, %out: f32):
%8 = arith.addf %in, %in_7 : f32
linalg.yield %8 : f32
} -> tensor<2xf32>
%expanded_6 = tensor.expand_shape %7 [[0, 1]] : tensor<2xf32> into tensor<2x1xf32>
check.expect_eq(%expanded_6, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump Before InterchangeGenericOps (iree-flow-interchange-generic-ops) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed_4 = tensor.collapse_shape %2 [] : tensor<1xf32> into tensor<f32>
%collapsed_5 = tensor.collapse_shape %5 [[0, 1]] : tensor<2x1xf32> into tensor<2xf32>
%6 = tensor.empty() : tensor<2xf32>
%7 = linalg.generic {indexing_maps = [affine_map<(d0) -> ()>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%collapsed_4, %collapsed_5 : tensor<f32>, tensor<2xf32>) outs(%6 : tensor<2xf32>) {
^bb0(%in: f32, %in_7: f32, %out: f32):
%8 = arith.addf %in, %in_7 : f32
linalg.yield %8 : f32
} -> tensor<2xf32>
%expanded_6 = tensor.expand_shape %7 [[0, 1]] : tensor<2xf32> into tensor<2x1xf32>
check.expect_eq(%expanded_6, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump After LinalgDetensorize (linalg-detensorize) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before CSE (cse) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After InterchangeGenericOps (iree-flow-interchange-generic-ops) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed_4 = tensor.collapse_shape %2 [] : tensor<1xf32> into tensor<f32>
%collapsed_5 = tensor.collapse_shape %5 [[0, 1]] : tensor<2x1xf32> into tensor<2xf32>
%6 = tensor.empty() : tensor<2xf32>
%7 = linalg.generic {indexing_maps = [affine_map<(d0) -> ()>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%collapsed_4, %collapsed_5 : tensor<f32>, tensor<2xf32>) outs(%6 : tensor<2xf32>) {
^bb0(%in: f32, %in_7: f32, %out: f32):
%8 = arith.addf %in, %in_7 : f32
linalg.yield %8 : f32
} -> tensor<2xf32>
%expanded_6 = tensor.expand_shape %7 [[0, 1]] : tensor<2xf32> into tensor<2x1xf32>
check.expect_eq(%expanded_6, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump Before ResolveShapedTypeResultDims (resolve-shaped-type-result-dims) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed_4 = tensor.collapse_shape %2 [] : tensor<1xf32> into tensor<f32>
%collapsed_5 = tensor.collapse_shape %5 [[0, 1]] : tensor<2x1xf32> into tensor<2xf32>
%6 = tensor.empty() : tensor<2xf32>
%7 = linalg.generic {indexing_maps = [affine_map<(d0) -> ()>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%collapsed_4, %collapsed_5 : tensor<f32>, tensor<2xf32>) outs(%6 : tensor<2xf32>) {
^bb0(%in: f32, %in_7: f32, %out: f32):
%8 = arith.addf %in, %in_7 : f32
linalg.yield %8 : f32
} -> tensor<2xf32>
%expanded_6 = tensor.expand_shape %7 [[0, 1]] : tensor<2xf32> into tensor<2x1xf32>
check.expect_eq(%expanded_6, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump After CSE (cse) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before CollapseDims (iree-flow-collapse-dims) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After CollapseDims (iree-flow-collapse-dims) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before SplitReduction (iree-flow-split-reduction-ops) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After SplitReduction (iree-flow-split-reduction-ops) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before InterchangeGenericOps (iree-flow-interchange-generic-ops) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After ResolveShapedTypeResultDims (resolve-shaped-type-result-dims) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed_4 = tensor.collapse_shape %2 [] : tensor<1xf32> into tensor<f32>
%collapsed_5 = tensor.collapse_shape %5 [[0, 1]] : tensor<2x1xf32> into tensor<2xf32>
%6 = tensor.empty() : tensor<2xf32>
%7 = linalg.generic {indexing_maps = [affine_map<(d0) -> ()>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%collapsed_4, %collapsed_5 : tensor<f32>, tensor<2xf32>) outs(%6 : tensor<2xf32>) {
^bb0(%in: f32, %in_7: f32, %out: f32):
%8 = arith.addf %in, %in_7 : f32
linalg.yield %8 : f32
} -> tensor<2xf32>
%expanded_6 = tensor.expand_shape %7 [[0, 1]] : tensor<2xf32> into tensor<2x1xf32>
check.expect_eq(%expanded_6, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed_4 = tensor.collapse_shape %2 [] : tensor<1xf32> into tensor<f32>
%collapsed_5 = tensor.collapse_shape %5 [[0, 1]] : tensor<2x1xf32> into tensor<2xf32>
%6 = tensor.empty() : tensor<2xf32>
%7 = linalg.generic {indexing_maps = [affine_map<(d0) -> ()>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%collapsed_4, %collapsed_5 : tensor<f32>, tensor<2xf32>) outs(%6 : tensor<2xf32>) {
^bb0(%in: f32, %in_7: f32, %out: f32):
%8 = arith.addf %in, %in_7 : f32
linalg.yield %8 : f32
} -> tensor<2xf32>
%expanded_6 = tensor.expand_shape %7 [[0, 1]] : tensor<2xf32> into tensor<2x1xf32>
check.expect_eq(%expanded_6, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump After InterchangeGenericOps (iree-flow-interchange-generic-ops) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before FormDispatchRegions (iree-flow-form-dispatch-regions) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After FormDispatchRegions (iree-flow-form-dispatch-regions) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before CollapseDimensions (iree-flow-collapse-dimensions) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After CollapseDimensions (iree-flow-collapse-dimensions) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before FormDispatchWorkgroups (iree-flow-form-dispatch-workgroups) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After FormDispatchWorkgroups (iree-flow-form-dispatch-workgroups) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before CaptureDispatchDynamicDims (iree-flow-capture-dispatch-dynamic-dims) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed_4 = tensor.collapse_shape %2 [] : tensor<1xf32> into tensor<f32>
%collapsed_5 = tensor.collapse_shape %5 [[0, 1]] : tensor<2x1xf32> into tensor<2xf32>
%6 = tensor.empty() : tensor<2xf32>
%7 = linalg.generic {indexing_maps = [affine_map<(d0) -> ()>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%collapsed_4, %collapsed_5 : tensor<f32>, tensor<2xf32>) outs(%6 : tensor<2xf32>) {
^bb0(%in: f32, %in_7: f32, %out: f32):
%8 = arith.addf %in, %in_7 : f32
linalg.yield %8 : f32
} -> tensor<2xf32>
%expanded_6 = tensor.expand_shape %7 [[0, 1]] : tensor<2xf32> into tensor<2x1xf32>
check.expect_eq(%expanded_6, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump Before CSE (cse) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed_4 = tensor.collapse_shape %2 [] : tensor<1xf32> into tensor<f32>
%collapsed_5 = tensor.collapse_shape %5 [[0, 1]] : tensor<2x1xf32> into tensor<2xf32>
%6 = tensor.empty() : tensor<2xf32>
%7 = linalg.generic {indexing_maps = [affine_map<(d0) -> ()>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%collapsed_4, %collapsed_5 : tensor<f32>, tensor<2xf32>) outs(%6 : tensor<2xf32>) {
^bb0(%in: f32, %in_7: f32, %out: f32):
%8 = arith.addf %in, %in_7 : f32
linalg.yield %8 : f32
} -> tensor<2xf32>
%expanded_6 = tensor.expand_shape %7 [[0, 1]] : tensor<2xf32> into tensor<2x1xf32>
check.expect_eq(%expanded_6, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump After CaptureDispatchDynamicDims (iree-flow-capture-dispatch-dynamic-dims) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After CSE (cse) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed_4 = tensor.collapse_shape %2 [] : tensor<1xf32> into tensor<f32>
%collapsed_5 = tensor.collapse_shape %5 [[0, 1]] : tensor<2x1xf32> into tensor<2xf32>
%6 = tensor.empty() : tensor<2xf32>
%7 = linalg.generic {indexing_maps = [affine_map<(d0) -> ()>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%collapsed_4, %collapsed_5 : tensor<f32>, tensor<2xf32>) outs(%6 : tensor<2xf32>) {
^bb0(%in: f32, %in_7: f32, %out: f32):
%8 = arith.addf %in, %in_7 : f32
linalg.yield %8 : f32
} -> tensor<2xf32>
%expanded_6 = tensor.expand_shape %7 [[0, 1]] : tensor<2xf32> into tensor<2x1xf32>
check.expect_eq(%expanded_6, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump Before FusionOfTensorOps (iree-flow-fusion-of-tensor-ops) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed_4 = tensor.collapse_shape %2 [] : tensor<1xf32> into tensor<f32>
%collapsed_5 = tensor.collapse_shape %5 [[0, 1]] : tensor<2x1xf32> into tensor<2xf32>
%6 = tensor.empty() : tensor<2xf32>
%7 = linalg.generic {indexing_maps = [affine_map<(d0) -> ()>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%collapsed_4, %collapsed_5 : tensor<f32>, tensor<2xf32>) outs(%6 : tensor<2xf32>) {
^bb0(%in: f32, %in_7: f32, %out: f32):
%8 = arith.addf %in, %in_7 : f32
linalg.yield %8 : f32
} -> tensor<2xf32>
%expanded_6 = tensor.expand_shape %7 [[0, 1]] : tensor<2xf32> into tensor<2x1xf32>
check.expect_eq(%expanded_6, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before CSE (cse) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After CSE (cse) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before InitializeEmptyTensors (iree-flow-initialize-empty-tensors) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After InitializeEmptyTensors (iree-flow-initialize-empty-tensors) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After FusionOfTensorOps (iree-flow-fusion-of-tensor-ops) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed_4 = tensor.collapse_shape %2 [] : tensor<1xf32> into tensor<f32>
%6 = tensor.empty() : tensor<2x1xf32>
%7 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%collapsed_4, %5 : tensor<f32>, tensor<2x1xf32>) outs(%6 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_5: f32, %out: f32):
%8 = arith.addf %in, %in_5 : f32
linalg.yield %8 : f32
} -> tensor<2x1xf32>
check.expect_eq(%7, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump Before LinalgDetensorize (linalg-detensorize) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed_4 = tensor.collapse_shape %2 [] : tensor<1xf32> into tensor<f32>
%6 = tensor.empty() : tensor<2x1xf32>
%7 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%collapsed_4, %5 : tensor<f32>, tensor<2x1xf32>) outs(%6 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_5: f32, %out: f32):
%8 = arith.addf %in, %in_5 : f32
linalg.yield %8 : f32
} -> tensor<2x1xf32>
check.expect_eq(%7, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump After LinalgDetensorize (linalg-detensorize) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed_4 = tensor.collapse_shape %2 [] : tensor<1xf32> into tensor<f32>
%6 = tensor.empty() : tensor<2x1xf32>
%7 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%collapsed_4, %5 : tensor<f32>, tensor<2x1xf32>) outs(%6 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_5: f32, %out: f32):
%8 = arith.addf %in, %in_5 : f32
linalg.yield %8 : f32
} -> tensor<2x1xf32>
check.expect_eq(%7, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed_4 = tensor.collapse_shape %2 [] : tensor<1xf32> into tensor<f32>
%6 = tensor.empty() : tensor<2x1xf32>
%7 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%collapsed_4, %5 : tensor<f32>, tensor<2x1xf32>) outs(%6 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_5: f32, %out: f32):
%8 = arith.addf %in, %in_5 : f32
linalg.yield %8 : f32
} -> tensor<2x1xf32>
check.expect_eq(%7, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed_4 = tensor.collapse_shape %2 [] : tensor<1xf32> into tensor<f32>
%6 = tensor.empty() : tensor<2x1xf32>
%7 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%collapsed_4, %5 : tensor<f32>, tensor<2x1xf32>) outs(%6 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_5: f32, %out: f32):
%8 = arith.addf %in, %in_5 : f32
linalg.yield %8 : f32
} -> tensor<2x1xf32>
check.expect_eq(%7, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump Before CSE (cse) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed_4 = tensor.collapse_shape %2 [] : tensor<1xf32> into tensor<f32>
%6 = tensor.empty() : tensor<2x1xf32>
%7 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%collapsed_4, %5 : tensor<f32>, tensor<2x1xf32>) outs(%6 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_5: f32, %out: f32):
%8 = arith.addf %in, %in_5 : f32
linalg.yield %8 : f32
} -> tensor<2x1xf32>
check.expect_eq(%7, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump After CSE (cse) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed_4 = tensor.collapse_shape %2 [] : tensor<1xf32> into tensor<f32>
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%collapsed_4, %5 : tensor<f32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_5: f32, %out: f32):
%7 = arith.addf %in, %in_5 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
check.expect_eq(%6, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump Before CollapseDims (iree-flow-collapse-dims) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed_4 = tensor.collapse_shape %2 [] : tensor<1xf32> into tensor<f32>
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%collapsed_4, %5 : tensor<f32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_5: f32, %out: f32):
%7 = arith.addf %in, %in_5 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
check.expect_eq(%6, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump After CollapseDims (iree-flow-collapse-dims) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed_4 = tensor.collapse_shape %2 [] : tensor<1xf32> into tensor<f32>
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%collapsed_4, %5 : tensor<f32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_5: f32, %out: f32):
%7 = arith.addf %in, %in_5 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
check.expect_eq(%6, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump Before SplitReduction (iree-flow-split-reduction-ops) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed_4 = tensor.collapse_shape %2 [] : tensor<1xf32> into tensor<f32>
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%collapsed_4, %5 : tensor<f32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_5: f32, %out: f32):
%7 = arith.addf %in, %in_5 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
check.expect_eq(%6, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump After SplitReduction (iree-flow-split-reduction-ops) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed_4 = tensor.collapse_shape %2 [] : tensor<1xf32> into tensor<f32>
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%collapsed_4, %5 : tensor<f32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_5: f32, %out: f32):
%7 = arith.addf %in, %in_5 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
check.expect_eq(%6, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump Before InterchangeGenericOps (iree-flow-interchange-generic-ops) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed_4 = tensor.collapse_shape %2 [] : tensor<1xf32> into tensor<f32>
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%collapsed_4, %5 : tensor<f32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_5: f32, %out: f32):
%7 = arith.addf %in, %in_5 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
check.expect_eq(%6, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump After InterchangeGenericOps (iree-flow-interchange-generic-ops) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed_4 = tensor.collapse_shape %2 [] : tensor<1xf32> into tensor<f32>
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%collapsed_4, %5 : tensor<f32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_5: f32, %out: f32):
%7 = arith.addf %in, %in_5 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
check.expect_eq(%6, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump Before FormDispatchRegions (iree-flow-form-dispatch-regions) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%5 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed_4 = tensor.collapse_shape %2 [] : tensor<1xf32> into tensor<f32>
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%collapsed_4, %5 : tensor<f32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_5: f32, %out: f32):
%7 = arith.addf %in, %in_5 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
check.expect_eq(%6, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump After FormDispatchRegions (iree-flow-form-dispatch-regions) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%collapsed_4 = tensor.collapse_shape %2 [] : tensor<1xf32> into tensor<f32>
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%c2 = arith.constant 2 : index
%c1_5 = arith.constant 1 : index
%5 = affine.apply affine_map<()[s0, s1, s2] -> ((s1 - s0) ceildiv s2)>()[%c0, %c2, %c1_5]
%c0_6 = arith.constant 0 : index
%c1_7 = arith.constant 1 : index
%c1_8 = arith.constant 1 : index
%6 = affine.apply affine_map<()[s0, s1, s2] -> ((s1 - s0) ceildiv s2)>()[%c0_6, %c1_7, %c1_8]
%7 = flow.dispatch.region[%5, %6] -> (tensor<2x1xf32>) {
%8 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%collapsed_4, %8 : tensor<f32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_9: f32, %out: f32):
%10 = arith.addf %in, %in_9 : f32
linalg.yield %10 : f32
} -> tensor<2x1xf32>
flow.return %9 : tensor<2x1xf32>
} count(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
flow.return %x, %y, %z : index, index, index
}
check.expect_eq(%7, %cst_0) : tensor<2x1xf32>
return
}
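// For reference (hand-computed, not compiler output): the two affine.apply results above are constant.
// With s0 = 0, s1 = 2, s2 = 1 the map ((s1 - s0) ceildiv s2) yields %5 = 2, and with s0 = 0, s1 = 1,
// s2 = 1 it yields %6 = 1, which is why subsequent dumps index the region as flow.dispatch.region[%c2, %c1].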
// -----// IR Dump Before CollapseDimensions (iree-flow-collapse-dimensions) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%collapsed_4 = tensor.collapse_shape %2 [] : tensor<1xf32> into tensor<f32>
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%c2 = arith.constant 2 : index
%c1_5 = arith.constant 1 : index
%5 = affine.apply affine_map<()[s0, s1, s2] -> ((s1 - s0) ceildiv s2)>()[%c0, %c2, %c1_5]
%c0_6 = arith.constant 0 : index
%c1_7 = arith.constant 1 : index
%c1_8 = arith.constant 1 : index
%6 = affine.apply affine_map<()[s0, s1, s2] -> ((s1 - s0) ceildiv s2)>()[%c0_6, %c1_7, %c1_8]
%7 = flow.dispatch.region[%5, %6] -> (tensor<2x1xf32>) {
%8 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%collapsed_4, %8 : tensor<f32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_9: f32, %out: f32):
%10 = arith.addf %in, %in_9 : f32
linalg.yield %10 : f32
} -> tensor<2x1xf32>
flow.return %9 : tensor<2x1xf32>
} count(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
flow.return %x, %y, %z : index, index, index
}
check.expect_eq(%7, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump After CollapseDimensions (iree-flow-collapse-dimensions) //----- //
func.func private @_tensor_float() {
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%collapsed_4 = tensor.collapse_shape %2 [] : tensor<1xf32> into tensor<f32>
%5 = flow.dispatch.region[%c2, %c1] -> (tensor<2x1xf32>) {
%6 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%7 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%collapsed_4, %6 : tensor<f32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_5: f32, %out: f32):
%8 = arith.addf %in, %in_5 : f32
linalg.yield %8 : f32
} -> tensor<2x1xf32>
flow.return %7 : tensor<2x1xf32>
} count(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
flow.return %x, %y, %z : index, index, index
}
check.expect_eq(%5, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump Before FormDispatchWorkgroups (iree-flow-form-dispatch-workgroups) //----- //
func.func private @_tensor_float() {
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_1 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_2 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_3 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_3 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_2 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_1 : tensor<1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%collapsed = tensor.collapse_shape %1 [[0, 1]] : tensor<1x3xf32> into tensor<3xf32>
%expanded = tensor.expand_shape %collapsed [[0, 1]] : tensor<3xf32> into tensor<3x1xf32>
%collapsed_4 = tensor.collapse_shape %2 [] : tensor<1xf32> into tensor<f32>
%5 = flow.dispatch.region[%c2, %c1] -> (tensor<2x1xf32>) {
%6 = linalg.matmul ins(%0, %expanded : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%7 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%collapsed_4, %6 : tensor<f32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_5: f32, %out: f32):
%8 = arith.addf %in, %in_5 : f32
linalg.yield %8 : f32
} -> tensor<2x1xf32>
flow.return %7 : tensor<2x1xf32>
} count(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
flow.return %x, %y, %z : index, index, index
}
check.expect_eq(%5, %cst_0) : tensor<2x1xf32>
return
}
// -----// IR Dump After FormDispatchWorkgroups (iree-flow-form-dispatch-workgroups) //----- //
func.func private @_tensor_float() {
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%cst = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_1 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_2 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_2 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_1 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_0 : tensor<1xf32>
%3 = flow.tensor.reshape %1 : tensor<1x3xf32> -> tensor<3x1xf32>
%4 = flow.tensor.reshape %2 : tensor<1xf32> -> tensor<f32>
%5 = flow.dispatch.workgroups[%c2, %c1](%0, %3, %4) : (tensor<2x3xf32>, tensor<3x1xf32>, tensor<f32>) -> tensor<2x1xf32> =
(%arg0: !flow.dispatch.tensor<readonly:tensor<2x3xf32>>, %arg1: !flow.dispatch.tensor<readonly:tensor<3x1xf32>>, %arg2: !flow.dispatch.tensor<readonly:tensor<f32>>, %arg3: !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>) {
%cst_3 = arith.constant 0.000000e+00 : f32
%6 = flow.dispatch.tensor.load %arg0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%7 = flow.dispatch.tensor.load %arg1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%8 = flow.dispatch.tensor.load %arg2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%9 = tensor.empty() : tensor<2x1xf32>
%10 = linalg.fill ins(%cst_3 : f32) outs(%9 : tensor<2x1xf32>) -> tensor<2x1xf32>
%11 = linalg.matmul ins(%6, %7 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%10 : tensor<2x1xf32>) -> tensor<2x1xf32>
%12 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%8, %11 : tensor<f32>, tensor<2x1xf32>) outs(%9 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%13 = arith.addf %in, %in_4 : f32
linalg.yield %13 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %12, %arg3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
flow.return
} count(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
flow.return %x, %y, %z : index, index, index
}
check.expect_eq(%5, %cst) : tensor<2x1xf32>
return
}
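// For reference (hand-computed, not compiler output): the dispatch above implements the original
// tosa.fully_connected. The matmul rows are 1*7 + 2*8 + 3*9 = 50 and 4*7 + 5*8 + 6*9 = 122; adding the
// broadcast bias of 1.0 gives [[51], [123]], matching the %cst constant compared by check.expect_eq.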
// -----// IR Dump Before CaptureDispatchDynamicDims (iree-flow-capture-dispatch-dynamic-dims) //----- //
func.func private @_tensor_float() {
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%cst = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_1 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_2 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_2 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_1 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_0 : tensor<1xf32>
%3 = flow.tensor.reshape %1 : tensor<1x3xf32> -> tensor<3x1xf32>
%4 = flow.tensor.reshape %2 : tensor<1xf32> -> tensor<f32>
%5 = flow.dispatch.workgroups[%c2, %c1](%0, %3, %4) : (tensor<2x3xf32>, tensor<3x1xf32>, tensor<f32>) -> tensor<2x1xf32> =
(%arg0: !flow.dispatch.tensor<readonly:tensor<2x3xf32>>, %arg1: !flow.dispatch.tensor<readonly:tensor<3x1xf32>>, %arg2: !flow.dispatch.tensor<readonly:tensor<f32>>, %arg3: !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>) {
%cst_3 = arith.constant 0.000000e+00 : f32
%6 = flow.dispatch.tensor.load %arg0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%7 = flow.dispatch.tensor.load %arg1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%8 = flow.dispatch.tensor.load %arg2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%9 = tensor.empty() : tensor<2x1xf32>
%10 = linalg.fill ins(%cst_3 : f32) outs(%9 : tensor<2x1xf32>) -> tensor<2x1xf32>
%11 = linalg.matmul ins(%6, %7 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%10 : tensor<2x1xf32>) -> tensor<2x1xf32>
%12 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%8, %11 : tensor<f32>, tensor<2x1xf32>) outs(%9 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%13 = arith.addf %in, %in_4 : f32
linalg.yield %13 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %12, %arg3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
flow.return
} count(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
flow.return %x, %y, %z : index, index, index
}
check.expect_eq(%5, %cst) : tensor<2x1xf32>
return
}
// -----// IR Dump After CaptureDispatchDynamicDims (iree-flow-capture-dispatch-dynamic-dims) //----- //
func.func private @_tensor_float() {
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%cst = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_1 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_2 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_2 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_1 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_0 : tensor<1xf32>
%3 = flow.tensor.reshape %1 : tensor<1x3xf32> -> tensor<3x1xf32>
%4 = flow.tensor.reshape %2 : tensor<1xf32> -> tensor<f32>
%5 = flow.dispatch.workgroups[%c2, %c1](%0, %3, %4) : (tensor<2x3xf32>, tensor<3x1xf32>, tensor<f32>) -> tensor<2x1xf32> =
(%arg0: !flow.dispatch.tensor<readonly:tensor<2x3xf32>>, %arg1: !flow.dispatch.tensor<readonly:tensor<3x1xf32>>, %arg2: !flow.dispatch.tensor<readonly:tensor<f32>>, %arg3: !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>) {
%cst_3 = arith.constant 0.000000e+00 : f32
%6 = flow.dispatch.tensor.load %arg0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%7 = flow.dispatch.tensor.load %arg1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%8 = flow.dispatch.tensor.load %arg2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%9 = tensor.empty() : tensor<2x1xf32>
%10 = linalg.fill ins(%cst_3 : f32) outs(%9 : tensor<2x1xf32>) -> tensor<2x1xf32>
%11 = linalg.matmul ins(%6, %7 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%10 : tensor<2x1xf32>) -> tensor<2x1xf32>
%12 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%8, %11 : tensor<f32>, tensor<2x1xf32>) outs(%9 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%13 = arith.addf %in, %in_4 : f32
linalg.yield %13 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %12, %arg3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
flow.return
} count(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
flow.return %x, %y, %z : index, index, index
}
check.expect_eq(%5, %cst) : tensor<2x1xf32>
return
}
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
func.func private @_tensor_float() {
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%cst = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_1 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_2 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_2 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_1 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_0 : tensor<1xf32>
%3 = flow.tensor.reshape %1 : tensor<1x3xf32> -> tensor<3x1xf32>
%4 = flow.tensor.reshape %2 : tensor<1xf32> -> tensor<f32>
%5 = flow.dispatch.workgroups[%c2, %c1](%0, %3, %4) : (tensor<2x3xf32>, tensor<3x1xf32>, tensor<f32>) -> tensor<2x1xf32> =
(%arg0: !flow.dispatch.tensor<readonly:tensor<2x3xf32>>, %arg1: !flow.dispatch.tensor<readonly:tensor<3x1xf32>>, %arg2: !flow.dispatch.tensor<readonly:tensor<f32>>, %arg3: !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>) {
%cst_3 = arith.constant 0.000000e+00 : f32
%6 = flow.dispatch.tensor.load %arg0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%7 = flow.dispatch.tensor.load %arg1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%8 = flow.dispatch.tensor.load %arg2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%9 = tensor.empty() : tensor<2x1xf32>
%10 = linalg.fill ins(%cst_3 : f32) outs(%9 : tensor<2x1xf32>) -> tensor<2x1xf32>
%11 = linalg.matmul ins(%6, %7 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%10 : tensor<2x1xf32>) -> tensor<2x1xf32>
%12 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%8, %11 : tensor<f32>, tensor<2x1xf32>) outs(%9 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%13 = arith.addf %in, %in_4 : f32
linalg.yield %13 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %12, %arg3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
flow.return
} count(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
flow.return %x, %y, %z : index, index, index
}
check.expect_eq(%5, %cst) : tensor<2x1xf32>
return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
func.func private @_tensor_float() {
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%cst = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_1 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_2 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_2 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_1 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_0 : tensor<1xf32>
%3 = flow.tensor.reshape %1 : tensor<1x3xf32> -> tensor<3x1xf32>
%4 = flow.tensor.reshape %2 : tensor<1xf32> -> tensor<f32>
%5 = flow.dispatch.workgroups[%c2, %c1](%0, %3, %4) : (tensor<2x3xf32>, tensor<3x1xf32>, tensor<f32>) -> tensor<2x1xf32> =
(%arg0: !flow.dispatch.tensor<readonly:tensor<2x3xf32>>, %arg1: !flow.dispatch.tensor<readonly:tensor<3x1xf32>>, %arg2: !flow.dispatch.tensor<readonly:tensor<f32>>, %arg3: !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>) {
%cst_3 = arith.constant 0.000000e+00 : f32
%6 = flow.dispatch.tensor.load %arg0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%7 = flow.dispatch.tensor.load %arg1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%8 = flow.dispatch.tensor.load %arg2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%9 = tensor.empty() : tensor<2x1xf32>
%10 = linalg.fill ins(%cst_3 : f32) outs(%9 : tensor<2x1xf32>) -> tensor<2x1xf32>
%11 = linalg.matmul ins(%6, %7 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%10 : tensor<2x1xf32>) -> tensor<2x1xf32>
%12 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%8, %11 : tensor<f32>, tensor<2x1xf32>) outs(%9 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%13 = arith.addf %in, %in_4 : f32
linalg.yield %13 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %12, %arg3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
flow.return
} count(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
flow.return %x, %y, %z : index, index, index
}
check.expect_eq(%5, %cst) : tensor<2x1xf32>
return
}
// -----// IR Dump Before CSE (cse) //----- //
func.func private @_tensor_float() {
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%cst = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_1 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_2 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_2 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_1 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_0 : tensor<1xf32>
%3 = flow.tensor.reshape %1 : tensor<1x3xf32> -> tensor<3x1xf32>
%4 = flow.tensor.reshape %2 : tensor<1xf32> -> tensor<f32>
%5 = flow.dispatch.workgroups[%c2, %c1](%0, %3, %4) : (tensor<2x3xf32>, tensor<3x1xf32>, tensor<f32>) -> tensor<2x1xf32> =
(%arg0: !flow.dispatch.tensor<readonly:tensor<2x3xf32>>, %arg1: !flow.dispatch.tensor<readonly:tensor<3x1xf32>>, %arg2: !flow.dispatch.tensor<readonly:tensor<f32>>, %arg3: !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>) {
%cst_3 = arith.constant 0.000000e+00 : f32
%6 = flow.dispatch.tensor.load %arg0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%7 = flow.dispatch.tensor.load %arg1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%8 = flow.dispatch.tensor.load %arg2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%9 = tensor.empty() : tensor<2x1xf32>
%10 = linalg.fill ins(%cst_3 : f32) outs(%9 : tensor<2x1xf32>) -> tensor<2x1xf32>
%11 = linalg.matmul ins(%6, %7 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%10 : tensor<2x1xf32>) -> tensor<2x1xf32>
%12 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%8, %11 : tensor<f32>, tensor<2x1xf32>) outs(%9 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%13 = arith.addf %in, %in_4 : f32
linalg.yield %13 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %12, %arg3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
flow.return
} count(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
flow.return %x, %y, %z : index, index, index
}
check.expect_eq(%5, %cst) : tensor<2x1xf32>
return
}
// -----// IR Dump After CSE (cse) //----- //
func.func private @_tensor_float() {
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%cst = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_1 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_2 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_2 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_1 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_0 : tensor<1xf32>
%3 = flow.tensor.reshape %1 : tensor<1x3xf32> -> tensor<3x1xf32>
%4 = flow.tensor.reshape %2 : tensor<1xf32> -> tensor<f32>
%5 = flow.dispatch.workgroups[%c2, %c1](%0, %3, %4) : (tensor<2x3xf32>, tensor<3x1xf32>, tensor<f32>) -> tensor<2x1xf32> =
(%arg0: !flow.dispatch.tensor<readonly:tensor<2x3xf32>>, %arg1: !flow.dispatch.tensor<readonly:tensor<3x1xf32>>, %arg2: !flow.dispatch.tensor<readonly:tensor<f32>>, %arg3: !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>) {
%cst_3 = arith.constant 0.000000e+00 : f32
%6 = flow.dispatch.tensor.load %arg0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%7 = flow.dispatch.tensor.load %arg1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%8 = flow.dispatch.tensor.load %arg2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%9 = tensor.empty() : tensor<2x1xf32>
%10 = linalg.fill ins(%cst_3 : f32) outs(%9 : tensor<2x1xf32>) -> tensor<2x1xf32>
%11 = linalg.matmul ins(%6, %7 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%10 : tensor<2x1xf32>) -> tensor<2x1xf32>
%12 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%8, %11 : tensor<f32>, tensor<2x1xf32>) outs(%9 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%13 = arith.addf %in, %in_4 : f32
linalg.yield %13 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %12, %arg3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
flow.return
} count(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
flow.return %x, %y, %z : index, index, index
}
check.expect_eq(%5, %cst) : tensor<2x1xf32>
return
}
// -----// IR Dump Before InitializeEmptyTensors (iree-flow-initialize-empty-tensors) //----- //
func.func private @_tensor_float() {
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%cst = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_1 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_2 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_2 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_1 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_0 : tensor<1xf32>
%3 = flow.tensor.reshape %1 : tensor<1x3xf32> -> tensor<3x1xf32>
%4 = flow.tensor.reshape %2 : tensor<1xf32> -> tensor<f32>
%5 = flow.dispatch.workgroups[%c2, %c1](%0, %3, %4) : (tensor<2x3xf32>, tensor<3x1xf32>, tensor<f32>) -> tensor<2x1xf32> =
(%arg0: !flow.dispatch.tensor<readonly:tensor<2x3xf32>>, %arg1: !flow.dispatch.tensor<readonly:tensor<3x1xf32>>, %arg2: !flow.dispatch.tensor<readonly:tensor<f32>>, %arg3: !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>) {
%cst_3 = arith.constant 0.000000e+00 : f32
%6 = flow.dispatch.tensor.load %arg0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%7 = flow.dispatch.tensor.load %arg1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%8 = flow.dispatch.tensor.load %arg2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%9 = tensor.empty() : tensor<2x1xf32>
%10 = linalg.fill ins(%cst_3 : f32) outs(%9 : tensor<2x1xf32>) -> tensor<2x1xf32>
%11 = linalg.matmul ins(%6, %7 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%10 : tensor<2x1xf32>) -> tensor<2x1xf32>
%12 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%8, %11 : tensor<f32>, tensor<2x1xf32>) outs(%9 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%13 = arith.addf %in, %in_4 : f32
linalg.yield %13 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %12, %arg3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
flow.return
} count(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
flow.return %x, %y, %z : index, index, index
}
check.expect_eq(%5, %cst) : tensor<2x1xf32>
return
}
// -----// IR Dump After InitializeEmptyTensors (iree-flow-initialize-empty-tensors) //----- //
func.func private @_tensor_float() {
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%cst = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_1 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_2 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_2 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_1 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_0 : tensor<1xf32>
%3 = flow.tensor.reshape %1 : tensor<1x3xf32> -> tensor<3x1xf32>
%4 = flow.tensor.reshape %2 : tensor<1xf32> -> tensor<f32>
%5 = flow.dispatch.workgroups[%c2, %c1](%0, %3, %4) : (tensor<2x3xf32>, tensor<3x1xf32>, tensor<f32>) -> tensor<2x1xf32> =
(%arg0: !flow.dispatch.tensor<readonly:tensor<2x3xf32>>, %arg1: !flow.dispatch.tensor<readonly:tensor<3x1xf32>>, %arg2: !flow.dispatch.tensor<readonly:tensor<f32>>, %arg3: !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>) {
%cst_3 = arith.constant 0.000000e+00 : f32
%6 = flow.dispatch.tensor.load %arg0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%7 = flow.dispatch.tensor.load %arg1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%8 = flow.dispatch.tensor.load %arg2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%9 = tensor.empty() : tensor<2x1xf32>
%10 = linalg.fill ins(%cst_3 : f32) outs(%9 : tensor<2x1xf32>) -> tensor<2x1xf32>
%11 = linalg.matmul ins(%6, %7 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%10 : tensor<2x1xf32>) -> tensor<2x1xf32>
%12 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%8, %11 : tensor<f32>, tensor<2x1xf32>) outs(%9 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%13 = arith.addf %in, %in_4 : f32
linalg.yield %13 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %12, %arg3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
flow.return
} count(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
flow.return %x, %y, %z : index, index, index
}
check.expect_eq(%5, %cst) : tensor<2x1xf32>
return
}
// -----// IR Dump Before OutlineDispatchRegions (iree-flow-outline-dispatch-regions) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
func.func private @_tensor_float() {
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%cst = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_1 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_2 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_2 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_1 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_0 : tensor<1xf32>
%3 = flow.tensor.reshape %1 : tensor<1x3xf32> -> tensor<3x1xf32>
%4 = flow.tensor.reshape %2 : tensor<1xf32> -> tensor<f32>
%5 = flow.dispatch.workgroups[%c2, %c1](%0, %3, %4) : (tensor<2x3xf32>, tensor<3x1xf32>, tensor<f32>) -> tensor<2x1xf32> =
(%arg0: !flow.dispatch.tensor<readonly:tensor<2x3xf32>>, %arg1: !flow.dispatch.tensor<readonly:tensor<3x1xf32>>, %arg2: !flow.dispatch.tensor<readonly:tensor<f32>>, %arg3: !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>) {
%cst_3 = arith.constant 0.000000e+00 : f32
%6 = flow.dispatch.tensor.load %arg0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%7 = flow.dispatch.tensor.load %arg1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%8 = flow.dispatch.tensor.load %arg2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%9 = tensor.empty() : tensor<2x1xf32>
%10 = linalg.fill ins(%cst_3 : f32) outs(%9 : tensor<2x1xf32>) -> tensor<2x1xf32>
%11 = linalg.matmul ins(%6, %7 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%10 : tensor<2x1xf32>) -> tensor<2x1xf32>
%12 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%8, %11 : tensor<f32>, tensor<2x1xf32>) outs(%9 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_4: f32, %out: f32):
%13 = arith.addf %in, %in_4 : f32
linalg.yield %13 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %12, %arg3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
flow.return
} count(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
flow.return %x, %y, %z : index, index, index
}
check.expect_eq(%5, %cst) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After OutlineDispatchRegions (iree-flow-outline-dispatch-regions) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
flow.executable private @_tensor_float_dispatch_0 {
flow.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
flow.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !flow.dispatch.tensor<readonly:tensor<2x3xf32>>, %arg1: !flow.dispatch.tensor<readonly:tensor<3x1xf32>>, %arg2: !flow.dispatch.tensor<readonly:tensor<f32>>, %arg3: !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>) {
%cst = arith.constant 0.000000e+00 : f32
%0 = flow.dispatch.tensor.load %arg0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%1 = flow.dispatch.tensor.load %arg1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%2 = flow.dispatch.tensor.load %arg2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%5 = linalg.matmul ins(%0, %1 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<f32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%7 = arith.addf %in, %in_0 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %6, %arg3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%cst = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_1 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_2 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_2 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_1 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_0 : tensor<1xf32>
%3 = flow.tensor.reshape %1 : tensor<1x3xf32> -> tensor<3x1xf32>
%4 = flow.tensor.reshape %2 : tensor<1xf32> -> tensor<f32>
%5 = flow.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%0, %3, %4) : (tensor<2x3xf32>, tensor<3x1xf32>, tensor<f32>) -> tensor<2x1xf32>
check.expect_eq(%5, %cst) : tensor<2x1xf32>
return
}
}
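// Reference sketch (hand-written, not produced by the IREE pipeline): the outlined dispatch
// @_tensor_float_dispatch_0_matmul_2x1x3 above performs a 2x1x3 matmul of the 2x3 input with the
// weights reshaped to 3x1, then a broadcast add of the scalar bias. A minimal NumPy check, using
// only the constants visible in these dumps, reproduces the values asserted by check.expect_eq:
//
//   # reference check, not compiler output
//   import numpy as np
//
//   lhs = np.array([[1.0, 2.0, 3.0],
//                   [4.0, 5.0, 6.0]], dtype=np.float32)      # tensor<2x3xf32> input
//   weights = np.array([[7.0, 8.0, 9.0]], dtype=np.float32)  # tensor<1x3xf32> constant
//   bias = np.float32(1.0)                                    # tensor<1xf32>, reshaped to tensor<f32>
//
//   rhs = weights.reshape(3, 1)        # flow.tensor.reshape : tensor<1x3xf32> -> tensor<3x1xf32>
//   result = lhs @ rhs + bias          # linalg.matmul followed by the linalg.generic bias add
//   print(result)                      # [[ 51.] [123.]] == dense<[[5.100000e+01], [1.230000e+02]]>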
// -----// IR Dump Before StripDebugOps (iree-util-strip-debug-ops) //----- //
flow.executable private @_tensor_float_dispatch_0 {
flow.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
flow.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !flow.dispatch.tensor<readonly:tensor<2x3xf32>>, %arg1: !flow.dispatch.tensor<readonly:tensor<3x1xf32>>, %arg2: !flow.dispatch.tensor<readonly:tensor<f32>>, %arg3: !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>) {
%cst = arith.constant 0.000000e+00 : f32
%0 = flow.dispatch.tensor.load %arg0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%1 = flow.dispatch.tensor.load %arg1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%2 = flow.dispatch.tensor.load %arg2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%5 = linalg.matmul ins(%0, %1 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<f32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%7 = arith.addf %in, %in_0 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %6, %arg3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
func.func private @_tensor_float() {
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%cst = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_1 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_2 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_2 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_1 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_0 : tensor<1xf32>
%3 = flow.tensor.reshape %1 : tensor<1x3xf32> -> tensor<3x1xf32>
%4 = flow.tensor.reshape %2 : tensor<1xf32> -> tensor<f32>
%5 = flow.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%0, %3, %4) : (tensor<2x3xf32>, tensor<3x1xf32>, tensor<f32>) -> tensor<2x1xf32>
check.expect_eq(%5, %cst) : tensor<2x1xf32>
return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After StripDebugOps (iree-util-strip-debug-ops) //----- //
flow.executable private @_tensor_float_dispatch_0 {
flow.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
flow.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !flow.dispatch.tensor<readonly:tensor<2x3xf32>>, %arg1: !flow.dispatch.tensor<readonly:tensor<3x1xf32>>, %arg2: !flow.dispatch.tensor<readonly:tensor<f32>>, %arg3: !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>) {
%cst = arith.constant 0.000000e+00 : f32
%0 = flow.dispatch.tensor.load %arg0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%1 = flow.dispatch.tensor.load %arg1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%2 = flow.dispatch.tensor.load %arg2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%5 = linalg.matmul ins(%0, %1 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<f32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%7 = arith.addf %in, %in_0 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %6, %arg3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
func.func private @_tensor_float() {
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%cst = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_1 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_2 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_2 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_1 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_0 : tensor<1xf32>
%3 = flow.tensor.reshape %1 : tensor<1x3xf32> -> tensor<3x1xf32>
%4 = flow.tensor.reshape %2 : tensor<1xf32> -> tensor<f32>
%5 = flow.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%0, %3, %4) : (tensor<2x3xf32>, tensor<3x1xf32>, tensor<f32>) -> tensor<2x1xf32>
check.expect_eq(%5, %cst) : tensor<2x1xf32>
return
}
// -----// IR Dump Before DeduplicateExecutables (iree-flow-deduplicate-executables) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
flow.executable private @_tensor_float_dispatch_0 {
flow.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
flow.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !flow.dispatch.tensor<readonly:tensor<2x3xf32>>, %arg1: !flow.dispatch.tensor<readonly:tensor<3x1xf32>>, %arg2: !flow.dispatch.tensor<readonly:tensor<f32>>, %arg3: !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>) {
%cst = arith.constant 0.000000e+00 : f32
%0 = flow.dispatch.tensor.load %arg0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%1 = flow.dispatch.tensor.load %arg1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%2 = flow.dispatch.tensor.load %arg2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%5 = linalg.matmul ins(%0, %1 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<f32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%7 = arith.addf %in, %in_0 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %6, %arg3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%cst = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_1 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_2 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_2 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_1 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_0 : tensor<1xf32>
%3 = flow.tensor.reshape %1 : tensor<1x3xf32> -> tensor<3x1xf32>
%4 = flow.tensor.reshape %2 : tensor<1xf32> -> tensor<f32>
%5 = flow.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%0, %3, %4) : (tensor<2x3xf32>, tensor<3x1xf32>, tensor<f32>) -> tensor<2x1xf32>
check.expect_eq(%5, %cst) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After DeduplicateExecutables (iree-flow-deduplicate-executables) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
flow.executable private @_tensor_float_dispatch_0 {
flow.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
flow.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !flow.dispatch.tensor<readonly:tensor<2x3xf32>>, %arg1: !flow.dispatch.tensor<readonly:tensor<3x1xf32>>, %arg2: !flow.dispatch.tensor<readonly:tensor<f32>>, %arg3: !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>) {
%cst = arith.constant 0.000000e+00 : f32
%0 = flow.dispatch.tensor.load %arg0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%1 = flow.dispatch.tensor.load %arg1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%2 = flow.dispatch.tensor.load %arg2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%5 = linalg.matmul ins(%0, %1 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<f32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%7 = arith.addf %in, %in_0 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %6, %arg3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%cst = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_1 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_2 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_2 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_1 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_0 : tensor<1xf32>
%3 = flow.tensor.reshape %1 : tensor<1x3xf32> -> tensor<3x1xf32>
%4 = flow.tensor.reshape %2 : tensor<1xf32> -> tensor<f32>
%5 = flow.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%0, %3, %4) : (tensor<2x3xf32>, tensor<3x1xf32>, tensor<f32>) -> tensor<2x1xf32>
check.expect_eq(%5, %cst) : tensor<2x1xf32>
return
}
}
// -----// IR Dump Before CleanupTensorShapes (iree-flow-cleanup-tensor-shapes) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
flow.executable private @_tensor_float_dispatch_0 {
flow.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
flow.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !flow.dispatch.tensor<readonly:tensor<2x3xf32>>, %arg1: !flow.dispatch.tensor<readonly:tensor<3x1xf32>>, %arg2: !flow.dispatch.tensor<readonly:tensor<f32>>, %arg3: !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>) {
%cst = arith.constant 0.000000e+00 : f32
%0 = flow.dispatch.tensor.load %arg0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%1 = flow.dispatch.tensor.load %arg1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%2 = flow.dispatch.tensor.load %arg2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%5 = linalg.matmul ins(%0, %1 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<f32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%7 = arith.addf %in, %in_0 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %6, %arg3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
// -----// IR Dump Before CleanupTensorShapes (iree-flow-cleanup-tensor-shapes) //----- //
func.func private @_tensor_float() {
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%cst = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_1 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_2 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_2 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_1 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_0 : tensor<1xf32>
%3 = flow.tensor.reshape %1 : tensor<1x3xf32> -> tensor<3x1xf32>
%4 = flow.tensor.reshape %2 : tensor<1xf32> -> tensor<f32>
%5 = flow.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%0, %3, %4) : (tensor<2x3xf32>, tensor<3x1xf32>, tensor<f32>) -> tensor<2x1xf32>
check.expect_eq(%5, %cst) : tensor<2x1xf32>
return
}
// -----// IR Dump After CleanupTensorShapes (iree-flow-cleanup-tensor-shapes) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After CleanupTensorShapes (iree-flow-cleanup-tensor-shapes) //----- //
func.func private @_tensor_float() {
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%cst = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_1 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_2 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_2 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_1 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_0 : tensor<1xf32>
%3 = flow.tensor.reshape %1 : tensor<1x3xf32> -> tensor<3x1xf32>
%4 = flow.tensor.reshape %2 : tensor<1xf32> -> tensor<f32>
%5 = flow.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%0, %3, %4) : (tensor<2x3xf32>, tensor<3x1xf32>, tensor<f32>) -> tensor<2x1xf32>
check.expect_eq(%5, %cst) : tensor<2x1xf32>
return
}
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
func.func private @_tensor_float() {
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%cst = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_1 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_2 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_2 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_1 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_0 : tensor<1xf32>
%3 = flow.tensor.reshape %1 : tensor<1x3xf32> -> tensor<3x1xf32>
%4 = flow.tensor.reshape %2 : tensor<1xf32> -> tensor<f32>
%5 = flow.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%0, %3, %4) : (tensor<2x3xf32>, tensor<3x1xf32>, tensor<f32>) -> tensor<2x1xf32>
check.expect_eq(%5, %cst) : tensor<2x1xf32>
return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before CSE (cse) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
flow.executable private @_tensor_float_dispatch_0 {
flow.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
flow.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !flow.dispatch.tensor<readonly:tensor<2x3xf32>>, %arg1: !flow.dispatch.tensor<readonly:tensor<3x1xf32>>, %arg2: !flow.dispatch.tensor<readonly:tensor<f32>>, %arg3: !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>) {
%cst = arith.constant 0.000000e+00 : f32
%0 = flow.dispatch.tensor.load %arg0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%1 = flow.dispatch.tensor.load %arg1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%2 = flow.dispatch.tensor.load %arg2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%5 = linalg.matmul ins(%0, %1 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<f32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%7 = arith.addf %in, %in_0 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %6, %arg3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
// -----// IR Dump Before CSE (cse) //----- //
flow.executable private @_tensor_float_dispatch_0 {
flow.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
flow.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !flow.dispatch.tensor<readonly:tensor<2x3xf32>>, %arg1: !flow.dispatch.tensor<readonly:tensor<3x1xf32>>, %arg2: !flow.dispatch.tensor<readonly:tensor<f32>>, %arg3: !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>) {
%cst = arith.constant 0.000000e+00 : f32
%0 = flow.dispatch.tensor.load %arg0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%1 = flow.dispatch.tensor.load %arg1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%2 = flow.dispatch.tensor.load %arg2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%5 = linalg.matmul ins(%0, %1 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<f32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%7 = arith.addf %in, %in_0 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %6, %arg3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
func.func private @_tensor_float() {
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%cst = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_1 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_2 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_2 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_1 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_0 : tensor<1xf32>
%3 = flow.tensor.reshape %1 : tensor<1x3xf32> -> tensor<3x1xf32>
%4 = flow.tensor.reshape %2 : tensor<1xf32> -> tensor<f32>
%5 = flow.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%0, %3, %4) : (tensor<2x3xf32>, tensor<3x1xf32>, tensor<f32>) -> tensor<2x1xf32>
check.expect_eq(%5, %cst) : tensor<2x1xf32>
return
}
// -----// IR Dump Before CSE (cse) //----- //
func.func private @_tensor_float() {
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%cst = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_1 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_2 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_2 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_1 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_0 : tensor<1xf32>
%3 = flow.tensor.reshape %1 : tensor<1x3xf32> -> tensor<3x1xf32>
%4 = flow.tensor.reshape %2 : tensor<1xf32> -> tensor<f32>
%5 = flow.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%0, %3, %4) : (tensor<2x3xf32>, tensor<3x1xf32>, tensor<f32>) -> tensor<2x1xf32>
check.expect_eq(%5, %cst) : tensor<2x1xf32>
return
}
// -----// IR Dump After CSE (cse) //----- //
flow.executable private @_tensor_float_dispatch_0 {
flow.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
flow.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !flow.dispatch.tensor<readonly:tensor<2x3xf32>>, %arg1: !flow.dispatch.tensor<readonly:tensor<3x1xf32>>, %arg2: !flow.dispatch.tensor<readonly:tensor<f32>>, %arg3: !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>) {
%cst = arith.constant 0.000000e+00 : f32
%0 = flow.dispatch.tensor.load %arg0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%1 = flow.dispatch.tensor.load %arg1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%2 = flow.dispatch.tensor.load %arg2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%5 = linalg.matmul ins(%0, %1 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<f32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%7 = arith.addf %in, %in_0 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %6, %arg3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
// -----// IR Dump After CSE (cse) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After CSE (cse) //----- //
func.func private @_tensor_float() {
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%cst = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_1 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_2 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_2 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_1 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_0 : tensor<1xf32>
%3 = flow.tensor.reshape %1 : tensor<1x3xf32> -> tensor<3x1xf32>
%4 = flow.tensor.reshape %2 : tensor<1xf32> -> tensor<f32>
%5 = flow.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%0, %3, %4) : (tensor<2x3xf32>, tensor<3x1xf32>, tensor<f32>) -> tensor<2x1xf32>
check.expect_eq(%5, %cst) : tensor<2x1xf32>
return
}
// -----// IR Dump Before SymbolDCE (symbol-dce) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
flow.executable private @_tensor_float_dispatch_0 {
flow.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
flow.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !flow.dispatch.tensor<readonly:tensor<2x3xf32>>, %arg1: !flow.dispatch.tensor<readonly:tensor<3x1xf32>>, %arg2: !flow.dispatch.tensor<readonly:tensor<f32>>, %arg3: !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>) {
%cst = arith.constant 0.000000e+00 : f32
%0 = flow.dispatch.tensor.load %arg0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%1 = flow.dispatch.tensor.load %arg1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%2 = flow.dispatch.tensor.load %arg2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%5 = linalg.matmul ins(%0, %1 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<f32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%7 = arith.addf %in, %in_0 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %6, %arg3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%cst = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_1 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_2 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_2 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_1 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_0 : tensor<1xf32>
%3 = flow.tensor.reshape %1 : tensor<1x3xf32> -> tensor<3x1xf32>
%4 = flow.tensor.reshape %2 : tensor<1xf32> -> tensor<f32>
%5 = flow.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%0, %3, %4) : (tensor<2x3xf32>, tensor<3x1xf32>, tensor<f32>) -> tensor<2x1xf32>
check.expect_eq(%5, %cst) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After SymbolDCE (symbol-dce) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
flow.executable private @_tensor_float_dispatch_0 {
flow.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
flow.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !flow.dispatch.tensor<readonly:tensor<2x3xf32>>, %arg1: !flow.dispatch.tensor<readonly:tensor<3x1xf32>>, %arg2: !flow.dispatch.tensor<readonly:tensor<f32>>, %arg3: !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>) {
%cst = arith.constant 0.000000e+00 : f32
%0 = flow.dispatch.tensor.load %arg0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%1 = flow.dispatch.tensor.load %arg1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%2 = flow.dispatch.tensor.load %arg2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%5 = linalg.matmul ins(%0, %1 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<f32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%7 = arith.addf %in, %in_0 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %6, %arg3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%cst = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_1 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_2 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_2 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_1 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_0 : tensor<1xf32>
%3 = flow.tensor.reshape %1 : tensor<1x3xf32> -> tensor<3x1xf32>
%4 = flow.tensor.reshape %2 : tensor<1xf32> -> tensor<f32>
%5 = flow.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%0, %3, %4) : (tensor<2x3xf32>, tensor<3x1xf32>, tensor<f32>) -> tensor<2x1xf32>
check.expect_eq(%5, %cst) : tensor<2x1xf32>
return
}
}
// -----// IR Dump Before VerifyInput (iree-stream-verify-input) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
flow.executable private @_tensor_float_dispatch_0 {
flow.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
flow.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !flow.dispatch.tensor<readonly:tensor<2x3xf32>>, %arg1: !flow.dispatch.tensor<readonly:tensor<3x1xf32>>, %arg2: !flow.dispatch.tensor<readonly:tensor<f32>>, %arg3: !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>) {
%cst = arith.constant 0.000000e+00 : f32
%0 = flow.dispatch.tensor.load %arg0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%1 = flow.dispatch.tensor.load %arg1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%2 = flow.dispatch.tensor.load %arg2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%5 = linalg.matmul ins(%0, %1 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<f32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%7 = arith.addf %in, %in_0 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %6, %arg3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%cst = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_1 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_2 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_2 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_1 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_0 : tensor<1xf32>
%3 = flow.tensor.reshape %1 : tensor<1x3xf32> -> tensor<3x1xf32>
%4 = flow.tensor.reshape %2 : tensor<1xf32> -> tensor<f32>
%5 = flow.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%0, %3, %4) : (tensor<2x3xf32>, tensor<3x1xf32>, tensor<f32>) -> tensor<2x1xf32>
check.expect_eq(%5, %cst) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After VerifyInput (iree-stream-verify-input) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
flow.executable private @_tensor_float_dispatch_0 {
flow.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
flow.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !flow.dispatch.tensor<readonly:tensor<2x3xf32>>, %arg1: !flow.dispatch.tensor<readonly:tensor<3x1xf32>>, %arg2: !flow.dispatch.tensor<readonly:tensor<f32>>, %arg3: !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>) {
%cst = arith.constant 0.000000e+00 : f32
%0 = flow.dispatch.tensor.load %arg0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%1 = flow.dispatch.tensor.load %arg1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%2 = flow.dispatch.tensor.load %arg2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%5 = linalg.matmul ins(%0, %1 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<f32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%7 = arith.addf %in, %in_0 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %6, %arg3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%cst = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_1 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_2 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_2 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_1 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_0 : tensor<1xf32>
%3 = flow.tensor.reshape %1 : tensor<1x3xf32> -> tensor<3x1xf32>
%4 = flow.tensor.reshape %2 : tensor<1xf32> -> tensor<f32>
%5 = flow.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%0, %3, %4) : (tensor<2x3xf32>, tensor<3x1xf32>, tensor<f32>) -> tensor<2x1xf32>
check.expect_eq(%5, %cst) : tensor<2x1xf32>
return
}
}
// -----// IR Dump Before OutlineConstants (iree-stream-outline-constants) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
flow.executable private @_tensor_float_dispatch_0 {
flow.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
flow.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !flow.dispatch.tensor<readonly:tensor<2x3xf32>>, %arg1: !flow.dispatch.tensor<readonly:tensor<3x1xf32>>, %arg2: !flow.dispatch.tensor<readonly:tensor<f32>>, %arg3: !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>) {
%cst = arith.constant 0.000000e+00 : f32
%0 = flow.dispatch.tensor.load %arg0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%1 = flow.dispatch.tensor.load %arg1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%2 = flow.dispatch.tensor.load %arg2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%5 = linalg.matmul ins(%0, %1 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<f32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%7 = arith.addf %in, %in_0 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %6, %arg3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%cst = arith.constant dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_1 = arith.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_2 = arith.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = util.optimization_barrier %cst_2 : tensor<2x3xf32>
%1 = util.optimization_barrier %cst_1 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst_0 : tensor<1xf32>
%3 = flow.tensor.reshape %1 : tensor<1x3xf32> -> tensor<3x1xf32>
%4 = flow.tensor.reshape %2 : tensor<1xf32> -> tensor<f32>
%5 = flow.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%0, %3, %4) : (tensor<2x3xf32>, tensor<3x1xf32>, tensor<f32>) -> tensor<2x1xf32>
check.expect_eq(%5, %cst) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After OutlineConstants (iree-stream-outline-constants) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private @_constant {noinline} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
util.global private @_constant_0 {noinline} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
util.global private @_constant_1 {noinline} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
flow.executable private @_tensor_float_dispatch_0 {
flow.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
flow.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !flow.dispatch.tensor<readonly:tensor<2x3xf32>>, %arg1: !flow.dispatch.tensor<readonly:tensor<3x1xf32>>, %arg2: !flow.dispatch.tensor<readonly:tensor<f32>>, %arg3: !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>) {
%cst = arith.constant 0.000000e+00 : f32
%0 = flow.dispatch.tensor.load %arg0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%1 = flow.dispatch.tensor.load %arg1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%2 = flow.dispatch.tensor.load %arg2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%5 = linalg.matmul ins(%0, %1 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<f32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%7 = arith.addf %in, %in_0 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %6, %arg3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%_constant = util.global.load @_constant : tensor<2x1xf32>
%cst = arith.constant dense<1.000000e+00> : tensor<1xf32>
%_constant_0 = util.global.load @_constant_0 : tensor<1x3xf32>
%_constant_1 = util.global.load @_constant_1 : tensor<2x3xf32>
%0 = util.optimization_barrier %_constant_1 : tensor<2x3xf32>
%1 = util.optimization_barrier %_constant_0 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst : tensor<1xf32>
%3 = flow.tensor.reshape %1 : tensor<1x3xf32> -> tensor<3x1xf32>
%4 = flow.tensor.reshape %2 : tensor<1xf32> -> tensor<f32>
%5 = flow.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%0, %3, %4) : (tensor<2x3xf32>, tensor<3x1xf32>, tensor<f32>) -> tensor<2x1xf32>
check.expect_eq(%5, %_constant) : tensor<2x1xf32>
return
}
}
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
func.func private @_tensor_float() {
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%_constant = util.global.load @_constant : tensor<2x1xf32>
%cst = arith.constant dense<1.000000e+00> : tensor<1xf32>
%_constant_0 = util.global.load @_constant_0 : tensor<1x3xf32>
%_constant_1 = util.global.load @_constant_1 : tensor<2x3xf32>
%0 = util.optimization_barrier %_constant_1 : tensor<2x3xf32>
%1 = util.optimization_barrier %_constant_0 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst : tensor<1xf32>
%3 = flow.tensor.reshape %1 : tensor<1x3xf32> -> tensor<3x1xf32>
%4 = flow.tensor.reshape %2 : tensor<1xf32> -> tensor<f32>
%5 = flow.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%0, %3, %4) : (tensor<2x3xf32>, tensor<3x1xf32>, tensor<f32>) -> tensor<2x1xf32>
check.expect_eq(%5, %_constant) : tensor<2x1xf32>
return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before CSE (cse) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After CSE (cse) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before SimplifyGlobalAccesses (iree-util-simplify-global-accesses) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After SimplifyGlobalAccesses (iree-util-simplify-global-accesses) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
func.func private @_tensor_float() {
%cst = arith.constant dense<1.000000e+00> : tensor<1xf32>
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%_constant = util.global.load @_constant : tensor<2x1xf32>
%_constant_0 = util.global.load @_constant_0 : tensor<1x3xf32>
%_constant_1 = util.global.load @_constant_1 : tensor<2x3xf32>
%0 = util.optimization_barrier %_constant_1 : tensor<2x3xf32>
%1 = util.optimization_barrier %_constant_0 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst : tensor<1xf32>
%3 = flow.tensor.reshape %1 : tensor<1x3xf32> -> tensor<3x1xf32>
%4 = flow.tensor.reshape %2 : tensor<1xf32> -> tensor<f32>
%5 = flow.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%0, %3, %4) : (tensor<2x3xf32>, tensor<3x1xf32>, tensor<f32>) -> tensor<2x1xf32>
check.expect_eq(%5, %_constant) : tensor<2x1xf32>
return
}
// -----// IR Dump Before CSE (cse) //----- //
func.func private @_tensor_float() {
%cst = arith.constant dense<1.000000e+00> : tensor<1xf32>
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%_constant = util.global.load @_constant : tensor<2x1xf32>
%_constant_0 = util.global.load @_constant_0 : tensor<1x3xf32>
%_constant_1 = util.global.load @_constant_1 : tensor<2x3xf32>
%0 = util.optimization_barrier %_constant_1 : tensor<2x3xf32>
%1 = util.optimization_barrier %_constant_0 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst : tensor<1xf32>
%3 = flow.tensor.reshape %1 : tensor<1x3xf32> -> tensor<3x1xf32>
%4 = flow.tensor.reshape %2 : tensor<1xf32> -> tensor<f32>
%5 = flow.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%0, %3, %4) : (tensor<2x3xf32>, tensor<3x1xf32>, tensor<f32>) -> tensor<2x1xf32>
check.expect_eq(%5, %_constant) : tensor<2x1xf32>
return
}
// -----// IR Dump After CSE (cse) //----- //
func.func private @_tensor_float() {
%cst = arith.constant dense<1.000000e+00> : tensor<1xf32>
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%_constant = util.global.load @_constant : tensor<2x1xf32>
%_constant_0 = util.global.load @_constant_0 : tensor<1x3xf32>
%_constant_1 = util.global.load @_constant_1 : tensor<2x3xf32>
%0 = util.optimization_barrier %_constant_1 : tensor<2x3xf32>
%1 = util.optimization_barrier %_constant_0 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst : tensor<1xf32>
%3 = flow.tensor.reshape %1 : tensor<1x3xf32> -> tensor<3x1xf32>
%4 = flow.tensor.reshape %2 : tensor<1xf32> -> tensor<f32>
%5 = flow.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%0, %3, %4) : (tensor<2x3xf32>, tensor<3x1xf32>, tensor<f32>) -> tensor<2x1xf32>
check.expect_eq(%5, %_constant) : tensor<2x1xf32>
return
}
// -----// IR Dump Before SimplifyGlobalAccesses (iree-util-simplify-global-accesses) //----- //
func.func private @_tensor_float() {
%cst = arith.constant dense<1.000000e+00> : tensor<1xf32>
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%_constant = util.global.load @_constant : tensor<2x1xf32>
%_constant_0 = util.global.load @_constant_0 : tensor<1x3xf32>
%_constant_1 = util.global.load @_constant_1 : tensor<2x3xf32>
%0 = util.optimization_barrier %_constant_1 : tensor<2x3xf32>
%1 = util.optimization_barrier %_constant_0 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst : tensor<1xf32>
%3 = flow.tensor.reshape %1 : tensor<1x3xf32> -> tensor<3x1xf32>
%4 = flow.tensor.reshape %2 : tensor<1xf32> -> tensor<f32>
%5 = flow.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%0, %3, %4) : (tensor<2x3xf32>, tensor<3x1xf32>, tensor<f32>) -> tensor<2x1xf32>
check.expect_eq(%5, %_constant) : tensor<2x1xf32>
return
}
// -----// IR Dump After SimplifyGlobalAccesses (iree-util-simplify-global-accesses) //----- //
func.func private @_tensor_float() {
%_constant = util.global.load @_constant : tensor<2x1xf32>
%_constant_0 = util.global.load @_constant_0 : tensor<1x3xf32>
%_constant_1 = util.global.load @_constant_1 : tensor<2x3xf32>
%cst = arith.constant dense<1.000000e+00> : tensor<1xf32>
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%0 = util.optimization_barrier %_constant_1 : tensor<2x3xf32>
%1 = util.optimization_barrier %_constant_0 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst : tensor<1xf32>
%3 = flow.tensor.reshape %1 : tensor<1x3xf32> -> tensor<3x1xf32>
%4 = flow.tensor.reshape %2 : tensor<1xf32> -> tensor<f32>
%5 = flow.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%0, %3, %4) : (tensor<2x3xf32>, tensor<3x1xf32>, tensor<f32>) -> tensor<2x1xf32>
check.expect_eq(%5, %_constant) : tensor<2x1xf32>
return
}
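// Comparing this dump with the one just before it, the only visible effect of
// SimplifyGlobalAccesses on @_tensor_float is a reordering: the util.global.load ops
// are hoisted above the arith.constant ops so the global accesses sit together at the
// top of the function.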
// -----// IR Dump Before ApplyPatterns (iree-util-apply-patterns) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private @_constant {noinline} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
util.global private @_constant_0 {noinline} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
util.global private @_constant_1 {noinline} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
flow.executable private @_tensor_float_dispatch_0 {
flow.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
flow.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !flow.dispatch.tensor<readonly:tensor<2x3xf32>>, %arg1: !flow.dispatch.tensor<readonly:tensor<3x1xf32>>, %arg2: !flow.dispatch.tensor<readonly:tensor<f32>>, %arg3: !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>) {
%cst = arith.constant 0.000000e+00 : f32
%0 = flow.dispatch.tensor.load %arg0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%1 = flow.dispatch.tensor.load %arg1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%2 = flow.dispatch.tensor.load %arg2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%5 = linalg.matmul ins(%0, %1 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<f32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%7 = arith.addf %in, %in_0 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %6, %arg3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%_constant = util.global.load @_constant : tensor<2x1xf32>
%_constant_0 = util.global.load @_constant_0 : tensor<1x3xf32>
%_constant_1 = util.global.load @_constant_1 : tensor<2x3xf32>
%cst = arith.constant dense<1.000000e+00> : tensor<1xf32>
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%0 = util.optimization_barrier %_constant_1 : tensor<2x3xf32>
%1 = util.optimization_barrier %_constant_0 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst : tensor<1xf32>
%3 = flow.tensor.reshape %1 : tensor<1x3xf32> -> tensor<3x1xf32>
%4 = flow.tensor.reshape %2 : tensor<1xf32> -> tensor<f32>
%5 = flow.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%0, %3, %4) : (tensor<2x3xf32>, tensor<3x1xf32>, tensor<f32>) -> tensor<2x1xf32>
check.expect_eq(%5, %_constant) : tensor<2x1xf32>
return
}
}
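// The @_constant value above is just the precomputed result of the dispatch body: a
// 2x3 * 3x1 matmul followed by the broadcast bias add. A minimal Python sketch of that
// arithmetic (assuming the usual row-by-column linalg.matmul semantics):
lhs  = [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]   # tensor<2x3xf32> (@_constant_1)
rhs  = [[7.0], [8.0], [9.0]]                # tensor<3x1xf32> (reshaped @_constant_0)
bias = 1.0                                  # tensor<f32> (reshaped 1-element bias)
out  = [[sum(lhs[i][k] * rhs[k][0] for k in range(3)) + bias] for i in range(2)]
print(out)  # [[51.0], [123.0]] == dense<[[5.100000e+01], [1.230000e+02]]>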
// -----// IR Dump After ApplyPatterns (iree-util-apply-patterns) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private @_constant {noinline} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
util.global private @_constant_0 {noinline} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
util.global private @_constant_1 {noinline} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
flow.executable private @_tensor_float_dispatch_0 {
flow.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
flow.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !flow.dispatch.tensor<readonly:tensor<2x3xf32>>, %arg1: !flow.dispatch.tensor<readonly:tensor<3x1xf32>>, %arg2: !flow.dispatch.tensor<readonly:tensor<f32>>, %arg3: !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>) {
%cst = arith.constant 0.000000e+00 : f32
%0 = flow.dispatch.tensor.load %arg0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%1 = flow.dispatch.tensor.load %arg1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%2 = flow.dispatch.tensor.load %arg2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%5 = linalg.matmul ins(%0, %1 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<f32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%7 = arith.addf %in, %in_0 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %6, %arg3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%cst = arith.constant dense<1.000000e+00> : tensor<1xf32>
%_constant = util.global.load @_constant : tensor<2x1xf32>
%_constant_0 = util.global.load @_constant_0 : tensor<1x3xf32>
%_constant_1 = util.global.load @_constant_1 : tensor<2x3xf32>
%0 = util.optimization_barrier %_constant_1 : tensor<2x3xf32>
%1 = util.optimization_barrier %_constant_0 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst : tensor<1xf32>
%3 = flow.tensor.reshape %1 : tensor<1x3xf32> -> tensor<3x1xf32>
%4 = flow.tensor.reshape %2 : tensor<1xf32> -> tensor<f32>
%5 = flow.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%0, %3, %4) : (tensor<2x3xf32>, tensor<3x1xf32>, tensor<f32>) -> tensor<2x1xf32>
check.expect_eq(%5, %_constant) : tensor<2x1xf32>
return
}
}
// -----// IR Dump Before FoldGlobals (iree-util-fold-globals) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private @_constant {noinline} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
util.global private @_constant_0 {noinline} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
util.global private @_constant_1 {noinline} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
flow.executable private @_tensor_float_dispatch_0 {
flow.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
flow.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !flow.dispatch.tensor<readonly:tensor<2x3xf32>>, %arg1: !flow.dispatch.tensor<readonly:tensor<3x1xf32>>, %arg2: !flow.dispatch.tensor<readonly:tensor<f32>>, %arg3: !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>) {
%cst = arith.constant 0.000000e+00 : f32
%0 = flow.dispatch.tensor.load %arg0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%1 = flow.dispatch.tensor.load %arg1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%2 = flow.dispatch.tensor.load %arg2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%5 = linalg.matmul ins(%0, %1 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<f32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%7 = arith.addf %in, %in_0 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %6, %arg3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%cst = arith.constant dense<1.000000e+00> : tensor<1xf32>
%_constant = util.global.load @_constant : tensor<2x1xf32>
%_constant_0 = util.global.load @_constant_0 : tensor<1x3xf32>
%_constant_1 = util.global.load @_constant_1 : tensor<2x3xf32>
%0 = util.optimization_barrier %_constant_1 : tensor<2x3xf32>
%1 = util.optimization_barrier %_constant_0 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst : tensor<1xf32>
%3 = flow.tensor.reshape %1 : tensor<1x3xf32> -> tensor<3x1xf32>
%4 = flow.tensor.reshape %2 : tensor<1xf32> -> tensor<f32>
%5 = flow.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%0, %3, %4) : (tensor<2x3xf32>, tensor<3x1xf32>, tensor<f32>) -> tensor<2x1xf32>
check.expect_eq(%5, %_constant) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After FoldGlobals (iree-util-fold-globals) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private @_constant {noinline} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
util.global private @_constant_0 {noinline} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
util.global private @_constant_1 {noinline} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
flow.executable private @_tensor_float_dispatch_0 {
flow.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
flow.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !flow.dispatch.tensor<readonly:tensor<2x3xf32>>, %arg1: !flow.dispatch.tensor<readonly:tensor<3x1xf32>>, %arg2: !flow.dispatch.tensor<readonly:tensor<f32>>, %arg3: !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>) {
%cst = arith.constant 0.000000e+00 : f32
%0 = flow.dispatch.tensor.load %arg0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%1 = flow.dispatch.tensor.load %arg1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%2 = flow.dispatch.tensor.load %arg2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%5 = linalg.matmul ins(%0, %1 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<f32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%7 = arith.addf %in, %in_0 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %6, %arg3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%cst = arith.constant dense<1.000000e+00> : tensor<1xf32>
%_constant = util.global.load @_constant : tensor<2x1xf32>
%_constant_0 = util.global.load @_constant_0 : tensor<1x3xf32>
%_constant_1 = util.global.load @_constant_1 : tensor<2x3xf32>
%0 = util.optimization_barrier %_constant_1 : tensor<2x3xf32>
%1 = util.optimization_barrier %_constant_0 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst : tensor<1xf32>
%3 = flow.tensor.reshape %1 : tensor<1x3xf32> -> tensor<3x1xf32>
%4 = flow.tensor.reshape %2 : tensor<1xf32> -> tensor<f32>
%5 = flow.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%0, %3, %4) : (tensor<2x3xf32>, tensor<3x1xf32>, tensor<f32>) -> tensor<2x1xf32>
check.expect_eq(%5, %_constant) : tensor<2x1xf32>
return
}
}
// -----// IR Dump Before FuseGlobals (iree-util-fuse-globals) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private @_constant {noinline} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
util.global private @_constant_0 {noinline} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
util.global private @_constant_1 {noinline} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
flow.executable private @_tensor_float_dispatch_0 {
flow.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
flow.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !flow.dispatch.tensor<readonly:tensor<2x3xf32>>, %arg1: !flow.dispatch.tensor<readonly:tensor<3x1xf32>>, %arg2: !flow.dispatch.tensor<readonly:tensor<f32>>, %arg3: !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>) {
%cst = arith.constant 0.000000e+00 : f32
%0 = flow.dispatch.tensor.load %arg0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%1 = flow.dispatch.tensor.load %arg1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%2 = flow.dispatch.tensor.load %arg2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%5 = linalg.matmul ins(%0, %1 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<f32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%7 = arith.addf %in, %in_0 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %6, %arg3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%cst = arith.constant dense<1.000000e+00> : tensor<1xf32>
%_constant = util.global.load @_constant : tensor<2x1xf32>
%_constant_0 = util.global.load @_constant_0 : tensor<1x3xf32>
%_constant_1 = util.global.load @_constant_1 : tensor<2x3xf32>
%0 = util.optimization_barrier %_constant_1 : tensor<2x3xf32>
%1 = util.optimization_barrier %_constant_0 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst : tensor<1xf32>
%3 = flow.tensor.reshape %1 : tensor<1x3xf32> -> tensor<3x1xf32>
%4 = flow.tensor.reshape %2 : tensor<1xf32> -> tensor<f32>
%5 = flow.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%0, %3, %4) : (tensor<2x3xf32>, tensor<3x1xf32>, tensor<f32>) -> tensor<2x1xf32>
check.expect_eq(%5, %_constant) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After FuseGlobals (iree-util-fuse-globals) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private @_constant {noinline} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
util.global private @_constant_0 {noinline} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
util.global private @_constant_1 {noinline} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
flow.executable private @_tensor_float_dispatch_0 {
flow.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
flow.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !flow.dispatch.tensor<readonly:tensor<2x3xf32>>, %arg1: !flow.dispatch.tensor<readonly:tensor<3x1xf32>>, %arg2: !flow.dispatch.tensor<readonly:tensor<f32>>, %arg3: !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>) {
%cst = arith.constant 0.000000e+00 : f32
%0 = flow.dispatch.tensor.load %arg0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%1 = flow.dispatch.tensor.load %arg1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%2 = flow.dispatch.tensor.load %arg2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%5 = linalg.matmul ins(%0, %1 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<f32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%7 = arith.addf %in, %in_0 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %6, %arg3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%cst = arith.constant dense<1.000000e+00> : tensor<1xf32>
%_constant = util.global.load @_constant : tensor<2x1xf32>
%_constant_0 = util.global.load @_constant_0 : tensor<1x3xf32>
%_constant_1 = util.global.load @_constant_1 : tensor<2x3xf32>
%0 = util.optimization_barrier %_constant_1 : tensor<2x3xf32>
%1 = util.optimization_barrier %_constant_0 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst : tensor<1xf32>
%3 = flow.tensor.reshape %1 : tensor<1x3xf32> -> tensor<3x1xf32>
%4 = flow.tensor.reshape %2 : tensor<1xf32> -> tensor<f32>
%5 = flow.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%0, %3, %4) : (tensor<2x3xf32>, tensor<3x1xf32>, tensor<f32>) -> tensor<2x1xf32>
check.expect_eq(%5, %_constant) : tensor<2x1xf32>
return
}
}
// -----// IR Dump Before IPO (iree-util-ipo) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private @_constant {noinline} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
util.global private @_constant_0 {noinline} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
util.global private @_constant_1 {noinline} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
flow.executable private @_tensor_float_dispatch_0 {
flow.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
flow.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !flow.dispatch.tensor<readonly:tensor<2x3xf32>>, %arg1: !flow.dispatch.tensor<readonly:tensor<3x1xf32>>, %arg2: !flow.dispatch.tensor<readonly:tensor<f32>>, %arg3: !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>) {
%cst = arith.constant 0.000000e+00 : f32
%0 = flow.dispatch.tensor.load %arg0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%1 = flow.dispatch.tensor.load %arg1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%2 = flow.dispatch.tensor.load %arg2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%5 = linalg.matmul ins(%0, %1 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<f32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%7 = arith.addf %in, %in_0 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %6, %arg3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%cst = arith.constant dense<1.000000e+00> : tensor<1xf32>
%_constant = util.global.load @_constant : tensor<2x1xf32>
%_constant_0 = util.global.load @_constant_0 : tensor<1x3xf32>
%_constant_1 = util.global.load @_constant_1 : tensor<2x3xf32>
%0 = util.optimization_barrier %_constant_1 : tensor<2x3xf32>
%1 = util.optimization_barrier %_constant_0 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst : tensor<1xf32>
%3 = flow.tensor.reshape %1 : tensor<1x3xf32> -> tensor<3x1xf32>
%4 = flow.tensor.reshape %2 : tensor<1xf32> -> tensor<f32>
%5 = flow.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%0, %3, %4) : (tensor<2x3xf32>, tensor<3x1xf32>, tensor<f32>) -> tensor<2x1xf32>
check.expect_eq(%5, %_constant) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After IPO (iree-util-ipo) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private @_constant {noinline} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
util.global private @_constant_0 {noinline} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
util.global private @_constant_1 {noinline} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
flow.executable private @_tensor_float_dispatch_0 {
flow.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
flow.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !flow.dispatch.tensor<readonly:tensor<2x3xf32>>, %arg1: !flow.dispatch.tensor<readonly:tensor<3x1xf32>>, %arg2: !flow.dispatch.tensor<readonly:tensor<f32>>, %arg3: !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>) {
%cst = arith.constant 0.000000e+00 : f32
%0 = flow.dispatch.tensor.load %arg0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%1 = flow.dispatch.tensor.load %arg1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%2 = flow.dispatch.tensor.load %arg2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%5 = linalg.matmul ins(%0, %1 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<f32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%7 = arith.addf %in, %in_0 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %6, %arg3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%cst = arith.constant dense<1.000000e+00> : tensor<1xf32>
%_constant = util.global.load @_constant : tensor<2x1xf32>
%_constant_0 = util.global.load @_constant_0 : tensor<1x3xf32>
%_constant_1 = util.global.load @_constant_1 : tensor<2x3xf32>
%0 = util.optimization_barrier %_constant_1 : tensor<2x3xf32>
%1 = util.optimization_barrier %_constant_0 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst : tensor<1xf32>
%3 = flow.tensor.reshape %1 : tensor<1x3xf32> -> tensor<3x1xf32>
%4 = flow.tensor.reshape %2 : tensor<1xf32> -> tensor<f32>
%5 = flow.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%0, %3, %4) : (tensor<2x3xf32>, tensor<3x1xf32>, tensor<f32>) -> tensor<2x1xf32>
check.expect_eq(%5, %_constant) : tensor<2x1xf32>
return
}
}
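// Note: the module is unchanged across FoldGlobals, FuseGlobals, and IPO here; the
// "Before" and "After" dumps for those three passes are identical, so the next
// structural change comes from ConvertToStream below.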
// -----// IR Dump Before ConvertToStream (iree-stream-conversion) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private @_constant {noinline} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
util.global private @_constant_0 {noinline} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
util.global private @_constant_1 {noinline} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
flow.executable private @_tensor_float_dispatch_0 {
flow.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
flow.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !flow.dispatch.tensor<readonly:tensor<2x3xf32>>, %arg1: !flow.dispatch.tensor<readonly:tensor<3x1xf32>>, %arg2: !flow.dispatch.tensor<readonly:tensor<f32>>, %arg3: !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>) {
%cst = arith.constant 0.000000e+00 : f32
%0 = flow.dispatch.tensor.load %arg0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%1 = flow.dispatch.tensor.load %arg1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%2 = flow.dispatch.tensor.load %arg2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x1xf32>) -> tensor<2x1xf32>
%5 = linalg.matmul ins(%0, %1 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%4 : tensor<2x1xf32>) -> tensor<2x1xf32>
%6 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%2, %5 : tensor<f32>, tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%7 = arith.addf %in, %in_0 : f32
linalg.yield %7 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %6, %arg3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%cst = arith.constant dense<1.000000e+00> : tensor<1xf32>
%_constant = util.global.load @_constant : tensor<2x1xf32>
%_constant_0 = util.global.load @_constant_0 : tensor<1x3xf32>
%_constant_1 = util.global.load @_constant_1 : tensor<2x3xf32>
%0 = util.optimization_barrier %_constant_1 : tensor<2x3xf32>
%1 = util.optimization_barrier %_constant_0 : tensor<1x3xf32>
%2 = util.optimization_barrier %cst : tensor<1xf32>
%3 = flow.tensor.reshape %1 : tensor<1x3xf32> -> tensor<3x1xf32>
%4 = flow.tensor.reshape %2 : tensor<1xf32> -> tensor<f32>
%5 = flow.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%0, %3, %4) : (tensor<2x3xf32>, tensor<3x1xf32>, tensor<f32>) -> tensor<2x1xf32>
check.expect_eq(%5, %_constant) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After ConvertToStream (iree-stream-conversion) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private @_constant : !stream.resource<constant>
util.global private @_constant__size : index
util.initializer {
%cst = stream.tensor.constant : tensor<2x1xf32> in !stream.resource<constant> = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %0, @_constant__size : index
util.initializer.return
}
util.global private @_constant_0 : !stream.resource<constant>
util.global private @_constant_0__size : index
util.initializer {
%cst = stream.tensor.constant : tensor<1x3xf32> in !stream.resource<constant> = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant_0 : !stream.resource<constant>
util.global.store %0, @_constant_0__size : index
util.initializer.return
}
util.global private @_constant_1 : !stream.resource<constant>
util.global private @_constant_1__size : index
util.initializer {
%cst = stream.tensor.constant : tensor<2x3xf32> in !stream.resource<constant> = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant_1 : !stream.resource<constant>
util.global.store %0, @_constant_1__size : index
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%cst = arith.constant 0.000000e+00 : f32
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%cst = stream.tensor.constant : tensor<1xf32> in !stream.resource<constant> = dense<1.000000e+00> : tensor<1xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
%1 = stream.async.transfer %cst : !stream.resource<constant>{%0} -> !stream.resource<*>{%0}
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant__size = util.global.load @_constant__size : index
%2 = stream.async.transfer %_constant : !stream.resource<constant>{%_constant__size} -> !stream.resource<*>{%_constant__size}
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_0__size = util.global.load @_constant_0__size : index
%3 = stream.async.transfer %_constant_0 : !stream.resource<constant>{%_constant_0__size} -> !stream.resource<*>{%_constant_0__size}
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%_constant_1__size = util.global.load @_constant_1__size : index
%4 = stream.async.transfer %_constant_1 : !stream.resource<constant>{%_constant_1__size} -> !stream.resource<*>{%_constant_1__size}
%5 = util.optimization_barrier %4 : !stream.resource<*>
%6 = util.optimization_barrier %3 : !stream.resource<*>
%7 = util.optimization_barrier %1 : !stream.resource<*>
%8 = stream.resource.size %6 : !stream.resource<*>
%9 = stream.tensor.sizeof tensor<3x1xf32> : index
%10 = stream.tensor.clone %6 : tensor<1x3xf32> in !stream.resource<*>{%8} -> tensor<3x1xf32> in !stream.resource<*>{%9}
%11 = stream.resource.size %7 : !stream.resource<*>
%12 = stream.tensor.sizeof tensor<f32> : index
%13 = stream.tensor.clone %7 : tensor<1xf32> in !stream.resource<*>{%11} -> tensor<f32> in !stream.resource<*>{%12}
%c0 = arith.constant 0 : index
%14 = stream.resource.size %5 : !stream.resource<*>
%15 = stream.tensor.sizeof tensor<2x1xf32> : index
%16 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%5[%c0 to %14 for %14], %10[%c0 to %9 for %9], %13[%c0 to %12 for %12]) : (!stream.resource<*>{%14}, !stream.resource<*>{%9}, !stream.resource<*>{%12}) -> !stream.resource<*>{%15}
%17 = stream.async.transfer %16 : !stream.resource<*>{%15} -> !stream.resource<external>{%15}
%18 = stream.tensor.export %17 : tensor<2x1xf32> in !stream.resource<external>{%15} -> tensor<2x1xf32>
%19 = stream.async.transfer %2 : !stream.resource<*>{%_constant__size} -> !stream.resource<external>{%_constant__size}
%20 = stream.tensor.export %19 : tensor<2x1xf32> in !stream.resource<external>{%_constant__size} -> tensor<2x1xf32>
check.expect_eq(%18, %20) : tensor<2x1xf32>
return
}
}
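// After ConvertToStream, tensors become !stream.resource values with explicit byte
// sizes: the flow.tensor.reshape ops turn into stream.tensor.clone, the dispatch
// becomes a stream.async.dispatch over sized resource ranges, and stream.tensor.sizeof
// supplies those sizes. A small Python sketch of what the sizeof values work out to
// for these static shapes, assuming densely packed 4-byte f32 elements (hypothetical
// helper, not an IREE API):
from functools import reduce
def sizeof_f32(shape):
    # product of the dims (1 for a 0-d tensor) times 4 bytes per f32 element
    return reduce(lambda a, b: a * b, shape, 1) * 4
print(sizeof_f32([3, 1]), sizeof_f32([]), sizeof_f32([2, 1]))  # 12 4 8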
// -----// IR Dump Before VerifyLoweringToTensors (iree-stream-verify-lowering-to-tensors) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private @_constant : !stream.resource<constant>
util.global private @_constant__size : index
util.initializer {
%cst = stream.tensor.constant : tensor<2x1xf32> in !stream.resource<constant> = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %0, @_constant__size : index
util.initializer.return
}
util.global private @_constant_0 : !stream.resource<constant>
util.global private @_constant_0__size : index
util.initializer {
%cst = stream.tensor.constant : tensor<1x3xf32> in !stream.resource<constant> = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant_0 : !stream.resource<constant>
util.global.store %0, @_constant_0__size : index
util.initializer.return
}
util.global private @_constant_1 : !stream.resource<constant>
util.global private @_constant_1__size : index
util.initializer {
%cst = stream.tensor.constant : tensor<2x3xf32> in !stream.resource<constant> = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant_1 : !stream.resource<constant>
util.global.store %0, @_constant_1__size : index
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%cst = arith.constant 0.000000e+00 : f32
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%cst = stream.tensor.constant : tensor<1xf32> in !stream.resource<constant> = dense<1.000000e+00> : tensor<1xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
%1 = stream.async.transfer %cst : !stream.resource<constant>{%0} -> !stream.resource<*>{%0}
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant__size = util.global.load @_constant__size : index
%2 = stream.async.transfer %_constant : !stream.resource<constant>{%_constant__size} -> !stream.resource<*>{%_constant__size}
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_0__size = util.global.load @_constant_0__size : index
%3 = stream.async.transfer %_constant_0 : !stream.resource<constant>{%_constant_0__size} -> !stream.resource<*>{%_constant_0__size}
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%_constant_1__size = util.global.load @_constant_1__size : index
%4 = stream.async.transfer %_constant_1 : !stream.resource<constant>{%_constant_1__size} -> !stream.resource<*>{%_constant_1__size}
%5 = util.optimization_barrier %4 : !stream.resource<*>
%6 = util.optimization_barrier %3 : !stream.resource<*>
%7 = util.optimization_barrier %1 : !stream.resource<*>
%8 = stream.resource.size %6 : !stream.resource<*>
%9 = stream.tensor.sizeof tensor<3x1xf32> : index
%10 = stream.tensor.clone %6 : tensor<1x3xf32> in !stream.resource<*>{%8} -> tensor<3x1xf32> in !stream.resource<*>{%9}
%11 = stream.resource.size %7 : !stream.resource<*>
%12 = stream.tensor.sizeof tensor<f32> : index
%13 = stream.tensor.clone %7 : tensor<1xf32> in !stream.resource<*>{%11} -> tensor<f32> in !stream.resource<*>{%12}
%c0 = arith.constant 0 : index
%14 = stream.resource.size %5 : !stream.resource<*>
%15 = stream.tensor.sizeof tensor<2x1xf32> : index
%16 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%5[%c0 to %14 for %14], %10[%c0 to %9 for %9], %13[%c0 to %12 for %12]) : (!stream.resource<*>{%14}, !stream.resource<*>{%9}, !stream.resource<*>{%12}) -> !stream.resource<*>{%15}
%17 = stream.async.transfer %16 : !stream.resource<*>{%15} -> !stream.resource<external>{%15}
%18 = stream.tensor.export %17 : tensor<2x1xf32> in !stream.resource<external>{%15} -> tensor<2x1xf32>
%19 = stream.async.transfer %2 : !stream.resource<*>{%_constant__size} -> !stream.resource<external>{%_constant__size}
%20 = stream.tensor.export %19 : tensor<2x1xf32> in !stream.resource<external>{%_constant__size} -> tensor<2x1xf32>
check.expect_eq(%18, %20) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After VerifyLoweringToTensors (iree-stream-verify-lowering-to-tensors) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private @_constant : !stream.resource<constant>
util.global private @_constant__size : index
util.initializer {
%cst = stream.tensor.constant : tensor<2x1xf32> in !stream.resource<constant> = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %0, @_constant__size : index
util.initializer.return
}
util.global private @_constant_0 : !stream.resource<constant>
util.global private @_constant_0__size : index
util.initializer {
%cst = stream.tensor.constant : tensor<1x3xf32> in !stream.resource<constant> = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant_0 : !stream.resource<constant>
util.global.store %0, @_constant_0__size : index
util.initializer.return
}
util.global private @_constant_1 : !stream.resource<constant>
util.global private @_constant_1__size : index
util.initializer {
%cst = stream.tensor.constant : tensor<2x3xf32> in !stream.resource<constant> = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant_1 : !stream.resource<constant>
util.global.store %0, @_constant_1__size : index
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%cst = arith.constant 0.000000e+00 : f32
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%cst = stream.tensor.constant : tensor<1xf32> in !stream.resource<constant> = dense<1.000000e+00> : tensor<1xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
%1 = stream.async.transfer %cst : !stream.resource<constant>{%0} -> !stream.resource<*>{%0}
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant__size = util.global.load @_constant__size : index
%2 = stream.async.transfer %_constant : !stream.resource<constant>{%_constant__size} -> !stream.resource<*>{%_constant__size}
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_0__size = util.global.load @_constant_0__size : index
%3 = stream.async.transfer %_constant_0 : !stream.resource<constant>{%_constant_0__size} -> !stream.resource<*>{%_constant_0__size}
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%_constant_1__size = util.global.load @_constant_1__size : index
%4 = stream.async.transfer %_constant_1 : !stream.resource<constant>{%_constant_1__size} -> !stream.resource<*>{%_constant_1__size}
%5 = util.optimization_barrier %4 : !stream.resource<*>
%6 = util.optimization_barrier %3 : !stream.resource<*>
%7 = util.optimization_barrier %1 : !stream.resource<*>
%8 = stream.resource.size %6 : !stream.resource<*>
%9 = stream.tensor.sizeof tensor<3x1xf32> : index
%10 = stream.tensor.clone %6 : tensor<1x3xf32> in !stream.resource<*>{%8} -> tensor<3x1xf32> in !stream.resource<*>{%9}
%11 = stream.resource.size %7 : !stream.resource<*>
%12 = stream.tensor.sizeof tensor<f32> : index
%13 = stream.tensor.clone %7 : tensor<1xf32> in !stream.resource<*>{%11} -> tensor<f32> in !stream.resource<*>{%12}
%c0 = arith.constant 0 : index
%14 = stream.resource.size %5 : !stream.resource<*>
%15 = stream.tensor.sizeof tensor<2x1xf32> : index
%16 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%5[%c0 to %14 for %14], %10[%c0 to %9 for %9], %13[%c0 to %12 for %12]) : (!stream.resource<*>{%14}, !stream.resource<*>{%9}, !stream.resource<*>{%12}) -> !stream.resource<*>{%15}
%17 = stream.async.transfer %16 : !stream.resource<*>{%15} -> !stream.resource<external>{%15}
%18 = stream.tensor.export %17 : tensor<2x1xf32> in !stream.resource<external>{%15} -> tensor<2x1xf32>
%19 = stream.async.transfer %2 : !stream.resource<*>{%_constant__size} -> !stream.resource<external>{%_constant__size}
%20 = stream.tensor.export %19 : tensor<2x1xf32> in !stream.resource<external>{%_constant__size} -> tensor<2x1xf32>
check.expect_eq(%18, %20) : tensor<2x1xf32>
return
}
}
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
util.initializer {
%cst = stream.tensor.constant : tensor<2x1xf32> in !stream.resource<constant> = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %0, @_constant__size : index
util.initializer.return
}
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
util.initializer {
%cst = stream.tensor.constant : tensor<1x3xf32> in !stream.resource<constant> = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant_0 : !stream.resource<constant>
util.global.store %0, @_constant_0__size : index
util.initializer.return
}
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
util.initializer {
%cst = stream.tensor.constant : tensor<2x3xf32> in !stream.resource<constant> = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant_1 : !stream.resource<constant>
util.global.store %0, @_constant_1__size : index
util.initializer.return
}
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
util.initializer {
%cst = stream.tensor.constant : tensor<2x1xf32> in !stream.resource<constant> = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %0, @_constant__size : index
util.initializer.return
}
// -----// IR Dump Before CSE (cse) //----- //
util.initializer {
%cst = stream.tensor.constant : tensor<2x1xf32> in !stream.resource<constant> = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %0, @_constant__size : index
util.initializer.return
}
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
func.func private @_tensor_float() {
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%cst = stream.tensor.constant : tensor<1xf32> in !stream.resource<constant> = dense<1.000000e+00> : tensor<1xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
%1 = stream.async.transfer %cst : !stream.resource<constant>{%0} -> !stream.resource<*>{%0}
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant__size = util.global.load @_constant__size : index
%2 = stream.async.transfer %_constant : !stream.resource<constant>{%_constant__size} -> !stream.resource<*>{%_constant__size}
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_0__size = util.global.load @_constant_0__size : index
%3 = stream.async.transfer %_constant_0 : !stream.resource<constant>{%_constant_0__size} -> !stream.resource<*>{%_constant_0__size}
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%_constant_1__size = util.global.load @_constant_1__size : index
%4 = stream.async.transfer %_constant_1 : !stream.resource<constant>{%_constant_1__size} -> !stream.resource<*>{%_constant_1__size}
%5 = util.optimization_barrier %4 : !stream.resource<*>
%6 = util.optimization_barrier %3 : !stream.resource<*>
%7 = util.optimization_barrier %1 : !stream.resource<*>
%8 = stream.resource.size %6 : !stream.resource<*>
%9 = stream.tensor.sizeof tensor<3x1xf32> : index
%10 = stream.tensor.clone %6 : tensor<1x3xf32> in !stream.resource<*>{%8} -> tensor<3x1xf32> in !stream.resource<*>{%9}
%11 = stream.resource.size %7 : !stream.resource<*>
%12 = stream.tensor.sizeof tensor<f32> : index
%13 = stream.tensor.clone %7 : tensor<1xf32> in !stream.resource<*>{%11} -> tensor<f32> in !stream.resource<*>{%12}
%c0 = arith.constant 0 : index
%14 = stream.resource.size %5 : !stream.resource<*>
%15 = stream.tensor.sizeof tensor<2x1xf32> : index
%16 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%5[%c0 to %14 for %14], %10[%c0 to %9 for %9], %13[%c0 to %12 for %12]) : (!stream.resource<*>{%14}, !stream.resource<*>{%9}, !stream.resource<*>{%12}) -> !stream.resource<*>{%15}
%17 = stream.async.transfer %16 : !stream.resource<*>{%15} -> !stream.resource<external>{%15}
%18 = stream.tensor.export %17 : tensor<2x1xf32> in !stream.resource<external>{%15} -> tensor<2x1xf32>
%19 = stream.async.transfer %2 : !stream.resource<*>{%_constant__size} -> !stream.resource<external>{%_constant__size}
%20 = stream.tensor.export %19 : tensor<2x1xf32> in !stream.resource<external>{%_constant__size} -> tensor<2x1xf32>
check.expect_eq(%18, %20) : tensor<2x1xf32>
return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
util.initializer {
%cst = stream.tensor.constant : tensor<1x3xf32> in !stream.resource<constant> = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant_0 : !stream.resource<constant>
util.global.store %0, @_constant_0__size : index
util.initializer.return
}
// -----// IR Dump Before CSE (cse) //----- //
util.initializer {
%cst = stream.tensor.constant : tensor<1x3xf32> in !stream.resource<constant> = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant_0 : !stream.resource<constant>
util.global.store %0, @_constant_0__size : index
util.initializer.return
}
// -----// IR Dump After CSE (cse) //----- //
util.initializer {
%cst = stream.tensor.constant : tensor<2x1xf32> in !stream.resource<constant> = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %0, @_constant__size : index
util.initializer.return
}
// -----// IR Dump Before SimplifyGlobalAccesses (iree-util-simplify-global-accesses) //----- //
util.initializer {
%cst = stream.tensor.constant : tensor<2x1xf32> in !stream.resource<constant> = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %0, @_constant__size : index
util.initializer.return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 1.000000e+00 : f32
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%0 = stream.tensor.sizeof tensor<1xf32> : index
%1 = stream.tensor.splat %cst : f32 -> tensor<1xf32> in !stream.resource<*>{%0}
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant__size = util.global.load @_constant__size : index
%2 = stream.async.transfer %_constant : !stream.resource<constant>{%_constant__size} -> !stream.resource<*>{%_constant__size}
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_0__size = util.global.load @_constant_0__size : index
%3 = stream.async.transfer %_constant_0 : !stream.resource<constant>{%_constant_0__size} -> !stream.resource<*>{%_constant_0__size}
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%_constant_1__size = util.global.load @_constant_1__size : index
%4 = stream.async.transfer %_constant_1 : !stream.resource<constant>{%_constant_1__size} -> !stream.resource<*>{%_constant_1__size}
%5 = util.optimization_barrier %4 : !stream.resource<*>
%6 = util.optimization_barrier %3 : !stream.resource<*>
%7 = util.optimization_barrier %1 : !stream.resource<*>
%8 = stream.tensor.sizeof tensor<3x1xf32> : index
%9 = stream.tensor.sizeof tensor<f32> : index
%10 = stream.resource.size %5 : !stream.resource<*>
%11 = stream.tensor.sizeof tensor<2x1xf32> : index
%12 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%5[%c0 to %10 for %10], %6[%c0 to %8 for %8], %7[%c0 to %9 for %9]) : (!stream.resource<*>{%10}, !stream.resource<*>{%8}, !stream.resource<*>{%9}) -> !stream.resource<*>{%11}
%13 = stream.async.transfer %12 : !stream.resource<*>{%11} -> !stream.resource<external>{%11}
%14 = stream.tensor.export %13 : tensor<2x1xf32> in !stream.resource<external>{%11} -> tensor<2x1xf32>
%15 = stream.async.transfer %2 : !stream.resource<*>{%_constant__size} -> !stream.resource<external>{%_constant__size}
%16 = stream.tensor.export %15 : tensor<2x1xf32> in !stream.resource<external>{%_constant__size} -> tensor<2x1xf32>
check.expect_eq(%14, %16) : tensor<2x1xf32>
return
}
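// Note: compared with the pre-canonicalization dump of @_tensor_float above, the tensor<1xf32>
// bias constant is now materialized with stream.tensor.splat of 1.0, and the two
// stream.tensor.clone reshapes (1x3 -> 3x1 and 1xf32 -> f32) have been folded away, so the
// dispatch consumes %6 and %7 directly.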
// -----// IR Dump Before CSE (cse) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 1.000000e+00 : f32
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%0 = stream.tensor.sizeof tensor<1xf32> : index
%1 = stream.tensor.splat %cst : f32 -> tensor<1xf32> in !stream.resource<*>{%0}
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant__size = util.global.load @_constant__size : index
%2 = stream.async.transfer %_constant : !stream.resource<constant>{%_constant__size} -> !stream.resource<*>{%_constant__size}
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_0__size = util.global.load @_constant_0__size : index
%3 = stream.async.transfer %_constant_0 : !stream.resource<constant>{%_constant_0__size} -> !stream.resource<*>{%_constant_0__size}
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%_constant_1__size = util.global.load @_constant_1__size : index
%4 = stream.async.transfer %_constant_1 : !stream.resource<constant>{%_constant_1__size} -> !stream.resource<*>{%_constant_1__size}
%5 = util.optimization_barrier %4 : !stream.resource<*>
%6 = util.optimization_barrier %3 : !stream.resource<*>
%7 = util.optimization_barrier %1 : !stream.resource<*>
%8 = stream.tensor.sizeof tensor<3x1xf32> : index
%9 = stream.tensor.sizeof tensor<f32> : index
%10 = stream.resource.size %5 : !stream.resource<*>
%11 = stream.tensor.sizeof tensor<2x1xf32> : index
%12 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%5[%c0 to %10 for %10], %6[%c0 to %8 for %8], %7[%c0 to %9 for %9]) : (!stream.resource<*>{%10}, !stream.resource<*>{%8}, !stream.resource<*>{%9}) -> !stream.resource<*>{%11}
%13 = stream.async.transfer %12 : !stream.resource<*>{%11} -> !stream.resource<external>{%11}
%14 = stream.tensor.export %13 : tensor<2x1xf32> in !stream.resource<external>{%11} -> tensor<2x1xf32>
%15 = stream.async.transfer %2 : !stream.resource<*>{%_constant__size} -> !stream.resource<external>{%_constant__size}
%16 = stream.tensor.export %15 : tensor<2x1xf32> in !stream.resource<external>{%_constant__size} -> tensor<2x1xf32>
check.expect_eq(%14, %16) : tensor<2x1xf32>
return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
util.initializer {
%cst = stream.tensor.constant : tensor<2x3xf32> in !stream.resource<constant> = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant_1 : !stream.resource<constant>
util.global.store %0, @_constant_1__size : index
util.initializer.return
}
// -----// IR Dump Before CSE (cse) //----- //
util.initializer {
%cst = stream.tensor.constant : tensor<2x3xf32> in !stream.resource<constant> = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant_1 : !stream.resource<constant>
util.global.store %0, @_constant_1__size : index
util.initializer.return
}
// -----// IR Dump After SimplifyGlobalAccesses (iree-util-simplify-global-accesses) //----- //
util.initializer {
%cst = stream.tensor.constant : tensor<2x1xf32> in !stream.resource<constant> = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %0, @_constant__size : index
util.initializer.return
}
// -----// IR Dump After CSE (cse) //----- //
util.initializer {
%cst = stream.tensor.constant : tensor<1x3xf32> in !stream.resource<constant> = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant_0 : !stream.resource<constant>
util.global.store %0, @_constant_0__size : index
util.initializer.return
}
// -----// IR Dump Before SimplifyGlobalAccesses (iree-util-simplify-global-accesses) //----- //
util.initializer {
%cst = stream.tensor.constant : tensor<1x3xf32> in !stream.resource<constant> = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant_0 : !stream.resource<constant>
util.global.store %0, @_constant_0__size : index
util.initializer.return
}
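// Note: the Before/After dumps for the three initializers and the two functions interleave in
// this log, apparently because these passes run per nested operation rather than once over the
// whole module.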
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before CSE (cse) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
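// Note: the public @tensor_float entry point (iree.abi.stub) only forwards to the private
// @_tensor_float, so Canonicalizer and CSE leave it unchanged in every dump.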
// -----// IR Dump After CSE (cse) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 1.000000e+00 : f32
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%0 = stream.tensor.sizeof tensor<1xf32> : index
%1 = stream.tensor.splat %cst : f32 -> tensor<1xf32> in !stream.resource<*>{%0}
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant__size = util.global.load @_constant__size : index
%2 = stream.async.transfer %_constant : !stream.resource<constant>{%_constant__size} -> !stream.resource<*>{%_constant__size}
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_0__size = util.global.load @_constant_0__size : index
%3 = stream.async.transfer %_constant_0 : !stream.resource<constant>{%_constant_0__size} -> !stream.resource<*>{%_constant_0__size}
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%_constant_1__size = util.global.load @_constant_1__size : index
%4 = stream.async.transfer %_constant_1 : !stream.resource<constant>{%_constant_1__size} -> !stream.resource<*>{%_constant_1__size}
%5 = util.optimization_barrier %4 : !stream.resource<*>
%6 = util.optimization_barrier %3 : !stream.resource<*>
%7 = util.optimization_barrier %1 : !stream.resource<*>
%8 = stream.tensor.sizeof tensor<3x1xf32> : index
%9 = stream.tensor.sizeof tensor<f32> : index
%10 = stream.resource.size %5 : !stream.resource<*>
%11 = stream.tensor.sizeof tensor<2x1xf32> : index
%12 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%5[%c0 to %10 for %10], %6[%c0 to %8 for %8], %7[%c0 to %9 for %9]) : (!stream.resource<*>{%10}, !stream.resource<*>{%8}, !stream.resource<*>{%9}) -> !stream.resource<*>{%11}
%13 = stream.async.transfer %12 : !stream.resource<*>{%11} -> !stream.resource<external>{%11}
%14 = stream.tensor.export %13 : tensor<2x1xf32> in !stream.resource<external>{%11} -> tensor<2x1xf32>
%15 = stream.async.transfer %2 : !stream.resource<*>{%_constant__size} -> !stream.resource<external>{%_constant__size}
%16 = stream.tensor.export %15 : tensor<2x1xf32> in !stream.resource<external>{%_constant__size} -> tensor<2x1xf32>
check.expect_eq(%14, %16) : tensor<2x1xf32>
return
}
// -----// IR Dump Before SimplifyGlobalAccesses (iree-util-simplify-global-accesses) //----- //
func.func private @_tensor_float() {
%cst = arith.constant 1.000000e+00 : f32
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%0 = stream.tensor.sizeof tensor<1xf32> : index
%1 = stream.tensor.splat %cst : f32 -> tensor<1xf32> in !stream.resource<*>{%0}
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant__size = util.global.load @_constant__size : index
%2 = stream.async.transfer %_constant : !stream.resource<constant>{%_constant__size} -> !stream.resource<*>{%_constant__size}
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_0__size = util.global.load @_constant_0__size : index
%3 = stream.async.transfer %_constant_0 : !stream.resource<constant>{%_constant_0__size} -> !stream.resource<*>{%_constant_0__size}
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%_constant_1__size = util.global.load @_constant_1__size : index
%4 = stream.async.transfer %_constant_1 : !stream.resource<constant>{%_constant_1__size} -> !stream.resource<*>{%_constant_1__size}
%5 = util.optimization_barrier %4 : !stream.resource<*>
%6 = util.optimization_barrier %3 : !stream.resource<*>
%7 = util.optimization_barrier %1 : !stream.resource<*>
%8 = stream.tensor.sizeof tensor<3x1xf32> : index
%9 = stream.tensor.sizeof tensor<f32> : index
%10 = stream.resource.size %5 : !stream.resource<*>
%11 = stream.tensor.sizeof tensor<2x1xf32> : index
%12 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%5[%c0 to %10 for %10], %6[%c0 to %8 for %8], %7[%c0 to %9 for %9]) : (!stream.resource<*>{%10}, !stream.resource<*>{%8}, !stream.resource<*>{%9}) -> !stream.resource<*>{%11}
%13 = stream.async.transfer %12 : !stream.resource<*>{%11} -> !stream.resource<external>{%11}
%14 = stream.tensor.export %13 : tensor<2x1xf32> in !stream.resource<external>{%11} -> tensor<2x1xf32>
%15 = stream.async.transfer %2 : !stream.resource<*>{%_constant__size} -> !stream.resource<external>{%_constant__size}
%16 = stream.tensor.export %15 : tensor<2x1xf32> in !stream.resource<external>{%_constant__size} -> tensor<2x1xf32>
check.expect_eq(%14, %16) : tensor<2x1xf32>
return
}
// -----// IR Dump After CSE (cse) //----- //
util.initializer {
%cst = stream.tensor.constant : tensor<2x3xf32> in !stream.resource<constant> = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant_1 : !stream.resource<constant>
util.global.store %0, @_constant_1__size : index
util.initializer.return
}
// -----// IR Dump Before SimplifyGlobalAccesses (iree-util-simplify-global-accesses) //----- //
util.initializer {
%cst = stream.tensor.constant : tensor<2x3xf32> in !stream.resource<constant> = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant_1 : !stream.resource<constant>
util.global.store %0, @_constant_1__size : index
util.initializer.return
}
// -----// IR Dump After SimplifyGlobalAccesses (iree-util-simplify-global-accesses) //----- //
util.initializer {
%cst = stream.tensor.constant : tensor<1x3xf32> in !stream.resource<constant> = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant_0 : !stream.resource<constant>
util.global.store %0, @_constant_0__size : index
util.initializer.return
}
// -----// IR Dump After SimplifyGlobalAccesses (iree-util-simplify-global-accesses) //----- //
func.func private @_tensor_float() {
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant__size = util.global.load @_constant__size : index
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_0__size = util.global.load @_constant_0__size : index
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%_constant_1__size = util.global.load @_constant_1__size : index
%cst = arith.constant 1.000000e+00 : f32
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%0 = stream.tensor.sizeof tensor<1xf32> : index
%1 = stream.tensor.splat %cst : f32 -> tensor<1xf32> in !stream.resource<*>{%0}
%2 = stream.async.transfer %_constant : !stream.resource<constant>{%_constant__size} -> !stream.resource<*>{%_constant__size}
%3 = stream.async.transfer %_constant_0 : !stream.resource<constant>{%_constant_0__size} -> !stream.resource<*>{%_constant_0__size}
%4 = stream.async.transfer %_constant_1 : !stream.resource<constant>{%_constant_1__size} -> !stream.resource<*>{%_constant_1__size}
%5 = util.optimization_barrier %4 : !stream.resource<*>
%6 = util.optimization_barrier %3 : !stream.resource<*>
%7 = util.optimization_barrier %1 : !stream.resource<*>
%8 = stream.tensor.sizeof tensor<3x1xf32> : index
%9 = stream.tensor.sizeof tensor<f32> : index
%10 = stream.resource.size %5 : !stream.resource<*>
%11 = stream.tensor.sizeof tensor<2x1xf32> : index
%12 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%5[%c0 to %10 for %10], %6[%c0 to %8 for %8], %7[%c0 to %9 for %9]) : (!stream.resource<*>{%10}, !stream.resource<*>{%8}, !stream.resource<*>{%9}) -> !stream.resource<*>{%11}
%13 = stream.async.transfer %12 : !stream.resource<*>{%11} -> !stream.resource<external>{%11}
%14 = stream.tensor.export %13 : tensor<2x1xf32> in !stream.resource<external>{%11} -> tensor<2x1xf32>
%15 = stream.async.transfer %2 : !stream.resource<*>{%_constant__size} -> !stream.resource<external>{%_constant__size}
%16 = stream.tensor.export %15 : tensor<2x1xf32> in !stream.resource<external>{%_constant__size} -> tensor<2x1xf32>
check.expect_eq(%14, %16) : tensor<2x1xf32>
return
}
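// Note: SimplifyGlobalAccesses hoisted all util.global.load ops for the constant resources and
// their sizes to the top of @_tensor_float, ahead of the arith constants and stream ops; the
// remaining ops are the same, only reordered (the transfers now follow the grouped loads).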
// -----// IR Dump After CSE (cse) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before SimplifyGlobalAccesses (iree-util-simplify-global-accesses) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After SimplifyGlobalAccesses (iree-util-simplify-global-accesses) //----- //
util.initializer {
%cst = stream.tensor.constant : tensor<2x3xf32> in !stream.resource<constant> = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant_1 : !stream.resource<constant>
util.global.store %0, @_constant_1__size : index
util.initializer.return
}
// -----// IR Dump After SimplifyGlobalAccesses (iree-util-simplify-global-accesses) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before ApplyPatterns (iree-util-apply-patterns) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private @_constant : !stream.resource<constant>
util.global private @_constant__size : index
util.initializer {
%cst = stream.tensor.constant : tensor<2x1xf32> in !stream.resource<constant> = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %0, @_constant__size : index
util.initializer.return
}
util.global private @_constant_0 : !stream.resource<constant>
util.global private @_constant_0__size : index
util.initializer {
%cst = stream.tensor.constant : tensor<1x3xf32> in !stream.resource<constant> = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant_0 : !stream.resource<constant>
util.global.store %0, @_constant_0__size : index
util.initializer.return
}
util.global private @_constant_1 : !stream.resource<constant>
util.global private @_constant_1__size : index
util.initializer {
%cst = stream.tensor.constant : tensor<2x3xf32> in !stream.resource<constant> = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant_1 : !stream.resource<constant>
util.global.store %0, @_constant_1__size : index
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%cst = arith.constant 0.000000e+00 : f32
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant__size = util.global.load @_constant__size : index
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_0__size = util.global.load @_constant_0__size : index
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%_constant_1__size = util.global.load @_constant_1__size : index
%cst = arith.constant 1.000000e+00 : f32
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%0 = stream.tensor.sizeof tensor<1xf32> : index
%1 = stream.tensor.splat %cst : f32 -> tensor<1xf32> in !stream.resource<*>{%0}
%2 = stream.async.transfer %_constant : !stream.resource<constant>{%_constant__size} -> !stream.resource<*>{%_constant__size}
%3 = stream.async.transfer %_constant_0 : !stream.resource<constant>{%_constant_0__size} -> !stream.resource<*>{%_constant_0__size}
%4 = stream.async.transfer %_constant_1 : !stream.resource<constant>{%_constant_1__size} -> !stream.resource<*>{%_constant_1__size}
%5 = util.optimization_barrier %4 : !stream.resource<*>
%6 = util.optimization_barrier %3 : !stream.resource<*>
%7 = util.optimization_barrier %1 : !stream.resource<*>
%8 = stream.tensor.sizeof tensor<3x1xf32> : index
%9 = stream.tensor.sizeof tensor<f32> : index
%10 = stream.resource.size %5 : !stream.resource<*>
%11 = stream.tensor.sizeof tensor<2x1xf32> : index
%12 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%5[%c0 to %10 for %10], %6[%c0 to %8 for %8], %7[%c0 to %9 for %9]) : (!stream.resource<*>{%10}, !stream.resource<*>{%8}, !stream.resource<*>{%9}) -> !stream.resource<*>{%11}
%13 = stream.async.transfer %12 : !stream.resource<*>{%11} -> !stream.resource<external>{%11}
%14 = stream.tensor.export %13 : tensor<2x1xf32> in !stream.resource<external>{%11} -> tensor<2x1xf32>
%15 = stream.async.transfer %2 : !stream.resource<*>{%_constant__size} -> !stream.resource<external>{%_constant__size}
%16 = stream.tensor.export %15 : tensor<2x1xf32> in !stream.resource<external>{%_constant__size} -> tensor<2x1xf32>
check.expect_eq(%14, %16) : tensor<2x1xf32>
return
}
}
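// Note: the dispatch @_tensor_float_dispatch_0_matmul_2x1x3 implements the matmul-plus-bias of
// the fully-connected computation: linalg.matmul of the 2x3 input with the 3x1 (transposed)
// weights, then a linalg.generic that broadcast-adds the scalar bias. For the test data:
//   [1 2 3] . [7 8 9] = 50, + 1.0 bias = 51
//   [4 5 6] . [7 8 9] = 122, + 1.0 bias = 123
// which matches the expected @_constant value dense<[[51.0], [123.0]]>.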
// -----// IR Dump After ApplyPatterns (iree-util-apply-patterns) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private @_constant : !stream.resource<constant>
util.global private @_constant__size : index
util.initializer {
%cst = stream.tensor.constant : tensor<2x1xf32> in !stream.resource<constant> = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %0, @_constant__size : index
util.initializer.return
}
util.global private @_constant_0 : !stream.resource<constant>
util.global private @_constant_0__size : index
util.initializer {
%cst = stream.tensor.constant : tensor<1x3xf32> in !stream.resource<constant> = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant_0 : !stream.resource<constant>
util.global.store %0, @_constant_0__size : index
util.initializer.return
}
util.global private @_constant_1 : !stream.resource<constant>
util.global private @_constant_1__size : index
util.initializer {
%cst = stream.tensor.constant : tensor<2x3xf32> in !stream.resource<constant> = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant_1 : !stream.resource<constant>
util.global.store %0, @_constant_1__size : index
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%cst = arith.constant 1.000000e+00 : f32
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant__size = util.global.load @_constant__size : index
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_0__size = util.global.load @_constant_0__size : index
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%_constant_1__size = util.global.load @_constant_1__size : index
%0 = stream.tensor.sizeof tensor<1xf32> : index
%1 = stream.tensor.splat %cst : f32 -> tensor<1xf32> in !stream.resource<*>{%0}
%2 = stream.async.transfer %_constant : !stream.resource<constant>{%_constant__size} -> !stream.resource<*>{%_constant__size}
%3 = stream.async.transfer %_constant_0 : !stream.resource<constant>{%_constant_0__size} -> !stream.resource<*>{%_constant_0__size}
%4 = stream.async.transfer %_constant_1 : !stream.resource<constant>{%_constant_1__size} -> !stream.resource<*>{%_constant_1__size}
%5 = util.optimization_barrier %4 : !stream.resource<*>
%6 = util.optimization_barrier %3 : !stream.resource<*>
%7 = util.optimization_barrier %1 : !stream.resource<*>
%8 = stream.tensor.sizeof tensor<3x1xf32> : index
%9 = stream.tensor.sizeof tensor<f32> : index
%10 = stream.resource.size %5 : !stream.resource<*>
%11 = stream.tensor.sizeof tensor<2x1xf32> : index
%12 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%5[%c0 to %10 for %10], %6[%c0 to %8 for %8], %7[%c0 to %9 for %9]) : (!stream.resource<*>{%10}, !stream.resource<*>{%8}, !stream.resource<*>{%9}) -> !stream.resource<*>{%11}
%13 = stream.async.transfer %12 : !stream.resource<*>{%11} -> !stream.resource<external>{%11}
%14 = stream.tensor.export %13 : tensor<2x1xf32> in !stream.resource<external>{%11} -> tensor<2x1xf32>
%15 = stream.async.transfer %2 : !stream.resource<*>{%_constant__size} -> !stream.resource<external>{%_constant__size}
%16 = stream.tensor.export %15 : tensor<2x1xf32> in !stream.resource<external>{%_constant__size} -> tensor<2x1xf32>
check.expect_eq(%14, %16) : tensor<2x1xf32>
return
}
}
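// Note: relative to the Before dump, ApplyPatterns only reordered constants: the arith.constant
// ops in @_tensor_float now precede the global loads, and %cst = 0.0 in the dispatch moved ahead
// of the binding subspans; no ops were added or removed.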
// -----// IR Dump Before FoldGlobals (iree-util-fold-globals) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private @_constant : !stream.resource<constant>
util.global private @_constant__size : index
util.initializer {
%cst = stream.tensor.constant : tensor<2x1xf32> in !stream.resource<constant> = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %0, @_constant__size : index
util.initializer.return
}
util.global private @_constant_0 : !stream.resource<constant>
util.global private @_constant_0__size : index
util.initializer {
%cst = stream.tensor.constant : tensor<1x3xf32> in !stream.resource<constant> = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant_0 : !stream.resource<constant>
util.global.store %0, @_constant_0__size : index
util.initializer.return
}
util.global private @_constant_1 : !stream.resource<constant>
util.global private @_constant_1__size : index
util.initializer {
%cst = stream.tensor.constant : tensor<2x3xf32> in !stream.resource<constant> = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant_1 : !stream.resource<constant>
util.global.store %0, @_constant_1__size : index
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%cst = arith.constant 1.000000e+00 : f32
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant__size = util.global.load @_constant__size : index
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_0__size = util.global.load @_constant_0__size : index
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%_constant_1__size = util.global.load @_constant_1__size : index
%0 = stream.tensor.sizeof tensor<1xf32> : index
%1 = stream.tensor.splat %cst : f32 -> tensor<1xf32> in !stream.resource<*>{%0}
%2 = stream.async.transfer %_constant : !stream.resource<constant>{%_constant__size} -> !stream.resource<*>{%_constant__size}
%3 = stream.async.transfer %_constant_0 : !stream.resource<constant>{%_constant_0__size} -> !stream.resource<*>{%_constant_0__size}
%4 = stream.async.transfer %_constant_1 : !stream.resource<constant>{%_constant_1__size} -> !stream.resource<*>{%_constant_1__size}
%5 = util.optimization_barrier %4 : !stream.resource<*>
%6 = util.optimization_barrier %3 : !stream.resource<*>
%7 = util.optimization_barrier %1 : !stream.resource<*>
%8 = stream.tensor.sizeof tensor<3x1xf32> : index
%9 = stream.tensor.sizeof tensor<f32> : index
%10 = stream.resource.size %5 : !stream.resource<*>
%11 = stream.tensor.sizeof tensor<2x1xf32> : index
%12 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%5[%c0 to %10 for %10], %6[%c0 to %8 for %8], %7[%c0 to %9 for %9]) : (!stream.resource<*>{%10}, !stream.resource<*>{%8}, !stream.resource<*>{%9}) -> !stream.resource<*>{%11}
%13 = stream.async.transfer %12 : !stream.resource<*>{%11} -> !stream.resource<external>{%11}
%14 = stream.tensor.export %13 : tensor<2x1xf32> in !stream.resource<external>{%11} -> tensor<2x1xf32>
%15 = stream.async.transfer %2 : !stream.resource<*>{%_constant__size} -> !stream.resource<external>{%_constant__size}
%16 = stream.tensor.export %15 : tensor<2x1xf32> in !stream.resource<external>{%_constant__size} -> tensor<2x1xf32>
check.expect_eq(%14, %16) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After FoldGlobals (iree-util-fold-globals) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private @_constant : !stream.resource<constant>
util.global private @_constant__size : index
util.initializer {
%cst = stream.tensor.constant : tensor<2x1xf32> in !stream.resource<constant> = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %0, @_constant__size : index
util.initializer.return
}
util.global private @_constant_0 : !stream.resource<constant>
util.global private @_constant_0__size : index
util.initializer {
%cst = stream.tensor.constant : tensor<1x3xf32> in !stream.resource<constant> = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant_0 : !stream.resource<constant>
util.global.store %0, @_constant_0__size : index
util.initializer.return
}
util.global private @_constant_1 : !stream.resource<constant>
util.global private @_constant_1__size : index
util.initializer {
%cst = stream.tensor.constant : tensor<2x3xf32> in !stream.resource<constant> = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant_1 : !stream.resource<constant>
util.global.store %0, @_constant_1__size : index
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%cst = arith.constant 1.000000e+00 : f32
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant__size = util.global.load @_constant__size : index
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_0__size = util.global.load @_constant_0__size : index
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%_constant_1__size = util.global.load @_constant_1__size : index
%0 = stream.tensor.sizeof tensor<1xf32> : index
%1 = stream.tensor.splat %cst : f32 -> tensor<1xf32> in !stream.resource<*>{%0}
%2 = stream.async.transfer %_constant : !stream.resource<constant>{%_constant__size} -> !stream.resource<*>{%_constant__size}
%3 = stream.async.transfer %_constant_0 : !stream.resource<constant>{%_constant_0__size} -> !stream.resource<*>{%_constant_0__size}
%4 = stream.async.transfer %_constant_1 : !stream.resource<constant>{%_constant_1__size} -> !stream.resource<*>{%_constant_1__size}
%5 = util.optimization_barrier %4 : !stream.resource<*>
%6 = util.optimization_barrier %3 : !stream.resource<*>
%7 = util.optimization_barrier %1 : !stream.resource<*>
%8 = stream.tensor.sizeof tensor<3x1xf32> : index
%9 = stream.tensor.sizeof tensor<f32> : index
%10 = stream.resource.size %5 : !stream.resource<*>
%11 = stream.tensor.sizeof tensor<2x1xf32> : index
%12 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%5[%c0 to %10 for %10], %6[%c0 to %8 for %8], %7[%c0 to %9 for %9]) : (!stream.resource<*>{%10}, !stream.resource<*>{%8}, !stream.resource<*>{%9}) -> !stream.resource<*>{%11}
%13 = stream.async.transfer %12 : !stream.resource<*>{%11} -> !stream.resource<external>{%11}
%14 = stream.tensor.export %13 : tensor<2x1xf32> in !stream.resource<external>{%11} -> tensor<2x1xf32>
%15 = stream.async.transfer %2 : !stream.resource<*>{%_constant__size} -> !stream.resource<external>{%_constant__size}
%16 = stream.tensor.export %15 : tensor<2x1xf32> in !stream.resource<external>{%_constant__size} -> tensor<2x1xf32>
check.expect_eq(%14, %16) : tensor<2x1xf32>
return
}
}
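// Note: FoldGlobals makes no changes here; the Before and After dumps are identical, and each
// constant keeps its own initializer and separate __size global.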
// -----// IR Dump Before FuseGlobals (iree-util-fuse-globals) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private @_constant : !stream.resource<constant>
util.global private @_constant__size : index
util.initializer {
%cst = stream.tensor.constant : tensor<2x1xf32> in !stream.resource<constant> = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %0, @_constant__size : index
util.initializer.return
}
util.global private @_constant_0 : !stream.resource<constant>
util.global private @_constant_0__size : index
util.initializer {
%cst = stream.tensor.constant : tensor<1x3xf32> in !stream.resource<constant> = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant_0 : !stream.resource<constant>
util.global.store %0, @_constant_0__size : index
util.initializer.return
}
util.global private @_constant_1 : !stream.resource<constant>
util.global private @_constant_1__size : index
util.initializer {
%cst = stream.tensor.constant : tensor<2x3xf32> in !stream.resource<constant> = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant_1 : !stream.resource<constant>
util.global.store %0, @_constant_1__size : index
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%cst = arith.constant 1.000000e+00 : f32
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant__size = util.global.load @_constant__size : index
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_0__size = util.global.load @_constant_0__size : index
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%_constant_1__size = util.global.load @_constant_1__size : index
%0 = stream.tensor.sizeof tensor<1xf32> : index
%1 = stream.tensor.splat %cst : f32 -> tensor<1xf32> in !stream.resource<*>{%0}
%2 = stream.async.transfer %_constant : !stream.resource<constant>{%_constant__size} -> !stream.resource<*>{%_constant__size}
%3 = stream.async.transfer %_constant_0 : !stream.resource<constant>{%_constant_0__size} -> !stream.resource<*>{%_constant_0__size}
%4 = stream.async.transfer %_constant_1 : !stream.resource<constant>{%_constant_1__size} -> !stream.resource<*>{%_constant_1__size}
%5 = util.optimization_barrier %4 : !stream.resource<*>
%6 = util.optimization_barrier %3 : !stream.resource<*>
%7 = util.optimization_barrier %1 : !stream.resource<*>
%8 = stream.tensor.sizeof tensor<3x1xf32> : index
%9 = stream.tensor.sizeof tensor<f32> : index
%10 = stream.resource.size %5 : !stream.resource<*>
%11 = stream.tensor.sizeof tensor<2x1xf32> : index
%12 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%5[%c0 to %10 for %10], %6[%c0 to %8 for %8], %7[%c0 to %9 for %9]) : (!stream.resource<*>{%10}, !stream.resource<*>{%8}, !stream.resource<*>{%9}) -> !stream.resource<*>{%11}
%13 = stream.async.transfer %12 : !stream.resource<*>{%11} -> !stream.resource<external>{%11}
%14 = stream.tensor.export %13 : tensor<2x1xf32> in !stream.resource<external>{%11} -> tensor<2x1xf32>
%15 = stream.async.transfer %2 : !stream.resource<*>{%_constant__size} -> !stream.resource<external>{%_constant__size}
%16 = stream.tensor.export %15 : tensor<2x1xf32> in !stream.resource<external>{%_constant__size} -> tensor<2x1xf32>
check.expect_eq(%14, %16) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After FuseGlobals (iree-util-fuse-globals) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private @_constant : !stream.resource<constant>
util.global private @_constant__size : index
util.initializer {
%cst = stream.tensor.constant : tensor<2x1xf32> in !stream.resource<constant> = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %0, @_constant__size : index
util.initializer.return
}
util.global private @_constant_0 : !stream.resource<constant>
util.global private @_constant_0__size : index
util.initializer {
%cst = stream.tensor.constant : tensor<1x3xf32> in !stream.resource<constant> = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant_0 : !stream.resource<constant>
util.global.store %0, @_constant_0__size : index
util.initializer.return
}
util.global private @_constant_1 : !stream.resource<constant>
util.global private @_constant_1__size : index
util.initializer {
%cst = stream.tensor.constant : tensor<2x3xf32> in !stream.resource<constant> = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant_1 : !stream.resource<constant>
util.global.store %0, @_constant_1__size : index
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%cst = arith.constant 1.000000e+00 : f32
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant__size = util.global.load @_constant__size : index
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_0__size = util.global.load @_constant_0__size : index
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%_constant_1__size = util.global.load @_constant_1__size : index
%0 = stream.tensor.sizeof tensor<1xf32> : index
%1 = stream.tensor.splat %cst : f32 -> tensor<1xf32> in !stream.resource<*>{%0}
%2 = stream.async.transfer %_constant : !stream.resource<constant>{%_constant__size} -> !stream.resource<*>{%_constant__size}
%3 = stream.async.transfer %_constant_0 : !stream.resource<constant>{%_constant_0__size} -> !stream.resource<*>{%_constant_0__size}
%4 = stream.async.transfer %_constant_1 : !stream.resource<constant>{%_constant_1__size} -> !stream.resource<*>{%_constant_1__size}
%5 = util.optimization_barrier %4 : !stream.resource<*>
%6 = util.optimization_barrier %3 : !stream.resource<*>
%7 = util.optimization_barrier %1 : !stream.resource<*>
%8 = stream.tensor.sizeof tensor<3x1xf32> : index
%9 = stream.tensor.sizeof tensor<f32> : index
%10 = stream.resource.size %5 : !stream.resource<*>
%11 = stream.tensor.sizeof tensor<2x1xf32> : index
%12 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%5[%c0 to %10 for %10], %6[%c0 to %8 for %8], %7[%c0 to %9 for %9]) : (!stream.resource<*>{%10}, !stream.resource<*>{%8}, !stream.resource<*>{%9}) -> !stream.resource<*>{%11}
%13 = stream.async.transfer %12 : !stream.resource<*>{%11} -> !stream.resource<external>{%11}
%14 = stream.tensor.export %13 : tensor<2x1xf32> in !stream.resource<external>{%11} -> tensor<2x1xf32>
%15 = stream.async.transfer %2 : !stream.resource<*>{%_constant__size} -> !stream.resource<external>{%_constant__size}
%16 = stream.tensor.export %15 : tensor<2x1xf32> in !stream.resource<external>{%_constant__size} -> tensor<2x1xf32>
check.expect_eq(%14, %16) : tensor<2x1xf32>
return
}
}
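// Note: FuseGlobals is likewise a no-op on this module; the resource globals and their __size
// companions remain separate in the Before IPO dump that follows.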
// -----// IR Dump Before IPO (iree-util-ipo) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private @_constant : !stream.resource<constant>
util.global private @_constant__size : index
util.initializer {
%cst = stream.tensor.constant : tensor<2x1xf32> in !stream.resource<constant> = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %0, @_constant__size : index
util.initializer.return
}
util.global private @_constant_0 : !stream.resource<constant>
util.global private @_constant_0__size : index
util.initializer {
%cst = stream.tensor.constant : tensor<1x3xf32> in !stream.resource<constant> = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant_0 : !stream.resource<constant>
util.global.store %0, @_constant_0__size : index
util.initializer.return
}
util.global private @_constant_1 : !stream.resource<constant>
util.global private @_constant_1__size : index
util.initializer {
%cst = stream.tensor.constant : tensor<2x3xf32> in !stream.resource<constant> = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant_1 : !stream.resource<constant>
util.global.store %0, @_constant_1__size : index
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%cst = arith.constant 1.000000e+00 : f32
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant__size = util.global.load @_constant__size : index
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_0__size = util.global.load @_constant_0__size : index
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%_constant_1__size = util.global.load @_constant_1__size : index
%0 = stream.tensor.sizeof tensor<1xf32> : index
%1 = stream.tensor.splat %cst : f32 -> tensor<1xf32> in !stream.resource<*>{%0}
%2 = stream.async.transfer %_constant : !stream.resource<constant>{%_constant__size} -> !stream.resource<*>{%_constant__size}
%3 = stream.async.transfer %_constant_0 : !stream.resource<constant>{%_constant_0__size} -> !stream.resource<*>{%_constant_0__size}
%4 = stream.async.transfer %_constant_1 : !stream.resource<constant>{%_constant_1__size} -> !stream.resource<*>{%_constant_1__size}
%5 = util.optimization_barrier %4 : !stream.resource<*>
%6 = util.optimization_barrier %3 : !stream.resource<*>
%7 = util.optimization_barrier %1 : !stream.resource<*>
%8 = stream.tensor.sizeof tensor<3x1xf32> : index
%9 = stream.tensor.sizeof tensor<f32> : index
%10 = stream.resource.size %5 : !stream.resource<*>
%11 = stream.tensor.sizeof tensor<2x1xf32> : index
%12 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%5[%c0 to %10 for %10], %6[%c0 to %8 for %8], %7[%c0 to %9 for %9]) : (!stream.resource<*>{%10}, !stream.resource<*>{%8}, !stream.resource<*>{%9}) -> !stream.resource<*>{%11}
%13 = stream.async.transfer %12 : !stream.resource<*>{%11} -> !stream.resource<external>{%11}
%14 = stream.tensor.export %13 : tensor<2x1xf32> in !stream.resource<external>{%11} -> tensor<2x1xf32>
%15 = stream.async.transfer %2 : !stream.resource<*>{%_constant__size} -> !stream.resource<external>{%_constant__size}
%16 = stream.tensor.export %15 : tensor<2x1xf32> in !stream.resource<external>{%_constant__size} -> tensor<2x1xf32>
check.expect_eq(%14, %16) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After IPO (iree-util-ipo) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private @_constant : !stream.resource<constant>
util.global private @_constant__size : index
util.initializer {
%cst = stream.tensor.constant : tensor<2x1xf32> in !stream.resource<constant> = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %0, @_constant__size : index
util.initializer.return
}
util.global private @_constant_0 : !stream.resource<constant>
util.global private @_constant_0__size : index
util.initializer {
%cst = stream.tensor.constant : tensor<1x3xf32> in !stream.resource<constant> = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant_0 : !stream.resource<constant>
util.global.store %0, @_constant_0__size : index
util.initializer.return
}
util.global private @_constant_1 : !stream.resource<constant>
util.global private @_constant_1__size : index
util.initializer {
%cst = stream.tensor.constant : tensor<2x3xf32> in !stream.resource<constant> = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant_1 : !stream.resource<constant>
util.global.store %0, @_constant_1__size : index
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%cst = arith.constant 1.000000e+00 : f32
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant__size = util.global.load @_constant__size : index
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_0__size = util.global.load @_constant_0__size : index
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%_constant_1__size = util.global.load @_constant_1__size : index
%0 = stream.tensor.sizeof tensor<1xf32> : index
%1 = stream.tensor.splat %cst : f32 -> tensor<1xf32> in !stream.resource<*>{%0}
%2 = stream.async.transfer %_constant : !stream.resource<constant>{%_constant__size} -> !stream.resource<*>{%_constant__size}
%3 = stream.async.transfer %_constant_0 : !stream.resource<constant>{%_constant_0__size} -> !stream.resource<*>{%_constant_0__size}
%4 = stream.async.transfer %_constant_1 : !stream.resource<constant>{%_constant_1__size} -> !stream.resource<*>{%_constant_1__size}
%5 = util.optimization_barrier %4 : !stream.resource<*>
%6 = util.optimization_barrier %3 : !stream.resource<*>
%7 = util.optimization_barrier %1 : !stream.resource<*>
%8 = stream.tensor.sizeof tensor<3x1xf32> : index
%9 = stream.tensor.sizeof tensor<f32> : index
%10 = stream.resource.size %5 : !stream.resource<*>
%11 = stream.tensor.sizeof tensor<2x1xf32> : index
%12 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%5[%c0 to %10 for %10], %6[%c0 to %8 for %8], %7[%c0 to %9 for %9]) : (!stream.resource<*>{%10}, !stream.resource<*>{%8}, !stream.resource<*>{%9}) -> !stream.resource<*>{%11}
%13 = stream.async.transfer %12 : !stream.resource<*>{%11} -> !stream.resource<external>{%11}
%14 = stream.tensor.export %13 : tensor<2x1xf32> in !stream.resource<external>{%11} -> tensor<2x1xf32>
%15 = stream.async.transfer %2 : !stream.resource<*>{%_constant__size} -> !stream.resource<external>{%_constant__size}
%16 = stream.tensor.export %15 : tensor<2x1xf32> in !stream.resource<external>{%_constant__size} -> tensor<2x1xf32>
check.expect_eq(%14, %16) : tensor<2x1xf32>
return
}
}
// -----// IR Dump Before CombineInitializers (iree-util-combine-initializers) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private @_constant : !stream.resource<constant>
util.global private @_constant__size : index
util.initializer {
%cst = stream.tensor.constant : tensor<2x1xf32> in !stream.resource<constant> = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %0, @_constant__size : index
util.initializer.return
}
util.global private @_constant_0 : !stream.resource<constant>
util.global private @_constant_0__size : index
util.initializer {
%cst = stream.tensor.constant : tensor<1x3xf32> in !stream.resource<constant> = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant_0 : !stream.resource<constant>
util.global.store %0, @_constant_0__size : index
util.initializer.return
}
util.global private @_constant_1 : !stream.resource<constant>
util.global private @_constant_1__size : index
util.initializer {
%cst = stream.tensor.constant : tensor<2x3xf32> in !stream.resource<constant> = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant_1 : !stream.resource<constant>
util.global.store %0, @_constant_1__size : index
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%cst = arith.constant 1.000000e+00 : f32
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant__size = util.global.load @_constant__size : index
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_0__size = util.global.load @_constant_0__size : index
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%_constant_1__size = util.global.load @_constant_1__size : index
%0 = stream.tensor.sizeof tensor<1xf32> : index
%1 = stream.tensor.splat %cst : f32 -> tensor<1xf32> in !stream.resource<*>{%0}
%2 = stream.async.transfer %_constant : !stream.resource<constant>{%_constant__size} -> !stream.resource<*>{%_constant__size}
%3 = stream.async.transfer %_constant_0 : !stream.resource<constant>{%_constant_0__size} -> !stream.resource<*>{%_constant_0__size}
%4 = stream.async.transfer %_constant_1 : !stream.resource<constant>{%_constant_1__size} -> !stream.resource<*>{%_constant_1__size}
%5 = util.optimization_barrier %4 : !stream.resource<*>
%6 = util.optimization_barrier %3 : !stream.resource<*>
%7 = util.optimization_barrier %1 : !stream.resource<*>
%8 = stream.tensor.sizeof tensor<3x1xf32> : index
%9 = stream.tensor.sizeof tensor<f32> : index
%10 = stream.resource.size %5 : !stream.resource<*>
%11 = stream.tensor.sizeof tensor<2x1xf32> : index
%12 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%5[%c0 to %10 for %10], %6[%c0 to %8 for %8], %7[%c0 to %9 for %9]) : (!stream.resource<*>{%10}, !stream.resource<*>{%8}, !stream.resource<*>{%9}) -> !stream.resource<*>{%11}
%13 = stream.async.transfer %12 : !stream.resource<*>{%11} -> !stream.resource<external>{%11}
%14 = stream.tensor.export %13 : tensor<2x1xf32> in !stream.resource<external>{%11} -> tensor<2x1xf32>
%15 = stream.async.transfer %2 : !stream.resource<*>{%_constant__size} -> !stream.resource<external>{%_constant__size}
%16 = stream.tensor.export %15 : tensor<2x1xf32> in !stream.resource<external>{%_constant__size} -> tensor<2x1xf32>
check.expect_eq(%14, %16) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After CombineInitializers (iree-util-combine-initializers) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private @_constant : !stream.resource<constant>
util.global private @_constant__size : index
util.global private @_constant_0 : !stream.resource<constant>
util.global private @_constant_0__size : index
util.global private @_constant_1 : !stream.resource<constant>
util.global private @_constant_1__size : index
util.initializer {
%cst = stream.tensor.constant : tensor<2x1xf32> in !stream.resource<constant> = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %0, @_constant__size : index
%cst_0 = stream.tensor.constant : tensor<1x3xf32> in !stream.resource<constant> = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%1 = stream.resource.size %cst_0 : !stream.resource<constant>
util.global.store %cst_0, @_constant_0 : !stream.resource<constant>
util.global.store %1, @_constant_0__size : index
%cst_1 = stream.tensor.constant : tensor<2x3xf32> in !stream.resource<constant> = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%2 = stream.resource.size %cst_1 : !stream.resource<constant>
util.global.store %cst_1, @_constant_1 : !stream.resource<constant>
util.global.store %2, @_constant_1__size : index
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%cst = arith.constant 1.000000e+00 : f32
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant__size = util.global.load @_constant__size : index
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_0__size = util.global.load @_constant_0__size : index
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%_constant_1__size = util.global.load @_constant_1__size : index
%0 = stream.tensor.sizeof tensor<1xf32> : index
%1 = stream.tensor.splat %cst : f32 -> tensor<1xf32> in !stream.resource<*>{%0}
%2 = stream.async.transfer %_constant : !stream.resource<constant>{%_constant__size} -> !stream.resource<*>{%_constant__size}
%3 = stream.async.transfer %_constant_0 : !stream.resource<constant>{%_constant_0__size} -> !stream.resource<*>{%_constant_0__size}
%4 = stream.async.transfer %_constant_1 : !stream.resource<constant>{%_constant_1__size} -> !stream.resource<*>{%_constant_1__size}
%5 = util.optimization_barrier %4 : !stream.resource<*>
%6 = util.optimization_barrier %3 : !stream.resource<*>
%7 = util.optimization_barrier %1 : !stream.resource<*>
%8 = stream.tensor.sizeof tensor<3x1xf32> : index
%9 = stream.tensor.sizeof tensor<f32> : index
%10 = stream.resource.size %5 : !stream.resource<*>
%11 = stream.tensor.sizeof tensor<2x1xf32> : index
%12 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%5[%c0 to %10 for %10], %6[%c0 to %8 for %8], %7[%c0 to %9 for %9]) : (!stream.resource<*>{%10}, !stream.resource<*>{%8}, !stream.resource<*>{%9}) -> !stream.resource<*>{%11}
%13 = stream.async.transfer %12 : !stream.resource<*>{%11} -> !stream.resource<external>{%11}
%14 = stream.tensor.export %13 : tensor<2x1xf32> in !stream.resource<external>{%11} -> tensor<2x1xf32>
%15 = stream.async.transfer %2 : !stream.resource<*>{%_constant__size} -> !stream.resource<external>{%_constant__size}
%16 = stream.tensor.export %15 : tensor<2x1xf32> in !stream.resource<external>{%_constant__size} -> tensor<2x1xf32>
check.expect_eq(%14, %16) : tensor<2x1xf32>
return
}
}
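// Note: comparing with the dump above, CombineInitializers has merged the three
// separate util.initializer blocks into the single initializer seen here, which
// materializes all three constants and stores their sizes back-to-back; the rest
// of the module is unchanged.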
// -----// IR Dump Before EncodeHostTensors (iree-stream-encode-host-tensors) //----- //
util.initializer {
%cst = stream.tensor.constant : tensor<2x1xf32> in !stream.resource<constant> = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%0 = stream.resource.size %cst : !stream.resource<constant>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %0, @_constant__size : index
%cst_0 = stream.tensor.constant : tensor<1x3xf32> in !stream.resource<constant> = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%1 = stream.resource.size %cst_0 : !stream.resource<constant>
util.global.store %cst_0, @_constant_0 : !stream.resource<constant>
util.global.store %1, @_constant_0__size : index
%cst_1 = stream.tensor.constant : tensor<2x3xf32> in !stream.resource<constant> = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
%2 = stream.resource.size %cst_1 : !stream.resource<constant>
util.global.store %cst_1, @_constant_1 : !stream.resource<constant>
util.global.store %2, @_constant_1__size : index
util.initializer.return
}
// -----// IR Dump Before EncodeDeviceTensors (iree-stream-encode-device-tensors) //----- //
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
// -----// IR Dump Before EncodeHostTensors (iree-stream-encode-host-tensors) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before EncodeHostTensors (iree-stream-encode-host-tensors) //----- //
func.func private @_tensor_float() {
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%cst = arith.constant 1.000000e+00 : f32
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant__size = util.global.load @_constant__size : index
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_0__size = util.global.load @_constant_0__size : index
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%_constant_1__size = util.global.load @_constant_1__size : index
%0 = stream.tensor.sizeof tensor<1xf32> : index
%1 = stream.tensor.splat %cst : f32 -> tensor<1xf32> in !stream.resource<*>{%0}
%2 = stream.async.transfer %_constant : !stream.resource<constant>{%_constant__size} -> !stream.resource<*>{%_constant__size}
%3 = stream.async.transfer %_constant_0 : !stream.resource<constant>{%_constant_0__size} -> !stream.resource<*>{%_constant_0__size}
%4 = stream.async.transfer %_constant_1 : !stream.resource<constant>{%_constant_1__size} -> !stream.resource<*>{%_constant_1__size}
%5 = util.optimization_barrier %4 : !stream.resource<*>
%6 = util.optimization_barrier %3 : !stream.resource<*>
%7 = util.optimization_barrier %1 : !stream.resource<*>
%8 = stream.tensor.sizeof tensor<3x1xf32> : index
%9 = stream.tensor.sizeof tensor<f32> : index
%10 = stream.resource.size %5 : !stream.resource<*>
%11 = stream.tensor.sizeof tensor<2x1xf32> : index
%12 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%5[%c0 to %10 for %10], %6[%c0 to %8 for %8], %7[%c0 to %9 for %9]) : (!stream.resource<*>{%10}, !stream.resource<*>{%8}, !stream.resource<*>{%9}) -> !stream.resource<*>{%11}
%13 = stream.async.transfer %12 : !stream.resource<*>{%11} -> !stream.resource<external>{%11}
%14 = stream.tensor.export %13 : tensor<2x1xf32> in !stream.resource<external>{%11} -> tensor<2x1xf32>
%15 = stream.async.transfer %2 : !stream.resource<*>{%_constant__size} -> !stream.resource<external>{%_constant__size}
%16 = stream.tensor.export %15 : tensor<2x1xf32> in !stream.resource<external>{%_constant__size} -> tensor<2x1xf32>
check.expect_eq(%14, %16) : tensor<2x1xf32>
return
}
// -----// IR Dump After EncodeHostTensors (iree-stream-encode-host-tensors) //----- //
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %c8, @_constant__size : index
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
util.global.store %cst_0, @_constant_0 : !stream.resource<constant>
util.global.store %c12, @_constant_0__size : index
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
util.global.store %cst_1, @_constant_1 : !stream.resource<constant>
util.global.store %c24, @_constant_1__size : index
util.initializer.return
}
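// Note: EncodeHostTensors lowers each stream.tensor.constant to a
// stream.async.constant with a statically computed byte size
// (tensor<2x1xf32> -> 8 bytes, tensor<1x3xf32> -> 12 bytes, tensor<2x3xf32> -> 24 bytes,
// i.e. element count x 4 bytes for f32), so the stream.resource.size queries fold away.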
// -----// IR Dump After EncodeHostTensors (iree-stream-encode-host-tensors) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After EncodeDeviceTensors (iree-stream-encode-device-tensors) //----- //
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
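// Note: EncodeDeviceTensors leaves this executable unchanged (its plain f32
// tensors need no re-encoding); the affine maps appear inlined here, presumably
// only because the executable is dumped in isolation rather than under the
// module that defines the #map aliases.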
// -----// IR Dump After EncodeHostTensors (iree-stream-encode-host-tensors) //----- //
func.func private @_tensor_float() {
%c4 = arith.constant 4 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c12 = arith.constant 12 : index
%c8 = arith.constant 8 : index
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant__size = util.global.load @_constant__size : index
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_0__size = util.global.load @_constant_0__size : index
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%_constant_1__size = util.global.load @_constant_1__size : index
%0 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<*>{%c4}
%1 = stream.async.transfer %_constant : !stream.resource<constant>{%_constant__size} -> !stream.resource<*>{%_constant__size}
%2 = stream.async.transfer %_constant_0 : !stream.resource<constant>{%_constant_0__size} -> !stream.resource<*>{%_constant_0__size}
%3 = stream.async.transfer %_constant_1 : !stream.resource<constant>{%_constant_1__size} -> !stream.resource<*>{%_constant_1__size}
%4 = util.optimization_barrier %3 : !stream.resource<*>
%5 = util.optimization_barrier %2 : !stream.resource<*>
%6 = util.optimization_barrier %0 : !stream.resource<*>
%7 = stream.resource.size %4 : !stream.resource<*>
%8 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%4[%c0 to %7 for %7], %5[%c0 to %c12 for %c12], %6[%c0 to %c4 for %c4]) : (!stream.resource<*>{%7}, !stream.resource<*>{%c12}, !stream.resource<*>{%c4}) -> !stream.resource<*>{%c8}
%9 = stream.async.transfer %8 : !stream.resource<*>{%c8} -> !stream.resource<external>{%c8}
%10 = stream.tensor.export %9 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%11 = stream.async.transfer %1 : !stream.resource<*>{%_constant__size} -> !stream.resource<external>{%_constant__size}
%12 = stream.tensor.export %11 : tensor<2x1xf32> in !stream.resource<external>{%_constant__size} -> tensor<2x1xf32>
check.expect_eq(%10, %12) : tensor<2x1xf32>
return
}
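// Note: in @_tensor_float, EncodeHostTensors folds the stream.tensor.sizeof ops
// to constants (tensor<f32> -> 4, tensor<3x1xf32> -> 12, tensor<2x1xf32> -> 8) and
// rewrites the f32 splat of 1.0 as an i32 splat of 1065353216 (0x3F800000, the
// IEEE-754 bit pattern of 1.0f).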
// -----// IR Dump Before MaterializeBuiltins (iree-stream-materialize-builtins) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private @_constant : !stream.resource<constant>
util.global private @_constant__size : index
util.global private @_constant_0 : !stream.resource<constant>
util.global private @_constant_0__size : index
util.global private @_constant_1 : !stream.resource<constant>
util.global private @_constant_1__size : index
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %c8, @_constant__size : index
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
util.global.store %cst_0, @_constant_0 : !stream.resource<constant>
util.global.store %c12, @_constant_0__size : index
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
util.global.store %cst_1, @_constant_1 : !stream.resource<constant>
util.global.store %c24, @_constant_1__size : index
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c4 = arith.constant 4 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c12 = arith.constant 12 : index
%c8 = arith.constant 8 : index
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant__size = util.global.load @_constant__size : index
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_0__size = util.global.load @_constant_0__size : index
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%_constant_1__size = util.global.load @_constant_1__size : index
%0 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<*>{%c4}
%1 = stream.async.transfer %_constant : !stream.resource<constant>{%_constant__size} -> !stream.resource<*>{%_constant__size}
%2 = stream.async.transfer %_constant_0 : !stream.resource<constant>{%_constant_0__size} -> !stream.resource<*>{%_constant_0__size}
%3 = stream.async.transfer %_constant_1 : !stream.resource<constant>{%_constant_1__size} -> !stream.resource<*>{%_constant_1__size}
%4 = util.optimization_barrier %3 : !stream.resource<*>
%5 = util.optimization_barrier %2 : !stream.resource<*>
%6 = util.optimization_barrier %0 : !stream.resource<*>
%7 = stream.resource.size %4 : !stream.resource<*>
%8 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%4[%c0 to %7 for %7], %5[%c0 to %c12 for %c12], %6[%c0 to %c4 for %c4]) : (!stream.resource<*>{%7}, !stream.resource<*>{%c12}, !stream.resource<*>{%c4}) -> !stream.resource<*>{%c8}
%9 = stream.async.transfer %8 : !stream.resource<*>{%c8} -> !stream.resource<external>{%c8}
%10 = stream.tensor.export %9 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%11 = stream.async.transfer %1 : !stream.resource<*>{%_constant__size} -> !stream.resource<external>{%_constant__size}
%12 = stream.tensor.export %11 : tensor<2x1xf32> in !stream.resource<external>{%_constant__size} -> tensor<2x1xf32>
check.expect_eq(%10, %12) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After MaterializeBuiltins (iree-stream-materialize-builtins) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private @_constant : !stream.resource<constant>
util.global private @_constant__size : index
util.global private @_constant_0 : !stream.resource<constant>
util.global private @_constant_0__size : index
util.global private @_constant_1 : !stream.resource<constant>
util.global private @_constant_1__size : index
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %c8, @_constant__size : index
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
util.global.store %cst_0, @_constant_0 : !stream.resource<constant>
util.global.store %c12, @_constant_0__size : index
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
util.global.store %cst_1, @_constant_1 : !stream.resource<constant>
util.global.store %c24, @_constant_1__size : index
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c4 = arith.constant 4 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c12 = arith.constant 12 : index
%c8 = arith.constant 8 : index
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant__size = util.global.load @_constant__size : index
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_0__size = util.global.load @_constant_0__size : index
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%_constant_1__size = util.global.load @_constant_1__size : index
%0 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<*>{%c4}
%1 = stream.async.transfer %_constant : !stream.resource<constant>{%_constant__size} -> !stream.resource<*>{%_constant__size}
%2 = stream.async.transfer %_constant_0 : !stream.resource<constant>{%_constant_0__size} -> !stream.resource<*>{%_constant_0__size}
%3 = stream.async.transfer %_constant_1 : !stream.resource<constant>{%_constant_1__size} -> !stream.resource<*>{%_constant_1__size}
%4 = util.optimization_barrier %3 : !stream.resource<*>
%5 = util.optimization_barrier %2 : !stream.resource<*>
%6 = util.optimization_barrier %0 : !stream.resource<*>
%7 = stream.resource.size %4 : !stream.resource<*>
%8 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%4[%c0 to %7 for %7], %5[%c0 to %c12 for %c12], %6[%c0 to %c4 for %c4]) : (!stream.resource<*>{%7}, !stream.resource<*>{%c12}, !stream.resource<*>{%c4}) -> !stream.resource<*>{%c8}
%9 = stream.async.transfer %8 : !stream.resource<*>{%c8} -> !stream.resource<external>{%c8}
%10 = stream.tensor.export %9 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%11 = stream.async.transfer %1 : !stream.resource<*>{%_constant__size} -> !stream.resource<external>{%_constant__size}
%12 = stream.tensor.export %11 : tensor<2x1xf32> in !stream.resource<external>{%_constant__size} -> tensor<2x1xf32>
check.expect_eq(%10, %12) : tensor<2x1xf32>
return
}
}
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %c8, @_constant__size : index
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
util.global.store %cst_0, @_constant_0 : !stream.resource<constant>
util.global.store %c12, @_constant_0__size : index
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
util.global.store %cst_1, @_constant_1 : !stream.resource<constant>
util.global.store %c24, @_constant_1__size : index
util.initializer.return
}
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
func.func private @_tensor_float() {
%c4 = arith.constant 4 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c12 = arith.constant 12 : index
%c8 = arith.constant 8 : index
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant__size = util.global.load @_constant__size : index
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_0__size = util.global.load @_constant_0__size : index
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%_constant_1__size = util.global.load @_constant_1__size : index
%0 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<*>{%c4}
%1 = stream.async.transfer %_constant : !stream.resource<constant>{%_constant__size} -> !stream.resource<*>{%_constant__size}
%2 = stream.async.transfer %_constant_0 : !stream.resource<constant>{%_constant_0__size} -> !stream.resource<*>{%_constant_0__size}
%3 = stream.async.transfer %_constant_1 : !stream.resource<constant>{%_constant_1__size} -> !stream.resource<*>{%_constant_1__size}
%4 = util.optimization_barrier %3 : !stream.resource<*>
%5 = util.optimization_barrier %2 : !stream.resource<*>
%6 = util.optimization_barrier %0 : !stream.resource<*>
%7 = stream.resource.size %4 : !stream.resource<*>
%8 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%4[%c0 to %7 for %7], %5[%c0 to %c12 for %c12], %6[%c0 to %c4 for %c4]) : (!stream.resource<*>{%7}, !stream.resource<*>{%c12}, !stream.resource<*>{%c4}) -> !stream.resource<*>{%c8}
%9 = stream.async.transfer %8 : !stream.resource<*>{%c8} -> !stream.resource<external>{%c8}
%10 = stream.tensor.export %9 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%11 = stream.async.transfer %1 : !stream.resource<*>{%_constant__size} -> !stream.resource<external>{%_constant__size}
%12 = stream.tensor.export %11 : tensor<2x1xf32> in !stream.resource<external>{%_constant__size} -> tensor<2x1xf32>
check.expect_eq(%10, %12) : tensor<2x1xf32>
return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %c8, @_constant__size : index
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
util.global.store %cst_0, @_constant_0 : !stream.resource<constant>
util.global.store %c12, @_constant_0__size : index
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
util.global.store %cst_1, @_constant_1 : !stream.resource<constant>
util.global.store %c24, @_constant_1__size : index
util.initializer.return
}
// -----// IR Dump Before CSE (cse) //----- //
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %c8, @_constant__size : index
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
util.global.store %cst_0, @_constant_0 : !stream.resource<constant>
util.global.store %c12, @_constant_0__size : index
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
util.global.store %cst_1, @_constant_1 : !stream.resource<constant>
util.global.store %c24, @_constant_1__size : index
util.initializer.return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before CSE (cse) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After CSE (cse) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before SimplifyGlobalAccesses (iree-util-simplify-global-accesses) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After CSE (cse) //----- //
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %c8, @_constant__size : index
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
util.global.store %cst_0, @_constant_0 : !stream.resource<constant>
util.global.store %c12, @_constant_0__size : index
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
util.global.store %cst_1, @_constant_1 : !stream.resource<constant>
util.global.store %c24, @_constant_1__size : index
util.initializer.return
}
// -----// IR Dump Before SimplifyGlobalAccesses (iree-util-simplify-global-accesses) //----- //
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %c8, @_constant__size : index
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
util.global.store %cst_0, @_constant_0 : !stream.resource<constant>
util.global.store %c12, @_constant_0__size : index
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
util.global.store %cst_1, @_constant_1 : !stream.resource<constant>
util.global.store %c24, @_constant_1__size : index
util.initializer.return
}
// -----// IR Dump After SimplifyGlobalAccesses (iree-util-simplify-global-accesses) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
func.func private @_tensor_float() {
%c4 = arith.constant 4 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c12 = arith.constant 12 : index
%c8 = arith.constant 8 : index
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant__size = util.global.load @_constant__size : index
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_0__size = util.global.load @_constant_0__size : index
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%_constant_1__size = util.global.load @_constant_1__size : index
%0 = stream.async.transfer %_constant : !stream.resource<constant>{%_constant__size} -> !stream.resource<*>{%_constant__size}
%1 = stream.async.transfer %_constant_0 : !stream.resource<constant>{%_constant_0__size} -> !stream.resource<*>{%_constant_0__size}
%2 = stream.async.transfer %_constant_1 : !stream.resource<constant>{%_constant_1__size} -> !stream.resource<*>{%_constant_1__size}
%3 = util.optimization_barrier %2 : !stream.resource<*>
%4 = util.optimization_barrier %1 : !stream.resource<*>
%5 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<*>{%c4}
%6 = util.optimization_barrier %5 : !stream.resource<*>
%7 = stream.resource.size %3 : !stream.resource<*>
%8 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%3[%c0 to %7 for %7], %4[%c0 to %c12 for %c12], %6[%c0 to %c4 for %c4]) : (!stream.resource<*>{%7}, !stream.resource<*>{%c12}, !stream.resource<*>{%c4}) -> !stream.resource<*>{%c8}
%9 = stream.async.transfer %8 : !stream.resource<*>{%c8} -> !stream.resource<external>{%c8}
%10 = stream.tensor.export %9 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%11 = stream.async.transfer %0 : !stream.resource<*>{%_constant__size} -> !stream.resource<external>{%_constant__size}
%12 = stream.tensor.export %11 : tensor<2x1xf32> in !stream.resource<external>{%_constant__size} -> tensor<2x1xf32>
check.expect_eq(%10, %12) : tensor<2x1xf32>
return
}
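// Note: the canonicalizer only reorders @_tensor_float here, sinking the
// stream.async.splat so it sits next to the optimization barrier that consumes
// it; no ops are added or removed.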
// -----// IR Dump Before CSE (cse) //----- //
func.func private @_tensor_float() {
%c4 = arith.constant 4 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c12 = arith.constant 12 : index
%c8 = arith.constant 8 : index
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant__size = util.global.load @_constant__size : index
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_0__size = util.global.load @_constant_0__size : index
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%_constant_1__size = util.global.load @_constant_1__size : index
%0 = stream.async.transfer %_constant : !stream.resource<constant>{%_constant__size} -> !stream.resource<*>{%_constant__size}
%1 = stream.async.transfer %_constant_0 : !stream.resource<constant>{%_constant_0__size} -> !stream.resource<*>{%_constant_0__size}
%2 = stream.async.transfer %_constant_1 : !stream.resource<constant>{%_constant_1__size} -> !stream.resource<*>{%_constant_1__size}
%3 = util.optimization_barrier %2 : !stream.resource<*>
%4 = util.optimization_barrier %1 : !stream.resource<*>
%5 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<*>{%c4}
%6 = util.optimization_barrier %5 : !stream.resource<*>
%7 = stream.resource.size %3 : !stream.resource<*>
%8 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%3[%c0 to %7 for %7], %4[%c0 to %c12 for %c12], %6[%c0 to %c4 for %c4]) : (!stream.resource<*>{%7}, !stream.resource<*>{%c12}, !stream.resource<*>{%c4}) -> !stream.resource<*>{%c8}
%9 = stream.async.transfer %8 : !stream.resource<*>{%c8} -> !stream.resource<external>{%c8}
%10 = stream.tensor.export %9 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%11 = stream.async.transfer %0 : !stream.resource<*>{%_constant__size} -> !stream.resource<external>{%_constant__size}
%12 = stream.tensor.export %11 : tensor<2x1xf32> in !stream.resource<external>{%_constant__size} -> tensor<2x1xf32>
check.expect_eq(%10, %12) : tensor<2x1xf32>
return
}
// -----// IR Dump After SimplifyGlobalAccesses (iree-util-simplify-global-accesses) //----- //
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %cst_0, @_constant_0 : !stream.resource<constant>
util.global.store %c12, @_constant_0__size : index
util.global.store %cst_1, @_constant_1 : !stream.resource<constant>
util.global.store %c24, @_constant_1__size : index
util.global.store %c8, @_constant__size : index
util.initializer.return
}
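// NOTE: This util.initializer dump (After SimplifyGlobalAccesses) appears in
// between the Before/After CSE dumps of @_tensor_float because these passes run
// and print per operation, so their output interleaves. At this point the
// initializer still stores both the three stream constants and their byte sizes
// (8, 12, 24) into the @_constant*__size globals; the size globals are inlined
// and then folded away by ApplyPatterns and FoldGlobals further below.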
// -----// IR Dump After CSE (cse) //----- //
func.func private @_tensor_float() {
%c4 = arith.constant 4 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c12 = arith.constant 12 : index
%c8 = arith.constant 8 : index
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant__size = util.global.load @_constant__size : index
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_0__size = util.global.load @_constant_0__size : index
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%_constant_1__size = util.global.load @_constant_1__size : index
%0 = stream.async.transfer %_constant : !stream.resource<constant>{%_constant__size} -> !stream.resource<*>{%_constant__size}
%1 = stream.async.transfer %_constant_0 : !stream.resource<constant>{%_constant_0__size} -> !stream.resource<*>{%_constant_0__size}
%2 = stream.async.transfer %_constant_1 : !stream.resource<constant>{%_constant_1__size} -> !stream.resource<*>{%_constant_1__size}
%3 = util.optimization_barrier %2 : !stream.resource<*>
%4 = util.optimization_barrier %1 : !stream.resource<*>
%5 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<*>{%c4}
%6 = util.optimization_barrier %5 : !stream.resource<*>
%7 = stream.resource.size %3 : !stream.resource<*>
%8 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%3[%c0 to %7 for %7], %4[%c0 to %c12 for %c12], %6[%c0 to %c4 for %c4]) : (!stream.resource<*>{%7}, !stream.resource<*>{%c12}, !stream.resource<*>{%c4}) -> !stream.resource<*>{%c8}
%9 = stream.async.transfer %8 : !stream.resource<*>{%c8} -> !stream.resource<external>{%c8}
%10 = stream.tensor.export %9 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%11 = stream.async.transfer %0 : !stream.resource<*>{%_constant__size} -> !stream.resource<external>{%_constant__size}
%12 = stream.tensor.export %11 : tensor<2x1xf32> in !stream.resource<external>{%_constant__size} -> tensor<2x1xf32>
check.expect_eq(%10, %12) : tensor<2x1xf32>
return
}
// -----// IR Dump Before SimplifyGlobalAccesses (iree-util-simplify-global-accesses) //----- //
func.func private @_tensor_float() {
%c4 = arith.constant 4 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c12 = arith.constant 12 : index
%c8 = arith.constant 8 : index
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant__size = util.global.load @_constant__size : index
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_0__size = util.global.load @_constant_0__size : index
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%_constant_1__size = util.global.load @_constant_1__size : index
%0 = stream.async.transfer %_constant : !stream.resource<constant>{%_constant__size} -> !stream.resource<*>{%_constant__size}
%1 = stream.async.transfer %_constant_0 : !stream.resource<constant>{%_constant_0__size} -> !stream.resource<*>{%_constant_0__size}
%2 = stream.async.transfer %_constant_1 : !stream.resource<constant>{%_constant_1__size} -> !stream.resource<*>{%_constant_1__size}
%3 = util.optimization_barrier %2 : !stream.resource<*>
%4 = util.optimization_barrier %1 : !stream.resource<*>
%5 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<*>{%c4}
%6 = util.optimization_barrier %5 : !stream.resource<*>
%7 = stream.resource.size %3 : !stream.resource<*>
%8 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%3[%c0 to %7 for %7], %4[%c0 to %c12 for %c12], %6[%c0 to %c4 for %c4]) : (!stream.resource<*>{%7}, !stream.resource<*>{%c12}, !stream.resource<*>{%c4}) -> !stream.resource<*>{%c8}
%9 = stream.async.transfer %8 : !stream.resource<*>{%c8} -> !stream.resource<external>{%c8}
%10 = stream.tensor.export %9 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%11 = stream.async.transfer %0 : !stream.resource<*>{%_constant__size} -> !stream.resource<external>{%_constant__size}
%12 = stream.tensor.export %11 : tensor<2x1xf32> in !stream.resource<external>{%_constant__size} -> tensor<2x1xf32>
check.expect_eq(%10, %12) : tensor<2x1xf32>
return
}
// -----// IR Dump After SimplifyGlobalAccesses (iree-util-simplify-global-accesses) //----- //
func.func private @_tensor_float() {
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant__size = util.global.load @_constant__size : index
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_0__size = util.global.load @_constant_0__size : index
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%_constant_1__size = util.global.load @_constant_1__size : index
%c4 = arith.constant 4 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c12 = arith.constant 12 : index
%c8 = arith.constant 8 : index
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%0 = stream.async.transfer %_constant : !stream.resource<constant>{%_constant__size} -> !stream.resource<*>{%_constant__size}
%1 = stream.async.transfer %_constant_0 : !stream.resource<constant>{%_constant_0__size} -> !stream.resource<*>{%_constant_0__size}
%2 = stream.async.transfer %_constant_1 : !stream.resource<constant>{%_constant_1__size} -> !stream.resource<*>{%_constant_1__size}
%3 = util.optimization_barrier %2 : !stream.resource<*>
%4 = util.optimization_barrier %1 : !stream.resource<*>
%5 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<*>{%c4}
%6 = util.optimization_barrier %5 : !stream.resource<*>
%7 = stream.resource.size %3 : !stream.resource<*>
%8 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%3[%c0 to %7 for %7], %4[%c0 to %c12 for %c12], %6[%c0 to %c4 for %c4]) : (!stream.resource<*>{%7}, !stream.resource<*>{%c12}, !stream.resource<*>{%c4}) -> !stream.resource<*>{%c8}
%9 = stream.async.transfer %8 : !stream.resource<*>{%c8} -> !stream.resource<external>{%c8}
%10 = stream.tensor.export %9 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%11 = stream.async.transfer %0 : !stream.resource<*>{%_constant__size} -> !stream.resource<external>{%_constant__size}
%12 = stream.tensor.export %11 : tensor<2x1xf32> in !stream.resource<external>{%_constant__size} -> tensor<2x1xf32>
check.expect_eq(%10, %12) : tensor<2x1xf32>
return
}
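// NOTE: SimplifyGlobalAccesses hoisted the util.global.load ops to the top of
// @_tensor_float, ahead of the arith.constant ops. The globals are only written
// in the initializer, so the loads can be reordered freely; the rest of the
// function body is unchanged.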
// -----// IR Dump Before ApplyPatterns (iree-util-apply-patterns) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private @_constant : !stream.resource<constant>
util.global private @_constant__size : index
util.global private @_constant_0 : !stream.resource<constant>
util.global private @_constant_0__size : index
util.global private @_constant_1 : !stream.resource<constant>
util.global private @_constant_1__size : index
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %cst_0, @_constant_0 : !stream.resource<constant>
util.global.store %c12, @_constant_0__size : index
util.global.store %cst_1, @_constant_1 : !stream.resource<constant>
util.global.store %c24, @_constant_1__size : index
util.global.store %c8, @_constant__size : index
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant__size = util.global.load @_constant__size : index
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_0__size = util.global.load @_constant_0__size : index
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%_constant_1__size = util.global.load @_constant_1__size : index
%c4 = arith.constant 4 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c12 = arith.constant 12 : index
%c8 = arith.constant 8 : index
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%0 = stream.async.transfer %_constant : !stream.resource<constant>{%_constant__size} -> !stream.resource<*>{%_constant__size}
%1 = stream.async.transfer %_constant_0 : !stream.resource<constant>{%_constant_0__size} -> !stream.resource<*>{%_constant_0__size}
%2 = stream.async.transfer %_constant_1 : !stream.resource<constant>{%_constant_1__size} -> !stream.resource<*>{%_constant_1__size}
%3 = util.optimization_barrier %2 : !stream.resource<*>
%4 = util.optimization_barrier %1 : !stream.resource<*>
%5 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<*>{%c4}
%6 = util.optimization_barrier %5 : !stream.resource<*>
%7 = stream.resource.size %3 : !stream.resource<*>
%8 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%3[%c0 to %7 for %7], %4[%c0 to %c12 for %c12], %6[%c0 to %c4 for %c4]) : (!stream.resource<*>{%7}, !stream.resource<*>{%c12}, !stream.resource<*>{%c4}) -> !stream.resource<*>{%c8}
%9 = stream.async.transfer %8 : !stream.resource<*>{%c8} -> !stream.resource<external>{%c8}
%10 = stream.tensor.export %9 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%11 = stream.async.transfer %0 : !stream.resource<*>{%_constant__size} -> !stream.resource<external>{%_constant__size}
%12 = stream.tensor.export %11 : tensor<2x1xf32> in !stream.resource<external>{%_constant__size} -> tensor<2x1xf32>
check.expect_eq(%10, %12) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After ApplyPatterns (iree-util-apply-patterns) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private @_constant : !stream.resource<constant>
util.global private @_constant__size = 8 : index
util.global private @_constant_0 : !stream.resource<constant>
util.global private @_constant_0__size = 12 : index
util.global private @_constant_1 : !stream.resource<constant>
util.global private @_constant_1__size = 24 : index
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %cst_0, @_constant_0 : !stream.resource<constant>
util.global.store %cst_1, @_constant_1 : !stream.resource<constant>
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant__size = util.global.load @_constant__size : index
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_0__size = util.global.load @_constant_0__size : index
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%_constant_1__size = util.global.load @_constant_1__size : index
%0 = stream.async.transfer %_constant : !stream.resource<constant>{%_constant__size} -> !stream.resource<*>{%_constant__size}
%1 = stream.async.transfer %_constant_0 : !stream.resource<constant>{%_constant_0__size} -> !stream.resource<*>{%_constant_0__size}
%2 = stream.async.transfer %_constant_1 : !stream.resource<constant>{%_constant_1__size} -> !stream.resource<*>{%_constant_1__size}
%3 = util.optimization_barrier %2 : !stream.resource<*>
%4 = util.optimization_barrier %1 : !stream.resource<*>
%5 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<*>{%c4}
%6 = util.optimization_barrier %5 : !stream.resource<*>
%7 = stream.resource.size %3 : !stream.resource<*>
%8 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%3[%c0 to %7 for %7], %4[%c0 to %c12 for %c12], %6[%c0 to %c4 for %c4]) : (!stream.resource<*>{%7}, !stream.resource<*>{%c12}, !stream.resource<*>{%c4}) -> !stream.resource<*>{%c8}
%9 = stream.async.transfer %8 : !stream.resource<*>{%c8} -> !stream.resource<external>{%c8}
%10 = stream.tensor.export %9 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%11 = stream.async.transfer %0 : !stream.resource<*>{%_constant__size} -> !stream.resource<external>{%_constant__size}
%12 = stream.tensor.export %11 : tensor<2x1xf32> in !stream.resource<external>{%_constant__size} -> tensor<2x1xf32>
check.expect_eq(%10, %12) : tensor<2x1xf32>
return
}
}
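// NOTE: ApplyPatterns attached the known byte sizes directly to the size
// globals (@_constant__size = 8, @_constant_0__size = 12, @_constant_1__size = 24)
// and dropped the corresponding util.global.store ops from the initializer. The
// loads in @_tensor_float still go through those globals at this point.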
// -----// IR Dump Before FoldGlobals (iree-util-fold-globals) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private @_constant : !stream.resource<constant>
util.global private @_constant__size = 8 : index
util.global private @_constant_0 : !stream.resource<constant>
util.global private @_constant_0__size = 12 : index
util.global private @_constant_1 : !stream.resource<constant>
util.global private @_constant_1__size = 24 : index
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %cst_0, @_constant_0 : !stream.resource<constant>
util.global.store %cst_1, @_constant_1 : !stream.resource<constant>
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant__size = util.global.load @_constant__size : index
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_0__size = util.global.load @_constant_0__size : index
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%_constant_1__size = util.global.load @_constant_1__size : index
%0 = stream.async.transfer %_constant : !stream.resource<constant>{%_constant__size} -> !stream.resource<*>{%_constant__size}
%1 = stream.async.transfer %_constant_0 : !stream.resource<constant>{%_constant_0__size} -> !stream.resource<*>{%_constant_0__size}
%2 = stream.async.transfer %_constant_1 : !stream.resource<constant>{%_constant_1__size} -> !stream.resource<*>{%_constant_1__size}
%3 = util.optimization_barrier %2 : !stream.resource<*>
%4 = util.optimization_barrier %1 : !stream.resource<*>
%5 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<*>{%c4}
%6 = util.optimization_barrier %5 : !stream.resource<*>
%7 = stream.resource.size %3 : !stream.resource<*>
%8 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%3[%c0 to %7 for %7], %4[%c0 to %c12 for %c12], %6[%c0 to %c4 for %c4]) : (!stream.resource<*>{%7}, !stream.resource<*>{%c12}, !stream.resource<*>{%c4}) -> !stream.resource<*>{%c8}
%9 = stream.async.transfer %8 : !stream.resource<*>{%c8} -> !stream.resource<external>{%c8}
%10 = stream.tensor.export %9 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%11 = stream.async.transfer %0 : !stream.resource<*>{%_constant__size} -> !stream.resource<external>{%_constant__size}
%12 = stream.tensor.export %11 : tensor<2x1xf32> in !stream.resource<external>{%_constant__size} -> tensor<2x1xf32>
check.expect_eq(%10, %12) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After FoldGlobals (iree-util-fold-globals) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private @_constant : !stream.resource<constant>
util.global private @_constant_0 : !stream.resource<constant>
util.global private @_constant_1 : !stream.resource<constant>
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %cst_0, @_constant_0 : !stream.resource<constant>
util.global.store %cst_1, @_constant_1 : !stream.resource<constant>
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c24 = arith.constant 24 : index
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.async.transfer %_constant : !stream.resource<constant>{%c8} -> !stream.resource<*>{%c8}
%1 = stream.async.transfer %_constant_0 : !stream.resource<constant>{%c12} -> !stream.resource<*>{%c12}
%2 = stream.async.transfer %_constant_1 : !stream.resource<constant>{%c24} -> !stream.resource<*>{%c24}
%3 = util.optimization_barrier %2 : !stream.resource<*>
%4 = util.optimization_barrier %1 : !stream.resource<*>
%5 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<*>{%c4}
%6 = util.optimization_barrier %5 : !stream.resource<*>
%7 = stream.resource.size %3 : !stream.resource<*>
%8 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%3[%c0 to %7 for %7], %4[%c0 to %c12 for %c12], %6[%c0 to %c4 for %c4]) : (!stream.resource<*>{%7}, !stream.resource<*>{%c12}, !stream.resource<*>{%c4}) -> !stream.resource<*>{%c8}
%9 = stream.async.transfer %8 : !stream.resource<*>{%c8} -> !stream.resource<external>{%c8}
%10 = stream.tensor.export %9 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%11 = stream.async.transfer %0 : !stream.resource<*>{%c8} -> !stream.resource<external>{%c8}
%12 = stream.tensor.export %11 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%10, %12) : tensor<2x1xf32>
return
}
}
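// NOTE: FoldGlobals removed the three *__size globals entirely: their loads in
// @_tensor_float are replaced by the constants %c8, %c12 and %c24, so every
// transfer and export size is now static. Only the 2x3 input is still sized via
// stream.resource.size, since %3 is the result of a util.optimization_barrier.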
// -----// IR Dump Before FuseGlobals (iree-util-fuse-globals) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private @_constant : !stream.resource<constant>
util.global private @_constant_0 : !stream.resource<constant>
util.global private @_constant_1 : !stream.resource<constant>
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %cst_0, @_constant_0 : !stream.resource<constant>
util.global.store %cst_1, @_constant_1 : !stream.resource<constant>
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c24 = arith.constant 24 : index
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.async.transfer %_constant : !stream.resource<constant>{%c8} -> !stream.resource<*>{%c8}
%1 = stream.async.transfer %_constant_0 : !stream.resource<constant>{%c12} -> !stream.resource<*>{%c12}
%2 = stream.async.transfer %_constant_1 : !stream.resource<constant>{%c24} -> !stream.resource<*>{%c24}
%3 = util.optimization_barrier %2 : !stream.resource<*>
%4 = util.optimization_barrier %1 : !stream.resource<*>
%5 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<*>{%c4}
%6 = util.optimization_barrier %5 : !stream.resource<*>
%7 = stream.resource.size %3 : !stream.resource<*>
%8 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%3[%c0 to %7 for %7], %4[%c0 to %c12 for %c12], %6[%c0 to %c4 for %c4]) : (!stream.resource<*>{%7}, !stream.resource<*>{%c12}, !stream.resource<*>{%c4}) -> !stream.resource<*>{%c8}
%9 = stream.async.transfer %8 : !stream.resource<*>{%c8} -> !stream.resource<external>{%c8}
%10 = stream.tensor.export %9 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%11 = stream.async.transfer %0 : !stream.resource<*>{%c8} -> !stream.resource<external>{%c8}
%12 = stream.tensor.export %11 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%10, %12) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After FuseGlobals (iree-util-fuse-globals) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private @_constant : !stream.resource<constant>
util.global private @_constant_0 : !stream.resource<constant>
util.global private @_constant_1 : !stream.resource<constant>
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %cst_0, @_constant_0 : !stream.resource<constant>
util.global.store %cst_1, @_constant_1 : !stream.resource<constant>
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c24 = arith.constant 24 : index
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.async.transfer %_constant : !stream.resource<constant>{%c8} -> !stream.resource<*>{%c8}
%1 = stream.async.transfer %_constant_0 : !stream.resource<constant>{%c12} -> !stream.resource<*>{%c12}
%2 = stream.async.transfer %_constant_1 : !stream.resource<constant>{%c24} -> !stream.resource<*>{%c24}
%3 = util.optimization_barrier %2 : !stream.resource<*>
%4 = util.optimization_barrier %1 : !stream.resource<*>
%5 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<*>{%c4}
%6 = util.optimization_barrier %5 : !stream.resource<*>
%7 = stream.resource.size %3 : !stream.resource<*>
%8 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%3[%c0 to %7 for %7], %4[%c0 to %c12 for %c12], %6[%c0 to %c4 for %c4]) : (!stream.resource<*>{%7}, !stream.resource<*>{%c12}, !stream.resource<*>{%c4}) -> !stream.resource<*>{%c8}
%9 = stream.async.transfer %8 : !stream.resource<*>{%c8} -> !stream.resource<external>{%c8}
%10 = stream.tensor.export %9 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%11 = stream.async.transfer %0 : !stream.resource<*>{%c8} -> !stream.resource<external>{%c8}
%12 = stream.tensor.export %11 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%10, %12) : tensor<2x1xf32>
return
}
}
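// NOTE: FuseGlobals leaves the module unchanged (the Before/After dumps are
// identical): each remaining constant global has a single store in the
// initializer and its own distinct loads, so there is nothing to fuse.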
// -----// IR Dump Before IPO (iree-util-ipo) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private @_constant : !stream.resource<constant>
util.global private @_constant_0 : !stream.resource<constant>
util.global private @_constant_1 : !stream.resource<constant>
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %cst_0, @_constant_0 : !stream.resource<constant>
util.global.store %cst_1, @_constant_1 : !stream.resource<constant>
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c24 = arith.constant 24 : index
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.async.transfer %_constant : !stream.resource<constant>{%c8} -> !stream.resource<*>{%c8}
%1 = stream.async.transfer %_constant_0 : !stream.resource<constant>{%c12} -> !stream.resource<*>{%c12}
%2 = stream.async.transfer %_constant_1 : !stream.resource<constant>{%c24} -> !stream.resource<*>{%c24}
%3 = util.optimization_barrier %2 : !stream.resource<*>
%4 = util.optimization_barrier %1 : !stream.resource<*>
%5 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<*>{%c4}
%6 = util.optimization_barrier %5 : !stream.resource<*>
%7 = stream.resource.size %3 : !stream.resource<*>
%8 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%3[%c0 to %7 for %7], %4[%c0 to %c12 for %c12], %6[%c0 to %c4 for %c4]) : (!stream.resource<*>{%7}, !stream.resource<*>{%c12}, !stream.resource<*>{%c4}) -> !stream.resource<*>{%c8}
%9 = stream.async.transfer %8 : !stream.resource<*>{%c8} -> !stream.resource<external>{%c8}
%10 = stream.tensor.export %9 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%11 = stream.async.transfer %0 : !stream.resource<*>{%c8} -> !stream.resource<external>{%c8}
%12 = stream.tensor.export %11 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%10, %12) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After IPO (iree-util-ipo) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private @_constant : !stream.resource<constant>
util.global private @_constant_0 : !stream.resource<constant>
util.global private @_constant_1 : !stream.resource<constant>
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %cst_0, @_constant_0 : !stream.resource<constant>
util.global.store %cst_1, @_constant_1 : !stream.resource<constant>
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c24 = arith.constant 24 : index
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.async.transfer %_constant : !stream.resource<constant>{%c8} -> !stream.resource<*>{%c8}
%1 = stream.async.transfer %_constant_0 : !stream.resource<constant>{%c12} -> !stream.resource<*>{%c12}
%2 = stream.async.transfer %_constant_1 : !stream.resource<constant>{%c24} -> !stream.resource<*>{%c24}
%3 = util.optimization_barrier %2 : !stream.resource<*>
%4 = util.optimization_barrier %1 : !stream.resource<*>
%5 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<*>{%c4}
%6 = util.optimization_barrier %5 : !stream.resource<*>
%7 = stream.resource.size %3 : !stream.resource<*>
%8 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%3[%c0 to %7 for %7], %4[%c0 to %c12 for %c12], %6[%c0 to %c4 for %c4]) : (!stream.resource<*>{%7}, !stream.resource<*>{%c12}, !stream.resource<*>{%c4}) -> !stream.resource<*>{%c8}
%9 = stream.async.transfer %8 : !stream.resource<*>{%c8} -> !stream.resource<external>{%c8}
%10 = stream.tensor.export %9 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%11 = stream.async.transfer %0 : !stream.resource<*>{%c8} -> !stream.resource<external>{%c8}
%12 = stream.tensor.export %11 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%10, %12) : tensor<2x1xf32>
return
}
}
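// NOTE: IPO is likewise a no-op here; @_tensor_float takes no arguments and
// returns nothing, so there is nothing to propagate across the single call from
// @tensor_float, and the module is printed unchanged.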
// -----// IR Dump Before MaterializeCopyOnWrite (iree-stream-materialize-copy-on-write) //----- //
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %cst_0, @_constant_0 : !stream.resource<constant>
util.global.store %cst_1, @_constant_1 : !stream.resource<constant>
util.initializer.return
}
// -----// IR Dump Before MaterializeCopyOnWrite (iree-stream-materialize-copy-on-write) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before MaterializeCopyOnWrite (iree-stream-materialize-copy-on-write) //----- //
func.func private @_tensor_float() {
%c24 = arith.constant 24 : index
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.async.transfer %_constant : !stream.resource<constant>{%c8} -> !stream.resource<*>{%c8}
%1 = stream.async.transfer %_constant_0 : !stream.resource<constant>{%c12} -> !stream.resource<*>{%c12}
%2 = stream.async.transfer %_constant_1 : !stream.resource<constant>{%c24} -> !stream.resource<*>{%c24}
%3 = util.optimization_barrier %2 : !stream.resource<*>
%4 = util.optimization_barrier %1 : !stream.resource<*>
%5 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<*>{%c4}
%6 = util.optimization_barrier %5 : !stream.resource<*>
%7 = stream.resource.size %3 : !stream.resource<*>
%8 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%3[%c0 to %7 for %7], %4[%c0 to %c12 for %c12], %6[%c0 to %c4 for %c4]) : (!stream.resource<*>{%7}, !stream.resource<*>{%c12}, !stream.resource<*>{%c4}) -> !stream.resource<*>{%c8}
%9 = stream.async.transfer %8 : !stream.resource<*>{%c8} -> !stream.resource<external>{%c8}
%10 = stream.tensor.export %9 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%11 = stream.async.transfer %0 : !stream.resource<*>{%c8} -> !stream.resource<external>{%c8}
%12 = stream.tensor.export %11 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%10, %12) : tensor<2x1xf32>
return
}
// -----// IR Dump After MaterializeCopyOnWrite (iree-stream-materialize-copy-on-write) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After MaterializeCopyOnWrite (iree-stream-materialize-copy-on-write) //----- //
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %cst_0, @_constant_0 : !stream.resource<constant>
util.global.store %cst_1, @_constant_1 : !stream.resource<constant>
util.initializer.return
}
// -----// IR Dump After MaterializeCopyOnWrite (iree-stream-materialize-copy-on-write) //----- //
func.func private @_tensor_float() {
%c24 = arith.constant 24 : index
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.async.transfer %_constant : !stream.resource<constant>{%c8} -> !stream.resource<*>{%c8}
%1 = stream.async.transfer %_constant_0 : !stream.resource<constant>{%c12} -> !stream.resource<*>{%c12}
%2 = stream.async.transfer %_constant_1 : !stream.resource<constant>{%c24} -> !stream.resource<*>{%c24}
%3 = util.optimization_barrier %2 : !stream.resource<*>
%4 = util.optimization_barrier %1 : !stream.resource<*>
%5 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<*>{%c4}
%6 = util.optimization_barrier %5 : !stream.resource<*>
%7 = stream.resource.size %3 : !stream.resource<*>
%8 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%3[%c0 to %7 for %7], %4[%c0 to %c12 for %c12], %6[%c0 to %c4 for %c4]) : (!stream.resource<*>{%7}, !stream.resource<*>{%c12}, !stream.resource<*>{%c4}) -> !stream.resource<*>{%c8}
%9 = stream.async.transfer %8 : !stream.resource<*>{%c8} -> !stream.resource<external>{%c8}
%10 = stream.tensor.export %9 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%11 = stream.async.transfer %0 : !stream.resource<*>{%c8} -> !stream.resource<external>{%c8}
%12 = stream.tensor.export %11 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%10, %12) : tensor<2x1xf32>
return
}
// -----// IR Dump Before ElideAsyncCopies (iree-stream-elide-async-copies) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private @_constant : !stream.resource<constant>
util.global private @_constant_0 : !stream.resource<constant>
util.global private @_constant_1 : !stream.resource<constant>
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %cst_0, @_constant_0 : !stream.resource<constant>
util.global.store %cst_1, @_constant_1 : !stream.resource<constant>
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c24 = arith.constant 24 : index
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.async.transfer %_constant : !stream.resource<constant>{%c8} -> !stream.resource<*>{%c8}
%1 = stream.async.transfer %_constant_0 : !stream.resource<constant>{%c12} -> !stream.resource<*>{%c12}
%2 = stream.async.transfer %_constant_1 : !stream.resource<constant>{%c24} -> !stream.resource<*>{%c24}
%3 = util.optimization_barrier %2 : !stream.resource<*>
%4 = util.optimization_barrier %1 : !stream.resource<*>
%5 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<*>{%c4}
%6 = util.optimization_barrier %5 : !stream.resource<*>
%7 = stream.resource.size %3 : !stream.resource<*>
%8 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%3[%c0 to %7 for %7], %4[%c0 to %c12 for %c12], %6[%c0 to %c4 for %c4]) : (!stream.resource<*>{%7}, !stream.resource<*>{%c12}, !stream.resource<*>{%c4}) -> !stream.resource<*>{%c8}
%9 = stream.async.transfer %8 : !stream.resource<*>{%c8} -> !stream.resource<external>{%c8}
%10 = stream.tensor.export %9 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%11 = stream.async.transfer %0 : !stream.resource<*>{%c8} -> !stream.resource<external>{%c8}
%12 = stream.tensor.export %11 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%10, %12) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After ElideAsyncCopies (iree-stream-elide-async-copies) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private @_constant : !stream.resource<constant>
util.global private @_constant_0 : !stream.resource<constant>
util.global private @_constant_1 : !stream.resource<constant>
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %cst_0, @_constant_0 : !stream.resource<constant>
util.global.store %cst_1, @_constant_1 : !stream.resource<constant>
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c24 = arith.constant 24 : index
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.async.transfer %_constant : !stream.resource<constant>{%c8} -> !stream.resource<*>{%c8}
%1 = stream.async.transfer %_constant_0 : !stream.resource<constant>{%c12} -> !stream.resource<*>{%c12}
%2 = stream.async.transfer %_constant_1 : !stream.resource<constant>{%c24} -> !stream.resource<*>{%c24}
%3 = util.optimization_barrier %2 : !stream.resource<*>
%4 = util.optimization_barrier %1 : !stream.resource<*>
%5 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<*>{%c4}
%6 = util.optimization_barrier %5 : !stream.resource<*>
%7 = stream.resource.size %3 : !stream.resource<*>
%8 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%3[%c0 to %7 for %7], %4[%c0 to %c12 for %c12], %6[%c0 to %c4 for %c4]) : (!stream.resource<*>{%7}, !stream.resource<*>{%c12}, !stream.resource<*>{%c4}) -> !stream.resource<*>{%c8}
%9 = stream.async.transfer %8 : !stream.resource<*>{%c8} -> !stream.resource<external>{%c8}
%10 = stream.tensor.export %9 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%11 = stream.async.transfer %0 : !stream.resource<*>{%c8} -> !stream.resource<external>{%c8}
%12 = stream.tensor.export %11 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%10, %12) : tensor<2x1xf32>
return
}
}
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %cst_0, @_constant_0 : !stream.resource<constant>
util.global.store %cst_1, @_constant_1 : !stream.resource<constant>
util.initializer.return
}
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
func.func private @_tensor_float() {
%c24 = arith.constant 24 : index
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.async.transfer %_constant : !stream.resource<constant>{%c8} -> !stream.resource<*>{%c8}
%1 = stream.async.transfer %_constant_0 : !stream.resource<constant>{%c12} -> !stream.resource<*>{%c12}
%2 = stream.async.transfer %_constant_1 : !stream.resource<constant>{%c24} -> !stream.resource<*>{%c24}
%3 = util.optimization_barrier %2 : !stream.resource<*>
%4 = util.optimization_barrier %1 : !stream.resource<*>
%5 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<*>{%c4}
%6 = util.optimization_barrier %5 : !stream.resource<*>
%7 = stream.resource.size %3 : !stream.resource<*>
%8 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%3[%c0 to %7 for %7], %4[%c0 to %c12 for %c12], %6[%c0 to %c4 for %c4]) : (!stream.resource<*>{%7}, !stream.resource<*>{%c12}, !stream.resource<*>{%c4}) -> !stream.resource<*>{%c8}
%9 = stream.async.transfer %8 : !stream.resource<*>{%c8} -> !stream.resource<external>{%c8}
%10 = stream.tensor.export %9 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%11 = stream.async.transfer %0 : !stream.resource<*>{%c8} -> !stream.resource<external>{%c8}
%12 = stream.tensor.export %11 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%10, %12) : tensor<2x1xf32>
return
}
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %cst_0, @_constant_0 : !stream.resource<constant>
util.global.store %cst_1, @_constant_1 : !stream.resource<constant>
util.initializer.return
}
// -----// IR Dump Before EmplaceAllocations (iree-stream-emplace-allocations) //----- //
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %cst_0, @_constant_0 : !stream.resource<constant>
util.global.store %cst_1, @_constant_1 : !stream.resource<constant>
util.initializer.return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
func.func private @_tensor_float() {
%c24 = arith.constant 24 : index
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.async.transfer %_constant : !stream.resource<constant>{%c8} -> !stream.resource<*>{%c8}
%1 = stream.async.transfer %_constant_0 : !stream.resource<constant>{%c12} -> !stream.resource<*>{%c12}
%2 = stream.async.transfer %_constant_1 : !stream.resource<constant>{%c24} -> !stream.resource<*>{%c24}
%3 = util.optimization_barrier %2 : !stream.resource<*>
%4 = util.optimization_barrier %1 : !stream.resource<*>
%5 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<*>{%c4}
%6 = util.optimization_barrier %5 : !stream.resource<*>
%7 = stream.resource.size %3 : !stream.resource<*>
%8 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%3[%c0 to %7 for %7], %4[%c0 to %c12 for %c12], %6[%c0 to %c4 for %c4]) : (!stream.resource<*>{%7}, !stream.resource<*>{%c12}, !stream.resource<*>{%c4}) -> !stream.resource<*>{%c8}
%9 = stream.async.transfer %8 : !stream.resource<*>{%c8} -> !stream.resource<external>{%c8}
%10 = stream.tensor.export %9 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%11 = stream.async.transfer %0 : !stream.resource<*>{%c8} -> !stream.resource<external>{%c8}
%12 = stream.tensor.export %11 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%10, %12) : tensor<2x1xf32>
return
}
// -----// IR Dump Before EmplaceAllocations (iree-stream-emplace-allocations) //----- //
func.func private @_tensor_float() {
%c24 = arith.constant 24 : index
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.async.transfer %_constant : !stream.resource<constant>{%c8} -> !stream.resource<*>{%c8}
%1 = stream.async.transfer %_constant_0 : !stream.resource<constant>{%c12} -> !stream.resource<*>{%c12}
%2 = stream.async.transfer %_constant_1 : !stream.resource<constant>{%c24} -> !stream.resource<*>{%c24}
%3 = util.optimization_barrier %2 : !stream.resource<*>
%4 = util.optimization_barrier %1 : !stream.resource<*>
%5 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<*>{%c4}
%6 = util.optimization_barrier %5 : !stream.resource<*>
%7 = stream.resource.size %3 : !stream.resource<*>
%8 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%3[%c0 to %7 for %7], %4[%c0 to %c12 for %c12], %6[%c0 to %c4 for %c4]) : (!stream.resource<*>{%7}, !stream.resource<*>{%c12}, !stream.resource<*>{%c4}) -> !stream.resource<*>{%c8}
%9 = stream.async.transfer %8 : !stream.resource<*>{%c8} -> !stream.resource<external>{%c8}
%10 = stream.tensor.export %9 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%11 = stream.async.transfer %0 : !stream.resource<*>{%c8} -> !stream.resource<external>{%c8}
%12 = stream.tensor.export %11 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%10, %12) : tensor<2x1xf32>
return
}
// -----// IR Dump After EmplaceAllocations (iree-stream-emplace-allocations) //----- //
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %cst_0, @_constant_0 : !stream.resource<constant>
util.global.store %cst_1, @_constant_1 : !stream.resource<constant>
util.initializer.return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before EmplaceAllocations (iree-stream-emplace-allocations) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After EmplaceAllocations (iree-stream-emplace-allocations) //----- //
func.func private @_tensor_float() {
%c24 = arith.constant 24 : index
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.async.transfer %_constant : !stream.resource<constant>{%c8} -> !stream.resource<*>{%c8}
%1 = stream.async.transfer %_constant_0 : !stream.resource<constant>{%c12} -> !stream.resource<*>{%c12}
%2 = stream.async.transfer %_constant_1 : !stream.resource<constant>{%c24} -> !stream.resource<*>{%c24}
%3 = util.optimization_barrier %2 : !stream.resource<*>
%4 = util.optimization_barrier %1 : !stream.resource<*>
%5 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<*>{%c4}
%6 = util.optimization_barrier %5 : !stream.resource<*>
%7 = stream.resource.size %3 : !stream.resource<*>
%8 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%3[%c0 to %7 for %7], %4[%c0 to %c12 for %c12], %6[%c0 to %c4 for %c4]) : (!stream.resource<*>{%7}, !stream.resource<*>{%c12}, !stream.resource<*>{%c4}) -> !stream.resource<*>{%c8}
%9 = stream.async.transfer %8 : !stream.resource<*>{%c8} -> !stream.resource<external>{%c8}
%10 = stream.tensor.export %9 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%11 = stream.async.transfer %0 : !stream.resource<*>{%c8} -> !stream.resource<external>{%c8}
%12 = stream.tensor.export %11 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%10, %12) : tensor<2x1xf32>
return
}
// -----// IR Dump After EmplaceAllocations (iree-stream-emplace-allocations) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before RefineUsage (iree-stream-refine-usage) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private @_constant : !stream.resource<constant>
util.global private @_constant_0 : !stream.resource<constant>
util.global private @_constant_1 : !stream.resource<constant>
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %cst_0, @_constant_0 : !stream.resource<constant>
util.global.store %cst_1, @_constant_1 : !stream.resource<constant>
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c24 = arith.constant 24 : index
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.async.transfer %_constant : !stream.resource<constant>{%c8} -> !stream.resource<*>{%c8}
%1 = stream.async.transfer %_constant_0 : !stream.resource<constant>{%c12} -> !stream.resource<*>{%c12}
%2 = stream.async.transfer %_constant_1 : !stream.resource<constant>{%c24} -> !stream.resource<*>{%c24}
%3 = util.optimization_barrier %2 : !stream.resource<*>
%4 = util.optimization_barrier %1 : !stream.resource<*>
%5 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<*>{%c4}
%6 = util.optimization_barrier %5 : !stream.resource<*>
%7 = stream.resource.size %3 : !stream.resource<*>
%8 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%3[%c0 to %7 for %7], %4[%c0 to %c12 for %c12], %6[%c0 to %c4 for %c4]) : (!stream.resource<*>{%7}, !stream.resource<*>{%c12}, !stream.resource<*>{%c4}) -> !stream.resource<*>{%c8}
%9 = stream.async.transfer %8 : !stream.resource<*>{%c8} -> !stream.resource<external>{%c8}
%10 = stream.tensor.export %9 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%11 = stream.async.transfer %0 : !stream.resource<*>{%c8} -> !stream.resource<external>{%c8}
%12 = stream.tensor.export %11 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%10, %12) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After RefineUsage (iree-stream-refine-usage) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private @_constant : !stream.resource<constant>
util.global private @_constant_0 : !stream.resource<constant>
util.global private @_constant_1 : !stream.resource<constant>
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %cst_0, @_constant_0 : !stream.resource<constant>
util.global.store %cst_1, @_constant_1 : !stream.resource<constant>
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.async.transfer %_constant : !stream.resource<constant>{%c8} -> !stream.resource<external>{%c8}
%1 = util.optimization_barrier %_constant_1 : !stream.resource<constant>
%2 = util.optimization_barrier %_constant_0 : !stream.resource<constant>
%3 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<transient>{%c4}
%4 = util.optimization_barrier %3 : !stream.resource<transient>
%5 = stream.resource.size %1 : !stream.resource<constant>
%6 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%1[%c0 to %5 for %5], %2[%c0 to %c12 for %c12], %4[%c0 to %c4 for %c4]) : (!stream.resource<constant>{%5}, !stream.resource<constant>{%c12}, !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8}
%7 = stream.tensor.export %6 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%8 = stream.tensor.export %0 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%7, %8) : tensor<2x1xf32>
return
}
}
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %cst_0, @_constant_0 : !stream.resource<constant>
util.global.store %cst_1, @_constant_1 : !stream.resource<constant>
util.initializer.return
}
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
func.func private @_tensor_float() {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.async.transfer %_constant : !stream.resource<constant>{%c8} -> !stream.resource<external>{%c8}
%1 = util.optimization_barrier %_constant_1 : !stream.resource<constant>
%2 = util.optimization_barrier %_constant_0 : !stream.resource<constant>
%3 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<transient>{%c4}
%4 = util.optimization_barrier %3 : !stream.resource<transient>
%5 = stream.resource.size %1 : !stream.resource<constant>
%6 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%1[%c0 to %5 for %5], %2[%c0 to %c12 for %c12], %4[%c0 to %c4 for %c4]) : (!stream.resource<constant>{%5}, !stream.resource<constant>{%c12}, !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8}
%7 = stream.tensor.export %6 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%8 = stream.tensor.export %0 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%7, %8) : tensor<2x1xf32>
return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %cst_0, @_constant_0 : !stream.resource<constant>
util.global.store %cst_1, @_constant_1 : !stream.resource<constant>
util.initializer.return
}
// -----// IR Dump Before CSE (cse) //----- //
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %cst_0, @_constant_0 : !stream.resource<constant>
util.global.store %cst_1, @_constant_1 : !stream.resource<constant>
util.initializer.return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before CSE (cse) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After CSE (cse) //----- //
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %cst_0, @_constant_0 : !stream.resource<constant>
util.global.store %cst_1, @_constant_1 : !stream.resource<constant>
util.initializer.return
}
// -----// IR Dump Before SimplifyGlobalAccesses (iree-util-simplify-global-accesses) //----- //
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %cst_0, @_constant_0 : !stream.resource<constant>
util.global.store %cst_1, @_constant_1 : !stream.resource<constant>
util.initializer.return
}
// -----// IR Dump After CSE (cse) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before SimplifyGlobalAccesses (iree-util-simplify-global-accesses) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After SimplifyGlobalAccesses (iree-util-simplify-global-accesses) //----- //
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %cst_0, @_constant_0 : !stream.resource<constant>
util.global.store %cst_1, @_constant_1 : !stream.resource<constant>
util.initializer.return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
func.func private @_tensor_float() {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.async.transfer %_constant : !stream.resource<constant>{%c8} -> !stream.resource<external>{%c8}
%1 = util.optimization_barrier %_constant_1 : !stream.resource<constant>
%2 = util.optimization_barrier %_constant_0 : !stream.resource<constant>
%3 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<transient>{%c4}
%4 = util.optimization_barrier %3 : !stream.resource<transient>
%5 = stream.resource.size %1 : !stream.resource<constant>
%6 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%1[%c0 to %5 for %5], %2[%c0 to %c12 for %c12], %4[%c0 to %c4 for %c4]) : (!stream.resource<constant>{%5}, !stream.resource<constant>{%c12}, !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8}
%7 = stream.tensor.export %6 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%8 = stream.tensor.export %0 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%7, %8) : tensor<2x1xf32>
return
}
// -----// IR Dump Before CSE (cse) //----- //
func.func private @_tensor_float() {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.async.transfer %_constant : !stream.resource<constant>{%c8} -> !stream.resource<external>{%c8}
%1 = util.optimization_barrier %_constant_1 : !stream.resource<constant>
%2 = util.optimization_barrier %_constant_0 : !stream.resource<constant>
%3 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<transient>{%c4}
%4 = util.optimization_barrier %3 : !stream.resource<transient>
%5 = stream.resource.size %1 : !stream.resource<constant>
%6 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%1[%c0 to %5 for %5], %2[%c0 to %c12 for %c12], %4[%c0 to %c4 for %c4]) : (!stream.resource<constant>{%5}, !stream.resource<constant>{%c12}, !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8}
%7 = stream.tensor.export %6 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%8 = stream.tensor.export %0 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%7, %8) : tensor<2x1xf32>
return
}
// -----// IR Dump After SimplifyGlobalAccesses (iree-util-simplify-global-accesses) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After CSE (cse) //----- //
func.func private @_tensor_float() {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.async.transfer %_constant : !stream.resource<constant>{%c8} -> !stream.resource<external>{%c8}
%1 = util.optimization_barrier %_constant_1 : !stream.resource<constant>
%2 = util.optimization_barrier %_constant_0 : !stream.resource<constant>
%3 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<transient>{%c4}
%4 = util.optimization_barrier %3 : !stream.resource<transient>
%5 = stream.resource.size %1 : !stream.resource<constant>
%6 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%1[%c0 to %5 for %5], %2[%c0 to %c12 for %c12], %4[%c0 to %c4 for %c4]) : (!stream.resource<constant>{%5}, !stream.resource<constant>{%c12}, !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8}
%7 = stream.tensor.export %6 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%8 = stream.tensor.export %0 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%7, %8) : tensor<2x1xf32>
return
}
// -----// IR Dump Before SimplifyGlobalAccesses (iree-util-simplify-global-accesses) //----- //
func.func private @_tensor_float() {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.async.transfer %_constant : !stream.resource<constant>{%c8} -> !stream.resource<external>{%c8}
%1 = util.optimization_barrier %_constant_1 : !stream.resource<constant>
%2 = util.optimization_barrier %_constant_0 : !stream.resource<constant>
%3 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<transient>{%c4}
%4 = util.optimization_barrier %3 : !stream.resource<transient>
%5 = stream.resource.size %1 : !stream.resource<constant>
%6 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%1[%c0 to %5 for %5], %2[%c0 to %c12 for %c12], %4[%c0 to %c4 for %c4]) : (!stream.resource<constant>{%5}, !stream.resource<constant>{%c12}, !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8}
%7 = stream.tensor.export %6 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%8 = stream.tensor.export %0 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%7, %8) : tensor<2x1xf32>
return
}
// -----// IR Dump After SimplifyGlobalAccesses (iree-util-simplify-global-accesses) //----- //
func.func private @_tensor_float() {
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%0 = stream.async.transfer %_constant : !stream.resource<constant>{%c8} -> !stream.resource<external>{%c8}
%1 = util.optimization_barrier %_constant_1 : !stream.resource<constant>
%2 = util.optimization_barrier %_constant_0 : !stream.resource<constant>
%3 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<transient>{%c4}
%4 = util.optimization_barrier %3 : !stream.resource<transient>
%5 = stream.resource.size %1 : !stream.resource<constant>
%6 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%1[%c0 to %5 for %5], %2[%c0 to %c12 for %c12], %4[%c0 to %c4 for %c4]) : (!stream.resource<constant>{%5}, !stream.resource<constant>{%c12}, !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8}
%7 = stream.tensor.export %6 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%8 = stream.tensor.export %0 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%7, %8) : tensor<2x1xf32>
return
}
// -----// IR Dump Before ApplyPatterns (iree-util-apply-patterns) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private @_constant : !stream.resource<constant>
util.global private @_constant_0 : !stream.resource<constant>
util.global private @_constant_1 : !stream.resource<constant>
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %cst_0, @_constant_0 : !stream.resource<constant>
util.global.store %cst_1, @_constant_1 : !stream.resource<constant>
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%0 = stream.async.transfer %_constant : !stream.resource<constant>{%c8} -> !stream.resource<external>{%c8}
%1 = util.optimization_barrier %_constant_1 : !stream.resource<constant>
%2 = util.optimization_barrier %_constant_0 : !stream.resource<constant>
%3 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<transient>{%c4}
%4 = util.optimization_barrier %3 : !stream.resource<transient>
%5 = stream.resource.size %1 : !stream.resource<constant>
%6 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%1[%c0 to %5 for %5], %2[%c0 to %c12 for %c12], %4[%c0 to %c4 for %c4]) : (!stream.resource<constant>{%5}, !stream.resource<constant>{%c12}, !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8}
%7 = stream.tensor.export %6 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%8 = stream.tensor.export %0 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%7, %8) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After ApplyPatterns (iree-util-apply-patterns) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private @_constant : !stream.resource<constant>
util.global private @_constant_0 : !stream.resource<constant>
util.global private @_constant_1 : !stream.resource<constant>
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %cst_0, @_constant_0 : !stream.resource<constant>
util.global.store %cst_1, @_constant_1 : !stream.resource<constant>
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c4 = arith.constant 4 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c12 = arith.constant 12 : index
%c8 = arith.constant 8 : index
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.async.transfer %_constant : !stream.resource<constant>{%c8} -> !stream.resource<external>{%c8}
%1 = util.optimization_barrier %_constant_1 : !stream.resource<constant>
%2 = util.optimization_barrier %_constant_0 : !stream.resource<constant>
%3 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<transient>{%c4}
%4 = util.optimization_barrier %3 : !stream.resource<transient>
%5 = stream.resource.size %1 : !stream.resource<constant>
%6 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%1[%c0 to %5 for %5], %2[%c0 to %c12 for %c12], %4[%c0 to %c4 for %c4]) : (!stream.resource<constant>{%5}, !stream.resource<constant>{%c12}, !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8}
%7 = stream.tensor.export %6 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%8 = stream.tensor.export %0 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%7, %8) : tensor<2x1xf32>
return
}
}
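For reference, the dispatch in the dump above is the lowered form of the original tosa.fully_connected: a linalg.matmul of the 2x3 input against the transposed 3x1 weights, followed by a linalg.generic that adds the scalar bias. The expected constant [[51], [123]] and the i32 splat value 1065353216 (the bit pattern of 1.0f, 0x3F800000, used to materialize the 4-byte bias buffer) can be checked with a short NumPy sketch. This is purely illustrative and not part of the compiler output; the variable names are invented here.

import struct
import numpy as np

lhs = np.array([[1.0, 2.0, 3.0],
                [4.0, 5.0, 6.0]], dtype=np.float32)       # tensor<2x3xf32>
weights = np.array([[7.0, 8.0, 9.0]], dtype=np.float32)   # tensor<1x3xf32>
bias = np.array([1.0], dtype=np.float32)                  # tensor<1xf32>

# fully_connected == lhs @ weights^T + bias; the dispatch consumes the
# already-transposed tensor<3x1xf32> weights in linalg.matmul.
out = lhs @ weights.T + bias
print(out)  # [[ 51.] [123.]] -- matches the dense<[[5.1e+01], [1.23e+02]]> constant

# 1065353216 is the IEEE-754 bit pattern of 1.0f (0x3F800000), the value
# stream.async.splat writes into the 4-byte bias resource.
assert struct.unpack("<f", struct.pack("<i", 1065353216))[0] == 1.0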
// -----// IR Dump Before FoldGlobals (iree-util-fold-globals) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private @_constant : !stream.resource<constant>
util.global private @_constant_0 : !stream.resource<constant>
util.global private @_constant_1 : !stream.resource<constant>
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %cst_0, @_constant_0 : !stream.resource<constant>
util.global.store %cst_1, @_constant_1 : !stream.resource<constant>
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c4 = arith.constant 4 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c12 = arith.constant 12 : index
%c8 = arith.constant 8 : index
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.async.transfer %_constant : !stream.resource<constant>{%c8} -> !stream.resource<external>{%c8}
%1 = util.optimization_barrier %_constant_1 : !stream.resource<constant>
%2 = util.optimization_barrier %_constant_0 : !stream.resource<constant>
%3 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<transient>{%c4}
%4 = util.optimization_barrier %3 : !stream.resource<transient>
%5 = stream.resource.size %1 : !stream.resource<constant>
%6 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%1[%c0 to %5 for %5], %2[%c0 to %c12 for %c12], %4[%c0 to %c4 for %c4]) : (!stream.resource<constant>{%5}, !stream.resource<constant>{%c12}, !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8}
%7 = stream.tensor.export %6 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%8 = stream.tensor.export %0 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%7, %8) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After FoldGlobals (iree-util-fold-globals) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private @_constant : !stream.resource<constant>
util.global private @_constant_0 : !stream.resource<constant>
util.global private @_constant_1 : !stream.resource<constant>
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %cst_0, @_constant_0 : !stream.resource<constant>
util.global.store %cst_1, @_constant_1 : !stream.resource<constant>
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c4 = arith.constant 4 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c12 = arith.constant 12 : index
%c8 = arith.constant 8 : index
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.async.transfer %_constant : !stream.resource<constant>{%c8} -> !stream.resource<external>{%c8}
%1 = util.optimization_barrier %_constant_1 : !stream.resource<constant>
%2 = util.optimization_barrier %_constant_0 : !stream.resource<constant>
%3 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<transient>{%c4}
%4 = util.optimization_barrier %3 : !stream.resource<transient>
%5 = stream.resource.size %1 : !stream.resource<constant>
%6 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%1[%c0 to %5 for %5], %2[%c0 to %c12 for %c12], %4[%c0 to %c4 for %c4]) : (!stream.resource<constant>{%5}, !stream.resource<constant>{%c12}, !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8}
%7 = stream.tensor.export %6 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%8 = stream.tensor.export %0 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%7, %8) : tensor<2x1xf32>
return
}
}
// -----// IR Dump Before FuseGlobals (iree-util-fuse-globals) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private @_constant : !stream.resource<constant>
util.global private @_constant_0 : !stream.resource<constant>
util.global private @_constant_1 : !stream.resource<constant>
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %cst_0, @_constant_0 : !stream.resource<constant>
util.global.store %cst_1, @_constant_1 : !stream.resource<constant>
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c4 = arith.constant 4 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c12 = arith.constant 12 : index
%c8 = arith.constant 8 : index
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.async.transfer %_constant : !stream.resource<constant>{%c8} -> !stream.resource<external>{%c8}
%1 = util.optimization_barrier %_constant_1 : !stream.resource<constant>
%2 = util.optimization_barrier %_constant_0 : !stream.resource<constant>
%3 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<transient>{%c4}
%4 = util.optimization_barrier %3 : !stream.resource<transient>
%5 = stream.resource.size %1 : !stream.resource<constant>
%6 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%1[%c0 to %5 for %5], %2[%c0 to %c12 for %c12], %4[%c0 to %c4 for %c4]) : (!stream.resource<constant>{%5}, !stream.resource<constant>{%c12}, !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8}
%7 = stream.tensor.export %6 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%8 = stream.tensor.export %0 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%7, %8) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After FuseGlobals (iree-util-fuse-globals) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private @_constant : !stream.resource<constant>
util.global private @_constant_0 : !stream.resource<constant>
util.global private @_constant_1 : !stream.resource<constant>
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %cst_0, @_constant_0 : !stream.resource<constant>
util.global.store %cst_1, @_constant_1 : !stream.resource<constant>
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c4 = arith.constant 4 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c12 = arith.constant 12 : index
%c8 = arith.constant 8 : index
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.async.transfer %_constant : !stream.resource<constant>{%c8} -> !stream.resource<external>{%c8}
%1 = util.optimization_barrier %_constant_1 : !stream.resource<constant>
%2 = util.optimization_barrier %_constant_0 : !stream.resource<constant>
%3 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<transient>{%c4}
%4 = util.optimization_barrier %3 : !stream.resource<transient>
%5 = stream.resource.size %1 : !stream.resource<constant>
%6 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%1[%c0 to %5 for %5], %2[%c0 to %c12 for %c12], %4[%c0 to %c4 for %c4]) : (!stream.resource<constant>{%5}, !stream.resource<constant>{%c12}, !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8}
%7 = stream.tensor.export %6 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%8 = stream.tensor.export %0 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%7, %8) : tensor<2x1xf32>
return
}
}
// -----// IR Dump Before IPO (iree-util-ipo) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private @_constant : !stream.resource<constant>
util.global private @_constant_0 : !stream.resource<constant>
util.global private @_constant_1 : !stream.resource<constant>
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %cst_0, @_constant_0 : !stream.resource<constant>
util.global.store %cst_1, @_constant_1 : !stream.resource<constant>
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c4 = arith.constant 4 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c12 = arith.constant 12 : index
%c8 = arith.constant 8 : index
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.async.transfer %_constant : !stream.resource<constant>{%c8} -> !stream.resource<external>{%c8}
%1 = util.optimization_barrier %_constant_1 : !stream.resource<constant>
%2 = util.optimization_barrier %_constant_0 : !stream.resource<constant>
%3 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<transient>{%c4}
%4 = util.optimization_barrier %3 : !stream.resource<transient>
%5 = stream.resource.size %1 : !stream.resource<constant>
%6 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%1[%c0 to %5 for %5], %2[%c0 to %c12 for %c12], %4[%c0 to %c4 for %c4]) : (!stream.resource<constant>{%5}, !stream.resource<constant>{%c12}, !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8}
%7 = stream.tensor.export %6 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%8 = stream.tensor.export %0 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%7, %8) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After IPO (iree-util-ipo) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private @_constant : !stream.resource<constant>
util.global private @_constant_0 : !stream.resource<constant>
util.global private @_constant_1 : !stream.resource<constant>
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %cst_0, @_constant_0 : !stream.resource<constant>
util.global.store %cst_1, @_constant_1 : !stream.resource<constant>
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c4 = arith.constant 4 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c12 = arith.constant 12 : index
%c8 = arith.constant 8 : index
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.async.transfer %_constant : !stream.resource<constant>{%c8} -> !stream.resource<external>{%c8}
%1 = util.optimization_barrier %_constant_1 : !stream.resource<constant>
%2 = util.optimization_barrier %_constant_0 : !stream.resource<constant>
%3 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<transient>{%c4}
%4 = util.optimization_barrier %3 : !stream.resource<transient>
%5 = stream.resource.size %1 : !stream.resource<constant>
%6 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%1[%c0 to %5 for %5], %2[%c0 to %c12 for %c12], %4[%c0 to %c4 for %c4]) : (!stream.resource<constant>{%5}, !stream.resource<constant>{%c12}, !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8}
%7 = stream.tensor.export %6 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%8 = stream.tensor.export %0 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%7, %8) : tensor<2x1xf32>
return
}
}
// -----// IR Dump Before ScheduleExecution (iree-stream-schedule-execution) //----- //
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
util.global.store %cst, @_constant : !stream.resource<constant>
util.global.store %cst_0, @_constant_0 : !stream.resource<constant>
util.global.store %cst_1, @_constant_1 : !stream.resource<constant>
util.initializer.return
}
// -----// IR Dump Before ScheduleExecution (iree-stream-schedule-execution) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before ScheduleExecution (iree-stream-schedule-execution) //----- //
func.func private @_tensor_float() {
%c4 = arith.constant 4 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c12 = arith.constant 12 : index
%c8 = arith.constant 8 : index
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.async.transfer %_constant : !stream.resource<constant>{%c8} -> !stream.resource<external>{%c8}
%1 = util.optimization_barrier %_constant_1 : !stream.resource<constant>
%2 = util.optimization_barrier %_constant_0 : !stream.resource<constant>
%3 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<transient>{%c4}
%4 = util.optimization_barrier %3 : !stream.resource<transient>
%5 = stream.resource.size %1 : !stream.resource<constant>
%6 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%1[%c0 to %5 for %5], %2[%c0 to %c12 for %c12], %4[%c0 to %c4 for %c4]) : (!stream.resource<constant>{%5}, !stream.resource<constant>{%c12}, !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8}
%7 = stream.tensor.export %6 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%8 = stream.tensor.export %0 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%7, %8) : tensor<2x1xf32>
return
}
// -----// IR Dump After ScheduleExecution (iree-stream-schedule-execution) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before ScheduleConcurrency (iree-stream-schedule-concurrency) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After ScheduleConcurrency (iree-stream-schedule-concurrency) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After ScheduleExecution (iree-stream-schedule-execution) //----- //
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%results:3, %result_timepoint = stream.async.execute with() -> (!stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}) {
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
stream.yield %cst, %cst_0, %cst_1 : !stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}
} => !stream.timepoint
%0:3 = stream.timepoint.await %result_timepoint => %results#0, %results#1, %results#2 : !stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}
util.global.store %0#0, @_constant : !stream.resource<constant>
util.global.store %0#1, @_constant_0 : !stream.resource<constant>
util.global.store %0#2, @_constant_1 : !stream.resource<constant>
util.initializer.return
}
// -----// IR Dump Before ScheduleConcurrency (iree-stream-schedule-concurrency) //----- //
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%results:3, %result_timepoint = stream.async.execute with() -> (!stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}) {
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
stream.yield %cst, %cst_0, %cst_1 : !stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}
} => !stream.timepoint
%0:3 = stream.timepoint.await %result_timepoint => %results#0, %results#1, %results#2 : !stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}
util.global.store %0#0, @_constant : !stream.resource<constant>
util.global.store %0#1, @_constant_0 : !stream.resource<constant>
util.global.store %0#2, @_constant_1 : !stream.resource<constant>
util.initializer.return
}
// -----// IR Dump After ScheduleExecution (iree-stream-schedule-execution) //----- //
func.func private @_tensor_float() {
%c4 = arith.constant 4 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c12 = arith.constant 12 : index
%c8 = arith.constant 8 : index
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%results, %result_timepoint = stream.async.execute with(%_constant as %arg0: !stream.resource<constant>{%c8}) -> !stream.resource<external>{%c8} {
%9 = stream.async.transfer %arg0 : !stream.resource<constant>{%c8} -> !stream.resource<external>{%c8}
stream.yield %9 : !stream.resource<external>{%c8}
} => !stream.timepoint
%0 = util.optimization_barrier %_constant_1 : !stream.resource<constant>
%1 = util.optimization_barrier %_constant_0 : !stream.resource<constant>
%results_0, %result_timepoint_1 = stream.async.execute with() -> !stream.resource<transient>{%c4} {
%9 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<transient>{%c4}
stream.yield %9 : !stream.resource<transient>{%c4}
} => !stream.timepoint
%2 = stream.timepoint.await %result_timepoint_1 => %results_0 : !stream.resource<transient>{%c4}
%3 = util.optimization_barrier %2 : !stream.resource<transient>
%4 = stream.resource.size %0 : !stream.resource<constant>
%results_2, %result_timepoint_3 = stream.async.execute with(%0 as %arg0: !stream.resource<constant>{%4}, %1 as %arg1: !stream.resource<constant>{%c12}, %3 as %arg2: !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8} {
%9 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%arg0[%c0 to %4 for %4], %arg1[%c0 to %c12 for %c12], %arg2[%c0 to %c4 for %c4]) : (!stream.resource<constant>{%4}, !stream.resource<constant>{%c12}, !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8}
stream.yield %9 : !stream.resource<external>{%c8}
} => !stream.timepoint
%5 = stream.timepoint.await %result_timepoint_3 => %results_2 : !stream.resource<external>{%c8}
%6 = stream.tensor.export %5 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%7 = stream.timepoint.await %result_timepoint => %results : !stream.resource<external>{%c8}
%8 = stream.tensor.export %7 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%6, %8) : tensor<2x1xf32>
return
}
// -----// IR Dump Before ScheduleConcurrency (iree-stream-schedule-concurrency) //----- //
func.func private @_tensor_float() {
%c4 = arith.constant 4 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c12 = arith.constant 12 : index
%c8 = arith.constant 8 : index
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%results, %result_timepoint = stream.async.execute with(%_constant as %arg0: !stream.resource<constant>{%c8}) -> !stream.resource<external>{%c8} {
%9 = stream.async.transfer %arg0 : !stream.resource<constant>{%c8} -> !stream.resource<external>{%c8}
stream.yield %9 : !stream.resource<external>{%c8}
} => !stream.timepoint
%0 = util.optimization_barrier %_constant_1 : !stream.resource<constant>
%1 = util.optimization_barrier %_constant_0 : !stream.resource<constant>
%results_0, %result_timepoint_1 = stream.async.execute with() -> !stream.resource<transient>{%c4} {
%9 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<transient>{%c4}
stream.yield %9 : !stream.resource<transient>{%c4}
} => !stream.timepoint
%2 = stream.timepoint.await %result_timepoint_1 => %results_0 : !stream.resource<transient>{%c4}
%3 = util.optimization_barrier %2 : !stream.resource<transient>
%4 = stream.resource.size %0 : !stream.resource<constant>
%results_2, %result_timepoint_3 = stream.async.execute with(%0 as %arg0: !stream.resource<constant>{%4}, %1 as %arg1: !stream.resource<constant>{%c12}, %3 as %arg2: !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8} {
%9 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%arg0[%c0 to %4 for %4], %arg1[%c0 to %c12 for %c12], %arg2[%c0 to %c4 for %c4]) : (!stream.resource<constant>{%4}, !stream.resource<constant>{%c12}, !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8}
stream.yield %9 : !stream.resource<external>{%c8}
} => !stream.timepoint
%5 = stream.timepoint.await %result_timepoint_3 => %results_2 : !stream.resource<external>{%c8}
%6 = stream.tensor.export %5 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%7 = stream.timepoint.await %result_timepoint => %results : !stream.resource<external>{%c8}
%8 = stream.tensor.export %7 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%6, %8) : tensor<2x1xf32>
return
}
// -----// IR Dump After ScheduleConcurrency (iree-stream-schedule-concurrency) //----- //
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%results:3, %result_timepoint = stream.async.execute with() -> (!stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}) {
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
stream.yield %cst, %cst_0, %cst_1 : !stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}
} => !stream.timepoint
%0:3 = stream.timepoint.await %result_timepoint => %results#0, %results#1, %results#2 : !stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}
util.global.store %0#0, @_constant : !stream.resource<constant>
util.global.store %0#1, @_constant_0 : !stream.resource<constant>
util.global.store %0#2, @_constant_1 : !stream.resource<constant>
util.initializer.return
}
// -----// IR Dump After ScheduleConcurrency (iree-stream-schedule-concurrency) //----- //
func.func private @_tensor_float() {
%c4 = arith.constant 4 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c12 = arith.constant 12 : index
%c8 = arith.constant 8 : index
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%results, %result_timepoint = stream.async.execute with(%_constant as %arg0: !stream.resource<constant>{%c8}) -> !stream.resource<external>{%c8} {
%9 = stream.async.transfer %arg0 : !stream.resource<constant>{%c8} -> !stream.resource<external>{%c8}
stream.yield %9 : !stream.resource<external>{%c8}
} => !stream.timepoint
%0 = util.optimization_barrier %_constant_1 : !stream.resource<constant>
%1 = util.optimization_barrier %_constant_0 : !stream.resource<constant>
%results_0, %result_timepoint_1 = stream.async.execute with() -> !stream.resource<transient>{%c4} {
%9 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<transient>{%c4}
stream.yield %9 : !stream.resource<transient>{%c4}
} => !stream.timepoint
%2 = stream.timepoint.await %result_timepoint_1 => %results_0 : !stream.resource<transient>{%c4}
%3 = util.optimization_barrier %2 : !stream.resource<transient>
%4 = stream.resource.size %0 : !stream.resource<constant>
%results_2, %result_timepoint_3 = stream.async.execute with(%0 as %arg0: !stream.resource<constant>{%4}, %1 as %arg1: !stream.resource<constant>{%c12}, %3 as %arg2: !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8} {
%9 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%arg0[%c0 to %4 for %4], %arg1[%c0 to %c12 for %c12], %arg2[%c0 to %c4 for %c4]) : (!stream.resource<constant>{%4}, !stream.resource<constant>{%c12}, !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8}
stream.yield %9 : !stream.resource<external>{%c8}
} => !stream.timepoint
%5 = stream.timepoint.await %result_timepoint_3 => %results_2 : !stream.resource<external>{%c8}
%6 = stream.tensor.export %5 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%7 = stream.timepoint.await %result_timepoint => %results : !stream.resource<external>{%c8}
%8 = stream.tensor.export %7 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%6, %8) : tensor<2x1xf32>
return
}
// -----// IR Dump Before PropagateTimepoints (iree-stream-propagate-timepoints) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private @_constant : !stream.resource<constant>
util.global private @_constant_0 : !stream.resource<constant>
util.global private @_constant_1 : !stream.resource<constant>
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%results:3, %result_timepoint = stream.async.execute with() -> (!stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}) {
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
stream.yield %cst, %cst_0, %cst_1 : !stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}
} => !stream.timepoint
%0:3 = stream.timepoint.await %result_timepoint => %results#0, %results#1, %results#2 : !stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}
util.global.store %0#0, @_constant : !stream.resource<constant>
util.global.store %0#1, @_constant_0 : !stream.resource<constant>
util.global.store %0#2, @_constant_1 : !stream.resource<constant>
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c4 = arith.constant 4 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c12 = arith.constant 12 : index
%c8 = arith.constant 8 : index
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%results, %result_timepoint = stream.async.execute with(%_constant as %arg0: !stream.resource<constant>{%c8}) -> !stream.resource<external>{%c8} {
%9 = stream.async.transfer %arg0 : !stream.resource<constant>{%c8} -> !stream.resource<external>{%c8}
stream.yield %9 : !stream.resource<external>{%c8}
} => !stream.timepoint
%0 = util.optimization_barrier %_constant_1 : !stream.resource<constant>
%1 = util.optimization_barrier %_constant_0 : !stream.resource<constant>
%results_0, %result_timepoint_1 = stream.async.execute with() -> !stream.resource<transient>{%c4} {
%9 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<transient>{%c4}
stream.yield %9 : !stream.resource<transient>{%c4}
} => !stream.timepoint
%2 = stream.timepoint.await %result_timepoint_1 => %results_0 : !stream.resource<transient>{%c4}
%3 = util.optimization_barrier %2 : !stream.resource<transient>
%4 = stream.resource.size %0 : !stream.resource<constant>
%results_2, %result_timepoint_3 = stream.async.execute with(%0 as %arg0: !stream.resource<constant>{%4}, %1 as %arg1: !stream.resource<constant>{%c12}, %3 as %arg2: !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8} {
%9 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%arg0[%c0 to %4 for %4], %arg1[%c0 to %c12 for %c12], %arg2[%c0 to %c4 for %c4]) : (!stream.resource<constant>{%4}, !stream.resource<constant>{%c12}, !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8}
stream.yield %9 : !stream.resource<external>{%c8}
} => !stream.timepoint
%5 = stream.timepoint.await %result_timepoint_3 => %results_2 : !stream.resource<external>{%c8}
%6 = stream.tensor.export %5 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%7 = stream.timepoint.await %result_timepoint => %results : !stream.resource<external>{%c8}
%8 = stream.tensor.export %7 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%6, %8) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After PropagateTimepoints (iree-stream-propagate-timepoints) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private mutable @_constant__timepoint = #stream.timepoint<immediate> : !stream.timepoint
util.global private @_constant : !stream.resource<constant>
util.global private mutable @_constant_0__timepoint = #stream.timepoint<immediate> : !stream.timepoint
util.global private @_constant_0 : !stream.resource<constant>
util.global private mutable @_constant_1__timepoint = #stream.timepoint<immediate> : !stream.timepoint
util.global private @_constant_1 : !stream.resource<constant>
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%results:3, %result_timepoint = stream.async.execute with() -> (!stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}) {
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
stream.yield %cst, %cst_0, %cst_1 : !stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}
} => !stream.timepoint
%0:3 = stream.timepoint.await %result_timepoint => %results#0, %results#1, %results#2 : !stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}
util.global.store %result_timepoint, @_constant__timepoint : !stream.timepoint
util.global.store %results#0, @_constant : !stream.resource<constant>
util.global.store %result_timepoint, @_constant_0__timepoint : !stream.timepoint
util.global.store %results#1, @_constant_0 : !stream.resource<constant>
util.global.store %result_timepoint, @_constant_1__timepoint : !stream.timepoint
util.global.store %results#2, @_constant_1 : !stream.resource<constant>
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c4 = arith.constant 4 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c12 = arith.constant 12 : index
%c8 = arith.constant 8 : index
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%_constant__timepoint = util.global.load @_constant__timepoint : !stream.timepoint
%_constant = util.global.load @_constant : !stream.resource<constant>
%0 = stream.timepoint.await %_constant__timepoint => %_constant : !stream.resource<constant>{%c8}
%_constant_0__timepoint = util.global.load @_constant_0__timepoint : !stream.timepoint
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%1 = stream.resource.size %_constant_0 : !stream.resource<constant>
%2 = stream.timepoint.await %_constant_0__timepoint => %_constant_0 : !stream.resource<constant>{%1}
%_constant_1__timepoint = util.global.load @_constant_1__timepoint : !stream.timepoint
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%3 = stream.resource.size %_constant_1 : !stream.resource<constant>
%4 = stream.timepoint.await %_constant_1__timepoint => %_constant_1 : !stream.resource<constant>{%3}
%results, %result_timepoint = stream.async.execute await(%_constant__timepoint) => with(%_constant as %arg0: !stream.resource<constant>{%c8}) -> !stream.resource<external>{%c8} {
%18 = stream.async.transfer %arg0 : !stream.resource<constant>{%c8} -> !stream.resource<external>{%c8}
stream.yield %18 : !stream.resource<external>{%c8}
} => !stream.timepoint
%5 = util.optimization_barrier %4 : !stream.resource<constant>
%6 = util.optimization_barrier %2 : !stream.resource<constant>
%results_0, %result_timepoint_1 = stream.async.execute with() -> !stream.resource<transient>{%c4} {
%18 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<transient>{%c4}
stream.yield %18 : !stream.resource<transient>{%c4}
} => !stream.timepoint
%7 = stream.timepoint.await %result_timepoint_1 => %results_0 : !stream.resource<transient>{%c4}
%8 = util.optimization_barrier %7 : !stream.resource<transient>
%9 = stream.resource.size %5 : !stream.resource<constant>
%10 = stream.timepoint.immediate => !stream.timepoint
%11 = stream.timepoint.immediate => !stream.timepoint
%12 = stream.timepoint.immediate => !stream.timepoint
%13 = stream.timepoint.immediate => !stream.timepoint
%results_2, %result_timepoint_3 = stream.async.execute await(%13) => with(%5 as %arg0: !stream.resource<constant>{%9}, %6 as %arg1: !stream.resource<constant>{%c12}, %8 as %arg2: !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8} {
%18 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%arg0[%c0 to %9 for %9], %arg1[%c0 to %c12 for %c12], %arg2[%c0 to %c4 for %c4]) : (!stream.resource<constant>{%9}, !stream.resource<constant>{%c12}, !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8}
stream.yield %18 : !stream.resource<external>{%c8}
} => !stream.timepoint
%14 = stream.timepoint.await %result_timepoint_3 => %results_2 : !stream.resource<external>{%c8}
%15 = stream.tensor.export %14 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%16 = stream.timepoint.await %result_timepoint => %results : !stream.resource<external>{%c8}
%17 = stream.tensor.export %16 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%15, %17) : tensor<2x1xf32>
return
}
}
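// Note (added, not compiler output): the dense<[[5.100000e+01], [1.230000e+02]]> constant carried through these dumps is simply the constant-folded result of the original tosa.fully_connected: each row of the 2x3 input is dotted with the single 1x3 weight row and offset by the 1.0 bias. A minimal Python/NumPy sketch reproducing that arithmetic (illustrative only; NumPy is an assumption here, the compiler itself does this fold internally):

# Check the folded fully_connected value that appears as dense<[[5.1e+01], [1.23e+02]]>.
import numpy as np

inputs  = np.array([[1., 2., 3.], [4., 5., 6.]], dtype=np.float32)   # tensor<2x3xf32>
weights = np.array([[7., 8., 9.]], dtype=np.float32)                 # tensor<1x3xf32>
bias    = np.array([1.], dtype=np.float32)                           # tensor<1xf32>

# fully_connected == inputs @ weights^T + bias: a 2x3 * 3x1 matmul plus a broadcast
# add, matching the linalg.fill / linalg.matmul / linalg.generic sequence in the dispatch.
result = inputs @ weights.T + bias
print(result)   # [[ 51.] [123.]]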
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%results:3, %result_timepoint = stream.async.execute with() -> (!stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}) {
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
stream.yield %cst, %cst_0, %cst_1 : !stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}
} => !stream.timepoint
%0:3 = stream.timepoint.await %result_timepoint => %results#0, %results#1, %results#2 : !stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}
util.global.store %result_timepoint, @_constant__timepoint : !stream.timepoint
util.global.store %results#0, @_constant : !stream.resource<constant>
util.global.store %result_timepoint, @_constant_0__timepoint : !stream.timepoint
util.global.store %results#1, @_constant_0 : !stream.resource<constant>
util.global.store %result_timepoint, @_constant_1__timepoint : !stream.timepoint
util.global.store %results#2, @_constant_1 : !stream.resource<constant>
util.initializer.return
}
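// Note (added, not compiler output): the three resource sizes %c8, %c12 and %c24 in this initializer are just the pooled tensors' element counts times 4 bytes per f32. A quick illustrative check in Python:

# f32 = 4 bytes; sizes match %c8, %c12, %c24 above.
shapes = {"_constant (2x1xf32)": (2, 1),
          "_constant_0 (1x3xf32)": (1, 3),
          "_constant_1 (2x3xf32)": (2, 3)}
for name, (rows, cols) in shapes.items():
    print(name, "->", rows * cols * 4, "bytes")   # 8, 12, 24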
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
func.func private @_tensor_float() {
%c4 = arith.constant 4 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c12 = arith.constant 12 : index
%c8 = arith.constant 8 : index
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%_constant__timepoint = util.global.load @_constant__timepoint : !stream.timepoint
%_constant = util.global.load @_constant : !stream.resource<constant>
%0 = stream.timepoint.await %_constant__timepoint => %_constant : !stream.resource<constant>{%c8}
%_constant_0__timepoint = util.global.load @_constant_0__timepoint : !stream.timepoint
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%1 = stream.resource.size %_constant_0 : !stream.resource<constant>
%2 = stream.timepoint.await %_constant_0__timepoint => %_constant_0 : !stream.resource<constant>{%1}
%_constant_1__timepoint = util.global.load @_constant_1__timepoint : !stream.timepoint
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%3 = stream.resource.size %_constant_1 : !stream.resource<constant>
%4 = stream.timepoint.await %_constant_1__timepoint => %_constant_1 : !stream.resource<constant>{%3}
%results, %result_timepoint = stream.async.execute await(%_constant__timepoint) => with(%_constant as %arg0: !stream.resource<constant>{%c8}) -> !stream.resource<external>{%c8} {
%18 = stream.async.transfer %arg0 : !stream.resource<constant>{%c8} -> !stream.resource<external>{%c8}
stream.yield %18 : !stream.resource<external>{%c8}
} => !stream.timepoint
%5 = util.optimization_barrier %4 : !stream.resource<constant>
%6 = util.optimization_barrier %2 : !stream.resource<constant>
%results_0, %result_timepoint_1 = stream.async.execute with() -> !stream.resource<transient>{%c4} {
%18 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<transient>{%c4}
stream.yield %18 : !stream.resource<transient>{%c4}
} => !stream.timepoint
%7 = stream.timepoint.await %result_timepoint_1 => %results_0 : !stream.resource<transient>{%c4}
%8 = util.optimization_barrier %7 : !stream.resource<transient>
%9 = stream.resource.size %5 : !stream.resource<constant>
%10 = stream.timepoint.immediate => !stream.timepoint
%11 = stream.timepoint.immediate => !stream.timepoint
%12 = stream.timepoint.immediate => !stream.timepoint
%13 = stream.timepoint.immediate => !stream.timepoint
%results_2, %result_timepoint_3 = stream.async.execute await(%13) => with(%5 as %arg0: !stream.resource<constant>{%9}, %6 as %arg1: !stream.resource<constant>{%c12}, %8 as %arg2: !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8} {
%18 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%arg0[%c0 to %9 for %9], %arg1[%c0 to %c12 for %c12], %arg2[%c0 to %c4 for %c4]) : (!stream.resource<constant>{%9}, !stream.resource<constant>{%c12}, !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8}
stream.yield %18 : !stream.resource<external>{%c8}
} => !stream.timepoint
%14 = stream.timepoint.await %result_timepoint_3 => %results_2 : !stream.resource<external>{%c8}
%15 = stream.tensor.export %14 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%16 = stream.timepoint.await %result_timepoint => %results : !stream.resource<external>{%c8}
%17 = stream.tensor.export %16 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%15, %17) : tensor<2x1xf32>
return
}
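// Note (added, not compiler output): the %c1065353216_i32 value splatted into the 4-byte transient resource above is the IEEE-754 bit pattern of 1.0f, i.e. the bias constant round-tripped through an i32 splat. A small sketch using Python's standard struct module (illustrative only):

# Show that 1065353216 (0x3F800000) is the f32 bit pattern of 1.0, the value
# materialized by the stream.async.splat in this function.
import struct
bits = 1065353216
value = struct.unpack("<f", struct.pack("<I", bits))[0]
print(hex(bits), "->", value)   # 0x3f800000 -> 1.0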
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%results:3, %result_timepoint = stream.async.execute with() -> (!stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}) {
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
stream.yield %cst, %cst_0, %cst_1 : !stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}
} => !stream.timepoint
util.global.store %result_timepoint, @_constant__timepoint : !stream.timepoint
util.global.store %results#0, @_constant : !stream.resource<constant>
util.global.store %result_timepoint, @_constant_0__timepoint : !stream.timepoint
util.global.store %results#1, @_constant_0 : !stream.resource<constant>
util.global.store %result_timepoint, @_constant_1__timepoint : !stream.timepoint
util.global.store %results#2, @_constant_1 : !stream.resource<constant>
util.initializer.return
}
// -----// IR Dump Before CSE (cse) //----- //
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%results:3, %result_timepoint = stream.async.execute with() -> (!stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}) {
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
stream.yield %cst, %cst_0, %cst_1 : !stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}
} => !stream.timepoint
util.global.store %result_timepoint, @_constant__timepoint : !stream.timepoint
util.global.store %results#0, @_constant : !stream.resource<constant>
util.global.store %result_timepoint, @_constant_0__timepoint : !stream.timepoint
util.global.store %results#1, @_constant_0 : !stream.resource<constant>
util.global.store %result_timepoint, @_constant_1__timepoint : !stream.timepoint
util.global.store %results#2, @_constant_1 : !stream.resource<constant>
util.initializer.return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before CSE (cse) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After CSE (cse) //----- //
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%results:3, %result_timepoint = stream.async.execute with() -> (!stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}) {
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
stream.yield %cst, %cst_0, %cst_1 : !stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}
} => !stream.timepoint
util.global.store %result_timepoint, @_constant__timepoint : !stream.timepoint
util.global.store %results#0, @_constant : !stream.resource<constant>
util.global.store %result_timepoint, @_constant_0__timepoint : !stream.timepoint
util.global.store %results#1, @_constant_0 : !stream.resource<constant>
util.global.store %result_timepoint, @_constant_1__timepoint : !stream.timepoint
util.global.store %results#2, @_constant_1 : !stream.resource<constant>
util.initializer.return
}
// -----// IR Dump Before SimplifyGlobalAccesses (iree-util-simplify-global-accesses) //----- //
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%results:3, %result_timepoint = stream.async.execute with() -> (!stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}) {
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
stream.yield %cst, %cst_0, %cst_1 : !stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}
} => !stream.timepoint
util.global.store %result_timepoint, @_constant__timepoint : !stream.timepoint
util.global.store %results#0, @_constant : !stream.resource<constant>
util.global.store %result_timepoint, @_constant_0__timepoint : !stream.timepoint
util.global.store %results#1, @_constant_0 : !stream.resource<constant>
util.global.store %result_timepoint, @_constant_1__timepoint : !stream.timepoint
util.global.store %results#2, @_constant_1 : !stream.resource<constant>
util.initializer.return
}
// -----// IR Dump After CSE (cse) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before SimplifyGlobalAccesses (iree-util-simplify-global-accesses) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After SimplifyGlobalAccesses (iree-util-simplify-global-accesses) //----- //
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%results:3, %result_timepoint = stream.async.execute with() -> (!stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}) {
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
stream.yield %cst, %cst_0, %cst_1 : !stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}
} => !stream.timepoint
util.global.store %results#0, @_constant : !stream.resource<constant>
util.global.store %results#1, @_constant_0 : !stream.resource<constant>
util.global.store %result_timepoint, @_constant_0__timepoint : !stream.timepoint
util.global.store %results#2, @_constant_1 : !stream.resource<constant>
util.global.store %result_timepoint, @_constant_1__timepoint : !stream.timepoint
util.global.store %result_timepoint, @_constant__timepoint : !stream.timepoint
util.initializer.return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
func.func private @_tensor_float() {
%c4 = arith.constant 4 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c12 = arith.constant 12 : index
%c8 = arith.constant 8 : index
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%_constant__timepoint = util.global.load @_constant__timepoint : !stream.timepoint
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0__timepoint = util.global.load @_constant_0__timepoint : !stream.timepoint
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%0 = stream.resource.size %_constant_0 : !stream.resource<constant>
%_constant_1__timepoint = util.global.load @_constant_1__timepoint : !stream.timepoint
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%1 = stream.resource.size %_constant_1 : !stream.resource<constant>
%results, %result_timepoint = stream.async.execute await(%_constant__timepoint) => with(%_constant as %arg0: !stream.resource<constant>{%c8}) -> !stream.resource<external>{%c8} {
%13 = stream.async.transfer %arg0 : !stream.resource<constant>{%c8} -> !stream.resource<external>{%c8}
stream.yield %13 : !stream.resource<external>{%c8}
} => !stream.timepoint
%2 = stream.timepoint.await %_constant_1__timepoint => %_constant_1 : !stream.resource<constant>{%1}
%3 = util.optimization_barrier %2 : !stream.resource<constant>
%4 = stream.timepoint.await %_constant_0__timepoint => %_constant_0 : !stream.resource<constant>{%0}
%5 = util.optimization_barrier %4 : !stream.resource<constant>
%results_0, %result_timepoint_1 = stream.async.execute with() -> !stream.resource<transient>{%c4} {
%13 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<transient>{%c4}
stream.yield %13 : !stream.resource<transient>{%c4}
} => !stream.timepoint
%6 = stream.timepoint.await %result_timepoint_1 => %results_0 : !stream.resource<transient>{%c4}
%7 = util.optimization_barrier %6 : !stream.resource<transient>
%8 = stream.resource.size %3 : !stream.resource<constant>
%results_2, %result_timepoint_3 = stream.async.execute with(%3 as %arg0: !stream.resource<constant>{%8}, %5 as %arg1: !stream.resource<constant>{%c12}, %7 as %arg2: !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8} {
%13 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%arg0[%c0 to %8 for %8], %arg1[%c0 to %c12 for %c12], %arg2[%c0 to %c4 for %c4]) : (!stream.resource<constant>{%8}, !stream.resource<constant>{%c12}, !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8}
stream.yield %13 : !stream.resource<external>{%c8}
} => !stream.timepoint
%9 = stream.timepoint.await %result_timepoint_3 => %results_2 : !stream.resource<external>{%c8}
%10 = stream.tensor.export %9 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%11 = stream.timepoint.await %result_timepoint => %results : !stream.resource<external>{%c8}
%12 = stream.tensor.export %11 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%10, %12) : tensor<2x1xf32>
return
}
// -----// IR Dump Before CSE (cse) //----- //
func.func private @_tensor_float() {
%c4 = arith.constant 4 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c12 = arith.constant 12 : index
%c8 = arith.constant 8 : index
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%_constant__timepoint = util.global.load @_constant__timepoint : !stream.timepoint
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0__timepoint = util.global.load @_constant_0__timepoint : !stream.timepoint
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%0 = stream.resource.size %_constant_0 : !stream.resource<constant>
%_constant_1__timepoint = util.global.load @_constant_1__timepoint : !stream.timepoint
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%1 = stream.resource.size %_constant_1 : !stream.resource<constant>
%results, %result_timepoint = stream.async.execute await(%_constant__timepoint) => with(%_constant as %arg0: !stream.resource<constant>{%c8}) -> !stream.resource<external>{%c8} {
%13 = stream.async.transfer %arg0 : !stream.resource<constant>{%c8} -> !stream.resource<external>{%c8}
stream.yield %13 : !stream.resource<external>{%c8}
} => !stream.timepoint
%2 = stream.timepoint.await %_constant_1__timepoint => %_constant_1 : !stream.resource<constant>{%1}
%3 = util.optimization_barrier %2 : !stream.resource<constant>
%4 = stream.timepoint.await %_constant_0__timepoint => %_constant_0 : !stream.resource<constant>{%0}
%5 = util.optimization_barrier %4 : !stream.resource<constant>
%results_0, %result_timepoint_1 = stream.async.execute with() -> !stream.resource<transient>{%c4} {
%13 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<transient>{%c4}
stream.yield %13 : !stream.resource<transient>{%c4}
} => !stream.timepoint
%6 = stream.timepoint.await %result_timepoint_1 => %results_0 : !stream.resource<transient>{%c4}
%7 = util.optimization_barrier %6 : !stream.resource<transient>
%8 = stream.resource.size %3 : !stream.resource<constant>
%results_2, %result_timepoint_3 = stream.async.execute with(%3 as %arg0: !stream.resource<constant>{%8}, %5 as %arg1: !stream.resource<constant>{%c12}, %7 as %arg2: !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8} {
%13 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%arg0[%c0 to %8 for %8], %arg1[%c0 to %c12 for %c12], %arg2[%c0 to %c4 for %c4]) : (!stream.resource<constant>{%8}, !stream.resource<constant>{%c12}, !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8}
stream.yield %13 : !stream.resource<external>{%c8}
} => !stream.timepoint
%9 = stream.timepoint.await %result_timepoint_3 => %results_2 : !stream.resource<external>{%c8}
%10 = stream.tensor.export %9 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%11 = stream.timepoint.await %result_timepoint => %results : !stream.resource<external>{%c8}
%12 = stream.tensor.export %11 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%10, %12) : tensor<2x1xf32>
return
}
// -----// IR Dump After SimplifyGlobalAccesses (iree-util-simplify-global-accesses) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After CSE (cse) //----- //
func.func private @_tensor_float() {
%c4 = arith.constant 4 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c12 = arith.constant 12 : index
%c8 = arith.constant 8 : index
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%_constant__timepoint = util.global.load @_constant__timepoint : !stream.timepoint
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0__timepoint = util.global.load @_constant_0__timepoint : !stream.timepoint
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%0 = stream.resource.size %_constant_0 : !stream.resource<constant>
%_constant_1__timepoint = util.global.load @_constant_1__timepoint : !stream.timepoint
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%1 = stream.resource.size %_constant_1 : !stream.resource<constant>
%results, %result_timepoint = stream.async.execute await(%_constant__timepoint) => with(%_constant as %arg0: !stream.resource<constant>{%c8}) -> !stream.resource<external>{%c8} {
%13 = stream.async.transfer %arg0 : !stream.resource<constant>{%c8} -> !stream.resource<external>{%c8}
stream.yield %13 : !stream.resource<external>{%c8}
} => !stream.timepoint
%2 = stream.timepoint.await %_constant_1__timepoint => %_constant_1 : !stream.resource<constant>{%1}
%3 = util.optimization_barrier %2 : !stream.resource<constant>
%4 = stream.timepoint.await %_constant_0__timepoint => %_constant_0 : !stream.resource<constant>{%0}
%5 = util.optimization_barrier %4 : !stream.resource<constant>
%results_0, %result_timepoint_1 = stream.async.execute with() -> !stream.resource<transient>{%c4} {
%13 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<transient>{%c4}
stream.yield %13 : !stream.resource<transient>{%c4}
} => !stream.timepoint
%6 = stream.timepoint.await %result_timepoint_1 => %results_0 : !stream.resource<transient>{%c4}
%7 = util.optimization_barrier %6 : !stream.resource<transient>
%8 = stream.resource.size %3 : !stream.resource<constant>
%results_2, %result_timepoint_3 = stream.async.execute with(%3 as %arg0: !stream.resource<constant>{%8}, %5 as %arg1: !stream.resource<constant>{%c12}, %7 as %arg2: !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8} {
%13 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%arg0[%c0 to %8 for %8], %arg1[%c0 to %c12 for %c12], %arg2[%c0 to %c4 for %c4]) : (!stream.resource<constant>{%8}, !stream.resource<constant>{%c12}, !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8}
stream.yield %13 : !stream.resource<external>{%c8}
} => !stream.timepoint
%9 = stream.timepoint.await %result_timepoint_3 => %results_2 : !stream.resource<external>{%c8}
%10 = stream.tensor.export %9 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%11 = stream.timepoint.await %result_timepoint => %results : !stream.resource<external>{%c8}
%12 = stream.tensor.export %11 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%10, %12) : tensor<2x1xf32>
return
}
// -----// IR Dump Before SimplifyGlobalAccesses (iree-util-simplify-global-accesses) //----- //
func.func private @_tensor_float() {
%c4 = arith.constant 4 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c12 = arith.constant 12 : index
%c8 = arith.constant 8 : index
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%_constant__timepoint = util.global.load @_constant__timepoint : !stream.timepoint
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0__timepoint = util.global.load @_constant_0__timepoint : !stream.timepoint
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%0 = stream.resource.size %_constant_0 : !stream.resource<constant>
%_constant_1__timepoint = util.global.load @_constant_1__timepoint : !stream.timepoint
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%1 = stream.resource.size %_constant_1 : !stream.resource<constant>
%results, %result_timepoint = stream.async.execute await(%_constant__timepoint) => with(%_constant as %arg0: !stream.resource<constant>{%c8}) -> !stream.resource<external>{%c8} {
%13 = stream.async.transfer %arg0 : !stream.resource<constant>{%c8} -> !stream.resource<external>{%c8}
stream.yield %13 : !stream.resource<external>{%c8}
} => !stream.timepoint
%2 = stream.timepoint.await %_constant_1__timepoint => %_constant_1 : !stream.resource<constant>{%1}
%3 = util.optimization_barrier %2 : !stream.resource<constant>
%4 = stream.timepoint.await %_constant_0__timepoint => %_constant_0 : !stream.resource<constant>{%0}
%5 = util.optimization_barrier %4 : !stream.resource<constant>
%results_0, %result_timepoint_1 = stream.async.execute with() -> !stream.resource<transient>{%c4} {
%13 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<transient>{%c4}
stream.yield %13 : !stream.resource<transient>{%c4}
} => !stream.timepoint
%6 = stream.timepoint.await %result_timepoint_1 => %results_0 : !stream.resource<transient>{%c4}
%7 = util.optimization_barrier %6 : !stream.resource<transient>
%8 = stream.resource.size %3 : !stream.resource<constant>
%results_2, %result_timepoint_3 = stream.async.execute with(%3 as %arg0: !stream.resource<constant>{%8}, %5 as %arg1: !stream.resource<constant>{%c12}, %7 as %arg2: !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8} {
%13 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%arg0[%c0 to %8 for %8], %arg1[%c0 to %c12 for %c12], %arg2[%c0 to %c4 for %c4]) : (!stream.resource<constant>{%8}, !stream.resource<constant>{%c12}, !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8}
stream.yield %13 : !stream.resource<external>{%c8}
} => !stream.timepoint
%9 = stream.timepoint.await %result_timepoint_3 => %results_2 : !stream.resource<external>{%c8}
%10 = stream.tensor.export %9 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%11 = stream.timepoint.await %result_timepoint => %results : !stream.resource<external>{%c8}
%12 = stream.tensor.export %11 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%10, %12) : tensor<2x1xf32>
return
}
// -----// IR Dump After SimplifyGlobalAccesses (iree-util-simplify-global-accesses) //----- //
func.func private @_tensor_float() {
%_constant__timepoint = util.global.load @_constant__timepoint : !stream.timepoint
%_constant_1__timepoint = util.global.load @_constant_1__timepoint : !stream.timepoint
%_constant_0__timepoint = util.global.load @_constant_0__timepoint : !stream.timepoint
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%c4 = arith.constant 4 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c12 = arith.constant 12 : index
%c8 = arith.constant 8 : index
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%0 = stream.resource.size %_constant_0 : !stream.resource<constant>
%1 = stream.resource.size %_constant_1 : !stream.resource<constant>
%results, %result_timepoint = stream.async.execute await(%_constant__timepoint) => with(%_constant as %arg0: !stream.resource<constant>{%c8}) -> !stream.resource<external>{%c8} {
%13 = stream.async.transfer %arg0 : !stream.resource<constant>{%c8} -> !stream.resource<external>{%c8}
stream.yield %13 : !stream.resource<external>{%c8}
} => !stream.timepoint
%2 = stream.timepoint.await %_constant_1__timepoint => %_constant_1 : !stream.resource<constant>{%1}
%3 = util.optimization_barrier %2 : !stream.resource<constant>
%4 = stream.timepoint.await %_constant_0__timepoint => %_constant_0 : !stream.resource<constant>{%0}
%5 = util.optimization_barrier %4 : !stream.resource<constant>
%results_0, %result_timepoint_1 = stream.async.execute with() -> !stream.resource<transient>{%c4} {
%13 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<transient>{%c4}
stream.yield %13 : !stream.resource<transient>{%c4}
} => !stream.timepoint
%6 = stream.timepoint.await %result_timepoint_1 => %results_0 : !stream.resource<transient>{%c4}
%7 = util.optimization_barrier %6 : !stream.resource<transient>
%8 = stream.resource.size %3 : !stream.resource<constant>
%results_2, %result_timepoint_3 = stream.async.execute with(%3 as %arg0: !stream.resource<constant>{%8}, %5 as %arg1: !stream.resource<constant>{%c12}, %7 as %arg2: !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8} {
%13 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%arg0[%c0 to %8 for %8], %arg1[%c0 to %c12 for %c12], %arg2[%c0 to %c4 for %c4]) : (!stream.resource<constant>{%8}, !stream.resource<constant>{%c12}, !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8}
stream.yield %13 : !stream.resource<external>{%c8}
} => !stream.timepoint
%9 = stream.timepoint.await %result_timepoint_3 => %results_2 : !stream.resource<external>{%c8}
%10 = stream.tensor.export %9 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%11 = stream.timepoint.await %result_timepoint => %results : !stream.resource<external>{%c8}
%12 = stream.tensor.export %11 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%10, %12) : tensor<2x1xf32>
return
}
// -----// IR Dump Before ApplyPatterns (iree-util-apply-patterns) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private mutable @_constant__timepoint = #stream.timepoint<immediate> : !stream.timepoint
util.global private @_constant : !stream.resource<constant>
util.global private mutable @_constant_0__timepoint = #stream.timepoint<immediate> : !stream.timepoint
util.global private @_constant_0 : !stream.resource<constant>
util.global private mutable @_constant_1__timepoint = #stream.timepoint<immediate> : !stream.timepoint
util.global private @_constant_1 : !stream.resource<constant>
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%results:3, %result_timepoint = stream.async.execute with() -> (!stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}) {
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
stream.yield %cst, %cst_0, %cst_1 : !stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}
} => !stream.timepoint
util.global.store %results#0, @_constant : !stream.resource<constant>
util.global.store %results#1, @_constant_0 : !stream.resource<constant>
util.global.store %result_timepoint, @_constant_0__timepoint : !stream.timepoint
util.global.store %results#2, @_constant_1 : !stream.resource<constant>
util.global.store %result_timepoint, @_constant_1__timepoint : !stream.timepoint
util.global.store %result_timepoint, @_constant__timepoint : !stream.timepoint
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%_constant__timepoint = util.global.load @_constant__timepoint : !stream.timepoint
%_constant_1__timepoint = util.global.load @_constant_1__timepoint : !stream.timepoint
%_constant_0__timepoint = util.global.load @_constant_0__timepoint : !stream.timepoint
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%c4 = arith.constant 4 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c12 = arith.constant 12 : index
%c8 = arith.constant 8 : index
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%0 = stream.resource.size %_constant_0 : !stream.resource<constant>
%1 = stream.resource.size %_constant_1 : !stream.resource<constant>
%results, %result_timepoint = stream.async.execute await(%_constant__timepoint) => with(%_constant as %arg0: !stream.resource<constant>{%c8}) -> !stream.resource<external>{%c8} {
%13 = stream.async.transfer %arg0 : !stream.resource<constant>{%c8} -> !stream.resource<external>{%c8}
stream.yield %13 : !stream.resource<external>{%c8}
} => !stream.timepoint
%2 = stream.timepoint.await %_constant_1__timepoint => %_constant_1 : !stream.resource<constant>{%1}
%3 = util.optimization_barrier %2 : !stream.resource<constant>
%4 = stream.timepoint.await %_constant_0__timepoint => %_constant_0 : !stream.resource<constant>{%0}
%5 = util.optimization_barrier %4 : !stream.resource<constant>
%results_0, %result_timepoint_1 = stream.async.execute with() -> !stream.resource<transient>{%c4} {
%13 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<transient>{%c4}
stream.yield %13 : !stream.resource<transient>{%c4}
} => !stream.timepoint
%6 = stream.timepoint.await %result_timepoint_1 => %results_0 : !stream.resource<transient>{%c4}
%7 = util.optimization_barrier %6 : !stream.resource<transient>
%8 = stream.resource.size %3 : !stream.resource<constant>
%results_2, %result_timepoint_3 = stream.async.execute with(%3 as %arg0: !stream.resource<constant>{%8}, %5 as %arg1: !stream.resource<constant>{%c12}, %7 as %arg2: !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8} {
%13 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%arg0[%c0 to %8 for %8], %arg1[%c0 to %c12 for %c12], %arg2[%c0 to %c4 for %c4]) : (!stream.resource<constant>{%8}, !stream.resource<constant>{%c12}, !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8}
stream.yield %13 : !stream.resource<external>{%c8}
} => !stream.timepoint
%9 = stream.timepoint.await %result_timepoint_3 => %results_2 : !stream.resource<external>{%c8}
%10 = stream.tensor.export %9 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%11 = stream.timepoint.await %result_timepoint => %results : !stream.resource<external>{%c8}
%12 = stream.tensor.export %11 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%10, %12) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After ApplyPatterns (iree-util-apply-patterns) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private mutable @_constant__timepoint = #stream.timepoint<immediate> : !stream.timepoint
util.global private @_constant : !stream.resource<constant>
util.global private mutable @_constant_0__timepoint = #stream.timepoint<immediate> : !stream.timepoint
util.global private @_constant_0 : !stream.resource<constant>
util.global private mutable @_constant_1__timepoint = #stream.timepoint<immediate> : !stream.timepoint
util.global private @_constant_1 : !stream.resource<constant>
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%results:3, %result_timepoint = stream.async.execute with() -> (!stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}) {
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
stream.yield %cst, %cst_0, %cst_1 : !stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}
} => !stream.timepoint
util.global.store %results#0, @_constant : !stream.resource<constant>
util.global.store %results#1, @_constant_0 : !stream.resource<constant>
util.global.store %result_timepoint, @_constant_0__timepoint : !stream.timepoint
util.global.store %results#2, @_constant_1 : !stream.resource<constant>
util.global.store %result_timepoint, @_constant_1__timepoint : !stream.timepoint
util.global.store %result_timepoint, @_constant__timepoint : !stream.timepoint
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%_constant__timepoint = util.global.load @_constant__timepoint : !stream.timepoint
%_constant_1__timepoint = util.global.load @_constant_1__timepoint : !stream.timepoint
%_constant_0__timepoint = util.global.load @_constant_0__timepoint : !stream.timepoint
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.resource.size %_constant_0 : !stream.resource<constant>
%1 = stream.resource.size %_constant_1 : !stream.resource<constant>
%results, %result_timepoint = stream.async.execute await(%_constant__timepoint) => with(%_constant as %arg0: !stream.resource<constant>{%c8}) -> !stream.resource<external>{%c8} {
%13 = stream.async.transfer %arg0 : !stream.resource<constant>{%c8} -> !stream.resource<external>{%c8}
stream.yield %13 : !stream.resource<external>{%c8}
} => !stream.timepoint
%2 = stream.timepoint.await %_constant_1__timepoint => %_constant_1 : !stream.resource<constant>{%1}
%3 = util.optimization_barrier %2 : !stream.resource<constant>
%4 = stream.timepoint.await %_constant_0__timepoint => %_constant_0 : !stream.resource<constant>{%0}
%5 = util.optimization_barrier %4 : !stream.resource<constant>
%results_0, %result_timepoint_1 = stream.async.execute with() -> !stream.resource<transient>{%c4} {
%13 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<transient>{%c4}
stream.yield %13 : !stream.resource<transient>{%c4}
} => !stream.timepoint
%6 = stream.timepoint.await %result_timepoint_1 => %results_0 : !stream.resource<transient>{%c4}
%7 = util.optimization_barrier %6 : !stream.resource<transient>
%8 = stream.resource.size %3 : !stream.resource<constant>
%results_2, %result_timepoint_3 = stream.async.execute with(%3 as %arg0: !stream.resource<constant>{%8}, %5 as %arg1: !stream.resource<constant>{%c12}, %7 as %arg2: !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8} {
%13 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%arg0[%c0 to %8 for %8], %arg1[%c0 to %c12 for %c12], %arg2[%c0 to %c4 for %c4]) : (!stream.resource<constant>{%8}, !stream.resource<constant>{%c12}, !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8}
stream.yield %13 : !stream.resource<external>{%c8}
} => !stream.timepoint
%9 = stream.timepoint.await %result_timepoint_3 => %results_2 : !stream.resource<external>{%c8}
%10 = stream.tensor.export %9 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%11 = stream.timepoint.await %result_timepoint => %results : !stream.resource<external>{%c8}
%12 = stream.tensor.export %11 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%10, %12) : tensor<2x1xf32>
return
}
}
// -----// IR Dump Before FoldGlobals (iree-util-fold-globals) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private mutable @_constant__timepoint = #stream.timepoint<immediate> : !stream.timepoint
util.global private @_constant : !stream.resource<constant>
util.global private mutable @_constant_0__timepoint = #stream.timepoint<immediate> : !stream.timepoint
util.global private @_constant_0 : !stream.resource<constant>
util.global private mutable @_constant_1__timepoint = #stream.timepoint<immediate> : !stream.timepoint
util.global private @_constant_1 : !stream.resource<constant>
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%results:3, %result_timepoint = stream.async.execute with() -> (!stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}) {
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
stream.yield %cst, %cst_0, %cst_1 : !stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}
} => !stream.timepoint
util.global.store %results#0, @_constant : !stream.resource<constant>
util.global.store %results#1, @_constant_0 : !stream.resource<constant>
util.global.store %result_timepoint, @_constant_0__timepoint : !stream.timepoint
util.global.store %results#2, @_constant_1 : !stream.resource<constant>
util.global.store %result_timepoint, @_constant_1__timepoint : !stream.timepoint
util.global.store %result_timepoint, @_constant__timepoint : !stream.timepoint
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%_constant__timepoint = util.global.load @_constant__timepoint : !stream.timepoint
%_constant_1__timepoint = util.global.load @_constant_1__timepoint : !stream.timepoint
%_constant_0__timepoint = util.global.load @_constant_0__timepoint : !stream.timepoint
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.resource.size %_constant_0 : !stream.resource<constant>
%1 = stream.resource.size %_constant_1 : !stream.resource<constant>
%results, %result_timepoint = stream.async.execute await(%_constant__timepoint) => with(%_constant as %arg0: !stream.resource<constant>{%c8}) -> !stream.resource<external>{%c8} {
%13 = stream.async.transfer %arg0 : !stream.resource<constant>{%c8} -> !stream.resource<external>{%c8}
stream.yield %13 : !stream.resource<external>{%c8}
} => !stream.timepoint
%2 = stream.timepoint.await %_constant_1__timepoint => %_constant_1 : !stream.resource<constant>{%1}
%3 = util.optimization_barrier %2 : !stream.resource<constant>
%4 = stream.timepoint.await %_constant_0__timepoint => %_constant_0 : !stream.resource<constant>{%0}
%5 = util.optimization_barrier %4 : !stream.resource<constant>
%results_0, %result_timepoint_1 = stream.async.execute with() -> !stream.resource<transient>{%c4} {
%13 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<transient>{%c4}
stream.yield %13 : !stream.resource<transient>{%c4}
} => !stream.timepoint
%6 = stream.timepoint.await %result_timepoint_1 => %results_0 : !stream.resource<transient>{%c4}
%7 = util.optimization_barrier %6 : !stream.resource<transient>
%8 = stream.resource.size %3 : !stream.resource<constant>
%results_2, %result_timepoint_3 = stream.async.execute with(%3 as %arg0: !stream.resource<constant>{%8}, %5 as %arg1: !stream.resource<constant>{%c12}, %7 as %arg2: !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8} {
%13 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%arg0[%c0 to %8 for %8], %arg1[%c0 to %c12 for %c12], %arg2[%c0 to %c4 for %c4]) : (!stream.resource<constant>{%8}, !stream.resource<constant>{%c12}, !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8}
stream.yield %13 : !stream.resource<external>{%c8}
} => !stream.timepoint
%9 = stream.timepoint.await %result_timepoint_3 => %results_2 : !stream.resource<external>{%c8}
%10 = stream.tensor.export %9 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%11 = stream.timepoint.await %result_timepoint => %results : !stream.resource<external>{%c8}
%12 = stream.tensor.export %11 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%10, %12) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After FoldGlobals (iree-util-fold-globals) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private mutable @_constant__timepoint = #stream.timepoint<immediate> : !stream.timepoint
util.global private @_constant : !stream.resource<constant>
util.global private mutable @_constant_0__timepoint = #stream.timepoint<immediate> : !stream.timepoint
util.global private @_constant_0 : !stream.resource<constant>
util.global private mutable @_constant_1__timepoint = #stream.timepoint<immediate> : !stream.timepoint
util.global private @_constant_1 : !stream.resource<constant>
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%results:3, %result_timepoint = stream.async.execute with() -> (!stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}) {
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
stream.yield %cst, %cst_0, %cst_1 : !stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}
} => !stream.timepoint
util.global.store %results#0, @_constant : !stream.resource<constant>
util.global.store %results#1, @_constant_0 : !stream.resource<constant>
util.global.store %result_timepoint, @_constant_0__timepoint : !stream.timepoint
util.global.store %results#2, @_constant_1 : !stream.resource<constant>
util.global.store %result_timepoint, @_constant_1__timepoint : !stream.timepoint
util.global.store %result_timepoint, @_constant__timepoint : !stream.timepoint
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%_constant__timepoint = util.global.load @_constant__timepoint : !stream.timepoint
%_constant_1__timepoint = util.global.load @_constant_1__timepoint : !stream.timepoint
%_constant_0__timepoint = util.global.load @_constant_0__timepoint : !stream.timepoint
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.resource.size %_constant_0 : !stream.resource<constant>
%1 = stream.resource.size %_constant_1 : !stream.resource<constant>
%results, %result_timepoint = stream.async.execute await(%_constant__timepoint) => with(%_constant as %arg0: !stream.resource<constant>{%c8}) -> !stream.resource<external>{%c8} {
%13 = stream.async.transfer %arg0 : !stream.resource<constant>{%c8} -> !stream.resource<external>{%c8}
stream.yield %13 : !stream.resource<external>{%c8}
} => !stream.timepoint
%2 = stream.timepoint.await %_constant_1__timepoint => %_constant_1 : !stream.resource<constant>{%1}
%3 = util.optimization_barrier %2 : !stream.resource<constant>
%4 = stream.timepoint.await %_constant_0__timepoint => %_constant_0 : !stream.resource<constant>{%0}
%5 = util.optimization_barrier %4 : !stream.resource<constant>
%results_0, %result_timepoint_1 = stream.async.execute with() -> !stream.resource<transient>{%c4} {
%13 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<transient>{%c4}
stream.yield %13 : !stream.resource<transient>{%c4}
} => !stream.timepoint
%6 = stream.timepoint.await %result_timepoint_1 => %results_0 : !stream.resource<transient>{%c4}
%7 = util.optimization_barrier %6 : !stream.resource<transient>
%8 = stream.resource.size %3 : !stream.resource<constant>
%results_2, %result_timepoint_3 = stream.async.execute with(%3 as %arg0: !stream.resource<constant>{%8}, %5 as %arg1: !stream.resource<constant>{%c12}, %7 as %arg2: !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8} {
%13 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%arg0[%c0 to %8 for %8], %arg1[%c0 to %c12 for %c12], %arg2[%c0 to %c4 for %c4]) : (!stream.resource<constant>{%8}, !stream.resource<constant>{%c12}, !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8}
stream.yield %13 : !stream.resource<external>{%c8}
} => !stream.timepoint
%9 = stream.timepoint.await %result_timepoint_3 => %results_2 : !stream.resource<external>{%c8}
%10 = stream.tensor.export %9 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%11 = stream.timepoint.await %result_timepoint => %results : !stream.resource<external>{%c8}
%12 = stream.tensor.export %11 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%10, %12) : tensor<2x1xf32>
return
}
}
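// Annotation (not part of the compiler output): the @_tensor_float_dispatch_0 body above is the
// lowered tosa.fully_connected — linalg.fill + linalg.matmul + a bias-add linalg.generic — with the
// 1x3 weight buffer rebound as tensor<3x1xf32>, so it computes input x weight^T + bias. A minimal
// NumPy sketch reproducing the dense<[[51.],[123.]]> value that check.expect_eq compares against,
// assuming standard tosa.fully_connected semantics (out = input @ weight^T + bias):
import numpy as np

lhs = np.array([[1., 2., 3.], [4., 5., 6.]], dtype=np.float32)  # tensor<2x3xf32> input
weights = np.array([[7., 8., 9.]], dtype=np.float32)            # tensor<1x3xf32> weights
bias = np.array([1.], dtype=np.float32)                         # tensor<1xf32> bias

out = lhs @ weights.T + bias  # (2x3)@(3x1) + bias -> 1*7+2*8+3*9+1 = 51, 4*7+5*8+6*9+1 = 123
print(out)                    # [[ 51.] [123.]]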
// -----// IR Dump Before FuseGlobals (iree-util-fuse-globals) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private mutable @_constant__timepoint = #stream.timepoint<immediate> : !stream.timepoint
util.global private @_constant : !stream.resource<constant>
util.global private mutable @_constant_0__timepoint = #stream.timepoint<immediate> : !stream.timepoint
util.global private @_constant_0 : !stream.resource<constant>
util.global private mutable @_constant_1__timepoint = #stream.timepoint<immediate> : !stream.timepoint
util.global private @_constant_1 : !stream.resource<constant>
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%results:3, %result_timepoint = stream.async.execute with() -> (!stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}) {
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
stream.yield %cst, %cst_0, %cst_1 : !stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}
} => !stream.timepoint
util.global.store %results#0, @_constant : !stream.resource<constant>
util.global.store %results#1, @_constant_0 : !stream.resource<constant>
util.global.store %result_timepoint, @_constant_0__timepoint : !stream.timepoint
util.global.store %results#2, @_constant_1 : !stream.resource<constant>
util.global.store %result_timepoint, @_constant_1__timepoint : !stream.timepoint
util.global.store %result_timepoint, @_constant__timepoint : !stream.timepoint
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%_constant__timepoint = util.global.load @_constant__timepoint : !stream.timepoint
%_constant_1__timepoint = util.global.load @_constant_1__timepoint : !stream.timepoint
%_constant_0__timepoint = util.global.load @_constant_0__timepoint : !stream.timepoint
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.resource.size %_constant_0 : !stream.resource<constant>
%1 = stream.resource.size %_constant_1 : !stream.resource<constant>
%results, %result_timepoint = stream.async.execute await(%_constant__timepoint) => with(%_constant as %arg0: !stream.resource<constant>{%c8}) -> !stream.resource<external>{%c8} {
%13 = stream.async.transfer %arg0 : !stream.resource<constant>{%c8} -> !stream.resource<external>{%c8}
stream.yield %13 : !stream.resource<external>{%c8}
} => !stream.timepoint
%2 = stream.timepoint.await %_constant_1__timepoint => %_constant_1 : !stream.resource<constant>{%1}
%3 = util.optimization_barrier %2 : !stream.resource<constant>
%4 = stream.timepoint.await %_constant_0__timepoint => %_constant_0 : !stream.resource<constant>{%0}
%5 = util.optimization_barrier %4 : !stream.resource<constant>
%results_0, %result_timepoint_1 = stream.async.execute with() -> !stream.resource<transient>{%c4} {
%13 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<transient>{%c4}
stream.yield %13 : !stream.resource<transient>{%c4}
} => !stream.timepoint
%6 = stream.timepoint.await %result_timepoint_1 => %results_0 : !stream.resource<transient>{%c4}
%7 = util.optimization_barrier %6 : !stream.resource<transient>
%8 = stream.resource.size %3 : !stream.resource<constant>
%results_2, %result_timepoint_3 = stream.async.execute with(%3 as %arg0: !stream.resource<constant>{%8}, %5 as %arg1: !stream.resource<constant>{%c12}, %7 as %arg2: !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8} {
%13 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%arg0[%c0 to %8 for %8], %arg1[%c0 to %c12 for %c12], %arg2[%c0 to %c4 for %c4]) : (!stream.resource<constant>{%8}, !stream.resource<constant>{%c12}, !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8}
stream.yield %13 : !stream.resource<external>{%c8}
} => !stream.timepoint
%9 = stream.timepoint.await %result_timepoint_3 => %results_2 : !stream.resource<external>{%c8}
%10 = stream.tensor.export %9 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%11 = stream.timepoint.await %result_timepoint => %results : !stream.resource<external>{%c8}
%12 = stream.tensor.export %11 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%10, %12) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After FuseGlobals (iree-util-fuse-globals) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private mutable @_constant__timepoint = #stream.timepoint<immediate> : !stream.timepoint
util.global private @_constant : !stream.resource<constant>
util.global private @_constant_0 : !stream.resource<constant>
util.global private @_constant_1 : !stream.resource<constant>
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%results:3, %result_timepoint = stream.async.execute with() -> (!stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}) {
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
stream.yield %cst, %cst_0, %cst_1 : !stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}
} => !stream.timepoint
util.global.store %results#0, @_constant : !stream.resource<constant>
util.global.store %results#1, @_constant_0 : !stream.resource<constant>
util.global.store %results#2, @_constant_1 : !stream.resource<constant>
util.global.store %result_timepoint, @_constant__timepoint : !stream.timepoint
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%_constant__timepoint = util.global.load @_constant__timepoint : !stream.timepoint
%_constant__timepoint_0 = util.global.load @_constant__timepoint : !stream.timepoint
%_constant__timepoint_1 = util.global.load @_constant__timepoint : !stream.timepoint
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.resource.size %_constant_0 : !stream.resource<constant>
%1 = stream.resource.size %_constant_1 : !stream.resource<constant>
%results, %result_timepoint = stream.async.execute await(%_constant__timepoint) => with(%_constant as %arg0: !stream.resource<constant>{%c8}) -> !stream.resource<external>{%c8} {
%13 = stream.async.transfer %arg0 : !stream.resource<constant>{%c8} -> !stream.resource<external>{%c8}
stream.yield %13 : !stream.resource<external>{%c8}
} => !stream.timepoint
%2 = stream.timepoint.await %_constant__timepoint_0 => %_constant_1 : !stream.resource<constant>{%1}
%3 = util.optimization_barrier %2 : !stream.resource<constant>
%4 = stream.timepoint.await %_constant__timepoint_1 => %_constant_0 : !stream.resource<constant>{%0}
%5 = util.optimization_barrier %4 : !stream.resource<constant>
%results_2, %result_timepoint_3 = stream.async.execute with() -> !stream.resource<transient>{%c4} {
%13 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<transient>{%c4}
stream.yield %13 : !stream.resource<transient>{%c4}
} => !stream.timepoint
%6 = stream.timepoint.await %result_timepoint_3 => %results_2 : !stream.resource<transient>{%c4}
%7 = util.optimization_barrier %6 : !stream.resource<transient>
%8 = stream.resource.size %3 : !stream.resource<constant>
%results_4, %result_timepoint_5 = stream.async.execute with(%3 as %arg0: !stream.resource<constant>{%8}, %5 as %arg1: !stream.resource<constant>{%c12}, %7 as %arg2: !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8} {
%13 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%arg0[%c0 to %8 for %8], %arg1[%c0 to %c12 for %c12], %arg2[%c0 to %c4 for %c4]) : (!stream.resource<constant>{%8}, !stream.resource<constant>{%c12}, !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8}
stream.yield %13 : !stream.resource<external>{%c8}
} => !stream.timepoint
%9 = stream.timepoint.await %result_timepoint_5 => %results_4 : !stream.resource<external>{%c8}
%10 = stream.tensor.export %9 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%11 = stream.timepoint.await %result_timepoint => %results : !stream.resource<external>{%c8}
%12 = stream.tensor.export %11 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%10, %12) : tensor<2x1xf32>
return
}
}
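// Annotation: FuseGlobals merges the three per-constant timepoint globals (@_constant__timepoint,
// @_constant_0__timepoint, @_constant_1__timepoint), which the initializer always stored with the
// same %result_timepoint, into the single @_constant__timepoint; @_tensor_float now loads that one
// global three times instead of loading three separate globals.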
// -----// IR Dump Before IPO (iree-util-ipo) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private mutable @_constant__timepoint = #stream.timepoint<immediate> : !stream.timepoint
util.global private @_constant : !stream.resource<constant>
util.global private @_constant_0 : !stream.resource<constant>
util.global private @_constant_1 : !stream.resource<constant>
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%results:3, %result_timepoint = stream.async.execute with() -> (!stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}) {
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
stream.yield %cst, %cst_0, %cst_1 : !stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}
} => !stream.timepoint
util.global.store %results#0, @_constant : !stream.resource<constant>
util.global.store %results#1, @_constant_0 : !stream.resource<constant>
util.global.store %results#2, @_constant_1 : !stream.resource<constant>
util.global.store %result_timepoint, @_constant__timepoint : !stream.timepoint
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%_constant__timepoint = util.global.load @_constant__timepoint : !stream.timepoint
%_constant__timepoint_0 = util.global.load @_constant__timepoint : !stream.timepoint
%_constant__timepoint_1 = util.global.load @_constant__timepoint : !stream.timepoint
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.resource.size %_constant_0 : !stream.resource<constant>
%1 = stream.resource.size %_constant_1 : !stream.resource<constant>
%results, %result_timepoint = stream.async.execute await(%_constant__timepoint) => with(%_constant as %arg0: !stream.resource<constant>{%c8}) -> !stream.resource<external>{%c8} {
%13 = stream.async.transfer %arg0 : !stream.resource<constant>{%c8} -> !stream.resource<external>{%c8}
stream.yield %13 : !stream.resource<external>{%c8}
} => !stream.timepoint
%2 = stream.timepoint.await %_constant__timepoint_0 => %_constant_1 : !stream.resource<constant>{%1}
%3 = util.optimization_barrier %2 : !stream.resource<constant>
%4 = stream.timepoint.await %_constant__timepoint_1 => %_constant_0 : !stream.resource<constant>{%0}
%5 = util.optimization_barrier %4 : !stream.resource<constant>
%results_2, %result_timepoint_3 = stream.async.execute with() -> !stream.resource<transient>{%c4} {
%13 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<transient>{%c4}
stream.yield %13 : !stream.resource<transient>{%c4}
} => !stream.timepoint
%6 = stream.timepoint.await %result_timepoint_3 => %results_2 : !stream.resource<transient>{%c4}
%7 = util.optimization_barrier %6 : !stream.resource<transient>
%8 = stream.resource.size %3 : !stream.resource<constant>
%results_4, %result_timepoint_5 = stream.async.execute with(%3 as %arg0: !stream.resource<constant>{%8}, %5 as %arg1: !stream.resource<constant>{%c12}, %7 as %arg2: !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8} {
%13 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%arg0[%c0 to %8 for %8], %arg1[%c0 to %c12 for %c12], %arg2[%c0 to %c4 for %c4]) : (!stream.resource<constant>{%8}, !stream.resource<constant>{%c12}, !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8}
stream.yield %13 : !stream.resource<external>{%c8}
} => !stream.timepoint
%9 = stream.timepoint.await %result_timepoint_5 => %results_4 : !stream.resource<external>{%c8}
%10 = stream.tensor.export %9 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%11 = stream.timepoint.await %result_timepoint => %results : !stream.resource<external>{%c8}
%12 = stream.tensor.export %11 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%10, %12) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After IPO (iree-util-ipo) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private mutable @_constant__timepoint = #stream.timepoint<immediate> : !stream.timepoint
util.global private @_constant : !stream.resource<constant>
util.global private @_constant_0 : !stream.resource<constant>
util.global private @_constant_1 : !stream.resource<constant>
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%results:3, %result_timepoint = stream.async.execute with() -> (!stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}) {
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
stream.yield %cst, %cst_0, %cst_1 : !stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}
} => !stream.timepoint
util.global.store %results#0, @_constant : !stream.resource<constant>
util.global.store %results#1, @_constant_0 : !stream.resource<constant>
util.global.store %results#2, @_constant_1 : !stream.resource<constant>
util.global.store %result_timepoint, @_constant__timepoint : !stream.timepoint
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%_constant__timepoint = util.global.load @_constant__timepoint : !stream.timepoint
%_constant__timepoint_0 = util.global.load @_constant__timepoint : !stream.timepoint
%_constant__timepoint_1 = util.global.load @_constant__timepoint : !stream.timepoint
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.resource.size %_constant_0 : !stream.resource<constant>
%1 = stream.resource.size %_constant_1 : !stream.resource<constant>
%results, %result_timepoint = stream.async.execute await(%_constant__timepoint) => with(%_constant as %arg0: !stream.resource<constant>{%c8}) -> !stream.resource<external>{%c8} {
%13 = stream.async.transfer %arg0 : !stream.resource<constant>{%c8} -> !stream.resource<external>{%c8}
stream.yield %13 : !stream.resource<external>{%c8}
} => !stream.timepoint
%2 = stream.timepoint.await %_constant__timepoint_0 => %_constant_1 : !stream.resource<constant>{%1}
%3 = util.optimization_barrier %2 : !stream.resource<constant>
%4 = stream.timepoint.await %_constant__timepoint_1 => %_constant_0 : !stream.resource<constant>{%0}
%5 = util.optimization_barrier %4 : !stream.resource<constant>
%results_2, %result_timepoint_3 = stream.async.execute with() -> !stream.resource<transient>{%c4} {
%13 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<transient>{%c4}
stream.yield %13 : !stream.resource<transient>{%c4}
} => !stream.timepoint
%6 = stream.timepoint.await %result_timepoint_3 => %results_2 : !stream.resource<transient>{%c4}
%7 = util.optimization_barrier %6 : !stream.resource<transient>
%8 = stream.resource.size %3 : !stream.resource<constant>
%results_4, %result_timepoint_5 = stream.async.execute with(%3 as %arg0: !stream.resource<constant>{%8}, %5 as %arg1: !stream.resource<constant>{%c12}, %7 as %arg2: !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8} {
%13 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%arg0[%c0 to %8 for %8], %arg1[%c0 to %c12 for %c12], %arg2[%c0 to %c4 for %c4]) : (!stream.resource<constant>{%8}, !stream.resource<constant>{%c12}, !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8}
stream.yield %13 : !stream.resource<external>{%c8}
} => !stream.timepoint
%9 = stream.timepoint.await %result_timepoint_5 => %results_4 : !stream.resource<external>{%c8}
%10 = stream.tensor.export %9 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%11 = stream.timepoint.await %result_timepoint => %results : !stream.resource<external>{%c8}
%12 = stream.tensor.export %11 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%10, %12) : tensor<2x1xf32>
return
}
}
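// Annotation: IPO leaves this module unchanged; the dump above is identical to the pre-pass dump.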
// -----// IR Dump Before VerifyLoweringToAsync (iree-stream-verify-lowering-to-async) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private mutable @_constant__timepoint = #stream.timepoint<immediate> : !stream.timepoint
util.global private @_constant : !stream.resource<constant>
util.global private @_constant_0 : !stream.resource<constant>
util.global private @_constant_1 : !stream.resource<constant>
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%results:3, %result_timepoint = stream.async.execute with() -> (!stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}) {
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
stream.yield %cst, %cst_0, %cst_1 : !stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}
} => !stream.timepoint
util.global.store %results#0, @_constant : !stream.resource<constant>
util.global.store %results#1, @_constant_0 : !stream.resource<constant>
util.global.store %results#2, @_constant_1 : !stream.resource<constant>
util.global.store %result_timepoint, @_constant__timepoint : !stream.timepoint
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%_constant__timepoint = util.global.load @_constant__timepoint : !stream.timepoint
%_constant__timepoint_0 = util.global.load @_constant__timepoint : !stream.timepoint
%_constant__timepoint_1 = util.global.load @_constant__timepoint : !stream.timepoint
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.resource.size %_constant_0 : !stream.resource<constant>
%1 = stream.resource.size %_constant_1 : !stream.resource<constant>
%results, %result_timepoint = stream.async.execute await(%_constant__timepoint) => with(%_constant as %arg0: !stream.resource<constant>{%c8}) -> !stream.resource<external>{%c8} {
%13 = stream.async.transfer %arg0 : !stream.resource<constant>{%c8} -> !stream.resource<external>{%c8}
stream.yield %13 : !stream.resource<external>{%c8}
} => !stream.timepoint
%2 = stream.timepoint.await %_constant__timepoint_0 => %_constant_1 : !stream.resource<constant>{%1}
%3 = util.optimization_barrier %2 : !stream.resource<constant>
%4 = stream.timepoint.await %_constant__timepoint_1 => %_constant_0 : !stream.resource<constant>{%0}
%5 = util.optimization_barrier %4 : !stream.resource<constant>
%results_2, %result_timepoint_3 = stream.async.execute with() -> !stream.resource<transient>{%c4} {
%13 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<transient>{%c4}
stream.yield %13 : !stream.resource<transient>{%c4}
} => !stream.timepoint
%6 = stream.timepoint.await %result_timepoint_3 => %results_2 : !stream.resource<transient>{%c4}
%7 = util.optimization_barrier %6 : !stream.resource<transient>
%8 = stream.resource.size %3 : !stream.resource<constant>
%results_4, %result_timepoint_5 = stream.async.execute with(%3 as %arg0: !stream.resource<constant>{%8}, %5 as %arg1: !stream.resource<constant>{%c12}, %7 as %arg2: !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8} {
%13 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%arg0[%c0 to %8 for %8], %arg1[%c0 to %c12 for %c12], %arg2[%c0 to %c4 for %c4]) : (!stream.resource<constant>{%8}, !stream.resource<constant>{%c12}, !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8}
stream.yield %13 : !stream.resource<external>{%c8}
} => !stream.timepoint
%9 = stream.timepoint.await %result_timepoint_5 => %results_4 : !stream.resource<external>{%c8}
%10 = stream.tensor.export %9 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%11 = stream.timepoint.await %result_timepoint => %results : !stream.resource<external>{%c8}
%12 = stream.tensor.export %11 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%10, %12) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After VerifyLoweringToAsync (iree-stream-verify-lowering-to-async) //----- //
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private mutable @_constant__timepoint = #stream.timepoint<immediate> : !stream.timepoint
util.global private @_constant : !stream.resource<constant>
util.global private @_constant_0 : !stream.resource<constant>
util.global private @_constant_1 : !stream.resource<constant>
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%results:3, %result_timepoint = stream.async.execute with() -> (!stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}) {
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
stream.yield %cst, %cst_0, %cst_1 : !stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}
} => !stream.timepoint
util.global.store %results#0, @_constant : !stream.resource<constant>
util.global.store %results#1, @_constant_0 : !stream.resource<constant>
util.global.store %results#2, @_constant_1 : !stream.resource<constant>
util.global.store %result_timepoint, @_constant__timepoint : !stream.timepoint
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%_constant__timepoint = util.global.load @_constant__timepoint : !stream.timepoint
%_constant__timepoint_0 = util.global.load @_constant__timepoint : !stream.timepoint
%_constant__timepoint_1 = util.global.load @_constant__timepoint : !stream.timepoint
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.resource.size %_constant_0 : !stream.resource<constant>
%1 = stream.resource.size %_constant_1 : !stream.resource<constant>
%results, %result_timepoint = stream.async.execute await(%_constant__timepoint) => with(%_constant as %arg0: !stream.resource<constant>{%c8}) -> !stream.resource<external>{%c8} {
%13 = stream.async.transfer %arg0 : !stream.resource<constant>{%c8} -> !stream.resource<external>{%c8}
stream.yield %13 : !stream.resource<external>{%c8}
} => !stream.timepoint
%2 = stream.timepoint.await %_constant__timepoint_0 => %_constant_1 : !stream.resource<constant>{%1}
%3 = util.optimization_barrier %2 : !stream.resource<constant>
%4 = stream.timepoint.await %_constant__timepoint_1 => %_constant_0 : !stream.resource<constant>{%0}
%5 = util.optimization_barrier %4 : !stream.resource<constant>
%results_2, %result_timepoint_3 = stream.async.execute with() -> !stream.resource<transient>{%c4} {
%13 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<transient>{%c4}
stream.yield %13 : !stream.resource<transient>{%c4}
} => !stream.timepoint
%6 = stream.timepoint.await %result_timepoint_3 => %results_2 : !stream.resource<transient>{%c4}
%7 = util.optimization_barrier %6 : !stream.resource<transient>
%8 = stream.resource.size %3 : !stream.resource<constant>
%results_4, %result_timepoint_5 = stream.async.execute with(%3 as %arg0: !stream.resource<constant>{%8}, %5 as %arg1: !stream.resource<constant>{%c12}, %7 as %arg2: !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8} {
%13 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%arg0[%c0 to %8 for %8], %arg1[%c0 to %c12 for %c12], %arg2[%c0 to %c4 for %c4]) : (!stream.resource<constant>{%8}, !stream.resource<constant>{%c12}, !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8}
stream.yield %13 : !stream.resource<external>{%c8}
} => !stream.timepoint
%9 = stream.timepoint.await %result_timepoint_5 => %results_4 : !stream.resource<external>{%c8}
%10 = stream.tensor.export %9 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%11 = stream.timepoint.await %result_timepoint => %results : !stream.resource<external>{%c8}
%12 = stream.tensor.export %11 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%10, %12) : tensor<2x1xf32>
return
}
}
// -----// IR Dump Before ScheduleAllocation (iree-stream-schedule-allocation) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before ScheduleAllocation (iree-stream-schedule-allocation) //----- //
func.func private @_tensor_float() {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%_constant__timepoint = util.global.load @_constant__timepoint : !stream.timepoint
%_constant__timepoint_0 = util.global.load @_constant__timepoint : !stream.timepoint
%_constant__timepoint_1 = util.global.load @_constant__timepoint : !stream.timepoint
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.resource.size %_constant_0 : !stream.resource<constant>
%1 = stream.resource.size %_constant_1 : !stream.resource<constant>
%results, %result_timepoint = stream.async.execute await(%_constant__timepoint) => with(%_constant as %arg0: !stream.resource<constant>{%c8}) -> !stream.resource<external>{%c8} {
%13 = stream.async.transfer %arg0 : !stream.resource<constant>{%c8} -> !stream.resource<external>{%c8}
stream.yield %13 : !stream.resource<external>{%c8}
} => !stream.timepoint
%2 = stream.timepoint.await %_constant__timepoint_0 => %_constant_1 : !stream.resource<constant>{%1}
%3 = util.optimization_barrier %2 : !stream.resource<constant>
%4 = stream.timepoint.await %_constant__timepoint_1 => %_constant_0 : !stream.resource<constant>{%0}
%5 = util.optimization_barrier %4 : !stream.resource<constant>
%results_2, %result_timepoint_3 = stream.async.execute with() -> !stream.resource<transient>{%c4} {
%13 = stream.async.splat %c1065353216_i32 : i32 -> !stream.resource<transient>{%c4}
stream.yield %13 : !stream.resource<transient>{%c4}
} => !stream.timepoint
%6 = stream.timepoint.await %result_timepoint_3 => %results_2 : !stream.resource<transient>{%c4}
%7 = util.optimization_barrier %6 : !stream.resource<transient>
%8 = stream.resource.size %3 : !stream.resource<constant>
%results_4, %result_timepoint_5 = stream.async.execute with(%3 as %arg0: !stream.resource<constant>{%8}, %5 as %arg1: !stream.resource<constant>{%c12}, %7 as %arg2: !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8} {
%13 = stream.async.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1](%arg0[%c0 to %8 for %8], %arg1[%c0 to %c12 for %c12], %arg2[%c0 to %c4 for %c4]) : (!stream.resource<constant>{%8}, !stream.resource<constant>{%c12}, !stream.resource<transient>{%c4}) -> !stream.resource<external>{%c8}
stream.yield %13 : !stream.resource<external>{%c8}
} => !stream.timepoint
%9 = stream.timepoint.await %result_timepoint_5 => %results_4 : !stream.resource<external>{%c8}
%10 = stream.tensor.export %9 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%11 = stream.timepoint.await %result_timepoint => %results : !stream.resource<external>{%c8}
%12 = stream.tensor.export %11 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%10, %12) : tensor<2x1xf32>
return
}
// -----// IR Dump Before ScheduleAllocation (iree-stream-schedule-allocation) //----- //
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%results:3, %result_timepoint = stream.async.execute with() -> (!stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}) {
%cst = stream.async.constant : !stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>
%cst_0 = stream.async.constant : !stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>
%cst_1 = stream.async.constant : !stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
stream.yield %cst, %cst_0, %cst_1 : !stream.resource<constant>{%c8}, !stream.resource<constant>{%c12}, !stream.resource<constant>{%c24}
} => !stream.timepoint
util.global.store %results#0, @_constant : !stream.resource<constant>
util.global.store %results#1, @_constant_0 : !stream.resource<constant>
util.global.store %results#2, @_constant_1 : !stream.resource<constant>
util.global.store %result_timepoint, @_constant__timepoint : !stream.timepoint
util.initializer.return
}
// -----// IR Dump After ScheduleAllocation (iree-stream-schedule-allocation) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before PackConstants (iree-stream-pack-constants) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After ScheduleAllocation (iree-stream-schedule-allocation) //----- //
func.func private @_tensor_float() {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%_constant__timepoint = util.global.load @_constant__timepoint : !stream.timepoint
%_constant__timepoint_0 = util.global.load @_constant__timepoint : !stream.timepoint
%_constant__timepoint_1 = util.global.load @_constant__timepoint : !stream.timepoint
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.resource.size %_constant_0 : !stream.resource<constant>
%1 = stream.resource.size %_constant_1 : !stream.resource<constant>
%c0_2 = arith.constant 0 : index
%2 = stream.resource.alloc uninitialized : !stream.resource<external>{%c8}
%3 = stream.cmd.execute await(%_constant__timepoint) => with(%_constant as %arg0: !stream.resource<constant>{%c8}, %2 as %arg1: !stream.resource<external>{%c8}) {
stream.cmd.copy %arg0[%c0_2], %arg1[%c0_2], %c8 : !stream.resource<constant>{%c8} -> !stream.resource<external>{%c8}
} => !stream.timepoint
%4 = stream.timepoint.await %_constant__timepoint_0 => %_constant_1 : !stream.resource<constant>{%1}
%5 = util.optimization_barrier %4 : !stream.resource<constant>
%6 = stream.timepoint.await %_constant__timepoint_1 => %_constant_0 : !stream.resource<constant>{%0}
%7 = util.optimization_barrier %6 : !stream.resource<constant>
%8 = stream.resource.alloc uninitialized : !stream.resource<transient>{%c4}
%c0_3 = arith.constant 0 : index
%9 = stream.cmd.execute with(%8 as %arg0: !stream.resource<transient>{%c4}) {
stream.cmd.fill %c1065353216_i32, %arg0[%c0_3 for %c4] : i32 -> !stream.resource<transient>{%c4}
} => !stream.timepoint
%10 = stream.timepoint.await %9 => %8 : !stream.resource<transient>{%c4}
%11 = util.optimization_barrier %10 : !stream.resource<transient>
%12 = stream.resource.size %5 : !stream.resource<constant>
%c0_4 = arith.constant 0 : index
%13 = stream.resource.alloc uninitialized : !stream.resource<external>{%c8}
%14 = stream.cmd.execute with(%5 as %arg0: !stream.resource<constant>{%12}, %7 as %arg1: !stream.resource<constant>{%c12}, %11 as %arg2: !stream.resource<transient>{%c4}, %13 as %arg3: !stream.resource<external>{%c8}) {
stream.cmd.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1] {
ro %arg0[%c0 for %12] : !stream.resource<constant>{%12},
ro %arg1[%c0 for %c12] : !stream.resource<constant>{%c12},
ro %arg2[%c0 for %c4] : !stream.resource<transient>{%c4},
wo %arg3[%c0_4 for %c8] : !stream.resource<external>{%c8}
}
} => !stream.timepoint
%15 = stream.timepoint.await %14 => %13 : !stream.resource<external>{%c8}
%16 = stream.tensor.export %15 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%17 = stream.timepoint.await %3 => %2 : !stream.resource<external>{%c8}
%18 = stream.tensor.export %17 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%16, %18) : tensor<2x1xf32>
return
}
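// Annotation: ScheduleAllocation rewrites the stream.async.execute regions in @_tensor_float into
// stream.cmd.execute regions with explicit allocations: the constant-to-external transfer becomes a
// stream.cmd.copy into a freshly allocated external resource, the i32 splat becomes a
// stream.cmd.fill of a transient allocation, and the dispatch now writes (wo) into a separately
// allocated external result buffer.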
// -----// IR Dump Before PackConstants (iree-stream-pack-constants) //----- //
func.func private @_tensor_float() {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%_constant__timepoint = util.global.load @_constant__timepoint : !stream.timepoint
%_constant__timepoint_0 = util.global.load @_constant__timepoint : !stream.timepoint
%_constant__timepoint_1 = util.global.load @_constant__timepoint : !stream.timepoint
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.resource.size %_constant_0 : !stream.resource<constant>
%1 = stream.resource.size %_constant_1 : !stream.resource<constant>
%c0_2 = arith.constant 0 : index
%2 = stream.resource.alloc uninitialized : !stream.resource<external>{%c8}
%3 = stream.cmd.execute await(%_constant__timepoint) => with(%_constant as %arg0: !stream.resource<constant>{%c8}, %2 as %arg1: !stream.resource<external>{%c8}) {
stream.cmd.copy %arg0[%c0_2], %arg1[%c0_2], %c8 : !stream.resource<constant>{%c8} -> !stream.resource<external>{%c8}
} => !stream.timepoint
%4 = stream.timepoint.await %_constant__timepoint_0 => %_constant_1 : !stream.resource<constant>{%1}
%5 = util.optimization_barrier %4 : !stream.resource<constant>
%6 = stream.timepoint.await %_constant__timepoint_1 => %_constant_0 : !stream.resource<constant>{%0}
%7 = util.optimization_barrier %6 : !stream.resource<constant>
%8 = stream.resource.alloc uninitialized : !stream.resource<transient>{%c4}
%c0_3 = arith.constant 0 : index
%9 = stream.cmd.execute with(%8 as %arg0: !stream.resource<transient>{%c4}) {
stream.cmd.fill %c1065353216_i32, %arg0[%c0_3 for %c4] : i32 -> !stream.resource<transient>{%c4}
} => !stream.timepoint
%10 = stream.timepoint.await %9 => %8 : !stream.resource<transient>{%c4}
%11 = util.optimization_barrier %10 : !stream.resource<transient>
%12 = stream.resource.size %5 : !stream.resource<constant>
%c0_4 = arith.constant 0 : index
%13 = stream.resource.alloc uninitialized : !stream.resource<external>{%c8}
%14 = stream.cmd.execute with(%5 as %arg0: !stream.resource<constant>{%12}, %7 as %arg1: !stream.resource<constant>{%c12}, %11 as %arg2: !stream.resource<transient>{%c4}, %13 as %arg3: !stream.resource<external>{%c8}) {
stream.cmd.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1] {
ro %arg0[%c0 for %12] : !stream.resource<constant>{%12},
ro %arg1[%c0 for %c12] : !stream.resource<constant>{%c12},
ro %arg2[%c0 for %c4] : !stream.resource<transient>{%c4},
wo %arg3[%c0_4 for %c8] : !stream.resource<external>{%c8}
}
} => !stream.timepoint
%15 = stream.timepoint.await %14 => %13 : !stream.resource<external>{%c8}
%16 = stream.tensor.export %15 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%17 = stream.timepoint.await %3 => %2 : !stream.resource<external>{%c8}
%18 = stream.tensor.export %17 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%16, %18) : tensor<2x1xf32>
return
}
// -----// IR Dump After PackConstants (iree-stream-pack-constants) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before PackAllocations (iree-stream-pack-allocations) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After ScheduleAllocation (iree-stream-schedule-allocation) //----- //
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%results:3, %result_timepoint = stream.resource.constants :
!stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>,
!stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>,
!stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
=> !stream.timepoint
%0 = stream.cmd.execute with() {
} => !stream.timepoint
%1 = stream.timepoint.join max(%result_timepoint, %0) => !stream.timepoint
util.global.store %results#0, @_constant : !stream.resource<constant>
util.global.store %results#1, @_constant_0 : !stream.resource<constant>
util.global.store %results#2, @_constant_1 : !stream.resource<constant>
util.global.store %1, @_constant__timepoint : !stream.timepoint
util.initializer.return
}
// -----// IR Dump Before PackConstants (iree-stream-pack-constants) //----- //
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%results:3, %result_timepoint = stream.resource.constants :
!stream.resource<constant>{%c8} = dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>,
!stream.resource<constant>{%c12} = dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>,
!stream.resource<constant>{%c24} = dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
=> !stream.timepoint
%0 = stream.cmd.execute with() {
} => !stream.timepoint
%1 = stream.timepoint.join max(%result_timepoint, %0) => !stream.timepoint
util.global.store %results#0, @_constant : !stream.resource<constant>
util.global.store %results#1, @_constant_0 : !stream.resource<constant>
util.global.store %results#2, @_constant_1 : !stream.resource<constant>
util.global.store %1, @_constant__timepoint : !stream.timepoint
util.initializer.return
}
// -----// IR Dump After PackConstants (iree-stream-pack-constants) //----- //
func.func private @_tensor_float() {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%_constant__timepoint = util.global.load @_constant__timepoint : !stream.timepoint
%_constant__timepoint_0 = util.global.load @_constant__timepoint : !stream.timepoint
%_constant__timepoint_1 = util.global.load @_constant__timepoint : !stream.timepoint
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.resource.size %_constant_0 : !stream.resource<constant>
%1 = stream.resource.size %_constant_1 : !stream.resource<constant>
%c0_2 = arith.constant 0 : index
%2 = stream.resource.alloc uninitialized : !stream.resource<external>{%c8}
%3 = stream.cmd.execute await(%_constant__timepoint) => with(%_constant as %arg0: !stream.resource<constant>{%c8}, %2 as %arg1: !stream.resource<external>{%c8}) {
stream.cmd.copy %arg0[%c0_2], %arg1[%c0_2], %c8 : !stream.resource<constant>{%c8} -> !stream.resource<external>{%c8}
} => !stream.timepoint
%4 = stream.timepoint.await %_constant__timepoint_0 => %_constant_1 : !stream.resource<constant>{%1}
%5 = util.optimization_barrier %4 : !stream.resource<constant>
%6 = stream.timepoint.await %_constant__timepoint_1 => %_constant_0 : !stream.resource<constant>{%0}
%7 = util.optimization_barrier %6 : !stream.resource<constant>
%8 = stream.resource.alloc uninitialized : !stream.resource<transient>{%c4}
%c0_3 = arith.constant 0 : index
%9 = stream.cmd.execute with(%8 as %arg0: !stream.resource<transient>{%c4}) {
stream.cmd.fill %c1065353216_i32, %arg0[%c0_3 for %c4] : i32 -> !stream.resource<transient>{%c4}
} => !stream.timepoint
%10 = stream.timepoint.await %9 => %8 : !stream.resource<transient>{%c4}
%11 = util.optimization_barrier %10 : !stream.resource<transient>
%12 = stream.resource.size %5 : !stream.resource<constant>
%c0_4 = arith.constant 0 : index
%13 = stream.resource.alloc uninitialized : !stream.resource<external>{%c8}
%14 = stream.cmd.execute with(%5 as %arg0: !stream.resource<constant>{%12}, %7 as %arg1: !stream.resource<constant>{%c12}, %11 as %arg2: !stream.resource<transient>{%c4}, %13 as %arg3: !stream.resource<external>{%c8}) {
stream.cmd.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1] {
ro %arg0[%c0 for %12] : !stream.resource<constant>{%12},
ro %arg1[%c0 for %c12] : !stream.resource<constant>{%c12},
ro %arg2[%c0 for %c4] : !stream.resource<transient>{%c4},
wo %arg3[%c0_4 for %c8] : !stream.resource<external>{%c8}
}
} => !stream.timepoint
%15 = stream.timepoint.await %14 => %13 : !stream.resource<external>{%c8}
%16 = stream.tensor.export %15 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%17 = stream.timepoint.await %3 => %2 : !stream.resource<external>{%c8}
%18 = stream.tensor.export %17 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%16, %18) : tensor<2x1xf32>
return
}
// -----// IR Dump Before PackAllocations (iree-stream-pack-allocations) //----- //
func.func private @_tensor_float() {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%_constant__timepoint = util.global.load @_constant__timepoint : !stream.timepoint
%_constant__timepoint_0 = util.global.load @_constant__timepoint : !stream.timepoint
%_constant__timepoint_1 = util.global.load @_constant__timepoint : !stream.timepoint
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.resource.size %_constant_0 : !stream.resource<constant>
%1 = stream.resource.size %_constant_1 : !stream.resource<constant>
%c0_2 = arith.constant 0 : index
%2 = stream.resource.alloc uninitialized : !stream.resource<external>{%c8}
%3 = stream.cmd.execute await(%_constant__timepoint) => with(%_constant as %arg0: !stream.resource<constant>{%c8}, %2 as %arg1: !stream.resource<external>{%c8}) {
stream.cmd.copy %arg0[%c0_2], %arg1[%c0_2], %c8 : !stream.resource<constant>{%c8} -> !stream.resource<external>{%c8}
} => !stream.timepoint
%4 = stream.timepoint.await %_constant__timepoint_0 => %_constant_1 : !stream.resource<constant>{%1}
%5 = util.optimization_barrier %4 : !stream.resource<constant>
%6 = stream.timepoint.await %_constant__timepoint_1 => %_constant_0 : !stream.resource<constant>{%0}
%7 = util.optimization_barrier %6 : !stream.resource<constant>
%8 = stream.resource.alloc uninitialized : !stream.resource<transient>{%c4}
%c0_3 = arith.constant 0 : index
%9 = stream.cmd.execute with(%8 as %arg0: !stream.resource<transient>{%c4}) {
stream.cmd.fill %c1065353216_i32, %arg0[%c0_3 for %c4] : i32 -> !stream.resource<transient>{%c4}
} => !stream.timepoint
%10 = stream.timepoint.await %9 => %8 : !stream.resource<transient>{%c4}
%11 = util.optimization_barrier %10 : !stream.resource<transient>
%12 = stream.resource.size %5 : !stream.resource<constant>
%c0_4 = arith.constant 0 : index
%13 = stream.resource.alloc uninitialized : !stream.resource<external>{%c8}
%14 = stream.cmd.execute with(%5 as %arg0: !stream.resource<constant>{%12}, %7 as %arg1: !stream.resource<constant>{%c12}, %11 as %arg2: !stream.resource<transient>{%c4}, %13 as %arg3: !stream.resource<external>{%c8}) {
stream.cmd.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1] {
ro %arg0[%c0 for %12] : !stream.resource<constant>{%12},
ro %arg1[%c0 for %c12] : !stream.resource<constant>{%c12},
ro %arg2[%c0 for %c4] : !stream.resource<transient>{%c4},
wo %arg3[%c0_4 for %c8] : !stream.resource<external>{%c8}
}
} => !stream.timepoint
%15 = stream.timepoint.await %14 => %13 : !stream.resource<external>{%c8}
%16 = stream.tensor.export %15 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%17 = stream.timepoint.await %3 => %2 : !stream.resource<external>{%c8}
%18 = stream.tensor.export %17 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%16, %18) : tensor<2x1xf32>
return
}
// -----// IR Dump After PackConstants (iree-stream-pack-constants) //----- //
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%buffer_cst = util.buffer.constant {alignment = 64 : index} : !util.buffer = #util.composite<192xi8, [
dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>,
dense<0> : vector<56xi8>,
dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>,
dense<0> : vector<52xi8>,
dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>,
dense<0> : vector<40xi8>,
]>
%c0 = arith.constant 0 : index
%c192 = arith.constant 192 : index
%did_map, %result = stream.resource.try_map %buffer_cst[%c0] : !util.buffer -> i1, !stream.resource<constant>{%c192}
%0:2 = scf.if %did_map -> (!stream.resource<constant>, !stream.timepoint) {
%6 = stream.timepoint.immediate => !stream.timepoint
scf.yield %result, %6 : !stream.resource<constant>, !stream.timepoint
} else {
%6 = stream.resource.map %buffer_cst[%c0] : !util.buffer -> !stream.resource<staging>{%c192}
%7 = stream.resource.alloc uninitialized : !stream.resource<constant>{%c192}
%8 = stream.cmd.execute with(%6 as %arg0: !stream.resource<staging>{%c192}, %7 as %arg1: !stream.resource<constant>{%c192}) {
stream.cmd.copy %arg0[%c0], %arg1[%c0], %c192 : !stream.resource<staging>{%c192} -> !stream.resource<constant>{%c192}
} => !stream.timepoint
scf.yield %7, %8 : !stream.resource<constant>, !stream.timepoint
}
%1 = stream.resource.subview %0#0[%c0] : !stream.resource<constant>{%c192} -> !stream.resource<constant>{%c8}
%c64 = arith.constant 64 : index
%2 = stream.resource.subview %0#0[%c64] : !stream.resource<constant>{%c192} -> !stream.resource<constant>{%c12}
%c128 = arith.constant 128 : index
%3 = stream.resource.subview %0#0[%c128] : !stream.resource<constant>{%c192} -> !stream.resource<constant>{%c24}
%4 = stream.cmd.execute with() {
} => !stream.timepoint
%5 = stream.timepoint.join max(%0#1, %4) => !stream.timepoint
util.global.store %1, @_constant : !stream.resource<constant>
util.global.store %2, @_constant_0 : !stream.resource<constant>
util.global.store %3, @_constant_1 : !stream.resource<constant>
util.global.store %5, @_constant__timepoint : !stream.timepoint
util.initializer.return
}
// -----// IR Dump Before PackAllocations (iree-stream-pack-allocations) //----- //
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%buffer_cst = util.buffer.constant {alignment = 64 : index} : !util.buffer = #util.composite<192xi8, [
dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>,
dense<0> : vector<56xi8>,
dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>,
dense<0> : vector<52xi8>,
dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>,
dense<0> : vector<40xi8>,
]>
%c0 = arith.constant 0 : index
%c192 = arith.constant 192 : index
%did_map, %result = stream.resource.try_map %buffer_cst[%c0] : !util.buffer -> i1, !stream.resource<constant>{%c192}
%0:2 = scf.if %did_map -> (!stream.resource<constant>, !stream.timepoint) {
%6 = stream.timepoint.immediate => !stream.timepoint
scf.yield %result, %6 : !stream.resource<constant>, !stream.timepoint
} else {
%6 = stream.resource.map %buffer_cst[%c0] : !util.buffer -> !stream.resource<staging>{%c192}
%7 = stream.resource.alloc uninitialized : !stream.resource<constant>{%c192}
%8 = stream.cmd.execute with(%6 as %arg0: !stream.resource<staging>{%c192}, %7 as %arg1: !stream.resource<constant>{%c192}) {
stream.cmd.copy %arg0[%c0], %arg1[%c0], %c192 : !stream.resource<staging>{%c192} -> !stream.resource<constant>{%c192}
} => !stream.timepoint
scf.yield %7, %8 : !stream.resource<constant>, !stream.timepoint
}
%1 = stream.resource.subview %0#0[%c0] : !stream.resource<constant>{%c192} -> !stream.resource<constant>{%c8}
%c64 = arith.constant 64 : index
%2 = stream.resource.subview %0#0[%c64] : !stream.resource<constant>{%c192} -> !stream.resource<constant>{%c12}
%c128 = arith.constant 128 : index
%3 = stream.resource.subview %0#0[%c128] : !stream.resource<constant>{%c192} -> !stream.resource<constant>{%c24}
%4 = stream.cmd.execute with() {
} => !stream.timepoint
%5 = stream.timepoint.join max(%0#1, %4) => !stream.timepoint
util.global.store %1, @_constant : !stream.resource<constant>
util.global.store %2, @_constant_0 : !stream.resource<constant>
util.global.store %3, @_constant_1 : !stream.resource<constant>
util.global.store %5, @_constant__timepoint : !stream.timepoint
util.initializer.return
}
// -----// IR Dump After PackAllocations (iree-stream-pack-allocations) //----- //
func.func private @_tensor_float() {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%_constant__timepoint = util.global.load @_constant__timepoint : !stream.timepoint
%_constant__timepoint_0 = util.global.load @_constant__timepoint : !stream.timepoint
%_constant__timepoint_1 = util.global.load @_constant__timepoint : !stream.timepoint
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.resource.size %_constant_0 : !stream.resource<constant>
%1 = stream.resource.size %_constant_1 : !stream.resource<constant>
%c0_2 = arith.constant 0 : index
%2 = stream.resource.alloc uninitialized : !stream.resource<external>{%c8}
%3 = stream.cmd.execute await(%_constant__timepoint) => with(%_constant as %arg0: !stream.resource<constant>{%c8}, %2 as %arg1: !stream.resource<external>{%c8}) {
stream.cmd.copy %arg0[%c0_2], %arg1[%c0_2], %c8 : !stream.resource<constant>{%c8} -> !stream.resource<external>{%c8}
} => !stream.timepoint
%4 = stream.timepoint.await %_constant__timepoint_0 => %_constant_1 : !stream.resource<constant>{%1}
%5 = util.optimization_barrier %4 : !stream.resource<constant>
%6 = stream.timepoint.await %_constant__timepoint_1 => %_constant_0 : !stream.resource<constant>{%0}
%7 = util.optimization_barrier %6 : !stream.resource<constant>
%8 = stream.resource.alloc uninitialized : !stream.resource<transient>{%c4}
%c0_3 = arith.constant 0 : index
%9 = stream.cmd.execute with(%8 as %arg0: !stream.resource<transient>{%c4}) {
stream.cmd.fill %c1065353216_i32, %arg0[%c0_3 for %c4] : i32 -> !stream.resource<transient>{%c4}
} => !stream.timepoint
%10 = stream.timepoint.await %9 => %8 : !stream.resource<transient>{%c4}
%11 = util.optimization_barrier %10 : !stream.resource<transient>
%12 = stream.resource.size %5 : !stream.resource<constant>
%c0_4 = arith.constant 0 : index
%13 = stream.resource.alloc uninitialized : !stream.resource<external>{%c8}
%14 = stream.cmd.execute with(%5 as %arg0: !stream.resource<constant>{%12}, %7 as %arg1: !stream.resource<constant>{%c12}, %11 as %arg2: !stream.resource<transient>{%c4}, %13 as %arg3: !stream.resource<external>{%c8}) {
stream.cmd.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1] {
ro %arg0[%c0 for %12] : !stream.resource<constant>{%12},
ro %arg1[%c0 for %c12] : !stream.resource<constant>{%c12},
ro %arg2[%c0 for %c4] : !stream.resource<transient>{%c4},
wo %arg3[%c0_4 for %c8] : !stream.resource<external>{%c8}
}
} => !stream.timepoint
%15 = stream.timepoint.await %14 => %13 : !stream.resource<external>{%c8}
%16 = stream.tensor.export %15 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%17 = stream.timepoint.await %3 => %2 : !stream.resource<external>{%c8}
%18 = stream.tensor.export %17 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%16, %18) : tensor<2x1xf32>
return
}
// -----// IR Dump Before LayoutSlices (iree-stream-layout-slices) //----- //
func.func private @_tensor_float() {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%_constant__timepoint = util.global.load @_constant__timepoint : !stream.timepoint
%_constant__timepoint_0 = util.global.load @_constant__timepoint : !stream.timepoint
%_constant__timepoint_1 = util.global.load @_constant__timepoint : !stream.timepoint
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.resource.size %_constant_0 : !stream.resource<constant>
%1 = stream.resource.size %_constant_1 : !stream.resource<constant>
%c0_2 = arith.constant 0 : index
%2 = stream.resource.alloc uninitialized : !stream.resource<external>{%c8}
%3 = stream.cmd.execute await(%_constant__timepoint) => with(%_constant as %arg0: !stream.resource<constant>{%c8}, %2 as %arg1: !stream.resource<external>{%c8}) {
stream.cmd.copy %arg0[%c0_2], %arg1[%c0_2], %c8 : !stream.resource<constant>{%c8} -> !stream.resource<external>{%c8}
} => !stream.timepoint
%4 = stream.timepoint.await %_constant__timepoint_0 => %_constant_1 : !stream.resource<constant>{%1}
%5 = util.optimization_barrier %4 : !stream.resource<constant>
%6 = stream.timepoint.await %_constant__timepoint_1 => %_constant_0 : !stream.resource<constant>{%0}
%7 = util.optimization_barrier %6 : !stream.resource<constant>
%8 = stream.resource.alloc uninitialized : !stream.resource<transient>{%c4}
%c0_3 = arith.constant 0 : index
%9 = stream.cmd.execute with(%8 as %arg0: !stream.resource<transient>{%c4}) {
stream.cmd.fill %c1065353216_i32, %arg0[%c0_3 for %c4] : i32 -> !stream.resource<transient>{%c4}
} => !stream.timepoint
%10 = stream.timepoint.await %9 => %8 : !stream.resource<transient>{%c4}
%11 = util.optimization_barrier %10 : !stream.resource<transient>
%12 = stream.resource.size %5 : !stream.resource<constant>
%c0_4 = arith.constant 0 : index
%13 = stream.resource.alloc uninitialized : !stream.resource<external>{%c8}
%14 = stream.cmd.execute with(%5 as %arg0: !stream.resource<constant>{%12}, %7 as %arg1: !stream.resource<constant>{%c12}, %11 as %arg2: !stream.resource<transient>{%c4}, %13 as %arg3: !stream.resource<external>{%c8}) {
stream.cmd.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1] {
ro %arg0[%c0 for %12] : !stream.resource<constant>{%12},
ro %arg1[%c0 for %c12] : !stream.resource<constant>{%c12},
ro %arg2[%c0 for %c4] : !stream.resource<transient>{%c4},
wo %arg3[%c0_4 for %c8] : !stream.resource<external>{%c8}
}
} => !stream.timepoint
%15 = stream.timepoint.await %14 => %13 : !stream.resource<external>{%c8}
%16 = stream.tensor.export %15 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%17 = stream.timepoint.await %3 => %2 : !stream.resource<external>{%c8}
%18 = stream.tensor.export %17 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%16, %18) : tensor<2x1xf32>
return
}
// -----// IR Dump After PackAllocations (iree-stream-pack-allocations) //----- //
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%buffer_cst = util.buffer.constant {alignment = 64 : index} : !util.buffer = #util.composite<192xi8, [
dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>,
dense<0> : vector<56xi8>,
dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>,
dense<0> : vector<52xi8>,
dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>,
dense<0> : vector<40xi8>,
]>
%c0 = arith.constant 0 : index
%c192 = arith.constant 192 : index
%did_map, %result = stream.resource.try_map %buffer_cst[%c0] : !util.buffer -> i1, !stream.resource<constant>{%c192}
%0:2 = scf.if %did_map -> (!stream.resource<constant>, !stream.timepoint) {
%6 = stream.timepoint.immediate => !stream.timepoint
scf.yield %result, %6 : !stream.resource<constant>, !stream.timepoint
} else {
%6 = stream.resource.map %buffer_cst[%c0] : !util.buffer -> !stream.resource<staging>{%c192}
%7 = stream.resource.alloc uninitialized : !stream.resource<constant>{%c192}
%8 = stream.cmd.execute with(%6 as %arg0: !stream.resource<staging>{%c192}, %7 as %arg1: !stream.resource<constant>{%c192}) {
stream.cmd.copy %arg0[%c0], %arg1[%c0], %c192 : !stream.resource<staging>{%c192} -> !stream.resource<constant>{%c192}
} => !stream.timepoint
scf.yield %7, %8 : !stream.resource<constant>, !stream.timepoint
}
%1 = stream.resource.subview %0#0[%c0] : !stream.resource<constant>{%c192} -> !stream.resource<constant>{%c8}
%c64 = arith.constant 64 : index
%2 = stream.resource.subview %0#0[%c64] : !stream.resource<constant>{%c192} -> !stream.resource<constant>{%c12}
%c128 = arith.constant 128 : index
%3 = stream.resource.subview %0#0[%c128] : !stream.resource<constant>{%c192} -> !stream.resource<constant>{%c24}
%4 = stream.cmd.execute with() {
} => !stream.timepoint
%5 = stream.timepoint.join max(%0#1, %4) => !stream.timepoint
util.global.store %1, @_constant : !stream.resource<constant>
util.global.store %2, @_constant_0 : !stream.resource<constant>
util.global.store %3, @_constant_1 : !stream.resource<constant>
util.global.store %5, @_constant__timepoint : !stream.timepoint
util.initializer.return
}
// -----// IR Dump Before LayoutSlices (iree-stream-layout-slices) //----- //
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%buffer_cst = util.buffer.constant {alignment = 64 : index} : !util.buffer = #util.composite<192xi8, [
dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>,
dense<0> : vector<56xi8>,
dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>,
dense<0> : vector<52xi8>,
dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>,
dense<0> : vector<40xi8>,
]>
%c0 = arith.constant 0 : index
%c192 = arith.constant 192 : index
%did_map, %result = stream.resource.try_map %buffer_cst[%c0] : !util.buffer -> i1, !stream.resource<constant>{%c192}
%0:2 = scf.if %did_map -> (!stream.resource<constant>, !stream.timepoint) {
%6 = stream.timepoint.immediate => !stream.timepoint
scf.yield %result, %6 : !stream.resource<constant>, !stream.timepoint
} else {
%6 = stream.resource.map %buffer_cst[%c0] : !util.buffer -> !stream.resource<staging>{%c192}
%7 = stream.resource.alloc uninitialized : !stream.resource<constant>{%c192}
%8 = stream.cmd.execute with(%6 as %arg0: !stream.resource<staging>{%c192}, %7 as %arg1: !stream.resource<constant>{%c192}) {
stream.cmd.copy %arg0[%c0], %arg1[%c0], %c192 : !stream.resource<staging>{%c192} -> !stream.resource<constant>{%c192}
} => !stream.timepoint
scf.yield %7, %8 : !stream.resource<constant>, !stream.timepoint
}
%1 = stream.resource.subview %0#0[%c0] : !stream.resource<constant>{%c192} -> !stream.resource<constant>{%c8}
%c64 = arith.constant 64 : index
%2 = stream.resource.subview %0#0[%c64] : !stream.resource<constant>{%c192} -> !stream.resource<constant>{%c12}
%c128 = arith.constant 128 : index
%3 = stream.resource.subview %0#0[%c128] : !stream.resource<constant>{%c192} -> !stream.resource<constant>{%c24}
%4 = stream.cmd.execute with() {
} => !stream.timepoint
%5 = stream.timepoint.join max(%0#1, %4) => !stream.timepoint
util.global.store %1, @_constant : !stream.resource<constant>
util.global.store %2, @_constant_0 : !stream.resource<constant>
util.global.store %3, @_constant_1 : !stream.resource<constant>
util.global.store %5, @_constant__timepoint : !stream.timepoint
util.initializer.return
}
// -----// IR Dump After LayoutSlices (iree-stream-layout-slices) //----- //
func.func private @_tensor_float() {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%_constant__timepoint = util.global.load @_constant__timepoint : !stream.timepoint
%_constant__timepoint_0 = util.global.load @_constant__timepoint : !stream.timepoint
%_constant__timepoint_1 = util.global.load @_constant__timepoint : !stream.timepoint
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.resource.size %_constant_0 : !stream.resource<constant>
%1 = stream.resource.size %_constant_1 : !stream.resource<constant>
%c0_2 = arith.constant 0 : index
%2 = stream.resource.alloc uninitialized : !stream.resource<external>{%c8}
%3 = stream.cmd.execute await(%_constant__timepoint) => with(%_constant as %arg0: !stream.resource<constant>{%c8}, %2 as %arg1: !stream.resource<external>{%c8}) {
stream.cmd.copy %arg0[%c0_2], %arg1[%c0_2], %c8 : !stream.resource<constant>{%c8} -> !stream.resource<external>{%c8}
} => !stream.timepoint
%4 = stream.timepoint.await %_constant__timepoint_0 => %_constant_1 : !stream.resource<constant>{%1}
%5 = util.optimization_barrier %4 : !stream.resource<constant>
%6 = stream.timepoint.await %_constant__timepoint_1 => %_constant_0 : !stream.resource<constant>{%0}
%7 = util.optimization_barrier %6 : !stream.resource<constant>
%8 = stream.resource.alloc uninitialized : !stream.resource<transient>{%c4}
%c0_3 = arith.constant 0 : index
%9 = stream.cmd.execute with(%8 as %arg0: !stream.resource<transient>{%c4}) {
stream.cmd.fill %c1065353216_i32, %arg0[%c0_3 for %c4] : i32 -> !stream.resource<transient>{%c4}
} => !stream.timepoint
%10 = stream.timepoint.await %9 => %8 : !stream.resource<transient>{%c4}
%11 = util.optimization_barrier %10 : !stream.resource<transient>
%12 = stream.resource.size %5 : !stream.resource<constant>
%c0_4 = arith.constant 0 : index
%13 = stream.resource.alloc uninitialized : !stream.resource<external>{%c8}
%14 = stream.cmd.execute with(%5 as %arg0: !stream.resource<constant>{%12}, %7 as %arg1: !stream.resource<constant>{%c12}, %11 as %arg2: !stream.resource<transient>{%c4}, %13 as %arg3: !stream.resource<external>{%c8}) {
stream.cmd.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1] {
ro %arg0[%c0 for %12] : !stream.resource<constant>{%12},
ro %arg1[%c0 for %c12] : !stream.resource<constant>{%c12},
ro %arg2[%c0 for %c4] : !stream.resource<transient>{%c4},
wo %arg3[%c0_4 for %c8] : !stream.resource<external>{%c8}
}
} => !stream.timepoint
%15 = stream.timepoint.await %14 => %13 : !stream.resource<external>{%c8}
%16 = stream.tensor.export %15 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%17 = stream.timepoint.await %3 => %2 : !stream.resource<external>{%c8}
%18 = stream.tensor.export %17 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%16, %18) : tensor<2x1xf32>
return
}
// -----// IR Dump After PackAllocations (iree-stream-pack-allocations) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before LayoutSlices (iree-stream-layout-slices) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After LayoutSlices (iree-stream-layout-slices) //----- //
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%buffer_cst = util.buffer.constant {alignment = 64 : index} : !util.buffer = #util.composite<192xi8, [
dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>,
dense<0> : vector<56xi8>,
dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>,
dense<0> : vector<52xi8>,
dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>,
dense<0> : vector<40xi8>,
]>
%c0 = arith.constant 0 : index
%c192 = arith.constant 192 : index
%did_map, %result = stream.resource.try_map %buffer_cst[%c0] : !util.buffer -> i1, !stream.resource<constant>{%c192}
%0:2 = scf.if %did_map -> (!stream.resource<constant>, !stream.timepoint) {
%6 = stream.timepoint.immediate => !stream.timepoint
scf.yield %result, %6 : !stream.resource<constant>, !stream.timepoint
} else {
%6 = stream.resource.map %buffer_cst[%c0] : !util.buffer -> !stream.resource<staging>{%c192}
%7 = stream.resource.alloc uninitialized : !stream.resource<constant>{%c192}
%8 = stream.cmd.execute with(%6 as %arg0: !stream.resource<staging>{%c192}, %7 as %arg1: !stream.resource<constant>{%c192}) {
stream.cmd.copy %arg0[%c0], %arg1[%c0], %c192 : !stream.resource<staging>{%c192} -> !stream.resource<constant>{%c192}
} => !stream.timepoint
scf.yield %7, %8 : !stream.resource<constant>, !stream.timepoint
}
%1 = stream.resource.subview %0#0[%c0] : !stream.resource<constant>{%c192} -> !stream.resource<constant>{%c8}
%c64 = arith.constant 64 : index
%2 = stream.resource.subview %0#0[%c64] : !stream.resource<constant>{%c192} -> !stream.resource<constant>{%c12}
%c128 = arith.constant 128 : index
%3 = stream.resource.subview %0#0[%c128] : !stream.resource<constant>{%c192} -> !stream.resource<constant>{%c24}
%4 = stream.cmd.execute with() {
} => !stream.timepoint
%5 = stream.timepoint.join max(%0#1, %4) => !stream.timepoint
util.global.store %1, @_constant : !stream.resource<constant>
util.global.store %2, @_constant_0 : !stream.resource<constant>
util.global.store %3, @_constant_1 : !stream.resource<constant>
util.global.store %5, @_constant__timepoint : !stream.timepoint
util.initializer.return
}
// -----// IR Dump After LayoutSlices (iree-stream-layout-slices) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before PropagateSubranges (iree-util-propagate-subranges) //----- //
#composite_of_192b = #util.composite<192xi8, [
dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>,
dense<0> : vector<56xi8>,
dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>,
dense<0> : vector<52xi8>,
dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>,
dense<0> : vector<40xi8>,
]>
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private mutable @_constant__timepoint = #stream.timepoint<immediate> : !stream.timepoint
util.global private @_constant : !stream.resource<constant>
util.global private @_constant_0 : !stream.resource<constant>
util.global private @_constant_1 : !stream.resource<constant>
util.initializer {
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%buffer_cst = util.buffer.constant {alignment = 64 : index} : !util.buffer = #composite_of_192b
%c0 = arith.constant 0 : index
%c192 = arith.constant 192 : index
%did_map, %result = stream.resource.try_map %buffer_cst[%c0] : !util.buffer -> i1, !stream.resource<constant>{%c192}
%0:2 = scf.if %did_map -> (!stream.resource<constant>, !stream.timepoint) {
%6 = stream.timepoint.immediate => !stream.timepoint
scf.yield %result, %6 : !stream.resource<constant>, !stream.timepoint
} else {
%6 = stream.resource.map %buffer_cst[%c0] : !util.buffer -> !stream.resource<staging>{%c192}
%7 = stream.resource.alloc uninitialized : !stream.resource<constant>{%c192}
%8 = stream.cmd.execute with(%6 as %arg0: !stream.resource<staging>{%c192}, %7 as %arg1: !stream.resource<constant>{%c192}) {
stream.cmd.copy %arg0[%c0], %arg1[%c0], %c192 : !stream.resource<staging>{%c192} -> !stream.resource<constant>{%c192}
} => !stream.timepoint
scf.yield %7, %8 : !stream.resource<constant>, !stream.timepoint
}
%1 = stream.resource.subview %0#0[%c0] : !stream.resource<constant>{%c192} -> !stream.resource<constant>{%c8}
%c64 = arith.constant 64 : index
%2 = stream.resource.subview %0#0[%c64] : !stream.resource<constant>{%c192} -> !stream.resource<constant>{%c12}
%c128 = arith.constant 128 : index
%3 = stream.resource.subview %0#0[%c128] : !stream.resource<constant>{%c192} -> !stream.resource<constant>{%c24}
%4 = stream.cmd.execute with() {
} => !stream.timepoint
%5 = stream.timepoint.join max(%0#1, %4) => !stream.timepoint
util.global.store %1, @_constant : !stream.resource<constant>
util.global.store %2, @_constant_0 : !stream.resource<constant>
util.global.store %3, @_constant_1 : !stream.resource<constant>
util.global.store %5, @_constant__timepoint : !stream.timepoint
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%_constant__timepoint = util.global.load @_constant__timepoint : !stream.timepoint
%_constant__timepoint_0 = util.global.load @_constant__timepoint : !stream.timepoint
%_constant__timepoint_1 = util.global.load @_constant__timepoint : !stream.timepoint
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.resource.size %_constant_0 : !stream.resource<constant>
%1 = stream.resource.size %_constant_1 : !stream.resource<constant>
%c0_2 = arith.constant 0 : index
%2 = stream.resource.alloc uninitialized : !stream.resource<external>{%c8}
%3 = stream.cmd.execute await(%_constant__timepoint) => with(%_constant as %arg0: !stream.resource<constant>{%c8}, %2 as %arg1: !stream.resource<external>{%c8}) {
stream.cmd.copy %arg0[%c0_2], %arg1[%c0_2], %c8 : !stream.resource<constant>{%c8} -> !stream.resource<external>{%c8}
} => !stream.timepoint
%4 = stream.timepoint.await %_constant__timepoint_0 => %_constant_1 : !stream.resource<constant>{%1}
%5 = util.optimization_barrier %4 : !stream.resource<constant>
%6 = stream.timepoint.await %_constant__timepoint_1 => %_constant_0 : !stream.resource<constant>{%0}
%7 = util.optimization_barrier %6 : !stream.resource<constant>
%8 = stream.resource.alloc uninitialized : !stream.resource<transient>{%c4}
%c0_3 = arith.constant 0 : index
%9 = stream.cmd.execute with(%8 as %arg0: !stream.resource<transient>{%c4}) {
stream.cmd.fill %c1065353216_i32, %arg0[%c0_3 for %c4] : i32 -> !stream.resource<transient>{%c4}
} => !stream.timepoint
%10 = stream.timepoint.await %9 => %8 : !stream.resource<transient>{%c4}
%11 = util.optimization_barrier %10 : !stream.resource<transient>
%12 = stream.resource.size %5 : !stream.resource<constant>
%c0_4 = arith.constant 0 : index
%13 = stream.resource.alloc uninitialized : !stream.resource<external>{%c8}
%14 = stream.cmd.execute with(%5 as %arg0: !stream.resource<constant>{%12}, %7 as %arg1: !stream.resource<constant>{%c12}, %11 as %arg2: !stream.resource<transient>{%c4}, %13 as %arg3: !stream.resource<external>{%c8}) {
stream.cmd.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1] {
ro %arg0[%c0 for %12] : !stream.resource<constant>{%12},
ro %arg1[%c0 for %c12] : !stream.resource<constant>{%c12},
ro %arg2[%c0 for %c4] : !stream.resource<transient>{%c4},
wo %arg3[%c0_4 for %c8] : !stream.resource<external>{%c8}
}
} => !stream.timepoint
%15 = stream.timepoint.await %14 => %13 : !stream.resource<external>{%c8}
%16 = stream.tensor.export %15 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%17 = stream.timepoint.await %3 => %2 : !stream.resource<external>{%c8}
%18 = stream.tensor.export %17 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%16, %18) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After PropagateSubranges (iree-util-propagate-subranges) //----- //
#composite_of_192b = #util.composite<192xi8, [
dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>,
dense<0> : vector<56xi8>,
dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>,
dense<0> : vector<52xi8>,
dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>,
dense<0> : vector<40xi8>,
]>
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private mutable @_constant__timepoint = #stream.timepoint<immediate> : !stream.timepoint
util.global private @_constant : !stream.resource<constant>
util.global private mutable @_constant__storage_size : index
util.global private mutable @_constant__offset : index
util.global private mutable @_constant__length : index
util.global private @_constant_0 : !stream.resource<constant>
util.global private mutable @_constant_0__storage_size : index
util.global private mutable @_constant_0__offset : index
util.global private mutable @_constant_0__length : index
util.global private @_constant_1 : !stream.resource<constant>
util.global private mutable @_constant_1__storage_size : index
util.global private mutable @_constant_1__offset : index
util.global private mutable @_constant_1__length : index
util.initializer {
%c0 = arith.constant 0 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%buffer_cst = util.buffer.constant {alignment = 64 : index} : !util.buffer = #composite_of_192b
%c0_0 = arith.constant 0 : index
%c192 = arith.constant 192 : index
%did_map, %result = stream.resource.try_map %buffer_cst[%c0_0] : !util.buffer -> i1, !stream.resource<constant>{%c192}
%0:2 = scf.if %did_map -> (!stream.resource<constant>, !stream.timepoint) {
%6 = stream.timepoint.immediate => !stream.timepoint
scf.yield %result, %6 : !stream.resource<constant>, !stream.timepoint
} else {
%6 = stream.resource.map %buffer_cst[%c0_0] : !util.buffer -> !stream.resource<staging>{%c192}
%7 = stream.resource.alloc uninitialized : !stream.resource<constant>{%c192}
%8 = stream.cmd.execute with(%6 as %arg0: !stream.resource<staging>{%c192}, %7 as %arg1: !stream.resource<constant>{%c192}) {
stream.cmd.copy %arg0[%c0_0], %arg1[%c0_0], %c192 : !stream.resource<staging>{%c192} -> !stream.resource<constant>{%c192}
} => !stream.timepoint
scf.yield %7, %8 : !stream.resource<constant>, !stream.timepoint
}
%1 = stream.resource.subview %0#0[%c0_0] : !stream.resource<constant>{%c192} -> !stream.resource<constant>{%c8}
%c64 = arith.constant 64 : index
%2 = stream.resource.subview %0#0[%c64] : !stream.resource<constant>{%c192} -> !stream.resource<constant>{%c12}
%c128 = arith.constant 128 : index
%3 = stream.resource.subview %0#0[%c128] : !stream.resource<constant>{%c192} -> !stream.resource<constant>{%c24}
%4 = stream.cmd.execute with() {
} => !stream.timepoint
%5 = stream.timepoint.join max(%0#1, %4) => !stream.timepoint
util.global.store %0#0, @_constant : !stream.resource<constant>
util.global.store %c192, @_constant__storage_size : index
util.global.store %c0_0, @_constant__offset : index
util.global.store %c8, @_constant__length : index
util.global.store %0#0, @_constant_0 : !stream.resource<constant>
util.global.store %c192, @_constant_0__storage_size : index
util.global.store %c64, @_constant_0__offset : index
util.global.store %c12, @_constant_0__length : index
util.global.store %0#0, @_constant_1 : !stream.resource<constant>
util.global.store %c192, @_constant_1__storage_size : index
util.global.store %c128, @_constant_1__offset : index
util.global.store %c24, @_constant_1__length : index
util.global.store %5, @_constant__timepoint : !stream.timepoint
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%_constant__timepoint = util.global.load @_constant__timepoint : !stream.timepoint
%_constant__timepoint_0 = util.global.load @_constant__timepoint : !stream.timepoint
%_constant__timepoint_1 = util.global.load @_constant__timepoint : !stream.timepoint
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant__storage_size = util.global.load @_constant__storage_size : index
%_constant__offset = util.global.load @_constant__offset : index
%_constant__length = util.global.load @_constant__length : index
%0 = stream.resource.subview %_constant[%_constant__offset] : !stream.resource<constant>{%_constant__storage_size} -> !stream.resource<constant>{%_constant__length}
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_0__storage_size = util.global.load @_constant_0__storage_size : index
%_constant_0__offset = util.global.load @_constant_0__offset : index
%_constant_0__length = util.global.load @_constant_0__length : index
%1 = stream.resource.subview %_constant_0[%_constant_0__offset] : !stream.resource<constant>{%_constant_0__storage_size} -> !stream.resource<constant>{%_constant_0__length}
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%_constant_1__storage_size = util.global.load @_constant_1__storage_size : index
%_constant_1__offset = util.global.load @_constant_1__offset : index
%_constant_1__length = util.global.load @_constant_1__length : index
%2 = stream.resource.subview %_constant_1[%_constant_1__offset] : !stream.resource<constant>{%_constant_1__storage_size} -> !stream.resource<constant>{%_constant_1__length}
%3 = stream.resource.size %1 : !stream.resource<constant>
%4 = stream.resource.size %2 : !stream.resource<constant>
%c0_2 = arith.constant 0 : index
%5 = stream.resource.alloc uninitialized : !stream.resource<external>{%c8}
%6 = stream.cmd.execute await(%_constant__timepoint) => with(%0 as %arg0: !stream.resource<constant>{%c8}, %5 as %arg1: !stream.resource<external>{%c8}) {
stream.cmd.copy %arg0[%c0_2], %arg1[%c0_2], %c8 : !stream.resource<constant>{%c8} -> !stream.resource<external>{%c8}
} => !stream.timepoint
%7 = stream.timepoint.await %_constant__timepoint_0 => %2 : !stream.resource<constant>{%4}
%8 = util.optimization_barrier %7 : !stream.resource<constant>
%9 = stream.timepoint.await %_constant__timepoint_1 => %1 : !stream.resource<constant>{%3}
%10 = util.optimization_barrier %9 : !stream.resource<constant>
%11 = stream.resource.alloc uninitialized : !stream.resource<transient>{%c4}
%c0_3 = arith.constant 0 : index
%12 = stream.cmd.execute with(%11 as %arg0: !stream.resource<transient>{%c4}) {
stream.cmd.fill %c1065353216_i32, %arg0[%c0_3 for %c4] : i32 -> !stream.resource<transient>{%c4}
} => !stream.timepoint
%13 = stream.timepoint.await %12 => %11 : !stream.resource<transient>{%c4}
%14 = util.optimization_barrier %13 : !stream.resource<transient>
%15 = stream.resource.size %8 : !stream.resource<constant>
%c0_4 = arith.constant 0 : index
%16 = stream.resource.alloc uninitialized : !stream.resource<external>{%c8}
%17 = stream.cmd.execute with(%8 as %arg0: !stream.resource<constant>{%15}, %10 as %arg1: !stream.resource<constant>{%c12}, %14 as %arg2: !stream.resource<transient>{%c4}, %16 as %arg3: !stream.resource<external>{%c8}) {
stream.cmd.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1] {
ro %arg0[%c0 for %15] : !stream.resource<constant>{%15},
ro %arg1[%c0 for %c12] : !stream.resource<constant>{%c12},
ro %arg2[%c0 for %c4] : !stream.resource<transient>{%c4},
wo %arg3[%c0_4 for %c8] : !stream.resource<external>{%c8}
}
} => !stream.timepoint
%18 = stream.timepoint.await %17 => %16 : !stream.resource<external>{%c8}
%19 = stream.tensor.export %18 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%20 = stream.timepoint.await %6 => %5 : !stream.resource<external>{%c8}
%21 = stream.tensor.export %20 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%19, %21) : tensor<2x1xf32>
return
}
}
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
util.initializer {
%c0 = arith.constant 0 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%buffer_cst = util.buffer.constant {alignment = 64 : index} : !util.buffer = #util.composite<192xi8, [
dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>,
dense<0> : vector<56xi8>,
dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>,
dense<0> : vector<52xi8>,
dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>,
dense<0> : vector<40xi8>,
]>
%c0_0 = arith.constant 0 : index
%c192 = arith.constant 192 : index
%did_map, %result = stream.resource.try_map %buffer_cst[%c0_0] : !util.buffer -> i1, !stream.resource<constant>{%c192}
%0:2 = scf.if %did_map -> (!stream.resource<constant>, !stream.timepoint) {
%6 = stream.timepoint.immediate => !stream.timepoint
scf.yield %result, %6 : !stream.resource<constant>, !stream.timepoint
} else {
%6 = stream.resource.map %buffer_cst[%c0_0] : !util.buffer -> !stream.resource<staging>{%c192}
%7 = stream.resource.alloc uninitialized : !stream.resource<constant>{%c192}
%8 = stream.cmd.execute with(%6 as %arg0: !stream.resource<staging>{%c192}, %7 as %arg1: !stream.resource<constant>{%c192}) {
stream.cmd.copy %arg0[%c0_0], %arg1[%c0_0], %c192 : !stream.resource<staging>{%c192} -> !stream.resource<constant>{%c192}
} => !stream.timepoint
scf.yield %7, %8 : !stream.resource<constant>, !stream.timepoint
}
%1 = stream.resource.subview %0#0[%c0_0] : !stream.resource<constant>{%c192} -> !stream.resource<constant>{%c8}
%c64 = arith.constant 64 : index
%2 = stream.resource.subview %0#0[%c64] : !stream.resource<constant>{%c192} -> !stream.resource<constant>{%c12}
%c128 = arith.constant 128 : index
%3 = stream.resource.subview %0#0[%c128] : !stream.resource<constant>{%c192} -> !stream.resource<constant>{%c24}
%4 = stream.cmd.execute with() {
} => !stream.timepoint
%5 = stream.timepoint.join max(%0#1, %4) => !stream.timepoint
util.global.store %0#0, @_constant : !stream.resource<constant>
util.global.store %c192, @_constant__storage_size : index
util.global.store %c0_0, @_constant__offset : index
util.global.store %c8, @_constant__length : index
util.global.store %0#0, @_constant_0 : !stream.resource<constant>
util.global.store %c192, @_constant_0__storage_size : index
util.global.store %c64, @_constant_0__offset : index
util.global.store %c12, @_constant_0__length : index
util.global.store %0#0, @_constant_1 : !stream.resource<constant>
util.global.store %c192, @_constant_1__storage_size : index
util.global.store %c128, @_constant_1__offset : index
util.global.store %c24, @_constant_1__length : index
util.global.store %5, @_constant__timepoint : !stream.timepoint
util.initializer.return
}
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
func.func private @_tensor_float() {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%_constant__timepoint = util.global.load @_constant__timepoint : !stream.timepoint
%_constant__timepoint_0 = util.global.load @_constant__timepoint : !stream.timepoint
%_constant__timepoint_1 = util.global.load @_constant__timepoint : !stream.timepoint
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant__storage_size = util.global.load @_constant__storage_size : index
%_constant__offset = util.global.load @_constant__offset : index
%_constant__length = util.global.load @_constant__length : index
%0 = stream.resource.subview %_constant[%_constant__offset] : !stream.resource<constant>{%_constant__storage_size} -> !stream.resource<constant>{%_constant__length}
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_0__storage_size = util.global.load @_constant_0__storage_size : index
%_constant_0__offset = util.global.load @_constant_0__offset : index
%_constant_0__length = util.global.load @_constant_0__length : index
%1 = stream.resource.subview %_constant_0[%_constant_0__offset] : !stream.resource<constant>{%_constant_0__storage_size} -> !stream.resource<constant>{%_constant_0__length}
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%_constant_1__storage_size = util.global.load @_constant_1__storage_size : index
%_constant_1__offset = util.global.load @_constant_1__offset : index
%_constant_1__length = util.global.load @_constant_1__length : index
%2 = stream.resource.subview %_constant_1[%_constant_1__offset] : !stream.resource<constant>{%_constant_1__storage_size} -> !stream.resource<constant>{%_constant_1__length}
%3 = stream.resource.size %1 : !stream.resource<constant>
%4 = stream.resource.size %2 : !stream.resource<constant>
%c0_2 = arith.constant 0 : index
%5 = stream.resource.alloc uninitialized : !stream.resource<external>{%c8}
%6 = stream.cmd.execute await(%_constant__timepoint) => with(%0 as %arg0: !stream.resource<constant>{%c8}, %5 as %arg1: !stream.resource<external>{%c8}) {
stream.cmd.copy %arg0[%c0_2], %arg1[%c0_2], %c8 : !stream.resource<constant>{%c8} -> !stream.resource<external>{%c8}
} => !stream.timepoint
%7 = stream.timepoint.await %_constant__timepoint_0 => %2 : !stream.resource<constant>{%4}
%8 = util.optimization_barrier %7 : !stream.resource<constant>
%9 = stream.timepoint.await %_constant__timepoint_1 => %1 : !stream.resource<constant>{%3}
%10 = util.optimization_barrier %9 : !stream.resource<constant>
%11 = stream.resource.alloc uninitialized : !stream.resource<transient>{%c4}
%c0_3 = arith.constant 0 : index
%12 = stream.cmd.execute with(%11 as %arg0: !stream.resource<transient>{%c4}) {
stream.cmd.fill %c1065353216_i32, %arg0[%c0_3 for %c4] : i32 -> !stream.resource<transient>{%c4}
} => !stream.timepoint
%13 = stream.timepoint.await %12 => %11 : !stream.resource<transient>{%c4}
%14 = util.optimization_barrier %13 : !stream.resource<transient>
%15 = stream.resource.size %8 : !stream.resource<constant>
%c0_4 = arith.constant 0 : index
%16 = stream.resource.alloc uninitialized : !stream.resource<external>{%c8}
%17 = stream.cmd.execute with(%8 as %arg0: !stream.resource<constant>{%15}, %10 as %arg1: !stream.resource<constant>{%c12}, %14 as %arg2: !stream.resource<transient>{%c4}, %16 as %arg3: !stream.resource<external>{%c8}) {
stream.cmd.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1] {
ro %arg0[%c0 for %15] : !stream.resource<constant>{%15},
ro %arg1[%c0 for %c12] : !stream.resource<constant>{%c12},
ro %arg2[%c0 for %c4] : !stream.resource<transient>{%c4},
wo %arg3[%c0_4 for %c8] : !stream.resource<external>{%c8}
}
} => !stream.timepoint
%18 = stream.timepoint.await %17 => %16 : !stream.resource<external>{%c8}
%19 = stream.tensor.export %18 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%20 = stream.timepoint.await %6 => %5 : !stream.resource<external>{%c8}
%21 = stream.tensor.export %20 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%19, %21) : tensor<2x1xf32>
return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before CSE (cse) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
util.initializer {
%0 = stream.timepoint.immediate => !stream.timepoint
%c128 = arith.constant 128 : index
%c64 = arith.constant 64 : index
%c192 = arith.constant 192 : index
%c0 = arith.constant 0 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%buffer_cst = util.buffer.constant {alignment = 64 : index} : !util.buffer = #util.composite<192xi8, [
dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>,
dense<0> : vector<56xi8>,
dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>,
dense<0> : vector<52xi8>,
dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>,
dense<0> : vector<40xi8>,
]>
%did_map, %result = stream.resource.try_map %buffer_cst[%c0] : !util.buffer -> i1, !stream.resource<constant>{%c192}
%1:2 = scf.if %did_map -> (!stream.resource<constant>, !stream.timepoint) {
scf.yield %result, %0 : !stream.resource<constant>, !stream.timepoint
} else {
%2 = stream.resource.map %buffer_cst[%c0] : !util.buffer -> !stream.resource<staging>{%c192}
%3 = stream.resource.alloc uninitialized : !stream.resource<constant>{%c192}
%4 = stream.cmd.execute with(%2 as %arg0: !stream.resource<staging>{%c192}, %3 as %arg1: !stream.resource<constant>{%c192}) {
stream.cmd.copy %arg0[%c0], %arg1[%c0], %c192 : !stream.resource<staging>{%c192} -> !stream.resource<constant>{%c192}
} => !stream.timepoint
scf.yield %3, %4 : !stream.resource<constant>, !stream.timepoint
}
util.global.store %1#0, @_constant : !stream.resource<constant>
util.global.store %c192, @_constant__storage_size : index
util.global.store %c0, @_constant__offset : index
util.global.store %c8, @_constant__length : index
util.global.store %1#0, @_constant_0 : !stream.resource<constant>
util.global.store %c192, @_constant_0__storage_size : index
util.global.store %c64, @_constant_0__offset : index
util.global.store %c12, @_constant_0__length : index
util.global.store %1#0, @_constant_1 : !stream.resource<constant>
util.global.store %c192, @_constant_1__storage_size : index
util.global.store %c128, @_constant_1__offset : index
util.global.store %c24, @_constant_1__length : index
util.global.store %1#1, @_constant__timepoint : !stream.timepoint
util.initializer.return
}
// -----// IR Dump Before CSE (cse) //----- //
util.initializer {
%0 = stream.timepoint.immediate => !stream.timepoint
%c128 = arith.constant 128 : index
%c64 = arith.constant 64 : index
%c192 = arith.constant 192 : index
%c0 = arith.constant 0 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%buffer_cst = util.buffer.constant {alignment = 64 : index} : !util.buffer = #util.composite<192xi8, [
dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>,
dense<0> : vector<56xi8>,
dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>,
dense<0> : vector<52xi8>,
dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>,
dense<0> : vector<40xi8>,
]>
%did_map, %result = stream.resource.try_map %buffer_cst[%c0] : !util.buffer -> i1, !stream.resource<constant>{%c192}
%1:2 = scf.if %did_map -> (!stream.resource<constant>, !stream.timepoint) {
scf.yield %result, %0 : !stream.resource<constant>, !stream.timepoint
} else {
%2 = stream.resource.map %buffer_cst[%c0] : !util.buffer -> !stream.resource<staging>{%c192}
%3 = stream.resource.alloc uninitialized : !stream.resource<constant>{%c192}
%4 = stream.cmd.execute with(%2 as %arg0: !stream.resource<staging>{%c192}, %3 as %arg1: !stream.resource<constant>{%c192}) {
stream.cmd.copy %arg0[%c0], %arg1[%c0], %c192 : !stream.resource<staging>{%c192} -> !stream.resource<constant>{%c192}
} => !stream.timepoint
scf.yield %3, %4 : !stream.resource<constant>, !stream.timepoint
}
util.global.store %1#0, @_constant : !stream.resource<constant>
util.global.store %c192, @_constant__storage_size : index
util.global.store %c0, @_constant__offset : index
util.global.store %c8, @_constant__length : index
util.global.store %1#0, @_constant_0 : !stream.resource<constant>
util.global.store %c192, @_constant_0__storage_size : index
util.global.store %c64, @_constant_0__offset : index
util.global.store %c12, @_constant_0__length : index
util.global.store %1#0, @_constant_1 : !stream.resource<constant>
util.global.store %c192, @_constant_1__storage_size : index
util.global.store %c128, @_constant_1__offset : index
util.global.store %c24, @_constant_1__length : index
util.global.store %1#1, @_constant__timepoint : !stream.timepoint
util.initializer.return
}
// -----// IR Dump After CSE (cse) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump Before SimplifyGlobalAccesses (iree-util-simplify-global-accesses) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After CSE (cse) //----- //
util.initializer {
%0 = stream.timepoint.immediate => !stream.timepoint
%c128 = arith.constant 128 : index
%c64 = arith.constant 64 : index
%c192 = arith.constant 192 : index
%c0 = arith.constant 0 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%buffer_cst = util.buffer.constant {alignment = 64 : index} : !util.buffer = #util.composite<192xi8, [
dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>,
dense<0> : vector<56xi8>,
dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>,
dense<0> : vector<52xi8>,
dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>,
dense<0> : vector<40xi8>,
]>
%did_map, %result = stream.resource.try_map %buffer_cst[%c0] : !util.buffer -> i1, !stream.resource<constant>{%c192}
%1:2 = scf.if %did_map -> (!stream.resource<constant>, !stream.timepoint) {
scf.yield %result, %0 : !stream.resource<constant>, !stream.timepoint
} else {
%2 = stream.resource.map %buffer_cst[%c0] : !util.buffer -> !stream.resource<staging>{%c192}
%3 = stream.resource.alloc uninitialized : !stream.resource<constant>{%c192}
%4 = stream.cmd.execute with(%2 as %arg0: !stream.resource<staging>{%c192}, %3 as %arg1: !stream.resource<constant>{%c192}) {
stream.cmd.copy %arg0[%c0], %arg1[%c0], %c192 : !stream.resource<staging>{%c192} -> !stream.resource<constant>{%c192}
} => !stream.timepoint
scf.yield %3, %4 : !stream.resource<constant>, !stream.timepoint
}
util.global.store %1#0, @_constant : !stream.resource<constant>
util.global.store %c192, @_constant__storage_size : index
util.global.store %c0, @_constant__offset : index
util.global.store %c8, @_constant__length : index
util.global.store %1#0, @_constant_0 : !stream.resource<constant>
util.global.store %c192, @_constant_0__storage_size : index
util.global.store %c64, @_constant_0__offset : index
util.global.store %c12, @_constant_0__length : index
util.global.store %1#0, @_constant_1 : !stream.resource<constant>
util.global.store %c192, @_constant_1__storage_size : index
util.global.store %c128, @_constant_1__offset : index
util.global.store %c24, @_constant_1__length : index
util.global.store %1#1, @_constant__timepoint : !stream.timepoint
util.initializer.return
}
// -----// IR Dump Before SimplifyGlobalAccesses (iree-util-simplify-global-accesses) //----- //
util.initializer {
%0 = stream.timepoint.immediate => !stream.timepoint
%c128 = arith.constant 128 : index
%c64 = arith.constant 64 : index
%c192 = arith.constant 192 : index
%c0 = arith.constant 0 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%buffer_cst = util.buffer.constant {alignment = 64 : index} : !util.buffer = #util.composite<192xi8, [
dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>,
dense<0> : vector<56xi8>,
dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>,
dense<0> : vector<52xi8>,
dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>,
dense<0> : vector<40xi8>,
]>
%did_map, %result = stream.resource.try_map %buffer_cst[%c0] : !util.buffer -> i1, !stream.resource<constant>{%c192}
%1:2 = scf.if %did_map -> (!stream.resource<constant>, !stream.timepoint) {
scf.yield %result, %0 : !stream.resource<constant>, !stream.timepoint
} else {
%2 = stream.resource.map %buffer_cst[%c0] : !util.buffer -> !stream.resource<staging>{%c192}
%3 = stream.resource.alloc uninitialized : !stream.resource<constant>{%c192}
%4 = stream.cmd.execute with(%2 as %arg0: !stream.resource<staging>{%c192}, %3 as %arg1: !stream.resource<constant>{%c192}) {
stream.cmd.copy %arg0[%c0], %arg1[%c0], %c192 : !stream.resource<staging>{%c192} -> !stream.resource<constant>{%c192}
} => !stream.timepoint
scf.yield %3, %4 : !stream.resource<constant>, !stream.timepoint
}
util.global.store %1#0, @_constant : !stream.resource<constant>
util.global.store %c192, @_constant__storage_size : index
util.global.store %c0, @_constant__offset : index
util.global.store %c8, @_constant__length : index
util.global.store %1#0, @_constant_0 : !stream.resource<constant>
util.global.store %c192, @_constant_0__storage_size : index
util.global.store %c64, @_constant_0__offset : index
util.global.store %c12, @_constant_0__length : index
util.global.store %1#0, @_constant_1 : !stream.resource<constant>
util.global.store %c192, @_constant_1__storage_size : index
util.global.store %c128, @_constant_1__offset : index
util.global.store %c24, @_constant_1__length : index
util.global.store %1#1, @_constant__timepoint : !stream.timepoint
util.initializer.return
}
// -----// IR Dump After SimplifyGlobalAccesses (iree-util-simplify-global-accesses) //----- //
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
func.func private @_tensor_float() {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%_constant__timepoint = util.global.load @_constant__timepoint : !stream.timepoint
%_constant__timepoint_0 = util.global.load @_constant__timepoint : !stream.timepoint
%_constant__timepoint_1 = util.global.load @_constant__timepoint : !stream.timepoint
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant__storage_size = util.global.load @_constant__storage_size : index
%_constant__offset = util.global.load @_constant__offset : index
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_0__storage_size = util.global.load @_constant_0__storage_size : index
%_constant_0__offset = util.global.load @_constant_0__offset : index
%_constant_0__length = util.global.load @_constant_0__length : index
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%_constant_1__storage_size = util.global.load @_constant_1__storage_size : index
%_constant_1__offset = util.global.load @_constant_1__offset : index
%_constant_1__length = util.global.load @_constant_1__length : index
%0 = stream.resource.alloc uninitialized : !stream.resource<external>{%c8}
%1 = stream.cmd.execute await(%_constant__timepoint) => with(%_constant as %arg0: !stream.resource<constant>{%_constant__storage_size}, %0 as %arg1: !stream.resource<external>{%c8}) {
stream.cmd.copy %arg0[%_constant__offset], %arg1[%c0], %c8 : !stream.resource<constant>{%_constant__storage_size} -> !stream.resource<external>{%c8}
} => !stream.timepoint
%2 = stream.timepoint.await %_constant__timepoint_0 => %_constant_1 : !stream.resource<constant>{%_constant_1__storage_size}
%3 = stream.resource.subview %2[%_constant_1__offset] : !stream.resource<constant>{%_constant_1__storage_size} -> !stream.resource<constant>{%_constant_1__length}
%4 = util.optimization_barrier %3 : !stream.resource<constant>
%5 = stream.timepoint.await %_constant__timepoint_1 => %_constant_0 : !stream.resource<constant>{%_constant_0__storage_size}
%6 = stream.resource.subview %5[%_constant_0__offset] : !stream.resource<constant>{%_constant_0__storage_size} -> !stream.resource<constant>{%_constant_0__length}
%7 = util.optimization_barrier %6 : !stream.resource<constant>
%8 = stream.resource.alloc uninitialized : !stream.resource<transient>{%c4}
%9 = stream.cmd.execute with(%8 as %arg0: !stream.resource<transient>{%c4}) {
stream.cmd.fill %c1065353216_i32, %arg0[%c0 for %c4] : i32 -> !stream.resource<transient>{%c4}
} => !stream.timepoint
%10 = stream.timepoint.await %9 => %8 : !stream.resource<transient>{%c4}
%11 = util.optimization_barrier %10 : !stream.resource<transient>
%12 = stream.resource.size %4 : !stream.resource<constant>
%13 = stream.resource.alloc uninitialized : !stream.resource<external>{%c8}
%14 = stream.cmd.execute with(%4 as %arg0: !stream.resource<constant>{%12}, %7 as %arg1: !stream.resource<constant>{%c12}, %11 as %arg2: !stream.resource<transient>{%c4}, %13 as %arg3: !stream.resource<external>{%c8}) {
stream.cmd.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1] {
ro %arg0[%c0 for %12] : !stream.resource<constant>{%12},
ro %arg1[%c0 for %c12] : !stream.resource<constant>{%c12},
ro %arg2[%c0 for %c4] : !stream.resource<transient>{%c4},
wo %arg3[%c0 for %c8] : !stream.resource<external>{%c8}
}
} => !stream.timepoint
%15 = stream.timepoint.await %14 => %13 : !stream.resource<external>{%c8}
%16 = stream.tensor.export %15 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%17 = stream.timepoint.await %1 => %0 : !stream.resource<external>{%c8}
%18 = stream.tensor.export %17 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%16, %18) : tensor<2x1xf32>
return
}
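// Note: compared with the dump above, canonicalization folded the duplicate zero-index constants into %c0, dropped the
// unused @_constant__length load, folded the first subview into the copy command (indexing %_constant by its offset
// directly), and moved the timepoint awaits onto the whole constant resources, materializing the subviews afterwards.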
// -----// IR Dump Before CSE (cse) //----- //
func.func private @_tensor_float() {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%_constant__timepoint = util.global.load @_constant__timepoint : !stream.timepoint
%_constant__timepoint_0 = util.global.load @_constant__timepoint : !stream.timepoint
%_constant__timepoint_1 = util.global.load @_constant__timepoint : !stream.timepoint
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant__storage_size = util.global.load @_constant__storage_size : index
%_constant__offset = util.global.load @_constant__offset : index
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_0__storage_size = util.global.load @_constant_0__storage_size : index
%_constant_0__offset = util.global.load @_constant_0__offset : index
%_constant_0__length = util.global.load @_constant_0__length : index
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%_constant_1__storage_size = util.global.load @_constant_1__storage_size : index
%_constant_1__offset = util.global.load @_constant_1__offset : index
%_constant_1__length = util.global.load @_constant_1__length : index
%0 = stream.resource.alloc uninitialized : !stream.resource<external>{%c8}
%1 = stream.cmd.execute await(%_constant__timepoint) => with(%_constant as %arg0: !stream.resource<constant>{%_constant__storage_size}, %0 as %arg1: !stream.resource<external>{%c8}) {
stream.cmd.copy %arg0[%_constant__offset], %arg1[%c0], %c8 : !stream.resource<constant>{%_constant__storage_size} -> !stream.resource<external>{%c8}
} => !stream.timepoint
%2 = stream.timepoint.await %_constant__timepoint_0 => %_constant_1 : !stream.resource<constant>{%_constant_1__storage_size}
%3 = stream.resource.subview %2[%_constant_1__offset] : !stream.resource<constant>{%_constant_1__storage_size} -> !stream.resource<constant>{%_constant_1__length}
%4 = util.optimization_barrier %3 : !stream.resource<constant>
%5 = stream.timepoint.await %_constant__timepoint_1 => %_constant_0 : !stream.resource<constant>{%_constant_0__storage_size}
%6 = stream.resource.subview %5[%_constant_0__offset] : !stream.resource<constant>{%_constant_0__storage_size} -> !stream.resource<constant>{%_constant_0__length}
%7 = util.optimization_barrier %6 : !stream.resource<constant>
%8 = stream.resource.alloc uninitialized : !stream.resource<transient>{%c4}
%9 = stream.cmd.execute with(%8 as %arg0: !stream.resource<transient>{%c4}) {
stream.cmd.fill %c1065353216_i32, %arg0[%c0 for %c4] : i32 -> !stream.resource<transient>{%c4}
} => !stream.timepoint
%10 = stream.timepoint.await %9 => %8 : !stream.resource<transient>{%c4}
%11 = util.optimization_barrier %10 : !stream.resource<transient>
%12 = stream.resource.size %4 : !stream.resource<constant>
%13 = stream.resource.alloc uninitialized : !stream.resource<external>{%c8}
%14 = stream.cmd.execute with(%4 as %arg0: !stream.resource<constant>{%12}, %7 as %arg1: !stream.resource<constant>{%c12}, %11 as %arg2: !stream.resource<transient>{%c4}, %13 as %arg3: !stream.resource<external>{%c8}) {
stream.cmd.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1] {
ro %arg0[%c0 for %12] : !stream.resource<constant>{%12},
ro %arg1[%c0 for %c12] : !stream.resource<constant>{%c12},
ro %arg2[%c0 for %c4] : !stream.resource<transient>{%c4},
wo %arg3[%c0 for %c8] : !stream.resource<external>{%c8}
}
} => !stream.timepoint
%15 = stream.timepoint.await %14 => %13 : !stream.resource<external>{%c8}
%16 = stream.tensor.export %15 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%17 = stream.timepoint.await %1 => %0 : !stream.resource<external>{%c8}
%18 = stream.tensor.export %17 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%16, %18) : tensor<2x1xf32>
return
}
// -----// IR Dump After SimplifyGlobalAccesses (iree-util-simplify-global-accesses) //----- //
util.initializer {
%0 = stream.timepoint.immediate => !stream.timepoint
%c128 = arith.constant 128 : index
%c64 = arith.constant 64 : index
%c192 = arith.constant 192 : index
%c0 = arith.constant 0 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%buffer_cst = util.buffer.constant {alignment = 64 : index} : !util.buffer = #util.composite<192xi8, [
dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>,
dense<0> : vector<56xi8>,
dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>,
dense<0> : vector<52xi8>,
dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>,
dense<0> : vector<40xi8>,
]>
%did_map, %result = stream.resource.try_map %buffer_cst[%c0] : !util.buffer -> i1, !stream.resource<constant>{%c192}
%1:2 = scf.if %did_map -> (!stream.resource<constant>, !stream.timepoint) {
scf.yield %result, %0 : !stream.resource<constant>, !stream.timepoint
} else {
%2 = stream.resource.map %buffer_cst[%c0] : !util.buffer -> !stream.resource<staging>{%c192}
%3 = stream.resource.alloc uninitialized : !stream.resource<constant>{%c192}
%4 = stream.cmd.execute with(%2 as %arg0: !stream.resource<staging>{%c192}, %3 as %arg1: !stream.resource<constant>{%c192}) {
stream.cmd.copy %arg0[%c0], %arg1[%c0], %c192 : !stream.resource<staging>{%c192} -> !stream.resource<constant>{%c192}
} => !stream.timepoint
scf.yield %3, %4 : !stream.resource<constant>, !stream.timepoint
}
util.global.store %1#0, @_constant : !stream.resource<constant>
util.global.store %1#0, @_constant_0 : !stream.resource<constant>
util.global.store %c12, @_constant_0__length : index
util.global.store %c64, @_constant_0__offset : index
util.global.store %c192, @_constant_0__storage_size : index
util.global.store %1#0, @_constant_1 : !stream.resource<constant>
util.global.store %c24, @_constant_1__length : index
util.global.store %c128, @_constant_1__offset : index
util.global.store %c192, @_constant_1__storage_size : index
util.global.store %c8, @_constant__length : index
util.global.store %c0, @_constant__offset : index
util.global.store %c192, @_constant__storage_size : index
util.global.store %1#1, @_constant__timepoint : !stream.timepoint
util.initializer.return
}
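// Note: SimplifyGlobalAccesses only reordered the global stores in this initializer; each global is still written
// exactly once, so the stored values are unchanged.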
// -----// IR Dump After CSE (cse) //----- //
func.func private @_tensor_float() {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%_constant__timepoint = util.global.load @_constant__timepoint : !stream.timepoint
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant__storage_size = util.global.load @_constant__storage_size : index
%_constant__offset = util.global.load @_constant__offset : index
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_0__storage_size = util.global.load @_constant_0__storage_size : index
%_constant_0__offset = util.global.load @_constant_0__offset : index
%_constant_0__length = util.global.load @_constant_0__length : index
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%_constant_1__storage_size = util.global.load @_constant_1__storage_size : index
%_constant_1__offset = util.global.load @_constant_1__offset : index
%_constant_1__length = util.global.load @_constant_1__length : index
%0 = stream.resource.alloc uninitialized : !stream.resource<external>{%c8}
%1 = stream.cmd.execute await(%_constant__timepoint) => with(%_constant as %arg0: !stream.resource<constant>{%_constant__storage_size}, %0 as %arg1: !stream.resource<external>{%c8}) {
stream.cmd.copy %arg0[%_constant__offset], %arg1[%c0], %c8 : !stream.resource<constant>{%_constant__storage_size} -> !stream.resource<external>{%c8}
} => !stream.timepoint
%2 = stream.timepoint.await %_constant__timepoint => %_constant_1 : !stream.resource<constant>{%_constant_1__storage_size}
%3 = stream.resource.subview %2[%_constant_1__offset] : !stream.resource<constant>{%_constant_1__storage_size} -> !stream.resource<constant>{%_constant_1__length}
%4 = util.optimization_barrier %3 : !stream.resource<constant>
%5 = stream.timepoint.await %_constant__timepoint => %_constant_0 : !stream.resource<constant>{%_constant_0__storage_size}
%6 = stream.resource.subview %5[%_constant_0__offset] : !stream.resource<constant>{%_constant_0__storage_size} -> !stream.resource<constant>{%_constant_0__length}
%7 = util.optimization_barrier %6 : !stream.resource<constant>
%8 = stream.resource.alloc uninitialized : !stream.resource<transient>{%c4}
%9 = stream.cmd.execute with(%8 as %arg0: !stream.resource<transient>{%c4}) {
stream.cmd.fill %c1065353216_i32, %arg0[%c0 for %c4] : i32 -> !stream.resource<transient>{%c4}
} => !stream.timepoint
%10 = stream.timepoint.await %9 => %8 : !stream.resource<transient>{%c4}
%11 = util.optimization_barrier %10 : !stream.resource<transient>
%12 = stream.resource.size %4 : !stream.resource<constant>
%13 = stream.resource.alloc uninitialized : !stream.resource<external>{%c8}
%14 = stream.cmd.execute with(%4 as %arg0: !stream.resource<constant>{%12}, %7 as %arg1: !stream.resource<constant>{%c12}, %11 as %arg2: !stream.resource<transient>{%c4}, %13 as %arg3: !stream.resource<external>{%c8}) {
stream.cmd.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1] {
ro %arg0[%c0 for %12] : !stream.resource<constant>{%12},
ro %arg1[%c0 for %c12] : !stream.resource<constant>{%c12},
ro %arg2[%c0 for %c4] : !stream.resource<transient>{%c4},
wo %arg3[%c0 for %c8] : !stream.resource<external>{%c8}
}
} => !stream.timepoint
%15 = stream.timepoint.await %14 => %13 : !stream.resource<external>{%c8}
%16 = stream.tensor.export %15 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%17 = stream.timepoint.await %1 => %0 : !stream.resource<external>{%c8}
%18 = stream.tensor.export %17 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%16, %18) : tensor<2x1xf32>
return
}
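// Note: CSE collapsed the three identical util.global.load ops of @_constant__timepoint into a single load that now
// feeds both stream.timepoint.await ops as well as the awaiting stream.cmd.execute.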
// -----// IR Dump Before SimplifyGlobalAccesses (iree-util-simplify-global-accesses) //----- //
func.func private @_tensor_float() {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%_constant__timepoint = util.global.load @_constant__timepoint : !stream.timepoint
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant__storage_size = util.global.load @_constant__storage_size : index
%_constant__offset = util.global.load @_constant__offset : index
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_0__storage_size = util.global.load @_constant_0__storage_size : index
%_constant_0__offset = util.global.load @_constant_0__offset : index
%_constant_0__length = util.global.load @_constant_0__length : index
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%_constant_1__storage_size = util.global.load @_constant_1__storage_size : index
%_constant_1__offset = util.global.load @_constant_1__offset : index
%_constant_1__length = util.global.load @_constant_1__length : index
%0 = stream.resource.alloc uninitialized : !stream.resource<external>{%c8}
%1 = stream.cmd.execute await(%_constant__timepoint) => with(%_constant as %arg0: !stream.resource<constant>{%_constant__storage_size}, %0 as %arg1: !stream.resource<external>{%c8}) {
stream.cmd.copy %arg0[%_constant__offset], %arg1[%c0], %c8 : !stream.resource<constant>{%_constant__storage_size} -> !stream.resource<external>{%c8}
} => !stream.timepoint
%2 = stream.timepoint.await %_constant__timepoint => %_constant_1 : !stream.resource<constant>{%_constant_1__storage_size}
%3 = stream.resource.subview %2[%_constant_1__offset] : !stream.resource<constant>{%_constant_1__storage_size} -> !stream.resource<constant>{%_constant_1__length}
%4 = util.optimization_barrier %3 : !stream.resource<constant>
%5 = stream.timepoint.await %_constant__timepoint => %_constant_0 : !stream.resource<constant>{%_constant_0__storage_size}
%6 = stream.resource.subview %5[%_constant_0__offset] : !stream.resource<constant>{%_constant_0__storage_size} -> !stream.resource<constant>{%_constant_0__length}
%7 = util.optimization_barrier %6 : !stream.resource<constant>
%8 = stream.resource.alloc uninitialized : !stream.resource<transient>{%c4}
%9 = stream.cmd.execute with(%8 as %arg0: !stream.resource<transient>{%c4}) {
stream.cmd.fill %c1065353216_i32, %arg0[%c0 for %c4] : i32 -> !stream.resource<transient>{%c4}
} => !stream.timepoint
%10 = stream.timepoint.await %9 => %8 : !stream.resource<transient>{%c4}
%11 = util.optimization_barrier %10 : !stream.resource<transient>
%12 = stream.resource.size %4 : !stream.resource<constant>
%13 = stream.resource.alloc uninitialized : !stream.resource<external>{%c8}
%14 = stream.cmd.execute with(%4 as %arg0: !stream.resource<constant>{%12}, %7 as %arg1: !stream.resource<constant>{%c12}, %11 as %arg2: !stream.resource<transient>{%c4}, %13 as %arg3: !stream.resource<external>{%c8}) {
stream.cmd.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1] {
ro %arg0[%c0 for %12] : !stream.resource<constant>{%12},
ro %arg1[%c0 for %c12] : !stream.resource<constant>{%c12},
ro %arg2[%c0 for %c4] : !stream.resource<transient>{%c4},
wo %arg3[%c0 for %c8] : !stream.resource<external>{%c8}
}
} => !stream.timepoint
%15 = stream.timepoint.await %14 => %13 : !stream.resource<external>{%c8}
%16 = stream.tensor.export %15 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%17 = stream.timepoint.await %1 => %0 : !stream.resource<external>{%c8}
%18 = stream.tensor.export %17 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%16, %18) : tensor<2x1xf32>
return
}
// -----// IR Dump After SimplifyGlobalAccesses (iree-util-simplify-global-accesses) //----- //
func.func private @_tensor_float() {
%_constant__timepoint = util.global.load @_constant__timepoint : !stream.timepoint
%_constant__storage_size = util.global.load @_constant__storage_size : index
%_constant__offset = util.global.load @_constant__offset : index
%_constant_1__storage_size = util.global.load @_constant_1__storage_size : index
%_constant_1__offset = util.global.load @_constant_1__offset : index
%_constant_1__length = util.global.load @_constant_1__length : index
%_constant_0__storage_size = util.global.load @_constant_0__storage_size : index
%_constant_0__offset = util.global.load @_constant_0__offset : index
%_constant_0__length = util.global.load @_constant_0__length : index
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%0 = stream.resource.alloc uninitialized : !stream.resource<external>{%c8}
%1 = stream.cmd.execute await(%_constant__timepoint) => with(%_constant as %arg0: !stream.resource<constant>{%_constant__storage_size}, %0 as %arg1: !stream.resource<external>{%c8}) {
stream.cmd.copy %arg0[%_constant__offset], %arg1[%c0], %c8 : !stream.resource<constant>{%_constant__storage_size} -> !stream.resource<external>{%c8}
} => !stream.timepoint
%2 = stream.timepoint.await %_constant__timepoint => %_constant_1 : !stream.resource<constant>{%_constant_1__storage_size}
%3 = stream.resource.subview %2[%_constant_1__offset] : !stream.resource<constant>{%_constant_1__storage_size} -> !stream.resource<constant>{%_constant_1__length}
%4 = util.optimization_barrier %3 : !stream.resource<constant>
%5 = stream.timepoint.await %_constant__timepoint => %_constant_0 : !stream.resource<constant>{%_constant_0__storage_size}
%6 = stream.resource.subview %5[%_constant_0__offset] : !stream.resource<constant>{%_constant_0__storage_size} -> !stream.resource<constant>{%_constant_0__length}
%7 = util.optimization_barrier %6 : !stream.resource<constant>
%8 = stream.resource.alloc uninitialized : !stream.resource<transient>{%c4}
%9 = stream.cmd.execute with(%8 as %arg0: !stream.resource<transient>{%c4}) {
stream.cmd.fill %c1065353216_i32, %arg0[%c0 for %c4] : i32 -> !stream.resource<transient>{%c4}
} => !stream.timepoint
%10 = stream.timepoint.await %9 => %8 : !stream.resource<transient>{%c4}
%11 = util.optimization_barrier %10 : !stream.resource<transient>
%12 = stream.resource.size %4 : !stream.resource<constant>
%13 = stream.resource.alloc uninitialized : !stream.resource<external>{%c8}
%14 = stream.cmd.execute with(%4 as %arg0: !stream.resource<constant>{%12}, %7 as %arg1: !stream.resource<constant>{%c12}, %11 as %arg2: !stream.resource<transient>{%c4}, %13 as %arg3: !stream.resource<external>{%c8}) {
stream.cmd.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1] {
ro %arg0[%c0 for %12] : !stream.resource<constant>{%12},
ro %arg1[%c0 for %c12] : !stream.resource<constant>{%c12},
ro %arg2[%c0 for %c4] : !stream.resource<transient>{%c4},
wo %arg3[%c0 for %c8] : !stream.resource<external>{%c8}
}
} => !stream.timepoint
%15 = stream.timepoint.await %14 => %13 : !stream.resource<external>{%c8}
%16 = stream.tensor.export %15 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%17 = stream.timepoint.await %1 => %0 : !stream.resource<external>{%c8}
%18 = stream.tensor.export %17 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%16, %18) : tensor<2x1xf32>
return
}
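// Note: SimplifyGlobalAccesses hoisted all util.global.load ops to the top of the entry block, ahead of the
// arith.constant ops; the loaded values and the rest of the function body are unchanged.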
// -----// IR Dump Before ApplyPatterns (iree-util-apply-patterns) //----- //
#composite_of_192b = #util.composite<192xi8, [
dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>,
dense<0> : vector<56xi8>,
dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>,
dense<0> : vector<52xi8>,
dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>,
dense<0> : vector<40xi8>,
]>
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private mutable @_constant__timepoint = #stream.timepoint<immediate> : !stream.timepoint
util.global private @_constant : !stream.resource<constant>
util.global private mutable @_constant__storage_size : index
util.global private mutable @_constant__offset : index
util.global private mutable @_constant__length : index
util.global private @_constant_0 : !stream.resource<constant>
util.global private mutable @_constant_0__storage_size : index
util.global private mutable @_constant_0__offset : index
util.global private mutable @_constant_0__length : index
util.global private @_constant_1 : !stream.resource<constant>
util.global private mutable @_constant_1__storage_size : index
util.global private mutable @_constant_1__offset : index
util.global private mutable @_constant_1__length : index
util.initializer {
%0 = stream.timepoint.immediate => !stream.timepoint
%c128 = arith.constant 128 : index
%c64 = arith.constant 64 : index
%c192 = arith.constant 192 : index
%c0 = arith.constant 0 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c24 = arith.constant 24 : index
%buffer_cst = util.buffer.constant {alignment = 64 : index} : !util.buffer = #composite_of_192b
%did_map, %result = stream.resource.try_map %buffer_cst[%c0] : !util.buffer -> i1, !stream.resource<constant>{%c192}
%1:2 = scf.if %did_map -> (!stream.resource<constant>, !stream.timepoint) {
scf.yield %result, %0 : !stream.resource<constant>, !stream.timepoint
} else {
%2 = stream.resource.map %buffer_cst[%c0] : !util.buffer -> !stream.resource<staging>{%c192}
%3 = stream.resource.alloc uninitialized : !stream.resource<constant>{%c192}
%4 = stream.cmd.execute with(%2 as %arg0: !stream.resource<staging>{%c192}, %3 as %arg1: !stream.resource<constant>{%c192}) {
stream.cmd.copy %arg0[%c0], %arg1[%c0], %c192 : !stream.resource<staging>{%c192} -> !stream.resource<constant>{%c192}
} => !stream.timepoint
scf.yield %3, %4 : !stream.resource<constant>, !stream.timepoint
}
util.global.store %1#0, @_constant : !stream.resource<constant>
util.global.store %1#0, @_constant_0 : !stream.resource<constant>
util.global.store %c12, @_constant_0__length : index
util.global.store %c64, @_constant_0__offset : index
util.global.store %c192, @_constant_0__storage_size : index
util.global.store %1#0, @_constant_1 : !stream.resource<constant>
util.global.store %c24, @_constant_1__length : index
util.global.store %c128, @_constant_1__offset : index
util.global.store %c192, @_constant_1__storage_size : index
util.global.store %c8, @_constant__length : index
util.global.store %c0, @_constant__offset : index
util.global.store %c192, @_constant__storage_size : index
util.global.store %1#1, @_constant__timepoint : !stream.timepoint
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
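  // Note: this dispatch is the lowered tosa.fully_connected: it matmuls the 2x3 input against the 3x1 (transposed)
  // weights and adds the broadcast scalar bias supplied by the transient buffer filled with 0x3F800000 (1.0f).
  // With the test constants, [[1, 2, 3], [4, 5, 6]] x [[7], [8], [9]] + 1 = [[51], [123]], matching the
  // expected-result tensor packed at offset 0 of the 192-byte composite constant.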
func.func private @_tensor_float() {
%_constant__timepoint = util.global.load @_constant__timepoint : !stream.timepoint
%_constant__storage_size = util.global.load @_constant__storage_size : index
%_constant__offset = util.global.load @_constant__offset : index
%_constant_1__storage_size = util.global.load @_constant_1__storage_size : index
%_constant_1__offset = util.global.load @_constant_1__offset : index
%_constant_1__length = util.global.load @_constant_1__length : index
%_constant_0__storage_size = util.global.load @_constant_0__storage_size : index
%_constant_0__offset = util.global.load @_constant_0__offset : index
%_constant_0__length = util.global.load @_constant_0__length : index
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c4 = arith.constant 4 : index
%0 = stream.resource.alloc uninitialized : !stream.resource<external>{%c8}
%1 = stream.cmd.execute await(%_constant__timepoint) => with(%_constant as %arg0: !stream.resource<constant>{%_constant__storage_size}, %0 as %arg1: !stream.resource<external>{%c8}) {
stream.cmd.copy %arg0[%_constant__offset], %arg1[%c0], %c8 : !stream.resource<constant>{%_constant__storage_size} -> !stream.resource<external>{%c8}
} => !stream.timepoint
%2 = stream.timepoint.await %_constant__timepoint => %_constant_1 : !stream.resource<constant>{%_constant_1__storage_size}
%3 = stream.resource.subview %2[%_constant_1__offset] : !stream.resource<constant>{%_constant_1__storage_size} -> !stream.resource<constant>{%_constant_1__length}
%4 = util.optimization_barrier %3 : !stream.resource<constant>
%5 = stream.timepoint.await %_constant__timepoint => %_constant_0 : !stream.resource<constant>{%_constant_0__storage_size}
%6 = stream.resource.subview %5[%_constant_0__offset] : !stream.resource<constant>{%_constant_0__storage_size} -> !stream.resource<constant>{%_constant_0__length}
%7 = util.optimization_barrier %6 : !stream.resource<constant>
%8 = stream.resource.alloc uninitialized : !stream.resource<transient>{%c4}
%9 = stream.cmd.execute with(%8 as %arg0: !stream.resource<transient>{%c4}) {
stream.cmd.fill %c1065353216_i32, %arg0[%c0 for %c4] : i32 -> !stream.resource<transient>{%c4}
} => !stream.timepoint
%10 = stream.timepoint.await %9 => %8 : !stream.resource<transient>{%c4}
%11 = util.optimization_barrier %10 : !stream.resource<transient>
%12 = stream.resource.size %4 : !stream.resource<constant>
%13 = stream.resource.alloc uninitialized : !stream.resource<external>{%c8}
%14 = stream.cmd.execute with(%4 as %arg0: !stream.resource<constant>{%12}, %7 as %arg1: !stream.resource<constant>{%c12}, %11 as %arg2: !stream.resource<transient>{%c4}, %13 as %arg3: !stream.resource<external>{%c8}) {
stream.cmd.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1] {
ro %arg0[%c0 for %12] : !stream.resource<constant>{%12},
ro %arg1[%c0 for %c12] : !stream.resource<constant>{%c12},
ro %arg2[%c0 for %c4] : !stream.resource<transient>{%c4},
wo %arg3[%c0 for %c8] : !stream.resource<external>{%c8}
}
} => !stream.timepoint
%15 = stream.timepoint.await %14 => %13 : !stream.resource<external>{%c8}
%16 = stream.tensor.export %15 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%17 = stream.timepoint.await %1 => %0 : !stream.resource<external>{%c8}
%18 = stream.tensor.export %17 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%16, %18) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After ApplyPatterns (iree-util-apply-patterns) //----- //
#composite_of_192b = #util.composite<192xi8, [
dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>,
dense<0> : vector<56xi8>,
dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>,
dense<0> : vector<52xi8>,
dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>,
dense<0> : vector<40xi8>,
]>
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private mutable @_constant__timepoint = #stream.timepoint<immediate> : !stream.timepoint
util.global private @_constant : !stream.resource<constant>
util.global private mutable @_constant__storage_size = 192 : index
util.global private mutable @_constant__offset = 0 : index
util.global private mutable @_constant__length = 8 : index
util.global private @_constant_0 : !stream.resource<constant>
util.global private mutable @_constant_0__storage_size = 192 : index
util.global private mutable @_constant_0__offset = 64 : index
util.global private mutable @_constant_0__length = 12 : index
util.global private @_constant_1 : !stream.resource<constant>
util.global private mutable @_constant_1__storage_size = 192 : index
util.global private mutable @_constant_1__offset = 128 : index
util.global private mutable @_constant_1__length = 24 : index
util.initializer {
%0 = stream.timepoint.immediate => !stream.timepoint
%c192 = arith.constant 192 : index
%c0 = arith.constant 0 : index
%buffer_cst = util.buffer.constant {alignment = 64 : index} : !util.buffer = #composite_of_192b
%did_map, %result = stream.resource.try_map %buffer_cst[%c0] : !util.buffer -> i1, !stream.resource<constant>{%c192}
%1:2 = scf.if %did_map -> (!stream.resource<constant>, !stream.timepoint) {
scf.yield %result, %0 : !stream.resource<constant>, !stream.timepoint
} else {
%2 = stream.resource.map %buffer_cst[%c0] : !util.buffer -> !stream.resource<staging>{%c192}
%3 = stream.resource.alloc uninitialized : !stream.resource<constant>{%c192}
%4 = stream.cmd.execute with(%2 as %arg0: !stream.resource<staging>{%c192}, %3 as %arg1: !stream.resource<constant>{%c192}) {
stream.cmd.copy %arg0[%c0], %arg1[%c0], %c192 : !stream.resource<staging>{%c192} -> !stream.resource<constant>{%c192}
} => !stream.timepoint
scf.yield %3, %4 : !stream.resource<constant>, !stream.timepoint
}
util.global.store %1#0, @_constant : !stream.resource<constant>
util.global.store %1#0, @_constant_0 : !stream.resource<constant>
util.global.store %1#0, @_constant_1 : !stream.resource<constant>
util.global.store %1#1, @_constant__timepoint : !stream.timepoint
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c4 = arith.constant 4 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c12 = arith.constant 12 : index
%c8 = arith.constant 8 : index
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%_constant__timepoint = util.global.load @_constant__timepoint : !stream.timepoint
%_constant__storage_size = util.global.load @_constant__storage_size : index
%_constant__offset = util.global.load @_constant__offset : index
%_constant_1__storage_size = util.global.load @_constant_1__storage_size : index
%_constant_1__offset = util.global.load @_constant_1__offset : index
%_constant_1__length = util.global.load @_constant_1__length : index
%_constant_0__storage_size = util.global.load @_constant_0__storage_size : index
%_constant_0__offset = util.global.load @_constant_0__offset : index
%_constant_0__length = util.global.load @_constant_0__length : index
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.resource.alloc uninitialized : !stream.resource<external>{%c8}
%1 = stream.cmd.execute await(%_constant__timepoint) => with(%_constant as %arg0: !stream.resource<constant>{%_constant__storage_size}, %0 as %arg1: !stream.resource<external>{%c8}) {
stream.cmd.copy %arg0[%_constant__offset], %arg1[%c0], %c8 : !stream.resource<constant>{%_constant__storage_size} -> !stream.resource<external>{%c8}
} => !stream.timepoint
%2:2 = stream.timepoint.await %_constant__timepoint => %_constant_1, %_constant_0 : !stream.resource<constant>{%_constant_1__storage_size}, !stream.resource<constant>{%_constant_0__storage_size}
%3 = stream.resource.subview %2#0[%_constant_1__offset] : !stream.resource<constant>{%_constant_1__storage_size} -> !stream.resource<constant>{%_constant_1__length}
%4 = util.optimization_barrier %3 : !stream.resource<constant>
%5 = stream.resource.subview %2#1[%_constant_0__offset] : !stream.resource<constant>{%_constant_0__storage_size} -> !stream.resource<constant>{%_constant_0__length}
%6 = util.optimization_barrier %5 : !stream.resource<constant>
%7 = stream.resource.alloc uninitialized : !stream.resource<transient>{%c4}
%8 = stream.cmd.execute with(%7 as %arg0: !stream.resource<transient>{%c4}) {
stream.cmd.fill %c1065353216_i32, %arg0[%c0 for %c4] : i32 -> !stream.resource<transient>{%c4}
} => !stream.timepoint
%9 = stream.timepoint.await %8 => %7 : !stream.resource<transient>{%c4}
%10 = util.optimization_barrier %9 : !stream.resource<transient>
%11 = stream.resource.size %4 : !stream.resource<constant>
%12 = stream.resource.alloc uninitialized : !stream.resource<external>{%c8}
%13 = stream.cmd.execute with(%4 as %arg0: !stream.resource<constant>{%11}, %6 as %arg1: !stream.resource<constant>{%c12}, %10 as %arg2: !stream.resource<transient>{%c4}, %12 as %arg3: !stream.resource<external>{%c8}) {
stream.cmd.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1] {
ro %arg0[%c0 for %11] : !stream.resource<constant>{%11},
ro %arg1[%c0 for %c12] : !stream.resource<constant>{%c12},
ro %arg2[%c0 for %c4] : !stream.resource<transient>{%c4},
wo %arg3[%c0 for %c8] : !stream.resource<external>{%c8}
}
} => !stream.timepoint
%14 = stream.timepoint.await %13 => %12 : !stream.resource<external>{%c8}
%15 = stream.tensor.export %14 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%16 = stream.timepoint.await %1 => %0 : !stream.resource<external>{%c8}
%17 = stream.tensor.export %16 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%15, %17) : tensor<2x1xf32>
return
}
}
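// Note: ApplyPatterns gave the __storage_size/__offset/__length globals constant initial values, so their stores (and
// the now-unused index constants) disappeared from the initializer, and the two stream.timepoint.await ops in
// @_tensor_float were merged into a single await over both constant resources.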
// -----// IR Dump Before FoldGlobals (iree-util-fold-globals) //----- //
#composite_of_192b = #util.composite<192xi8, [
dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>,
dense<0> : vector<56xi8>,
dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>,
dense<0> : vector<52xi8>,
dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>,
dense<0> : vector<40xi8>,
]>
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private mutable @_constant__timepoint = #stream.timepoint<immediate> : !stream.timepoint
util.global private @_constant : !stream.resource<constant>
util.global private mutable @_constant__storage_size = 192 : index
util.global private mutable @_constant__offset = 0 : index
util.global private mutable @_constant__length = 8 : index
util.global private @_constant_0 : !stream.resource<constant>
util.global private mutable @_constant_0__storage_size = 192 : index
util.global private mutable @_constant_0__offset = 64 : index
util.global private mutable @_constant_0__length = 12 : index
util.global private @_constant_1 : !stream.resource<constant>
util.global private mutable @_constant_1__storage_size = 192 : index
util.global private mutable @_constant_1__offset = 128 : index
util.global private mutable @_constant_1__length = 24 : index
util.initializer {
%0 = stream.timepoint.immediate => !stream.timepoint
%c192 = arith.constant 192 : index
%c0 = arith.constant 0 : index
%buffer_cst = util.buffer.constant {alignment = 64 : index} : !util.buffer = #composite_of_192b
%did_map, %result = stream.resource.try_map %buffer_cst[%c0] : !util.buffer -> i1, !stream.resource<constant>{%c192}
%1:2 = scf.if %did_map -> (!stream.resource<constant>, !stream.timepoint) {
scf.yield %result, %0 : !stream.resource<constant>, !stream.timepoint
} else {
%2 = stream.resource.map %buffer_cst[%c0] : !util.buffer -> !stream.resource<staging>{%c192}
%3 = stream.resource.alloc uninitialized : !stream.resource<constant>{%c192}
%4 = stream.cmd.execute with(%2 as %arg0: !stream.resource<staging>{%c192}, %3 as %arg1: !stream.resource<constant>{%c192}) {
stream.cmd.copy %arg0[%c0], %arg1[%c0], %c192 : !stream.resource<staging>{%c192} -> !stream.resource<constant>{%c192}
} => !stream.timepoint
scf.yield %3, %4 : !stream.resource<constant>, !stream.timepoint
}
util.global.store %1#0, @_constant : !stream.resource<constant>
util.global.store %1#0, @_constant_0 : !stream.resource<constant>
util.global.store %1#0, @_constant_1 : !stream.resource<constant>
util.global.store %1#1, @_constant__timepoint : !stream.timepoint
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c4 = arith.constant 4 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c12 = arith.constant 12 : index
%c8 = arith.constant 8 : index
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%_constant__timepoint = util.global.load @_constant__timepoint : !stream.timepoint
%_constant__storage_size = util.global.load @_constant__storage_size : index
%_constant__offset = util.global.load @_constant__offset : index
%_constant_1__storage_size = util.global.load @_constant_1__storage_size : index
%_constant_1__offset = util.global.load @_constant_1__offset : index
%_constant_1__length = util.global.load @_constant_1__length : index
%_constant_0__storage_size = util.global.load @_constant_0__storage_size : index
%_constant_0__offset = util.global.load @_constant_0__offset : index
%_constant_0__length = util.global.load @_constant_0__length : index
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.resource.alloc uninitialized : !stream.resource<external>{%c8}
%1 = stream.cmd.execute await(%_constant__timepoint) => with(%_constant as %arg0: !stream.resource<constant>{%_constant__storage_size}, %0 as %arg1: !stream.resource<external>{%c8}) {
stream.cmd.copy %arg0[%_constant__offset], %arg1[%c0], %c8 : !stream.resource<constant>{%_constant__storage_size} -> !stream.resource<external>{%c8}
} => !stream.timepoint
%2:2 = stream.timepoint.await %_constant__timepoint => %_constant_1, %_constant_0 : !stream.resource<constant>{%_constant_1__storage_size}, !stream.resource<constant>{%_constant_0__storage_size}
%3 = stream.resource.subview %2#0[%_constant_1__offset] : !stream.resource<constant>{%_constant_1__storage_size} -> !stream.resource<constant>{%_constant_1__length}
%4 = util.optimization_barrier %3 : !stream.resource<constant>
%5 = stream.resource.subview %2#1[%_constant_0__offset] : !stream.resource<constant>{%_constant_0__storage_size} -> !stream.resource<constant>{%_constant_0__length}
%6 = util.optimization_barrier %5 : !stream.resource<constant>
%7 = stream.resource.alloc uninitialized : !stream.resource<transient>{%c4}
%8 = stream.cmd.execute with(%7 as %arg0: !stream.resource<transient>{%c4}) {
stream.cmd.fill %c1065353216_i32, %arg0[%c0 for %c4] : i32 -> !stream.resource<transient>{%c4}
} => !stream.timepoint
%9 = stream.timepoint.await %8 => %7 : !stream.resource<transient>{%c4}
%10 = util.optimization_barrier %9 : !stream.resource<transient>
%11 = stream.resource.size %4 : !stream.resource<constant>
%12 = stream.resource.alloc uninitialized : !stream.resource<external>{%c8}
%13 = stream.cmd.execute with(%4 as %arg0: !stream.resource<constant>{%11}, %6 as %arg1: !stream.resource<constant>{%c12}, %10 as %arg2: !stream.resource<transient>{%c4}, %12 as %arg3: !stream.resource<external>{%c8}) {
stream.cmd.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1] {
ro %arg0[%c0 for %11] : !stream.resource<constant>{%11},
ro %arg1[%c0 for %c12] : !stream.resource<constant>{%c12},
ro %arg2[%c0 for %c4] : !stream.resource<transient>{%c4},
wo %arg3[%c0 for %c8] : !stream.resource<external>{%c8}
}
} => !stream.timepoint
%14 = stream.timepoint.await %13 => %12 : !stream.resource<external>{%c8}
%15 = stream.tensor.export %14 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%16 = stream.timepoint.await %1 => %0 : !stream.resource<external>{%c8}
%17 = stream.tensor.export %16 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%15, %17) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After FoldGlobals (iree-util-fold-globals) //----- //
#composite_of_192b = #util.composite<192xi8, [
dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>,
dense<0> : vector<56xi8>,
dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>,
dense<0> : vector<52xi8>,
dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>,
dense<0> : vector<40xi8>,
]>
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private mutable @_constant__timepoint = #stream.timepoint<immediate> : !stream.timepoint
util.global private @_constant : !stream.resource<constant>
util.global private @_constant_0 : !stream.resource<constant>
util.global private @_constant_1 : !stream.resource<constant>
util.initializer {
%0 = stream.timepoint.immediate => !stream.timepoint
%c192 = arith.constant 192 : index
%c0 = arith.constant 0 : index
%buffer_cst = util.buffer.constant {alignment = 64 : index} : !util.buffer = #composite_of_192b
%did_map, %result = stream.resource.try_map %buffer_cst[%c0] : !util.buffer -> i1, !stream.resource<constant>{%c192}
%1:2 = scf.if %did_map -> (!stream.resource<constant>, !stream.timepoint) {
scf.yield %result, %0 : !stream.resource<constant>, !stream.timepoint
} else {
%2 = stream.resource.map %buffer_cst[%c0] : !util.buffer -> !stream.resource<staging>{%c192}
%3 = stream.resource.alloc uninitialized : !stream.resource<constant>{%c192}
%4 = stream.cmd.execute with(%2 as %arg0: !stream.resource<staging>{%c192}, %3 as %arg1: !stream.resource<constant>{%c192}) {
stream.cmd.copy %arg0[%c0], %arg1[%c0], %c192 : !stream.resource<staging>{%c192} -> !stream.resource<constant>{%c192}
} => !stream.timepoint
scf.yield %3, %4 : !stream.resource<constant>, !stream.timepoint
}
util.global.store %1#0, @_constant : !stream.resource<constant>
util.global.store %1#0, @_constant_0 : !stream.resource<constant>
util.global.store %1#0, @_constant_1 : !stream.resource<constant>
util.global.store %1#1, @_constant__timepoint : !stream.timepoint
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c64 = arith.constant 64 : index
%c24 = arith.constant 24 : index
%c128 = arith.constant 128 : index
%c192 = arith.constant 192 : index
%c4 = arith.constant 4 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c12 = arith.constant 12 : index
%c8 = arith.constant 8 : index
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%_constant__timepoint = util.global.load @_constant__timepoint : !stream.timepoint
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.resource.alloc uninitialized : !stream.resource<external>{%c8}
%1 = stream.cmd.execute await(%_constant__timepoint) => with(%_constant as %arg0: !stream.resource<constant>{%c192}, %0 as %arg1: !stream.resource<external>{%c8}) {
stream.cmd.copy %arg0[%c0], %arg1[%c0], %c8 : !stream.resource<constant>{%c192} -> !stream.resource<external>{%c8}
} => !stream.timepoint
%2:2 = stream.timepoint.await %_constant__timepoint => %_constant_1, %_constant_0 : !stream.resource<constant>{%c192}, !stream.resource<constant>{%c192}
%3 = stream.resource.subview %2#0[%c128] : !stream.resource<constant>{%c192} -> !stream.resource<constant>{%c24}
%4 = util.optimization_barrier %3 : !stream.resource<constant>
%5 = stream.resource.subview %2#1[%c64] : !stream.resource<constant>{%c192} -> !stream.resource<constant>{%c12}
%6 = util.optimization_barrier %5 : !stream.resource<constant>
%7 = stream.resource.alloc uninitialized : !stream.resource<transient>{%c4}
%8 = stream.cmd.execute with(%7 as %arg0: !stream.resource<transient>{%c4}) {
stream.cmd.fill %c1065353216_i32, %arg0[%c0 for %c4] : i32 -> !stream.resource<transient>{%c4}
} => !stream.timepoint
%9 = stream.timepoint.await %8 => %7 : !stream.resource<transient>{%c4}
%10 = util.optimization_barrier %9 : !stream.resource<transient>
%11 = stream.resource.size %4 : !stream.resource<constant>
%12 = stream.resource.alloc uninitialized : !stream.resource<external>{%c8}
%13 = stream.cmd.execute with(%4 as %arg0: !stream.resource<constant>{%11}, %6 as %arg1: !stream.resource<constant>{%c12}, %10 as %arg2: !stream.resource<transient>{%c4}, %12 as %arg3: !stream.resource<external>{%c8}) {
stream.cmd.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1] {
ro %arg0[%c0 for %11] : !stream.resource<constant>{%11},
ro %arg1[%c0 for %c12] : !stream.resource<constant>{%c12},
ro %arg2[%c0 for %c4] : !stream.resource<transient>{%c4},
wo %arg3[%c0 for %c8] : !stream.resource<external>{%c8}
}
} => !stream.timepoint
%14 = stream.timepoint.await %13 => %12 : !stream.resource<external>{%c8}
%15 = stream.tensor.export %14 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%16 = stream.timepoint.await %1 => %0 : !stream.resource<external>{%c8}
%17 = stream.tensor.export %16 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%15, %17) : tensor<2x1xf32>
return
}
}
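Side note (not part of the compiler output): the values baked into #composite_of_192b above are enough to sanity-check the dispatch by hand. The fill constant 1065353216 is the i32 bit pattern 0x3F800000, i.e. 1.0f, so the program computes a 2x3 * 3x1 matmul followed by a broadcast add of 1.0. A minimal numpy sketch (hypothetical, with assumed variable names, not taken from the dump) reproduces the expected [[51.], [123.]] result:

import numpy as np

# Constants as laid out in #composite_of_192b (alignment padding bytes omitted).
lhs = np.array([[1.0, 2.0, 3.0],
                [4.0, 5.0, 6.0]], dtype=np.float32)      # tensor<2x3xf32>
weights = np.array([[7.0, 8.0, 9.0]], dtype=np.float32)  # tensor<1x3xf32>
bias = np.float32(1.0)  # transient buffer filled with 0x3F800000 == 1.0f

# matmul(2x3, 3x1) -> 2x1, then broadcast-add the scalar bias,
# mirroring the linalg.fill + linalg.matmul + linalg.generic chain.
result = lhs @ weights.T + bias
print(result)  # [[ 51.] [123.]] -- matches the expected tensor<2x1xf32>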
// -----// IR Dump Before FuseGlobals (iree-util-fuse-globals) //----- //
#composite_of_192b = #util.composite<192xi8, [
dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>,
dense<0> : vector<56xi8>,
dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>,
dense<0> : vector<52xi8>,
dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>,
dense<0> : vector<40xi8>,
]>
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private mutable @_constant__timepoint = #stream.timepoint<immediate> : !stream.timepoint
util.global private @_constant : !stream.resource<constant>
util.global private @_constant_0 : !stream.resource<constant>
util.global private @_constant_1 : !stream.resource<constant>
util.initializer {
%0 = stream.timepoint.immediate => !stream.timepoint
%c192 = arith.constant 192 : index
%c0 = arith.constant 0 : index
%buffer_cst = util.buffer.constant {alignment = 64 : index} : !util.buffer = #composite_of_192b
%did_map, %result = stream.resource.try_map %buffer_cst[%c0] : !util.buffer -> i1, !stream.resource<constant>{%c192}
%1:2 = scf.if %did_map -> (!stream.resource<constant>, !stream.timepoint) {
scf.yield %result, %0 : !stream.resource<constant>, !stream.timepoint
} else {
%2 = stream.resource.map %buffer_cst[%c0] : !util.buffer -> !stream.resource<staging>{%c192}
%3 = stream.resource.alloc uninitialized : !stream.resource<constant>{%c192}
%4 = stream.cmd.execute with(%2 as %arg0: !stream.resource<staging>{%c192}, %3 as %arg1: !stream.resource<constant>{%c192}) {
stream.cmd.copy %arg0[%c0], %arg1[%c0], %c192 : !stream.resource<staging>{%c192} -> !stream.resource<constant>{%c192}
} => !stream.timepoint
scf.yield %3, %4 : !stream.resource<constant>, !stream.timepoint
}
util.global.store %1#0, @_constant : !stream.resource<constant>
util.global.store %1#0, @_constant_0 : !stream.resource<constant>
util.global.store %1#0, @_constant_1 : !stream.resource<constant>
util.global.store %1#1, @_constant__timepoint : !stream.timepoint
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6, %9 : tensor<f32>, tensor<2x1xf32>) outs(%7 : tensor<2x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%11 = arith.addf %in, %in_0 : f32
linalg.yield %11 : f32
} -> tensor<2x1xf32>
flow.dispatch.tensor.store %10, %3, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
}
}
}
func.func private @_tensor_float() {
%c64 = arith.constant 64 : index
%c24 = arith.constant 24 : index
%c128 = arith.constant 128 : index
%c192 = arith.constant 192 : index
%c4 = arith.constant 4 : index
%c1065353216_i32 = arith.constant 1065353216 : i32
%c12 = arith.constant 12 : index
%c8 = arith.constant 8 : index
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%_constant__timepoint = util.global.load @_constant__timepoint : !stream.timepoint
%_constant = util.global.load @_constant : !stream.resource<constant>
%_constant_0 = util.global.load @_constant_0 : !stream.resource<constant>
%_constant_1 = util.global.load @_constant_1 : !stream.resource<constant>
%0 = stream.resource.alloc uninitialized : !stream.resource<external>{%c8}
%1 = stream.cmd.execute await(%_constant__timepoint) => with(%_constant as %arg0: !stream.resource<constant>{%c192}, %0 as %arg1: !stream.resource<external>{%c8}) {
stream.cmd.copy %arg0[%c0], %arg1[%c0], %c8 : !stream.resource<constant>{%c192} -> !stream.resource<external>{%c8}
} => !stream.timepoint
%2:2 = stream.timepoint.await %_constant__timepoint => %_constant_1, %_constant_0 : !stream.resource<constant>{%c192}, !stream.resource<constant>{%c192}
%3 = stream.resource.subview %2#0[%c128] : !stream.resource<constant>{%c192} -> !stream.resource<constant>{%c24}
%4 = util.optimization_barrier %3 : !stream.resource<constant>
%5 = stream.resource.subview %2#1[%c64] : !stream.resource<constant>{%c192} -> !stream.resource<constant>{%c12}
%6 = util.optimization_barrier %5 : !stream.resource<constant>
%7 = stream.resource.alloc uninitialized : !stream.resource<transient>{%c4}
%8 = stream.cmd.execute with(%7 as %arg0: !stream.resource<transient>{%c4}) {
stream.cmd.fill %c1065353216_i32, %arg0[%c0 for %c4] : i32 -> !stream.resource<transient>{%c4}
} => !stream.timepoint
%9 = stream.timepoint.await %8 => %7 : !stream.resource<transient>{%c4}
%10 = util.optimization_barrier %9 : !stream.resource<transient>
%11 = stream.resource.size %4 : !stream.resource<constant>
%12 = stream.resource.alloc uninitialized : !stream.resource<external>{%c8}
%13 = stream.cmd.execute with(%4 as %arg0: !stream.resource<constant>{%11}, %6 as %arg1: !stream.resource<constant>{%c12}, %10 as %arg2: !stream.resource<transient>{%c4}, %12 as %arg3: !stream.resource<external>{%c8}) {
stream.cmd.dispatch @_tensor_float_dispatch_0::@_tensor_float_dispatch_0_matmul_2x1x3[%c2, %c1] {
ro %arg0[%c0 for %11] : !stream.resource<constant>{%11},
ro %arg1[%c0 for %c12] : !stream.resource<constant>{%c12},
ro %arg2[%c0 for %c4] : !stream.resource<transient>{%c4},
wo %arg3[%c0 for %c8] : !stream.resource<external>{%c8}
}
} => !stream.timepoint
%14 = stream.timepoint.await %13 => %12 : !stream.resource<external>{%c8}
%15 = stream.tensor.export %14 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
%16 = stream.timepoint.await %1 => %0 : !stream.resource<external>{%c8}
%17 = stream.tensor.export %16 : tensor<2x1xf32> in !stream.resource<external>{%c8} -> tensor<2x1xf32>
check.expect_eq(%15, %17) : tensor<2x1xf32>
return
}
}
// -----// IR Dump After FuseGlobals (iree-util-fuse-globals) //----- //
#composite_of_192b = #util.composite<192xi8, [
dense<[[5.100000e+01], [1.230000e+02]]> : tensor<2x1xf32>,
dense<0> : vector<56xi8>,
dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00]]> : tensor<1x3xf32>,
dense<0> : vector<52xi8>,
dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>,
dense<0> : vector<40xi8>,
]>
#map = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
util.global private mutable @_constant__timepoint = #stream.timepoint<immediate> : !stream.timepoint
util.global private @_constant : !stream.resource<constant>
util.initializer {
%0 = stream.timepoint.immediate => !stream.timepoint
%c192 = arith.constant 192 : index
%c0 = arith.constant 0 : index
%buffer_cst = util.buffer.constant {alignment = 64 : index} : !util.buffer = #composite_of_192b
%did_map, %result = stream.resource.try_map %buffer_cst[%c0] : !util.buffer -> i1, !stream.resource<constant>{%c192}
%1:2 = scf.if %did_map -> (!stream.resource<constant>, !stream.timepoint) {
scf.yield %result, %0 : !stream.resource<constant>, !stream.timepoint
} else {
%2 = stream.resource.map %buffer_cst[%c0] : !util.buffer -> !stream.resource<staging>{%c192}
%3 = stream.resource.alloc uninitialized : !stream.resource<constant>{%c192}
%4 = stream.cmd.execute with(%2 as %arg0: !stream.resource<staging>{%c192}, %3 as %arg1: !stream.resource<constant>{%c192}) {
stream.cmd.copy %arg0[%c0], %arg1[%c0], %c192 : !stream.resource<staging>{%c192} -> !stream.resource<constant>{%c192}
} => !stream.timepoint
scf.yield %3, %4 : !stream.resource<constant>, !stream.timepoint
}
util.global.store %1#0, @_constant : !stream.resource<constant>
util.global.store %1#1, @_constant__timepoint : !stream.timepoint
util.initializer.return
}
func.func @tensor_float() attributes {iree.abi.stub} {
call @_tensor_float() : () -> ()
return
}
stream.executable private @_tensor_float_dispatch_0 {
stream.executable.export public @_tensor_float_dispatch_0_matmul_2x1x3 workgroups(%arg0: index, %arg1: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0, %arg1
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @_tensor_float_dispatch_0_matmul_2x1x3(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3x1xf32>>
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<f32>>
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%7 = tensor.empty() : tensor<2x1xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1xf32>) -> tensor<2x1xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<2x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<2x1xf32>) -> tensor<2x1xf32>
%10 = linalg.generic {indexing_maps = [#map, #map1, #map1], i