This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// When running with CollapseDims pass | |
// Dims on matmul are collapsed, but not on dequantization, causing the ops to not get fused in tiling | |
// -----// IR Dump After CollapseDims (iree-flow-collapse-dims) //----- // | |
func.func @something(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%cst = arith.constant 0.000000e+00 : f32 | |
%cst_0 = arith.constant dense<0.000000e+00> : tensor<4096x32xf32> | |
%0 = hal.tensor.import %arg0 "input 0" : !hal.buffer_view -> tensor<4096x32x128xi8> | |
%1 = hal.tensor.import %arg1 "input 1" : !hal.buffer_view -> tensor<1x32x128xf32> | |
%2 = util.optimization_barrier %cst_0 : tensor<4096x32xf32> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
builtin.module { | |
func.func @quantized_matmul(%arg0: tensor<11008x32x128xi8>, %arg1: tensor<11008x32x1xf32>, %arg2: tensor<11008x32x1xf32>, %arg3: tensor<1x1x32x128xf32>) -> tensor<1x1x11008xf32> { | |
%cst = arith.constant 0.000000e+00 : f32 | |
%4 = tensor.empty() : tensor<1x1x11008xf32> | |
%5 = tensor.empty() : tensor<11008x32x128xf32> | |
%6 = linalg.fill ins(%cst : f32) outs(%4 : tensor<1x1x11008xf32>) -> tensor<1x1x11008xf32> | |
%7 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg0, %arg1, %arg2 : tensor<11008x32x128xi8>, tensor<11008x32x1xf32>, tensor<11008x32x1xf32>) outs(%5 : tensor<11008x32x128xf32>) { | |
^bb0(%in: i8, %in_0: f32, %in_1: f32, %out: f32): | |
%9 = arith.extui %in : i8 to i32 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// -----// IR Dump After FoldUnitExtentDims (iree-flow-fold-unit-extent-dims) //----- // | |
func.func @quantized_matmul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view, %arg3: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%cst = arith.constant 0.000000e+00 : f32 | |
%0 = hal.tensor.import %arg0 "input 0" : !hal.buffer_view -> tensor<11008x32x128xi4> | |
%1 = hal.tensor.import %arg1 "input 1" : !hal.buffer_view -> tensor<11008x32x1xf32> | |
%2 = hal.tensor.import %arg2 "input 2" : !hal.buffer_view -> tensor<11008x32x1xf32> | |
%3 = hal.tensor.import %arg3 "input 3" : !hal.buffer_view -> tensor<1x1x32x128xf32> | |
%4 = tensor.empty() : tensor<11008x32x128xf32> | |
%collapsed = tensor.collapse_shape %1 [[0], [1, 2]] : tensor<11008x32x1xf32> into tensor<11008x32xf32> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// -----// IR Dump After FoldUnitExtentDims (iree-flow-fold-unit-extent-dims) //----- // | |
func.func @quantized_matmul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view, %arg3: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%cst = arith.constant 0.000000e+00 : f32 | |
%0 = hal.tensor.import %arg0 "input 0" : !hal.buffer_view -> tensor<11008x32x128xi4> | |
%1 = hal.tensor.import %arg1 "input 1" : !hal.buffer_view -> tensor<11008x32x1xf32> | |
%2 = hal.tensor.import %arg2 "input 2" : !hal.buffer_view -> tensor<11008x32x1xf32> | |
%3 = hal.tensor.import %arg3 "input 3" : !hal.buffer_view -> tensor<1x1x32x128xf32> | |
%4 = tensor.empty() : tensor<11008x32x128xf32> | |
%collapsed = tensor.collapse_shape %1 [[0], [1, 2]] : tensor<11008x32x1xf32> into tensor<11008x32xf32> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// -----// IR Dump After TileAndDecomposeWinogradTransform (iree-linalg-ext-tile-and-decompose-winograd) //----- // | |
func.func @quantized_matmul_dispatch_3_generic_11008x32x128_i16xi8xi32() { | |
%c11008 = arith.constant 11008 : index | |
%c256 = arith.constant 256 : index | |
%c0 = arith.constant 0 : index | |
%c128 = arith.constant 128 : index | |
%c0_i32 = arith.constant 0 : i32 | |
%cst = arith.constant 0.000000e+00 : f32 | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c256) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32x128xi16>> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp b/mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp | |
index 7f8322bd5f6f..762de3b99494 100644 | |
--- a/mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp | |
+++ b/mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp | |
@@ -460,6 +460,10 @@ LogicalResult LoadOpOfExpandShapeOpFolder<OpTy>::matchAndRewrite( | |
rewriter.replaceOpWithNewOp<decltype(op)>( | |
loadOp, expandShapeOp.getViewSource(), sourceIndices); | |
}) | |
+ .Case([&](vector::LoadOp op) { | |
+ rewriter.replaceOpWithNewOp<vector::LoadOp>( |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
func.func @reassociated_grouped_quantized_matmul(%vec: tensor<32x128xi16>, %mat: tensor<11008x32x128xi4>, %vec_scales: tensor<32xf32>, %vec_scaled_sums: tensor<32xf32>, %mat_scales: tensor<11008x32xf32>, %mat_zps: tensor<11008x32xf32>) -> tensor<11008xf32> { | |
%c0_i32 = arith.constant 0 : i32 | |
%cst_0 = arith.constant 0.000000e+00 : f32 | |
%0 = tensor.empty() : tensor<11008x32xi32> | |
%1 = linalg.fill ins(%c0_i32 : i32) outs(%0 : tensor<11008x32xi32>) -> tensor<11008x32xi32> | |
%2 = tensor.empty() : tensor<11008xf32> | |
%3 = linalg.fill ins(%cst_0 : f32) outs(%2 : tensor<11008xf32>) -> tensor<11008xf32> | |
%batch_matmul_result = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>, | |
affine_map<(d0, d1, d2) -> (d0, d1, d2)>, | |
affine_map<(d0, d1, d2) -> (d0, d1)>], |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> | |
#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d2, d1, d3)> | |
#map2 = affine_map<(d0, d1, d2, d3) -> (0, d1, d2, d3)> | |
#map3 = affine_map<(d0, d1, d2, d3) -> (0, 0, d2, d3)> | |
#map4 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3, d2)> | |
module { | |
func.func @concat_batchMM(%arg0: tensor<1x?x32x128xf32>, %arg1: tensor<1x32x1x128xf32>, %arg2: tensor<1x32x1x128xf32>) -> (tensor<1x32x1x?xf32>, tensor<1x?x32x128xf32>) { | |
%cst = arith.constant 0.000000e+00 : f32 | |
%c1 = arith.constant 1 : index | |
%0 = tensor.empty() : tensor<1x1x32x128xf32> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)> | |
#map1 = affine_map<(d0, d1, d2) -> (d1, d0, d2)> | |
module { | |
func.func @concat_batchMM(%arg0: tensor<?x32x128xf32>, %arg1: tensor<32x1x128xf32>) -> (tensor<32x1x?xf32>, tensor<?x32x128xf32>) { | |
%cst = arith.constant 0.000000e+00 : f32 | |
%c0 = arith.constant 0 : index | |
%dim = tensor.dim %arg0, %c0 : tensor<?x32x128xf32> | |
%0 = tensor.empty(%dim) : tensor<32x?x128xf32> | |
%1 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg0 : tensor<?x32x128xf32>) outs(%0 : tensor<32x?x128xf32>) { | |
^bb0(%in: f32, %out: f32): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#map = affine_map<(d0, d1, d2) -> (d1, d2)> | |
#map1 = affine_map<(d0, d1, d2) -> (d0, d1, d2)> | |
#map2 = affine_map<(d0, d1, d2) -> (d0, d1)> | |
module{ | |
util.global private @cst = #util.byte_pattern<1> : tensor<11008x32x128xi4> | |
util.global private mutable @global_seed = #util.byte_pattern<2> : tensor<i64> | |
func.func @transpose_extend_batch_matmul(%arg0: tensor<32x128xi16>) -> tensor<11008x32xi32> { | |
%cst = util.global.load @cst : tensor<11008x32x128xi4> | |
%c0_i32 = arith.constant 0 : i32 | |
%0 = tensor.empty() : tensor<11008x32xi32> |
OlderNewer