Args: tools/iree-compile --iree-llvmcpu-link-embedded=false --iree-hal-target-backends=llvm-cpu --iree-llvmcpu-target-cpu=znver4 --iree-llvmcpu-enable-ukernels=all /home/benoit/matmul_n4_i8.mlir -o /tmp/a.vmfb --iree-hal-dump-executable-intermediates-to=/tmp -mlir-disable-threading -mlir-print-ir-before-all -mlir-print-ir-after-all -debug
Load new dialect in Context builtin
ImplicitTypeIDRegistry::lookupOrInsert(mlir::ShapedType)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::MemRefLayoutAttrInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::TypedAttr)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::ElementsAttr)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::DistinctAttr)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::BytecodeOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::SymbolOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpAsmOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::RegionKindInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::ConditionallySpeculatable)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::MemoryEffectOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::ResourceBlobManagerDialectInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpAsmDialectInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::BytecodeDialectInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::detail::AffineBinaryOpExprStorage)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::detail::AffineConstantExprStorage)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::detail::AffineDimExprStorage)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::detail::AffineMapStorage)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::detail::IntegerSetStorage)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::HoistableTypeInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::LLVMTranslationDialectInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::chlo::ChloDialect)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::stablehlo::StablehloDialect)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::ZeroOperands<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::OneRegion<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::ZeroResults<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::ZeroSuccessors<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::NoRegionArguments<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::NoTerminator<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::SingleBlock<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::OpInvariants<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::BytecodeOpInterface::Trait<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::AffineScope<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::IsIsolatedFromAbove<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::SymbolTable<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::SymbolOpInterface::Trait<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpAsmOpInterface::Trait<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::RegionKindInterface::Trait<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::HasOnlyGraphRegion<Empty>)
Load new dialect in Context func
ImplicitTypeIDRegistry::lookupOrInsert(mlir::CallOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::SymbolUserOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::CallableOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::FunctionOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::RegionBranchTerminatorOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::DialectInlinerInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::ConvertToLLVMPatternInterface)
Load new dialect in Context cf
Load new dialect in Context arith
ImplicitTypeIDRegistry::lookupOrInsert(mlir::arith::ArithFastMathInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::VectorUnrollOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::InferTypeOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::InferIntRangeInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::arith::ArithIntegerOverflowFlagsInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::CastOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::NumericCastOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::bufferization::BufferizableOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::ValueBoundsOpInterface)
Ignoring repeated interface registration
ImplicitTypeIDRegistry::lookupOrInsert(mlir::BranchOpInterface)
Ignoring repeated interface registration
Ignoring repeated interface registration
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::AutomaticAllocationScope<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::CallableOpInterface::Trait<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::FunctionOpInterface::Trait<Empty>)
Load new dialect in Context linalg
Load new dialect in Context affine
Load new dialect in Context ub
ImplicitTypeIDRegistry::lookupOrInsert(mlir::ub::PoisonAttrInterface)
Ignoring repeated interface registration
ImplicitTypeIDRegistry::lookupOrInsert(mlir::affine::AffineDmaStartOp)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::affine::AffineMapAccessInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::affine::AffineDmaWaitOp)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::LoopLikeOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::RegionBranchOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::affine::AffineReadOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::affine::AffineWriteOpInterface)
Ignoring repeated interface registration
Load new dialect in Context math
Ignoring repeated interface registration
Load new dialect in Context memref
ImplicitTypeIDRegistry::lookupOrInsert(mlir::CopyOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::PromotableMemOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::DestructurableAccessorOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::PromotableAllocationOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::DestructurableAllocationOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::ViewLikeOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::ShapedDimOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OffsetSizeAndStrideOpInterface)
Ignoring repeated interface registration
Load new dialect in Context tensor
Load new dialect in Context complex
Ignoring repeated interface registration
ImplicitTypeIDRegistry::lookupOrInsert(mlir::ReifyRankedShapedTypeOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::DestinationStyleOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::TilingInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::TiedOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::HoistableOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::SubsetOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::SubsetExtractionOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::SubsetInsertionOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::PartitionableLoopsInterface)
Ignoring repeated interface registration
Ignoring repeated interface registration
Ignoring repeated interface registration
Ignoring repeated interface registration
Ignoring repeated interface registration
Ignoring repeated interface registration
Ignoring repeated interface registration
ImplicitTypeIDRegistry::lookupOrInsert(mlir::linalg::AggregatedOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::linalg::LinalgOp)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::linalg::ContractionOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::linalg::ConvolutionOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::linalg::FillOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::mesh::ShardingInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::PartialReductionOpInterface)
Ignoring repeated interface registration
Ignoring repeated interface registration
Ignoring repeated interface registration
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::VariadicResults<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::VariadicOperands<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::SingleBlockImplicitTerminator<mlir::linalg::YieldOp>::Impl<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::AttrSizedOperandSegments<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::MemoryEffectOpInterface::Trait<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::DestinationStyleOpInterface::Trait<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::linalg::LinalgOp::Trait<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::ReifyRankedShapedTypeOpInterface::Trait<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::linalg::ContractionOpInterface::Trait<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::arith::detail::MulIOpGenericAdaptorBase::Properties)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::arith::detail::AddIOpGenericAdaptorBase::Properties)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::ZeroRegions<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::HasParent<mlir::func::FuncOp>::Impl<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::ConditionallySpeculatable::Trait<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::AlwaysSpeculatableImplTrait<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::MemRefsNormalizable<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::RegionBranchTerminatorOpInterface::Trait<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::ReturnLike<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::IsTerminator<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::OneResult<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::OneTypedResult<mlir::Type>::Impl<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::OneOperand<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::InferIntRangeInterface::Trait<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::SameOperandsAndResultShape<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::CastOpInterface::Trait<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::VectorUnrollOpInterface::Trait<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::Elementwise<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::Scalarizable<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::Vectorizable<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::Tensorizable<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::NOperands<2>::Impl<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::IsCommutative<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::arith::ArithIntegerOverflowFlagsInterface::Trait<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::SameOperandsAndResultType<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::InferTypeOpInterface::Trait<Empty>)
Features:+64bit-mode,-32bit-mode,-16bit-mode,+sse2,+mmx,+popcnt,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+avx,+avx2,+sse4a,+fma,+avx512f,+bmi,+bmi2,+aes,+pclmul,+avx512vl,+avx512bw,+avx512dq,+avx512cd,+avx512vbmi,+avx512ifma,+avx512vpopcntdq,+avx512vbmi2,+gfni,+vpclmulqdq,+avx512vnni,+avx512bitalg,+avx512bf16,+adx,+clflushopt,+clwb,+clzero,+cx16,+cx8,+f16c,+fsgsbase,+crc32,+invpcid,+rdpru,+sahf,+lzcnt,+movbe,+mwaitx,+x87,+pku,+prfchw,+rdpid,+rdrnd,+rdseed,+sha,+shstk,+vaes,+wbnoinvd,+xsave,+xsavec,+xsaveopt,+xsaves,+fxsr,+evex512
CPU:znver4
TuneCPU:znver4
Subtarget features: SSELevel 9, 3DNowLevel 1, 64bit 1
G_ADD (opcode 51): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_SUB (opcode 52): 1 type index, 0 imm indices
.. opcode 52 is aliased to 51
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_MUL (opcode 53): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_SDIV (opcode 54): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_UDIV (opcode 55): 1 type index, 0 imm indices
.. opcode 55 is aliased to 54
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_SREM (opcode 56): 1 type index, 0 imm indices
.. opcode 56 is aliased to 54
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_UREM (opcode 57): 1 type index, 0 imm indices
.. opcode 57 is aliased to 54
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_SDIVREM (opcode 58): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_UDIVREM (opcode 59): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_AND (opcode 60): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_OR (opcode 61): 1 type index, 0 imm indices
.. opcode 61 is aliased to 60
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_XOR (opcode 62): 1 type index, 0 imm indices
.. opcode 62 is aliased to 60
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_IMPLICIT_DEF (opcode 63): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_PHI (opcode 64): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_FRAME_INDEX (opcode 65): 1 type index, 0 imm indices
.. the first uncovered type index: 1, OK
.. the first uncovered imm index: 0, OK
G_GLOBAL_VALUE (opcode 66): 1 type index, 0 imm indices
.. opcode 66 is aliased to 65
.. the first uncovered type index: 1, OK
.. the first uncovered imm index: 0, OK
G_CONSTANT_POOL (opcode 67): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_EXTRACT (opcode 68): 2 type indices, 1 imm index
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_UNMERGE_VALUES (opcode 69): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_INSERT (opcode 70): 2 type indices, 1 imm index
.. opcode 70 is aliased to 68
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_MERGE_VALUES (opcode 71): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_BUILD_VECTOR (opcode 72): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_BUILD_VECTOR_TRUNC (opcode 73): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_CONCAT_VECTORS (opcode 74): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_PTRTOINT (opcode 75): 2 type indices, 0 imm indices
.. the first uncovered type index: 2, OK
.. the first uncovered imm index: 0, OK
G_INTTOPTR (opcode 76): 2 type indices, 0 imm indices
.. the first uncovered type index: 2, OK
.. the first uncovered imm index: 0, OK
G_BITCAST (opcode 77): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_FREEZE (opcode 78): 1 type index, 0 imm indices
.. the first uncovered type index: 1, OK
.. the first uncovered imm index: 0, OK
G_CONSTANT_FOLD_BARRIER (opcode 79): 1 type index, 0 imm indices
.. opcode 79 is aliased to 78
.. the first uncovered type index: 1, OK
.. the first uncovered imm index: 0, OK
G_INTRINSIC_FPTRUNC_ROUND (opcode 80): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_INTRINSIC_TRUNC (opcode 81): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_INTRINSIC_ROUND (opcode 82): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_INTRINSIC_LRINT (opcode 83): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_INTRINSIC_ROUNDEVEN (opcode 84): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_READCYCLECOUNTER (opcode 85): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_READSTEADYCOUNTER (opcode 86): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_LOAD (opcode 87): 2 type indices, 0 imm indices
.. the first uncovered type index: 2, OK
.. the first uncovered imm index: 0, OK
G_SEXTLOAD (opcode 88): 2 type indices, 0 imm indices
.. the first uncovered type index: 2, OK
.. the first uncovered imm index: 0, OK
G_ZEXTLOAD (opcode 89): 2 type indices, 0 imm indices
.. the first uncovered type index: 2, OK
.. the first uncovered imm index: 0, OK
G_INDEXED_LOAD (opcode 90): 3 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_INDEXED_SEXTLOAD (opcode 91): 3 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_INDEXED_ZEXTLOAD (opcode 92): 3 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_STORE (opcode 93): 2 type indices, 0 imm indices
.. the first uncovered type index: 2, OK
.. the first uncovered imm index: 0, OK
G_INDEXED_STORE (opcode 94): 3 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_ATOMIC_CMPXCHG_WITH_SUCCESS (opcode 95): 3 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_ATOMIC_CMPXCHG (opcode 96): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_ATOMICRMW_XCHG (opcode 97): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_ATOMICRMW_ADD (opcode 98): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_ATOMICRMW_SUB (opcode 99): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_ATOMICRMW_AND (opcode 100): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_ATOMICRMW_NAND (opcode 101): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_ATOMICRMW_OR (opcode 102): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_ATOMICRMW_XOR (opcode 103): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_ATOMICRMW_MAX (opcode 104): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_ATOMICRMW_MIN (opcode 105): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_ATOMICRMW_UMAX (opcode 106): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_ATOMICRMW_UMIN (opcode 107): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_ATOMICRMW_FADD (opcode 108): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_ATOMICRMW_FSUB (opcode 109): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_ATOMICRMW_FMAX (opcode 110): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_ATOMICRMW_FMIN (opcode 111): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_ATOMICRMW_UINC_WRAP (opcode 112): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_ATOMICRMW_UDEC_WRAP (opcode 113): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_FENCE (opcode 114): 0 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_PREFETCH (opcode 115): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_BRCOND (opcode 116): 1 type index, 0 imm indices
.. the first uncovered type index: 1, OK
.. the first uncovered imm index: 0, OK
G_BRINDIRECT (opcode 117): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_INVOKE_REGION_START (opcode 118): 0 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_INTRINSIC (opcode 119): 0 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_INTRINSIC_W_SIDE_EFFECTS (opcode 120): 0 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_INTRINSIC_CONVERGENT (opcode 121): 0 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS (opcode 122): 0 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_ANYEXT (opcode 123): 2 type indices, 0 imm indices
.. opcode 123 is aliased to 129
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_TRUNC (opcode 124): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_CONSTANT (opcode 125): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_FCONSTANT (opcode 126): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_VASTART (opcode 127): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_VAARG (opcode 128): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_SEXT (opcode 129): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_SEXT_INREG (opcode 130): 1 type index, 1 imm index
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_ZEXT (opcode 131): 2 type indices, 0 imm indices
.. opcode 131 is aliased to 129
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_SHL (opcode 132): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_LSHR (opcode 133): 2 type indices, 0 imm indices
.. opcode 133 is aliased to 132
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_ASHR (opcode 134): 2 type indices, 0 imm indices
.. opcode 134 is aliased to 132
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_FSHL (opcode 135): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_FSHR (opcode 136): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_ROTR (opcode 137): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_ROTL (opcode 138): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_ICMP (opcode 139): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_FCMP (opcode 140): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_SELECT (opcode 141): 2 type indices, 0 imm indices
.. the first uncovered type index: 2, OK
.. the first uncovered imm index: 0, OK
G_UADDO (opcode 142): 2 type indices, 0 imm indices
.. opcode 142 is aliased to 143
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_UADDE (opcode 143): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_USUBO (opcode 144): 2 type indices, 0 imm indices
.. opcode 144 is aliased to 143
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_USUBE (opcode 145): 2 type indices, 0 imm indices
.. opcode 145 is aliased to 143
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_SADDO (opcode 146): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_SADDE (opcode 147): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_SSUBO (opcode 148): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_SSUBE (opcode 149): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_UMULO (opcode 150): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_SMULO (opcode 151): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_UMULH (opcode 152): 1 type index, 0 imm indices
.. opcode 152 is aliased to 153
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_SMULH (opcode 153): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_UADDSAT (opcode 154): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_SADDSAT (opcode 155): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_USUBSAT (opcode 156): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_SSUBSAT (opcode 157): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_USHLSAT (opcode 158): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_SSHLSAT (opcode 159): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_SMULFIX (opcode 160): 1 type index, 1 imm index
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_UMULFIX (opcode 161): 1 type index, 1 imm index
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_SMULFIXSAT (opcode 162): 1 type index, 1 imm index
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_UMULFIXSAT (opcode 163): 1 type index, 1 imm index
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_SDIVFIX (opcode 164): 1 type index, 1 imm index
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_UDIVFIX (opcode 165): 1 type index, 1 imm index
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_SDIVFIXSAT (opcode 166): 1 type index, 1 imm index
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_UDIVFIXSAT (opcode 167): 1 type index, 1 imm index
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_FADD (opcode 168): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_FSUB (opcode 169): 1 type index, 0 imm indices
.. opcode 169 is aliased to 168
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_FMUL (opcode 170): 1 type index, 0 imm indices
.. opcode 170 is aliased to 168
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_FMA (opcode 171): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_FMAD (opcode 172): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_FDIV (opcode 173): 1 type index, 0 imm indices
.. opcode 173 is aliased to 168
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_FREM (opcode 174): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_FPOW (opcode 175): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_FPOWI (opcode 176): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_FEXP (opcode 177): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_FEXP2 (opcode 178): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_FEXP10 (opcode 179): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_FLOG (opcode 180): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_FLOG2 (opcode 181): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_FLOG10 (opcode 182): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_FLDEXP (opcode 183): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_FFREXP (opcode 184): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_FNEG (opcode 185): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_FPEXT (opcode 186): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_FPTRUNC (opcode 187): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_FPTOSI (opcode 188): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_FPTOUI (opcode 189): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_SITOFP (opcode 190): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_UITOFP (opcode 191): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_FABS (opcode 192): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_FCOPYSIGN (opcode 193): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_IS_FPCLASS (opcode 194): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_FCANONICALIZE (opcode 195): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_FMINNUM (opcode 196): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_FMAXNUM (opcode 197): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_FMINNUM_IEEE (opcode 198): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_FMAXNUM_IEEE (opcode 199): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_FMINIMUM (opcode 200): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_FMAXIMUM (opcode 201): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_GET_FPENV (opcode 202): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_SET_FPENV (opcode 203): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_RESET_FPENV (opcode 204): 0 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_GET_FPMODE (opcode 205): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_SET_FPMODE (opcode 206): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_RESET_FPMODE (opcode 207): 0 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_PTR_ADD (opcode 208): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_PTRMASK (opcode 209): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_SMIN (opcode 210): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_SMAX (opcode 211): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_UMIN (opcode 212): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_UMAX (opcode 213): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_ABS (opcode 214): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_LROUND (opcode 215): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_LLROUND (opcode 216): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_BR (opcode 217): 0 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_BRJT (opcode 218): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_INSERT_SUBVECTOR (opcode 219): 2 type indices, 1 imm index
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_EXTRACT_SUBVECTOR (opcode 220): 1 type index, 1 imm index
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_INSERT_VECTOR_ELT (opcode 221): 3 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_EXTRACT_VECTOR_ELT (opcode 222): 3 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_SHUFFLE_VECTOR (opcode 223): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_SPLAT_VECTOR (opcode 224): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_CTTZ (opcode 225): 2 type indices, 0 imm indices
.. opcode 225 is aliased to 226
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_CTTZ_ZERO_UNDEF (opcode 226): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_CTLZ (opcode 227): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_CTLZ_ZERO_UNDEF (opcode 228): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_CTPOP (opcode 229): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_BSWAP (opcode 230): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_BITREVERSE (opcode 231): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_FCEIL (opcode 232): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_FCOS (opcode 233): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_FSIN (opcode 234): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_FSQRT (opcode 235): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_FFLOOR (opcode 236): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_FRINT (opcode 237): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_FNEARBYINT (opcode 238): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_ADDRSPACE_CAST (opcode 239): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_BLOCK_ADDR (opcode 240): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_JUMP_TABLE (opcode 241): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_DYN_STACKALLOC (opcode 242): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_STACKSAVE (opcode 243): 1 type index, 0 imm indices
.. opcode 243 is aliased to 242
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_STACKRESTORE (opcode 244): 1 type index, 0 imm indices
.. opcode 244 is aliased to 242
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_STRICT_FADD (opcode 245): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_STRICT_FSUB (opcode 246): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_STRICT_FMUL (opcode 247): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_STRICT_FDIV (opcode 248): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_STRICT_FREM (opcode 249): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_STRICT_FMA (opcode 250): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_STRICT_FSQRT (opcode 251): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_STRICT_FLDEXP (opcode 252): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_READ_REGISTER (opcode 253): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_WRITE_REGISTER (opcode 254): 1 type index, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_MEMCPY (opcode 255): 3 type indices, 1 imm index
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_MEMCPY_INLINE (opcode 256): 3 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_MEMMOVE (opcode 257): 3 type indices, 1 imm index
.. opcode 257 is aliased to 255
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_MEMSET (opcode 258): 3 type indices, 1 imm index
.. opcode 258 is aliased to 255
.. type index coverage check SKIPPED: user-defined predicate detected
.. imm index coverage check SKIPPED: user-defined predicate detected
G_BZERO (opcode 259): 2 type indices, 1 imm index
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_VECREDUCE_SEQ_FADD (opcode 260): 3 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_VECREDUCE_SEQ_FMUL (opcode 261): 3 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_VECREDUCE_FADD (opcode 262): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_VECREDUCE_FMUL (opcode 263): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_VECREDUCE_FMAX (opcode 264): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_VECREDUCE_FMIN (opcode 265): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_VECREDUCE_FMAXIMUM (opcode 266): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_VECREDUCE_FMINIMUM (opcode 267): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_VECREDUCE_ADD (opcode 268): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_VECREDUCE_MUL (opcode 269): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_VECREDUCE_AND (opcode 270): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_VECREDUCE_OR (opcode 271): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_VECREDUCE_XOR (opcode 272): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_VECREDUCE_SMAX (opcode 273): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_VECREDUCE_SMIN (opcode 274): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_VECREDUCE_UMAX (opcode 275): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_VECREDUCE_UMIN (opcode 276): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_SBFX (opcode 277): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
G_UBFX (opcode 278): 2 type indices, 0 imm indices
.. type index coverage check SKIPPED: no rules defined
.. imm index coverage check SKIPPED: no rules defined
[dialect] repeated interface registration for dialect builtin
[dialect] repeated interface registration for dialect builtin
[dialect] repeated interface registration for dialect builtin
[dialect] repeated interface registration for dialect builtin
[dialect] repeated interface registration for dialect builtin
[dialect] repeated interface registration for dialect builtin
[dialect] repeated interface registration for dialect builtin
[dialect] repeated interface registration for dialect builtin
Load new dialect in Context arm_neon
Ignoring repeated interface registration
Load new dialect in Context arm_sme
Load new dialect in Context scf
ImplicitTypeIDRegistry::lookupOrInsert(mlir::ParallelCombiningOpInterface)
Ignoring repeated interface registration
Load new dialect in Context vector
ImplicitTypeIDRegistry::lookupOrInsert(mlir::vector::MaskableOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::vector::MaskingOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::VectorTransferOpInterface)
Ignoring repeated interface registration
ImplicitTypeIDRegistry::lookupOrInsert(mlir::arm_sme::ArmSMETileOpInterface)
Ignoring repeated interface registration
[dialect] repeated interface registration for dialect arm_sme
[dialect] repeated interface registration for dialect arm_sme
[dialect] repeated interface registration for dialect arm_sme
[dialect] repeated interface registration for dialect arm_sme
[dialect] repeated interface registration for dialect arm_sme
[dialect] repeated interface registration for dialect arm_sme
[dialect] repeated interface registration for dialect arm_sme
Load new dialect in Context bufferization
ImplicitTypeIDRegistry::lookupOrInsert(mlir::bufferization::AllocationOpInterface)
Ignoring repeated interface registration
Load new dialect in Context chlo
ImplicitTypeIDRegistry::lookupOrInsert(mlir::InferShapedTypeOpInterface)
Ignoring repeated interface registration
Load new dialect in Context flow
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Flow::DispatchTensorType)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::ShapeAwareOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::ClosureOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::DialectFoldInterface)
Load new dialect in Context util
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::SizedStorageAttr)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::SerializableAttrInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::InliningPolicyAttrInterface)
Ignoring repeated interface registration
Ignoring repeated interface registration
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::ReferenceTypeInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::SizeAwareTypeInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::GlobalTypeInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::InferTypeSizeInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::SubrangeTypeInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::SizeAwareOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::SubrangeOperandOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::SubrangeOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::GlobalAddressOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::GlobalLoadIndirectOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::GlobalLoadOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::GlobalOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::GlobalStoreIndirectOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::GlobalStoreOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::InitializerOpInterface)
Ignoring repeated interface registration
Ignoring repeated interface registration
Load new dialect in Context gpu
ImplicitTypeIDRegistry::lookupOrInsert(mlir::gpu::AsyncTokenType)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::gpu::MMAMatrixType)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::gpu::SparseDnTensorHandleType)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::gpu::SparseSpMatHandleType)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::gpu::SparseSpGEMMOpHandleType)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::gpu::AsyncOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::DataLayoutOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::DeviceMappingAttrInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::ProcessorIDInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::ProcessorCountInterface)
Ignoring repeated interface registration
Load new dialect in Context hal
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Stream::AffinityAttr)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::HAL::AllocatorType)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::HAL::BufferType)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::HAL::BufferViewType)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::HAL::ChannelType)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::HAL::CommandBufferType)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::HAL::DescriptorSetLayoutType)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::HAL::DeviceType)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::HAL::EventType)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::HAL::ExecutableType)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::HAL::FenceType)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::HAL::FileType)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::HAL::PipelineLayoutType)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::HAL::SemaphoreType)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::VMConversionDialectInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::ProcessorTileSizeInterface)
Ignoring repeated interface registration
Load new dialect in Context io_parameters
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::HALConversionDialectInterface)
Ignoring repeated interface registration
Load new dialect in Context iree_codegen
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Codegen::UKernelOpInterface)
Ignoring repeated interface registration
Load new dialect in Context iree_input
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Input::TiedOpInterface)
Ignoring repeated interface registration
Load new dialect in Context iree_linalg_ext
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::LinalgExt::LinalgExtOp)
Ignoring repeated interface registration
Load new dialect in Context llvm
ImplicitTypeIDRegistry::lookupOrInsert(mlir::LLVM::LLVMVoidType)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::LLVM::LLVMPPCFP128Type)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::LLVM::LLVMX86MMXType)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::LLVM::LLVMTokenType)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::LLVM::LLVMLabelType)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::LLVM::LLVMMetadataType)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::LLVM::LLVMStructType)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::DataLayoutTypeInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::DestructurableTypeInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::LLVM::IntegerOverflowFlagsInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::PromotableOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::LLVM::GetResultPtrElementType)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::LLVM::AccessGroupOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::LLVM::AliasAnalysisOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::LLVM::FastmathFlagsInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::LLVM::BranchWeightOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::SafeMemorySlotAccessOpInterface)
Ignoring repeated interface registration
[dialect] repeated interface registration for dialect llvm
[dialect] repeated interface registration for dialect llvm
[dialect] repeated interface registration for dialect llvm
[dialect] repeated interface registration for dialect llvm
[dialect] repeated interface registration for dialect llvm
[dialect] repeated interface registration for dialect llvm
[dialect] repeated interface registration for dialect llvm
[dialect] repeated interface registration for dialect llvm
Load new dialect in Context mesh
Ignoring repeated interface registration
Load new dialect in Context ml_program
Ignoring repeated interface registration
Load new dialect in Context pdl
Ignoring repeated interface registration
Load new dialect in Context pdl_interp
Ignoring repeated interface registration
Load new dialect in Context spirv
ImplicitTypeIDRegistry::lookupOrInsert(mlir::spirv::InterfaceVarABIAttr)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::spirv::TargetEnvAttr)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::spirv::VerCapExtAttr)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::spirv::ArrayType)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::spirv::CooperativeMatrixType)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::spirv::ImageType)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::spirv::JointMatrixINTELType)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::spirv::MatrixType)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::spirv::PointerType)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::spirv::RuntimeArrayType)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::spirv::SampledImageType)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::spirv::StructType)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::spirv::QueryMinVersionInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::spirv::QueryMaxVersionInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::spirv::QueryExtensionInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::spirv::QueryCapabilityInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::gpu::TargetAttrInterface)
Ignoring repeated interface registration
Load new dialect in Context stablehlo
ImplicitTypeIDRegistry::lookupOrInsert(mlir::hlo::HloDialectInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::VerifiableTensorEncoding)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::hlo::BoundedAttrInterface)
Ignoring repeated interface registration
Load new dialect in Context stream
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Stream::AffinityOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Stream::StreamableOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Stream::AsyncAccessOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Stream::TimelineOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Stream::SubviewEffectOpInterface)
Ignoring repeated interface registration
Load new dialect in Context tm_tensor
ImplicitTypeIDRegistry::lookupOrInsert(mlir::torch::TMTensor::TMTensorOp)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::torch::TMTensor::ScalarLoopOpInterface)
Ignoring repeated interface registration
Load new dialect in Context torch
Ignoring repeated interface registration
Load new dialect in Context torch_c
Ignoring repeated interface registration
Load new dialect in Context tosa
Load new dialect in Context quant
ImplicitTypeIDRegistry::lookupOrInsert(mlir::quant::AnyQuantizedType)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::quant::CalibratedQuantizedType)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::quant::UniformQuantizedType)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::quant::UniformQuantizedPerAxisType)
Ignoring repeated interface registration
ImplicitTypeIDRegistry::lookupOrInsert(mlir::tosa::TosaOp)
Ignoring repeated interface registration
Load new dialect in Context transform
ImplicitTypeIDRegistry::lookupOrInsert(mlir::transform::TransformOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::transform::PatternDescriptorOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::transform::ConversionPatternDescriptorOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::transform::TypeConverterBuilderOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::transform::MatchOpInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::transform::TransformParamTypeInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::transform::TransformHandleTypeInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::transform::TransformValueHandleTypeInterface)
Load new dialect in Context index
Ignoring repeated interface registration
Load new dialect in Context nvgpu
Ignoring repeated interface registration
Load new dialect in Context amdgpu
Ignoring repeated interface registration
Ignoring repeated interface registration
Load new dialect in Context vk
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Vulkan::TargetEnvAttr)
Ignoring repeated interface registration
Load new dialect in Context vm
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::VM::BufferType)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::VM::ListType)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::VM::OpaqueType)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::VM::RefType)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::VM::VMSerializableOp)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::VM::VMOp)
Ignoring repeated interface registration
Load new dialect in Context vmvx
Ignoring repeated interface registration
// -----// IR Dump Before AssignTargetDevicesPass (iree-hal-assign-target-devices) //----- //
module {
func.func @matmul_n4_i8(%arg0: tensor<?x?xi8>, %arg1: tensor<?x4xi8>, %arg2: tensor<?x4xi32>) -> tensor<?x4xi32> {
%0 = linalg.matmul ins(%arg0, %arg1 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%arg2 : tensor<?x4xi32>) -> tensor<?x4xi32>
return %0 : tensor<?x4xi32>
}
}
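
(For orientation: the module above is the entire input program; every dump that follows traces it through the pass pipeline. The linalg.matmul carries an implicit signed-extending multiply-accumulate region. A sketch of the equivalent explicit linalg.generic, assembled from the indexing maps and region body visible in the generic-form dumps further below, would be:

%0 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>,
                                      affine_map<(d0, d1, d2) -> (d2, d1)>,
                                      affine_map<(d0, d1, d2) -> (d0, d1)>],
                     iterator_types = ["parallel", "parallel", "reduction"]}
    ins(%arg0, %arg1 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%arg2 : tensor<?x4xi32>) {
^bb0(%a: i8, %b: i8, %acc: i32):
  %lhs = arith.extsi %a : i8 to i32
  %rhs = arith.extsi %b : i8 to i32
  %mul = arith.muli %lhs, %rhs : i32
  %sum = arith.addi %acc, %mul : i32
  linalg.yield %sum : i32
} -> tensor<?x4xi32>
)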
ImplicitTypeIDRegistry::lookupOrInsert(mlir::detail::PreservedAnalyses::AllAnalysesType)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::DataLayoutSpecInterface)
// -----// IR Dump After AssignTargetDevicesPass (iree-hal-assign-target-devices) //----- //
ImplicitTypeIDRegistry::lookupOrInsert(mlir::detail::StorageUserTrait::IsMutable<Empty>)
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {cpu = "znver4", cpu_features = "+mmx,+popcnt,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+avx,+avx2,+sse4a,+fma,+avx512f,+bmi,+bmi2,+aes,+pclmul,+avx512vl,+avx512bw,+avx512dq,+avx512cd,+avx512vbmi,+avx512ifma,+avx512vpopcntdq,+avx512vbmi2,+gfni,+vpclmulqdq,+avx512vnni,+avx512bitalg,+avx512bf16,+adx,+clflushopt,+clwb,+clzero,+cx16,+cx8,+f16c,+fsgsbase,+crc32,+invpcid,+rdpru,+sahf,+lzcnt,+movbe,+mwaitx,+x87,+pku,+prfchw,+rdpid,+rdrnd,+rdseed,+sha,+shstk,+vaes,+wbnoinvd,+xsave,+xsavec,+xsaveopt,+xsaves,+fxsr,+evex512", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", link_embedded = false, native_vector_size = 64 : i64, target_triple = "x86_64-unknown-linux-gnu", ukernels = "all"}>
#device_target_llvm_cpu = #hal.device.target<"llvm-cpu", [#executable_target_system_elf_x86_64_]>
module attributes {hal.device.targets = [#device_target_llvm_cpu]} {
func.func @matmul_n4_i8(%arg0: tensor<?x?xi8>, %arg1: tensor<?x4xi8>, %arg2: tensor<?x4xi32>) -> tensor<?x4xi32> {
%0 = linalg.matmul ins(%arg0, %arg1 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%arg2 : tensor<?x4xi32>) -> tensor<?x4xi32>
return %0 : tensor<?x4xi32>
}
}
// -----// IR Dump Before AutoInputConversionPipeline (iree-auto-input-conversion) //----- //
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {cpu = "znver4", cpu_features = "+mmx,+popcnt,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+avx,+avx2,+sse4a,+fma,+avx512f,+bmi,+bmi2,+aes,+pclmul,+avx512vl,+avx512bw,+avx512dq,+avx512cd,+avx512vbmi,+avx512ifma,+avx512vpopcntdq,+avx512vbmi2,+gfni,+vpclmulqdq,+avx512vnni,+avx512bitalg,+avx512bf16,+adx,+clflushopt,+clwb,+clzero,+cx16,+cx8,+f16c,+fsgsbase,+crc32,+invpcid,+rdpru,+sahf,+lzcnt,+movbe,+mwaitx,+x87,+pku,+prfchw,+rdpid,+rdrnd,+rdseed,+sha,+shstk,+vaes,+wbnoinvd,+xsave,+xsavec,+xsaveopt,+xsaves,+fxsr,+evex512", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", link_embedded = false, native_vector_size = 64 : i64, target_triple = "x86_64-unknown-linux-gnu", ukernels = "all"}>
#device_target_llvm_cpu = #hal.device.target<"llvm-cpu", [#executable_target_system_elf_x86_64_]>
module attributes {hal.device.targets = [#device_target_llvm_cpu]} {
func.func @matmul_n4_i8(%arg0: tensor<?x?xi8>, %arg1: tensor<?x4xi8>, %arg2: tensor<?x4xi32>) -> tensor<?x4xi32> {
%0 = linalg.matmul ins(%arg0, %arg1 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%arg2 : tensor<?x4xi32>) -> tensor<?x4xi32>
return %0 : tensor<?x4xi32>
}
}
// -----// IR Dump After AutoInputConversionPipeline (iree-auto-input-conversion) //----- //
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {cpu = "znver4", cpu_features = "+mmx,+popcnt,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+avx,+avx2,+sse4a,+fma,+avx512f,+bmi,+bmi2,+aes,+pclmul,+avx512vl,+avx512bw,+avx512dq,+avx512cd,+avx512vbmi,+avx512ifma,+avx512vpopcntdq,+avx512vbmi2,+gfni,+vpclmulqdq,+avx512vnni,+avx512bitalg,+avx512bf16,+adx,+clflushopt,+clwb,+clzero,+cx16,+cx8,+f16c,+fsgsbase,+crc32,+invpcid,+rdpru,+sahf,+lzcnt,+movbe,+mwaitx,+x87,+pku,+prfchw,+rdpid,+rdrnd,+rdseed,+sha,+shstk,+vaes,+wbnoinvd,+xsave,+xsavec,+xsaveopt,+xsaves,+fxsr,+evex512", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", link_embedded = false, native_vector_size = 64 : i64, target_triple = "x86_64-unknown-linux-gnu", ukernels = "all"}>
#device_target_llvm_cpu = #hal.device.target<"llvm-cpu", [#executable_target_system_elf_x86_64_]>
module attributes {hal.device.targets = [#device_target_llvm_cpu]} {
func.func @matmul_n4_i8(%arg0: tensor<?x?xi8>, %arg1: tensor<?x4xi8>, %arg2: tensor<?x4xi32>) -> tensor<?x4xi32> {
%0 = linalg.matmul ins(%arg0, %arg1 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%arg2 : tensor<?x4xi32>) -> tensor<?x4xi32>
return %0 : tensor<?x4xi32>
}
}
// -----// IR Dump Before IREEImportPublic (iree-import-public) //----- //
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {cpu = "znver4", cpu_features = "+mmx,+popcnt,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+avx,+avx2,+sse4a,+fma,+avx512f,+bmi,+bmi2,+aes,+pclmul,+avx512vl,+avx512bw,+avx512dq,+avx512cd,+avx512vbmi,+avx512ifma,+avx512vpopcntdq,+avx512vbmi2,+gfni,+vpclmulqdq,+avx512vnni,+avx512bitalg,+avx512bf16,+adx,+clflushopt,+clwb,+clzero,+cx16,+cx8,+f16c,+fsgsbase,+crc32,+invpcid,+rdpru,+sahf,+lzcnt,+movbe,+mwaitx,+x87,+pku,+prfchw,+rdpid,+rdrnd,+rdseed,+sha,+shstk,+vaes,+wbnoinvd,+xsave,+xsavec,+xsaveopt,+xsaves,+fxsr,+evex512", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", link_embedded = false, native_vector_size = 64 : i64, target_triple = "x86_64-unknown-linux-gnu", ukernels = "all"}>
#device_target_llvm_cpu = #hal.device.target<"llvm-cpu", [#executable_target_system_elf_x86_64_]>
module attributes {hal.device.targets = [#device_target_llvm_cpu]} {
func.func @matmul_n4_i8(%arg0: tensor<?x?xi8>, %arg1: tensor<?x4xi8>, %arg2: tensor<?x4xi32>) -> tensor<?x4xi32> {
%0 = linalg.matmul ins(%arg0, %arg1 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%arg2 : tensor<?x4xi32>) -> tensor<?x4xi32>
return %0 : tensor<?x4xi32>
}
}
//===-------------------------------------------===//
Legalizing operation : 'builtin.module'(0x58bb0e20ac80) {
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'func.func'(0x58bb0e21ccf0) {
* Fold {
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::ConstantLike<Empty>)
} -> FAILURE : unable to fold
* Pattern : 'func.func -> ()' {
Trying to match "mlir::iree_compiler::(anonymous namespace)::FuncFuncOpPattern"
** Insert : 'util.func'(0x58bb0e3766b0)
** Insert Block into : 'util.func'(0x58bb0e3766b0)
** Erase : 'func.func'(0x58bb0e21ccf0)
"mlir::iree_compiler::(anonymous namespace)::FuncFuncOpPattern" result 1
//===-------------------------------------------===//
Legalizing operation : 'util.func'(0x58bb0e3766b0) {
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
} -> SUCCESS : pattern applied successfully
// *** IR Dump After Pattern Application ***
'func.return' op expects parent op 'func.func'
mlir-asm-printer: 'builtin.module' failed to verify and will be printed in generic form
"builtin.module"() ({
"util.func"() <{function_type = (tensor<?x?xi8>, tensor<?x4xi8>, tensor<?x4xi32>) -> tensor<?x4xi32>, sym_name = "matmul_n4_i8"}> ({
^bb0(%arg0: tensor<?x?xi8>, %arg1: tensor<?x4xi8>, %arg2: tensor<?x4xi32>):
%0 = "linalg.matmul"(%arg0, %arg1, %arg2) <{operandSegmentSizes = array<i32: 2, 1>}> ({
^bb0(%arg3: i8, %arg4: i8, %arg5: i32):
%1 = "arith.extsi"(%arg3) : (i8) -> i32
%2 = "arith.extsi"(%arg4) : (i8) -> i32
%3 = "arith.muli"(%1, %2) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
%4 = "arith.addi"(%arg5, %3) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
"linalg.yield"(%4) : (i32) -> ()
}) {linalg.memoized_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]} : (tensor<?x?xi8>, tensor<?x4xi8>, tensor<?x4xi32>) -> tensor<?x4xi32>
"func.return"(%0) : (tensor<?x4xi32>) -> ()
}) : () -> ()
"func.func"() <{function_type = (tensor<?x?xi8>, tensor<?x4xi8>, tensor<?x4xi32>) -> tensor<?x4xi32>, sym_name = "matmul_n4_i8"}> ({
}) : () -> ()
}) {hal.device.targets = [#hal.device.target<"llvm-cpu", [#hal.executable.target<"llvm-cpu", "system-elf-x86_64", {cpu = "znver4", cpu_features = "+mmx,+popcnt,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+avx,+avx2,+sse4a,+fma,+avx512f,+bmi,+bmi2,+aes,+pclmul,+avx512vl,+avx512bw,+avx512dq,+avx512cd,+avx512vbmi,+avx512ifma,+avx512vpopcntdq,+avx512vbmi2,+gfni,+vpclmulqdq,+avx512vnni,+avx512bitalg,+avx512bf16,+adx,+clflushopt,+clwb,+clzero,+cx16,+cx8,+f16c,+fsgsbase,+crc32,+invpcid,+rdpru,+sahf,+lzcnt,+movbe,+mwaitx,+x87,+pku,+prfchw,+rdpid,+rdrnd,+rdseed,+sha,+shstk,+vaes,+wbnoinvd,+xsave,+xsavec,+xsaveopt,+xsaves,+fxsr,+evex512", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", link_embedded = false, native_vector_size = 64 : i64, target_triple = "x86_64-unknown-linux-gnu", ukernels = "all"}>]>]} : () -> ()
} -> SUCCESS
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'linalg.matmul'(0x58bb0e221200) {
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'arith.extsi'(0x58bb0e25e2c0) {
%1 = "arith.extsi"(%arg3) : (i8) -> i32
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'arith.extsi'(0x58bb0e25e350) {
%2 = "arith.extsi"(%arg4) : (i8) -> i32
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'arith.muli'(0x58bb0e25ec00) {
%3 = "arith.muli"(%1, %2) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'arith.addi'(0x58bb0e25ecb0) {
%4 = "arith.addi"(%arg5, %3) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'linalg.yield'(0x58bb0e239d20) {
"linalg.yield"(%4) : (i32) -> ()
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'func.return'(0x58bb0e23b460) {
"func.return"(%0) : (tensor<?x4xi32>) -> ()
* Fold {
} -> FAILURE : unable to fold
* Pattern : 'func.return -> ()' {
Trying to match "mlir::iree_compiler::(anonymous namespace)::FuncReturnOpPattern"
** Insert : 'util.return'(0x58bb0ed97be0)
** Replace : 'func.return'(0x58bb0e23b460)
"mlir::iree_compiler::(anonymous namespace)::FuncReturnOpPattern" result 1
//===-------------------------------------------===//
Legalizing operation : 'util.return'(0x58bb0ed97be0) {
"util.return"(%0) : (tensor<?x4xi32>) -> ()
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
} -> SUCCESS : pattern applied successfully
// *** IR Dump After Pattern Application ***
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::HasParent<mlir::iree_compiler::IREE::Util::InitializerOp, mlir::iree_compiler::IREE::Util::FuncOp>::Impl<Empty>)
'util.return' op must be the last operation in the parent block
mlir-asm-printer: 'util.func' failed to verify and will be printed in generic form
"util.func"() <{function_type = (tensor<?x?xi8>, tensor<?x4xi8>, tensor<?x4xi32>) -> tensor<?x4xi32>, sym_name = "matmul_n4_i8"}> ({
^bb0(%arg0: tensor<?x?xi8>, %arg1: tensor<?x4xi8>, %arg2: tensor<?x4xi32>):
%0 = "linalg.matmul"(%arg0, %arg1, %arg2) <{operandSegmentSizes = array<i32: 2, 1>}> ({
^bb0(%arg3: i8, %arg4: i8, %arg5: i32):
%1 = "arith.extsi"(%arg3) : (i8) -> i32
%2 = "arith.extsi"(%arg4) : (i8) -> i32
%3 = "arith.muli"(%1, %2) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
%4 = "arith.addi"(%arg5, %3) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
"linalg.yield"(%4) : (i32) -> ()
}) {linalg.memoized_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]} : (tensor<?x?xi8>, tensor<?x4xi8>, tensor<?x4xi32>) -> tensor<?x4xi32>
"util.return"(%0) : (tensor<?x4xi32>) -> ()
"func.return"(%0) : (tensor<?x4xi32>) -> ()
}) : () -> ()
} -> SUCCESS
//===-------------------------------------------===//
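
(Note on the legalization trace above: the "'func.return' op expects parent op 'func.func'" and "'util.return' op must be the last operation in the parent block" verifier messages are expected with MLIR debug logging enabled and are not compile errors. The conversion driver prints the IR after each pattern application but before the replaced ops are actually erased, so the old func.func/func.return transiently coexist with their util.* replacements, and the module fails verification only in that intermediate state. The net rewrite performed by IREEImportPublic, as a minimal before/after sketch with a hypothetical function @f:

func.func @f(%t: tensor<?x4xi32>) -> tensor<?x4xi32> {  // before
  return %t : tensor<?x4xi32>
}
util.func public @f(%t: tensor<?x4xi32>) -> tensor<?x4xi32> {  // after
  util.return %t : tensor<?x4xi32>
}
)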
// -----// IR Dump After IREEImportPublic (iree-import-public) //----- //
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {cpu = "znver4", cpu_features = "+mmx,+popcnt,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+avx,+avx2,+sse4a,+fma,+avx512f,+bmi,+bmi2,+aes,+pclmul,+avx512vl,+avx512bw,+avx512dq,+avx512cd,+avx512vbmi,+avx512ifma,+avx512vpopcntdq,+avx512vbmi2,+gfni,+vpclmulqdq,+avx512vnni,+avx512bitalg,+avx512bf16,+adx,+clflushopt,+clwb,+clzero,+cx16,+cx8,+f16c,+fsgsbase,+crc32,+invpcid,+rdpru,+sahf,+lzcnt,+movbe,+mwaitx,+x87,+pku,+prfchw,+rdpid,+rdrnd,+rdseed,+sha,+shstk,+vaes,+wbnoinvd,+xsave,+xsavec,+xsaveopt,+xsaves,+fxsr,+evex512", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", link_embedded = false, native_vector_size = 64 : i64, target_triple = "x86_64-unknown-linux-gnu", ukernels = "all"}>
#device_target_llvm_cpu = #hal.device.target<"llvm-cpu", [#executable_target_system_elf_x86_64_]>
module attributes {hal.device.targets = [#device_target_llvm_cpu]} {
util.func public @matmul_n4_i8(%arg0: tensor<?x?xi8>, %arg1: tensor<?x4xi8>, %arg2: tensor<?x4xi32>) -> tensor<?x4xi32> {
%0 = linalg.matmul ins(%arg0, %arg1 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%arg2 : tensor<?x4xi32>) -> tensor<?x4xi32>
util.return %0 : tensor<?x4xi32>
}
}
// -----// IR Dump Before ImportMLProgram (iree-import-ml-program) //----- //
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {cpu = "znver4", cpu_features = "+mmx,+popcnt,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+avx,+avx2,+sse4a,+fma,+avx512f,+bmi,+bmi2,+aes,+pclmul,+avx512vl,+avx512bw,+avx512dq,+avx512cd,+avx512vbmi,+avx512ifma,+avx512vpopcntdq,+avx512vbmi2,+gfni,+vpclmulqdq,+avx512vnni,+avx512bitalg,+avx512bf16,+adx,+clflushopt,+clwb,+clzero,+cx16,+cx8,+f16c,+fsgsbase,+crc32,+invpcid,+rdpru,+sahf,+lzcnt,+movbe,+mwaitx,+x87,+pku,+prfchw,+rdpid,+rdrnd,+rdseed,+sha,+shstk,+vaes,+wbnoinvd,+xsave,+xsavec,+xsaveopt,+xsaves,+fxsr,+evex512", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", link_embedded = false, native_vector_size = 64 : i64, target_triple = "x86_64-unknown-linux-gnu", ukernels = "all"}>
#device_target_llvm_cpu = #hal.device.target<"llvm-cpu", [#executable_target_system_elf_x86_64_]>
module attributes {hal.device.targets = [#device_target_llvm_cpu]} {
util.func public @matmul_n4_i8(%arg0: tensor<?x?xi8>, %arg1: tensor<?x4xi8>, %arg2: tensor<?x4xi32>) -> tensor<?x4xi32> {
%0 = linalg.matmul ins(%arg0, %arg1 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%arg2 : tensor<?x4xi32>) -> tensor<?x4xi32>
util.return %0 : tensor<?x4xi32>
}
}
//===-------------------------------------------===//
Legalizing operation : 'builtin.module'(0x58bb0e20ac80) {
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'util.func'(0x58bb0e3766b0) {
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'linalg.matmul'(0x58bb0e221200) {
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'arith.extsi'(0x58bb0e25e2c0) {
%1 = "arith.extsi"(%arg3) : (i8) -> i32
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'arith.extsi'(0x58bb0e25e350) {
%2 = "arith.extsi"(%arg4) : (i8) -> i32
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'arith.muli'(0x58bb0e25ec00) {
%3 = "arith.muli"(%1, %2) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'arith.addi'(0x58bb0e25ecb0) {
%4 = "arith.addi"(%arg5, %3) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'linalg.yield'(0x58bb0e239d20) {
"linalg.yield"(%4) : (i32) -> ()
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'util.return'(0x58bb0ed97be0) {
"util.return"(%0) : (tensor<?x4xi32>) -> ()
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
// -----// IR Dump After ImportMLProgram (iree-import-ml-program) //----- //
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {cpu = "znver4", cpu_features = "+mmx,+popcnt,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+avx,+avx2,+sse4a,+fma,+avx512f,+bmi,+bmi2,+aes,+pclmul,+avx512vl,+avx512bw,+avx512dq,+avx512cd,+avx512vbmi,+avx512ifma,+avx512vpopcntdq,+avx512vbmi2,+gfni,+vpclmulqdq,+avx512vnni,+avx512bitalg,+avx512bf16,+adx,+clflushopt,+clwb,+clzero,+cx16,+cx8,+f16c,+fsgsbase,+crc32,+invpcid,+rdpru,+sahf,+lzcnt,+movbe,+mwaitx,+x87,+pku,+prfchw,+rdpid,+rdrnd,+rdseed,+sha,+shstk,+vaes,+wbnoinvd,+xsave,+xsavec,+xsaveopt,+xsaves,+fxsr,+evex512", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", link_embedded = false, native_vector_size = 64 : i64, target_triple = "x86_64-unknown-linux-gnu", ukernels = "all"}>
#device_target_llvm_cpu = #hal.device.target<"llvm-cpu", [#executable_target_system_elf_x86_64_]>
module attributes {hal.device.targets = [#device_target_llvm_cpu]} {
util.func public @matmul_n4_i8(%arg0: tensor<?x?xi8>, %arg1: tensor<?x4xi8>, %arg2: tensor<?x4xi32>) -> tensor<?x4xi32> {
%0 = linalg.matmul ins(%arg0, %arg1 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%arg2 : tensor<?x4xi32>) -> tensor<?x4xi32>
util.return %0 : tensor<?x4xi32>
}
}
// -----// IR Dump Before SanitizeModuleNames (iree-sanitize-module-names) //----- //
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {cpu = "znver4", cpu_features = "+mmx,+popcnt,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+avx,+avx2,+sse4a,+fma,+avx512f,+bmi,+bmi2,+aes,+pclmul,+avx512vl,+avx512bw,+avx512dq,+avx512cd,+avx512vbmi,+avx512ifma,+avx512vpopcntdq,+avx512vbmi2,+gfni,+vpclmulqdq,+avx512vnni,+avx512bitalg,+avx512bf16,+adx,+clflushopt,+clwb,+clzero,+cx16,+cx8,+f16c,+fsgsbase,+crc32,+invpcid,+rdpru,+sahf,+lzcnt,+movbe,+mwaitx,+x87,+pku,+prfchw,+rdpid,+rdrnd,+rdseed,+sha,+shstk,+vaes,+wbnoinvd,+xsave,+xsavec,+xsaveopt,+xsaves,+fxsr,+evex512", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", link_embedded = false, native_vector_size = 64 : i64, target_triple = "x86_64-unknown-linux-gnu", ukernels = "all"}>
#device_target_llvm_cpu = #hal.device.target<"llvm-cpu", [#executable_target_system_elf_x86_64_]>
module attributes {hal.device.targets = [#device_target_llvm_cpu]} {
util.func public @matmul_n4_i8(%arg0: tensor<?x?xi8>, %arg1: tensor<?x4xi8>, %arg2: tensor<?x4xi32>) -> tensor<?x4xi32> {
%0 = linalg.matmul ins(%arg0, %arg1 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%arg2 : tensor<?x4xi32>) -> tensor<?x4xi32>
util.return %0 : tensor<?x4xi32>
}
}
// -----// IR Dump After SanitizeModuleNames (iree-sanitize-module-names) //----- //
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {cpu = "znver4", cpu_features = "+mmx,+popcnt,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+avx,+avx2,+sse4a,+fma,+avx512f,+bmi,+bmi2,+aes,+pclmul,+avx512vl,+avx512bw,+avx512dq,+avx512cd,+avx512vbmi,+avx512ifma,+avx512vpopcntdq,+avx512vbmi2,+gfni,+vpclmulqdq,+avx512vnni,+avx512bitalg,+avx512bf16,+adx,+clflushopt,+clwb,+clzero,+cx16,+cx8,+f16c,+fsgsbase,+crc32,+invpcid,+rdpru,+sahf,+lzcnt,+movbe,+mwaitx,+x87,+pku,+prfchw,+rdpid,+rdrnd,+rdseed,+sha,+shstk,+vaes,+wbnoinvd,+xsave,+xsavec,+xsaveopt,+xsaves,+fxsr,+evex512", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", link_embedded = false, native_vector_size = 64 : i64, target_triple = "x86_64-unknown-linux-gnu", ukernels = "all"}>
#device_target_llvm_cpu = #hal.device.target<"llvm-cpu", [#executable_target_system_elf_x86_64_]>
module attributes {hal.device.targets = [#device_target_llvm_cpu]} {
util.func public @matmul_n4_i8(%arg0: tensor<?x?xi8>, %arg1: tensor<?x4xi8>, %arg2: tensor<?x4xi32>) -> tensor<?x4xi32> {
%0 = linalg.matmul ins(%arg0, %arg1 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%arg2 : tensor<?x4xi32>) -> tensor<?x4xi32>
util.return %0 : tensor<?x4xi32>
}
}
// -----// IR Dump Before ConvertMeshToFlow (iree-convert-mesh-to-flow) //----- //
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {cpu = "znver4", cpu_features = "+mmx,+popcnt,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+avx,+avx2,+sse4a,+fma,+avx512f,+bmi,+bmi2,+aes,+pclmul,+avx512vl,+avx512bw,+avx512dq,+avx512cd,+avx512vbmi,+avx512ifma,+avx512vpopcntdq,+avx512vbmi2,+gfni,+vpclmulqdq,+avx512vnni,+avx512bitalg,+avx512bf16,+adx,+clflushopt,+clwb,+clzero,+cx16,+cx8,+f16c,+fsgsbase,+crc32,+invpcid,+rdpru,+sahf,+lzcnt,+movbe,+mwaitx,+x87,+pku,+prfchw,+rdpid,+rdrnd,+rdseed,+sha,+shstk,+vaes,+wbnoinvd,+xsave,+xsavec,+xsaveopt,+xsaves,+fxsr,+evex512", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", link_embedded = false, native_vector_size = 64 : i64, target_triple = "x86_64-unknown-linux-gnu", ukernels = "all"}>
#device_target_llvm_cpu = #hal.device.target<"llvm-cpu", [#executable_target_system_elf_x86_64_]>
module attributes {hal.device.targets = [#device_target_llvm_cpu]} {
util.func public @matmul_n4_i8(%arg0: tensor<?x?xi8>, %arg1: tensor<?x4xi8>, %arg2: tensor<?x4xi32>) -> tensor<?x4xi32> {
%0 = linalg.matmul ins(%arg0, %arg1 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%arg2 : tensor<?x4xi32>) -> tensor<?x4xi32>
util.return %0 : tensor<?x4xi32>
}
}
//===-------------------------------------------===//
Processing operation : 'util.return'(0x58bb0ed97be0) {
"util.return"(%0) : (tensor<?x4xi32>) -> ()
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'linalg.yield'(0x58bb0e239d20) {
"linalg.yield"(%4) : (i32) -> ()
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.addi'(0x58bb0e25ecb0) {
%4 = "arith.addi"(%arg5, %3) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.muli'(0x58bb0e25ec00) {
%3 = "arith.muli"(%1, %2) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.extsi'(0x58bb0e25e350) {
%2 = "arith.extsi"(%arg4) : (i8) -> i32
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'util.func'(0x58bb0e3766b0) {
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'linalg.matmul'(0x58bb0e221200) {
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.extsi'(0x58bb0e25e2c0) {
%1 = "arith.extsi"(%arg3) : (i8) -> i32
} -> failure : pattern failed to match
//===-------------------------------------------===//
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::HasRecursiveMemoryEffects<Empty>)
// -----// IR Dump After ConvertMeshToFlow (iree-convert-mesh-to-flow) //----- //
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {cpu = "znver4", cpu_features = "+mmx,+popcnt,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+avx,+avx2,+sse4a,+fma,+avx512f,+bmi,+bmi2,+aes,+pclmul,+avx512vl,+avx512bw,+avx512dq,+avx512cd,+avx512vbmi,+avx512ifma,+avx512vpopcntdq,+avx512vbmi2,+gfni,+vpclmulqdq,+avx512vnni,+avx512bitalg,+avx512bf16,+adx,+clflushopt,+clwb,+clzero,+cx16,+cx8,+f16c,+fsgsbase,+crc32,+invpcid,+rdpru,+sahf,+lzcnt,+movbe,+mwaitx,+x87,+pku,+prfchw,+rdpid,+rdrnd,+rdseed,+sha,+shstk,+vaes,+wbnoinvd,+xsave,+xsavec,+xsaveopt,+xsaves,+fxsr,+evex512", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", link_embedded = false, native_vector_size = 64 : i64, target_triple = "x86_64-unknown-linux-gnu", ukernels = "all"}>
#device_target_llvm_cpu = #hal.device.target<"llvm-cpu", [#executable_target_system_elf_x86_64_]>
module attributes {hal.device.targets = [#device_target_llvm_cpu]} {
util.func public @matmul_n4_i8(%arg0: tensor<?x?xi8>, %arg1: tensor<?x4xi8>, %arg2: tensor<?x4xi32>) -> tensor<?x4xi32> {
%0 = linalg.matmul ins(%arg0, %arg1 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%arg2 : tensor<?x4xi32>) -> tensor<?x4xi32>
util.return %0 : tensor<?x4xi32>
}
}
// -----// IR Dump Before mlir::iree_compiler::IREE::ABI::ConvertStreamableOpsPass (iree-abi-convert-streamable-ops) //----- //
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {cpu = "znver4", cpu_features = "+mmx,+popcnt,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+avx,+avx2,+sse4a,+fma,+avx512f,+bmi,+bmi2,+aes,+pclmul,+avx512vl,+avx512bw,+avx512dq,+avx512cd,+avx512vbmi,+avx512ifma,+avx512vpopcntdq,+avx512vbmi2,+gfni,+vpclmulqdq,+avx512vnni,+avx512bitalg,+avx512bf16,+adx,+clflushopt,+clwb,+clzero,+cx16,+cx8,+f16c,+fsgsbase,+crc32,+invpcid,+rdpru,+sahf,+lzcnt,+movbe,+mwaitx,+x87,+pku,+prfchw,+rdpid,+rdrnd,+rdseed,+sha,+shstk,+vaes,+wbnoinvd,+xsave,+xsavec,+xsaveopt,+xsaves,+fxsr,+evex512", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", link_embedded = false, native_vector_size = 64 : i64, target_triple = "x86_64-unknown-linux-gnu", ukernels = "all"}>
#device_target_llvm_cpu = #hal.device.target<"llvm-cpu", [#executable_target_system_elf_x86_64_]>
module attributes {hal.device.targets = [#device_target_llvm_cpu]} {
util.func public @matmul_n4_i8(%arg0: tensor<?x?xi8>, %arg1: tensor<?x4xi8>, %arg2: tensor<?x4xi32>) -> tensor<?x4xi32> {
%0 = linalg.matmul ins(%arg0, %arg1 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%arg2 : tensor<?x4xi32>) -> tensor<?x4xi32>
util.return %0 : tensor<?x4xi32>
}
}
// -----// IR Dump After mlir::iree_compiler::IREE::ABI::ConvertStreamableOpsPass (iree-abi-convert-streamable-ops) //----- //
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {cpu = "znver4", cpu_features = "+mmx,+popcnt,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+avx,+avx2,+sse4a,+fma,+avx512f,+bmi,+bmi2,+aes,+pclmul,+avx512vl,+avx512bw,+avx512dq,+avx512cd,+avx512vbmi,+avx512ifma,+avx512vpopcntdq,+avx512vbmi2,+gfni,+vpclmulqdq,+avx512vnni,+avx512bitalg,+avx512bf16,+adx,+clflushopt,+clwb,+clzero,+cx16,+cx8,+f16c,+fsgsbase,+crc32,+invpcid,+rdpru,+sahf,+lzcnt,+movbe,+mwaitx,+x87,+pku,+prfchw,+rdpid,+rdrnd,+rdseed,+sha,+shstk,+vaes,+wbnoinvd,+xsave,+xsavec,+xsaveopt,+xsaves,+fxsr,+evex512", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", link_embedded = false, native_vector_size = 64 : i64, target_triple = "x86_64-unknown-linux-gnu", ukernels = "all"}>
#device_target_llvm_cpu = #hal.device.target<"llvm-cpu", [#executable_target_system_elf_x86_64_]>
module attributes {hal.device.targets = [#device_target_llvm_cpu]} {
util.func public @matmul_n4_i8(%arg0: tensor<?x?xi8>, %arg1: tensor<?x4xi8>, %arg2: tensor<?x4xi32>) -> tensor<?x4xi32> {
%0 = linalg.matmul ins(%arg0, %arg1 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%arg2 : tensor<?x4xi32>) -> tensor<?x4xi32>
util.return %0 : tensor<?x4xi32>
}
}
// -----// IR Dump Before mlir::iree_compiler::IREE::ABI::WrapEntryPointsPass (iree-abi-wrap-entry-points) //----- //
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {cpu = "znver4", cpu_features = "+mmx,+popcnt,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+avx,+avx2,+sse4a,+fma,+avx512f,+bmi,+bmi2,+aes,+pclmul,+avx512vl,+avx512bw,+avx512dq,+avx512cd,+avx512vbmi,+avx512ifma,+avx512vpopcntdq,+avx512vbmi2,+gfni,+vpclmulqdq,+avx512vnni,+avx512bitalg,+avx512bf16,+adx,+clflushopt,+clwb,+clzero,+cx16,+cx8,+f16c,+fsgsbase,+crc32,+invpcid,+rdpru,+sahf,+lzcnt,+movbe,+mwaitx,+x87,+pku,+prfchw,+rdpid,+rdrnd,+rdseed,+sha,+shstk,+vaes,+wbnoinvd,+xsave,+xsavec,+xsaveopt,+xsaves,+fxsr,+evex512", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", link_embedded = false, native_vector_size = 64 : i64, target_triple = "x86_64-unknown-linux-gnu", ukernels = "all"}>
#device_target_llvm_cpu = #hal.device.target<"llvm-cpu", [#executable_target_system_elf_x86_64_]>
module attributes {hal.device.targets = [#device_target_llvm_cpu]} {
util.func public @matmul_n4_i8(%arg0: tensor<?x?xi8>, %arg1: tensor<?x4xi8>, %arg2: tensor<?x4xi32>) -> tensor<?x4xi32> {
%0 = linalg.matmul ins(%arg0, %arg1 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%arg2 : tensor<?x4xi32>) -> tensor<?x4xi32>
util.return %0 : tensor<?x4xi32>
}
}
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::OneTypedResult<mlir::IndexType>::Impl<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::detail::CallOpGenericAdaptorBase::Properties)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::arith::detail::ConstantOpGenericAdaptorBase::Properties)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::ShapedDimOpInterface::Trait<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::CallOpInterface::Trait<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::TiedOpInterface::Trait<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::SymbolUserOpInterface::Trait<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::OneTypedResult<mlir::TensorType>::Impl<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::AtLeastNOperands<1>::Impl<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::ShapeAwareOpInterface::Trait<Empty>)
// -----// IR Dump After mlir::iree_compiler::IREE::ABI::WrapEntryPointsPass (iree-abi-wrap-entry-points) //----- //
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {cpu = "znver4", cpu_features = "+mmx,+popcnt,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+avx,+avx2,+sse4a,+fma,+avx512f,+bmi,+bmi2,+aes,+pclmul,+avx512vl,+avx512bw,+avx512dq,+avx512cd,+avx512vbmi,+avx512ifma,+avx512vpopcntdq,+avx512vbmi2,+gfni,+vpclmulqdq,+avx512vnni,+avx512bitalg,+avx512bf16,+adx,+clflushopt,+clwb,+clzero,+cx16,+cx8,+f16c,+fsgsbase,+crc32,+invpcid,+rdpru,+sahf,+lzcnt,+movbe,+mwaitx,+x87,+pku,+prfchw,+rdpid,+rdrnd,+rdseed,+sha,+shstk,+vaes,+wbnoinvd,+xsave,+xsavec,+xsaveopt,+xsaves,+fxsr,+evex512", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", link_embedded = false, native_vector_size = 64 : i64, target_triple = "x86_64-unknown-linux-gnu", ukernels = "all"}>
#device_target_llvm_cpu = #hal.device.target<"llvm-cpu", [#executable_target_system_elf_x86_64_]>
module attributes {hal.device.targets = [#device_target_llvm_cpu]} {
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%7 = util.call @_matmul_n4_i8(%2, %4, %6) : (tensor<?x?xi8>, tensor<?x4xi8>, tensor<?x4xi32>) -> tensor<?x4xi32>
%c0 = arith.constant 0 : index
%dim = tensor.dim %7, %c0 : tensor<?x4xi32>
%8 = hal.tensor.export %7 "output0" : tensor<?x4xi32>{%dim} -> !hal.buffer_view
util.return %8 : !hal.buffer_view
}
util.func private @_matmul_n4_i8(%arg0: tensor<?x?xi8>, %arg1: tensor<?x4xi8>, %arg2: tensor<?x4xi32>) -> tensor<?x4xi32> {
%0 = linalg.matmul ins(%arg0, %arg1 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%arg2 : tensor<?x4xi32>) -> tensor<?x4xi32>
util.return %0 : tensor<?x4xi32>
}
}
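
(The ABI wrapper emitted above uses one idiom per dynamically shaped input: query each dynamic dimension of the !hal.buffer_view, then pass those indices as the shape operands of hal.tensor.import, the {...} suffix on the tensor type. In isolation, with hypothetical names %bv and "x", the idiom is:

%d0 = hal.buffer_view.dim<%bv : !hal.buffer_view>[0] : index
%t = hal.tensor.import %bv "x" : !hal.buffer_view -> tensor<?x4xi8>{%d0}

On the way out, hal.tensor.export mirrors it: tensor.dim recovers the result's dynamic extent and feeds the export's shape operand.)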
// -----// IR Dump Before Inliner (inline) //----- //
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {cpu = "znver4", cpu_features = "+mmx,+popcnt,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+avx,+avx2,+sse4a,+fma,+avx512f,+bmi,+bmi2,+aes,+pclmul,+avx512vl,+avx512bw,+avx512dq,+avx512cd,+avx512vbmi,+avx512ifma,+avx512vpopcntdq,+avx512vbmi2,+gfni,+vpclmulqdq,+avx512vnni,+avx512bitalg,+avx512bf16,+adx,+clflushopt,+clwb,+clzero,+cx16,+cx8,+f16c,+fsgsbase,+crc32,+invpcid,+rdpru,+sahf,+lzcnt,+movbe,+mwaitx,+x87,+pku,+prfchw,+rdpid,+rdrnd,+rdseed,+sha,+shstk,+vaes,+wbnoinvd,+xsave,+xsavec,+xsaveopt,+xsaves,+fxsr,+evex512", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", link_embedded = false, native_vector_size = 64 : i64, target_triple = "x86_64-unknown-linux-gnu", ukernels = "all"}>
#device_target_llvm_cpu = #hal.device.target<"llvm-cpu", [#executable_target_system_elf_x86_64_]>
module attributes {hal.device.targets = [#device_target_llvm_cpu]} {
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%7 = util.call @_matmul_n4_i8(%2, %4, %6) : (tensor<?x?xi8>, tensor<?x4xi8>, tensor<?x4xi32>) -> tensor<?x4xi32>
%c0 = arith.constant 0 : index
%dim = tensor.dim %7, %c0 : tensor<?x4xi32>
%8 = hal.tensor.export %7 "output0" : tensor<?x4xi32>{%dim} -> !hal.buffer_view
util.return %8 : !hal.buffer_view
}
util.func private @_matmul_n4_i8(%arg0: tensor<?x?xi8>, %arg1: tensor<?x4xi8>, %arg2: tensor<?x4xi32>) -> tensor<?x4xi32> {
%0 = linalg.matmul ins(%arg0, %arg1 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%arg2 : tensor<?x4xi32>) -> tensor<?x4xi32>
util.return %0 : tensor<?x4xi32>
}
}
ImplicitTypeIDRegistry::lookupOrInsert(mlir::CallGraph)
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
util.func private @_matmul_n4_i8(%arg0: tensor<?x?xi8>, %arg1: tensor<?x4xi8>, %arg2: tensor<?x4xi32>) -> tensor<?x4xi32> {
%0 = linalg.matmul ins(%arg0, %arg1 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%arg2 : tensor<?x4xi32>) -> tensor<?x4xi32>
util.return %0 : tensor<?x4xi32>
}
//===-------------------------------------------===//
Processing operation : 'linalg.matmul'(0x58bb0e221200) {
* Pattern FoldTensorCastProducerOp : 'linalg.matmul -> ()' {
Trying to match "FoldTensorCastProducerOp"
"FoldTensorCastProducerOp" result 0
} -> failure : pattern failed to match
* Pattern (anonymous namespace)::EraseDeadLinalgOp : 'linalg.matmul -> ()' {
Trying to match "(anonymous namespace)::EraseDeadLinalgOp"
"(anonymous namespace)::EraseDeadLinalgOp" result 0
} -> failure : pattern failed to match
* Pattern (anonymous namespace)::InferStaticShapeOfOperands : 'linalg.matmul -> ()' {
Trying to match "(anonymous namespace)::InferStaticShapeOfOperands"
"(anonymous namespace)::InferStaticShapeOfOperands" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.extsi'(0x58bb0e25e2c0) {
%1 = "arith.extsi"(%arg3) : (i8) -> i32
* Pattern (anonymous namespace)::ExtSIOfExtUI : 'arith.extsi -> (arith.extui)' {
Trying to match "(anonymous namespace)::ExtSIOfExtUI"
** Match Failure : There's no operation that defines operand 0 of castedOp0
"(anonymous namespace)::ExtSIOfExtUI" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.extsi'(0x58bb0e25e350) {
%2 = "arith.extsi"(%arg4) : (i8) -> i32
* Pattern (anonymous namespace)::ExtSIOfExtUI : 'arith.extsi -> (arith.extui)' {
Trying to match "(anonymous namespace)::ExtSIOfExtUI"
** Match Failure : There's no operation that defines operand 0 of castedOp0
"(anonymous namespace)::ExtSIOfExtUI" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.muli'(0x58bb0e25ec00) {
%3 = "arith.muli"(%1, %2) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
* Pattern (anonymous namespace)::MulIMulIConstant : 'arith.muli -> (arith.constant, arith.muli)' {
Trying to match "(anonymous namespace)::MulIMulIConstant"
** Match Failure : castedOp1 is not ::mlir::arith::MulIOp type
"(anonymous namespace)::MulIMulIConstant" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.addi'(0x58bb0e25ecb0) {
%4 = "arith.addi"(%arg5, %3) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
* Pattern (anonymous namespace)::AddIAddConstant : 'arith.addi -> (arith.addi, arith.constant)' {
Trying to match "(anonymous namespace)::AddIAddConstant"
** Match Failure : There's no operation that defines operand 0 of castedOp0
"(anonymous namespace)::AddIAddConstant" result 0
} -> failure : pattern failed to match
* Pattern (anonymous namespace)::AddISubConstantRHS : 'arith.addi -> (arith.addi, arith.constant)' {
Trying to match "(anonymous namespace)::AddISubConstantRHS"
** Match Failure : There's no operation that defines operand 0 of castedOp0
"(anonymous namespace)::AddISubConstantRHS" result 0
} -> failure : pattern failed to match
* Pattern (anonymous namespace)::AddISubConstantLHS : 'arith.addi -> (arith.constant, arith.subi)' {
Trying to match "(anonymous namespace)::AddISubConstantLHS"
** Match Failure : There's no operation that defines operand 0 of castedOp0
"(anonymous namespace)::AddISubConstantLHS" result 0
} -> failure : pattern failed to match
* Pattern (anonymous namespace)::AddIMulNegativeOneRhs : 'arith.addi -> (arith.subi)' {
Trying to match "(anonymous namespace)::AddIMulNegativeOneRhs"
** Match Failure : ::mlir::success(::mlir::matchPattern(op0->getResult(0), ::mlir::m_Constant(&arg1_0))) return ::mlir::failure
"(anonymous namespace)::AddIMulNegativeOneRhs" result 0
} -> failure : pattern failed to match
* Pattern (anonymous namespace)::AddIMulNegativeOneLhs : 'arith.addi -> (arith.subi)' {
Trying to match "(anonymous namespace)::AddIMulNegativeOneLhs"
** Match Failure : There's no operation that defines operand 0 of castedOp0
"(anonymous namespace)::AddIMulNegativeOneLhs" result 0
} -> failure : pattern failed to match
* Pattern CanonicalizeContractAdd<mlir::arith::AddIOp> : 'arith.addi -> ()' {
Trying to match "CanonicalizeContractAdd<mlir::arith::AddIOp>"
"CanonicalizeContractAdd<mlir::arith::AddIOp>" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'linalg.yield'(0x58bb0e239d20) {
"linalg.yield"(%4) : (i32) -> ()
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'util.return'(0x58bb0ed97be0) {
"util.return"(%0) : (tensor<?x4xi32>) -> ()
} -> failure : pattern failed to match
//===-------------------------------------------===//
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
util.func private @_matmul_n4_i8(%arg0: tensor<?x?xi8>, %arg1: tensor<?x4xi8>, %arg2: tensor<?x4xi32>) -> tensor<?x4xi32> {
%0 = linalg.matmul ins(%arg0, %arg1 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%arg2 : tensor<?x4xi32>) -> tensor<?x4xi32>
util.return %0 : tensor<?x4xi32>
}
* Inliner: Initial calls in SCC are: {
}
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%7 = util.call @_matmul_n4_i8(%2, %4, %6) : (tensor<?x?xi8>, tensor<?x4xi8>, tensor<?x4xi32>) -> tensor<?x4xi32>
%c0 = arith.constant 0 : index
%dim = tensor.dim %7, %c0 : tensor<?x4xi32>
%8 = hal.tensor.export %7 "output0" : tensor<?x4xi32>{%dim} -> !hal.buffer_view
util.return %8 : !hal.buffer_view
}
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0ed93100) {
%1 = "hal.buffer_view.dim"(%arg0) {index = 0 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0e21cd00) {
%2 = "hal.buffer_view.dim"(%arg0) {index = 1 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.import'(0x58bb0e267df0) {
%3 = "hal.tensor.import"(%arg0, %1, %2) {name = "input0", operandSegmentSizes = array<i32: 1, 2, 0>, target_encoding = tensor<?x?xi8>} : (!hal.buffer_view, index, index) -> tensor<?x?xi8>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0eda4180) {
%4 = "hal.buffer_view.dim"(%arg1) {index = 0 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.import'(0x58bb0e3811b0) {
%5 = "hal.tensor.import"(%arg1, %4) {name = "input1", operandSegmentSizes = array<i32: 1, 1, 0>, target_encoding = tensor<?x4xi8>} : (!hal.buffer_view, index) -> tensor<?x4xi8>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0ed97d60) {
%6 = "hal.buffer_view.dim"(%arg2) {index = 0 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.import'(0x58bb0e381390) {
%7 = "hal.tensor.import"(%arg2, %6) {name = "input2", operandSegmentSizes = array<i32: 1, 1, 0>, target_encoding = tensor<?x4xi32>} : (!hal.buffer_view, index) -> tensor<?x4xi32>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'util.call'(0x58bb0e399a40) {
%8 = "util.call"(%3, %5, %7) <{callee = @_matmul_n4_i8}> : (tensor<?x?xi8>, tensor<?x4xi8>, tensor<?x4xi32>) -> tensor<?x4xi32>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.constant'(0x58bb0ed97df0) {
%0 = "arith.constant"() <{value = 0 : index}> : () -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.dim'(0x58bb0e381570) {
%9 = "tensor.dim"(%8, %0) : (tensor<?x4xi32>, index) -> index
* Pattern mlir::iree_compiler::IREE::Util::FoldDimOp<mlir::tensor::DimOp> : 'tensor.dim -> ()' {
Trying to match "mlir::iree_compiler::IREE::Util::FoldDimOp<mlir::tensor::DimOp>"
"mlir::iree_compiler::IREE::Util::FoldDimOp<mlir::tensor::DimOp>" result 0
} -> failure : pattern failed to match
* Pattern mlir::iree_compiler::IREE::Flow::(anonymous namespace)::ResolveShapedDim : 'tensor.dim -> ()' {
Trying to match "mlir::iree_compiler::IREE::Flow::(anonymous namespace)::ResolveShapedDim"
** Match Failure : no dynamic dims found/usable
"mlir::iree_compiler::IREE::Flow::(anonymous namespace)::ResolveShapedDim" result 0
} -> failure : pattern failed to match
* Pattern mlir::iree_compiler::IREE::Flow::(anonymous namespace)::ResolveShapedDim : 'tensor.dim -> ()' {
Trying to match "mlir::iree_compiler::IREE::Flow::(anonymous namespace)::ResolveShapedDim"
** Match Failure : no dynamic dims found/usable
"mlir::iree_compiler::IREE::Flow::(anonymous namespace)::ResolveShapedDim" result 0
} -> failure : pattern failed to match
* Pattern (anonymous namespace)::DimOfToTensorFolder : 'tensor.dim -> ()' {
Trying to match "(anonymous namespace)::DimOfToTensorFolder"
"(anonymous namespace)::DimOfToTensorFolder" result 0
} -> failure : pattern failed to match
* Pattern (anonymous namespace)::FoldDimOfAllocTensorOp : 'tensor.dim -> ()' {
Trying to match "(anonymous namespace)::FoldDimOfAllocTensorOp"
"(anonymous namespace)::FoldDimOfAllocTensorOp" result 0
} -> failure : pattern failed to match
* Pattern (anonymous namespace)::DimOfForallOp : 'tensor.dim -> ()' {
Trying to match "(anonymous namespace)::DimOfForallOp"
"(anonymous namespace)::DimOfForallOp" result 0
} -> failure : pattern failed to match
* Pattern (anonymous namespace)::FoldDimOfExpandShape : 'tensor.dim -> ()' {
Trying to match "(anonymous namespace)::FoldDimOfExpandShape"
"(anonymous namespace)::FoldDimOfExpandShape" result 0
} -> failure : pattern failed to match
* Pattern (anonymous namespace)::FoldDimOfCollapseShape : 'tensor.dim -> ()' {
Trying to match "(anonymous namespace)::FoldDimOfCollapseShape"
"(anonymous namespace)::FoldDimOfCollapseShape" result 0
} -> failure : pattern failed to match
* Pattern (anonymous namespace)::FoldEmptyTensorWithDimOp : 'tensor.dim -> ()' {
Trying to match "(anonymous namespace)::FoldEmptyTensorWithDimOp"
"(anonymous namespace)::FoldEmptyTensorWithDimOp" result 0
} -> failure : pattern failed to match
* Pattern (anonymous namespace)::DimOfCastOp : 'tensor.dim -> ()' {
Trying to match "(anonymous namespace)::DimOfCastOp"
"(anonymous namespace)::DimOfCastOp" result 0
} -> failure : pattern failed to match
* Pattern (anonymous namespace)::DimOfDestStyleOp : 'tensor.dim -> ()' {
Trying to match "(anonymous namespace)::DimOfDestStyleOp"
"(anonymous namespace)::DimOfDestStyleOp" result 0
} -> failure : pattern failed to match
* Pattern (anonymous namespace)::DimOfReshapeOp : 'tensor.dim -> ()' {
Trying to match "(anonymous namespace)::DimOfReshapeOp"
"(anonymous namespace)::DimOfReshapeOp" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
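Worth noting here: every tensor.dim pattern fails for the same structural reason. The dim's source %8 is produced by an opaque util.call, so nothing in the caller yet relates dim 0 of the result to a known index value. The blocked chain, with names exactly as in the blocks above:

  %8 = util.call @_matmul_n4_i8(%3, %5, %7) : (tensor<?x?xi8>, tensor<?x4xi8>, tensor<?x4xi32>) -> tensor<?x4xi32>
  %9 = tensor.dim %8, %0 : tensor<?x4xi32>
  // the call is opaque at this point, so no pattern can tie dim 0 of %8
  // to an SSA index value; ResolveShapedDim reports
  // "no dynamic dims found/usable"

Once the inliner replaces the call with the callee body below, ResolveShapedDim gets something to work with.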
//===-------------------------------------------===//
Processing operation : 'hal.tensor.export'(0x58bb0e381750) {
%10 = "hal.tensor.export"(%8, %9) {name = "output0", operandSegmentSizes = array<i32: 1, 1, 0>, source_encoding = tensor<?x4xi32>} : (tensor<?x4xi32>, index) -> !hal.buffer_view
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'util.return'(0x58bb0eda3240) {
"util.return"(%10) : (!hal.buffer_view) -> ()
} -> failure : pattern failed to match
//===-------------------------------------------===//
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%c0 = arith.constant 0 : index
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%7 = util.call @_matmul_n4_i8(%2, %4, %6) : (tensor<?x?xi8>, tensor<?x4xi8>, tensor<?x4xi32>) -> tensor<?x4xi32>
%dim = tensor.dim %7, %c0 : tensor<?x4xi32>
%8 = hal.tensor.export %7 "output0" : tensor<?x4xi32>{%dim} -> !hal.buffer_view
util.return %8 : !hal.buffer_view
}
* Inliner: Initial calls in SCC are: {
0. %7 = util.call @_matmul_n4_i8(%2, %4, %6) : (tensor<?x?xi8>, tensor<?x4xi8>, tensor<?x4xi32>) -> tensor<?x4xi32>,
}
* Inlining call: 0. %7 = util.call @_matmul_n4_i8(%2, %4, %6) : (tensor<?x?xi8>, tensor<?x4xi8>, tensor<?x4xi32>) -> tensor<?x4xi32>
* new inlineHistory entry: 0. [%7 = util.call @_matmul_n4_i8(%2, %4, %6) : (tensor<?x?xi8>, tensor<?x4xi8>, tensor<?x4xi32>) -> tensor<?x4xi32>, root]
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%c0 = arith.constant 0 : index
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%7 = linalg.matmul ins(%2, %4 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%6 : tensor<?x4xi32>) -> tensor<?x4xi32>
%dim = tensor.dim %7, %c0 : tensor<?x4xi32>
%8 = hal.tensor.export %7 "output0" : tensor<?x4xi32>{%dim} -> !hal.buffer_view
util.return %8 : !hal.buffer_view
}
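Note that the inlined body prints as a bare linalg.matmul, yet the walk below visits arith.extsi / arith.muli / arith.addi / linalg.yield ops. Those live in the matmul's implicit region: with i8 inputs and an i32 accumulator, the named op sign-extends each operand before the multiply-accumulate. A sketch of the equivalent linalg.generic, written out only for illustration (the compiler keeps the named form; the body ops and block arguments match the blocks below):

  #map_lhs = affine_map<(m, n, k) -> (m, k)>
  #map_rhs = affine_map<(m, n, k) -> (k, n)>
  #map_acc = affine_map<(m, n, k) -> (m, n)>
  %7 = linalg.generic
      {indexing_maps = [#map_lhs, #map_rhs, #map_acc],
       iterator_types = ["parallel", "parallel", "reduction"]}
      ins(%2, %4 : tensor<?x?xi8>, tensor<?x4xi8>)
      outs(%6 : tensor<?x4xi32>) {
  ^bb0(%arg3: i8, %arg4: i8, %arg5: i32):
    %11 = arith.extsi %arg3 : i8 to i32  // the extsi ops processed below
    %12 = arith.extsi %arg4 : i8 to i32
    %13 = arith.muli %11, %12 : i32
    %14 = arith.addi %arg5, %13 : i32
    linalg.yield %14 : i32
  } -> tensor<?x4xi32>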
//===-------------------------------------------===//
Processing operation : 'arith.constant'(0x58bb0ed97df0) {
%0 = "arith.constant"() <{value = 0 : index}> : () -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0ed93100) {
%1 = "hal.buffer_view.dim"(%arg0) {index = 0 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0e21cd00) {
%2 = "hal.buffer_view.dim"(%arg0) {index = 1 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.import'(0x58bb0e267df0) {
%3 = "hal.tensor.import"(%arg0, %1, %2) {name = "input0", operandSegmentSizes = array<i32: 1, 2, 0>, target_encoding = tensor<?x?xi8>} : (!hal.buffer_view, index, index) -> tensor<?x?xi8>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0eda4180) {
%4 = "hal.buffer_view.dim"(%arg1) {index = 0 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.import'(0x58bb0e3811b0) {
%5 = "hal.tensor.import"(%arg1, %4) {name = "input1", operandSegmentSizes = array<i32: 1, 1, 0>, target_encoding = tensor<?x4xi8>} : (!hal.buffer_view, index) -> tensor<?x4xi8>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0ed97d60) {
%6 = "hal.buffer_view.dim"(%arg2) {index = 0 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.import'(0x58bb0e381390) {
%7 = "hal.tensor.import"(%arg2, %6) {name = "input2", operandSegmentSizes = array<i32: 1, 1, 0>, target_encoding = tensor<?x4xi32>} : (!hal.buffer_view, index) -> tensor<?x4xi32>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'linalg.matmul'(0x58bb0e221200) {
* Pattern FoldTensorCastProducerOp : 'linalg.matmul -> ()' {
Trying to match "FoldTensorCastProducerOp"
"FoldTensorCastProducerOp" result 0
} -> failure : pattern failed to match
* Pattern (anonymous namespace)::EraseDeadLinalgOp : 'linalg.matmul -> ()' {
Trying to match "(anonymous namespace)::EraseDeadLinalgOp"
"(anonymous namespace)::EraseDeadLinalgOp" result 0
} -> failure : pattern failed to match
* Pattern (anonymous namespace)::InferStaticShapeOfOperands : 'linalg.matmul -> ()' {
Trying to match "(anonymous namespace)::InferStaticShapeOfOperands"
"(anonymous namespace)::InferStaticShapeOfOperands" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
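The three matmul patterns fail for predictable reasons: the result has a use (ruling out EraseDeadLinalgOp), the operand types carry no extra static information, and no operand comes from a tensor.cast. For reference, a hypothetical input that FoldTensorCastProducerOp would rewrite (shapes invented for illustration):

  %cast = tensor.cast %static : tensor<8x8xi8> to tensor<?x?xi8>
  %r = linalg.matmul ins(%cast, %rhs : tensor<?x?xi8>, tensor<?x4xi8>)
                     outs(%acc : tensor<?x4xi32>) -> tensor<?x4xi32>
  // -> the cast folds away and the matmul reads tensor<8x8xi8> directly
  //    (with a compensating cast on the result if its type changes)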
//===-------------------------------------------===//
Processing operation : 'arith.extsi'(0x58bb0e25e2c0) {
%11 = "arith.extsi"(%arg3) : (i8) -> i32
* Pattern (anonymous namespace)::ExtSIOfExtUI : 'arith.extsi -> (arith.extui)' {
Trying to match "(anonymous namespace)::ExtSIOfExtUI"
** Match Failure : There's no operation that defines operand 0 of castedOp0
"(anonymous namespace)::ExtSIOfExtUI" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
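ExtSIOfExtUI fails because %arg3 is a block argument of the matmul region, not the result of an arith.extui; that is what "no operation that defines operand 0" means. The shape of IR it does rewrite (hypothetical values): sign-extending an already zero-extended value can never see a set sign bit, so one extui suffices:

  %e = arith.extui %x : i8 to i16
  %f = arith.extsi %e : i16 to i32
  // -> %f = arith.extui %x : i8 to i32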
//===-------------------------------------------===//
Processing operation : 'arith.extsi'(0x58bb0e25e350) {
%12 = "arith.extsi"(%arg4) : (i8) -> i32
* Pattern (anonymous namespace)::ExtSIOfExtUI : 'arith.extsi -> (arith.extui)' {
Trying to match "(anonymous namespace)::ExtSIOfExtUI"
** Match Failure : There's no operation that defines operand 0 of castedOp0
"(anonymous namespace)::ExtSIOfExtUI" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.muli'(0x58bb0e25ec00) {
%13 = "arith.muli"(%11, %12) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
* Pattern (anonymous namespace)::MulIMulIConstant : 'arith.muli -> (arith.constant, arith.muli)' {
Trying to match "(anonymous namespace)::MulIMulIConstant"
** Match Failure : castedOp1 is not ::mlir::arith::MulIOp type
"(anonymous namespace)::MulIMulIConstant" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
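Same story for MulIMulIConstant: it wants a multiply-by-constant feeding another multiply-by-constant, but here both operands are the extsi results above. A hypothetical input it would fold (constants invented):

  %a = arith.muli %x, %c2 : i32
  %b = arith.muli %a, %c3 : i32
  // -> %b = arith.muli %x, %c6, with %c6 = arith.constant 6 : i32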
//===-------------------------------------------===//
Processing operation : 'arith.addi'(0x58bb0e25ecb0) {
%14 = "arith.addi"(%arg5, %13) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
* Pattern (anonymous namespace)::AddIAddConstant : 'arith.addi -> (arith.addi, arith.constant)' {
Trying to match "(anonymous namespace)::AddIAddConstant"
** Match Failure : There's no operation that defines operand 0 of castedOp0
"(anonymous namespace)::AddIAddConstant" result 0
} -> failure : pattern failed to match
* Pattern (anonymous namespace)::AddISubConstantRHS : 'arith.addi -> (arith.addi, arith.constant)' {
Trying to match "(anonymous namespace)::AddISubConstantRHS"
** Match Failure : There's no operation that defines operand 0 of castedOp0
"(anonymous namespace)::AddISubConstantRHS" result 0
} -> failure : pattern failed to match
* Pattern (anonymous namespace)::AddISubConstantLHS : 'arith.addi -> (arith.constant, arith.subi)' {
Trying to match "(anonymous namespace)::AddISubConstantLHS"
** Match Failure : There's no operation that defines operand 0 of castedOp0
"(anonymous namespace)::AddISubConstantLHS" result 0
} -> failure : pattern failed to match
* Pattern (anonymous namespace)::AddIMulNegativeOneRhs : 'arith.addi -> (arith.subi)' {
Trying to match "(anonymous namespace)::AddIMulNegativeOneRhs"
** Match Failure : ::mlir::success(::mlir::matchPattern(op0->getResult(0), ::mlir::m_Constant(&arg1_0))) return ::mlir::failure
"(anonymous namespace)::AddIMulNegativeOneRhs" result 0
} -> failure : pattern failed to match
* Pattern (anonymous namespace)::AddIMulNegativeOneLhs : 'arith.addi -> (arith.subi)' {
Trying to match "(anonymous namespace)::AddIMulNegativeOneLhs"
** Match Failure : There's no operation that defines operand 0 of castedOp0
"(anonymous namespace)::AddIMulNegativeOneLhs" result 0
} -> failure : pattern failed to match
* Pattern CanonicalizeContractAdd<mlir::arith::AddIOp> : 'arith.addi -> ()' {
Trying to match "CanonicalizeContractAdd<mlir::arith::AddIOp>"
"CanonicalizeContractAdd<mlir::arith::AddIOp>" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
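All five addi patterns bail the same way: they match on the producers of the operands, and inside this region the producers are either block arguments or a plain muli with no constant in sight. As one representative, AddIAddConstant reassociates chained constant additions (hypothetical constants):

  %a = arith.addi %x, %c1 : i32
  %b = arith.addi %a, %c2 : i32
  // -> %b = arith.addi %x, %c3, with %c3 = arith.constant 3 : i32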
//===-------------------------------------------===//
Processing operation : 'linalg.yield'(0x58bb0e239d20) {
"linalg.yield"(%14) : (i32) -> ()
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.dim'(0x58bb0e381570) {
%9 = "tensor.dim"(%8, %0) : (tensor<?x4xi32>, index) -> index
* Pattern mlir::iree_compiler::IREE::Util::FoldDimOp<mlir::tensor::DimOp> : 'tensor.dim -> ()' {
Trying to match "mlir::iree_compiler::IREE::Util::FoldDimOp<mlir::tensor::DimOp>"
"mlir::iree_compiler::IREE::Util::FoldDimOp<mlir::tensor::DimOp>" result 0
} -> failure : pattern failed to match
* Pattern mlir::iree_compiler::IREE::Flow::(anonymous namespace)::ResolveShapedDim : 'tensor.dim -> ()' {
Trying to match "mlir::iree_compiler::IREE::Flow::(anonymous namespace)::ResolveShapedDim"
** Replace : 'tensor.dim'(0x58bb0e381570)
** Modified: 'hal.tensor.export'(0x58bb0e381750)
** Erase : 'tensor.dim'(0x58bb0e381570)
"mlir::iree_compiler::IREE::Flow::(anonymous namespace)::ResolveShapedDim" result 1
} -> success : pattern applied successfully
// *** IR Dump After Pattern Application ***
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%c0 = arith.constant 0 : index
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%7 = linalg.matmul ins(%2, %4 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%6 : tensor<?x4xi32>) -> tensor<?x4xi32>
%8 = hal.tensor.export %7 "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %8 : !hal.buffer_view
}
} -> success : pattern matched
//===-------------------------------------------===//
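This is the one rewrite of the run, and the dump above shows its whole effect. With the call inlined, ResolveShapedDim can chase the dim through tied shapes: %7 is the matmul result, tied to its outs init %6 in destination-passing style, and %6 = hal.tensor.import ... tensor<?x4xi32>{%5} already names its dynamic dim. In effect (values as in the dump):

  %dim = tensor.dim %7, %c0 : tensor<?x4xi32>
  // every use of %dim is rewritten to:
  %5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index

The hal.tensor.export is updated to take %5 directly, leaving %c0 without uses; the driver erases it as trivially dead in the next block.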
//===-------------------------------------------===//
Processing operation : 'arith.constant'(0x58bb0ed97df0) {
%0 = "arith.constant"() <{value = 0 : index}> : () -> index
** Erase : 'arith.constant'(0x58bb0ed97df0)
} -> success : operation is trivially dead
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.export'(0x58bb0e381750) {
%8 = "hal.tensor.export"(%7, %5) {name = "output0", operandSegmentSizes = array<i32: 1, 1, 0>, source_encoding = tensor<?x4xi32>} : (tensor<?x4xi32>, index) -> !hal.buffer_view
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'util.return'(0x58bb0eda3240) {
"util.return"(%8) : (!hal.buffer_view) -> ()
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0ed93100) {
%0 = "hal.buffer_view.dim"(%arg0) {index = 0 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0e21cd00) {
%1 = "hal.buffer_view.dim"(%arg0) {index = 1 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.import'(0x58bb0e267df0) {
%2 = "hal.tensor.import"(%arg0, %0, %1) {name = "input0", operandSegmentSizes = array<i32: 1, 2, 0>, target_encoding = tensor<?x?xi8>} : (!hal.buffer_view, index, index) -> tensor<?x?xi8>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0eda4180) {
%3 = "hal.buffer_view.dim"(%arg1) {index = 0 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.import'(0x58bb0e3811b0) {
%4 = "hal.tensor.import"(%arg1, %3) {name = "input1", operandSegmentSizes = array<i32: 1, 1, 0>, target_encoding = tensor<?x4xi8>} : (!hal.buffer_view, index) -> tensor<?x4xi8>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0ed97d60) {
%5 = "hal.buffer_view.dim"(%arg2) {index = 0 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.import'(0x58bb0e381390) {
%6 = "hal.tensor.import"(%arg2, %5) {name = "input2", operandSegmentSizes = array<i32: 1, 1, 0>, target_encoding = tensor<?x4xi32>} : (!hal.buffer_view, index) -> tensor<?x4xi32>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'linalg.matmul'(0x58bb0e221200) {
* Pattern FoldTensorCastProducerOp : 'linalg.matmul -> ()' {
Trying to match "FoldTensorCastProducerOp"
"FoldTensorCastProducerOp" result 0
} -> failure : pattern failed to match
* Pattern (anonymous namespace)::EraseDeadLinalgOp : 'linalg.matmul -> ()' {
Trying to match "(anonymous namespace)::EraseDeadLinalgOp"
"(anonymous namespace)::EraseDeadLinalgOp" result 0
} -> failure : pattern failed to match
* Pattern (anonymous namespace)::InferStaticShapeOfOperands : 'linalg.matmul -> ()' {
Trying to match "(anonymous namespace)::InferStaticShapeOfOperands"
"(anonymous namespace)::InferStaticShapeOfOperands" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.extsi'(0x58bb0e25e2c0) {
%9 = "arith.extsi"(%arg3) : (i8) -> i32
* Pattern (anonymous namespace)::ExtSIOfExtUI : 'arith.extsi -> (arith.extui)' {
Trying to match "(anonymous namespace)::ExtSIOfExtUI"
** Match Failure : There's no operation that defines operand 0 of castedOp0
"(anonymous namespace)::ExtSIOfExtUI" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.extsi'(0x58bb0e25e350) {
%10 = "arith.extsi"(%arg4) : (i8) -> i32
* Pattern (anonymous namespace)::ExtSIOfExtUI : 'arith.extsi -> (arith.extui)' {
Trying to match "(anonymous namespace)::ExtSIOfExtUI"
** Match Failure : There's no operation that defines operand 0 of castedOp0
"(anonymous namespace)::ExtSIOfExtUI" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.muli'(0x58bb0e25ec00) {
%11 = "arith.muli"(%9, %10) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
* Pattern (anonymous namespace)::MulIMulIConstant : 'arith.muli -> (arith.constant, arith.muli)' {
Trying to match "(anonymous namespace)::MulIMulIConstant"
** Match Failure : castedOp1 is not ::mlir::arith::MulIOp type
"(anonymous namespace)::MulIMulIConstant" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.addi'(0x58bb0e25ecb0) {
%12 = "arith.addi"(%arg5, %11) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
* Pattern (anonymous namespace)::AddIAddConstant : 'arith.addi -> (arith.addi, arith.constant)' {
Trying to match "(anonymous namespace)::AddIAddConstant"
** Match Failure : There's no operation that defines operand 0 of castedOp0
"(anonymous namespace)::AddIAddConstant" result 0
} -> failure : pattern failed to match
* Pattern (anonymous namespace)::AddISubConstantRHS : 'arith.addi -> (arith.addi, arith.constant)' {
Trying to match "(anonymous namespace)::AddISubConstantRHS"
** Match Failure : There's no operation that defines operand 0 of castedOp0
"(anonymous namespace)::AddISubConstantRHS" result 0
} -> failure : pattern failed to match
* Pattern (anonymous namespace)::AddISubConstantLHS : 'arith.addi -> (arith.constant, arith.subi)' {
Trying to match "(anonymous namespace)::AddISubConstantLHS"
** Match Failure : There's no operation that defines operand 0 of castedOp0
"(anonymous namespace)::AddISubConstantLHS" result 0
} -> failure : pattern failed to match
* Pattern (anonymous namespace)::AddIMulNegativeOneRhs : 'arith.addi -> (arith.subi)' {
Trying to match "(anonymous namespace)::AddIMulNegativeOneRhs"
** Match Failure : ::mlir::success(::mlir::matchPattern(op0->getResult(0), ::mlir::m_Constant(&arg1_0))) return ::mlir::failure
"(anonymous namespace)::AddIMulNegativeOneRhs" result 0
} -> failure : pattern failed to match
* Pattern (anonymous namespace)::AddIMulNegativeOneLhs : 'arith.addi -> (arith.subi)' {
Trying to match "(anonymous namespace)::AddIMulNegativeOneLhs"
** Match Failure : There's no operation that defines operand 0 of castedOp0
"(anonymous namespace)::AddIMulNegativeOneLhs" result 0
} -> failure : pattern failed to match
* Pattern CanonicalizeContractAdd<mlir::arith::AddIOp> : 'arith.addi -> ()' {
Trying to match "CanonicalizeContractAdd<mlir::arith::AddIOp>"
"CanonicalizeContractAdd<mlir::arith::AddIOp>" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'linalg.yield'(0x58bb0e239d20) {
"linalg.yield"(%12) : (i32) -> ()
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.export'(0x58bb0e381750) {
%8 = "hal.tensor.export"(%7, %5) {name = "output0", operandSegmentSizes = array<i32: 1, 1, 0>, source_encoding = tensor<?x4xi32>} : (tensor<?x4xi32>, index) -> !hal.buffer_view
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'util.return'(0x58bb0eda3240) {
"util.return"(%8) : (!hal.buffer_view) -> ()
} -> failure : pattern failed to match
//===-------------------------------------------===//
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%7 = linalg.matmul ins(%2, %4 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%6 : tensor<?x4xi32>) -> tensor<?x4xi32>
%8 = hal.tensor.export %7 "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %8 : !hal.buffer_view
}
* Inliner: Initial calls in SCC are: {
}
* Inliner: Initial calls in SCC are: {
}
// -----// IR Dump After Inliner (inline) //----- //
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {cpu = "znver4", cpu_features = "+mmx,+popcnt,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+avx,+avx2,+sse4a,+fma,+avx512f,+bmi,+bmi2,+aes,+pclmul,+avx512vl,+avx512bw,+avx512dq,+avx512cd,+avx512vbmi,+avx512ifma,+avx512vpopcntdq,+avx512vbmi2,+gfni,+vpclmulqdq,+avx512vnni,+avx512bitalg,+avx512bf16,+adx,+clflushopt,+clwb,+clzero,+cx16,+cx8,+f16c,+fsgsbase,+crc32,+invpcid,+rdpru,+sahf,+lzcnt,+movbe,+mwaitx,+x87,+pku,+prfchw,+rdpid,+rdrnd,+rdseed,+sha,+shstk,+vaes,+wbnoinvd,+xsave,+xsavec,+xsaveopt,+xsaves,+fxsr,+evex512", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", link_embedded = false, native_vector_size = 64 : i64, target_triple = "x86_64-unknown-linux-gnu", ukernels = "all"}>
#device_target_llvm_cpu = #hal.device.target<"llvm-cpu", [#executable_target_system_elf_x86_64_]>
module attributes {hal.device.targets = [#device_target_llvm_cpu]} {
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%7 = linalg.matmul ins(%2, %4 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%6 : tensor<?x4xi32>) -> tensor<?x4xi32>
%8 = hal.tensor.export %7 "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %8 : !hal.buffer_view
}
}
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%7 = linalg.matmul ins(%2, %4 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%6 : tensor<?x4xi32>) -> tensor<?x4xi32>
%8 = hal.tensor.export %7 "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %8 : !hal.buffer_view
}
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0ed93100) {
%0 = "hal.buffer_view.dim"(%arg0) {index = 0 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0e21cd00) {
%1 = "hal.buffer_view.dim"(%arg0) {index = 1 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.import'(0x58bb0e267df0) {
%2 = "hal.tensor.import"(%arg0, %0, %1) {name = "input0", operandSegmentSizes = array<i32: 1, 2, 0>, target_encoding = tensor<?x?xi8>} : (!hal.buffer_view, index, index) -> tensor<?x?xi8>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0eda4180) {
%3 = "hal.buffer_view.dim"(%arg1) {index = 0 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.import'(0x58bb0e3811b0) {
%4 = "hal.tensor.import"(%arg1, %3) {name = "input1", operandSegmentSizes = array<i32: 1, 1, 0>, target_encoding = tensor<?x4xi8>} : (!hal.buffer_view, index) -> tensor<?x4xi8>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0ed97d60) {
%5 = "hal.buffer_view.dim"(%arg2) {index = 0 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.import'(0x58bb0e381390) {
%6 = "hal.tensor.import"(%arg2, %5) {name = "input2", operandSegmentSizes = array<i32: 1, 1, 0>, target_encoding = tensor<?x4xi32>} : (!hal.buffer_view, index) -> tensor<?x4xi32>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'linalg.matmul'(0x58bb0e221200) {
* Pattern FoldTensorCastProducerOp : 'linalg.matmul -> ()' {
Trying to match "FoldTensorCastProducerOp"
"FoldTensorCastProducerOp" result 0
} -> failure : pattern failed to match
* Pattern (anonymous namespace)::EraseDeadLinalgOp : 'linalg.matmul -> ()' {
Trying to match "(anonymous namespace)::EraseDeadLinalgOp"
"(anonymous namespace)::EraseDeadLinalgOp" result 0
} -> failure : pattern failed to match
* Pattern (anonymous namespace)::InferStaticShapeOfOperands : 'linalg.matmul -> ()' {
Trying to match "(anonymous namespace)::InferStaticShapeOfOperands"
"(anonymous namespace)::InferStaticShapeOfOperands" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.extsi'(0x58bb0e25e2c0) {
%9 = "arith.extsi"(%arg3) : (i8) -> i32
* Pattern (anonymous namespace)::ExtSIOfExtUI : 'arith.extsi -> (arith.extui)' {
Trying to match "(anonymous namespace)::ExtSIOfExtUI"
** Match Failure : There's no operation that defines operand 0 of castedOp0
"(anonymous namespace)::ExtSIOfExtUI" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.extsi'(0x58bb0e25e350) {
%10 = "arith.extsi"(%arg4) : (i8) -> i32
* Pattern (anonymous namespace)::ExtSIOfExtUI : 'arith.extsi -> (arith.extui)' {
Trying to match "(anonymous namespace)::ExtSIOfExtUI"
** Match Failure : There's no operation that defines operand 0 of castedOp0
"(anonymous namespace)::ExtSIOfExtUI" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.muli'(0x58bb0e25ec00) {
%11 = "arith.muli"(%9, %10) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
* Pattern (anonymous namespace)::MulIMulIConstant : 'arith.muli -> (arith.constant, arith.muli)' {
Trying to match "(anonymous namespace)::MulIMulIConstant"
** Match Failure : castedOp1 is not ::mlir::arith::MulIOp type
"(anonymous namespace)::MulIMulIConstant" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.addi'(0x58bb0e25ecb0) {
%12 = "arith.addi"(%arg5, %11) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
* Pattern (anonymous namespace)::AddIAddConstant : 'arith.addi -> (arith.addi, arith.constant)' {
Trying to match "(anonymous namespace)::AddIAddConstant"
** Match Failure : There's no operation that defines operand 0 of castedOp0
"(anonymous namespace)::AddIAddConstant" result 0
} -> failure : pattern failed to match
* Pattern (anonymous namespace)::AddISubConstantRHS : 'arith.addi -> (arith.addi, arith.constant)' {
Trying to match "(anonymous namespace)::AddISubConstantRHS"
** Match Failure : There's no operation that defines operand 0 of castedOp0
"(anonymous namespace)::AddISubConstantRHS" result 0
} -> failure : pattern failed to match
* Pattern (anonymous namespace)::AddISubConstantLHS : 'arith.addi -> (arith.constant, arith.subi)' {
Trying to match "(anonymous namespace)::AddISubConstantLHS"
** Match Failure : There's no operation that defines operand 0 of castedOp0
"(anonymous namespace)::AddISubConstantLHS" result 0
} -> failure : pattern failed to match
* Pattern (anonymous namespace)::AddIMulNegativeOneRhs : 'arith.addi -> (arith.subi)' {
Trying to match "(anonymous namespace)::AddIMulNegativeOneRhs"
** Match Failure : ::mlir::success(::mlir::matchPattern(op0->getResult(0), ::mlir::m_Constant(&arg1_0))) return ::mlir::failure
"(anonymous namespace)::AddIMulNegativeOneRhs" result 0
} -> failure : pattern failed to match
* Pattern (anonymous namespace)::AddIMulNegativeOneLhs : 'arith.addi -> (arith.subi)' {
Trying to match "(anonymous namespace)::AddIMulNegativeOneLhs"
** Match Failure : There's no operation that defines operand 0 of castedOp0
"(anonymous namespace)::AddIMulNegativeOneLhs" result 0
} -> failure : pattern failed to match
* Pattern CanonicalizeContractAdd<mlir::arith::AddIOp> : 'arith.addi -> ()' {
Trying to match "CanonicalizeContractAdd<mlir::arith::AddIOp>"
"CanonicalizeContractAdd<mlir::arith::AddIOp>" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'linalg.yield'(0x58bb0e239d20) {
"linalg.yield"(%12) : (i32) -> ()
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.export'(0x58bb0e381750) {
%8 = "hal.tensor.export"(%7, %5) {name = "output0", operandSegmentSizes = array<i32: 1, 1, 0>, source_encoding = tensor<?x4xi32>} : (tensor<?x4xi32>, index) -> !hal.buffer_view
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'util.return'(0x58bb0eda3240) {
"util.return"(%8) : (!hal.buffer_view) -> ()
} -> failure : pattern failed to match
//===-------------------------------------------===//
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%7 = linalg.matmul ins(%2, %4 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%6 : tensor<?x4xi32>) -> tensor<?x4xi32>
%8 = hal.tensor.export %7 "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %8 : !hal.buffer_view
}
// -----// IR Dump Before CSE (cse) //----- //
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%7 = linalg.matmul ins(%2, %4 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%6 : tensor<?x4xi32>) -> tensor<?x4xi32>
%8 = hal.tensor.export %7 "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %8 : !hal.buffer_view
}
ImplicitTypeIDRegistry::lookupOrInsert(mlir::DominanceInfo)
// -----// IR Dump After CSE (cse) //----- //
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%7 = linalg.matmul ins(%2, %4 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%6 : tensor<?x4xi32>) -> tensor<?x4xi32>
%8 = hal.tensor.export %7 "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %8 : !hal.buffer_view
}
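CSE is a no-op here: the Before and After dumps are identical because every hal.buffer_view.dim differs in operand or index and nothing else repeats. A hypothetical redundancy it would collapse (assuming, as CSE requires, that the op is side-effect-free):

  %a = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
  %b = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
  // -> all uses of %b are rewritten to %a and %b is erased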
// -----// IR Dump Before SymbolDCE (symbol-dce) //----- //
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {cpu = "znver4", cpu_features = "+mmx,+popcnt,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+avx,+avx2,+sse4a,+fma,+avx512f,+bmi,+bmi2,+aes,+pclmul,+avx512vl,+avx512bw,+avx512dq,+avx512cd,+avx512vbmi,+avx512ifma,+avx512vpopcntdq,+avx512vbmi2,+gfni,+vpclmulqdq,+avx512vnni,+avx512bitalg,+avx512bf16,+adx,+clflushopt,+clwb,+clzero,+cx16,+cx8,+f16c,+fsgsbase,+crc32,+invpcid,+rdpru,+sahf,+lzcnt,+movbe,+mwaitx,+x87,+pku,+prfchw,+rdpid,+rdrnd,+rdseed,+sha,+shstk,+vaes,+wbnoinvd,+xsave,+xsavec,+xsaveopt,+xsaves,+fxsr,+evex512", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", link_embedded = false, native_vector_size = 64 : i64, target_triple = "x86_64-unknown-linux-gnu", ukernels = "all"}>
#device_target_llvm_cpu = #hal.device.target<"llvm-cpu", [#executable_target_system_elf_x86_64_]>
module attributes {hal.device.targets = [#device_target_llvm_cpu]} {
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%7 = linalg.matmul ins(%2, %4 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%6 : tensor<?x4xi32>) -> tensor<?x4xi32>
%8 = hal.tensor.export %7 "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %8 : !hal.buffer_view
}
}
// -----// IR Dump After SymbolDCE (symbol-dce) //----- //
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {cpu = "znver4", cpu_features = "+mmx,+popcnt,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+avx,+avx2,+sse4a,+fma,+avx512f,+bmi,+bmi2,+aes,+pclmul,+avx512vl,+avx512bw,+avx512dq,+avx512cd,+avx512vbmi,+avx512ifma,+avx512vpopcntdq,+avx512vbmi2,+gfni,+vpclmulqdq,+avx512vnni,+avx512bitalg,+avx512bf16,+adx,+clflushopt,+clwb,+clzero,+cx16,+cx8,+f16c,+fsgsbase,+crc32,+invpcid,+rdpru,+sahf,+lzcnt,+movbe,+mwaitx,+x87,+pku,+prfchw,+rdpid,+rdrnd,+rdseed,+sha,+shstk,+vaes,+wbnoinvd,+xsave,+xsavec,+xsaveopt,+xsaves,+fxsr,+evex512", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", link_embedded = false, native_vector_size = 64 : i64, target_triple = "x86_64-unknown-linux-gnu", ukernels = "all"}>
#device_target_llvm_cpu = #hal.device.target<"llvm-cpu", [#executable_target_system_elf_x86_64_]>
module attributes {hal.device.targets = [#device_target_llvm_cpu]} {
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%7 = linalg.matmul ins(%2, %4 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%6 : tensor<?x4xi32>) -> tensor<?x4xi32>
%8 = hal.tensor.export %7 "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %8 : !hal.buffer_view
}
}
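SymbolDCE also changes nothing visible: the only symbol left is the public, hence externally visible and live, @matmul_n4_i8; the private callee was already erased once the inliner absorbed its body. The kind of symbol it would delete (a hypothetical leftover, invented for illustration):

  util.func private @_matmul_n4_i8_unused(%arg0: tensor<?x4xi32>) -> tensor<?x4xi32> {
    util.return %arg0 : tensor<?x4xi32>
  }
  // private, no remaining references -> erased by symbol-dce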
// -----// IR Dump Before DemoteF64ToF32 (iree-util-demote-f64-to-f32) //----- //
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {cpu = "znver4", cpu_features = "+mmx,+popcnt,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+avx,+avx2,+sse4a,+fma,+avx512f,+bmi,+bmi2,+aes,+pclmul,+avx512vl,+avx512bw,+avx512dq,+avx512cd,+avx512vbmi,+avx512ifma,+avx512vpopcntdq,+avx512vbmi2,+gfni,+vpclmulqdq,+avx512vnni,+avx512bitalg,+avx512bf16,+adx,+clflushopt,+clwb,+clzero,+cx16,+cx8,+f16c,+fsgsbase,+crc32,+invpcid,+rdpru,+sahf,+lzcnt,+movbe,+mwaitx,+x87,+pku,+prfchw,+rdpid,+rdrnd,+rdseed,+sha,+shstk,+vaes,+wbnoinvd,+xsave,+xsavec,+xsaveopt,+xsaves,+fxsr,+evex512", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", link_embedded = false, native_vector_size = 64 : i64, target_triple = "x86_64-unknown-linux-gnu", ukernels = "all"}>
#device_target_llvm_cpu = #hal.device.target<"llvm-cpu", [#executable_target_system_elf_x86_64_]>
module attributes {hal.device.targets = [#device_target_llvm_cpu]} {
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%7 = linalg.matmul ins(%2, %4 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%6 : tensor<?x4xi32>) -> tensor<?x4xi32>
%8 = hal.tensor.export %7 "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %8 : !hal.buffer_view
}
}
//===-------------------------------------------===//
Legalizing operation : 'builtin.module'(0x58bb0e20ac80) {
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'util.func'(0x58bb0eda2e50) {
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'hal.buffer_view.dim'(0x58bb0ed93100) {
%0 = "hal.buffer_view.dim"(%arg0) {index = 0 : index} : (!hal.buffer_view) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'hal.buffer_view.dim'(0x58bb0e21cd00) {
%1 = "hal.buffer_view.dim"(%arg0) {index = 1 : index} : (!hal.buffer_view) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'hal.tensor.import'(0x58bb0e267df0) {
%2 = "hal.tensor.import"(%arg0, %0, %1) {name = "input0", operandSegmentSizes = array<i32: 1, 2, 0>, target_encoding = tensor<?x?xi8>} : (!hal.buffer_view, index, index) -> tensor<?x?xi8>
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'hal.buffer_view.dim'(0x58bb0eda4180) {
%3 = "hal.buffer_view.dim"(%arg1) {index = 0 : index} : (!hal.buffer_view) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'hal.tensor.import'(0x58bb0e3811b0) {
%4 = "hal.tensor.import"(%arg1, %3) {name = "input1", operandSegmentSizes = array<i32: 1, 1, 0>, target_encoding = tensor<?x4xi8>} : (!hal.buffer_view, index) -> tensor<?x4xi8>
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'hal.buffer_view.dim'(0x58bb0ed97d60) {
%5 = "hal.buffer_view.dim"(%arg2) {index = 0 : index} : (!hal.buffer_view) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'hal.tensor.import'(0x58bb0e381390) {
%6 = "hal.tensor.import"(%arg2, %5) {name = "input2", operandSegmentSizes = array<i32: 1, 1, 0>, target_encoding = tensor<?x4xi32>} : (!hal.buffer_view, index) -> tensor<?x4xi32>
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'linalg.matmul'(0x58bb0e221200) {
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'arith.extsi'(0x58bb0e25e2c0) {
%9 = "arith.extsi"(%arg3) : (i8) -> i32
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'arith.extsi'(0x58bb0e25e350) {
%10 = "arith.extsi"(%arg4) : (i8) -> i32
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'arith.muli'(0x58bb0e25ec00) {
%11 = "arith.muli"(%9, %10) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'arith.addi'(0x58bb0e25ecb0) {
%12 = "arith.addi"(%arg5, %11) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'linalg.yield'(0x58bb0e239d20) {
"linalg.yield"(%12) : (i32) -> ()
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'hal.tensor.export'(0x58bb0e381750) {
%8 = "hal.tensor.export"(%7, %5) {name = "output0", operandSegmentSizes = array<i32: 1, 1, 0>, source_encoding = tensor<?x4xi32>} : (tensor<?x4xi32>, index) -> !hal.buffer_view
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'util.return'(0x58bb0eda3240) {
"util.return"(%8) : (!hal.buffer_view) -> ()
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
// -----// IR Dump After DemoteF64ToF32 (iree-util-demote-f64-to-f32) //----- //
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {cpu = "znver4", cpu_features = "+mmx,+popcnt,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+avx,+avx2,+sse4a,+fma,+avx512f,+bmi,+bmi2,+aes,+pclmul,+avx512vl,+avx512bw,+avx512dq,+avx512cd,+avx512vbmi,+avx512ifma,+avx512vpopcntdq,+avx512vbmi2,+gfni,+vpclmulqdq,+avx512vnni,+avx512bitalg,+avx512bf16,+adx,+clflushopt,+clwb,+clzero,+cx16,+cx8,+f16c,+fsgsbase,+crc32,+invpcid,+rdpru,+sahf,+lzcnt,+movbe,+mwaitx,+x87,+pku,+prfchw,+rdpid,+rdrnd,+rdseed,+sha,+shstk,+vaes,+wbnoinvd,+xsave,+xsavec,+xsaveopt,+xsaves,+fxsr,+evex512", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", link_embedded = false, native_vector_size = 64 : i64, target_triple = "x86_64-unknown-linux-gnu", ukernels = "all"}>
#device_target_llvm_cpu = #hal.device.target<"llvm-cpu", [#executable_target_system_elf_x86_64_]>
module attributes {hal.device.targets = [#device_target_llvm_cpu]} {
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%7 = linalg.matmul ins(%2, %4 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%6 : tensor<?x4xi32>) -> tensor<?x4xi32>
%8 = hal.tensor.export %7 "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %8 : !hal.buffer_view
}
}
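DemoteF64ToF32 runs through the dialect conversion framework, which is why the log switches from "Processing operation" to "Legalizing operation": every op here is already f64-free, so each is marked legal by the target and the module comes out unchanged. What the pass exists to rewrite (hypothetical f64 code):

  %big = arith.constant 1.000000e+00 : f64
  // -> after the pass:
  %small = arith.constant 1.000000e+00 : f32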
// -----// IR Dump Before RemoveZeroExtentTensors (iree-global-opt-remove-zero-extent-tensors) //----- //
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%7 = linalg.matmul ins(%2, %4 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%6 : tensor<?x4xi32>) -> tensor<?x4xi32>
%8 = hal.tensor.export %7 "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %8 : !hal.buffer_view
}
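RemoveZeroExtentTensors walks every op with the single ReplaceZeroExtentOperands pattern; it fails everywhere below because no operand is a zero-extent tensor. My reading of the pass, sketched with invented values: a tensor with a zero-length dimension holds no elements, so such an operand can be swapped for a fresh empty tensor:

  %z = tensor.extract_slice %t[0, 0] [0, 4] [1, 1] : tensor<?x4xi32> to tensor<0x4xi32>
  // zero rows -> no data; the pattern would substitute:
  %empty = tensor.empty() : tensor<0x4xi32>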
//===-------------------------------------------===//
Processing operation : 'util.return'(0x58bb0eda3240) {
"util.return"(%8) : (!hal.buffer_view) -> ()
* Pattern mlir::iree_compiler::GlobalOptimization::ReplaceZeroExtentOperands : 'util.return -> ()' {
Trying to match "mlir::iree_compiler::GlobalOptimization::ReplaceZeroExtentOperands"
"mlir::iree_compiler::GlobalOptimization::ReplaceZeroExtentOperands" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.export'(0x58bb0e381750) {
%8 = "hal.tensor.export"(%7, %5) {name = "output0", operandSegmentSizes = array<i32: 1, 1, 0>, source_encoding = tensor<?x4xi32>} : (tensor<?x4xi32>, index) -> !hal.buffer_view
* Pattern mlir::iree_compiler::GlobalOptimization::ReplaceZeroExtentOperands : 'hal.tensor.export -> ()' {
Trying to match "mlir::iree_compiler::GlobalOptimization::ReplaceZeroExtentOperands"
"mlir::iree_compiler::GlobalOptimization::ReplaceZeroExtentOperands" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'linalg.yield'(0x58bb0e239d20) {
"linalg.yield"(%12) : (i32) -> ()
* Pattern mlir::iree_compiler::GlobalOptimization::ReplaceZeroExtentOperands : 'linalg.yield -> ()' {
Trying to match "mlir::iree_compiler::GlobalOptimization::ReplaceZeroExtentOperands"
"mlir::iree_compiler::GlobalOptimization::ReplaceZeroExtentOperands" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.addi'(0x58bb0e25ecb0) {
%12 = "arith.addi"(%arg5, %11) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
* Pattern mlir::iree_compiler::GlobalOptimization::ReplaceZeroExtentOperands : 'arith.addi -> ()' {
Trying to match "mlir::iree_compiler::GlobalOptimization::ReplaceZeroExtentOperands"
"mlir::iree_compiler::GlobalOptimization::ReplaceZeroExtentOperands" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.muli'(0x58bb0e25ec00) {
%11 = "arith.muli"(%9, %10) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
* Pattern mlir::iree_compiler::GlobalOptimization::ReplaceZeroExtentOperands : 'arith.muli -> ()' {
Trying to match "mlir::iree_compiler::GlobalOptimization::ReplaceZeroExtentOperands"
"mlir::iree_compiler::GlobalOptimization::ReplaceZeroExtentOperands" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.extsi'(0x58bb0e25e350) {
%10 = "arith.extsi"(%arg4) : (i8) -> i32
* Pattern mlir::iree_compiler::GlobalOptimization::ReplaceZeroExtentOperands : 'arith.extsi -> ()' {
Trying to match "mlir::iree_compiler::GlobalOptimization::ReplaceZeroExtentOperands"
"mlir::iree_compiler::GlobalOptimization::ReplaceZeroExtentOperands" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'linalg.matmul'(0x58bb0e221200) {
* Pattern mlir::iree_compiler::GlobalOptimization::ReplaceZeroExtentOperands : 'linalg.matmul -> ()' {
Trying to match "mlir::iree_compiler::GlobalOptimization::ReplaceZeroExtentOperands"
"mlir::iree_compiler::GlobalOptimization::ReplaceZeroExtentOperands" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.extsi'(0x58bb0e25e2c0) {
%9 = "arith.extsi"(%arg3) : (i8) -> i32
* Pattern mlir::iree_compiler::GlobalOptimization::ReplaceZeroExtentOperands : 'arith.extsi -> ()' {
Trying to match "mlir::iree_compiler::GlobalOptimization::ReplaceZeroExtentOperands"
"mlir::iree_compiler::GlobalOptimization::ReplaceZeroExtentOperands" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.import'(0x58bb0e381390) {
%6 = "hal.tensor.import"(%arg2, %5) {name = "input2", operandSegmentSizes = array<i32: 1, 1, 0>, target_encoding = tensor<?x4xi32>} : (!hal.buffer_view, index) -> tensor<?x4xi32>
* Pattern mlir::iree_compiler::GlobalOptimization::ReplaceZeroExtentOperands : 'hal.tensor.import -> ()' {
Trying to match "mlir::iree_compiler::GlobalOptimization::ReplaceZeroExtentOperands"
"mlir::iree_compiler::GlobalOptimization::ReplaceZeroExtentOperands" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0ed97d60) {
%5 = "hal.buffer_view.dim"(%arg2) {index = 0 : index} : (!hal.buffer_view) -> index
* Pattern mlir::iree_compiler::GlobalOptimization::ReplaceZeroExtentOperands : 'hal.buffer_view.dim -> ()' {
Trying to match "mlir::iree_compiler::GlobalOptimization::ReplaceZeroExtentOperands"
"mlir::iree_compiler::GlobalOptimization::ReplaceZeroExtentOperands" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.import'(0x58bb0e3811b0) {
%4 = "hal.tensor.import"(%arg1, %3) {name = "input1", operandSegmentSizes = array<i32: 1, 1, 0>, target_encoding = tensor<?x4xi8>} : (!hal.buffer_view, index) -> tensor<?x4xi8>
* Pattern mlir::iree_compiler::GlobalOptimization::ReplaceZeroExtentOperands : 'hal.tensor.import -> ()' {
Trying to match "mlir::iree_compiler::GlobalOptimization::ReplaceZeroExtentOperands"
"mlir::iree_compiler::GlobalOptimization::ReplaceZeroExtentOperands" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0eda4180) {
%3 = "hal.buffer_view.dim"(%arg1) {index = 0 : index} : (!hal.buffer_view) -> index
* Pattern mlir::iree_compiler::GlobalOptimization::ReplaceZeroExtentOperands : 'hal.buffer_view.dim -> ()' {
Trying to match "mlir::iree_compiler::GlobalOptimization::ReplaceZeroExtentOperands"
"mlir::iree_compiler::GlobalOptimization::ReplaceZeroExtentOperands" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.import'(0x58bb0e267df0) {
%2 = "hal.tensor.import"(%arg0, %0, %1) {name = "input0", operandSegmentSizes = array<i32: 1, 2, 0>, target_encoding = tensor<?x?xi8>} : (!hal.buffer_view, index, index) -> tensor<?x?xi8>
* Pattern mlir::iree_compiler::GlobalOptimization::ReplaceZeroExtentOperands : 'hal.tensor.import -> ()' {
Trying to match "mlir::iree_compiler::GlobalOptimization::ReplaceZeroExtentOperands"
"mlir::iree_compiler::GlobalOptimization::ReplaceZeroExtentOperands" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0e21cd00) {
%1 = "hal.buffer_view.dim"(%arg0) {index = 1 : index} : (!hal.buffer_view) -> index
* Pattern mlir::iree_compiler::GlobalOptimization::ReplaceZeroExtentOperands : 'hal.buffer_view.dim -> ()' {
Trying to match "mlir::iree_compiler::GlobalOptimization::ReplaceZeroExtentOperands"
"mlir::iree_compiler::GlobalOptimization::ReplaceZeroExtentOperands" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0ed93100) {
%0 = "hal.buffer_view.dim"(%arg0) {index = 0 : index} : (!hal.buffer_view) -> index
* Pattern mlir::iree_compiler::GlobalOptimization::ReplaceZeroExtentOperands : 'hal.buffer_view.dim -> ()' {
Trying to match "mlir::iree_compiler::GlobalOptimization::ReplaceZeroExtentOperands"
"mlir::iree_compiler::GlobalOptimization::ReplaceZeroExtentOperands" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
// -----// IR Dump After RemoveZeroExtentTensors (iree-global-opt-remove-zero-extent-tensors) //----- //
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%7 = linalg.matmul ins(%2, %4 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%6 : tensor<?x4xi32>) -> tensor<?x4xi32>
%8 = hal.tensor.export %7 "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %8 : !hal.buffer_view
}
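
Note on the trace above: RemoveZeroExtentTensors applies the single pattern ReplaceZeroExtentOperands to every operation, looking for tensor operands with a statically known zero-sized dimension so they can be replaced by a fresh tensor.empty (a zero-element tensor carries no data, and the substitution lets its producer become dead). Every shape in this module is ?x?xi8, ?x4xi8, or ?x4xi32, none provably zero-extent, so each probe ends in "pattern failed to match" and the dump after the pass is identical to the dump before it. A minimal sketch of IR the pattern does target, with hypothetical shapes not taken from this module, showing the state after a zero-extent %lhs has been swapped for tensor.empty:

  %empty = tensor.empty() : tensor<4x0xi8>
  %mm = linalg.matmul ins(%empty, %rhs : tensor<4x0xi8>, tensor<0x4xi8>)
                      outs(%acc : tensor<4x4xi32>) -> tensor<4x4xi32>

The matmul then reduces over an empty K dimension, and later folding can replace it with %acc outright.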
// -----// IR Dump Before DetachElementwiseFromNamedOps (iree-global-opt-detach-elementwise-from-named-ops) //----- //
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%7 = linalg.matmul ins(%2, %4 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%6 : tensor<?x4xi32>) -> tensor<?x4xi32>
%8 = hal.tensor.export %7 "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %8 : !hal.buffer_view
}
//===-------------------------------------------===//
Processing operation : 'util.return'(0x58bb0eda3240) {
"util.return"(%8) : (!hal.buffer_view) -> ()
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.export'(0x58bb0e381750) {
%8 = "hal.tensor.export"(%7, %5) {name = "output0", operandSegmentSizes = array<i32: 1, 1, 0>, source_encoding = tensor<?x4xi32>} : (tensor<?x4xi32>, index) -> !hal.buffer_view
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'linalg.yield'(0x58bb0e239d20) {
"linalg.yield"(%12) : (i32) -> ()
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.addi'(0x58bb0e25ecb0) {
%12 = "arith.addi"(%arg5, %11) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.muli'(0x58bb0e25ec00) {
%11 = "arith.muli"(%9, %10) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.extsi'(0x58bb0e25e350) {
%10 = "arith.extsi"(%arg4) : (i8) -> i32
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'linalg.matmul'(0x58bb0e221200) {
* Pattern mlir::iree_compiler::GlobalOptimization::(anonymous namespace)::DetachElementwisePattern : 'linalg.matmul -> ()' {
Trying to match "mlir::iree_compiler::GlobalOptimization::(anonymous namespace)::DetachElementwisePattern"
"mlir::iree_compiler::GlobalOptimization::(anonymous namespace)::DetachElementwisePattern" result 0
} -> failure : pattern failed to match
* Pattern mlir::iree_compiler::GlobalOptimization::(anonymous namespace)::DetachSplatConstantOutsOperands<mlir::linalg::LinalgOp> : 'linalg.matmul -> ()' {
Trying to match "mlir::iree_compiler::GlobalOptimization::(anonymous namespace)::DetachSplatConstantOutsOperands<mlir::linalg::LinalgOp>"
"mlir::iree_compiler::GlobalOptimization::(anonymous namespace)::DetachSplatConstantOutsOperands<mlir::linalg::LinalgOp>" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.extsi'(0x58bb0e25e2c0) {
%9 = "arith.extsi"(%arg3) : (i8) -> i32
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.import'(0x58bb0e381390) {
%6 = "hal.tensor.import"(%arg2, %5) {name = "input2", operandSegmentSizes = array<i32: 1, 1, 0>, target_encoding = tensor<?x4xi32>} : (!hal.buffer_view, index) -> tensor<?x4xi32>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0ed97d60) {
%5 = "hal.buffer_view.dim"(%arg2) {index = 0 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.import'(0x58bb0e3811b0) {
%4 = "hal.tensor.import"(%arg1, %3) {name = "input1", operandSegmentSizes = array<i32: 1, 1, 0>, target_encoding = tensor<?x4xi8>} : (!hal.buffer_view, index) -> tensor<?x4xi8>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0eda4180) {
%3 = "hal.buffer_view.dim"(%arg1) {index = 0 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.import'(0x58bb0e267df0) {
%2 = "hal.tensor.import"(%arg0, %0, %1) {name = "input0", operandSegmentSizes = array<i32: 1, 2, 0>, target_encoding = tensor<?x?xi8>} : (!hal.buffer_view, index, index) -> tensor<?x?xi8>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0e21cd00) {
%1 = "hal.buffer_view.dim"(%arg0) {index = 1 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0ed93100) {
%0 = "hal.buffer_view.dim"(%arg0) {index = 0 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
// -----// IR Dump After DetachElementwiseFromNamedOps (iree-global-opt-detach-elementwise-from-named-ops) //----- //
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%7 = linalg.matmul ins(%2, %4 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%6 : tensor<?x4xi32>) -> tensor<?x4xi32>
%8 = hal.tensor.export %7 "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %8 : !hal.buffer_view
}
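
Note: DetachElementwiseFromNamedOps tries two rewrites on the matmul. DetachElementwisePattern splits a contraction or convolution whose accumulator is produced by another computation into a zero-filled matmul plus an explicit elementwise add, putting the named op into the canonical fill + matmul form that later tiling and ukernel lowering expect; DetachSplatConstantOutsOperands does the analogous rematerialization when the outs operand is a splat constant. Judging by the failures here, the first pattern appears to require the accumulator to be defined by a non-fill linalg op, and %6 comes from hal.tensor.import (and is not a splat constant), so both report "pattern failed to match". A rough sketch of the detached form, assuming a hypothetical accumulator %bias that did qualify (the real pattern emits a linalg.generic for the add; arith.addi on whole tensors is used here only for brevity):

  %c0_i32 = arith.constant 0 : i32
  %init = tensor.empty(%m) : tensor<?x4xi32>
  %fill = linalg.fill ins(%c0_i32 : i32) outs(%init : tensor<?x4xi32>) -> tensor<?x4xi32>
  %mm = linalg.matmul ins(%lhs, %rhs : tensor<?x?xi8>, tensor<?x4xi8>)
                      outs(%fill : tensor<?x4xi32>) -> tensor<?x4xi32>
  %sum = arith.addi %mm, %bias : tensor<?x4xi32>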
// -----// IR Dump Before LinalgNamedOpConversionPass (linalg-named-op-conversion) //----- //
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%7 = linalg.matmul ins(%2, %4 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%6 : tensor<?x4xi32>) -> tensor<?x4xi32>
%8 = hal.tensor.export %7 "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %8 : !hal.buffer_view
}
//===-------------------------------------------===//
Processing operation : 'util.return'(0x58bb0eda3240) {
"util.return"(%8) : (!hal.buffer_view) -> ()
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.export'(0x58bb0e381750) {
%8 = "hal.tensor.export"(%7, %5) {name = "output0", operandSegmentSizes = array<i32: 1, 1, 0>, source_encoding = tensor<?x4xi32>} : (tensor<?x4xi32>, index) -> !hal.buffer_view
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'linalg.yield'(0x58bb0e239d20) {
"linalg.yield"(%12) : (i32) -> ()
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.addi'(0x58bb0e25ecb0) {
%12 = "arith.addi"(%arg5, %11) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.muli'(0x58bb0e25ec00) {
%11 = "arith.muli"(%9, %10) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.extsi'(0x58bb0e25e350) {
%10 = "arith.extsi"(%arg4) : (i8) -> i32
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'linalg.matmul'(0x58bb0e221200) {
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.extsi'(0x58bb0e25e2c0) {
%9 = "arith.extsi"(%arg3) : (i8) -> i32
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.import'(0x58bb0e381390) {
%6 = "hal.tensor.import"(%arg2, %5) {name = "input2", operandSegmentSizes = array<i32: 1, 1, 0>, target_encoding = tensor<?x4xi32>} : (!hal.buffer_view, index) -> tensor<?x4xi32>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0ed97d60) {
%5 = "hal.buffer_view.dim"(%arg2) {index = 0 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.import'(0x58bb0e3811b0) {
%4 = "hal.tensor.import"(%arg1, %3) {name = "input1", operandSegmentSizes = array<i32: 1, 1, 0>, target_encoding = tensor<?x4xi8>} : (!hal.buffer_view, index) -> tensor<?x4xi8>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0eda4180) {
%3 = "hal.buffer_view.dim"(%arg1) {index = 0 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.import'(0x58bb0e267df0) {
%2 = "hal.tensor.import"(%arg0, %0, %1) {name = "input0", operandSegmentSizes = array<i32: 1, 2, 0>, target_encoding = tensor<?x?xi8>} : (!hal.buffer_view, index, index) -> tensor<?x?xi8>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0e21cd00) {
%1 = "hal.buffer_view.dim"(%arg0) {index = 1 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0ed93100) {
%0 = "hal.buffer_view.dim"(%arg0) {index = 0 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
// -----// IR Dump After LinalgNamedOpConversionPass (linalg-named-op-conversion) //----- //
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%7 = linalg.matmul ins(%2, %4 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%6 : tensor<?x4xi32>) -> tensor<?x4xi32>
%8 = hal.tensor.export %7 "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %8 : !hal.buffer_view
}
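
Note: LinalgNamedOpConversionPass is the upstream linalg-named-op-conversion pass; its main job is simplifying named ops with degenerate unit dimensions, most visibly rewriting depthwise convolutions whose channel multiplier is 1 into their simpler variants, schematically (operand lists omitted, shapes illustrative):

  linalg.depthwise_conv_2d_nhwc_hwcm   // filter tensor<KhxKwxCx1xT>
    ==> linalg.depthwise_conv_2d_nhwc_hwc   // filter tensor<KhxKwxCxT>

This module contains no convolutions at all, so every probe fails and the IR passes through unchanged.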
// -----// IR Dump Before Convert1X1FilterConv2DToMatmul (iree-global-opt-convert-1x1-filter-conv2d-to-matmul) //----- //
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%7 = linalg.matmul ins(%2, %4 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%6 : tensor<?x4xi32>) -> tensor<?x4xi32>
%8 = hal.tensor.export %7 "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %8 : !hal.buffer_view
}
//===-------------------------------------------===//
Processing operation : 'util.return'(0x58bb0eda3240) {
"util.return"(%8) : (!hal.buffer_view) -> ()
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.export'(0x58bb0e381750) {
%8 = "hal.tensor.export"(%7, %5) {name = "output0", operandSegmentSizes = array<i32: 1, 1, 0>, source_encoding = tensor<?x4xi32>} : (tensor<?x4xi32>, index) -> !hal.buffer_view
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'linalg.yield'(0x58bb0e239d20) {
"linalg.yield"(%12) : (i32) -> ()
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.addi'(0x58bb0e25ecb0) {
%12 = "arith.addi"(%arg5, %11) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.muli'(0x58bb0e25ec00) {
%11 = "arith.muli"(%9, %10) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.extsi'(0x58bb0e25e350) {
%10 = "arith.extsi"(%arg4) : (i8) -> i32
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'linalg.matmul'(0x58bb0e221200) {
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.extsi'(0x58bb0e25e2c0) {
%9 = "arith.extsi"(%arg3) : (i8) -> i32
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.import'(0x58bb0e381390) {
%6 = "hal.tensor.import"(%arg2, %5) {name = "input2", operandSegmentSizes = array<i32: 1, 1, 0>, target_encoding = tensor<?x4xi32>} : (!hal.buffer_view, index) -> tensor<?x4xi32>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0ed97d60) {
%5 = "hal.buffer_view.dim"(%arg2) {index = 0 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.import'(0x58bb0e3811b0) {
%4 = "hal.tensor.import"(%arg1, %3) {name = "input1", operandSegmentSizes = array<i32: 1, 1, 0>, target_encoding = tensor<?x4xi8>} : (!hal.buffer_view, index) -> tensor<?x4xi8>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0eda4180) {
%3 = "hal.buffer_view.dim"(%arg1) {index = 0 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.import'(0x58bb0e267df0) {
%2 = "hal.tensor.import"(%arg0, %0, %1) {name = "input0", operandSegmentSizes = array<i32: 1, 2, 0>, target_encoding = tensor<?x?xi8>} : (!hal.buffer_view, index, index) -> tensor<?x?xi8>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0e21cd00) {
%1 = "hal.buffer_view.dim"(%arg0) {index = 1 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0ed93100) {
%0 = "hal.buffer_view.dim"(%arg0) {index = 0 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
// -----// IR Dump After Convert1X1FilterConv2DToMatmul (iree-global-opt-convert-1x1-filter-conv2d-to-matmul) //----- //
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%7 = linalg.matmul ins(%2, %4 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%6 : tensor<?x4xi32>) -> tensor<?x4xi32>
%8 = hal.tensor.export %7 "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %8 : !hal.buffer_view
}
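
Note: Convert1X1FilterConv2DToMatmul rewrites a 2-D convolution whose filter window is 1x1 (with unit strides and dilations) into a plain matmul, since each output pixel is then just a dot product over channels. There is no conv here, so nothing matches. A rough sketch of the rewrite on hypothetical static shapes (the pass collapses the N, H, and W dimensions with tensor.collapse_shape and expands the result back afterwards):

  %conv = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>,
                                    strides = dense<1> : tensor<2xi64>}
            ins(%img, %flt : tensor<1x28x28x64xf32>, tensor<1x1x64x32xf32>)
            outs(%acc : tensor<1x28x28x32xf32>) -> tensor<1x28x28x32xf32>

  // becomes, roughly:
  %lhs = tensor.collapse_shape %img [[0, 1, 2], [3]]
           : tensor<1x28x28x64xf32> into tensor<784x64xf32>
  %rhs = tensor.collapse_shape %flt [[0, 1, 2], [3]]
           : tensor<1x1x64x32xf32> into tensor<64x32xf32>
  %dst = tensor.collapse_shape %acc [[0, 1, 2], [3]]
           : tensor<1x28x28x32xf32> into tensor<784x32xf32>
  %mm = linalg.matmul ins(%lhs, %rhs : tensor<784x64xf32>, tensor<64x32xf32>)
                      outs(%dst : tensor<784x32xf32>) -> tensor<784x32xf32>

followed by a tensor.expand_shape of %mm back to tensor<1x28x28x32xf32>.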
// -----// IR Dump Before EraseUnusedLinalgOperands (iree-global-opt-erase-unused-linalg-operands) //----- //
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {cpu = "znver4", cpu_features = "+mmx,+popcnt,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+avx,+avx2,+sse4a,+fma,+avx512f,+bmi,+bmi2,+aes,+pclmul,+avx512vl,+avx512bw,+avx512dq,+avx512cd,+avx512vbmi,+avx512ifma,+avx512vpopcntdq,+avx512vbmi2,+gfni,+vpclmulqdq,+avx512vnni,+avx512bitalg,+avx512bf16,+adx,+clflushopt,+clwb,+clzero,+cx16,+cx8,+f16c,+fsgsbase,+crc32,+invpcid,+rdpru,+sahf,+lzcnt,+movbe,+mwaitx,+x87,+pku,+prfchw,+rdpid,+rdrnd,+rdseed,+sha,+shstk,+vaes,+wbnoinvd,+xsave,+xsavec,+xsaveopt,+xsaves,+fxsr,+evex512", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", link_embedded = false, native_vector_size = 64 : i64, target_triple = "x86_64-unknown-linux-gnu", ukernels = "all"}>
#device_target_llvm_cpu = #hal.device.target<"llvm-cpu", [#executable_target_system_elf_x86_64_]>
module attributes {hal.device.targets = [#device_target_llvm_cpu]} {
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%7 = linalg.matmul ins(%2, %4 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%6 : tensor<?x4xi32>) -> tensor<?x4xi32>
%8 = hal.tensor.export %7 "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %8 : !hal.buffer_view
}
}
//===-------------------------------------------===//
Processing operation : 'util.return'(0x58bb0eda3240) {
"util.return"(%8) : (!hal.buffer_view) -> ()
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.export'(0x58bb0e381750) {
%8 = "hal.tensor.export"(%7, %5) {name = "output0", operandSegmentSizes = array<i32: 1, 1, 0>, source_encoding = tensor<?x4xi32>} : (tensor<?x4xi32>, index) -> !hal.buffer_view
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'linalg.yield'(0x58bb0e239d20) {
"linalg.yield"(%12) : (i32) -> ()
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.addi'(0x58bb0e25ecb0) {
%12 = "arith.addi"(%arg5, %11) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.muli'(0x58bb0e25ec00) {
%11 = "arith.muli"(%9, %10) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.extsi'(0x58bb0e25e350) {
%10 = "arith.extsi"(%arg4) : (i8) -> i32
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'linalg.matmul'(0x58bb0e221200) {
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.extsi'(0x58bb0e25e2c0) {
%9 = "arith.extsi"(%arg3) : (i8) -> i32
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.import'(0x58bb0e381390) {
%6 = "hal.tensor.import"(%arg2, %5) {name = "input2", operandSegmentSizes = array<i32: 1, 1, 0>, target_encoding = tensor<?x4xi32>} : (!hal.buffer_view, index) -> tensor<?x4xi32>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0ed97d60) {
%5 = "hal.buffer_view.dim"(%arg2) {index = 0 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.import'(0x58bb0e3811b0) {
%4 = "hal.tensor.import"(%arg1, %3) {name = "input1", operandSegmentSizes = array<i32: 1, 1, 0>, target_encoding = tensor<?x4xi8>} : (!hal.buffer_view, index) -> tensor<?x4xi8>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0eda4180) {
%3 = "hal.buffer_view.dim"(%arg1) {index = 0 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.import'(0x58bb0e267df0) {
%2 = "hal.tensor.import"(%arg0, %0, %1) {name = "input0", operandSegmentSizes = array<i32: 1, 2, 0>, target_encoding = tensor<?x?xi8>} : (!hal.buffer_view, index, index) -> tensor<?x?xi8>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0e21cd00) {
%1 = "hal.buffer_view.dim"(%arg0) {index = 1 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'util.func'(0x58bb0eda2e50) {
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0ed93100) {
%0 = "hal.buffer_view.dim"(%arg0) {index = 0 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
// -----// IR Dump After EraseUnusedLinalgOperands (iree-global-opt-erase-unused-linalg-operands) //----- //
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {cpu = "znver4", cpu_features = "+mmx,+popcnt,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+avx,+avx2,+sse4a,+fma,+avx512f,+bmi,+bmi2,+aes,+pclmul,+avx512vl,+avx512bw,+avx512dq,+avx512cd,+avx512vbmi,+avx512ifma,+avx512vpopcntdq,+avx512vbmi2,+gfni,+vpclmulqdq,+avx512vnni,+avx512bitalg,+avx512bf16,+adx,+clflushopt,+clwb,+clzero,+cx16,+cx8,+f16c,+fsgsbase,+crc32,+invpcid,+rdpru,+sahf,+lzcnt,+movbe,+mwaitx,+x87,+pku,+prfchw,+rdpid,+rdrnd,+rdseed,+sha,+shstk,+vaes,+wbnoinvd,+xsave,+xsavec,+xsaveopt,+xsaves,+fxsr,+evex512", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", link_embedded = false, native_vector_size = 64 : i64, target_triple = "x86_64-unknown-linux-gnu", ukernels = "all"}>
#device_target_llvm_cpu = #hal.device.target<"llvm-cpu", [#executable_target_system_elf_x86_64_]>
module attributes {hal.device.targets = [#device_target_llvm_cpu]} {
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%7 = linalg.matmul ins(%2, %4 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%6 : tensor<?x4xi32>) -> tensor<?x4xi32>
%8 = hal.tensor.export %7 "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %8 : !hal.buffer_view
}
}
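
Note: EraseUnusedLinalgOperands drops results of linalg ops that have no uses, together with the outs (and, transitively, ins) operands that only fed those results; it also runs at module scope now, which is why a 'util.func' op itself shows up among the probes above. The single matmul result here is exported, so nothing is erased. A small hypothetical case the pattern would clean up, assuming only %r#0 is ever used:

  #map = affine_map<(d0) -> (d0)>
  %r:2 = linalg.generic {indexing_maps = [#map, #map, #map],
                         iterator_types = ["parallel"]}
           ins(%a : tensor<4xf32>)
           outs(%o0, %o1 : tensor<4xf32>, tensor<4xf32>) {
  ^bb0(%x: f32, %y0: f32, %y1: f32):
    linalg.yield %x, %x : f32, f32
  } -> (tensor<4xf32>, tensor<4xf32>)

The unused second result and its %o1 init would be erased, leaving a single-result generic.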
// -----// IR Dump Before ExpandTensorShapes (iree-global-opt-expand-tensor-shapes) //----- //
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {cpu = "znver4", cpu_features = "+mmx,+popcnt,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+avx,+avx2,+sse4a,+fma,+avx512f,+bmi,+bmi2,+aes,+pclmul,+avx512vl,+avx512bw,+avx512dq,+avx512cd,+avx512vbmi,+avx512ifma,+avx512vpopcntdq,+avx512vbmi2,+gfni,+vpclmulqdq,+avx512vnni,+avx512bitalg,+avx512bf16,+adx,+clflushopt,+clwb,+clzero,+cx16,+cx8,+f16c,+fsgsbase,+crc32,+invpcid,+rdpru,+sahf,+lzcnt,+movbe,+mwaitx,+x87,+pku,+prfchw,+rdpid,+rdrnd,+rdseed,+sha,+shstk,+vaes,+wbnoinvd,+xsave,+xsavec,+xsaveopt,+xsaves,+fxsr,+evex512", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", link_embedded = false, native_vector_size = 64 : i64, target_triple = "x86_64-unknown-linux-gnu", ukernels = "all"}>
#device_target_llvm_cpu = #hal.device.target<"llvm-cpu", [#executable_target_system_elf_x86_64_]>
module attributes {hal.device.targets = [#device_target_llvm_cpu]} {
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%7 = linalg.matmul ins(%2, %4 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%6 : tensor<?x4xi32>) -> tensor<?x4xi32>
%8 = hal.tensor.export %7 "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %8 : !hal.buffer_view
}
}
// -----// IR Dump After ExpandTensorShapes (iree-global-opt-expand-tensor-shapes) //----- //
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {cpu = "znver4", cpu_features = "+mmx,+popcnt,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+avx,+avx2,+sse4a,+fma,+avx512f,+bmi,+bmi2,+aes,+pclmul,+avx512vl,+avx512bw,+avx512dq,+avx512cd,+avx512vbmi,+avx512ifma,+avx512vpopcntdq,+avx512vbmi2,+gfni,+vpclmulqdq,+avx512vnni,+avx512bitalg,+avx512bf16,+adx,+clflushopt,+clwb,+clzero,+cx16,+cx8,+f16c,+fsgsbase,+crc32,+invpcid,+rdpru,+sahf,+lzcnt,+movbe,+mwaitx,+x87,+pku,+prfchw,+rdpid,+rdrnd,+rdseed,+sha,+shstk,+vaes,+wbnoinvd,+xsave,+xsavec,+xsaveopt,+xsaves,+fxsr,+evex512", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", link_embedded = false, native_vector_size = 64 : i64, target_triple = "x86_64-unknown-linux-gnu", ukernels = "all"}>
#device_target_llvm_cpu = #hal.device.target<"llvm-cpu", [#executable_target_system_elf_x86_64_]>
module attributes {hal.device.targets = [#device_target_llvm_cpu]} {
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%7 = linalg.matmul ins(%2, %4 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%6 : tensor<?x4xi32>) -> tensor<?x4xi32>
%8 = hal.tensor.export %7 "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %8 : !hal.buffer_view
}
}
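
Note: ExpandTensorShapes is not a pattern-driven pass, which is why no 'Processing operation' probes appear between the two dumps above. It expands dynamically shaped tensors held in globals or passed across function and branch boundaries into an explicit tensor-plus-dims form so the dynamic dimensions stay available as SSA values, re-associating them at load sites. This module has no tensor globals or internal calls, and the public ABI already threads its dims (%0, %1, %3, %5) explicitly, so the dump is unchanged. A hedged sketch of the global case (the @state__d0 naming and exact op mix are illustrative assumptions, not necessarily what the pass emits):

  util.global private mutable @state : tensor<?x4xi8>
  util.global private mutable @state__d0 : index
  ...
  %t = util.global.load @state : tensor<?x4xi8>
  %d = util.global.load @state__d0 : index
  %tied = flow.tensor.tie_shape %t : tensor<?x4xi8>{%d}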
// -----// IR Dump Before ConvertElementwiseToLinalgPass (convert-elementwise-to-linalg) //----- //
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%7 = linalg.matmul ins(%2, %4 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%6 : tensor<?x4xi32>) -> tensor<?x4xi32>
%8 = hal.tensor.export %7 "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %8 : !hal.buffer_view
}
//===-------------------------------------------===//
Legalizing operation : 'util.func'(0x58bb0eda2e50) {
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'hal.buffer_view.dim'(0x58bb0ed93100) {
%0 = "hal.buffer_view.dim"(%arg0) {index = 0 : index} : (!hal.buffer_view) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'hal.buffer_view.dim'(0x58bb0e21cd00) {
%1 = "hal.buffer_view.dim"(%arg0) {index = 1 : index} : (!hal.buffer_view) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'hal.tensor.import'(0x58bb0e267df0) {
%2 = "hal.tensor.import"(%arg0, %0, %1) {name = "input0", operandSegmentSizes = array<i32: 1, 2, 0>, target_encoding = tensor<?x?xi8>} : (!hal.buffer_view, index, index) -> tensor<?x?xi8>
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'hal.buffer_view.dim'(0x58bb0eda4180) {
%3 = "hal.buffer_view.dim"(%arg1) {index = 0 : index} : (!hal.buffer_view) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'hal.tensor.import'(0x58bb0e3811b0) {
%4 = "hal.tensor.import"(%arg1, %3) {name = "input1", operandSegmentSizes = array<i32: 1, 1, 0>, target_encoding = tensor<?x4xi8>} : (!hal.buffer_view, index) -> tensor<?x4xi8>
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'hal.buffer_view.dim'(0x58bb0ed97d60) {
%5 = "hal.buffer_view.dim"(%arg2) {index = 0 : index} : (!hal.buffer_view) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'hal.tensor.import'(0x58bb0e381390) {
%6 = "hal.tensor.import"(%arg2, %5) {name = "input2", operandSegmentSizes = array<i32: 1, 1, 0>, target_encoding = tensor<?x4xi32>} : (!hal.buffer_view, index) -> tensor<?x4xi32>
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'linalg.matmul'(0x58bb0e221200) {
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'arith.extsi'(0x58bb0e25e2c0) {
%9 = "arith.extsi"(%arg3) : (i8) -> i32
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'arith.extsi'(0x58bb0e25e350) {
%10 = "arith.extsi"(%arg4) : (i8) -> i32
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'arith.muli'(0x58bb0e25ec00) {
%11 = "arith.muli"(%9, %10) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'arith.addi'(0x58bb0e25ecb0) {
%12 = "arith.addi"(%arg5, %11) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'linalg.yield'(0x58bb0e239d20) {
"linalg.yield"(%12) : (i32) -> ()
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'hal.tensor.export'(0x58bb0e381750) {
%8 = "hal.tensor.export"(%7, %5) {name = "output0", operandSegmentSizes = array<i32: 1, 1, 0>, source_encoding = tensor<?x4xi32>} : (tensor<?x4xi32>, index) -> !hal.buffer_view
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'util.return'(0x58bb0eda3240) {
"util.return"(%8) : (!hal.buffer_view) -> ()
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
// -----// IR Dump After ConvertElementwiseToLinalgPass (convert-elementwise-to-linalg) //----- //
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%7 = linalg.matmul ins(%2, %4 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%6 : tensor<?x4xi32>) -> tensor<?x4xi32>
%8 = hal.tensor.export %7 "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %8 : !hal.buffer_view
}
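
Note: ConvertElementwiseToLinalgPass runs as a dialect conversion rather than a greedy rewrite, which is why the trace switches from 'Processing operation' to 'Legalizing operation'. An op is illegal only when it is an elementwise-mappable op (arith, math, ...) operating on ranked tensors; the arith.extsi/muli/addi above live inside the matmul body and operate on scalars, so everything is "marked legal by the target" and no conversion happens. What the pass would do to a hypothetical tensor-level elementwise op (a sketch; the upstream pattern wires up the init slightly differently):

  %sum = arith.addi %a, %b : tensor<4xi32>

  // becomes, roughly:
  #map = affine_map<(d0) -> (d0)>
  %init = tensor.empty() : tensor<4xi32>
  %sum = linalg.generic {indexing_maps = [#map, #map, #map],
                         iterator_types = ["parallel"]}
           ins(%a, %b : tensor<4xi32>, tensor<4xi32>)
           outs(%init : tensor<4xi32>) {
  ^bb0(%x: i32, %y: i32, %out: i32):
    %s = arith.addi %x, %y : i32
    linalg.yield %s : i32
  } -> tensor<4xi32>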
// -----// IR Dump Before RaiseSpecialOps (iree-global-opt-raise-special-ops) //----- //
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%7 = linalg.matmul ins(%2, %4 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%6 : tensor<?x4xi32>) -> tensor<?x4xi32>
%8 = hal.tensor.export %7 "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %8 : !hal.buffer_view
}
//===-------------------------------------------===//
Processing operation : 'util.return'(0x58bb0eda3240) {
"util.return"(%8) : (!hal.buffer_view) -> ()
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.export'(0x58bb0e381750) {
%8 = "hal.tensor.export"(%7, %5) {name = "output0", operandSegmentSizes = array<i32: 1, 1, 0>, source_encoding = tensor<?x4xi32>} : (tensor<?x4xi32>, index) -> !hal.buffer_view
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'linalg.yield'(0x58bb0e239d20) {
"linalg.yield"(%12) : (i32) -> ()
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.addi'(0x58bb0e25ecb0) {
%12 = "arith.addi"(%arg5, %11) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.muli'(0x58bb0e25ec00) {
%11 = "arith.muli"(%9, %10) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.extsi'(0x58bb0e25e350) {
%10 = "arith.extsi"(%arg4) : (i8) -> i32
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'linalg.matmul'(0x58bb0e221200) {
* Pattern mlir::iree_compiler::GlobalOptimization::(anonymous namespace)::NamedImplicitCastOpConversion<mlir::linalg::ContractionOpInterface> : 'linalg.matmul -> ()' {
Trying to match "mlir::iree_compiler::GlobalOptimization::(anonymous namespace)::NamedImplicitCastOpConversion<mlir::linalg::ContractionOpInterface>"
"mlir::iree_compiler::GlobalOptimization::(anonymous namespace)::NamedImplicitCastOpConversion<mlir::linalg::ContractionOpInterface>" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.extsi'(0x58bb0e25e2c0) {
%9 = "arith.extsi"(%arg3) : (i8) -> i32
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.import'(0x58bb0e381390) {
%6 = "hal.tensor.import"(%arg2, %5) {name = "input2", operandSegmentSizes = array<i32: 1, 1, 0>, target_encoding = tensor<?x4xi32>} : (!hal.buffer_view, index) -> tensor<?x4xi32>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0ed97d60) {
%5 = "hal.buffer_view.dim"(%arg2) {index = 0 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.import'(0x58bb0e3811b0) {
%4 = "hal.tensor.import"(%arg1, %3) {name = "input1", operandSegmentSizes = array<i32: 1, 1, 0>, target_encoding = tensor<?x4xi8>} : (!hal.buffer_view, index) -> tensor<?x4xi8>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0eda4180) {
%3 = "hal.buffer_view.dim"(%arg1) {index = 0 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.import'(0x58bb0e267df0) {
%2 = "hal.tensor.import"(%arg0, %0, %1) {name = "input0", operandSegmentSizes = array<i32: 1, 2, 0>, target_encoding = tensor<?x?xi8>} : (!hal.buffer_view, index, index) -> tensor<?x?xi8>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0e21cd00) {
%1 = "hal.buffer_view.dim"(%arg0) {index = 1 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0ed93100) {
%0 = "hal.buffer_view.dim"(%arg0) {index = 0 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
[transform-matchers] matching: %7 = linalg.matmul ins(%2, %4 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%6 : tensor<?x4xi32>) -> tensor<?x4xi32>
[transform-matchers] op is a linalg interface implementation: 1
[transform-matchers] start recursive lhs OR match {
[transform-matchers] matching: %7 = linalg.matmul ins(%2, %4 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%6 : tensor<?x4xi32>) -> tensor<?x4xi32>
[transform-matchers] op is a linalg interface implementation: 1
[transform-matchers] operation type is one of {linalg.generic}: 0
[transform-matchers] -------
[transform-matchers] } end recursive match
[transform-matchers] start recursive rhs OR match {
[transform-matchers] matching: %7 = linalg.matmul ins(%2, %4 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%6 : tensor<?x4xi32>) -> tensor<?x4xi32>
[transform-matchers] op is a linalg interface implementation: 1
[transform-matchers] start recursive lhs OR match {
[transform-matchers] matching: %7 = linalg.matmul ins(%2, %4 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%6 : tensor<?x4xi32>) -> tensor<?x4xi32>
[transform-matchers] op is a linalg interface implementation: 1
[transform-matchers] operation type is one of {linalg.generic}: 0
[transform-matchers] -------
[transform-matchers] } end recursive match
[transform-matchers] start recursive rhs OR match {
[transform-matchers] matching: %7 = linalg.matmul ins(%2, %4 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%6 : tensor<?x4xi32>) -> tensor<?x4xi32>
[transform-matchers] op is a linalg interface implementation: 1
[transform-matchers] operation type is one of {linalg.generic}: 0
[transform-matchers] -------
[transform-matchers] } end recursive match: 0
[transform-matchers] -------
[transform-matchers] } end recursive match: 0
[transform-matchers] -------
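// Note: every branch of the matcher tree above requires a linalg.generic, so
// the named linalg.matmul reports "operation type is one of {linalg.generic}: 0"
// each time. A sketch of the generalized form such a matcher targets, assuming
// SSA values %lhs, %rhs and %acc of the matching types (hypothetical, for
// illustration; the body mirrors the %9..%12 ops in the trace, and the maps
// match the user_indexing_maps recorded by SetEncoding later in this log):
%g = linalg.generic {
    indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>,
                     affine_map<(d0, d1, d2) -> (d2, d1)>,
                     affine_map<(d0, d1, d2) -> (d0, d1)>],
    iterator_types = ["parallel", "parallel", "reduction"]}
    ins(%lhs, %rhs : tensor<?x?xi8>, tensor<?x4xi8>)
    outs(%acc : tensor<?x4xi32>) {
  ^bb0(%a: i8, %b: i8, %c: i32):
    %ae = arith.extsi %a : i8 to i32
    %be = arith.extsi %b : i8 to i32
    %m = arith.muli %ae, %be : i32
    %s = arith.addi %c, %m : i32
    linalg.yield %s : i32
} -> tensor<?x4xi32>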
// -----// IR Dump After RaiseSpecialOps (iree-global-opt-raise-special-ops) //----- //
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%7 = linalg.matmul ins(%2, %4 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%6 : tensor<?x4xi32>) -> tensor<?x4xi32>
%8 = hal.tensor.export %7 "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %8 : !hal.buffer_view
}
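// Note: per the iree.abi.declaration attribute above, the original
// matmul_n4_i8.mlir is a dynamically shaped i8 x i8 -> i32 matmul with static
// N = 4. A hedged reconstruction of that input (assumed; the input file itself
// is not reproduced in this log):
func.func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>,
                        %input2: tensor<?x4xi32>) -> tensor<?x4xi32> {
  %0 = linalg.matmul ins(%input0, %input1 : tensor<?x?xi8>, tensor<?x4xi8>)
                     outs(%input2 : tensor<?x4xi32>) -> tensor<?x4xi32>
  return %0 : tensor<?x4xi32>
}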
// -----// IR Dump Before DecomposeConcat (iree-global-opt-decompose-concat) //----- //
// (function body identical to the "IR Dump After RaiseSpecialOps" dump above; elided)
//===-------------------------------------------===//
Processing operation : 'util.return'(0x58bb0eda3240) {
"util.return"(%8) : (!hal.buffer_view) -> ()
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.export'(0x58bb0e381750) {
%8 = "hal.tensor.export"(%7, %5) {name = "output0", operandSegmentSizes = array<i32: 1, 1, 0>, source_encoding = tensor<?x4xi32>} : (tensor<?x4xi32>, index) -> !hal.buffer_view
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'linalg.yield'(0x58bb0e239d20) {
"linalg.yield"(%12) : (i32) -> ()
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.addi'(0x58bb0e25ecb0) {
%12 = "arith.addi"(%arg5, %11) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.muli'(0x58bb0e25ec00) {
%11 = "arith.muli"(%9, %10) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.extsi'(0x58bb0e25e350) {
%10 = "arith.extsi"(%arg4) : (i8) -> i32
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'linalg.matmul'(0x58bb0e221200) {
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.extsi'(0x58bb0e25e2c0) {
%9 = "arith.extsi"(%arg3) : (i8) -> i32
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.import'(0x58bb0e381390) {
%6 = "hal.tensor.import"(%arg2, %5) {name = "input2", operandSegmentSizes = array<i32: 1, 1, 0>, target_encoding = tensor<?x4xi32>} : (!hal.buffer_view, index) -> tensor<?x4xi32>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0ed97d60) {
%5 = "hal.buffer_view.dim"(%arg2) {index = 0 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.import'(0x58bb0e3811b0) {
%4 = "hal.tensor.import"(%arg1, %3) {name = "input1", operandSegmentSizes = array<i32: 1, 1, 0>, target_encoding = tensor<?x4xi8>} : (!hal.buffer_view, index) -> tensor<?x4xi8>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0eda4180) {
%3 = "hal.buffer_view.dim"(%arg1) {index = 0 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.import'(0x58bb0e267df0) {
%2 = "hal.tensor.import"(%arg0, %0, %1) {name = "input0", operandSegmentSizes = array<i32: 1, 2, 0>, target_encoding = tensor<?x?xi8>} : (!hal.buffer_view, index, index) -> tensor<?x?xi8>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0e21cd00) {
%1 = "hal.buffer_view.dim"(%arg0) {index = 1 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0ed93100) {
%0 = "hal.buffer_view.dim"(%arg0) {index = 0 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
// -----// IR Dump After DecomposeConcat (iree-global-opt-decompose-concat) //----- //
// (function body identical to the "IR Dump After RaiseSpecialOps" dump above; elided)
// -----// IR Dump Before GeneralizeLinalgNamedOps (iree-global-opt-generalize-linalg-named-ops) //----- //
// (function body identical to the "IR Dump After RaiseSpecialOps" dump above; elided)
// -----// IR Dump After GeneralizeLinalgNamedOps (iree-global-opt-generalize-linalg-named-ops) //----- //
// (function body identical to the "IR Dump After RaiseSpecialOps" dump above; elided)
// -----// IR Dump Before FoldUnitExtentDims (iree-flow-fold-unit-extent-dims) //----- //
// (function body identical to the "IR Dump After RaiseSpecialOps" dump above; elided)
//===-------------------------------------------===//
// (iree-flow-fold-unit-extent-dims worklist walk: op-by-op trace identical to
// the decompose-concat walk above; every pattern fails to match; elided)
//===-------------------------------------------===//
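// Note: FoldUnitExtentDims drops unit-extent dimensions from linalg operands,
// along the lines of (hypothetical %t, for illustration):
%collapsed = tensor.collapse_shape %t [[0, 1]] : tensor<?x1xi8> into tensor<?xi8>
// No shape in this function has a unit extent (?x?, ?x4), so the walk above
// finds nothing to rewrite.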
// -----// IR Dump After FoldUnitExtentDims (iree-flow-fold-unit-extent-dims) //----- //
// (function body identical to the "IR Dump After RaiseSpecialOps" dump above; elided)
// -----// IR Dump Before FuseDequantizationMatmul (iree-global-opt-fuse-dequantization-matmul) //----- //
// (function body identical to the "IR Dump After RaiseSpecialOps" dump above; elided)
// -----// IR Dump After FuseDequantizationMatmul (iree-global-opt-fuse-dequantization-matmul) //----- //
// (function body identical to the "IR Dump After RaiseSpecialOps" dump above; elided)
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
// (function body identical to the "IR Dump After RaiseSpecialOps" dump above; elided)
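// Note: unlike the pass-local walks above, the canonicalize run below also logs
// each candidate pattern it tries. FoldTensorCastProducerOp, for instance, would
// absorb a tensor.cast producer into the consuming op, roughly (hypothetical
// %a, %b, %c, for illustration):
%cast = tensor.cast %a : tensor<8x16xi8> to tensor<?x?xi8>
%mm = linalg.matmul ins(%cast, %b : tensor<?x?xi8>, tensor<?x4xi8>)
                    outs(%c : tensor<?x4xi32>) -> tensor<?x4xi32>
// Here the matmul operands come from hal.tensor.import ops, not casts, so the
// pattern fails to match.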
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0ed93100) {
%0 = "hal.buffer_view.dim"(%arg0) {index = 0 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0e21cd00) {
%1 = "hal.buffer_view.dim"(%arg0) {index = 1 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.import'(0x58bb0e267df0) {
%2 = "hal.tensor.import"(%arg0, %0, %1) {name = "input0", operandSegmentSizes = array<i32: 1, 2, 0>, target_encoding = tensor<?x?xi8>} : (!hal.buffer_view, index, index) -> tensor<?x?xi8>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0eda4180) {
%3 = "hal.buffer_view.dim"(%arg1) {index = 0 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.import'(0x58bb0e3811b0) {
%4 = "hal.tensor.import"(%arg1, %3) {name = "input1", operandSegmentSizes = array<i32: 1, 1, 0>, target_encoding = tensor<?x4xi8>} : (!hal.buffer_view, index) -> tensor<?x4xi8>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0ed97d60) {
%5 = "hal.buffer_view.dim"(%arg2) {index = 0 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.import'(0x58bb0e381390) {
%6 = "hal.tensor.import"(%arg2, %5) {name = "input2", operandSegmentSizes = array<i32: 1, 1, 0>, target_encoding = tensor<?x4xi32>} : (!hal.buffer_view, index) -> tensor<?x4xi32>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'linalg.matmul'(0x58bb0e221200) {
* Pattern FoldTensorCastProducerOp : 'linalg.matmul -> ()' {
Trying to match "FoldTensorCastProducerOp"
"FoldTensorCastProducerOp" result 0
} -> failure : pattern failed to match
* Pattern (anonymous namespace)::EraseDeadLinalgOp : 'linalg.matmul -> ()' {
Trying to match "(anonymous namespace)::EraseDeadLinalgOp"
"(anonymous namespace)::EraseDeadLinalgOp" result 0
} -> failure : pattern failed to match
* Pattern (anonymous namespace)::InferStaticShapeOfOperands : 'linalg.matmul -> ()' {
Trying to match "(anonymous namespace)::InferStaticShapeOfOperands"
"(anonymous namespace)::InferStaticShapeOfOperands" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.extsi'(0x58bb0e25e2c0) {
%9 = "arith.extsi"(%arg3) : (i8) -> i32
* Pattern (anonymous namespace)::ExtSIOfExtUI : 'arith.extsi -> (arith.extui)' {
Trying to match "(anonymous namespace)::ExtSIOfExtUI"
** Match Failure : There's no operation that defines operand 0 of castedOp0
"(anonymous namespace)::ExtSIOfExtUI" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.extsi'(0x58bb0e25e350) {
%10 = "arith.extsi"(%arg4) : (i8) -> i32
* Pattern (anonymous namespace)::ExtSIOfExtUI : 'arith.extsi -> (arith.extui)' {
Trying to match "(anonymous namespace)::ExtSIOfExtUI"
** Match Failure : There's no operation that defines operand 0 of castedOp0
"(anonymous namespace)::ExtSIOfExtUI" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.muli'(0x58bb0e25ec00) {
%11 = "arith.muli"(%9, %10) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
* Pattern (anonymous namespace)::MulIMulIConstant : 'arith.muli -> (arith.constant, arith.muli)' {
Trying to match "(anonymous namespace)::MulIMulIConstant"
** Match Failure : castedOp1 is not ::mlir::arith::MulIOp type
"(anonymous namespace)::MulIMulIConstant" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.addi'(0x58bb0e25ecb0) {
%12 = "arith.addi"(%arg5, %11) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
* Pattern (anonymous namespace)::AddIAddConstant : 'arith.addi -> (arith.addi, arith.constant)' {
Trying to match "(anonymous namespace)::AddIAddConstant"
** Match Failure : There's no operation that defines operand 0 of castedOp0
"(anonymous namespace)::AddIAddConstant" result 0
} -> failure : pattern failed to match
* Pattern (anonymous namespace)::AddISubConstantRHS : 'arith.addi -> (arith.addi, arith.constant)' {
Trying to match "(anonymous namespace)::AddISubConstantRHS"
** Match Failure : There's no operation that defines operand 0 of castedOp0
"(anonymous namespace)::AddISubConstantRHS" result 0
} -> failure : pattern failed to match
* Pattern (anonymous namespace)::AddISubConstantLHS : 'arith.addi -> (arith.constant, arith.subi)' {
Trying to match "(anonymous namespace)::AddISubConstantLHS"
** Match Failure : There's no operation that defines operand 0 of castedOp0
"(anonymous namespace)::AddISubConstantLHS" result 0
} -> failure : pattern failed to match
* Pattern (anonymous namespace)::AddIMulNegativeOneRhs : 'arith.addi -> (arith.subi)' {
Trying to match "(anonymous namespace)::AddIMulNegativeOneRhs"
** Match Failure : ::mlir::success(::mlir::matchPattern(op0->getResult(0), ::mlir::m_Constant(&arg1_0))) return ::mlir::failure
"(anonymous namespace)::AddIMulNegativeOneRhs" result 0
} -> failure : pattern failed to match
* Pattern (anonymous namespace)::AddIMulNegativeOneLhs : 'arith.addi -> (arith.subi)' {
Trying to match "(anonymous namespace)::AddIMulNegativeOneLhs"
** Match Failure : There's no operation that defines operand 0 of castedOp0
"(anonymous namespace)::AddIMulNegativeOneLhs" result 0
} -> failure : pattern failed to match
* Pattern CanonicalizeContractAdd<mlir::arith::AddIOp> : 'arith.addi -> ()' {
Trying to match "CanonicalizeContractAdd<mlir::arith::AddIOp>"
"CanonicalizeContractAdd<mlir::arith::AddIOp>" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
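// Note: every arith.addi canonicalization above needs a constant or a defining
// op on an operand; %arg5 is a block argument, hence the repeated "There's no
// operation that defines operand 0" failures. For reference, AddIAddConstant
// performs a rewrite of this shape (hypothetical %x, for illustration):
%c1 = arith.constant 1 : i32
%c2 = arith.constant 2 : i32
%t = arith.addi %x, %c1 : i32
%r = arith.addi %t, %c2 : i32
// ... canonicalizes to ...
%c3 = arith.constant 3 : i32
%r2 = arith.addi %x, %c3 : i32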
//===-------------------------------------------===//
Processing operation : 'linalg.yield'(0x58bb0e239d20) {
"linalg.yield"(%12) : (i32) -> ()
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.export'(0x58bb0e381750) {
%8 = "hal.tensor.export"(%7, %5) {name = "output0", operandSegmentSizes = array<i32: 1, 1, 0>, source_encoding = tensor<?x4xi32>} : (tensor<?x4xi32>, index) -> !hal.buffer_view
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'util.return'(0x58bb0eda3240) {
"util.return"(%8) : (!hal.buffer_view) -> ()
} -> failure : pattern failed to match
//===-------------------------------------------===//
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
// (function body identical to the "IR Dump After RaiseSpecialOps" dump above; elided)
// -----// IR Dump Before CSE (cse) //----- //
// (function body identical to the "IR Dump After RaiseSpecialOps" dump above; elided)
// -----// IR Dump After CSE (cse) //----- //
// (function body identical to the "IR Dump After RaiseSpecialOps" dump above; elided)
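// Note: CSE merges structurally identical, side-effect-free ops, e.g. two
// queries of the same dimension (hypothetical duplicate, for illustration):
%d0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%d1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
// %d1 would be replaced by %d0. Every dim query in this function touches a
// distinct (buffer_view, index) pair, so nothing is eliminated.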
// -----// IR Dump Before Canonicalizer (canonicalize) //----- //
// (function body identical to the "IR Dump After RaiseSpecialOps" dump above; elided)
//===-------------------------------------------===//
// (second canonicalize run: op-by-op pattern trace identical to the
// canonicalize run above; every pattern fails to match and the IR is
// unchanged; elided)
//===-------------------------------------------===//
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
// (function body identical to the "IR Dump After RaiseSpecialOps" dump above; elided)
// -----// IR Dump Before CSE (cse) //----- //
// (function body identical to the "IR Dump After RaiseSpecialOps" dump above; elided)
// -----// IR Dump After CSE (cse) //----- //
// (function body identical to the "IR Dump After RaiseSpecialOps" dump above; elided)
// -----// IR Dump Before SetEncoding (iree-global-opt-set-encoding) //----- //
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {cpu = "znver4", cpu_features = "+mmx,+popcnt,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+avx,+avx2,+sse4a,+fma,+avx512f,+bmi,+bmi2,+aes,+pclmul,+avx512vl,+avx512bw,+avx512dq,+avx512cd,+avx512vbmi,+avx512ifma,+avx512vpopcntdq,+avx512vbmi2,+gfni,+vpclmulqdq,+avx512vnni,+avx512bitalg,+avx512bf16,+adx,+clflushopt,+clwb,+clzero,+cx16,+cx8,+f16c,+fsgsbase,+crc32,+invpcid,+rdpru,+sahf,+lzcnt,+movbe,+mwaitx,+x87,+pku,+prfchw,+rdpid,+rdrnd,+rdseed,+sha,+shstk,+vaes,+wbnoinvd,+xsave,+xsavec,+xsaveopt,+xsaves,+fxsr,+evex512", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", link_embedded = false, native_vector_size = 64 : i64, target_triple = "x86_64-unknown-linux-gnu", ukernels = "all"}>
#device_target_llvm_cpu = #hal.device.target<"llvm-cpu", [#executable_target_system_elf_x86_64_]>
module attributes {hal.device.targets = [#device_target_llvm_cpu]} {
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%7 = linalg.matmul ins(%2, %4 : tensor<?x?xi8>, tensor<?x4xi8>) outs(%6 : tensor<?x4xi32>) -> tensor<?x4xi32>
%8 = hal.tensor.export %7 "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %8 : !hal.buffer_view
}
}
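// Note: this is the first dump in this stretch that prints the module
// attributes. The executable target (cpu = "znver4", +avx512vnni among the
// features, ukernels = "all") is what the data-tiling decisions below can key
// off; the matmul_narrow_N = 4 hint that appears in the encodings next
// presumably lets the narrow-N case pick tile shapes that do not pad N out to
// a full register tile.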
//===-------------------------------------------===//
Processing operation : 'util.return'(0x58bb0eda3240) {
"util.return"(%8) : (!hal.buffer_view) -> ()
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.export'(0x58bb0e381750) {
%8 = "hal.tensor.export"(%7, %5) {name = "output0", operandSegmentSizes = array<i32: 1, 1, 0>, source_encoding = tensor<?x4xi32>} : (tensor<?x4xi32>, index) -> !hal.buffer_view
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'linalg.yield'(0x58bb0e239d20) {
"linalg.yield"(%12) : (i32) -> ()
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.addi'(0x58bb0e25ecb0) {
%12 = "arith.addi"(%arg5, %11) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.muli'(0x58bb0e25ec00) {
%11 = "arith.muli"(%9, %10) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.extsi'(0x58bb0e25e350) {
%10 = "arith.extsi"(%arg4) : (i8) -> i32
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'linalg.matmul'(0x58bb0e221200) {
* Pattern mlir::iree_compiler::GlobalOptimization::(anonymous namespace)::setContractionOpEncoding : 'linalg.matmul -> ()' {
Trying to match "mlir::iree_compiler::GlobalOptimization::(anonymous namespace)::setContractionOpEncoding"
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::LinalgExt::detail::UpperBoundTileSizeOpGenericAdaptorBase::Properties)
** Insert : 'iree_linalg_ext.upper_bound_tile_size'(0x58bb0edbecd0)
** Insert : 'arith.constant'(0x58bb0ed99730)
** Insert : 'tensor.dim'(0x58bb0e381570)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::affine::detail::AffineApplyOpGenericAdaptorBase::Properties)
** Insert : 'affine.apply'(0x58bb0e3527e0)
** Insert : 'arith.constant'(0x58bb0edae850)
** Insert : 'tensor.dim'(0x58bb0e37d2f0)
** Insert : 'affine.apply'(0x58bb0e37d3a0)
** Insert : 'arith.constant'(0x58bb0edae8c0)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::tensor::detail::PadOpGenericAdaptorBase::Properties)
** Insert : 'tensor.yield'(0x58bb0eda3440)
** Insert : 'tensor.pad'(0x58bb0e352410)
** Insert : 'iree_linalg_ext.set_encoding'(0x58bb0edae7c0)
** Insert : 'iree_linalg_ext.upper_bound_tile_size'(0x58bb0e3529d0)
** Insert : 'arith.constant'(0x58bb0edaebf0)
** Insert : 'tensor.dim'(0x58bb0edaec60)
** Insert : 'affine.apply'(0x58bb0edcf7f0)
** Insert : 'arith.constant'(0x58bb0edcf8a0)
** Insert : 'tensor.dim'(0x58bb0edad7e0)
** Insert : 'affine.apply'(0x58bb0edad890)
** Insert : 'arith.constant'(0x58bb0edcf910)
** Insert : 'tensor.yield'(0x58bb0eda33c0)
** Insert : 'tensor.pad'(0x58bb0e352520)
** Insert : 'iree_linalg_ext.set_encoding'(0x58bb0edcf280)
** Insert : 'iree_linalg_ext.upper_bound_tile_size'(0x58bb0e35b0a0)
** Insert : 'arith.constant'(0x58bb0e3569a0)
** Insert : 'tensor.dim'(0x58bb0e356a10)
** Insert : 'affine.apply'(0x58bb0e356ac0)
** Insert : 'arith.constant'(0x58bb0edccea0)
** Insert : 'tensor.dim'(0x58bb0edccf10)
** Insert : 'affine.apply'(0x58bb0edccfc0)
** Insert : 'arith.constant'(0x58bb0edcd070)
** Insert : 'tensor.yield'(0x58bb0ed9ae60)
** Insert : 'tensor.pad'(0x58bb0edaf000)
** Insert : 'iree_linalg_ext.set_encoding'(0x58bb0edcf670)
** Insert : 'linalg.matmul'(0x58bb0e352320)
** Insert : 'arith.constant'(0x58bb0edcd940)
** Insert : 'tensor.dim'(0x58bb0edcd9b0)
** Insert : 'iree_linalg_ext.unset_encoding'(0x58bb0edcda60)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::tensor::detail::ExtractSliceOpGenericAdaptorBase::Properties)
** Insert : 'tensor.extract_slice'(0x58bb0ed99810)
** Replace : 'linalg.matmul'(0x58bb0e221200)
** Modified: 'hal.tensor.export'(0x58bb0e381750)
** Erase : 'linalg.yield'(0x58bb0e239d20)
** Erase : 'arith.addi'(0x58bb0e25ecb0)
** Erase : 'arith.muli'(0x58bb0e25ec00)
** Erase : 'arith.extsi'(0x58bb0e25e350)
** Erase : 'arith.extsi'(0x58bb0e25e2c0)
** Erase : 'linalg.matmul'(0x58bb0e221200)
"mlir::iree_compiler::GlobalOptimization::(anonymous namespace)::setContractionOpEncoding" result 1
} -> success : pattern applied successfully
// *** IR Dump After Pattern Application ***
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::OneTypedResult<mlir::RankedTensorType>::Impl<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::SingleBlockImplicitTerminator<mlir::tensor::YieldOp>::Impl<Empty>)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::HasParent<mlir::tensor::GenerateOp, mlir::tensor::PadOp>::Impl<Empty>)
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%7:2 = iree_linalg_ext.upper_bound_tile_size tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>> -> index, index
%c0 = arith.constant 0 : index
%dim = tensor.dim %2, %c0 : tensor<?x?xi8>
%8 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%7#0, %dim]
%c1 = arith.constant 1 : index
%dim_0 = tensor.dim %2, %c1 : tensor<?x?xi8>
%9 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%7#1, %dim_0]
%c0_i8 = arith.constant 0 : i8
%padded = tensor.pad %2 low[0, 0] high[%8, %9] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x?xi8> to tensor<?x?xi8>
%10 = iree_linalg_ext.set_encoding %padded : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%11:2 = iree_linalg_ext.upper_bound_tile_size tensor<?x4xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>> -> index, index
%c0_1 = arith.constant 0 : index
%dim_2 = tensor.dim %4, %c0_1 : tensor<?x4xi8>
%12 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%11#0, %dim_2]
%c1_3 = arith.constant 1 : index
%dim_4 = tensor.dim %4, %c1_3 : tensor<?x4xi8>
%13 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%11#1, %dim_4]
%c0_i8_5 = arith.constant 0 : i8
%padded_6 = tensor.pad %4 low[0, 0] high[%12, %13] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8_5 : i8
} : tensor<?x4xi8> to tensor<?x?xi8>
%14 = iree_linalg_ext.set_encoding %padded_6 : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%15:2 = iree_linalg_ext.upper_bound_tile_size tensor<?x4xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>> -> index, index
%c0_7 = arith.constant 0 : index
%dim_8 = tensor.dim %6, %c0_7 : tensor<?x4xi32>
%16 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%15#0, %dim_8]
%c1_9 = arith.constant 1 : index
%dim_10 = tensor.dim %6, %c1_9 : tensor<?x4xi32>
%17 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%15#1, %dim_10]
%c0_i32 = arith.constant 0 : i32
%padded_11 = tensor.pad %6 low[0, 0] high[%16, %17] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i32 : i32
} : tensor<?x4xi32> to tensor<?x?xi32>
%18 = iree_linalg_ext.set_encoding %padded_11 : tensor<?x?xi32> -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%19 = linalg.matmul ins(%10, %14 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>, tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) outs(%18 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c0_12 = arith.constant 0 : index
%dim_13 = tensor.dim %6, %c0_12 : tensor<?x4xi32>
%20 = iree_linalg_ext.unset_encoding %19 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>> -> tensor<?x?xi32>
%extracted_slice = tensor.extract_slice %20[0, 0] [%dim_13, 4] [1, 1] : tensor<?x?xi32> to tensor<?x4xi32>
%21 = hal.tensor.export %extracted_slice "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %21 : !hal.buffer_view
}
} -> success : pattern matched
//===-------------------------------------------===//
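// [annotation, not compiler output] The dump above shows the function after a
// pattern successfully matched; judging from the patterns traced below, this
// is most plausibly setContractionOpEncoding, which wraps each matmul operand
// in tensor.pad + iree_linalg_ext.set_encoding. The recurring affine map
// ()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0) computes the high padding that
// rounds a dimension s1 up to the next multiple of a tile size s0:
//
//   \operatorname{pad}(s_0, s_1) = \lceil s_1 / s_0 \rceil \cdot s_0 - s_1
//
// For example, with an illustrative tile size s0 = 16 and dim s1 = 10 (values
// assumed, not taken from this log), pad = 1 * 16 - 10 = 6.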
//===-------------------------------------------===//
Processing operation : 'hal.tensor.export'(0x58bb0e381750) {
%42 = "hal.tensor.export"(%41, %5) {name = "output0", operandSegmentSizes = array<i32: 1, 1, 0>, source_encoding = tensor<?x4xi32>} : (tensor<?x4xi32>, index) -> !hal.buffer_view
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OffsetSizeAndStrideOpInterface::Trait<Empty>)
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.extract_slice'(0x58bb0ed99810) {
%41 = "tensor.extract_slice"(%40, %39) <{operandSegmentSizes = array<i32: 1, 0, 1, 0>, static_offsets = array<i64: 0, 0>, static_sizes = array<i64: -9223372036854775808, 4>, static_strides = array<i64: 1, 1>}> : (tensor<?x?xi32>, index) -> tensor<?x4xi32>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'iree_linalg_ext.unset_encoding'(0x58bb0edcda60) {
%40 = "iree_linalg_ext.unset_encoding"(%37) : (tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) -> tensor<?x?xi32>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.dim'(0x58bb0edcd9b0) {
%39 = "tensor.dim"(%6, %38) : (tensor<?x4xi32>, index) -> index
* Pattern (anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp> : 'tensor.dim -> ()' {
Trying to match "(anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>"
"(anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.constant'(0x58bb0edcd940) {
%38 = "arith.constant"() <{value = 0 : index}> : () -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'linalg.matmul'(0x58bb0e352320) {
* Pattern mlir::iree_compiler::GlobalOptimization::(anonymous namespace)::setContractionOpEncoding : 'linalg.matmul -> ()' {
Trying to match "mlir::iree_compiler::GlobalOptimization::(anonymous namespace)::setContractionOpEncoding"
"mlir::iree_compiler::GlobalOptimization::(anonymous namespace)::setContractionOpEncoding" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'iree_linalg_ext.set_encoding'(0x58bb0edcf670) {
%36 = "iree_linalg_ext.set_encoding"(%35) : (tensor<?x?xi32>) -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
* Pattern mlir::iree_compiler::GlobalOptimization::(anonymous namespace)::FoldFillWithSetEncoding : 'iree_linalg_ext.set_encoding -> ()' {
Trying to match "mlir::iree_compiler::GlobalOptimization::(anonymous namespace)::FoldFillWithSetEncoding"
"mlir::iree_compiler::GlobalOptimization::(anonymous namespace)::FoldFillWithSetEncoding" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.pad'(0x58bb0edaf000) {
* Pattern (anonymous namespace)::FoldFillWithPad : 'tensor.pad -> ()' {
Trying to match "(anonymous namespace)::FoldFillWithPad"
"(anonymous namespace)::FoldFillWithPad" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.constant'(0x58bb0edcd070) {
%34 = "arith.constant"() <{value = 0 : i32}> : () -> i32
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'affine.apply'(0x58bb0edccfc0) {
%33 = "affine.apply"(%27#1, %32) <{map = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>}> : (index, index) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.dim'(0x58bb0edccf10) {
%32 = "tensor.dim"(%6, %31) : (tensor<?x4xi32>, index) -> index
} -> success : operation was folded
//===-------------------------------------------===//
** Insert : 'arith.constant'(0x58bb0edcddc0)
** Replace : 'tensor.dim'(0x58bb0edccf10)
** Modified: 'affine.apply'(0x58bb0edccfc0)
** Erase : 'tensor.dim'(0x58bb0edccf10)
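// [annotation, not compiler output] A minimal sketch of the fold that just
// fired: a tensor.dim querying a statically known dimension folds to a
// constant. Dim 1 of tensor<?x4xi32> is 4 by construction, so the driver
// inserts a new arith.constant, rewires the affine.apply user, and erases the
// tensor.dim (names below are illustrative):
//
//   %c1 = arith.constant 1 : index
//   %d  = tensor.dim %t, %c1 : tensor<?x4xi32>   // before: runtime query
//   // ==> after folding:
//   %c4 = arith.constant 4 : index               // dim 1 is statically 4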
// *** IR Dump After Successful Folding ***
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%7:2 = iree_linalg_ext.upper_bound_tile_size tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>> -> index, index
%c0 = arith.constant 0 : index
%dim = tensor.dim %2, %c0 : tensor<?x?xi8>
%8 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%7#0, %dim]
%c1 = arith.constant 1 : index
%dim_0 = tensor.dim %2, %c1 : tensor<?x?xi8>
%9 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%7#1, %dim_0]
%c0_i8 = arith.constant 0 : i8
%padded = tensor.pad %2 low[0, 0] high[%8, %9] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x?xi8> to tensor<?x?xi8>
%10 = iree_linalg_ext.set_encoding %padded : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%11:2 = iree_linalg_ext.upper_bound_tile_size tensor<?x4xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>> -> index, index
%c0_1 = arith.constant 0 : index
%dim_2 = tensor.dim %4, %c0_1 : tensor<?x4xi8>
%12 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%11#0, %dim_2]
%c1_3 = arith.constant 1 : index
%dim_4 = tensor.dim %4, %c1_3 : tensor<?x4xi8>
%13 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%11#1, %dim_4]
%c0_i8_5 = arith.constant 0 : i8
%padded_6 = tensor.pad %4 low[0, 0] high[%12, %13] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8_5 : i8
} : tensor<?x4xi8> to tensor<?x?xi8>
%14 = iree_linalg_ext.set_encoding %padded_6 : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%15:2 = iree_linalg_ext.upper_bound_tile_size tensor<?x4xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>> -> index, index
%c0_7 = arith.constant 0 : index
%dim_8 = tensor.dim %6, %c0_7 : tensor<?x4xi32>
%16 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%15#0, %dim_8]
%c1_9 = arith.constant 1 : index
%c4 = arith.constant 4 : index
%17 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%15#1, %c4]
%c0_i32 = arith.constant 0 : i32
%padded_10 = tensor.pad %6 low[0, 0] high[%16, %17] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i32 : i32
} : tensor<?x4xi32> to tensor<?x?xi32>
%18 = iree_linalg_ext.set_encoding %padded_10 : tensor<?x?xi32> -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%19 = linalg.matmul ins(%10, %14 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>, tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) outs(%18 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c0_11 = arith.constant 0 : index
%dim_12 = tensor.dim %6, %c0_11 : tensor<?x4xi32>
%20 = iree_linalg_ext.unset_encoding %19 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>> -> tensor<?x?xi32>
%extracted_slice = tensor.extract_slice %20[0, 0] [%dim_12, 4] [1, 1] : tensor<?x?xi32> to tensor<?x4xi32>
%21 = hal.tensor.export %extracted_slice "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %21 : !hal.buffer_view
}
//===-------------------------------------------===//
Processing operation : 'affine.apply'(0x58bb0edccfc0) {
%33 = "affine.apply"(%27#1, %32) <{map = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>}> : (index, index) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.constant'(0x58bb0edcddc0) {
%32 = "arith.constant"() <{value = 4 : index}> : () -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.constant'(0x58bb0edccea0) {
%31 = "arith.constant"() <{value = 1 : index}> : () -> index
** Erase : 'arith.constant'(0x58bb0edccea0)
} -> success : operation is trivially dead
//===-------------------------------------------===//
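// [annotation, not compiler output] Follow-on cleanup: after the fold above
// replaced the tensor.dim with a constant 4, the index-1 constant that fed it
// lost its last use, so the driver erases it as trivially dead:
//
//   %c1 = arith.constant 1 : index   // no remaining uses, erased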
//===-------------------------------------------===//
Processing operation : 'affine.apply'(0x58bb0e356ac0) {
%30 = "affine.apply"(%27#0, %29) <{map = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>}> : (index, index) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.dim'(0x58bb0e356a10) {
%29 = "tensor.dim"(%6, %28) : (tensor<?x4xi32>, index) -> index
* Pattern (anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp> : 'tensor.dim -> ()' {
Trying to match "(anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>"
"(anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.constant'(0x58bb0e3569a0) {
%28 = "arith.constant"() <{value = 0 : index}> : () -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'iree_linalg_ext.upper_bound_tile_size'(0x58bb0e35b0a0) {
%27:2 = "iree_linalg_ext.upper_bound_tile_size"() <{tensorType = tensor<?x4xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>}> : () -> (index, index)
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'iree_linalg_ext.set_encoding'(0x58bb0edcf280) {
%26 = "iree_linalg_ext.set_encoding"(%25) : (tensor<?x?xi8>) -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
* Pattern mlir::iree_compiler::GlobalOptimization::(anonymous namespace)::FoldFillWithSetEncoding : 'iree_linalg_ext.set_encoding -> ()' {
Trying to match "mlir::iree_compiler::GlobalOptimization::(anonymous namespace)::FoldFillWithSetEncoding"
"mlir::iree_compiler::GlobalOptimization::(anonymous namespace)::FoldFillWithSetEncoding" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.pad'(0x58bb0e352520) {
* Pattern (anonymous namespace)::FoldFillWithPad : 'tensor.pad -> ()' {
Trying to match "(anonymous namespace)::FoldFillWithPad"
"(anonymous namespace)::FoldFillWithPad" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.constant'(0x58bb0edcf910) {
%24 = "arith.constant"() <{value = 0 : i8}> : () -> i8
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'affine.apply'(0x58bb0edad890) {
%23 = "affine.apply"(%17#1, %22) <{map = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>}> : (index, index) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.dim'(0x58bb0edad7e0) {
%22 = "tensor.dim"(%4, %21) : (tensor<?x4xi8>, index) -> index
} -> success : operation was folded
//===-------------------------------------------===//
** Insert : 'arith.constant'(0x58bb0edccea0)
** Replace : 'tensor.dim'(0x58bb0edad7e0)
** Modified: 'affine.apply'(0x58bb0edad890)
** Erase : 'tensor.dim'(0x58bb0edad7e0)
// *** IR Dump After Successful Folding ***
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%7:2 = iree_linalg_ext.upper_bound_tile_size tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>> -> index, index
%c0 = arith.constant 0 : index
%dim = tensor.dim %2, %c0 : tensor<?x?xi8>
%8 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%7#0, %dim]
%c1 = arith.constant 1 : index
%dim_0 = tensor.dim %2, %c1 : tensor<?x?xi8>
%9 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%7#1, %dim_0]
%c0_i8 = arith.constant 0 : i8
%padded = tensor.pad %2 low[0, 0] high[%8, %9] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x?xi8> to tensor<?x?xi8>
%10 = iree_linalg_ext.set_encoding %padded : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%11:2 = iree_linalg_ext.upper_bound_tile_size tensor<?x4xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>> -> index, index
%c0_1 = arith.constant 0 : index
%dim_2 = tensor.dim %4, %c0_1 : tensor<?x4xi8>
%12 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%11#0, %dim_2]
%c1_3 = arith.constant 1 : index
%c4 = arith.constant 4 : index
%13 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%11#1, %c4]
%c0_i8_4 = arith.constant 0 : i8
%padded_5 = tensor.pad %4 low[0, 0] high[%12, %13] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8_4 : i8
} : tensor<?x4xi8> to tensor<?x?xi8>
%14 = iree_linalg_ext.set_encoding %padded_5 : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%15:2 = iree_linalg_ext.upper_bound_tile_size tensor<?x4xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>> -> index, index
%c0_6 = arith.constant 0 : index
%dim_7 = tensor.dim %6, %c0_6 : tensor<?x4xi32>
%16 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%15#0, %dim_7]
%c4_8 = arith.constant 4 : index
%17 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%15#1, %c4_8]
%c0_i32 = arith.constant 0 : i32
%padded_9 = tensor.pad %6 low[0, 0] high[%16, %17] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i32 : i32
} : tensor<?x4xi32> to tensor<?x?xi32>
%18 = iree_linalg_ext.set_encoding %padded_9 : tensor<?x?xi32> -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%19 = linalg.matmul ins(%10, %14 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>, tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) outs(%18 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c0_10 = arith.constant 0 : index
%dim_11 = tensor.dim %6, %c0_10 : tensor<?x4xi32>
%20 = iree_linalg_ext.unset_encoding %19 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>> -> tensor<?x?xi32>
%extracted_slice = tensor.extract_slice %20[0, 0] [%dim_11, 4] [1, 1] : tensor<?x?xi32> to tensor<?x4xi32>
%21 = hal.tensor.export %extracted_slice "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %21 : !hal.buffer_view
}
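// [annotation, not compiler output] After this second fold, both statically
// known narrow-N dimensions are resolved: the RHS pad amount %13 and the
// RESULT pad amount %17 now take %c4 / %c4_8 directly instead of a
// tensor.dim. The tensor.dim ops that remain query genuinely dynamic
// dimensions, e.g.
//
//   %dim_7 = tensor.dim %6, %c0_6 : tensor<?x4xi32>   // dim 0 is dynamic
//
// which is presumably why DimOfReifyRankedShapedTypeOpInterface keeps failing
// to match on them in the trace below.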
//===-------------------------------------------===//
Processing operation : 'affine.apply'(0x58bb0edad890) {
%23 = "affine.apply"(%17#1, %22) <{map = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>}> : (index, index) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.constant'(0x58bb0edccea0) {
%22 = "arith.constant"() <{value = 4 : index}> : () -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.constant'(0x58bb0edcf8a0) {
%21 = "arith.constant"() <{value = 1 : index}> : () -> index
** Erase : 'arith.constant'(0x58bb0edcf8a0)
} -> success : operation is trivially dead
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'affine.apply'(0x58bb0edcf7f0) {
%20 = "affine.apply"(%17#0, %19) <{map = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>}> : (index, index) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.dim'(0x58bb0edaec60) {
%19 = "tensor.dim"(%4, %18) : (tensor<?x4xi8>, index) -> index
* Pattern (anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp> : 'tensor.dim -> ()' {
Trying to match "(anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>"
"(anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.constant'(0x58bb0edaebf0) {
%18 = "arith.constant"() <{value = 0 : index}> : () -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'iree_linalg_ext.upper_bound_tile_size'(0x58bb0e3529d0) {
%17:2 = "iree_linalg_ext.upper_bound_tile_size"() <{tensorType = tensor<?x4xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>}> : () -> (index, index)
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'iree_linalg_ext.set_encoding'(0x58bb0edae7c0) {
%16 = "iree_linalg_ext.set_encoding"(%15) : (tensor<?x?xi8>) -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
* Pattern mlir::iree_compiler::GlobalOptimization::(anonymous namespace)::FoldFillWithSetEncoding : 'iree_linalg_ext.set_encoding -> ()' {
Trying to match "mlir::iree_compiler::GlobalOptimization::(anonymous namespace)::FoldFillWithSetEncoding"
"mlir::iree_compiler::GlobalOptimization::(anonymous namespace)::FoldFillWithSetEncoding" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.pad'(0x58bb0e352410) {
* Pattern (anonymous namespace)::FoldFillWithPad : 'tensor.pad -> ()' {
Trying to match "(anonymous namespace)::FoldFillWithPad"
"(anonymous namespace)::FoldFillWithPad" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.constant'(0x58bb0edae8c0) {
%14 = "arith.constant"() <{value = 0 : i8}> : () -> i8
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'affine.apply'(0x58bb0e37d3a0) {
%13 = "affine.apply"(%7#1, %12) <{map = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>}> : (index, index) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.dim'(0x58bb0e37d2f0) {
%12 = "tensor.dim"(%2, %11) : (tensor<?x?xi8>, index) -> index
* Pattern (anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp> : 'tensor.dim -> ()' {
Trying to match "(anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>"
"(anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.constant'(0x58bb0edae850) {
%11 = "arith.constant"() <{value = 1 : index}> : () -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'affine.apply'(0x58bb0e3527e0) {
%10 = "affine.apply"(%7#0, %9) <{map = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>}> : (index, index) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.dim'(0x58bb0e381570) {
%9 = "tensor.dim"(%2, %8) : (tensor<?x?xi8>, index) -> index
* Pattern (anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp> : 'tensor.dim -> ()' {
Trying to match "(anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>"
"(anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.constant'(0x58bb0ed99730) {
%8 = "arith.constant"() <{value = 0 : index}> : () -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'iree_linalg_ext.upper_bound_tile_size'(0x58bb0edbecd0) {
%7:2 = "iree_linalg_ext.upper_bound_tile_size"() <{tensorType = tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>}> : () -> (index, index)
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.import'(0x58bb0e381390) {
%6 = "hal.tensor.import"(%arg2, %5) {name = "input2", operandSegmentSizes = array<i32: 1, 1, 0>, target_encoding = tensor<?x4xi32>} : (!hal.buffer_view, index) -> tensor<?x4xi32>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0ed97d60) {
%5 = "hal.buffer_view.dim"(%arg2) {index = 0 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.import'(0x58bb0e3811b0) {
%4 = "hal.tensor.import"(%arg1, %3) {name = "input1", operandSegmentSizes = array<i32: 1, 1, 0>, target_encoding = tensor<?x4xi8>} : (!hal.buffer_view, index) -> tensor<?x4xi8>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0eda4180) {
%3 = "hal.buffer_view.dim"(%arg1) {index = 0 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.import'(0x58bb0e267df0) {
%2 = "hal.tensor.import"(%arg0, %0, %1) {name = "input0", operandSegmentSizes = array<i32: 1, 2, 0>, target_encoding = tensor<?x?xi8>} : (!hal.buffer_view, index, index) -> tensor<?x?xi8>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0e21cd00) {
%1 = "hal.buffer_view.dim"(%arg0) {index = 1 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'util.func'(0x58bb0eda2e50) {
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0ed93100) {
%0 = "hal.buffer_view.dim"(%arg0) {index = 0 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
** Replace : 'arith.constant'(0x58bb0edaebf0)
** Modified: 'tensor.dim'(0x58bb0edaec60)
** Erase : 'arith.constant'(0x58bb0edaebf0)
** Replace : 'arith.constant'(0x58bb0edcf910)
** Modified: 'tensor.yield'(0x58bb0eda33c0)
** Erase : 'arith.constant'(0x58bb0edcf910)
** Replace : 'arith.constant'(0x58bb0e3569a0)
** Modified: 'tensor.dim'(0x58bb0e356a10)
** Erase : 'arith.constant'(0x58bb0e3569a0)
** Replace : 'arith.constant'(0x58bb0edcddc0)
** Modified: 'affine.apply'(0x58bb0edccfc0)
** Erase : 'arith.constant'(0x58bb0edcddc0)
** Replace : 'arith.constant'(0x58bb0edcd940)
** Modified: 'tensor.dim'(0x58bb0edcd9b0)
** Erase : 'arith.constant'(0x58bb0edcd940)
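// [annotation, not compiler output] This batch of Replace/Modified/Erase
// lines is consistent with the driver's end-of-run constant uniquing:
// duplicate arith.constant ops (several 0 : index, a 0 : i8, a 4 : index) are
// merged into a single instance each, which is why the next dump shows one
// copy of every constant hoisted to the top of the function. A minimal
// sketch, assuming standard constant deduplication:
//
//   %c0   = arith.constant 0 : index   // kept; all uses rewired to this one
//   %c0_1 = arith.constant 0 : index   // duplicate, erased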
//===-------------------------------------------===//
Processing operation : 'util.return'(0x58bb0eda3240) {
"util.return"(%35) : (!hal.buffer_view) -> ()
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.export'(0x58bb0e381750) {
%35 = "hal.tensor.export"(%34, %10) {name = "output0", operandSegmentSizes = array<i32: 1, 1, 0>, source_encoding = tensor<?x4xi32>} : (tensor<?x4xi32>, index) -> !hal.buffer_view
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.extract_slice'(0x58bb0ed99810) {
%34 = "tensor.extract_slice"(%33, %32) <{operandSegmentSizes = array<i32: 1, 0, 1, 0>, static_offsets = array<i64: 0, 0>, static_sizes = array<i64: -9223372036854775808, 4>, static_strides = array<i64: 1, 1>}> : (tensor<?x?xi32>, index) -> tensor<?x4xi32>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'iree_linalg_ext.unset_encoding'(0x58bb0edcda60) {
%33 = "iree_linalg_ext.unset_encoding"(%31) : (tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) -> tensor<?x?xi32>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.dim'(0x58bb0edcd9b0) {
%32 = "tensor.dim"(%11, %4) : (tensor<?x4xi32>, index) -> index
* Pattern (anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp> : 'tensor.dim -> ()' {
Trying to match "(anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>"
"(anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'linalg.yield'(0x58bb0edbe4e0) {
"linalg.yield"(%39) : (i32) -> ()
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.addi'(0x58bb0edcd720) {
%39 = "arith.addi"(%arg5, %38) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.muli'(0x58bb0edcd6b0) {
%38 = "arith.muli"(%36, %37) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.extsi'(0x58bb0edcd640) {
%37 = "arith.extsi"(%arg4) : (i8) -> i32
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'linalg.matmul'(0x58bb0e352320) {
* Pattern mlir::iree_compiler::GlobalOptimization::(anonymous namespace)::setContractionOpEncoding : 'linalg.matmul -> ()' {
Trying to match "mlir::iree_compiler::GlobalOptimization::(anonymous namespace)::setContractionOpEncoding"
"mlir::iree_compiler::GlobalOptimization::(anonymous namespace)::setContractionOpEncoding" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.extsi'(0x58bb0edcd1c0) {
%36 = "arith.extsi"(%arg3) : (i8) -> i32
} -> failure : pattern failed to match
//===-------------------------------------------===//
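// [annotation, not compiler output] The region ops visited here spell out the
// i8 x i8 -> i32 semantics of this matmul's accumulation body: both operands
// are sign-extended to i32 before multiplying, and the product is added to
// the i32 accumulator. Condensed from the trace above:
//
//   %lhs = arith.extsi %arg3 : i8 to i32
//   %rhs = arith.extsi %arg4 : i8 to i32
//   %mul = arith.muli %lhs, %rhs : i32
//   %acc = arith.addi %arg5, %mul : i32
//   linalg.yield %acc : i32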
//===-------------------------------------------===//
Processing operation : 'iree_linalg_ext.set_encoding'(0x58bb0edcf670) {
%30 = "iree_linalg_ext.set_encoding"(%29) : (tensor<?x?xi32>) -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
* Pattern mlir::iree_compiler::GlobalOptimization::(anonymous namespace)::FoldFillWithSetEncoding : 'iree_linalg_ext.set_encoding -> ()' {
Trying to match "mlir::iree_compiler::GlobalOptimization::(anonymous namespace)::FoldFillWithSetEncoding"
"mlir::iree_compiler::GlobalOptimization::(anonymous namespace)::FoldFillWithSetEncoding" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.pad'(0x58bb0edaf000) {
* Pattern (anonymous namespace)::FoldFillWithPad : 'tensor.pad -> ()' {
Trying to match "(anonymous namespace)::FoldFillWithPad"
"(anonymous namespace)::FoldFillWithPad" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.yield'(0x58bb0ed9ae60) {
"tensor.yield"(%0) : (i32) -> ()
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.constant'(0x58bb0edcd070) {
%0 = "arith.constant"() <{value = 0 : i32}> : () -> i32
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'affine.apply'(0x58bb0edccfc0) {
%28 = "affine.apply"(%25#1, %1) <{map = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>}> : (index, index) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'affine.apply'(0x58bb0e356ac0) {
%27 = "affine.apply"(%25#0, %26) <{map = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>}> : (index, index) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.dim'(0x58bb0e356a10) {
%26 = "tensor.dim"(%11, %4) : (tensor<?x4xi32>, index) -> index
* Pattern (anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp> : 'tensor.dim -> ()' {
Trying to match "(anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>"
"(anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'iree_linalg_ext.upper_bound_tile_size'(0x58bb0e35b0a0) {
%25:2 = "iree_linalg_ext.upper_bound_tile_size"() <{tensorType = tensor<?x4xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>}> : () -> (index, index)
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'iree_linalg_ext.set_encoding'(0x58bb0edcf280) {
%24 = "iree_linalg_ext.set_encoding"(%23) : (tensor<?x?xi8>) -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
* Pattern mlir::iree_compiler::GlobalOptimization::(anonymous namespace)::FoldFillWithSetEncoding : 'iree_linalg_ext.set_encoding -> ()' {
Trying to match "mlir::iree_compiler::GlobalOptimization::(anonymous namespace)::FoldFillWithSetEncoding"
"mlir::iree_compiler::GlobalOptimization::(anonymous namespace)::FoldFillWithSetEncoding" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.pad'(0x58bb0e352520) {
* Pattern (anonymous namespace)::FoldFillWithPad : 'tensor.pad -> ()' {
Trying to match "(anonymous namespace)::FoldFillWithPad"
"(anonymous namespace)::FoldFillWithPad" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.yield'(0x58bb0eda33c0) {
"tensor.yield"(%2) : (i8) -> ()
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'affine.apply'(0x58bb0edad890) {
%22 = "affine.apply"(%19#1, %1) <{map = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>}> : (index, index) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.constant'(0x58bb0edccea0) {
%1 = "arith.constant"() <{value = 4 : index}> : () -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'affine.apply'(0x58bb0edcf7f0) {
%21 = "affine.apply"(%19#0, %20) <{map = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>}> : (index, index) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.dim'(0x58bb0edaec60) {
%20 = "tensor.dim"(%9, %4) : (tensor<?x4xi8>, index) -> index
* Pattern (anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp> : 'tensor.dim -> ()' {
Trying to match "(anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>"
"(anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'iree_linalg_ext.upper_bound_tile_size'(0x58bb0e3529d0) {
%19:2 = "iree_linalg_ext.upper_bound_tile_size"() <{tensorType = tensor<?x4xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>}> : () -> (index, index)
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'iree_linalg_ext.set_encoding'(0x58bb0edae7c0) {
%18 = "iree_linalg_ext.set_encoding"(%17) : (tensor<?x?xi8>) -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
* Pattern mlir::iree_compiler::GlobalOptimization::(anonymous namespace)::FoldFillWithSetEncoding : 'iree_linalg_ext.set_encoding -> ()' {
Trying to match "mlir::iree_compiler::GlobalOptimization::(anonymous namespace)::FoldFillWithSetEncoding"
"mlir::iree_compiler::GlobalOptimization::(anonymous namespace)::FoldFillWithSetEncoding" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.pad'(0x58bb0e352410) {
* Pattern (anonymous namespace)::FoldFillWithPad : 'tensor.pad -> ()' {
Trying to match "(anonymous namespace)::FoldFillWithPad"
"(anonymous namespace)::FoldFillWithPad" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.yield'(0x58bb0eda3440) {
"tensor.yield"(%2) : (i8) -> ()
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.constant'(0x58bb0edae8c0) {
%2 = "arith.constant"() <{value = 0 : i8}> : () -> i8
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'affine.apply'(0x58bb0e37d3a0) {
%16 = "affine.apply"(%12#1, %15) <{map = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>}> : (index, index) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.dim'(0x58bb0e37d2f0) {
%15 = "tensor.dim"(%7, %3) : (tensor<?x?xi8>, index) -> index
* Pattern (anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp> : 'tensor.dim -> ()' {
Trying to match "(anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>"
"(anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.constant'(0x58bb0edae850) {
%3 = "arith.constant"() <{value = 1 : index}> : () -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'affine.apply'(0x58bb0e3527e0) {
%14 = "affine.apply"(%12#0, %13) <{map = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>}> : (index, index) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.dim'(0x58bb0e381570) {
%13 = "tensor.dim"(%7, %4) : (tensor<?x?xi8>, index) -> index
* Pattern (anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp> : 'tensor.dim -> ()' {
Trying to match "(anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>"
"(anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>" result 0
} -> failure : pattern failed to match
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.constant'(0x58bb0ed99730) {
%4 = "arith.constant"() <{value = 0 : index}> : () -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'iree_linalg_ext.upper_bound_tile_size'(0x58bb0edbecd0) {
%12:2 = "iree_linalg_ext.upper_bound_tile_size"() <{tensorType = tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>}> : () -> (index, index)
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.import'(0x58bb0e381390) {
%11 = "hal.tensor.import"(%arg2, %10) {name = "input2", operandSegmentSizes = array<i32: 1, 1, 0>, target_encoding = tensor<?x4xi32>} : (!hal.buffer_view, index) -> tensor<?x4xi32>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0ed97d60) {
%10 = "hal.buffer_view.dim"(%arg2) {index = 0 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.import'(0x58bb0e3811b0) {
%9 = "hal.tensor.import"(%arg1, %8) {name = "input1", operandSegmentSizes = array<i32: 1, 1, 0>, target_encoding = tensor<?x4xi8>} : (!hal.buffer_view, index) -> tensor<?x4xi8>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0eda4180) {
%8 = "hal.buffer_view.dim"(%arg1) {index = 0 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.import'(0x58bb0e267df0) {
%7 = "hal.tensor.import"(%arg0, %5, %6) {name = "input0", operandSegmentSizes = array<i32: 1, 2, 0>, target_encoding = tensor<?x?xi8>} : (!hal.buffer_view, index, index) -> tensor<?x?xi8>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0e21cd00) {
%6 = "hal.buffer_view.dim"(%arg0) {index = 1 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'util.func'(0x58bb0eda2e50) {
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0ed93100) {
%5 = "hal.buffer_view.dim"(%arg0) {index = 0 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
// -----// IR Dump After SetEncoding (iree-global-opt-set-encoding) //----- //
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {cpu = "znver4", cpu_features = "+mmx,+popcnt,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+avx,+avx2,+sse4a,+fma,+avx512f,+bmi,+bmi2,+aes,+pclmul,+avx512vl,+avx512bw,+avx512dq,+avx512cd,+avx512vbmi,+avx512ifma,+avx512vpopcntdq,+avx512vbmi2,+gfni,+vpclmulqdq,+avx512vnni,+avx512bitalg,+avx512bf16,+adx,+clflushopt,+clwb,+clzero,+cx16,+cx8,+f16c,+fsgsbase,+crc32,+invpcid,+rdpru,+sahf,+lzcnt,+movbe,+mwaitx,+x87,+pku,+prfchw,+rdpid,+rdrnd,+rdseed,+sha,+shstk,+vaes,+wbnoinvd,+xsave,+xsavec,+xsaveopt,+xsaves,+fxsr,+evex512", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", link_embedded = false, native_vector_size = 64 : i64, target_triple = "x86_64-unknown-linux-gnu", ukernels = "all"}>
ImplicitTypeIDRegistry::lookupOrInsert(mlir::MemRefLayoutAttrInterface::Trait<Empty>)
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
#map3 = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>
#device_target_llvm_cpu = #hal.device.target<"llvm-cpu", [#executable_target_system_elf_x86_64_]>
module attributes {hal.device.targets = [#device_target_llvm_cpu]} {
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%c0_i32 = arith.constant 0 : i32
%c4 = arith.constant 4 : index
%c0_i8 = arith.constant 0 : i8
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%7:2 = iree_linalg_ext.upper_bound_tile_size tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [#map, #map1, #map2]>> -> index, index
%dim = tensor.dim %2, %c0 : tensor<?x?xi8>
%8 = affine.apply #map3()[%7#0, %dim]
%dim_0 = tensor.dim %2, %c1 : tensor<?x?xi8>
%9 = affine.apply #map3()[%7#1, %dim_0]
%padded = tensor.pad %2 low[0, 0] high[%8, %9] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x?xi8> to tensor<?x?xi8>
%10 = iree_linalg_ext.set_encoding %padded : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [#map, #map1, #map2]>>
%11:2 = iree_linalg_ext.upper_bound_tile_size tensor<?x4xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [#map, #map1, #map2]>> -> index, index
%dim_1 = tensor.dim %4, %c0 : tensor<?x4xi8>
%12 = affine.apply #map3()[%11#0, %dim_1]
%13 = affine.apply #map3()[%11#1, %c4]
%padded_2 = tensor.pad %4 low[0, 0] high[%12, %13] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x4xi8> to tensor<?x?xi8>
%14 = iree_linalg_ext.set_encoding %padded_2 : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [#map, #map1, #map2]>>
%15:2 = iree_linalg_ext.upper_bound_tile_size tensor<?x4xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [#map, #map1, #map2]>> -> index, index
%dim_3 = tensor.dim %6, %c0 : tensor<?x4xi32>
%16 = affine.apply #map3()[%15#0, %dim_3]
%17 = affine.apply #map3()[%15#1, %c4]
%padded_4 = tensor.pad %6 low[0, 0] high[%16, %17] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i32 : i32
} : tensor<?x4xi32> to tensor<?x?xi32>
%18 = iree_linalg_ext.set_encoding %padded_4 : tensor<?x?xi32> -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [#map, #map1, #map2]>>
%19 = linalg.matmul ins(%10, %14 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [#map, #map1, #map2]>>, tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [#map, #map1, #map2]>>) outs(%18 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [#map, #map1, #map2]>>) -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [#map, #map1, #map2]>>
%dim_5 = tensor.dim %6, %c0 : tensor<?x4xi32>
%20 = iree_linalg_ext.unset_encoding %19 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [#map, #map1, #map2]>> -> tensor<?x?xi32>
%extracted_slice = tensor.extract_slice %20[0, 0] [%dim_5, 4] [1, 1] : tensor<?x?xi32> to tensor<?x4xi32>
%21 = hal.tensor.export %extracted_slice "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %21 : !hal.buffer_view
}
}
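
Note: #map3 above computes the amount of high padding that rounds a dimension of extent s1 up to the next multiple of the tile size s0, i.e. pad = ceildiv(s1, s0) * s0 - s1. Each tensor.pad in this function pads with zeros by exactly that amount before set_encoding. A minimal standalone C++ sketch of that arithmetic (illustrative only, not IREE code; all names are invented):

#include <cassert>
#include <cstdint>

int64_t ceilDiv(int64_t a, int64_t b) { return (a + b - 1) / b; }

// #map3 = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>
int64_t highPad(int64_t s0 /*tile size*/, int64_t s1 /*dim extent*/) {
  return ceilDiv(s1, s0) * s0 - s1;
}

int main() {
  assert(highPad(16, 10) == 6); // extent 10 rounds up to 16
  assert(highPad(4, 4) == 0);   // already a multiple: no padding
  return 0;
}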
// -----// IR Dump Before MaterializeHomogeneousEncodings (iree-global-opt-materialize-homogeneous-encodings) //----- //
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {cpu = "znver4", cpu_features = "+mmx,+popcnt,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+avx,+avx2,+sse4a,+fma,+avx512f,+bmi,+bmi2,+aes,+pclmul,+avx512vl,+avx512bw,+avx512dq,+avx512cd,+avx512vbmi,+avx512ifma,+avx512vpopcntdq,+avx512vbmi2,+gfni,+vpclmulqdq,+avx512vnni,+avx512bitalg,+avx512bf16,+adx,+clflushopt,+clwb,+clzero,+cx16,+cx8,+f16c,+fsgsbase,+crc32,+invpcid,+rdpru,+sahf,+lzcnt,+movbe,+mwaitx,+x87,+pku,+prfchw,+rdpid,+rdrnd,+rdseed,+sha,+shstk,+vaes,+wbnoinvd,+xsave,+xsavec,+xsaveopt,+xsaves,+fxsr,+evex512", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", link_embedded = false, native_vector_size = 64 : i64, target_triple = "x86_64-unknown-linux-gnu", ukernels = "all"}>
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
#map3 = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>
#device_target_llvm_cpu = #hal.device.target<"llvm-cpu", [#executable_target_system_elf_x86_64_]>
module attributes {hal.device.targets = [#device_target_llvm_cpu]} {
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%c0_i32 = arith.constant 0 : i32
%c4 = arith.constant 4 : index
%c0_i8 = arith.constant 0 : i8
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%7:2 = iree_linalg_ext.upper_bound_tile_size tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [#map, #map1, #map2]>> -> index, index
%dim = tensor.dim %2, %c0 : tensor<?x?xi8>
%8 = affine.apply #map3()[%7#0, %dim]
%dim_0 = tensor.dim %2, %c1 : tensor<?x?xi8>
%9 = affine.apply #map3()[%7#1, %dim_0]
%padded = tensor.pad %2 low[0, 0] high[%8, %9] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x?xi8> to tensor<?x?xi8>
%10 = iree_linalg_ext.set_encoding %padded : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [#map, #map1, #map2]>>
%11:2 = iree_linalg_ext.upper_bound_tile_size tensor<?x4xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [#map, #map1, #map2]>> -> index, index
%dim_1 = tensor.dim %4, %c0 : tensor<?x4xi8>
%12 = affine.apply #map3()[%11#0, %dim_1]
%13 = affine.apply #map3()[%11#1, %c4]
%padded_2 = tensor.pad %4 low[0, 0] high[%12, %13] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x4xi8> to tensor<?x?xi8>
%14 = iree_linalg_ext.set_encoding %padded_2 : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [#map, #map1, #map2]>>
%15:2 = iree_linalg_ext.upper_bound_tile_size tensor<?x4xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [#map, #map1, #map2]>> -> index, index
%dim_3 = tensor.dim %6, %c0 : tensor<?x4xi32>
%16 = affine.apply #map3()[%15#0, %dim_3]
%17 = affine.apply #map3()[%15#1, %c4]
%padded_4 = tensor.pad %6 low[0, 0] high[%16, %17] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i32 : i32
} : tensor<?x4xi32> to tensor<?x?xi32>
%18 = iree_linalg_ext.set_encoding %padded_4 : tensor<?x?xi32> -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [#map, #map1, #map2]>>
%19 = linalg.matmul ins(%10, %14 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [#map, #map1, #map2]>>, tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [#map, #map1, #map2]>>) outs(%18 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [#map, #map1, #map2]>>) -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [#map, #map1, #map2]>>
%dim_5 = tensor.dim %6, %c0 : tensor<?x4xi32>
%20 = iree_linalg_ext.unset_encoding %19 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [#map, #map1, #map2]>> -> tensor<?x?xi32>
%extracted_slice = tensor.extract_slice %20[0, 0] [%dim_5, 4] [1, 1] : tensor<?x?xi32> to tensor<?x4xi32>
%21 = hal.tensor.export %extracted_slice "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %21 : !hal.buffer_view
}
}
// -----// IR Dump Before CPUMaterializeUpperBoundTileSize (iree-codegen-cpu-materialize-upper-bound-tile-size) //----- //
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%c0_i32 = arith.constant 0 : i32
%c4 = arith.constant 4 : index
%c0_i8 = arith.constant 0 : i8
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%7:2 = iree_linalg_ext.upper_bound_tile_size tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>> -> index, index
%dim = tensor.dim %2, %c0 : tensor<?x?xi8>
%8 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%7#0, %dim]
%dim_0 = tensor.dim %2, %c1 : tensor<?x?xi8>
%9 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%7#1, %dim_0]
%padded = tensor.pad %2 low[0, 0] high[%8, %9] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x?xi8> to tensor<?x?xi8>
%10 = iree_linalg_ext.set_encoding %padded : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%11:2 = iree_linalg_ext.upper_bound_tile_size tensor<?x4xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>> -> index, index
%dim_1 = tensor.dim %4, %c0 : tensor<?x4xi8>
%12 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%11#0, %dim_1]
%13 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%11#1, %c4]
%padded_2 = tensor.pad %4 low[0, 0] high[%12, %13] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x4xi8> to tensor<?x?xi8>
%14 = iree_linalg_ext.set_encoding %padded_2 : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%15:2 = iree_linalg_ext.upper_bound_tile_size tensor<?x4xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>> -> index, index
%dim_3 = tensor.dim %6, %c0 : tensor<?x4xi32>
%16 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%15#0, %dim_3]
%17 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%15#1, %c4]
%padded_4 = tensor.pad %6 low[0, 0] high[%16, %17] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i32 : i32
} : tensor<?x4xi32> to tensor<?x?xi32>
%18 = iree_linalg_ext.set_encoding %padded_4 : tensor<?x?xi32> -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%19 = linalg.matmul ins(%10, %14 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>, tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) outs(%18 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_5 = tensor.dim %6, %c0 : tensor<?x4xi32>
%20 = iree_linalg_ext.unset_encoding %19 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>> -> tensor<?x?xi32>
%extracted_slice = tensor.extract_slice %20[0, 0] [%dim_5, 4] [1, 1] : tensor<?x?xi32> to tensor<?x4xi32>
%21 = hal.tensor.export %extracted_slice "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %21 : !hal.buffer_view
}
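
What follows is the -debug trace of the greedy pattern rewriter driven by iree-codegen-cpu-materialize-upper-bound-tile-size. It visits each operation in the function, logs "pattern failed to match" for operations none of its patterns apply to, and rewrites each iree_linalg_ext.upper_bound_tile_size into target-specific arith.constant tile sizes (the pattern applications below), re-running folding on the affected users after each rewrite.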
//===-------------------------------------------===//
Processing operation : 'util.return'(0x58bb0eda3240) {
"util.return"(%35) : (!hal.buffer_view) -> ()
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.export'(0x58bb0e381750) {
%35 = "hal.tensor.export"(%34, %10) {name = "output0", operandSegmentSizes = array<i32: 1, 1, 0>, source_encoding = tensor<?x4xi32>} : (tensor<?x4xi32>, index) -> !hal.buffer_view
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.extract_slice'(0x58bb0ed99810) {
%34 = "tensor.extract_slice"(%33, %32) <{operandSegmentSizes = array<i32: 1, 0, 1, 0>, static_offsets = array<i64: 0, 0>, static_sizes = array<i64: -9223372036854775808, 4>, static_strides = array<i64: 1, 1>}> : (tensor<?x?xi32>, index) -> tensor<?x4xi32>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'iree_linalg_ext.unset_encoding'(0x58bb0edcda60) {
%33 = "iree_linalg_ext.unset_encoding"(%31) : (tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) -> tensor<?x?xi32>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.dim'(0x58bb0edcd9b0) {
%32 = "tensor.dim"(%11, %4) : (tensor<?x4xi32>, index) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'linalg.yield'(0x58bb0edbe4e0) {
"linalg.yield"(%39) : (i32) -> ()
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.addi'(0x58bb0edcd720) {
%39 = "arith.addi"(%arg5, %38) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.muli'(0x58bb0edcd6b0) {
%38 = "arith.muli"(%36, %37) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.extsi'(0x58bb0edcd640) {
%37 = "arith.extsi"(%arg4) : (i8) -> i32
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'linalg.matmul'(0x58bb0e352320) {
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.extsi'(0x58bb0edcd1c0) {
%36 = "arith.extsi"(%arg3) : (i8) -> i32
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'iree_linalg_ext.set_encoding'(0x58bb0edcf670) {
%30 = "iree_linalg_ext.set_encoding"(%29) : (tensor<?x?xi32>) -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.pad'(0x58bb0edaf000) {
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.yield'(0x58bb0ed9ae60) {
"tensor.yield"(%0) : (i32) -> ()
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'affine.apply'(0x58bb0edccfc0) {
%28 = "affine.apply"(%25#1, %1) <{map = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>}> : (index, index) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'affine.apply'(0x58bb0e356ac0) {
%27 = "affine.apply"(%25#0, %26) <{map = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>}> : (index, index) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.dim'(0x58bb0e356a10) {
%26 = "tensor.dim"(%11, %4) : (tensor<?x4xi32>, index) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'iree_linalg_ext.upper_bound_tile_size'(0x58bb0e35b0a0) {
%25:2 = "iree_linalg_ext.upper_bound_tile_size"() <{tensorType = tensor<?x4xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>}> : () -> (index, index)
* Pattern mlir::iree_compiler::(anonymous namespace)::UpperBoundTileSizeToConstantOpConversion : 'iree_linalg_ext.upper_bound_tile_size -> ()' {
Trying to match "mlir::iree_compiler::(anonymous namespace)::UpperBoundTileSizeToConstantOpConversion"
** Insert : 'arith.constant'(0x58bb0edcf910)
** Insert : 'arith.constant'(0x58bb0edaebf0)
** Replace : 'iree_linalg_ext.upper_bound_tile_size'(0x58bb0e35b0a0)
** Modified: 'affine.apply'(0x58bb0e356ac0)
** Modified: 'affine.apply'(0x58bb0edccfc0)
** Erase : 'iree_linalg_ext.upper_bound_tile_size'(0x58bb0e35b0a0)
"mlir::iree_compiler::(anonymous namespace)::UpperBoundTileSizeToConstantOpConversion" result 1
} -> success : pattern applied successfully
// *** IR Dump After Pattern Application ***
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%c0_i32 = arith.constant 0 : i32
%c4 = arith.constant 4 : index
%c0_i8 = arith.constant 0 : i8
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%7:2 = iree_linalg_ext.upper_bound_tile_size tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>> -> index, index
%dim = tensor.dim %2, %c0 : tensor<?x?xi8>
%8 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%7#0, %dim]
%dim_0 = tensor.dim %2, %c1 : tensor<?x?xi8>
%9 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%7#1, %dim_0]
%padded = tensor.pad %2 low[0, 0] high[%8, %9] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x?xi8> to tensor<?x?xi8>
%10 = iree_linalg_ext.set_encoding %padded : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%11:2 = iree_linalg_ext.upper_bound_tile_size tensor<?x4xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>> -> index, index
%dim_1 = tensor.dim %4, %c0 : tensor<?x4xi8>
%12 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%11#0, %dim_1]
%13 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%11#1, %c4]
%padded_2 = tensor.pad %4 low[0, 0] high[%12, %13] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x4xi8> to tensor<?x?xi8>
%14 = iree_linalg_ext.set_encoding %padded_2 : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c16 = arith.constant 16 : index
%c4_3 = arith.constant 4 : index
%dim_4 = tensor.dim %6, %c0 : tensor<?x4xi32>
%15 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c16, %dim_4]
%16 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c4_3, %c4]
%padded_5 = tensor.pad %6 low[0, 0] high[%15, %16] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i32 : i32
} : tensor<?x4xi32> to tensor<?x?xi32>
%17 = iree_linalg_ext.set_encoding %padded_5 : tensor<?x?xi32> -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%18 = linalg.matmul ins(%10, %14 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>, tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) outs(%17 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_6 = tensor.dim %6, %c0 : tensor<?x4xi32>
%19 = iree_linalg_ext.unset_encoding %18 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>> -> tensor<?x?xi32>
%extracted_slice = tensor.extract_slice %19[0, 0] [%dim_6, 4] [1, 1] : tensor<?x?xi32> to tensor<?x4xi32>
%20 = hal.tensor.export %extracted_slice "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %20 : !hal.buffer_view
}
} -> success : pattern matched
//===-------------------------------------------===//
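Here UpperBoundTileSizeToConstantOpConversion materialized the RESULT operand's upper-bound tile sizes as arith.constant 16 and 4 : index: for this znver4 target and an i8 x i8 -> i32 matmul with matmul_narrow_N = 4, the upper-bound RESULT tile is 16x4 (M-tile 16, N-tile 4). With both symbols of #map3 now constant wherever the dimension itself is static, the padding computation becomes foldable, as the next trace entries show.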
//===-------------------------------------------===//
Processing operation : 'affine.apply'(0x58bb0edccfc0) {
%29 = "affine.apply"(%26, %1) <{map = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>}> : (index, index) -> index
} -> success : operation was folded
//===-------------------------------------------===//
** Insert : 'arith.constant'(0x58bb0e35b090)
** Replace : 'affine.apply'(0x58bb0edccfc0)
** Modified: 'tensor.pad'(0x58bb0edaf000)
** Erase : 'affine.apply'(0x58bb0edccfc0)
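The fold is plain arithmetic: the N-dimension padding amount is #map3 with s0 = 4 (the N tile size) and s1 = 4 (the static N extent), i.e. -4 + ceildiv(4, 4) * 4 = -4 + 4 = 0. The affine.apply is therefore replaced by arith.constant 0 : index, and the tensor.pad's high padding along N becomes zero (%c0_5 in the dump below).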
// *** IR Dump After Successful Folding ***
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%c0_i32 = arith.constant 0 : i32
%c4 = arith.constant 4 : index
%c0_i8 = arith.constant 0 : i8
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%7:2 = iree_linalg_ext.upper_bound_tile_size tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>> -> index, index
%dim = tensor.dim %2, %c0 : tensor<?x?xi8>
%8 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%7#0, %dim]
%dim_0 = tensor.dim %2, %c1 : tensor<?x?xi8>
%9 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%7#1, %dim_0]
%padded = tensor.pad %2 low[0, 0] high[%8, %9] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x?xi8> to tensor<?x?xi8>
%10 = iree_linalg_ext.set_encoding %padded : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%11:2 = iree_linalg_ext.upper_bound_tile_size tensor<?x4xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>> -> index, index
%dim_1 = tensor.dim %4, %c0 : tensor<?x4xi8>
%12 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%11#0, %dim_1]
%13 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%11#1, %c4]
%padded_2 = tensor.pad %4 low[0, 0] high[%12, %13] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x4xi8> to tensor<?x?xi8>
%14 = iree_linalg_ext.set_encoding %padded_2 : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c16 = arith.constant 16 : index
%c4_3 = arith.constant 4 : index
%dim_4 = tensor.dim %6, %c0 : tensor<?x4xi32>
%15 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c16, %dim_4]
%c0_5 = arith.constant 0 : index
%padded_6 = tensor.pad %6 low[0, 0] high[%15, %c0_5] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i32 : i32
} : tensor<?x4xi32> to tensor<?x?xi32>
%16 = iree_linalg_ext.set_encoding %padded_6 : tensor<?x?xi32> -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%17 = linalg.matmul ins(%10, %14 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>, tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) outs(%16 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_7 = tensor.dim %6, %c0 : tensor<?x4xi32>
%18 = iree_linalg_ext.unset_encoding %17 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>> -> tensor<?x?xi32>
%extracted_slice = tensor.extract_slice %18[0, 0] [%dim_7, 4] [1, 1] : tensor<?x?xi32> to tensor<?x4xi32>
%19 = hal.tensor.export %extracted_slice "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %19 : !hal.buffer_view
}
//===-------------------------------------------===//
Processing operation : 'tensor.pad'(0x58bb0edaf000) {
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.constant'(0x58bb0e35b090) {
%29 = "arith.constant"() <{value = 0 : index}> : () -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'affine.apply'(0x58bb0e356ac0) {
%28 = "affine.apply"(%25, %27) <{map = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>}> : (index, index) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.constant'(0x58bb0edaebf0) {
%26 = "arith.constant"() <{value = 4 : index}> : () -> index
** Erase : 'arith.constant'(0x58bb0edaebf0)
} -> success : operation is trivially dead
//===-------------------------------------------===//
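The arith.constant 4 : index that the pattern inserted (%26) had its only use consumed by the fold above, so it is now trivially dead and the rewriter erases it.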
//===-------------------------------------------===//
Processing operation : 'arith.constant'(0x58bb0edcf910) {
%25 = "arith.constant"() <{value = 16 : index}> : () -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'iree_linalg_ext.set_encoding'(0x58bb0edcf280) {
%24 = "iree_linalg_ext.set_encoding"(%23) : (tensor<?x?xi8>) -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.pad'(0x58bb0e352520) {
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.yield'(0x58bb0eda33c0) {
"tensor.yield"(%2) : (i8) -> ()
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'affine.apply'(0x58bb0edad890) {
%22 = "affine.apply"(%19#1, %1) <{map = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>}> : (index, index) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'affine.apply'(0x58bb0edcf7f0) {
%21 = "affine.apply"(%19#0, %20) <{map = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>}> : (index, index) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.dim'(0x58bb0edaec60) {
%20 = "tensor.dim"(%9, %4) : (tensor<?x4xi8>, index) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'iree_linalg_ext.upper_bound_tile_size'(0x58bb0e3529d0) {
%19:2 = "iree_linalg_ext.upper_bound_tile_size"() <{tensorType = tensor<?x4xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>}> : () -> (index, index)
* Pattern mlir::iree_compiler::(anonymous namespace)::UpperBoundTileSizeToConstantOpConversion : 'iree_linalg_ext.upper_bound_tile_size -> ()' {
Trying to match "mlir::iree_compiler::(anonymous namespace)::UpperBoundTileSizeToConstantOpConversion"
** Insert : 'arith.constant'(0x58bb0edaebf0)
** Insert : 'arith.constant'(0x58bb0e24b470)
** Replace : 'iree_linalg_ext.upper_bound_tile_size'(0x58bb0e3529d0)
** Modified: 'affine.apply'(0x58bb0edcf7f0)
** Modified: 'affine.apply'(0x58bb0edad890)
** Erase : 'iree_linalg_ext.upper_bound_tile_size'(0x58bb0e3529d0)
"mlir::iree_compiler::(anonymous namespace)::UpperBoundTileSizeToConstantOpConversion" result 1
} -> success : pattern applied successfully
// *** IR Dump After Pattern Application ***
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%c0_i32 = arith.constant 0 : i32
%c4 = arith.constant 4 : index
%c0_i8 = arith.constant 0 : i8
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%7:2 = iree_linalg_ext.upper_bound_tile_size tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>> -> index, index
%dim = tensor.dim %2, %c0 : tensor<?x?xi8>
%8 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%7#0, %dim]
%dim_0 = tensor.dim %2, %c1 : tensor<?x?xi8>
%9 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%7#1, %dim_0]
%padded = tensor.pad %2 low[0, 0] high[%8, %9] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x?xi8> to tensor<?x?xi8>
%10 = iree_linalg_ext.set_encoding %padded : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c4_1 = arith.constant 4 : index
%c2 = arith.constant 2 : index
%dim_2 = tensor.dim %4, %c0 : tensor<?x4xi8>
%11 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c2, %dim_2]
%12 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c4_1, %c4]
%padded_3 = tensor.pad %4 low[0, 0] high[%11, %12] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x4xi8> to tensor<?x?xi8>
%13 = iree_linalg_ext.set_encoding %padded_3 : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c16 = arith.constant 16 : index
%dim_4 = tensor.dim %6, %c0 : tensor<?x4xi32>
%14 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c16, %dim_4]
%c0_5 = arith.constant 0 : index
%padded_6 = tensor.pad %6 low[0, 0] high[%14, %c0_5] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i32 : i32
} : tensor<?x4xi32> to tensor<?x?xi32>
%15 = iree_linalg_ext.set_encoding %padded_6 : tensor<?x?xi32> -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%16 = linalg.matmul ins(%10, %13 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>, tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) outs(%15 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_7 = tensor.dim %6, %c0 : tensor<?x4xi32>
%17 = iree_linalg_ext.unset_encoding %16 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>> -> tensor<?x?xi32>
%extracted_slice = tensor.extract_slice %17[0, 0] [%dim_7, 4] [1, 1] : tensor<?x?xi32> to tensor<?x4xi32>
%18 = hal.tensor.export %extracted_slice "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %18 : !hal.buffer_view
}
} -> success : pattern matched
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'affine.apply'(0x58bb0edad890) {
%23 = "affine.apply"(%19, %1) <{map = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>}> : (index, index) -> index
} -> success : operation was folded
//===-------------------------------------------===//
** Insert : 'arith.constant'(0x58bb0e3529c0)
** Replace : 'affine.apply'(0x58bb0edad890)
** Modified: 'tensor.pad'(0x58bb0e352520)
** Erase : 'affine.apply'(0x58bb0edad890)
// *** IR Dump After Successful Folding ***
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%c0_i32 = arith.constant 0 : i32
%c4 = arith.constant 4 : index
%c0_i8 = arith.constant 0 : i8
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%7:2 = iree_linalg_ext.upper_bound_tile_size tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>> -> index, index
%dim = tensor.dim %2, %c0 : tensor<?x?xi8>
%8 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%7#0, %dim]
%dim_0 = tensor.dim %2, %c1 : tensor<?x?xi8>
%9 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%7#1, %dim_0]
%padded = tensor.pad %2 low[0, 0] high[%8, %9] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x?xi8> to tensor<?x?xi8>
%10 = iree_linalg_ext.set_encoding %padded : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c4_1 = arith.constant 4 : index
%c2 = arith.constant 2 : index
%dim_2 = tensor.dim %4, %c0 : tensor<?x4xi8>
%11 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c2, %dim_2]
%c0_3 = arith.constant 0 : index
%padded_4 = tensor.pad %4 low[0, 0] high[%11, %c0_3] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x4xi8> to tensor<?x?xi8>
%12 = iree_linalg_ext.set_encoding %padded_4 : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c16 = arith.constant 16 : index
%dim_5 = tensor.dim %6, %c0 : tensor<?x4xi32>
%13 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c16, %dim_5]
%c0_6 = arith.constant 0 : index
%padded_7 = tensor.pad %6 low[0, 0] high[%13, %c0_6] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i32 : i32
} : tensor<?x4xi32> to tensor<?x?xi32>
%14 = iree_linalg_ext.set_encoding %padded_7 : tensor<?x?xi32> -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%15 = linalg.matmul ins(%10, %12 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>, tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) outs(%14 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_8 = tensor.dim %6, %c0 : tensor<?x4xi32>
%16 = iree_linalg_ext.unset_encoding %15 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>> -> tensor<?x?xi32>
%extracted_slice = tensor.extract_slice %16[0, 0] [%dim_8, 4] [1, 1] : tensor<?x?xi32> to tensor<?x4xi32>
%17 = hal.tensor.export %extracted_slice "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %17 : !hal.buffer_view
}
//===-------------------------------------------===//
Processing operation : 'tensor.pad'(0x58bb0e352520) {
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.constant'(0x58bb0e3529c0) {
%23 = "arith.constant"() <{value = 0 : index}> : () -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'affine.apply'(0x58bb0edcf7f0) {
%22 = "affine.apply"(%20, %21) <{map = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>}> : (index, index) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.constant'(0x58bb0e24b470) {
%20 = "arith.constant"() <{value = 2 : index}> : () -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.constant'(0x58bb0edaebf0) {
%19 = "arith.constant"() <{value = 4 : index}> : () -> index
** Erase : 'arith.constant'(0x58bb0edaebf0)
} -> success : operation is trivially dead
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'iree_linalg_ext.set_encoding'(0x58bb0edae7c0) {
%18 = "iree_linalg_ext.set_encoding"(%17) : (tensor<?x?xi8>) -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.pad'(0x58bb0e352410) {
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.yield'(0x58bb0eda3440) {
"tensor.yield"(%2) : (i8) -> ()
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'affine.apply'(0x58bb0e37d3a0) {
%16 = "affine.apply"(%12#1, %15) <{map = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>}> : (index, index) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.dim'(0x58bb0e37d2f0) {
%15 = "tensor.dim"(%7, %3) : (tensor<?x?xi8>, index) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'affine.apply'(0x58bb0e3527e0) {
%14 = "affine.apply"(%12#0, %13) <{map = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>}> : (index, index) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.dim'(0x58bb0e381570) {
%13 = "tensor.dim"(%7, %4) : (tensor<?x?xi8>, index) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'iree_linalg_ext.upper_bound_tile_size'(0x58bb0edbecd0) {
%12:2 = "iree_linalg_ext.upper_bound_tile_size"() <{tensorType = tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>}> : () -> (index, index)
* Pattern mlir::iree_compiler::(anonymous namespace)::UpperBoundTileSizeToConstantOpConversion : 'iree_linalg_ext.upper_bound_tile_size -> ()' {
Trying to match "mlir::iree_compiler::(anonymous namespace)::UpperBoundTileSizeToConstantOpConversion"
** Insert : 'arith.constant'(0x58bb0edaebf0)
** Insert : 'arith.constant'(0x58bb0ed97c70)
** Replace : 'iree_linalg_ext.upper_bound_tile_size'(0x58bb0edbecd0)
** Modified: 'affine.apply'(0x58bb0e3527e0)
** Modified: 'affine.apply'(0x58bb0e37d3a0)
** Erase : 'iree_linalg_ext.upper_bound_tile_size'(0x58bb0edbecd0)
"mlir::iree_compiler::(anonymous namespace)::UpperBoundTileSizeToConstantOpConversion" result 1
} -> success : pattern applied successfully
// *** IR Dump After Pattern Application ***
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%c0_i32 = arith.constant 0 : i32
%c4 = arith.constant 4 : index
%c0_i8 = arith.constant 0 : i8
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%c16 = arith.constant 16 : index
%c2 = arith.constant 2 : index
%dim = tensor.dim %2, %c0 : tensor<?x?xi8>
%7 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c16, %dim]
%dim_0 = tensor.dim %2, %c1 : tensor<?x?xi8>
%8 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c2, %dim_0]
%padded = tensor.pad %2 low[0, 0] high[%7, %8] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x?xi8> to tensor<?x?xi8>
%9 = iree_linalg_ext.set_encoding %padded : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c2_1 = arith.constant 2 : index
%dim_2 = tensor.dim %4, %c0 : tensor<?x4xi8>
%10 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c2_1, %dim_2]
%c0_3 = arith.constant 0 : index
%padded_4 = tensor.pad %4 low[0, 0] high[%10, %c0_3] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x4xi8> to tensor<?x?xi8>
%11 = iree_linalg_ext.set_encoding %padded_4 : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c16_5 = arith.constant 16 : index
%dim_6 = tensor.dim %6, %c0 : tensor<?x4xi32>
%12 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c16_5, %dim_6]
%c0_7 = arith.constant 0 : index
%padded_8 = tensor.pad %6 low[0, 0] high[%12, %c0_7] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i32 : i32
} : tensor<?x4xi32> to tensor<?x?xi32>
%13 = iree_linalg_ext.set_encoding %padded_8 : tensor<?x?xi32> -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%14 = linalg.matmul ins(%9, %11 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>, tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) outs(%13 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_9 = tensor.dim %6, %c0 : tensor<?x4xi32>
%15 = iree_linalg_ext.unset_encoding %14 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>> -> tensor<?x?xi32>
%extracted_slice = tensor.extract_slice %15[0, 0] [%dim_9, 4] [1, 1] : tensor<?x?xi32> to tensor<?x4xi32>
%16 = hal.tensor.export %extracted_slice "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %16 : !hal.buffer_view
}
} -> success : pattern matched
//===-------------------------------------------===//
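The map affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)> that recurs in the pad computations above gives the amount of high padding needed to round a dynamic size s1 up to the next multiple of a tile size s0 (16 for the M dimension, 2 for the K dimension here). A minimal Python model, with function names of my own choosing:

def ceildiv(a: int, b: int) -> int:
    # Same semantics as MLIR's `ceildiv` for non-negative operands.
    return -(-a // b)

def pad_amount(tile: int, dim: int) -> int:
    # affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>
    # with s0 = tile, s1 = dim.
    return -dim + ceildiv(dim, tile) * tile

assert pad_amount(16, 10) == 6   # 10 rows padded up to 16
assert pad_amount(2, 8) == 0     # already a multiple of the tile
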
//===-------------------------------------------===//
Processing operation : 'affine.apply'(0x58bb0e37d3a0) {
%17 = "affine.apply"(%13, %16) <{map = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>}> : (index, index) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'affine.apply'(0x58bb0e3527e0) {
%15 = "affine.apply"(%12, %14) <{map = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>}> : (index, index) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.constant'(0x58bb0ed97c70) {
%13 = "arith.constant"() <{value = 2 : index}> : () -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.constant'(0x58bb0edaebf0) {
%12 = "arith.constant"() <{value = 16 : index}> : () -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.import'(0x58bb0e381390) {
%11 = "hal.tensor.import"(%arg2, %10) {name = "input2", operandSegmentSizes = array<i32: 1, 1, 0>, target_encoding = tensor<?x4xi32>} : (!hal.buffer_view, index) -> tensor<?x4xi32>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0ed97d60) {
%10 = "hal.buffer_view.dim"(%arg2) {index = 0 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.import'(0x58bb0e3811b0) {
%9 = "hal.tensor.import"(%arg1, %8) {name = "input1", operandSegmentSizes = array<i32: 1, 1, 0>, target_encoding = tensor<?x4xi8>} : (!hal.buffer_view, index) -> tensor<?x4xi8>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0eda4180) {
%8 = "hal.buffer_view.dim"(%arg1) {index = 0 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.import'(0x58bb0e267df0) {
%7 = "hal.tensor.import"(%arg0, %5, %6) {name = "input0", operandSegmentSizes = array<i32: 1, 2, 0>, target_encoding = tensor<?x?xi8>} : (!hal.buffer_view, index, index) -> tensor<?x?xi8>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0e21cd00) {
%6 = "hal.buffer_view.dim"(%arg0) {index = 1 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0ed93100) {
%5 = "hal.buffer_view.dim"(%arg0) {index = 0 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.constant'(0x58bb0ed99730) {
%4 = "arith.constant"() <{value = 0 : index}> : () -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.constant'(0x58bb0edae850) {
%3 = "arith.constant"() <{value = 1 : index}> : () -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.constant'(0x58bb0edae8c0) {
%2 = "arith.constant"() <{value = 0 : i8}> : () -> i8
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.constant'(0x58bb0edccea0) {
%1 = "arith.constant"() <{value = 4 : index}> : () -> index
** Erase : 'arith.constant'(0x58bb0edccea0)
} -> success : operation is trivially dead
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.constant'(0x58bb0edcd070) {
%0 = "arith.constant"() <{value = 0 : i32}> : () -> i32
} -> failure : pattern failed to match
//===-------------------------------------------===//
** Replace : 'arith.constant'(0x58bb0e24b470)
** Modified: 'affine.apply'(0x58bb0edcf7f0)
** Erase : 'arith.constant'(0x58bb0e24b470)
** Replace : 'arith.constant'(0x58bb0e3529c0)
** Modified: 'tensor.pad'(0x58bb0e352520)
** Erase : 'arith.constant'(0x58bb0e3529c0)
** Replace : 'arith.constant'(0x58bb0edcf910)
** Modified: 'affine.apply'(0x58bb0e356ac0)
** Erase : 'arith.constant'(0x58bb0edcf910)
** Replace : 'arith.constant'(0x58bb0e35b090)
** Modified: 'tensor.pad'(0x58bb0edaf000)
** Erase : 'arith.constant'(0x58bb0e35b090)
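The Replace/Modified/Erase triplets just above record the rewrite driver deduplicating the index constants that the earlier IR materialized once per use site (%c2_1, %c16_5, %c0_3, %c0_7): each duplicate arith.constant is replaced by an equivalent existing one, its users (an affine.apply or tensor.pad) are modified to point at the survivor, and the duplicate is erased. A rough Python sketch of such value-based uniquing, purely illustrative and not the MLIR API:

def dedup_constants(ops):
    canonical = {}                     # constant value -> surviving op name
    survivors = []
    for name, value in ops:
        if value in canonical:
            print(f"** Replace : {name} with {canonical[value]}")
        else:
            canonical[value] = name
            survivors.append((name, value))
    return survivors

dedup_constants([("%c16", 16), ("%c2", 2), ("%c2_1", 2), ("%c16_5", 16)])
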
//===-------------------------------------------===//
Processing operation : 'util.return'(0x58bb0eda3240) {
"util.return"(%31) : (!hal.buffer_view) -> ()
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.export'(0x58bb0e381750) {
%31 = "hal.tensor.export"(%30, %11) {name = "output0", operandSegmentSizes = array<i32: 1, 1, 0>, source_encoding = tensor<?x4xi32>} : (tensor<?x4xi32>, index) -> !hal.buffer_view
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.extract_slice'(0x58bb0ed99810) {
%30 = "tensor.extract_slice"(%29, %28) <{operandSegmentSizes = array<i32: 1, 0, 1, 0>, static_offsets = array<i64: 0, 0>, static_sizes = array<i64: -9223372036854775808, 4>, static_strides = array<i64: 1, 1>}> : (tensor<?x?xi32>, index) -> tensor<?x4xi32>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'iree_linalg_ext.unset_encoding'(0x58bb0edcda60) {
%29 = "iree_linalg_ext.unset_encoding"(%27) : (tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) -> tensor<?x?xi32>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.dim'(0x58bb0edcd9b0) {
%28 = "tensor.dim"(%12, %5) : (tensor<?x4xi32>, index) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'linalg.yield'(0x58bb0edbe4e0) {
"linalg.yield"(%35) : (i32) -> ()
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.addi'(0x58bb0edcd720) {
%35 = "arith.addi"(%arg5, %34) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.muli'(0x58bb0edcd6b0) {
%34 = "arith.muli"(%32, %33) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.extsi'(0x58bb0edcd640) {
%33 = "arith.extsi"(%arg4) : (i8) -> i32
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'linalg.matmul'(0x58bb0e352320) {
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.extsi'(0x58bb0edcd1c0) {
%32 = "arith.extsi"(%arg3) : (i8) -> i32
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'iree_linalg_ext.set_encoding'(0x58bb0edcf670) {
%26 = "iree_linalg_ext.set_encoding"(%25) : (tensor<?x?xi32>) -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.yield'(0x58bb0ed9ae60) {
"tensor.yield"(%2) : (i32) -> ()
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.pad'(0x58bb0edaf000) {
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.dim'(0x58bb0e356a10) {
%23 = "tensor.dim"(%12, %5) : (tensor<?x4xi32>, index) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'affine.apply'(0x58bb0e356ac0) {
%24 = "affine.apply"(%1, %23) <{map = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>}> : (index, index) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'iree_linalg_ext.set_encoding'(0x58bb0edcf280) {
%22 = "iree_linalg_ext.set_encoding"(%21) : (tensor<?x?xi8>) -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.yield'(0x58bb0eda33c0) {
"tensor.yield"(%3) : (i8) -> ()
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.pad'(0x58bb0e352520) {
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.dim'(0x58bb0edaec60) {
%19 = "tensor.dim"(%10, %5) : (tensor<?x4xi8>, index) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'affine.apply'(0x58bb0edcf7f0) {
%20 = "affine.apply"(%0, %19) <{map = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>}> : (index, index) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'iree_linalg_ext.set_encoding'(0x58bb0edae7c0) {
%18 = "iree_linalg_ext.set_encoding"(%17) : (tensor<?x?xi8>) -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.pad'(0x58bb0e352410) {
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.yield'(0x58bb0eda3440) {
"tensor.yield"(%3) : (i8) -> ()
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'affine.apply'(0x58bb0e37d3a0) {
%16 = "affine.apply"(%0, %15) <{map = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>}> : (index, index) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.dim'(0x58bb0e37d2f0) {
%15 = "tensor.dim"(%8, %4) : (tensor<?x?xi8>, index) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'affine.apply'(0x58bb0e3527e0) {
%14 = "affine.apply"(%1, %13) <{map = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>}> : (index, index) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'tensor.dim'(0x58bb0e381570) {
%13 = "tensor.dim"(%8, %5) : (tensor<?x?xi8>, index) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.constant'(0x58bb0ed97c70) {
%0 = "arith.constant"() <{value = 2 : index}> : () -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.constant'(0x58bb0edaebf0) {
%1 = "arith.constant"() <{value = 16 : index}> : () -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.import'(0x58bb0e381390) {
%12 = "hal.tensor.import"(%arg2, %11) {name = "input2", operandSegmentSizes = array<i32: 1, 1, 0>, target_encoding = tensor<?x4xi32>} : (!hal.buffer_view, index) -> tensor<?x4xi32>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0ed97d60) {
%11 = "hal.buffer_view.dim"(%arg2) {index = 0 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.import'(0x58bb0e3811b0) {
%10 = "hal.tensor.import"(%arg1, %9) {name = "input1", operandSegmentSizes = array<i32: 1, 1, 0>, target_encoding = tensor<?x4xi8>} : (!hal.buffer_view, index) -> tensor<?x4xi8>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0eda4180) {
%9 = "hal.buffer_view.dim"(%arg1) {index = 0 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.tensor.import'(0x58bb0e267df0) {
%8 = "hal.tensor.import"(%arg0, %6, %7) {name = "input0", operandSegmentSizes = array<i32: 1, 2, 0>, target_encoding = tensor<?x?xi8>} : (!hal.buffer_view, index, index) -> tensor<?x?xi8>
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0e21cd00) {
%7 = "hal.buffer_view.dim"(%arg0) {index = 1 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'hal.buffer_view.dim'(0x58bb0ed93100) {
%6 = "hal.buffer_view.dim"(%arg0) {index = 0 : index} : (!hal.buffer_view) -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.constant'(0x58bb0ed99730) {
%5 = "arith.constant"() <{value = 0 : index}> : () -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.constant'(0x58bb0edae850) {
%4 = "arith.constant"() <{value = 1 : index}> : () -> index
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.constant'(0x58bb0edae8c0) {
%3 = "arith.constant"() <{value = 0 : i8}> : () -> i8
} -> failure : pattern failed to match
//===-------------------------------------------===//
//===-------------------------------------------===//
Processing operation : 'arith.constant'(0x58bb0edcd070) {
%2 = "arith.constant"() <{value = 0 : i32}> : () -> i32
} -> failure : pattern failed to match
//===-------------------------------------------===//
// -----// IR Dump After CPUMaterializeUpperBoundTileSize (iree-codegen-cpu-materialize-upper-bound-tile-size) //----- //
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%c2 = arith.constant 2 : index
%c16 = arith.constant 16 : index
%c0_i32 = arith.constant 0 : i32
%c0_i8 = arith.constant 0 : i8
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%dim = tensor.dim %2, %c0 : tensor<?x?xi8>
%7 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c16, %dim]
%dim_0 = tensor.dim %2, %c1 : tensor<?x?xi8>
%8 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c2, %dim_0]
%padded = tensor.pad %2 low[0, 0] high[%7, %8] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x?xi8> to tensor<?x?xi8>
%9 = iree_linalg_ext.set_encoding %padded : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_1 = tensor.dim %4, %c0 : tensor<?x4xi8>
%10 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c2, %dim_1]
%padded_2 = tensor.pad %4 low[0, 0] high[%10, %c0] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x4xi8> to tensor<?x?xi8>
%11 = iree_linalg_ext.set_encoding %padded_2 : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_3 = tensor.dim %6, %c0 : tensor<?x4xi32>
%12 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c16, %dim_3]
%padded_4 = tensor.pad %6 low[0, 0] high[%12, %c0] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i32 : i32
} : tensor<?x4xi32> to tensor<?x?xi32>
%13 = iree_linalg_ext.set_encoding %padded_4 : tensor<?x?xi32> -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%14 = linalg.matmul ins(%9, %11 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>, tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) outs(%13 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_5 = tensor.dim %6, %c0 : tensor<?x4xi32>
%15 = iree_linalg_ext.unset_encoding %14 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>> -> tensor<?x?xi32>
%extracted_slice = tensor.extract_slice %15[0, 0] [%dim_5, 4] [1, 1] : tensor<?x?xi32> to tensor<?x4xi32>
%16 = hal.tensor.export %extracted_slice "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %16 : !hal.buffer_view
}
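In the dump above (after CPUMaterializeUpperBoundTileSize) every pad amount is phrased against the upper-bound tiles: 16 for the M dimension of the LHS and of the result, 2 for the K dimension of the LHS and RHS, and no padding for the static N = 4, which matches matmul_narrow_N = 4 in the encodings. A small Python model of the three padded shapes, using made-up sample sizes M = 100, K = 33:

def ceildiv(a, b):
    return -(-a // b)

def round_up(dim, tile):
    return ceildiv(dim, tile) * tile

M, K, N = 100, 33, 4                             # M, K dynamic; N static
lhs_padded = (round_up(M, 16), round_up(K, 2))   # %padded   -> 112 x 34
rhs_padded = (round_up(K, 2), N)                 # %padded_2 -> 34 x 4
out_padded = (round_up(M, 16), N)                # %padded_4 -> 112 x 4
assert (lhs_padded, rhs_padded, out_padded) == ((112, 34), (34, 4), (112, 4))
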
// -----// IR Dump Before CPUMaterializeEncoding (iree-codegen-cpu-materialize-encoding) //----- //
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%c2 = arith.constant 2 : index
%c16 = arith.constant 16 : index
%c0_i32 = arith.constant 0 : i32
%c0_i8 = arith.constant 0 : i8
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%dim = tensor.dim %2, %c0 : tensor<?x?xi8>
%7 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c16, %dim]
%dim_0 = tensor.dim %2, %c1 : tensor<?x?xi8>
%8 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c2, %dim_0]
%padded = tensor.pad %2 low[0, 0] high[%7, %8] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x?xi8> to tensor<?x?xi8>
%9 = iree_linalg_ext.set_encoding %padded : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_1 = tensor.dim %4, %c0 : tensor<?x4xi8>
%10 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c2, %dim_1]
%padded_2 = tensor.pad %4 low[0, 0] high[%10, %c0] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x4xi8> to tensor<?x?xi8>
%11 = iree_linalg_ext.set_encoding %padded_2 : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_3 = tensor.dim %6, %c0 : tensor<?x4xi32>
%12 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c16, %dim_3]
%padded_4 = tensor.pad %6 low[0, 0] high[%12, %c0] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i32 : i32
} : tensor<?x4xi32> to tensor<?x?xi32>
%13 = iree_linalg_ext.set_encoding %padded_4 : tensor<?x?xi32> -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%14 = linalg.matmul ins(%9, %11 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>, tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) outs(%13 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_5 = tensor.dim %6, %c0 : tensor<?x4xi32>
%15 = iree_linalg_ext.unset_encoding %14 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>> -> tensor<?x?xi32>
%extracted_slice = tensor.extract_slice %15[0, 0] [%dim_5, 4] [1, 1] : tensor<?x?xi32> to tensor<?x4xi32>
%16 = hal.tensor.export %extracted_slice "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %16 : !hal.buffer_view
}
//===-------------------------------------------===//
Legalizing operation : 'util.func'(0x58bb0eda2e50) {
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'arith.constant'(0x58bb0ed97c70) {
%0 = "arith.constant"() <{value = 2 : index}> : () -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'arith.constant'(0x58bb0edaebf0) {
%1 = "arith.constant"() <{value = 16 : index}> : () -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'arith.constant'(0x58bb0edcd070) {
%2 = "arith.constant"() <{value = 0 : i32}> : () -> i32
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'arith.constant'(0x58bb0edae8c0) {
%3 = "arith.constant"() <{value = 0 : i8}> : () -> i8
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'arith.constant'(0x58bb0edae850) {
%4 = "arith.constant"() <{value = 1 : index}> : () -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'arith.constant'(0x58bb0ed99730) {
%5 = "arith.constant"() <{value = 0 : index}> : () -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'hal.buffer_view.dim'(0x58bb0ed93100) {
%6 = "hal.buffer_view.dim"(%arg0) {index = 0 : index} : (!hal.buffer_view) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'hal.buffer_view.dim'(0x58bb0e21cd00) {
%7 = "hal.buffer_view.dim"(%arg0) {index = 1 : index} : (!hal.buffer_view) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'hal.tensor.import'(0x58bb0e267df0) {
%8 = "hal.tensor.import"(%arg0, %6, %7) {name = "input0", operandSegmentSizes = array<i32: 1, 2, 0>, target_encoding = tensor<?x?xi8>} : (!hal.buffer_view, index, index) -> tensor<?x?xi8>
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'hal.buffer_view.dim'(0x58bb0eda4180) {
%9 = "hal.buffer_view.dim"(%arg1) {index = 0 : index} : (!hal.buffer_view) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'hal.tensor.import'(0x58bb0e3811b0) {
%10 = "hal.tensor.import"(%arg1, %9) {name = "input1", operandSegmentSizes = array<i32: 1, 1, 0>, target_encoding = tensor<?x4xi8>} : (!hal.buffer_view, index) -> tensor<?x4xi8>
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'hal.buffer_view.dim'(0x58bb0ed97d60) {
%11 = "hal.buffer_view.dim"(%arg2) {index = 0 : index} : (!hal.buffer_view) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'hal.tensor.import'(0x58bb0e381390) {
%12 = "hal.tensor.import"(%arg2, %11) {name = "input2", operandSegmentSizes = array<i32: 1, 1, 0>, target_encoding = tensor<?x4xi32>} : (!hal.buffer_view, index) -> tensor<?x4xi32>
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.dim'(0x58bb0e381570) {
%13 = "tensor.dim"(%8, %5) : (tensor<?x?xi8>, index) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'affine.apply'(0x58bb0e3527e0) {
%14 = "affine.apply"(%1, %13) <{map = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>}> : (index, index) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.dim'(0x58bb0e37d2f0) {
%15 = "tensor.dim"(%8, %4) : (tensor<?x?xi8>, index) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'affine.apply'(0x58bb0e37d3a0) {
%16 = "affine.apply"(%0, %15) <{map = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>}> : (index, index) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.pad'(0x58bb0e352410) {
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.yield'(0x58bb0eda3440) {
"tensor.yield"(%3) : (i8) -> ()
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'iree_linalg_ext.set_encoding'(0x58bb0edae7c0) {
%18 = "iree_linalg_ext.set_encoding"(%17) : (tensor<?x?xi8>) -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
* Fold {
} -> FAILURE : unable to fold
* Pattern : 'iree_linalg_ext.set_encoding -> ()' {
Trying to match "mlir::iree_compiler::(anonymous namespace)::SetEncodingOpToPackOpConversion"
** Insert : 'arith.constant'(0x58bb0edbecc0)
** Insert : 'tensor.dim'(0x58bb0edad890)
** Insert : 'arith.constant'(0x58bb0ed97950)
** Insert : 'tensor.dim'(0x58bb0edccfc0)
** Insert : 'affine.apply'(0x58bb0edcdaf0)
** Insert : 'affine.apply'(0x58bb0ed9ac80)
** Insert : 'tensor.empty'(0x58bb0edad7e0)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::tensor::detail::PackOpGenericAdaptorBase::Properties)
** Insert : 'tensor.pack'(0x58bb0e221200)
** Replace : 'iree_linalg_ext.set_encoding'(0x58bb0edae7c0)
"mlir::iree_compiler::(anonymous namespace)::SetEncodingOpToPackOpConversion" result 1
//===-------------------------------------------===//
Legalizing operation : 'arith.constant'(0x58bb0edbecc0) {
%18 = "arith.constant"() <{value = 0 : index}> : () -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.dim'(0x58bb0edad890) {
%19 = "tensor.dim"(%8, %18) : (tensor<?x?xi8>, index) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'arith.constant'(0x58bb0ed97950) {
%20 = "arith.constant"() <{value = 1 : index}> : () -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.dim'(0x58bb0edccfc0) {
%21 = "tensor.dim"(%8, %20) : (tensor<?x?xi8>, index) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'affine.apply'(0x58bb0edcdaf0) {
%22 = "affine.apply"(%19) <{map = affine_map<()[s0] -> (s0 ceildiv 16)>}> : (index) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'affine.apply'(0x58bb0ed9ac80) {
%23 = "affine.apply"(%21) <{map = affine_map<()[s0] -> (s0 ceildiv 2)>}> : (index) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.empty'(0x58bb0edad7e0) {
%24 = "tensor.empty"(%22, %23) : (index, index) -> tensor<?x?x16x2xi8>
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.pack'(0x58bb0e221200) {
%25 = "tensor.pack"(%8, %24, %3) <{inner_dims_pos = array<i64: 0, 1>, operandSegmentSizes = array<i32: 1, 1, 1, 0>, outer_dims_perm = array<i64: 0, 1>, static_inner_tiles = array<i64: 16, 2>}> : (tensor<?x?xi8>, tensor<?x?x16x2xi8>, i8) -> tensor<?x?x16x2xi8>
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
} -> SUCCESS : pattern applied successfully
// *** IR Dump After Pattern Application ***
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%c2 = arith.constant 2 : index
%c16 = arith.constant 16 : index
%c0_i32 = arith.constant 0 : i32
%c0_i8 = arith.constant 0 : i8
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%dim = tensor.dim %2, %c0 : tensor<?x?xi8>
%7 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c16, %dim]
%dim_0 = tensor.dim %2, %c1 : tensor<?x?xi8>
%8 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c2, %dim_0]
%padded = tensor.pad %2 low[0, 0] high[%7, %8] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x?xi8> to tensor<?x?xi8>
%c0_1 = arith.constant 0 : index
%dim_2 = tensor.dim %2, %c0_1 : tensor<?x?xi8>
%c1_3 = arith.constant 1 : index
%dim_4 = tensor.dim %2, %c1_3 : tensor<?x?xi8>
%9 = affine.apply affine_map<()[s0] -> (s0 ceildiv 16)>()[%dim_2]
%10 = affine.apply affine_map<()[s0] -> (s0 ceildiv 2)>()[%dim_4]
%11 = tensor.empty(%9, %10) : tensor<?x?x16x2xi8>
%pack = tensor.pack %2 padding_value(%c0_i8 : i8) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 2] into %11 : tensor<?x?xi8> -> tensor<?x?x16x2xi8>
%12 = iree_linalg_ext.set_encoding %padded : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_5 = tensor.dim %4, %c0 : tensor<?x4xi8>
%13 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c2, %dim_5]
%padded_6 = tensor.pad %4 low[0, 0] high[%13, %c0] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x4xi8> to tensor<?x?xi8>
%14 = iree_linalg_ext.set_encoding %padded_6 : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_7 = tensor.dim %6, %c0 : tensor<?x4xi32>
%15 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c16, %dim_7]
%padded_8 = tensor.pad %6 low[0, 0] high[%15, %c0] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i32 : i32
} : tensor<?x4xi32> to tensor<?x?xi32>
%16 = iree_linalg_ext.set_encoding %padded_8 : tensor<?x?xi32> -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%17 = linalg.matmul ins(%12, %14 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>, tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) outs(%16 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_9 = tensor.dim %6, %c0 : tensor<?x4xi32>
%18 = iree_linalg_ext.unset_encoding %17 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>> -> tensor<?x?xi32>
%extracted_slice = tensor.extract_slice %18[0, 0] [%dim_9, 4] [1, 1] : tensor<?x?xi32> to tensor<?x4xi32>
%19 = hal.tensor.export %extracted_slice "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %19 : !hal.buffer_view
}
} -> SUCCESS
//===-------------------------------------------===//
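This first SetEncodingOpToPackOpConversion application turns the LHS set_encoding into a tensor.pack with inner_dims_pos = [0, 1] and inner_tiles = [16, 2], so the ?x? i8 LHS is laid out as ?x?x16x2 with the outer sizes being the ceildiv of the source sizes by the tiles (the %9 and %10 affine.apply ops above). A Python check of that shape computation, with made-up sample sizes:

def ceildiv(a, b):
    return -(-a // b)

def lhs_packed_shape(M, K):
    # tensor.pack inner_dims_pos = [0, 1] inner_tiles = [16, 2]:
    # tensor<MxKxi8> -> tensor<ceil(M/16) x ceil(K/2) x 16 x 2 x i8>
    return (ceildiv(M, 16), ceildiv(K, 2), 16, 2)

assert lhs_packed_shape(100, 33) == (7, 17, 16, 2)
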
//===-------------------------------------------===//
Legalizing operation : 'tensor.dim'(0x58bb0edaec60) {
%27 = "tensor.dim"(%10, %5) : (tensor<?x4xi8>, index) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'affine.apply'(0x58bb0edcf7f0) {
%28 = "affine.apply"(%0, %27) <{map = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>}> : (index, index) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.pad'(0x58bb0e352520) {
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.yield'(0x58bb0eda33c0) {
"tensor.yield"(%3) : (i8) -> ()
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'iree_linalg_ext.set_encoding'(0x58bb0edcf280) {
%30 = "iree_linalg_ext.set_encoding"(%29) : (tensor<?x?xi8>) -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
* Fold {
} -> FAILURE : unable to fold
* Pattern : 'iree_linalg_ext.set_encoding -> ()' {
Trying to match "mlir::iree_compiler::(anonymous namespace)::SetEncodingOpToPackOpConversion"
** Insert : 'arith.constant'(0x58bb0edc9d80)
** Insert : 'tensor.dim'(0x58bb0edccf10)
** Insert : 'affine.apply'(0x58bb0edafee0)
** Insert : 'tensor.empty'(0x58bb0edaff70)
** Insert : 'tensor.pack'(0x58bb0e21dcf0)
** Replace : 'iree_linalg_ext.set_encoding'(0x58bb0edcf280)
"mlir::iree_compiler::(anonymous namespace)::SetEncodingOpToPackOpConversion" result 1
//===-------------------------------------------===//
Legalizing operation : 'arith.constant'(0x58bb0edc9d80) {
%30 = "arith.constant"() <{value = 0 : index}> : () -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.dim'(0x58bb0edccf10) {
%31 = "tensor.dim"(%10, %30) : (tensor<?x4xi8>, index) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'affine.apply'(0x58bb0edafee0) {
%32 = "affine.apply"(%31) <{map = affine_map<()[s0] -> (s0 ceildiv 2)>}> : (index) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.empty'(0x58bb0edaff70) {
%33 = "tensor.empty"(%32) : (index) -> tensor<1x?x4x2xi8>
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.pack'(0x58bb0e21dcf0) {
%34 = "tensor.pack"(%10, %33, %3) <{inner_dims_pos = array<i64: 1, 0>, operandSegmentSizes = array<i32: 1, 1, 1, 0>, outer_dims_perm = array<i64: 1, 0>, static_inner_tiles = array<i64: 4, 2>}> : (tensor<?x4xi8>, tensor<1x?x4x2xi8>, i8) -> tensor<1x?x4x2xi8>
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
} -> SUCCESS : pattern applied successfully
// *** IR Dump After Pattern Application ***
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%c2 = arith.constant 2 : index
%c16 = arith.constant 16 : index
%c0_i32 = arith.constant 0 : i32
%c0_i8 = arith.constant 0 : i8
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%dim = tensor.dim %2, %c0 : tensor<?x?xi8>
%7 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c16, %dim]
%dim_0 = tensor.dim %2, %c1 : tensor<?x?xi8>
%8 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c2, %dim_0]
%padded = tensor.pad %2 low[0, 0] high[%7, %8] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x?xi8> to tensor<?x?xi8>
%c0_1 = arith.constant 0 : index
%dim_2 = tensor.dim %2, %c0_1 : tensor<?x?xi8>
%c1_3 = arith.constant 1 : index
%dim_4 = tensor.dim %2, %c1_3 : tensor<?x?xi8>
%9 = affine.apply affine_map<()[s0] -> (s0 ceildiv 16)>()[%dim_2]
%10 = affine.apply affine_map<()[s0] -> (s0 ceildiv 2)>()[%dim_4]
%11 = tensor.empty(%9, %10) : tensor<?x?x16x2xi8>
%pack = tensor.pack %2 padding_value(%c0_i8 : i8) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 2] into %11 : tensor<?x?xi8> -> tensor<?x?x16x2xi8>
%12 = iree_linalg_ext.set_encoding %padded : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_5 = tensor.dim %4, %c0 : tensor<?x4xi8>
%13 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c2, %dim_5]
%padded_6 = tensor.pad %4 low[0, 0] high[%13, %c0] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x4xi8> to tensor<?x?xi8>
%c0_7 = arith.constant 0 : index
%dim_8 = tensor.dim %4, %c0_7 : tensor<?x4xi8>
%14 = affine.apply affine_map<()[s0] -> (s0 ceildiv 2)>()[%dim_8]
%15 = tensor.empty(%14) : tensor<1x?x4x2xi8>
%pack_9 = tensor.pack %4 padding_value(%c0_i8 : i8) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [4, 2] into %15 : tensor<?x4xi8> -> tensor<1x?x4x2xi8>
%16 = iree_linalg_ext.set_encoding %padded_6 : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_10 = tensor.dim %6, %c0 : tensor<?x4xi32>
%17 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c16, %dim_10]
%padded_11 = tensor.pad %6 low[0, 0] high[%17, %c0] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i32 : i32
} : tensor<?x4xi32> to tensor<?x?xi32>
%18 = iree_linalg_ext.set_encoding %padded_11 : tensor<?x?xi32> -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%19 = linalg.matmul ins(%12, %16 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>, tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) outs(%18 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_12 = tensor.dim %6, %c0 : tensor<?x4xi32>
%20 = iree_linalg_ext.unset_encoding %19 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>> -> tensor<?x?xi32>
%extracted_slice = tensor.extract_slice %20[0, 0] [%dim_12, 4] [1, 1] : tensor<?x?xi32> to tensor<?x4xi32>
%21 = hal.tensor.export %extracted_slice "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %21 : !hal.buffer_view
}
} -> SUCCESS
//===-------------------------------------------===//
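The RHS pack is the more involved one: inner_dims_pos = [1, 0] with inner_tiles = [4, 2] tiles the static N dimension by 4 and the K dimension by 2, and outer_dims_perm = [1, 0] then swaps the outer dims, so the N-outer extent ceildiv(4, 4) = 1 comes first and the result is tensor<1x?x4x2xi8>. A Python model of that layout, sample K made up:

def ceildiv(a, b):
    return -(-a // b)

def rhs_packed_shape(K, N=4):
    # inner_dims_pos = [1, 0], inner_tiles = [4, 2]: dim 1 tiled by 4,
    # dim 0 tiled by 2; outer_dims_perm = [1, 0] swaps the outer dims.
    outer = (ceildiv(K, 2), ceildiv(N, 4))   # source order before the perm
    return (outer[1], outer[0], 4, 2)

assert rhs_packed_shape(33) == (1, 17, 4, 2)   # i.e. tensor<1x?x4x2xi8>
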
//===-------------------------------------------===//
Legalizing operation : 'tensor.dim'(0x58bb0e356a10) {
%36 = "tensor.dim"(%12, %5) : (tensor<?x4xi32>, index) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'affine.apply'(0x58bb0e356ac0) {
%37 = "affine.apply"(%1, %36) <{map = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>}> : (index, index) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.pad'(0x58bb0edaf000) {
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.yield'(0x58bb0ed9ae60) {
"tensor.yield"(%2) : (i32) -> ()
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'iree_linalg_ext.set_encoding'(0x58bb0edcf670) {
%39 = "iree_linalg_ext.set_encoding"(%38) : (tensor<?x?xi32>) -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
* Fold {
} -> FAILURE : unable to fold
* Pattern : 'iree_linalg_ext.set_encoding -> ()' {
Trying to match "mlir::iree_compiler::(anonymous namespace)::SetEncodingOpToPackOpConversion"
** Insert : 'arith.constant'(0x58bb0edc3550)
** Insert : 'tensor.dim'(0x58bb0e25ec00)
** Insert : 'affine.apply'(0x58bb0edc1920)
** Insert : 'tensor.empty'(0x58bb0edc3630)
** Insert : 'tensor.pack'(0x58bb0e21db20)
** Replace : 'iree_linalg_ext.set_encoding'(0x58bb0edcf670)
"mlir::iree_compiler::(anonymous namespace)::SetEncodingOpToPackOpConversion" result 1
//===-------------------------------------------===//
Legalizing operation : 'arith.constant'(0x58bb0edc3550) {
%39 = "arith.constant"() <{value = 0 : index}> : () -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.dim'(0x58bb0e25ec00) {
%40 = "tensor.dim"(%12, %39) : (tensor<?x4xi32>, index) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'affine.apply'(0x58bb0edc1920) {
%41 = "affine.apply"(%40) <{map = affine_map<()[s0] -> (s0 ceildiv 16)>}> : (index) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.empty'(0x58bb0edc3630) {
%42 = "tensor.empty"(%41) : (index) -> tensor<?x1x16x4xi32>
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.pack'(0x58bb0e21db20) {
%43 = "tensor.pack"(%12, %42, %2) <{inner_dims_pos = array<i64: 0, 1>, operandSegmentSizes = array<i32: 1, 1, 1, 0>, outer_dims_perm = array<i64: 0, 1>, static_inner_tiles = array<i64: 16, 4>}> : (tensor<?x4xi32>, tensor<?x1x16x4xi32>, i32) -> tensor<?x1x16x4xi32>
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
} -> SUCCESS : pattern applied successfully
// *** IR Dump After Pattern Application ***
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%c2 = arith.constant 2 : index
%c16 = arith.constant 16 : index
%c0_i32 = arith.constant 0 : i32
%c0_i8 = arith.constant 0 : i8
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%dim = tensor.dim %2, %c0 : tensor<?x?xi8>
%7 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c16, %dim]
%dim_0 = tensor.dim %2, %c1 : tensor<?x?xi8>
%8 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c2, %dim_0]
%padded = tensor.pad %2 low[0, 0] high[%7, %8] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x?xi8> to tensor<?x?xi8>
%c0_1 = arith.constant 0 : index
%dim_2 = tensor.dim %2, %c0_1 : tensor<?x?xi8>
%c1_3 = arith.constant 1 : index
%dim_4 = tensor.dim %2, %c1_3 : tensor<?x?xi8>
%9 = affine.apply affine_map<()[s0] -> (s0 ceildiv 16)>()[%dim_2]
%10 = affine.apply affine_map<()[s0] -> (s0 ceildiv 2)>()[%dim_4]
%11 = tensor.empty(%9, %10) : tensor<?x?x16x2xi8>
%pack = tensor.pack %2 padding_value(%c0_i8 : i8) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 2] into %11 : tensor<?x?xi8> -> tensor<?x?x16x2xi8>
%12 = iree_linalg_ext.set_encoding %padded : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_5 = tensor.dim %4, %c0 : tensor<?x4xi8>
%13 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c2, %dim_5]
%padded_6 = tensor.pad %4 low[0, 0] high[%13, %c0] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x4xi8> to tensor<?x?xi8>
%c0_7 = arith.constant 0 : index
%dim_8 = tensor.dim %4, %c0_7 : tensor<?x4xi8>
%14 = affine.apply affine_map<()[s0] -> (s0 ceildiv 2)>()[%dim_8]
%15 = tensor.empty(%14) : tensor<1x?x4x2xi8>
%pack_9 = tensor.pack %4 padding_value(%c0_i8 : i8) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [4, 2] into %15 : tensor<?x4xi8> -> tensor<1x?x4x2xi8>
%16 = iree_linalg_ext.set_encoding %padded_6 : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_10 = tensor.dim %6, %c0 : tensor<?x4xi32>
%17 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c16, %dim_10]
%padded_11 = tensor.pad %6 low[0, 0] high[%17, %c0] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i32 : i32
} : tensor<?x4xi32> to tensor<?x?xi32>
%c0_12 = arith.constant 0 : index
%dim_13 = tensor.dim %6, %c0_12 : tensor<?x4xi32>
%18 = affine.apply affine_map<()[s0] -> (s0 ceildiv 16)>()[%dim_13]
%19 = tensor.empty(%18) : tensor<?x1x16x4xi32>
%pack_14 = tensor.pack %6 padding_value(%c0_i32 : i32) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 4] into %19 : tensor<?x4xi32> -> tensor<?x1x16x4xi32>
%20 = iree_linalg_ext.set_encoding %padded_11 : tensor<?x?xi32> -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%21 = linalg.matmul ins(%12, %16 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>, tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) outs(%20 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_15 = tensor.dim %6, %c0 : tensor<?x4xi32>
%22 = iree_linalg_ext.unset_encoding %21 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>> -> tensor<?x?xi32>
%extracted_slice = tensor.extract_slice %22[0, 0] [%dim_15, 4] [1, 1] : tensor<?x?xi32> to tensor<?x4xi32>
%23 = hal.tensor.export %extracted_slice "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %23 : !hal.buffer_view
}
} -> SUCCESS
//===-------------------------------------------===//
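// --- Note (annotation, not compiler output): the SetEncodingOpToPackOpConversion
// applications above lower each iree_linalg_ext.set_encoding into a tensor.pack
// whose inner_tiles come from the MaterializeEncodingInfo resolved for this target.
// A minimal, standalone sketch of the resulting IR with hypothetical static shapes
// (a 16x4 tile on a 32x4 i32 source, so no padding_value is needed):
func.func @pack_sketch(%src: tensor<32x4xi32>) -> tensor<2x1x16x4xi32> {
  // ceil(32/16) = 2 and ceil(4/4) = 1 outer tiles around 16x4 inner tiles.
  %init = tensor.empty() : tensor<2x1x16x4xi32>
  %packed = tensor.pack %src outer_dims_perm = [0, 1] inner_dims_pos = [0, 1]
      inner_tiles = [16, 4] into %init : tensor<32x4xi32> -> tensor<2x1x16x4xi32>
  return %packed : tensor<2x1x16x4xi32>
}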
//===-------------------------------------------===//
Legalizing operation : 'linalg.matmul'(0x58bb0e352320) {
* Fold {
} -> FAILURE : unable to fold
* Pattern : 'linalg.matmul -> ()' {
Trying to match "mlir::iree_compiler::(anonymous namespace)::MaterializeContractionOp"
--------------------------------------------------
TYPE CONVERTER:
MaterializeEncodingInfo:
> innerTileSizes[ 16 2 ]
> innerDimsPos[ 0 1 ]
> outerDimsPerm[ 0 1 ]
FROM:
tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
TO:
tensor<?x?x16x2xi8>
---------------------------------------
--------------------------------------------------
TYPE CONVERTER:
MaterializeEncodingInfo:
> innerTileSizes[ 4 2 ]
> innerDimsPos[ 1 0 ]
> outerDimsPerm[ 1 0 ]
FROM:
tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
TO:
tensor<1x?x4x2xi8>
---------------------------------------
--------------------------------------------------
TYPE CONVERTER:
MaterializeEncodingInfo:
> innerTileSizes[ 16 4 ]
> innerDimsPos[ 0 1 ]
> outerDimsPerm[ 0 1 ]
FROM:
tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
...!!!!!! TRANSPOSE !!!!!!...
--------------------------------------------------
TYPE CONVERTER:
MaterializeEncodingInfo:
> innerTileSizes[ 4 16 ]
> innerDimsPos[ 0 1 ]
> outerDimsPerm[ 0 1 ]
FROM:
tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<4x?xi32>, matmul_narrow_M = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d2, d0)>, affine_map<(d0, d1, d2) -> (d1, d0)>]>>
TO:
tensor<1x?x4x16xi32>
---------------------------------------
** Insert Block into detached Region (nullptr parent op)
** Insert : 'linalg.mmt4d'(0x58bb0e21d950)
lowerContractionOpWithEncoding result: %22 = linalg.mmt4d ins(%pack_9, %pack : tensor<1x?x4x2xi8>, tensor<?x?x16x2xi8>) outs(%20 : tensor<1x?x4x16xi32>) -> tensor<1x?x4x16xi32>
** Replace : 'linalg.matmul'(0x58bb0e352320)
"mlir::iree_compiler::(anonymous namespace)::MaterializeContractionOp" result 1
//===-------------------------------------------===//
Legalizing operation : 'linalg.mmt4d'(0x58bb0e21d950) {
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
} -> SUCCESS : pattern applied successfully
// *** IR Dump After Pattern Application ***
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%c2 = arith.constant 2 : index
%c16 = arith.constant 16 : index
%c0_i32 = arith.constant 0 : i32
%c0_i8 = arith.constant 0 : i8
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%dim = tensor.dim %2, %c0 : tensor<?x?xi8>
%7 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c16, %dim]
%dim_0 = tensor.dim %2, %c1 : tensor<?x?xi8>
%8 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c2, %dim_0]
%padded = tensor.pad %2 low[0, 0] high[%7, %8] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x?xi8> to tensor<?x?xi8>
%c0_1 = arith.constant 0 : index
%dim_2 = tensor.dim %2, %c0_1 : tensor<?x?xi8>
%c1_3 = arith.constant 1 : index
%dim_4 = tensor.dim %2, %c1_3 : tensor<?x?xi8>
%9 = affine.apply affine_map<()[s0] -> (s0 ceildiv 16)>()[%dim_2]
%10 = affine.apply affine_map<()[s0] -> (s0 ceildiv 2)>()[%dim_4]
%11 = tensor.empty(%9, %10) : tensor<?x?x16x2xi8>
%pack = tensor.pack %2 padding_value(%c0_i8 : i8) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 2] into %11 : tensor<?x?xi8> -> tensor<?x?x16x2xi8>
%12 = iree_linalg_ext.set_encoding %padded : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_5 = tensor.dim %4, %c0 : tensor<?x4xi8>
%13 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c2, %dim_5]
%padded_6 = tensor.pad %4 low[0, 0] high[%13, %c0] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x4xi8> to tensor<?x?xi8>
%c0_7 = arith.constant 0 : index
%dim_8 = tensor.dim %4, %c0_7 : tensor<?x4xi8>
%14 = affine.apply affine_map<()[s0] -> (s0 ceildiv 2)>()[%dim_8]
%15 = tensor.empty(%14) : tensor<1x?x4x2xi8>
%pack_9 = tensor.pack %4 padding_value(%c0_i8 : i8) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [4, 2] into %15 : tensor<?x4xi8> -> tensor<1x?x4x2xi8>
%16 = iree_linalg_ext.set_encoding %padded_6 : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_10 = tensor.dim %6, %c0 : tensor<?x4xi32>
%17 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c16, %dim_10]
%padded_11 = tensor.pad %6 low[0, 0] high[%17, %c0] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i32 : i32
} : tensor<?x4xi32> to tensor<?x?xi32>
%c0_12 = arith.constant 0 : index
%dim_13 = tensor.dim %6, %c0_12 : tensor<?x4xi32>
%18 = affine.apply affine_map<()[s0] -> (s0 ceildiv 16)>()[%dim_13]
%19 = tensor.empty(%18) : tensor<?x1x16x4xi32>
%pack_14 = tensor.pack %6 padding_value(%c0_i32 : i32) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 4] into %19 : tensor<?x4xi32> -> tensor<?x1x16x4xi32>
%20 = builtin.unrealized_conversion_cast %pack_14 : tensor<?x1x16x4xi32> to tensor<1x?x4x16xi32>
%21 = iree_linalg_ext.set_encoding %padded_11 : tensor<?x?xi32> -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%22 = linalg.mmt4d ins(%pack_9, %pack : tensor<1x?x4x2xi8>, tensor<?x?x16x2xi8>) outs(%20 : tensor<1x?x4x16xi32>) -> tensor<1x?x4x16xi32>
%23 = linalg.matmul ins(%12, %16 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>, tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) outs(%21 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_15 = tensor.dim %6, %c0 : tensor<?x4xi32>
%24 = iree_linalg_ext.unset_encoding %23 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>> -> tensor<?x?xi32>
%extracted_slice = tensor.extract_slice %24[0, 0] [%dim_15, 4] [1, 1] : tensor<?x?xi32> to tensor<?x4xi32>
%25 = hal.tensor.export %extracted_slice "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %25 : !hal.buffer_view
}
} -> SUCCESS
//===-------------------------------------------===//
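// --- Note (annotation, not compiler output): the "!!!!!! TRANSPOSE !!!!!!" marker
// above is where the type converter rewrites the narrow-N problem (matmul_narrow_N
// = 4, smaller than the 16-wide result tile) into the equivalent narrow-M problem,
// using C = A*B  <=>  transpose(C) = transpose(B) * transpose(A). That is why the
// RESULT tile flips from [16, 4] to [4, 16], why the linalg.mmt4d takes the RHS
// pack (%pack_9) as its first input, and why a builtin.unrealized_conversion_cast
// bridges tensor<?x1x16x4xi32> to tensor<1x?x4x16xi32> until the transpose is
// materialized. A standalone mmt4d sketch with hypothetical static sizes, keeping
// the tiles resolved above (M0=4, N0=16, K0=2):
func.func @mmt4d_sketch(%lhs: tensor<1x8x4x2xi8>, %rhs: tensor<3x8x16x2xi8>,
                        %acc: tensor<1x3x4x16xi32>) -> tensor<1x3x4x16xi32> {
  // linalg.mmt4d contracts M1xK1xM0xK0 with N1xK1xN0xK0 into M1xN1xM0xN0,
  // accumulating i8*i8 products into i32.
  %0 = linalg.mmt4d ins(%lhs, %rhs : tensor<1x8x4x2xi8>, tensor<3x8x16x2xi8>)
      outs(%acc : tensor<1x3x4x16xi32>) -> tensor<1x3x4x16xi32>
  return %0 : tensor<1x3x4x16xi32>
}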
//===-------------------------------------------===//
Legalizing operation : 'arith.extsi'(0x58bb0edcd1c0) {
%52 = "arith.extsi"(%arg3) : (i8) -> i32
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'arith.extsi'(0x58bb0edcd640) {
%53 = "arith.extsi"(%arg4) : (i8) -> i32
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'arith.muli'(0x58bb0edcd6b0) {
%54 = "arith.muli"(%52, %53) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'arith.addi'(0x58bb0edcd720) {
%55 = "arith.addi"(%arg5, %54) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'linalg.yield'(0x58bb0edbe4e0) {
"linalg.yield"(%55) : (i32) -> ()
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.dim'(0x58bb0edcd9b0) {
%48 = "tensor.dim"(%12, %5) : (tensor<?x4xi32>, index) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'iree_linalg_ext.unset_encoding'(0x58bb0edcda60) {
%49 = "iree_linalg_ext.unset_encoding"(%47) : (tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) -> tensor<?x?xi32>
* Fold {
} -> FAILURE : unable to fold
* Pattern : 'iree_linalg_ext.unset_encoding -> ()' {
Trying to match "mlir::iree_compiler::(anonymous namespace)::UnsetEncodingOpToUnPackOpConversion"
lowerUnsetEncodingToUnpackOp sees:
encodingOp:
%24 = iree_linalg_ext.unset_encoding %23 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>> -> tensor<?x?xi32>
packedValue:
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::AtLeastNOperands<2>::Impl<Empty>)
%22 = linalg.mmt4d ins(%pack_9, %pack : tensor<1x?x4x2xi8>, tensor<?x?x16x2xi8>) outs(%20 : tensor<1x?x4x16xi32>) -> tensor<1x?x4x16xi32>
** Insert : 'arith.constant'(0x58bb0edc5c60)
** Insert : 'tensor.dim'(0x58bb0edc2aa0)
** Insert : 'tensor.empty'(0x58bb0edc3040)
** Insert Block into detached Region (nullptr parent op)
** Insert : 'linalg.yield'(0x58bb0edc5ef0)
** Insert : 'linalg.transpose'(0x58bb0e20b160)
** Insert : 'arith.constant'(0x58bb0edc5cd0)
** Insert : 'tensor.dim'(0x58bb0ed976d0)
** Insert : 'arith.constant'(0x58bb0edc5db0)
** Insert : 'tensor.dim'(0x58bb0edc5e20)
** Insert : 'tensor.empty'(0x58bb0edc5fb0)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::tensor::detail::UnPackOpGenericAdaptorBase::Properties)
** Insert : 'tensor.unpack'(0x58bb0edce940)
** Replace : 'iree_linalg_ext.unset_encoding'(0x58bb0edcda60)
"mlir::iree_compiler::(anonymous namespace)::UnsetEncodingOpToUnPackOpConversion" result 1
//===-------------------------------------------===//
Legalizing operation : 'arith.constant'(0x58bb0edc5c60) {
%49 = "arith.constant"() <{value = 1 : index}> : () -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.dim'(0x58bb0edc2aa0) {
%50 = "tensor.dim"(%46, %49) : (tensor<1x?x4x16xi32>, index) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.empty'(0x58bb0edc3040) {
%51 = "tensor.empty"(%50) : (index) -> tensor<?x1x16x4xi32>
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'linalg.yield'(0x58bb0edc5ef0) {
"linalg.yield"(%arg3) : (i32) -> ()
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'linalg.transpose'(0x58bb0e20b160) {
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'arith.constant'(0x58bb0edc5cd0) {
%53 = "arith.constant"() <{value = 0 : index}> : () -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.dim'(0x58bb0ed976d0) {
%54 = "tensor.dim"(%47, %53) : (tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>, index) -> index
* Fold {
} -> FAILURE : unable to fold
* Pattern : 'tensor.dim -> ()' {
Trying to match "(anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>"
** Insert : 'arith.constant'(0x58bb0edc60f0)
** Insert : 'tensor.dim'(0x58bb0edc6160)
** Insert : 'arith.constant'(0x58bb0edc6210)
** Insert : 'tensor.dim'(0x58bb0edc6280)
** Insert : 'arith.constant'(0x58bb0edc6330)
** Insert : 'tensor.dim'(0x58bb0edc63a0)
** Insert : 'arith.constant'(0x58bb0edc6450)
** Insert : 'tensor.dim'(0x58bb0edc64c0)
** Insert : 'arith.constant'(0x58bb0edc6570)
** Insert : 'tensor.dim'(0x58bb0edc65e0)
** Insert : 'arith.constant'(0x58bb0edc66c0)
** Insert : 'tensor.dim'(0x58bb0edc6760)
** Replace : 'tensor.dim'(0x58bb0ed976d0)
"(anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>" result 1
//===-------------------------------------------===//
Legalizing operation : 'arith.constant'(0x58bb0edc60f0) {
%54 = "arith.constant"() <{value = 0 : index}> : () -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.dim'(0x58bb0edc6160) {
%55 = "tensor.dim"(%26, %54) : (tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>, index) -> index
* Fold {
} -> FAILURE : unable to fold
* Pattern : 'tensor.dim -> ()' {
Trying to match "(anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>"
** Insert : 'arith.constant'(0x58bb0edc6870)
** Insert : 'tensor.dim'(0x58bb0edc68e0)
** Insert : 'arith.constant'(0x58bb0edc6990)
** Insert : 'tensor.dim'(0x58bb0edc6a00)
** Replace : 'tensor.dim'(0x58bb0edc6160)
"(anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>" result 1
//===-------------------------------------------===//
Legalizing operation : 'arith.constant'(0x58bb0edc6870) {
%26 = "arith.constant"() <{value = 0 : index}> : () -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.dim'(0x58bb0edc68e0) {
%27 = "tensor.dim"(%17, %26) : (tensor<?x?xi8>, index) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'arith.constant'(0x58bb0edc6990) {
%28 = "arith.constant"() <{value = 1 : index}> : () -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.dim'(0x58bb0edc6a00) {
%29 = "tensor.dim"(%17, %28) : (tensor<?x?xi8>, index) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
} -> SUCCESS : pattern applied successfully
// *** IR Dump After Pattern Application ***
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%c2 = arith.constant 2 : index
%c16 = arith.constant 16 : index
%c0_i32 = arith.constant 0 : i32
%c0_i8 = arith.constant 0 : i8
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%dim = tensor.dim %2, %c0 : tensor<?x?xi8>
%7 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c16, %dim]
%dim_0 = tensor.dim %2, %c1 : tensor<?x?xi8>
%8 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c2, %dim_0]
%padded = tensor.pad %2 low[0, 0] high[%7, %8] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x?xi8> to tensor<?x?xi8>
%c0_1 = arith.constant 0 : index
%dim_2 = tensor.dim %2, %c0_1 : tensor<?x?xi8>
%c1_3 = arith.constant 1 : index
%dim_4 = tensor.dim %2, %c1_3 : tensor<?x?xi8>
%9 = affine.apply affine_map<()[s0] -> (s0 ceildiv 16)>()[%dim_2]
%10 = affine.apply affine_map<()[s0] -> (s0 ceildiv 2)>()[%dim_4]
%11 = tensor.empty(%9, %10) : tensor<?x?x16x2xi8>
%pack = tensor.pack %2 padding_value(%c0_i8 : i8) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 2] into %11 : tensor<?x?xi8> -> tensor<?x?x16x2xi8>
%c0_5 = arith.constant 0 : index
%dim_6 = tensor.dim %padded, %c0_5 : tensor<?x?xi8>
%c1_7 = arith.constant 1 : index
%dim_8 = tensor.dim %padded, %c1_7 : tensor<?x?xi8>
%12 = iree_linalg_ext.set_encoding %padded : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_9 = tensor.dim %4, %c0 : tensor<?x4xi8>
%13 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c2, %dim_9]
%padded_10 = tensor.pad %4 low[0, 0] high[%13, %c0] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x4xi8> to tensor<?x?xi8>
%c0_11 = arith.constant 0 : index
%dim_12 = tensor.dim %4, %c0_11 : tensor<?x4xi8>
%14 = affine.apply affine_map<()[s0] -> (s0 ceildiv 2)>()[%dim_12]
%15 = tensor.empty(%14) : tensor<1x?x4x2xi8>
%pack_13 = tensor.pack %4 padding_value(%c0_i8 : i8) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [4, 2] into %15 : tensor<?x4xi8> -> tensor<1x?x4x2xi8>
%16 = iree_linalg_ext.set_encoding %padded_10 : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_14 = tensor.dim %6, %c0 : tensor<?x4xi32>
%17 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c16, %dim_14]
%padded_15 = tensor.pad %6 low[0, 0] high[%17, %c0] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i32 : i32
} : tensor<?x4xi32> to tensor<?x?xi32>
%c0_16 = arith.constant 0 : index
%dim_17 = tensor.dim %6, %c0_16 : tensor<?x4xi32>
%18 = affine.apply affine_map<()[s0] -> (s0 ceildiv 16)>()[%dim_17]
%19 = tensor.empty(%18) : tensor<?x1x16x4xi32>
%pack_18 = tensor.pack %6 padding_value(%c0_i32 : i32) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 4] into %19 : tensor<?x4xi32> -> tensor<?x1x16x4xi32>
%20 = builtin.unrealized_conversion_cast %pack_18 : tensor<?x1x16x4xi32> to tensor<1x?x4x16xi32>
%21 = iree_linalg_ext.set_encoding %padded_15 : tensor<?x?xi32> -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%22 = linalg.mmt4d ins(%pack_13, %pack : tensor<1x?x4x2xi8>, tensor<?x?x16x2xi8>) outs(%20 : tensor<1x?x4x16xi32>) -> tensor<1x?x4x16xi32>
%23 = linalg.matmul ins(%12, %16 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>, tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) outs(%21 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_19 = tensor.dim %6, %c0 : tensor<?x4xi32>
%c1_20 = arith.constant 1 : index
%dim_21 = tensor.dim %22, %c1_20 : tensor<1x?x4x16xi32>
%24 = tensor.empty(%dim_21) : tensor<?x1x16x4xi32>
%transposed = linalg.transpose ins(%22 : tensor<1x?x4x16xi32>) outs(%24 : tensor<?x1x16x4xi32>) permutation = [1, 0, 3, 2]
%c0_22 = arith.constant 0 : index
%c0_23 = arith.constant 0 : index
%dim_24 = tensor.dim %12, %c0_23 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_25 = arith.constant 1 : index
%dim_26 = tensor.dim %12, %c1_25 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c0_27 = arith.constant 0 : index
%dim_28 = tensor.dim %16, %c0_27 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_29 = arith.constant 1 : index
%dim_30 = tensor.dim %16, %c1_29 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c0_31 = arith.constant 0 : index
%dim_32 = tensor.dim %21, %c0_31 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_33 = arith.constant 1 : index
%dim_34 = tensor.dim %21, %c1_33 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_35 = tensor.dim %23, %c0_22 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_36 = arith.constant 1 : index
%dim_37 = tensor.dim %23, %c1_36 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%25 = tensor.empty(%dim_35, %dim_37) : tensor<?x?xi32>
%unpack = tensor.unpack %transposed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 4] into %25 : tensor<?x1x16x4xi32> -> tensor<?x?xi32>
%26 = iree_linalg_ext.unset_encoding %23 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>> -> tensor<?x?xi32>
%extracted_slice = tensor.extract_slice %26[0, 0] [%dim_19, 4] [1, 1] : tensor<?x?xi32> to tensor<?x4xi32>
%27 = hal.tensor.export %extracted_slice "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %27 : !hal.buffer_view
}
} -> SUCCESS
//===-------------------------------------------===//
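// --- Note (annotation, not compiler output): UnsetEncodingOpToUnPackOpConversion
// has to undo the narrow-N transpose before it can unpack: the stored layout is
// the transposed tensor<1x?x4x16xi32>, so the pattern first emits linalg.transpose
// (permutation = [1, 0, 3, 2]) back to tensor<?x1x16x4xi32> and then tensor.unpack
// into the row-major result. A standalone sketch with hypothetical static shapes
// matching the [16, 4] tile:
func.func @transpose_unpack_sketch(%x: tensor<1x2x4x16xi32>) -> tensor<32x4xi32> {
  // out[i] = in[permutation[i]]: 1x2x4x16 -> 2x1x16x4.
  %e = tensor.empty() : tensor<2x1x16x4xi32>
  %t = linalg.transpose ins(%x : tensor<1x2x4x16xi32>)
      outs(%e : tensor<2x1x16x4xi32>) permutation = [1, 0, 3, 2]
  %init = tensor.empty() : tensor<32x4xi32>
  %u = tensor.unpack %t outer_dims_perm = [0, 1] inner_dims_pos = [0, 1]
      inner_tiles = [16, 4] into %init : tensor<2x1x16x4xi32> -> tensor<32x4xi32>
  return %u : tensor<32x4xi32>
}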
//===-------------------------------------------===//
Legalizing operation : 'arith.constant'(0x58bb0edc6210) {
%60 = "arith.constant"() <{value = 1 : index}> : () -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.dim'(0x58bb0edc6280) {
%61 = "tensor.dim"(%30, %60) : (tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>, index) -> index
* Fold {
} -> FAILURE : unable to fold
* Pattern : 'tensor.dim -> ()' {
Trying to match "(anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>"
** Insert : 'arith.constant'(0x58bb0edc6ea0)
** Insert : 'tensor.dim'(0x58bb0edd2700)
** Insert : 'arith.constant'(0x58bb0edd27b0)
** Insert : 'tensor.dim'(0x58bb0edd2820)
** Replace : 'tensor.dim'(0x58bb0edc6280)
"(anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>" result 1
//===-------------------------------------------===//
Legalizing operation : 'arith.constant'(0x58bb0edc6ea0) {
%30 = "arith.constant"() <{value = 0 : index}> : () -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.dim'(0x58bb0edd2700) {
%31 = "tensor.dim"(%17, %30) : (tensor<?x?xi8>, index) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'arith.constant'(0x58bb0edd27b0) {
%32 = "arith.constant"() <{value = 1 : index}> : () -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.dim'(0x58bb0edd2820) {
%33 = "tensor.dim"(%17, %32) : (tensor<?x?xi8>, index) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
} -> SUCCESS : pattern applied successfully
// *** IR Dump After Pattern Application ***
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%c2 = arith.constant 2 : index
%c16 = arith.constant 16 : index
%c0_i32 = arith.constant 0 : i32
%c0_i8 = arith.constant 0 : i8
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%dim = tensor.dim %2, %c0 : tensor<?x?xi8>
%7 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c16, %dim]
%dim_0 = tensor.dim %2, %c1 : tensor<?x?xi8>
%8 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c2, %dim_0]
%padded = tensor.pad %2 low[0, 0] high[%7, %8] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x?xi8> to tensor<?x?xi8>
%c0_1 = arith.constant 0 : index
%dim_2 = tensor.dim %2, %c0_1 : tensor<?x?xi8>
%c1_3 = arith.constant 1 : index
%dim_4 = tensor.dim %2, %c1_3 : tensor<?x?xi8>
%9 = affine.apply affine_map<()[s0] -> (s0 ceildiv 16)>()[%dim_2]
%10 = affine.apply affine_map<()[s0] -> (s0 ceildiv 2)>()[%dim_4]
%11 = tensor.empty(%9, %10) : tensor<?x?x16x2xi8>
%pack = tensor.pack %2 padding_value(%c0_i8 : i8) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 2] into %11 : tensor<?x?xi8> -> tensor<?x?x16x2xi8>
%c0_5 = arith.constant 0 : index
%dim_6 = tensor.dim %padded, %c0_5 : tensor<?x?xi8>
%c1_7 = arith.constant 1 : index
%dim_8 = tensor.dim %padded, %c1_7 : tensor<?x?xi8>
%c0_9 = arith.constant 0 : index
%dim_10 = tensor.dim %padded, %c0_9 : tensor<?x?xi8>
%c1_11 = arith.constant 1 : index
%dim_12 = tensor.dim %padded, %c1_11 : tensor<?x?xi8>
%12 = iree_linalg_ext.set_encoding %padded : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_13 = tensor.dim %4, %c0 : tensor<?x4xi8>
%13 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c2, %dim_13]
%padded_14 = tensor.pad %4 low[0, 0] high[%13, %c0] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x4xi8> to tensor<?x?xi8>
%c0_15 = arith.constant 0 : index
%dim_16 = tensor.dim %4, %c0_15 : tensor<?x4xi8>
%14 = affine.apply affine_map<()[s0] -> (s0 ceildiv 2)>()[%dim_16]
%15 = tensor.empty(%14) : tensor<1x?x4x2xi8>
%pack_17 = tensor.pack %4 padding_value(%c0_i8 : i8) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [4, 2] into %15 : tensor<?x4xi8> -> tensor<1x?x4x2xi8>
%16 = iree_linalg_ext.set_encoding %padded_14 : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_18 = tensor.dim %6, %c0 : tensor<?x4xi32>
%17 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c16, %dim_18]
%padded_19 = tensor.pad %6 low[0, 0] high[%17, %c0] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i32 : i32
} : tensor<?x4xi32> to tensor<?x?xi32>
%c0_20 = arith.constant 0 : index
%dim_21 = tensor.dim %6, %c0_20 : tensor<?x4xi32>
%18 = affine.apply affine_map<()[s0] -> (s0 ceildiv 16)>()[%dim_21]
%19 = tensor.empty(%18) : tensor<?x1x16x4xi32>
%pack_22 = tensor.pack %6 padding_value(%c0_i32 : i32) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 4] into %19 : tensor<?x4xi32> -> tensor<?x1x16x4xi32>
%20 = builtin.unrealized_conversion_cast %pack_22 : tensor<?x1x16x4xi32> to tensor<1x?x4x16xi32>
%21 = iree_linalg_ext.set_encoding %padded_19 : tensor<?x?xi32> -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%22 = linalg.mmt4d ins(%pack_17, %pack : tensor<1x?x4x2xi8>, tensor<?x?x16x2xi8>) outs(%20 : tensor<1x?x4x16xi32>) -> tensor<1x?x4x16xi32>
%23 = linalg.matmul ins(%12, %16 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>, tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) outs(%21 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_23 = tensor.dim %6, %c0 : tensor<?x4xi32>
%c1_24 = arith.constant 1 : index
%dim_25 = tensor.dim %22, %c1_24 : tensor<1x?x4x16xi32>
%24 = tensor.empty(%dim_25) : tensor<?x1x16x4xi32>
%transposed = linalg.transpose ins(%22 : tensor<1x?x4x16xi32>) outs(%24 : tensor<?x1x16x4xi32>) permutation = [1, 0, 3, 2]
%c0_26 = arith.constant 0 : index
%c0_27 = arith.constant 0 : index
%dim_28 = tensor.dim %12, %c0_27 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_29 = arith.constant 1 : index
%dim_30 = tensor.dim %12, %c1_29 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c0_31 = arith.constant 0 : index
%dim_32 = tensor.dim %16, %c0_31 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_33 = arith.constant 1 : index
%dim_34 = tensor.dim %16, %c1_33 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c0_35 = arith.constant 0 : index
%dim_36 = tensor.dim %21, %c0_35 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_37 = arith.constant 1 : index
%dim_38 = tensor.dim %21, %c1_37 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_39 = tensor.dim %23, %c0_26 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_40 = arith.constant 1 : index
%dim_41 = tensor.dim %23, %c1_40 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%25 = tensor.empty(%dim_39, %dim_41) : tensor<?x?xi32>
%unpack = tensor.unpack %transposed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 4] into %25 : tensor<?x1x16x4xi32> -> tensor<?x?xi32>
%26 = iree_linalg_ext.unset_encoding %23 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>> -> tensor<?x?xi32>
%extracted_slice = tensor.extract_slice %26[0, 0] [%dim_23, 4] [1, 1] : tensor<?x?xi32> to tensor<?x4xi32>
%27 = hal.tensor.export %extracted_slice "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %27 : !hal.buffer_view
}
} -> SUCCESS
//===-------------------------------------------===//
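// --- Note (annotation, not compiler output): the DimOfReifyRankedShapedTypeOpInterface
// applications above resolve a tensor.dim of an op result by reifying the shape
// from the op's operands instead, so dim queries against encoded values bottom out
// on the underlying padded tensors (%17 and %37 in this trace). A standalone sketch
// of the same idea on tensor.pad, with hypothetical names:
func.func @dim_reify_sketch(%src: tensor<?x?xi8>, %h: index) -> index {
  %c0 = arith.constant 0 : index
  %pad_val = arith.constant 0 : i8
  %padded = tensor.pad %src low[0, 0] high[%h, 0] {
  ^bb0(%i: index, %j: index):
    tensor.yield %pad_val : i8
  } : tensor<?x?xi8> to tensor<?x?xi8>
  // The pattern rewrites this dim into tensor.dim %src, %c0 plus %h
  // (via affine.apply), removing the use of the pad result.
  %d = tensor.dim %padded, %c0 : tensor<?x?xi8>
  return %d : index
}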
//===-------------------------------------------===//
Legalizing operation : 'arith.constant'(0x58bb0edc6330) {
%66 = "arith.constant"() <{value = 0 : index}> : () -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.dim'(0x58bb0edc63a0) {
%67 = "tensor.dim"(%43, %66) : (tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>, index) -> index
* Fold {
} -> FAILURE : unable to fold
* Pattern : 'tensor.dim -> ()' {
Trying to match "(anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>"
** Insert : 'arith.constant'(0x58bb0edc76a0)
** Insert : 'tensor.dim'(0x58bb0edd2b40)
** Insert : 'arith.constant'(0x58bb0edc7710)
** Insert : 'tensor.dim'(0x58bb0edd2bf0)
** Replace : 'tensor.dim'(0x58bb0edc63a0)
"(anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>" result 1
//===-------------------------------------------===//
Legalizing operation : 'arith.constant'(0x58bb0edc76a0) {
%43 = "arith.constant"() <{value = 0 : index}> : () -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.dim'(0x58bb0edd2b40) {
%44 = "tensor.dim"(%37, %43) : (tensor<?x?xi8>, index) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'arith.constant'(0x58bb0edc7710) {
%45 = "arith.constant"() <{value = 1 : index}> : () -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.dim'(0x58bb0edd2bf0) {
%46 = "tensor.dim"(%37, %45) : (tensor<?x?xi8>, index) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
} -> SUCCESS : pattern applied successfully
// *** IR Dump After Pattern Application ***
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%c2 = arith.constant 2 : index
%c16 = arith.constant 16 : index
%c0_i32 = arith.constant 0 : i32
%c0_i8 = arith.constant 0 : i8
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%dim = tensor.dim %2, %c0 : tensor<?x?xi8>
%7 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c16, %dim]
%dim_0 = tensor.dim %2, %c1 : tensor<?x?xi8>
%8 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c2, %dim_0]
%padded = tensor.pad %2 low[0, 0] high[%7, %8] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x?xi8> to tensor<?x?xi8>
%c0_1 = arith.constant 0 : index
%dim_2 = tensor.dim %2, %c0_1 : tensor<?x?xi8>
%c1_3 = arith.constant 1 : index
%dim_4 = tensor.dim %2, %c1_3 : tensor<?x?xi8>
%9 = affine.apply affine_map<()[s0] -> (s0 ceildiv 16)>()[%dim_2]
%10 = affine.apply affine_map<()[s0] -> (s0 ceildiv 2)>()[%dim_4]
%11 = tensor.empty(%9, %10) : tensor<?x?x16x2xi8>
%pack = tensor.pack %2 padding_value(%c0_i8 : i8) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 2] into %11 : tensor<?x?xi8> -> tensor<?x?x16x2xi8>
%c0_5 = arith.constant 0 : index
%dim_6 = tensor.dim %padded, %c0_5 : tensor<?x?xi8>
%c1_7 = arith.constant 1 : index
%dim_8 = tensor.dim %padded, %c1_7 : tensor<?x?xi8>
%c0_9 = arith.constant 0 : index
%dim_10 = tensor.dim %padded, %c0_9 : tensor<?x?xi8>
%c1_11 = arith.constant 1 : index
%dim_12 = tensor.dim %padded, %c1_11 : tensor<?x?xi8>
%12 = iree_linalg_ext.set_encoding %padded : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_13 = tensor.dim %4, %c0 : tensor<?x4xi8>
%13 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c2, %dim_13]
%padded_14 = tensor.pad %4 low[0, 0] high[%13, %c0] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x4xi8> to tensor<?x?xi8>
%c0_15 = arith.constant 0 : index
%dim_16 = tensor.dim %4, %c0_15 : tensor<?x4xi8>
%14 = affine.apply affine_map<()[s0] -> (s0 ceildiv 2)>()[%dim_16]
%15 = tensor.empty(%14) : tensor<1x?x4x2xi8>
%pack_17 = tensor.pack %4 padding_value(%c0_i8 : i8) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [4, 2] into %15 : tensor<?x4xi8> -> tensor<1x?x4x2xi8>
%c0_18 = arith.constant 0 : index
%dim_19 = tensor.dim %padded_14, %c0_18 : tensor<?x?xi8>
%c1_20 = arith.constant 1 : index
%dim_21 = tensor.dim %padded_14, %c1_20 : tensor<?x?xi8>
%16 = iree_linalg_ext.set_encoding %padded_14 : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_22 = tensor.dim %6, %c0 : tensor<?x4xi32>
%17 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c16, %dim_22]
%padded_23 = tensor.pad %6 low[0, 0] high[%17, %c0] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i32 : i32
} : tensor<?x4xi32> to tensor<?x?xi32>
%c0_24 = arith.constant 0 : index
%dim_25 = tensor.dim %6, %c0_24 : tensor<?x4xi32>
%18 = affine.apply affine_map<()[s0] -> (s0 ceildiv 16)>()[%dim_25]
%19 = tensor.empty(%18) : tensor<?x1x16x4xi32>
%pack_26 = tensor.pack %6 padding_value(%c0_i32 : i32) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 4] into %19 : tensor<?x4xi32> -> tensor<?x1x16x4xi32>
%20 = builtin.unrealized_conversion_cast %pack_26 : tensor<?x1x16x4xi32> to tensor<1x?x4x16xi32>
%21 = iree_linalg_ext.set_encoding %padded_23 : tensor<?x?xi32> -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%22 = linalg.mmt4d ins(%pack_17, %pack : tensor<1x?x4x2xi8>, tensor<?x?x16x2xi8>) outs(%20 : tensor<1x?x4x16xi32>) -> tensor<1x?x4x16xi32>
%23 = linalg.matmul ins(%12, %16 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>, tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) outs(%21 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_27 = tensor.dim %6, %c0 : tensor<?x4xi32>
%c1_28 = arith.constant 1 : index
%dim_29 = tensor.dim %22, %c1_28 : tensor<1x?x4x16xi32>
%24 = tensor.empty(%dim_29) : tensor<?x1x16x4xi32>
%transposed = linalg.transpose ins(%22 : tensor<1x?x4x16xi32>) outs(%24 : tensor<?x1x16x4xi32>) permutation = [1, 0, 3, 2]
%c0_30 = arith.constant 0 : index
%c0_31 = arith.constant 0 : index
%dim_32 = tensor.dim %12, %c0_31 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_33 = arith.constant 1 : index
%dim_34 = tensor.dim %12, %c1_33 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c0_35 = arith.constant 0 : index
%dim_36 = tensor.dim %16, %c0_35 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_37 = arith.constant 1 : index
%dim_38 = tensor.dim %16, %c1_37 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c0_39 = arith.constant 0 : index
%dim_40 = tensor.dim %21, %c0_39 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_41 = arith.constant 1 : index
%dim_42 = tensor.dim %21, %c1_41 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_43 = tensor.dim %23, %c0_30 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_44 = arith.constant 1 : index
%dim_45 = tensor.dim %23, %c1_44 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%25 = tensor.empty(%dim_43, %dim_45) : tensor<?x?xi32>
%unpack = tensor.unpack %transposed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 4] into %25 : tensor<?x1x16x4xi32> -> tensor<?x?xi32>
%26 = iree_linalg_ext.unset_encoding %23 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>> -> tensor<?x?xi32>
%extracted_slice = tensor.extract_slice %26[0, 0] [%dim_27, 4] [1, 1] : tensor<?x?xi32> to tensor<?x4xi32>
%27 = hal.tensor.export %extracted_slice "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %27 : !hal.buffer_view
}
} -> SUCCESS
//===-------------------------------------------===//
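From here the dialect-conversion trace legalizes the remaining shape queries one by one: `arith.constant` is already legal for the target, while a `tensor.dim` on an encoded tensor has no fold, so the `DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>` pattern rewrites the query through the op that defined the tensor. A minimal hand-written sketch of that rewrite for the shape-preserving `iree_linalg_ext.set_encoding` (`%src`, `%d`, and `#enc` are placeholder names, not values from this log):

    // Before: the query is asked of the encoded value and cannot fold.
    %enc = iree_linalg_ext.set_encoding %src : tensor<?x?xi8> -> tensor<?x?xi8, #enc>
    %d = tensor.dim %enc, %c1 : tensor<?x?xi8, #enc>
    // After reification: set_encoding preserves the logical shape, so the
    // pattern replaces the query with the same query on the source.
    %d = tensor.dim %src, %c1 : tensor<?x?xi8>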
//===-------------------------------------------===//
Legalizing operation : 'arith.constant'(0x58bb0edc6450) {
%72 = "arith.constant"() <{value = 1 : index}> : () -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.dim'(0x58bb0edc64c0) {
%73 = "tensor.dim"(%47, %72) : (tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>, index) -> index
* Fold {
} -> FAILURE : unable to fold
* Pattern : 'tensor.dim -> ()' {
Trying to match "(anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>"
** Insert : 'arith.constant'(0x58bb0edd2670)
** Insert : 'tensor.dim'(0x58bb0edd2940)
** Insert : 'arith.constant'(0x58bb0edd28d0)
** Insert : 'tensor.dim'(0x58bb0edd3010)
** Replace : 'tensor.dim'(0x58bb0edc64c0)
"(anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>" result 1
//===-------------------------------------------===//
Legalizing operation : 'arith.constant'(0x58bb0edd2670) {
%47 = "arith.constant"() <{value = 0 : index}> : () -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.dim'(0x58bb0edd2940) {
%48 = "tensor.dim"(%37, %47) : (tensor<?x?xi8>, index) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'arith.constant'(0x58bb0edd28d0) {
%49 = "arith.constant"() <{value = 1 : index}> : () -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.dim'(0x58bb0edd3010) {
%50 = "tensor.dim"(%37, %49) : (tensor<?x?xi8>, index) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
} -> SUCCESS : pattern applied successfully
// *** IR Dump After Pattern Application ***
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%c2 = arith.constant 2 : index
%c16 = arith.constant 16 : index
%c0_i32 = arith.constant 0 : i32
%c0_i8 = arith.constant 0 : i8
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%dim = tensor.dim %2, %c0 : tensor<?x?xi8>
%7 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c16, %dim]
%dim_0 = tensor.dim %2, %c1 : tensor<?x?xi8>
%8 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c2, %dim_0]
%padded = tensor.pad %2 low[0, 0] high[%7, %8] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x?xi8> to tensor<?x?xi8>
%c0_1 = arith.constant 0 : index
%dim_2 = tensor.dim %2, %c0_1 : tensor<?x?xi8>
%c1_3 = arith.constant 1 : index
%dim_4 = tensor.dim %2, %c1_3 : tensor<?x?xi8>
%9 = affine.apply affine_map<()[s0] -> (s0 ceildiv 16)>()[%dim_2]
%10 = affine.apply affine_map<()[s0] -> (s0 ceildiv 2)>()[%dim_4]
%11 = tensor.empty(%9, %10) : tensor<?x?x16x2xi8>
%pack = tensor.pack %2 padding_value(%c0_i8 : i8) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 2] into %11 : tensor<?x?xi8> -> tensor<?x?x16x2xi8>
%c0_5 = arith.constant 0 : index
%dim_6 = tensor.dim %padded, %c0_5 : tensor<?x?xi8>
%c1_7 = arith.constant 1 : index
%dim_8 = tensor.dim %padded, %c1_7 : tensor<?x?xi8>
%c0_9 = arith.constant 0 : index
%dim_10 = tensor.dim %padded, %c0_9 : tensor<?x?xi8>
%c1_11 = arith.constant 1 : index
%dim_12 = tensor.dim %padded, %c1_11 : tensor<?x?xi8>
%12 = iree_linalg_ext.set_encoding %padded : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_13 = tensor.dim %4, %c0 : tensor<?x4xi8>
%13 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c2, %dim_13]
%padded_14 = tensor.pad %4 low[0, 0] high[%13, %c0] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x4xi8> to tensor<?x?xi8>
%c0_15 = arith.constant 0 : index
%dim_16 = tensor.dim %4, %c0_15 : tensor<?x4xi8>
%14 = affine.apply affine_map<()[s0] -> (s0 ceildiv 2)>()[%dim_16]
%15 = tensor.empty(%14) : tensor<1x?x4x2xi8>
%pack_17 = tensor.pack %4 padding_value(%c0_i8 : i8) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [4, 2] into %15 : tensor<?x4xi8> -> tensor<1x?x4x2xi8>
%c0_18 = arith.constant 0 : index
%dim_19 = tensor.dim %padded_14, %c0_18 : tensor<?x?xi8>
%c1_20 = arith.constant 1 : index
%dim_21 = tensor.dim %padded_14, %c1_20 : tensor<?x?xi8>
%c0_22 = arith.constant 0 : index
%dim_23 = tensor.dim %padded_14, %c0_22 : tensor<?x?xi8>
%c1_24 = arith.constant 1 : index
%dim_25 = tensor.dim %padded_14, %c1_24 : tensor<?x?xi8>
%16 = iree_linalg_ext.set_encoding %padded_14 : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_26 = tensor.dim %6, %c0 : tensor<?x4xi32>
%17 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c16, %dim_26]
%padded_27 = tensor.pad %6 low[0, 0] high[%17, %c0] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i32 : i32
} : tensor<?x4xi32> to tensor<?x?xi32>
%c0_28 = arith.constant 0 : index
%dim_29 = tensor.dim %6, %c0_28 : tensor<?x4xi32>
%18 = affine.apply affine_map<()[s0] -> (s0 ceildiv 16)>()[%dim_29]
%19 = tensor.empty(%18) : tensor<?x1x16x4xi32>
%pack_30 = tensor.pack %6 padding_value(%c0_i32 : i32) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 4] into %19 : tensor<?x4xi32> -> tensor<?x1x16x4xi32>
%20 = builtin.unrealized_conversion_cast %pack_30 : tensor<?x1x16x4xi32> to tensor<1x?x4x16xi32>
%21 = iree_linalg_ext.set_encoding %padded_27 : tensor<?x?xi32> -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%22 = linalg.mmt4d ins(%pack_17, %pack : tensor<1x?x4x2xi8>, tensor<?x?x16x2xi8>) outs(%20 : tensor<1x?x4x16xi32>) -> tensor<1x?x4x16xi32>
%23 = linalg.matmul ins(%12, %16 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>, tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) outs(%21 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_31 = tensor.dim %6, %c0 : tensor<?x4xi32>
%c1_32 = arith.constant 1 : index
%dim_33 = tensor.dim %22, %c1_32 : tensor<1x?x4x16xi32>
%24 = tensor.empty(%dim_33) : tensor<?x1x16x4xi32>
%transposed = linalg.transpose ins(%22 : tensor<1x?x4x16xi32>) outs(%24 : tensor<?x1x16x4xi32>) permutation = [1, 0, 3, 2]
%c0_34 = arith.constant 0 : index
%c0_35 = arith.constant 0 : index
%dim_36 = tensor.dim %12, %c0_35 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_37 = arith.constant 1 : index
%dim_38 = tensor.dim %12, %c1_37 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c0_39 = arith.constant 0 : index
%dim_40 = tensor.dim %16, %c0_39 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_41 = arith.constant 1 : index
%dim_42 = tensor.dim %16, %c1_41 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c0_43 = arith.constant 0 : index
%dim_44 = tensor.dim %21, %c0_43 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_45 = arith.constant 1 : index
%dim_46 = tensor.dim %21, %c1_45 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_47 = tensor.dim %23, %c0_34 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_48 = arith.constant 1 : index
%dim_49 = tensor.dim %23, %c1_48 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%25 = tensor.empty(%dim_47, %dim_49) : tensor<?x?xi32>
%unpack = tensor.unpack %transposed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 4] into %25 : tensor<?x1x16x4xi32> -> tensor<?x?xi32>
%26 = iree_linalg_ext.unset_encoding %23 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>> -> tensor<?x?xi32>
%extracted_slice = tensor.extract_slice %26[0, 0] [%dim_31, 4] [1, 1] : tensor<?x?xi32> to tensor<?x4xi32>
%27 = hal.tensor.export %extracted_slice "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %27 : !hal.buffer_view
}
} -> SUCCESS
//===-------------------------------------------===//
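The same pattern fires next on the RESULT-encoded accumulator: the query on %61 below is pushed through its `iree_linalg_ext.set_encoding` onto %54, the `tensor.pad` result feeding it, where plain `tensor.dim` ops are already legal. Note that the dialect-conversion driver defers erasing replaced ops until conversion finalizes, which is why the dumps keep showing the old `tensor.dim` ops alongside their reified replacements.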
//===-------------------------------------------===//
Legalizing operation : 'arith.constant'(0x58bb0edc6570) {
%78 = "arith.constant"() <{value = 0 : index}> : () -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.dim'(0x58bb0edc65e0) {
%79 = "tensor.dim"(%61, %78) : (tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>, index) -> index
* Fold {
} -> FAILURE : unable to fold
* Pattern : 'tensor.dim -> ()' {
Trying to match "(anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>"
** Insert : 'arith.constant'(0x58bb0edd2ab0)
** Insert : 'tensor.dim'(0x58bb0edd34f0)
** Insert : 'arith.constant'(0x58bb0edd2ce0)
** Insert : 'tensor.dim'(0x58bb0edd35a0)
** Replace : 'tensor.dim'(0x58bb0edc65e0)
"(anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>" result 1
//===-------------------------------------------===//
Legalizing operation : 'arith.constant'(0x58bb0edd2ab0) {
%61 = "arith.constant"() <{value = 0 : index}> : () -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.dim'(0x58bb0edd34f0) {
%62 = "tensor.dim"(%54, %61) : (tensor<?x?xi32>, index) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'arith.constant'(0x58bb0edd2ce0) {
%63 = "arith.constant"() <{value = 1 : index}> : () -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.dim'(0x58bb0edd35a0) {
%64 = "tensor.dim"(%54, %63) : (tensor<?x?xi32>, index) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
} -> SUCCESS : pattern applied successfully
// *** IR Dump After Pattern Application ***
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%c2 = arith.constant 2 : index
%c16 = arith.constant 16 : index
%c0_i32 = arith.constant 0 : i32
%c0_i8 = arith.constant 0 : i8
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%dim = tensor.dim %2, %c0 : tensor<?x?xi8>
%7 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c16, %dim]
%dim_0 = tensor.dim %2, %c1 : tensor<?x?xi8>
%8 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c2, %dim_0]
%padded = tensor.pad %2 low[0, 0] high[%7, %8] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x?xi8> to tensor<?x?xi8>
%c0_1 = arith.constant 0 : index
%dim_2 = tensor.dim %2, %c0_1 : tensor<?x?xi8>
%c1_3 = arith.constant 1 : index
%dim_4 = tensor.dim %2, %c1_3 : tensor<?x?xi8>
%9 = affine.apply affine_map<()[s0] -> (s0 ceildiv 16)>()[%dim_2]
%10 = affine.apply affine_map<()[s0] -> (s0 ceildiv 2)>()[%dim_4]
%11 = tensor.empty(%9, %10) : tensor<?x?x16x2xi8>
%pack = tensor.pack %2 padding_value(%c0_i8 : i8) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 2] into %11 : tensor<?x?xi8> -> tensor<?x?x16x2xi8>
%c0_5 = arith.constant 0 : index
%dim_6 = tensor.dim %padded, %c0_5 : tensor<?x?xi8>
%c1_7 = arith.constant 1 : index
%dim_8 = tensor.dim %padded, %c1_7 : tensor<?x?xi8>
%c0_9 = arith.constant 0 : index
%dim_10 = tensor.dim %padded, %c0_9 : tensor<?x?xi8>
%c1_11 = arith.constant 1 : index
%dim_12 = tensor.dim %padded, %c1_11 : tensor<?x?xi8>
%12 = iree_linalg_ext.set_encoding %padded : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_13 = tensor.dim %4, %c0 : tensor<?x4xi8>
%13 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c2, %dim_13]
%padded_14 = tensor.pad %4 low[0, 0] high[%13, %c0] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x4xi8> to tensor<?x?xi8>
%c0_15 = arith.constant 0 : index
%dim_16 = tensor.dim %4, %c0_15 : tensor<?x4xi8>
%14 = affine.apply affine_map<()[s0] -> (s0 ceildiv 2)>()[%dim_16]
%15 = tensor.empty(%14) : tensor<1x?x4x2xi8>
%pack_17 = tensor.pack %4 padding_value(%c0_i8 : i8) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [4, 2] into %15 : tensor<?x4xi8> -> tensor<1x?x4x2xi8>
%c0_18 = arith.constant 0 : index
%dim_19 = tensor.dim %padded_14, %c0_18 : tensor<?x?xi8>
%c1_20 = arith.constant 1 : index
%dim_21 = tensor.dim %padded_14, %c1_20 : tensor<?x?xi8>
%c0_22 = arith.constant 0 : index
%dim_23 = tensor.dim %padded_14, %c0_22 : tensor<?x?xi8>
%c1_24 = arith.constant 1 : index
%dim_25 = tensor.dim %padded_14, %c1_24 : tensor<?x?xi8>
%16 = iree_linalg_ext.set_encoding %padded_14 : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_26 = tensor.dim %6, %c0 : tensor<?x4xi32>
%17 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c16, %dim_26]
%padded_27 = tensor.pad %6 low[0, 0] high[%17, %c0] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i32 : i32
} : tensor<?x4xi32> to tensor<?x?xi32>
%c0_28 = arith.constant 0 : index
%dim_29 = tensor.dim %6, %c0_28 : tensor<?x4xi32>
%18 = affine.apply affine_map<()[s0] -> (s0 ceildiv 16)>()[%dim_29]
%19 = tensor.empty(%18) : tensor<?x1x16x4xi32>
%pack_30 = tensor.pack %6 padding_value(%c0_i32 : i32) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 4] into %19 : tensor<?x4xi32> -> tensor<?x1x16x4xi32>
%20 = builtin.unrealized_conversion_cast %pack_30 : tensor<?x1x16x4xi32> to tensor<1x?x4x16xi32>
%c0_31 = arith.constant 0 : index
%dim_32 = tensor.dim %padded_27, %c0_31 : tensor<?x?xi32>
%c1_33 = arith.constant 1 : index
%dim_34 = tensor.dim %padded_27, %c1_33 : tensor<?x?xi32>
%21 = iree_linalg_ext.set_encoding %padded_27 : tensor<?x?xi32> -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%22 = linalg.mmt4d ins(%pack_17, %pack : tensor<1x?x4x2xi8>, tensor<?x?x16x2xi8>) outs(%20 : tensor<1x?x4x16xi32>) -> tensor<1x?x4x16xi32>
%23 = linalg.matmul ins(%12, %16 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>, tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) outs(%21 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_35 = tensor.dim %6, %c0 : tensor<?x4xi32>
%c1_36 = arith.constant 1 : index
%dim_37 = tensor.dim %22, %c1_36 : tensor<1x?x4x16xi32>
%24 = tensor.empty(%dim_37) : tensor<?x1x16x4xi32>
%transposed = linalg.transpose ins(%22 : tensor<1x?x4x16xi32>) outs(%24 : tensor<?x1x16x4xi32>) permutation = [1, 0, 3, 2]
%c0_38 = arith.constant 0 : index
%c0_39 = arith.constant 0 : index
%dim_40 = tensor.dim %12, %c0_39 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_41 = arith.constant 1 : index
%dim_42 = tensor.dim %12, %c1_41 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c0_43 = arith.constant 0 : index
%dim_44 = tensor.dim %16, %c0_43 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_45 = arith.constant 1 : index
%dim_46 = tensor.dim %16, %c1_45 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c0_47 = arith.constant 0 : index
%dim_48 = tensor.dim %21, %c0_47 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_49 = arith.constant 1 : index
%dim_50 = tensor.dim %21, %c1_49 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_51 = tensor.dim %23, %c0_38 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_52 = arith.constant 1 : index
%dim_53 = tensor.dim %23, %c1_52 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%25 = tensor.empty(%dim_51, %dim_53) : tensor<?x?xi32>
%unpack = tensor.unpack %transposed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 4] into %25 : tensor<?x1x16x4xi32> -> tensor<?x?xi32>
%26 = iree_linalg_ext.unset_encoding %23 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>> -> tensor<?x?xi32>
%extracted_slice = tensor.extract_slice %26[0, 0] [%dim_35, 4] [1, 1] : tensor<?x?xi32> to tensor<?x4xi32>
%27 = hal.tensor.export %extracted_slice "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %27 : !hal.buffer_view
}
} -> SUCCESS
//===-------------------------------------------===//
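Next comes the matching query for dim 1 of the same RESULT-encoded value (%85 below), handled identically. Each successful pattern application re-prints the entire function, so the near-identical dumps differ only in renumbered SSA suffixes and the handful of `arith.constant`/`tensor.dim` ops the pattern just inserted.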
//===-------------------------------------------===//
Legalizing operation : 'arith.constant'(0x58bb0edc66c0) {
%84 = "arith.constant"() <{value = 1 : index}> : () -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.dim'(0x58bb0edc6760) {
%85 = "tensor.dim"(%65, %84) : (tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>, index) -> index
* Fold {
} -> FAILURE : unable to fold
* Pattern : 'tensor.dim -> ()' {
Trying to match "(anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>"
** Insert : 'arith.constant'(0x58bb0edd2f80)
** Insert : 'tensor.dim'(0x58bb0edd3bb0)
** Insert : 'arith.constant'(0x58bb0edd2d50)
** Insert : 'tensor.dim'(0x58bb0edd3c60)
** Replace : 'tensor.dim'(0x58bb0edc6760)
"(anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>" result 1
//===-------------------------------------------===//
Legalizing operation : 'arith.constant'(0x58bb0edd2f80) {
%65 = "arith.constant"() <{value = 0 : index}> : () -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.dim'(0x58bb0edd3bb0) {
%66 = "tensor.dim"(%54, %65) : (tensor<?x?xi32>, index) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'arith.constant'(0x58bb0edd2d50) {
%67 = "arith.constant"() <{value = 1 : index}> : () -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.dim'(0x58bb0edd3c60) {
%68 = "tensor.dim"(%54, %67) : (tensor<?x?xi32>, index) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
} -> SUCCESS : pattern applied successfully
// *** IR Dump After Pattern Application ***
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%c2 = arith.constant 2 : index
%c16 = arith.constant 16 : index
%c0_i32 = arith.constant 0 : i32
%c0_i8 = arith.constant 0 : i8
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%dim = tensor.dim %2, %c0 : tensor<?x?xi8>
%7 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c16, %dim]
%dim_0 = tensor.dim %2, %c1 : tensor<?x?xi8>
%8 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c2, %dim_0]
%padded = tensor.pad %2 low[0, 0] high[%7, %8] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x?xi8> to tensor<?x?xi8>
%c0_1 = arith.constant 0 : index
%dim_2 = tensor.dim %2, %c0_1 : tensor<?x?xi8>
%c1_3 = arith.constant 1 : index
%dim_4 = tensor.dim %2, %c1_3 : tensor<?x?xi8>
%9 = affine.apply affine_map<()[s0] -> (s0 ceildiv 16)>()[%dim_2]
%10 = affine.apply affine_map<()[s0] -> (s0 ceildiv 2)>()[%dim_4]
%11 = tensor.empty(%9, %10) : tensor<?x?x16x2xi8>
%pack = tensor.pack %2 padding_value(%c0_i8 : i8) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 2] into %11 : tensor<?x?xi8> -> tensor<?x?x16x2xi8>
%c0_5 = arith.constant 0 : index
%dim_6 = tensor.dim %padded, %c0_5 : tensor<?x?xi8>
%c1_7 = arith.constant 1 : index
%dim_8 = tensor.dim %padded, %c1_7 : tensor<?x?xi8>
%c0_9 = arith.constant 0 : index
%dim_10 = tensor.dim %padded, %c0_9 : tensor<?x?xi8>
%c1_11 = arith.constant 1 : index
%dim_12 = tensor.dim %padded, %c1_11 : tensor<?x?xi8>
%12 = iree_linalg_ext.set_encoding %padded : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_13 = tensor.dim %4, %c0 : tensor<?x4xi8>
%13 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c2, %dim_13]
%padded_14 = tensor.pad %4 low[0, 0] high[%13, %c0] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x4xi8> to tensor<?x?xi8>
%c0_15 = arith.constant 0 : index
%dim_16 = tensor.dim %4, %c0_15 : tensor<?x4xi8>
%14 = affine.apply affine_map<()[s0] -> (s0 ceildiv 2)>()[%dim_16]
%15 = tensor.empty(%14) : tensor<1x?x4x2xi8>
%pack_17 = tensor.pack %4 padding_value(%c0_i8 : i8) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [4, 2] into %15 : tensor<?x4xi8> -> tensor<1x?x4x2xi8>
%c0_18 = arith.constant 0 : index
%dim_19 = tensor.dim %padded_14, %c0_18 : tensor<?x?xi8>
%c1_20 = arith.constant 1 : index
%dim_21 = tensor.dim %padded_14, %c1_20 : tensor<?x?xi8>
%c0_22 = arith.constant 0 : index
%dim_23 = tensor.dim %padded_14, %c0_22 : tensor<?x?xi8>
%c1_24 = arith.constant 1 : index
%dim_25 = tensor.dim %padded_14, %c1_24 : tensor<?x?xi8>
%16 = iree_linalg_ext.set_encoding %padded_14 : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_26 = tensor.dim %6, %c0 : tensor<?x4xi32>
%17 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c16, %dim_26]
%padded_27 = tensor.pad %6 low[0, 0] high[%17, %c0] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i32 : i32
} : tensor<?x4xi32> to tensor<?x?xi32>
%c0_28 = arith.constant 0 : index
%dim_29 = tensor.dim %6, %c0_28 : tensor<?x4xi32>
%18 = affine.apply affine_map<()[s0] -> (s0 ceildiv 16)>()[%dim_29]
%19 = tensor.empty(%18) : tensor<?x1x16x4xi32>
%pack_30 = tensor.pack %6 padding_value(%c0_i32 : i32) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 4] into %19 : tensor<?x4xi32> -> tensor<?x1x16x4xi32>
%20 = builtin.unrealized_conversion_cast %pack_30 : tensor<?x1x16x4xi32> to tensor<1x?x4x16xi32>
%c0_31 = arith.constant 0 : index
%dim_32 = tensor.dim %padded_27, %c0_31 : tensor<?x?xi32>
%c1_33 = arith.constant 1 : index
%dim_34 = tensor.dim %padded_27, %c1_33 : tensor<?x?xi32>
%c0_35 = arith.constant 0 : index
%dim_36 = tensor.dim %padded_27, %c0_35 : tensor<?x?xi32>
%c1_37 = arith.constant 1 : index
%dim_38 = tensor.dim %padded_27, %c1_37 : tensor<?x?xi32>
%21 = iree_linalg_ext.set_encoding %padded_27 : tensor<?x?xi32> -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%22 = linalg.mmt4d ins(%pack_17, %pack : tensor<1x?x4x2xi8>, tensor<?x?x16x2xi8>) outs(%20 : tensor<1x?x4x16xi32>) -> tensor<1x?x4x16xi32>
%23 = linalg.matmul ins(%12, %16 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>, tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) outs(%21 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_39 = tensor.dim %6, %c0 : tensor<?x4xi32>
%c1_40 = arith.constant 1 : index
%dim_41 = tensor.dim %22, %c1_40 : tensor<1x?x4x16xi32>
%24 = tensor.empty(%dim_41) : tensor<?x1x16x4xi32>
%transposed = linalg.transpose ins(%22 : tensor<1x?x4x16xi32>) outs(%24 : tensor<?x1x16x4xi32>) permutation = [1, 0, 3, 2]
%c0_42 = arith.constant 0 : index
%c0_43 = arith.constant 0 : index
%dim_44 = tensor.dim %12, %c0_43 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_45 = arith.constant 1 : index
%dim_46 = tensor.dim %12, %c1_45 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c0_47 = arith.constant 0 : index
%dim_48 = tensor.dim %16, %c0_47 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_49 = arith.constant 1 : index
%dim_50 = tensor.dim %16, %c1_49 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c0_51 = arith.constant 0 : index
%dim_52 = tensor.dim %21, %c0_51 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_53 = arith.constant 1 : index
%dim_54 = tensor.dim %21, %c1_53 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_55 = tensor.dim %23, %c0_42 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_56 = arith.constant 1 : index
%dim_57 = tensor.dim %23, %c1_56 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%25 = tensor.empty(%dim_55, %dim_57) : tensor<?x?xi32>
%unpack = tensor.unpack %transposed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 4] into %25 : tensor<?x1x16x4xi32> -> tensor<?x?xi32>
%26 = iree_linalg_ext.unset_encoding %23 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>> -> tensor<?x?xi32>
%extracted_slice = tensor.extract_slice %26[0, 0] [%dim_39, 4] [1, 1] : tensor<?x?xi32> to tensor<?x4xi32>
%27 = hal.tensor.export %extracted_slice "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %27 : !hal.buffer_view
}
} -> SUCCESS
//===-------------------------------------------===//
} -> SUCCESS : pattern applied successfully
// *** IR Dump After Pattern Application *** (identical to the preceding dump)
} -> SUCCESS
//===-------------------------------------------===//
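The final query is the interesting one: %92 below asks for dim 1 of the `linalg.matmul` result itself. Reifying a structured op's result shape expresses it through its operands' shapes, so the pattern materializes `tensor.dim` queries on all three operands (hence the six inserts below) and replaces the query on the result with the operand dim spanning the same loop dimension; each inserted query on an encoded operand is then itself illegal and is recursively reified through its `set_encoding`, as the nested trace shows for the LHS. A rough sketch of the cascade (hand-written; `%lhs`, `%rhs`, `%acc`, `%mm`, and the `#enc_*` attributes are placeholders, and the choice of replacement dim follows the op's reification logic rather than anything asserted here):

    %mm = linalg.matmul ins(%lhs, %rhs : ...) outs(%acc : tensor<?x?xi32, #enc_acc>) -> tensor<?x?xi32, #enc_acc>
    %d = tensor.dim %mm, %c1 : tensor<?x?xi32, #enc_acc>
    // Reification inserts dim queries on each operand (lhs, rhs, acc):
    %l0 = tensor.dim %lhs, %c0 : tensor<?x?xi8, #enc_lhs>
    %l1 = tensor.dim %lhs, %c1 : tensor<?x?xi8, #enc_lhs>
    // ... likewise for %rhs and %acc ...
    // %d is replaced by the operand dim covering the same loop dimension,
    // and each new dim on an encoded operand is recursively pushed through
    // its set_encoding to the padded source, e.g.:
    %l0_src = tensor.dim %padded_lhs, %c0 : tensor<?x?xi8>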
//===-------------------------------------------===//
Legalizing operation : 'arith.constant'(0x58bb0edc5db0) {
%91 = "arith.constant"() <{value = 1 : index}> : () -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.dim'(0x58bb0edc5e20) {
%92 = "tensor.dim"(%71, %91) : (tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>, index) -> index
* Fold {
} -> FAILURE : unable to fold
* Pattern : 'tensor.dim -> ()' {
Trying to match "(anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>"
** Insert : 'arith.constant'(0x58bb0edd3460)
** Insert : 'tensor.dim'(0x58bb0edd3ff0)
** Insert : 'arith.constant'(0x58bb0edd2dc0)
** Insert : 'tensor.dim'(0x58bb0edd40a0)
** Insert : 'arith.constant'(0x58bb0edd4150)
** Insert : 'tensor.dim'(0x58bb0edd41c0)
** Insert : 'arith.constant'(0x58bb0edd4270)
** Insert : 'tensor.dim'(0x58bb0edd42e0)
** Insert : 'arith.constant'(0x58bb0edd4390)
** Insert : 'tensor.dim'(0x58bb0edd4430)
** Insert : 'arith.constant'(0x58bb0edd4510)
** Insert : 'tensor.dim'(0x58bb0edd45b0)
** Replace : 'tensor.dim'(0x58bb0edc5e20)
"(anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>" result 1
//===-------------------------------------------===//
Legalizing operation : 'arith.constant'(0x58bb0edd3460) {
%92 = "arith.constant"() <{value = 0 : index}> : () -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.dim'(0x58bb0edd3ff0) {
%93 = "tensor.dim"(%34, %92) : (tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>, index) -> index
* Fold {
} -> FAILURE : unable to fold
* Pattern : 'tensor.dim -> ()' {
Trying to match "(anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>"
** Insert : 'arith.constant'(0x58bb0edd3b20)
** Insert : 'tensor.dim'(0x58bb0edc7320)
** Insert : 'arith.constant'(0x58bb0edc73d0)
** Insert : 'tensor.dim'(0x58bb0edc7440)
** Replace : 'tensor.dim'(0x58bb0edd3ff0)
"(anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>" result 1
//===-------------------------------------------===//
Legalizing operation : 'arith.constant'(0x58bb0edd3b20) {
%34 = "arith.constant"() <{value = 0 : index}> : () -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.dim'(0x58bb0edc7320) {
%35 = "tensor.dim"(%17, %34) : (tensor<?x?xi8>, index) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'arith.constant'(0x58bb0edc73d0) {
%36 = "arith.constant"() <{value = 1 : index}> : () -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.dim'(0x58bb0edc7440) {
%37 = "tensor.dim"(%17, %36) : (tensor<?x?xi8>, index) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
} -> SUCCESS : pattern applied successfully
// *** IR Dump After Pattern Application ***
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%c2 = arith.constant 2 : index
%c16 = arith.constant 16 : index
%c0_i32 = arith.constant 0 : i32
%c0_i8 = arith.constant 0 : i8
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%dim = tensor.dim %2, %c0 : tensor<?x?xi8>
%7 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c16, %dim]
%dim_0 = tensor.dim %2, %c1 : tensor<?x?xi8>
%8 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c2, %dim_0]
%padded = tensor.pad %2 low[0, 0] high[%7, %8] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x?xi8> to tensor<?x?xi8>
%c0_1 = arith.constant 0 : index
%dim_2 = tensor.dim %2, %c0_1 : tensor<?x?xi8>
%c1_3 = arith.constant 1 : index
%dim_4 = tensor.dim %2, %c1_3 : tensor<?x?xi8>
%9 = affine.apply affine_map<()[s0] -> (s0 ceildiv 16)>()[%dim_2]
%10 = affine.apply affine_map<()[s0] -> (s0 ceildiv 2)>()[%dim_4]
%11 = tensor.empty(%9, %10) : tensor<?x?x16x2xi8>
%pack = tensor.pack %2 padding_value(%c0_i8 : i8) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 2] into %11 : tensor<?x?xi8> -> tensor<?x?x16x2xi8>
%c0_5 = arith.constant 0 : index
%dim_6 = tensor.dim %padded, %c0_5 : tensor<?x?xi8>
%c1_7 = arith.constant 1 : index
%dim_8 = tensor.dim %padded, %c1_7 : tensor<?x?xi8>
%c0_9 = arith.constant 0 : index
%dim_10 = tensor.dim %padded, %c0_9 : tensor<?x?xi8>
%c1_11 = arith.constant 1 : index
%dim_12 = tensor.dim %padded, %c1_11 : tensor<?x?xi8>
%c0_13 = arith.constant 0 : index
%dim_14 = tensor.dim %padded, %c0_13 : tensor<?x?xi8>
%c1_15 = arith.constant 1 : index
%dim_16 = tensor.dim %padded, %c1_15 : tensor<?x?xi8>
%12 = iree_linalg_ext.set_encoding %padded : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_17 = tensor.dim %4, %c0 : tensor<?x4xi8>
%13 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c2, %dim_17]
%padded_18 = tensor.pad %4 low[0, 0] high[%13, %c0] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x4xi8> to tensor<?x?xi8>
%c0_19 = arith.constant 0 : index
%dim_20 = tensor.dim %4, %c0_19 : tensor<?x4xi8>
%14 = affine.apply affine_map<()[s0] -> (s0 ceildiv 2)>()[%dim_20]
%15 = tensor.empty(%14) : tensor<1x?x4x2xi8>
%pack_21 = tensor.pack %4 padding_value(%c0_i8 : i8) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [4, 2] into %15 : tensor<?x4xi8> -> tensor<1x?x4x2xi8>
%c0_22 = arith.constant 0 : index
%dim_23 = tensor.dim %padded_18, %c0_22 : tensor<?x?xi8>
%c1_24 = arith.constant 1 : index
%dim_25 = tensor.dim %padded_18, %c1_24 : tensor<?x?xi8>
%c0_26 = arith.constant 0 : index
%dim_27 = tensor.dim %padded_18, %c0_26 : tensor<?x?xi8>
%c1_28 = arith.constant 1 : index
%dim_29 = tensor.dim %padded_18, %c1_28 : tensor<?x?xi8>
%16 = iree_linalg_ext.set_encoding %padded_18 : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_30 = tensor.dim %6, %c0 : tensor<?x4xi32>
%17 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c16, %dim_30]
%padded_31 = tensor.pad %6 low[0, 0] high[%17, %c0] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i32 : i32
} : tensor<?x4xi32> to tensor<?x?xi32>
%c0_32 = arith.constant 0 : index
%dim_33 = tensor.dim %6, %c0_32 : tensor<?x4xi32>
%18 = affine.apply affine_map<()[s0] -> (s0 ceildiv 16)>()[%dim_33]
%19 = tensor.empty(%18) : tensor<?x1x16x4xi32>
%pack_34 = tensor.pack %6 padding_value(%c0_i32 : i32) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 4] into %19 : tensor<?x4xi32> -> tensor<?x1x16x4xi32>
%20 = builtin.unrealized_conversion_cast %pack_34 : tensor<?x1x16x4xi32> to tensor<1x?x4x16xi32>
%c0_35 = arith.constant 0 : index
%dim_36 = tensor.dim %padded_31, %c0_35 : tensor<?x?xi32>
%c1_37 = arith.constant 1 : index
%dim_38 = tensor.dim %padded_31, %c1_37 : tensor<?x?xi32>
%c0_39 = arith.constant 0 : index
%dim_40 = tensor.dim %padded_31, %c0_39 : tensor<?x?xi32>
%c1_41 = arith.constant 1 : index
%dim_42 = tensor.dim %padded_31, %c1_41 : tensor<?x?xi32>
%21 = iree_linalg_ext.set_encoding %padded_31 : tensor<?x?xi32> -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%22 = linalg.mmt4d ins(%pack_21, %pack : tensor<1x?x4x2xi8>, tensor<?x?x16x2xi8>) outs(%20 : tensor<1x?x4x16xi32>) -> tensor<1x?x4x16xi32>
%23 = linalg.matmul ins(%12, %16 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>, tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) outs(%21 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_43 = tensor.dim %6, %c0 : tensor<?x4xi32>
%c1_44 = arith.constant 1 : index
%dim_45 = tensor.dim %22, %c1_44 : tensor<1x?x4x16xi32>
%24 = tensor.empty(%dim_45) : tensor<?x1x16x4xi32>
%transposed = linalg.transpose ins(%22 : tensor<1x?x4x16xi32>) outs(%24 : tensor<?x1x16x4xi32>) permutation = [1, 0, 3, 2]
%c0_46 = arith.constant 0 : index
%c0_47 = arith.constant 0 : index
%dim_48 = tensor.dim %12, %c0_47 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_49 = arith.constant 1 : index
%dim_50 = tensor.dim %12, %c1_49 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c0_51 = arith.constant 0 : index
%dim_52 = tensor.dim %16, %c0_51 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_53 = arith.constant 1 : index
%dim_54 = tensor.dim %16, %c1_53 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c0_55 = arith.constant 0 : index
%dim_56 = tensor.dim %21, %c0_55 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_57 = arith.constant 1 : index
%dim_58 = tensor.dim %21, %c1_57 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_59 = tensor.dim %23, %c0_46 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_60 = arith.constant 1 : index
%c0_61 = arith.constant 0 : index
%dim_62 = tensor.dim %12, %c0_61 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_63 = arith.constant 1 : index
%dim_64 = tensor.dim %12, %c1_63 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c0_65 = arith.constant 0 : index
%dim_66 = tensor.dim %16, %c0_65 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_67 = arith.constant 1 : index
%dim_68 = tensor.dim %16, %c1_67 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c0_69 = arith.constant 0 : index
%dim_70 = tensor.dim %21, %c0_69 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_71 = arith.constant 1 : index
%dim_72 = tensor.dim %21, %c1_71 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_73 = tensor.dim %23, %c1_60 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%25 = tensor.empty(%dim_59, %dim_73) : tensor<?x?xi32>
%unpack = tensor.unpack %transposed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 4] into %25 : tensor<?x1x16x4xi32> -> tensor<?x?xi32>
%26 = iree_linalg_ext.unset_encoding %23 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>> -> tensor<?x?xi32>
%extracted_slice = tensor.extract_slice %26[0, 0] [%dim_43, 4] [1, 1] : tensor<?x?xi32> to tensor<?x4xi32>
%27 = hal.tensor.export %extracted_slice "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %27 : !hal.buffer_view
}
} -> SUCCESS
//===-------------------------------------------===//
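Each of the blocks above follows the same shape: a tensor.dim on a tensor carrying an #iree_linalg_ext.encoding attribute cannot fold (both of its sizes are dynamic), so the conversion driver falls through to the DimOfReifyRankedShapedTypeOpInterface<tensor::DimOp> pattern. Judging from the trace, the pattern reifies the producer's full result shape — hence the two arith.constant/tensor.dim pairs it inserts — and then replaces the original op with the reified value for the queried dimension, leaving the unqueried one for later cleanup. Note also that the %92/%34/... numbers in these trace snippets appear to be the driver's local generic-form numbering and do not line up with the named values (%dim_*, %c0_*) in the full dumps. A minimal hand-written sketch of the rewrite, with the encoding attribute abbreviated to #enc (names illustrative, not from this log):

  %12 = iree_linalg_ext.set_encoding %padded : tensor<?x?xi8> -> tensor<?x?xi8, #enc>
  %c0 = arith.constant 0 : index
  // Before: the dim is queried on the encoded result ...
  %d = tensor.dim %12, %c0 : tensor<?x?xi8, #enc>
  // ... after: set_encoding is shape-preserving, so the query is reified
  // onto its pre-encoding operand instead.
  %d_new = tensor.dim %padded, %c0 : tensor<?x?xi8>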
//===-------------------------------------------===//
Legalizing operation : 'arith.constant'(0x58bb0edd2dc0) {
%98 = "arith.constant"() <{value = 1 : index}> : () -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.dim'(0x58bb0edd40a0) {
%99 = "tensor.dim"(%38, %98) : (tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>, index) -> index
* Fold {
} -> FAILURE : unable to fold
* Pattern : 'tensor.dim -> ()' {
Trying to match "(anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>"
** Insert : 'arith.constant'(0x58bb0edd3d50)
** Insert : 'tensor.dim'(0x58bb0edd4f30)
** Insert : 'arith.constant'(0x58bb0edd3dc0)
** Insert : 'tensor.dim'(0x58bb0edd4fe0)
** Replace : 'tensor.dim'(0x58bb0edd40a0)
"(anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>" result 1
//===-------------------------------------------===//
Legalizing operation : 'arith.constant'(0x58bb0edd3d50) {
%38 = "arith.constant"() <{value = 0 : index}> : () -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.dim'(0x58bb0edd4f30) {
%39 = "tensor.dim"(%17, %38) : (tensor<?x?xi8>, index) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'arith.constant'(0x58bb0edd3dc0) {
%40 = "arith.constant"() <{value = 1 : index}> : () -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.dim'(0x58bb0edd4fe0) {
%41 = "tensor.dim"(%17, %40) : (tensor<?x?xi8>, index) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
} -> SUCCESS : pattern applied successfully
// *** IR Dump After Pattern Application ***
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%c2 = arith.constant 2 : index
%c16 = arith.constant 16 : index
%c0_i32 = arith.constant 0 : i32
%c0_i8 = arith.constant 0 : i8
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%dim = tensor.dim %2, %c0 : tensor<?x?xi8>
%7 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c16, %dim]
%dim_0 = tensor.dim %2, %c1 : tensor<?x?xi8>
%8 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c2, %dim_0]
%padded = tensor.pad %2 low[0, 0] high[%7, %8] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x?xi8> to tensor<?x?xi8>
%c0_1 = arith.constant 0 : index
%dim_2 = tensor.dim %2, %c0_1 : tensor<?x?xi8>
%c1_3 = arith.constant 1 : index
%dim_4 = tensor.dim %2, %c1_3 : tensor<?x?xi8>
%9 = affine.apply affine_map<()[s0] -> (s0 ceildiv 16)>()[%dim_2]
%10 = affine.apply affine_map<()[s0] -> (s0 ceildiv 2)>()[%dim_4]
%11 = tensor.empty(%9, %10) : tensor<?x?x16x2xi8>
%pack = tensor.pack %2 padding_value(%c0_i8 : i8) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 2] into %11 : tensor<?x?xi8> -> tensor<?x?x16x2xi8>
%c0_5 = arith.constant 0 : index
%dim_6 = tensor.dim %padded, %c0_5 : tensor<?x?xi8>
%c1_7 = arith.constant 1 : index
%dim_8 = tensor.dim %padded, %c1_7 : tensor<?x?xi8>
%c0_9 = arith.constant 0 : index
%dim_10 = tensor.dim %padded, %c0_9 : tensor<?x?xi8>
%c1_11 = arith.constant 1 : index
%dim_12 = tensor.dim %padded, %c1_11 : tensor<?x?xi8>
%c0_13 = arith.constant 0 : index
%dim_14 = tensor.dim %padded, %c0_13 : tensor<?x?xi8>
%c1_15 = arith.constant 1 : index
%dim_16 = tensor.dim %padded, %c1_15 : tensor<?x?xi8>
%c0_17 = arith.constant 0 : index
%dim_18 = tensor.dim %padded, %c0_17 : tensor<?x?xi8>
%c1_19 = arith.constant 1 : index
%dim_20 = tensor.dim %padded, %c1_19 : tensor<?x?xi8>
%12 = iree_linalg_ext.set_encoding %padded : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_21 = tensor.dim %4, %c0 : tensor<?x4xi8>
%13 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c2, %dim_21]
%padded_22 = tensor.pad %4 low[0, 0] high[%13, %c0] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x4xi8> to tensor<?x?xi8>
%c0_23 = arith.constant 0 : index
%dim_24 = tensor.dim %4, %c0_23 : tensor<?x4xi8>
%14 = affine.apply affine_map<()[s0] -> (s0 ceildiv 2)>()[%dim_24]
%15 = tensor.empty(%14) : tensor<1x?x4x2xi8>
%pack_25 = tensor.pack %4 padding_value(%c0_i8 : i8) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [4, 2] into %15 : tensor<?x4xi8> -> tensor<1x?x4x2xi8>
%c0_26 = arith.constant 0 : index
%dim_27 = tensor.dim %padded_22, %c0_26 : tensor<?x?xi8>
%c1_28 = arith.constant 1 : index
%dim_29 = tensor.dim %padded_22, %c1_28 : tensor<?x?xi8>
%c0_30 = arith.constant 0 : index
%dim_31 = tensor.dim %padded_22, %c0_30 : tensor<?x?xi8>
%c1_32 = arith.constant 1 : index
%dim_33 = tensor.dim %padded_22, %c1_32 : tensor<?x?xi8>
%16 = iree_linalg_ext.set_encoding %padded_22 : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_34 = tensor.dim %6, %c0 : tensor<?x4xi32>
%17 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c16, %dim_34]
%padded_35 = tensor.pad %6 low[0, 0] high[%17, %c0] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i32 : i32
} : tensor<?x4xi32> to tensor<?x?xi32>
%c0_36 = arith.constant 0 : index
%dim_37 = tensor.dim %6, %c0_36 : tensor<?x4xi32>
%18 = affine.apply affine_map<()[s0] -> (s0 ceildiv 16)>()[%dim_37]
%19 = tensor.empty(%18) : tensor<?x1x16x4xi32>
%pack_38 = tensor.pack %6 padding_value(%c0_i32 : i32) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 4] into %19 : tensor<?x4xi32> -> tensor<?x1x16x4xi32>
%20 = builtin.unrealized_conversion_cast %pack_38 : tensor<?x1x16x4xi32> to tensor<1x?x4x16xi32>
%c0_39 = arith.constant 0 : index
%dim_40 = tensor.dim %padded_35, %c0_39 : tensor<?x?xi32>
%c1_41 = arith.constant 1 : index
%dim_42 = tensor.dim %padded_35, %c1_41 : tensor<?x?xi32>
%c0_43 = arith.constant 0 : index
%dim_44 = tensor.dim %padded_35, %c0_43 : tensor<?x?xi32>
%c1_45 = arith.constant 1 : index
%dim_46 = tensor.dim %padded_35, %c1_45 : tensor<?x?xi32>
%21 = iree_linalg_ext.set_encoding %padded_35 : tensor<?x?xi32> -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%22 = linalg.mmt4d ins(%pack_25, %pack : tensor<1x?x4x2xi8>, tensor<?x?x16x2xi8>) outs(%20 : tensor<1x?x4x16xi32>) -> tensor<1x?x4x16xi32>
%23 = linalg.matmul ins(%12, %16 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>, tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) outs(%21 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_47 = tensor.dim %6, %c0 : tensor<?x4xi32>
%c1_48 = arith.constant 1 : index
%dim_49 = tensor.dim %22, %c1_48 : tensor<1x?x4x16xi32>
%24 = tensor.empty(%dim_49) : tensor<?x1x16x4xi32>
%transposed = linalg.transpose ins(%22 : tensor<1x?x4x16xi32>) outs(%24 : tensor<?x1x16x4xi32>) permutation = [1, 0, 3, 2]
%c0_50 = arith.constant 0 : index
%c0_51 = arith.constant 0 : index
%dim_52 = tensor.dim %12, %c0_51 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_53 = arith.constant 1 : index
%dim_54 = tensor.dim %12, %c1_53 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c0_55 = arith.constant 0 : index
%dim_56 = tensor.dim %16, %c0_55 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_57 = arith.constant 1 : index
%dim_58 = tensor.dim %16, %c1_57 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c0_59 = arith.constant 0 : index
%dim_60 = tensor.dim %21, %c0_59 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_61 = arith.constant 1 : index
%dim_62 = tensor.dim %21, %c1_61 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_63 = tensor.dim %23, %c0_50 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_64 = arith.constant 1 : index
%c0_65 = arith.constant 0 : index
%dim_66 = tensor.dim %12, %c0_65 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_67 = arith.constant 1 : index
%dim_68 = tensor.dim %12, %c1_67 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c0_69 = arith.constant 0 : index
%dim_70 = tensor.dim %16, %c0_69 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_71 = arith.constant 1 : index
%dim_72 = tensor.dim %16, %c1_71 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c0_73 = arith.constant 0 : index
%dim_74 = tensor.dim %21, %c0_73 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_75 = arith.constant 1 : index
%dim_76 = tensor.dim %21, %c1_75 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_77 = tensor.dim %23, %c1_64 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%25 = tensor.empty(%dim_63, %dim_77) : tensor<?x?xi32>
%unpack = tensor.unpack %transposed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 4] into %25 : tensor<?x1x16x4xi32> -> tensor<?x?xi32>
%26 = iree_linalg_ext.unset_encoding %23 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>> -> tensor<?x?xi32>
%extracted_slice = tensor.extract_slice %26[0, 0] [%dim_47, 4] [1, 1] : tensor<?x?xi32> to tensor<?x4xi32>
%27 = hal.tensor.export %extracted_slice "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %27 : !hal.buffer_view
}
} -> SUCCESS
//===-------------------------------------------===//
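Same pattern again, now for dimension 1 of the LHS-encoded tensor. Every application reifies both dimensions afresh, which is why each successive dump accumulates another redundant %c0_*/%dim_* pair over %padded (compare the growing run of tensor.dim %padded ops above). A later canonicalization/CSE pass would be expected to collapse these; a hand-written sketch of two such duplicated queries (names illustrative):

  %ca = arith.constant 0 : index
  %da = tensor.dim %padded, %ca : tensor<?x?xi8>
  %cb = arith.constant 0 : index
  %db = tensor.dim %padded, %cb : tensor<?x?xi8>   // recomputes %da
  // CSE would rewrite uses of %cb/%db to %ca/%da and erase the duplicates.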
//===-------------------------------------------===//
Legalizing operation : 'arith.constant'(0x58bb0edd4150) {
%104 = "arith.constant"() <{value = 0 : index}> : () -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.dim'(0x58bb0edd41c0) {
%105 = "tensor.dim"(%59, %104) : (tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>, index) -> index
* Fold {
} -> FAILURE : unable to fold
* Pattern : 'tensor.dim -> ()' {
Trying to match "(anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>"
** Insert : 'arith.constant'(0x58bb0edd3100)
** Insert : 'tensor.dim'(0x58bb0edd5400)
** Insert : 'arith.constant'(0x58bb0edd3170)
** Insert : 'tensor.dim'(0x58bb0edd54b0)
** Replace : 'tensor.dim'(0x58bb0edd41c0)
"(anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>" result 1
//===-------------------------------------------===//
Legalizing operation : 'arith.constant'(0x58bb0edd3100) {
%59 = "arith.constant"() <{value = 0 : index}> : () -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.dim'(0x58bb0edd5400) {
%60 = "tensor.dim"(%45, %59) : (tensor<?x?xi8>, index) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'arith.constant'(0x58bb0edd3170) {
%61 = "arith.constant"() <{value = 1 : index}> : () -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.dim'(0x58bb0edd54b0) {
%62 = "tensor.dim"(%45, %61) : (tensor<?x?xi8>, index) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
} -> SUCCESS : pattern applied successfully
// *** IR Dump After Pattern Application ***
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%c2 = arith.constant 2 : index
%c16 = arith.constant 16 : index
%c0_i32 = arith.constant 0 : i32
%c0_i8 = arith.constant 0 : i8
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%dim = tensor.dim %2, %c0 : tensor<?x?xi8>
%7 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c16, %dim]
%dim_0 = tensor.dim %2, %c1 : tensor<?x?xi8>
%8 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c2, %dim_0]
%padded = tensor.pad %2 low[0, 0] high[%7, %8] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x?xi8> to tensor<?x?xi8>
%c0_1 = arith.constant 0 : index
%dim_2 = tensor.dim %2, %c0_1 : tensor<?x?xi8>
%c1_3 = arith.constant 1 : index
%dim_4 = tensor.dim %2, %c1_3 : tensor<?x?xi8>
%9 = affine.apply affine_map<()[s0] -> (s0 ceildiv 16)>()[%dim_2]
%10 = affine.apply affine_map<()[s0] -> (s0 ceildiv 2)>()[%dim_4]
%11 = tensor.empty(%9, %10) : tensor<?x?x16x2xi8>
%pack = tensor.pack %2 padding_value(%c0_i8 : i8) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 2] into %11 : tensor<?x?xi8> -> tensor<?x?x16x2xi8>
%c0_5 = arith.constant 0 : index
%dim_6 = tensor.dim %padded, %c0_5 : tensor<?x?xi8>
%c1_7 = arith.constant 1 : index
%dim_8 = tensor.dim %padded, %c1_7 : tensor<?x?xi8>
%c0_9 = arith.constant 0 : index
%dim_10 = tensor.dim %padded, %c0_9 : tensor<?x?xi8>
%c1_11 = arith.constant 1 : index
%dim_12 = tensor.dim %padded, %c1_11 : tensor<?x?xi8>
%c0_13 = arith.constant 0 : index
%dim_14 = tensor.dim %padded, %c0_13 : tensor<?x?xi8>
%c1_15 = arith.constant 1 : index
%dim_16 = tensor.dim %padded, %c1_15 : tensor<?x?xi8>
%c0_17 = arith.constant 0 : index
%dim_18 = tensor.dim %padded, %c0_17 : tensor<?x?xi8>
%c1_19 = arith.constant 1 : index
%dim_20 = tensor.dim %padded, %c1_19 : tensor<?x?xi8>
%12 = iree_linalg_ext.set_encoding %padded : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_21 = tensor.dim %4, %c0 : tensor<?x4xi8>
%13 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c2, %dim_21]
%padded_22 = tensor.pad %4 low[0, 0] high[%13, %c0] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x4xi8> to tensor<?x?xi8>
%c0_23 = arith.constant 0 : index
%dim_24 = tensor.dim %4, %c0_23 : tensor<?x4xi8>
%14 = affine.apply affine_map<()[s0] -> (s0 ceildiv 2)>()[%dim_24]
%15 = tensor.empty(%14) : tensor<1x?x4x2xi8>
%pack_25 = tensor.pack %4 padding_value(%c0_i8 : i8) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [4, 2] into %15 : tensor<?x4xi8> -> tensor<1x?x4x2xi8>
%c0_26 = arith.constant 0 : index
%dim_27 = tensor.dim %padded_22, %c0_26 : tensor<?x?xi8>
%c1_28 = arith.constant 1 : index
%dim_29 = tensor.dim %padded_22, %c1_28 : tensor<?x?xi8>
%c0_30 = arith.constant 0 : index
%dim_31 = tensor.dim %padded_22, %c0_30 : tensor<?x?xi8>
%c1_32 = arith.constant 1 : index
%dim_33 = tensor.dim %padded_22, %c1_32 : tensor<?x?xi8>
%c0_34 = arith.constant 0 : index
%dim_35 = tensor.dim %padded_22, %c0_34 : tensor<?x?xi8>
%c1_36 = arith.constant 1 : index
%dim_37 = tensor.dim %padded_22, %c1_36 : tensor<?x?xi8>
%16 = iree_linalg_ext.set_encoding %padded_22 : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_38 = tensor.dim %6, %c0 : tensor<?x4xi32>
%17 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c16, %dim_38]
%padded_39 = tensor.pad %6 low[0, 0] high[%17, %c0] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i32 : i32
} : tensor<?x4xi32> to tensor<?x?xi32>
%c0_40 = arith.constant 0 : index
%dim_41 = tensor.dim %6, %c0_40 : tensor<?x4xi32>
%18 = affine.apply affine_map<()[s0] -> (s0 ceildiv 16)>()[%dim_41]
%19 = tensor.empty(%18) : tensor<?x1x16x4xi32>
%pack_42 = tensor.pack %6 padding_value(%c0_i32 : i32) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 4] into %19 : tensor<?x4xi32> -> tensor<?x1x16x4xi32>
%20 = builtin.unrealized_conversion_cast %pack_42 : tensor<?x1x16x4xi32> to tensor<1x?x4x16xi32>
%c0_43 = arith.constant 0 : index
%dim_44 = tensor.dim %padded_39, %c0_43 : tensor<?x?xi32>
%c1_45 = arith.constant 1 : index
%dim_46 = tensor.dim %padded_39, %c1_45 : tensor<?x?xi32>
%c0_47 = arith.constant 0 : index
%dim_48 = tensor.dim %padded_39, %c0_47 : tensor<?x?xi32>
%c1_49 = arith.constant 1 : index
%dim_50 = tensor.dim %padded_39, %c1_49 : tensor<?x?xi32>
%21 = iree_linalg_ext.set_encoding %padded_39 : tensor<?x?xi32> -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%22 = linalg.mmt4d ins(%pack_25, %pack : tensor<1x?x4x2xi8>, tensor<?x?x16x2xi8>) outs(%20 : tensor<1x?x4x16xi32>) -> tensor<1x?x4x16xi32>
%23 = linalg.matmul ins(%12, %16 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>, tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) outs(%21 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_51 = tensor.dim %6, %c0 : tensor<?x4xi32>
%c1_52 = arith.constant 1 : index
%dim_53 = tensor.dim %22, %c1_52 : tensor<1x?x4x16xi32>
%24 = tensor.empty(%dim_53) : tensor<?x1x16x4xi32>
%transposed = linalg.transpose ins(%22 : tensor<1x?x4x16xi32>) outs(%24 : tensor<?x1x16x4xi32>) permutation = [1, 0, 3, 2]
%c0_54 = arith.constant 0 : index
%c0_55 = arith.constant 0 : index
%dim_56 = tensor.dim %12, %c0_55 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_57 = arith.constant 1 : index
%dim_58 = tensor.dim %12, %c1_57 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c0_59 = arith.constant 0 : index
%dim_60 = tensor.dim %16, %c0_59 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_61 = arith.constant 1 : index
%dim_62 = tensor.dim %16, %c1_61 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c0_63 = arith.constant 0 : index
%dim_64 = tensor.dim %21, %c0_63 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_65 = arith.constant 1 : index
%dim_66 = tensor.dim %21, %c1_65 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_67 = tensor.dim %23, %c0_54 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_68 = arith.constant 1 : index
%c0_69 = arith.constant 0 : index
%dim_70 = tensor.dim %12, %c0_69 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_71 = arith.constant 1 : index
%dim_72 = tensor.dim %12, %c1_71 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c0_73 = arith.constant 0 : index
%dim_74 = tensor.dim %16, %c0_73 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_75 = arith.constant 1 : index
%dim_76 = tensor.dim %16, %c1_75 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c0_77 = arith.constant 0 : index
%dim_78 = tensor.dim %21, %c0_77 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_79 = arith.constant 1 : index
%dim_80 = tensor.dim %21, %c1_79 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_81 = tensor.dim %23, %c1_68 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%25 = tensor.empty(%dim_67, %dim_81) : tensor<?x?xi32>
%unpack = tensor.unpack %transposed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 4] into %25 : tensor<?x1x16x4xi32> -> tensor<?x?xi32>
%26 = iree_linalg_ext.unset_encoding %23 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>> -> tensor<?x?xi32>
%extracted_slice = tensor.extract_slice %26[0, 0] [%dim_51, 4] [1, 1] : tensor<?x?xi32> to tensor<?x4xi32>
%27 = hal.tensor.export %extracted_slice "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %27 : !hal.buffer_view
}
} -> SUCCESS
//===-------------------------------------------===//
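The driver has now moved on to the RHS-encoded tensor (note role = RHS, original_type = tensor<?x4xi8> in the op being legalized); the mechanics are identical. It is also worth noting why the "* Fold" step fails first every time: tensor.dim only folds when the queried size is static (or the producer itself folds), and here both sizes of the encoded type are dynamic. For contrast, a hand-written case where the fold alone would succeed and no pattern would be needed:

  %c1 = arith.constant 1 : index
  // tensor<?x4xi8> has static size 4 in dimension 1, so this folds
  // directly to `arith.constant 4 : index`.
  %d = tensor.dim %t, %c1 : tensor<?x4xi8>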
//===-------------------------------------------===//
Legalizing operation : 'arith.constant'(0x58bb0edd4270) {
%110 = "arith.constant"() <{value = 1 : index}> : () -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.dim'(0x58bb0edd42e0) {
%111 = "tensor.dim"(%63, %110) : (tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>, index) -> index
* Fold {
} -> FAILURE : unable to fold
* Pattern : 'tensor.dim -> ()' {
Trying to match "(anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>"
** Insert : 'arith.constant'(0x58bb0edd4ea0)
** Insert : 'tensor.dim'(0x58bb0edd6960)
** Insert : 'arith.constant'(0x58bb0edc8690)
** Insert : 'tensor.dim'(0x58bb0edd31e0)
** Replace : 'tensor.dim'(0x58bb0edd42e0)
"(anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>" result 1
//===-------------------------------------------===//
Legalizing operation : 'arith.constant'(0x58bb0edd4ea0) {
%63 = "arith.constant"() <{value = 0 : index}> : () -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.dim'(0x58bb0edd6960) {
%64 = "tensor.dim"(%45, %63) : (tensor<?x?xi8>, index) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'arith.constant'(0x58bb0edc8690) {
%65 = "arith.constant"() <{value = 1 : index}> : () -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.dim'(0x58bb0edd31e0) {
%66 = "tensor.dim"(%45, %65) : (tensor<?x?xi8>, index) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
} -> SUCCESS : pattern applied successfully
// *** IR Dump After Pattern Application ***
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%c2 = arith.constant 2 : index
%c16 = arith.constant 16 : index
%c0_i32 = arith.constant 0 : i32
%c0_i8 = arith.constant 0 : i8
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%dim = tensor.dim %2, %c0 : tensor<?x?xi8>
%7 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c16, %dim]
%dim_0 = tensor.dim %2, %c1 : tensor<?x?xi8>
%8 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c2, %dim_0]
%padded = tensor.pad %2 low[0, 0] high[%7, %8] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x?xi8> to tensor<?x?xi8>
%c0_1 = arith.constant 0 : index
%dim_2 = tensor.dim %2, %c0_1 : tensor<?x?xi8>
%c1_3 = arith.constant 1 : index
%dim_4 = tensor.dim %2, %c1_3 : tensor<?x?xi8>
%9 = affine.apply affine_map<()[s0] -> (s0 ceildiv 16)>()[%dim_2]
%10 = affine.apply affine_map<()[s0] -> (s0 ceildiv 2)>()[%dim_4]
%11 = tensor.empty(%9, %10) : tensor<?x?x16x2xi8>
%pack = tensor.pack %2 padding_value(%c0_i8 : i8) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 2] into %11 : tensor<?x?xi8> -> tensor<?x?x16x2xi8>
%c0_5 = arith.constant 0 : index
%dim_6 = tensor.dim %padded, %c0_5 : tensor<?x?xi8>
%c1_7 = arith.constant 1 : index
%dim_8 = tensor.dim %padded, %c1_7 : tensor<?x?xi8>
%c0_9 = arith.constant 0 : index
%dim_10 = tensor.dim %padded, %c0_9 : tensor<?x?xi8>
%c1_11 = arith.constant 1 : index
%dim_12 = tensor.dim %padded, %c1_11 : tensor<?x?xi8>
%c0_13 = arith.constant 0 : index
%dim_14 = tensor.dim %padded, %c0_13 : tensor<?x?xi8>
%c1_15 = arith.constant 1 : index
%dim_16 = tensor.dim %padded, %c1_15 : tensor<?x?xi8>
%c0_17 = arith.constant 0 : index
%dim_18 = tensor.dim %padded, %c0_17 : tensor<?x?xi8>
%c1_19 = arith.constant 1 : index
%dim_20 = tensor.dim %padded, %c1_19 : tensor<?x?xi8>
%12 = iree_linalg_ext.set_encoding %padded : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_21 = tensor.dim %4, %c0 : tensor<?x4xi8>
%13 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c2, %dim_21]
%padded_22 = tensor.pad %4 low[0, 0] high[%13, %c0] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x4xi8> to tensor<?x?xi8>
%c0_23 = arith.constant 0 : index
%dim_24 = tensor.dim %4, %c0_23 : tensor<?x4xi8>
%14 = affine.apply affine_map<()[s0] -> (s0 ceildiv 2)>()[%dim_24]
%15 = tensor.empty(%14) : tensor<1x?x4x2xi8>
%pack_25 = tensor.pack %4 padding_value(%c0_i8 : i8) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [4, 2] into %15 : tensor<?x4xi8> -> tensor<1x?x4x2xi8>
%c0_26 = arith.constant 0 : index
%dim_27 = tensor.dim %padded_22, %c0_26 : tensor<?x?xi8>
%c1_28 = arith.constant 1 : index
%dim_29 = tensor.dim %padded_22, %c1_28 : tensor<?x?xi8>
%c0_30 = arith.constant 0 : index
%dim_31 = tensor.dim %padded_22, %c0_30 : tensor<?x?xi8>
%c1_32 = arith.constant 1 : index
%dim_33 = tensor.dim %padded_22, %c1_32 : tensor<?x?xi8>
%c0_34 = arith.constant 0 : index
%dim_35 = tensor.dim %padded_22, %c0_34 : tensor<?x?xi8>
%c1_36 = arith.constant 1 : index
%dim_37 = tensor.dim %padded_22, %c1_36 : tensor<?x?xi8>
%c0_38 = arith.constant 0 : index
%dim_39 = tensor.dim %padded_22, %c0_38 : tensor<?x?xi8>
%c1_40 = arith.constant 1 : index
%dim_41 = tensor.dim %padded_22, %c1_40 : tensor<?x?xi8>
%16 = iree_linalg_ext.set_encoding %padded_22 : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_42 = tensor.dim %6, %c0 : tensor<?x4xi32>
%17 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c16, %dim_42]
%padded_43 = tensor.pad %6 low[0, 0] high[%17, %c0] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i32 : i32
} : tensor<?x4xi32> to tensor<?x?xi32>
%c0_44 = arith.constant 0 : index
%dim_45 = tensor.dim %6, %c0_44 : tensor<?x4xi32>
%18 = affine.apply affine_map<()[s0] -> (s0 ceildiv 16)>()[%dim_45]
%19 = tensor.empty(%18) : tensor<?x1x16x4xi32>
%pack_46 = tensor.pack %6 padding_value(%c0_i32 : i32) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 4] into %19 : tensor<?x4xi32> -> tensor<?x1x16x4xi32>
%20 = builtin.unrealized_conversion_cast %pack_46 : tensor<?x1x16x4xi32> to tensor<1x?x4x16xi32>
%c0_47 = arith.constant 0 : index
%dim_48 = tensor.dim %padded_43, %c0_47 : tensor<?x?xi32>
%c1_49 = arith.constant 1 : index
%dim_50 = tensor.dim %padded_43, %c1_49 : tensor<?x?xi32>
%c0_51 = arith.constant 0 : index
%dim_52 = tensor.dim %padded_43, %c0_51 : tensor<?x?xi32>
%c1_53 = arith.constant 1 : index
%dim_54 = tensor.dim %padded_43, %c1_53 : tensor<?x?xi32>
%21 = iree_linalg_ext.set_encoding %padded_43 : tensor<?x?xi32> -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%22 = linalg.mmt4d ins(%pack_25, %pack : tensor<1x?x4x2xi8>, tensor<?x?x16x2xi8>) outs(%20 : tensor<1x?x4x16xi32>) -> tensor<1x?x4x16xi32>
%23 = linalg.matmul ins(%12, %16 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>, tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) outs(%21 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_55 = tensor.dim %6, %c0 : tensor<?x4xi32>
%c1_56 = arith.constant 1 : index
%dim_57 = tensor.dim %22, %c1_56 : tensor<1x?x4x16xi32>
%24 = tensor.empty(%dim_57) : tensor<?x1x16x4xi32>
%transposed = linalg.transpose ins(%22 : tensor<1x?x4x16xi32>) outs(%24 : tensor<?x1x16x4xi32>) permutation = [1, 0, 3, 2]
%c0_58 = arith.constant 0 : index
%c0_59 = arith.constant 0 : index
%dim_60 = tensor.dim %12, %c0_59 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_61 = arith.constant 1 : index
%dim_62 = tensor.dim %12, %c1_61 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c0_63 = arith.constant 0 : index
%dim_64 = tensor.dim %16, %c0_63 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_65 = arith.constant 1 : index
%dim_66 = tensor.dim %16, %c1_65 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c0_67 = arith.constant 0 : index
%dim_68 = tensor.dim %21, %c0_67 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_69 = arith.constant 1 : index
%dim_70 = tensor.dim %21, %c1_69 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_71 = tensor.dim %23, %c0_58 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_72 = arith.constant 1 : index
%c0_73 = arith.constant 0 : index
%dim_74 = tensor.dim %12, %c0_73 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_75 = arith.constant 1 : index
%dim_76 = tensor.dim %12, %c1_75 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c0_77 = arith.constant 0 : index
%dim_78 = tensor.dim %16, %c0_77 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_79 = arith.constant 1 : index
%dim_80 = tensor.dim %16, %c1_79 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c0_81 = arith.constant 0 : index
%dim_82 = tensor.dim %21, %c0_81 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_83 = arith.constant 1 : index
%dim_84 = tensor.dim %21, %c1_83 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_85 = tensor.dim %23, %c1_72 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%25 = tensor.empty(%dim_71, %dim_85) : tensor<?x?xi32>
%unpack = tensor.unpack %transposed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 4] into %25 : tensor<?x1x16x4xi32> -> tensor<?x?xi32>
%26 = iree_linalg_ext.unset_encoding %23 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>> -> tensor<?x?xi32>
%extracted_slice = tensor.extract_slice %26[0, 0] [%dim_55, 4] [1, 1] : tensor<?x?xi32> to tensor<?x4xi32>
%27 = hal.tensor.export %extracted_slice "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %27 : !hal.buffer_view
}
} -> SUCCESS
//===-------------------------------------------===//
//===-------------------------------------------===//
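Note on reading the trace that follows: this is MLIR's dialect-conversion debug output. For each operation the driver first checks whether the conversion target already marks it legal; if not, it tries an in-place fold, then the registered rewrite patterns, recursively legalizing every op a pattern inserts. A minimal sketch of that control flow (illustrative Python of my own; the real driver is MLIR's C++ dialect-conversion framework):

    # Sketch of the control flow behind each "Legalizing operation" block:
    # legality check, then fold, then patterns, recursing into inserted ops.
    def legalize(op, is_legal, fold, patterns):
        if is_legal(op):
            return True               # "SUCCESS : operation marked legal by the target"
        if fold(op):
            return True               # "* Fold { } -> SUCCESS"
        for pattern in patterns:
            inserted = pattern(op)    # new ops on a match, None otherwise
            if inserted is not None and all(
                    legalize(n, is_legal, fold, patterns) for n in inserted):
                return True           # "SUCCESS : pattern applied successfully"
        return False                  # "FAILURE"

    # Tiny demo: a dim query on an encoded tensor is illegal, but a pattern
    # rewrites it into a dim query on a plain tensor, which is legal.
    patterns = [lambda op: ["dim(plain)"] if op == "dim(encoded)" else None]
    assert legalize("dim(encoded)",
                    is_legal=lambda op: op != "dim(encoded)",
                    fold=lambda op: False,
                    patterns=patterns)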
Legalizing operation : 'arith.constant'(0x58bb0edd4390) {
%116 = "arith.constant"() <{value = 0 : index}> : () -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.dim'(0x58bb0edd4430) {
%117 = "tensor.dim"(%85, %116) : (tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>, index) -> index
* Fold {
} -> FAILURE : unable to fold
* Pattern : 'tensor.dim -> ()' {
Trying to match "(anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>"
** Insert : 'arith.constant'(0x58bb0edd5370)
** Insert : 'tensor.dim'(0x58bb0edd50d0)
** Insert : 'arith.constant'(0x58bb0edc8700)
** Insert : 'tensor.dim'(0x58bb0edd5180)
** Replace : 'tensor.dim'(0x58bb0edd4430)
"(anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>" result 1
//===-------------------------------------------===//
Legalizing operation : 'arith.constant'(0x58bb0edd5370) {
%85 = "arith.constant"() <{value = 0 : index}> : () -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.dim'(0x58bb0edd50d0) {
%86 = "tensor.dim"(%70, %85) : (tensor<?x?xi32>, index) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'arith.constant'(0x58bb0edc8700) {
%87 = "arith.constant"() <{value = 1 : index}> : () -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.dim'(0x58bb0edd5180) {
%88 = "tensor.dim"(%70, %87) : (tensor<?x?xi32>, index) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
} -> SUCCESS : pattern applied successfully
// *** IR Dump After Pattern Application ***
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%c2 = arith.constant 2 : index
%c16 = arith.constant 16 : index
%c0_i32 = arith.constant 0 : i32
%c0_i8 = arith.constant 0 : i8
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%dim = tensor.dim %2, %c0 : tensor<?x?xi8>
%7 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c16, %dim]
%dim_0 = tensor.dim %2, %c1 : tensor<?x?xi8>
%8 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c2, %dim_0]
%padded = tensor.pad %2 low[0, 0] high[%7, %8] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x?xi8> to tensor<?x?xi8>
%c0_1 = arith.constant 0 : index
%dim_2 = tensor.dim %2, %c0_1 : tensor<?x?xi8>
%c1_3 = arith.constant 1 : index
%dim_4 = tensor.dim %2, %c1_3 : tensor<?x?xi8>
%9 = affine.apply affine_map<()[s0] -> (s0 ceildiv 16)>()[%dim_2]
%10 = affine.apply affine_map<()[s0] -> (s0 ceildiv 2)>()[%dim_4]
%11 = tensor.empty(%9, %10) : tensor<?x?x16x2xi8>
%pack = tensor.pack %2 padding_value(%c0_i8 : i8) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 2] into %11 : tensor<?x?xi8> -> tensor<?x?x16x2xi8>
%c0_5 = arith.constant 0 : index
%dim_6 = tensor.dim %padded, %c0_5 : tensor<?x?xi8>
%c1_7 = arith.constant 1 : index
%dim_8 = tensor.dim %padded, %c1_7 : tensor<?x?xi8>
%c0_9 = arith.constant 0 : index
%dim_10 = tensor.dim %padded, %c0_9 : tensor<?x?xi8>
%c1_11 = arith.constant 1 : index
%dim_12 = tensor.dim %padded, %c1_11 : tensor<?x?xi8>
%c0_13 = arith.constant 0 : index
%dim_14 = tensor.dim %padded, %c0_13 : tensor<?x?xi8>
%c1_15 = arith.constant 1 : index
%dim_16 = tensor.dim %padded, %c1_15 : tensor<?x?xi8>
%c0_17 = arith.constant 0 : index
%dim_18 = tensor.dim %padded, %c0_17 : tensor<?x?xi8>
%c1_19 = arith.constant 1 : index
%dim_20 = tensor.dim %padded, %c1_19 : tensor<?x?xi8>
%12 = iree_linalg_ext.set_encoding %padded : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_21 = tensor.dim %4, %c0 : tensor<?x4xi8>
%13 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c2, %dim_21]
%padded_22 = tensor.pad %4 low[0, 0] high[%13, %c0] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x4xi8> to tensor<?x?xi8>
%c0_23 = arith.constant 0 : index
%dim_24 = tensor.dim %4, %c0_23 : tensor<?x4xi8>
%14 = affine.apply affine_map<()[s0] -> (s0 ceildiv 2)>()[%dim_24]
%15 = tensor.empty(%14) : tensor<1x?x4x2xi8>
%pack_25 = tensor.pack %4 padding_value(%c0_i8 : i8) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [4, 2] into %15 : tensor<?x4xi8> -> tensor<1x?x4x2xi8>
%c0_26 = arith.constant 0 : index
%dim_27 = tensor.dim %padded_22, %c0_26 : tensor<?x?xi8>
%c1_28 = arith.constant 1 : index
%dim_29 = tensor.dim %padded_22, %c1_28 : tensor<?x?xi8>
%c0_30 = arith.constant 0 : index
%dim_31 = tensor.dim %padded_22, %c0_30 : tensor<?x?xi8>
%c1_32 = arith.constant 1 : index
%dim_33 = tensor.dim %padded_22, %c1_32 : tensor<?x?xi8>
%c0_34 = arith.constant 0 : index
%dim_35 = tensor.dim %padded_22, %c0_34 : tensor<?x?xi8>
%c1_36 = arith.constant 1 : index
%dim_37 = tensor.dim %padded_22, %c1_36 : tensor<?x?xi8>
%c0_38 = arith.constant 0 : index
%dim_39 = tensor.dim %padded_22, %c0_38 : tensor<?x?xi8>
%c1_40 = arith.constant 1 : index
%dim_41 = tensor.dim %padded_22, %c1_40 : tensor<?x?xi8>
%16 = iree_linalg_ext.set_encoding %padded_22 : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_42 = tensor.dim %6, %c0 : tensor<?x4xi32>
%17 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c16, %dim_42]
%padded_43 = tensor.pad %6 low[0, 0] high[%17, %c0] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i32 : i32
} : tensor<?x4xi32> to tensor<?x?xi32>
%c0_44 = arith.constant 0 : index
%dim_45 = tensor.dim %6, %c0_44 : tensor<?x4xi32>
%18 = affine.apply affine_map<()[s0] -> (s0 ceildiv 16)>()[%dim_45]
%19 = tensor.empty(%18) : tensor<?x1x16x4xi32>
%pack_46 = tensor.pack %6 padding_value(%c0_i32 : i32) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 4] into %19 : tensor<?x4xi32> -> tensor<?x1x16x4xi32>
%20 = builtin.unrealized_conversion_cast %pack_46 : tensor<?x1x16x4xi32> to tensor<1x?x4x16xi32>
%c0_47 = arith.constant 0 : index
%dim_48 = tensor.dim %padded_43, %c0_47 : tensor<?x?xi32>
%c1_49 = arith.constant 1 : index
%dim_50 = tensor.dim %padded_43, %c1_49 : tensor<?x?xi32>
%c0_51 = arith.constant 0 : index
%dim_52 = tensor.dim %padded_43, %c0_51 : tensor<?x?xi32>
%c1_53 = arith.constant 1 : index
%dim_54 = tensor.dim %padded_43, %c1_53 : tensor<?x?xi32>
%c0_55 = arith.constant 0 : index
%dim_56 = tensor.dim %padded_43, %c0_55 : tensor<?x?xi32>
%c1_57 = arith.constant 1 : index
%dim_58 = tensor.dim %padded_43, %c1_57 : tensor<?x?xi32>
%21 = iree_linalg_ext.set_encoding %padded_43 : tensor<?x?xi32> -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%22 = linalg.mmt4d ins(%pack_25, %pack : tensor<1x?x4x2xi8>, tensor<?x?x16x2xi8>) outs(%20 : tensor<1x?x4x16xi32>) -> tensor<1x?x4x16xi32>
%23 = linalg.matmul ins(%12, %16 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>, tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) outs(%21 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_59 = tensor.dim %6, %c0 : tensor<?x4xi32>
%c1_60 = arith.constant 1 : index
%dim_61 = tensor.dim %22, %c1_60 : tensor<1x?x4x16xi32>
%24 = tensor.empty(%dim_61) : tensor<?x1x16x4xi32>
%transposed = linalg.transpose ins(%22 : tensor<1x?x4x16xi32>) outs(%24 : tensor<?x1x16x4xi32>) permutation = [1, 0, 3, 2]
%c0_62 = arith.constant 0 : index
%c0_63 = arith.constant 0 : index
%dim_64 = tensor.dim %12, %c0_63 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_65 = arith.constant 1 : index
%dim_66 = tensor.dim %12, %c1_65 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c0_67 = arith.constant 0 : index
%dim_68 = tensor.dim %16, %c0_67 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_69 = arith.constant 1 : index
%dim_70 = tensor.dim %16, %c1_69 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c0_71 = arith.constant 0 : index
%dim_72 = tensor.dim %21, %c0_71 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_73 = arith.constant 1 : index
%dim_74 = tensor.dim %21, %c1_73 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_75 = tensor.dim %23, %c0_62 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_76 = arith.constant 1 : index
%c0_77 = arith.constant 0 : index
%dim_78 = tensor.dim %12, %c0_77 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_79 = arith.constant 1 : index
%dim_80 = tensor.dim %12, %c1_79 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c0_81 = arith.constant 0 : index
%dim_82 = tensor.dim %16, %c0_81 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_83 = arith.constant 1 : index
%dim_84 = tensor.dim %16, %c1_83 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c0_85 = arith.constant 0 : index
%dim_86 = tensor.dim %21, %c0_85 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_87 = arith.constant 1 : index
%dim_88 = tensor.dim %21, %c1_87 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_89 = tensor.dim %23, %c1_76 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%25 = tensor.empty(%dim_75, %dim_89) : tensor<?x?xi32>
%unpack = tensor.unpack %transposed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 4] into %25 : tensor<?x1x16x4xi32> -> tensor<?x?xi32>
%26 = iree_linalg_ext.unset_encoding %23 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>> -> tensor<?x?xi32>
%extracted_slice = tensor.extract_slice %26[0, 0] [%dim_59, 4] [1, 1] : tensor<?x?xi32> to tensor<?x4xi32>
%27 = hal.tensor.export %extracted_slice "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %27 : !hal.buffer_view
}
} -> SUCCESS
//===-------------------------------------------===//
//===-------------------------------------------===//
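Note: the pattern just applied, DimOfReifyRankedShapedTypeOpInterface, is what keeps inflating these dumps. The tensor.dim queried a dimension of the encoded matmul result %23; the pattern answers by reifying the result shape through the defining op, and since the shapes flow unchanged from iree_linalg_ext.set_encoding's un-encoded source, the query becomes a tensor.dim on the plain tensor<?x?xi32> (%70 in the numbered form above), which the target accepts as legal. Each application leaves behind another %c0_NN/%dim_NN pair on %padded_43; presumably later CSE/canonicalization sweeps those up. A toy model of the rewrite (illustrative Python; the class and method names are mine, not MLIR APIs):

    # Toy model of resolving tensor.dim through a shape-preserving op via
    # ReifyRankedShapedTypeOpInterface.
    class Value:
        def __init__(self, shape):
            self.shape = shape            # e.g. ("?", "?") for tensor<?x?xi32>

    class SetEncodingOp:
        """Result shape is identical to the source shape."""
        def __init__(self, source):
            self.source = source

        def reify_result_shapes(self):
            # Report each result dim as a dim query on the un-encoded source.
            return [("tensor.dim", self.source, i)
                    for i in range(len(self.source.shape))]

    padded = Value(("?", "?"))            # %padded_43 : tensor<?x?xi32>
    encoded = SetEncodingOp(padded)       # %21 = iree_linalg_ext.set_encoding ...
    # tensor.dim %encoded, %c1  ==>  tensor.dim %padded_43, %c1
    assert encoded.reify_result_shapes()[1] == ("tensor.dim", padded, 1)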
Legalizing operation : 'arith.constant'(0x58bb0edd4510) {
%122 = "arith.constant"() <{value = 1 : index}> : () -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
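Note: as everywhere in this trace, plain arith and tensor ops come back "marked legal by the target". The legality rule this conversion implies is simple: an op is illegal only if it touches a tensor carrying an #iree_linalg_ext.encoding attribute, which is why tensor.dim on the encoded %89 below needs a pattern while the same op on tensor<?x?xi32> sails through. A rough sketch of that rule (illustrative Python; the real rule lives in the pass's C++ conversion target):

    # Sketch of the legality rule implied by this trace: legal unless some
    # operand or result type carries an #iree_linalg_ext.encoding.
    def is_legal(operand_types, result_types):
        return not any("#iree_linalg_ext.encoding" in t
                       for t in list(operand_types) + list(result_types))

    assert is_legal(["index"], ["index"])                     # arith.constant
    assert is_legal(["tensor<?x?xi32>", "index"], ["index"])  # plain tensor.dim
    assert not is_legal(
        ["tensor<?x?xi32, #iree_linalg_ext.encoding<...>>", "index"], ["index"])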
//===-------------------------------------------===//
Legalizing operation : 'tensor.dim'(0x58bb0edd45b0) {
%123 = "tensor.dim"(%89, %122) : (tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>, index) -> index
* Fold {
} -> FAILURE : unable to fold
* Pattern : 'tensor.dim -> ()' {
Trying to match "(anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>"
** Insert : 'arith.constant'(0x58bb0edd68d0)
** Insert : 'tensor.dim'(0x58bb0edd4a50)
** Insert : 'arith.constant'(0x58bb0edd3290)
** Insert : 'tensor.dim'(0x58bb0edd4b00)
** Replace : 'tensor.dim'(0x58bb0edd45b0)
"(anonymous namespace)::DimOfReifyRankedShapedTypeOpInterface<mlir::tensor::DimOp>" result 1
//===-------------------------------------------===//
Legalizing operation : 'arith.constant'(0x58bb0edd68d0) {
%89 = "arith.constant"() <{value = 0 : index}> : () -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.dim'(0x58bb0edd4a50) {
%90 = "tensor.dim"(%70, %89) : (tensor<?x?xi32>, index) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'arith.constant'(0x58bb0edd3290) {
%91 = "arith.constant"() <{value = 1 : index}> : () -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.dim'(0x58bb0edd4b00) {
%92 = "tensor.dim"(%70, %91) : (tensor<?x?xi32>, index) -> index
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
} -> SUCCESS : pattern applied successfully
// *** IR Dump After Pattern Application ***
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%c2 = arith.constant 2 : index
%c16 = arith.constant 16 : index
%c0_i32 = arith.constant 0 : i32
%c0_i8 = arith.constant 0 : i8
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%dim = tensor.dim %2, %c0 : tensor<?x?xi8>
%7 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c16, %dim]
%dim_0 = tensor.dim %2, %c1 : tensor<?x?xi8>
%8 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c2, %dim_0]
%padded = tensor.pad %2 low[0, 0] high[%7, %8] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x?xi8> to tensor<?x?xi8>
%c0_1 = arith.constant 0 : index
%dim_2 = tensor.dim %2, %c0_1 : tensor<?x?xi8>
%c1_3 = arith.constant 1 : index
%dim_4 = tensor.dim %2, %c1_3 : tensor<?x?xi8>
%9 = affine.apply affine_map<()[s0] -> (s0 ceildiv 16)>()[%dim_2]
%10 = affine.apply affine_map<()[s0] -> (s0 ceildiv 2)>()[%dim_4]
%11 = tensor.empty(%9, %10) : tensor<?x?x16x2xi8>
%pack = tensor.pack %2 padding_value(%c0_i8 : i8) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 2] into %11 : tensor<?x?xi8> -> tensor<?x?x16x2xi8>
%c0_5 = arith.constant 0 : index
%dim_6 = tensor.dim %padded, %c0_5 : tensor<?x?xi8>
%c1_7 = arith.constant 1 : index
%dim_8 = tensor.dim %padded, %c1_7 : tensor<?x?xi8>
%c0_9 = arith.constant 0 : index
%dim_10 = tensor.dim %padded, %c0_9 : tensor<?x?xi8>
%c1_11 = arith.constant 1 : index
%dim_12 = tensor.dim %padded, %c1_11 : tensor<?x?xi8>
%c0_13 = arith.constant 0 : index
%dim_14 = tensor.dim %padded, %c0_13 : tensor<?x?xi8>
%c1_15 = arith.constant 1 : index
%dim_16 = tensor.dim %padded, %c1_15 : tensor<?x?xi8>
%c0_17 = arith.constant 0 : index
%dim_18 = tensor.dim %padded, %c0_17 : tensor<?x?xi8>
%c1_19 = arith.constant 1 : index
%dim_20 = tensor.dim %padded, %c1_19 : tensor<?x?xi8>
%12 = iree_linalg_ext.set_encoding %padded : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_21 = tensor.dim %4, %c0 : tensor<?x4xi8>
%13 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c2, %dim_21]
%padded_22 = tensor.pad %4 low[0, 0] high[%13, %c0] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x4xi8> to tensor<?x?xi8>
%c0_23 = arith.constant 0 : index
%dim_24 = tensor.dim %4, %c0_23 : tensor<?x4xi8>
%14 = affine.apply affine_map<()[s0] -> (s0 ceildiv 2)>()[%dim_24]
%15 = tensor.empty(%14) : tensor<1x?x4x2xi8>
%pack_25 = tensor.pack %4 padding_value(%c0_i8 : i8) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [4, 2] into %15 : tensor<?x4xi8> -> tensor<1x?x4x2xi8>
%c0_26 = arith.constant 0 : index
%dim_27 = tensor.dim %padded_22, %c0_26 : tensor<?x?xi8>
%c1_28 = arith.constant 1 : index
%dim_29 = tensor.dim %padded_22, %c1_28 : tensor<?x?xi8>
%c0_30 = arith.constant 0 : index
%dim_31 = tensor.dim %padded_22, %c0_30 : tensor<?x?xi8>
%c1_32 = arith.constant 1 : index
%dim_33 = tensor.dim %padded_22, %c1_32 : tensor<?x?xi8>
%c0_34 = arith.constant 0 : index
%dim_35 = tensor.dim %padded_22, %c0_34 : tensor<?x?xi8>
%c1_36 = arith.constant 1 : index
%dim_37 = tensor.dim %padded_22, %c1_36 : tensor<?x?xi8>
%c0_38 = arith.constant 0 : index
%dim_39 = tensor.dim %padded_22, %c0_38 : tensor<?x?xi8>
%c1_40 = arith.constant 1 : index
%dim_41 = tensor.dim %padded_22, %c1_40 : tensor<?x?xi8>
%16 = iree_linalg_ext.set_encoding %padded_22 : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_42 = tensor.dim %6, %c0 : tensor<?x4xi32>
%17 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c16, %dim_42]
%padded_43 = tensor.pad %6 low[0, 0] high[%17, %c0] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i32 : i32
} : tensor<?x4xi32> to tensor<?x?xi32>
%c0_44 = arith.constant 0 : index
%dim_45 = tensor.dim %6, %c0_44 : tensor<?x4xi32>
%18 = affine.apply affine_map<()[s0] -> (s0 ceildiv 16)>()[%dim_45]
%19 = tensor.empty(%18) : tensor<?x1x16x4xi32>
%pack_46 = tensor.pack %6 padding_value(%c0_i32 : i32) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 4] into %19 : tensor<?x4xi32> -> tensor<?x1x16x4xi32>
%20 = builtin.unrealized_conversion_cast %pack_46 : tensor<?x1x16x4xi32> to tensor<1x?x4x16xi32>
%c0_47 = arith.constant 0 : index
%dim_48 = tensor.dim %padded_43, %c0_47 : tensor<?x?xi32>
%c1_49 = arith.constant 1 : index
%dim_50 = tensor.dim %padded_43, %c1_49 : tensor<?x?xi32>
%c0_51 = arith.constant 0 : index
%dim_52 = tensor.dim %padded_43, %c0_51 : tensor<?x?xi32>
%c1_53 = arith.constant 1 : index
%dim_54 = tensor.dim %padded_43, %c1_53 : tensor<?x?xi32>
%c0_55 = arith.constant 0 : index
%dim_56 = tensor.dim %padded_43, %c0_55 : tensor<?x?xi32>
%c1_57 = arith.constant 1 : index
%dim_58 = tensor.dim %padded_43, %c1_57 : tensor<?x?xi32>
%c0_59 = arith.constant 0 : index
%dim_60 = tensor.dim %padded_43, %c0_59 : tensor<?x?xi32>
%c1_61 = arith.constant 1 : index
%dim_62 = tensor.dim %padded_43, %c1_61 : tensor<?x?xi32>
%21 = iree_linalg_ext.set_encoding %padded_43 : tensor<?x?xi32> -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%22 = linalg.mmt4d ins(%pack_25, %pack : tensor<1x?x4x2xi8>, tensor<?x?x16x2xi8>) outs(%20 : tensor<1x?x4x16xi32>) -> tensor<1x?x4x16xi32>
%23 = linalg.matmul ins(%12, %16 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>, tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) outs(%21 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_63 = tensor.dim %6, %c0 : tensor<?x4xi32>
%c1_64 = arith.constant 1 : index
%dim_65 = tensor.dim %22, %c1_64 : tensor<1x?x4x16xi32>
%24 = tensor.empty(%dim_65) : tensor<?x1x16x4xi32>
%transposed = linalg.transpose ins(%22 : tensor<1x?x4x16xi32>) outs(%24 : tensor<?x1x16x4xi32>) permutation = [1, 0, 3, 2]
%c0_66 = arith.constant 0 : index
%c0_67 = arith.constant 0 : index
%dim_68 = tensor.dim %12, %c0_67 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_69 = arith.constant 1 : index
%dim_70 = tensor.dim %12, %c1_69 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c0_71 = arith.constant 0 : index
%dim_72 = tensor.dim %16, %c0_71 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_73 = arith.constant 1 : index
%dim_74 = tensor.dim %16, %c1_73 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c0_75 = arith.constant 0 : index
%dim_76 = tensor.dim %21, %c0_75 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_77 = arith.constant 1 : index
%dim_78 = tensor.dim %21, %c1_77 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_79 = tensor.dim %23, %c0_66 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_80 = arith.constant 1 : index
%c0_81 = arith.constant 0 : index
%dim_82 = tensor.dim %12, %c0_81 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_83 = arith.constant 1 : index
%dim_84 = tensor.dim %12, %c1_83 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c0_85 = arith.constant 0 : index
%dim_86 = tensor.dim %16, %c0_85 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_87 = arith.constant 1 : index
%dim_88 = tensor.dim %16, %c1_87 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c0_89 = arith.constant 0 : index
%dim_90 = tensor.dim %21, %c0_89 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_91 = arith.constant 1 : index
%dim_92 = tensor.dim %21, %c1_91 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_93 = tensor.dim %23, %c1_80 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%25 = tensor.empty(%dim_79, %dim_93) : tensor<?x?xi32>
%unpack = tensor.unpack %transposed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 4] into %25 : tensor<?x1x16x4xi32> -> tensor<?x?xi32>
%26 = iree_linalg_ext.unset_encoding %23 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>> -> tensor<?x?xi32>
%extracted_slice = tensor.extract_slice %26[0, 0] [%dim_63, 4] [1, 1] : tensor<?x?xi32> to tensor<?x4xi32>
%27 = hal.tensor.export %extracted_slice "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %27 : !hal.buffer_view
}
} -> SUCCESS
//===-------------------------------------------===//
} -> SUCCESS : pattern applied successfully
// (IR dump after this enclosing pattern application omitted: byte-identical to the preceding dump.)
} -> SUCCESS
//===-------------------------------------------===//
//===-------------------------------------------===//
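Note: stepping back, the dumps above show the materialization in flight. The encoded linalg.matmul still coexists with its replacement, the linalg.mmt4d on packed tiles, and a builtin.unrealized_conversion_cast temporarily bridges the two accumulator layouts until the conversion completes. Consistent with the matmul_narrow_N = 4 hint in the encodings, the computation runs transposed so the narrow dimension sits on M of the mmt4d: packed input1 feeds the LHS slot, packed input0 the RHS slot, and the 1x?x4x16 result is transposed back (permutation [1, 0, 3, 2]) before unpacking. A runnable numpy sketch of the whole schedule (my own shapes and data; sizes divide evenly so all pads are zero):

    import numpy as np

    M, K, N = 32, 6, 4                   # N = 4 is the narrow dimension
    M0, N0, K0 = 16, 4, 2                # tile sizes from the pack ops above
    A = np.random.randint(-8, 8, (M, K)).astype(np.int8)   # input0
    B = np.random.randint(-8, 8, (K, N)).astype(np.int8)   # input1
    C = np.random.randint(-8, 8, (M, N)).astype(np.int32)  # input2 (acc)

    # %pack    : A -> [M/16, K/2, 16, 2]
    packA = A.reshape(M // M0, M0, K // K0, K0).transpose(0, 2, 1, 3)
    # %pack_25 : B -> [N/4, K/2, 4, 2]  (outer_dims_perm = [1, 0])
    packB = B.reshape(K // K0, K0, N // N0, N0).transpose(2, 0, 3, 1)
    # accumulator in the transposed 1x?x4x16 layout (the role of %20)
    packC = C.reshape(M // M0, M0, N // N0, N0).transpose(2, 0, 3, 1)

    # %22 = linalg.mmt4d ins(%pack_25, %pack) outs(%20):
    # out[n1, m1, n0, m0] += sum over k1, k0 of
    #     packB[n1, k1, n0, k0] * packA[m1, k1, m0, k0]
    acc = packC + np.einsum("nkac,mkbc->nmab",
                            packB.astype(np.int32), packA.astype(np.int32))

    # %transposed = linalg.transpose ... permutation = [1, 0, 3, 2]
    # %unpack     = tensor.unpack ... inner_tiles = [16, 4]
    T = acc.transpose(1, 0, 3, 2)                 # -> [M/16, N/4, 16, 4]
    R = T.transpose(0, 2, 1, 3).reshape(M, N)     # unpack to MxN
    assert (R == C + A.astype(np.int32) @ B.astype(np.int32)).all()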
Legalizing operation : 'tensor.empty'(0x58bb0edc5fb0) {
%129 = "tensor.empty"(%114, %128) : (index, index) -> tensor<?x?xi32>
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
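Note: the tensor.empty just legalized (%129) is the destination the unpack writes into, sized by the reified dims computed earlier. Those dims ultimately trace back through tensor.pad to the round-up arithmetic repeated throughout the dumps: affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)> is the amount of high padding that brings a dynamic size s1 up to the next multiple of the tile size s0. Worked out in plain Python:

    # Pad amount used by the tensor.pad ops above: round s1 up to a multiple
    # of the tile size s0, then subtract s1.
    def pad_amount(s0, s1):
        return -s1 + -(-s1 // s0) * s0    # -(-x // y) is ceildiv(x, y)

    assert pad_amount(16, 10) == 6        # an M of 10 pads up to 16
    assert pad_amount(16, 32) == 0        # already a multiple: no padding
    assert pad_amount(2, 7) == 1          # a K of 7 pads up to 8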
//===-------------------------------------------===//
Legalizing operation : 'tensor.unpack'(0x58bb0edce940) {
%130 = "tensor.unpack"(%100, %129) <{inner_dims_pos = array<i64: 0, 1>, outer_dims_perm = array<i64: 0, 1>, static_inner_tiles = array<i64: 16, 4>}> : (tensor<?x1x16x4xi32>, tensor<?x?xi32>) -> tensor<?x?xi32>
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
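Note: tensor.unpack is the exact inverse of the tensor.pack ops that materialized the encodings: it re-tiles the ?x1x16x4 accumulator back into a row-major ?x? matrix, from which the original ?x4 result is sliced. The element mapping for the LHS tiling inner_tiles = [16, 2], in a runnable numpy sketch (sizes chosen to divide evenly so padding can be ignored):

    import numpy as np

    # tensor.pack with outer_dims_perm = [0, 1], inner_dims_pos = [0, 1],
    # inner_tiles = [16, 2]: source element (i, j) lands in tile
    # (i // 16, j // 2) at in-tile position (i % 16, j % 2).
    a = np.arange(32 * 4, dtype=np.int8).reshape(32, 4)     # tensor<32x4xi8>
    packed = a.reshape(2, 16, 2, 2).transpose(0, 2, 1, 3)   # tensor<2x2x16x2xi8>
    assert packed[1, 0, 3, 1] == a[16 + 3, 0 + 1]

    # tensor.unpack undoes it: invert the transpose, re-fuse the split dims.
    unpacked = packed.transpose(0, 2, 1, 3).reshape(32, 4)
    assert (unpacked == a).all()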
} -> SUCCESS : pattern applied successfully
// *** IR Dump After Pattern Application ***
util.func public @matmul_n4_i8(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_n4_i8(%input0: tensor<?x?xi8>, %input1: tensor<?x4xi8>, %input2: tensor<?x4xi32>) -> (%output0: tensor<?x4xi32>)"}} {
%c2 = arith.constant 2 : index
%c16 = arith.constant 16 : index
%c0_i32 = arith.constant 0 : i32
%c0_i8 = arith.constant 0 : i8
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?xi8>{%0, %1}
%3 = hal.buffer_view.dim<%arg1 : !hal.buffer_view>[0] : index
%4 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<?x4xi8>{%3}
%5 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
%6 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor<?x4xi32>{%5}
%dim = tensor.dim %2, %c0 : tensor<?x?xi8>
%7 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c16, %dim]
%dim_0 = tensor.dim %2, %c1 : tensor<?x?xi8>
%8 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c2, %dim_0]
%padded = tensor.pad %2 low[0, 0] high[%7, %8] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x?xi8> to tensor<?x?xi8>
%c0_1 = arith.constant 0 : index
%dim_2 = tensor.dim %2, %c0_1 : tensor<?x?xi8>
%c1_3 = arith.constant 1 : index
%dim_4 = tensor.dim %2, %c1_3 : tensor<?x?xi8>
%9 = affine.apply affine_map<()[s0] -> (s0 ceildiv 16)>()[%dim_2]
%10 = affine.apply affine_map<()[s0] -> (s0 ceildiv 2)>()[%dim_4]
%11 = tensor.empty(%9, %10) : tensor<?x?x16x2xi8>
%pack = tensor.pack %2 padding_value(%c0_i8 : i8) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 2] into %11 : tensor<?x?xi8> -> tensor<?x?x16x2xi8>
%c0_5 = arith.constant 0 : index
%dim_6 = tensor.dim %padded, %c0_5 : tensor<?x?xi8>
%c1_7 = arith.constant 1 : index
%dim_8 = tensor.dim %padded, %c1_7 : tensor<?x?xi8>
%c0_9 = arith.constant 0 : index
%dim_10 = tensor.dim %padded, %c0_9 : tensor<?x?xi8>
%c1_11 = arith.constant 1 : index
%dim_12 = tensor.dim %padded, %c1_11 : tensor<?x?xi8>
%c0_13 = arith.constant 0 : index
%dim_14 = tensor.dim %padded, %c0_13 : tensor<?x?xi8>
%c1_15 = arith.constant 1 : index
%dim_16 = tensor.dim %padded, %c1_15 : tensor<?x?xi8>
%c0_17 = arith.constant 0 : index
%dim_18 = tensor.dim %padded, %c0_17 : tensor<?x?xi8>
%c1_19 = arith.constant 1 : index
%dim_20 = tensor.dim %padded, %c1_19 : tensor<?x?xi8>
%12 = iree_linalg_ext.set_encoding %padded : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_21 = tensor.dim %4, %c0 : tensor<?x4xi8>
%13 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c2, %dim_21]
%padded_22 = tensor.pad %4 low[0, 0] high[%13, %c0] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i8 : i8
} : tensor<?x4xi8> to tensor<?x?xi8>
%c0_23 = arith.constant 0 : index
%dim_24 = tensor.dim %4, %c0_23 : tensor<?x4xi8>
%14 = affine.apply affine_map<()[s0] -> (s0 ceildiv 2)>()[%dim_24]
%15 = tensor.empty(%14) : tensor<1x?x4x2xi8>
%pack_25 = tensor.pack %4 padding_value(%c0_i8 : i8) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [4, 2] into %15 : tensor<?x4xi8> -> tensor<1x?x4x2xi8>
%c0_26 = arith.constant 0 : index
%dim_27 = tensor.dim %padded_22, %c0_26 : tensor<?x?xi8>
%c1_28 = arith.constant 1 : index
%dim_29 = tensor.dim %padded_22, %c1_28 : tensor<?x?xi8>
%c0_30 = arith.constant 0 : index
%dim_31 = tensor.dim %padded_22, %c0_30 : tensor<?x?xi8>
%c1_32 = arith.constant 1 : index
%dim_33 = tensor.dim %padded_22, %c1_32 : tensor<?x?xi8>
%c0_34 = arith.constant 0 : index
%dim_35 = tensor.dim %padded_22, %c0_34 : tensor<?x?xi8>
%c1_36 = arith.constant 1 : index
%dim_37 = tensor.dim %padded_22, %c1_36 : tensor<?x?xi8>
%c0_38 = arith.constant 0 : index
%dim_39 = tensor.dim %padded_22, %c0_38 : tensor<?x?xi8>
%c1_40 = arith.constant 1 : index
%dim_41 = tensor.dim %padded_22, %c1_40 : tensor<?x?xi8>
%16 = iree_linalg_ext.set_encoding %padded_22 : tensor<?x?xi8> -> tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_42 = tensor.dim %6, %c0 : tensor<?x4xi32>
%17 = affine.apply affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>()[%c16, %dim_42]
%padded_43 = tensor.pad %6 low[0, 0] high[%17, %c0] {
^bb0(%arg3: index, %arg4: index):
tensor.yield %c0_i32 : i32
} : tensor<?x4xi32> to tensor<?x?xi32>
%c0_44 = arith.constant 0 : index
%dim_45 = tensor.dim %6, %c0_44 : tensor<?x4xi32>
%18 = affine.apply affine_map<()[s0] -> (s0 ceildiv 16)>()[%dim_45]
%19 = tensor.empty(%18) : tensor<?x1x16x4xi32>
%pack_46 = tensor.pack %6 padding_value(%c0_i32 : i32) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 4] into %19 : tensor<?x4xi32> -> tensor<?x1x16x4xi32>
%20 = builtin.unrealized_conversion_cast %pack_46 : tensor<?x1x16x4xi32> to tensor<1x?x4x16xi32>
%c0_47 = arith.constant 0 : index
%dim_48 = tensor.dim %padded_43, %c0_47 : tensor<?x?xi32>
%c1_49 = arith.constant 1 : index
%dim_50 = tensor.dim %padded_43, %c1_49 : tensor<?x?xi32>
%c0_51 = arith.constant 0 : index
%dim_52 = tensor.dim %padded_43, %c0_51 : tensor<?x?xi32>
%c1_53 = arith.constant 1 : index
%dim_54 = tensor.dim %padded_43, %c1_53 : tensor<?x?xi32>
%c0_55 = arith.constant 0 : index
%dim_56 = tensor.dim %padded_43, %c0_55 : tensor<?x?xi32>
%c1_57 = arith.constant 1 : index
%dim_58 = tensor.dim %padded_43, %c1_57 : tensor<?x?xi32>
%c0_59 = arith.constant 0 : index
%dim_60 = tensor.dim %padded_43, %c0_59 : tensor<?x?xi32>
%c1_61 = arith.constant 1 : index
%dim_62 = tensor.dim %padded_43, %c1_61 : tensor<?x?xi32>
%21 = iree_linalg_ext.set_encoding %padded_43 : tensor<?x?xi32> -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%22 = linalg.mmt4d ins(%pack_25, %pack : tensor<1x?x4x2xi8>, tensor<?x?x16x2xi8>) outs(%20 : tensor<1x?x4x16xi32>) -> tensor<1x?x4x16xi32>
%23 = linalg.matmul ins(%12, %16 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>, tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) outs(%21 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>) -> tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_63 = tensor.dim %6, %c0 : tensor<?x4xi32>
%c1_64 = arith.constant 1 : index
%dim_65 = tensor.dim %22, %c1_64 : tensor<1x?x4x16xi32>
%24 = tensor.empty(%dim_65) : tensor<?x1x16x4xi32>
%transposed = linalg.transpose ins(%22 : tensor<1x?x4x16xi32>) outs(%24 : tensor<?x1x16x4xi32>) permutation = [1, 0, 3, 2]
%c0_66 = arith.constant 0 : index
%c0_67 = arith.constant 0 : index
%dim_68 = tensor.dim %12, %c0_67 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_69 = arith.constant 1 : index
%dim_70 = tensor.dim %12, %c1_69 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c0_71 = arith.constant 0 : index
%dim_72 = tensor.dim %16, %c0_71 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_73 = arith.constant 1 : index
%dim_74 = tensor.dim %16, %c1_73 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c0_75 = arith.constant 0 : index
%dim_76 = tensor.dim %21, %c0_75 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_77 = arith.constant 1 : index
%dim_78 = tensor.dim %21, %c1_77 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_79 = tensor.dim %23, %c0_66 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_80 = arith.constant 1 : index
%c0_81 = arith.constant 0 : index
%dim_82 = tensor.dim %12, %c0_81 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_83 = arith.constant 1 : index
%dim_84 = tensor.dim %12, %c1_83 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = LHS, element_types = [i8, i8, i32], matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c0_85 = arith.constant 0 : index
%dim_86 = tensor.dim %16, %c0_85 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_87 = arith.constant 1 : index
%dim_88 = tensor.dim %16, %c1_87 : tensor<?x?xi8, #iree_linalg_ext.encoding<role = RHS, element_types = [i8, i8, i32], original_type = tensor<?x4xi8>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c0_89 = arith.constant 0 : index
%dim_90 = tensor.dim %21, %c0_89 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%c1_91 = arith.constant 1 : index
%dim_92 = tensor.dim %21, %c1_91 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%dim_93 = tensor.dim %23, %c1_80 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>>
%25 = tensor.empty(%dim_79, %dim_93) : tensor<?x?xi32>
%unpack = tensor.unpack %transposed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 4] into %25 : tensor<?x1x16x4xi32> -> tensor<?x?xi32>
%26 = iree_linalg_ext.unset_encoding %23 : tensor<?x?xi32, #iree_linalg_ext.encoding<role = RESULT, element_types = [i8, i8, i32], original_type = tensor<?x4xi32>, matmul_narrow_N = 4 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>> -> tensor<?x?xi32>
%extracted_slice = tensor.extract_slice %26[0, 0] [%dim_63, 4] [1, 1] : tensor<?x?xi32> to tensor<?x4xi32>
%27 = hal.tensor.export %extracted_slice "output0" : tensor<?x4xi32>{%5} -> !hal.buffer_view
util.return %27 : !hal.buffer_view
}
} -> SUCCESS
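[Annotation, not compiler output: a condensed excerpt of the dump above, repeated here because these are exactly the two ops named by the legalization failure further down. The cast is the unresolved materialization; the mmt4d is its remaining live user. No new IR is introduced.]

// Excerpt from the IR dump above:
%20 = builtin.unrealized_conversion_cast %pack_46 : tensor<?x1x16x4xi32> to tensor<1x?x4x16xi32>
%22 = linalg.mmt4d ins(%pack_25, %pack : tensor<1x?x4x2xi8>, tensor<?x?x16x2xi8>) outs(%20 : tensor<1x?x4x16xi32>) -> tensor<1x?x4x16xi32>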
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'tensor.extract_slice'(0x58bb0ed99810) {
%132 = "tensor.extract_slice"(%131, %96) <{operandSegmentSizes = array<i32: 1, 0, 1, 0>, static_offsets = array<i64: 0, 0>, static_sizes = array<i64: -9223372036854775808, 4>, static_strides = array<i64: 1, 1>}> : (tensor<?x?xi32>, index) -> tensor<?x4xi32>
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'hal.tensor.export'(0x58bb0e381750) {
%133 = "hal.tensor.export"(%132, %11) {name = "output0", operandSegmentSizes = array<i32: 1, 1, 0>, source_encoding = tensor<?x4xi32>} : (tensor<?x4xi32>, index) -> !hal.buffer_view
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
//===-------------------------------------------===//
Legalizing operation : 'util.return'(0x58bb0eda3240) {
"util.return"(%133) : (!hal.buffer_view) -> ()
} -> SUCCESS : operation marked legal by the target
//===-------------------------------------------===//
/home/benoit/matmul_n4_i8.mlir:2:13: error: failed to legalize unresolved materialization from 'tensor<?x1x16x4xi32>' to 'tensor<1x?x4x16xi32>' that remained live after conversion
%result = linalg.matmul ins(%lhs, %rhs: tensor<?x?xi8>, tensor<?x4xi8>) outs(%acc: tensor<?x4xi32>) -> tensor<?x4xi32>
^
/home/benoit/matmul_n4_i8.mlir:1:1: note: called from
func.func @matmul_n4_i8(%lhs: tensor<?x?xi8>, %rhs: tensor<?x4xi8>, %acc: tensor<?x4xi32>) -> tensor<?x4xi32> {
^
/home/benoit/matmul_n4_i8.mlir:2:13: note: see current operation: %76 = "builtin.unrealized_conversion_cast"(%75) : (tensor<?x1x16x4xi32>) -> tensor<1x?x4x16xi32>
%result = linalg.matmul ins(%lhs, %rhs: tensor<?x?xi8>, tensor<?x4xi8>) outs(%acc: tensor<?x4xi32>) -> tensor<?x4xi32>
^
/home/benoit/matmul_n4_i8.mlir:2:13: note: see existing live user here: %22 = linalg.mmt4d ins(%pack_25, %pack : tensor<1x?x4x2xi8>, tensor<?x?x16x2xi8>) outs(%20 : tensor<1x?x4x16xi32>) -> tensor<1x?x4x16xi32>
iree-compile: /home/benoit/iree/third_party/llvm-project/mlir/include/mlir/IR/UseDefLists.h:198: mlir::IRObjectWithUseList<mlir::OpOperand>::~IRObjectWithUseList() [OperandType = mlir::OpOperand]: Assertion `use_empty() && "Cannot destroy a value that still has uses!"' failed.
Please report issues to https://github.com/openxla/iree/issues and include the crash backtrace.
Stack dump:
0. Program arguments: tools/iree-compile --iree-llvmcpu-link-embedded=false --iree-hal-target-backends=llvm-cpu --iree-llvmcpu-target-cpu=znver4 --iree-llvmcpu-enable-ukernels=all /home/benoit/matmul_n4_i8.mlir -o /tmp/a.vmfb --iree-hal-dump-executable-intermediates-to=/tmp -mlir-disable-threading -mlir-print-ir-before-all -mlir-print-ir-after-all -debug
#0 0x000078bda34d21b1 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) /home/benoit/iree/third_party/llvm-project/llvm/lib/Support/Unix/Signals.inc:723:11
#1 0x000078bda34d26ab PrintStackTraceSignalHandler(void*) /home/benoit/iree/third_party/llvm-project/llvm/lib/Support/Unix/Signals.inc:798:1
#2 0x000078bda34d06a6 llvm::sys::RunSignalHandlers() /home/benoit/iree/third_party/llvm-project/llvm/lib/Support/Signals.cpp:105:5
#3 0x000078bda34d2e45 SignalHandler(int) /home/benoit/iree/third_party/llvm-project/llvm/lib/Support/Unix/Signals.inc:413:1
#4 0x000078bd99242990 (/lib/x86_64-linux-gnu/libc.so.6+0x42990)
#5 0x000078bd99299a1b __pthread_kill_implementation ./nptl/pthread_kill.c:44:76
#6 0x000078bd99299a1b __pthread_kill_internal ./nptl/pthread_kill.c:78:10
#7 0x000078bd99299a1b pthread_kill ./nptl/pthread_kill.c:89:10
#8 0x000078bd992428e6 gsignal ./signal/../sysdeps/posix/raise.c:27:6
#9 0x000078bd992268b7 abort ./stdlib/abort.c:81:7
#10 0x000078bd992267db _nl_load_domain ./intl/loadmsgcat.c:1177:9
#11 0x000078bd99239206 (/lib/x86_64-linux-gnu/libc.so.6+0x39206)
#12 0x000078bda35a0568 mlir::IRObjectWithUseList<mlir::OpOperand>::~IRObjectWithUseList() /home/benoit/iree/third_party/llvm-project/mlir/include/mlir/IR/UseDefLists.h:199:3
#13 0x000078bda35a04f5 mlir::detail::ValueImpl::~ValueImpl() /home/benoit/iree/third_party/llvm-project/mlir/include/mlir/IR/Value.h:40:18
#14 0x000078bda35a04d5 mlir::detail::BlockArgumentImpl::~BlockArgumentImpl() /home/benoit/iree/third_party/llvm-project/mlir/include/mlir/IR/Value.h:289:7
#15 0x000078bda359f17c mlir::BlockArgument::destroy() /home/benoit/iree/third_party/llvm-project/mlir/include/mlir/IR/Value.h:341:20
#16 0x000078bda359d289 mlir::Block::~Block() /home/benoit/iree/third_party/llvm-project/mlir/lib/IR/Block.cpp:22:26
#17 0x000078bdaa433845 (anonymous namespace)::CreateBlockRewrite::rollback() /home/benoit/iree/third_party/llvm-project/mlir/lib/Transforms/Utils/DialectConversion.cpp:286:7
#18 0x000078bdaa4139a5 mlir::detail::ConversionPatternRewriterImpl::undoRewrites(unsigned int) /home/benoit/iree/third_party/llvm-project/mlir/lib/Transforms/Utils/DialectConversion.cpp:1224:22
#19 0x000078bdaa418ff4 mlir::OperationConverter::convertOperations(llvm::ArrayRef<mlir::Operation*>) /home/benoit/iree/third_party/llvm-project/mlir/lib/Transforms/Utils/DialectConversion.cpp:2628:41
#20 0x000078bdaa41db39 mlir::applyPartialConversion(llvm::ArrayRef<mlir::Operation*>, mlir::ConversionTarget const&, mlir::FrozenRewritePatternSet const&, mlir::ConversionConfig) /home/benoit/iree/third_party/llvm-project/mlir/lib/Transforms/Utils/DialectConversion.cpp:3610:22
#21 0x000078bdaa41dc3d mlir::applyPartialConversion(mlir::Operation*, mlir::ConversionTarget const&, mlir::FrozenRewritePatternSet const&, mlir::ConversionConfig) /home/benoit/iree/third_party/llvm-project/mlir/lib/Transforms/Utils/DialectConversion.cpp:3616:10
#22 0x000078bda7f67f7b mlir::iree_compiler::CPUMaterializeEncodingPass::runOnOperation() /home/benoit/iree/compiler/src/iree/compiler/Codegen/Common/CPU/CPUMaterializeEncodingPass.cpp:561:14
#23 0x000078bda38c44fb mlir::detail::OpToOpPassAdaptor::run(mlir::Pass*, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int)::$_1::operator()() const /home/benoit/iree/third_party/llvm-project/mlir/lib/Pass/Pass.cpp:0:17
#24 0x000078bda38c4495 void llvm::function_ref<void ()>::callback_fn<mlir::detail::OpToOpPassAdaptor::run(mlir::Pass*, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int)::$_1>(long) /home/benoit/iree/third_party/llvm-project/llvm/include/llvm/ADT/STLFunctionalExtras.h:45:5
#25 0x000078bda33df229 llvm::function_ref<void ()>::operator()() const /home/benoit/iree/third_party/llvm-project/llvm/include/llvm/ADT/STLFunctionalExtras.h:68:5
#26 0x000078bda38c7135 void mlir::MLIRContext::executeAction<mlir::PassExecutionAction, mlir::Pass&>(llvm::function_ref<void ()>, llvm::ArrayRef<mlir::IRUnit>, mlir::Pass&) /home/benoit/iree/third_party/llvm-project/mlir/include/mlir/IR/MLIRContext.h:276:3
#27 0x000078bda38bfc03 mlir::detail::OpToOpPassAdaptor::run(mlir::Pass*, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int) /home/benoit/iree/third_party/llvm-project/mlir/lib/Pass/Pass.cpp:525:17
#28 0x000078bda38c0184 mlir::detail::OpToOpPassAdaptor::runPipeline(mlir::OpPassManager&, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int, mlir::PassInstrumentor*, mlir::PassInstrumentation::PipelineParentInfo const*) /home/benoit/iree/third_party/llvm-project/mlir/lib/Pass/Pass.cpp:585:16
#29 0x000078bda38c104e mlir::detail::OpToOpPassAdaptor::runOnOperationImpl(bool) /home/benoit/iree/third_party/llvm-project/mlir/lib/Pass/Pass.cpp:726:20
#30 0x000078bda38c08ad mlir::detail::OpToOpPassAdaptor::runOnOperation(bool) /home/benoit/iree/third_party/llvm-project/mlir/lib/Pass/Pass.cpp:709:1
#31 0x000078bda38c44e6 mlir::detail::OpToOpPassAdaptor::run(mlir::Pass*, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int)::$_1::operator()() const /home/benoit/iree/third_party/llvm-project/mlir/lib/Pass/Pass.cpp:517:11
#32 0x000078bda38c4495 void llvm::function_ref<void ()>::callback_fn<mlir::detail::OpToOpPassAdaptor::run(mlir::Pass*, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int)::$_1>(long) /home/benoit/iree/third_party/llvm-project/llvm/include/llvm/ADT/STLFunctionalExtras.h:45:5
#33 0x000078bda33df229 llvm::function_ref<void ()>::operator()() const /home/benoit/iree/third_party/llvm-project/llvm/include/llvm/ADT/STLFunctionalExtras.h:68:5
#34 0x000078bda38c7135 void mlir::MLIRContext::executeAction<mlir::PassExecutionAction, mlir::Pass&>(llvm::function_ref<void ()>, llvm::ArrayRef<mlir::IRUnit>, mlir::Pass&) /home/benoit/iree/third_party/llvm-project/mlir/include/mlir/IR/MLIRContext.h:276:3
#35 0x000078bda38bfc03 mlir::detail::OpToOpPassAdaptor::run(mlir::Pass*, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int) /home/benoit/iree/third_party/llvm-project/mlir/lib/Pass/Pass.cpp:525:17
#36 0x000078bda38c0184 mlir::detail::OpToOpPassAdaptor::runPipeline(mlir::OpPassManager&, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int, mlir::PassInstrumentor*, mlir::PassInstrumentation::PipelineParentInfo const*) /home/benoit/iree/third_party/llvm-project/mlir/lib/Pass/Pass.cpp:585:16
#37 0x000078bda38c4471 mlir::detail::OpToOpPassAdaptor::run(mlir::Pass*, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int)::$_0::operator()(mlir::OpPassManager&, mlir::Operation*) const /home/benoit/iree/third_party/llvm-project/mlir/lib/Pass/Pass.cpp:502:12
#38 0x000078bda38c41e5 mlir::LogicalResult llvm::function_ref<mlir::LogicalResult (mlir::OpPassManager&, mlir::Operation*)>::callback_fn<mlir::detail::OpToOpPassAdaptor::run(mlir::Pass*, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int)::$_0>(long, mlir::OpPassManager&, mlir::Operation*) /home/benoit/iree/third_party/llvm-project/llvm/include/llvm/ADT/STLFunctionalExtras.h:45:12
#39 0x000078bda37fd749 llvm::function_ref<mlir::LogicalResult (mlir::OpPassManager&, mlir::Operation*)>::operator()(mlir::OpPassManager&, mlir::Operation*) const /home/benoit/iree/third_party/llvm-project/llvm/include/llvm/ADT/STLFunctionalExtras.h:68:12
#40 0x000078bda37f6105 mlir::Pass::runPipeline(mlir::OpPassManager&, mlir::Operation*) /home/benoit/iree/third_party/llvm-project/mlir/include/mlir/Pass/Pass.h:199:12
#41 0x000078bda5b72f88 mlir::iree_compiler::GlobalOptimization::MaterializeHomogeneousEncodingsPass::runOnOperation() /home/benoit/iree/compiler/src/iree/compiler/GlobalOptimization/MaterializeHomogeneousEncodings.cpp:74:16
#42 0x000078bda38c44fb mlir::detail::OpToOpPassAdaptor::run(mlir::Pass*, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int)::$_1::operator()() const /home/benoit/iree/third_party/llvm-project/mlir/lib/Pass/Pass.cpp:0:17
#43 0x000078bda38c4495 void llvm::function_ref<void ()>::callback_fn<mlir::detail::OpToOpPassAdaptor::run(mlir::Pass*, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int)::$_1>(long) /home/benoit/iree/third_party/llvm-project/llvm/include/llvm/ADT/STLFunctionalExtras.h:45:5
#44 0x000078bda33df229 llvm::function_ref<void ()>::operator()() const /home/benoit/iree/third_party/llvm-project/llvm/include/llvm/ADT/STLFunctionalExtras.h:68:5
#45 0x000078bda38c7135 void mlir::MLIRContext::executeAction<mlir::PassExecutionAction, mlir::Pass&>(llvm::function_ref<void ()>, llvm::ArrayRef<mlir::IRUnit>, mlir::Pass&) /home/benoit/iree/third_party/llvm-project/mlir/include/mlir/IR/MLIRContext.h:276:3
#46 0x000078bda38bfc03 mlir::detail::OpToOpPassAdaptor::run(mlir::Pass*, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int) /home/benoit/iree/third_party/llvm-project/mlir/lib/Pass/Pass.cpp:525:17
#47 0x000078bda38c0184 mlir::detail::OpToOpPassAdaptor::runPipeline(mlir::OpPassManager&, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int, mlir::PassInstrumentor*, mlir::PassInstrumentation::PipelineParentInfo const*) /home/benoit/iree/third_party/llvm-project/mlir/lib/Pass/Pass.cpp:585:16
#48 0x000078bda38c1cc8 mlir::PassManager::runPasses(mlir::Operation*, mlir::AnalysisManager) /home/benoit/iree/third_party/llvm-project/mlir/lib/Pass/Pass.cpp:896:10
#49 0x000078bda38c1bf2 mlir::PassManager::run(mlir::Operation*) /home/benoit/iree/third_party/llvm-project/mlir/lib/Pass/Pass.cpp:876:60
#50 0x000078bda332db01 mlir::iree_compiler::embed::(anonymous namespace)::Invocation::runPipeline(iree_compiler_pipeline_t) /home/benoit/iree/compiler/src/iree/compiler/API/Internal/CompilerDriver.cpp:961:27
#51 0x000078bda332d423 ireeCompilerInvocationPipeline /home/benoit/iree/compiler/src/iree/compiler/API/Internal/CompilerDriver.cpp:1391:3
#52 0x000078bda38540a0 mlir::iree_compiler::runIreecMain(int, char**)::$_2::operator()(iree_compiler_source_t*) const /home/benoit/iree/compiler/src/iree/compiler/Tools/iree_compile_lib.cc:247:11
#53 0x000078bda3853569 mlir::iree_compiler::runIreecMain(int, char**) /home/benoit/iree/compiler/src/iree/compiler/Tools/iree_compile_lib.cc:348:9
#54 0x000078bda337725b ireeCompilerRunMain /home/benoit/iree/compiler/src/iree/compiler/API/Internal/IREECompileToolEntryPoint.cpp:12:3
#55 0x000058bb0d2697a2 main /home/benoit/iree/tools/iree-compile-main.cc:9:35
#56 0x000078bd99228150 __libc_start_call_main ./csu/../sysdeps/nptl/libc_start_call_main.h:74:3
#57 0x000078bd99228209 call_init ./csu/../csu/libc-start.c:128:20
#58 0x000078bd99228209 __libc_start_main ./csu/../csu/libc-start.c:347:5
#59 0x000058bb0d2696b5 _start (tools/iree-compile+0x16b5)
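[Annotation, not compiler output: for convenience, a sketch of /home/benoit/matmul_n4_i8.mlir reconstructed from the diagnostics above. The func.func signature and the linalg.matmul line are quoted verbatim in the error notes; the return and closing brace are assumptions and may differ from the actual file.]

// Reconstructed reproducer (lines 1-2 verbatim from the error notes; the
// trailing return and brace are assumed, not taken from the log).
func.func @matmul_n4_i8(%lhs: tensor<?x?xi8>, %rhs: tensor<?x4xi8>, %acc: tensor<?x4xi32>) -> tensor<?x4xi32> {
  %result = linalg.matmul ins(%lhs, %rhs: tensor<?x?xi8>, tensor<?x4xi8>) outs(%acc: tensor<?x4xi32>) -> tensor<?x4xi32>
  return %result : tensor<?x4xi32>
}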