Repro:
tools/iree-opt /tmp/a.mlir --pass-pipeline="builtin.module(func.func(iree-codegen-convert-to-destination-passing-style),canonicalize,cse)"
Result:
The tensor.pack output-operand is no longer empty.
Repro:
tools/iree-opt /tmp/a.mlir --pass-pipeline="builtin.module(func.func(iree-codegen-convert-to-destination-passing-style),canonicalize,cse)"
Result:
The tensor.pack output-operand is no longer empty.
This tutorial is simultaneously about IREE, MLIR, and specifically the MLIR Linalg dialect.
MLIR is a programming language, but MLIR in itself is almost just an empty shell. What it really provides is a framework for defining MLIR dialects, which are where the features come from.
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp | |
index 002c34f425..e4dd03268f 100644 | |
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp | |
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp | |
@@ -1506,7 +1506,8 @@ static void setX86VectorTileSizes(linalg::GenericOp genericOp, | |
SmallVectorImpl<int64_t> &vecTileSizes) { | |
vecTileSizes.append(numLoops, 0); | |
SmallVector<int64_t> staticLoopRanges = genericOp.getStaticLoopRanges(); | |
- for (auto loopNum : llvm::seq<unsigned>(0, numLoops)) { | |
+ int64_t maxRemainingVecTileSize = 1024; |
.section .text.turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_generic_Dx8640x3200_f16_pack,"ax",@progbits | |
.p2align 4, 0x90 | |
.type turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_generic_Dx8640x3200_f16_pack,@function | |
turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_generic_Dx8640x3200_f16_pack: | |
.Lfunc_begin1: | |
.loc 1 1 0 is_stmt 1 | |
.cfi_startproc | |
push rbp | |
.cfi_def_cfa_offset 16 | |
.cfi_offset rbp, -16 |
Args: tools/iree-compile --iree-llvmcpu-link-embedded=false --iree-hal-target-backends=llvm-cpu --iree-llvmcpu-target-cpu=znver4 --iree-llvmcpu-enable-ukernels=all /home/benoit/matmul_n4_i8.mlir -o /tmp/a.vmfb --iree-hal-dump-executable-intermediates-to=/tmp -mlir-disable-threading -mlir-print-ir-before-all -mlir-print-ir-after-all -debug | |
Load new dialect in Context builtin | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::ShapedType) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::MemRefLayoutAttrInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::TypedAttr) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::ElementsAttr) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::DistinctAttr) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::BytecodeOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::SymbolOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpAsmOpInterface) |
2024-03-20T09:54:17.450726-04:00 hocher kernel: [ 4.223485] [drm] amdgpu kernel modesetting enabled. | |
2024-03-20T09:54:17.450726-04:00 hocher kernel: [ 4.223605] amdgpu: CRAT table disabled by module option | |
2024-03-20T09:54:17.450727-04:00 hocher kernel: [ 4.223607] amdgpu: Virtual CRAT table created for CPU | |
2024-03-20T09:54:17.450727-04:00 hocher kernel: [ 4.223616] amdgpu: Topology: Add CPU node | |
2024-03-20T09:54:17.450727-04:00 hocher kernel: [ 4.223695] amdgpu 0000:03:00.0: enabling device (0006 -> 0007) | |
2024-03-20T09:54:17.450726-04:00 hocher sbkeysync[1334]: e36dfc719d2114c2e39aea88849e2845ab326f6f7fe74e0e539b7e54d81f3631 | |
2024-03-20T09:54:17.450727-04:00 hocher kernel: [ 4.223756] [drm] initializing kernel modesetting (VEGA10 0x1002:0x687F 0x1462:0x3681 0xC3). | |
2024-03-20T09:54:17.450728-04:00 hocher kernel: [ 4.223764] [drm] register mmio base: 0xF6C00000 | |
2024-03-20T09:54:17.450728-04:00 hocher kernel: [ 4.223765] [drm] register mmio size: 524288 | |
2024-03-20T09:54:17.450728-04:0 |
NameLoc: main | |
NameLoc: MobilenetV1/MobilenetV1/Conv2d_13_depthwise/Relu6 | |
NameLoc: MobilenetV1/MobilenetV1/Conv2d_12_depthwise/Relu6 | |
NameLoc: MobilenetV1/MobilenetV1/Conv2d_11_depthwise/Relu6 | |
NameLoc: MobilenetV1/MobilenetV1/Conv2d_10_depthwise/Relu6 | |
NameLoc: MobilenetV1/MobilenetV1/Conv2d_9_depthwise/Relu6 | |
NameLoc: MobilenetV1/MobilenetV1/Conv2d_8_depthwise/Relu6 | |
NameLoc: MobilenetV1/MobilenetV1/Conv2d_7_depthwise/Relu6 | |
NameLoc: MobilenetV1/MobilenetV1/Conv2d_6_depthwise/Relu6 | |
NameLoc: MobilenetV1/MobilenetV1/Conv2d_5_depthwise/Relu6 |
diff --git a/home/benoit/iree-build/llvm-project/tools/mlir/include/mlir/IR/BuiltinDialectBytecode.cpp.inc b/tmp/BuiltinDialectBytecode.cpp.inc | |
index 26072dbe9c8f..015598623db4 100644 | |
--- a/home/benoit/iree-build/llvm-project/tools/mlir/include/mlir/IR/BuiltinDialectBytecode.cpp.inc | |
+++ b/tmp/BuiltinDialectBytecode.cpp.inc | |
@@ -355,6 +355,27 @@ static void write(UnknownLoc attribute, DialectBytecodeWriter &writer) { | |
writer.writeVarInt(/* UnknownLoc */ 15); | |
} | |
+namespace { | |
+struct Logger { |
Download the MLIR model code without parameters (so that's lightweight and will compile fast): https://storage.googleapis.com/shark_tank/elias/facebook_opt_1.3b.mlir
Download the parameters: https://storage.googleapis.com/shark_tank/elias/facebook_opt_1.3b_weights.irpa
The command lines below assume that these files have been downloaded under $HOME/testing.
builtin.module @calls attributes { | |
} { | |
func.func private @matmul_test.generate_random_matrix(%device: !hal.device, %dim0: i64, %dim1: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view | |
func.func private @matmul_test.check_matmul_results(%device: !hal.device, %m: i64, %k: i64, %n: i64, %transpose_rhs: i32, %lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) | |
func.func private @module.matmul_accumulate_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view | |
func.func private @module.matmul_accumulate_1x1xi8_times_1x1xi8_into_1x1xi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view | |
func.func private @module.matmul_DYNxDYNxi8_times_DYNxDYNxi8_into_DYNxDYNxi32(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view |