Skip to content

Instantly share code, notes, and snippets.

@makslevental
Last active June 7, 2024 20:33
Show Gist options
  • Save makslevental/dfc7c24cf496757fd41e2b5c7f423037 to your computer and use it in GitHub Desktop.
builtin.module {
aie.device(npu) {
// Tiles referenced by this design. The four tiles in rows 2-3 each host an
// aie.core below; (0, 0) and (0, 1) only appear as objectfifo endpoints.
// NOTE(review): presumably (0, 0) is the shim tile and (0, 1) the mem tile of
// the npu device -- confirm against the device model.
%tile_0_2 = aie.tile(0, 2)
%tile_0_3 = aie.tile(0, 3)
%tile_1_2 = aie.tile(1, 2)
%tile_1_3 = aie.tile(1, 3)
%tile_0_0 = aie.tile(0, 0)
%tile_0_1 = aie.tile(0, 1)

// Input path: two 2048-element fifos from tile (0, 0) into tile (0, 1).
aie.objectfifo @obj0(%tile_0_0, {%tile_0_1}, 4 : i32) : !aie.objectfifo<memref<2048xi32, 1>>
aie.objectfifo @obj1(%tile_0_0, {%tile_0_1}, 4 : i32) : !aie.objectfifo<memref<2048xi32, 1>>

// @obj1 is split into two 1024-element fifos, each consumed by two cores:
// @obj2 by column 0 (tiles (0, 2) and (0, 3)), @obj3 by column 1.
aie.objectfifo @obj2(%tile_0_1, {%tile_0_2, %tile_0_3}, 4 : i32) : !aie.objectfifo<memref<1024xi32, 1>>
aie.objectfifo @obj3(%tile_0_1, {%tile_1_2, %tile_1_3}, 4 : i32) : !aie.objectfifo<memref<1024xi32, 1>>
// Each link is declared exactly once; the original text repeated every link
// op verbatim, which put each fifo into two identical links.
aie.objectfifo.link [@obj1] -> [@obj3, @obj2]()

// @obj0 is split the same way, but by row: @obj4 feeds row 2, @obj5 row 3.
aie.objectfifo @obj4(%tile_0_1, {%tile_0_2, %tile_1_2}, 4 : i32) : !aie.objectfifo<memref<1024xi32, 1>>
aie.objectfifo @obj5(%tile_0_1, {%tile_0_3, %tile_1_3}, 4 : i32) : !aie.objectfifo<memref<1024xi32, 1>>
aie.objectfifo.link [@obj0] -> [@obj5, @obj4]()

// Output path: one 1024-element fifo per core back into tile (0, 1), joined
// into a single 4096-element fifo towards tile (0, 0).
aie.objectfifo @obj6(%tile_0_2, {%tile_0_1}, 4 : i32) : !aie.objectfifo<memref<1024xi32, 1>>
aie.objectfifo @obj7(%tile_1_2, {%tile_0_1}, 4 : i32) : !aie.objectfifo<memref<1024xi32, 1>>
aie.objectfifo @obj8(%tile_0_3, {%tile_0_1}, 4 : i32) : !aie.objectfifo<memref<1024xi32, 1>>
aie.objectfifo @obj9(%tile_1_3, {%tile_0_1}, 4 : i32) : !aie.objectfifo<memref<1024xi32, 1>>
aie.objectfifo @obj10(%tile_0_1, {%tile_0_0}, 4 : i32) : !aie.objectfifo<memref<4096xi32, 1>>
// NOTE(review): the join lists @obj9 before @obj8; this order is kept as
// found -- confirm it matches the intended layout of the 4096-element result.
aie.objectfifo.link [@obj6, @obj7, @obj9, @obj8] -> [@obj10]()
// Core program for tile (0, 2).
// Each of the 4 outer passes acquires one @obj6 buffer, zeroes it, adds into
// it the products of 8 successive (@obj4, @obj2) buffer pairs, and then
// releases it. Indices that the original drove with single-iteration loops
// are written directly as the constant 0.
%core_0_2 = aie.core(%tile_0_2) {
  // Loop bounds, plus the i32 zero used to clear the accumulator.
  %i0 = arith.constant 0 : index
  %i1 = arith.constant 1 : index
  %i4 = arith.constant 4 : index
  %i7 = arith.constant 7 : index
  %i8 = arith.constant 8 : index
  %zero_i32 = arith.constant 0 : i32

  scf.for %pass = %i0 to %i4 step %i1 {
    // Output buffer for this pass, viewed as 1x1x8x8x4x4.
    %acc_sub = aie.objectfifo.acquire @obj6(Produce, 1) : !aie.objectfifosubview<memref<1024xi32, 1>>
    %acc_flat = aie.objectfifo.subview.access %acc_sub[0] : !aie.objectfifosubview<memref<1024xi32, 1>> -> memref<1024xi32, 1>
    %acc = memref.reinterpret_cast %acc_flat to offset: [0], sizes: [1, 1, 8, 8, 4, 4], strides: [1024, 1024, 128, 16, 4, 1] : memref<1024xi32, 1> to memref<1x1x8x8x4x4xi32, 1>

    // First input pair: @obj2 viewed as 1x1x8x4x8x4, @obj4 as 1x1x4x8x4x8.
    %rhs_sub_a = aie.objectfifo.acquire @obj2(Consume, 1) : !aie.objectfifosubview<memref<1024xi32, 1>>
    %rhs_flat_a = aie.objectfifo.subview.access %rhs_sub_a[0] : !aie.objectfifosubview<memref<1024xi32, 1>> -> memref<1024xi32, 1>
    %rhs_a = memref.reinterpret_cast %rhs_flat_a to offset: [0], sizes: [1, 1, 8, 4, 8, 4], strides: [1024, 1024, 128, 32, 4, 1] : memref<1024xi32, 1> to memref<1x1x8x4x8x4xi32, 1>
    %lhs_sub_a = aie.objectfifo.acquire @obj4(Consume, 1) : !aie.objectfifosubview<memref<1024xi32, 1>>
    %lhs_flat_a = aie.objectfifo.subview.access %lhs_sub_a[0] : !aie.objectfifosubview<memref<1024xi32, 1>> -> memref<1024xi32, 1>
    %lhs_a = memref.reinterpret_cast %lhs_flat_a to offset: [0], sizes: [1, 1, 4, 8, 4, 8], strides: [1024, 1024, 256, 32, 8, 1] : memref<1024xi32, 1> to memref<1x1x4x8x4x8xi32, 1>

    // Clear every element of the accumulator.
    scf.for %no = %i0 to %i8 step %i1 {
      scf.for %mo = %i0 to %i8 step %i1 {
        scf.for %mi = %i0 to %i4 step %i1 {
          scf.for %ni = %i0 to %i4 step %i1 {
            memref.store %zero_i32, %acc[%i0, %i0, %no, %mo, %mi, %ni] : memref<1x1x8x8x4x4xi32, 1>
          }
        }
      }
    }

    // acc[no, mo, mi, ni] += lhs[ko, mo, mi, ki] * rhs[no, ko, ki, ni]
    scf.for %mo = %i0 to %i8 step %i1 {
      scf.for %no = %i0 to %i8 step %i1 {
        scf.for %ko = %i0 to %i4 step %i1 {
          scf.for %mi = %i0 to %i4 step %i1 {
            scf.for %ni = %i0 to %i4 step %i1 {
              scf.for %ki = %i0 to %i8 step %i1 {
                %a = memref.load %lhs_a[%i0, %i0, %ko, %mo, %mi, %ki] : memref<1x1x4x8x4x8xi32, 1>
                %b = memref.load %rhs_a[%i0, %i0, %no, %ko, %ki, %ni] : memref<1x1x8x4x8x4xi32, 1>
                %c = memref.load %acc[%i0, %i0, %no, %mo, %mi, %ni] : memref<1x1x8x8x4x4xi32, 1>
                %prod = arith.muli %a, %b : i32
                %sum = arith.addi %c, %prod : i32
                memref.store %sum, %acc[%i0, %i0, %no, %mo, %mi, %ni] : memref<1x1x8x8x4x4xi32, 1>
              }
            }
          }
        }
      }
    }

    // Six middle steps: swap each input for its next buffer, then accumulate.
    scf.for %step = %i1 to %i7 step %i1 {
      aie.objectfifo.release @obj2(Consume, 1)
      %rhs_sub_b = aie.objectfifo.acquire @obj2(Consume, 1) : !aie.objectfifosubview<memref<1024xi32, 1>>
      %rhs_flat_b = aie.objectfifo.subview.access %rhs_sub_b[0] : !aie.objectfifosubview<memref<1024xi32, 1>> -> memref<1024xi32, 1>
      %rhs_b = memref.reinterpret_cast %rhs_flat_b to offset: [0], sizes: [1, 1, 8, 4, 8, 4], strides: [1024, 1024, 128, 32, 4, 1] : memref<1024xi32, 1> to memref<1x1x8x4x8x4xi32, 1>
      aie.objectfifo.release @obj4(Consume, 1)
      %lhs_sub_b = aie.objectfifo.acquire @obj4(Consume, 1) : !aie.objectfifosubview<memref<1024xi32, 1>>
      %lhs_flat_b = aie.objectfifo.subview.access %lhs_sub_b[0] : !aie.objectfifosubview<memref<1024xi32, 1>> -> memref<1024xi32, 1>
      %lhs_b = memref.reinterpret_cast %lhs_flat_b to offset: [0], sizes: [1, 1, 4, 8, 4, 8], strides: [1024, 1024, 256, 32, 8, 1] : memref<1024xi32, 1> to memref<1x1x4x8x4x8xi32, 1>
      scf.for %mo = %i0 to %i8 step %i1 {
        scf.for %no = %i0 to %i8 step %i1 {
          scf.for %ko = %i0 to %i4 step %i1 {
            scf.for %mi = %i0 to %i4 step %i1 {
              scf.for %ni = %i0 to %i4 step %i1 {
                scf.for %ki = %i0 to %i8 step %i1 {
                  %a = memref.load %lhs_b[%i0, %i0, %ko, %mo, %mi, %ki] : memref<1x1x4x8x4x8xi32, 1>
                  %b = memref.load %rhs_b[%i0, %i0, %no, %ko, %ki, %ni] : memref<1x1x8x4x8x4xi32, 1>
                  %c = memref.load %acc[%i0, %i0, %no, %mo, %mi, %ni] : memref<1x1x8x8x4x4xi32, 1>
                  %prod = arith.muli %a, %b : i32
                  %sum = arith.addi %c, %prod : i32
                  memref.store %sum, %acc[%i0, %i0, %no, %mo, %mi, %ni] : memref<1x1x8x8x4x4xi32, 1>
                }
              }
            }
          }
        }
      }
    }

    // Final input pair.
    aie.objectfifo.release @obj2(Consume, 1)
    %rhs_sub_c = aie.objectfifo.acquire @obj2(Consume, 1) : !aie.objectfifosubview<memref<1024xi32, 1>>
    %rhs_flat_c = aie.objectfifo.subview.access %rhs_sub_c[0] : !aie.objectfifosubview<memref<1024xi32, 1>> -> memref<1024xi32, 1>
    %rhs_c = memref.reinterpret_cast %rhs_flat_c to offset: [0], sizes: [1, 1, 8, 4, 8, 4], strides: [1024, 1024, 128, 32, 4, 1] : memref<1024xi32, 1> to memref<1x1x8x4x8x4xi32, 1>
    aie.objectfifo.release @obj4(Consume, 1)
    %lhs_sub_c = aie.objectfifo.acquire @obj4(Consume, 1) : !aie.objectfifosubview<memref<1024xi32, 1>>
    %lhs_flat_c = aie.objectfifo.subview.access %lhs_sub_c[0] : !aie.objectfifosubview<memref<1024xi32, 1>> -> memref<1024xi32, 1>
    %lhs_c = memref.reinterpret_cast %lhs_flat_c to offset: [0], sizes: [1, 1, 4, 8, 4, 8], strides: [1024, 1024, 256, 32, 8, 1] : memref<1024xi32, 1> to memref<1x1x4x8x4x8xi32, 1>
    scf.for %mo = %i0 to %i8 step %i1 {
      scf.for %no = %i0 to %i8 step %i1 {
        scf.for %ko = %i0 to %i4 step %i1 {
          scf.for %mi = %i0 to %i4 step %i1 {
            scf.for %ni = %i0 to %i4 step %i1 {
              scf.for %ki = %i0 to %i8 step %i1 {
                %a = memref.load %lhs_c[%i0, %i0, %ko, %mo, %mi, %ki] : memref<1x1x4x8x4x8xi32, 1>
                %b = memref.load %rhs_c[%i0, %i0, %no, %ko, %ki, %ni] : memref<1x1x8x4x8x4xi32, 1>
                %c = memref.load %acc[%i0, %i0, %no, %mo, %mi, %ni] : memref<1x1x8x8x4x4xi32, 1>
                %prod = arith.muli %a, %b : i32
                %sum = arith.addi %c, %prod : i32
                memref.store %sum, %acc[%i0, %i0, %no, %mo, %mi, %ni] : memref<1x1x8x8x4x4xi32, 1>
              }
            }
          }
        }
      }
    }

    // Return the last inputs and hand the finished output buffer downstream.
    aie.objectfifo.release @obj2(Consume, 1)
    aie.objectfifo.release @obj4(Consume, 1)
    aie.objectfifo.release @obj6(Produce, 1)
  }
  aie.end
}
// Core program for tile (1, 2).
// Each of the 4 outer passes acquires one @obj7 buffer, zeroes it, adds into
// it the products of 8 successive (@obj4, @obj3) buffer pairs, and then
// releases it. Indices that the original drove with single-iteration loops
// are written directly as the constant 0.
%core_1_2 = aie.core(%tile_1_2) {
  // Loop bounds, plus the i32 zero used to clear the accumulator.
  %i0 = arith.constant 0 : index
  %i1 = arith.constant 1 : index
  %i4 = arith.constant 4 : index
  %i7 = arith.constant 7 : index
  %i8 = arith.constant 8 : index
  %zero_i32 = arith.constant 0 : i32

  scf.for %pass = %i0 to %i4 step %i1 {
    // Output buffer for this pass, viewed as 1x1x8x8x4x4.
    %acc_sub = aie.objectfifo.acquire @obj7(Produce, 1) : !aie.objectfifosubview<memref<1024xi32, 1>>
    %acc_flat = aie.objectfifo.subview.access %acc_sub[0] : !aie.objectfifosubview<memref<1024xi32, 1>> -> memref<1024xi32, 1>
    %acc = memref.reinterpret_cast %acc_flat to offset: [0], sizes: [1, 1, 8, 8, 4, 4], strides: [1024, 1024, 128, 16, 4, 1] : memref<1024xi32, 1> to memref<1x1x8x8x4x4xi32, 1>

    // First input pair: @obj3 viewed as 1x1x8x4x8x4, @obj4 as 1x1x4x8x4x8.
    %rhs_sub_a = aie.objectfifo.acquire @obj3(Consume, 1) : !aie.objectfifosubview<memref<1024xi32, 1>>
    %rhs_flat_a = aie.objectfifo.subview.access %rhs_sub_a[0] : !aie.objectfifosubview<memref<1024xi32, 1>> -> memref<1024xi32, 1>
    %rhs_a = memref.reinterpret_cast %rhs_flat_a to offset: [0], sizes: [1, 1, 8, 4, 8, 4], strides: [1024, 1024, 128, 32, 4, 1] : memref<1024xi32, 1> to memref<1x1x8x4x8x4xi32, 1>
    %lhs_sub_a = aie.objectfifo.acquire @obj4(Consume, 1) : !aie.objectfifosubview<memref<1024xi32, 1>>
    %lhs_flat_a = aie.objectfifo.subview.access %lhs_sub_a[0] : !aie.objectfifosubview<memref<1024xi32, 1>> -> memref<1024xi32, 1>
    %lhs_a = memref.reinterpret_cast %lhs_flat_a to offset: [0], sizes: [1, 1, 4, 8, 4, 8], strides: [1024, 1024, 256, 32, 8, 1] : memref<1024xi32, 1> to memref<1x1x4x8x4x8xi32, 1>

    // Clear every element of the accumulator.
    scf.for %no = %i0 to %i8 step %i1 {
      scf.for %mo = %i0 to %i8 step %i1 {
        scf.for %mi = %i0 to %i4 step %i1 {
          scf.for %ni = %i0 to %i4 step %i1 {
            memref.store %zero_i32, %acc[%i0, %i0, %no, %mo, %mi, %ni] : memref<1x1x8x8x4x4xi32, 1>
          }
        }
      }
    }

    // acc[no, mo, mi, ni] += lhs[ko, mo, mi, ki] * rhs[no, ko, ki, ni]
    scf.for %mo = %i0 to %i8 step %i1 {
      scf.for %no = %i0 to %i8 step %i1 {
        scf.for %ko = %i0 to %i4 step %i1 {
          scf.for %mi = %i0 to %i4 step %i1 {
            scf.for %ni = %i0 to %i4 step %i1 {
              scf.for %ki = %i0 to %i8 step %i1 {
                %a = memref.load %lhs_a[%i0, %i0, %ko, %mo, %mi, %ki] : memref<1x1x4x8x4x8xi32, 1>
                %b = memref.load %rhs_a[%i0, %i0, %no, %ko, %ki, %ni] : memref<1x1x8x4x8x4xi32, 1>
                %c = memref.load %acc[%i0, %i0, %no, %mo, %mi, %ni] : memref<1x1x8x8x4x4xi32, 1>
                %prod = arith.muli %a, %b : i32
                %sum = arith.addi %c, %prod : i32
                memref.store %sum, %acc[%i0, %i0, %no, %mo, %mi, %ni] : memref<1x1x8x8x4x4xi32, 1>
              }
            }
          }
        }
      }
    }

    // Six middle steps: swap each input for its next buffer, then accumulate.
    scf.for %step = %i1 to %i7 step %i1 {
      aie.objectfifo.release @obj3(Consume, 1)
      %rhs_sub_b = aie.objectfifo.acquire @obj3(Consume, 1) : !aie.objectfifosubview<memref<1024xi32, 1>>
      %rhs_flat_b = aie.objectfifo.subview.access %rhs_sub_b[0] : !aie.objectfifosubview<memref<1024xi32, 1>> -> memref<1024xi32, 1>
      %rhs_b = memref.reinterpret_cast %rhs_flat_b to offset: [0], sizes: [1, 1, 8, 4, 8, 4], strides: [1024, 1024, 128, 32, 4, 1] : memref<1024xi32, 1> to memref<1x1x8x4x8x4xi32, 1>
      aie.objectfifo.release @obj4(Consume, 1)
      %lhs_sub_b = aie.objectfifo.acquire @obj4(Consume, 1) : !aie.objectfifosubview<memref<1024xi32, 1>>
      %lhs_flat_b = aie.objectfifo.subview.access %lhs_sub_b[0] : !aie.objectfifosubview<memref<1024xi32, 1>> -> memref<1024xi32, 1>
      %lhs_b = memref.reinterpret_cast %lhs_flat_b to offset: [0], sizes: [1, 1, 4, 8, 4, 8], strides: [1024, 1024, 256, 32, 8, 1] : memref<1024xi32, 1> to memref<1x1x4x8x4x8xi32, 1>
      scf.for %mo = %i0 to %i8 step %i1 {
        scf.for %no = %i0 to %i8 step %i1 {
          scf.for %ko = %i0 to %i4 step %i1 {
            scf.for %mi = %i0 to %i4 step %i1 {
              scf.for %ni = %i0 to %i4 step %i1 {
                scf.for %ki = %i0 to %i8 step %i1 {
                  %a = memref.load %lhs_b[%i0, %i0, %ko, %mo, %mi, %ki] : memref<1x1x4x8x4x8xi32, 1>
                  %b = memref.load %rhs_b[%i0, %i0, %no, %ko, %ki, %ni] : memref<1x1x8x4x8x4xi32, 1>
                  %c = memref.load %acc[%i0, %i0, %no, %mo, %mi, %ni] : memref<1x1x8x8x4x4xi32, 1>
                  %prod = arith.muli %a, %b : i32
                  %sum = arith.addi %c, %prod : i32
                  memref.store %sum, %acc[%i0, %i0, %no, %mo, %mi, %ni] : memref<1x1x8x8x4x4xi32, 1>
                }
              }
            }
          }
        }
      }
    }

    // Final input pair.
    aie.objectfifo.release @obj3(Consume, 1)
    %rhs_sub_c = aie.objectfifo.acquire @obj3(Consume, 1) : !aie.objectfifosubview<memref<1024xi32, 1>>
    %rhs_flat_c = aie.objectfifo.subview.access %rhs_sub_c[0] : !aie.objectfifosubview<memref<1024xi32, 1>> -> memref<1024xi32, 1>
    %rhs_c = memref.reinterpret_cast %rhs_flat_c to offset: [0], sizes: [1, 1, 8, 4, 8, 4], strides: [1024, 1024, 128, 32, 4, 1] : memref<1024xi32, 1> to memref<1x1x8x4x8x4xi32, 1>
    aie.objectfifo.release @obj4(Consume, 1)
    %lhs_sub_c = aie.objectfifo.acquire @obj4(Consume, 1) : !aie.objectfifosubview<memref<1024xi32, 1>>
    %lhs_flat_c = aie.objectfifo.subview.access %lhs_sub_c[0] : !aie.objectfifosubview<memref<1024xi32, 1>> -> memref<1024xi32, 1>
    %lhs_c = memref.reinterpret_cast %lhs_flat_c to offset: [0], sizes: [1, 1, 4, 8, 4, 8], strides: [1024, 1024, 256, 32, 8, 1] : memref<1024xi32, 1> to memref<1x1x4x8x4x8xi32, 1>
    scf.for %mo = %i0 to %i8 step %i1 {
      scf.for %no = %i0 to %i8 step %i1 {
        scf.for %ko = %i0 to %i4 step %i1 {
          scf.for %mi = %i0 to %i4 step %i1 {
            scf.for %ni = %i0 to %i4 step %i1 {
              scf.for %ki = %i0 to %i8 step %i1 {
                %a = memref.load %lhs_c[%i0, %i0, %ko, %mo, %mi, %ki] : memref<1x1x4x8x4x8xi32, 1>
                %b = memref.load %rhs_c[%i0, %i0, %no, %ko, %ki, %ni] : memref<1x1x8x4x8x4xi32, 1>
                %c = memref.load %acc[%i0, %i0, %no, %mo, %mi, %ni] : memref<1x1x8x8x4x4xi32, 1>
                %prod = arith.muli %a, %b : i32
                %sum = arith.addi %c, %prod : i32
                memref.store %sum, %acc[%i0, %i0, %no, %mo, %mi, %ni] : memref<1x1x8x8x4x4xi32, 1>
              }
            }
          }
        }
      }
    }

    // Return the last inputs and hand the finished output buffer downstream.
    aie.objectfifo.release @obj4(Consume, 1)
    aie.objectfifo.release @obj3(Consume, 1)
    aie.objectfifo.release @obj7(Produce, 1)
  }
  aie.end
}
// Core program for tile (0, 3).
// Each of the 4 outer passes acquires one @obj8 buffer, zeroes it, adds into
// it the products of 8 successive (@obj5, @obj2) buffer pairs, and then
// releases it. Indices that the original drove with single-iteration loops
// are written directly as the constant 0.
%core_0_3 = aie.core(%tile_0_3) {
  // Loop bounds, plus the i32 zero used to clear the accumulator.
  %i0 = arith.constant 0 : index
  %i1 = arith.constant 1 : index
  %i4 = arith.constant 4 : index
  %i7 = arith.constant 7 : index
  %i8 = arith.constant 8 : index
  %zero_i32 = arith.constant 0 : i32

  scf.for %pass = %i0 to %i4 step %i1 {
    // Output buffer for this pass, viewed as 1x1x8x8x4x4.
    %acc_sub = aie.objectfifo.acquire @obj8(Produce, 1) : !aie.objectfifosubview<memref<1024xi32, 1>>
    %acc_flat = aie.objectfifo.subview.access %acc_sub[0] : !aie.objectfifosubview<memref<1024xi32, 1>> -> memref<1024xi32, 1>
    %acc = memref.reinterpret_cast %acc_flat to offset: [0], sizes: [1, 1, 8, 8, 4, 4], strides: [1024, 1024, 128, 16, 4, 1] : memref<1024xi32, 1> to memref<1x1x8x8x4x4xi32, 1>

    // First input pair: @obj2 viewed as 1x1x8x4x8x4, @obj5 as 1x1x4x8x4x8.
    %rhs_sub_a = aie.objectfifo.acquire @obj2(Consume, 1) : !aie.objectfifosubview<memref<1024xi32, 1>>
    %rhs_flat_a = aie.objectfifo.subview.access %rhs_sub_a[0] : !aie.objectfifosubview<memref<1024xi32, 1>> -> memref<1024xi32, 1>
    %rhs_a = memref.reinterpret_cast %rhs_flat_a to offset: [0], sizes: [1, 1, 8, 4, 8, 4], strides: [1024, 1024, 128, 32, 4, 1] : memref<1024xi32, 1> to memref<1x1x8x4x8x4xi32, 1>
    %lhs_sub_a = aie.objectfifo.acquire @obj5(Consume, 1) : !aie.objectfifosubview<memref<1024xi32, 1>>
    %lhs_flat_a = aie.objectfifo.subview.access %lhs_sub_a[0] : !aie.objectfifosubview<memref<1024xi32, 1>> -> memref<1024xi32, 1>
    %lhs_a = memref.reinterpret_cast %lhs_flat_a to offset: [0], sizes: [1, 1, 4, 8, 4, 8], strides: [1024, 1024, 256, 32, 8, 1] : memref<1024xi32, 1> to memref<1x1x4x8x4x8xi32, 1>

    // Clear every element of the accumulator.
    scf.for %no = %i0 to %i8 step %i1 {
      scf.for %mo = %i0 to %i8 step %i1 {
        scf.for %mi = %i0 to %i4 step %i1 {
          scf.for %ni = %i0 to %i4 step %i1 {
            memref.store %zero_i32, %acc[%i0, %i0, %no, %mo, %mi, %ni] : memref<1x1x8x8x4x4xi32, 1>
          }
        }
      }
    }

    // acc[no, mo, mi, ni] += lhs[ko, mo, mi, ki] * rhs[no, ko, ki, ni]
    scf.for %mo = %i0 to %i8 step %i1 {
      scf.for %no = %i0 to %i8 step %i1 {
        scf.for %ko = %i0 to %i4 step %i1 {
          scf.for %mi = %i0 to %i4 step %i1 {
            scf.for %ni = %i0 to %i4 step %i1 {
              scf.for %ki = %i0 to %i8 step %i1 {
                %a = memref.load %lhs_a[%i0, %i0, %ko, %mo, %mi, %ki] : memref<1x1x4x8x4x8xi32, 1>
                %b = memref.load %rhs_a[%i0, %i0, %no, %ko, %ki, %ni] : memref<1x1x8x4x8x4xi32, 1>
                %c = memref.load %acc[%i0, %i0, %no, %mo, %mi, %ni] : memref<1x1x8x8x4x4xi32, 1>
                %prod = arith.muli %a, %b : i32
                %sum = arith.addi %c, %prod : i32
                memref.store %sum, %acc[%i0, %i0, %no, %mo, %mi, %ni] : memref<1x1x8x8x4x4xi32, 1>
              }
            }
          }
        }
      }
    }

    // Six middle steps: swap each input for its next buffer, then accumulate.
    scf.for %step = %i1 to %i7 step %i1 {
      aie.objectfifo.release @obj2(Consume, 1)
      %rhs_sub_b = aie.objectfifo.acquire @obj2(Consume, 1) : !aie.objectfifosubview<memref<1024xi32, 1>>
      %rhs_flat_b = aie.objectfifo.subview.access %rhs_sub_b[0] : !aie.objectfifosubview<memref<1024xi32, 1>> -> memref<1024xi32, 1>
      %rhs_b = memref.reinterpret_cast %rhs_flat_b to offset: [0], sizes: [1, 1, 8, 4, 8, 4], strides: [1024, 1024, 128, 32, 4, 1] : memref<1024xi32, 1> to memref<1x1x8x4x8x4xi32, 1>
      aie.objectfifo.release @obj5(Consume, 1)
      %lhs_sub_b = aie.objectfifo.acquire @obj5(Consume, 1) : !aie.objectfifosubview<memref<1024xi32, 1>>
      %lhs_flat_b = aie.objectfifo.subview.access %lhs_sub_b[0] : !aie.objectfifosubview<memref<1024xi32, 1>> -> memref<1024xi32, 1>
      %lhs_b = memref.reinterpret_cast %lhs_flat_b to offset: [0], sizes: [1, 1, 4, 8, 4, 8], strides: [1024, 1024, 256, 32, 8, 1] : memref<1024xi32, 1> to memref<1x1x4x8x4x8xi32, 1>
      scf.for %mo = %i0 to %i8 step %i1 {
        scf.for %no = %i0 to %i8 step %i1 {
          scf.for %ko = %i0 to %i4 step %i1 {
            scf.for %mi = %i0 to %i4 step %i1 {
              scf.for %ni = %i0 to %i4 step %i1 {
                scf.for %ki = %i0 to %i8 step %i1 {
                  %a = memref.load %lhs_b[%i0, %i0, %ko, %mo, %mi, %ki] : memref<1x1x4x8x4x8xi32, 1>
                  %b = memref.load %rhs_b[%i0, %i0, %no, %ko, %ki, %ni] : memref<1x1x8x4x8x4xi32, 1>
                  %c = memref.load %acc[%i0, %i0, %no, %mo, %mi, %ni] : memref<1x1x8x8x4x4xi32, 1>
                  %prod = arith.muli %a, %b : i32
                  %sum = arith.addi %c, %prod : i32
                  memref.store %sum, %acc[%i0, %i0, %no, %mo, %mi, %ni] : memref<1x1x8x8x4x4xi32, 1>
                }
              }
            }
          }
        }
      }
    }

    // Final input pair.
    aie.objectfifo.release @obj2(Consume, 1)
    %rhs_sub_c = aie.objectfifo.acquire @obj2(Consume, 1) : !aie.objectfifosubview<memref<1024xi32, 1>>
    %rhs_flat_c = aie.objectfifo.subview.access %rhs_sub_c[0] : !aie.objectfifosubview<memref<1024xi32, 1>> -> memref<1024xi32, 1>
    %rhs_c = memref.reinterpret_cast %rhs_flat_c to offset: [0], sizes: [1, 1, 8, 4, 8, 4], strides: [1024, 1024, 128, 32, 4, 1] : memref<1024xi32, 1> to memref<1x1x8x4x8x4xi32, 1>
    aie.objectfifo.release @obj5(Consume, 1)
    %lhs_sub_c = aie.objectfifo.acquire @obj5(Consume, 1) : !aie.objectfifosubview<memref<1024xi32, 1>>
    %lhs_flat_c = aie.objectfifo.subview.access %lhs_sub_c[0] : !aie.objectfifosubview<memref<1024xi32, 1>> -> memref<1024xi32, 1>
    %lhs_c = memref.reinterpret_cast %lhs_flat_c to offset: [0], sizes: [1, 1, 4, 8, 4, 8], strides: [1024, 1024, 256, 32, 8, 1] : memref<1024xi32, 1> to memref<1x1x4x8x4x8xi32, 1>
    scf.for %mo = %i0 to %i8 step %i1 {
      scf.for %no = %i0 to %i8 step %i1 {
        scf.for %ko = %i0 to %i4 step %i1 {
          scf.for %mi = %i0 to %i4 step %i1 {
            scf.for %ni = %i0 to %i4 step %i1 {
              scf.for %ki = %i0 to %i8 step %i1 {
                %a = memref.load %lhs_c[%i0, %i0, %ko, %mo, %mi, %ki] : memref<1x1x4x8x4x8xi32, 1>
                %b = memref.load %rhs_c[%i0, %i0, %no, %ko, %ki, %ni] : memref<1x1x8x4x8x4xi32, 1>
                %c = memref.load %acc[%i0, %i0, %no, %mo, %mi, %ni] : memref<1x1x8x8x4x4xi32, 1>
                %prod = arith.muli %a, %b : i32
                %sum = arith.addi %c, %prod : i32
                memref.store %sum, %acc[%i0, %i0, %no, %mo, %mi, %ni] : memref<1x1x8x8x4x4xi32, 1>
              }
            }
          }
        }
      }
    }

    // Return the last inputs and hand the finished output buffer downstream.
    aie.objectfifo.release @obj5(Consume, 1)
    aie.objectfifo.release @obj2(Consume, 1)
    aie.objectfifo.release @obj8(Produce, 1)
  }
  aie.end
}
// Core program for tile (1, 3).
// Each of the 4 outer passes acquires one @obj9 buffer, zeroes it, adds into
// it the products of 8 successive (@obj5, @obj3) buffer pairs, and then
// releases it. Indices that the original drove with single-iteration loops
// are written directly as the constant 0.
%core_1_3 = aie.core(%tile_1_3) {
  // Loop bounds, plus the i32 zero used to clear the accumulator.
  %i0 = arith.constant 0 : index
  %i1 = arith.constant 1 : index
  %i4 = arith.constant 4 : index
  %i7 = arith.constant 7 : index
  %i8 = arith.constant 8 : index
  %zero_i32 = arith.constant 0 : i32

  scf.for %pass = %i0 to %i4 step %i1 {
    // Output buffer for this pass, viewed as 1x1x8x8x4x4.
    %acc_sub = aie.objectfifo.acquire @obj9(Produce, 1) : !aie.objectfifosubview<memref<1024xi32, 1>>
    %acc_flat = aie.objectfifo.subview.access %acc_sub[0] : !aie.objectfifosubview<memref<1024xi32, 1>> -> memref<1024xi32, 1>
    %acc = memref.reinterpret_cast %acc_flat to offset: [0], sizes: [1, 1, 8, 8, 4, 4], strides: [1024, 1024, 128, 16, 4, 1] : memref<1024xi32, 1> to memref<1x1x8x8x4x4xi32, 1>

    // First input pair: @obj3 viewed as 1x1x8x4x8x4, @obj5 as 1x1x4x8x4x8.
    %rhs_sub_a = aie.objectfifo.acquire @obj3(Consume, 1) : !aie.objectfifosubview<memref<1024xi32, 1>>
    %rhs_flat_a = aie.objectfifo.subview.access %rhs_sub_a[0] : !aie.objectfifosubview<memref<1024xi32, 1>> -> memref<1024xi32, 1>
    %rhs_a = memref.reinterpret_cast %rhs_flat_a to offset: [0], sizes: [1, 1, 8, 4, 8, 4], strides: [1024, 1024, 128, 32, 4, 1] : memref<1024xi32, 1> to memref<1x1x8x4x8x4xi32, 1>
    %lhs_sub_a = aie.objectfifo.acquire @obj5(Consume, 1) : !aie.objectfifosubview<memref<1024xi32, 1>>
    %lhs_flat_a = aie.objectfifo.subview.access %lhs_sub_a[0] : !aie.objectfifosubview<memref<1024xi32, 1>> -> memref<1024xi32, 1>
    %lhs_a = memref.reinterpret_cast %lhs_flat_a to offset: [0], sizes: [1, 1, 4, 8, 4, 8], strides: [1024, 1024, 256, 32, 8, 1] : memref<1024xi32, 1> to memref<1x1x4x8x4x8xi32, 1>

    // Clear every element of the accumulator.
    scf.for %no = %i0 to %i8 step %i1 {
      scf.for %mo = %i0 to %i8 step %i1 {
        scf.for %mi = %i0 to %i4 step %i1 {
          scf.for %ni = %i0 to %i4 step %i1 {
            memref.store %zero_i32, %acc[%i0, %i0, %no, %mo, %mi, %ni] : memref<1x1x8x8x4x4xi32, 1>
          }
        }
      }
    }

    // acc[no, mo, mi, ni] += lhs[ko, mo, mi, ki] * rhs[no, ko, ki, ni]
    scf.for %mo = %i0 to %i8 step %i1 {
      scf.for %no = %i0 to %i8 step %i1 {
        scf.for %ko = %i0 to %i4 step %i1 {
          scf.for %mi = %i0 to %i4 step %i1 {
            scf.for %ni = %i0 to %i4 step %i1 {
              scf.for %ki = %i0 to %i8 step %i1 {
                %a = memref.load %lhs_a[%i0, %i0, %ko, %mo, %mi, %ki] : memref<1x1x4x8x4x8xi32, 1>
                %b = memref.load %rhs_a[%i0, %i0, %no, %ko, %ki, %ni] : memref<1x1x8x4x8x4xi32, 1>
                %c = memref.load %acc[%i0, %i0, %no, %mo, %mi, %ni] : memref<1x1x8x8x4x4xi32, 1>
                %prod = arith.muli %a, %b : i32
                %sum = arith.addi %c, %prod : i32
                memref.store %sum, %acc[%i0, %i0, %no, %mo, %mi, %ni] : memref<1x1x8x8x4x4xi32, 1>
              }
            }
          }
        }
      }
    }

    // Six middle steps: swap each input for its next buffer, then accumulate.
    scf.for %step = %i1 to %i7 step %i1 {
      aie.objectfifo.release @obj3(Consume, 1)
      %rhs_sub_b = aie.objectfifo.acquire @obj3(Consume, 1) : !aie.objectfifosubview<memref<1024xi32, 1>>
      %rhs_flat_b = aie.objectfifo.subview.access %rhs_sub_b[0] : !aie.objectfifosubview<memref<1024xi32, 1>> -> memref<1024xi32, 1>
      %rhs_b = memref.reinterpret_cast %rhs_flat_b to offset: [0], sizes: [1, 1, 8, 4, 8, 4], strides: [1024, 1024, 128, 32, 4, 1] : memref<1024xi32, 1> to memref<1x1x8x4x8x4xi32, 1>
      aie.objectfifo.release @obj5(Consume, 1)
      %lhs_sub_b = aie.objectfifo.acquire @obj5(Consume, 1) : !aie.objectfifosubview<memref<1024xi32, 1>>
      %lhs_flat_b = aie.objectfifo.subview.access %lhs_sub_b[0] : !aie.objectfifosubview<memref<1024xi32, 1>> -> memref<1024xi32, 1>
      %lhs_b = memref.reinterpret_cast %lhs_flat_b to offset: [0], sizes: [1, 1, 4, 8, 4, 8], strides: [1024, 1024, 256, 32, 8, 1] : memref<1024xi32, 1> to memref<1x1x4x8x4x8xi32, 1>
      scf.for %mo = %i0 to %i8 step %i1 {
        scf.for %no = %i0 to %i8 step %i1 {
          scf.for %ko = %i0 to %i4 step %i1 {
            scf.for %mi = %i0 to %i4 step %i1 {
              scf.for %ni = %i0 to %i4 step %i1 {
                scf.for %ki = %i0 to %i8 step %i1 {
                  %a = memref.load %lhs_b[%i0, %i0, %ko, %mo, %mi, %ki] : memref<1x1x4x8x4x8xi32, 1>
                  %b = memref.load %rhs_b[%i0, %i0, %no, %ko, %ki, %ni] : memref<1x1x8x4x8x4xi32, 1>
                  %c = memref.load %acc[%i0, %i0, %no, %mo, %mi, %ni] : memref<1x1x8x8x4x4xi32, 1>
                  %prod = arith.muli %a, %b : i32
                  %sum = arith.addi %c, %prod : i32
                  memref.store %sum, %acc[%i0, %i0, %no, %mo, %mi, %ni] : memref<1x1x8x8x4x4xi32, 1>
                }
              }
            }
          }
        }
      }
    }

    // Final input pair.
    aie.objectfifo.release @obj3(Consume, 1)
    %rhs_sub_c = aie.objectfifo.acquire @obj3(Consume, 1) : !aie.objectfifosubview<memref<1024xi32, 1>>
    %rhs_flat_c = aie.objectfifo.subview.access %rhs_sub_c[0] : !aie.objectfifosubview<memref<1024xi32, 1>> -> memref<1024xi32, 1>
    %rhs_c = memref.reinterpret_cast %rhs_flat_c to offset: [0], sizes: [1, 1, 8, 4, 8, 4], strides: [1024, 1024, 128, 32, 4, 1] : memref<1024xi32, 1> to memref<1x1x8x4x8x4xi32, 1>
    aie.objectfifo.release @obj5(Consume, 1)
    %lhs_sub_c = aie.objectfifo.acquire @obj5(Consume, 1) : !aie.objectfifosubview<memref<1024xi32, 1>>
    %lhs_flat_c = aie.objectfifo.subview.access %lhs_sub_c[0] : !aie.objectfifosubview<memref<1024xi32, 1>> -> memref<1024xi32, 1>
    %lhs_c = memref.reinterpret_cast %lhs_flat_c to offset: [0], sizes: [1, 1, 4, 8, 4, 8], strides: [1024, 1024, 256, 32, 8, 1] : memref<1024xi32, 1> to memref<1x1x4x8x4x8xi32, 1>
    scf.for %mo = %i0 to %i8 step %i1 {
      scf.for %no = %i0 to %i8 step %i1 {
        scf.for %ko = %i0 to %i4 step %i1 {
          scf.for %mi = %i0 to %i4 step %i1 {
            scf.for %ni = %i0 to %i4 step %i1 {
              scf.for %ki = %i0 to %i8 step %i1 {
                %a = memref.load %lhs_c[%i0, %i0, %ko, %mo, %mi, %ki] : memref<1x1x4x8x4x8xi32, 1>
                %b = memref.load %rhs_c[%i0, %i0, %no, %ko, %ki, %ni] : memref<1x1x8x4x8x4xi32, 1>
                %c = memref.load %acc[%i0, %i0, %no, %mo, %mi, %ni] : memref<1x1x8x8x4x4xi32, 1>
                %prod = arith.muli %a, %b : i32
                %sum = arith.addi %c, %prod : i32
                memref.store %sum, %acc[%i0, %i0, %no, %mo, %mi, %ni] : memref<1x1x8x8x4x4xi32, 1>
              }
            }
          }
        }
      }
    }

    // Return the last inputs and hand the finished output buffer downstream.
    aie.objectfifo.release @obj5(Consume, 1)
    aie.objectfifo.release @obj3(Consume, 1)
    aie.objectfifo.release @obj9(Produce, 1)
  }
  aie.end
}
func.func @sequence(%arg0: memref<128x256xi32>, %arg1: memref<256x128xi32>, %arg2: memref<128x128xi32>) {
aiex.npu.dma_memcpy_nd(0, 0, %arg0[1, 0, 0, 0][1, 2, 32, 32][1, 8192, 256]) {id = 0 : i64, issue_token = true, metadata = @obj0} : memref<128x256xi32>
aiex.npu.dma_wait {symbol = @obj0}
aiex.npu.dma_memcpy_nd(0, 0, %arg1[1, 0, 0, 0][1, 2, 32, 32][1, 32, 128]) {id = 0 : i64, issue_token = true, metadata = @obj1} : memref<256x128xi32>
aiex.npu.dma_wait {symbol = @obj1}
aiex.npu.dma_memcpy_nd(0, 0, %arg0[1, 0, 0, 32][1, 2, 32, 32][1, 8192, 256]) {id = 0 : i64, issue_token = true, metadata = @obj0} : memref<128x256xi32>
aiex.npu.dma_wait {symbol = @obj0}
aiex.npu.dma_memcpy_nd(0, 0, %arg1[1, 0, 32, 0][1, 2, 32, 32][1, 32, 128]) {id = 0 : i64, issue_token = true, metadata = @obj1} : memref<256x128xi32>
aiex.npu.dma_wait {symbol = @obj1}
aiex.npu.dma_memcpy_nd(0, 0, %arg0[1, 0, 0, 64][1, 2, 32, 32][1, 8192, 256]) {id = 0 : i64, issue_token = true, metadata = @obj0} : memref<128x256xi32>
aiex.npu.dma_wait {symbol = @obj0}
aiex.npu.dma_memcpy_nd(0, 0, %arg1[1, 0, 64, 0][1, 2, 32, 32][1, 32, 128]) {id = 0 : i64, issue_token = true, metadata = @obj1} : memref<256x128xi32>
aiex.npu.dma_wait {symbol = @obj1}
aiex.npu.dma_memcpy_nd(0, 0, %arg0[1, 0, 0, 96][1, 2, 32, 32][1, 8192, 256]) {id = 0 : i64, issue_token = true, metadata = @obj0} : memref<128x256xi32>
aiex.npu.dma_wait {symbol = @obj0}
aiex.npu.dma_memcpy_nd(0, 0, %arg1[1, 0, 96, 0][1, 2, 32, 32][1, 32, 128]) {id = 0 : i64, issue_token = true, metadata = @obj1} : memref<256x128xi32>
aiex.npu.dma_wait {symbol = @obj1}
aiex.npu.dma_memcpy_nd(0, 0, %arg0[1, 0, 0, 128][1, 2, 32, 32][1, 8192, 256]) {id = 0 : i64, issue_token = true, metadata = @obj0} : memref<128x256xi32>
aiex.npu.dma_wait {symbol = @obj0}
aiex.npu.dma_memcpy_nd(0, 0, %arg1[1, 0, 128, 0][1, 2, 32, 32][1, 32, 128]) {id = 0 : i64, issue_token = true, metadata = @obj1} : memref<256x128xi32>
aiex.npu.dma_wait {symbol = @obj1}
aiex.npu.dma_memcpy_nd(0, 0, %arg0[1, 0, 0, 160][1, 2, 32, 32][1, 8192, 256]) {id = 0 : i64, issue_token = true, metadata = @obj0} : memref<128x256xi32>
aiex.npu.dma_wait {symbol = @obj0}
aiex.npu.dma_memcpy_nd(0, 0, %arg1[1, 0, 160, 0][1, 2, 32, 32][1, 32, 128]) {id = 0 : i64, issue_token = true, metadata = @obj1} : memref<256x128xi32>
aiex.npu.dma_wait {symbol = @obj1}
aiex.npu.dma_memcpy_nd(0, 0, %arg0[1, 0, 0, 192][1, 2, 32, 32][1, 8192, 256]) {id = 0 : i64, issue_token = true, metadata = @obj0} : memref<128x256xi32>
aiex.npu.dma_wait {symbol = @obj0}
aiex.npu.dma_memcpy_nd(0, 0, %arg1[1, 0, 192, 0][1, 2, 32, 32][1, 32, 128]) {id = 0 : i64, issue_token = true, metadata = @obj1} : memref<256x128xi32>
aiex.npu.dma_wait {symbol = @obj1}
aiex.npu.dma_memcpy_nd(0, 0, %arg0[1, 0, 0, 224][1, 2, 32, 32][1, 8192, 256]) {id = 0 : i64, issue_token = true, metadata = @obj0} : memref<128x256xi32>
aiex.npu.dma_wait {symbol = @obj0}
aiex.npu.dma_memcpy_nd(0, 0, %arg1[1, 0, 224, 0][1, 2, 32, 32][1, 32, 128]) {id = 0 : i64, issue_token = true, metadata = @obj1} : memref<256x128xi32>
aiex.npu.dma_wait {symbol = @obj1}
aiex.npu.dma_memcpy_nd(0, 0, %arg2[1, 1, 0, 0][1, 1, 64, 64][1, 1, 128]) {id = 0 : i64, issue_token = true, metadata = @obj10} : memref<128x128xi32>
aiex.npu.dma_wait {symbol = @obj10}
aiex.npu.dma_memcpy_nd(0, 0, %arg0[1, 0, 0, 0][1, 2, 32, 32][1, 8192, 256]) {id = 0 : i64, issue_token = true, metadata = @obj0} : memref<128x256xi32>
aiex.npu.dma_wait {symbol = @obj0}
aiex.npu.dma_memcpy_nd(0, 0, %arg1[1, 0, 0, 64][1, 2, 32, 32][1, 32, 128]) {id = 0 : i64, issue_token = true, metadata = @obj1} : memref<256x128xi32>
aiex.npu.dma_wait {symbol = @obj1}
aiex.npu.dma_memcpy_nd(0, 0, %arg0[1, 0, 0, 32][1, 2, 32, 32][1, 8192, 256]) {id = 0 : i64, issue_token = true, metadata = @obj0} : memref<128x256xi32>
aiex.npu.dma_wait {symbol = @obj0}
aiex.npu.dma_memcpy_nd(0, 0, %arg1[1, 0, 32, 64][1, 2, 32, 32][1, 32, 128]) {id = 0 : i64, issue_token = true, metadata = @obj1} : memref<256x128xi32>
aiex.npu.dma_wait {symbol = @obj1}
aiex.npu.dma_memcpy_nd(0, 0, %arg0[1, 0, 0, 64][1, 2, 32, 32][1, 8192, 256]) {id = 0 : i64, issue_token = true, metadata = @obj0} : memref<128x256xi32>
aiex.npu.dma_wait {symbol = @obj0}
aiex.npu.dma_memcpy_nd(0, 0, %arg1[1, 0, 64, 64][1, 2, 32, 32][1, 32, 128]) {id = 0 : i64, issue_token = true, metadata = @obj1} : memref<256x128xi32>
aiex.npu.dma_wait {symbol = @obj1}
aiex.npu.dma_memcpy_nd(0, 0, %arg0[1, 0, 0, 96][1, 2, 32, 32][1, 8192, 256]) {id = 0 : i64, issue_token = true, metadata = @obj0} : memref<128x256xi32>
aiex.npu.dma_wait {symbol = @obj0}
aiex.npu.dma_memcpy_nd(0, 0, %arg1[1, 0, 96, 64][1, 2, 32, 32][1, 32, 128]) {id = 0 : i64, issue_token = true, metadata = @obj1} : memref<256x128xi32>
aiex.npu.dma_wait {symbol = @obj1}
aiex.npu.dma_memcpy_nd(0, 0, %arg0[1, 0, 0, 128][1, 2, 32, 32][1, 8192, 256]) {id = 0 : i64, issue_token = true, metadata = @obj0} : memref<128x256xi32>
aiex.npu.dma_wait {symbol = @obj0}
aiex.npu.dma_memcpy_nd(0, 0, %arg1[1, 0, 128, 64][1, 2, 32, 32][1, 32, 128]) {id = 0 : i64, issue_token = true, metadata = @obj1} : memref<256x128xi32>
aiex.npu.dma_wait {symbol = @obj1}
aiex.npu.dma_memcpy_nd(0, 0, %arg0[1, 0, 0, 160][1, 2, 32, 32][1, 8192, 256]) {id = 0 : i64, issue_token = true, metadata = @obj0} : memref<128x256xi32>
aiex.npu.dma_wait {symbol = @obj0}
aiex.npu.dma_memcpy_nd(0, 0, %arg1[1, 0, 160, 64][1, 2, 32, 32][1, 32, 128]) {id = 0 : i64, issue_token = true, metadata = @obj1} : memref<256x128xi32>
aiex.npu.dma_wait {symbol = @obj1}
aiex.npu.dma_memcpy_nd(0, 0, %arg0[1, 0, 0, 192][1, 2, 32, 32][1, 8192, 256]) {id = 0 : i64, issue_token = true, metadata = @obj0} : memref<128x256xi32>
aiex.npu.dma_wait {symbol = @obj0}
aiex.npu.dma_memcpy_nd(0, 0, %arg1[1, 0, 192, 64][1, 2, 32, 32][1, 32, 128]) {id = 0 : i64, issue_token = true, metadata = @obj1} : memref<256x128xi32>
aiex.npu.dma_wait {symbol = @obj1}
aiex.npu.dma_memcpy_nd(0, 0, %arg0[1, 0, 0, 224][1, 2, 32, 32][1, 8192, 256]) {id = 0 : i64, issue_token = true, metadata = @obj0} : memref<128x256xi32>
aiex.npu.dma_wait {symbol = @obj0}
aiex.npu.dma_memcpy_nd(0, 0, %arg1[1, 0, 224, 64][1, 2, 32, 32][1, 32, 128]) {id = 0 : i64, issue_token = true, metadata = @obj1} : memref<256x128xi32>
aiex.npu.dma_wait {symbol = @obj1}
aiex.npu.dma_memcpy_nd(0, 0, %arg2[1, 1, 0, 64][1, 1, 64, 64][1, 1, 128]) {id = 0 : i64, issue_token = true, metadata = @obj10} : memref<128x128xi32>
aiex.npu.dma_wait {symbol = @obj10}
aiex.npu.dma_memcpy_nd(0, 0, %arg0[1, 0, 64, 0][1, 2, 32, 32][1, 8192, 256]) {id = 0 : i64, issue_token = true, metadata = @obj0} : memref<128x256xi32>
aiex.npu.dma_wait {symbol = @obj0}
aiex.npu.dma_memcpy_nd(0, 0, %arg1[1, 0, 0, 0][1, 2, 32, 32][1, 32, 128]) {id = 0 : i64, issue_token = true, metadata = @obj1} : memref<256x128xi32>
aiex.npu.dma_wait {symbol = @obj1}
aiex.npu.dma_memcpy_nd(0, 0, %arg0[1, 0, 64, 32][1, 2, 32, 32][1, 8192, 256]) {id = 0 : i64, issue_token = true, metadata = @obj0} : memref<128x256xi32>
aiex.npu.dma_wait {symbol = @obj0}
aiex.npu.dma_memcpy_nd(0, 0, %arg1[1, 0, 32, 0][1, 2, 32, 32][1, 32, 128]) {id = 0 : i64, issue_token = true, metadata = @obj1} : memref<256x128xi32>
aiex.npu.dma_wait {symbol = @obj1}
aiex.npu.dma_memcpy_nd(0, 0, %arg0[1, 0, 64, 64][1, 2, 32, 32][1, 8192, 256]) {id = 0 : i64, issue_token = true, metadata = @obj0} : memref<128x256xi32>
aiex.npu.dma_wait {symbol = @obj0}
aiex.npu.dma_memcpy_nd(0, 0, %arg1[1, 0, 64, 0][1, 2, 32, 32][1, 32, 128]) {id = 0 : i64, issue_token = true, metadata = @obj1} : memref<256x128xi32>
aiex.npu.dma_wait {symbol = @obj1}
aiex.npu.dma_memcpy_nd(0, 0, %arg0[1, 0, 64, 96][1, 2, 32, 32][1, 8192, 256]) {id = 0 : i64, issue_token = true, metadata = @obj0} : memref<128x256xi32>
aiex.npu.dma_wait {symbol = @obj0}
aiex.npu.dma_memcpy_nd(0, 0, %arg1[1, 0, 96, 0][1, 2, 32, 32][1, 32, 128]) {id = 0 : i64, issue_token = true, metadata = @obj1} : memref<256x128xi32>
aiex.npu.dma_wait {symbol = @obj1}
aiex.npu.dma_memcpy_nd(0, 0, %arg0[1, 0, 64, 128][1, 2, 32, 32][1, 8192, 256]) {id = 0 : i64, issue_token = true, metadata = @obj0} : memref<128x256xi32>
aiex.npu.dma_wait {symbol = @obj0}
aiex.npu.dma_memcpy_nd(0, 0, %arg1[1, 0, 128, 0][1, 2, 32, 32][1, 32, 128]) {id = 0 : i64, issue_token = true, metadata = @obj1} : memref<256x128xi32>
aiex.npu.dma_wait {symbol = @obj1}
aiex.npu.dma_memcpy_nd(0, 0, %arg0[1, 0, 64, 160][1, 2, 32, 32][1, 8192, 256]) {id = 0 : i64, issue_token = true, metadata = @obj0} : memref<128x256xi32>
aiex.npu.dma_wait {symbol = @obj0}
aiex.npu.dma_memcpy_nd(0, 0, %arg1[1, 0, 160, 0][1, 2, 32, 32][1, 32, 128]) {id = 0 : i64, issue_token = true, metadata = @obj1} : memref<256x128xi32>
aiex.npu.dma_wait {symbol = @obj1}
aiex.npu.dma_memcpy_nd(0, 0, %arg0[1, 0, 64, 192][1, 2, 32, 32][1, 8192, 256]) {id = 0 : i64, issue_token = true, metadata = @obj0} : memref<128x256xi32>
aiex.npu.dma_wait {symbol = @obj0}
aiex.npu.dma_memcpy_nd(0, 0, %arg1[1, 0, 192, 0][1, 2, 32, 32][1, 32, 128]) {id = 0 : i64, issue_token = true, metadata = @obj1} : memref<256x128xi32>
aiex.npu.dma_wait {symbol = @obj1}
aiex.npu.dma_memcpy_nd(0, 0, %arg0[1, 0, 64, 224][1, 2, 32, 32][1, 8192, 256]) {id = 0 : i64, issue_token = true, metadata = @obj0} : memref<128x256xi32>
aiex.npu.dma_wait {symbol = @obj0}
aiex.npu.dma_memcpy_nd(0, 0, %arg1[1, 0, 224, 0][1, 2, 32, 32][1, 32, 128]) {id = 0 : i64, issue_token = true, metadata = @obj1} : memref<256x128xi32>
aiex.npu.dma_wait {symbol = @obj1}
aiex.npu.dma_memcpy_nd(0, 0, %arg2[1, 1, 64, 0][1, 1, 64, 64][1, 1, 128]) {id = 0 : i64, issue_token = true, metadata = @obj10} : memref<128x128xi32>
aiex.npu.dma_wait {symbol = @obj10}
aiex.npu.dma_memcpy_nd(0, 0, %arg0[1, 0, 64, 0][1, 2, 32, 32][1, 8192, 256]) {id = 0 : i64, issue_token = true, metadata = @obj0} : memref<128x256xi32>
aiex.npu.dma_wait {symbol = @obj0}
aiex.npu.dma_memcpy_nd(0, 0, %arg1[1, 0, 0, 64][1, 2, 32, 32][1, 32, 128]) {id = 0 : i64, issue_token = true, metadata = @obj1} : memref<256x128xi32>
aiex.npu.dma_wait {symbol = @obj1}
aiex.npu.dma_memcpy_nd(0, 0, %arg0[1, 0, 64, 32][1, 2, 32, 32][1, 8192, 256]) {id = 0 : i64, issue_token = true, metadata = @obj0} : memref<128x256xi32>
aiex.npu.dma_wait {symbol = @obj0}
aiex.npu.dma_memcpy_nd(0, 0, %arg1[1, 0, 32, 64][1, 2, 32, 32][1, 32, 128]) {id = 0 : i64, issue_token = true, metadata = @obj1} : memref<256x128xi32>
aiex.npu.dma_wait {symbol = @obj1}
aiex.npu.dma_memcpy_nd(0, 0, %arg0[1, 0, 64, 64][1, 2, 32, 32][1, 8192, 256]) {id = 0 : i64, issue_token = true, metadata = @obj0} : memref<128x256xi32>
aiex.npu.dma_wait {symbol = @obj0}
aiex.npu.dma_memcpy_nd(0, 0, %arg1[1, 0, 64, 64][1, 2, 32, 32][1, 32, 128]) {id = 0 : i64, issue_token = true, metadata = @obj1} : memref<256x128xi32>
aiex.npu.dma_wait {symbol = @obj1}
aiex.npu.dma_memcpy_nd(0, 0, %arg0[1, 0, 64, 96][1, 2, 32, 32][1, 8192, 256]) {id = 0 : i64, issue_token = true, metadata = @obj0} : memref<128x256xi32>
aiex.npu.dma_wait {symbol = @obj0}
aiex.npu.dma_memcpy_nd(0, 0, %arg1[1, 0, 96, 64][1, 2, 32, 32][1, 32, 128]) {id = 0 : i64, issue_token = true, metadata = @obj1} : memref<256x128xi32>
aiex.npu.dma_wait {symbol = @obj1}
aiex.npu.dma_memcpy_nd(0, 0, %arg0[1, 0, 64, 128][1, 2, 32, 32][1, 8192, 256]) {id = 0 : i64, issue_token = true, metadata = @obj0} : memref<128x256xi32>
aiex.npu.dma_wait {symbol = @obj0}
aiex.npu.dma_memcpy_nd(0, 0, %arg1[1, 0, 128, 64][1, 2, 32, 32][1, 32, 128]) {id = 0 : i64, issue_token = true, metadata = @obj1} : memref<256x128xi32>
aiex.npu.dma_wait {symbol = @obj1}
aiex.npu.dma_memcpy_nd(0, 0, %arg0[1, 0, 64, 160][1, 2, 32, 32][1, 8192, 256]) {id = 0 : i64, issue_token = true, metadata = @obj0} : memref<128x256xi32>
aiex.npu.dma_wait {symbol = @obj0}
aiex.npu.dma_memcpy_nd(0, 0, %arg1[1, 0, 160, 64][1, 2, 32, 32][1, 32, 128]) {id = 0 : i64, issue_token = true, metadata = @obj1} : memref<256x128xi32>
aiex.npu.dma_wait {symbol = @obj1}
aiex.npu.dma_memcpy_nd(0, 0, %arg0[1, 0, 64, 192][1, 2, 32, 32][1, 8192, 256]) {id = 0 : i64, issue_token = true, metadata = @obj0} : memref<128x256xi32>
aiex.npu.dma_wait {symbol = @obj0}
aiex.npu.dma_memcpy_nd(0, 0, %arg1[1, 0, 192, 64][1, 2, 32, 32][1, 32, 128]) {id = 0 : i64, issue_token = true, metadata = @obj1} : memref<256x128xi32>
aiex.npu.dma_wait {symbol = @obj1}
aiex.npu.dma_memcpy_nd(0, 0, %arg0[1, 0, 64, 224][1, 2, 32, 32][1, 8192, 256]) {id = 0 : i64, issue_token = true, metadata = @obj0} : memref<128x256xi32>
aiex.npu.dma_wait {symbol = @obj0}
aiex.npu.dma_memcpy_nd(0, 0, %arg1[1, 0, 224, 64][1, 2, 32, 32][1, 32, 128]) {id = 0 : i64, issue_token = true, metadata = @obj1} : memref<256x128xi32>
aiex.npu.dma_wait {symbol = @obj1}
aiex.npu.dma_memcpy_nd(0, 0, %arg2[1, 1, 64, 64][1, 1, 64, 64][1, 1, 128]) {id = 0 : i64, issue_token = true, metadata = @obj10} : memref<128x128xi32>
aiex.npu.dma_wait {symbol = @obj10}
return
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment