Skip to content

Instantly share code, notes, and snippets.

@ivanradanov
Created September 6, 2023 04:24
Show Gist options
  • Save ivanradanov/ac0db6d25e613469b3218f9271922b17 to your computer and use it in GitHub Desktop.
Save ivanradanov/ac0db6d25e613469b3218f9271922b17 to your computer and use it in GitHub Desktop.
// RUN: polygeist-opt --canonicalize --allow-unregistered-dialect --split-input-file %s | FileCheck %s
// Integer set over one dimension that holds only when -d0 == 0, i.e. d0 is zero.
#set0 = affine_set<(d0) : (-d0 == 0)>
// Integer set over one dimension that holds only when d0 == 0; it has the same
// solutions as #set0 but is written without the negation.
#set1 = affine_set<(d0) : (d0 == 0)>
module {
// 16-iteration parallel loop whose final op is an affine.if guarded by #set0
// (the negated form of "induction variable equals zero"). The guarded region
// creates and consumes its own value, so it uses nothing defined in the loop
// body other than the induction variable in the guard.
func.func @bpnn_train_cuda() {
  affine.parallel (%iv) = (0) to (16) {
    "test.pre"() : () -> ()
    affine.if #set0(%iv) {
      %created = "test.create"() : () -> i32
      "test.use"(%created) : (i32) -> ()
    }
  }
  func.return
}
// Same shape as @bpnn_train_cuda, but the guard uses #set1, the un-negated
// spelling of "induction variable equals zero". The guarded region is still
// self-contained: it creates the value it consumes.
func.func @bpnn_train_cuda1() {
  affine.parallel (%iv) = (0) to (16) {
    "test.pre"() : () -> ()
    affine.if #set1(%iv) {
      %created = "test.create"() : () -> i32
      "test.use"(%created) : (i32) -> ()
    }
  }
  func.return
}
// Variant in which the value consumed inside the guarded region is produced
// in the loop body, outside the affine.if. The guarded region therefore
// depends on a per-iteration value of the enclosing parallel loop.
func.func @bpnn_train_cuda2() {
  affine.parallel (%iv) = (0) to (16) {
    %created = "test.create"() : () -> i32
    affine.if #set1(%iv) {
      "test.use"(%created) : (i32) -> ()
    }
  }
  func.return
}
}
// CHECK: func.func @bpnn_train_cuda() {
// CHECK-NEXT: affine.parallel (%[[arg0:.+]]) = (0) to (16) {
// CHECK-NEXT: "test.pre"() : () -> ()
// CHECK-NEXT: }
// CHECK-NEXT: %[[V0:.+]] = "test.create"() : () -> i32
// CHECK-NEXT: "test.use"(%[[V0]]) : (i32) -> ()
// CHECK-NEXT: return
// CHECK-NEXT: }
// CHECK: func.func @bpnn_train_cuda1() {
// CHECK-NEXT: affine.parallel (%[[arg0:.+]]) = (0) to (16) {
// CHECK-NEXT: "test.pre"() : () -> ()
// CHECK-NEXT: }
// CHECK-NEXT: %[[V0:.+]] = "test.create"() : () -> i32
// CHECK-NEXT: "test.use"(%[[V0]]) : (i32) -> ()
// CHECK-NEXT: return
// CHECK-NEXT: }
// CHECK: func.func @bpnn_train_cuda2() {
// CHECK-NEXT: affine.parallel (%[[arg0:.+]]) = (0) to (16) {
// CHECK-NEXT: %[[V0:.+]] = "test.create"() : () -> i32
// CHECK-NEXT: affine.if #set(%[[arg0]]) {
// CHECK-NEXT: "test.use"(%[[V0]]) : (i32) -> ()
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: return
// CHECK-NEXT: }
// Two-dimensional integer set that holds only when both d0 and d1 are zero.
#set = affine_set<(d0, d1) : (d0 == 0, d1 == 0)>
module {
// Reproducer input: a 256x256 nest of affine.parallel loops in which every
// iteration sums all 1024 elements of %30 into a private scalar buffer, and
// only the iteration where both induction variables are zero writes that sum
// to %14. The IR is kept exactly as reduced; do not tidy it.
func.func @_Z9test_caseIiEiPili(%30: memref<1024xi32>, %14: memref<i32>, %0: i32, %arg0: memref<?xi32>, %arg1: i64, %arg2: i32) {
// Of the constants below only %c0_i32 is used by the body; the others are dead.
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c256 = arith.constant 256 : index
%c256_i64 = arith.constant 256 : i64
%c4096_i64 = arith.constant 4096 : i64
%c4_i64 = arith.constant 4 : i64
%c1024_i32 = arith.constant 1024 : i32
%c2_i32 = arith.constant 2 : i32
%c0_i32 = arith.constant 0 : i32
%c1_i32 = arith.constant 1 : i32
%c0_i64 = arith.constant 0 : i64
// This function-level buffer is never read or written.
%alloca = memref.alloca() : memref<1xi32>
affine.parallel (%arg3) = (0) to (256) {
affine.parallel (%arg4) = (0) to (256) {
// Per-iteration accumulator, allocated inside the innermost parallel loop.
%alloca_1 = memref.alloca() : memref<i32>
// The store of %0 is immediately overwritten by the store of zero.
affine.store %0, %alloca_1[] : memref<i32>
affine.store %c0_i32, %alloca_1[] : memref<i32>
// %arg5 in [0, 4) and %arg6 in {0, 4, ..., 1020} together index every element
// of %30 once through %arg6 + %arg5.
affine.for %arg5 = 0 to 4 {
affine.for %arg6 = 0 to 1024 step 4 {
%31 = affine.load %30[%arg6 + %arg5] : memref<1024xi32>
%32 = affine.load %alloca_1[] : memref<i32>
%33 = arith.addi %32, %31 : i32
affine.store %33, %alloca_1[] : memref<i32>
}
}
// Guarded region: only when %arg4 == 0 and %arg3 == 0 is the accumulated value
// copied to %14. It reads %alloca_1, which is defined inside the parallel loop.
affine.if #set(%arg4, %arg3) {
%31 = affine.load %alloca_1[] : memref<i32>
affine.store %31, %14[] : memref<i32>
}
}
}
return
}
}
// CHECK: TODO: fix bug in AffineIfSinking
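// Actual (incorrect) output is below. Note the memref duplication: the sunk
// load reads from a new %alloca created after the loop instead of the
// %alloca_0 that the loop body accumulated into.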
// module {
// func.func @_Z9test_caseIiEiPili(%arg0: memref<1024xi32>, %arg1: memref<i32>, %arg2: i32, %arg3: memref<?xi32>, %arg4: i64, %arg5: i32) {
// %c0_i32 = arith.constant 0 : i32
// affine.parallel (%arg6, %arg7) = (0, 0) to (256, 256) {
// %alloca_0 = memref.alloca() : memref<i32>
// affine.store %arg2, %alloca_0[] : memref<i32>
// affine.store %c0_i32, %alloca_0[] : memref<i32>
// affine.for %arg8 = 0 to 4 {
// affine.for %arg9 = 0 to 1024 step 4 {
// %1 = affine.load %arg0[%arg9 + %arg8] : memref<1024xi32>
// %2 = affine.load %alloca_0[] : memref<i32>
// %3 = arith.addi %2, %1 : i32
// affine.store %3, %alloca_0[] : memref<i32>
// }
// }
// }
// %alloca = memref.alloca() : memref<i32>
// %0 = affine.load %alloca[] : memref<i32>
// affine.store %0, %arg1[] : memref<i32>
// return
// }
// }
// RUN: polygeist-opt --mem2reg --split-input-file %s | FileCheck %s
// CHECK: TODO: fix crash
module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<i64, dense<64> : vector<2xi32>>, #dlti.dl_entry<f80, dense<128> : vector<2xi32>>, #dlti.dl_entry<i1, dense<8> : vector<2xi32>>, #dlti.dl_entry<i8, dense<8> : vector<2xi32>>, #dlti.dl_entry<i16, dense<16> : vector<2xi32>>, #dlti.dl_entry<i32, dense<32> : vector<2xi32>>, #dlti.dl_entry<f16, dense<16> : vector<2xi32>>, #dlti.dl_entry<f64, dense<64> : vector<2xi32>>, #dlti.dl_entry<f128, dense<128> : vector<2xi32>>>, llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-linux-gnu", polygeist.gpu_module.llvm.data_layout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64", polygeist.gpu_module.llvm.target_triple = "nvptx64-nvidia-cuda", "polygeist.target-cpu" = "x86-64", "polygeist.target-features" = "+cx8,+fxsr,+mmx,+sse,+sse2,+x87", "polygeist.tune-cpu" = "generic"} {
// Calls @_ZNK7__half2cv11__half2_rawEv to fill a local 1x2xi16 buffer from
// %arg0, then copies both i16 elements of that buffer into row 0 of %arg1.
func.func @_Z3fooR7__half2(%arg0: memref<?x!llvm.struct<(struct<(i16)>, struct<(i16)>)>>, %arg1: memref<?x2xi16>) attributes {llvm.linkage = #llvm.linkage<external>} {
// Local result buffer; it is passed to the callee through a dynamically shaped
// cast, so the callee's stores are not visible as direct stores to %alloca.
%alloca = memref.alloca() : memref<1x2xi16>
%cast = memref.cast %alloca : memref<1x2xi16> to memref<?x2xi16>
call @_ZNK7__half2cv11__half2_rawEv(%arg0, %cast) : (memref<?x!llvm.struct<(struct<(i16)>, struct<(i16)>)>>, memref<?x2xi16>) -> ()
// Read back both elements after the call and forward them to the output.
%0 = affine.load %alloca[0, 0] : memref<1x2xi16>
%1 = affine.load %alloca[0, 1] : memref<1x2xi16>
affine.store %0, %arg1[0, 0] : memref<?x2xi16>
affine.store %1, %arg1[0, 1] : memref<?x2xi16>
return
}
// Copies the first 32 bits behind %arg0 into row 0 of %arg1 as two i16 values,
// going through two local 1x2xi16 buffers.
// NOTE(review): presumably the i32 store through a raw pointer into %alloca_0,
// mixed with affine loads/stores on the same buffer, is what trips mem2reg;
// the file itself only records that the pass crashes on this input. Confirm
// before changing any statement or its position.
func.func @_ZNK7__half2cv11__half2_rawEv(%arg0: memref<?x!llvm.struct<(struct<(i16)>, struct<(i16)>)>>, %arg1: memref<?x2xi16>) attributes {llvm.linkage = #llvm.linkage<linkonce_odr>} {
%c0_i16 = arith.constant 0 : i16
%alloca = memref.alloca() : memref<1x2xi16>
%alloca_0 = memref.alloca() : memref<1x2xi16>
// Zero-initialise both elements of the staging buffer.
affine.store %c0_i16, %alloca_0[0, 0] : memref<1x2xi16>
affine.store %c0_i16, %alloca_0[0, 1] : memref<1x2xi16>
// Reinterpret the source struct memref as a pointer to i32 and load one i32.
%0 = "polygeist.memref2pointer"(%arg0) : (memref<?x!llvm.struct<(struct<(i16)>, struct<(i16)>)>>) -> !llvm.ptr<i32>
%1 = llvm.load %0 : !llvm.ptr<i32>
// Reinterpret the staging buffer as a pointer to i32 and store that i32 into
// it; this writes the same memory as the two zero stores above.
%2 = "polygeist.memref2pointer"(%alloca_0) : (memref<1x2xi16>) -> !llvm.ptr<i32>
llvm.store %1, %2 : !llvm.ptr<i32>
// Element-wise copy: staging buffer -> %alloca.
%3 = affine.load %alloca_0[0, 0] : memref<1x2xi16>
affine.store %3, %alloca[0, 0] : memref<1x2xi16>
%4 = affine.load %alloca_0[0, 1] : memref<1x2xi16>
affine.store %4, %alloca[0, 1] : memref<1x2xi16>
// Element-wise copy: %alloca -> output argument.
%5 = affine.load %alloca[0, 0] : memref<1x2xi16>
affine.store %5, %arg1[0, 0] : memref<?x2xi16>
%6 = affine.load %alloca[0, 1] : memref<1x2xi16>
affine.store %6, %arg1[0, 1] : memref<?x2xi16>
return
}
// Entry point that ignores both of its arguments and returns the constant 0.
func.func @main(%arg0: i32, %arg1: memref<?xmemref<?xi8>>) -> i32 attributes {llvm.linkage = #llvm.linkage<external>} {
  %zero = arith.constant 0 : i32
  func.return %zero : i32
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment