-
-
Save ivanradanov/ac0db6d25e613469b3218f9271922b17 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// RUN: polygeist-opt --canonicalize --allow-unregistered-dialect --split-input-file %s | FileCheck %s | |
// Integer sets used as affine.if conditions in the functions below.
// Each one is a single equality constraint on its only dimension d0.
#set0 = affine_set<(d0) : (-d0 == 0)> | |
#set1 = affine_set<(d0) : (d0 == 0)> | |
// Three inputs for the canonicalizer. Each has a 16-iteration affine.parallel
// whose body contains an affine.if on the loop induction variable.
module { | |
// Case 1: the condition is #set0 (-d0 == 0). The if body creates and uses its
// own value and is preceded by an unrelated "test.pre" op. The expectations
// after this module place the create/use pair after the loop, with no
// affine.if left, and keep only "test.pre" inside the loop.
func.func @bpnn_train_cuda() { | |
affine.parallel (%arg7) = (0) to (16) { | |
"test.pre"() : () -> () | |
affine.if #set0(%arg7) { | |
%a = "test.create"() : () -> i32 | |
"test.use"(%a) : (i32) -> () | |
} | |
} | |
return | |
} | |
// Case 2: same body as case 1, but the condition is #set1 (d0 == 0).
// The expectations after this module are identical to those of case 1.
func.func @bpnn_train_cuda1() { | |
affine.parallel (%arg7) = (0) to (16) { | |
"test.pre"() : () -> () | |
affine.if #set1(%arg7) { | |
%a = "test.create"() : () -> i32 | |
"test.use"(%a) : (i32) -> () | |
} | |
} | |
return | |
} | |
// Case 3: the value used inside the affine.if is created in the loop body,
// outside the if. The expectations after this module keep the affine.if
// inside the loop, i.e. no rewrite is expected here.
func.func @bpnn_train_cuda2() { | |
affine.parallel (%arg7) = (0) to (16) { | |
%a = "test.create"() : () -> i32 | |
affine.if #set1(%arg7) { | |
"test.use"(%a) : (i32) -> () | |
} | |
} | |
return | |
} | |
} | |
// CHECK: func.func @bpnn_train_cuda() { | |
// CHECK-NEXT: affine.parallel (%[[arg0:.+]]) = (0) to (16) { | |
// CHECK-NEXT: "test.pre"() : () -> () | |
// CHECK-NEXT: } | |
// CHECK-NEXT: %[[V0:.+]] = "test.create"() : () -> i32 | |
// CHECK-NEXT: "test.use"(%[[V0]]) : (i32) -> () | |
// CHECK-NEXT: return | |
// CHECK-NEXT: } | |
// CHECK: func.func @bpnn_train_cuda1() { | |
// CHECK-NEXT: affine.parallel (%[[arg0:.+]]) = (0) to (16) { | |
// CHECK-NEXT: "test.pre"() : () -> () | |
// CHECK-NEXT: } | |
// CHECK-NEXT: %[[V0:.+]] = "test.create"() : () -> i32 | |
// CHECK-NEXT: "test.use"(%[[V0]]) : (i32) -> () | |
// CHECK-NEXT: return | |
// CHECK-NEXT: } | |
// CHECK: func.func @bpnn_train_cuda2() { | |
// CHECK-NEXT: affine.parallel (%[[arg0:.+]]) = (0) to (16) { | |
// CHECK-NEXT: %[[V0:.+]] = "test.create"() : () -> i32 | |
// CHECK-NEXT: affine.if #set(%[[arg0]]) { | |
// CHECK-NEXT: "test.use"(%[[V0]]) : (i32) -> () | |
// CHECK-NEXT: } | |
// CHECK-NEXT: } | |
// CHECK-NEXT: return | |
// CHECK-NEXT: } | |
// Two-dimensional integer set: holds only when both d0 == 0 and d1 == 0.
#set = affine_set<(d0, d1) : (d0 == 0, d1 == 0)> | |
// Reproducer for the affine.if sinking problem flagged by the TODO that
// follows this module.
module { | |
// Two nested 256-iteration affine.parallel loops. Each inner iteration
// allocates a scalar buffer, accumulates a sum of loads from %30 into it,
// and, when #set holds for (%arg4, %arg3), copies the sum into %14.
func.func @_Z9test_caseIiEiPili(%30: memref<1024xi32>, %14: memref<i32>, %0: i32, %arg0: memref<?xi32>, %arg1: i64, %arg2: i32) { | |
// Of the constants below, only %c0_i32 is referenced in this function;
// the others, and %alloca, have no uses here.
%c0 = arith.constant 0 : index | |
%c1 = arith.constant 1 : index | |
%c256 = arith.constant 256 : index | |
%c256_i64 = arith.constant 256 : i64 | |
%c4096_i64 = arith.constant 4096 : i64 | |
%c4_i64 = arith.constant 4 : i64 | |
%c1024_i32 = arith.constant 1024 : i32 | |
%c2_i32 = arith.constant 2 : i32 | |
%c0_i32 = arith.constant 0 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
%c0_i64 = arith.constant 0 : i64 | |
%alloca = memref.alloca() : memref<1xi32> | |
affine.parallel (%arg3) = (0) to (256) { | |
affine.parallel (%arg4) = (0) to (256) { | |
// Per-iteration accumulator. The store of %0 is immediately overwritten
// by the store of zero on the next line.
%alloca_1 = memref.alloca() : memref<i32> | |
affine.store %0, %alloca_1[] : memref<i32> | |
affine.store %c0_i32, %alloca_1[] : memref<i32> | |
// Accumulate %30[%arg6 + %arg5] into %alloca_1 with a load/add/store
// sequence on every inner iteration.
affine.for %arg5 = 0 to 4 { | |
affine.for %arg6 = 0 to 1024 step 4 { | |
%31 = affine.load %30[%arg6 + %arg5] : memref<1024xi32> | |
%32 = affine.load %alloca_1[] : memref<i32> | |
%33 = arith.addi %32, %31 : i32 | |
affine.store %33, %alloca_1[] : memref<i32> | |
} | |
} | |
// The guarded body reads %alloca_1, which is defined inside the loop and
// written by the reduction above, and stores the value to %14.
affine.if #set(%arg4, %arg3) { | |
%31 = affine.load %alloca_1[] : memref<i32> | |
affine.store %31, %14[] : memref<i32> | |
} | |
} | |
} | |
return | |
} | |
} | |
// CHECK: TODO: fix bug in AffineIfSinking | |
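// The current (incorrect) output is shown below. Note the duplicated memref: the load placed after the loop reads a fresh alloca, not the one written inside the loop. | |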
// module { | |
// func.func @_Z9test_caseIiEiPili(%arg0: memref<1024xi32>, %arg1: memref<i32>, %arg2: i32, %arg3: memref<?xi32>, %arg4: i64, %arg5: i32) { | |
// %c0_i32 = arith.constant 0 : i32 | |
// affine.parallel (%arg6, %arg7) = (0, 0) to (256, 256) { | |
// %alloca_0 = memref.alloca() : memref<i32> | |
// affine.store %arg2, %alloca_0[] : memref<i32> | |
// affine.store %c0_i32, %alloca_0[] : memref<i32> | |
// affine.for %arg8 = 0 to 4 { | |
// affine.for %arg9 = 0 to 1024 step 4 { | |
// %1 = affine.load %arg0[%arg9 + %arg8] : memref<1024xi32> | |
// %2 = affine.load %alloca_0[] : memref<i32> | |
// %3 = arith.addi %2, %1 : i32 | |
// affine.store %3, %alloca_0[] : memref<i32> | |
// } | |
// } | |
// } | |
// %alloca = memref.alloca() : memref<i32> | |
// %0 = affine.load %alloca[] : memref<i32> | |
// affine.store %0, %arg1[] : memref<i32> | |
// return | |
// } | |
// } | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// RUN: polygeist-opt --mem2reg --split-input-file %s | FileCheck %s | |
// CHECK: TODO: fix crash | |
// Input for the --mem2reg run above. The module carries data-layout and
// target attributes for an x86-64 host and an nvptx64 GPU module.
module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<i64, dense<64> : vector<2xi32>>, #dlti.dl_entry<f80, dense<128> : vector<2xi32>>, #dlti.dl_entry<i1, dense<8> : vector<2xi32>>, #dlti.dl_entry<i8, dense<8> : vector<2xi32>>, #dlti.dl_entry<i16, dense<16> : vector<2xi32>>, #dlti.dl_entry<i32, dense<32> : vector<2xi32>>, #dlti.dl_entry<f16, dense<16> : vector<2xi32>>, #dlti.dl_entry<f64, dense<64> : vector<2xi32>>, #dlti.dl_entry<f128, dense<128> : vector<2xi32>>>, llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-linux-gnu", polygeist.gpu_module.llvm.data_layout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64", polygeist.gpu_module.llvm.target_triple = "nvptx64-nvidia-cuda", "polygeist.target-cpu" = "x86-64", "polygeist.target-features" = "+cx8,+fxsr,+mmx,+sse,+sse2,+x87", "polygeist.tune-cpu" = "generic"} { | |
// Allocates a 1x2xi16 buffer, passes it (cast to a dynamic leading dimension)
// to @_ZNK7__half2cv11__half2_rawEv together with %arg0, then copies both
// elements of the buffer into %arg1.
func.func @_Z3fooR7__half2(%arg0: memref<?x!llvm.struct<(struct<(i16)>, struct<(i16)>)>>, %arg1: memref<?x2xi16>) attributes {llvm.linkage = #llvm.linkage<external>} { | |
%alloca = memref.alloca() : memref<1x2xi16> | |
%cast = memref.cast %alloca : memref<1x2xi16> to memref<?x2xi16> | |
call @_ZNK7__half2cv11__half2_rawEv(%arg0, %cast) : (memref<?x!llvm.struct<(struct<(i16)>, struct<(i16)>)>>, memref<?x2xi16>) -> () | |
%0 = affine.load %alloca[0, 0] : memref<1x2xi16> | |
%1 = affine.load %alloca[0, 1] : memref<1x2xi16> | |
affine.store %0, %arg1[0, 0] : memref<?x2xi16> | |
affine.store %1, %arg1[0, 1] : memref<?x2xi16> | |
return | |
} | |
// Copies the contents of %arg0 into %arg1 through two local 1x2xi16 buffers.
func.func @_ZNK7__half2cv11__half2_rawEv(%arg0: memref<?x!llvm.struct<(struct<(i16)>, struct<(i16)>)>>, %arg1: memref<?x2xi16>) attributes {llvm.linkage = #llvm.linkage<linkonce_odr>} { | |
%c0_i16 = arith.constant 0 : i16 | |
%alloca = memref.alloca() : memref<1x2xi16> | |
%alloca_0 = memref.alloca() : memref<1x2xi16> | |
// Zero both elements of %alloca_0 with affine stores.
affine.store %c0_i16, %alloca_0[0, 0] : memref<1x2xi16> | |
affine.store %c0_i16, %alloca_0[0, 1] : memref<1x2xi16> | |
// View %arg0 and %alloca_0 as !llvm.ptr<i32> and copy one i32 between them.
// NOTE(review): this llvm.store presumably overwrites both i16 elements
// zeroed above, so %alloca_0 is accessed through both affine and llvm ops;
// that mix looks like the case the pass has to handle here. TODO confirm.
%0 = "polygeist.memref2pointer"(%arg0) : (memref<?x!llvm.struct<(struct<(i16)>, struct<(i16)>)>>) -> !llvm.ptr<i32> | |
%1 = llvm.load %0 : !llvm.ptr<i32> | |
%2 = "polygeist.memref2pointer"(%alloca_0) : (memref<1x2xi16>) -> !llvm.ptr<i32> | |
llvm.store %1, %2 : !llvm.ptr<i32> | |
// Element-wise copy: %alloca_0 to %alloca, then %alloca to %arg1.
%3 = affine.load %alloca_0[0, 0] : memref<1x2xi16> | |
affine.store %3, %alloca[0, 0] : memref<1x2xi16> | |
%4 = affine.load %alloca_0[0, 1] : memref<1x2xi16> | |
affine.store %4, %alloca[0, 1] : memref<1x2xi16> | |
%5 = affine.load %alloca[0, 0] : memref<1x2xi16> | |
affine.store %5, %arg1[0, 0] : memref<?x2xi16> | |
%6 = affine.load %alloca[0, 1] : memref<1x2xi16> | |
affine.store %6, %arg1[0, 1] : memref<?x2xi16> | |
return | |
} | |
// Entry point that ignores its arguments and returns the constant 0.
func.func @main(%arg0: i32, %arg1: memref<?xmemref<?xi8>>) -> i32 attributes {llvm.linkage = #llvm.linkage<external>} { | |
%c0_i32 = arith.constant 0 : i32 | |
return %c0_i32 : i32 | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment