(mlir-py39cuda12) nyck33@lenovo-gtx1650:/mnt/d/LLVM/NewPolygeistDir/llvm-project/mlir/test/INtegration/GPU/Cuda/TensorCore$ mlir-opt wmma-matmul-f16.mlir | mlir-opt -test-lower-to-nvvm="cubin-chip=sm_75 cubin-format=cubin" | mlir-cpu-runner --shared-libs=$MLIR_CUDA_RUNTIME --shared-libs=$MLIR_RUNNER_UTILS --entry-point-result=void
<stdin>:1:1: error: Invalid format specified.
module {
^
<stdin>:1:1: note: see current operation:
"builtin.module"() ({
"llvm.func"() <{CConv = #llvm.cconv<ccc>, function_type = !llvm.func<ptr (i64)>, linkage = #llvm.linkage<external>, sym_name = "malloc", visibility_ = 0 : i64}> ({
}) : () -> ()
"llvm.func"() <{CConv = #llvm.cconv<ccc>, function_type = !llvm.func<void ()>, linkage = #llvm.linkage<external>, sym_name = "main", visibility_ = 0 : i64}> ({
%0 = "llvm.mlir.constant"() <{value = 0.000000e+00 : f16}> : () -> f16
%1 = "llvm.mlir.constant"() <{value = 0 : index}> : () -> i64
%2 = "llvm.mlir.constant"() <{value = 16 : index}> : () -> i64
%3 = "llvm.mlir.constant"() <{value = 32 : index}> : () -> i64
%4 = "llvm.mlir.constant"() <{value = 1 : index}> : () -> i64
%5 = "builtin.unrealized_conversion_cast"(%4) : (i64) -> index
%6 = "builtin.unrealized_conversion_cast"(%3) : (i64) -> index
%7 = "builtin.unrealized_conversion_cast"(%1) : (i64) -> index
%8 = "llvm.mlir.constant"() <{value = 16 : index}> : () -> i64
%9 = "llvm.mlir.constant"() <{value = 16 : index}> : () -> i64
%10 = "llvm.mlir.constant"() <{value = 1 : index}> : () -> i64
%11 = "llvm.mlir.constant"() <{value = 256 : index}> : () -> i64
%12 = "llvm.mlir.zero"() : () -> !llvm.ptr
%13 = "llvm.getelementptr"(%12, %11) <{elem_type = f16, rawConstantIndices = array<i32: -2147483648>}> : (!llvm.ptr, i64) -> !llvm.ptr
%14 = "llvm.ptrtoint"(%13) : (!llvm.ptr) -> i64
%15 = "llvm.call"(%14) <{callee = @malloc, callee_type = !llvm.func<ptr (i64)>, fastmathFlags = #llvm.fastmath<none>}> : (i64) -> !llvm.ptr
%16 = "llvm.mlir.undef"() : () -> !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
%17 = "llvm.insertvalue"(%16, %15) <{position = array<i64: 0>}> : (!llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>, !llvm.ptr) -> !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
%18 = "llvm.insertvalue"(%17, %15) <{position = array<i64: 1>}> : (!llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>, !llvm.ptr) -> !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
%19 = "llvm.mlir.constant"() <{value = 0 : index}> : () -> i64
%20 = "llvm.insertvalue"(%18, %19) <{position = array<i64: 2>}> : (!llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>, i64) -> !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
%21 = "llvm.insertvalue"(%20, %8) <{position = array<i64: 3, 0>}> : (!llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>, i64) -> !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
%22 = "llvm.insertvalue"(%21, %9) <{position = array<i64: 3, 1>}> : (!llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>, i64) -> !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
%23 = "llvm.insertvalue"(%22, %9) <{position = array<i64: 4, 0>}> : (!llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>, i64) -> !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
%24 = "llvm.insertvalue"(%23, %10) <{position = array<i64: 4, 1>}> : (!llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>, i64) -> !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
%25 = "llvm.mlir.constant"() <{value = 16 : index}> : () -> i64
%26 = "llvm.mlir.constant"() <{value = 16 : index}> : () -> i64
%27 = "llvm.mlir.constant"() <{value = 1 : index}> : () -> i64
%28 = "llvm.mlir.constant"() <{value = 256 : index}> : () -> i64
%29 = "llvm.mlir.zero"() : () -> !llvm.ptr
%30 = "llvm.getelementptr"(%29, %28) <{elem_type = f16, rawConstantIndices = array<i32: -2147483648>}> : (!llvm.ptr, i64) -> !llvm.ptr
%31 = "llvm.ptrtoint"(%30) : (!llvm.ptr) -> i64
%32 = "llvm.call"(%31) <{callee = @malloc, callee_type = !llvm.func<ptr (i64)>, fastmathFlags = #llvm.fastmath<none>}> : (i64) -> !llvm.ptr
%33 = "llvm.mlir.undef"() : () -> !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
%34 = "llvm.insertvalue"(%33, %32) <{position = array<i64: 0>}> : (!llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>, !llvm.ptr) -> !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
%35 = "llvm.insertvalue"(%34, %32) <{position = array<i64: 1>}> : (!llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>, !llvm.ptr) -> !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
%36 = "llvm.mlir.constant"() <{value = 0 : index}> : () -> i64
%37 = "llvm.insertvalue"(%35, %36) <{position = array<i64: 2>}> : (!llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>, i64) -> !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
%38 = "llvm.insertvalue"(%37, %25) <{position = array<i64: 3, 0>}> : (!llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>, i64) -> !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
%39 = "llvm.insertvalue"(%38, %26) <{position = array<i64: 3, 1>}> : (!llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>, i64) -> !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
%40 = "llvm.insertvalue"(%39, %26) <{position = array<i64: 4, 0>}> : (!llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>, i64) -> !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
%41 = "llvm.insertvalue"(%40, %27) <{position = array<i64: 4, 1>}> : (!llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>, i64) -> !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
%42 = "llvm.mlir.constant"() <{value = 16 : index}> : () -> i64
%43 = "llvm.mlir.constant"() <{value = 16 : index}> : () -> i64
%44 = "llvm.mlir.constant"() <{value = 1 : index}> : () -> i64
%45 = "llvm.mlir.constant"() <{value = 256 : index}> : () -> i64
%46 = "llvm.mlir.zero"() : () -> !llvm.ptr
%47 = "llvm.getelementptr"(%46, %45) <{elem_type = f32, rawConstantIndices = array<i32: -2147483648>}> : (!llvm.ptr, i64) -> !llvm.ptr
%48 = "llvm.ptrtoint"(%47) : (!llvm.ptr) -> i64
%49 = "llvm.call"(%48) <{callee = @malloc, callee_type = !llvm.func<ptr (i64)>, fastmathFlags = #llvm.fastmath<none>}> : (i64) -> !llvm.ptr
%50 = "llvm.mlir.undef"() : () -> !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
%51 = "llvm.insertvalue"(%50, %49) <{position = array<i64: 0>}> : (!llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>, !llvm.ptr) -> !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
%52 = "llvm.insertvalue"(%51, %49) <{position = array<i64: 1>}> : (!llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>, !llvm.ptr) -> !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
%53 = "llvm.mlir.constant"() <{value = 0 : index}> : () -> i64
%54 = "llvm.insertvalue"(%52, %53) <{position = array<i64: 2>}> : (!llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>, i64) -> !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
%55 = "llvm.insertvalue"(%54, %42) <{position = array<i64: 3, 0>}> : (!llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>, i64) -> !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
%56 = "llvm.insertvalue"(%55, %43) <{position = array<i64: 3, 1>}> : (!llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>, i64) -> !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
%57 = "llvm.insertvalue"(%56, %43) <{position = array<i64: 4, 0>}> : (!llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>, i64) -> !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
%58 = "llvm.insertvalue"(%57, %44) <{position = array<i64: 4, 1>}> : (!llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>, i64) -> !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
"llvm.br"(%1)[^bb1] : (i64) -> ()
^bb1(%59: i64): // 2 preds: ^bb0, ^bb4
%60 = "builtin.unrealized_conversion_cast"(%59) : (i64) -> index
%61 = "builtin.unrealized_conversion_cast"(%60) : (index) -> i64
%62 = "llvm.icmp"(%61, %2) <{predicate = 2 : i64}> : (i64, i64) -> i1
"llvm.cond_br"(%62, %1, %1)[^bb2, ^bb5] <{operandSegmentSizes = array<i32: 1, 1, 1>}> : (i1, i64, i64) -> ()
^bb2(%63: i64): // 2 preds: ^bb1, ^bb3
%64 = "builtin.unrealized_conversion_cast"(%63) : (i64) -> index
%65 = "builtin.unrealized_conversion_cast"(%64) : (index) -> i64
%66 = "llvm.icmp"(%65, %2) <{predicate = 2 : i64}> : (i64, i64) -> i1
"llvm.cond_br"(%66)[^bb3, ^bb4] <{operandSegmentSizes = array<i32: 1, 0, 0>}> : (i1) -> ()
^bb3: // pred: ^bb2
%67 = "llvm.trunc"(%65) : (i64) -> i16
%68 = "llvm.sitofp"(%67) : (i16) -> f16
%69 = "llvm.extractvalue"(%24) <{position = array<i64: 1>}> : (!llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>) -> !llvm.ptr
%70 = "llvm.mlir.constant"() <{value = 16 : index}> : () -> i64
%71 = "llvm.mul"(%59, %70) : (i64, i64) -> i64
%72 = "llvm.add"(%71, %63) : (i64, i64) -> i64
%73 = "llvm.getelementptr"(%69, %72) <{elem_type = f16, rawConstantIndices = array<i32: -2147483648>}> : (!llvm.ptr, i64) -> !llvm.ptr
"llvm.store"(%68, %73) <{ordering = 0 : i64}> : (f16, !llvm.ptr) -> ()
%74 = "llvm.add"(%65, %4) : (i64, i64) -> i64
%75 = "builtin.unrealized_conversion_cast"(%74) : (i64) -> index
"llvm.br"(%74)[^bb2] : (i64) -> ()
^bb4: // pred: ^bb2
%76 = "llvm.add"(%61, %4) : (i64, i64) -> i64
%77 = "builtin.unrealized_conversion_cast"(%76) : (i64) -> index
"llvm.br"(%76)[^bb1] : (i64) -> ()
^bb5(%78: i64): // 2 preds: ^bb1, ^bb8
%79 = "builtin.unrealized_conversion_cast"(%78) : (i64) -> index
%80 = "builtin.unrealized_conversion_cast"(%79) : (index) -> i64
%81 = "llvm.icmp"(%80, %2) <{predicate = 2 : i64}> : (i64, i64) -> i1
"llvm.cond_br"(%81, %1)[^bb6, ^bb9] <{operandSegmentSizes = array<i32: 1, 1, 0>}> : (i1, i64) -> ()
^bb6(%82: i64): // 2 preds: ^bb5, ^bb7
%83 = "builtin.unrealized_conversion_cast"(%82) : (i64) -> index
%84 = "builtin.unrealized_conversion_cast"(%83) : (index) -> i64
%85 = "llvm.icmp"(%84, %2) <{predicate = 2 : i64}> : (i64, i64) -> i1
"llvm.cond_br"(%85)[^bb7, ^bb8] <{operandSegmentSizes = array<i32: 1, 0, 0>}> : (i1) -> ()
^bb7: // pred: ^bb6
%86 = "llvm.extractvalue"(%41) <{position = array<i64: 1>}> : (!llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>) -> !llvm.ptr
%87 = "llvm.mlir.constant"() <{value = 16 : index}> : () -> i64
%88 = "llvm.mul"(%78, %87) : (i64, i64) -> i64
%89 = "llvm.add"(%88, %82) : (i64, i64) -> i64
%90 = "llvm.getelementptr"(%86, %89) <{elem_type = f16, rawConstantIndices = array<i32: -2147483648>}> : (!llvm.ptr, i64) -> !llvm.ptr
"llvm.store"(%0, %90) <{ordering = 0 : i64}> : (f16, !llvm.ptr) -> ()
%91 = "llvm.add"(%84, %4) : (i64, i64) -> i64
%92 = "builtin.unrealized_conversion_cast"(%91) : (i64) -> index
"llvm.br"(%91)[^bb6] : (i64) -> ()
^bb8: // pred: ^bb6
%93 = "llvm.add"(%80, %4) : (i64, i64) -> i64
%94 = "builtin.unrealized_conversion_cast"(%93) : (i64) -> index
"llvm.br"(%93)[^bb5] : (i64) -> ()
^bb9: // pred: ^bb5
%95 = "llvm.mlir.constant"() <{value = 1 : index}> : () -> i64
%96 = "llvm.alloca"(%95) <{elem_type = !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>}> : (i64) -> !llvm.ptr
"llvm.store"(%24, %96) <{ordering = 0 : i64}> : (!llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>, !llvm.ptr) -> ()
%97 = "llvm.mlir.constant"() <{value = 2 : index}> : () -> i64
%98 = "llvm.mlir.undef"() : () -> !llvm.struct<(i64, ptr)>
%99 = "llvm.insertvalue"(%98, %97) <{position = array<i64: 0>}> : (!llvm.struct<(i64, ptr)>, i64) -> !llvm.struct<(i64, ptr)>
%100 = "llvm.insertvalue"(%99, %96) <{position = array<i64: 1>}> : (!llvm.struct<(i64, ptr)>, !llvm.ptr) -> !llvm.struct<(i64, ptr)>
%101 = "llvm.mlir.constant"() <{value = 1 : index}> : () -> i64
%102 = "llvm.alloca"(%101) <{elem_type = !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>}> : (i64) -> !llvm.ptr
"llvm.store"(%41, %102) <{ordering = 0 : i64}> : (!llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>, !llvm.ptr) -> ()
%103 = "llvm.mlir.constant"() <{value = 2 : index}> : () -> i64
%104 = "llvm.mlir.undef"() : () -> !llvm.struct<(i64, ptr)>
%105 = "llvm.insertvalue"(%104, %103) <{position = array<i64: 0>}> : (!llvm.struct<(i64, ptr)>, i64) -> !llvm.struct<(i64, ptr)>
%106 = "llvm.insertvalue"(%105, %102) <{position = array<i64: 1>}> : (!llvm.struct<(i64, ptr)>, !llvm.ptr) -> !llvm.struct<(i64, ptr)>
%107 = "llvm.mlir.constant"() <{value = 1 : index}> : () -> i64
%108 = "llvm.alloca"(%107) <{elem_type = !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>}> : (i64) -> !llvm.ptr
"llvm.store"(%58, %108) <{ordering = 0 : i64}> : (!llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>, !llvm.ptr) -> ()
%109 = "llvm.mlir.constant"() <{value = 2 : index}> : () -> i64
%110 = "llvm.mlir.undef"() : () -> !llvm.struct<(i64, ptr)>
%111 = "llvm.insertvalue"(%110, %109) <{position = array<i64: 0>}> : (!llvm.struct<(i64, ptr)>, i64) -> !llvm.struct<(i64, ptr)>
%112 = "llvm.insertvalue"(%111, %108) <{position = array<i64: 1>}> : (!llvm.struct<(i64, ptr)>, !llvm.ptr) -> !llvm.struct<(i64, ptr)>
%113 = "llvm.mlir.zero"() : () -> !llvm.ptr
%114 = "llvm.getelementptr"(%113) <{elem_type = f16, rawConstantIndices = array<i32: 1>}> : (!llvm.ptr) -> !llvm.ptr
%115 = "llvm.ptrtoint"(%114) : (!llvm.ptr) -> i64
%116 = "llvm.extractvalue"(%100) <{position = array<i64: 0>}> : (!llvm.struct<(i64, ptr)>) -> i64
%117 = "llvm.extractvalue"(%100) <{position = array<i64: 1>}> : (!llvm.struct<(i64, ptr)>) -> !llvm.ptr
"llvm.call"(%116, %117, %115) <{callee = @mgpuMemHostRegisterMemRef, callee_type = !llvm.func<void (i64, ptr, i64)>, fastmathFlags = #llvm.fastmath<none>}> : (i64, !llvm.ptr, i64) -> ()
%118 = "llvm.mlir.zero"() : () -> !llvm.ptr
%119 = "llvm.getelementptr"(%118) <{elem_type = f16, rawConstantIndices = array<i32: 1>}> : (!llvm.ptr) -> !llvm.ptr
%120 = "llvm.ptrtoint"(%119) : (!llvm.ptr) -> i64
%121 = "llvm.extractvalue"(%106) <{position = array<i64: 0>}> : (!llvm.struct<(i64, ptr)>) -> i64
%122 = "llvm.extractvalue"(%106) <{position = array<i64: 1>}> : (!llvm.struct<(i64, ptr)>) -> !llvm.ptr
"llvm.call"(%121, %122, %120) <{callee = @mgpuMemHostRegisterMemRef, callee_type = !llvm.func<void (i64, ptr, i64)>, fastmathFlags = #llvm.fastmath<none>}> : (i64, !llvm.ptr, i64) -> ()
%123 = "llvm.extractvalue"(%24) <{position = array<i64: 0>}> : (!llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>) -> !llvm.ptr
%124 = "llvm.extractvalue"(%24) <{position = array<i64: 1>}> : (!llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>) -> !llvm.ptr
%125 = "llvm.extractvalue"(%24) <{position = array<i64: 2>}> : (!llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>) -> i64
%126 = "llvm.extractvalue"(%24) <{position = array<i64: 3, 0>}> : (!llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>) -> i64
%127 = "llvm.extractvalue"(%24) <{position = array<i64: 3, 1>}> : (!llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>) -> i64
%128 = "llvm.extractvalue"(%24) <{position = array<i64: 4, 0>}> : (!llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>) -> i64
%129 = "llvm.extractvalue"(%24) <{position = array<i64: 4, 1>}> : (!llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>) -> i64
%130 = "llvm.extractvalue"(%41) <{position = array<i64: 0>}> : (!llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>) -> !llvm.ptr
%131 = "llvm.extractvalue"(%41) <{position = array<i64: 1>}> : (!llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>) -> !llvm.ptr
%132 = "llvm.extractvalue"(%41) <{position = array<i64: 2>}> : (!llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>) -> i64
%133 = "llvm.extractvalue"(%41) <{position = array<i64: 3, 0>}> : (!llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>) -> i64
%134 = "llvm.extractvalue"(%41) <{position = array<i64: 3, 1>}> : (!llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>) -> i64
%135 = "llvm.extractvalue"(%41) <{position = array<i64: 4, 0>}> : (!llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>) -> i64
%136 = "llvm.extractvalue"(%41) <{position = array<i64: 4, 1>}> : (!llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>) -> i64
"gpu.launch_func"(%4, %4, %4, %3, %4, %4, %123, %124, %125, %126, %127, %128, %129, %1, %130, %131, %132, %133, %134, %135, %136) <{kernel = @main_kernel::@main_kernel, operandSegmentSizes = array<i32: 0, 1, 1, 1, 1, 1, 1, 0, 15, 0>}> : (i64, i64, i64, i64, i64, i64, !llvm.ptr, !llvm.ptr, i64, i64, i64, i64, i64, i64, !llvm.ptr, !llvm.ptr, i64, i64, i64, i64, i64) -> ()
"llvm.br"(%1)[^bb10] : (i64) -> ()
^bb10(%137: i64): // 2 preds: ^bb9, ^bb13
%138 = "builtin.unrealized_conversion_cast"(%137) : (i64) -> index
%139 = "builtin.unrealized_conversion_cast"(%138) : (index) -> i64
%140 = "llvm.icmp"(%139, %2) <{predicate = 2 : i64}> : (i64, i64) -> i1
"llvm.cond_br"(%140, %1)[^bb11, ^bb14] <{operandSegmentSizes = array<i32: 1, 1, 0>}> : (i1, i64) -> ()
^bb11(%141: i64): // 2 preds: ^bb10, ^bb12
%142 = "builtin.unrealized_conversion_cast"(%141) : (i64) -> index
%143 = "builtin.unrealized_conversion_cast"(%142) : (index) -> i64
%144 = "llvm.icmp"(%143, %2) <{predicate = 2 : i64}> : (i64, i64) -> i1
"llvm.cond_br"(%144)[^bb12, ^bb13] <{operandSegmentSizes = array<i32: 1, 0, 0>}> : (i1) -> ()
^bb12: // pred: ^bb11
%145 = "llvm.extractvalue"(%24) <{position = array<i64: 1>}> : (!llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>) -> !llvm.ptr
%146 = "llvm.mlir.constant"() <{value = 16 : index}> : () -> i64
%147 = "llvm.mul"(%137, %146) : (i64, i64) -> i64
%148 = "llvm.add"(%147, %141) : (i64, i64) -> i64
%149 = "llvm.getelementptr"(%145, %148) <{elem_type = f16, rawConstantIndices = array<i32: -2147483648>}> : (!llvm.ptr, i64) -> !llvm.ptr
%150 = "llvm.load"(%149) <{ordering = 0 : i64}> : (!llvm.ptr) -> f16
%151 = "llvm.fpext"(%150) : (f16) -> f32
%152 = "llvm.extractvalue"(%58) <{position = array<i64: 1>}> : (!llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>) -> !llvm.ptr
%153 = "llvm.mlir.constant"() <{value = 16 : index}> : () -> i64
%154 = "llvm.mul"(%137, %153) : (i64, i64) -> i64
%155 = "llvm.add"(%154, %141) : (i64, i64) -> i64
%156 = "llvm.getelementptr"(%152, %155) <{elem_type = f32, rawConstantIndices = array<i32: -2147483648>}> : (!llvm.ptr, i64) -> !llvm.ptr
"llvm.store"(%151, %156) <{ordering = 0 : i64}> : (f32, !llvm.ptr) -> ()
%157 = "llvm.add"(%143, %4) : (i64, i64) -> i64
%158 = "builtin.unrealized_conversion_cast"(%157) : (i64) -> index
"llvm.br"(%157)[^bb11] : (i64) -> ()
^bb13: // pred: ^bb11
%159 = "llvm.add"(%139, %4) : (i64, i64) -> i64
%160 = "builtin.unrealized_conversion_cast"(%159) : (i64) -> index
"llvm.br"(%159)[^bb10] : (i64) -> ()
^bb14: // pred: ^bb10
%161 = "llvm.extractvalue"(%112) <{position = array<i64: 0>}> : (!llvm.struct<(i64, ptr)>) -> i64
%162 = "llvm.extractvalue"(%112) <{position = array<i64: 1>}> : (!llvm.struct<(i64, ptr)>) -> !llvm.ptr
"llvm.call"(%161, %162) <{callee = @printMemrefF32, fastmathFlags = #llvm.fastmath<none>}> : (i64, !llvm.ptr) -> ()
"llvm.return"() : () -> ()
}) : () -> ()
"gpu.module"() <{targets = [#nvvm.target<chip = "sm_75">]}> ({
"llvm.func"() <{CConv = #llvm.cconv<ccc>, function_type = !llvm.func<void (ptr, ptr, i64, i64, i64, i64, i64, i64, ptr, ptr, i64, i64, i64, i64, i64)>, linkage = #llvm.linkage<external>, sym_name = "main_kernel", visibility_ = 0 : i64}> ({
^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: i64, %arg3: i64, %arg4: i64, %arg5: i64, %arg6: i64, %arg7: i64, %arg8: !llvm.ptr, %arg9: !llvm.ptr, %arg10: i64, %arg11: i64, %arg12: i64, %arg13: i64, %arg14: i64):
%0 = "llvm.mlir.constant"() <{value = 16 : index}> : () -> i32
%1 = "llvm.mlir.constant"() <{value = 16 : index}> : () -> i64
"llvm.br"()[^bb1] : () -> ()
^bb1: // pred: ^bb0
%2 = "llvm.mul"(%arg7, %1) : (i64, i64) -> i64
%3 = "llvm.add"(%2, %arg7) : (i64, i64) -> i64
%4 = "llvm.getelementptr"(%arg1, %3) <{elem_type = f16, rawConstantIndices = array<i32: -2147483648>}> : (!llvm.ptr, i64) -> !llvm.ptr
%5 = "nvvm.wmma.load"(%4, %0) <{eltype = #nvvm.mma_type<f16>, frag = #nvvm.mma_frag<a>, k = 16 : i32, layout = #nvvm.mma_layout<col>, m = 16 : i32, n = 16 : i32}> : (!llvm.ptr, i32) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>
%6 = "nvvm.wmma.load"(%4, %0) <{eltype = #nvvm.mma_type<f16>, frag = #nvvm.mma_frag<b>, k = 16 : i32, layout = #nvvm.mma_layout<row>, m = 16 : i32, n = 16 : i32}> : (!llvm.ptr, i32) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>
%7 = "llvm.getelementptr"(%arg9, %3) <{elem_type = f16, rawConstantIndices = array<i32: -2147483648>}> : (!llvm.ptr, i64) -> !llvm.ptr
%8 = "nvvm.wmma.load"(%7, %0) <{eltype = #nvvm.mma_type<f16>, frag = #nvvm.mma_frag<c>, k = 16 : i32, layout = #nvvm.mma_layout<row>, m = 16 : i32, n = 16 : i32}> : (!llvm.ptr, i32) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>
%9 = "llvm.extractvalue"(%5) <{position = array<i64: 0>}> : (!llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>) -> vector<2xf16>
%10 = "llvm.extractvalue"(%5) <{position = array<i64: 1>}> : (!llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>) -> vector<2xf16>
%11 = "llvm.extractvalue"(%5) <{position = array<i64: 2>}> : (!llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>) -> vector<2xf16>
%12 = "llvm.extractvalue"(%5) <{position = array<i64: 3>}> : (!llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>) -> vector<2xf16>
%13 = "llvm.extractvalue"(%5) <{position = array<i64: 4>}> : (!llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>) -> vector<2xf16>
%14 = "llvm.extractvalue"(%5) <{position = array<i64: 5>}> : (!llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>) -> vector<2xf16>
%15 = "llvm.extractvalue"(%5) <{position = array<i64: 6>}> : (!llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>) -> vector<2xf16>
%16 = "llvm.extractvalue"(%5) <{position = array<i64: 7>}> : (!llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>) -> vector<2xf16>
%17 = "llvm.extractvalue"(%6) <{position = array<i64: 0>}> : (!llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>) -> vector<2xf16>
%18 = "llvm.extractvalue"(%6) <{position = array<i64: 1>}> : (!llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>) -> vector<2xf16>
%19 = "llvm.extractvalue"(%6) <{position = array<i64: 2>}> : (!llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>) -> vector<2xf16>
%20 = "llvm.extractvalue"(%6) <{position = array<i64: 3>}> : (!llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>) -> vector<2xf16>
%21 = "llvm.extractvalue"(%6) <{position = array<i64: 4>}> : (!llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>) -> vector<2xf16>
%22 = "llvm.extractvalue"(%6) <{position = array<i64: 5>}> : (!llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>) -> vector<2xf16>
%23 = "llvm.extractvalue"(%6) <{position = array<i64: 6>}> : (!llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>) -> vector<2xf16>
%24 = "llvm.extractvalue"(%6) <{position = array<i64: 7>}> : (!llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>) -> vector<2xf16>
%25 = "llvm.extractvalue"(%8) <{position = array<i64: 0>}> : (!llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>) -> vector<2xf16>
%26 = "llvm.extractvalue"(%8) <{position = array<i64: 1>}> : (!llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>) -> vector<2xf16>
%27 = "llvm.extractvalue"(%8) <{position = array<i64: 2>}> : (!llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>) -> vector<2xf16>
%28 = "llvm.extractvalue"(%8) <{position = array<i64: 3>}> : (!llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>) -> vector<2xf16>
%29 = "nvvm.wmma.mma"(%9, %10, %11, %12, %13, %14, %15, %16, %17, %18, %19, %20, %21, %22, %23, %24, %25, %26, %27, %28) <{eltypeA = #nvvm.mma_type<f16>, eltypeB = #nvvm.mma_type<f16>, k = 16 : i32, layoutA = #nvvm.mma_layout<col>, layoutB = #nvvm.mma_layout<row>, m = 16 : i32, n = 16 : i32}> : (vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>
%30 = "llvm.extractvalue"(%29) <{position = array<i64: 0>}> : (!llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>) -> vector<2xf16>
%31 = "llvm.extractvalue"(%29) <{position = array<i64: 1>}> : (!llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>) -> vector<2xf16>
%32 = "llvm.extractvalue"(%29) <{position = array<i64: 2>}> : (!llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>) -> vector<2xf16>
%33 = "llvm.extractvalue"(%29) <{position = array<i64: 3>}> : (!llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>) -> vector<2xf16>
"nvvm.wmma.store"(%4, %30, %31, %32, %33, %0) <{eltype = #nvvm.mma_type<f16>, k = 16 : i32, layout = #nvvm.mma_layout<row>, m = 16 : i32, n = 16 : i32}> : (!llvm.ptr, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, i32) -> ()
"llvm.return"() : () -> ()
}) {gpu.kernel, gpu.known_block_size = array<i32: 32, 1, 1>, gpu.known_grid_size = array<i32: 1, 1, 1>, nvvm.kernel} : () -> ()
"gpu.module_end"() : () -> ()
}) {sym_name = "main_kernel"} : () -> ()
"llvm.func"() <{CConv = #llvm.cconv<ccc>, function_type = !llvm.func<void (i64, ptr)>, linkage = #llvm.linkage<external>, sym_name = "printMemrefF32", visibility_ = 0 : i64}> ({
}) {sym_visibility = "private"} : () -> ()
"llvm.func"() <{CConv = #llvm.cconv<ccc>, function_type = !llvm.func<void (i64, ptr, i64)>, linkage = #llvm.linkage<external>, sym_name = "mgpuMemHostRegisterMemRef", visibility_ = 0 : i64}> ({
}) : () -> ()
}) {gpu.container_module} : () -> ()
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0. Program arguments: mlir-opt "-test-lower-to-nvvm=cubin-chip=sm_75 cubin-format=cubin"
^C
(mlir-py39cuda12) nyck33@lenovo-gtx1650:/mnt/d/LLVM/NewPolygeistDir/llvm-project/mlir/test/INtegration/GPU/Cuda/TensorCore$ mlir-opt wmma-matmul-f16.mlir | mlir-opt -test-lower-to-nvvm="cubin-chip=sm_75 cubin-format=fatbin" | mlir-cpu-runner --shared-libs=$MLIR_CUDA_RUNTIME --shared-libs=$MLIR_RUNNER_UTILS --entry-point-result=void
Failed to create MemoryBuffer for: /mnt/d/LLVM/NewPolygeistDir/llvm-project/mlir/test/INtegration/GPU/Cuda/TensorCore/
Error: Is a directory
Failed to create MemoryBuffer for: /mnt/d/LLVM/NewPolygeistDir/llvm-project/mlir/test/INtegration/GPU/Cuda/TensorCore/
Error: Is a directory
JIT session error: Symbols not found: [ printMemrefF32, mgpuLaunchKernel, mgpuMemHostRegisterMemRef, mgpuModuleGetFunction, mgpuModuleLoad, mgpuModuleUnload, mgpuStreamCreate, mgpuStreamDestroy, mgpuStreamSynchronize ]
Error: Failed to materialize symbols: { (main, { _mlir_main, main }) }
(mlir-py39cuda12) nyck33@lenovo-gtx1650:/mnt/d/LLVM/ NewPolygeistDir/llvm-project/mlir/test/INtegration/G
(mlir-py39cuda12) nyck33@lenovo-gtx1650:/mnt/d/LLVM/NewPolygeist
(mlir-py39cuda12) nyck33@lenovo-gtx1650:/mnt/d/LLVM/NewPolygeistDir/llvm-project/mlir/test/INtegration/GPU/Cuda/TensorCore$ mlir-opt wmma-matmul-f16.mlir | mlir-opt -test-lower-to-nvvm="cubin-chip=sm_75 cubin-format=fatbin" | mlir-cpu-runner --shared-libs=$MLIR_CUDA_RUNTIME --shared-libs=$MLIR_RUNNER_UTILS --entry-point-result=void -shared-libs=/mnt/d/LLVM/Polygeist/llvm-project/build/lib/libmlir_c_runner_utils.so
Failed to create MemoryBuffer for: /mnt/d/LLVM/NewPolygeistDir/llvm-project/mlir/test/INtegration/GPU/Cuda/TensorCore/
Error: Is a directory
Failed to create MemoryBuffer for: /mnt/d/LLVM/NewPolygeistDir/llvm-project/mlir/test/INtegration/GPU/Cuda/TensorCore/
Error: Is a directory
Failed to create MemoryBuffer for: /mnt/d/LLVM/Polygeist/llvm-project/build/lib/libmlir_c_runner_utils.so
Error: No such file or directory
JIT session error: Symbols not found: [ printMemrefF32, mgpuLaunchKernel, mgpuMemHostRegisterMemRef, mgpuModuleGetFunction, mgpuModuleLoad, mgpuModuleUnload, mgpuStreamCreate, mgpuStreamDestroy, mgpuStreamSynchronize ]
Error: Failed to materialize symbols: { (main, { _mlir_main, main }) }
(mlir-py39cuda12) nyck33@lenovo-gtx1650:/mnt/d/LLVM/NewPolygeistDir/llvm-project/mlir/test/INtegration/GPU/Cuda/TensorCore$
Created
April 5, 2024 08:38
-
-
Save nyck33/d8e44f19970cae728c407b45dfa6deac to your computer and use it in GitHub Desktop.
llvm issue for mlir
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment