-
-
Save SirYwell/d9ae4b5393de135ec15429c54d031820 to your computer and use it in GitHub Desktop.
TornadoVM output
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
WARNING: Using incubator modules: jdk.incubator.vector | |
Computing MxM of 512x512 | |
[INFO] Loading Backend: uk.ac.manchester.tornado.drivers.opencl.OCLTornadoDriverProvider@36f0f1be | |
TornadoGraph dependency matrix... | |
+----+---------------+ | |
| 5 [data]| <none> | |
|----+---------------+ | |
| 6 [data]| <none> | |
|----+---------------+ | |
| 7 [data]| <none> | |
|----+---------------+ | |
| 8 [data]| <none> | |
|----+---------------+ | |
| 9 [data]| 10 | |
|----+---------------+ | |
| 10 [task]| 6 7 8 | |
|----+---------------+ | |
| 11 [data]| 10 | |
|----+---------------+ | |
| 12 [data]| 11 | |
|----+---------------+ | |
| 13 [data]| 11 | |
|----+---------------+ | |
| 14 [data]| 11 | |
|----+---------------+ | |
----------------------------------- | |
Device Table: | |
[0]: [NVIDIA CUDA] -- NVIDIA GeForce RTX 2070 SUPER | |
Constant Table: | |
[0]: 512 | |
Object Table: | |
[0]: 0x4cf4d528 MatrixFloat <512 x 512> | |
[1]: 0x73a8da0f MatrixFloat <512 x 512> | |
[2]: 0x7fbdb894 MatrixFloat <512 x 512> | |
Task Table: | |
[0]: task s0.t0 - matrixMultiplication | |
----------------------------------- | |
----------------------------------- | |
TaskGraph: | |
[0]: constant 0 | |
[1]: object 0 | |
[2]: object 1 | |
[3]: object 2 | |
[4]: context device=0, [ 5 6 7 8 10 11 12 13 14 ] | |
[5]: persist node | |
[6]: copy in object 0 | |
[7]: copy in object 1 | |
[8]: copy in object 2 | |
[9]: dependent write on object 2 by task 10 | |
[10]: task=0, args=[ 6 7 8 0 ] | |
[11]: copy out object 2 after task 10 | |
[12]: deallocate object 1 after 11 | |
[13]: deallocate object 2 after 11 | |
[14]: deallocate object 3 after 11 | |
----------------------------------- | |
#pragma OPENCL EXTENSION cl_khr_fp64 : enable | |
#pragma OPENCL EXTENSION cl_khr_fp16 : enable | |
#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable | |
// Machine-generated OpenCL kernel as printed in this log (each line carries the
// log's trailing "| |" residue, which is not part of the kernel itself).
//
// What the visible code does: for every (i_10, i_13) index pair this work-item
// visits, it accumulates 512 fma() products of one float read through A and one
// float read through B, then stores the accumulated float through C.
//
// Parameters:
//   A, B        - byte pointers to the two inputs that are read.
//   C           - byte pointer to the output that is written.
//   size        - not referenced in the body; every loop bound is the literal 512.
//   _kernel_context, _constant_region, _local_region, _atomics
//               - not referenced anywhere in the body.
//
// NOTE(review): the log line right after this kernel reports clBuildProgram
// returning -11; nothing in the kernel text shown here identifies the cause.
__kernel void matrixMultiplication(__global long *_kernel_context, __constant uchar *_constant_region, __local uchar *_local_region, __global int *_atomics, __global uchar *A, __global uchar *B, __global uchar *C, __private int size) | |
{ | |
ulong ul_18, ul_25, ul_24, ul_22, ul_17, ul_5, ul_35, ul_2, ul_34, ul_7, ul_39, ul_6, ul_1, ul_0, ul_30; | |
long l_20, l_21, l_37, l_38, l_28, l_29; | |
int i_33, i_3, i_4, i_36, i_9, i_41, i_8, i_40, i_11, i_10, i_13, i_12, i_14, i_16, i_19, i_27, i_26; | |
float f_32, f_15, f_31, f_23; | |
// BLOCK 0 | |
// Buffer addresses are handled as plain 64-bit integers from here on.
ul_0 = (ulong) A; | |
ul_1 = (ulong) B; | |
ul_2 = (ulong) C; | |
// Global sizes in dimensions 0 and 1 become the loop strides further down.
i_3 = get_global_size(0); | |
i_4 = get_global_size(1); | |
// Address of the 8-byte word at byte offset 32 of each buffer; that word is later
// loaded and added to the buffer base.
// NOTE(review): presumably a reference field of the MatrixFloat object - confirm.
ul_5 = ul_2 + 32L; | |
ul_6 = ul_1 + 32L; | |
ul_7 = ul_0 + 32L; | |
// Global ids in dimensions 0 and 1 are the starting values of the two outer loops.
i_8 = get_global_id(0); | |
i_9 = get_global_id(1); | |
// BLOCK 1 MERGES [0 8 ] | |
// Outer loop: i_10 starts at get_global_id(1) and advances by get_global_size(1).
i_10 = i_9; | |
for(;i_10 < 512;) | |
{ | |
// BLOCK 2 | |
// i_12 = i_10 * 512 + 6.
// NOTE(review): the +6 (in float elements) presumably skips a header - confirm.
i_11 = i_10 << 9; | |
i_12 = i_11 + 6; | |
// BLOCK 3 MERGES [2 7 ] | |
// Middle loop: i_13 starts at get_global_id(0) and advances by get_global_size(0).
i_13 = i_8; | |
for(;i_13 < 512;) | |
{ | |
// BLOCK 4 | |
i_14 = i_13 + 6; | |
// BLOCK 5 MERGES [4 6 ] | |
// f_15 is the running sum for the current (i_10, i_13) pair.
f_15 = 0.0F; | |
i_16 = 0; | |
for(;i_16 < 512;) | |
{ | |
// BLOCK 6 | |
// Float read through A at element index i_10*512 + 6 + i_16 (shifted left by 2
// to get bytes), relative to A plus the word loaded from A+32. That word is
// re-loaded on every iteration.
ul_17 = *((__global ulong *) ul_7); | |
ul_18 = ul_0 + ul_17; | |
i_19 = i_12 + i_16; | |
l_20 = (long) i_19; | |
l_21 = l_20 << 2; | |
ul_22 = ul_18 + l_21; | |
f_23 = *((__global float *) ul_22); | |
// Float read through B at element index i_16*512 + i_13 + 6, relative to B plus
// the word loaded from B+32.
ul_24 = *((__global ulong *) ul_6); | |
ul_25 = ul_1 + ul_24; | |
i_26 = i_16 << 9; | |
i_27 = i_26 + i_14; | |
l_28 = (long) i_27; | |
l_29 = l_28 << 2; | |
ul_30 = ul_25 + l_29; | |
f_31 = *((__global float *) ul_30); | |
// f_15 = f_23 * f_31 + f_15, then advance i_16 by one.
f_32 = fma(f_23, f_31, f_15); | |
i_33 = i_16 + 1; | |
f_15 = f_32; | |
i_16 = i_33; | |
} // B6 | |
// BLOCK 7 | |
// Store the sum through C at element index i_13 + i_10*512 + 6, relative to C plus
// the word loaded from C+32.
ul_34 = *((__global ulong *) ul_5); | |
ul_35 = ul_2 + ul_34; | |
i_36 = i_13 + i_12; | |
l_37 = (long) i_36; | |
l_38 = l_37 << 2; | |
ul_39 = ul_35 + l_38; | |
*((__global float *) ul_39) = f_15; | |
i_40 = i_3 + i_13; | |
i_13 = i_40; | |
} // B7 | |
// BLOCK 8 | |
i_41 = i_4 + i_10; | |
i_10 = i_41; | |
} // B8 | |
// BLOCK 9 | |
return; | |
} // kernel | |
[TornadoVM-OCL-JNI] ERROR : clBuildProgram -> Returned: -11 | |
#pragma OPENCL EXTENSION cl_khr_fp64 : enable | |
#pragma OPENCL EXTENSION cl_khr_fp16 : enable | |
#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable | |
// Second printout of the generated matrixMultiplication kernel in this log; the
// text matches the first printout earlier in the log. Each line carries the log's
// trailing "| |" residue, which is not part of the kernel itself.
//
// What the visible code does: for every (i_10, i_13) index pair this work-item
// visits, it sums 512 fma() products of a float read through A and a float read
// through B, then writes the sum through C.
//
// Parameters:
//   A, B        - byte pointers to the two inputs that are read.
//   C           - byte pointer to the output that is written.
//   size        - never referenced in the body; loop bounds are the literal 512.
//   _kernel_context, _constant_region, _local_region, _atomics
//               - never referenced in the body.
//
// NOTE(review): the log line right after this kernel again reports clBuildProgram
// returning -11; the kernel text shown here does not reveal the cause.
__kernel void matrixMultiplication(__global long *_kernel_context, __constant uchar *_constant_region, __local uchar *_local_region, __global int *_atomics, __global uchar *A, __global uchar *B, __global uchar *C, __private int size) | |
{ | |
ulong ul_18, ul_25, ul_24, ul_22, ul_17, ul_5, ul_35, ul_2, ul_34, ul_7, ul_39, ul_6, ul_1, ul_0, ul_30; | |
long l_20, l_21, l_37, l_38, l_28, l_29; | |
int i_33, i_3, i_4, i_36, i_9, i_41, i_8, i_40, i_11, i_10, i_13, i_12, i_14, i_16, i_19, i_27, i_26; | |
float f_32, f_15, f_31, f_23; | |
// BLOCK 0 | |
// The three buffer pointers are converted to 64-bit integers for address arithmetic.
ul_0 = (ulong) A; | |
ul_1 = (ulong) B; | |
ul_2 = (ulong) C; | |
// Loop strides: global size of dimension 0 (middle loop) and 1 (outer loop).
i_3 = get_global_size(0); | |
i_4 = get_global_size(1); | |
// Byte offset 32 in each buffer holds an 8-byte word that is loaded later and
// added to the buffer base.
// NOTE(review): presumably a reference field of the MatrixFloat object - confirm.
ul_5 = ul_2 + 32L; | |
ul_6 = ul_1 + 32L; | |
ul_7 = ul_0 + 32L; | |
// Loop start values: global id of dimension 0 (middle loop) and 1 (outer loop).
i_8 = get_global_id(0); | |
i_9 = get_global_id(1); | |
// BLOCK 1 MERGES [0 8 ] | |
// Outer loop over i_10: from get_global_id(1), step get_global_size(1), while < 512.
i_10 = i_9; | |
for(;i_10 < 512;) | |
{ | |
// BLOCK 2 | |
// i_12 = i_10 * 512 + 6.
// NOTE(review): the constant 6 (float elements) presumably skips a header - confirm.
i_11 = i_10 << 9; | |
i_12 = i_11 + 6; | |
// BLOCK 3 MERGES [2 7 ] | |
// Middle loop over i_13: from get_global_id(0), step get_global_size(0), while < 512.
i_13 = i_8; | |
for(;i_13 < 512;) | |
{ | |
// BLOCK 4 | |
i_14 = i_13 + 6; | |
// BLOCK 5 MERGES [4 6 ] | |
// Accumulator for the current (i_10, i_13) pair, reset before the inner loop.
f_15 = 0.0F; | |
i_16 = 0; | |
for(;i_16 < 512;) | |
{ | |
// BLOCK 6 | |
// Operand from A: float at element index i_10*512 + 6 + i_16 (times 4 bytes),
// measured from A plus the word loaded from A+32 on each iteration.
ul_17 = *((__global ulong *) ul_7); | |
ul_18 = ul_0 + ul_17; | |
i_19 = i_12 + i_16; | |
l_20 = (long) i_19; | |
l_21 = l_20 << 2; | |
ul_22 = ul_18 + l_21; | |
f_23 = *((__global float *) ul_22); | |
// Operand from B: float at element index i_16*512 + i_13 + 6 (times 4 bytes),
// measured from B plus the word loaded from B+32 on each iteration.
ul_24 = *((__global ulong *) ul_6); | |
ul_25 = ul_1 + ul_24; | |
i_26 = i_16 << 9; | |
i_27 = i_26 + i_14; | |
l_28 = (long) i_27; | |
l_29 = l_28 << 2; | |
ul_30 = ul_25 + l_29; | |
f_31 = *((__global float *) ul_30); | |
// Fused multiply-add into the accumulator, then increment i_16.
f_32 = fma(f_23, f_31, f_15); | |
i_33 = i_16 + 1; | |
f_15 = f_32; | |
i_16 = i_33; | |
} // B6 | |
// BLOCK 7 | |
// Result written through C at element index i_13 + i_10*512 + 6 (times 4 bytes),
// measured from C plus the word loaded from C+32.
ul_34 = *((__global ulong *) ul_5); | |
ul_35 = ul_2 + ul_34; | |
i_36 = i_13 + i_12; | |
l_37 = (long) i_36; | |
l_38 = l_37 << 2; | |
ul_39 = ul_35 + l_38; | |
*((__global float *) ul_39) = f_15; | |
i_40 = i_3 + i_13; | |
i_13 = i_40; | |
} // B7 | |
// BLOCK 8 | |
i_41 = i_4 + i_10; | |
i_10 = i_41; | |
} // B8 | |
// BLOCK 9 | |
return; | |
} // kernel | |
[TornadoVM-OCL-JNI] ERROR : clBuildProgram -> Returned: -11 | |
uk.ac.manchester.tornado.api.exceptions.TornadoRuntimeException: [ERROR] Generated Kernel is NULL. | |
Please report this issue to https://github.com/beehive-lab/TornadoVM | |
at tornado.drivers.opencl@1.0.3/uk.ac.manchester.tornado.drivers.opencl.graal.OCLInstalledCode.checkKernelNotNull(OCLInstalledCode.java:346) | |
at tornado.drivers.opencl@1.0.3/uk.ac.manchester.tornado.drivers.opencl.graal.OCLInstalledCode.submitWithoutEvents(OCLInstalledCode.java:352) | |
at tornado.drivers.opencl@1.0.3/uk.ac.manchester.tornado.drivers.opencl.graal.OCLInstalledCode.launchWithoutDependencies(OCLInstalledCode.java:392) | |
at tornado.runtime@1.0.3/uk.ac.manchester.tornado.runtime.interpreter.TornadoVMInterpreter.executeLaunch(TornadoVMInterpreter.java:747) | |
at tornado.runtime@1.0.3/uk.ac.manchester.tornado.runtime.interpreter.TornadoVMInterpreter.execute(TornadoVMInterpreter.java:338) | |
at tornado.runtime@1.0.3/uk.ac.manchester.tornado.runtime.interpreter.TornadoVMInterpreter.execute(TornadoVMInterpreter.java:855) | |
at java.base/java.util.Spliterators$ArraySpliterator.forEachRemaining(Spliterators.java:1024) | |
at java.base/java.util.stream.ReferencePipeline$Head.forEach(ReferencePipeline.java:762) | |
at tornado.runtime@1.0.3/uk.ac.manchester.tornado.runtime.TornadoVM.executeInterpreterSingleThreaded(TornadoVM.java:123) | |
at tornado.runtime@1.0.3/uk.ac.manchester.tornado.runtime.TornadoVM.execute(TornadoVM.java:110) | |
at tornado.runtime@1.0.3/uk.ac.manchester.tornado.runtime.tasks.TornadoTaskGraph.scheduleInner(TornadoTaskGraph.java:858) | |
at tornado.runtime@1.0.3/uk.ac.manchester.tornado.runtime.tasks.TornadoTaskGraph.execute(TornadoTaskGraph.java:1338) | |
at tornado.runtime@1.0.3/uk.ac.manchester.tornado.runtime.tasks.TornadoTaskGraph.execute(TornadoTaskGraph.java:1350) | |
at tornado.api@1.0.3/uk.ac.manchester.tornado.api.TaskGraph.execute(TaskGraph.java:777) | |
at tornado.api@1.0.3/uk.ac.manchester.tornado.api.ImmutableTaskGraph.execute(ImmutableTaskGraph.java:49) | |
at tornado.api@1.0.3/uk.ac.manchester.tornado.api.TornadoExecutionPlan$TornadoExecutor.lambda$execute$0(TornadoExecutionPlan.java:400) | |
at java.base/java.util.ArrayList.forEach(ArrayList.java:1596) | |
at tornado.api@1.0.3/uk.ac.manchester.tornado.api.TornadoExecutionPlan$TornadoExecutor.execute(TornadoExecutionPlan.java:400) | |
at tornado.api@1.0.3/uk.ac.manchester.tornado.api.TornadoExecutionPlan.execute(TornadoExecutionPlan.java:116) | |
at example.MatrixMultiplication.main(MatrixMultiplication.java:95) | |
Bailout from LAUNCH Bytecode: | |
Reason: uk.ac.manchester.tornado.api.exceptions.TornadoRuntimeException: [ERROR] Generated Kernel is NULL. | |
Please report this issue to https://github.com/beehive-lab/TornadoVM | |
tornado.runtime@1.0.3/uk.ac.manchester.tornado.runtime.interpreter.TornadoVMInterpreter.executeLaunch(TornadoVMInterpreter.java:755) | |
tornado.runtime@1.0.3/uk.ac.manchester.tornado.runtime.interpreter.TornadoVMInterpreter.execute(TornadoVMInterpreter.java:338) | |
tornado.runtime@1.0.3/uk.ac.manchester.tornado.runtime.interpreter.TornadoVMInterpreter.execute(TornadoVMInterpreter.java:855) | |
java.base/java.util.Spliterators$ArraySpliterator.forEachRemaining(Spliterators.java:1024) | |
java.base/java.util.stream.ReferencePipeline$Head.forEach(ReferencePipeline.java:762) | |
tornado.runtime@1.0.3/uk.ac.manchester.tornado.runtime.TornadoVM.executeInterpreterSingleThreaded(TornadoVM.java:123) | |
tornado.runtime@1.0.3/uk.ac.manchester.tornado.runtime.TornadoVM.execute(TornadoVM.java:110) | |
tornado.runtime@1.0.3/uk.ac.manchester.tornado.runtime.tasks.TornadoTaskGraph.scheduleInner(TornadoTaskGraph.java:858) | |
tornado.runtime@1.0.3/uk.ac.manchester.tornado.runtime.tasks.TornadoTaskGraph.execute(TornadoTaskGraph.java:1338) | |
tornado.runtime@1.0.3/uk.ac.manchester.tornado.runtime.tasks.TornadoTaskGraph.execute(TornadoTaskGraph.java:1350) | |
tornado.api@1.0.3/uk.ac.manchester.tornado.api.TaskGraph.execute(TaskGraph.java:777) | |
tornado.api@1.0.3/uk.ac.manchester.tornado.api.ImmutableTaskGraph.execute(ImmutableTaskGraph.java:49) | |
tornado.api@1.0.3/uk.ac.manchester.tornado.api.TornadoExecutionPlan$TornadoExecutor.lambda$execute$0(TornadoExecutionPlan.java:400) | |
java.base/java.util.ArrayList.forEach(ArrayList.java:1596) | |
tornado.api@1.0.3/uk.ac.manchester.tornado.api.TornadoExecutionPlan$TornadoExecutor.execute(TornadoExecutionPlan.java:400) | |
tornado.api@1.0.3/uk.ac.manchester.tornado.api.TornadoExecutionPlan.execute(TornadoExecutionPlan.java:116) | |
example.MatrixMultiplication.main(MatrixMultiplication.java:95) | |
Single Threaded CPU Execution: 1.35 GFlops, Total time = 199 ms | |
Streams Execution: 7.46 GFlops, Total time = 36 ms | |
TornadoVM Execution on GPU (Accelerated): 268.44 GFlops, Total Time = 1 ms | |
Speedup: 199.0x | |
Verification false | |
cleanup: programs ..........0.000540105 s | |
cleanup: context ..........0.000007084 s | |
cleanup: total ..........0.000547189 s | |
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment.