Juan Fumero jjfumero

## install.sh
## Install compute-runtime for the OpenCL/LevelZero driver for the Intel HD Graphics
sudo dnf install intel-compute-runtime

## Link: https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit-download.html?operatingsystem=linux&distributions=offline

## Download and Install oneAPI 2024
wget https://registrationcenter-download.intel.com/akdlm/IRC_NAS/20f4e6a1-6b0b-4752-b8c1-e5eacba10e01/l_BaseKit_p_2024.0.0.49564_offline.sh
sudo sh ./l_BaseKit_p_2024.0.0.49564_offline.sh

## oneAPI env

## gist:bf9988dbfd40c7084c6c36a970b9eff6
Driver initialized.
zelLoaderGetVersions number of components found: 1
Version 0
Name: loader
Major: 1
Minor: 6
Patch: 0
Found ZE_DEVICE_TYPE_GPU device...
Driver version: 16929314
API version: 1.1

## copyLevelZeroSPIRV.cpp
#include "ze_api.h"

#include <cstring>
#include <fstream>
#include <iostream>
#include <limits>
#include <memory>
#include <vector>

/**

## clinfo
clinfo
Number of platforms:				 4
  Platform Profile:				 FULL_PROFILE
  Platform Version:				 OpenCL 1.2 CUDA 10.2.97
  Platform Name:				 NVIDIA CUDA
  Platform Vendor:				 NVIDIA Corporation
  Platform Extensions:				 cl_khr_global_int32_base_atomics cl_khr_global_int32_extended_atomics cl_khr_local_int32_base_atomics cl_khr_local_int32_extended_atomics cl_khr_fp64 cl_khr_byte_addressable_store cl_khr_icd cl_khr_gl_sharing cl_nv_compiler_options cl_nv_device_attribute_query cl_nv_pragma_unroll cl_nv_copy_opts cl_nv_create_buffer cl_khr_int64_base_atomics cl_khr_int64_extended_atomics
  Platform Profile:				 FULL_PROFILE
  Platform Version:				 OpenCL 1.2 LINUX
  Platform Name:				 Intel(R) OpenCL

## TornadoVMdemoJoker.sh
## Demo for TornadoVM at Joker<?> Conf. 2019

## run sequential code
tornado uk.ac.manchester.tornado.examples.dynamic.NBodyDynamic 4096 sequential 100

## run parallel code
tornado uk.ac.manchester.tornado.examples.dynamic.NBodyDynamic 4096 tornado 100

## run parallel code - debug
tornado --debug uk.ac.manchester.tornado.examples.dynamic.NBodyDynamic 4096 tornado 100

## reduce.cl
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
__kernel void reductionAddFloats(__global uchar *_heap_base, ulong _frame_base, __constant uchar *_constant_region, __local uchar *_local_region, __global uchar *_private_region)
{
  float f_35, f_23, f_29, f_30;
  ulong ul_40, ul_12, ul_28, ul_1, ul_0;
  int i_24, i_18, i_19, i_20, i_21, i_14, i_15, i_16, i_17, i_13, i_6, i_7, i_8, i_41, i_2, i_3, i_4, i_36, i_5, i_31, i_32, i_33;
  long l_38, l_39, l_37, l_10, l_26, l_11, l_27, l_9, l_25;
  bool z_22, z_34;

  __global ulong *_frame = (__global ulong *) &_heap_base[_frame_base];

## truffle-material.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                jjfumero
                / truffle-material.md
            
            
              Created
              April 20, 2018 20:51
                — forked from smarr/truffle-material.md
            
              
                Truffle: Languages and Material
              
          
    Introductory Material


Add Graal JIT Compilation to Your JVM Language in 5 Steps, A Tutorial
http://stefan-marr.de/2015/11/add-graal-jit-compilation-to-your-jvm-language-in-5-easy-steps-step-1/


The SimpleLanguage, an example of using Truffle with great JavaDocs. It is the official getting-started project:
https://github.com/graalvm/simplelanguage


Truffle Tutorial, Christian Wimmer, PLDI 2016, 3h recording
https://youtu.be/FJY96_6Y3a4 Slides


## JuanFumeroPhDThesis17.bib
@PhdThesis{Fumero17:AIPGPUs,
  title = "{Accelerating Interpreted Programming Languages on GPUs with Just-In-Time and Runtime Optimisations}",
  author = "Juan Fumero",
  school = "The University of Edinburgh, UK",
  month = "August",
  year = "2017",
}


## veeBibTex.bib
@inproceedings{Fumero:2017:JGC:3050748.3050761,
 author = {Fumero, Juan and Steuwer, Michel and Stadler, Lukas and Dubach, Christophe},
 title = {Just-In-Time GPU Compilation for Interpreted Languages with Partial Evaluation},
 booktitle = {Proceedings of the 13th ACM SIGPLAN/SIGOPS International Conference on Virtual Execution Environments},
 series = {VEE '17},
 year = {2017},
 isbn = {978-1-4503-4948-2},
 location = {Xi'an, China},
 pages = {60--73},
 numpages = {14},

## MatrixVector.java
@Test
public static void matrixVectorProduct() {
    System.out.println("Matrix Vector multiplication");
    final int DIM = 5;

    int [][]a = new int[DIM][DIM];
    int counter = 1;
    for (int i = 0; i < a.length; i++) {
        for (int j = 0; j < a[0].length; j++) {
            a[i][j] = counter++;
	## Install compute-runtime for the OpenCL/LevelZero driver for the Intel HD Graphics
	sudo dnf install intel-compute-runtime

	## Link: https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit-download.html?operatingsystem=linux&distributions=offline

	## Download and Install oneAPI 2024
	wget https://registrationcenter-download.intel.com/akdlm/IRC_NAS/20f4e6a1-6b0b-4752-b8c1-e5eacba10e01/l_BaseKit_p_2024.0.0.49564_offline.sh
	sudo sh ./l_BaseKit_p_2024.0.0.49564_offline.sh

	## oneAPI env
	Driver initialized.
	zelLoaderGetVersions number of components found: 1
	Version 0
	Name: loader
	Major: 1
	Minor: 6
	Patch: 0
	Found ZE_DEVICE_TYPE_GPU device...
	Driver version: 16929314
	API version: 1.1
	#include "ze_api.h"

	#include <cstring>
	#include <fstream>
	#include <iostream>
	#include <limits>
	#include <memory>
	#include <vector>

	/**
	clinfo
	Number of platforms: 4
	Platform Profile: FULL_PROFILE
	Platform Version: OpenCL 1.2 CUDA 10.2.97
	Platform Name: NVIDIA CUDA
	Platform Vendor: NVIDIA Corporation
	Platform Extensions: cl_khr_global_int32_base_atomics cl_khr_global_int32_extended_atomics cl_khr_local_int32_base_atomics cl_khr_local_int32_extended_atomics cl_khr_fp64 cl_khr_byte_addressable_store cl_khr_icd cl_khr_gl_sharing cl_nv_compiler_options cl_nv_device_attribute_query cl_nv_pragma_unroll cl_nv_copy_opts cl_nv_create_buffer cl_khr_int64_base_atomics cl_khr_int64_extended_atomics
	Platform Profile: FULL_PROFILE
	Platform Version: OpenCL 1.2 LINUX
	Platform Name: Intel(R) OpenCL
	## Demo for TornadoVM at Joker<?> Conf. 2019

	## run sequential code
	tornado uk.ac.manchester.tornado.examples.dynamic.NBodyDynamic 4096 sequential 100

	## run parallel code
	tornado uk.ac.manchester.tornado.examples.dynamic.NBodyDynamic 4096 tornado 100

	## run parallel code - debug
	tornado --debug uk.ac.manchester.tornado.examples.dynamic.NBodyDynamic 4096 tornado 100
	#pragma OPENCL EXTENSION cl_khr_fp64 : enable
	__kernel void reductionAddFloats(__global uchar _heap_base, ulong _frame_base, __constant uchar _constant_region, __local uchar _local_region, __global uchar _private_region)
	{
	float f_35, f_23, f_29, f_30;
	ulong ul_40, ul_12, ul_28, ul_1, ul_0;
	int i_24, i_18, i_19, i_20, i_21, i_14, i_15, i_16, i_17, i_13, i_6, i_7, i_8, i_41, i_2, i_3, i_4, i_36, i_5, i_31, i_32, i_33;
	long l_38, l_39, l_37, l_10, l_26, l_11, l_27, l_9, l_25;
	bool z_22, z_34;

	__global ulong _frame = (__global ulong ) &_heap_base[_frame_base];
	@PhdThesis{Fumero17:AIPGPUs,
	title = "{Accelerating Interpreted Programming Languages on GPUs with Just-In-Time and Runtime Optimisations}",
	author = "Juan Fumero",
	school = "The University of Edinburgh, UK",
	month = "August",
	year = "2017",
	}
	@inproceedings{Fumero:2017:JGC:3050748.3050761,
	author = {Fumero, Juan and Steuwer, Michel and Stadler, Lukas and Dubach, Christophe},
	title = {Just-In-Time GPU Compilation for Interpreted Languages with Partial Evaluation},
	booktitle = {Proceedings of the 13th ACM SIGPLAN/SIGOPS International Conference on Virtual Execution Environments},
	series = {VEE '17},
	year = {2017},
	isbn = {978-1-4503-4948-2},
	location = {Xi'an, China},
	pages = {60--73},
	numpages = {14},
	@Test
	public static void matrixVectorProduct() {
	System.out.println("Matrix Vector multiplication");
	final int DIM = 5;

	int [][]a = new int[DIM][DIM];
	int counter = 1;
	for (int i = 0; i < a.length; i++) {
	for (int j = 0; j < a[0].length; j++) {
	a[i][j] = counter++;