Tadej Ciglarič t4c1

## lib.cpp
#include "lib.hpp"

/// Returns the successor of `i`, i.e. `i + 1`.
int dft::func(int i)
{
	const int successor = i + 1;
	return successor;
}

## async_bench.cpp
#include <chrono>
#include <sycl/sycl.hpp>
#include <vector>

// Number of `int` elements that fit in 128 MiB (1024 * 1024 * 128 bytes).
constexpr size_t buffer_size = 1024 * 1024 * 128 / sizeof(int);
// NOTE(review): presumably the amount of work done per kernel launch; its use
// is not visible in this excerpt — confirm.
constexpr int compute_amount = 70;
// NOTE(review): presumably the number of times the measurement is repeated —
// confirm against the benchmark loop.
constexpr int n_repeats = 100;

sycl::event run_kernel(sycl::queue q, volatile int *ptr, sycl::event wait) {
  return q.submit([&](sycl::handler &cgh) {

## bernoulli_logit_lpmf_bench.cpp
#define STAN_OPENCL
#define OPENCL_PLATFORM_ID 0
#define OPENCL_DEVICE_ID 0
#define CL_HPP_ENABLE_EXCEPTIONS
#define CL_HPP_TARGET_OPENCL_VERSION 120
#define CL_HPP_MINIMUM_OPENCL_VERSION 120
#define STAN_THREADS 4
#include <benchmark/benchmark.h>
#include <stan/math/opencl/rev/opencl.hpp>
#include <stan/math.hpp>

## bernoulli_lpmf_benchmark.cpp
#define STAN_OPENCL
#define OPENCL_PLATFORM_ID 0
#define OPENCL_DEVICE_ID 0
#define CL_HPP_ENABLE_EXCEPTIONS
#define CL_HPP_TARGET_OPENCL_VERSION 120
#define CL_HPP_MINIMUM_OPENCL_VERSION 120
#define STAN_THREADS
#include <benchmark/benchmark.h>
#include <stan/math/opencl/rev/opencl.hpp>
#include <stan/math.hpp>

## holder_bench.cpp
#include <benchmark/benchmark.h>
#include <stan/math.hpp>

using namespace stan::math;
using namespace Eigen;
using namespace std;

template <typename EigMat>
inline auto value_of_eval(const EigMat& a) {
  return a.unaryExpr([](const auto& scal) { return value_of(scal); }).eval();

## funcs_checks.cpp
#include <benchmark/benchmark.h>
//#include <stan/math/prim/mat/fun/Eigen.hpp>
#include <stan/math.hpp>
#include <math.h>

using namespace std;
using namespace Eigen;

#define BENCH(fun) \
static void std_##fun(benchmark::State& state) { \

## eigendecomp_parts_bench.cpp
//#include <stan/math.hpp>
#include <stan/math/prim/mat/fun/symmetric_eigensolver.hpp>
#include <chrono>
#include <cstdio>

using namespace std;
using namespace Eigen;
using namespace stan::math;

using Scalar = double;

## clinfo1.txt
Number of platforms                               2
  Platform Name                                   NVIDIA CUDA
  Platform Vendor                                 NVIDIA Corporation
  Platform Version                                OpenCL 1.2 CUDA 10.1.120
  Platform Profile                                FULL_PROFILE
  Platform Extensions                             cl_khr_global_int32_base_atomics cl_khr_global_int32_extended_atomics cl_khr_local_int32_base_atomics cl_khr_local_int32_extended_atomics cl_khr_fp64 cl_khr_byte_addressable_store cl_khr_icd cl_khr_gl_sharing cl_nv_compiler_options cl_nv_device_attribute_query cl_nv_pragma_unroll cl_nv_d3d10_sharing cl_khr_d3d10_sharing cl_nv_d3d11_sharing cl_nv_copy_opts cl_nv_create_buffer
  Platform Extensions function suffix             NV

  Platform Name                                   Experimental OpenCL 2.1 CPU Only Platform
  Platform Vendor                                 Intel(R) Corporation

## plot_thresholds.py
import pandas, numpy
import matplotlib.pyplot as plt

# Load the table from "thresholds.txt" and copy the four columns used below
# into separate NumPy arrays.
data = pandas.read_csv("thresholds.txt")
models, speedup, attributes, cases = (
    numpy.array(data[column])
    for column in ("model", "speedup", "attributes", "cases")
)

# Row mask: the model is not "normal_id" AND (more than 20000 cases, OR more
# than 8000 cases when the model is "negbin_log_2").
not_normal_id = models != "normal_id"
many_cases = cases > 20000
negbin_mid_cases = numpy.logical_and(cases > 8000, models == "negbin_log_2")
selection = numpy.logical_and(
    not_normal_id,
    numpy.logical_or(many_cases, negbin_mid_cases),
)

## cov_benchmark.cpp
#include <iostream>

#define STAN_OPENCL
#define OPENCL_PLATFORM_ID 0
#define OPENCL_DEVICE_ID 0

#include <stan/math.hpp>
#include <stan/math/opencl/copy.hpp>
#include <stan/math/opencl/matrix_cl.hpp>
#include <stan/math/rev/mat/fun/gp_exp_quad_cov.hpp>
	#include <chrono>
	#include <sycl/sycl.hpp>
	#include <vector>

	// Number of `int` elements that fit in 128 MiB (1024 * 1024 * 128 bytes).
	constexpr size_t buffer_size = 1024 * 1024 * 128 / sizeof(int);
	// NOTE(review): presumably the amount of work done per kernel launch; its use
	// is not visible in this excerpt — confirm.
	constexpr int compute_amount = 70;
	// NOTE(review): presumably the number of times the measurement is repeated —
	// confirm against the benchmark loop.
	constexpr int n_repeats = 100;

	sycl::event run_kernel(sycl::queue q, volatile int *ptr, sycl::event wait) {
	return q.submit([&](sycl::handler &cgh) {
	#define STAN_OPENCL
	#define OPENCL_PLATFORM_ID 0
	#define OPENCL_DEVICE_ID 0
	#define CL_HPP_ENABLE_EXCEPTIONS
	#define CL_HPP_TARGET_OPENCL_VERSION 120
	#define CL_HPP_MINIMUM_OPENCL_VERSION 120
	#define STAN_THREADS 4
	#include <benchmark/benchmark.h>
	#include <stan/math/opencl/rev/opencl.hpp>
	#include <stan/math.hpp>
	#include <benchmark/benchmark.h>
	#include <stan/math.hpp>

	using namespace stan::math;
	using namespace Eigen;
	using namespace std;

	template <typename EigMat>
	inline auto value_of_eval(const EigMat& a) {
	return a.unaryExpr([](const auto& scal) { return value_of(scal); }).eval();
	#include <benchmark/benchmark.h>
	//#include <stan/math/prim/mat/fun/Eigen.hpp>
	#include <stan/math.hpp>
	#include <math.h>

	using namespace std;
	using namespace Eigen;

	#define BENCH(fun) \
	static void std_##fun(benchmark::State& state) { \
	//#include <stan/math.hpp>
	#include <stan/math/prim/mat/fun/symmetric_eigensolver.hpp>
	#include <chrono>
	#include <cstdio>

	using namespace std;
	using namespace Eigen;
	using namespace stan::math;

	using Scalar = double;
	Number of platforms 2
	Platform Name NVIDIA CUDA
	Platform Vendor NVIDIA Corporation
	Platform Version OpenCL 1.2 CUDA 10.1.120
	Platform Profile FULL_PROFILE
	Platform Extensions cl_khr_global_int32_base_atomics cl_khr_global_int32_extended_atomics cl_khr_local_int32_base_atomics cl_khr_local_int32_extended_atomics cl_khr_fp64 cl_khr_byte_addressable_store cl_khr_icd cl_khr_gl_sharing cl_nv_compiler_options cl_nv_device_attribute_query cl_nv_pragma_unroll cl_nv_d3d10_sharing cl_khr_d3d10_sharing cl_nv_d3d11_sharing cl_nv_copy_opts cl_nv_create_buffer
	Platform Extensions function suffix NV

	Platform Name Experimental OpenCL 2.1 CPU Only Platform
	Platform Vendor Intel(R) Corporation
	import pandas, numpy
	import matplotlib.pyplot as plt

	# Load the table from "thresholds.txt" and copy the columns used below into
	# separate NumPy arrays.
	data = pandas.read_csv("thresholds.txt")
	models=numpy.array(data["model"])
	speedup=numpy.array(data["speedup"])
	attributes=numpy.array(data["attributes"])
	cases=numpy.array(data["cases"])

	# Row mask: the model is not "normal_id" AND (more than 20000 cases, OR more
	# than 8000 cases when the model is "negbin_log_2").
	selection = numpy.logical_and(models!="normal_id",numpy.logical_or(cases>20000,numpy.logical_and(cases>8000, models=="negbin_log_2")))
	#include <iostream>

	#define STAN_OPENCL
	#define OPENCL_PLATFORM_ID 0
	#define OPENCL_DEVICE_ID 0

	#include <stan/math.hpp>
	#include <stan/math/opencl/copy.hpp>
	#include <stan/math/opencl/matrix_cl.hpp>
	#include <stan/math/rev/mat/fun/gp_exp_quad_cov.hpp>