Skip to content

Instantly share code, notes, and snippets.

@t4c1
t4c1 / lib.cpp
Created November 18, 2022 14:33
Wrapper
#include "lib.hpp"
/// Returns the value immediately following `i`, i.e. `i + 1`.
int dft::func(int i) {
  const int successor = i + 1;
  return successor;
}
@t4c1
t4c1 / async_bench.cpp
Created May 6, 2022 09:21
Benchmarks for multiple streams in a DPC++ queue
#include <chrono>
#include <sycl/sycl.hpp>
#include <vector>
// Number of `int` elements that fit in 128 MiB (1024 * 1024 * 128 bytes).
constexpr size_t buffer_size = 1024 * 1024 * 128 / sizeof(int);
// NOTE(review): presumably controls how much work each kernel does; its use
// is not visible in this excerpt -- confirm.
constexpr int compute_amount = 70;
// NOTE(review): presumably the number of times the measurement is repeated;
// its use is not visible in this excerpt -- confirm.
constexpr int n_repeats = 100;
sycl::event run_kernel(sycl::queue q, volatile int *ptr, sycl::event wait) {
return q.submit([&](sycl::handler &cgh) {
#define STAN_OPENCL
#define OPENCL_PLATFORM_ID 0
#define OPENCL_DEVICE_ID 0
#define CL_HPP_ENABLE_EXCEPTIONS
#define CL_HPP_TARGET_OPENCL_VERSION 120
#define CL_HPP_MINIMUM_OPENCL_VERSION 120
#define STAN_THREADS 4
#include <benchmark/benchmark.h>
#include <stan/math/opencl/rev/opencl.hpp>
#include <stan/math.hpp>
#define STAN_OPENCL
#define OPENCL_PLATFORM_ID 0
#define OPENCL_DEVICE_ID 0
#define CL_HPP_ENABLE_EXCEPTIONS
#define CL_HPP_TARGET_OPENCL_VERSION 120
#define CL_HPP_MINIMUM_OPENCL_VERSION 120
#define STAN_THREADS
#include <benchmark/benchmark.h>
#include <stan/math/opencl/rev/opencl.hpp>
#include <stan/math.hpp>
@t4c1
t4c1 / holder_bench.cpp
Created June 5, 2020 10:51
value_of benchmark holder vs eval
#include <benchmark/benchmark.h>
#include <stan/math.hpp>
using namespace stan::math;
using namespace Eigen;
using namespace std;
template <typename EigMat>
inline auto value_of_eval(const EigMat& a) {
return a.unaryExpr([](const auto& scal) { return value_of(scal); }).eval();
@t4c1
t4c1 / funcs_checks.cpp
Created November 29, 2019 09:49
eigen funcs and checks
#include <benchmark/benchmark.h>
//#include <stan/math/prim/mat/fun/Eigen.hpp>
#include <stan/math.hpp>
#include <math.h>
using namespace std;
using namespace Eigen;
#define BENCH(fun) \
static void std_##fun(benchmark::State& state) { \
@t4c1
t4c1 / eigendecomp_parts_bench.cpp
Created November 13, 2019 09:34
Eigendecomposition times and errors
//#include <stan/math.hpp>
#include <stan/math/prim/mat/fun/symmetric_eigensolver.hpp>
#include <chrono>
#include <cstdio>
using namespace std;
using namespace Eigen;
using namespace stan::math;
using Scalar = double;
@t4c1
t4c1 / clinfo1.txt
Created September 13, 2019 06:37
my clinfo
Number of platforms 2
Platform Name NVIDIA CUDA
Platform Vendor NVIDIA Corporation
Platform Version OpenCL 1.2 CUDA 10.1.120
Platform Profile FULL_PROFILE
Platform Extensions cl_khr_global_int32_base_atomics cl_khr_global_int32_extended_atomics cl_khr_local_int32_base_atomics cl_khr_local_int32_extended_atomics cl_khr_fp64 cl_khr_byte_addressable_store cl_khr_icd cl_khr_gl_sharing cl_nv_compiler_options cl_nv_device_attribute_query cl_nv_pragma_unroll cl_nv_d3d10_sharing cl_khr_d3d10_sharing cl_nv_d3d11_sharing cl_nv_copy_opts cl_nv_create_buffer
Platform Extensions function suffix NV
Platform Name Experimental OpenCL 2.1 CPU Only Platform
Platform Vendor Intel(R) Corporation
@t4c1
t4c1 / plot_thresholds.py
Created September 2, 2019 10:16
Measurement and coefficient fitting for CPU-GPU threshold of GLMs in Stan
import pandas, numpy
import matplotlib.pyplot as plt
# Load the measurements from "thresholds.txt", parsed as CSV.
data = pandas.read_csv("thresholds.txt")
# Extract the columns of interest as NumPy arrays so they can be combined
# element-wise below.
models=numpy.array(data["model"])
speedup=numpy.array(data["speedup"])
attributes=numpy.array(data["attributes"])
cases=numpy.array(data["cases"])
# Boolean mask over rows: drop every "normal_id" row; of the remaining rows keep
# those with cases > 20000, or with cases > 8000 when the model is "negbin_log_2".
selection = numpy.logical_and(models!="normal_id",numpy.logical_or(cases>20000,numpy.logical_and(cases>8000, models=="negbin_log_2")))
@t4c1
t4c1 / cov_benchmark.cpp
Created June 17, 2019 12:33
gp_exp_quad_cov data for more extreme cases
#include <iostream>
#define STAN_OPENCL
#define OPENCL_PLATFORM_ID 0
#define OPENCL_DEVICE_ID 0
#include <stan/math.hpp>
#include <stan/math/opencl/copy.hpp>
#include <stan/math/opencl/matrix_cl.hpp>
#include <stan/math/rev/mat/fun/gp_exp_quad_cov.hpp>