Skip to content

Instantly share code, notes, and snippets.

@keisukefukuda
Created July 4, 2012 11:10
Show Gist options
  • Save keisukefukuda/3046791 to your computer and use it in GitHub Desktop.
StarPU's "pheft" behavior
#include <cassert>
#include <climits>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <strings.h> // bzero
#include <starpu.h>
struct starpu_codelet cl;
struct starpu_perfmodel perf;
starpu_data_handle_t handle;
void cpu_func(void *buffer[], void *arg) {
int N = STARPU_VECTOR_GET_NX(buffer[0]);
double *vec = (double*) STARPU_VECTOR_GET_PTR(buffer[0]);
std::cerr << "CPU kernel" << std::endl;
std::cerr << "starpu_combined_worker_get_size() = "
<< starpu_combined_worker_get_size() << std::endl;
#pragma omp parallel for num_threads(starpu_combined_worker_get_size())
for (int i = 0; i < N; i++) {
double v = vec[i];
for (int j = 0; j < 1000; j++) {
v *= 5 * sin(v); // something heavy to do
}
vec[i] = v;
}
}
static __global__
void cuda_kernel(int N, double *vec) {
int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < N) {
double v = vec[i];
for (int j = 0; j < 1000; j++) {
v *= 5 * sin(v);
}
vec[i] = v;
}
}
void cuda_func(void *buffer[], void *arg) {
double *vec = (double*) STARPU_VECTOR_GET_PTR(buffer[0]);
int N = STARPU_VECTOR_GET_NX(buffer[0]);
dim3 gridDim = N / 1024 + 1;
dim3 blockDim = 1024;
std::cerr << "CUDA kernel" << std::endl;
cuda_kernel<<<gridDim, blockDim>>> (N, vec);
}
int main(int argc, char **argv) {
srand(time(NULL));
int N = 0;
if (argc == 1) {
N = 1000;
} else {
N = atoi(argv[1]);
}
double *vec = new double[N];
for (int i = 0; i < N; i++) {
vec[i] = (double) i;
}
printf("Using StarPU version %d.%d\n",
STARPU_MAJOR_VERSION,
STARPU_MINOR_VERSION);
starpu_conf conf;
starpu_conf_init(&conf);
conf.sched_policy_name = "pheft";
conf.single_combined_worker = 1;
int ret = starpu_init(&conf);
STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
perf.type = STARPU_REGRESSION_BASED;
perf.symbol = "my_test_program";
starpu_vector_data_register(&handle, 0, (uintptr_t) vec, sizeof(double), N);
bzero(&cl, sizeof(cl));
cl.where = STARPU_CUDA | STARPU_CPU;
cl.type = STARPU_FORKJOIN;
cl.max_parallelism = INT_MAX;
cl.cpu_funcs[0] = &cpu_func;
cl.cuda_funcs[0] = &cuda_func;
cl.nbuffers = 1;
cl.modes[0] = STARPU_RW;
cl.model = &perf;
struct starpu_task *task = starpu_task_create();
assert(task);
task->cl = &cl;
task->handles[0] = handle;
task->cl_arg = NULL;
task->use_tag = 0;
task->synchronous = 1;
task->callback_func = NULL;
ret = starpu_task_submit(task);
STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
starpu_data_unregister(handle);
starpu_shutdown();
delete[] vec;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment