Created
April 25, 2012 08:40
-
-
Save keisukefukuda/2488259 to your computer and use it in GitHub Desktop.
Simple code to test StarPU's combined worker feature.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Simple program that exercises StarPU's combined worker feature.
#include <climits>
#include <cstring>
#include <iostream>
#include <vector>

#include <omp.h>
#include <starpu.h>
starpu_perfmodel perf_model, perf_model_omp; | |
starpu_data_handle_t handle; | |
starpu_task *task = NULL, *task_omp = NULL; | |
starpu_codelet cl, cl_omp; | |
starpu_conf conf; | |
const int LENGTH = 8192*2; // buffer length (a start line) | |
const int REPEAT = 15000; // dummy computing load | |
// A simple CPU kernel with OpenMP acceleration | |
void kernel_omp(void *buffers[], void *arg) { | |
double *ary = (double*) STARPU_VECTOR_GET_PTR(buffers[0]); | |
int len = STARPU_VECTOR_GET_NX(buffers[0]); | |
double beg = omp_get_wtime(); | |
// A heavy & parallel calculation | |
#pragma omp parallel for num_threads(starpu_combined_worker_get_size()) | |
for(int j = 0; j < len; j++) { | |
double t = ary[j]; | |
for(int i = 0; i < REPEAT; i++) { | |
t *= 1.0001; | |
} | |
ary[j] = t; | |
} | |
std::cout << "OpenMP task : " << (omp_get_wtime() - beg) << " [s]\t" | |
<< "worker size = " << starpu_combined_worker_get_size() << ", " | |
<< "OpenMP threads = " << omp_get_max_threads() | |
<< std::endl; | |
} | |
// A simple CPU kernel without OpenMP for comparison | |
void kernel(void *buffers[], void *arg) { | |
double *ary = (double*) STARPU_VECTOR_GET_PTR(buffers[0]); | |
int len = STARPU_VECTOR_GET_NX(buffers[0]); | |
double beg = omp_get_wtime(); | |
// A heavy & parallel calculation | |
for(int j = 0; j < len; j++) { | |
double t = ary[j]; | |
for(int i = 0; i < REPEAT; i++) { | |
t *= 1.0001; | |
} | |
ary[j] = t; | |
} | |
std::cout << "Serial task : " << (omp_get_wtime() - beg) << " [s]" << std::endl; | |
} | |
int main() { | |
starpu_conf_init(&conf); | |
conf.single_combined_worker = 1; | |
conf.sched_policy_name = "pheft"; | |
int ret = starpu_init(&conf); | |
STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); | |
perf_model.type = STARPU_REGRESSION_BASED; | |
perf_model.symbol = "simple"; | |
perf_model_omp.type = STARPU_REGRESSION_BASED; | |
perf_model_omp.symbol = "simple_omp"; | |
bzero(&cl, sizeof(cl)); | |
cl.where = STARPU_CPU; | |
cl.cpu_funcs[0] = kernel; | |
cl.nbuffers = 1; | |
cl.modes[0] = STARPU_RW; | |
cl.model = &perf_model; | |
cl.max_parallelism = INT_MAX; | |
bzero(&cl_omp, sizeof(cl_omp)); | |
cl_omp.where = STARPU_CPU; | |
cl_omp.type = STARPU_FORKJOIN; | |
cl_omp.cpu_funcs[0] = kernel_omp; | |
cl_omp.nbuffers = 1; | |
cl_omp.modes[0] = STARPU_RW; | |
cl_omp.model = &perf_model_omp; | |
cl_omp.max_parallelism = INT_MAX; | |
// Launch many tasks changing the buffers size. | |
for (float f = 1.0; f < 5.0; f += 0.1) { | |
int length = (int)(LENGTH * f); | |
double *ary = new double[length]; | |
// the content of ary can be whatever, we use the initial vandom value | |
starpu_vector_data_register(&handle, 0, (uintptr_t) ary, length, sizeof(ary[0])); | |
// Here, we don't care about race conditions and data consistency | |
task = starpu_task_create(); | |
task->cl = &cl; | |
task->handles[0] = handle; | |
task_omp = starpu_task_create(); | |
task_omp->cl = &cl_omp; | |
task_omp->handles[0] = handle; | |
ret = starpu_task_submit(task); | |
STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); | |
ret = starpu_task_submit(task_omp); | |
STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); | |
starpu_task_wait_for_all(); | |
starpu_data_unregister(handle); | |
delete[] ary; | |
} | |
starpu_shutdown(); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment