Skip to content

Instantly share code, notes, and snippets.

@aolo2
Created September 27, 2020 16:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aolo2/ecdcb777ef383f46dc6f7b4b93a18898 to your computer and use it in GitHub Desktop.
Save aolo2/ecdcb777ef383f46dc6f7b4b93a18898 to your computer and use it in GitHub Desktop.
Ужасная (но рабочая!) попытка multiGPU
// Multi-GPU driver.
//
// Flow, as implemented below:
//   1. Parse the command line (parse_cmd_params).
//   2. Obtain a graph: convert a real graph and exit, load one from a binary
//      file, or generate an R-MAT / random-uniform graph and save it.
//   3. Convert the edge list to CSR.
//   4. Start one OpenMP thread per detected CUDA device. Each thread selects
//      its device, copies the CSR graph there and runs user_algorithm on its
//      share of `iterations` randomly chosen source vertices.
//   5. Print timing / performance figures and, if requested, verify the last
//      result produced by thread 0.
//
// Returns 0 on success and 1 when the graph type is unknown or an error was
// caught.
//
// NOTE(review): SAFE_CALL is defined outside this block; the error handling
// here assumes it reports a failed CUDA call by throwing (the surrounding
// catch clauses handle `const char *`) — confirm against its definition.
int main(int argc, char **argv)
{
    // Exit status; switched to 1 by the catch clauses at the bottom.
    int exit_code = 0;

    try
    {
        double t1 = omp_get_wtime();

        // read the command line parameters
        int scale = 12;
        int avg_degree = 15;
        string graph_type = "rmat";
        bool check = true;
        bool load_from_file = false;
        string file_name = "none";
        bool convert = false;
        string convert_name = "none";
        int iterations = 10;

        cout << "printing argv" << endl;
        for (int i = 0; i < argc; ++i)
            cout << argv[i] << endl;
        cout << "done" << endl;

        parse_cmd_params(argc, argv, scale, avg_degree, check, graph_type, load_from_file, file_name, convert, convert_name, iterations);
        cout << "cmd parameters parsed" << endl;

        Graph graph;

        if (convert)
        {
            cout << "convert mode: " << convert << endl;
            convert_real_graph(graph, convert_name, true);
            graph.save_to_binary_file(convert_name + ".el_graph");
            return 0;
        }

        if (load_from_file)
        {
            cout << "loading graph " << file_name << endl;
            graph.load_from_binary_file(file_name);
            cout << "loaded graph has " << graph.vertices_count << " vertices and " << graph.edges_count << " edges" << endl;
        }
        else
        {
            cout << "generating new graph" << endl;
            cout << "scale: " << scale << endl;
            cout << "avg_degree: " << avg_degree << endl;

            // generate the graph
            if (graph_type == "rmat")
            {
                file_name = "rmat_" + std::to_string(scale) + "_" + std::to_string(avg_degree) + ".el_graph";
                generate_R_MAT(graph, pow(2.0, scale), avg_degree);
            }
            else if (graph_type == "random_uniform")
            {
                file_name = "ru_" + std::to_string(scale) + "_" + std::to_string(avg_degree) + ".el_graph";
                generate_random_uniform(graph, pow(2.0, scale), avg_degree);
            }
            else
            {
                cout << "Unknown graph type" << endl;
                return 1;
            }

            cout << "graph generated" << endl;
            graph.convert_to_undirected();
            cout << "conversion done" << endl;
            cout << "file name: " << file_name << endl;

            if (file_name != "none")
            {
                graph.save_to_binary_file(file_name);
                cout << "saved to " << file_name << " file" << endl;
            }
        }

        double t2 = omp_get_wtime();
        cout << "generation/load time: " << t2 - t1 << " sec" << endl;

        // convert the graph to CSR
        GraphCSR csr_graph;
        cout << "conversion started" << endl;
        convert_edges_list_to_CSR(graph, csr_graph);
        cout << "converted" << endl;

        // Source vertices are drawn with `rand() % graph.vertices_count`
        // below; refuse an empty graph instead of dividing by zero.
        if (graph.vertices_count < 1)
            throw "graph has no vertices";

        // Test allocation on the current device; released right away so it
        // does not leak.
        int *tmp = nullptr;
        SAFE_CALL(cudaMalloc((void**)&tmp, sizeof(int)));
        cout << "test malloc done" << endl;
        SAFE_CALL(cudaFree(tmp));

        double t_start = omp_get_wtime();

        int device_count = 0;
        SAFE_CALL(cudaGetDeviceCount(&device_count));
        cout << "detected " << device_count << " devices" << endl;

        // A zero-sized team and a division by zero would follow otherwise.
        if (device_count < 1)
            throw "no CUDA devices available";

        // Variable-length array of a class type is a compiler extension; it
        // is kept because replacing it would need a header that is not
        // visibly included by this file.
        GraphCSR gpu_graph[device_count];

        int *final_user_result = nullptr;      // pinned result buffer of thread 0
        int final_last_source = 0;             // last source processed by thread 0
        int used_devices = 0;                  // actual team size (may be below the request)
        double bfs_wall_time = 0.0;            // slowest thread's algorithm time
        const char *worker_error = nullptr;    // first/last failure seen inside the team

        #pragma omp parallel num_threads(device_count)
        {
            const int t = omp_get_thread_num();
            const int team_size = omp_get_num_threads();

            int *user_result = nullptr;
            int *device_levels = nullptr;
            int last_source = 0;

            if (t == 0)
                used_devices = team_size;

            // An exception must not leave the parallel region, so it is
            // caught per thread and re-thrown after the region.
            try
            {
                SAFE_CALL(cudaSetDevice(t));

                // Small per-device allocation before the timed work; uses a
                // thread-local pointer and is freed immediately.
                int *warmup = nullptr;
                SAFE_CALL(cudaMalloc((void **) &warmup, sizeof(int)));
                SAFE_CALL(cudaFree(warmup));

                SAFE_CALL(cudaMallocHost((void **) &user_result, csr_graph.vertices_count * sizeof(int)));
                SAFE_CALL(cudaMalloc((void **) &device_levels, csr_graph.vertices_count * sizeof(int)));

                // copy the graph to this thread's device
                const double copy_begin = omp_get_wtime();
                user_copy_graph_to_device(csr_graph, gpu_graph[t]);
                const double copy_end = omp_get_wtime();

                #pragma omp critical(main_io)
                cout << "Host->device copy time: " << copy_end - copy_begin << " sec" << endl;

                // Spread the remainder over the first threads so that exactly
                // `iterations` runs are executed in total; the #perf figure
                // below is computed from `iterations`.
                const int my_iterations = iterations / team_size + (t < iterations % team_size ? 1 : 0);

                #pragma omp critical(main_io)
                cout << "will do " << my_iterations << " iterations" << endl;

                // run the algorithm
                SAFE_CALL(cudaDeviceSynchronize());
                const double bfs_begin = omp_get_wtime();

                for (int i = 0; i < my_iterations; i++)
                {
                    // rand() keeps hidden global state; serialize access.
                    #pragma omp critical(main_rand)
                    last_source = rand() % graph.vertices_count;

                    user_algorithm(gpu_graph[t], user_result, device_levels, last_source);
                }

                SAFE_CALL(cudaDeviceSynchronize());
                const double bfs_end = omp_get_wtime();

                #pragma omp critical(main_state)
                {
                    if (bfs_end - bfs_begin > bfs_wall_time)
                        bfs_wall_time = bfs_end - bfs_begin;
                }

                SAFE_CALL(cudaFree(device_levels));

                if (t == 0)
                {
                    // Kept for verification; released after verify_result.
                    final_user_result = user_result;
                    final_last_source = last_source;
                }
                else
                {
                    SAFE_CALL(cudaFreeHost(user_result));
                }
            }
            catch (const char *error)
            {
                #pragma omp critical(main_state)
                worker_error = error;
            }
            catch (...)
            {
                #pragma omp critical(main_state)
                worker_error = "unknown error";
            }
        }

        double t_end = omp_get_wtime();

        if (worker_error != nullptr)
            throw worker_error;

        cout << "BFS wall time: " << bfs_wall_time << " sec" << endl;

        // Release the per-device graph copies with the owning device selected.
        for (int d = 0; d < used_devices; ++d)
        {
            SAFE_CALL(cudaSetDevice(d));
            free_memory(gpu_graph[d]);
        }

        cout << endl;
        cout << "#algorithm executed!" << endl;
        cout << "#perf: " << ((double)(iterations) * graph.edges_count) / ((t_end - t_start) * 1e6) << endl;
        cout << "#time: " << t_end - t_start << endl;
        cout << "#check: " << check << endl;

        // verify correctness; skipped when nothing was executed, because the
        // result buffer would be uninitialized
        if (check && iterations > 0)
        {
            verify_result(csr_graph, final_user_result, final_last_source);
        }

        // free the memory
        SAFE_CALL(cudaSetDevice(0));
        SAFE_CALL(cudaFreeHost(final_user_result));
    }
    catch (const char *error)
    {
        cout << error << endl;
        exit_code = 1;
    }
    catch (...)
    {
        cout << "unknown error" << endl;
        exit_code = 1;
    }

    return exit_code;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment