Created
September 27, 2020 16:41
-
-
Save aolo2/ecdcb777ef383f46dc6f7b4b93a18898 to your computer and use it in GitHub Desktop.
Ужасная (но рабочая!) попытка multiGPU
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Benchmark driver.
//
// Flow, as visible in this function:
//   1. parse the command line (parse_cmd_params);
//   2. in convert mode, convert a real graph to the binary edge-list format and exit;
//   3. otherwise load a graph from a file or generate one ("rmat" / "random_uniform");
//   4. convert the edge list to CSR;
//   5. start one OpenMP thread per CUDA device; every thread copies the CSR graph to
//      its device and runs user_algorithm `iterations / device_count` times from
//      random sources (the log output labels this workload as BFS);
//   6. report timing / performance and, if requested, verify the last result
//      produced on device 0.
//
// Returns 0 on success and 1 on an unknown graph type or on any reported error.
int main(int argc, char **argv)
{
    try
    {
        double t1 = omp_get_wtime();

        // Read the command-line parameters; the values below are the defaults.
        int scale = 12;
        int avg_degree = 15;
        string graph_type = "rmat";
        bool check = true;
        bool load_from_file = false;
        string file_name = "none";
        bool convert = false;
        string convert_name = "none";
        int iterations = 10;

        cout << "printing argv" << endl;
        for(int i = 0; i < argc; ++i)
            cout << argv[i] << endl;
        cout << "done" << endl;

        parse_cmd_params(argc, argv, scale, avg_degree, check, graph_type, load_from_file, file_name, convert, convert_name, iterations);
        cout << "cmd parameters parsed" << endl;

        Graph graph;
        if(convert)
        {
            cout << "convert mode: " << convert << endl;
            convert_real_graph(graph, convert_name, true);
            graph.save_to_binary_file(convert_name + ".el_graph");
            return 0;
        }

        if(load_from_file)
        {
            cout << "loading graph " << file_name << endl;
            graph.load_from_binary_file(file_name);
            cout << "loaded graph has " << graph.vertices_count << " vertices and " << graph.edges_count << " edges" << endl;
        }
        else
        {
            cout << "generating new graph" << endl;
            cout << "scale: " << scale << endl;
            cout << "avg_degree: " << avg_degree << endl;

            // Generate the graph.
            if (graph_type == "rmat")
            {
                file_name = "rmat_" + std::to_string(scale) + "_" + std::to_string(avg_degree) + ".el_graph";
                generate_R_MAT(graph, pow(2.0, scale), avg_degree);
            }
            else if (graph_type == "random_uniform")
            {
                file_name = "ru_" + std::to_string(scale) + "_" + std::to_string(avg_degree) + ".el_graph";
                generate_random_uniform(graph, pow(2.0, scale), avg_degree);
            }
            else
            {
                cout << "Unknown graph type" << endl;
                return 1;
            }
            cout << "graph generated" << endl;

            graph.convert_to_undirected();
            cout << "conversion done" << endl;

            cout << "file name: " << file_name << endl;
            if(file_name != "none")
            {
                graph.save_to_binary_file(file_name);
                cout << "saved to " << file_name << " file" << endl;
            }
        }

        double t2 = omp_get_wtime();
        cout << "generation/load time: " << t2 - t1 << " sec" << endl;

        // Convert the graph to CSR.
        GraphCSR csr_graph;
        cout << "conversion started" << endl;
        convert_edges_list_to_CSR(graph, csr_graph);
        cout << "converted" << endl;

        // Probe allocation made before the timed section; released right away
        // so it is not leaked.
        int *tmp = NULL;
        SAFE_CALL(cudaMalloc((void**)&tmp, sizeof(int)));
        cout << "test malloc done" << endl;
        SAFE_CALL(cudaFree(tmp));

        double t_start = omp_get_wtime();

        int device_count = 0;
        SAFE_CALL(cudaGetDeviceCount(&device_count));
        cout << "detected " << device_count << " devices" << endl;
        if (device_count < 1)
            throw "no CUDA devices available";

        // One device-side graph per GPU (heap array instead of a non-standard VLA).
        GraphCSR *gpu_graph = new GraphCSR[device_count];

        const int iterations_per_device = iterations / device_count;

        int *final_user_result = NULL;    // pinned result buffer of device 0, freed after verification
        int final_last_source = 0;        // last source vertex processed on device 0
        int executed_iterations = 0;      // runs actually performed, summed over all devices
        double bfs_wall_time = 0.0;       // longest per-device algorithm time
        double t_end = t_start;
        const char *worker_error = NULL;  // first failure reported by any worker thread

        #pragma omp parallel num_threads(device_count)
        {
            const int t = omp_get_thread_num();
            int *user_result = NULL;
            int *device_levels = NULL;

            // An exception must not leave an OpenMP parallel region, so every
            // worker catches locally and the main thread rethrows afterwards.
            try
            {
                cudaError_t status = cudaSetDevice(t);
                if (status == cudaSuccess)
                    status = cudaMallocHost((void **) &user_result, csr_graph.vertices_count * sizeof(int));
                if (status == cudaSuccess)
                    status = cudaMalloc((void **) &device_levels, csr_graph.vertices_count * sizeof(int));
                if (status != cudaSuccess)
                    throw cudaGetErrorString(status);

                // Copy the graph to this thread's device.
                const double copy_start = omp_get_wtime();
                user_copy_graph_to_device(csr_graph, gpu_graph[t]);
                const double copy_end = omp_get_wtime();
                #pragma omp critical
                cout << "Host->device copy time: " << copy_end - copy_start << " sec" << endl;

                // Run the algorithm.
                status = cudaDeviceSynchronize();
                if (status != cudaSuccess)
                    throw cudaGetErrorString(status);

                const double bfs_start = omp_get_wtime();
                #pragma omp critical
                cout << "will do " << iterations_per_device << " iterations" << endl;

                int last_source = 0;
                for(int i = 0; i < iterations_per_device; i++)
                {
                    last_source = rand() % graph.vertices_count;
                    user_algorithm(gpu_graph[t], user_result, device_levels, last_source);
                }

                status = cudaDeviceSynchronize();
                if (status != cudaSuccess)
                    throw cudaGetErrorString(status);
                const double bfs_end = omp_get_wtime();

                #pragma omp critical
                {
                    executed_iterations += iterations_per_device;
                    if (bfs_end - bfs_start > bfs_wall_time)
                        bfs_wall_time = bfs_end - bfs_start;
                    if (t == 0)
                    {
                        // Hand the buffer over to the main thread for verification.
                        final_user_result = user_result;
                        final_last_source = last_source;
                        user_result = NULL;
                    }
                }
            }
            catch (const char *error)
            {
                #pragma omp critical
                if (worker_error == NULL)
                    worker_error = error;
            }
            catch (...)
            {
                #pragma omp critical
                if (worker_error == NULL)
                    worker_error = "unknown error in GPU worker thread";
            }

            // Take the end timestamp once every worker is done, before the
            // cleanup below, so releasing buffers is not part of the measurement.
            #pragma omp barrier
            #pragma omp master
            t_end = omp_get_wtime();

            // Best-effort release of the per-thread buffers.
            if (device_levels != NULL)
                cudaFree(device_levels);
            if (user_result != NULL)
                cudaFreeHost(user_result);
        }

        if (worker_error != NULL)
            throw worker_error;

        cout << "BFS wall time: " << bfs_wall_time << " sec" << endl;

        #pragma omp parallel num_threads(device_count)
        {
            const int t = omp_get_thread_num();
            // Select the device explicitly instead of relying on the binding
            // made in the previous parallel region.
            cudaSetDevice(t);
            free_memory(gpu_graph[t]);
        }

        cout << endl;
        cout << "#algorithm executed!" << endl;
        // Count only the runs that were really executed: iterations / device_count
        // truncates when iterations is not a multiple of the device count.
        cout << "#perf: " << ((double)(executed_iterations) * graph.edges_count) / ((t_end - t_start) * 1e6) << endl;
        cout << "#time: " << t_end - t_start << endl;
        cout << "#check: " << check << endl;

        // Run the correctness check every time it is requested.
        if(check)
        {
            verify_result(csr_graph, final_user_result, final_last_source);
        }

        // Free memory.
        if (final_user_result != NULL)
            cudaFreeHost(final_user_result);
        delete[] gpu_graph;
    }
    catch (const char *error)
    {
        cout << error << endl;
        return 1;
    }
    catch (...)
    {
        cout << "unknown error" << endl;
        return 1;
    }
    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment