ctcyang/main.cc

## main.cc
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <iostream>

#define NUM_GPUS 8

// Writes a labelled integer matrix to std::cout.
//
// Output format: "<str>:" on its own line, followed by num_rows lines that
// each hold num_cols values, every value followed by a single space.
//
//   str       label printed before the matrix
//   matrix    values stored row after row; num_rows*num_cols consecutive
//             elements are read starting at matrix[0]
//   num_rows  number of lines to print
//   num_cols  number of values per line
//
// The data is only read, so it is taken as a pointer to const. This also
// drops the former fixed array bound, which did not describe how many
// elements are really accessed (that is decided by num_rows and num_cols).
void PrintMatrix( const std::string& str, const int* matrix,
    int num_rows, int num_cols ) {

  std::cout << str << ":\n";

  // Walk the buffer sequentially; rows are laid out back to back.
  const int* next = matrix;
  for (int row = 0; row < num_rows; ++row) {
    for (int col = 0; col < num_cols; ++col) {
      std::cout << *next++ << " ";
    }
    std::cout << std::endl;
  }
}

// Prints the pairwise peer-to-peer performance rank of the visible CUDA
// devices as a square matrix labelled "perf".
//
// At most NUM_GPUS devices are probed. Diagonal entries are 0; every other
// entry is the value reported by cudaDeviceGetP2PAttribute for
// cudaDevP2PAttrPerformanceRank, plus one.
//
// Returns 0 on success and 1 if a CUDA runtime call fails (the error text is
// written to std::cerr).
int main(int argc, char** argv) {
  (void)argc;  // command-line arguments are not used
  (void)argv;

  // Probe only devices that actually exist; asking about a missing device
  // makes the attribute query fail and leaves its output unspecified.
  int num_devices = 0;
  cudaError_t status = cudaGetDeviceCount( &num_devices );
  if (status != cudaSuccess) {
    std::cerr << "cudaGetDeviceCount failed: "
              << cudaGetErrorString( status ) << std::endl;
    return 1;
  }
  if (num_devices > NUM_GPUS) {
    // The result buffer is sized for NUM_GPUS devices.
    std::cerr << "Found " << num_devices << " devices; only the first "
              << NUM_GPUS << " are reported." << std::endl;
    num_devices = NUM_GPUS;
  }

  // Zero-initialised so no element is ever read before it is written.
  int perf[NUM_GPUS*NUM_GPUS] = {0};

  const cudaDeviceP2PAttr attr = cudaDevP2PAttrPerformanceRank;

  // Rows are stored back to back with a stride of num_devices, which is the
  // order in which PrintMatrix consumes the buffer.
  for (int row = 0; row < num_devices; ++row) {
    for (int col = 0; col < num_devices; ++col) {
      int& cell = perf[row*num_devices+col];
      if (row == col) {
        cell = 0;
        continue;
      }

      status = cudaDeviceGetP2PAttribute( &cell, attr, row, col );
      if (status != cudaSuccess) {
        std::cerr << "cudaDeviceGetP2PAttribute(" << row << ", " << col
                  << ") failed: " << cudaGetErrorString( status )
                  << std::endl;
        return 1;
      }
      // Shift the reported rank up by one; presumably so that the best
      // rank stays distinguishable from the 0 used on the diagonal.
      ++cell;
    }
  }

  PrintMatrix( "perf", perf, num_devices, num_devices );
  return 0;
}
	#include <cuda.h>
	#include <cuda_runtime_api.h>
	#include <iostream>

	#define NUM_GPUS 8

	// Writes a labelled integer matrix to std::cout.
	//
	// Output format: "<str>:" on its own line, followed by num_rows lines that
	// each hold num_cols values, every value followed by a single space.
	//
	//   str       label printed before the matrix
	//   matrix    values stored row after row; num_rows*num_cols consecutive
	//             elements are read starting at matrix[0]
	//   num_rows  number of lines to print
	//   num_cols  number of values per line
	//
	// The data is only read, so it is taken as a pointer to const. This also
	// drops the former fixed array bound, which did not describe how many
	// elements are really accessed (that is decided by num_rows and num_cols).
	void PrintMatrix( const std::string& str, const int* matrix,
	    int num_rows, int num_cols ) {

	  std::cout << str << ":\n";

	  // Walk the buffer sequentially; rows are laid out back to back.
	  const int* next = matrix;
	  for (int row = 0; row < num_rows; ++row) {
	    for (int col = 0; col < num_cols; ++col) {
	      std::cout << *next++ << " ";
	    }
	    std::cout << std::endl;
	  }
	}

	// Prints the pairwise peer-to-peer performance rank of the visible CUDA
	// devices as a square matrix labelled "perf".
	//
	// At most NUM_GPUS devices are probed. Diagonal entries are 0; every other
	// entry is the value reported by cudaDeviceGetP2PAttribute for
	// cudaDevP2PAttrPerformanceRank, plus one.
	//
	// Returns 0 on success and 1 if a CUDA runtime call fails (the error text is
	// written to std::cerr).
	int main(int argc, char** argv) {
	  (void)argc;  // command-line arguments are not used
	  (void)argv;

	  // Probe only devices that actually exist; asking about a missing device
	  // makes the attribute query fail and leaves its output unspecified.
	  int num_devices = 0;
	  cudaError_t status = cudaGetDeviceCount( &num_devices );
	  if (status != cudaSuccess) {
	    std::cerr << "cudaGetDeviceCount failed: "
	              << cudaGetErrorString( status ) << std::endl;
	    return 1;
	  }
	  if (num_devices > NUM_GPUS) {
	    // The result buffer is sized for NUM_GPUS devices.
	    std::cerr << "Found " << num_devices << " devices; only the first "
	              << NUM_GPUS << " are reported." << std::endl;
	    num_devices = NUM_GPUS;
	  }

	  // Zero-initialised so no element is ever read before it is written.
	  int perf[NUM_GPUS*NUM_GPUS] = {0};

	  const cudaDeviceP2PAttr attr = cudaDevP2PAttrPerformanceRank;

	  // Rows are stored back to back with a stride of num_devices, which is the
	  // order in which PrintMatrix consumes the buffer.
	  for (int row = 0; row < num_devices; ++row) {
	    for (int col = 0; col < num_devices; ++col) {
	      int& cell = perf[row*num_devices+col];
	      if (row == col) {
	        cell = 0;
	        continue;
	      }

	      status = cudaDeviceGetP2PAttribute( &cell, attr, row, col );
	      if (status != cudaSuccess) {
	        std::cerr << "cudaDeviceGetP2PAttribute(" << row << ", " << col
	                  << ") failed: " << cudaGetErrorString( status )
	                  << std::endl;
	        return 1;
	      }
	      // Shift the reported rank up by one; presumably so that the best
	      // rank stays distinguishable from the 0 used on the diagonal.
	      ++cell;
	    }
	  }

	  PrintMatrix( "perf", perf, num_devices, num_devices );
	  return 0;
	}