nvprof is usually located at /usr/local/cuda/bin.
$ nvprof python train_mnist.py
I prefer to use --print-gpu-trace.
# Check for the presence of AVX and figure out the flags to use for it.
macro(CHECK_FOR_AVX) | |
set(AVX_FLAGS) | |
include(CheckCXXSourceRuns) | |
set(CMAKE_REQUIRED_FLAGS) | |
# Check AVX | |
if(MSVC AND NOT MSVC_VERSION LESS 1600) | |
set(CMAKE_REQUIRED_FLAGS "/arch:AVX") |