Created October 12, 2016 22:21
-
-
Save zonca/a14af3b92ab472580f7b97b721a2251e to your computer and use it in GitHub Desktop.
Test CUDA in Docker container in Jupyterhub
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NVIDIA-SMI couldn't find libnvidia-ml.so library in your system. Please make sure that the NVIDIA Display Driver is properly installed and present in your system.\r\n",
      "Please also try adding directory that contains libnvidia-ml.so to your system PATH.\r\n"
     ]
    }
   ],
   "source": [
    "!nvidia-smi"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Wed Oct 12 21:57:44 2016 \r\n",
      "+-----------------------------------------------------------------------------+\r\n",
      "| NVIDIA-SMI 361.93.02 Driver Version: 361.93.02 |\r\n",
      "|-------------------------------+----------------------+----------------------+\r\n",
      "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\r\n",
      "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\r\n",
      "|===============================+======================+======================|\r\n",
      "| 0 Tesla M40 24GB Off | 0000:04:00.0 Off | 0 |\r\n",
      "| N/A 25C P8 16W / 250W | 0MiB / 22945MiB | 0% Default |\r\n",
      "+-------------------------------+----------------------+----------------------+\r\n",
      "| 1 Tesla M40 24GB Off | 0000:8C:00.0 Off | 0 |\r\n",
      "| N/A 24C P8 17W / 250W | 0MiB / 22945MiB | 0% Default |\r\n",
      "+-------------------------------+----------------------+----------------------+\r\n",
      " \r\n",
      "+-----------------------------------------------------------------------------+\r\n",
      "| Processes: GPU Memory |\r\n",
      "| GPU PID Type Process name Usage |\r\n",
      "|=============================================================================|\r\n",
      "| No running processes found |\r\n",
      "+-----------------------------------------------------------------------------+\r\n"
     ]
    }
   ],
   "source": [
    "!LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64 nvidia-smi"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Overwriting hello.cu\n"
     ]
    }
   ],
   "source": [
    "%%file hello.cu \n",
    "#include <stdio.h>\n",
    "#include <stdlib.h>\n",
    "#include <math.h>\n",
    " \n",
    "// CUDA kernel. Each thread takes care of one element of c\n",
    "__global__ void vecAdd(double *a, double *b, double *c, int n)\n",
    "{\n",
    "    // Get our global thread ID\n",
    "    int id = blockIdx.x*blockDim.x+threadIdx.x;\n",
    " \n",
    "    // Make sure we do not go out of bounds\n",
    "    if (id < n)\n",
    "        c[id] = a[id] + b[id];\n",
    "}\n",
    " \n",
    "int main( int argc, char* argv[] )\n",
    "{\n",
    "    // Size of vectors\n",
    "    int n = 100000;\n",
    " \n",
    "    // Host input vectors\n",
    "    double *h_a;\n",
    "    double *h_b;\n",
    "    //Host output vector\n",
    "    double *h_c;\n",
    " \n",
    "    // Device input vectors\n",
    "    double *d_a;\n",
    "    double *d_b;\n",
    "    //Device output vector\n",
    "    double *d_c;\n",
    " \n",
    "    // Size, in bytes, of each vector\n",
    "    size_t bytes = n*sizeof(double);\n",
    " \n",
    "    // Allocate memory for each vector on host\n",
    "    h_a = (double*)malloc(bytes);\n",
    "    h_b = (double*)malloc(bytes);\n",
    "    h_c = (double*)malloc(bytes);\n",
    " \n",
    "    // Allocate memory for each vector on GPU\n",
    "    cudaMalloc(&d_a, bytes);\n",
    "    cudaMalloc(&d_b, bytes);\n",
    "    cudaMalloc(&d_c, bytes);\n",
    " \n",
    "    int i;\n",
    "    // Initialize vectors on host\n",
    "    for( i = 0; i < n; i++ ) {\n",
    "        h_a[i] = sin(i)*sin(i);\n",
    "        h_b[i] = cos(i)*cos(i);\n",
    "    }\n",
    " \n",
    "    // Copy host vectors to device\n",
    "    cudaMemcpy( d_a, h_a, bytes, cudaMemcpyHostToDevice);\n",
    "    cudaMemcpy( d_b, h_b, bytes, cudaMemcpyHostToDevice);\n",
    " \n",
    "    int blockSize, gridSize;\n",
    " \n",
    "    // Number of threads in each thread block\n",
    "    blockSize = 1024;\n",
    " \n",
    "    // Number of thread blocks in grid\n",
    "    gridSize = (int)ceil((float)n/blockSize);\n",
    " \n",
    "    // Execute the kernel\n",
    "    vecAdd<<<gridSize, blockSize>>>(d_a, d_b, d_c, n);\n",
    " \n",
    "    // Copy array back to host\n",
    "    cudaMemcpy( h_c, d_c, bytes, cudaMemcpyDeviceToHost );\n",
    " \n",
    "    // Sum up vector c and print result divided by n, this should equal 1 within error\n",
    "    double sum = 0;\n",
    "    for(i=0; i<n; i++)\n",
    "        sum += h_c[i];\n",
    "    printf(\"final result: %f\\n\", sum/n);\n",
    " \n",
    "    // Release device memory\n",
    "    cudaFree(d_a);\n",
    "    cudaFree(d_b);\n",
    "    cudaFree(d_c);\n",
    " \n",
    "    // Release host memory\n",
    "    free(h_a);\n",
    "    free(h_b);\n",
    "    free(h_c);\n",
    " \n",
    "    return 0;\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nvcc warning : The 'compute_20', 'sm_20', and 'sm_21' architectures are deprecated, and may be removed in a future release (Use -Wno-deprecated-gpu-targets to suppress warning).\r\n"
     ]
    }
   ],
   "source": [
    "!nvcc hello.cu -o hello.out"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "final result: 1.000000\r\n"
     ]
    }
   ],
   "source": [
    "!LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64 ./hello.out"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment