Test CUDA in a Docker container on JupyterHub

@zonca · October 12, 2016

This notebook, run inside a Docker container spawned by JupyterHub, checks that the container can see the host's NVIDIA GPUs, then compiles and runs a small CUDA vector-addition program.
In [3]:
```
!nvidia-smi
```
Output:
```
NVIDIA-SMI couldn't find libnvidia-ml.so library in your system. Please make sure that the NVIDIA Display Driver is properly installed and present in your system.
Please also try adding directory that contains libnvidia-ml.so to your system PATH.
```
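The bare `nvidia-smi` call fails because the driver libraries are mounted into the container under `/usr/local/nvidia`, which is not on the default library search path; the next cell works around this by setting `LD_LIBRARY_PATH` for a single command. A minimal alternative sketch, assuming the same mount point: set the variable once with IPython's `%env` magic so that every later `!` cell inherits it.

```
%env LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64
!nvidia-smi
```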
In [16]:
```
!LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64 nvidia-smi
```
Output:
```
Wed Oct 12 21:57:44 2016
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 361.93.02              Driver Version: 361.93.02                 |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|===============================+======================+======================|
|   0  Tesla M40 24GB      Off  | 0000:04:00.0     Off |                    0 |
| N/A   25C    P8    16W / 250W |      0MiB / 22945MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   1  Tesla M40 24GB      Off  | 0000:8C:00.0     Off |                    0 |
| N/A   24C    P8    17W / 250W |      0MiB / 22945MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+

+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID  Type  Process name                               Usage      |
|=============================================================================|
|  No running processes found                                                 |
+-----------------------------------------------------------------------------+
```
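Both Tesla M40s are visible from inside the container, idle and with no running processes, so GPU passthrough itself is working. As a quicker check, the standard `-L` flag lists each device on one line with its UUID:

```
!LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64 nvidia-smi -L
```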
In [17]:
```
%%file hello.cu
#include <stdio.h>
#include <stdlib.h>
#include <math.h>

// CUDA kernel. Each thread takes care of one element of c
__global__ void vecAdd(double *a, double *b, double *c, int n)
{
    // Get our global thread ID
    int id = blockIdx.x * blockDim.x + threadIdx.x;

    // Make sure we do not go out of bounds
    if (id < n)
        c[id] = a[id] + b[id];
}

int main(int argc, char* argv[])
{
    // Size of vectors
    int n = 100000;

    // Host input vectors
    double *h_a;
    double *h_b;
    // Host output vector
    double *h_c;

    // Device input vectors
    double *d_a;
    double *d_b;
    // Device output vector
    double *d_c;

    // Size, in bytes, of each vector
    size_t bytes = n * sizeof(double);

    // Allocate memory for each vector on host
    h_a = (double*)malloc(bytes);
    h_b = (double*)malloc(bytes);
    h_c = (double*)malloc(bytes);

    // Allocate memory for each vector on GPU
    cudaMalloc(&d_a, bytes);
    cudaMalloc(&d_b, bytes);
    cudaMalloc(&d_c, bytes);

    int i;
    // Initialize vectors on host
    for (i = 0; i < n; i++) {
        h_a[i] = sin(i) * sin(i);
        h_b[i] = cos(i) * cos(i);
    }

    // Copy host vectors to device
    cudaMemcpy(d_a, h_a, bytes, cudaMemcpyHostToDevice);
    cudaMemcpy(d_b, h_b, bytes, cudaMemcpyHostToDevice);

    int blockSize, gridSize;

    // Number of threads in each thread block
    blockSize = 1024;

    // Number of thread blocks in grid
    gridSize = (int)ceil((float)n / blockSize);

    // Execute the kernel
    vecAdd<<<gridSize, blockSize>>>(d_a, d_b, d_c, n);

    // Copy array back to host
    cudaMemcpy(h_c, d_c, bytes, cudaMemcpyDeviceToHost);

    // Sum up vector c and print result divided by n, this should equal 1 within error
    double sum = 0;
    for (i = 0; i < n; i++)
        sum += h_c[i];
    printf("final result: %f\n", sum / n);

    // Release device memory
    cudaFree(d_a);
    cudaFree(d_b);
    cudaFree(d_c);

    // Release host memory
    free(h_a);
    free(h_b);
    free(h_c);

    return 0;
}
```
Output:
```
Overwriting hello.cu
```
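Note that hello.cu never checks a CUDA return code: `cudaMalloc`, `cudaMemcpy`, and the kernel launch all fail silently, which is exactly the failure mode hit when the container cannot reach the driver. Below is a minimal, self-contained sketch of how such checks could look; `hello_checked.cu` and the `CUDA_CHECK` macro are illustrative names, not part of the original notebook.

```
// hello_checked.cu -- illustrative sketch, not part of the original notebook.
// Shows how the CUDA runtime calls made by hello.cu could be error-checked.
#include <stdio.h>
#include <stdlib.h>

// Wrap a CUDA runtime call: print the error string and abort on failure.
#define CUDA_CHECK(call)                                                  \
    do {                                                                  \
        cudaError_t err = (call);                                         \
        if (err != cudaSuccess) {                                         \
            fprintf(stderr, "CUDA error: %s at %s:%d\n",                  \
                    cudaGetErrorString(err), __FILE__, __LINE__);         \
            exit(EXIT_FAILURE);                                           \
        }                                                                 \
    } while (0)

// Trivial kernel, standing in for vecAdd.
__global__ void noop() {}

int main(void)
{
    double *d_a;
    CUDA_CHECK(cudaMalloc(&d_a, 100000 * sizeof(double)));

    noop<<<1, 1>>>();
    CUDA_CHECK(cudaGetLastError());       // catches launch-time errors
    CUDA_CHECK(cudaDeviceSynchronize());  // catches errors raised while the kernel runs

    CUDA_CHECK(cudaFree(d_a));
    printf("all CUDA calls succeeded\n");
    return 0;
}
```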
In [23]:
```
!nvcc hello.cu -o hello.out
```
Output:
```
nvcc warning : The 'compute_20', 'sm_20', and 'sm_21' architectures are deprecated, and may be removed in a future release (Use -Wno-deprecated-gpu-targets to suppress warning).
```
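The warning appears because this nvcc release still defaults to the deprecated `compute_20` target. Since the Tesla M40 is a Maxwell part with compute capability 5.2, passing the architecture explicitly should silence the warning and generate native code for the card:

```
!nvcc -arch=sm_52 hello.cu -o hello.out
```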
In [24]:
```
!LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64 ./hello.out
```
Output:
```
final result: 1.000000
```
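The printed value matches the comment in the source: each element of `c` is sin²(i) + cos²(i) = 1, so the sum of the 100000 elements divided by n is 1 up to floating-point rounding. Anything else (garbage from uninitialized device memory, for example) would indicate that the kernel never actually ran.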
Notebook kernel: Python 3 (Python 3.5.2), nbformat 4.