Created October 12, 2016 22:21
-
-
Save zonca/a14af3b92ab472580f7b97b721a2251e to your computer and use it in GitHub Desktop.
Test CUDA in Docker container in Jupyterhub
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NVIDIA-SMI couldn't find libnvidia-ml.so library in your system. Please make sure that the NVIDIA Display Driver is properly installed and present in your system.\r\n",
      "Please also try adding directory that contains libnvidia-ml.so to your system PATH.\r\n"
     ]
    }
   ],
   "source": [
    "!nvidia-smi"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Wed Oct 12 21:57:44 2016 \r\n",
      "+-----------------------------------------------------------------------------+\r\n",
      "| NVIDIA-SMI 361.93.02 Driver Version: 361.93.02 |\r\n",
      "|-------------------------------+----------------------+----------------------+\r\n",
      "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\r\n",
      "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\r\n",
      "|===============================+======================+======================|\r\n",
      "| 0 Tesla M40 24GB Off | 0000:04:00.0 Off | 0 |\r\n",
      "| N/A 25C P8 16W / 250W | 0MiB / 22945MiB | 0% Default |\r\n",
      "+-------------------------------+----------------------+----------------------+\r\n",
      "| 1 Tesla M40 24GB Off | 0000:8C:00.0 Off | 0 |\r\n",
      "| N/A 24C P8 17W / 250W | 0MiB / 22945MiB | 0% Default |\r\n",
      "+-------------------------------+----------------------+----------------------+\r\n",
      " \r\n",
      "+-----------------------------------------------------------------------------+\r\n",
      "| Processes: GPU Memory |\r\n",
      "| GPU PID Type Process name Usage |\r\n",
      "|=============================================================================|\r\n",
      "| No running processes found |\r\n",
      "+-----------------------------------------------------------------------------+\r\n"
     ]
    }
   ],
   "source": [
    "!LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64 nvidia-smi"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Overwriting hello.cu\n"
     ]
    }
   ],
   "source": [
    "%%file hello.cu \n",
    "#include <stdio.h>\n",
    "#include <stdlib.h>\n",
    "#include <math.h>\n",
    " \n",
    "// CUDA kernel. Each thread takes care of one element of c\n",
    "__global__ void vecAdd(double *a, double *b, double *c, int n)\n",
    "{\n",
    "    // Get our global thread ID\n",
    "    int id = blockIdx.x*blockDim.x+threadIdx.x;\n",
    " \n",
    "    // Make sure we do not go out of bounds\n",
    "    if (id < n)\n",
    "        c[id] = a[id] + b[id];\n",
    "}\n",
    " \n",
    "int main( int argc, char* argv[] )\n",
    "{\n",
    "    // Size of vectors\n",
    "    int n = 100000;\n",
    " \n",
    "    // Host input vectors\n",
    "    double *h_a;\n",
    "    double *h_b;\n",
    "    //Host output vector\n",
    "    double *h_c;\n",
    " \n",
    "    // Device input vectors\n",
    "    double *d_a;\n",
    "    double *d_b;\n",
    "    //Device output vector\n",
    "    double *d_c;\n",
    " \n",
    "    // Size, in bytes, of each vector\n",
    "    size_t bytes = n*sizeof(double);\n",
    " \n",
    "    // Allocate memory for each vector on host\n",
    "    h_a = (double*)malloc(bytes);\n",
    "    h_b = (double*)malloc(bytes);\n",
    "    h_c = (double*)malloc(bytes);\n",
    " \n",
    "    // Allocate memory for each vector on GPU\n",
    "    cudaMalloc(&d_a, bytes);\n",
    "    cudaMalloc(&d_b, bytes);\n",
    "    cudaMalloc(&d_c, bytes);\n",
    " \n",
    "    int i;\n",
    "    // Initialize vectors on host\n",
    "    for( i = 0; i < n; i++ ) {\n",
    "        h_a[i] = sin(i)*sin(i);\n",
    "        h_b[i] = cos(i)*cos(i);\n",
    "    }\n",
    " \n",
    "    // Copy host vectors to device\n",
    "    cudaMemcpy( d_a, h_a, bytes, cudaMemcpyHostToDevice);\n",
    "    cudaMemcpy( d_b, h_b, bytes, cudaMemcpyHostToDevice);\n",
    " \n",
    "    int blockSize, gridSize;\n",
    " \n",
    "    // Number of threads in each thread block\n",
    "    blockSize = 1024;\n",
    " \n",
    "    // Number of thread blocks in grid\n",
    "    gridSize = (int)ceil((float)n/blockSize);\n",
    " \n",
    "    // Execute the kernel\n",
    "    vecAdd<<<gridSize, blockSize>>>(d_a, d_b, d_c, n);\n",
    " \n",
    "    // Copy array back to host\n",
    "    cudaMemcpy( h_c, d_c, bytes, cudaMemcpyDeviceToHost );\n",
    " \n",
    "    // Sum up vector c and print result divided by n, this should equal 1 within error\n",
    "    double sum = 0;\n",
    "    for(i=0; i<n; i++)\n",
    "        sum += h_c[i];\n",
    "    printf(\"final result: %f\\n\", sum/n);\n",
    " \n",
    "    // Release device memory\n",
    "    cudaFree(d_a);\n",
    "    cudaFree(d_b);\n",
    "    cudaFree(d_c);\n",
    " \n",
    "    // Release host memory\n",
    "    free(h_a);\n",
    "    free(h_b);\n",
    "    free(h_c);\n",
    " \n",
    "    return 0;\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nvcc warning : The 'compute_20', 'sm_20', and 'sm_21' architectures are deprecated, and may be removed in a future release (Use -Wno-deprecated-gpu-targets to suppress warning).\r\n"
     ]
    }
   ],
   "source": [
    "!nvcc hello.cu -o hello.out"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "final result: 1.000000\r\n"
     ]
    }
   ],
   "source": [
    "!LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64 ./hello.out"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment