Skip to content

Instantly share code, notes, and snippets.

@StanczakDominik
Last active June 25, 2017 18:02
Show Gist options
  • Save StanczakDominik/0dda47d4274628272d35d732c9045ed9 to your computer and use it in GitHub Desktop.
Save StanczakDominik/0dda47d4274628272d35d732c9045ed9 to your computer and use it in GitHub Desktop.
Some PyCUDA examples
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[ 1.17558968 0.26406515 0.80357075 -1.79517698]\n",
" [-2.23722863 1.30630934 -0.92374867 0.92402256]\n",
" [ 1.09881485 0.3819513 -0.02121204 0.18705048]\n",
" [-1.19093406 1.9892801 0.13481987 -2.36596513]]\n",
"[[ 2. 2. 2. 2.]\n",
" [ 2. 2. 2. 2.]\n",
" [ 2. 2. 2. 2.]\n",
" [ 2. 2. 2. 2.]]\n",
"[[ 0.58779484 0.13203257 0.40178537 -0.89758849]\n",
" [-1.11861432 0.65315467 -0.46187434 0.46201128]\n",
" [ 0.54940742 0.19097565 -0.01060602 0.09352524]\n",
" [-0.59546703 0.99464005 0.06740993 -1.18298256]]\n"
]
}
],
"source": [
"import pycuda.driver as cuda\n",
"import pycuda.autoinit\n",
"import numpy as np\n",
"import pycuda.compiler\n",
"\n",
"a = np.random.randn(4, 4).astype(np.float32)\n",
"a_gpu = cuda.mem_alloc(a.nbytes)\n",
"cuda.memcpy_htod(a_gpu, a) #host to device\n",
"\n",
"\n",
"mod = pycuda.compiler.SourceModule(\"\"\"\n",
" __global__ void multiply_by_two(float *a)\n",
" {\n",
" int idx = threadIdx.x + threadIdx.y * 4;\n",
" a[idx] *= 2;\n",
" }\n",
" \"\"\")\n",
"\n",
"func = mod.get_function(\"multiply_by_two\")\n",
"func(a_gpu, block=(4,4,1))\n",
"a_doubled = np.empty_like(a)\n",
"cuda.memcpy_dtoh(a_doubled, a_gpu) #device to host\n",
"print(a_doubled, a_doubled/a, a, sep='\\n')"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[ 0.74762076 -1.6985631 -1.50394118 0.92405546]\n",
" [-4.40119934 -0.40460199 3.72085404 -0.54394537]\n",
" [ 1.6078819 -1.56536436 -1.75098097 -2.33173347]\n",
" [ 2.76836824 -0.34919369 0.37828121 -0.55695456]]\n"
]
}
],
"source": [
"import numpy as np\n",
"import pycuda.autoinit\n",
"import pycuda.gpuarray as gpuarray\n",
"\n",
"a_gpu = gpuarray.to_gpu(\n",
" np.random.randn(4,4).astype(np.float32)\n",
" )\n",
"a_doubled = (2*a_gpu).get()\n",
"print(a_doubled)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.1"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment