Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save NHDaly/bf18263fdee4d78bae5c4e1de8ad7208 to your computer and use it in GitHub Desktop.
Save NHDaly/bf18263fdee4d78bae5c4e1de8ad7208 to your computer and use it in GitHub Desktop.
Example of choosing which OpenCL device to use with GPUArrays.jl.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"using GPUArrays"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## CPU Julia arrays"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"gpu_ops (generic function with 1 method)"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Benchmark workload: 100 rounds of element-wise (broadcast) arithmetic.\n",
"# The notebook calls it with a plain Array and with GPUArrays.\n",
"# It always returns `nothing`; only the elapsed time is of interest.\n",
"function gpu_ops(a)\n",
"    # Element-wise kernel. It captures nothing, so it is defined once up front.\n",
"    sin_ratio(x, y) = Complex64(sin(x / y))\n",
"    current = a\n",
"    for _ = 1:100\n",
"        shifted = similar(current)\n",
"        shifted .= current .+ 1f0\n",
"        # The complex-valued result is computed purely as load and then dropped.\n",
"        sin_ratio.(current, shifted)\n",
"        current = shifted\n",
"    end\n",
"    return nothing\n",
"end"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 0.257220 seconds (110.92 k allocations: 122.439 MiB, 7.24% gc time)\n"
]
}
],
"source": [
"# CPU baseline: `a` is an ordinary Julia Array here, not a GPUArray.\n",
"# NOTE(review): this is timed only once, right after gpu_ops was defined, so the\n",
"# figure presumably includes JIT compilation - confirm with a second run.\n",
"a = rand(Float32, 320, 320); # plain 320x320 Float32 Array, no GPUArray wrapper\n",
"@time gpu_ops(a);"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## CLBackend CPU device"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"CLContext: Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Initialise the OpenCL backend, asking for device_idx=1 among devices of type :cpu.\n",
"# The recorded cell output shows the context that was created (an Intel Core i7 CPU).\n",
"CLBackend.init(device_type=:cpu,device_idx=1)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"gpu_ops (generic function with 1 method)"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Benchmark workload: 100 rounds of element-wise (broadcast) arithmetic.\n",
"# Same workload as the other gpu_ops definitions in this notebook; it is evaluated\n",
"# again after switching the backend (presumably to force a fresh compile - TODO confirm).\n",
"# It always returns `nothing`; only the elapsed time is of interest.\n",
"function gpu_ops(a)\n",
"    # Element-wise kernel. It captures nothing, so it is defined once up front.\n",
"    sin_ratio(x, y) = Complex64(sin(x / y))\n",
"    current = a\n",
"    for _ = 1:100\n",
"        shifted = similar(current)\n",
"        shifted .= current .+ 1f0\n",
"        # The complex-valued result is computed purely as load and then dropped.\n",
"        sin_ratio.(current, shifted)\n",
"        current = shifted\n",
"    end\n",
"    return nothing\n",
"end"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false,
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" "
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[91mERROR (unhandled task failure): \u001b[91mMethodError: no method matching unsafe_string(::Ptr{Void})\u001b[0m\n",
"Closest candidates are:\n",
" unsafe_string(\u001b[91m::ZMQ.Message\u001b[39m) at /Users/daly/.julia/v0.6/ZMQ/src/ZMQ.jl:439\n",
" unsafe_string(\u001b[91m::Cstring\u001b[39m) at c.jl:79\n",
" unsafe_string(\u001b[91m::Union{Ptr{Int8}, Ptr{UInt8}}\u001b[39m) at strings/string.jl:39\n",
" ...\u001b[39m\n",
"Stacktrace:\n",
" [1] \u001b[1mmacro expansion\u001b[22m\u001b[22m at \u001b[1m/Users/daly/.julia/v0.6/OpenCL/src/context.jl:95\u001b[22m\u001b[22m [inlined]\n",
" [2] \u001b[1m(::OpenCL.cl.##43#44)\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m./task.jl:335\u001b[22m\u001b[22m\n",
"\u001b[39m"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"8.614400 seconds (5.92 M allocations: 319.527 MiB, 1.42% gc time)\n"
]
}
],
"source": [
"# First timed call on the OpenCL CPU context.\n",
"# NOTE(review): the recorded time is far larger than that of the identical cell that\n",
"# follows, so it presumably includes one-off compilation cost.\n",
"# The unsafe_string MethodError in stderr is reported as an unhandled task failure\n",
"# raised from OpenCL/src/context.jl:95; the timing line is still printed afterwards.\n",
"a = GPUArray(rand(Float32, 320, 320)); # can be constructed from any Julia Array\n",
"@time gpu_ops(a);"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false,
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 0.074956 seconds (12.10 k allocations: 481.406 KiB)\n"
]
}
],
"source": [
"# Same measurement repeated with a fresh GPUArray on the OpenCL CPU context.\n",
"# NOTE(review): presumably the warm (already compiled) timing - this is the figure\n",
"# to compare across devices.\n",
"a = GPUArray(rand(Float32, 320, 320)); # can be constructed from any Julia Array\n",
"@time gpu_ops(a);"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## CLBackend GPU device 1 (Intel HD Graphics 530)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"CLContext: Intel(R) HD Graphics 530"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Switch the OpenCL backend to device_idx=1 among devices of type :gpu.\n",
"# The recorded cell output shows the resulting context (Intel HD Graphics 530).\n",
"CLBackend.init(device_type=:gpu,device_idx=1)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"gpu_ops (generic function with 1 method)"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Benchmark workload: 100 rounds of element-wise (broadcast) arithmetic.\n",
"# Same workload as the other gpu_ops definitions in this notebook; it is evaluated\n",
"# again after switching the backend (presumably to force a fresh compile - TODO confirm).\n",
"# It always returns `nothing`; only the elapsed time is of interest.\n",
"function gpu_ops(a)\n",
"    # Element-wise kernel. It captures nothing, so it is defined once up front.\n",
"    sin_ratio(x, y) = Complex64(sin(x / y))\n",
"    current = a\n",
"    for _ = 1:100\n",
"        shifted = similar(current)\n",
"        shifted .= current .+ 1f0\n",
"        # The complex-valued result is computed purely as load and then dropped.\n",
"        sin_ratio.(current, shifted)\n",
"        current = shifted\n",
"    end\n",
"    return nothing\n",
"end"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 0.809771 seconds (192.35 k allocations: 10.353 MiB, 1.01% gc time)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[91mERROR (unhandled task failure): \u001b[91mMethodError: no method matching unsafe_string(::Ptr{Void})\u001b[0m\n",
"Closest candidates are:\n",
" unsafe_string(\u001b[91m::ZMQ.Message\u001b[39m) at /Users/daly/.julia/v0.6/ZMQ/src/ZMQ.jl:439\n",
" unsafe_string(\u001b[91m::Cstring\u001b[39m) at c.jl:79\n",
" unsafe_string(\u001b[91m::Union{Ptr{Int8}, Ptr{UInt8}}\u001b[39m) at strings/string.jl:39\n",
" ...\u001b[39m\n",
"Stacktrace:\n",
" [1] \u001b[1mmacro expansion\u001b[22m\u001b[22m at \u001b[1m/Users/daly/.julia/v0.6/OpenCL/src/context.jl:95\u001b[22m\u001b[22m [inlined]\n",
" [2] \u001b[1m(::OpenCL.cl.##43#44)\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m./task.jl:335\u001b[22m\u001b[22m\n",
"\u001b[39m"
]
}
],
"source": [
"# First timed call on the Intel HD Graphics 530 context.\n",
"# NOTE(review): the recorded time is several times larger than that of the identical\n",
"# cell that follows, so it presumably includes one-off compilation cost.\n",
"# The unsafe_string MethodError in stderr is reported as an unhandled task failure\n",
"# raised from OpenCL/src/context.jl:95; the timing line is printed regardless.\n",
"a = GPUArray(rand(Float32, 320, 320)); # can be constructed from any Julia Array\n",
"@time gpu_ops(a);"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 0.102809 seconds (11.90 k allocations: 475.156 KiB)\n"
]
}
],
"source": [
"# Same measurement repeated with a fresh GPUArray on the Intel HD Graphics 530 context.\n",
"# NOTE(review): presumably the warm (already compiled) timing - this is the figure\n",
"# to compare across devices.\n",
"a = GPUArray(rand(Float32, 320, 320)); # can be constructed from any Julia Array\n",
"@time gpu_ops(a);"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## CLBackend GPU device 2 (AMD Radeon Pro 460)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"CLContext: AMD Radeon Pro 460 Compute Engine"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Switch the OpenCL backend to device_idx=2 among devices of type :gpu.\n",
"# The recorded cell output shows the resulting context (AMD Radeon Pro 460 Compute Engine).\n",
"CLBackend.init(device_type=:gpu,device_idx=2)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"gpu_ops (generic function with 1 method)"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Benchmark workload: 100 rounds of element-wise (broadcast) arithmetic.\n",
"# Same workload as the other gpu_ops definitions in this notebook; it is evaluated\n",
"# again after switching the backend (presumably to force a fresh compile - TODO confirm).\n",
"# It always returns `nothing`; only the elapsed time is of interest.\n",
"function gpu_ops(a)\n",
"    # Element-wise kernel. It captures nothing, so it is defined once up front.\n",
"    sin_ratio(x, y) = Complex64(sin(x / y))\n",
"    current = a\n",
"    for _ = 1:100\n",
"        shifted = similar(current)\n",
"        shifted .= current .+ 1f0\n",
"        # The complex-valued result is computed purely as load and then dropped.\n",
"        sin_ratio.(current, shifted)\n",
"        current = shifted\n",
"    end\n",
"    return nothing\n",
"end"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 0.654090 seconds (201.03 k allocations: 10.947 MiB)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[91mERROR (unhandled task failure): \u001b[91mMethodError: no method matching unsafe_string(::Ptr{Void})\u001b[0m\n",
"Closest candidates are:\n",
" unsafe_string(\u001b[91m::ZMQ.Message\u001b[39m) at /Users/daly/.julia/v0.6/ZMQ/src/ZMQ.jl:439\n",
" unsafe_string(\u001b[91m::Cstring\u001b[39m) at c.jl:79\n",
" unsafe_string(\u001b[91m::Union{Ptr{Int8}, Ptr{UInt8}}\u001b[39m) at strings/string.jl:39\n",
" ...\u001b[39m\n",
"Stacktrace:\n",
" [1] \u001b[1mmacro expansion\u001b[22m\u001b[22m at \u001b[1m/Users/daly/.julia/v0.6/OpenCL/src/context.jl:95\u001b[22m\u001b[22m [inlined]\n",
" [2] \u001b[1m(::OpenCL.cl.##43#44)\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m./task.jl:335\u001b[22m\u001b[22m\n",
"\u001b[39m"
]
}
],
"source": [
"# First timed call on the AMD Radeon Pro 460 context.\n",
"# NOTE(review): the recorded time is several times larger than that of the identical\n",
"# cell that follows, so it presumably includes one-off compilation cost.\n",
"# The unsafe_string MethodError in stderr is reported as an unhandled task failure\n",
"# raised from OpenCL/src/context.jl:95; the timing line is printed regardless.\n",
"a = GPUArray(rand(Float32, 320, 320)); # can be constructed from any Julia Array\n",
"@time gpu_ops(a);"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 0.168805 seconds (11.90 k allocations: 475.156 KiB)\n"
]
}
],
"source": [
"# Same measurement repeated with a fresh GPUArray on the AMD Radeon Pro 460 context.\n",
"# NOTE(review): presumably the warm (already compiled) timing - this is the figure\n",
"# to compare across devices.\n",
"a = GPUArray(rand(Float32, 320, 320)); # can be constructed from any Julia Array\n",
"@time gpu_ops(a);"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Julia 0.6.0",
"language": "julia",
"name": "julia-0.6"
},
"language_info": {
"file_extension": ".jl",
"mimetype": "application/julia",
"name": "julia",
"version": "0.6.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
@NHDaly
Copy link
Author

NHDaly commented Jul 22, 2017

  • Not sure why the unsafe_string errors keep coming up?
  • Not sure why each device seems to be slower than the last. My guess is that this isn't actually a very heavy GPU workload, so most of the latency comes from transferring the data from the CPU to the GPU, and the Radeon Pro 460 is (I guess) farther away from the CPU?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment