Skip to content

Instantly share code, notes, and snippets.

@zhongwen
Created June 9, 2014 20:54
Show Gist options
  • Save zhongwen/50f1ae2e9efb351fc814 to your computer and use it in GitHub Desktop.
Save zhongwen/50f1ae2e9efb351fc814 to your computer and use it in GitHub Desktop.
cudaBlas-no-PCA
{
"metadata": {
"name": "cuBLAS-no-PCA"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": "import pycuda.gpuarray as gpuarray\nimport pycuda.autoinit\nimport numpy as np\nfrom scikits.cuda import linalg, misc\nlinalg.init()",
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": "a_dims = [(20000, 100000), (2000, 800000), (20000, 32768)]\nb_dims = [(100000, 1), (800000, 1), (32768, 5000)]\n\nfor a_dim, b_dim in zip(a_dims, b_dims):\n print a_dim, b_dim\n a = np.asarray(np.random.rand(a_dim[0], a_dim[1]), np.float32)\n b = np.asarray(np.random.rand(b_dim[0], b_dim[1]), np.float32)\n print 'time to transfer A into GPU memory'\n %timeit a_gpu = gpuarray.to_gpu(a)\n print 'time to transfer B into GPU memory'\n %timeit b_gpu = gpuarray.to_gpu(b)\n a_gpu = gpuarray.to_gpu(a)\n b_gpu = gpuarray.to_gpu(b)\n print 'time for C = A*B'\n %timeit c_gpu = linalg.dot(a_gpu, b_gpu)\n c_gpu = linalg.dot(a_gpu, b_gpu)\n print 'time for getting C back into memory'\n %timeit c = c_gpu.get()\n del a_gpu, b_gpu, c_gpu",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": "(20000, 100000) (100000, 1)\ntime to transfer A into GPU memory"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\n1 loops, best of 3: 1.24 s per loop"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\ntime to transfer B into GPU memory\n10000 loops, best of 3: 136 us per loop"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\ntime for C = A*B"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\n10 loops, best of 3: 149 ms per loop"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\ntime for getting C back into memory\n10000 loops, best of 3: 37 us per loop"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\n(2000, 800000) (800000, 1)\ntime to transfer A into GPU memory"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\n1 loops, best of 3: 994 ms per loop"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\ntime to transfer B into GPU memory\n1000 loops, best of 3: 734 us per loop"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\ntime for C = A*B"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\n10 loops, best of 3: 1.06 s per loop"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\ntime for getting C back into memory\n1 loops, best of 3: 20 us per loop"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\n(20000, 32768) (32768, 5000)\ntime to transfer A into GPU memory"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\n1 loops, best of 3: 403 ms per loop"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\ntime to transfer B into GPU memory\n10 loops, best of 3: 102 ms per loop"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\ntime for C = A*B"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\n10 loops, best of 3: 2.14 s per loop"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\ntime for getting C back into memory\n1 loops, best of 3: 183 ms per loop"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\n"
}
],
"prompt_number": 14
},
{
"cell_type": "markdown",
"metadata": {},
"source": ""
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment