Skip to content

Instantly share code, notes, and snippets.

@seibert
Created December 8, 2014 17:47
Show Gist options
  • Save seibert/50b725012fbda101013b to your computer and use it in GitHub Desktop.
Save seibert/50b725012fbda101013b to your computer and use it in GitHub Desktop.
Simple Numba Benchmarks for comparing AVX and SSE
Display the source blob
Display the rendered blob
Raw
{
"metadata": {
"name": "",
"signature": "sha256:2e116b63cf60314c65252af07ea0c70a2ca09c9871efb5e7ef80edf1ab32b037"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"import os\n",
"os.environ['NUMBA_ENABLE_AVX'] = '1'\n",
"import numpy as np\n",
"import numba\n",
"import math"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Benchmark operands: two identical float32 vectors plus a preallocated result buffer.\n",
"a = np.arange(10000, dtype=np.float32)\n",
"b = np.copy(a)\n",
"out = np.empty_like(a)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"@numba.jit\n",
"def do_basic(a, b, out):\n",
"    \"\"\"Write a[i] + b[i] * |a[i]| into out[i] for every index of a.\"\"\"\n",
"    for i in range(a.shape[0]):\n",
"        out[i] = a[i] + b[i] * math.fabs(a[i])\n",
"\n",
"@numba.jit\n",
"def do_numpy_cos(a, out):\n",
"    \"\"\"Write np.cos(a[i]) into out[i] for every index of a.\"\"\"\n",
"    for i in range(a.shape[0]):\n",
"        out[i] = np.cos(a[i])\n",
"\n",
"@numba.jit\n",
"def do_math_cos(a, out):\n",
"    \"\"\"Write math.cos(a[i]) into out[i] for every index of a.\"\"\"\n",
"    for i in range(a.shape[0]):\n",
"        out[i] = math.cos(a[i])"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Only want to inspect the assembly for the first function to make sure AVX \n",
"# is properly on (vaddps, vmulps, etc) or off (addps, mulps, etc).\n",
"numba.config.DUMP_ASSEMBLY = 1\n",
"do_basic(a, b, out)\n",
"numba.config.DUMP_ASSEMBLY = 0\n",
"# trigger the JIT for the remaining functions\n",
"do_numpy_cos(a, out)\n",
"do_math_cos(a, out)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"-------------------------------ASSEMBLY do_basic--------------------------------\n",
"\t.section\t__TEXT,__text,regular,pure_instructions\n",
"\t.macosx_version_min 14, 0\n",
"\t.section\t__TEXT,__const\n",
"\t.align\t5\n",
"LCPI0_0:\n",
"\t.long\t2147483647\n",
"\t.long\t2147483647\n",
"\t.long\t2147483647\n",
"\t.long\t2147483647\n",
"\t.long\t2147483647\n",
"\t.long\t2147483647\n",
"\t.long\t2147483647\n",
"\t.long\t2147483647\n",
"\t.section\t__TEXT,__literal16,16byte_literals\n",
"\t.align\t4\n",
"LCPI0_1:\n",
"\t.long\t2147483647\n",
"\t.long\t2147483647\n",
"\t.long\t2147483647\n",
"\t.long\t2147483647\n",
"\t.section\t__TEXT,__text,regular,pure_instructions\n",
"\t.globl\t\"___main__.do_basic.array(float32,_1d,_C,_nonconst).array(float32,_1d,_C,_nonconst).array(float32,_1d,_C,_nonconst)\"\n",
"\t.align\t4, 0x90\n",
"\"___main__.do_basic.array(float32,_1d,_C,_nonconst).array(float32,_1d,_C,_nonconst).array(float32,_1d,_C,_nonconst)\":\n",
"\tpushq\t%rbp\n",
"\tpushq\t%r15\n",
"\tpushq\t%r14\n",
"\tpushq\t%rbx\n",
"\tmovq\t32(%rdx), %rax\n",
"\ttestq\t%rax, %rax\n",
"\tjle\tLBB0_19\n",
"\tmovq\t24(%rdx), %r9\n",
"\tmovq\t24(%rcx), %r10\n",
"\tmovq\t24(%r8), %r8\n",
"\txorl\t%esi, %esi\n",
"\ttestq\t%rax, %rax\n",
"\tje\tLBB0_13\n",
"\txorl\t%esi, %esi\n",
"\tmovq\t%rax, %r11\n",
"\tmovq\t%rax, %r14\n",
"\tandq\t$-8, %r11\n",
"\tje\tLBB0_12\n",
"\tleaq\t-1(%rax), %rcx\n",
"\tleaq\t-4(%r8,%rax,4), %rdx\n",
"\tleaq\t(%r9,%rcx,4), %rsi\n",
"\tcmpq\t%rsi, %r8\n",
"\tsetbe\t%bpl\n",
"\tcmpq\t%rdx, %r9\n",
"\tsetbe\t%bl\n",
"\tleaq\t(%r10,%rcx,4), %rcx\n",
"\tcmpq\t%rcx, %r8\n",
"\tsetbe\t%cl\n",
"\tcmpq\t%rdx, %r10\n",
"\tsetbe\t%dl\n",
"\txorl\t%esi, %esi\n",
"\tmovq\t%rax, %r14\n",
"\ttestb\t%bl, %bpl\n",
"\tjne\tLBB0_12\n",
"\tandb\t%dl, %cl\n",
"\tjne\tLBB0_12\n",
"\tmovq\t%rax, %r14\n",
"\tsubq\t%r11, %r14\n",
"\tmovq\t%rax, %rsi\n",
"\tandq\t$-8, %rsi\n",
"\tleaq\t-8(%rsi), %rcx\n",
"\tshrq\t$3, %rcx\n",
"\txorl\t%r15d, %r15d\n",
"\tincq\t%rcx\n",
"\tje\tLBB0_7\n",
"\tmovq\t%rcx, %rdx\n",
"\tandq\t$1, %rdx\n",
"\tje\tLBB0_8\n",
"LBB0_7:\n",
"\tvmovups\t(%r9), %xmm0\n",
"\tvinsertf128\t$1, 16(%r9), %ymm0, %ymm0\n",
"\tvmovups\t(%r10), %xmm1\n",
"\tvinsertf128\t$1, 16(%r10), %ymm1, %ymm1\n",
"\tmovabsq\t$LCPI0_0, %rdx\n",
"\tvandps\t(%rdx), %ymm0, %ymm2\n",
"\tvmulps\t%ymm2, %ymm1, %ymm1\n",
"\tvaddps\t%ymm1, %ymm0, %ymm0\n",
"\tvextractf128\t$1, %ymm0, 16(%r8)\n",
"\tvmovups\t%xmm0, (%r8)\n",
"\tmovl\t$8, %r15d\n",
"LBB0_8:\n",
"\tcmpq\t$2, %rcx\n",
"\tjb\tLBB0_11\n",
"\tleaq\t32(%r9,%r15,4), %rdx\n",
"\tleaq\t32(%r10,%r15,4), %rbx\n",
"\tleaq\t32(%r8,%r15,4), %rcx\n",
"\tsubq\t%r15, %rsi\n",
"\tmovabsq\t$LCPI0_0, %rbp\n",
"\tvmovaps\t(%rbp), %ymm0\n",
"\t.align\t4, 0x90\n",
"LBB0_10:\n",
"\tvmovups\t-32(%rdx), %xmm1\n",
"\tvinsertf128\t$1, -16(%rdx), %ymm1, %ymm1\n",
"\tvmovups\t-32(%rbx), %xmm2\n",
"\tvinsertf128\t$1, -16(%rbx), %ymm2, %ymm2\n",
"\tvandps\t%ymm0, %ymm1, %ymm3\n",
"\tvmulps\t%ymm3, %ymm2, %ymm2\n",
"\tvaddps\t%ymm2, %ymm1, %ymm1\n",
"\tvextractf128\t$1, %ymm1, -16(%rcx)\n",
"\tvmovups\t%xmm1, -32(%rcx)\n",
"\tvmovups\t(%rdx), %xmm1\n",
"\tvinsertf128\t$1, 16(%rdx), %ymm1, %ymm1\n",
"\tvmovups\t(%rbx), %xmm2\n",
"\tvinsertf128\t$1, 16(%rbx), %ymm2, %ymm2\n",
"\tvandps\t%ymm0, %ymm1, %ymm3\n",
"\tvmulps\t%ymm3, %ymm2, %ymm2\n",
"\tvaddps\t%ymm2, %ymm1, %ymm1\n",
"\tvextractf128\t$1, %ymm1, 16(%rcx)\n",
"\tvmovups\t%xmm1, (%rcx)\n",
"\taddq\t$64, %rdx\n",
"\taddq\t$64, %rbx\n",
"\taddq\t$64, %rcx\n",
"\taddq\t$-16, %rsi\n",
"\tjne\tLBB0_10\n",
"LBB0_11:\n",
"\tmovq\t%r11, %rsi\n",
"LBB0_12:\n",
"\tcmpq\t%rsi, %rax\n",
"\tmovq\t%r14, %rax\n",
"\tje\tLBB0_19\n",
"LBB0_13:\n",
"\tmovq\t%rax, %rcx\n",
"\tnegq\t%rcx\n",
"\tcmpq\t$-2, %rcx\n",
"\tmovq\t$-1, %rdx\n",
"\tcmovgq\t%rcx, %rdx\n",
"\tleaq\t1(%rax,%rdx), %rcx\n",
"\ttestq\t%rcx, %rcx\n",
"\tje\tLBB0_15\n",
"\tmovq\t%rcx, %rdx\n",
"\tandq\t$1, %rdx\n",
"\tje\tLBB0_16\n",
"LBB0_15:\n",
"\tdecq\t%rax\n",
"\tvmovss\t(%r9,%rsi,4), %xmm0\n",
"\tmovabsq\t$LCPI0_1, %rdx\n",
"\tvandps\t(%rdx), %xmm0, %xmm1\n",
"\tvmulss\t(%r10,%rsi,4), %xmm1, %xmm1\n",
"\tvaddss\t%xmm1, %xmm0, %xmm0\n",
"\tvmovss\t%xmm0, (%r8,%rsi,4)\n",
"\tleaq\t1(%rsi), %rsi\n",
"LBB0_16:\n",
"\tcmpq\t$2, %rcx\n",
"\tjb\tLBB0_19\n",
"\tleaq\t4(%r8,%rsi,4), %rcx\n",
"\tleaq\t4(%r10,%rsi,4), %rdx\n",
"\tleaq\t4(%r9,%rsi,4), %rsi\n",
"\tmovabsq\t$LCPI0_1, %rbp\n",
"\tvmovss\t(%rbp), %xmm0\n",
"\t.align\t4, 0x90\n",
"LBB0_18:\n",
"\tvmovss\t-4(%rsi), %xmm1\n",
"\tvandps\t%xmm0, %xmm1, %xmm2\n",
"\tvmulss\t-4(%rdx), %xmm2, %xmm2\n",
"\tvaddss\t%xmm2, %xmm1, %xmm1\n",
"\tvmovss\t%xmm1, -4(%rcx)\n",
"\taddq\t$-2, %rax\n",
"\tvmovss\t(%rsi), %xmm1\n",
"\tvandps\t%xmm0, %xmm1, %xmm2\n",
"\tvmulss\t(%rdx), %xmm2, %xmm2\n",
"\tvaddss\t%xmm2, %xmm1, %xmm1\n",
"\tvmovss\t%xmm1, (%rcx)\n",
"\taddq\t$8, %rcx\n",
"\taddq\t$8, %rdx\n",
"\taddq\t$8, %rsi\n",
"\ttestq\t%rax, %rax\n",
"\tjg\tLBB0_18\n",
"LBB0_19:\n",
"\tmovq\t$0, (%rdi)\n",
"\txorl\t%eax, %eax\n",
"\tpopq\t%rbx\n",
"\tpopq\t%r14\n",
"\tpopq\t%r15\n",
"\tpopq\t%rbp\n",
"\tvzeroupper\n",
"\tretq\n",
"\n",
"\t.section\t__TEXT,__const\n",
"\t.align\t5\n",
"LCPI1_0:\n",
"\t.long\t2147483647\n",
"\t.long\t2147483647\n",
"\t.long\t2147483647\n",
"\t.long\t2147483647\n",
"\t.long\t2147483647\n",
"\t.long\t2147483647\n",
"\t.long\t2147483647\n",
"\t.long\t2147483647\n",
"\t.section\t__TEXT,__literal16,16byte_literals\n",
"\t.align\t4\n",
"LCPI1_1:\n",
"\t.long\t2147483647\n",
"\t.long\t2147483647\n",
"\t.long\t2147483647\n",
"\t.long\t2147483647\n",
"\t.section\t__TEXT,__text,regular,pure_instructions\n",
"\t.globl\t\"_wrapper.__main__.do_basic.array(float32,_1d,_C,_nonconst).array(float32,_1d,_C,_nonconst).array(float32,_1d,_C,_nonconst)\"\n",
"\t.align\t4, 0x90\n",
"\"_wrapper.__main__.do_basic.array(float32,_1d,_C,_nonconst).array(float32,_1d,_C,_nonconst).array(float32,_1d,_C,_nonconst)\":\n",
"\t.cfi_startproc\n",
"\tpushq\t%rbp\n",
"Ltmp0:\n",
"\t.cfi_def_cfa_offset 16\n",
"\tpushq\t%r15\n",
"Ltmp1:\n",
"\t.cfi_def_cfa_offset 24\n",
"\tpushq\t%r14\n",
"Ltmp2:\n",
"\t.cfi_def_cfa_offset 32\n",
"\tpushq\t%r13\n",
"Ltmp3:\n",
"\t.cfi_def_cfa_offset 40\n",
"\tpushq\t%r12\n",
"Ltmp4:\n",
"\t.cfi_def_cfa_offset 48\n",
"\tpushq\t%rbx\n",
"Ltmp5:\n",
"\t.cfi_def_cfa_offset 56\n",
"\tsubq\t$184, %rsp\n",
"Ltmp6:\n",
"\t.cfi_def_cfa_offset 240\n",
"Ltmp7:\n",
"\t.cfi_offset %rbx, -56\n",
"Ltmp8:\n",
"\t.cfi_offset %r12, -48\n",
"Ltmp9:\n",
"\t.cfi_offset %r13, -40\n",
"Ltmp10:\n",
"\t.cfi_offset %r14, -32\n",
"Ltmp11:\n",
"\t.cfi_offset %r15, -24\n",
"Ltmp12:\n",
"\t.cfi_offset %rbp, -16\n",
"\tmovq\t%rdx, %r10\n",
"\tleaq\t160(%rsp), %rax\n",
"\tmovq\t%rax, (%rsp)\n",
"\tmovabsq\t$_.const.OOO, %rdx\n",
"\tmovabsq\t$_.kwlist, %rcx\n",
"\tmovabsq\t$_PyArg_ParseTupleAndKeywords, %rbp\n",
"\tleaq\t176(%rsp), %r8\n",
"\tleaq\t168(%rsp), %r9\n",
"\txorl\t%eax, %eax\n",
"\tmovq\t%rsi, %rdi\n",
"\tmovq\t%r10, %rsi\n",
"\tcallq\t*%rbp\n",
"\ttestl\t%eax, %eax\n",
"\tje\tLBB1_1\n",
"\tmovq\t176(%rsp), %rdi\n",
"\tmovabsq\t$_numba_adapt_ndarray, %rbx\n",
"\tleaq\t112(%rsp), %rsi\n",
"\tcallq\t*%rbx\n",
"\tmovq\t136(%rsp), %r14\n",
"\tmovq\t144(%rsp), %r13\n",
"\tmovabsq\t$_PyErr_Occurred, %rbp\n",
"\tcallq\t*%rbp\n",
"\ttestq\t%rax, %rax\n",
"\tjne\tLBB1_1\n",
"\tmovq\t168(%rsp), %rdi\n",
"\tleaq\t64(%rsp), %rsi\n",
"\tcallq\t*%rbx\n",
"\tmovq\t88(%rsp), %r15\n",
"\tcallq\t*%rbp\n",
"\ttestq\t%rax, %rax\n",
"\tjne\tLBB1_1\n",
"\tmovq\t160(%rsp), %rdi\n",
"\tleaq\t16(%rsp), %rsi\n",
"\tcallq\t*%rbx\n",
"\tmovq\t40(%rsp), %r12\n",
"\tcallq\t*%rbp\n",
"\ttestq\t%rax, %rax\n",
"\tje\tLBB1_6\n",
"LBB1_1:\n",
"\txorl\t%eax, %eax\n",
"LBB1_2:\n",
"\taddq\t$184, %rsp\n",
"\tpopq\t%rbx\n",
"\tpopq\t%r12\n",
"\tpopq\t%r13\n",
"\tpopq\t%r14\n",
"\tpopq\t%r15\n",
"\tpopq\t%rbp\n",
"\tretq\n",
"LBB1_6:\n",
"\ttestq\t%r13, %r13\n",
"\tjle\tLBB1_21\n",
"\txorl\t%ecx, %ecx\n",
"\ttestq\t%r13, %r13\n",
"\tje\tLBB1_15\n",
"\txorl\t%ecx, %ecx\n",
"\tmovq\t%r13, %rbp\n",
"\tandq\t$-16, %rbp\n",
"\tje\tLBB1_14\n",
"\tleaq\t-1(%r13), %rcx\n",
"\tleaq\t-4(%r12,%r13,4), %rsi\n",
"\tleaq\t(%r14,%rcx,4), %rdx\n",
"\tcmpq\t%rdx, %r12\n",
"\tsetbe\t%al\n",
"\tcmpq\t%rsi, %r14\n",
"\tsetbe\t%bl\n",
"\tleaq\t(%r15,%rcx,4), %rcx\n",
"\tcmpq\t%rcx, %r12\n",
"\tsetbe\t%dl\n",
"\tcmpq\t%rsi, %r15\n",
"\tsetbe\t%sil\n",
"\txorl\t%ecx, %ecx\n",
"\ttestb\t%bl, %al\n",
"\tjne\tLBB1_14\n",
"\tandb\t%sil, %dl\n",
"\tjne\tLBB1_14\n",
"\tleaq\t32(%r12), %rcx\n",
"\tleaq\t32(%r15), %rdx\n",
"\tleaq\t32(%r14), %rsi\n",
"\tmovq\t%r13, %rdi\n",
"\tandq\t$-16, %rdi\n",
"\tmovabsq\t$LCPI1_0, %rax\n",
"\tvmovaps\t(%rax), %ymm0\n",
"LBB1_12:\n",
"\tvmovups\t-32(%rsi), %xmm1\n",
"\tvmovups\t(%rsi), %xmm2\n",
"\tvinsertf128\t$1, -16(%rsi), %ymm1, %ymm1\n",
"\tvinsertf128\t$1, 16(%rsi), %ymm2, %ymm2\n",
"\tvmovups\t-32(%rdx), %xmm3\n",
"\tvmovups\t(%rdx), %xmm4\n",
"\tvinsertf128\t$1, -16(%rdx), %ymm3, %ymm3\n",
"\tvinsertf128\t$1, 16(%rdx), %ymm4, %ymm4\n",
"\tvandps\t%ymm0, %ymm1, %ymm5\n",
"\tvandps\t%ymm0, %ymm2, %ymm6\n",
"\tvmulps\t%ymm5, %ymm3, %ymm3\n",
"\tvmulps\t%ymm6, %ymm4, %ymm4\n",
"\tvaddps\t%ymm3, %ymm1, %ymm1\n",
"\tvaddps\t%ymm4, %ymm2, %ymm2\n",
"\tvextractf128\t$1, %ymm1, -16(%rcx)\n",
"\tvmovups\t%xmm1, -32(%rcx)\n",
"\tvextractf128\t$1, %ymm2, 16(%rcx)\n",
"\tvmovups\t%xmm2, (%rcx)\n",
"\taddq\t$64, %rcx\n",
"\taddq\t$64, %rdx\n",
"\taddq\t$64, %rsi\n",
"\taddq\t$-16, %rdi\n",
"\tjne\tLBB1_12\n",
"\tmovq\t%rbp, %rcx\n",
"LBB1_14:\n",
"\tcmpq\t%rcx, %r13\n",
"\tje\tLBB1_21\n",
"LBB1_15:\n",
"\tmovq\t%r13, %rdx\n",
"\tsubq\t%rcx, %rdx\n",
"\tmovq\t%r13, %rax\n",
"\tsubq\t%rcx, %rax\n",
"\tje\tLBB1_17\n",
"\tandq\t$1, %rdx\n",
"\tje\tLBB1_18\n",
"LBB1_17:\n",
"\tvmovss\t(%r14,%rcx,4), %xmm0\n",
"\tmovabsq\t$LCPI1_1, %rdx\n",
"\tvandps\t(%rdx), %xmm0, %xmm1\n",
"\tvmulss\t(%r15,%rcx,4), %xmm1, %xmm1\n",
"\tvaddss\t%xmm1, %xmm0, %xmm0\n",
"\tvmovss\t%xmm0, (%r12,%rcx,4)\n",
"\tleaq\t1(%rcx), %rcx\n",
"LBB1_18:\n",
"\tcmpq\t$2, %rax\n",
"\tjb\tLBB1_21\n",
"\tleaq\t4(%r12,%rcx,4), %rax\n",
"\tleaq\t4(%r15,%rcx,4), %rdx\n",
"\tleaq\t4(%r14,%rcx,4), %rsi\n",
"\tsubq\t%rcx, %r13\n",
"\tmovabsq\t$LCPI1_1, %rcx\n",
"\tvmovss\t(%rcx), %xmm0\n",
"\t.align\t4, 0x90\n",
"LBB1_20:\n",
"\tvmovss\t-4(%rsi), %xmm1\n",
"\tvandps\t%xmm0, %xmm1, %xmm2\n",
"\tvmulss\t-4(%rdx), %xmm2, %xmm2\n",
"\tvaddss\t%xmm2, %xmm1, %xmm1\n",
"\tvmovss\t%xmm1, -4(%rax)\n",
"\tvmovss\t(%rsi), %xmm1\n",
"\tvandps\t%xmm0, %xmm1, %xmm2\n",
"\tvmulss\t(%rdx), %xmm2, %xmm2\n",
"\tvaddss\t%xmm2, %xmm1, %xmm1\n",
"\tvmovss\t%xmm1, (%rax)\n",
"\taddq\t$8, %rax\n",
"\taddq\t$8, %rdx\n",
"\taddq\t$8, %rsi\n",
"\taddq\t$-2, %r13\n",
"\tjne\tLBB1_20\n",
"LBB1_21:\n",
"\tmovabsq\t$_Py_None, %rax\n",
"\tmovq\t(%rax), %rbx\n",
"\tmovabsq\t$_Py_IncRef, %rax\n",
"\tmovq\t%rbx, %rdi\n",
"\tvzeroupper\n",
"\tcallq\t*%rax\n",
"\tmovq\t%rbx, %rax\n",
"\tjmp\tLBB1_2\n",
"\t.cfi_endproc\n",
"\n",
"\t.section\t__TEXT,__const\n",
"_.const.a:\n",
"\t.asciz\t\"a\"\n",
"\n",
"_.const.b:\n",
"\t.asciz\t\"b\"\n",
"\n",
"_.const.out:\n",
"\t.asciz\t\"out\"\n",
"\n",
"\t.section\t__DATA,__const\n",
"\t.align\t4\n",
"_.kwlist:\n",
"\t.quad\t_.const.a\n",
"\t.quad\t_.const.b\n",
"\t.quad\t_.const.out\n",
"\t.quad\t0\n",
"\n",
"\t.section\t__TEXT,__const\n",
"_.const.OOO:\n",
"\t.asciz\t\"OOO\"\n",
"\n",
"\n",
".subsections_via_symbols\n",
"\n",
"================================================================================\n"
]
}
],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print('Basic:')\n",
"%timeit a + b * np.fabs(a)\n",
"%timeit do_basic(a, b, out)\n",
"print('cos():')\n",
"%timeit np.cos(a, out=out)\n",
"%timeit do_numpy_cos(a, out)\n",
"%timeit do_math_cos(a, out)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Basic:\n",
"10000 loops, best of 3: 31.4 \u00b5s per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"100000 loops, best of 3: 2.8 \u00b5s per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"cos():\n",
"10000 loops, best of 3: 95.8 \u00b5s per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"10000 loops, best of 3: 87.6 \u00b5s per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"10000 loops, best of 3: 78.1 \u00b5s per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n"
]
}
],
"prompt_number": 5
},
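{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Higher-order polynomials\n",
"\n",
"`gen_poly(order)` builds a kernel that repeats a multiply-add `order` times per array element, so the arithmetic done per element grows with `order`."
]
},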
{
"cell_type": "code",
"collapsed": false,
"input": [
"def gen_poly(order=1):\n",
"    \"\"\"Return a jitted kernel whose inner loop runs `order` times per element.\n",
"\n",
"    The returned function sets out[i] to 1.0 and then applies\n",
"    out[i] += a[i] + out[i] * b[i] `order` times, for every index of a.\n",
"    \"\"\"\n",
"    @numba.jit\n",
"    def do_math(a, b, out):\n",
"        for i in range(a.shape[0]):\n",
"            out[i] = 1.0\n",
"            for j in range(order):\n",
"                out[i] += a[i] + (out[i] * b[i])\n",
"    return do_math"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"poly_1 = gen_poly(1)\n",
"poly_5 = gen_poly(5)\n",
"poly_10 = gen_poly(10)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 7
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"poly_1(a, b, out)\n",
"poly_5(a, b, out)\n",
"poly_10(a, b, out)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%timeit poly_1(a, b, out)\n",
"%timeit poly_5(a, b, out)\n",
"%timeit poly_10(a, b, out)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"100000 loops, best of 3: 2.78 \u00b5s per loop\n",
"100000 loops, best of 3: 9.32 \u00b5s per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"10000 loops, best of 3: 22.1 \u00b5s per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n"
]
}
],
"prompt_number": 9
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print('Time per call per polynomial iteration (microseconds):')\n",
"# Runtimes in microseconds per call, copied by hand from the %timeit output above.\n",
"# They are divided by the polynomial order only, not by the number of array elements.\n",
"[2.78, 9.32, 22.1] / np.array([1.0, 5.0, 10.0])"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Time per call per polynomial iteration (microseconds):\n"
]
},
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 11,
"text": [
"array([ 2.78 , 1.864, 2.21 ])"
]
}
],
"prompt_number": 11
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 10
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment