apahl/array_sim_nim.ipynb

## array_sim_nim.ipynb
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Accelerating Array Similarity"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-11-14T14:51:28.108672Z",
     "start_time": "2019-11-14T14:51:26.446080Z"
    }
   },
   "outputs": [],
   "source": [
    "%%capture\n",
    "%reload_ext autoreload\n",
    "%autoreload 2\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')\n",
    "\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "import scipy.spatial.distance as dist\n",
    "import scipy.stats as stats\n",
    "\n",
    "from cellpainting3 import processing as cpp, tools as cpt, reporting as cpr, report_templ as cprt\n",
    "from IPython.core.display import HTML, display"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-11-14T14:51:28.521490Z",
     "start_time": "2019-11-14T14:51:28.110828Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  - loading resource:                      (DATASTORE)\n"
     ]
    }
   ],
   "source": [
    "cpp.load_resource(\"DATASTORE\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-11-14T14:51:31.596227Z",
     "start_time": "2019-11-14T14:51:28.523354Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(2, 593)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "w_ids = [\"246994:01:07_01.00\", \"280897:07:06_01.00\"]\n",
    "ds = cpp.DATASTORE[cpp.DATASTORE[\"Well_Id\"].isin(w_ids)].compute()\n",
    "ds.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-11-14T14:51:31.640606Z",
     "start_time": "2019-11-14T14:51:31.599120Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(numpy.ndarray, dtype('float32'))"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Numpy 1D arrays of length 579 (float32):\n",
    "prof1 = ds[ds[\"Well_Id\"] == w_ids[0]][cpp.ACT_PROF_PARAMETERS].values[0].astype(\"float32\")\n",
    "prof2 = ds[ds[\"Well_Id\"] == w_ids[1]][cpp.ACT_PROF_PARAMETERS].values[0].astype(\"float32\")\n",
    "type(prof1), prof1.dtype"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Python Functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-11-14T14:51:31.671465Z",
     "start_time": "2019-11-14T14:51:31.643411Z"
    }
   },
   "outputs": [],
   "source": [
    "def arr_sim_py1(p1, p2):  # Correlation distance\n",
    "    return 1 - dist.correlation(p1, p2)\n",
    "\n",
    "def arr_sim_py2(p1, p2):  # Pearson correlation\n",
    "    return stats.pearsonr(p1, p2)[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Both methods give the same value."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-11-14T14:51:31.712262Z",
     "start_time": "2019-11-14T14:51:31.673632Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(0.38950762152671814, 0.3895076247730392)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr_sim_py1(prof1, prof2), arr_sim_py2(prof1, prof2)  # Both give the same value"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-11-14T14:51:39.919802Z",
     "start_time": "2019-11-14T14:51:31.715299Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "100 µs ± 6.42 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n"
     ]
    }
   ],
   "source": [
    "%timeit arr_sim_py1(prof1, prof2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-11-14T14:51:48.573362Z",
     "start_time": "2019-11-14T14:51:39.926081Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "105 µs ± 3.36 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n"
     ]
    }
   ],
   "source": [
    "%timeit arr_sim_py2(prof1, prof2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-11-14T14:51:48.601138Z",
     "start_time": "2019-11-14T14:51:48.575842Z"
    }
   },
   "outputs": [],
   "source": [
    "%load_ext nim_magic"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Nim Function for Lists\n",
    "\n",
    "The following Nim / nimpy function can not handle Numpy arrays directly. Although these have to be converted to lists first (e.g. `prof1.tolist()`), it still gives some speedup."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-11-14T14:51:51.311022Z",
     "start_time": "2019-11-14T14:51:48.605358Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "%%nim -d:release\n",
    "    \n",
    "from math import sum, sqrt\n",
    "\n",
    "proc mean[T](x: openarray[T]): float32 =\n",
    "  result = x.sum / float32(x.len)\n",
    "\n",
    "## Simple Pearson implementation\n",
    "## Taken from wikipedia: \n",
    "## https://en.wikipedia.org/wiki/Pearson_correlation_coefficient#For_a_sample\n",
    "proc arr_sim_nim_list(p1, p2: openarray[float32]): float32 {.exportpy.} =\n",
    "  var\n",
    "    sumNum, sumP1, sumP2: float32\n",
    "    meanP1 = p1.mean\n",
    "    meanP2 = p2.mean\n",
    "  for ix in 0 .. p1.len-1:\n",
    "    let\n",
    "      termP1 = p1[ix] - meanP1\n",
    "      termP2 = p2[ix] - meanP2\n",
    "    sumNum += (termP1) * (termP2)\n",
    "    sumP1 += termP1 * termP1\n",
    "    sumP2 += termP2 * termP2\n",
    "  result = sumNum / (sqrt(sumP1) * sqrt(sumP2))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Does it give the same value as the Python functions? -Yes."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-11-14T14:51:51.351702Z",
     "start_time": "2019-11-14T14:51:51.313530Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.389507532119751"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr_sim_nim_list(prof1.tolist(), prof2.tolist())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-11-14T14:51:54.028340Z",
     "start_time": "2019-11-14T14:51:51.354060Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "31.9 µs ± 529 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n"
     ]
    }
   ],
   "source": [
    "%timeit arr_sim_nim_list(prof1.tolist(), prof2.tolist())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## NimFunction for Numpy Arrays\n",
    "\n",
    "The next version uses `RawPyBuffers` and can handle Numpy arrays. This gives a very nice speedup."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-11-14T14:51:57.003800Z",
     "start_time": "2019-11-14T14:51:54.030129Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "%%nim -d:release\n",
    "\n",
    "import nimpy / raw_buffers\n",
    "from math import sum, sqrt\n",
    "\n",
    "# Operator [] overloading \n",
    "proc `[]`(p: RawPyBuffer; x: int): float32 {.inline.} =\n",
    "  cast[ptr UncheckedArray[float32]](p.buf)[x]\n",
    "\n",
    "proc mean(x: RawPyBuffer): float32 =\n",
    "  for ix in 0 .. x.shape[]-1:\n",
    "    result = result + x[ix]\n",
    "  result /= float32(x.shape[])\n",
    "\n",
    "## Simple Pearson implementation\n",
    "## Taken from wikipedia: \n",
    "## https://en.wikipedia.org/wiki/Pearson_correlation_coefficient#For_a_sample\n",
    "proc arr_sim_nim_np(pObj1, pObj2: PyObject): float32 {.exportpy.} =\n",
    "  var\n",
    "    p1Buf, p2Buf: RawPyBuffer\n",
    "    sumNum, sumP1, sumP2: float32\n",
    "\n",
    "  pObj1.getBuffer(p1Buf, PyBUF_WRITABLE or PyBUF_ND)\n",
    "  pObj2.getBuffer(p2Buf, PyBUF_WRITABLE or PyBUF_ND)\n",
    "  let\n",
    "    meanP1 = p1Buf.mean\n",
    "    meanP2 = p2Buf.mean\n",
    "  for ix in 0 .. p1Buf.shape[]-1:\n",
    "    let\n",
    "      termP1 = p1Buf[ix] - meanP1\n",
    "      termP2 = p2Buf[ix] - meanP2\n",
    "    sumNum += (termP1) * (termP2)\n",
    "    sumP1 += termP1 * termP1\n",
    "    sumP2 += termP2 * termP2\n",
    "  p1Buf.release()\n",
    "  p2Buf.release()\n",
    "  result = sumNum / (sqrt(sumP1) * sqrt(sumP2))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Still gives the same value."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-11-14T14:51:57.028612Z",
     "start_time": "2019-11-14T14:51:57.005936Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.389507532119751"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr_sim_nim_np(prof1, prof2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-11-14T14:51:59.614700Z",
     "start_time": "2019-11-14T14:51:57.030379Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "3.14 µs ± 92.8 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)\n"
     ]
    }
   ],
   "source": [
    "%timeit arr_sim_nim_np(prof1, prof2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-11-14T14:51:59.637338Z",
     "start_time": "2019-11-14T14:51:59.616553Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Removed temporary files used by nim_magic.\n"
     ]
    }
   ],
   "source": [
    "%nim_clear"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "hide_input": false,
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": true,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Accelerating Array Similarity"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {
	"ExecuteTime": {
	"end_time": "2019-11-14T14:51:28.108672Z",
	"start_time": "2019-11-14T14:51:26.446080Z"
	}
	},
	"outputs": [],
	"source": [
	"%%capture\n",
	"%reload_ext autoreload\n",
	"%autoreload 2\n",
	"import warnings\n",
	"warnings.filterwarnings('ignore')\n",
	"\n",
	"import pandas as pd\n",
	"import numpy as np\n",
	"\n",
	"import scipy.spatial.distance as dist\n",
	"import scipy.stats as stats\n",
	"\n",
	"from cellpainting3 import processing as cpp, tools as cpt, reporting as cpr, report_templ as cprt\n",
	"from IPython.core.display import HTML, display"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {
	"ExecuteTime": {
	"end_time": "2019-11-14T14:51:28.521490Z",
	"start_time": "2019-11-14T14:51:28.110828Z"
	}
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	" - loading resource: (DATASTORE)\n"
	]
	}
	],
	"source": [
	"cpp.load_resource(\"DATASTORE\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {
	"ExecuteTime": {
	"end_time": "2019-11-14T14:51:31.596227Z",
	"start_time": "2019-11-14T14:51:28.523354Z"
	}
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"(2, 593)"
	]
	},
	"execution_count": 3,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"w_ids = [\"246994:01:07_01.00\", \"280897:07:06_01.00\"]\n",
	"ds = cpp.DATASTORE[cpp.DATASTORE[\"Well_Id\"].isin(w_ids)].compute()\n",
	"ds.shape"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {
	"ExecuteTime": {
	"end_time": "2019-11-14T14:51:31.640606Z",
	"start_time": "2019-11-14T14:51:31.599120Z"
	}
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"(numpy.ndarray, dtype('float32'))"
	]
	},
	"execution_count": 4,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# Numpy 1D arrays of length 579 (float32):\n",
	"prof1 = ds[ds[\"Well_Id\"] == w_ids[0]][cpp.ACT_PROF_PARAMETERS].values[0].astype(\"float32\")\n",
	"prof2 = ds[ds[\"Well_Id\"] == w_ids[1]][cpp.ACT_PROF_PARAMETERS].values[0].astype(\"float32\")\n",
	"type(prof1), prof1.dtype"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Python Functions"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {
	"ExecuteTime": {
	"end_time": "2019-11-14T14:51:31.671465Z",
	"start_time": "2019-11-14T14:51:31.643411Z"
	}
	},
	"outputs": [],
	"source": [
	"def arr_sim_py1(p1, p2): # Correlation distance\n",
	" return 1 - dist.correlation(p1, p2)\n",
	"\n",
	"def arr_sim_py2(p1, p2): # Pearson correlation\n",
	" return stats.pearsonr(p1, p2)[0]"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Both methods give the same value."
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {
	"ExecuteTime": {
	"end_time": "2019-11-14T14:51:31.712262Z",
	"start_time": "2019-11-14T14:51:31.673632Z"
	}
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"(0.38950762152671814, 0.3895076247730392)"
	]
	},
	"execution_count": 6,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"arr_sim_py1(prof1, prof2), arr_sim_py2(prof1, prof2) # Both give the same value"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {
	"ExecuteTime": {
	"end_time": "2019-11-14T14:51:39.919802Z",
	"start_time": "2019-11-14T14:51:31.715299Z"
	}
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"100 µs ± 6.42 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n"
	]
	}
	],
	"source": [
	"%timeit arr_sim_py1(prof1, prof2)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 8,
	"metadata": {
	"ExecuteTime": {
	"end_time": "2019-11-14T14:51:48.573362Z",
	"start_time": "2019-11-14T14:51:39.926081Z"
	}
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"105 µs ± 3.36 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n"
	]
	}
	],
	"source": [
	"%timeit arr_sim_py2(prof1, prof2)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 9,
	"metadata": {
	"ExecuteTime": {
	"end_time": "2019-11-14T14:51:48.601138Z",
	"start_time": "2019-11-14T14:51:48.575842Z"
	}
	},
	"outputs": [],
	"source": [
	"%load_ext nim_magic"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Nim Function for Lists\n",
	"\n",
	"The following Nim / nimpy function can not handle Numpy arrays directly. Although these have to be converted to lists first (e.g. `prof1.tolist()`), it still gives some speedup."
	]
	},
	{
	"cell_type": "code",
	"execution_count": 10,
	"metadata": {
	"ExecuteTime": {
	"end_time": "2019-11-14T14:51:51.311022Z",
	"start_time": "2019-11-14T14:51:48.605358Z"
	}
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"\n"
	]
	}
	],
	"source": [
	"%%nim -d:release\n",
	" \n",
	"from math import sum, sqrt\n",
	"\n",
	"proc mean[T](x: openarray[T]): float32 =\n",
	" result = x.sum / float32(x.len)\n",
	"\n",
	"## Simple Pearson implementation\n",
	"## Taken from wikipedia: \n",
	"## https://en.wikipedia.org/wiki/Pearson_correlation_coefficient#For_a_sample\n",
	"proc arr_sim_nim_list(p1, p2: openarray[float32]): float32 {.exportpy.} =\n",
	" var\n",
	" sumNum, sumP1, sumP2: float32\n",
	" meanP1 = p1.mean\n",
	" meanP2 = p2.mean\n",
	" for ix in 0 .. p1.len-1:\n",
	" let\n",
	" termP1 = p1[ix] - meanP1\n",
	" termP2 = p2[ix] - meanP2\n",
	" sumNum += (termP1) * (termP2)\n",
	" sumP1 += termP1 * termP1\n",
	" sumP2 += termP2 * termP2\n",
	" result = sumNum / (sqrt(sumP1) * sqrt(sumP2))"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Does it give the same value as the Python functions? -Yes."
	]
	},
	{
	"cell_type": "code",
	"execution_count": 11,
	"metadata": {
	"ExecuteTime": {
	"end_time": "2019-11-14T14:51:51.351702Z",
	"start_time": "2019-11-14T14:51:51.313530Z"
	}
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"0.389507532119751"
	]
	},
	"execution_count": 11,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"arr_sim_nim_list(prof1.tolist(), prof2.tolist())"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 12,
	"metadata": {
	"ExecuteTime": {
	"end_time": "2019-11-14T14:51:54.028340Z",
	"start_time": "2019-11-14T14:51:51.354060Z"
	}
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"31.9 µs ± 529 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n"
	]
	}
	],
	"source": [
	"%timeit arr_sim_nim_list(prof1.tolist(), prof2.tolist())"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## NimFunction for Numpy Arrays\n",
	"\n",
	"The next version uses `RawPyBuffers` and can handle Numpy arrays. This gives a very nice speedup."
	]
	},
	{
	"cell_type": "code",
	"execution_count": 13,
	"metadata": {
	"ExecuteTime": {
	"end_time": "2019-11-14T14:51:57.003800Z",
	"start_time": "2019-11-14T14:51:54.030129Z"
	}
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"\n"
	]
	}
	],
	"source": [
	"%%nim -d:release\n",
	"\n",
	"import nimpy / raw_buffers\n",
	"from math import sum, sqrt\n",
	"\n",
	"# Operator [] overloading \n",
	"proc `[]`(p: RawPyBuffer; x: int): float32 {.inline.} =\n",
	" cast[ptr UncheckedArray[float32]](p.buf)[x]\n",
	"\n",
	"proc mean(x: RawPyBuffer): float32 =\n",
	" for ix in 0 .. x.shape[]-1:\n",
	" result = result + x[ix]\n",
	" result /= float32(x.shape[])\n",
	"\n",
	"## Simple Pearson implementation\n",
	"## Taken from wikipedia: \n",
	"## https://en.wikipedia.org/wiki/Pearson_correlation_coefficient#For_a_sample\n",
	"proc arr_sim_nim_np(pObj1, pObj2: PyObject): float32 {.exportpy.} =\n",
	" var\n",
	" p1Buf, p2Buf: RawPyBuffer\n",
	" sumNum, sumP1, sumP2: float32\n",
	"\n",
	" pObj1.getBuffer(p1Buf, PyBUF_WRITABLE or PyBUF_ND)\n",
	" pObj2.getBuffer(p2Buf, PyBUF_WRITABLE or PyBUF_ND)\n",
	" let\n",
	" meanP1 = p1Buf.mean\n",
	" meanP2 = p2Buf.mean\n",
	" for ix in 0 .. p1Buf.shape[]-1:\n",
	" let\n",
	" termP1 = p1Buf[ix] - meanP1\n",
	" termP2 = p2Buf[ix] - meanP2\n",
	" sumNum += (termP1) * (termP2)\n",
	" sumP1 += termP1 * termP1\n",
	" sumP2 += termP2 * termP2\n",
	" p1Buf.release()\n",
	" p2Buf.release()\n",
	" result = sumNum / (sqrt(sumP1) * sqrt(sumP2))"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Still gives the same value."
	]
	},
	{
	"cell_type": "code",
	"execution_count": 14,
	"metadata": {
	"ExecuteTime": {
	"end_time": "2019-11-14T14:51:57.028612Z",
	"start_time": "2019-11-14T14:51:57.005936Z"
	}
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"0.389507532119751"
	]
	},
	"execution_count": 14,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"arr_sim_nim_np(prof1, prof2)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 15,
	"metadata": {
	"ExecuteTime": {
	"end_time": "2019-11-14T14:51:59.614700Z",
	"start_time": "2019-11-14T14:51:57.030379Z"
	}
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"3.14 µs ± 92.8 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)\n"
	]
	}
	],
	"source": [
	"%timeit arr_sim_nim_np(prof1, prof2)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 16,
	"metadata": {
	"ExecuteTime": {
	"end_time": "2019-11-14T14:51:59.637338Z",
	"start_time": "2019-11-14T14:51:59.616553Z"
	}
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Removed temporary files used by nim_magic.\n"
	]
	}
	],
	"source": [
	"%nim_clear"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"hide_input": false,
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.7.3"
	},
	"toc": {
	"base_numbering": 1,
	"nav_menu": {},
	"number_sections": true,
	"sideBar": true,
	"skip_h1_title": true,
	"title_cell": "Table of Contents",
	"title_sidebar": "Contents",
	"toc_cell": false,
	"toc_position": {},
	"toc_section_display": true,
	"toc_window_display": false
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}