Skip to content

Instantly share code, notes, and snippets.

@taldcroft
Created August 9, 2020 13:24
Show Gist options
  • Save taldcroft/2fa546ecb13ebe4a7ebc33ffa5dbb016 to your computer and use it in GitHub Desktop.
Save taldcroft/2fa546ecb13ebe4a7ebc33ffa5dbb016 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Performance of astropy `Table` initialization from Python lists (`table-list-init-performance` branch)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"astropy=4.2.dev397+gf62517a25.d20200805\n"
]
}
],
"source": [
"%astro"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Int with no masks"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"np_data_int = np.arange(1000000, dtype=np.int64)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"data_int_1d = np_data_int.tolist()\n",
"data_int_2d = np_data_int.reshape(1000, 1000).tolist()\n",
"data_int_3d = np_data_int.reshape(1000, 100, 10).tolist()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"54.9 ms ± 2.45 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
]
}
],
"source": [
"%timeit t = Table([data_int_1d])"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"51.1 ms ± 597 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
]
}
],
"source": [
"%timeit t = Table([data_int_2d])"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"71 ms ± 745 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
]
}
],
"source": [
"%timeit t = Table([data_int_3d])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Int with masks"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"data_int_1d[-1] = np.ma.masked\n",
"data_int_2d[-1][-1] = np.ma.masked\n",
"data_int_3d[-1][-1][-1] = np.ma.masked"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"357 ms ± 4.82 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%timeit t = Table([data_int_1d]) # was 270 ms before bool fix"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"352 ms ± 6.03 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%timeit t = Table([data_int_2d])"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"471 ms ± 21.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%timeit t = Table([data_int_3d]) # was 360 ms"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<i>Table length=2</i>\n",
"<table id=\"table140624132816272\" class=\"table-striped table-bordered table-condensed\">\n",
"<thead><tr><th>col0</th></tr></thead>\n",
"<thead><tr><th>int64</th></tr></thead>\n",
"<tr><td>1</td></tr>\n",
"<tr><td>--</td></tr>\n",
"</table>"
],
"text/plain": [
"<Table length=2>\n",
" col0\n",
"int64\n",
"-----\n",
" 1\n",
" --"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Table([[1, np.ma.masked]])"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<i>Table length=2</i>\n",
"<table id=\"table140624132860368\" class=\"table-striped table-bordered table-condensed\">\n",
"<thead><tr><th>col0</th></tr></thead>\n",
"<thead><tr><th>int64</th></tr></thead>\n",
"<tr><td>1</td></tr>\n",
"<tr><td>--</td></tr>\n",
"</table>"
],
"text/plain": [
"<Table length=2>\n",
" col0\n",
"int64\n",
"-----\n",
" 1\n",
" --"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Table([[1, np.ma.masked]], dtype=[np.int64])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Str with no masks"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"np_data_str = np_data_int.astype('U')"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'999999'"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np_data_str[-1]"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"data_str_1d = np_data_str.tolist()\n",
"data_str_2d = np_data_str.reshape(1000, 1000).tolist()\n",
"data_str_3d = np_data_str.reshape(1000, 100, 10).tolist()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"114 ms ± 1.45 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
]
}
],
"source": [
"%timeit t = Table([data_str_1d])"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"103 ms ± 6.31 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
]
}
],
"source": [
"%timeit t = Table([data_str_2d])"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"137 ms ± 678 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
]
}
],
"source": [
"%timeit t = Table([data_str_3d])"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"data_str_1d[-1] = np.ma.masked\n",
"data_str_2d[-1][-1] = np.ma.masked\n",
"data_str_3d[-1][-1][-1] = np.ma.masked"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"408 ms ± 6.24 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%timeit t = Table([data_str_1d]) # was 320 ms before bool fix"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"394 ms ± 2.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%timeit t = Table([data_str_2d])"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"490 ms ± 11.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%timeit t = Table([data_str_3d]) # was 400 ms"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<i>Table length=2</i>\n",
"<table id=\"table140624132903952\" class=\"table-striped table-bordered table-condensed\">\n",
"<thead><tr><th>col0</th></tr></thead>\n",
"<thead><tr><th>str1</th></tr></thead>\n",
"<tr><td>a</td></tr>\n",
"<tr><td>--</td></tr>\n",
"</table>"
],
"text/plain": [
"<Table length=2>\n",
"col0\n",
"str1\n",
"----\n",
" a\n",
" --"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Table([['a', np.ma.masked]])"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<i>Table length=2</i>\n",
"<table id=\"table140624132896208\" class=\"table-striped table-bordered table-condensed\">\n",
"<thead><tr><th>col0</th></tr></thead>\n",
"<thead><tr><th>str1</th></tr></thead>\n",
"<tr><td>a</td></tr>\n",
"<tr><td>--</td></tr>\n",
"</table>"
],
"text/plain": [
"<Table length=2>\n",
"col0\n",
"str1\n",
"----\n",
" a\n",
" --"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Table([['a', np.ma.masked]], dtype=['U'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### All masked"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<i>Table length=2</i>\n",
"<table id=\"table140624132860304\" class=\"table-striped table-bordered table-condensed\">\n",
"<thead><tr><th>col0</th></tr></thead>\n",
"<thead><tr><th>int64</th></tr></thead>\n",
"<tr><td>--</td></tr>\n",
"<tr><td>--</td></tr>\n",
"</table>"
],
"text/plain": [
"<Table length=2>\n",
" col0\n",
"int64\n",
"-----\n",
" --\n",
" --"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Table([[np.ma.masked, np.ma.masked]])"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<i>Table length=2</i>\n",
"<table id=\"table140624132816144\" class=\"table-striped table-bordered table-condensed\">\n",
"<thead><tr><th>col0</th></tr></thead>\n",
"<thead><tr><th>complex64</th></tr></thead>\n",
"<tr><td>--</td></tr>\n",
"<tr><td>--</td></tr>\n",
"</table>"
],
"text/plain": [
"<Table length=2>\n",
" col0 \n",
"complex64\n",
"---------\n",
" --\n",
" --"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Table([[np.ma.masked, np.ma.masked]], dtype=[np.complex64])"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.6.10 64-bit ('astropy': conda)",
"language": "python",
"name": "python361064bitastropyconda317f3db251394b069b2d6bd0c770213a"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.7"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment