Skip to content

Instantly share code, notes, and snippets.

@taldcroft
Created August 9, 2020 13:25
Show Gist options
  • Save taldcroft/c55edc7f98d068d5fc94ea09cbf77c70 to your computer and use it in GitHub Desktop.
Save taldcroft/c55edc7f98d068d5fc94ea09cbf77c70 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Performance of astropy `Table` initialization from Python lists (astropy 4.0.1)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"astropy=4.0.1.post1\n"
]
}
],
"source": [
"# Explicit imports so the notebook runs on a fresh kernel without relying on\n",
"# a user-profile magic; every later cell needs `np` and `Table`.\n",
"import astropy\n",
"import numpy as np\n",
"from astropy.table import Table\n",
"\n",
"# Record the astropy version the timings below were measured with.\n",
"print(f'astropy={astropy.__version__}')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Int with no masks"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"np_data_int = np.arange(1000000, dtype=np.int64)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"data_int_1d = np_data_int.tolist()\n",
"data_int_2d = np_data_int.reshape(1000, 1000).tolist()\n",
"data_int_3d = np_data_int.reshape(1000, 100, 10).tolist()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2.19 s ± 18 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%timeit t = Table([data_int_1d])"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"109 ms ± 4.31 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
]
}
],
"source": [
"%timeit t = Table([data_int_2d])"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"143 ms ± 3.44 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
]
}
],
"source": [
"%timeit t = Table([data_int_3d])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Int with masks"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"data_int_1d[-1] = np.ma.masked\n",
"data_int_2d[-1][-1] = np.ma.masked\n",
"data_int_3d[-1][-1][-1] = np.ma.masked"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/aldcroft/miniconda3/envs/ska3-shiny/lib/python3.8/site-packages/numpy/ma/core.py:2794: UserWarning: Warning: converting a masked element to nan.\n",
" _data = np.array(data, dtype=dtype, copy=copy,\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"2.24 s ± 26 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%timeit t = Table([data_int_1d]) # Emits UserWarning: converting a masked element to nan"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"112 ms ± 625 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
]
}
],
"source": [
"# This is fast, but the result is wrong: MaskedArray turns the masked element of a nested list into nan instead of masking it\n",
"%timeit t = Table([data_int_2d])"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Masked element from 1d array: --\n",
"Masked element from 2d array: nan\n"
]
}
],
"source": [
"# MaskedArray bug / inconsistency: the masked element stays masked for a 1-d list but becomes nan for a 2-d list\n",
"print('Masked element from 1d array:', np.ma.MaskedArray(data_int_1d)[-1])\n",
"print('Masked element from 2d array:', np.ma.MaskedArray(data_int_2d)[-1][-1])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"astropy.table.column.Column"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"t = Table([data_int_2d])\n",
"type(t['col0'])"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"149 ms ± 685 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
]
}
],
"source": [
"%timeit t = Table([data_int_3d])"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<i>Table length=2</i>\n",
"<table id=\"table140233015911520\" class=\"table-striped table-bordered table-condensed\">\n",
"<thead><tr><th>col0</th></tr></thead>\n",
"<thead><tr><th>float64</th></tr></thead>\n",
"<tr><td>1.0</td></tr>\n",
"<tr><td>--</td></tr>\n",
"</table>"
],
"text/plain": [
"<Table length=2>\n",
" col0 \n",
"float64\n",
"-------\n",
" 1.0\n",
" --"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Table([[1, np.ma.masked]]) # Wrong dtype (got float64, expect int)"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Type is <class 'astropy.table.column.Column'>\n"
]
},
{
"data": {
"text/html": [
"<i>Table length=2</i>\n",
"<table id=\"table140233015911760\" class=\"table-striped table-bordered table-condensed\">\n",
"<thead><tr><th>col0 [1]</th></tr></thead>\n",
"<thead><tr><th>float64</th></tr></thead>\n",
"<tr><td>1.0</td></tr>\n",
"<tr><td>nan</td></tr>\n",
"</table>"
],
"text/plain": [
"<Table length=2>\n",
"col0 [1]\n",
"float64 \n",
"--------\n",
" 1.0\n",
" nan"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"t = Table([[[1], [np.ma.masked]]]) # Wrong type (got plain Column with nan, expect MaskedColumn with a masked int)\n",
"print('Type is ', type(t['col0']))\n",
"t"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Fail: unable to convert data to Column for Table\n"
]
}
],
"source": [
"try:\n",
" Table([[1, np.ma.masked]], dtype=[np.int64]) # Fail\n",
"except Exception as exc:\n",
" print(f'Fail: {exc}')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Str with no masks"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"np_data_str = np_data_int.astype('U')"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'999999'"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np_data_str[-1]"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"data_str_1d = np_data_str.tolist()\n",
"data_str_2d = np_data_str.reshape(1000, 1000).tolist()\n",
"data_str_3d = np_data_str.reshape(1000, 100, 10).tolist()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2.52 s ± 41 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%timeit t = Table([data_str_1d])"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"121 ms ± 1.14 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
]
}
],
"source": [
"%timeit t = Table([data_str_2d])"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"164 ms ± 1.2 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
]
}
],
"source": [
"%timeit t = Table([data_str_3d])"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"data_str_1d[-1] = np.ma.masked\n",
"data_str_2d[-1][-1] = np.ma.masked\n",
"data_str_3d[-1][-1][-1] = np.ma.masked"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2.5 s ± 25.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%timeit t = Table([data_str_1d])"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"141 ms ± 5.68 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
]
}
],
"source": [
"%timeit t = Table([data_str_2d])"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"181 ms ± 1.25 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
]
}
],
"source": [
"%timeit t = Table([data_str_3d])"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<i>Table length=2</i>\n",
"<table id=\"table140233015911040\" class=\"table-striped table-bordered table-condensed\">\n",
"<thead><tr><th>col0</th></tr></thead>\n",
"<thead><tr><th>str32</th></tr></thead>\n",
"<tr><td>a</td></tr>\n",
"<tr><td>--</td></tr>\n",
"</table>"
],
"text/plain": [
"<Table length=2>\n",
" col0\n",
"str32\n",
"-----\n",
" a\n",
" --"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Table([['a', np.ma.masked]]) # Wrong string item length (got 32, expect 1)"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<i>Table length=2</i>\n",
"<table id=\"table140233015913296\" class=\"table-striped table-bordered table-condensed\">\n",
"<thead><tr><th>col0</th></tr></thead>\n",
"<thead><tr><th>str8</th></tr></thead>\n",
"<tr><td>a</td></tr>\n",
"<tr><td>--</td></tr>\n",
"</table>"
],
"text/plain": [
"<Table length=2>\n",
"col0\n",
"str8\n",
"----\n",
" a\n",
" --"
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Table([['a', np.ma.masked]], dtype=['U']) # Wrong string item length (got 8, expect 1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### All masked"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<i>Table length=2</i>\n",
"<table id=\"table140230304760352\" class=\"table-striped table-bordered table-condensed\">\n",
"<thead><tr><th>col0</th></tr></thead>\n",
"<thead><tr><th>float64</th></tr></thead>\n",
"<tr><td>--</td></tr>\n",
"<tr><td>--</td></tr>\n",
"</table>"
],
"text/plain": [
"<Table length=2>\n",
" col0 \n",
"float64\n",
"-------\n",
" --\n",
" --"
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Table([[np.ma.masked, np.ma.masked]])"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<i>Table length=2</i>\n",
"<table id=\"table140233015914256\" class=\"table-striped table-bordered table-condensed\">\n",
"<thead><tr><th>col0</th></tr></thead>\n",
"<thead><tr><th>complex64</th></tr></thead>\n",
"<tr><td>--</td></tr>\n",
"<tr><td>--</td></tr>\n",
"</table>"
],
"text/plain": [
"<Table length=2>\n",
" col0 \n",
"complex64\n",
"---------\n",
" --\n",
" --"
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Table([[np.ma.masked, np.ma.masked]], dtype=[np.complex64])"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.8.1 64-bit ('ska3-shiny': conda)",
"language": "python",
"name": "python38164bitska3shinycondabe8b2c20d0c1418bb47bf53172dcc2cc"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment