Created
August 9, 2020 13:25
-
-
Save taldcroft/c55edc7f98d068d5fc94ea09cbf77c70 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Performance of astropy `Table` initialization from Python lists (astropy 4.0.1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"astropy=4.0.1.post1\n" | |
] | |
} | |
], | |
"source": [ | |
"%astro" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Int with no masks" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"np_data_int = np.arange(1000000, dtype=np.int64)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"data_int_1d = np_data_int.tolist()\n", | |
"data_int_2d = np_data_int.reshape(1000, 1000).tolist()\n", | |
"data_int_3d = np_data_int.reshape(1000, 100, 10).tolist()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"2.19 s ± 18 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%timeit t = Table([data_int_1d])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"109 ms ± 4.31 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%timeit t = Table([data_int_2d])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"143 ms ± 3.44 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%timeit t = Table([data_int_3d])" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Int with masks" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"data_int_1d[-1] = np.ma.masked\n", | |
"data_int_2d[-1][-1] = np.ma.masked\n", | |
"data_int_3d[-1][-1][-1] = np.ma.masked" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/Users/aldcroft/miniconda3/envs/ska3-shiny/lib/python3.8/site-packages/numpy/ma/core.py:2794: UserWarning: Warning: converting a masked element to nan.\n", | |
" _data = np.array(data, dtype=dtype, copy=copy,\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"2.24 s ± 26 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%timeit t = Table([data_int_1d]) # Emits numpy UserWarning (masked element converted to nan)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 28, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"112 ms ± 625 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"# Fast, but the result is wrong: the masked element becomes nan instead of staying masked (MaskedArray issue shown in the next cell)\n", | |
"%timeit t = Table([data_int_2d])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 40, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Masked element from 1d array: --\n", | |
"Masked element from 2d array: nan\n" | |
] | |
} | |
], | |
"source": [ | |
"# MaskedArray bug / inconsistency: a masked element survives in a 1-d list but becomes nan in a 2-d list\n", | |
"print('Masked element from 1d array:', np.ma.MaskedArray(data_int_1d)[-1])\n", | |
"print('Masked element from 2d array:', np.ma.MaskedArray(data_int_2d)[-1][-1])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 35, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"astropy.table.column.Column" | |
] | |
}, | |
"execution_count": 35, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"t = Table([data_int_2d])\n", | |
"type(t['col0'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"149 ms ± 685 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%timeit t = Table([data_int_3d])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<i>Table length=2</i>\n", | |
"<table id=\"table140233015911520\" class=\"table-striped table-bordered table-condensed\">\n", | |
"<thead><tr><th>col0</th></tr></thead>\n", | |
"<thead><tr><th>float64</th></tr></thead>\n", | |
"<tr><td>1.0</td></tr>\n", | |
"<tr><td>--</td></tr>\n", | |
"</table>" | |
], | |
"text/plain": [ | |
"<Table length=2>\n", | |
" col0 \n", | |
"float64\n", | |
"-------\n", | |
" 1.0\n", | |
" --" | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"Table([[1, np.ma.masked]]) # Wrong type (got float64, expect int)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 44, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Type is <class 'astropy.table.column.Column'>\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<i>Table length=2</i>\n", | |
"<table id=\"table140233015911760\" class=\"table-striped table-bordered table-condensed\">\n", | |
"<thead><tr><th>col0 [1]</th></tr></thead>\n", | |
"<thead><tr><th>float64</th></tr></thead>\n", | |
"<tr><td>1.0</td></tr>\n", | |
"<tr><td>nan</td></tr>\n", | |
"</table>" | |
], | |
"text/plain": [ | |
"<Table length=2>\n", | |
"col0 [1]\n", | |
"float64 \n", | |
"--------\n", | |
" 1.0\n", | |
" nan" | |
] | |
}, | |
"execution_count": 44, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"t = Table([[[1], [np.ma.masked]]]) # Wrong type (got float64 Column with nan, expect masked int)\n", | |
"print('Type is ', type(t['col0']))\n", | |
"t" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 46, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Fail: unable to convert data to Column for Table\n" | |
] | |
} | |
], | |
"source": [ | |
"try:\n", | |
" Table([[1, np.ma.masked]], dtype=[np.int64]) # Fail\n", | |
"except Exception as exc:\n", | |
" print(f'Fail: {exc}')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Str with no masks" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"np_data_str = np_data_int.astype('U')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'999999'" | |
] | |
}, | |
"execution_count": 14, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"np_data_str[-1]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"data_str_1d = np_data_str.tolist()\n", | |
"data_str_2d = np_data_str.reshape(1000, 1000).tolist()\n", | |
"data_str_3d = np_data_str.reshape(1000, 100, 10).tolist()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"2.52 s ± 41 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%timeit t = Table([data_str_1d])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"121 ms ± 1.14 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%timeit t = Table([data_str_2d])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"164 ms ± 1.2 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%timeit t = Table([data_str_3d])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"data_str_1d[-1] = np.ma.masked\n", | |
"data_str_2d[-1][-1] = np.ma.masked\n", | |
"data_str_3d[-1][-1][-1] = np.ma.masked" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"2.5 s ± 25.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%timeit t = Table([data_str_1d])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"141 ms ± 5.68 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%timeit t = Table([data_str_2d])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"181 ms ± 1.25 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%timeit t = Table([data_str_3d])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<i>Table length=2</i>\n", | |
"<table id=\"table140233015911040\" class=\"table-striped table-bordered table-condensed\">\n", | |
"<thead><tr><th>col0</th></tr></thead>\n", | |
"<thead><tr><th>str32</th></tr></thead>\n", | |
"<tr><td>a</td></tr>\n", | |
"<tr><td>--</td></tr>\n", | |
"</table>" | |
], | |
"text/plain": [ | |
"<Table length=2>\n", | |
" col0\n", | |
"str32\n", | |
"-----\n", | |
" a\n", | |
" --" | |
] | |
}, | |
"execution_count": 23, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"Table([['a', np.ma.masked]]) # Wrong string item length (got 32, expect 1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 48, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<i>Table length=2</i>\n", | |
"<table id=\"table140233015913296\" class=\"table-striped table-bordered table-condensed\">\n", | |
"<thead><tr><th>col0</th></tr></thead>\n", | |
"<thead><tr><th>str8</th></tr></thead>\n", | |
"<tr><td>a</td></tr>\n", | |
"<tr><td>--</td></tr>\n", | |
"</table>" | |
], | |
"text/plain": [ | |
"<Table length=2>\n", | |
"col0\n", | |
"str8\n", | |
"----\n", | |
" a\n", | |
" --" | |
] | |
}, | |
"execution_count": 48, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"Table([['a', np.ma.masked]], dtype=['U']) # Wrong string item length (got 8, expect 1)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### All masked" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 49, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<i>Table length=2</i>\n", | |
"<table id=\"table140230304760352\" class=\"table-striped table-bordered table-condensed\">\n", | |
"<thead><tr><th>col0</th></tr></thead>\n", | |
"<thead><tr><th>float64</th></tr></thead>\n", | |
"<tr><td>--</td></tr>\n", | |
"<tr><td>--</td></tr>\n", | |
"</table>" | |
], | |
"text/plain": [ | |
"<Table length=2>\n", | |
" col0 \n", | |
"float64\n", | |
"-------\n", | |
" --\n", | |
" --" | |
] | |
}, | |
"execution_count": 49, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"Table([[np.ma.masked, np.ma.masked]])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 53, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<i>Table length=2</i>\n", | |
"<table id=\"table140233015914256\" class=\"table-striped table-bordered table-condensed\">\n", | |
"<thead><tr><th>col0</th></tr></thead>\n", | |
"<thead><tr><th>complex64</th></tr></thead>\n", | |
"<tr><td>--</td></tr>\n", | |
"<tr><td>--</td></tr>\n", | |
"</table>" | |
], | |
"text/plain": [ | |
"<Table length=2>\n", | |
" col0 \n", | |
"complex64\n", | |
"---------\n", | |
" --\n", | |
" --" | |
] | |
}, | |
"execution_count": 53, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"Table([[np.ma.masked, np.ma.masked]], dtype=[np.complex64])" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3.8.1 64-bit ('ska3-shiny': conda)", | |
"language": "python", | |
"name": "python38164bitska3shinycondabe8b2c20d0c1418bb47bf53172dcc2cc" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.8.3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment