Last active
July 8, 2020 04:31
-
-
Save Japanuspus/13e418944489fa94c05200487526223f to your computer and use it in GitHub Desktop.
Performance of np.array for parsing a list of strings
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import numpy as np" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"float_strings = [str(k) for k in range(1000)]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"150 µs ± 2.99 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit\n", | |
"np.array(float_strings, dtype=float)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"_cnv_flt = {\n", | |
" \"{\": lambda v: str(v),\n", | |
" \"N\": lambda v: np.nan,\n", | |
" \"n\": lambda v: np.nan,\n", | |
" \" \": lambda v: float(v),\n", | |
" \"-\": lambda v: np.nan if (len(v) == 1) else float(v),\n", | |
"}\n", | |
"for ch in \"+0123456789\":\n", | |
" _cnv_flt[ch] = lambda v: float(v)\n", | |
"\n", | |
"def _parse_float_column(values):\n", | |
" return np.array(values, dtype=float)\n", | |
" fvalues = [_cnv_flt[vv[0]](vv.strip()) for vv in values]\n", | |
" return np.array(fvalues, float)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"148 µs ± 1.89 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit\n", | |
"_parse_float_column(float_strings)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.8.2" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment