Last active
December 19, 2015 15:59
-
-
Save jtratner/5980552 to your computer and use it in GitHub Desktop.
Perf-tests on Cython string comparisons
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "Testing out string comparison methods for inf-like values" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "heading", | |
"level": 3, | |
"metadata": {}, | |
"source": "Testing for multiple string representations of Infinity" | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": "Timing several Cython approaches for recognizing additional string spellings of infinity (e.g. `Inf`, `-inf`), as candidates for use in pandas." | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "%load_ext cythonmagic\n# NOTE(review): newer Cython/IPython releases provide this extension as `%load_ext Cython` -- confirm against the installed versions.\n\n# Ten strings per repetition: six spellings of infinity followed by four unrelated words.\ninpt1 = 500 * [\n    \"inf\", \"Inf\", \"-inf\", \"-Inf\",\n    \"infinity\", \"-infinity\",\n    \"RASCAL\", \"rabbit\", \"sicko\", \"grabbit\",\n]\n\n# Four one-character strings, none of them a spelling of infinity.\ninpt2 = 500 * list(\"abcd\")\n\n# Four long strings (100 to 10000 repetitions of a short token), then two short inf-like strings.\ninpt3 = 500 * [\n    10000 * \"a\",\n    10000 * \"b\",\n    10000 * \"inf\",\n    100 * \"-INF\",\n    \"-INF\",\n    \"Inf\",\n]", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 5 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "%%cython\n# Four candidate ways of counting the inf-like strings in a list, for timing.\ncimport numpy as np\nimport numpy as np\nfrom libc.string cimport strcmp, strcasecmp\n\n# C string constants for the four exact spellings that are recognized.\ncdef char* cinf = b'inf'\ncdef char* negcinf = b'-inf'\ncdef char* cinf2 = b'Inf'\ncdef char* negcinf2 = b'-Inf'\n\n# The same four spellings as a NumPy array, for the `in` operator variant.\ncdef np.ndarray infinities\ninfinities = np.array([b'inf', b'-inf', b'Inf', b'-Inf'])\n\n\ncpdef compare_with_strcmp(lst):\n    \"\"\"Count the items of `lst` that strcmp reports equal to inf, Inf, -inf or -Inf.\n\n    Each item is passed as the first strcmp argument and the constant as the second.\n    \"\"\"\n    cdef int hits = 0\n    for item in lst:\n        if (strcmp(item, cinf) == 0 or strcmp(item, cinf2) == 0\n                or strcmp(item, negcinf) == 0 or strcmp(item, negcinf2) == 0):\n            hits += 1\n    return hits\n\n\ncpdef compare_with_strcmp_reversed(lst):\n    \"\"\"Same count as compare_with_strcmp, with the strcmp arguments swapped.\n\n    The constant is passed first and the list item second.\n    \"\"\"\n    cdef int hits = 0\n    for item in lst:\n        if (strcmp(cinf, item) == 0 or strcmp(cinf2, item) == 0\n                or strcmp(negcinf, item) == 0 or strcmp(negcinf2, item) == 0):\n            hits += 1\n    return hits\n\n\ncpdef compare_with_in_operator(lst):\n    \"\"\"Count the items of `lst` for which `item in infinities` is true.\"\"\"\n    cdef int hits = 0\n    for item in lst:\n        if item in infinities:\n            hits += 1\n    return hits\n\n\ncpdef compare_with_strcasecmp(lst):\n    \"\"\"Count the items of `lst` that strcasecmp reports equal to inf or -inf.\n\n    strcasecmp ignores case, so only the two lowercase constants are checked.\n    \"\"\"\n    cdef int hits = 0\n    for item in lst:\n        if strcasecmp(item, cinf) == 0 or strcasecmp(item, negcinf) == 0:\n            hits += 1\n    return hits", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 6 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "# Time the four counting functions on inpt1.\nINPUT = inpt1\nprint \"Strcmp\"\n%timeit compare_with_strcmp(INPUT)\nprint \"Strcmp reversed\"\n%timeit compare_with_strcmp_reversed(INPUT)\nprint \"In operator\"\n%timeit compare_with_in_operator(INPUT)\nprint \"Case insensitive\"\n%timeit compare_with_strcasecmp(INPUT)", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": "Strcmp\n1000 loops, best of 3: 273 us per loop" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": "\nStrcmp reversed\n1000 loops, best of 3: 266 us per loop" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": "\nIn operator\n10 loops, best of 3: 48.3 ms per loop" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": "\nCase insensitive\n1000 loops, best of 3: 285 us per loop" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": "\n" | |
} | |
], | |
"prompt_number": 7 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "# Time the four counting functions on inpt2.\nINPUT = inpt2\nprint \"Strcmp\"\n%timeit compare_with_strcmp(INPUT)\nprint \"Strcmp reversed\"\n%timeit compare_with_strcmp_reversed(INPUT)\nprint \"In operator\"\n%timeit compare_with_in_operator(INPUT)\nprint \"Case insensitive\"\n%timeit compare_with_strcasecmp(INPUT)", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": "Strcmp\n10000 loops, best of 3: 125 us per loop" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": "\nStrcmp reversed\n10000 loops, best of 3: 123 us per loop" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": "\nIn operator\n100 loops, best of 3: 19.3 ms per loop" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": "\nCase insensitive\n10000 loops, best of 3: 104 us per loop" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": "\n" | |
} | |
], | |
"prompt_number": 8 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "# Time the four counting functions on inpt3.\nINPUT = inpt3\nprint \"Strcmp\"\n%timeit compare_with_strcmp(INPUT)\nprint \"Strcmp reversed\"\n%timeit compare_with_strcmp_reversed(INPUT)\nprint \"In operator\"\n%timeit compare_with_in_operator(INPUT)\nprint \"Case insensitive\"\n%timeit compare_with_strcasecmp(INPUT)", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": "Strcmp\n10000 loops, best of 3: 105 us per loop" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": "\nStrcmp reversed\n10000 loops, best of 3: 92.5 us per loop" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": "\nIn operator\n100 loops, best of 3: 19.2 ms per loop" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": "\nCase insensitive\n10000 loops, best of 3: 89.1 us per loop" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": "\n" | |
} | |
], | |
"prompt_number": 9 | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment