Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save jtratner/5980552 to your computer and use it in GitHub Desktop.
Save jtratner/5980552 to your computer and use it in GitHub Desktop.
Perf-tests on Cython string comparisons
Display the source blob
Display the rendered blob
Raw
{
"metadata": {
"name": "Testing out string comparison methods for inf-like values"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": "Testing for multiple string representations of Infinity"
},
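{
"cell_type": "markdown",
"metadata": {},
"source": "Benchmarking several Cython approaches for recognizing string representations of infinity (`inf`, `Inf`, `-inf`, `-Inf`), to help decide how to add additional infinity checks to pandas."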
},
{
"cell_type": "code",
"collapsed": false,
"input": "%load_ext cythonmagic\ninpt1 = [\"inf\", \"Inf\", \"-inf\", \"-Inf\", \"infinity\", \"-infinity\", \"RASCAL\", \"rabbit\", \"sicko\", \"grabbit\"] * 500\ninpt2 = [\"a\", \"b\", \"c\", \"d\"] * 500\ninpt3 = [\"a\" * 10000, \"b\" * 10000, \"inf\" * 10000, \"-INF\" * 100, \"-INF\", \"Inf\"] * 500",
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": "%%cython\ncimport numpy as np\nimport numpy as np\ncdef char* cinf = b'inf'\ncdef char* negcinf = b'-inf'\ncdef char* cinf2 = b'Inf'\ncdef char* negcinf2 = b'-Inf'\ncdef np.ndarray infinities\n\ninfinities = np.array([b'inf', b'-inf', b'Inf', b'-Inf'])\n\nfrom libc.string cimport strcmp, strcasecmp\n\ncpdef compare_with_strcmp(lst):\n    cdef int count = 0\n    for elem in lst:\n        if strcmp(elem, cinf) == 0 or strcmp(elem, cinf2) == 0:\n            count += 1\n        elif strcmp(elem, negcinf) == 0 or strcmp(elem, negcinf2) == 0:\n            count += 1\n        else:\n            pass\n    return count\n\ncpdef compare_with_strcmp_reversed(lst):\n    cdef int count = 0\n    for elem in lst:\n        if strcmp(cinf, elem) == 0 or strcmp(cinf2, elem) == 0:\n            count += 1\n        elif strcmp(negcinf, elem) == 0 or strcmp(negcinf2, elem) == 0:\n            count += 1\n        else:\n            pass\n    return count\n\ncpdef compare_with_in_operator(lst):\n    cdef int count = 0\n    for elem in lst:\n        if elem in infinities:\n            count += 1\n    return count\n\ncpdef compare_with_strcasecmp(lst):\n    cdef int count = 0\n    for elem in lst:\n        if strcasecmp(elem, cinf) == 0:\n            count += 1\n        elif strcasecmp(elem, negcinf) == 0:\n            count +=1\n    return count",
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": "INPUT = inpt1\nprint \"Strlen\"\n%timeit compare_with_strcmp(INPUT)\nprint \"Strlen reversed\"\n%timeit compare_with_strcmp_reversed(INPUT)\nprint \"In operator\"\n%timeit compare_with_in_operator(INPUT)\nprint \"Case insensitive\"\n%timeit compare_with_strcasecmp(INPUT)",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": "Strlen\n1000 loops, best of 3: 273 us per loop"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nStrlen reversed\n1000 loops, best of 3: 266 us per loop"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nIn operator\n10 loops, best of 3: 48.3 ms per loop"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCase insensitive\n1000 loops, best of 3: 285 us per loop"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\n"
}
],
"prompt_number": 7
},
{
"cell_type": "code",
"collapsed": false,
"input": "INPUT = inpt2\nprint \"Strlen\"\n%timeit compare_with_strcmp(INPUT)\nprint \"Strlen reversed\"\n%timeit compare_with_strcmp_reversed(INPUT)\nprint \"In operator\"\n%timeit compare_with_in_operator(INPUT)\nprint \"Case insensitive\"\n%timeit compare_with_strcasecmp(INPUT)",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": "Strlen\n10000 loops, best of 3: 125 us per loop"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nStrlen reversed\n10000 loops, best of 3: 123 us per loop"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nIn operator\n100 loops, best of 3: 19.3 ms per loop"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCase insensitive\n10000 loops, best of 3: 104 us per loop"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\n"
}
],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": "INPUT = inpt3\nprint \"Strlen\"\n%timeit compare_with_strcmp(INPUT)\nprint \"Strlen reversed\"\n%timeit compare_with_strcmp_reversed(INPUT)\nprint \"In operator\"\n%timeit compare_with_in_operator(INPUT)\nprint \"Case insensitive\"\n%timeit compare_with_strcasecmp(INPUT)",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": "Strlen\n10000 loops, best of 3: 105 us per loop"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nStrlen reversed\n10000 loops, best of 3: 92.5 us per loop"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nIn operator\n100 loops, best of 3: 19.2 ms per loop"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCase insensitive\n10000 loops, best of 3: 89.1 us per loop"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\n"
}
],
"prompt_number": 9
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment