Skip to content

Instantly share code, notes, and snippets.

@a-y-khan
Created January 30, 2020 07:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save a-y-khan/3468cf815bb9a95ef76f3c4dc3696f9c to your computer and use it in GitHub Desktop.
Save a-y-khan/3468cf815bb9a95ef76f3c4dc3696f9c to your computer and use it in GitHub Desktop.
Edit distance benchmarks
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import Levenshtein as Lev\n",
"import jellyfish as JF\n",
"import stringdist as SD\n",
"import pyxdameraulevenshtein as pyxLev\n",
"\n",
"\n",
"def output_test_distances(test_strings):\n",
"    \"\"\"Print the edit distance each library reports for one pair of strings.\n",
"\n",
"    Args:\n",
"        test_strings: indexable whose items 0 and 1 are the two strings to compare.\n",
"\n",
"    Emits one labelled line per implementation, in the order listed below.\n",
"    \"\"\"\n",
"    first, second = test_strings[0], test_strings[1]\n",
"\n",
"    # Each entry pairs the printed label with a thunk, so a library is only\n",
"    # looked up and called right before its own line is printed.\n",
"    scorers = (\n",
"        (\"Levenshtein:\", lambda: Lev.distance(first, second)),\n",
"        (\"jellyfish levenshtein:\", lambda: JF.levenshtein_distance(first, second)),\n",
"        (\"stringdist levenshtein:\", lambda: SD.levenshtein(first, second)),\n",
"        (\"pyxdameraulevenshtein:\", lambda: pyxLev.damerau_levenshtein_distance(first, second)),\n",
"        (\"jellyfish damerau-levenshtein:\", lambda: JF.damerau_levenshtein_distance(first, second)),\n",
"        (\"stringdist damerau-levenshtein:\", lambda: SD.rdlevenshtein(first, second)),\n",
"    )\n",
"    for label, compute in scorers:\n",
"        print(label, compute())"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"%load_ext memory_profiler"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Levenshtein: 5\n",
"jellyfish levenshtein: 5\n",
"stringdist levenshtein: 5\n",
"pyxdameraulevenshtein: 5\n",
"jellyfish damerau-levenshtein: 5\n",
"stringdist damerau-levenshtein: 5\n"
]
}
],
"source": [
"# Short sentence pair used for the first round of distance checks.\n",
"test_strings = (\n",
"    \"this was a test\",\n",
"    \"this is a coast\",\n",
")\n",
"\n",
"output_test_distances(test_strings)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"287 ns ± 2.59 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"\n",
"Lev.distance(test_strings[0], test_strings[1])"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"683 ns ± 16.7 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"\n",
"JF.levenshtein_distance(test_strings[0], test_strings[1])"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1.93 µs ± 16.4 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"\n",
"SD.levenshtein(test_strings[0], test_strings[1])"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"7.86 µs ± 114 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"\n",
"pyxLev.damerau_levenshtein_distance(test_strings[0], test_strings[1])"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2.02 µs ± 33 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"\n",
"JF.damerau_levenshtein_distance(test_strings[0], test_strings[1])"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1.93 µs ± 35.9 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"\n",
"SD.rdlevenshtein(test_strings[0], test_strings[1])"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"peak memory: 57.66 MiB, increment: 0.20 MiB\n"
]
}
],
"source": [
"%memit Lev.distance(test_strings[0], test_strings[1])"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"peak memory: 57.67 MiB, increment: 0.01 MiB\n"
]
}
],
"source": [
"%memit JF.levenshtein_distance(test_strings[0], test_strings[1])"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"peak memory: 57.69 MiB, increment: 0.00 MiB\n"
]
}
],
"source": [
"%memit SD.levenshtein(test_strings[0], test_strings[1])"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"peak memory: 57.70 MiB, increment: 0.00 MiB\n"
]
}
],
"source": [
"%memit pyxLev.damerau_levenshtein_distance(test_strings[0], test_strings[1])"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"peak memory: 57.71 MiB, increment: 0.01 MiB\n"
]
}
],
"source": [
"%memit JF.damerau_levenshtein_distance(test_strings[0], test_strings[1])"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"peak memory: 57.71 MiB, increment: 0.00 MiB\n"
]
}
],
"source": [
"%memit SD.rdlevenshtein(test_strings[0], test_strings[1])"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Levenshtein: 7\n",
"jellyfish levenshtein: 7\n",
"stringdist levenshtein: 7\n",
"pyxdameraulevenshtein: 5\n",
"jellyfish damerau-levenshtein: 5\n",
"stringdist damerau-levenshtein: 5\n"
]
}
],
"source": [
"# Longer sentence pair; the second string is a misspelled copy of the first\n",
"# (swapped neighbours such as \"the\" -> \"teh\", plus dropped and added letters).\n",
"test_strings = (\n",
"    \"the most difficult thing is the decision to act, the rest is merely tenacity\",\n",
"    \"teh most difficult thing is the decsion to act, the reast is merely teancty\",\n",
")\n",
"\n",
"output_test_distances(test_strings)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"9.67 µs ± 105 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"\n",
"Lev.distance(test_strings[0], test_strings[1])"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"15.1 µs ± 940 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"\n",
"JF.levenshtein_distance(test_strings[0], test_strings[1])"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"47.5 µs ± 791 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"\n",
"SD.levenshtein(test_strings[0], test_strings[1])"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"399 µs ± 4.64 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"\n",
"pyxLev.damerau_levenshtein_distance(test_strings[0], test_strings[1])"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"35.2 µs ± 227 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"\n",
"JF.damerau_levenshtein_distance(test_strings[0], test_strings[1])"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"46.5 µs ± 516 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"\n",
"SD.rdlevenshtein(test_strings[0], test_strings[1])"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"peak memory: 57.87 MiB, increment: 0.02 MiB\n"
]
}
],
"source": [
"%memit Lev.distance(test_strings[0], test_strings[1])"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"peak memory: 57.95 MiB, increment: 0.00 MiB\n"
]
}
],
"source": [
"%memit JF.levenshtein_distance(test_strings[0], test_strings[1])"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"peak memory: 57.95 MiB, increment: 0.00 MiB\n"
]
}
],
"source": [
"%memit SD.levenshtein(test_strings[0], test_strings[1])"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"peak memory: 57.95 MiB, increment: 0.00 MiB\n"
]
}
],
"source": [
"%memit pyxLev.damerau_levenshtein_distance(test_strings[0], test_strings[1])"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"peak memory: 57.96 MiB, increment: 0.01 MiB\n"
]
}
],
"source": [
"%memit JF.damerau_levenshtein_distance(test_strings[0], test_strings[1])"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"peak memory: 57.96 MiB, increment: 0.00 MiB\n"
]
}
],
"source": [
"%memit SD.rdlevenshtein(test_strings[0], test_strings[1])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment