Created
January 30, 2020 07:29
-
-
Save a-y-khan/3468cf815bb9a95ef76f3c4dc3696f9c to your computer and use it in GitHub Desktop.
Edit distance benchmarks
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import Levenshtein as Lev\n", | |
"import jellyfish as JF\n", | |
"import stringdist as SD\n", | |
"import pyxdameraulevenshtein as pyxLev\n", | |
"\n", | |
"\n", | |
"def output_test_distances(test_strings):\n", | |
"    \"\"\"Print the distance between two strings as computed by each library.\n", | |
"\n", | |
"    test_strings: indexable whose items 0 and 1 are the strings to compare.\n", | |
"    One labelled line is printed per library call; nothing is returned.\n", | |
"    \"\"\"\n", | |
"    first, second = test_strings[0], test_strings[1]\n", | |
"\n", | |
"    def report(label, distance):\n", | |
"        # Same output shape for every library: the label, then the computed distance.\n", | |
"        print(label, distance(first, second))\n", | |
"\n", | |
"    report(\"Levenshtein:\", Lev.distance)\n", | |
"    report(\"jellyfish levenshtein:\", JF.levenshtein_distance)\n", | |
"    report(\"stringdist levenshtein:\", SD.levenshtein)\n", | |
"    report(\"pyxdameraulevenshtein:\", pyxLev.damerau_levenshtein_distance)\n", | |
"    report(\"jellyfish damerau-levenshtein:\", JF.damerau_levenshtein_distance)\n", | |
"    report(\"stringdist damerau-levenshtein:\", SD.rdlevenshtein)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"%load_ext memory_profiler" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Levenshtein: 5\n", | |
"jellyfish levenshtein: 5\n", | |
"stringdist levenshtein: 5\n", | |
"pyxdameraulevenshtein: 5\n", | |
"jellyfish damerau-levenshtein: 5\n", | |
"stringdist damerau-levenshtein: 5\n" | |
] | |
} | |
], | |
"source": [ | |
"test_strings = (\"this was a test\", \"this is a coast\")\n", | |
"\n", | |
"output_test_distances(test_strings)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"287 ns ± 2.59 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit\n", | |
"\n", | |
"Lev.distance(test_strings[0], test_strings[1])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"683 ns ± 16.7 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit\n", | |
"\n", | |
"JF.levenshtein_distance(test_strings[0], test_strings[1])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"1.93 µs ± 16.4 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit\n", | |
"\n", | |
"SD.levenshtein(test_strings[0], test_strings[1])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"7.86 µs ± 114 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit\n", | |
"\n", | |
"pyxLev.damerau_levenshtein_distance(test_strings[0], test_strings[1])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"2.02 µs ± 33 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit\n", | |
"\n", | |
"JF.damerau_levenshtein_distance(test_strings[0], test_strings[1])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"1.93 µs ± 35.9 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit\n", | |
"\n", | |
"SD.rdlevenshtein(test_strings[0], test_strings[1])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"peak memory: 57.66 MiB, increment: 0.20 MiB\n" | |
] | |
} | |
], | |
"source": [ | |
"%memit Lev.distance(test_strings[0], test_strings[1])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"peak memory: 57.67 MiB, increment: 0.01 MiB\n" | |
] | |
} | |
], | |
"source": [ | |
"%memit JF.levenshtein_distance(test_strings[0], test_strings[1])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"peak memory: 57.69 MiB, increment: 0.00 MiB\n" | |
] | |
} | |
], | |
"source": [ | |
"%memit SD.levenshtein(test_strings[0], test_strings[1])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"peak memory: 57.70 MiB, increment: 0.00 MiB\n" | |
] | |
} | |
], | |
"source": [ | |
"%memit pyxLev.damerau_levenshtein_distance(test_strings[0], test_strings[1])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"peak memory: 57.71 MiB, increment: 0.01 MiB\n" | |
] | |
} | |
], | |
"source": [ | |
"%memit JF.damerau_levenshtein_distance(test_strings[0], test_strings[1])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"peak memory: 57.71 MiB, increment: 0.00 MiB\n" | |
] | |
} | |
], | |
"source": [ | |
"%memit SD.rdlevenshtein(test_strings[0], test_strings[1])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Levenshtein: 7\n", | |
"jellyfish levenshtein: 7\n", | |
"stringdist levenshtein: 7\n", | |
"pyxdameraulevenshtein: 5\n", | |
"jellyfish damerau-levenshtein: 5\n", | |
"stringdist damerau-levenshtein: 5\n" | |
] | |
} | |
], | |
"source": [ | |
"test_strings = (\"the most difficult thing is the decision to act, the rest is merely tenacity\",\n", | |
" \"teh most difficult thing is the decsion to act, the reast is merely teancty\")\n", | |
"\n", | |
"output_test_distances(test_strings)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"9.67 µs ± 105 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit\n", | |
"\n", | |
"Lev.distance(test_strings[0], test_strings[1])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"15.1 µs ± 940 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit\n", | |
"\n", | |
"JF.levenshtein_distance(test_strings[0], test_strings[1])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"47.5 µs ± 791 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit\n", | |
"\n", | |
"SD.levenshtein(test_strings[0], test_strings[1])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"399 µs ± 4.64 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit\n", | |
"\n", | |
"pyxLev.damerau_levenshtein_distance(test_strings[0], test_strings[1])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"35.2 µs ± 227 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit\n", | |
"\n", | |
"JF.damerau_levenshtein_distance(test_strings[0], test_strings[1])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"46.5 µs ± 516 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit\n", | |
"\n", | |
"SD.rdlevenshtein(test_strings[0], test_strings[1])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"peak memory: 57.87 MiB, increment: 0.02 MiB\n" | |
] | |
} | |
], | |
"source": [ | |
"%memit Lev.distance(test_strings[0], test_strings[1])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"peak memory: 57.95 MiB, increment: 0.00 MiB\n" | |
] | |
} | |
], | |
"source": [ | |
"%memit JF.levenshtein_distance(test_strings[0], test_strings[1])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"peak memory: 57.95 MiB, increment: 0.00 MiB\n" | |
] | |
} | |
], | |
"source": [ | |
"%memit SD.levenshtein(test_strings[0], test_strings[1])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"peak memory: 57.95 MiB, increment: 0.00 MiB\n" | |
] | |
} | |
], | |
"source": [ | |
"%memit pyxLev.damerau_levenshtein_distance(test_strings[0], test_strings[1])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 27, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"peak memory: 57.96 MiB, increment: 0.01 MiB\n" | |
] | |
} | |
], | |
"source": [ | |
"%memit JF.damerau_levenshtein_distance(test_strings[0], test_strings[1])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 28, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"peak memory: 57.96 MiB, increment: 0.00 MiB\n" | |
] | |
} | |
], | |
"source": [ | |
"%memit SD.rdlevenshtein(test_strings[0], test_strings[1])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.4" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment