Skip to content

Instantly share code, notes, and snippets.

@DavidMStraub
Created October 6, 2018 18:00
Show Gist options
  • Save DavidMStraub/3fa7c90307635f49d43f2f5e55ea3c8c to your computer and use it in GitHub Desktop.
Save DavidMStraub/3fa7c90307635f49d43f2f5e55ea3c8c to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import glob\n",
"from pathlib import Path\n",
"\n",
"import Levenshtein\n",
"import matplotlib.pyplot as plt\n",
"\n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Map each word-list name (file name without directory or extension) to the\n",
"# list of lines read from that file.\n",
"wordlists = {}\n",
"# glob returns files in arbitrary order; sorting keeps the dict order, and\n",
"# therefore the plot legend order, the same on every run.\n",
"fs = sorted(glob.glob('./*.txt'))\n",
"for f in fs:\n",
"    with open(f) as _f:\n",
"        # Path.stem drops the directory and only the final suffix, so a file\n",
"        # named 'a.b.txt' is keyed as 'a.b' instead of being cut at the first dot.\n",
"        wordlists[Path(f).stem] = _f.read().splitlines()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"def distances(wordlist):\n",
"    \"\"\"Return the Levenshtein distance of every ordered pair of differing words.\n",
"\n",
"    Each word is paired with every word in `wordlist`, in both orders, so an\n",
"    unordered pair contributes two entries. Pairs whose words compare equal\n",
"    are skipped before any distance is computed.\n",
"    \"\"\"\n",
"    result = []\n",
"    for first in wordlist:\n",
"        for second in wordlist:\n",
"            if first == second:\n",
"                continue\n",
"            result.append(Levenshtein.distance(first, second))\n",
"    return result"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# `distances` names the helper function until this cell rebinds it to the\n",
"# results dict. Keep a separate handle on the function so that re-running this\n",
"# cell recomputes the results instead of trying to call the dict left behind\n",
"# by the previous run.\n",
"if callable(distances):\n",
"    pairwise_distances = distances\n",
"distances = {name: pairwise_distances(words) for name, words in wordlists.items()}"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD8CAYAAAB5Pm/hAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4xLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvAOZPmwAAGktJREFUeJzt3X90VPW57/H3QwgEEeFU1KUJlJwFC0gbwThBCspC6cFw+XVK8QDqKbnLmvoDlqfWcrjtbU9aWZZ2oXJEasXKxbYoIniqEfyFlxQVOCY0oGCUX8ZjikeotCgFFPC5f8yQG2AmTDKT2cnm81orKzPf2Xs/z0R8svPs73y3uTsiIhJeHYJOQEREWpcKvYhIyKnQi4iEnAq9iEjIqdCLiIScCr2ISMip0IuIhJwKvYhIyKnQi4iEXMegEwDo2bOn9+nTJ+g0RETalU2bNv3Z3S8403aBFnozGw+M79u3L9XV1UGmIiLS7pjZ+8lsF2jrxt0r3L2se/fuQaYhIhJq6tGLiIScCr2ISMi1iYux8Rw9epT6+nqOHDkSdCptWk5ODnl5eWRnZwedioi0UW220NfX19OtWzf69OmDmQWdTpvk7nz88cfU19eTn58fdDoi0ka12dbNkSNHOP/881Xkm2BmnH/++fqrR0SaFGihN7PxZrbowIEDiV7PcEbtj35GInImml4pIhJybbZHf6o+s1el9Xh1c8eecZsHHniAhx56iKKiIpYuXZrW+ABLliyhurqaBx98MO3HlvSqHTAw7vjAd2oznIlI87WbQh+EX/7ylzz//PMnXeg8duwYHTvqxyYi7UebvRgbtFtuuYXdu3czYcIEunfvTllZGaNHj+Zb3/oWx48f5/vf/z7FxcVceumlPPzwwwBUVlYycuRIJk+ezIABA7jhhhtwdwCqqqoYNmwYgwYNYsiQIXz66acA7Nmzh5KSEvr168esWbMCe78iEl46NU3gV7/6FS+88AJr167lwQcfpKKigtdee40uXbqwaNEiunfvTlVVFZ999hnDhw9n9OjRANTU1LBt2zYuueQShg8fzuuvv86QIUOYMmUKTz75JMXFxXzyySd06dIFgM2bN1NTU0Pnzp3p378/M2fOpFevXkG+dREJGRX6JE2YMKGhOL/00ku8+eabrFixAoADBw6wY8cOOnXqxJAhQ8jLywNg8ODB1NXV0b17dy6++GKKi4sBOO+88xqOO2rUKE5cjC4oKOD9999XoReRtFKhT1LXrl0bHrs7CxYs4Nprrz1pm8rKSjp37tzwPCsri2PHjuHuCadBxtteBJp/ATjRhIVkJh5IuKnQt8C1117LQw89xDXXXEN2djbbt28nNzc34fYDBgxgz549VFVVUVxczKefftrw14EEqDzOtN7y+J/paK6ERTfn+tPGCvN7x912eVoySazwscLTxt6a/lYrR5UgpL3Qm1kH4G7gPKDa3R9Lx3Hb0lnJt7/9berq6igqKsLdueCCC/j973+fcPtOnTrx5JNPMnPmTA4fPkyXLl1Ys2ZNBjMWkbNZUoXezBYD44C97v7VRuMlwL8DWcCv3X0uMBHIBfYD9WnPOIPq6uoAKC8vP2m8Q4cO3HPPPdxzzz0njY8cOZKRI0c2PG88P764uJiNGzeetH1paSmlpaUNz5977rm05C0i0liy0yuXACWNB8wsC1gIjAEKgGlmVgD0Bza4+53ArelLVUREWiKpQu/u64ieoTc2BNjp7rvd/XNgGdGz+XrgL7FtjqcrURERaZlUPjCVC3zQ6Hl9bOxp4FozWwCsS7SzmZWZWbWZVe/bty+FNEREpCmpXIyNN1/Q3f0QcNOZdnb3RcAigEgk4inkISIiTUjljL4eaPzJnjxgT3MOcKZlikVEJHWpnNFXAf3MLB/4EzAVOH2ScBPcvQKoiEQiN6eQh5xltJKkSPMkO73yCWAk0NPM6oF/c/dHzWwG8CLR6Z
WL3X1bc4Kb2XhgfN++fc+8cbwPt6QiTR+MSUZdXR3jxo1j69atVFdX85vf/IYHHngg7raVlZXMmzdPUy1FJG2SKvTuPi3B+GpgdUuDn41n9JFIhEgkEnQaInIWadO3EmwLfve73zFkyBAGDx7Md77zHY4fP865557LD3/4QwYNGsTQoUP56KOPANi1axdDhw6luLiYH//4x5x77rmnHa+yspJx48YB8Ic//IHBgwczePBgLrvssoaliw8ePBh3qePQKu8e/0tE0kK3EmxCbW0tTz75JK+//jqbN28mKyuLpUuX8re//Y2hQ4eyZcsWRowYwSOPPALAHXfcwR133EFVVRWXXHLJGY8/b948Fi5cyObNm3n11Vcb1r+pqalh/vz5vP322+zevZvXX3+9Vd+n/H+FjxXG/RJpz7SoWRNeeeUVNm3a1LC88OHDh7nwwgvp1KlTw1n55ZdfzssvvwzAhg0bGta8uf7667nrrruaPP7w4cO58847ueGGG5g0aVLD8sbxljq+8sorW+U9ng0SLzCW4UREAhJooW/WxdgAuDvTp0/nZz/72Unj8+bNa1h2OJWlhWfPns3YsWNZvXo1Q4cObVjoTEsXS1olaoMlWDVTwifQQt/WL8aOGjWKiRMn8t3vfpcLL7yQ/fv3N/TR4xk6dCgrV65kypQpLFu27IzH37VrF4WFhRQWFrJhwwbeeecdevTokc63INKqmrsGfqI2mJZHbl3tp3WTwemQJxQUFDBnzhxGjx7NF198QXZ2NgsXLky4/fz587nxxhu59957GTt2LGe69jB//nzWrl1LVlYWBQUFjBkzhg0bNqT7bYjIWa79FPqATJkyhSlTppw0dvDgwYbHkydPZvLkyQDk5uayceNGzIxly5Y1TKPs06cPW7duBU5eynjBggWnxWtqqWMRkZZQjz6NNm3axIwZM3B3evToweLFi4NOSUREPfp0uuqqq9iyZUvQaYiInCTQefQiItL6VOhFREJOSyCIiISclkAQEQm5djO9Mt3rjSTzAY1hw4axfv166urqWL9+Pddf3/Ry+81ZjlhEJFPUo2/C+vXrgWgBf/zxx5u1byQSUZEXkTZBhb4JJ5YZnj17Nq+++iqDBw/m/vvvp66ujquuuoqioiKKiooafiE01ng54jfeeINhw4Zx2WWXMWzYMN59910AlixZwqRJkygpKaFfv37MmjUrc29ORM4a+sBUEubOnXvSXZ8OHTrEyy+/TE5ODjt27GDatGlUV1cn3H/AgAGsW7eOjh07smbNGn7wgx+wcuVKADZv3kxNTQ2dO3emf//+zJw5k169eiU8lohIc+kDUy1w9OhRZsyY0bBG/fbt25vc/sCBA0yfPp0dO3ZgZhw9erThtVGjRjWsiVNQUMD777+vQi8iaaXWTQvcf//9XHTRRWzZsoXq6mo+//zzJrf/0Y9+xNVXX83WrVupqKjgyJEjDa9pSWIRaW0q9Eno1q3bScsTHzhwgIsvvpgOHTrw29/+luPHjze5/4EDB8jNzQWifXkRkUxqN9Mrg1yv+tJLL6Vjx44MGjSI0tJSbrvtNr75zW/y1FNPcfXVV9O1a9cm9581axbTp0/nvvvu45prrslQ1iIiUe2m0AfhxHLE2dnZvPLKKye99uabbzY8PnEHqkTLEX/ta187qY9/9913A1BaWkppaWnD+ImLvRKV6LMTyzOch0h7p9aNiEjIaa0bEZGQ01o3IiIhpx69nKw8wS/dBPfsrR0wMO74wHdq447Hu5l0XU5yqYlIy6hHLyIScir0IiIh125aN4laBC2VqLUgIhI2OqNvJVrKQETainZzRh+Uu+++m6VLl9KrVy969uzJ5Zdfzje+8Q1uv/129u3bxznnnMMjjzzCgAEDKC0t5Utf+hI1NTUUFRXRrVs33nvvPT788EO2b9/Offfdx8aNG3n++efJzc2loqKC7OxsfvrTn1JRUcHhw4cZNmwYDz/8MGbGyJEjueKKK1i7di
1//etfefTRR7nqqquC/pGISDujM/omVFdXs3LlSmpqanj66acbliIuKytjwYIFbNq0iXnz5nHbbbc17LN9+3bWrFnDvffeC8CuXbtYtWoVzzzzDDfeeCNXX301b731Fl26dGHVqugMlBkzZlBVVcXWrVs5fPjwSZ+QPXbsGG+88Qbz58/nJz/5SQbfvYiERdrP6M1sJHA3sA1Y5u6V6Y6RKa+99hoTJ06kS5cuAIwfP54jR46wfv16rrvuuobtPvvss4bH1113HVlZWQ3Px4wZQ3Z2NoWFhRw/fpySkhIACgsLqaurA2Dt2rX84he/4NChQ+zfv5+vfOUrjB8/HoBJkyYBcPnllzdsLyLSHEkVejNbDIwD9rr7VxuNlwD/DmQBv3b3uYADB4EcoD7tGWeQu5829sUXX9CjRw82b94cd59TFzg7sQxxhw4dyM7Oxswanh87dowjR45w2223UV1dTa9evSgvL4+7jLGWMBaRlkq2dbMEKGk8YGZZwEJgDFAATDOzAuBVdx8D/CvQrnsNV155ZcP68QcPHmTVqlWcc8455Ofn89RTTwHRXwZbtmxpcYwTRb1nz54cPHiQFStWpCV3EZETkjqjd/d1ZtbnlOEhwE533w1gZsuAie7+duz1vwCdSZMgpkMWFxczYcIEBg0axJe//GUikQjdu3dn6dKl3HrrrcyZM4ejR48ydepUBg0a1KIYPXr04Oabb6awsJA+ffpQXFyc5nchIme7VHr0ucAHjZ7XA1eY2STgWqAH8GCinc2sDCgD6N27dwpptK677rqL8vJyDh06xIgRI/je975Hfn4+L7zwwmnbnnpTkfLy8pOen1j2+NTX5syZw5w5c047XmVlZcPjnj17qkcvIi2SSqG3OGPu7k8DT59pZ3dfBCwCiEQipzfD24iysjLefvttjhw5wvTp0ykqKgo6JRGRZkml0NcDje9inQfsac4BzGw8ML5v374ppNG6Hn/88aBTEBFJSSqFvgroZ2b5wJ+AqcD1zTmAu1cAFZFI5OYErzfMUpH44s0MEglcolVQ81Nv08ZbARWgbu7YlI8dVknNujGzJ4ANQH8zqzezm9z9GDADeBGoBZa7+7bmBG/qxiM5OTl8/PHHKmRNcHc+/vhjcnK0zq+IJJbsrJtpCcZXA6tbGrypM/q8vDzq6+vZt29fSw9/VsjJySEvLy/oNESkDWuza91kZ2eTn58fdBoiIu1eoIW+PVyMFZH2rfCxwrjjb01/K8OZBEf3jBURCTmtXikiEnKBFvqmZt2IiEh6qHUjIhJyat2IiIScWjciIiEX6PTKMy2BIG1HoilqyzOch4g0n1o3IiIhp0IvIhJy6tGLiIScpleKiIScWjciIiGnQi8iEnJtdpliaX3x7tRTp3uYiISOLsaKiIScLsaKiIScevQiIiGnQi8iEnK6GNuWlSdoaZXrmoaIJE9n9CIiIacz+hCpHTAw7vjAd2oznImItCU6oxcRCTnNoxcRCTnNoxcRCTm1bkREQk6FXkQk5FToRURCToVeRCTkVOhFREJOhV5EJORU6EVEQq5VCr2ZdTWzTWY2rjWOLyIiyUuq0JvZYjPba2ZbTxkvMbN3zWynmc1u9NK/AsvTmaiIiLRMsmf0S4CSxgNmlgUsBMYABcA0Mysws68DbwMfpTFPERFpoaRWr3T3dWbW55ThIcBOd98NYGbLgInAuUBXosX/sJmtdvcvTj2mmZUBZQC9e/duaf4iInIGqSxTnAt80Oh5PXCFu88AMLNS4M/xijyAuy8CFgFEIhFPIQ8REWlCKoXe4ow1FGx3X3LGA5iNB8b37ds3hTRERKQpqcy6qQd6NXqeB+xpzgG0eqWISOtLpdBXAf3MLN/MOgFTgWfTk5aIiKRLstMrnwA2AP3NrN7MbnL3Y8AM4EWgFlju7tuaE1w3HhERaX3JzrqZlmB8NbC6pcHdvQKoiEQiN7f0GCIi0jTdSlBEJOR0K0ERkZDTomYiIiGn1o
2ISMipdSMiEnJq3YiIhFwqSyCkTEsgtEzhY4Vxx7UutIjEE2ih1zx6EWkP+sxeFXe8bu7YDGfSMmrdiIiEnAq9iEjIaXqliEjIqUffBiTs/+VkOBERCSW1bkREQk6FXkQk5FToRURCToVeRCTkNOtGRCTktKiZiEjIqXUjIhJyKvQiIiGnQi8iEnIq9CIiIadCLyIScppeKSIScppeKSIScmrdiIiEnAq9iEjIBboefbtSnqC9VB7/+kLtgIFxxwe+U5uujEREkqIzehGRkFOhFxEJORV6EZGQU6EXEQm5tBd6MxtoZr8ysxVmdmu6jy8iIs2TVKE3s8VmttfMtp4yXmJm75rZTjObDeDute5+C/BPQCT9KYuISHMke0a/BChpPGBmWcBCYAxQAEwzs4LYaxOA14BX0papiIi0SFKF3t3XAftPGR4C7HT33e7+ObAMmBjb/ll3HwbckM5kRUSk+VL5wFQu8EGj5/XAFWY2EpgEdAZWJ9rZzMqAMoDevXunkIaIiDQllUJvccbc3SuByjPt7O6LgEUAkUjEU8hDRESakMqsm3qgV6PnecCe5hxAyxSLiLS+VAp9FdDPzPLNrBMwFXi2OQfQMsUiIq0v2emVTwAbgP5mVm9mN7n7MWAG8CJQCyx3923NCa4zehGR1pdUj97dpyUYX00TF1yTOG4FUBGJRG5u6TFERKRpupWgiEjI6VaCIiIhp0XNRERCTq0bEZGQU+tGRCTk1LoREQk5FXoRkZBTj15EJOTUoxcRCTm1bkREQk6FXkQk5NSjFxEJOfXoRURCTq0bEZGQU6EXEQk5FXoRkZDTxVgRkZDTxVgRkZBT60ZEJORU6EVEQk6FXkQk5FToRURCToVeRCTkNL1SRCTkNL1SRCTk1LoREQk5FXoRkZBToRcRCTkVehGRkFOhFxEJORV6EZGQU6EXEQm5Vin0ZvaPZvaImT1jZqNbI4aIiCQn6UJvZovNbK+ZbT1lvMTM3jWznWY2G8Ddf+/uNwOlwJS0ZiwiIs3SnDP6JUBJ4wEzywIWAmOAAmCamRU02uR/x14XEZGAJF3o3X0dsP+U4SHATnff7e6fA8uAiRb1c+B5d/9j+tIVEZHmSrVHnwt80Oh5fWxsJvB1YLKZ3RJvRzMrM7NqM6vet29fimmIiEgiHVPc3+KMubs/ADzQ1I7uvghYBBCJRDzFPEREJIFUz+jrgV6NnucBe5LdWcsUi4i0vlQLfRXQz8zyzawTMBV4NtmdtUyxiEjra870yieADUB/M6s3s5vc/RgwA3gRqAWWu/u2ZhxTZ/QiIq0s6R69u09LML4aWN2S4O5eAVREIpGbW7K/iIicmZZAEBEJOd0zVkQk5HTPWBGRkFPrRkQk5NS6EREJObVuRERCTq0bEZGQU+tGRCTk1LoREQk5tW5EREJOhV5EJOTUoxcRCblUbzySkjAsalb4WGHc8eUZzkNEJBG1bkREQk6FXkQk5FToRURCThdjRURCTh+YEhEJObVuRERCToVeRCTkVOhFREJOhV5EJOTM3YPOATP7FHg3wBR6An8OMH5byOFsj98Wcjjb47eFHNpb/C+7+wVn2ijQJRAaedfdI0EFN7PqIOO3hRzO9vhtIYezPX5byCGs8dW6EREJORV6EZGQayuFftFZHh+Cz+Fsjw/B53C2x4fgcwhl/DZxMVZERFpPWzmjFxGRVhJ4oTezEjN718x2mtnsDMdebGZ7zWxrJuM2it/LzNaaWa2ZbTOzOwLIIcfM3jCzLbEcfpLpHGJ5ZJlZjZk9F0DsOjN7y8w2m1l1puPHcuhhZivM7J3Yv4evZTB2/9h7P/H1iZn9S6bix3L4buzf31Yze8LMcjIZP5bDHbH42zLx/uPVHzP7kpm9bGY7Yt//Li3B3D2wLyAL2AX8PdAJ2AIUZDD+CKAI2BrQ+78YKIo97gZsz+T7j8U14NzY42zgP4GhAfws7gQeB54LIHYd0DOIfwONcngM+HbscSegR0
B5ZAH/TXR+dqZi5gLvAV1iz5cDpRl+318FtgLnEJ12vgbo18oxT6s/wC+A2bHHs4GfpyNW0Gf0Q4Cd7r7b3T8HlgETMxXc3dcB+zMVL078D939j7HHnwK1RP/RZzIHd/eDsafZsa+MXrgxszxgLPDrTMZtK8zsPKL/0z8K4O6fu/tfA0pnFLDL3d/PcNyOQBcz60i02O7JcPyBwEZ3P+Tux4A/AN9ozYAJ6s9Eor/0iX3/x3TECrrQ5wIfNHpeT4YLXVthZn2Ay4ieUWc6dpaZbQb2Ai+7e6ZzmA/MAr7IcNwTHHjJzDaZWVkA8f8e2Af8n1j76tdm1jWAPACmAk9kMqC7/wmYB/wX8CFwwN1fymQORM/mR5jZ+WZ2DvA/gF4ZzgHgInf/EKIngsCF6Tho0IXe4oydddOAzOxcYCXwL+7+Sabju/txdx8M5AFDzOyrmYptZuOAve6+KVMx4xju7kXAGOB2MxuR4fgdif4J/5C7Xwb8jeif7RllZp2ACcBTGY77d0TPZPOBS4CuZnZjJnNw91rg58DLwAtE28jHMplDawq60Ndz8m/NPDL/J1ugzCybaJFf6u5PB5lLrF1QCZRkMOxwYIKZ1RFt3V1jZr/LYHzcfU/s+17gP4i2FDOpHqhv9JfUCqKFP9PGAH90948yHPfrwHvuvs/djwJPA8MynAPu/qi7F7n7CKItlR2ZzgH4yMwuBoh935uOgwZd6KuAfmaWHzubmAo8G3BOGWNmRrQvW+vu9wWUwwVm1iP2uAvR/+neyVR8d/9f7p7n7n2I/vf/v+6esbM5M+tqZt1OPAZGE/0zPmPc/b+BD8ysf2xoFPB2JnOImUaG2zYx/wUMNbNzYv9PjCJ6vSqjzOzC2PfewCSC+Vk8C0yPPZ4OPJOOgwa6qJm7HzOzGcCLRK/2L3b3bZmKb2ZPACOBnmZWD/ybuz+aqfhEz2b/GXgr1iMH+IG7r85gDhcDj5lZFtFf/MvdPeNTHAN0EfAf0fpCR+Bxd38hgDxmAktjJzy7gf+ZyeCxvvQ/AN/JZFwAd/9PM1sB/JFou6SGYD6hutLMzgeOAre7+19aM1i8+gPMBZab2U1EfwFel5ZYsWk8IiISUkG3bkREpJWp0IuIhJwKvYhIyKnQi4iEnAq9iEjIqdCLiIScCr2ISMip0IuIhNz/A+T2o7v7IwhtAAAAAElFTkSuQmCC\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x7f17e654bc50>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Materialise the dict views as lists so matplotlib receives plain sequences:\n",
"# one list of distances and one label per word list, in matching order.\n",
"names = list(distances.keys())\n",
"samples = [distances[name] for name in names]\n",
"\n",
"# Distances are integers, so use unit-width bins centred on each integer; every\n",
"# bar then lines up with its tick. The axis range comes from the data rather\n",
"# than a fixed 0-10.\n",
"max_distance = max((max(sample, default=0) for sample in samples), default=0)\n",
"bin_edges = [edge - 0.5 for edge in range(max_distance + 2)]\n",
"\n",
"plt.hist(samples, bins=bin_edges, label=names)\n",
"plt.legend()\n",
"plt.xticks(range(max_distance + 1))\n",
"plt.xlabel('Levenshtein distance')\n",
"plt.ylabel('Number of ordered word pairs')\n",
"# Counts span several orders of magnitude, hence the log scale.\n",
"plt.yscale('log')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment