Skip to content

Instantly share code, notes, and snippets.

@davidmcclure
Last active June 25, 2018 18:18
Show Gist options
  • Save davidmcclure/27eac3697b2c07f9c638086a382e8fc8 to your computer and use it in GitHub Desktop.
Save davidmcclure/27eac3697b2c07f9c638086a382e8fc8 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# LSTM classifier on 5-word a/the suffixes"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"from geovec.models.a_the import Trainer"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib as mpl\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"\n",
"# sns.set() installs its own colour palette, which overwrote the colours set by\n",
"# mpl.style.use('seaborn-muted'); that style name is also deprecated in\n",
"# matplotlib 3.6 and later removed. Request the muted palette through seaborn.\n",
"sns.set(style=\"whitegrid\", palette=\"muted\")\n",
"\n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Raise the row cap so the long ranked tables further down are displayed in\n",
"# full rather than truncated by pandas.\n",
"pd.set_option('display.max_rows', 1000)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100000it [00:00, 252791.50it/s]\n",
"10000it [00:00, 270937.62it/s]\n"
]
}
],
"source": [
"# Build the trainer from the train/test JSON files.\n",
"# NOTE(review): 100000 and 10000 presumably cap the number of train/test\n",
"# examples read (they match the 100000it / 10000it progress output) - confirm\n",
"# against Trainer.\n",
"t = Trainer(\n",
"    '../data/a-the/train0.json',\n",
"    '../data/a-the/test0.json',\n",
"    100000,\n",
"    10000,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# NOTE(review): 10 is presumably the number of training epochs - confirm\n",
"# against Trainer.train.\n",
"t.train(10)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 200/200 [00:11<00:00, 17.69it/s]\n"
]
}
],
"source": [
"# Collect the trainer's predictions; the plotting cells below read the root\n",
"# and score columns of this frame.\n",
"df = t.pred_df()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlQAAAI+CAYAAAB695QVAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAGfRJREFUeJzt3X+M5PV93/HX4QNOSo+LU+o4Kk5J1fRTt5HrdiogEOAq4WAcUyJUtRaJUPqDWiqt7RgJl0BsWmFbTm2cktZNexTRSjlVKcTCpiVOW8f0TGxRT5wqNORjgUoS2jpyiMHnKhDgrn/s0K6Pvb25ec/OfGfn8ZCQZr87+93Pzedu98nnM/OdPcePHw8AALM7Y9kDAABYdYIKAKBIUAEAFAkqAIAiQQUAUCSoAACK9i7zm4/HY9dsAABWxmg02rPV8aUGVZKMRqO5nm88Hs/9nNSZl+EyN8NkXobJvAzXIuZmPB6f9HO2/AAAigQVAECRoAIAKBJUAABFggoAoEhQAQAUCSoAgCJBBQBQJKgAAIoEFQBAkaACACgSVAAARYIKAKBIUAEAFAkqAIAiQQUAUCSoAACKBBUAQJGgAgAoElQAAEWCCgCgSFABABQJKgCAIkEFAFC0d5o7tdYuTPKR3vvB1trrkhxK8tokr0lyfe/9ydbaDUnemeSlJHf03h/cqUEDAOvr6pseeNWx2687bwkj+f9OuULVWrs5yd1J9k0O/VSSn+u9X5bktiR/prX2+iTvSnJJkiuTfLi1dvbODBkAYFim2fJ7Msm1mz6+JMl5rbX/lORHknwuyQVJHum9v9B7fy7JE0neNOexAgAM0im3/Hrv97fWzt906PwkX++9X9Fae3+S9yX5SpLnNt3naJID0wxgPB5PPdhp7cQ5qTMvw2Vuhsm8DJN5Ga5lzs1Uz6E6wTNJPjW5/ekkH0zypST7N91nf5JnpznZaDSaYQgnNx6P535O6szLcJmbYTIvw2ReBuLw01se3um52S7YZnmV3+eTvG1y+7Ik/z3Jo0kuba3ta60dSPLGJI/NcG4AgJUzS1DdlOT61tqvJHlrkg/13r+a5K4kR5J8Nsmtvffn5zdMAIDhmmrLr/f+VJKLJrd/K8lbtrjPoWxcTgEAYK24sCcAQJGgAgAoElQAAEWCCgCgSFABABQJKgCAIkEFAFAkqAAAigQVAECRoAIAKBJUAABFggoAoEhQAQAUCSoAgCJBBQBQJKgAAIoEFQBAkaACACgSVAAARYIKAKBIUAEAFAkqAIAiQQUAUCSoAACKBBUAQJGgAgAoElQAAEWCCgCgSFABABQJKgCAIkEFAFAkqAAAigQVAECRoAIAKBJUAABFggoAoEhQAQAUCSoAgCJBBQBQJKgAAIoEFQBAkaACACgSVAAARYIKAKBIUAEAFAkqAIAiQQUAUCSoAACKBBUAQJGgAgAoElQAAEWCCgCgSFABABQJKgCAIkEFAFAkqAAAiqYKqtbaha21z51w7LrW2hc2fXxDa+1LrbUvttbePudxAgAM1imDqrV2c5K7k+zbdOwvJPlbSfZMPn59kncluSTJlUk+3Fo7eycGDAAwNNOsUD2Z5NpXPmit/dEkH0rynk33uSDJI733F3rvzyV5Ismb5jlQAIChOmVQ9d7vT/JikrTWXpPkXyV5b5Kjm+52TpLnNn18NMmB+Q0TAGC49p7m/UdJvjfJP8/GFuCfba39dJLPJtm/6X77kzw7zQnH4/FpDmE556TOvAyXuRkm8zJM5mW4ljk3pxVUvfdHk/y5JGmtnZ/k3/be3zN5DtUHW2v7kpyd5I1JHpvmnKPR6LQGfCrj8Xju56TOvAyXuRkm8zJM5mUgDj+95eGdnpvtgm0ul03ovX81yV1JjmRjterW3vvz8zg3AMDQTbVC1Xt/KslF2x3rvR9KcmiOYwMAWAku7AkAUCSoAACKBBUAQJGgAgAoElQAAE
WCCgCgSFABABQJKgCAIkEFAFAkqAAAigQVAECRoAIAKBJUAABFggoAoEhQAQAUCSoAgCJBBQBQJKgAAIoEFQBAkaACACgSVAAARYIKAKBIUAEAFAkqAIAiQQUAUCSoAACKBBUAQJGgAgAoElQAAEWCCgCgSFABABQJKgCAIkEFAFAkqAAAigQVAECRoAIAKBJUAABFggoAoEhQAQAUCSoAgCJBBQBQJKgAAIoEFQBAkaACACgSVAAARYIKAKBIUAEAFAkqAIAiQQUAUCSoAACKBBUAQJGgAgAoElQAAEWCCgCgSFABABQJKgCAor3T3Km1dmGSj/TeD7bW3pzkZ5K8nOSFJNf33n+3tXZDkncmeSnJHb33B3dq0AAAQ3LKFarW2s1J7k6yb3LonyT5+733g0l+Icn7WmuvT/KuJJckuTLJh1trZ+/IiAEABmaaLb8nk1y76eN39N5/bXJ7b5Lnk1yQ5JHe+wu99+eSPJHkTXMdKQDAQJ0yqHrv9yd5cdPH/ztJWmsXJ/l7ST6e5Jwkz236sqNJDsx1pAAAAzXVc6hO1Fr760luTfJDvfevtda+kWT/prvsT/LsNOcaj8ezDGHh56TOvAyXuRkm8zJM5mW4ljk3px1UrbUfzcaTzw/23n9/cvjRJB9sre1LcnaSNyZ5bJrzjUaj0x3Ctsbj8dzPSZ15GS5zM0zmZZjMy0AcfnrLwzs9N9sF22kFVWvtNUnuSvLbSX6htZYkD/feP9BauyvJkWxsI97ae39+5hEDAKyQqYKq9/5UkosmH37HSe5zKMmh+QwLAGB1uLAnAECRoAIAKBJUAABFggoAoEhQAQAUCSoAgCJBBQBQJKgAAIoEFQBAkaACACgSVAAARYIKAKBIUAEAFAkqAIAiQQUAUCSoAACKBBUAQJGgAgAoElQAAEWCCgCgSFABABQJKgCAIkEFAFAkqAAAigQVAECRoAIAKBJUAABFggoAoEhQAQAUCSoAgCJBBQBQJKgAAIoEFQBAkaACACgSVAAARYIKAKBIUAEAFAkqAIAiQQUAUCSoAACKBBUAQJGgAgAoElQAAEWCCgCgSFABABQJKgCAIkEFAFAkqAAAigQVAECRoAIAKBJUAABFggoAoEhQAQAUCSoAgCJBBQBQJKgAAIoEFQBA0d5p7tRauzDJR3rvB1trfyrJvUmOJ3ksyY2992OttQ8k+aEkLyV5T+/90R0aMwDAoJxyhaq1dnOSu5Psmxy6M8ltvfdLk+xJck1r7S8muTzJhUnekeSf7cxwAQCGZ5otvyeTXLvp41GShye3H0pyRZIfSPJLvffjvfffTrK3tfbH5jpSAICBOuWWX+/9/tba+ZsO7em9H5/cPprkQJJzkjyz6T6vHP/aqc4/Ho+nHuy0duKc1JmX4TI3w2Rehsm8DNcy52aq51Cd4Nim2/uTPJvkG5PbJx4/pdFoNMMQTm48Hs/9nNSZl+EyN8NkXobJvAzE4ae3PLzTc7NdsM3yKr8vt9YOTm5fleRIkkeSXNlaO6O19t1Jzui9/94M5wYAWDmzrFDdlORQa+2sJI8nua/3/nJr7UiSL2Qj0m6c4xgBAAZtqqDqvT+V5KLJ7a9k4xV9J97n9iS3z29oAACrwYU9AQCKBBUAQJGgAgAoElQAAEWCCgCgSFABABQJKgCAIkEFAFAkqAAAigQVAECRoAIAKBJUAABFggoAoEhQAQAUCSoAgCJBBQBQJKgAAIoEFQBAkaACACgSVAAARYIKAKBIUAEAFAkqAIAiQQUAUCSoAACKBBUAQJGgAgAoElQAAEWCCgCgSFABABQJKgCAIkEFAFAkqAAAigQVAECRoAIAKBJUAABFggoAoEhQAQAUCSoAgCJBBQBQJKgAAIoEFQBAkaACACgSVAAARYIKAKBIUAEAFAkqAIAiQQUAUCSoAACKBBUAQJGgAgAoElQAAEWCCgCgSFABABQJKgCAIkEFAFC0d5Yvaq2dmeRfJzk/yctJbkjyUp
J7kxxP8liSG3vvx+YySgCAAZt1heptSfb23i9O8o+SfDDJnUlu671fmmRPkmvmM0QAgGGbNai+kmRva+2MJOckeTHJKMnDk88/lOSK+vAAAIZvpi2/JN/MxnbfbyY5N8nbk1zWez8++fzRJAfKowMAWAGzBtWPJ/lM7/2W1tobknw2yVmbPr8/ybPTnGg8Hs84hMWekzrzMlzmZpjMyzCZl+Fa5tzMGlRfz8Y2X5L8fpIzk3y5tXaw9/65JFcl+eVpTjQajWYcwtbG4/Hcz0mdeRkuczNM5mWYzMtAHH56y8M7PTfbBdusQfXxJPe01o5kY2XqJ5J8Kcmh1tpZSR5Pct+M5wYAWCkzBVXv/ZtJ/toWn7q8NhwAgNXjwp4AAEWCCgCgSFABABQJKgCAIkEFAFAkqAAAigQVAECRoAIAKBJUAABFggoAoEhQAQAUCSoAgCJBBQBQJKgAAIoEFQBAkaACACgSVAAARYIKAKBIUAEAFAkqAIAiQQUAUCSoAACKBBUAQJGgAgAoElQAAEWCCgCgSFABABQJKgCAIkEFAFAkqAAAigQVAECRoAIAKBJUAABFggoAoEhQAQAUCSoAgCJBBQBQJKgAAIoEFQBAkaACACgSVAAARYIKAKBIUAEAFAkqAIAiQQUAUCSoAACKBBUAQJGgAgAoElQAAEWCCgCgSFABABQJKgCAIkEFAFAkqAAAigQVAECRoAIAKNo76xe21m5J8leSnJXkE0keTnJvkuNJHktyY+/92BzGCAAwaDOtULXWDia5OMklSS5P8oYkdya5rfd+aZI9Sa6Z0xgBAAZt1i2/K5P8epJPJvl0kgeTjLKxSpUkDyW5ojw6AIAVMOuW37lJ/kSStyf5niSfSnJG7/345PNHkxyoDw8AYPhmDapnkvxm7/0Pk/TW2vPZ2PZ7xf4kz05zovF4POMQFntO6szLcJmbYTIvw2RehmuZczNrUH0+ybtba3cm+a4k35bkP7fWDvbeP5fkqiS/PM2JRqPRjEPY2ng8nvs5qTMvw2Vuhsm8DJN5GYjDT295eKfnZrtgmymoeu8PttYuS/JoNp6HdWOS/5HkUGvtrCSPJ7lvlnMDAKyamS+b0Hu/eYvDlxfGAgCwklzYEwCgSFABABTNvOUHALCTrr7pgWUPYWpWqAAAigQVAECRoAIAKBJUAABFggoAoEhQAQAUCSoAgCJBBQBQJKgAAIoEFQBAkaACACgSVAAARYIKAKBIUAEAFAkqAIAiQQUAUCSoAACKBBUAQJGgAgAoElQAAEWCCgCgSFABABTtXfYAgG919U0PvOrYpz92zRJGAsC0rFABABQJKgCAIkEFAFAkqAAAigQVAECRoAIAKBJUAABFggoAoEhQAQAUCSoAgCJBBQBQJKgAAIq8OTJrz5sRA1BlhQoAoEhQAQAUCSoAgCJBBQBQJKgAAIq8yg9WlFcnAgyHFSoAgCJBBQBQJKgAAIoEFQBAkaACACgSVAAARS6bwKDtpksDDOnP8qqxHH56ZR9XYHfY6mfkKrFCBQBQJKgAAIps+cESrfoSNwAbrFABABQJKgCAotKWX2vtdUnGSd6S5KUk9yY5nuSxJDf23o9VBwgAMHQzr1C11s5M8i+S/MHk0J1Jbuu9X5pkTxKvwQYA1kJly++jSX42yf+afDxK8vDk9kNJriicGwBgZcy05dda+7EkX+u9f6a1dsvk8J7e+/HJ7aNJDkxzrvF4PMsQFn5O6uY1L4uY362+x+2Hn57662+/7rx5DmfqP3Plsdnt/262mr95z9O87fY5WVXmpe50fp6ejmXOzazPofqbSY631q5I8uYk/ybJ6zZ9fn+SZ6c50Wg0mnEIWxuPx3M/J3Uzz8sW/+jmPr/Tfo/T+AFQ/fpZzzf1Y7OIx3VoVuzP7GfZMJmXOdmhoNrpudku2Gba8uu9X9Z7v7z3fjDJryW5PslDrbWDk7tcleTILOcGAFg187yw501JDrXWzkryeJL75nhuOG
1Deu+8ZVrExUNP9j3W8fEG1lM5qCarVK+4vHo+AIBV48KeAABFggoAoMibI7NWdvubEa/Cn283P7dtN//ZgO1ZoQIAKBJUAABFggoAoEhQAQAUCSoAgCKv8mNXWIVXt+0WO/FYe3UcsOqsUAEAFAkqAIAiW37AjrEVC6zLzwErVAAARYIKAKDIlh9sYWhL1EMbDwDfygoVAECRoAIAKBJUAABFnkMFnJTnbp2cx4Z14t0MTs0KFQBAkaACACiy5QesNFsRwBBYoQIAKBJUAABFtvyAJF61tkpscy6Xx5+tWKECACgSVAAARbb8GIzdtOW0m/4sLNci/i4NbQtraOPZLTyuO8sKFQBAkaACACgSVAAARZ5DBcCO8/yd1eE5oLOxQgUAUCSoAACKbPmxFJaUgVXgZxXTskIFAFAkqAAAimz5AbvOtNs0XnnGbuDv8TBYoQIAKBJUAABFtvzWmGVieLVlvqrrVd/78NPLGcgaGNLPvyGNhdlZoQIAKBJUAABFtvwAlmARW4uVraRFbEO5aObJLeqxMQfzY4UKAKBIUAEAFAkqAIAiz6HiW+zE8yb+3zm9BJzT4KXkrDp/h9eLFSoAgCJBBQBQZMuPleNlvgDbW9ZlOdaZFSoAgCJBBQBQZMuPubIEzE7y96tu3o/hoN5MOl5Fx/JYoQIAKBJUAABFM235tdbOTHJPkvOTnJ3kjiS/keTeJMeTPJbkxt77sbmMckXtluXoky3pr+KfBRbNm9wu37SPjceQillXqH40yTO990uTvDXJP01yZ5LbJsf2JPHbFgBYC7MG1b9L8pOT23uSvJRklOThybGHklxRGxoAwGqYacuv9/7NJGmt7U9yX5Lbkny09358cpejSQ5Mc67xeDzLEBZ+znmpjO32Kd8L7/brzpv5a0+H5XFYL8v82Trt917WGIc+vnWxzMd35ssmtNbekOSTST7Rez/cWvupTZ/en+TZac4zGo1mHcKWxuPx3M85sy0ipjS2KaNoy+/hjYmBomX+bJn2ey9rjEMf37rY6d//2wXbTFt+rbXvTPJLSd7Xe79ncvjLrbWDk9tXJTkyy7kBAFbNrCtUP5HktUl+srX2ynOp3p3krtbaWUkez8ZWIADArjfrc6jenY2AOtHlteEAMFRDuyo6DIkLewIAFAkqAIAiQQUAUCSoAACKBBUAQNHMF/ZkNot4w2SvhgEYJj+fdy8rVAAARYIKAKBIUAEAFAkqAIAiQQUAUCSoAACKXDYBgF3DZQlYFitUAABFggoAoMiW3wAs4urpAMDOsUIFAFAkqAAAimz5zcAWHQCbeXUhVqgAAIoEFQBAkaACACgSVAAARYIKAKDIq/wGyitGAGB1WKECACgSVAAARYIKAKDIc6jmxHOeAGB9WaECACgSVAAARYIKAKBIUAEAFAkqAIAiQQUAUCSoAACKBBUAQNGuv7DnVhfc/PTHril9PQDAZlaoAACKBBUAQJGgAgAoElQAAEWCCgCgSFABABTt+ssmbKV6KQUAgM2sUAEAFAkqAIAiQQUAUCSoAACKBBUAQNFavspvK94EGQCYlRUqAIAiQQUAUCSoAACKBBUAQJGgAgAomuur/FprZyT5RJI/n+SFJH+79/7EPL8HAMDQzHuF6oeT7Ou9f3+Sf5DkY3M+PwDA4Mw7qH4gyS8mSe/9i0n+0pzPDwAwOPMOqnOSPLfp45dbay4eCgDsavOOnW8k2b/p4zN67y9t9wXj8XjOQ/jWc95+3XlzPz8AMDw70RTTmndQPZLk6iQ/31q7KMmvb3fn0Wi0Z87fHwBg4eYdVJ9M8pbW2q8k2ZPkb8z5/AAAg7Pn+PHjyx4DAMBKc2FPAIAiQQUAUCSoAACKVvYaUad6m5vW2g1J3pnkpSR39N4fXMpA18wU8/LjSd4x+fA/9N7/4eJHuX6meVuoyX3+fZIHeu8/u/hRrp8p/r1cleQD2XiRzzjJjb13T3xdgCnm5qYk1yU5luRDvfdPLmWga6
q1dmGSj/TeD55w/Ook78/G7/57eu+HFjWmVV6hOunb3LTWXp/kXUkuSXJlkg+31s5eyijXz3bz8ieT/EiSi5NclOQHW2tvWsoo1880bwt1R5LXLnRUbPfvZX+Sf5zk7b33C5M8leTcZQxyTW03N9+e5N1Jvj/JDyb56aWMcE211m5OcneSfSccPzPJx7MxJ5cn+Tutte9c1LhWOai2e5ubC5I80nt/off+XJInkvjFvRjbzcvvJHlr7/3lyf9ln5nk+cUPcS1t+7ZQrbW/mo3/0/7FxQ9trW03Lxdn41p+H2utHUnyu733ry1+iGtru7n5P0l+K8m3Tf47tvDRrbcnk1y7xfE3Jnmi9/713vsfJvl8kssWNahVDqrt3ubmxM8dTXJgUQNbcyedl977i73332ut7WmtfTTJl3vvX1nKKNfPSeeltfZ92di6eP8yBrbmtvs5dm6Sv5zkfUmuSvKe1tqfXvD41tmp3krtd5L8RpJfTXLXIge27nrv9yd5cYtPLfV3/yoH1XZvc3Pi5/YneXZRA1tz2779UGttX5Kfm9zn7y54bOtsu3m5PskfT/LZJD+W5L2ttbcudnhra7t5eSbJf+29f7X3/s0k/yXJmxc9wDW23dxcleS7knxPku9O8sOttQsWPD5ebam/+1c5qB5J8rYk2eJtbh5NcmlrbV9r7UA2lgEfW/wQ19JJ56W1tifJA0n+W+/9nb33l5czxLV00nnpvd/ce79w8uTOe5Pc2Xu39bcY2/0c+9Uk39daO3eyMnJRNlZEWIzt5ubrSf4gyQu99+ez8Uv72xc+Qk70eJLvba19R2vtrGxs931hUd98ZV/lly3e5qa19t5s7J9+qrV2V5Ij2YjGWyd/6dl5J52XJK/JxhMFz568eilJbum9L+wv/Brb9t/Lcoe21k71c+yWJJ+Z3Pfne+/+x3BxTjU3VyT5YmvtWDaeq/MflzjWtdZauy7JH+m9/8vJHH0mG7/77+m9/89FjcNbzwAAFK3ylh8AwCAIKgCAIkEFAFAkqAAAigQVAECRoAIAKBJUAABFggoAoOj/AtYx4SOW96XWAAAAAElFTkSuQmCC\n",
"text/plain": [
"<Figure size 720x720 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Histogram of the score column for rows whose root is 'a'. Title and axis\n",
"# labels are set so the figure can be read on its own.\n",
"fig, ax = plt.subplots(figsize=(10, 10))\n",
"ax.hist(df.loc[df.root == 'a', 'score'], bins=100)\n",
"ax.set(title='Scores for root \"a\"', xlabel='score', ylabel='count');"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlQAAAI+CAYAAAB695QVAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAFgpJREFUeJzt3X+M5PVdx/HXnQclqQetgdYftFYT/cSk0YY1paXAYUILtD1pjDEEjanGWuMllvaSViotp6kaW6CmkaaKIWgif1QMuVKlYGyLV6RBp40pKf00GBUx0QCWAqa/ONY/djDLsbc7x3t25js7j0dCsvud2bk3fLjb532+3/3OrtXV1QAA8PztnvcAAACLTlABABQJKgCAIkEFAFAkqAAAigQVAEDRnnn+4qPRyD0bAICFsbKysmuj43MNqiRZWVmZ6uuNRqOpvyZ11mWYrMtwWZthsi7DNKt1GY1Gx33MKT8AgCJBBQBQJKgAAIoEFQBAkaACACgSVAAARYIKAKBIUAEAFAkqAIAiQQUAUCSoAACKBBUAQJGgAgAoElQAAEWCCgCgSFABABQJKgCAIkEFAFAkqAAAigQVAECRoAIAKBJUAABFggoAoEhQAQAU7Zn3AAAAk9p/8PBzjh26/Mw5TPJsdqgAAIoEFQBAkaACACgSVAAARYIKAKBIUAEAFAkqAIAiQQUAUCSoAACKBBUAQJGgAgAoElQAAEWCCgCgSFABABQJKgCAIkEFAFAkqAAAigQVAECRoAIAKBJUAABFggoAoEhQAQAUCSoAgCJBBQBQJKgAAIoEFQBAkaACACgSVAAARYIKAKBIUAEAFAkqAIAiQQUAUCSoAACKBBUAQJGgAgAoElQAAEWCCgCgSFABABQJKgCAIkEFAFAkqAAAigQVAECRoAIAKBJUAABFggoAoEhQAQAUCSoAgCJBBQBQJKgAAIoEFQBAkaACACgSVAAARYIKAKBIUAEAFAkqAIAiQQUAUCSoAACKBBUAQJGgAgAoElQAAEWCCgCgSFABABQJKgCAIkEFAFAkqAAAigQVAECRoAIAKBJUAABFggoAoEhQAQAUCSoAgCJBBQBQJKgAAIoEFQBAkaACACjas9UTWmsnJfmzJK9IcjTJ25I8leSmJKtJ7ktyoPf+dGvt6iRvGj9+Re/93u0ZGwBgOCbZoXpjkj2993OS/E6S301yXZKreu/nJdmV5NLW2llJ9iU5O8llSa7fnpEBAIZlkqD6apI9rbXdSU5N8p0kK0nuGj9+e5ILk5yb5M7e+2rv/cHx15yxDTMDAAzKlqf8kjyZtdN9X0lyepI3Jzm/9746fvyJJKdlLbYeXfd1zxx/eFrDAgAM0SRB9c4kd/Ter2ytvSzJp5OcvO7xvUkeS/L4+ONjj29qNBpNPu2EtuM1qbMuw2RdhsvaDJN1GaZ5r8skQfW1rJ3mS5L/SXJSki+21i7ovX82ySVJPpPkgSQfbK1dk+TMJLt7749s9eIrKyvPZ+7jGo1GU39N6qzLMFmX4bI2w2RdBuDmhzY8PIt12SzaJgmqDye5sbV2JGs7U+9N8k9JbmitnZzk/iS39N6Pjp9zT9auzTpQHRwAYBFsGVS99yeT/NwGD+3b4LmHkhwqTwUAsEDc2BMAoEhQAQAUCSoAgCJBBQBQJKgAAIoEFQBAkaACACgSVAAARYIKAKBIUAEAFAkqAIAiQQUAUCSoAACKBBUAQJGgAgAoElQAAEWCCgCgSFABABQJKgCAIkEFAFAkqAAAigQVAECRoAIAKBJUAABFggoAoEhQAQAUCSoAgCJBBQBQJKgAAIoEFQBAkaACACgSVAAARYIKAKBIUAEAFAkqAIAiQQUAUCSoAACKBBUAQJGgAgAoElQAAEWCCgCgSFABABQJKgCAIkEFAFAkqAAAigQVAECRoAIAKBJUAABFggoAoEhQAQAUCSoAgCJBBQBQJKgAAIoEFQ
BAkaACACgSVAAARYIKAKBIUAEAFAkqAIAiQQUAUCSoAACKBBUAQJGgAgAoElQAAEWCCgCgSFABABQJKgCAIkEFAFAkqAAAigQVAECRoAIAKBJUAABFggoAoEhQAQAUCSoAgCJBBQBQJKgAAIoEFQBAkaACACgSVAAARYIKAKBIUAEAFAkqAIAiQQUAUCSoAACKBBUAQJGgAgAoElQAAEWCCgCgSFABABQJKgCAIkEFAFAkqAAAigQVAECRoAIAKBJUAABFggoAoEhQAQAUCSoAgCJBBQBQJKgAAIoEFQBA0Z5JntRauzLJTyc5OclHk9yV5KYkq0nuS3Kg9/50a+3qJG9K8lSSK3rv927H0AAAQ7LlDlVr7YIk5yR5XZJ9SV6W5LokV/Xez0uyK8mlrbWzxo+fneSyJNdv08wAAIMyySm/i5J8KcmtSW5L8skkK1nbpUqS25NcmOTcJHf23ld77w8m2dNaO2P6IwMADMskp/xOT/KDSd6c5IeSfCLJ7t776vjxJ5KcluTUJI+u+7pnjj+82YuPRqMTHHlr2/Ga1FmXYbIuw2Vthsm6DNO812WSoHo0yVd6799O0ltr38zaab9n7E3yWJLHxx8fe3xTKysrk087gdFoNPXXpM66DJN1GS5rM0zWZQBufmjDw7NYl82ibZJTfp9LcnFrbVdr7fuTvDDJ342vrUqSS5IcSXJ3kotaa7tbay/P2i7WI6XJAQAWwJY7VL33T7bWzk9yb9YC7ECSf01yQ2vt5CT3J7ml9360tXYkyT3rngcAsONNdNuE3vu7Nzi8b4PnHUpyqDYSAMBicWNPAIAiQQUAUCSoAACKBBUAQJGgAgAoElQAAEWCCgCgSFABABQJKgCAIkEFAFAkqAAAigQVAECRoAIAKBJUAABFggoAoEhQAQAUCSoAgCJBBQBQJKgAAIoEFQBAkaACACgSVAAARYIKAKBIUAEAFAkqAIAiQQUAUCSoAACKBBUAQJGgAgAoElQAAEWCCgCgSFABABQJKgCAIkEFAFAkqAAAigQVAECRoAIAKBJUAABFggoAoEhQAQAUCSoAgCJBBQBQJKgAAIoEFQBAkaACACgSVAAARYIKAKBIUAEAFAkqAIAiQQUAUCSoAACKBBUAQJGgAgAoElQAAEWCCgCgSFABABQJKgCAIkEFAFAkqAAAigQVAECRoAIAKBJUAABFggoAoEhQAQAUCSoAgCJBBQBQJKgAAIoEFQBAkaACACgSVAAARYIKAKBIUAEAFAkqAIAiQQUAUCSoAACKBBUAQJGgAgAoElQAAEWCCgCgSFABABQJKgCAIkEFAFAkqAAAigQVAECRoAIAKBJUAABFggoAoEhQAQAUCSoAgCJBBQBQJKgAAIoEFQBAkaACACgSVAAARYIKAKBIUAEAFAkqAIAiQQUAUCSoAACKBBUAQJGgAgAo2jPJk1prL0kySvL6JE8luSnJapL7khzovT/dWrs6yZvGj1/Re793WyYGABiYLXeoWmsnJfnjJN8YH7ouyVW99/OS7EpyaWvtrCT7kpyd5LIk12/PuAAAwzPJDtU1ST6W5Mrx5ytJ7hp/fHuSNyTpSe7sva8mebC1tqe1dkbv/eFpDwwALIf9Bw/Pe4SJbbpD1Vp7a5KHe+93rDu8axxOSfJEktOSnJrk6+ue88xxAIAdb6sdql9OstpauzDJq5L8eZKXrHt8b5LHkjw+/vjY41sajUYTDzup7XhN6qzLMFmX4bI2w2Rdhmne67JpUPXez3/m49baZ5P8WpIPtdYu6L1/NsklST6T5IEkH2ytXZPkzCS7e++PTDLAysrK85v8OEaj0dRfkzrrMkzWZbiszTBZlxm7+aGJnzqLddks2ib6Kb9jHExyQ2vt5CT3J7ml9360tXYkyT1ZO4144PkMCgCwiCYOqt77Bes+3bfB44eSHCpPBACwYNzYEwCgSFABABQJKgCAIkEFAFAkqAAAigQVAECRoAIAKBJUAABFggoAoE
hQAQAUCSoAgCJBBQBQJKgAAIoEFQBAkaACACgSVAAARYIKAKBIUAEAFAkqAIAiQQUAUCSoAACKBBUAQJGgAgAoElQAAEWCCgCgSFABABQJKgCAIkEFAFAkqAAAigQVAECRoAIAKBJUAABFggoAoEhQAQAUCSoAgCJBBQBQJKgAAIoEFQBAkaACACgSVAAARXvmPQAAwP6Dh+c9QokdKgCAIkEFAFAkqAAAigQVAECRoAIAKBJUAABFggoAoEhQAQAUCSoAgCJBBQBQJKgAAIoEFQBAkaACACgSVAAARYIKAKBIUAEAFAkqAIAiQQUAUCSoAACKBBUAQJGgAgAoElQAAEWCCgCgSFABABTtmfcAAMBy2X/w8LxHmDo7VAAARYIKAKBIUAEAFAkqAIAiQQUAUOSn/ACAbbMTf6JvI3aoAACKBBUAQJGgAgAoElQAAEWCCgCgSFABABQJKgCAIkEFAFAkqAAAigQVAECRoAIAKBJUAABFggoAoEhQAQAUCSoAgCJBBQBQJKgAAIoEFQBAkaACACgSVAAARYIKAKBIUAEAFAkqAIAiQQUAUCSoAACKBBUAQJGgAgAo2jPvAQCAYdt/8PBzjt127aUTPW9Z2KECACgSVAAARZue8mutnZTkxiSvSPKCJB9I8uUkNyVZTXJfkgO996dba1cneVOSp5Jc0Xu/d/vGBgAYjq12qH4hyaO99/OSXJzkj5Jcl+Sq8bFdSS5trZ2VZF+Ss5NcluT67RsZAGBYtgqqv0zyvvHHu7K2+7SS5K7xsduTXJjk3CR39t5Xe+8PJtnTWjtjG+YFABicTU/59d6fTJLW2t4ktyS5Ksk1vffV8VOeSHJaklOTPLruS585/vBWA4xGoxOfeg6vSZ11GSbrMlzWZph2+rocuvmhiZ43tP8O855ny9smtNZeluTWJB/tvd/cWvvguof3JnksyePjj489vqWVlZXJp53AaDSa+mtSZ12GyboMl7UZpqVYlwmDasP/DhN+7XaYxbpsFm2bnvJrrb00yZ1J3tN7v3F8+IuttQvGH1+S5EiSu5Nc1Frb3Vp7eZLdvfdHqoMDACyCrXao3pvkxUne11p75lqqdyT5SGvt5CT3J7ml9360tXYkyT1Zi7QD2zUwAMDQbHUN1TuyFlDH2rfBcw8lOTSVqQAAFoi3ngEATtgyv83MRtwpHQCgSFABABQJKgCAIkEFAFAkqAAAigQVAECRoAIAKBJUAABFggoAoEhQAQAUCSoAgCJBBQBQJKgAAIoEFQBAkaACACgSVAAARXvmPQAA8PztP3j4Ocduu/bSOUyy3AQVACwB4bW9BBUA7DAbxRPbyzVUAABFdqgAYEnZyZoeO1QAAEWCCgCgSFABABS5hgoABsYtDhaPHSoAgCI7VABQZEcJQQUAC8AtDobNKT8AgCJBBQBQJKgAAIoEFQBAkYvSAWCOXGy+MwgqAJgR8bRzOeUHAFBkhwqApeNGnEybHSoAgCI7VABwAlwHxUbsUAEAFAkqAIAiQQUAUCSoAACKBBUAQJGgAgAoElQAAEXuQwXAoB3vvk8b3dl8SHdAd7+q5WKHCgCgSFABABQJKgCAItdQATAYrjtiUQkqAHY0kcYsCCoApmrSn7RbhNBZhBkZBkEFwPMmOGCNi9IBAIrsUAFA7LZRI6gAWEgCiCFxyg8AoEhQAQAUOeUHwLZzeo6dzg4VAECRoAIAKBJUAABFrqECYCKug4LjE1QAPId4ghPjlB8AQJGgAgAoElQAAEWuoQLYoTa6Duq2ay+dwySw8wkqgCUismB7CCqAJecn+qBOUAEsGLtMMDwuSgcAKBJUAABFggoAoMg1VAA7gAvLYb7sUAEAFNmhAhiADXeYbn7IT+/BgrBDBQBQZIcKYBu5ZxQsBztUAABFggoAoMgpP4Ap2Y5bF7gdAiwGO1QAAEWCCgCgyCk/gC1M+7Sb03iw89ihAgAoElQAAEVO+QGs43Qc8HwIKmChuRM5MARO+Q
EAFNmhAk7IpKfEhrZLZCcL2E6CCpiZ48XYtMNm0uhzvRQwLYIKSLIY91oSQMBQCSpg7oQSsOhclA4AUGSHChaUi6wBhkNQwZTN6sJrAIZDULFjVXZw5rn7U7meaFFvaQCw6AQVFCzqxdTPmvvmh+Y3CMAOIagYNNcJAbAIphpUrbXdST6a5CeSfCvJr/TeH5jmr7HMJo2LRY2Qys0YZ/HvV92NWtTdLAC2Nu0dqrckOaX3/trW2muSXJtk+N/Jj2M7vnEP6RqXaV9jtKV1p5bmFXiiBoDtMO2gOjfJp5Kk9/751tpPTvn1T9i0o2hW35B3+ltnLOrcALCRaQfVqUm+vu7zo621Pb33p6b865Qs4zdzgQYA22faQfV4kr3rPt+9VUyNRqMpj/Ds1zx0+ZlTf30AYFi2oydOxLSD6u4k+5N8fHwN1Zc2e/LKysquKf/6AAAzN+2gujXJ61tr/5BkV5JfmvLrAwAMzq7V1dV5zwAAsNB2z3sAAIBFJ6gAAIoEFQBA0UK+l99Wb3HTWntbkrcneSrJB3rvn5zLoEtmgnV5Z5LLxp/+Te/9t2c/5XKa5G2hxs/56ySHe+8fm/2Uy2eC3zOXJLk6az/kM0pyoPfuwtcZmGBtDia5PMnTSX6v937rXAZdUq21s5P8Qe/9gmOO70/y/qx9/7+x937DrGZa1B2q/3+LmyS/mbW3uEmStNa+N8lvJHldkouS/H5r7QVzmXL5bLYuP5zk55Ock+Q1Sd7QWvvxuUy5nI67Nut8IMmLZzoVm/2e2ZvkQ0ne3Hs/O8m/JTl9HkMuqc3W5kVJ3pHktUnekOQP5zLhkmqtvTvJnyY55ZjjJyX5cNbWZF+SX22tvXRWcy1qUD3rLW6SrH+Lm1cnubv3/q3e+9eTPJDEN+7Z2Gxd/iPJxb33o+O/YZ+U5JuzH3FpbbY2aa39bNb+pv2p2Y+21DZbl3Oydi+/a1trR5L8d+/94dmPuLQ2W5v/TfLvSV44/ufpmU+33P4lyc9scPzHkjzQe/9a7/3bST6X5PxZDbWoQbXhW9wc57Enkpw2q8GW3HHXpff+nd77I621Xa21a5J8sff+1blMuZyOuzattVdm7dTF++cx2JLb7M+y05P8VJL3JLkkyRWttR+d8XzLbLO1Sdb+kvjlJF9I8pFZDrbseu9/leQ7Gzw01+//ixpUm73FzbGP7U3y2KwGW3KbvvVQa+2UJH8xfs6vz3i2ZbfZ2vxikh9I8ukkb03yrtbaxbMdb2ltti6PJvnH3vt/9d6fTPL3SV416wGX2GZrc0mS70vyQ0lenuQtrbVXz3g+nmuu3/8XNajuTvLGJNngLW7uTXJea+2U1tppWdsCvG/2Iy6l465La21XksNJ/rn3/vbe+9H5jLi0jrs2vfd3997PHl/ceVOS63rvTv3NxmZ/ln0hyStba6ePd0Zek7UdEWZjs7X5WpJvJPlW7/2bWfum/aKZT8ix7k/yI62172mtnZy10333zOoXX8if8ssGb3HTWntX1s6dfqK19pEkR7IWjL81/h+e7XfcdUnyXVm7SPAF459cSpIre+8z+599yW36e2a+oy21rf4suzLJHePnfrz37i+Hs7PV2lyY5POttaezdq3O385x1qXWWrs8yXf33v9kvEZ3ZO37/4299/+c1RzeegYAoGhRT/kBAAyGoAIAKBJUAABFggoAoEhQAQAUCSoAgCJBBQBQJKgAAIr+DzyubumxlRDBAAAAAElFTkSuQmCC\n",
"text/plain": [
"<Figure size 720x720 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Histogram of the score column for rows whose root is 'the'. Title and axis\n",
"# labels are set so the figure can be read on its own.\n",
"fig, ax = plt.subplots(figsize=(10, 10))\n",
"ax.hist(df.loc[df.root == 'the', 'score'], bins=100)\n",
"ax.set(title='Scores for root \"the\"', xlabel='score', ylabel='count');"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Most confident \"a\" suffixes"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>root</th>\n",
" <th>suffix</th>\n",
" <th>score</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>9289</th>\n",
" <td>a</td>\n",
" <td>few months ago . ”</td>\n",
" <td>0.999967</td>\n",
" </tr>\n",
" <tr>\n",
" <th>783</th>\n",
" <td>a</td>\n",
" <td>few seconds earlier . The</td>\n",
" <td>0.999876</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3827</th>\n",
" <td>a</td>\n",
" <td>few years older than he</td>\n",
" <td>0.999855</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8027</th>\n",
" <td>a</td>\n",
" <td>few seconds later she came</td>\n",
" <td>0.999826</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2603</th>\n",
" <td>a</td>\n",
" <td>few months earlier . It</td>\n",
" <td>0.999825</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5441</th>\n",
" <td>a</td>\n",
" <td>few minutes later and stick</td>\n",
" <td>0.999769</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8084</th>\n",
" <td>a</td>\n",
" <td>thousand years . It had</td>\n",
" <td>0.999742</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3088</th>\n",
" <td>a</td>\n",
" <td>hundred years . They had</td>\n",
" <td>0.999729</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7039</th>\n",
" <td>a</td>\n",
" <td>few months . Chester would</td>\n",
" <td>0.999725</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4314</th>\n",
" <td>a</td>\n",
" <td>bit much . My devotion</td>\n",
" <td>0.999716</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1157</th>\n",
" <td>a</td>\n",
" <td>long breath , then said</td>\n",
" <td>0.999709</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4004</th>\n",
" <td>a</td>\n",
" <td>hundred more a month than</td>\n",
" <td>0.999571</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4757</th>\n",
" <td>a</td>\n",
" <td>few seconds , then a</td>\n",
" <td>0.999510</td>\n",
" </tr>\n",
" <tr>\n",
" <th>359</th>\n",
" <td>a</td>\n",
" <td>few minutes to scramble some</td>\n",
" <td>0.999504</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3814</th>\n",
" <td>a</td>\n",
" <td>few minutes , \" Garnock</td>\n",
" <td>0.999503</td>\n",
" </tr>\n",
" <tr>\n",
" <th>981</th>\n",
" <td>a</td>\n",
" <td>few seconds longer , trying</td>\n",
" <td>0.999465</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96</th>\n",
" <td>a</td>\n",
" <td>half months ago , just</td>\n",
" <td>0.999464</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9519</th>\n",
" <td>a</td>\n",
" <td>hundred other surreal stories made</td>\n",
" <td>0.999448</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4430</th>\n",
" <td>a</td>\n",
" <td>long breath , and uttered</td>\n",
" <td>0.999444</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5634</th>\n",
" <td>a</td>\n",
" <td>lot more . ” “</td>\n",
" <td>0.999386</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7672</th>\n",
" <td>a</td>\n",
" <td>hundred years . Raised cattle</td>\n",
" <td>0.999336</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1907</th>\n",
" <td>a</td>\n",
" <td>million people are going to</td>\n",
" <td>0.999277</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9936</th>\n",
" <td>a</td>\n",
" <td>couple of laps around United</td>\n",
" <td>0.999242</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7536</th>\n",
" <td>a</td>\n",
" <td>bit further than I expected</td>\n",
" <td>0.999225</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8611</th>\n",
" <td>a</td>\n",
" <td>long pause . “ The</td>\n",
" <td>0.999201</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7102</th>\n",
" <td>a</td>\n",
" <td>long pause . ‘ ‘</td>\n",
" <td>0.999182</td>\n",
" </tr>\n",
" <tr>\n",
" <th>817</th>\n",
" <td>a</td>\n",
" <td>slight frown , now waved</td>\n",
" <td>0.999128</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3959</th>\n",
" <td>a</td>\n",
" <td>few more minutes had passed</td>\n",
" <td>0.999088</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1002</th>\n",
" <td>a</td>\n",
" <td>couple of times . She</td>\n",
" <td>0.998981</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3154</th>\n",
" <td>a</td>\n",
" <td>hundred times as big as</td>\n",
" <td>0.998945</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5529</th>\n",
" <td>a</td>\n",
" <td>deep breath , gives Daniel</td>\n",
" <td>0.998916</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7383</th>\n",
" <td>a</td>\n",
" <td>couple of days . ”</td>\n",
" <td>0.998916</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9456</th>\n",
" <td>a</td>\n",
" <td>few hours on the drawing</td>\n",
" <td>0.998897</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2282</th>\n",
" <td>a</td>\n",
" <td>short scream . “ His</td>\n",
" <td>0.998874</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5625</th>\n",
" <td>a</td>\n",
" <td>mile from her home and</td>\n",
" <td>0.998867</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3739</th>\n",
" <td>a</td>\n",
" <td>few more minutes of peace</td>\n",
" <td>0.998783</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1109</th>\n",
" <td>a</td>\n",
" <td>deep breath , he straightened</td>\n",
" <td>0.998781</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1524</th>\n",
" <td>a</td>\n",
" <td>few inches . “ Get</td>\n",
" <td>0.998758</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6853</th>\n",
" <td>a</td>\n",
" <td>few feet away from him</td>\n",
" <td>0.998736</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5361</th>\n",
" <td>a</td>\n",
" <td>dozen paces toward the Aes</td>\n",
" <td>0.998731</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8942</th>\n",
" <td>a</td>\n",
" <td>month ago . There is</td>\n",
" <td>0.998728</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4136</th>\n",
" <td>a</td>\n",
" <td>little farther still from the</td>\n",
" <td>0.998670</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2399</th>\n",
" <td>a</td>\n",
" <td>hundred feet wide . A</td>\n",
" <td>0.998668</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4969</th>\n",
" <td>a</td>\n",
" <td>couple of homers and had</td>\n",
" <td>0.998651</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4123</th>\n",
" <td>a</td>\n",
" <td>thousand thoughts . But the</td>\n",
" <td>0.998632</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6558</th>\n",
" <td>a</td>\n",
" <td>little laugh . ) You</td>\n",
" <td>0.998591</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>a</td>\n",
" <td>mile past the Grange Hall</td>\n",
" <td>0.998551</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5474</th>\n",
" <td>a</td>\n",
" <td>wide smile . “ Welcome</td>\n",
" <td>0.998479</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5060</th>\n",
" <td>a</td>\n",
" <td>year or two ago ,</td>\n",
" <td>0.998434</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8115</th>\n",
" <td>a</td>\n",
" <td>few hours . Fear of</td>\n",
" <td>0.998430</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1471</th>\n",
" <td>a</td>\n",
" <td>lot more crackbrained to let</td>\n",
" <td>0.998369</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1460</th>\n",
" <td>a</td>\n",
" <td>little more serious than that</td>\n",
" <td>0.998329</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1996</th>\n",
" <td>a</td>\n",
" <td>few months in case you</td>\n",
" <td>0.998292</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2001</th>\n",
" <td>a</td>\n",
" <td>bit of sense . She</td>\n",
" <td>0.998250</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4163</th>\n",
" <td>a</td>\n",
" <td>couple of detectives up to</td>\n",
" <td>0.998238</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1380</th>\n",
" <td>a</td>\n",
" <td>little more about that sensation</td>\n",
" <td>0.998061</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1933</th>\n",
" <td>a</td>\n",
" <td>little too much like something</td>\n",
" <td>0.998059</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9695</th>\n",
" <td>a</td>\n",
" <td>few hours on funeral days</td>\n",
" <td>0.998055</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8120</th>\n",
" <td>a</td>\n",
" <td>couple of nice books lay</td>\n",
" <td>0.998017</td>\n",
" </tr>\n",
" <tr>\n",
" <th>900</th>\n",
" <td>a</td>\n",
" <td>hundred feet up , seven</td>\n",
" <td>0.997861</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2950</th>\n",
" <td>a</td>\n",
" <td>lot of fun . She</td>\n",
" <td>0.997626</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7013</th>\n",
" <td>a</td>\n",
" <td>lot of fancy eating went</td>\n",
" <td>0.997614</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8804</th>\n",
" <td>a</td>\n",
" <td>very long time . ”</td>\n",
" <td>0.997587</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1014</th>\n",
" <td>a</td>\n",
" <td>little while ago that Niki</td>\n",
" <td>0.997568</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5439</th>\n",
" <td>a</td>\n",
" <td>quick glance at the two</td>\n",
" <td>0.997533</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6396</th>\n",
" <td>a</td>\n",
" <td>lot better after it ’s</td>\n",
" <td>0.997513</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5396</th>\n",
" <td>a</td>\n",
" <td>little bit . This is</td>\n",
" <td>0.997492</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4171</th>\n",
" <td>a</td>\n",
" <td>lot of police officers are</td>\n",
" <td>0.997462</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6905</th>\n",
" <td>a</td>\n",
" <td>dozen and a half of</td>\n",
" <td>0.997430</td>\n",
" </tr>\n",
" <tr>\n",
" <th>797</th>\n",
" <td>a</td>\n",
" <td>lot of clothes for him</td>\n",
" <td>0.997427</td>\n",
" </tr>\n",
" <tr>\n",
" <th>177</th>\n",
" <td>a</td>\n",
" <td>year ago . It will</td>\n",
" <td>0.997425</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5102</th>\n",
" <td>a</td>\n",
" <td>lot of noise and a</td>\n",
" <td>0.997402</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8333</th>\n",
" <td>a</td>\n",
" <td>good deal , and everyone</td>\n",
" <td>0.997402</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5116</th>\n",
" <td>a</td>\n",
" <td>lot about music , about</td>\n",
" <td>0.997387</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8216</th>\n",
" <td>a</td>\n",
" <td>lot of cuts , what</td>\n",
" <td>0.997369</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6607</th>\n",
" <td>a</td>\n",
" <td>hundred and ten dollars in</td>\n",
" <td>0.997350</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9038</th>\n",
" <td>a</td>\n",
" <td>ragged sigh and rolled her</td>\n",
" <td>0.997283</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1011</th>\n",
" <td>a</td>\n",
" <td>hundred and sixty - eight</td>\n",
" <td>0.997217</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4744</th>\n",
" <td>a</td>\n",
" <td>bit of Spanish out here</td>\n",
" <td>0.997159</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2005</th>\n",
" <td>a</td>\n",
" <td>lot of compliments on my</td>\n",
" <td>0.997028</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1718</th>\n",
" <td>a</td>\n",
" <td>few minutes of fuming ,</td>\n",
" <td>0.997002</td>\n",
" </tr>\n",
" <tr>\n",
" <th>201</th>\n",
" <td>a</td>\n",
" <td>mile before he went after</td>\n",
" <td>0.996910</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9049</th>\n",
" <td>a</td>\n",
" <td>deep breath , felt the</td>\n",
" <td>0.996772</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4698</th>\n",
" <td>a</td>\n",
" <td>little extra bread . ”</td>\n",
" <td>0.996709</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7953</th>\n",
" <td>a</td>\n",
" <td>lot of laughs . And</td>\n",
" <td>0.996686</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5808</th>\n",
" <td>a</td>\n",
" <td>couple of years ago ,</td>\n",
" <td>0.996542</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1376</th>\n",
" <td>a</td>\n",
" <td>little time in the hospital</td>\n",
" <td>0.996441</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6225</th>\n",
" <td>a</td>\n",
" <td>bit of fun with us</td>\n",
" <td>0.996436</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8635</th>\n",
" <td>a</td>\n",
" <td>half from high school —</td>\n",
" <td>0.996401</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9759</th>\n",
" <td>a</td>\n",
" <td>few big financial coups .</td>\n",
" <td>0.996296</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3493</th>\n",
" <td>a</td>\n",
" <td>lot of difference between being</td>\n",
" <td>0.996219</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7291</th>\n",
" <td>a</td>\n",
" <td>groan and a scuffle from</td>\n",
" <td>0.996018</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5254</th>\n",
" <td>a</td>\n",
" <td>little while before it actually</td>\n",
" <td>0.995970</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6813</th>\n",
" <td>a</td>\n",
" <td>couple of lives over the</td>\n",
" <td>0.995857</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8516</th>\n",
" <td>a</td>\n",
" <td>long time . Joe Quinn</td>\n",
" <td>0.995834</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8685</th>\n",
" <td>a</td>\n",
" <td>lot of work to do</td>\n",
" <td>0.995820</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2949</th>\n",
" <td>a</td>\n",
" <td>bit av thinking av .</td>\n",
" <td>0.995721</td>\n",
" </tr>\n",
" <tr>\n",
" <th>923</th>\n",
" <td>a</td>\n",
" <td>little more concealer under the</td>\n",
" <td>0.995636</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8255</th>\n",
" <td>a</td>\n",
" <td>Monet or a Manet .</td>\n",
" <td>0.995609</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7222</th>\n",
" <td>a</td>\n",
" <td>good time there ? ”</td>\n",
" <td>0.995538</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" root suffix score\n",
"9289 a few months ago . ” 0.999967\n",
"783 a few seconds earlier . The 0.999876\n",
"3827 a few years older than he 0.999855\n",
"8027 a few seconds later she came 0.999826\n",
"2603 a few months earlier . It 0.999825\n",
"5441 a few minutes later and stick 0.999769\n",
"8084 a thousand years . It had 0.999742\n",
"3088 a hundred years . They had 0.999729\n",
"7039 a few months . Chester would 0.999725\n",
"4314 a bit much . My devotion 0.999716\n",
"1157 a long breath , then said 0.999709\n",
"4004 a hundred more a month than 0.999571\n",
"4757 a few seconds , then a 0.999510\n",
"359 a few minutes to scramble some 0.999504\n",
"3814 a few minutes , \" Garnock 0.999503\n",
"981 a few seconds longer , trying 0.999465\n",
"96 a half months ago , just 0.999464\n",
"9519 a hundred other surreal stories made 0.999448\n",
"4430 a long breath , and uttered 0.999444\n",
"5634 a lot more . ” “ 0.999386\n",
"7672 a hundred years . Raised cattle 0.999336\n",
"1907 a million people are going to 0.999277\n",
"9936 a couple of laps around United 0.999242\n",
"7536 a bit further than I expected 0.999225\n",
"8611 a long pause . “ The 0.999201\n",
"7102 a long pause . ‘ ‘ 0.999182\n",
"817 a slight frown , now waved 0.999128\n",
"3959 a few more minutes had passed 0.999088\n",
"1002 a couple of times . She 0.998981\n",
"3154 a hundred times as big as 0.998945\n",
"5529 a deep breath , gives Daniel 0.998916\n",
"7383 a couple of days . ” 0.998916\n",
"9456 a few hours on the drawing 0.998897\n",
"2282 a short scream . “ His 0.998874\n",
"5625 a mile from her home and 0.998867\n",
"3739 a few more minutes of peace 0.998783\n",
"1109 a deep breath , he straightened 0.998781\n",
"1524 a few inches . “ Get 0.998758\n",
"6853 a few feet away from him 0.998736\n",
"5361 a dozen paces toward the Aes 0.998731\n",
"8942 a month ago . There is 0.998728\n",
"4136 a little farther still from the 0.998670\n",
"2399 a hundred feet wide . A 0.998668\n",
"4969 a couple of homers and had 0.998651\n",
"4123 a thousand thoughts . But the 0.998632\n",
"6558 a little laugh . ) You 0.998591\n",
"7 a mile past the Grange Hall 0.998551\n",
"5474 a wide smile . “ Welcome 0.998479\n",
"5060 a year or two ago , 0.998434\n",
"8115 a few hours . Fear of 0.998430\n",
"1471 a lot more crackbrained to let 0.998369\n",
"1460 a little more serious than that 0.998329\n",
"1996 a few months in case you 0.998292\n",
"2001 a bit of sense . She 0.998250\n",
"4163 a couple of detectives up to 0.998238\n",
"1380 a little more about that sensation 0.998061\n",
"1933 a little too much like something 0.998059\n",
"9695 a few hours on funeral days 0.998055\n",
"8120 a couple of nice books lay 0.998017\n",
"900 a hundred feet up , seven 0.997861\n",
"2950 a lot of fun . She 0.997626\n",
"7013 a lot of fancy eating went 0.997614\n",
"8804 a very long time . ” 0.997587\n",
"1014 a little while ago that Niki 0.997568\n",
"5439 a quick glance at the two 0.997533\n",
"6396 a lot better after it ’s 0.997513\n",
"5396 a little bit . This is 0.997492\n",
"4171 a lot of police officers are 0.997462\n",
"6905 a dozen and a half of 0.997430\n",
"797 a lot of clothes for him 0.997427\n",
"177 a year ago . It will 0.997425\n",
"5102 a lot of noise and a 0.997402\n",
"8333 a good deal , and everyone 0.997402\n",
"5116 a lot about music , about 0.997387\n",
"8216 a lot of cuts , what 0.997369\n",
"6607 a hundred and ten dollars in 0.997350\n",
"9038 a ragged sigh and rolled her 0.997283\n",
"1011 a hundred and sixty - eight 0.997217\n",
"4744 a bit of Spanish out here 0.997159\n",
"2005 a lot of compliments on my 0.997028\n",
"1718 a few minutes of fuming , 0.997002\n",
"201 a mile before he went after 0.996910\n",
"9049 a deep breath , felt the 0.996772\n",
"4698 a little extra bread . ” 0.996709\n",
"7953 a lot of laughs . And 0.996686\n",
"5808 a couple of years ago , 0.996542\n",
"1376 a little time in the hospital 0.996441\n",
"6225 a bit of fun with us 0.996436\n",
"8635 a half from high school — 0.996401\n",
"9759 a few big financial coups . 0.996296\n",
"3493 a lot of difference between being 0.996219\n",
"7291 a groan and a scuffle from 0.996018\n",
"5254 a little while before it actually 0.995970\n",
"6813 a couple of lives over the 0.995857\n",
"8516 a long time . Joe Quinn 0.995834\n",
"8685 a lot of work to do 0.995820\n",
"2949 a bit av thinking av . 0.995721\n",
"923 a little more concealer under the 0.995636\n",
"8255 a Monet or a Manet . 0.995609\n",
"7222 a good time there ? ” 0.995538"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df.root=='a'].sort_values('score', ascending=False).head(100)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Most confident \"the\" suffixes"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>root</th>\n",
" <th>suffix</th>\n",
" <th>score</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>8248</th>\n",
" <td>the</td>\n",
" <td>oceans are being fouled so</td>\n",
" <td>0.999990</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8073</th>\n",
" <td>the</td>\n",
" <td>dogs were barking again ,</td>\n",
" <td>0.999977</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3816</th>\n",
" <td>the</td>\n",
" <td>old trees , the men</td>\n",
" <td>0.999972</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2061</th>\n",
" <td>the</td>\n",
" <td>women of the sewing circle</td>\n",
" <td>0.999967</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8435</th>\n",
" <td>the</td>\n",
" <td>other side of the island</td>\n",
" <td>0.999963</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2287</th>\n",
" <td>the</td>\n",
" <td>other kids . Poke had</td>\n",
" <td>0.999962</td>\n",
" </tr>\n",
" <tr>\n",
" <th>194</th>\n",
" <td>the</td>\n",
" <td>soldiers were pushing the remaining</td>\n",
" <td>0.999961</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8718</th>\n",
" <td>the</td>\n",
" <td>other side of the craft</td>\n",
" <td>0.999936</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6435</th>\n",
" <td>the</td>\n",
" <td>two children were alone in</td>\n",
" <td>0.999932</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9683</th>\n",
" <td>the</td>\n",
" <td>other side of the wavy</td>\n",
" <td>0.999928</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2880</th>\n",
" <td>the</td>\n",
" <td>girls were home and there</td>\n",
" <td>0.999927</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1149</th>\n",
" <td>the</td>\n",
" <td>other . About the most</td>\n",
" <td>0.999912</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7864</th>\n",
" <td>the</td>\n",
" <td>women wore hats . The</td>\n",
" <td>0.999911</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3619</th>\n",
" <td>the</td>\n",
" <td>children are all big now</td>\n",
" <td>0.999911</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7061</th>\n",
" <td>the</td>\n",
" <td>other side of youth .</td>\n",
" <td>0.999907</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7415</th>\n",
" <td>the</td>\n",
" <td>sleeves of his rumpled jacket</td>\n",
" <td>0.999899</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3242</th>\n",
" <td>the</td>\n",
" <td>old ones had known what</td>\n",
" <td>0.999897</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7740</th>\n",
" <td>the</td>\n",
" <td>old man were locked in</td>\n",
" <td>0.999891</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7869</th>\n",
" <td>the</td>\n",
" <td>other side of his plate</td>\n",
" <td>0.999875</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4032</th>\n",
" <td>the</td>\n",
" <td>guards were also in formation</td>\n",
" <td>0.999875</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2708</th>\n",
" <td>the</td>\n",
" <td>other children we ’d played</td>\n",
" <td>0.999873</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5978</th>\n",
" <td>the</td>\n",
" <td>breasts of the beauties ,</td>\n",
" <td>0.999873</td>\n",
" </tr>\n",
" <tr>\n",
" <th>130</th>\n",
" <td>the</td>\n",
" <td>other . The key rattles</td>\n",
" <td>0.999872</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1687</th>\n",
" <td>the</td>\n",
" <td>old man ’s eyes grow</td>\n",
" <td>0.999863</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5658</th>\n",
" <td>the</td>\n",
" <td>other hand , lived for</td>\n",
" <td>0.999860</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8306</th>\n",
" <td>the</td>\n",
" <td>rooftops , the younger ones</td>\n",
" <td>0.999855</td>\n",
" </tr>\n",
" <tr>\n",
" <th>929</th>\n",
" <td>the</td>\n",
" <td>Italians were too busy to</td>\n",
" <td>0.999855</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4755</th>\n",
" <td>the</td>\n",
" <td>colors of the sunset ,</td>\n",
" <td>0.999855</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1137</th>\n",
" <td>the</td>\n",
" <td>other side of the river</td>\n",
" <td>0.999853</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1635</th>\n",
" <td>the</td>\n",
" <td>other side of the table</td>\n",
" <td>0.999852</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4018</th>\n",
" <td>the</td>\n",
" <td>old man was alert now</td>\n",
" <td>0.999851</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3895</th>\n",
" <td>the</td>\n",
" <td>bodies . He spoke while</td>\n",
" <td>0.999849</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6849</th>\n",
" <td>the</td>\n",
" <td>old man spat into the</td>\n",
" <td>0.999828</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6857</th>\n",
" <td>the</td>\n",
" <td>other a stable for two</td>\n",
" <td>0.999819</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5489</th>\n",
" <td>the</td>\n",
" <td>ones they ’re worrying about</td>\n",
" <td>0.999817</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2220</th>\n",
" <td>the</td>\n",
" <td>old man was n’t daft</td>\n",
" <td>0.999811</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4219</th>\n",
" <td>the</td>\n",
" <td>old man asked . John</td>\n",
" <td>0.999806</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5777</th>\n",
" <td>the</td>\n",
" <td>men because their beards grew</td>\n",
" <td>0.999806</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3880</th>\n",
" <td>the</td>\n",
" <td>first time was last year</td>\n",
" <td>0.999798</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7022</th>\n",
" <td>the</td>\n",
" <td>birds and the tourists .</td>\n",
" <td>0.999794</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4801</th>\n",
" <td>the</td>\n",
" <td>arms of his cord suit</td>\n",
" <td>0.999793</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4170</th>\n",
" <td>the</td>\n",
" <td>Earth residents are rather frontier</td>\n",
" <td>0.999792</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2693</th>\n",
" <td>the</td>\n",
" <td>old woman sighed and shook</td>\n",
" <td>0.999782</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5168</th>\n",
" <td>the</td>\n",
" <td>men and the drooping heads</td>\n",
" <td>0.999774</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8352</th>\n",
" <td>the</td>\n",
" <td>children , the servants ,</td>\n",
" <td>0.999754</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7378</th>\n",
" <td>the</td>\n",
" <td>old woman said . “</td>\n",
" <td>0.999753</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9926</th>\n",
" <td>the</td>\n",
" <td>old woman said . “</td>\n",
" <td>0.999753</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6555</th>\n",
" <td>the</td>\n",
" <td>two boys of that hearth</td>\n",
" <td>0.999742</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8588</th>\n",
" <td>the</td>\n",
" <td>other end of the line</td>\n",
" <td>0.999729</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1710</th>\n",
" <td>the</td>\n",
" <td>last thirty years . But</td>\n",
" <td>0.999723</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1183</th>\n",
" <td>the</td>\n",
" <td>old man . “ You</td>\n",
" <td>0.999719</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4335</th>\n",
" <td>the</td>\n",
" <td>old man there , standing</td>\n",
" <td>0.999716</td>\n",
" </tr>\n",
" <tr>\n",
" <th>939</th>\n",
" <td>the</td>\n",
" <td>children off in their wing</td>\n",
" <td>0.999714</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5391</th>\n",
" <td>the</td>\n",
" <td>others were easy , throwing</td>\n",
" <td>0.999714</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6452</th>\n",
" <td>the</td>\n",
" <td>people are starving . They</td>\n",
" <td>0.999711</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8753</th>\n",
" <td>the</td>\n",
" <td>women who were so brutally</td>\n",
" <td>0.999710</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5991</th>\n",
" <td>the</td>\n",
" <td>men seemed to be playing</td>\n",
" <td>0.999703</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1092</th>\n",
" <td>the</td>\n",
" <td>other hand , my natural</td>\n",
" <td>0.999703</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7527</th>\n",
" <td>the</td>\n",
" <td>two men got into the</td>\n",
" <td>0.999701</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7714</th>\n",
" <td>the</td>\n",
" <td>opposite side of the ravine</td>\n",
" <td>0.999700</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1484</th>\n",
" <td>the</td>\n",
" <td>other two behind . Seldon</td>\n",
" <td>0.999696</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2235</th>\n",
" <td>the</td>\n",
" <td>worthless creatures themselves , laughing</td>\n",
" <td>0.999691</td>\n",
" </tr>\n",
" <tr>\n",
" <th>571</th>\n",
" <td>the</td>\n",
" <td>other children , the grown</td>\n",
" <td>0.999686</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3905</th>\n",
" <td>the</td>\n",
" <td>walls and towers of the</td>\n",
" <td>0.999686</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8342</th>\n",
" <td>the</td>\n",
" <td>four members of his family</td>\n",
" <td>0.999680</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5342</th>\n",
" <td>the</td>\n",
" <td>rear doors of the truck</td>\n",
" <td>0.999676</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7594</th>\n",
" <td>the</td>\n",
" <td>early days of his connection</td>\n",
" <td>0.999676</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7890</th>\n",
" <td>the</td>\n",
" <td>other women around here who</td>\n",
" <td>0.999672</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8201</th>\n",
" <td>the</td>\n",
" <td>trees , weighing down their</td>\n",
" <td>0.999668</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2155</th>\n",
" <td>the</td>\n",
" <td>other side , in the</td>\n",
" <td>0.999664</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8158</th>\n",
" <td>the</td>\n",
" <td>other half . The children</td>\n",
" <td>0.999627</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8877</th>\n",
" <td>the</td>\n",
" <td>mountains were clear enough for</td>\n",
" <td>0.999626</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8112</th>\n",
" <td>the</td>\n",
" <td>first time he ’d done</td>\n",
" <td>0.999620</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2485</th>\n",
" <td>the</td>\n",
" <td>first time in her three</td>\n",
" <td>0.999613</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8349</th>\n",
" <td>the</td>\n",
" <td>ones she loved . Uba</td>\n",
" <td>0.999608</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3340</th>\n",
" <td>the</td>\n",
" <td>men and keep off the</td>\n",
" <td>0.999606</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2225</th>\n",
" <td>the</td>\n",
" <td>north side of the lake</td>\n",
" <td>0.999604</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2527</th>\n",
" <td>the</td>\n",
" <td>three children , Wesley ,</td>\n",
" <td>0.999595</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9899</th>\n",
" <td>the</td>\n",
" <td>sides of her body to</td>\n",
" <td>0.999586</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6155</th>\n",
" <td>the</td>\n",
" <td>first week of October .</td>\n",
" <td>0.999585</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9109</th>\n",
" <td>the</td>\n",
" <td>girls , the other two</td>\n",
" <td>0.999582</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8568</th>\n",
" <td>the</td>\n",
" <td>old building would burst into</td>\n",
" <td>0.999582</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5185</th>\n",
" <td>the</td>\n",
" <td>people of the sitra achra</td>\n",
" <td>0.999579</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9021</th>\n",
" <td>the</td>\n",
" <td>only person I know who</td>\n",
" <td>0.999576</td>\n",
" </tr>\n",
" <tr>\n",
" <th>946</th>\n",
" <td>the</td>\n",
" <td>exposed edge of the carpet</td>\n",
" <td>0.999574</td>\n",
" </tr>\n",
" <tr>\n",
" <th>56</th>\n",
" <td>the</td>\n",
" <td>men did figure out their</td>\n",
" <td>0.999574</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2164</th>\n",
" <td>the</td>\n",
" <td>teeth of a comb ,</td>\n",
" <td>0.999572</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1311</th>\n",
" <td>the</td>\n",
" <td>two men ; they turned</td>\n",
" <td>0.999570</td>\n",
" </tr>\n",
" <tr>\n",
" <th>207</th>\n",
" <td>the</td>\n",
" <td>other two times . ”</td>\n",
" <td>0.999566</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2783</th>\n",
" <td>the</td>\n",
" <td>other . “ ‘ Men’</td>\n",
" <td>0.999561</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2976</th>\n",
" <td>the</td>\n",
" <td>girls began to produce their</td>\n",
" <td>0.999561</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4848</th>\n",
" <td>the</td>\n",
" <td>soldiers . Police headquarters behind</td>\n",
" <td>0.999559</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7646</th>\n",
" <td>the</td>\n",
" <td>crenellated walls and battlements of</td>\n",
" <td>0.999551</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4699</th>\n",
" <td>the</td>\n",
" <td>palms of his hands ,</td>\n",
" <td>0.999550</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6244</th>\n",
" <td>the</td>\n",
" <td>opposite side of the street</td>\n",
" <td>0.999549</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6933</th>\n",
" <td>the</td>\n",
" <td>heat of the fire was</td>\n",
" <td>0.999545</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6998</th>\n",
" <td>the</td>\n",
" <td>old lord continued : \"</td>\n",
" <td>0.999544</td>\n",
" </tr>\n",
" <tr>\n",
" <th>979</th>\n",
" <td>the</td>\n",
" <td>seven men he sought while</td>\n",
" <td>0.999543</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6797</th>\n",
" <td>the</td>\n",
" <td>other with watchful , frightened</td>\n",
" <td>0.999529</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6041</th>\n",
" <td>the</td>\n",
" <td>Other People were said to</td>\n",
" <td>0.999529</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" root suffix score\n",
"8248 the oceans are being fouled so 0.999990\n",
"8073 the dogs were barking again , 0.999977\n",
"3816 the old trees , the men 0.999972\n",
"2061 the women of the sewing circle 0.999967\n",
"8435 the other side of the island 0.999963\n",
"2287 the other kids . Poke had 0.999962\n",
"194 the soldiers were pushing the remaining 0.999961\n",
"8718 the other side of the craft 0.999936\n",
"6435 the two children were alone in 0.999932\n",
"9683 the other side of the wavy 0.999928\n",
"2880 the girls were home and there 0.999927\n",
"1149 the other . About the most 0.999912\n",
"7864 the women wore hats . The 0.999911\n",
"3619 the children are all big now 0.999911\n",
"7061 the other side of youth . 0.999907\n",
"7415 the sleeves of his rumpled jacket 0.999899\n",
"3242 the old ones had known what 0.999897\n",
"7740 the old man were locked in 0.999891\n",
"7869 the other side of his plate 0.999875\n",
"4032 the guards were also in formation 0.999875\n",
"2708 the other children we ’d played 0.999873\n",
"5978 the breasts of the beauties , 0.999873\n",
"130 the other . The key rattles 0.999872\n",
"1687 the old man ’s eyes grow 0.999863\n",
"5658 the other hand , lived for 0.999860\n",
"8306 the rooftops , the younger ones 0.999855\n",
"929 the Italians were too busy to 0.999855\n",
"4755 the colors of the sunset , 0.999855\n",
"1137 the other side of the river 0.999853\n",
"1635 the other side of the table 0.999852\n",
"4018 the old man was alert now 0.999851\n",
"3895 the bodies . He spoke while 0.999849\n",
"6849 the old man spat into the 0.999828\n",
"6857 the other a stable for two 0.999819\n",
"5489 the ones they ’re worrying about 0.999817\n",
"2220 the old man was n’t daft 0.999811\n",
"4219 the old man asked . John 0.999806\n",
"5777 the men because their beards grew 0.999806\n",
"3880 the first time was last year 0.999798\n",
"7022 the birds and the tourists . 0.999794\n",
"4801 the arms of his cord suit 0.999793\n",
"4170 the Earth residents are rather frontier 0.999792\n",
"2693 the old woman sighed and shook 0.999782\n",
"5168 the men and the drooping heads 0.999774\n",
"8352 the children , the servants , 0.999754\n",
"7378 the old woman said . “ 0.999753\n",
"9926 the old woman said . “ 0.999753\n",
"6555 the two boys of that hearth 0.999742\n",
"8588 the other end of the line 0.999729\n",
"1710 the last thirty years . But 0.999723\n",
"1183 the old man . “ You 0.999719\n",
"4335 the old man there , standing 0.999716\n",
"939 the children off in their wing 0.999714\n",
"5391 the others were easy , throwing 0.999714\n",
"6452 the people are starving . They 0.999711\n",
"8753 the women who were so brutally 0.999710\n",
"5991 the men seemed to be playing 0.999703\n",
"1092 the other hand , my natural 0.999703\n",
"7527 the two men got into the 0.999701\n",
"7714 the opposite side of the ravine 0.999700\n",
"1484 the other two behind . Seldon 0.999696\n",
"2235 the worthless creatures themselves , laughing 0.999691\n",
"571 the other children , the grown 0.999686\n",
"3905 the walls and towers of the 0.999686\n",
"8342 the four members of his family 0.999680\n",
"5342 the rear doors of the truck 0.999676\n",
"7594 the early days of his connection 0.999676\n",
"7890 the other women around here who 0.999672\n",
"8201 the trees , weighing down their 0.999668\n",
"2155 the other side , in the 0.999664\n",
"8158 the other half . The children 0.999627\n",
"8877 the mountains were clear enough for 0.999626\n",
"8112 the first time he ’d done 0.999620\n",
"2485 the first time in her three 0.999613\n",
"8349 the ones she loved . Uba 0.999608\n",
"3340 the men and keep off the 0.999606\n",
"2225 the north side of the lake 0.999604\n",
"2527 the three children , Wesley , 0.999595\n",
"9899 the sides of her body to 0.999586\n",
"6155 the first week of October . 0.999585\n",
"9109 the girls , the other two 0.999582\n",
"8568 the old building would burst into 0.999582\n",
"5185 the people of the sitra achra 0.999579\n",
"9021 the only person I know who 0.999576\n",
"946 the exposed edge of the carpet 0.999574\n",
"56 the men did figure out their 0.999574\n",
"2164 the teeth of a comb , 0.999572\n",
"1311 the two men ; they turned 0.999570\n",
"207 the other two times . ” 0.999566\n",
"2783 the other . “ ‘ Men’ 0.999561\n",
"2976 the girls began to produce their 0.999561\n",
"4848 the soldiers . Police headquarters behind 0.999559\n",
"7646 the crenellated walls and battlements of 0.999551\n",
"4699 the palms of his hands , 0.999550\n",
"6244 the opposite side of the street 0.999549\n",
"6933 the heat of the fire was 0.999545\n",
"6998 the old lord continued : \" 0.999544\n",
"979 the seven men he sought while 0.999543\n",
"6797 the other with watchful , frightened 0.999529\n",
"6041 the Other People were said to 0.999529"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df.root=='the'].sort_values('score', ascending=False).head(100)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Most confident \"a\" suffixes, under 0.95"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/dclure/Projects/geovec/env/lib/python3.6/site-packages/ipykernel_launcher.py:1: UserWarning: Boolean Series key will be reindexed to match DataFrame index.\n",
" \"\"\"Entry point for launching an IPython kernel.\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>root</th>\n",
" <th>suffix</th>\n",
" <th>score</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>4984</th>\n",
" <td>a</td>\n",
" <td>little blow like this?’ He</td>\n",
" <td>0.949833</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5433</th>\n",
" <td>a</td>\n",
" <td>piece of yellow road machinery</td>\n",
" <td>0.949770</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3784</th>\n",
" <td>a</td>\n",
" <td>subtle , sarcastic smile appeared</td>\n",
" <td>0.949343</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5108</th>\n",
" <td>a</td>\n",
" <td>glint of pride flashing in</td>\n",
" <td>0.949291</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2842</th>\n",
" <td>a</td>\n",
" <td>young man named Hans Ulbricht</td>\n",
" <td>0.949106</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8597</th>\n",
" <td>a</td>\n",
" <td>moment later he was crawling</td>\n",
" <td>0.948123</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4310</th>\n",
" <td>a</td>\n",
" <td>little small talk in way</td>\n",
" <td>0.947356</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8552</th>\n",
" <td>a</td>\n",
" <td>shrewd professional gaze : a</td>\n",
" <td>0.946827</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2270</th>\n",
" <td>a</td>\n",
" <td>good story about how they</td>\n",
" <td>0.946243</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2815</th>\n",
" <td>a</td>\n",
" <td>little secret , George ?</td>\n",
" <td>0.945934</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6494</th>\n",
" <td>a</td>\n",
" <td>sigh of relief when I</td>\n",
" <td>0.945407</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5613</th>\n",
" <td>a</td>\n",
" <td>good wife for her favorite</td>\n",
" <td>0.944975</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6652</th>\n",
" <td>a</td>\n",
" <td>smile , and he said</td>\n",
" <td>0.943743</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1385</th>\n",
" <td>a</td>\n",
" <td>very pretty house in Brook</td>\n",
" <td>0.943356</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1863</th>\n",
" <td>a</td>\n",
" <td>look at that myself ,</td>\n",
" <td>0.942566</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4008</th>\n",
" <td>a</td>\n",
" <td>slight dribbling sound . Losario</td>\n",
" <td>0.942566</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8471</th>\n",
" <td>a</td>\n",
" <td>hundred dollars does n’t buy</td>\n",
" <td>0.942005</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9394</th>\n",
" <td>a</td>\n",
" <td>smile . “ And thank</td>\n",
" <td>0.941848</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3187</th>\n",
" <td>a</td>\n",
" <td>good thing for all the</td>\n",
" <td>0.940853</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4991</th>\n",
" <td>a</td>\n",
" <td>quick workout , and got</td>\n",
" <td>0.940776</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9062</th>\n",
" <td>a</td>\n",
" <td>nouveau riche merchant named Brocklesby</td>\n",
" <td>0.939918</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3270</th>\n",
" <td>a</td>\n",
" <td>little tang , like the</td>\n",
" <td>0.939661</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8373</th>\n",
" <td>a</td>\n",
" <td>while . Sue does the</td>\n",
" <td>0.939060</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8474</th>\n",
" <td>a</td>\n",
" <td>smile , and he went</td>\n",
" <td>0.938915</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3946</th>\n",
" <td>a</td>\n",
" <td>woman scholar , loving scholarship</td>\n",
" <td>0.938646</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6011</th>\n",
" <td>a</td>\n",
" <td>piece of Sèvres ware ,</td>\n",
" <td>0.938301</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4678</th>\n",
" <td>a</td>\n",
" <td>long , slender object .</td>\n",
" <td>0.936864</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5083</th>\n",
" <td>a</td>\n",
" <td>vicious headache , and I</td>\n",
" <td>0.936646</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4978</th>\n",
" <td>a</td>\n",
" <td>wink — she knew it</td>\n",
" <td>0.936197</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4588</th>\n",
" <td>a</td>\n",
" <td>good deal that the Captain</td>\n",
" <td>0.936197</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1823</th>\n",
" <td>a</td>\n",
" <td>moment more and he was</td>\n",
" <td>0.935374</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6567</th>\n",
" <td>a</td>\n",
" <td>moment . ” Then he</td>\n",
" <td>0.935372</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3932</th>\n",
" <td>a</td>\n",
" <td>half dozen or more pairs</td>\n",
" <td>0.935354</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7921</th>\n",
" <td>a</td>\n",
" <td>beautiful strange animal , himself</td>\n",
" <td>0.934359</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7827</th>\n",
" <td>a</td>\n",
" <td>halt . When he realized</td>\n",
" <td>0.933972</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8980</th>\n",
" <td>a</td>\n",
" <td>fine figure in a gown</td>\n",
" <td>0.933487</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5801</th>\n",
" <td>a</td>\n",
" <td>form of laughter . I</td>\n",
" <td>0.933253</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2548</th>\n",
" <td>a</td>\n",
" <td>nod , Jack took Dwayne</td>\n",
" <td>0.932986</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5794</th>\n",
" <td>a</td>\n",
" <td>deeply disconcerted frown , and</td>\n",
" <td>0.932864</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8156</th>\n",
" <td>a</td>\n",
" <td>series of especially nasty battles</td>\n",
" <td>0.932412</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6509</th>\n",
" <td>a</td>\n",
" <td>moment , then handed the</td>\n",
" <td>0.931905</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>a</td>\n",
" <td>moment to fetch an odd</td>\n",
" <td>0.931635</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4380</th>\n",
" <td>a</td>\n",
" <td>better carver than I am</td>\n",
" <td>0.930844</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3551</th>\n",
" <td>a</td>\n",
" <td>real person . I was</td>\n",
" <td>0.930414</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4577</th>\n",
" <td>a</td>\n",
" <td>white silk blouse . She</td>\n",
" <td>0.930383</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7845</th>\n",
" <td>a</td>\n",
" <td>bottle of milk and tore</td>\n",
" <td>0.930157</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5084</th>\n",
" <td>a</td>\n",
" <td>shocked voice . Arch was</td>\n",
" <td>0.929950</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9669</th>\n",
" <td>a</td>\n",
" <td>moment , and he said</td>\n",
" <td>0.929857</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6372</th>\n",
" <td>a</td>\n",
" <td>better position to judge as</td>\n",
" <td>0.929775</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9853</th>\n",
" <td>a</td>\n",
" <td>real job . But I</td>\n",
" <td>0.929551</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1555</th>\n",
" <td>a</td>\n",
" <td>small child , with brows</td>\n",
" <td>0.929303</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8782</th>\n",
" <td>a</td>\n",
" <td>pencil and an exercise book</td>\n",
" <td>0.929134</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2561</th>\n",
" <td>a</td>\n",
" <td>Kett , or a Cade</td>\n",
" <td>0.928444</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8014</th>\n",
" <td>a</td>\n",
" <td>small sense of victory in</td>\n",
" <td>0.927543</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5766</th>\n",
" <td>a</td>\n",
" <td>single diamond , ” he</td>\n",
" <td>0.927444</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6371</th>\n",
" <td>a</td>\n",
" <td>great deal since the day</td>\n",
" <td>0.927357</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6743</th>\n",
" <td>a</td>\n",
" <td>good man . He ’ll</td>\n",
" <td>0.926947</td>\n",
" </tr>\n",
" <tr>\n",
" <th>907</th>\n",
" <td>a</td>\n",
" <td>thunderous roar . He did</td>\n",
" <td>0.926877</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2625</th>\n",
" <td>a</td>\n",
" <td>beautiful mouth , \" he</td>\n",
" <td>0.926769</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4937</th>\n",
" <td>a</td>\n",
" <td>former detective . He might</td>\n",
" <td>0.926376</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1740</th>\n",
" <td>a</td>\n",
" <td>good push and skated away</td>\n",
" <td>0.926225</td>\n",
" </tr>\n",
" <tr>\n",
" <th>970</th>\n",
" <td>a</td>\n",
" <td>large diamond set in emeralds</td>\n",
" <td>0.925235</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7035</th>\n",
" <td>a</td>\n",
" <td>grimace of disgust , conscious</td>\n",
" <td>0.924614</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1854</th>\n",
" <td>a</td>\n",
" <td>while , neither could the</td>\n",
" <td>0.924505</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6487</th>\n",
" <td>a</td>\n",
" <td>nice place there . Cabinet</td>\n",
" <td>0.923784</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2440</th>\n",
" <td>a</td>\n",
" <td>careful feminine touch . I</td>\n",
" <td>0.923332</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3211</th>\n",
" <td>a</td>\n",
" <td>couple butane lighters , case</td>\n",
" <td>0.923199</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8779</th>\n",
" <td>a</td>\n",
" <td>fool . If he thought</td>\n",
" <td>0.922833</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4255</th>\n",
" <td>a</td>\n",
" <td>dark suit and vest ,</td>\n",
" <td>0.922138</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3169</th>\n",
" <td>a</td>\n",
" <td>moment and looked in through</td>\n",
" <td>0.922041</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1222</th>\n",
" <td>a</td>\n",
" <td>brand - new baby .</td>\n",
" <td>0.921554</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8767</th>\n",
" <td>a</td>\n",
" <td>good whack to try to</td>\n",
" <td>0.921478</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2748</th>\n",
" <td>a</td>\n",
" <td>hand to her heart to</td>\n",
" <td>0.921425</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5302</th>\n",
" <td>a</td>\n",
" <td>flock of extras dressed as</td>\n",
" <td>0.920985</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5145</th>\n",
" <td>a</td>\n",
" <td>week , just long enough</td>\n",
" <td>0.920944</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6095</th>\n",
" <td>a</td>\n",
" <td>motorcycle accident . ” He</td>\n",
" <td>0.920715</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4687</th>\n",
" <td>a</td>\n",
" <td>while and you can have</td>\n",
" <td>0.920532</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8549</th>\n",
" <td>a</td>\n",
" <td>nice neighborhood . ” “</td>\n",
" <td>0.920393</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3361</th>\n",
" <td>a</td>\n",
" <td>dozen appointments that made up</td>\n",
" <td>0.920297</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2845</th>\n",
" <td>a</td>\n",
" <td>hurry . Cotton Blossom sat</td>\n",
" <td>0.918670</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1231</th>\n",
" <td>a</td>\n",
" <td>groan that forced his eyes</td>\n",
" <td>0.918364</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9094</th>\n",
" <td>a</td>\n",
" <td>very different type of hire</td>\n",
" <td>0.918222</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7988</th>\n",
" <td>a</td>\n",
" <td>week behind wit’ the payoff</td>\n",
" <td>0.917953</td>\n",
" </tr>\n",
" <tr>\n",
" <th>747</th>\n",
" <td>a</td>\n",
" <td>new question , then .</td>\n",
" <td>0.917800</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5765</th>\n",
" <td>a</td>\n",
" <td>smile . Willoughby was an</td>\n",
" <td>0.917708</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7658</th>\n",
" <td>a</td>\n",
" <td>black guy named Benny .</td>\n",
" <td>0.917578</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9050</th>\n",
" <td>a</td>\n",
" <td>bottle of refrigerated wine uncapped</td>\n",
" <td>0.917310</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2259</th>\n",
" <td>a</td>\n",
" <td>moment . She was teasing</td>\n",
" <td>0.917076</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1022</th>\n",
" <td>a</td>\n",
" <td>sad and shameful thing .</td>\n",
" <td>0.916802</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9100</th>\n",
" <td>a</td>\n",
" <td>sip . It was lukewarm</td>\n",
" <td>0.916800</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6152</th>\n",
" <td>a</td>\n",
" <td>shudder through the length of</td>\n",
" <td>0.916744</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1389</th>\n",
" <td>a</td>\n",
" <td>good hot breakfast ready on</td>\n",
" <td>0.916584</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9595</th>\n",
" <td>a</td>\n",
" <td>private pool capped with a</td>\n",
" <td>0.916489</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4239</th>\n",
" <td>a</td>\n",
" <td>straight face . “ Foxy</td>\n",
" <td>0.915012</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1653</th>\n",
" <td>a</td>\n",
" <td>small cake . Though it</td>\n",
" <td>0.914208</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7635</th>\n",
" <td>a</td>\n",
" <td>moment and then relaxed again</td>\n",
" <td>0.914080</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6645</th>\n",
" <td>a</td>\n",
" <td>moment , then softened as</td>\n",
" <td>0.913196</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5682</th>\n",
" <td>a</td>\n",
" <td>long , fine nose ,</td>\n",
" <td>0.913157</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8036</th>\n",
" <td>a</td>\n",
" <td>friend of mine , Curtis</td>\n",
" <td>0.912835</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3490</th>\n",
" <td>a</td>\n",
" <td>friend of mine , Huron</td>\n",
" <td>0.912835</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" root suffix score\n",
"4984 a little blow like this?’ He 0.949833\n",
"5433 a piece of yellow road machinery 0.949770\n",
"3784 a subtle , sarcastic smile appeared 0.949343\n",
"5108 a glint of pride flashing in 0.949291\n",
"2842 a young man named Hans Ulbricht 0.949106\n",
"8597 a moment later he was crawling 0.948123\n",
"4310 a little small talk in way 0.947356\n",
"8552 a shrewd professional gaze : a 0.946827\n",
"2270 a good story about how they 0.946243\n",
"2815 a little secret , George ? 0.945934\n",
"6494 a sigh of relief when I 0.945407\n",
"5613 a good wife for her favorite 0.944975\n",
"6652 a smile , and he said 0.943743\n",
"1385 a very pretty house in Brook 0.943356\n",
"1863 a look at that myself , 0.942566\n",
"4008 a slight dribbling sound . Losario 0.942566\n",
"8471 a hundred dollars does n’t buy 0.942005\n",
"9394 a smile . “ And thank 0.941848\n",
"3187 a good thing for all the 0.940853\n",
"4991 a quick workout , and got 0.940776\n",
"9062 a nouveau riche merchant named Brocklesby 0.939918\n",
"3270 a little tang , like the 0.939661\n",
"8373 a while . Sue does the 0.939060\n",
"8474 a smile , and he went 0.938915\n",
"3946 a woman scholar , loving scholarship 0.938646\n",
"6011 a piece of Sèvres ware , 0.938301\n",
"4678 a long , slender object . 0.936864\n",
"5083 a vicious headache , and I 0.936646\n",
"4978 a wink — she knew it 0.936197\n",
"4588 a good deal that the Captain 0.936197\n",
"1823 a moment more and he was 0.935374\n",
"6567 a moment . ” Then he 0.935372\n",
"3932 a half dozen or more pairs 0.935354\n",
"7921 a beautiful strange animal , himself 0.934359\n",
"7827 a halt . When he realized 0.933972\n",
"8980 a fine figure in a gown 0.933487\n",
"5801 a form of laughter . I 0.933253\n",
"2548 a nod , Jack took Dwayne 0.932986\n",
"5794 a deeply disconcerted frown , and 0.932864\n",
"8156 a series of especially nasty battles 0.932412\n",
"6509 a moment , then handed the 0.931905\n",
"15 a moment to fetch an odd 0.931635\n",
"4380 a better carver than I am 0.930844\n",
"3551 a real person . I was 0.930414\n",
"4577 a white silk blouse . She 0.930383\n",
"7845 a bottle of milk and tore 0.930157\n",
"5084 a shocked voice . Arch was 0.929950\n",
"9669 a moment , and he said 0.929857\n",
"6372 a better position to judge as 0.929775\n",
"9853 a real job . But I 0.929551\n",
"1555 a small child , with brows 0.929303\n",
"8782 a pencil and an exercise book 0.929134\n",
"2561 a Kett , or a Cade 0.928444\n",
"8014 a small sense of victory in 0.927543\n",
"5766 a single diamond , ” he 0.927444\n",
"6371 a great deal since the day 0.927357\n",
"6743 a good man . He ’ll 0.926947\n",
"907 a thunderous roar . He did 0.926877\n",
"2625 a beautiful mouth , \" he 0.926769\n",
"4937 a former detective . He might 0.926376\n",
"1740 a good push and skated away 0.926225\n",
"970 a large diamond set in emeralds 0.925235\n",
"7035 a grimace of disgust , conscious 0.924614\n",
"1854 a while , neither could the 0.924505\n",
"6487 a nice place there . Cabinet 0.923784\n",
"2440 a careful feminine touch . I 0.923332\n",
"3211 a couple butane lighters , case 0.923199\n",
"8779 a fool . If he thought 0.922833\n",
"4255 a dark suit and vest , 0.922138\n",
"3169 a moment and looked in through 0.922041\n",
"1222 a brand - new baby . 0.921554\n",
"8767 a good whack to try to 0.921478\n",
"2748 a hand to her heart to 0.921425\n",
"5302 a flock of extras dressed as 0.920985\n",
"5145 a week , just long enough 0.920944\n",
"6095 a motorcycle accident . ” He 0.920715\n",
"4687 a while and you can have 0.920532\n",
"8549 a nice neighborhood . ” “ 0.920393\n",
"3361 a dozen appointments that made up 0.920297\n",
"2845 a hurry . Cotton Blossom sat 0.918670\n",
"1231 a groan that forced his eyes 0.918364\n",
"9094 a very different type of hire 0.918222\n",
"7988 a week behind wit’ the payoff 0.917953\n",
"747 a new question , then . 0.917800\n",
"5765 a smile . Willoughby was an 0.917708\n",
"7658 a black guy named Benny . 0.917578\n",
"9050 a bottle of refrigerated wine uncapped 0.917310\n",
"2259 a moment . She was teasing 0.917076\n",
"1022 a sad and shameful thing . 0.916802\n",
"9100 a sip . It was lukewarm 0.916800\n",
"6152 a shudder through the length of 0.916744\n",
"1389 a good hot breakfast ready on 0.916584\n",
"9595 a private pool capped with a 0.916489\n",
"4239 a straight face . “ Foxy 0.915012\n",
"1653 a small cake . Though it 0.914208\n",
"7635 a moment and then relaxed again 0.914080\n",
"6645 a moment , then softened as 0.913196\n",
"5682 a long , fine nose , 0.913157\n",
"8036 a friend of mine , Curtis 0.912835\n",
"3490 a friend of mine , Huron 0.912835"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df.root=='a'][df.score < 0.95].sort_values('score', ascending=False).head(100)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Most confident \"the\" suffixes, under 0.95"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/dclure/Projects/geovec/env/lib/python3.6/site-packages/ipykernel_launcher.py:1: UserWarning: Boolean Series key will be reindexed to match DataFrame index.\n",
" \"\"\"Entry point for launching an IPython kernel.\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>root</th>\n",
" <th>suffix</th>\n",
" <th>score</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>8345</th>\n",
" <td>the</td>\n",
" <td>hospital , and Merv the</td>\n",
" <td>0.949972</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3786</th>\n",
" <td>the</td>\n",
" <td>field of battle . The</td>\n",
" <td>0.949964</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4121</th>\n",
" <td>the</td>\n",
" <td>English sheepdog , was walking</td>\n",
" <td>0.949903</td>\n",
" </tr>\n",
" <tr>\n",
" <th>814</th>\n",
" <td>the</td>\n",
" <td>rear seat . They could</td>\n",
" <td>0.949901</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5820</th>\n",
" <td>the</td>\n",
" <td>island ; now they knew</td>\n",
" <td>0.949884</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5484</th>\n",
" <td>the</td>\n",
" <td>opposite wall with his knees</td>\n",
" <td>0.949875</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1626</th>\n",
" <td>the</td>\n",
" <td>bed and locked his hands</td>\n",
" <td>0.949868</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3065</th>\n",
" <td>the</td>\n",
" <td>English emissaries . A full</td>\n",
" <td>0.949807</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7846</th>\n",
" <td>the</td>\n",
" <td>antennae of Her Majesty .</td>\n",
" <td>0.949701</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9898</th>\n",
" <td>the</td>\n",
" <td>answers to Max ’s questions</td>\n",
" <td>0.949672</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4939</th>\n",
" <td>the</td>\n",
" <td>table , struggled to keep</td>\n",
" <td>0.949664</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7362</th>\n",
" <td>the</td>\n",
" <td>silence was stunning . The</td>\n",
" <td>0.949654</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6676</th>\n",
" <td>the</td>\n",
" <td>furniture and pictures ,-- were</td>\n",
" <td>0.949647</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3060</th>\n",
" <td>the</td>\n",
" <td>risk had to be run</td>\n",
" <td>0.949642</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2002</th>\n",
" <td>the</td>\n",
" <td>body bags . And the</td>\n",
" <td>0.949622</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5142</th>\n",
" <td>the</td>\n",
" <td>office to answer the phone</td>\n",
" <td>0.949554</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5710</th>\n",
" <td>the</td>\n",
" <td>holes matters . As with</td>\n",
" <td>0.949532</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8077</th>\n",
" <td>the</td>\n",
" <td>creed of the status quo</td>\n",
" <td>0.949504</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5178</th>\n",
" <td>the</td>\n",
" <td>Rasne believed the gods spoke</td>\n",
" <td>0.949466</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8023</th>\n",
" <td>the</td>\n",
" <td>dispersal of the Jews ,</td>\n",
" <td>0.949389</td>\n",
" </tr>\n",
" <tr>\n",
" <th>941</th>\n",
" <td>the</td>\n",
" <td>air where it had been</td>\n",
" <td>0.949371</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2079</th>\n",
" <td>the</td>\n",
" <td>buttoning of her old -</td>\n",
" <td>0.949354</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1185</th>\n",
" <td>the</td>\n",
" <td>sky with a diameter about</td>\n",
" <td>0.949348</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5480</th>\n",
" <td>the</td>\n",
" <td>dog a meal , he</td>\n",
" <td>0.949330</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6590</th>\n",
" <td>the</td>\n",
" <td>state . “ Well ,</td>\n",
" <td>0.949242</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1533</th>\n",
" <td>the</td>\n",
" <td>mythology of his boyhood .</td>\n",
" <td>0.949217</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7264</th>\n",
" <td>the</td>\n",
" <td>hell happened to you ?</td>\n",
" <td>0.949155</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1373</th>\n",
" <td>the</td>\n",
" <td>screen , skeleton men wearing</td>\n",
" <td>0.949134</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4495</th>\n",
" <td>the</td>\n",
" <td>streets of Standhope , Connecticut</td>\n",
" <td>0.949124</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8300</th>\n",
" <td>the</td>\n",
" <td>elbow , his right arm</td>\n",
" <td>0.949085</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9223</th>\n",
" <td>the</td>\n",
" <td>man said : “ I</td>\n",
" <td>0.949071</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2183</th>\n",
" <td>the</td>\n",
" <td>bathroom are pink . There</td>\n",
" <td>0.948927</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3140</th>\n",
" <td>the</td>\n",
" <td>restaurant . “ Who cares</td>\n",
" <td>0.948904</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8139</th>\n",
" <td>the</td>\n",
" <td>hell would I know ,</td>\n",
" <td>0.948901</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3615</th>\n",
" <td>the</td>\n",
" <td>stove . The stove has</td>\n",
" <td>0.948704</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6295</th>\n",
" <td>the</td>\n",
" <td>phone at her desk ,</td>\n",
" <td>0.948690</td>\n",
" </tr>\n",
" <tr>\n",
" <th>64</th>\n",
" <td>the</td>\n",
" <td>P.I. said was , “</td>\n",
" <td>0.948670</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6685</th>\n",
" <td>the</td>\n",
" <td>woman said . “ Do</td>\n",
" <td>0.948569</td>\n",
" </tr>\n",
" <tr>\n",
" <th>91</th>\n",
" <td>the</td>\n",
" <td>bubbles stopped . Ach ,</td>\n",
" <td>0.948532</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3968</th>\n",
" <td>the</td>\n",
" <td>United States and what had</td>\n",
" <td>0.948424</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3049</th>\n",
" <td>the</td>\n",
" <td>adobe soil , he spent</td>\n",
" <td>0.948423</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8042</th>\n",
" <td>the</td>\n",
" <td>covers . “ Yes ,</td>\n",
" <td>0.948412</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1283</th>\n",
" <td>the</td>\n",
" <td>reactions of the strangers to</td>\n",
" <td>0.948402</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9943</th>\n",
" <td>the</td>\n",
" <td>seventh apparition of the Virgin</td>\n",
" <td>0.948287</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4924</th>\n",
" <td>the</td>\n",
" <td>town , had long since</td>\n",
" <td>0.948174</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5695</th>\n",
" <td>the</td>\n",
" <td>more curtailed are the regenerative</td>\n",
" <td>0.948141</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9079</th>\n",
" <td>the</td>\n",
" <td>voices that now whispered in</td>\n",
" <td>0.948136</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5690</th>\n",
" <td>the</td>\n",
" <td>open ground to the outer</td>\n",
" <td>0.948092</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8539</th>\n",
" <td>the</td>\n",
" <td>Temple Brick . No matter</td>\n",
" <td>0.948070</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6121</th>\n",
" <td>the</td>\n",
" <td>world . ” Stella was</td>\n",
" <td>0.948009</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1085</th>\n",
" <td>the</td>\n",
" <td>powerful magic of the Labyrinth</td>\n",
" <td>0.947993</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1692</th>\n",
" <td>the</td>\n",
" <td>Council meeting until Mayor Branno</td>\n",
" <td>0.947993</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2450</th>\n",
" <td>the</td>\n",
" <td>solicitors began to speculate as</td>\n",
" <td>0.947989</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5468</th>\n",
" <td>the</td>\n",
" <td>lobbyist was not willing to</td>\n",
" <td>0.947912</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7692</th>\n",
" <td>the</td>\n",
" <td>lawn , in through the</td>\n",
" <td>0.947857</td>\n",
" </tr>\n",
" <tr>\n",
" <th>466</th>\n",
" <td>the</td>\n",
" <td>papers gripped in his hand</td>\n",
" <td>0.947635</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8348</th>\n",
" <td>the</td>\n",
" <td>woman said . “ And</td>\n",
" <td>0.947585</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1405</th>\n",
" <td>the</td>\n",
" <td>space . She had kidded</td>\n",
" <td>0.947581</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4270</th>\n",
" <td>the</td>\n",
" <td>cupboard , opened the door</td>\n",
" <td>0.947497</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9782</th>\n",
" <td>the</td>\n",
" <td>rancor was soon forgotten .</td>\n",
" <td>0.947483</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2963</th>\n",
" <td>the</td>\n",
" <td>deck and spend the morning</td>\n",
" <td>0.947379</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7631</th>\n",
" <td>the</td>\n",
" <td>world back on his shoulders</td>\n",
" <td>0.947343</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7717</th>\n",
" <td>the</td>\n",
" <td>place was bustling . Early</td>\n",
" <td>0.947287</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8434</th>\n",
" <td>the</td>\n",
" <td>jewel of the colonies .</td>\n",
" <td>0.947192</td>\n",
" </tr>\n",
" <tr>\n",
" <th>974</th>\n",
" <td>the</td>\n",
" <td>counter . “ That car</td>\n",
" <td>0.947184</td>\n",
" </tr>\n",
" <tr>\n",
" <th>775</th>\n",
" <td>the</td>\n",
" <td>house . She ’d seen</td>\n",
" <td>0.947184</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4225</th>\n",
" <td>the</td>\n",
" <td>door locked forever . ”</td>\n",
" <td>0.947054</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7318</th>\n",
" <td>the</td>\n",
" <td>kingdom , wearing the silver</td>\n",
" <td>0.947047</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2732</th>\n",
" <td>the</td>\n",
" <td>hill . When they had</td>\n",
" <td>0.947021</td>\n",
" </tr>\n",
" <tr>\n",
" <th>449</th>\n",
" <td>the</td>\n",
" <td>garage - door opener and</td>\n",
" <td>0.946955</td>\n",
" </tr>\n",
" <tr>\n",
" <th>759</th>\n",
" <td>the</td>\n",
" <td>marble staircase . “ Lagos</td>\n",
" <td>0.946780</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6415</th>\n",
" <td>the</td>\n",
" <td>sidewalk of an intersecting road</td>\n",
" <td>0.946779</td>\n",
" </tr>\n",
" <tr>\n",
" <th>526</th>\n",
" <td>the</td>\n",
" <td>windshield : PROPERTY OF THE</td>\n",
" <td>0.946550</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>the</td>\n",
" <td>sitting room , they returned</td>\n",
" <td>0.946547</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4913</th>\n",
" <td>the</td>\n",
" <td>wheel did n’t move .</td>\n",
" <td>0.946422</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3796</th>\n",
" <td>the</td>\n",
" <td>lieutenant was unsure in his</td>\n",
" <td>0.946403</td>\n",
" </tr>\n",
" <tr>\n",
" <th>877</th>\n",
" <td>the</td>\n",
" <td>police come ’cause I live</td>\n",
" <td>0.946302</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2690</th>\n",
" <td>the</td>\n",
" <td>table and she felt an</td>\n",
" <td>0.946285</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8228</th>\n",
" <td>the</td>\n",
" <td>carriage swung into narrow Halsted</td>\n",
" <td>0.946276</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7873</th>\n",
" <td>the</td>\n",
" <td>narrowness , the conceit ,</td>\n",
" <td>0.946197</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4827</th>\n",
" <td>the</td>\n",
" <td>crucifier , the deceiver ,</td>\n",
" <td>0.946197</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2822</th>\n",
" <td>the</td>\n",
" <td>unclaimed , the anonymous ,</td>\n",
" <td>0.946197</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4617</th>\n",
" <td>the</td>\n",
" <td>referee had just counted ten</td>\n",
" <td>0.946139</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3822</th>\n",
" <td>the</td>\n",
" <td>door opened , and Ellen</td>\n",
" <td>0.946101</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5410</th>\n",
" <td>the</td>\n",
" <td>neighborhood cats and had been</td>\n",
" <td>0.946044</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7081</th>\n",
" <td>the</td>\n",
" <td>money that used to go</td>\n",
" <td>0.946028</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6357</th>\n",
" <td>the</td>\n",
" <td>back corner of the store</td>\n",
" <td>0.946007</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9095</th>\n",
" <td>the</td>\n",
" <td>town , with pitchers mounted</td>\n",
" <td>0.945989</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9752</th>\n",
" <td>the</td>\n",
" <td>top . Now , however</td>\n",
" <td>0.945956</td>\n",
" </tr>\n",
" <tr>\n",
" <th>716</th>\n",
" <td>the</td>\n",
" <td>actual terror , but I</td>\n",
" <td>0.945928</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6531</th>\n",
" <td>the</td>\n",
" <td>burgundy banners of the Arbor</td>\n",
" <td>0.945857</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1383</th>\n",
" <td>the</td>\n",
" <td>fundamental energies of the Milky</td>\n",
" <td>0.945857</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7002</th>\n",
" <td>the</td>\n",
" <td>possibility of getting their faces</td>\n",
" <td>0.945845</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6772</th>\n",
" <td>the</td>\n",
" <td>lake . That view was</td>\n",
" <td>0.945836</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4431</th>\n",
" <td>the</td>\n",
" <td>train entered her mind ,</td>\n",
" <td>0.945790</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6599</th>\n",
" <td>the</td>\n",
" <td>river . ” He strode</td>\n",
" <td>0.945764</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7599</th>\n",
" <td>the</td>\n",
" <td>living room “ Dag ,</td>\n",
" <td>0.945642</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5559</th>\n",
" <td>the</td>\n",
" <td>household jumped at her command</td>\n",
" <td>0.945621</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6819</th>\n",
" <td>the</td>\n",
" <td>lobby of the Cunard Hotel</td>\n",
" <td>0.945564</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1617</th>\n",
" <td>the</td>\n",
" <td>apartment they had rented ,</td>\n",
" <td>0.945482</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" root suffix score\n",
"8345 the hospital , and Merv the 0.949972\n",
"3786 the field of battle . The 0.949964\n",
"4121 the English sheepdog , was walking 0.949903\n",
"814 the rear seat . They could 0.949901\n",
"5820 the island ; now they knew 0.949884\n",
"5484 the opposite wall with his knees 0.949875\n",
"1626 the bed and locked his hands 0.949868\n",
"3065 the English emissaries . A full 0.949807\n",
"7846 the antennae of Her Majesty . 0.949701\n",
"9898 the answers to Max ’s questions 0.949672\n",
"4939 the table , struggled to keep 0.949664\n",
"7362 the silence was stunning . The 0.949654\n",
"6676 the furniture and pictures ,-- were 0.949647\n",
"3060 the risk had to be run 0.949642\n",
"2002 the body bags . And the 0.949622\n",
"5142 the office to answer the phone 0.949554\n",
"5710 the holes matters . As with 0.949532\n",
"8077 the creed of the status quo 0.949504\n",
"5178 the Rasne believed the gods spoke 0.949466\n",
"8023 the dispersal of the Jews , 0.949389\n",
"941 the air where it had been 0.949371\n",
"2079 the buttoning of her old - 0.949354\n",
"1185 the sky with a diameter about 0.949348\n",
"5480 the dog a meal , he 0.949330\n",
"6590 the state . “ Well , 0.949242\n",
"1533 the mythology of his boyhood . 0.949217\n",
"7264 the hell happened to you ? 0.949155\n",
"1373 the screen , skeleton men wearing 0.949134\n",
"4495 the streets of Standhope , Connecticut 0.949124\n",
"8300 the elbow , his right arm 0.949085\n",
"9223 the man said : “ I 0.949071\n",
"2183 the bathroom are pink . There 0.948927\n",
"3140 the restaurant . “ Who cares 0.948904\n",
"8139 the hell would I know , 0.948901\n",
"3615 the stove . The stove has 0.948704\n",
"6295 the phone at her desk , 0.948690\n",
"64 the P.I. said was , “ 0.948670\n",
"6685 the woman said . “ Do 0.948569\n",
"91 the bubbles stopped . Ach , 0.948532\n",
"3968 the United States and what had 0.948424\n",
"3049 the adobe soil , he spent 0.948423\n",
"8042 the covers . “ Yes , 0.948412\n",
"1283 the reactions of the strangers to 0.948402\n",
"9943 the seventh apparition of the Virgin 0.948287\n",
"4924 the town , had long since 0.948174\n",
"5695 the more curtailed are the regenerative 0.948141\n",
"9079 the voices that now whispered in 0.948136\n",
"5690 the open ground to the outer 0.948092\n",
"8539 the Temple Brick . No matter 0.948070\n",
"6121 the world . ” Stella was 0.948009\n",
"1085 the powerful magic of the Labyrinth 0.947993\n",
"1692 the Council meeting until Mayor Branno 0.947993\n",
"2450 the solicitors began to speculate as 0.947989\n",
"5468 the lobbyist was not willing to 0.947912\n",
"7692 the lawn , in through the 0.947857\n",
"466 the papers gripped in his hand 0.947635\n",
"8348 the woman said . “ And 0.947585\n",
"1405 the space . She had kidded 0.947581\n",
"4270 the cupboard , opened the door 0.947497\n",
"9782 the rancor was soon forgotten . 0.947483\n",
"2963 the deck and spend the morning 0.947379\n",
"7631 the world back on his shoulders 0.947343\n",
"7717 the place was bustling . Early 0.947287\n",
"8434 the jewel of the colonies . 0.947192\n",
"974 the counter . “ That car 0.947184\n",
"775 the house . She ’d seen 0.947184\n",
"4225 the door locked forever . ” 0.947054\n",
"7318 the kingdom , wearing the silver 0.947047\n",
"2732 the hill . When they had 0.947021\n",
"449 the garage - door opener and 0.946955\n",
"759 the marble staircase . “ Lagos 0.946780\n",
"6415 the sidewalk of an intersecting road 0.946779\n",
"526 the windshield : PROPERTY OF THE 0.946550\n",
"0 the sitting room , they returned 0.946547\n",
"4913 the wheel did n’t move . 0.946422\n",
"3796 the lieutenant was unsure in his 0.946403\n",
"877 the police come ’cause I live 0.946302\n",
"2690 the table and she felt an 0.946285\n",
"8228 the carriage swung into narrow Halsted 0.946276\n",
"7873 the narrowness , the conceit , 0.946197\n",
"4827 the crucifier , the deceiver , 0.946197\n",
"2822 the unclaimed , the anonymous , 0.946197\n",
"4617 the referee had just counted ten 0.946139\n",
"3822 the door opened , and Ellen 0.946101\n",
"5410 the neighborhood cats and had been 0.946044\n",
"7081 the money that used to go 0.946028\n",
"6357 the back corner of the store 0.946007\n",
"9095 the town , with pitchers mounted 0.945989\n",
"9752 the top . Now , however 0.945956\n",
"716 the actual terror , but I 0.945928\n",
"6531 the burgundy banners of the Arbor 0.945857\n",
"1383 the fundamental energies of the Milky 0.945857\n",
"7002 the possibility of getting their faces 0.945845\n",
"6772 the lake . That view was 0.945836\n",
"4431 the train entered her mind , 0.945790\n",
"6599 the river . ” He strode 0.945764\n",
"7599 the living room “ Dag , 0.945642\n",
"5559 the household jumped at her command 0.945621\n",
"6819 the lobby of the Cunard Hotel 0.945564\n",
"1617 the apartment they had rented , 0.945482"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df.root=='the'][df.score < 0.95].sort_values('score', ascending=False).head(100)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment