JonathanReeve/gray-keats.ipynb

## gray-keats.ipynb
{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "gray-keats.ipynb",
      "provenance": [],
      "authorship_tag": "ABX9TyMXGPMmMaLCzrZRUMZztHnT",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/gist/JonathanReeve/5b7e1c43e5f561743fdfc84ab97aa9e2/gray-keats.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "m_oHp-7hh45a"
      },
      "source": [
        "# Do Thomas Gray and John Keats Share Poetic Vocabulary? "
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "IZtliVTmbum1"
      },
      "source": [
        "import json\n",
        "import requests\n",
        "import nltk\n",
        "import pandas as pd"
      ],
      "execution_count": 49,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "sM3H4xURfIY5",
        "outputId": "d44adf06-c30e-4144-d270-4acf50a96c27"
      },
      "source": [
        "nltk.download('punkt') # Download data needed for tokenizer"
      ],
      "execution_count": 57,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "[nltk_data] Downloading package punkt to /root/nltk_data...\n",
            "[nltk_data]   Package punkt is already up-to-date!\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "True"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 57
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "oz_R9AaMgNex",
        "outputId": "479591ff-74be-4c9e-bb5d-f0f27a03c4d6"
      },
      "source": [
        "nltk.download('stopwords') # Download list of English stop words"
      ],
      "execution_count": 58,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "[nltk_data] Downloading package stopwords to /root/nltk_data...\n",
            "[nltk_data]   Package stopwords is already up-to-date!\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "True"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 58
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "qN9wuQRMgg0m"
      },
      "source": [
        "# English stop words, used to filter the shared-vocabulary table.\n",
        "# Kept as a set so every `word not in stops` check is a hash lookup instead of a scan of the whole list.\n",
        "stops = set(nltk.corpus.stopwords.words('english'))"
      ],
      "execution_count": 45,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "RodzKzDmiKVR"
      },
      "source": [
        "Get two test texts from Project Gutenberg. "
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "_4V2cf5IcQGI"
      },
      "source": [
        "def fetch_text(url, timeout=30):\n",
        "    \"\"\"Download `url` and return the decoded response body.\n",
        "\n",
        "    Raises requests.HTTPError for a 4xx/5xx status, so an error page is never\n",
        "    mistaken for the poems, and a requests timeout exception if the server\n",
        "    takes longer than `timeout` seconds to respond.\n",
        "    \"\"\"\n",
        "    response = requests.get(url, timeout=timeout)\n",
        "    response.raise_for_status()\n",
        "    return response.text\n",
        "\n",
        "\n",
        "grayRaw = fetch_text('https://www.gutenberg.org/ebooks/30357.txt.utf-8')\n",
        "keatsRaw = fetch_text('https://www.gutenberg.org/cache/epub/23684/pg23684.txt')"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "bCHMi3bTiOEP"
      },
      "source": [
        "Trim them so that we're only dealing with poems, not paratext. "
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "82QvqkAJdOLy"
      },
      "source": [
        "# Slice each raw text from its first poem title up to the start of the end matter.\n",
        "# str.index raises ValueError when a marker is absent; str.find would return -1\n",
        "# and the slice would silently come out wrong.\n",
        "keats = keatsRaw[keatsRaw.index('LAMIA,'):keatsRaw.index('Celestial     *')]\n",
        "gray = grayRaw[grayRaw.index('ELEGY WRITTEN IN A COUNTRY CHURCHYARD.'):grayRaw.index('_The Bard_, 53.')]"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "zwdgKIKjiSb6"
      },
      "source": [
        "Count the words, also lowercasing them and removing punctuation. "
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "VU8-MSMVdqLJ"
      },
      "source": [
        "def word_freqs(text):\n",
        "    \"\"\"Tally the lowercased, purely alphabetic tokens of `text`.\"\"\"\n",
        "    tokens = nltk.word_tokenize(text)\n",
        "    return nltk.FreqDist(token.lower() for token in tokens if token.isalpha())\n",
        "\n",
        "\n",
        "keatsFreqs = word_freqs(keats)\n",
        "grayFreqs = word_freqs(gray)"
      ],
      "execution_count": 27,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "4PHYiBNXibx0"
      },
      "source": [
        "Find the words they have in common. "
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "y44OdWiafm9D"
      },
      "source": [
        "# Words that occur at least once in both frequency tables.\n",
        "commonVocab = set(keatsFreqs) & set(grayFreqs)"
      ],
      "execution_count": 34,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "uQf-QWN0ifu-"
      },
      "source": [
        "Put it in a nicer format. Remove stop words. "
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Tf56nWFefpSt"
      },
      "source": [
        "# Map each shared, non-stop word to its count in each poet's text.\n",
        "commonCounts = {}\n",
        "for word in commonVocab:\n",
        "    if word in stops:\n",
        "        continue\n",
        "    commonCounts[word] = {\"keats\": keatsFreqs[word], \"gray\": grayFreqs[word]}"
      ],
      "execution_count": 47,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "pi24ZgTRipmc"
      },
      "source": [
        "Sort by frequencies, descending, for both. "
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 669
        },
        "id": "Q_7_yQ6dgYOc",
        "outputId": "df3592a0-91fd-4942-ba96-0b287cb983cc"
      },
      "source": [
        "df = pd.DataFrame(commonCounts).T\n",
        "df.sort_values('keats', ascending=False).head(20)"
      ],
      "execution_count": 55,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>keats</th>\n",
              "      <th>gray</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>thou</th>\n",
              "      <td>98</td>\n",
              "      <td>8</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>upon</th>\n",
              "      <td>80</td>\n",
              "      <td>7</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>thy</th>\n",
              "      <td>79</td>\n",
              "      <td>36</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>like</th>\n",
              "      <td>72</td>\n",
              "      <td>2</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>eyes</th>\n",
              "      <td>70</td>\n",
              "      <td>9</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>one</th>\n",
              "      <td>65</td>\n",
              "      <td>4</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>love</th>\n",
              "      <td>59</td>\n",
              "      <td>5</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>still</th>\n",
              "      <td>56</td>\n",
              "      <td>6</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>sweet</th>\n",
              "      <td>53</td>\n",
              "      <td>3</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>thee</th>\n",
              "      <td>51</td>\n",
              "      <td>8</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>fair</th>\n",
              "      <td>46</td>\n",
              "      <td>4</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>ye</th>\n",
              "      <td>46</td>\n",
              "      <td>12</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>many</th>\n",
              "      <td>40</td>\n",
              "      <td>9</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>let</th>\n",
              "      <td>39</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>old</th>\n",
              "      <td>38</td>\n",
              "      <td>4</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>see</th>\n",
              "      <td>38</td>\n",
              "      <td>4</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>thus</th>\n",
              "      <td>37</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>heart</th>\n",
              "      <td>37</td>\n",
              "      <td>7</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>soft</th>\n",
              "      <td>37</td>\n",
              "      <td>4</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>came</th>\n",
              "      <td>36</td>\n",
              "      <td>2</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "       keats  gray\n",
              "thou      98     8\n",
              "upon      80     7\n",
              "thy       79    36\n",
              "like      72     2\n",
              "eyes      70     9\n",
              "one       65     4\n",
              "love      59     5\n",
              "still     56     6\n",
              "sweet     53     3\n",
              "thee      51     8\n",
              "fair      46     4\n",
              "ye        46    12\n",
              "many      40     9\n",
              "let       39     1\n",
              "old       38     4\n",
              "see       38     4\n",
              "thus      37     1\n",
              "heart     37     7\n",
              "soft      37     4\n",
              "came      36     2"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 55
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 669
        },
        "id": "PTtuK3t_hMIu",
        "outputId": "0acdc36e-b984-4227-fa2b-562d3e8646eb"
      },
      "source": [
        "df.sort_values('gray', ascending=False).head(20)"
      ],
      "execution_count": 56,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>keats</th>\n",
              "      <th>gray</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>thy</th>\n",
              "      <td>79</td>\n",
              "      <td>36</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>shall</th>\n",
              "      <td>26</td>\n",
              "      <td>13</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>ye</th>\n",
              "      <td>46</td>\n",
              "      <td>12</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>yet</th>\n",
              "      <td>29</td>\n",
              "      <td>10</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>many</th>\n",
              "      <td>40</td>\n",
              "      <td>9</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>soul</th>\n",
              "      <td>15</td>\n",
              "      <td>9</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>eyes</th>\n",
              "      <td>70</td>\n",
              "      <td>9</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>voice</th>\n",
              "      <td>31</td>\n",
              "      <td>8</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>thee</th>\n",
              "      <td>51</td>\n",
              "      <td>8</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>air</th>\n",
              "      <td>21</td>\n",
              "      <td>8</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>beneath</th>\n",
              "      <td>12</td>\n",
              "      <td>8</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>thou</th>\n",
              "      <td>98</td>\n",
              "      <td>8</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>eye</th>\n",
              "      <td>14</td>\n",
              "      <td>8</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>shade</th>\n",
              "      <td>13</td>\n",
              "      <td>8</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>upon</th>\n",
              "      <td>80</td>\n",
              "      <td>7</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>whose</th>\n",
              "      <td>29</td>\n",
              "      <td>7</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>ii</th>\n",
              "      <td>4</td>\n",
              "      <td>7</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>fate</th>\n",
              "      <td>5</td>\n",
              "      <td>7</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>youth</th>\n",
              "      <td>10</td>\n",
              "      <td>7</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>day</th>\n",
              "      <td>23</td>\n",
              "      <td>7</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "         keats  gray\n",
              "thy         79    36\n",
              "shall       26    13\n",
              "ye          46    12\n",
              "yet         29    10\n",
              "many        40     9\n",
              "soul        15     9\n",
              "eyes        70     9\n",
              "voice       31     8\n",
              "thee        51     8\n",
              "air         21     8\n",
              "beneath     12     8\n",
              "thou        98     8\n",
              "eye         14     8\n",
              "shade       13     8\n",
              "upon        80     7\n",
              "whose       29     7\n",
              "ii           4     7\n",
              "fate         5     7\n",
              "youth       10     7\n",
              "day         23     7"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 56
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "B70lv52VhwQ0"
      },
      "source": [
        ""
      ],
      "execution_count": null,
      "outputs": []
    }
  ]
}
	{
	"nbformat": 4,
	"nbformat_minor": 0,
	"metadata": {
	"colab": {
	"name": "gray-keats.ipynb",
	"provenance": [],
	"authorship_tag": "ABX9TyMXGPMmMaLCzrZRUMZztHnT",
	"include_colab_link": true
	},
	"kernelspec": {
	"name": "python3",
	"display_name": "Python 3"
	}
	},
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "view-in-github",
	"colab_type": "text"
	},
	"source": [
	"<a href=\"https://colab.research.google.com/gist/JonathanReeve/5b7e1c43e5f561743fdfc84ab97aa9e2/gray-keats.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "m_oHp-7hh45a"
	},
	"source": [
	"# Do Thomas Gray and John Keats Share Poetic Vocabulary? "
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "IZtliVTmbum1"
	},
	"source": [
	"import json\n",
	"import requests\n",
	"import nltk\n",
	"import pandas as pd"
	],
	"execution_count": 49,
	"outputs": []
	},
	{
	"cell_type": "code",
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/"
	},
	"id": "sM3H4xURfIY5",
	"outputId": "d44adf06-c30e-4144-d270-4acf50a96c27"
	},
	"source": [
	"nltk.download('punkt') # Download data needed for tokenizer"
	],
	"execution_count": 57,
	"outputs": [
	{
	"output_type": "stream",
	"text": [
	"[nltk_data] Downloading package punkt to /root/nltk_data...\n",
	"[nltk_data] Package punkt is already up-to-date!\n"
	],
	"name": "stdout"
	},
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": [
	"True"
	]
	},
	"metadata": {
	"tags": []
	},
	"execution_count": 57
	}
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/"
	},
	"id": "oz_R9AaMgNex",
	"outputId": "479591ff-74be-4c9e-bb5d-f0f27a03c4d6"
	},
	"source": [
	"nltk.download('stopwords') # Download list of English stop words"
	],
	"execution_count": 58,
	"outputs": [
	{
	"output_type": "stream",
	"text": [
	"[nltk_data] Downloading package stopwords to /root/nltk_data...\n",
	"[nltk_data] Package stopwords is already up-to-date!\n"
	],
	"name": "stdout"
	},
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": [
	"True"
	]
	},
	"metadata": {
	"tags": []
	},
	"execution_count": 58
	}
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "qN9wuQRMgg0m"
	},
	"source": [
	"# English stop words, used to filter the shared-vocabulary table.\n",
	"# Kept as a set so every `word not in stops` check is a hash lookup instead of a scan of the whole list.\n",
	"stops = set(nltk.corpus.stopwords.words('english'))"
	],
	"execution_count": 45,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "RodzKzDmiKVR"
	},
	"source": [
	"Get two test texts from Project Gutenberg. "
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "_4V2cf5IcQGI"
	},
	"source": [
	"def fetch_text(url, timeout=30):\n",
	"    \"\"\"Download `url` and return the decoded response body.\n",
	"\n",
	"    Raises requests.HTTPError for a 4xx/5xx status, so an error page is never\n",
	"    mistaken for the poems, and a requests timeout exception if the server\n",
	"    takes longer than `timeout` seconds to respond.\n",
	"    \"\"\"\n",
	"    response = requests.get(url, timeout=timeout)\n",
	"    response.raise_for_status()\n",
	"    return response.text\n",
	"\n",
	"\n",
	"grayRaw = fetch_text('https://www.gutenberg.org/ebooks/30357.txt.utf-8')\n",
	"keatsRaw = fetch_text('https://www.gutenberg.org/cache/epub/23684/pg23684.txt')"
	],
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "bCHMi3bTiOEP"
	},
	"source": [
	"Trim them so that we're only dealing with poems, not paratext. "
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "82QvqkAJdOLy"
	},
	"source": [
	"# Slice each raw text from its first poem title up to the start of the end matter.\n",
	"# str.index raises ValueError when a marker is absent; str.find would return -1\n",
	"# and the slice would silently come out wrong.\n",
	"# The Keats end marker contains a run of five spaces before the asterisk.\n",
	"keats = keatsRaw[keatsRaw.index('LAMIA,'):keatsRaw.index('Celestial     *')]\n",
	"gray = grayRaw[grayRaw.index('ELEGY WRITTEN IN A COUNTRY CHURCHYARD.'):grayRaw.index('_The Bard_, 53.')]"
	],
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "zwdgKIKjiSb6"
	},
	"source": [
	"Count the words, also lowercasing them and removing punctuation. "
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "VU8-MSMVdqLJ"
	},
	"source": [
	"def word_freqs(text):\n",
	"    \"\"\"Tally the lowercased, purely alphabetic tokens of `text`.\"\"\"\n",
	"    tokens = nltk.word_tokenize(text)\n",
	"    return nltk.FreqDist(token.lower() for token in tokens if token.isalpha())\n",
	"\n",
	"\n",
	"keatsFreqs = word_freqs(keats)\n",
	"grayFreqs = word_freqs(gray)"
	],
	"execution_count": 27,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "4PHYiBNXibx0"
	},
	"source": [
	"Find the words they have in common. "
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "y44OdWiafm9D"
	},
	"source": [
	"# Words that occur at least once in both frequency tables.\n",
	"commonVocab = set(keatsFreqs) & set(grayFreqs)"
	],
	"execution_count": 34,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "uQf-QWN0ifu-"
	},
	"source": [
	"Put it in a nicer format. Remove stop words. "
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "Tf56nWFefpSt"
	},
	"source": [
	"# Map each shared, non-stop word to its count in each poet's text.\n",
	"commonCounts = {}\n",
	"for word in commonVocab:\n",
	"    if word in stops:\n",
	"        continue\n",
	"    commonCounts[word] = {\"keats\": keatsFreqs[word], \"gray\": grayFreqs[word]}"
	],
	"execution_count": 47,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "pi24ZgTRipmc"
	},
	"source": [
	"Sort by frequencies, descending, for both. "
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 669
	},
	"id": "Q_7_yQ6dgYOc",
	"outputId": "df3592a0-91fd-4942-ba96-0b287cb983cc"
	},
	"source": [
	"df = pd.DataFrame(commonCounts).T\n",
	"df.sort_values('keats', ascending=False).head(20)"
	],
	"execution_count": 55,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>keats</th>\n",
	" <th>gray</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>thou</th>\n",
	" <td>98</td>\n",
	" <td>8</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>upon</th>\n",
	" <td>80</td>\n",
	" <td>7</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>thy</th>\n",
	" <td>79</td>\n",
	" <td>36</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>like</th>\n",
	" <td>72</td>\n",
	" <td>2</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>eyes</th>\n",
	" <td>70</td>\n",
	" <td>9</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>one</th>\n",
	" <td>65</td>\n",
	" <td>4</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>love</th>\n",
	" <td>59</td>\n",
	" <td>5</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>still</th>\n",
	" <td>56</td>\n",
	" <td>6</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>sweet</th>\n",
	" <td>53</td>\n",
	" <td>3</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>thee</th>\n",
	" <td>51</td>\n",
	" <td>8</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>fair</th>\n",
	" <td>46</td>\n",
	" <td>4</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>ye</th>\n",
	" <td>46</td>\n",
	" <td>12</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>many</th>\n",
	" <td>40</td>\n",
	" <td>9</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>let</th>\n",
	" <td>39</td>\n",
	" <td>1</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>old</th>\n",
	" <td>38</td>\n",
	" <td>4</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>see</th>\n",
	" <td>38</td>\n",
	" <td>4</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>thus</th>\n",
	" <td>37</td>\n",
	" <td>1</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>heart</th>\n",
	" <td>37</td>\n",
	" <td>7</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>soft</th>\n",
	" <td>37</td>\n",
	" <td>4</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>came</th>\n",
	" <td>36</td>\n",
	" <td>2</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" keats gray\n",
	"thou 98 8\n",
	"upon 80 7\n",
	"thy 79 36\n",
	"like 72 2\n",
	"eyes 70 9\n",
	"one 65 4\n",
	"love 59 5\n",
	"still 56 6\n",
	"sweet 53 3\n",
	"thee 51 8\n",
	"fair 46 4\n",
	"ye 46 12\n",
	"many 40 9\n",
	"let 39 1\n",
	"old 38 4\n",
	"see 38 4\n",
	"thus 37 1\n",
	"heart 37 7\n",
	"soft 37 4\n",
	"came 36 2"
	]
	},
	"metadata": {
	"tags": []
	},
	"execution_count": 55
	}
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 669
	},
	"id": "PTtuK3t_hMIu",
	"outputId": "0acdc36e-b984-4227-fa2b-562d3e8646eb"
	},
	"source": [
	"df.sort_values('gray', ascending=False).head(20)"
	],
	"execution_count": 56,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>keats</th>\n",
	" <th>gray</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>thy</th>\n",
	" <td>79</td>\n",
	" <td>36</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>shall</th>\n",
	" <td>26</td>\n",
	" <td>13</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>ye</th>\n",
	" <td>46</td>\n",
	" <td>12</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>yet</th>\n",
	" <td>29</td>\n",
	" <td>10</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>many</th>\n",
	" <td>40</td>\n",
	" <td>9</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>soul</th>\n",
	" <td>15</td>\n",
	" <td>9</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>eyes</th>\n",
	" <td>70</td>\n",
	" <td>9</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>voice</th>\n",
	" <td>31</td>\n",
	" <td>8</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>thee</th>\n",
	" <td>51</td>\n",
	" <td>8</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>air</th>\n",
	" <td>21</td>\n",
	" <td>8</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>beneath</th>\n",
	" <td>12</td>\n",
	" <td>8</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>thou</th>\n",
	" <td>98</td>\n",
	" <td>8</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>eye</th>\n",
	" <td>14</td>\n",
	" <td>8</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>shade</th>\n",
	" <td>13</td>\n",
	" <td>8</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>upon</th>\n",
	" <td>80</td>\n",
	" <td>7</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>whose</th>\n",
	" <td>29</td>\n",
	" <td>7</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>ii</th>\n",
	" <td>4</td>\n",
	" <td>7</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>fate</th>\n",
	" <td>5</td>\n",
	" <td>7</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>youth</th>\n",
	" <td>10</td>\n",
	" <td>7</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>day</th>\n",
	" <td>23</td>\n",
	" <td>7</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" keats gray\n",
	"thy 79 36\n",
	"shall 26 13\n",
	"ye 46 12\n",
	"yet 29 10\n",
	"many 40 9\n",
	"soul 15 9\n",
	"eyes 70 9\n",
	"voice 31 8\n",
	"thee 51 8\n",
	"air 21 8\n",
	"beneath 12 8\n",
	"thou 98 8\n",
	"eye 14 8\n",
	"shade 13 8\n",
	"upon 80 7\n",
	"whose 29 7\n",
	"ii 4 7\n",
	"fate 5 7\n",
	"youth 10 7\n",
	"day 23 7"
	]
	},
	"metadata": {
	"tags": []
	},
	"execution_count": 56
	}
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "B70lv52VhwQ0"
	},
	"source": [
	""
	],
	"execution_count": null,
	"outputs": []
	}
	]
	}