Skip to content

Instantly share code, notes, and snippets.

@JonathanReeve
Created November 20, 2021 00:37
Show Gist options
  • Save JonathanReeve/c252a42af305e887886f0267897a153b to your computer and use it in GitHub Desktop.
Save JonathanReeve/c252a42af305e887886f0267897a153b to your computer and use it in GitHub Desktop.
Find patterns in text
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "b73ea10c-dec7-4ece-9e31-e2cfa1ed4434",
"metadata": {},
"outputs": [],
"source": [
"import nltk"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "244d0f39-5a8c-4dbd-8ef3-a4f2b7596bb0",
"metadata": {},
"outputs": [],
"source": [
"from nltk.corpus import gutenberg"
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "f9c6d567-81b3-4f68-85fb-355338ea18be",
"metadata": {},
"outputs": [],
"source": [
"emmaTokens = gutenberg.words('austen-emma.txt')"
]
},
{
"cell_type": "code",
"execution_count": 164,
"id": "001fdc82-9a0c-47e8-bf2d-5506fa1220fd",
"metadata": {},
"outputs": [],
"source": [
"emmaTokens = [w for w in emmaTokens if w.isalpha()] # Restrict to alphabetic words"
]
},
{
"cell_type": "code",
"execution_count": 165,
"id": "a28c7999-48ff-459b-84c2-00c2686f7c3b",
"metadata": {},
"outputs": [],
"source": [
"emmaTags = nltk.pos_tag(emmaTokens)"
]
},
{
"cell_type": "code",
"execution_count": 190,
"id": "1307c76b-b1c8-48cb-8cca-51190ca24699",
"metadata": {},
"outputs": [],
"source": [
"def findPattern(needles, haystack):\n",
"    \"\"\"\n",
"    Find every run of consecutive tagged tokens that matches a pattern.\n",
"\n",
"    Input:\n",
"     - \"needles\": list of words and/or POS tags, e.g. [\"a\", \"very\", \"JJ\", \"man\"].\n",
"       Each needle matches a token whose word OR tag equals it exactly.\n",
"     - \"haystack\": POS-tagged tokens, [(\"a\", \"DT\"), (\"list\", \"NN\") ... ]\n",
"\n",
"    Output:\n",
"     - a list of matches, each one a list of (word, tag) tuples, e.g.,\n",
"       [[('a', 'DT'), ('comfortable', 'JJ'), ('home', 'NN')], ...]\n",
"       An empty pattern, or one longer than the text, yields [].\n",
"    \"\"\"\n",
"    width = len(needles)\n",
"    if width == 0:\n",
"        return []  # Nothing to look for\n",
"    matches = []\n",
"    # The last window starts at len(haystack) - width, hence the +1;\n",
"    # without it the final n-gram of the text is never examined.\n",
"    for start in range(len(haystack) - width + 1):\n",
"        # Gets an ngram of the same length as our pattern.\n",
"        window = list(haystack[start:start + width])\n",
"        # Each needle may name either the word itself or its part of speech.\n",
"        if all(needle in (word, pos)\n",
"               for needle, (word, pos) in zip(needles, window)):\n",
"            matches.append(window)\n",
"    return matches\n",
"\n",
"def getPatternStats(patterns):\n",
"    \"\"\"\n",
"    Count how often each distinct match occurs.\n",
"\n",
"    Input: A list of matches as returned by findPattern()\n",
"    Output: (match-as-string, count) pairs, most frequent first\n",
"    \"\"\"\n",
"    # Matches are lists (unhashable), so their string form serves as the key.\n",
"    counts = nltk.FreqDist(map(str, patterns))\n",
"    return counts.most_common()"
]
},
{
"cell_type": "code",
"execution_count": 191,
"id": "1f01c7f3-b080-4c37-95d4-a10726579c7c",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"[[('the', 'DT'), ('greatest', 'JJS'), ('amusement', 'NN')],\n",
" [('the', 'DT'), ('pleasantest', 'JJS'), ('proof', 'NN')],\n",
" [('the', 'DT'), ('greatest', 'JJS'), ('pleasure', 'NN')],\n",
" [('the', 'DT'), ('fairest', 'JJS'), ('way', 'NN')],\n",
" [('the', 'DT'), ('earnest', 'JJS'), ('pressing', 'NN')],\n",
" [('the', 'DT'), ('best', 'JJS'), ('size', 'NN')],\n",
" [('the', 'DT'), ('smallest', 'JJS'), ('intermission', 'NN')],\n",
" [('the', 'DT'), ('least', 'JJS'), ('suspecting', 'NN')],\n",
" [('the', 'DT'), ('best', 'JJS'), ('judge', 'NN')],\n",
" [('the', 'DT'), ('best', 'JJS'), ('friend', 'NN')],\n",
" [('the', 'DT'), ('smallest', 'JJS'), ('doubt', 'NN')],\n",
" [('the', 'DT'), ('luckiest', 'JJS'), ('woman', 'NN')],\n",
" [('the', 'DT'), ('best', 'JJS'), ('antidote', 'NN')],\n",
" [('the', 'DT'), ('completest', 'JJS'), ('proof', 'NN')],\n",
" [('the', 'DT'), ('best', 'JJS'), ('charade', 'NN')],\n",
" [('the', 'DT'), ('tenderest', 'JJS'), ('spirit', 'NN')],\n",
" [('the', 'DT'), ('handsomest', 'JJS'), ('Henry', 'NN')],\n",
" [('the', 'DT'), ('smallest', 'JJS'), ('doubt', 'NN')],\n",
" [('the', 'DT'), ('proudest', 'JJS'), ('moment', 'NN')],\n",
" [('the', 'DT'), ('smallest', 'JJS'), ('delay', 'NN')],\n",
" [('the', 'DT'), ('longest', 'JJS'), ('part', 'NN')],\n",
" [('the', 'DT'), ('greatest', 'JJS'), ('service', 'NN')],\n",
" [('the', 'DT'), ('least', 'JJS'), ('inconvenience', 'NN')],\n",
" [('the', 'DT'), ('least', 'JJS'), ('inclination', 'NN')],\n",
" [('the', 'DT'), ('happiest', 'JJS'), ('humour', 'NN')],\n",
" [('the', 'DT'), ('greatest', 'JJS'), ('absurdity', 'NN')],\n",
" [('the', 'DT'), ('greatest', 'JJS'), ('alacrity', 'NN')],\n",
" [('the', 'DT'), ('worst', 'JJS'), ('weather', 'NN')],\n",
" [('the', 'DT'), ('greatest', 'JJS'), ('comfort', 'NN')],\n",
" [('the', 'DT'), ('smallest', 'JJS'), ('doubt', 'NN')],\n",
" [('the', 'DT'), ('greatest', 'JJS'), ('compassion', 'NN')],\n",
" [('the', 'DT'), ('smallest', 'JJS'), ('difficulty', 'NN')],\n",
" [('the', 'DT'), ('smallest', 'JJS'), ('attention', 'NN')],\n",
" [('the', 'DT'), ('best', 'JJS'), ('society', 'NN')],\n",
" [('the', 'DT'), ('worst', 'JJS'), ('lay', 'NN')],\n",
" [('the', 'DT'), ('worst', 'JJS'), ('judge', 'NN')],\n",
" [('the', 'DT'), ('least', 'JJS'), ('idea', 'NN')],\n",
" [('the', 'DT'), ('warmest', 'JJS'), ('corner', 'NN')],\n",
" [('the', 'DT'), ('least', 'JJS'), ('backward', 'NN')],\n",
" [('the', 'DT'), ('greatest', 'JJS'), ('presence', 'NN')],\n",
" [('the', 'DT'), ('highest', 'JJS'), ('value', 'NN')],\n",
" [('the', 'DT'), ('likeliest', 'JJS'), ('evil', 'NN')],\n",
" [('the', 'DT'), ('smallest', 'JJS'), ('grease', 'NN')],\n",
" [('the', 'DT'), ('best', 'JJS'), ('way', 'NN')],\n",
" [('the', 'DT'), ('merest', 'JJS'), ('commonplace', 'NN')],\n",
" [('the', 'DT'), ('commonest', 'JJS'), ('process', 'NN')],\n",
" [('the', 'DT'), ('greatest', 'JJS'), ('curiosity', 'NN')],\n",
" [('the', 'DT'), ('wisest', 'JJS'), ('measure', 'NN')],\n",
" [('the', 'DT'), ('highest', 'JJS'), ('obligation', 'NN')],\n",
" [('the', 'DT'), ('highest', 'JJS'), ('purpose', 'NN')],\n",
" [('the', 'DT'), ('smallest', 'JJS'), ('difficulty', 'NN')],\n",
" [('the', 'DT'), ('least', 'JJS'), ('information', 'NN')],\n",
" [('the', 'DT'), ('smallest', 'JJS'), ('skill', 'NN')],\n",
" [('the', 'DT'), ('best', 'JJS'), ('man', 'NN')],\n",
" [('the', 'DT'), ('least', 'JJS'), ('idea', 'NN')],\n",
" [('the', 'DT'), ('greatest', 'JJS'), ('good', 'NN')],\n",
" [('the', 'DT'), ('busiest', 'JJS'), ('part', 'NN')],\n",
" [('the', 'DT'), ('greatest', 'JJS'), ('attention', 'NN')],\n",
" [('the', 'DT'), ('fondest', 'JJS'), ('parent', 'NN')],\n",
" [('the', 'DT'), ('highest', 'JJS'), ('promise', 'NN')],\n",
" [('the', 'DT'), ('least', 'JJS'), ('ashamed', 'NN')],\n",
" [('the', 'DT'), ('greatest', 'JJS'), ('zeal', 'NN')],\n",
" [('the', 'DT'), ('best', 'JJS'), ('judge', 'NN')],\n",
" [('the', 'DT'), ('best', 'JJS'), ('scheme', 'NN')],\n",
" [('the', 'DT'), ('greatest', 'JJS'), ('pleasure', 'NN')],\n",
" [('the', 'DT'), ('least', 'JJS'), ('draught', 'NN')],\n",
" [('the', 'DT'), ('oddest', 'JJS'), ('creature', 'NN')],\n",
" [('the', 'DT'), ('greatest', 'JJS'), ('compliment', 'NN')],\n",
" [('the', 'DT'), ('latest', 'JJS'), ('interest', 'NN')],\n",
" [('the', 'DT'), ('strongest', 'JJS'), ('reproach', 'NN')],\n",
" [('the', 'DT'), ('best', 'JJS'), ('friend', 'NN')],\n",
" [('the', 'DT'), ('best', 'JJS'), ('offer', 'NN')],\n",
" [('the', 'DT'), ('best', 'JJS'), ('society', 'NN')],\n",
" [('the', 'DT'), ('greatest', 'JJS'), ('satisfaction', 'NN')],\n",
" [('the', 'DT'), ('safest', 'JJS'), ('model', 'NN')],\n",
" [('the', 'DT'), ('plainest', 'JJS'), ('spoken', 'NN')],\n",
" [('the', 'DT'), ('greatest', 'JJS'), ('satisfaction', 'NN')],\n",
" [('the', 'DT'), ('strongest', 'JJS'), ('Isabella', 'NN')],\n",
" [('the', 'DT'), ('smallest', 'JJS'), ('distress', 'NN')],\n",
" [('the', 'DT'), ('best', 'JJS'), ('gentleman', 'NN')],\n",
" [('the', 'DT'), ('greatest', 'JJS'), ('dislike', 'NN')],\n",
" [('the', 'DT'), ('smallest', 'JJS'), ('doubt', 'NN')],\n",
" [('the', 'DT'), ('longest', 'JJS'), ('weather', 'NN')],\n",
" [('the', 'DT'), ('greatest', 'JJS'), ('confidence', 'NN')],\n",
" [('the', 'DT'), ('least', 'JJS'), ('conceit', 'NN')],\n",
" [('the', 'DT'), ('coldest', 'JJS'), ('heart', 'NN')],\n",
" [('the', 'DT'), ('steadiest', 'JJS'), ('brain', 'NN')],\n",
" [('the', 'DT'), ('slightest', 'JJS'), ('particular', 'NN')],\n",
" [('the', 'DT'), ('softest', 'JJS'), ('cotton', 'NN')],\n",
" [('the', 'DT'), ('oddest', 'JJS'), ('dreams', 'NN')],\n",
" [('the', 'DT'), ('least', 'JJS'), ('thing', 'NN')],\n",
" [('the', 'DT'), ('warmest', 'JJS'), ('heart', 'NN')],\n",
" [('the', 'DT'), ('honest', 'JJS'), ('pride', 'NN')],\n",
" [('the', 'DT'), ('best', 'JJS'), ('time', 'NN')],\n",
" [('the', 'DT'), ('pleasantest', 'JJS'), ('part', 'NN')],\n",
" [('the', 'DT'), ('highest', 'JJS'), ('part', 'NN')],\n",
" [('the', 'DT'), ('least', 'JJS'), ('idea', 'NN')],\n",
" [('the', 'DT'), ('best', 'JJS'), ('companion', 'NN')],\n",
" [('the', 'DT'), ('warmest', 'JJS'), ('concern', 'NN')],\n",
" [('the', 'DT'), ('greatest', 'JJS'), ('service', 'NN')],\n",
" [('the', 'DT'), ('smallest', 'JJS'), ('degree', 'NN')],\n",
" [('the', 'DT'), ('least', 'JJS'), ('idea', 'NN')],\n",
" [('the', 'DT'), ('deepest', 'JJS'), ('hue', 'NN')],\n",
" [('the', 'DT'), ('oddest', 'JJS'), ('news', 'NN')],\n",
" [('the', 'DT'), ('slightest', 'JJS'), ('suspicion', 'NN')],\n",
" [('the', 'DT'), ('least', 'JJS'), ('regard', 'NN')],\n",
" [('the', 'DT'), ('closest', 'JJS'), ('agreement', 'NN')],\n",
" [('the', 'DT'), ('closest', 'JJS'), ('observance', 'NN')],\n",
" [('the', 'DT'), ('greatest', 'JJS'), ('relief', 'NN')],\n",
" [('the', 'DT'), ('greatest', 'JJS'), ('advantage', 'NN')],\n",
" [('the', 'DT'), ('quickest', 'JJS'), ('arrangement', 'NN')],\n",
" [('the', 'DT'), ('best', 'JJS'), ('nature', 'NN')],\n",
" [('the', 'DT'), ('warmest', 'JJS'), ('friendship', 'NN')],\n",
" [('the', 'DT'), ('happiest', 'JJS'), ('dream', 'NN')],\n",
" [('the', 'DT'), ('slightest', 'JJS'), ('perception', 'NN')],\n",
" [('the', 'DT'), ('slightest', 'JJS'), ('inclination', 'NN')],\n",
" [('the', 'DT'), ('greatest', 'JJS'), ('pleasure', 'NN')],\n",
" [('the', 'DT'), ('greatest', 'JJS'), ('respect', 'NN')],\n",
" [('the', 'DT'), ('warmest', 'JJS'), ('friendship', 'NN')],\n",
" [('the', 'DT'), ('deepest', 'JJS'), ('humiliation', 'NN')],\n",
" [('the', 'DT'), ('least', 'JJS'), ('suspicion', 'NN')],\n",
" [('the', 'DT'), ('greatest', 'JJS'), ('unhappiness', 'NN')],\n",
" [('the', 'DT'), ('smallest', 'JJS'), ('reply', 'NN')],\n",
" [('the', 'DT'), ('smallest', 'JJS'), ('remark', 'NN')],\n",
" [('the', 'DT'), ('highest', 'JJS'), ('credit', 'NN')],\n",
" [('the', 'DT'), ('tenderest', 'JJS'), ('affection', 'NN')],\n",
" [('the', 'DT'), ('greatest', 'JJS'), ('humanity', 'NN')],\n",
" [('the', 'DT'), ('strongest', 'JJS'), ('approbation', 'NN')],\n",
" [('the', 'DT'), ('highest', 'JJS'), ('importance', 'NN')],\n",
" [('the', 'DT'), ('highest', 'JJS'), ('promise', 'NN')],\n",
" [('the', 'DT'), ('best', 'JJS'), ('treatment', 'NN')],\n",
" [('the', 'DT'), ('warmest', 'JJS'), ('interest', 'NN')],\n",
" [('the', 'DT'), ('slightest', 'JJS'), ('degree', 'NN')],\n",
" [('the', 'DT'), ('fullest', 'JJS'), ('exultation', 'NN')],\n",
" [('the', 'DT'), ('luckiest', 'JJS'), ('creature', 'NN')],\n",
" [('the', 'DT'), ('latest', 'JJS'), ('couple', 'NN')]]"
]
},
"execution_count": 191,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pats = findPattern(['the', 'JJS', 'NN'], emmaTags)\n",
"pats"
]
},
{
"cell_type": "code",
"execution_count": 192,
"id": "ac645a82-88ab-4cb5-a44f-f77b77e5a48a",
"metadata": {
"jupyter": {
"source_hidden": true
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"[(\"[('the', 'DT'), ('smallest', 'JJS'), ('doubt', 'NN')]\", 4),\n",
" (\"[('the', 'DT'), ('least', 'JJS'), ('idea', 'NN')]\", 4),\n",
" (\"[('the', 'DT'), ('greatest', 'JJS'), ('pleasure', 'NN')]\", 3),\n",
" (\"[('the', 'DT'), ('best', 'JJS'), ('judge', 'NN')]\", 2),\n",
" (\"[('the', 'DT'), ('best', 'JJS'), ('friend', 'NN')]\", 2),\n",
" (\"[('the', 'DT'), ('greatest', 'JJS'), ('service', 'NN')]\", 2),\n",
" (\"[('the', 'DT'), ('smallest', 'JJS'), ('difficulty', 'NN')]\", 2),\n",
" (\"[('the', 'DT'), ('best', 'JJS'), ('society', 'NN')]\", 2),\n",
" (\"[('the', 'DT'), ('highest', 'JJS'), ('promise', 'NN')]\", 2),\n",
" (\"[('the', 'DT'), ('greatest', 'JJS'), ('satisfaction', 'NN')]\", 2),\n",
" (\"[('the', 'DT'), ('warmest', 'JJS'), ('friendship', 'NN')]\", 2),\n",
" (\"[('the', 'DT'), ('greatest', 'JJS'), ('amusement', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('pleasantest', 'JJS'), ('proof', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('fairest', 'JJS'), ('way', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('earnest', 'JJS'), ('pressing', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('best', 'JJS'), ('size', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('smallest', 'JJS'), ('intermission', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('least', 'JJS'), ('suspecting', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('luckiest', 'JJS'), ('woman', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('best', 'JJS'), ('antidote', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('completest', 'JJS'), ('proof', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('best', 'JJS'), ('charade', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('tenderest', 'JJS'), ('spirit', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('handsomest', 'JJS'), ('Henry', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('proudest', 'JJS'), ('moment', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('smallest', 'JJS'), ('delay', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('longest', 'JJS'), ('part', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('least', 'JJS'), ('inconvenience', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('least', 'JJS'), ('inclination', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('happiest', 'JJS'), ('humour', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('greatest', 'JJS'), ('absurdity', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('greatest', 'JJS'), ('alacrity', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('worst', 'JJS'), ('weather', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('greatest', 'JJS'), ('comfort', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('greatest', 'JJS'), ('compassion', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('smallest', 'JJS'), ('attention', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('worst', 'JJS'), ('lay', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('worst', 'JJS'), ('judge', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('warmest', 'JJS'), ('corner', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('least', 'JJS'), ('backward', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('greatest', 'JJS'), ('presence', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('highest', 'JJS'), ('value', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('likeliest', 'JJS'), ('evil', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('smallest', 'JJS'), ('grease', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('best', 'JJS'), ('way', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('merest', 'JJS'), ('commonplace', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('commonest', 'JJS'), ('process', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('greatest', 'JJS'), ('curiosity', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('wisest', 'JJS'), ('measure', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('highest', 'JJS'), ('obligation', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('highest', 'JJS'), ('purpose', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('least', 'JJS'), ('information', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('smallest', 'JJS'), ('skill', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('best', 'JJS'), ('man', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('greatest', 'JJS'), ('good', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('busiest', 'JJS'), ('part', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('greatest', 'JJS'), ('attention', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('fondest', 'JJS'), ('parent', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('least', 'JJS'), ('ashamed', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('greatest', 'JJS'), ('zeal', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('best', 'JJS'), ('scheme', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('least', 'JJS'), ('draught', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('oddest', 'JJS'), ('creature', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('greatest', 'JJS'), ('compliment', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('latest', 'JJS'), ('interest', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('strongest', 'JJS'), ('reproach', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('best', 'JJS'), ('offer', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('safest', 'JJS'), ('model', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('plainest', 'JJS'), ('spoken', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('strongest', 'JJS'), ('Isabella', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('smallest', 'JJS'), ('distress', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('best', 'JJS'), ('gentleman', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('greatest', 'JJS'), ('dislike', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('longest', 'JJS'), ('weather', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('greatest', 'JJS'), ('confidence', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('least', 'JJS'), ('conceit', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('coldest', 'JJS'), ('heart', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('steadiest', 'JJS'), ('brain', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('slightest', 'JJS'), ('particular', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('softest', 'JJS'), ('cotton', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('oddest', 'JJS'), ('dreams', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('least', 'JJS'), ('thing', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('warmest', 'JJS'), ('heart', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('honest', 'JJS'), ('pride', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('best', 'JJS'), ('time', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('pleasantest', 'JJS'), ('part', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('highest', 'JJS'), ('part', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('best', 'JJS'), ('companion', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('warmest', 'JJS'), ('concern', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('smallest', 'JJS'), ('degree', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('deepest', 'JJS'), ('hue', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('oddest', 'JJS'), ('news', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('slightest', 'JJS'), ('suspicion', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('least', 'JJS'), ('regard', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('closest', 'JJS'), ('agreement', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('closest', 'JJS'), ('observance', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('greatest', 'JJS'), ('relief', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('greatest', 'JJS'), ('advantage', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('quickest', 'JJS'), ('arrangement', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('best', 'JJS'), ('nature', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('happiest', 'JJS'), ('dream', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('slightest', 'JJS'), ('perception', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('slightest', 'JJS'), ('inclination', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('greatest', 'JJS'), ('respect', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('deepest', 'JJS'), ('humiliation', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('least', 'JJS'), ('suspicion', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('greatest', 'JJS'), ('unhappiness', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('smallest', 'JJS'), ('reply', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('smallest', 'JJS'), ('remark', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('highest', 'JJS'), ('credit', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('tenderest', 'JJS'), ('affection', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('greatest', 'JJS'), ('humanity', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('strongest', 'JJS'), ('approbation', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('highest', 'JJS'), ('importance', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('best', 'JJS'), ('treatment', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('warmest', 'JJS'), ('interest', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('slightest', 'JJS'), ('degree', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('fullest', 'JJS'), ('exultation', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('luckiest', 'JJS'), ('creature', 'NN')]\", 1),\n",
" (\"[('the', 'DT'), ('latest', 'JJS'), ('couple', 'NN')]\", 1)]"
]
},
"execution_count": 192,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"getPatternStats(pats)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment