Created
November 20, 2021 00:37
-
-
Save JonathanReeve/c252a42af305e887886f0267897a153b to your computer and use it in GitHub Desktop.
Find patterns in text
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "b73ea10c-dec7-4ece-9e31-e2cfa1ed4434", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import nltk" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 44, | |
"id": "244d0f39-5a8c-4dbd-8ef3-a4f2b7596bb0", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from nltk.corpus import gutenberg" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 47, | |
"id": "f9c6d567-81b3-4f68-85fb-355338ea18be", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"emmaTokens = gutenberg.words('austen-emma.txt')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 164, | |
"id": "001fdc82-9a0c-47e8-bf2d-5506fa1220fd", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"emmaTokens = [w for w in emmaTokens if w.isalpha()] # Restrict to alphabetic words" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 165, | |
"id": "a28c7999-48ff-459b-84c2-00c2686f7c3b", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"emmaTags = nltk.pos_tag(emmaTokens)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 190, | |
"id": "1307c76b-b1c8-48cb-8cca-51190ca24699", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def findPattern(needles, haystack):\n", | |
" \"\"\" \n", | |
" Input: \n", | |
" - POS-tagged tokens \"haystack\", [(\"a\", \"DET\"), (\"list\", \"NN\") ... ]\n", | |
" - List of words/POSes, \"needles\", e.g. [\"a\", \"very\", \"JJ\", \"man\"]\n", | |
" \n", | |
" Output: \n", | |
" - a list of matching patterns, e.g., [('a', 'DT'), ('comfortable', 'JJ'), ('home', 'NN')]\n", | |
" \"\"\" \n", | |
" matches = []\n", | |
" for i in range(len(haystack)-len(needles)):\n", | |
" # Gets a ngram of the same length as our pattern words. \n", | |
" miniHaystack = [haystack[i+j] for j in range(len(needles))]\n", | |
" # Now let's compare all of our items, pairwise\n", | |
" for i in range(len(needles)): \n", | |
" word, pos = miniHaystack[i]\n", | |
" needle = needles[i]\n", | |
" if needle != word and needle != pos: \n", | |
" break # Stop if something doesn't match\n", | |
" if i == len(needles)-1: # We're at the end. Success! \n", | |
" matches.append(miniHaystack)\n", | |
" return matches\n", | |
"\n", | |
"def getPatternStats(patterns): \n", | |
" \"\"\" \n", | |
" Input: A list of tuples as returned by findPattern()\n", | |
" \"\"\"\n", | |
" pats = [str(pat) for pat in patterns]\n", | |
" return nltk.FreqDist(pats).most_common()\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 191, | |
"id": "1f01c7f3-b080-4c37-95d4-a10726579c7c", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[[('the', 'DT'), ('greatest', 'JJS'), ('amusement', 'NN')],\n", | |
" [('the', 'DT'), ('pleasantest', 'JJS'), ('proof', 'NN')],\n", | |
" [('the', 'DT'), ('greatest', 'JJS'), ('pleasure', 'NN')],\n", | |
" [('the', 'DT'), ('fairest', 'JJS'), ('way', 'NN')],\n", | |
" [('the', 'DT'), ('earnest', 'JJS'), ('pressing', 'NN')],\n", | |
" [('the', 'DT'), ('best', 'JJS'), ('size', 'NN')],\n", | |
" [('the', 'DT'), ('smallest', 'JJS'), ('intermission', 'NN')],\n", | |
" [('the', 'DT'), ('least', 'JJS'), ('suspecting', 'NN')],\n", | |
" [('the', 'DT'), ('best', 'JJS'), ('judge', 'NN')],\n", | |
" [('the', 'DT'), ('best', 'JJS'), ('friend', 'NN')],\n", | |
" [('the', 'DT'), ('smallest', 'JJS'), ('doubt', 'NN')],\n", | |
" [('the', 'DT'), ('luckiest', 'JJS'), ('woman', 'NN')],\n", | |
" [('the', 'DT'), ('best', 'JJS'), ('antidote', 'NN')],\n", | |
" [('the', 'DT'), ('completest', 'JJS'), ('proof', 'NN')],\n", | |
" [('the', 'DT'), ('best', 'JJS'), ('charade', 'NN')],\n", | |
" [('the', 'DT'), ('tenderest', 'JJS'), ('spirit', 'NN')],\n", | |
" [('the', 'DT'), ('handsomest', 'JJS'), ('Henry', 'NN')],\n", | |
" [('the', 'DT'), ('smallest', 'JJS'), ('doubt', 'NN')],\n", | |
" [('the', 'DT'), ('proudest', 'JJS'), ('moment', 'NN')],\n", | |
" [('the', 'DT'), ('smallest', 'JJS'), ('delay', 'NN')],\n", | |
" [('the', 'DT'), ('longest', 'JJS'), ('part', 'NN')],\n", | |
" [('the', 'DT'), ('greatest', 'JJS'), ('service', 'NN')],\n", | |
" [('the', 'DT'), ('least', 'JJS'), ('inconvenience', 'NN')],\n", | |
" [('the', 'DT'), ('least', 'JJS'), ('inclination', 'NN')],\n", | |
" [('the', 'DT'), ('happiest', 'JJS'), ('humour', 'NN')],\n", | |
" [('the', 'DT'), ('greatest', 'JJS'), ('absurdity', 'NN')],\n", | |
" [('the', 'DT'), ('greatest', 'JJS'), ('alacrity', 'NN')],\n", | |
" [('the', 'DT'), ('worst', 'JJS'), ('weather', 'NN')],\n", | |
" [('the', 'DT'), ('greatest', 'JJS'), ('comfort', 'NN')],\n", | |
" [('the', 'DT'), ('smallest', 'JJS'), ('doubt', 'NN')],\n", | |
" [('the', 'DT'), ('greatest', 'JJS'), ('compassion', 'NN')],\n", | |
" [('the', 'DT'), ('smallest', 'JJS'), ('difficulty', 'NN')],\n", | |
" [('the', 'DT'), ('smallest', 'JJS'), ('attention', 'NN')],\n", | |
" [('the', 'DT'), ('best', 'JJS'), ('society', 'NN')],\n", | |
" [('the', 'DT'), ('worst', 'JJS'), ('lay', 'NN')],\n", | |
" [('the', 'DT'), ('worst', 'JJS'), ('judge', 'NN')],\n", | |
" [('the', 'DT'), ('least', 'JJS'), ('idea', 'NN')],\n", | |
" [('the', 'DT'), ('warmest', 'JJS'), ('corner', 'NN')],\n", | |
" [('the', 'DT'), ('least', 'JJS'), ('backward', 'NN')],\n", | |
" [('the', 'DT'), ('greatest', 'JJS'), ('presence', 'NN')],\n", | |
" [('the', 'DT'), ('highest', 'JJS'), ('value', 'NN')],\n", | |
" [('the', 'DT'), ('likeliest', 'JJS'), ('evil', 'NN')],\n", | |
" [('the', 'DT'), ('smallest', 'JJS'), ('grease', 'NN')],\n", | |
" [('the', 'DT'), ('best', 'JJS'), ('way', 'NN')],\n", | |
" [('the', 'DT'), ('merest', 'JJS'), ('commonplace', 'NN')],\n", | |
" [('the', 'DT'), ('commonest', 'JJS'), ('process', 'NN')],\n", | |
" [('the', 'DT'), ('greatest', 'JJS'), ('curiosity', 'NN')],\n", | |
" [('the', 'DT'), ('wisest', 'JJS'), ('measure', 'NN')],\n", | |
" [('the', 'DT'), ('highest', 'JJS'), ('obligation', 'NN')],\n", | |
" [('the', 'DT'), ('highest', 'JJS'), ('purpose', 'NN')],\n", | |
" [('the', 'DT'), ('smallest', 'JJS'), ('difficulty', 'NN')],\n", | |
" [('the', 'DT'), ('least', 'JJS'), ('information', 'NN')],\n", | |
" [('the', 'DT'), ('smallest', 'JJS'), ('skill', 'NN')],\n", | |
" [('the', 'DT'), ('best', 'JJS'), ('man', 'NN')],\n", | |
" [('the', 'DT'), ('least', 'JJS'), ('idea', 'NN')],\n", | |
" [('the', 'DT'), ('greatest', 'JJS'), ('good', 'NN')],\n", | |
" [('the', 'DT'), ('busiest', 'JJS'), ('part', 'NN')],\n", | |
" [('the', 'DT'), ('greatest', 'JJS'), ('attention', 'NN')],\n", | |
" [('the', 'DT'), ('fondest', 'JJS'), ('parent', 'NN')],\n", | |
" [('the', 'DT'), ('highest', 'JJS'), ('promise', 'NN')],\n", | |
" [('the', 'DT'), ('least', 'JJS'), ('ashamed', 'NN')],\n", | |
" [('the', 'DT'), ('greatest', 'JJS'), ('zeal', 'NN')],\n", | |
" [('the', 'DT'), ('best', 'JJS'), ('judge', 'NN')],\n", | |
" [('the', 'DT'), ('best', 'JJS'), ('scheme', 'NN')],\n", | |
" [('the', 'DT'), ('greatest', 'JJS'), ('pleasure', 'NN')],\n", | |
" [('the', 'DT'), ('least', 'JJS'), ('draught', 'NN')],\n", | |
" [('the', 'DT'), ('oddest', 'JJS'), ('creature', 'NN')],\n", | |
" [('the', 'DT'), ('greatest', 'JJS'), ('compliment', 'NN')],\n", | |
" [('the', 'DT'), ('latest', 'JJS'), ('interest', 'NN')],\n", | |
" [('the', 'DT'), ('strongest', 'JJS'), ('reproach', 'NN')],\n", | |
" [('the', 'DT'), ('best', 'JJS'), ('friend', 'NN')],\n", | |
" [('the', 'DT'), ('best', 'JJS'), ('offer', 'NN')],\n", | |
" [('the', 'DT'), ('best', 'JJS'), ('society', 'NN')],\n", | |
" [('the', 'DT'), ('greatest', 'JJS'), ('satisfaction', 'NN')],\n", | |
" [('the', 'DT'), ('safest', 'JJS'), ('model', 'NN')],\n", | |
" [('the', 'DT'), ('plainest', 'JJS'), ('spoken', 'NN')],\n", | |
" [('the', 'DT'), ('greatest', 'JJS'), ('satisfaction', 'NN')],\n", | |
" [('the', 'DT'), ('strongest', 'JJS'), ('Isabella', 'NN')],\n", | |
" [('the', 'DT'), ('smallest', 'JJS'), ('distress', 'NN')],\n", | |
" [('the', 'DT'), ('best', 'JJS'), ('gentleman', 'NN')],\n", | |
" [('the', 'DT'), ('greatest', 'JJS'), ('dislike', 'NN')],\n", | |
" [('the', 'DT'), ('smallest', 'JJS'), ('doubt', 'NN')],\n", | |
" [('the', 'DT'), ('longest', 'JJS'), ('weather', 'NN')],\n", | |
" [('the', 'DT'), ('greatest', 'JJS'), ('confidence', 'NN')],\n", | |
" [('the', 'DT'), ('least', 'JJS'), ('conceit', 'NN')],\n", | |
" [('the', 'DT'), ('coldest', 'JJS'), ('heart', 'NN')],\n", | |
" [('the', 'DT'), ('steadiest', 'JJS'), ('brain', 'NN')],\n", | |
" [('the', 'DT'), ('slightest', 'JJS'), ('particular', 'NN')],\n", | |
" [('the', 'DT'), ('softest', 'JJS'), ('cotton', 'NN')],\n", | |
" [('the', 'DT'), ('oddest', 'JJS'), ('dreams', 'NN')],\n", | |
" [('the', 'DT'), ('least', 'JJS'), ('thing', 'NN')],\n", | |
" [('the', 'DT'), ('warmest', 'JJS'), ('heart', 'NN')],\n", | |
" [('the', 'DT'), ('honest', 'JJS'), ('pride', 'NN')],\n", | |
" [('the', 'DT'), ('best', 'JJS'), ('time', 'NN')],\n", | |
" [('the', 'DT'), ('pleasantest', 'JJS'), ('part', 'NN')],\n", | |
" [('the', 'DT'), ('highest', 'JJS'), ('part', 'NN')],\n", | |
" [('the', 'DT'), ('least', 'JJS'), ('idea', 'NN')],\n", | |
" [('the', 'DT'), ('best', 'JJS'), ('companion', 'NN')],\n", | |
" [('the', 'DT'), ('warmest', 'JJS'), ('concern', 'NN')],\n", | |
" [('the', 'DT'), ('greatest', 'JJS'), ('service', 'NN')],\n", | |
" [('the', 'DT'), ('smallest', 'JJS'), ('degree', 'NN')],\n", | |
" [('the', 'DT'), ('least', 'JJS'), ('idea', 'NN')],\n", | |
" [('the', 'DT'), ('deepest', 'JJS'), ('hue', 'NN')],\n", | |
" [('the', 'DT'), ('oddest', 'JJS'), ('news', 'NN')],\n", | |
" [('the', 'DT'), ('slightest', 'JJS'), ('suspicion', 'NN')],\n", | |
" [('the', 'DT'), ('least', 'JJS'), ('regard', 'NN')],\n", | |
" [('the', 'DT'), ('closest', 'JJS'), ('agreement', 'NN')],\n", | |
" [('the', 'DT'), ('closest', 'JJS'), ('observance', 'NN')],\n", | |
" [('the', 'DT'), ('greatest', 'JJS'), ('relief', 'NN')],\n", | |
" [('the', 'DT'), ('greatest', 'JJS'), ('advantage', 'NN')],\n", | |
" [('the', 'DT'), ('quickest', 'JJS'), ('arrangement', 'NN')],\n", | |
" [('the', 'DT'), ('best', 'JJS'), ('nature', 'NN')],\n", | |
" [('the', 'DT'), ('warmest', 'JJS'), ('friendship', 'NN')],\n", | |
" [('the', 'DT'), ('happiest', 'JJS'), ('dream', 'NN')],\n", | |
" [('the', 'DT'), ('slightest', 'JJS'), ('perception', 'NN')],\n", | |
" [('the', 'DT'), ('slightest', 'JJS'), ('inclination', 'NN')],\n", | |
" [('the', 'DT'), ('greatest', 'JJS'), ('pleasure', 'NN')],\n", | |
" [('the', 'DT'), ('greatest', 'JJS'), ('respect', 'NN')],\n", | |
" [('the', 'DT'), ('warmest', 'JJS'), ('friendship', 'NN')],\n", | |
" [('the', 'DT'), ('deepest', 'JJS'), ('humiliation', 'NN')],\n", | |
" [('the', 'DT'), ('least', 'JJS'), ('suspicion', 'NN')],\n", | |
" [('the', 'DT'), ('greatest', 'JJS'), ('unhappiness', 'NN')],\n", | |
" [('the', 'DT'), ('smallest', 'JJS'), ('reply', 'NN')],\n", | |
" [('the', 'DT'), ('smallest', 'JJS'), ('remark', 'NN')],\n", | |
" [('the', 'DT'), ('highest', 'JJS'), ('credit', 'NN')],\n", | |
" [('the', 'DT'), ('tenderest', 'JJS'), ('affection', 'NN')],\n", | |
" [('the', 'DT'), ('greatest', 'JJS'), ('humanity', 'NN')],\n", | |
" [('the', 'DT'), ('strongest', 'JJS'), ('approbation', 'NN')],\n", | |
" [('the', 'DT'), ('highest', 'JJS'), ('importance', 'NN')],\n", | |
" [('the', 'DT'), ('highest', 'JJS'), ('promise', 'NN')],\n", | |
" [('the', 'DT'), ('best', 'JJS'), ('treatment', 'NN')],\n", | |
" [('the', 'DT'), ('warmest', 'JJS'), ('interest', 'NN')],\n", | |
" [('the', 'DT'), ('slightest', 'JJS'), ('degree', 'NN')],\n", | |
" [('the', 'DT'), ('fullest', 'JJS'), ('exultation', 'NN')],\n", | |
" [('the', 'DT'), ('luckiest', 'JJS'), ('creature', 'NN')],\n", | |
" [('the', 'DT'), ('latest', 'JJS'), ('couple', 'NN')]]" | |
] | |
}, | |
"execution_count": 191, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"pats = findPattern(['the', 'JJS', 'NN'], emmaTags)\n", | |
"pats" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 192, | |
"id": "ac645a82-88ab-4cb5-a44f-f77b77e5a48a", | |
"metadata": { | |
"jupyter": { | |
"source_hidden": true | |
}, | |
"tags": [] | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[(\"[('the', 'DT'), ('smallest', 'JJS'), ('doubt', 'NN')]\", 4),\n", | |
" (\"[('the', 'DT'), ('least', 'JJS'), ('idea', 'NN')]\", 4),\n", | |
" (\"[('the', 'DT'), ('greatest', 'JJS'), ('pleasure', 'NN')]\", 3),\n", | |
" (\"[('the', 'DT'), ('best', 'JJS'), ('judge', 'NN')]\", 2),\n", | |
" (\"[('the', 'DT'), ('best', 'JJS'), ('friend', 'NN')]\", 2),\n", | |
" (\"[('the', 'DT'), ('greatest', 'JJS'), ('service', 'NN')]\", 2),\n", | |
" (\"[('the', 'DT'), ('smallest', 'JJS'), ('difficulty', 'NN')]\", 2),\n", | |
" (\"[('the', 'DT'), ('best', 'JJS'), ('society', 'NN')]\", 2),\n", | |
" (\"[('the', 'DT'), ('highest', 'JJS'), ('promise', 'NN')]\", 2),\n", | |
" (\"[('the', 'DT'), ('greatest', 'JJS'), ('satisfaction', 'NN')]\", 2),\n", | |
" (\"[('the', 'DT'), ('warmest', 'JJS'), ('friendship', 'NN')]\", 2),\n", | |
" (\"[('the', 'DT'), ('greatest', 'JJS'), ('amusement', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('pleasantest', 'JJS'), ('proof', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('fairest', 'JJS'), ('way', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('earnest', 'JJS'), ('pressing', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('best', 'JJS'), ('size', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('smallest', 'JJS'), ('intermission', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('least', 'JJS'), ('suspecting', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('luckiest', 'JJS'), ('woman', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('best', 'JJS'), ('antidote', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('completest', 'JJS'), ('proof', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('best', 'JJS'), ('charade', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('tenderest', 'JJS'), ('spirit', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('handsomest', 'JJS'), ('Henry', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('proudest', 'JJS'), ('moment', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('smallest', 'JJS'), ('delay', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('longest', 'JJS'), ('part', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('least', 'JJS'), ('inconvenience', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('least', 'JJS'), ('inclination', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('happiest', 'JJS'), ('humour', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('greatest', 'JJS'), ('absurdity', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('greatest', 'JJS'), ('alacrity', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('worst', 'JJS'), ('weather', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('greatest', 'JJS'), ('comfort', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('greatest', 'JJS'), ('compassion', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('smallest', 'JJS'), ('attention', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('worst', 'JJS'), ('lay', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('worst', 'JJS'), ('judge', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('warmest', 'JJS'), ('corner', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('least', 'JJS'), ('backward', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('greatest', 'JJS'), ('presence', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('highest', 'JJS'), ('value', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('likeliest', 'JJS'), ('evil', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('smallest', 'JJS'), ('grease', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('best', 'JJS'), ('way', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('merest', 'JJS'), ('commonplace', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('commonest', 'JJS'), ('process', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('greatest', 'JJS'), ('curiosity', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('wisest', 'JJS'), ('measure', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('highest', 'JJS'), ('obligation', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('highest', 'JJS'), ('purpose', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('least', 'JJS'), ('information', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('smallest', 'JJS'), ('skill', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('best', 'JJS'), ('man', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('greatest', 'JJS'), ('good', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('busiest', 'JJS'), ('part', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('greatest', 'JJS'), ('attention', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('fondest', 'JJS'), ('parent', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('least', 'JJS'), ('ashamed', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('greatest', 'JJS'), ('zeal', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('best', 'JJS'), ('scheme', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('least', 'JJS'), ('draught', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('oddest', 'JJS'), ('creature', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('greatest', 'JJS'), ('compliment', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('latest', 'JJS'), ('interest', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('strongest', 'JJS'), ('reproach', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('best', 'JJS'), ('offer', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('safest', 'JJS'), ('model', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('plainest', 'JJS'), ('spoken', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('strongest', 'JJS'), ('Isabella', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('smallest', 'JJS'), ('distress', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('best', 'JJS'), ('gentleman', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('greatest', 'JJS'), ('dislike', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('longest', 'JJS'), ('weather', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('greatest', 'JJS'), ('confidence', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('least', 'JJS'), ('conceit', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('coldest', 'JJS'), ('heart', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('steadiest', 'JJS'), ('brain', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('slightest', 'JJS'), ('particular', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('softest', 'JJS'), ('cotton', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('oddest', 'JJS'), ('dreams', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('least', 'JJS'), ('thing', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('warmest', 'JJS'), ('heart', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('honest', 'JJS'), ('pride', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('best', 'JJS'), ('time', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('pleasantest', 'JJS'), ('part', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('highest', 'JJS'), ('part', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('best', 'JJS'), ('companion', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('warmest', 'JJS'), ('concern', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('smallest', 'JJS'), ('degree', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('deepest', 'JJS'), ('hue', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('oddest', 'JJS'), ('news', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('slightest', 'JJS'), ('suspicion', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('least', 'JJS'), ('regard', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('closest', 'JJS'), ('agreement', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('closest', 'JJS'), ('observance', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('greatest', 'JJS'), ('relief', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('greatest', 'JJS'), ('advantage', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('quickest', 'JJS'), ('arrangement', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('best', 'JJS'), ('nature', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('happiest', 'JJS'), ('dream', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('slightest', 'JJS'), ('perception', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('slightest', 'JJS'), ('inclination', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('greatest', 'JJS'), ('respect', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('deepest', 'JJS'), ('humiliation', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('least', 'JJS'), ('suspicion', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('greatest', 'JJS'), ('unhappiness', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('smallest', 'JJS'), ('reply', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('smallest', 'JJS'), ('remark', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('highest', 'JJS'), ('credit', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('tenderest', 'JJS'), ('affection', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('greatest', 'JJS'), ('humanity', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('strongest', 'JJS'), ('approbation', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('highest', 'JJS'), ('importance', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('best', 'JJS'), ('treatment', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('warmest', 'JJS'), ('interest', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('slightest', 'JJS'), ('degree', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('fullest', 'JJS'), ('exultation', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('luckiest', 'JJS'), ('creature', 'NN')]\", 1),\n", | |
" (\"[('the', 'DT'), ('latest', 'JJS'), ('couple', 'NN')]\", 1)]" | |
] | |
}, | |
"execution_count": 192, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"getPatternStats(pats)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.9.6" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment