Created
August 4, 2021 16:54
-
-
Save JonathanReeve/ba9fb2f0fb71b60101f3495c0031efe4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"id": "available-kazakhstan", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import spacy\n", | |
"from IPython.display import display, HTML # For fancy displays" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "boolean-classification", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"nlp = spacy.load('en_core_web_lg')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "practical-boxing", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"dubliners = open('../Readings/content/texts/dubliners.md').read()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"id": "stone-xerox", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Replace line breaks with spaces, to get cleaner looking output\n", | |
"dublinersClean = dubliners.replace('\\n', ' ')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"id": "adult-circus", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"dublinersDoc = nlp(dublinersClean)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 32, | |
"id": "advanced-winter", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def red(text): \n", | |
" return f'<span style=\"color: red\">{text}</span>'" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 39, | |
"id": "focused-uniform", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"certain circumstances or <span style=\"color: red\">such </span> <span style=\"color: red\">and </span> <span style=\"color: red\">such </span>sins were mortal " | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"mind was slowly <span style=\"color: red\">round </span> <span style=\"color: red\">and </span> <span style=\"color: red\">round </span>in the same " | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"He repeated his <span style=\"color: red\">over </span> <span style=\"color: red\">and </span> <span style=\"color: red\">over </span>again, varying " | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"seemed to circle <span style=\"color: red\">round </span> <span style=\"color: red\">and </span> <span style=\"color: red\">round </span>its new centre" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"turned a silver <span style=\"color: red\">round </span> <span style=\"color: red\">and </span> <span style=\"color: red\">round </span>her wrist. " | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"something about the <span style=\"color: red\">over </span> <span style=\"color: red\">and </span> <span style=\"color: red\">over </span>again. The " | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"child's sobbing <span style=\"color: red\">less </span> <span style=\"color: red\">and </span> <span style=\"color: red\">less</span>; and tears " | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"the bandage, <span style=\"color: red\">laughed </span> <span style=\"color: red\">and </span> <span style=\"color: red\">laughed </span>again till the " | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"nature of his <span style=\"color: red\">more </span> <span style=\"color: red\">and </span> <span style=\"color: red\">more </span>closely to him" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"read: Love <span style=\"color: red\">man </span> <span style=\"color: red\">and </span> <span style=\"color: red\">man </span>is impossible because " | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"and read the <span style=\"color: red\">over </span> <span style=\"color: red\">and </span> <span style=\"color: red\">over </span>again. The " | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"in the hall <span style=\"color: red\">fewer </span> <span style=\"color: red\">and </span> <span style=\"color: red\">fewer</span>, she began " | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
", who had <span style=\"color: red\">arguing </span> <span style=\"color: red\">and </span> <span style=\"color: red\">arguing </span>against it, " | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
", I will <span style=\"color: red\">this </span> <span style=\"color: red\">and </span> <span style=\"color: red\">this</span>. I will " | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"fallen flat. <span style=\"color: red\">years </span> <span style=\"color: red\">and </span> <span style=\"color: red\">years </span>it had gone " | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"may do so <span style=\"color: red\">many </span> <span style=\"color: red\">and </span> <span style=\"color: red\">many </span>a long year " | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"would be an <span style=\"color: red\">one </span> <span style=\"color: red\">and </span> <span style=\"color: red\">one </span>beyond my poor " | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"'s mill, <span style=\"color: red\">round </span> <span style=\"color: red\">and </span> <span style=\"color: red\">round </span>in order to " | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"for w in dublinersDoc: \n", | |
" if w.lemma_ == \"and\": \n", | |
" prevWord, nextWord = dublinersDoc[w.i-1], dublinersDoc[w.i+1]\n", | |
" if prevWord.text == nextWord.text and prevWord.is_alpha: \n", | |
" #print(prevWord, w, nextWord)\n", | |
" preContext = dublinersDoc[w.i-5:w.i-2].text_with_ws\n", | |
" postContext = dublinersDoc[w.i+2:w.i+5].text_with_ws\n", | |
" highlighted = ' '.join([red(token.text_with_ws) for token in [prevWord, w, nextWord]])\n", | |
" output = preContext + highlighted + postContext\n", | |
" display(HTML(output))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "synthetic-policy", | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.8.8" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment