Skip to content

Instantly share code, notes, and snippets.

@JonathanReeve
Created August 4, 2021 16:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save JonathanReeve/ba9fb2f0fb71b60101f3495c0031efe4 to your computer and use it in GitHub Desktop.
Save JonathanReeve/ba9fb2f0fb71b60101f3495c0031efe4 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 21,
"id": "available-kazakhstan",
"metadata": {},
"outputs": [],
"source": [
"import spacy\n",
"from IPython.display import display, HTML # For fancy displays"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "boolean-classification",
"metadata": {},
"outputs": [],
"source": [
"# Load the spaCy pipeline named 'en_core_web_lg' once; a later cell calls `nlp` on the cleaned text.\n",
"nlp = spacy.load('en_core_web_lg')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "practical-boxing",
"metadata": {},
"outputs": [],
"source": [
"# Read the whole text into memory. The context manager closes the file handle as soon\n",
"# as the read finishes, and the explicit encoding avoids depending on the platform\n",
"# default (assumes the Markdown file is UTF-8 -- TODO confirm).\n",
"with open('../Readings/content/texts/dubliners.md', encoding='utf-8') as textFile:\n",
"    dubliners = textFile.read()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "stone-xerox",
"metadata": {},
"outputs": [],
"source": [
"# Swap every line break for a space so the displayed snippets are not broken across lines\n",
"dublinersClean = ' '.join(dubliners.split('\\n'))"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "adult-circus",
"metadata": {},
"outputs": [],
"source": [
"# Run the loaded pipeline over the cleaned text; the search cell further down iterates over the result.\n",
"dublinersDoc = nlp(dublinersClean)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "advanced-winter",
"metadata": {},
"outputs": [],
"source": [
"def red(text):\n",
"    \"\"\"Return `text` wrapped in an HTML <span> whose inline style colours it red.\"\"\"\n",
"    return '<span style=\"color: red\">{}</span>'.format(text)"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "focused-uniform",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"certain circumstances or <span style=\"color: red\">such </span> <span style=\"color: red\">and </span> <span style=\"color: red\">such </span>sins were mortal "
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"mind was slowly <span style=\"color: red\">round </span> <span style=\"color: red\">and </span> <span style=\"color: red\">round </span>in the same "
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"He repeated his <span style=\"color: red\">over </span> <span style=\"color: red\">and </span> <span style=\"color: red\">over </span>again, varying "
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"seemed to circle <span style=\"color: red\">round </span> <span style=\"color: red\">and </span> <span style=\"color: red\">round </span>its new centre"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"turned a silver <span style=\"color: red\">round </span> <span style=\"color: red\">and </span> <span style=\"color: red\">round </span>her wrist. "
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"something about the <span style=\"color: red\">over </span> <span style=\"color: red\">and </span> <span style=\"color: red\">over </span>again. The "
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"child's sobbing <span style=\"color: red\">less </span> <span style=\"color: red\">and </span> <span style=\"color: red\">less</span>; and tears "
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"the bandage, <span style=\"color: red\">laughed </span> <span style=\"color: red\">and </span> <span style=\"color: red\">laughed </span>again till the "
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"nature of his <span style=\"color: red\">more </span> <span style=\"color: red\">and </span> <span style=\"color: red\">more </span>closely to him"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"read: Love <span style=\"color: red\">man </span> <span style=\"color: red\">and </span> <span style=\"color: red\">man </span>is impossible because "
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"and read the <span style=\"color: red\">over </span> <span style=\"color: red\">and </span> <span style=\"color: red\">over </span>again. The "
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"in the hall <span style=\"color: red\">fewer </span> <span style=\"color: red\">and </span> <span style=\"color: red\">fewer</span>, she began "
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
", who had <span style=\"color: red\">arguing </span> <span style=\"color: red\">and </span> <span style=\"color: red\">arguing </span>against it, "
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
", I will <span style=\"color: red\">this </span> <span style=\"color: red\">and </span> <span style=\"color: red\">this</span>. I will "
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"fallen flat. <span style=\"color: red\">years </span> <span style=\"color: red\">and </span> <span style=\"color: red\">years </span>it had gone "
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"may do so <span style=\"color: red\">many </span> <span style=\"color: red\">and </span> <span style=\"color: red\">many </span>a long year "
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"would be an <span style=\"color: red\">one </span> <span style=\"color: red\">and </span> <span style=\"color: red\">one </span>beyond my poor "
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"'s mill, <span style=\"color: red\">round </span> <span style=\"color: red\">and </span> <span style=\"color: red\">round </span>in order to "
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Search for reduplicated phrases of the form \"X and X\" (e.g. \"round and round\")\n",
"# and display each hit in red with a few tokens of context on either side.\n",
"CONTEXT_TOKENS = 3  # tokens of context shown before and after each phrase\n",
"lastIndex = len(dublinersDoc) - 1\n",
"for w in dublinersDoc:\n",
"    # An \"and\" at either edge of the document has no neighbour on one side:\n",
"    # index -1 would wrap around to the last token, and index len(doc) is out of range.\n",
"    if w.lemma_ != \"and\" or w.i == 0 or w.i == lastIndex:\n",
"        continue\n",
"    prevWord, nextWord = dublinersDoc[w.i-1], dublinersDoc[w.i+1]\n",
"    if prevWord.text == nextWord.text and prevWord.is_alpha:\n",
"        # The left context must end exactly where prevWord begins; stopping one token\n",
"        # earlier silently drops the word that immediately precedes the phrase.\n",
"        preStart = max(prevWord.i - CONTEXT_TOKENS, 0)  # never slice from a negative index\n",
"        preContext = dublinersDoc[preStart:prevWord.i].text_with_ws\n",
"        postContext = dublinersDoc[nextWord.i+1:nextWord.i+1+CONTEXT_TOKENS].text_with_ws\n",
"        # text_with_ws already carries each token's trailing whitespace, so join\n",
"        # without a separator to avoid doubling the spaces between the highlighted words.\n",
"        highlighted = ''.join(red(token.text_with_ws) for token in (prevWord, w, nextWord))\n",
"        display(HTML(preContext + highlighted + postContext))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "synthetic-policy",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment