Skip to content

Instantly share code, notes, and snippets.

@kspicer80
Created December 24, 2019 17:40
Show Gist options
  • Save kspicer80/f78d0cfccc43a9e07b05efca6b652b96 to your computer and use it in GitHub Desktop.
Save kspicer80/f78d0cfccc43a9e07b05efca6b652b96 to your computer and use it in GitHub Desktop.
Messing around with spaCy and Kerouac
import matplotlib.pyplot as plt
import spacy
nlp = spacy.load('en_core_web_lg')
import pandas as pd
from collections import Counter
# Read the whole novel into memory; the context manager closes the file handle
# as soon as the text has been read instead of leaking it.
# NOTE(review): assumes kerouac.txt is saved as UTF-8 -- adjust if it is not.
with open('kerouac.txt', encoding='utf-8') as kerouac_file:
    kerouac_raw = kerouac_file.read()

# Run the spaCy pipeline once; every analysis below reuses this parsed Doc.
kerouac = nlp(kerouac_raw)

# Ten most frequently mentioned entities labelled PERSON. `.text` replaces the
# `.string` attribute, which newer spaCy releases no longer provide. The result
# is printed so it is visible when the file runs as a script.
print(Counter(ent.text.strip() for ent in kerouac.ents if ent.label_ == 'PERSON').most_common(10))
# Function to give us all the adjectives describing a character
def adjectivesDescribingCharacters(text, character, top_n=10):
    """Count the adjectives attached to mentions of a character.

    Every token whose text contains ``character`` is inspected, and each of
    its direct syntactic children tagged ``ADJ`` is tallied.

    Args:
        text: Parsed document to search. It must expose ``sents``; each
            sentence must have ``text`` and iterate over tokens that provide
            ``text``, ``children`` and ``pos_``.
        character: Substring identifying the character, e.g. ``'Dean'``.
        top_n: Maximum number of adjectives to return (default 10).

    Returns:
        A list of ``(adjective, count)`` pairs, most frequent first.
    """
    # Search the document that was passed in, not a module-level global, so
    # the function works for any parsed text.
    # Cheap sentence-level filter first, so tokens are only walked in
    # sentences that mention the character at all.
    matching_sents = (sent for sent in text.sents if character in sent.text)
    adjectives = Counter(
        child.text.strip()
        for sent in matching_sents
        for token in sent
        if character in token.text
        for child in token.children
        if child.pos_ == 'ADJ'
    )
    return adjectives.most_common(top_n)
# Top adjectives for each main character. The results are printed because a
# bare call expression is silently discarded when this runs as a script.
print('Sal', adjectivesDescribingCharacters(kerouac, 'Sal'))
print('Dean', adjectivesDescribingCharacters(kerouac, 'Dean'))
print('Marylou', adjectivesDescribingCharacters(kerouac, 'Marylou'))
# Define a function to give us all the verbs for describing a character
def verbsForCharacters(text, character, top_n=20):
    """Count the verbs that govern mentions of a character.

    Every token whose text contains ``character`` is located, then each of
    its syntactic ancestors whose coarse part-of-speech tag starts with
    ``'V'`` contributes its lemma to the tally.

    Args:
        text: Parsed document to search. It must expose ``sents``; each
            sentence must have ``text`` and iterate over tokens that provide
            ``text`` and ``ancestors``. Ancestors must provide ``pos_`` and
            ``lemma_``.
        character: Substring identifying the character, e.g. ``'Sal'``.
        top_n: Maximum number of verbs to return (default 20).

    Returns:
        A list of ``(verb_lemma, count)`` pairs, most frequent first.
    """
    # Search the document that was passed in, not a module-level global.
    # Cheap sentence-level filter first, so tokens are only walked in
    # sentences that mention the character at all.
    matching_sents = (sent for sent in text.sents if character in sent.text)
    mentions = [
        token
        for sent in matching_sents
        for token in sent
        if character in token.text
    ]
    # Lemmas are counted so inflected forms of one verb share a single entry.
    verb_lemmas = Counter(
        ancestor.lemma_.strip()
        for token in mentions
        for ancestor in token.ancestors
        if ancestor.pos_.startswith('V')
    )
    return verb_lemmas.most_common(top_n)
# Most common governing verbs for each main character, computed in the order
# Marylou, Dean, Sal and unpacked into one name per character.
marylouVerbs, deanVerbs, salVerbs = (
    verbsForCharacters(kerouac, name) for name in ('Marylou', 'Dean', 'Sal')
)
#Combine and visualize with Pandas
def verbsToMatrix(verbCounts):
    """Convert ``(verb, count)`` pairs into a pandas Series indexed by verb.

    Args:
        verbCounts: Iterable of ``(verb, count)`` pairs, such as the list
            returned by ``Counter.most_common``.

    Returns:
        A ``pd.Series`` whose index holds the verbs and whose values hold the
        matching counts, in the order the pairs were given.
    """
    # dict() consumes the pairs directly; no hand-written comprehension needed.
    return pd.Series(dict(verbCounts))
# One column per character, one row per verb. Aligning the three Series on
# their verb index leaves gaps where a verb is missing for a character, and
# those gaps are filled with a count of 0.
verbsDF = pd.DataFrame(
    {
        'Marylou': verbsToMatrix(marylouVerbs),
        'Dean': verbsToMatrix(deanVerbs),
        'Sal': verbsToMatrix(salVerbs),
    }
)
verbsDF = verbsDF.fillna(0)

# Grouped bar chart: one cluster of bars per verb, one bar per character.
verbsDF.plot.bar(figsize=(16, 8))
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment