# kspicer80/verbsKerouac.py

## verbsKerouac.py
import matplotlib.pyplot as plt
import spacy
nlp = spacy.load('en_core_web_lg')
import pandas as pd
from collections import Counter

# Read the whole text into memory; the context manager closes the file
# handle as soon as the read finishes instead of leaving it open.
with open('kerouac.txt') as kerouac_file:
    kerouac_raw = kerouac_file.read()

# Run the spaCy pipeline loaded above over the full text.
kerouac = nlp(kerouac_raw)

# Ten most frequent PERSON entities. As a bare expression the result is only
# echoed by an interactive session; nothing is printed when run as a script.
# `.text` is used because `.string` is no longer available in spaCy v3.
Counter([w.text.strip() for w in kerouac.ents if w.label_ == 'PERSON']).most_common(10)

#Function to give us all the adjectives describing a character
def adjectivesDescribingCharacters(text, character):
    """Count the adjectives attached to mentions of a character.

    Looks at every sentence of ``text`` whose text contains ``character``,
    picks the tokens whose text contains ``character``, and tallies their
    direct dependency children tagged ``ADJ``.

    Args:
        text: Parsed document exposing ``.sents``; each sentence must be
            iterable over tokens and expose ``.text``.
        character: Substring identifying the character, e.g. ``'Dean'``.

    Returns:
        Up to ten ``(adjective, count)`` pairs, most frequent first.
    """
    # Work on the document that was passed in (not the module-level
    # `kerouac`), and read `.text` because `.string` is gone in spaCy v3.
    adjectives = Counter()
    for sent in text.sents:
        if character not in sent.text:
            continue
        for word in sent:
            if character not in word.text:
                continue
            adjectives.update(
                child.text.strip()
                for child in word.children
                if child.pos_ == 'ADJ'
            )
    return adjectives.most_common(10)

# Adjective counts for Sal, Dean and Marylou. The return values are neither
# stored nor printed, so they are only visible when these lines are
# evaluated in an interactive session.
adjectivesDescribingCharacters(kerouac, 'Sal')
adjectivesDescribingCharacters(kerouac, 'Dean')
adjectivesDescribingCharacters(kerouac, 'Marylou')

#Define a function to give us all the verbs for describing a character
def verbsForCharacters(text, character):
    """Count the verbs that govern mentions of a character.

    For every token whose text contains ``character`` (within sentences of
    ``text`` whose text contains it), walks the token's dependency
    ancestors and tallies the lemma of each ancestor whose ``pos_`` tag
    starts with ``'V'``.

    Args:
        text: Parsed document exposing ``.sents``; each sentence must be
            iterable over tokens and expose ``.text``.
        character: Substring identifying the character, e.g. ``'Dean'``.

    Returns:
        Up to twenty ``(lemma, count)`` pairs, most frequent first.
    """
    # Work on the document that was passed in (not the module-level
    # `kerouac`), and read `.text` because `.string` is gone in spaCy v3.
    verbLemmas = Counter()
    for sent in text.sents:
        if character not in sent.text:
            continue
        for word in sent:
            if character not in word.text:
                continue
            verbLemmas.update(
                ancestor.lemma_.strip()
                for ancestor in word.ancestors
                if ancestor.pos_.startswith('V')
            )
    return verbLemmas.most_common(20)

# Verb counts per character: the calls run in the order of the names listed
# and are unpacked into the matching targets on the left.
marylouVerbs, deanVerbs, salVerbs = (
    verbsForCharacters(kerouac, name) for name in ('Marylou', 'Dean', 'Sal')
)

#Combine and visualize with Pandas
def verbsToMatrix(verbCounts):
    """Turn a sequence of ``(verb, count)`` pairs into a pandas Series.

    The first item of each pair becomes the index label and the second its
    value; when a label repeats, the last pair wins.
    """
    countsByVerb = {}
    for pair in verbCounts:
        countsByVerb[pair[0]] = pair[1]
    return pd.Series(countsByVerb)

# One column of verb counts per character, indexed by verb.
verbsDF = pd.DataFrame({
    name: verbsToMatrix(counts)
    for name, counts in (('Marylou', marylouVerbs),
                         ('Dean', deanVerbs),
                         ('Sal', salVerbs))
})
# A verb absent from a character's list shows up as NaN; plot it as zero.
verbsDF = verbsDF.fillna(0)
verbsDF.plot(kind='bar', figsize=(16, 8))
plt.show()
	import matplotlib.pyplot as plt
	import spacy
	nlp = spacy.load('en_core_web_lg')
	import pandas as pd
	from collections import Counter

	# Read the whole text into memory; the context manager closes the file
	# handle as soon as the read finishes instead of leaving it open.
	with open('kerouac.txt') as kerouac_file:
		kerouac_raw = kerouac_file.read()

	# Run the spaCy pipeline loaded above over the full text.
	kerouac = nlp(kerouac_raw)

	# Ten most frequent PERSON entities. As a bare expression the result is only
	# echoed by an interactive session; nothing is printed when run as a script.
	# `.text` is used because `.string` is no longer available in spaCy v3.
	Counter([w.text.strip() for w in kerouac.ents if w.label_ == 'PERSON']).most_common(10)

	#Function to give us all the adjectives describing a character
	def adjectivesDescribingCharacters(text, character):
		"""Count the adjectives attached to mentions of a character.

		Looks at every sentence of ``text`` whose text contains ``character``,
		picks the tokens whose text contains ``character``, and tallies their
		direct dependency children tagged ``ADJ``.

		Args:
			text: Parsed document exposing ``.sents``; each sentence must be
				iterable over tokens and expose ``.text``.
			character: Substring identifying the character, e.g. ``'Dean'``.

		Returns:
			Up to ten ``(adjective, count)`` pairs, most frequent first.
		"""
		# Work on the document that was passed in (not the module-level
		# `kerouac`), and read `.text` because `.string` is gone in spaCy v3.
		adjectives = Counter()
		for sent in text.sents:
			if character not in sent.text:
				continue
			for word in sent:
				if character not in word.text:
					continue
				adjectives.update(
					child.text.strip()
					for child in word.children
					if child.pos_ == 'ADJ'
				)
		return adjectives.most_common(10)

	# Adjective counts for Sal, Dean and Marylou. The return values are neither
	# stored nor printed, so they are only visible when these lines are
	# evaluated in an interactive session.
	adjectivesDescribingCharacters(kerouac, 'Sal')
	adjectivesDescribingCharacters(kerouac, 'Dean')
	adjectivesDescribingCharacters(kerouac, 'Marylou')

	#Define a function to give us all the verbs for describing a character
	def verbsForCharacters(text, character):
		"""Count the verbs that govern mentions of a character.

		For every token whose text contains ``character`` (within sentences of
		``text`` whose text contains it), walks the token's dependency
		ancestors and tallies the lemma of each ancestor whose ``pos_`` tag
		starts with ``'V'``.

		Args:
			text: Parsed document exposing ``.sents``; each sentence must be
				iterable over tokens and expose ``.text``.
			character: Substring identifying the character, e.g. ``'Dean'``.

		Returns:
			Up to twenty ``(lemma, count)`` pairs, most frequent first.
		"""
		# Work on the document that was passed in (not the module-level
		# `kerouac`), and read `.text` because `.string` is gone in spaCy v3.
		verbLemmas = Counter()
		for sent in text.sents:
			if character not in sent.text:
				continue
			for word in sent:
				if character not in word.text:
					continue
				verbLemmas.update(
					ancestor.lemma_.strip()
					for ancestor in word.ancestors
					if ancestor.pos_.startswith('V')
				)
		return verbLemmas.most_common(20)

	# Verb counts per character: the calls run in the order of the names listed
	# and are unpacked into the matching targets on the left.
	marylouVerbs, deanVerbs, salVerbs = (
		verbsForCharacters(kerouac, name) for name in ('Marylou', 'Dean', 'Sal')
	)

	#Combine and visualize with Pandas
	def verbsToMatrix(verbCounts):
		"""Turn a sequence of ``(verb, count)`` pairs into a pandas Series.

		The first item of each pair becomes the index label and the second its
		value; when a label repeats, the last pair wins.
		"""
		# The body must be indented under the `def`; with the `return` at the
		# same level as the header the function does not parse.
		countsByVerb = {}
		for pair in verbCounts:
			countsByVerb[pair[0]] = pair[1]
		return pd.Series(countsByVerb)

	# One column of verb counts per character, indexed by verb.
	verbsDF = pd.DataFrame({
		name: verbsToMatrix(counts)
		for name, counts in (('Marylou', marylouVerbs),
		                     ('Dean', deanVerbs),
		                     ('Sal', salVerbs))
	})
	# A verb absent from a character's list shows up as NaN; plot it as zero.
	verbsDF = verbsDF.fillna(0)
	verbsDF.plot(kind='bar', figsize=(16, 8))
	plt.show()