Skip to content

Instantly share code, notes, and snippets.

@codecademydev
Created September 7, 2020 11:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save codecademydev/ffc0be8e3751e3ff004c07785c9fe91a to your computer and use it in GitHub Desktop.
Save codecademydev/ffc0be8e3751e3ff004c07785c9fe91a to your computer and use it in GitHub Desktop.
Codecademy export
import os
import gensim
import spacy
from president_helper import read_file, process_speeches, merge_speeches, get_president_sentences, get_presidents_sentences, most_frequent_words
# --- All presidents -------------------------------------------------------
# Collect every '.txt' file in the current working directory, sorted by file
# name so the processing order is reproducible.
files = sorted(name for name in os.listdir() if name.endswith('.txt'))
#print(files)
# Read each speech file with the project helper.
speeches = [read_file(name) for name in files]
# Preprocess each speech (helper from president_helper).
processed_speeches = process_speeches(speeches)
#print(processed_speeches)
# Merge the processed speeches into one collection of sentences; this is the
# training input handed to Word2Vec below.
all_sentences = merge_speeches(processed_speeches)
#print(all_sentences)
# Most frequently used words across all speeches.
most_freq_words = most_frequent_words(all_sentences)
#print(most_freq_words)
# Train a skip-gram (sg=1) Word2Vec model with 96-dimensional vectors on all
# speeches. min_count=1 keeps every word, including those seen only once.
# NOTE: `size` is the gensim 3.x keyword; gensim 4 renamed it `vector_size`.
all_prez_embeddings = gensim.models.Word2Vec(all_sentences, size=96, window=5, min_count=1, workers=2, sg=1)
# The 20 words closest to 'freedom'. Query through `.wv` (the trained
# KeyedVectors): calling most_similar on the model itself is deprecated.
similar_to_freedom = all_prez_embeddings.wv.most_similar('freedom', topn=20)
#print(similar_to_freedom)
# --- Franklin D. Roosevelt ------------------------------------------------
# Sentences for a single president, selected by the identifier string passed
# to the helper (presumably matched against speech file names -- confirm in
# president_helper).
roosevelt_sentences = get_president_sentences("franklin-d-roosevelt")
#print(roosevelt_sentences)
# Most frequently used words in Roosevelt's speeches.
roosevelt_most_freq_words = most_frequent_words(roosevelt_sentences)
#print(roosevelt_most_freq_words)
# Train a skip-gram (sg=1) Word2Vec model with 96-dimensional vectors on
# Roosevelt's sentences only, using the same hyperparameters as the
# all-presidents model.
# NOTE: `size` is the gensim 3.x keyword; gensim 4 renamed it `vector_size`.
roosevelt_embeddings = gensim.models.Word2Vec(roosevelt_sentences, size=96, window=5, min_count=1, workers=2, sg=1)
# The 20 words closest to 'freedom' in Roosevelt's usage. Query through `.wv`
# (the trained KeyedVectors): calling most_similar on the model itself is
# deprecated.
roosevelt_similar_to_freedom = roosevelt_embeddings.wv.most_similar('freedom', topn=20)
#print(roosevelt_similar_to_freedom)
# --- Mount Rushmore presidents --------------------------------------------
# Sentences for several presidents at once. A list of identifiers goes to the
# plural helper `get_presidents_sentences`; the singular
# `get_president_sentences` is called with a single string elsewhere in this
# script.
rushmore_prez_sentences = get_presidents_sentences(["washington","jefferson","lincoln","theodore-roosevelt"])
#print(rushmore_prez_sentences)
# Most frequently used words across the four presidents.
rushmore_most_freq_words = most_frequent_words(rushmore_prez_sentences)
print('Rushmore Most Freq Words: ')
print(rushmore_most_freq_words)
# Train a skip-gram (sg=1) Word2Vec model with 96-dimensional vectors on the
# four presidents' sentences, using the same hyperparameters as the other
# models in this script.
# NOTE: `size` is the gensim 3.x keyword; gensim 4 renamed it `vector_size`.
rushmore_embeddings = gensim.models.Word2Vec(rushmore_prez_sentences, size=96, window=5, min_count=1, workers=2, sg=1)
# The 20 words closest to 'freedom' for these presidents, queried through
# `.wv` (the trained KeyedVectors) rather than the deprecated model-level call.
rushmore_similar_to_freedom = rushmore_embeddings.wv.most_similar('freedom', topn=20)
#print(rushmore_similar_to_freedom)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment