This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Create functions to get polarity and subjectivity using TextBlob
def getPolarity(text):
    """Return the polarity score of ``text``, rounded to 4 decimal places.

    The score is the first element of ``TextBlob(text).sentiment``.
    """
    sentiment = TextBlob(text).sentiment
    polarity_score = sentiment[0]
    return round(polarity_score, 4)
def getSubjectivity(text):
    """Return the subjectivity score of ``text``, rounded to 4 decimal places.

    The score is the second element of ``TextBlob(text).sentiment``.
    """
    sentiment = TextBlob(text).sentiment
    subjectivity_score = sentiment[1]
    return round(subjectivity_score, 4)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Add the per-article entity-count and sentiment columns to the dataframe.
wells_fargo_df['Top_ORGs'] = wells_fargo_df['Content'].apply(get_org_counts)
wells_fargo_df['Top_PERSONs'] = wells_fargo_df['Content'].apply(get_person_counts)
# NOTE(review): polarity is scored on 'Description' while the other columns
# are derived from 'Content' -- confirm this asymmetry is intended.
wells_fargo_df['Polarity'] = wells_fargo_df['Description'].apply(getPolarity)
wells_fargo_df['Subjectivity'] = wells_fargo_df['Content'].apply(getSubjectivity)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Create a function to get a count of the top n people mentioned in the article with counts | |
def get_person_counts(text): | |
# Remove linebreaks from the text | |
text = text.replace("\n"," paragraph break ") | |
doc = nlp(text) | |
# Loop through the doc object and extract PERSON (people) entities | |
res = [] | |
for ent in doc.ents: | |
if ent.label_ == 'PERSON': |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Create a function to get a count of the top n organizations mentioned in the article with counts | |
def get_org_counts(text): | |
# Remove linebreaks from the text | |
text = text.replace("\n"," paragraph break ") | |
doc = nlp(text) | |
# Loop through the doc object and extract ORG (organization) entities | |
res = [] | |
for ent in doc.ents: | |
if ent.label_ == 'ORG': |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# The newsAPI pull object carries the best-formatted published date, so
# collect those dates here; the first 10 characters are just YYYY-MM-DD.
wells_fargo_publishedAt = [
    article['publishedAt'][:10] for article in wells_fargo
]
# Create our function that takes a list of Article objects and returns one dataframe | |
def get_details(article_list): | |
# Initialize empty lists for dictionaries later on | |
titles = [] | |
urls = [] | |
authors = [] | |
keywords =[] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Keep only the article entries from the newsAPI response
# (100 is the max length of articles to return).
wells_fargo = wells_fargo['articles']
# Collect the url of every article returned by newsAPI
wells_fargo_urls = [article['url'] for article in wells_fargo]
# Using newspaper3k, create a function to return an article given its URL | |
# See https://newspaper.readthedocs.io/en/latest/user_guide/quickstart.html for more detail | |
def get_article(url): | |
article = Article(url, fetch_images=False, memoize_articles = False) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Init | |
newsapi = NewsApiClient(api_key=key) | |
# Pull articles with "Wells Fargo" in title. Exclude forbes because of subscription req. | |
# See https://newsapi.org/docs/endpoints/everything for more details | |
wells_fargo = newsapi.get_everything(qintitle='Wells Fargo', | |
exclude_domains='forbes.com', | |
language='en', | |
from_param='2022-08-29', # Date Range (YYYY-MM-DD) | |
to='2022-09-03', # Date Range (YYYY-MM-DD) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# installations | |
## Spacy | |
pip install -U pip setuptools wheel | |
pip install -U spacy | |
python -m spacy download en_core_web_sm | |
## NewsAPI | |
pip install newsapi-python | |
## Newspaper3k |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Features are every column except 'Transported'; the target is kept as a
# one-column DataFrame.
X = df.drop(columns=['Transported'])
y = df.loc[:, ['Transported']]  # Target Variable
# Replace the boolean target values with their string labels 'True' / 'False'
y = y.replace({True: 'True', False: 'False'})
# Hold out 33% of the rows as the test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)
# ravel() flattens the one-column target to 1-D, which silences a user
# warning during fit without changing the result.
full_pipe.fit(X_train, y_train.values.ravel())
y_pred = full_pipe.predict(X_test)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Use ColumnTransformer and Selector to route columns by dtype:
# non-numeric columns go through cat_pipe, numeric columns through num_pipe.
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', cat_pipe, Selector(dtype_exclude=np.number)),
        ('num', num_pipe, Selector(dtype_include=np.number)),
    ],
)
# Combining the processor with a model for a happy ending :) | |
full_pipe = Pipeline(steps=[ | |
('features', preprocessor), | |
('model', SVC()) |
NewerOlder