Skip to content

Instantly share code, notes, and snippets.

@s0g00d
Created May 13, 2020 15:45
Show Gist options
  • Save s0g00d/264ec9244230fc312666bc2f4fa4f102 to your computer and use it in GitHub Desktop.
Save s0g00d/264ec9244230fc312666bc2f4fa4f102 to your computer and use it in GitHub Desktop.
Stock Headline Sentiment Analysis
import re  # import Regular expression library
from io import StringIO
from os import path
from urllib.request import Request, urlopen

import gspread
import pandas as pd
import requests
from bs4 import BeautifulSoup as bs
from oauth2client.service_account import ServiceAccountCredentials
from pandas import DataFrame
# Folder that holds the service-account key file used below.
DATA_DIR = 'C:/Users/xbsqu/Desktop/Python Learning/Projects/Premarket Stock Price'

# --- Google Sheets connection ------------------------------------------
# API scopes requested when building the service-account credentials.
scope = [
    'https://spreadsheets.google.com/feeds',
    'https://www.googleapis.com/auth/drive',
]
keyfile_path = path.join(DATA_DIR, 'client_secret.json')
creds = ServiceAccountCredentials.from_json_keyfile_name(keyfile_path, scope)
client = gspread.authorize(creds)

# Open the 'Stock Watcher' spreadsheet and pick the worksheet at index 2.
# Will need to update this to the live sheet.
sheet = client.open('Stock Watcher')
worksheet = sheet.get_worksheet(2)
# --- Now connected to the G Sheet ---------------------------------------
# Download the Finviz quote page and locate its news-headline table.
# We need to send a header so as to not get 403 errors.
stock_url = 'https://finviz.com/quote.ashx?t=RCL'
req = Request(stock_url, headers={'User-Agent': 'Mozilla/5.0'})
# The context manager closes the HTTP response (and its socket) as soon as
# the body has been read, instead of leaving it open until garbage collection.
with urlopen(req) as response:
    webpage = response.read()
# Make the soup call & scrape the table
page_soup = bs(webpage, 'html.parser')
headline_table = page_soup.find('table', {'class': 'fullview-news-outer'})
if headline_table is None:
    # find() returns None when nothing matches; stop here with a clear
    # message instead of failing later with an AttributeError on None.
    raise RuntimeError(
        "No table with class 'fullview-news-outer' found at {}; "
        "the page layout may have changed or the request was blocked."
        .format(stock_url)
    )
# Remove the publishing blog name: every <span> inside the news table is
# dropped from the parsed tree before the table is converted.
for span_tag in headline_table.find_all('span'):
    span_tag.decompose()
# Turn the Soup data into a df and add headers.
# read_html() is handed a file-like object because passing a literal HTML
# string is deprecated in recent pandas releases; [0] takes the first (only)
# table parsed from the fragment.
headline_table = pd.read_html(StringIO(str(headline_table)))[0]
headline_table.columns = ['Date', 'Title']
#print(headline_table.head())
# Now we need to fix the date column.
# Cells that start with a capital letter are cut down to their first 9
# characters; every other cell is blanked.
# NOTE(review): presumably the cells look like 'May-13-20 10:30AM' (date +
# time) versus '09:45AM' (time only) -- confirm against the live page.
date_column = headline_table['Date'].astype(str)  # str cast guards against NaN cells
short_date = date_column.str.slice(stop=9)
# str.match anchors at the start of each string, i.e. the same test as the
# regex '^[A-Z]'.
starts_with_letter = date_column.str.match(r'[A-Z]')
# pandas string/where operations return new Series, so the cleaned values
# must be assigned back to the DataFrame to take effect.
headline_table['Date'] = short_date.where(starts_with_letter, "")
date_column = headline_table['Date']
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment