s0g00d/gist:264ec9244230fc312666bc2f4fa4f102

## gistfile1.txt
import gspread
from oauth2client.service_account import ServiceAccountCredentials
from os import path
from bs4 import BeautifulSoup as bs
import requests
import pandas as pd
from pandas import DataFrame
from urllib.request import Request, urlopen
import re # import Regular expression library


# Folder that holds the service-account key file (client_secret.json).
DATA_DIR = 'C:/Users/xbsqu/Desktop/Python Learning/Projects/Premarket Stock Price'

# OAuth scopes requested when authorizing the service account.
scope = ['https://spreadsheets.google.com/feeds',
         'https://www.googleapis.com/auth/drive']

# Quote page whose news table is scraped.
stock_url = 'https://finviz.com/quote.ashx?t=RCL'


def open_worksheet(data_dir=DATA_DIR, spreadsheet='Stock Watcher', index=2):
    """Authorize with the service-account key and return one worksheet.

    Args:
        data_dir: Directory containing ``client_secret.json``.
        spreadsheet: Title of the Google spreadsheet to open.
        index: Zero-based worksheet position passed to ``get_worksheet``.
            Will need to update this to the live sheet.

    Returns:
        Whatever ``sheet.get_worksheet(index)`` returns for that position.
    """
    creds = ServiceAccountCredentials.from_json_keyfile_name(
        path.join(data_dir, 'client_secret.json'), scope)
    client = gspread.authorize(creds)
    sheet = client.open(spreadsheet)
    return sheet.get_worksheet(index)


def fetch_headline_table(url=stock_url, timeout=30):
    """Download the quote page and return its news table as a DataFrame.

    Args:
        url: Page to download.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        DataFrame with the columns ``Date`` and ``Title``.

    Raises:
        ValueError: If the page has no ``fullview-news-outer`` table.
    """
    # Imported locally so the file-level import block stays untouched.
    from io import StringIO

    # We need to send a header so as to not get 403 errors.
    req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    # The context manager closes the connection even if read() fails.
    with urlopen(req, timeout=timeout) as response:
        webpage = response.read()

    # Make the soup call & scrape the table.
    page_soup = bs(webpage, 'html.parser')
    news_table = page_soup.find('table', {'class': 'fullview-news-outer'})
    if news_table is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(
            "no table with class 'fullview-news-outer' found at " + url)

    # Remove the publishing blog name.
    for span_tag in news_table.find_all('span'):
        span_tag.decompose()

    # Turn the soup data into a DataFrame and add headers. read_html is
    # given a file-like object because passing literal HTML is deprecated
    # in newer pandas releases.
    headlines = pd.read_html(StringIO(str(news_table)))[0]
    headlines.columns = ['Date', 'Title']
    return headlines


def shorten_dates(date_column):
    """Return a copy of *date_column* reduced to its leading date part.

    Entries that start with an uppercase letter are cut to their first
    nine characters; every other entry becomes an empty string. The input
    Series is not modified.

    NOTE(review): presumably the first headline of a day looks like
    'Mar-13-20 09:30AM' and later ones carry only the time - confirm
    against the live page.

    Args:
        date_column: pandas Series holding the raw ``Date`` cell values.

    Returns:
        A new Series of strings with the same index as *date_column*.
    """
    text = date_column.astype(str)
    # na=False keeps the mask strictly boolean if a cell is missing.
    starts_with_date = text.str.match(r'^[A-Z]', na=False)
    return text.str.slice(stop=9).where(starts_with_date, "")


if __name__ == '__main__':
    # Connecting to G Sheet...
    # NOTE: the worksheet is opened here but not written to by this script.
    worksheet = open_worksheet()

    headline_table = fetch_headline_table()

    # Now we need to fix the date column.
    headline_table['Date'] = shorten_dates(headline_table['Date'])