# emarte91/WSBScraper.py

## WSBScraper.py
import praw
import pandas as pd
import yfinance as yf
import datetime as dt
from psaw import PushshiftAPI

#Global Variables
# Pushshift client (psaw); scan_yesterday_tickers uses it to search submissions.
api = PushshiftAPI()
# NOTE(review): client_id, client_secret and user_agent are empty strings here —
# presumably placeholders that must be replaced with real Reddit API credentials.
reddit_read_only = praw.Reddit(client_id="",
                               client_secret="",
                               user_agent="") # Read-only instance
# Subreddit handle that hot_post_pull reads the day's top posts from.
subreddit = reddit_read_only.subreddit("wallstreetbets")
# NOTE(review): "page id" looks like a placeholder — presumably the ID of the
# thread whose comments scrape_comments should print; confirm before running.
submission = reddit_read_only.submission("page id")


#Functions

# scrape daily discussion comments
def scrape_comments():
    """Print the configured submission's header and its top-level comments.

    The header is the submission title immediately followed by its URL.
    Each top-level comment body is printed followed by a dashed separator
    line. ``replace_more(limit=0)`` is called first; per the PRAW
    documentation this discards the "load more comments" placeholders
    instead of fetching them. The menu is shown again afterwards.
    """
    header = "".join((submission.title, submission.url))
    print(header)

    forest = submission.comments
    forest.replace_more(limit=0)
    for comment in forest:
        # One call, two lines: the comment text, then the separator.
        print(comment.body, "--------", sep="\n")

    quick_menu()

# Yahoo Finance 30-day scan
def yahoo_test():
    """Prompt for a ticker symbol and print its price history for ``'30d'``.

    The history is requested from yfinance with ``period='30d'`` and
    printed as returned (yfinance documents the result as a pandas
    DataFrame). The menu is shown again afterwards.
    """
    # Surrounding whitespace is never part of a symbol; drop it so a stray
    # space does not turn into a failed lookup.
    ticker = input("Enter Stock Ticker").strip()
    data = yf.Ticker(ticker).history(period='30d')
    # ``data.info`` without parentheses is only the bound method object, so
    # printing it produced a "<bound method DataFrame.info of ...>" dump.
    # Print the frame itself so the prices are shown cleanly.
    print(data)
    quick_menu()

# test db

def hot_post_pull():
    """Tally '$'-prefixed words found in the titles of the day's top posts.

    Titles come from ``subreddit.top(time_filter="day")``. A word is kept
    when it starts with ``$``, is not listed in ``known_not_stocks`` and
    does not end with a comma. The per-word counts are printed as a pandas
    Series, then the menu is shown again.
    """
    known_not_stocks = ['$26k', '$90k','$1mil','$12','$11','FUCK','IM','SEC','IN','GAIN','$500k','$375','$25']

    # Split every title up front so the listing is fully consumed before
    # any filtering happens.
    split_titles = [post.title.split() for post in subreddit.top(time_filter="day")]

    potential_stock_symbols = [
        token
        for tokens in split_titles
        for token in tokens
        if token.startswith('$')
        and not (token in known_not_stocks or token.endswith(','))
    ]

    # Number of times each candidate symbol was mentioned.
    mention_counts = pd.Series(potential_stock_symbols).value_counts()
    print(mention_counts)
    quick_menu()

def scan_yesterday_tickers():
    """Print recent submissions whose titles contain cashtags, then a tally.

    Searches Pushshift (via the module-level ``api``) for r/wallstreetbets
    submissions created after local midnight of yesterday. For every title
    containing at least one ``$``-prefixed word, the title and its creation
    time are printed. Finally the number of titles mentioning each cashtag
    is printed (a cashtag counts once per title) and the menu is shown again.
    """
    all_tickers = []
    yesterday = dt.date.today() - dt.timedelta(days=1)
    # psaw's documented usage passes epoch seconds for ``after``; convert
    # yesterday's midnight instead of handing over a ``date`` object.
    start_epoch = int(dt.datetime.combine(yesterday, dt.time.min).timestamp())
    searches = api.search_submissions(after=start_epoch,
                                      subreddit='wallstreetbets',
                                      filter=['url', 'author', 'title'])

    for search in searches:
        # The set collapses repeats so one title counts each cashtag once.
        cashtags = {word for word in search.title.split() if word.startswith('$')}
        if not cashtags:
            continue

        # extend, not append: a Series whose elements are lists cannot be
        # counted because lists are unhashable (value_counts raised TypeError).
        all_tickers.extend(cashtags)
        # NOTE(review): fromtimestamp() without tz yields local time even
        # though the source field is named created_utc — confirm intent.
        created_at = dt.datetime.fromtimestamp(search.created_utc).isoformat()
        print(f"{search.title} + {created_at}")

    # An explicit dtype keeps the empty-result case well defined.
    count = pd.Series(all_tickers, dtype=object).value_counts()
    print(count)
    quick_menu()

def quick_menu():
    """Show the main menu, read a choice and run the selected action.

    The prompt is repeated until one of the listed numbers is entered.
    Non-numeric input is treated as an invalid choice rather than raising
    ``ValueError``. Each action calls ``quick_menu`` again when it
    finishes, which is how the menu reappears after every action.
    """
    prompt = "1. Scan Daily Reddit Post\n2. Read Daily WSB Comments\n3. Check Stock Price 30d\n4: Check Yesterdays Tickers\n::"
    actions = {
        1: hot_post_pull,
        2: scrape_comments,
        3: yahoo_test,
        4: scan_yesterday_tickers,
    }

    # Loop instead of recursing so repeated bad input cannot grow the stack.
    while True:
        try:
            choice = int(input(prompt))
        except ValueError:
            # Letters or an empty line: ask again instead of crashing.
            continue
        action = actions.get(choice)
        if action is not None:
            break

    action()


# Script entry point: the menu starts as soon as this module is executed.
# There is no ``__main__`` guard, so importing the module starts it as well.
quick_menu()
	import praw
	import pandas as pd
	import yfinance as yf
	import datetime as dt
	from psaw import PushshiftAPI

	#Global Variables
	# Pushshift client (psaw); scan_yesterday_tickers uses it to search submissions.
	api = PushshiftAPI()
	# NOTE(review): client_id, client_secret and user_agent are empty strings here —
	# presumably placeholders that must be replaced with real Reddit API credentials.
	reddit_read_only = praw.Reddit(client_id="",
	client_secret="",
	user_agent="") # Read-only instance
	# Subreddit handle that hot_post_pull reads the day's top posts from.
	subreddit = reddit_read_only.subreddit("wallstreetbets")
	# NOTE(review): "page id" looks like a placeholder — presumably the ID of the
	# thread whose comments scrape_comments should print; confirm before running.
	submission = reddit_read_only.submission("page id")


	#Functions

	# scrape daily discussion comments
	def scrape_comments():
		"""Print the configured submission's title/URL and its top-level comments.

		The title and URL are printed joined together on one line. Each
		top-level comment body is followed by a dashed separator line.
		``replace_more(limit=0)`` is called first; per the PRAW documentation
		this discards the "load more comments" placeholders instead of
		fetching them. The menu is shown again afterwards.
		"""
		# Indentation restored: the body had been flattened to the same
		# column as the ``def`` line, which is not valid Python.
		print(submission.title + submission.url)
		submission.comments.replace_more(limit=0)
		for top_level_comment in submission.comments:
			print(top_level_comment.body)
			print("--------")
		quick_menu()

	# Yahoo Finance 30-day scan
	def yahoo_test():
		"""Prompt for a ticker symbol and print its price history for ``'30d'``.

		The history is requested from yfinance with ``period='30d'`` and
		printed as returned (yfinance documents the result as a pandas
		DataFrame). The menu is shown again afterwards.
		"""
		# Surrounding whitespace is never part of a symbol; drop it so a
		# stray space does not turn into a failed lookup.
		ticker = input("Enter Stock Ticker").strip()
		data = yf.Ticker(ticker).history(period='30d')
		# ``data.info`` without parentheses is only the bound method object;
		# print the frame itself so the prices are shown cleanly.
		print(data)
		quick_menu()

	# test db

	def hot_post_pull():
		"""Tally '$'-prefixed words found in the titles of the day's top posts.

		Titles come from ``subreddit.top(time_filter="day")``. A word is kept
		when it starts with ``$``, is not listed in ``known_not_stocks`` and
		does not end with a comma. The per-word counts are printed as a
		pandas Series, then the menu is shown again.
		"""
		# Indentation restored: the loops and the condition had been
		# flattened to a single column, which is not valid Python.
		hot_posts = subreddit.top(time_filter="day")
		words_collection = []
		potential_stock_symbols = []
		known_not_stocks = ['$26k', '$90k','$1mil','$12','$11','FUCK','IM','SEC','IN','GAIN','$500k','$375','$25']

		# Split each title into whitespace-separated words.
		for posts in hot_posts:
			title = posts.title
			title_words = title.split()
			words_collection.append(title_words)

		# Keep the words that look like cashtags.
		for title in words_collection:
			for word in title:
				if word.startswith('$') and word not in known_not_stocks and not word.endswith(','):
					potential_stock_symbols.append(word)

		# Counts the amount of times a stock was mentioned today
		count = pd.Series(potential_stock_symbols).value_counts()
		print(count)
		quick_menu()

	def scan_yesterday_tickers():
		"""Print recent submissions whose titles contain cashtags, then a tally.

		Searches Pushshift (via the module-level ``api``) for
		r/wallstreetbets submissions created after local midnight of
		yesterday. For every title containing at least one ``$``-prefixed
		word, the title and its creation time are printed. Finally the
		number of titles mentioning each cashtag is printed (a cashtag
		counts once per title) and the menu is shown again.
		"""
		all_tickers = []
		yesterday = dt.date.today() - dt.timedelta(days=1)
		# psaw's documented usage passes epoch seconds for ``after``; convert
		# yesterday's midnight instead of handing over a ``date`` object.
		start_epoch = int(dt.datetime.combine(yesterday, dt.time.min).timestamp())
		searches = api.search_submissions(after=start_epoch,
			subreddit='wallstreetbets',
			filter=['url', 'author', 'title'])

		for search in searches:
			# The set collapses repeats so one title counts each cashtag once.
			cashtags = {word for word in search.title.split() if word.startswith('$')}
			if not cashtags:
				continue

			# extend, not append: a Series whose elements are lists cannot
			# be counted because lists are unhashable.
			all_tickers.extend(cashtags)
			# NOTE(review): fromtimestamp() without tz yields local time even
			# though the source field is named created_utc — confirm intent.
			created_at = dt.datetime.fromtimestamp(search.created_utc).isoformat()
			print(f"{search.title} + {created_at}")

		# An explicit dtype keeps the empty-result case well defined.
		count = pd.Series(all_tickers, dtype=object).value_counts()
		print(count)
		quick_menu()

	def quick_menu():
		"""Show the main menu, read a choice and run the selected action.

		The prompt is repeated until one of the listed numbers is entered.
		Non-numeric input is treated as an invalid choice rather than
		raising ``ValueError``. Each action calls ``quick_menu`` again when
		it finishes, which is how the menu reappears after every action.
		"""
		prompt = "1. Scan Daily Reddit Post\n2. Read Daily WSB Comments\n3. Check Stock Price 30d\n4: Check Yesterdays Tickers\n::"
		actions = {
			1: hot_post_pull,
			2: scrape_comments,
			3: yahoo_test,
			4: scan_yesterday_tickers,
		}

		# Loop instead of recursing so repeated bad input cannot grow the stack.
		while True:
			try:
				choice = int(input(prompt))
			except ValueError:
				# Letters or an empty line: ask again instead of crashing.
				continue
			action = actions.get(choice)
			if action is not None:
				break

		action()


	# Entry-point call: starts the menu when this statement is reached.
	# NOTE(review): this repeats the earlier top-level quick_menu() call in this file.
	quick_menu()