Last active
August 24, 2022 23:37
-
-
Save emarte91/c27848d5f473e3ac8afa0c608cc0d62a to your computer and use it in GitHub Desktop.
WSB Scraper (work in progress)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import praw | |
import pandas as pd | |
import yfinance as yf | |
import datetime as dt | |
from psaw import PushshiftAPI | |
# Global variables
# Pushshift client used for searching past submissions.
api = PushshiftAPI()

# Read-only Reddit instance; the credential strings are empty here and must
# be filled in before the script can talk to Reddit.
reddit_read_only = praw.Reddit(
    client_id="",
    client_secret="",
    user_agent="",
)

# Handles shared by the menu actions below.
subreddit = reddit_read_only.subreddit("wallstreetbets")
# NOTE(review): "page id" looks like a placeholder for a real submission id.
submission = reddit_read_only.submission("page id")
# Functions
# Scrape the daily discussion comments
def scrape_comments():
    """Print every top-level comment of the configured submission.

    Prints the submission title immediately followed by its URL, then each
    top-level comment body with a dashed separator line after it, and
    finally hands control back to the main menu.
    """
    heading = submission.title + submission.url
    print(heading)

    forest = submission.comments
    # limit=0: do not fetch any "more comments" stubs (see PRAW docs).
    forest.replace_more(limit=0)
    for comment in forest:
        print(comment.body, "--------", sep="\n")

    quick_menu()
# Yahoo Finance 30-day scan
def yahoo_test():
    """Prompt for a ticker symbol and print its 30-day price history.

    Reads the symbol from standard input, requests the history with
    ``period='30d'`` through yfinance, prints the resulting table, and then
    returns to the main menu.
    """
    # Strip stray whitespace so " AAPL " is looked up as "AAPL".
    ticker = input("Enter Stock Ticker").strip()
    data = yf.Ticker(ticker).history(period='30d')
    # history() returns a DataFrame; printing `data.info` (no call) only
    # shows the bound-method repr, so print the table itself.
    print(data)
    quick_menu()
# test db
def hot_post_pull():
    """Count ``$``-prefixed words in the day's top post titles.

    Despite the name, this reads the subreddit's *top* listing for the day.
    Each title is split on whitespace; a word is kept when it starts with
    ``$``, is not in the exclusion list, and does not end with a comma. The
    per-word counts are printed, then control returns to the main menu.
    """
    known_not_stocks = ['$26k', '$90k','$1mil','$12','$11','FUCK','IM','SEC','IN','GAIN','$500k','$375','$25']

    # One list of words per post title.
    titles_as_words = [
        post.title.split() for post in subreddit.top(time_filter="day")
    ]

    potential_stock_symbols = [
        token
        for tokens in titles_as_words
        for token in tokens
        if token.startswith('$')
        and not (token in known_not_stocks or token.endswith(','))
    ]

    # Number of times each candidate symbol was mentioned today.
    mentions = pd.Series(potential_stock_symbols).value_counts()
    print(mentions)
    quick_menu()
def scan_yesterday_tickers():
    """Print cashtag mentions in r/wallstreetbets submissions since yesterday.

    Searches Pushshift for submissions created after yesterday's date. For
    every title containing at least one ``$``-prefixed word, prints the title
    and its creation time. Afterwards prints how many titles mentioned each
    such word and returns to the main menu.
    """
    all_tickers = []
    yesterday = dt.date.today() - dt.timedelta(days=1)
    searches = api.search_submissions(after=yesterday,
                                      subreddit='wallstreetbets',
                                      filter=['url', 'author', 'title'])
    for search in searches:
        # A set keeps one mention per title, however often the word repeats.
        cashtags = {
            word for word in search.title.split() if word.startswith('$')
        }
        if not cashtags:
            continue
        # extend, not append: a list of lists cannot be counted because
        # value_counts() needs hashable values, and the goal is a tally per
        # ticker rather than per combination of tickers.
        all_tickers.extend(cashtags)
        # fromtimestamp() without a tz argument renders the epoch value in
        # the machine's local time zone.
        created = dt.datetime.fromtimestamp(search.created_utc).isoformat()
        print(f"{search.title} + {created}")
    count = pd.Series(all_tickers).value_counts()
    print(count)
    quick_menu()
def quick_menu():
    """Show the main menu and run the action the user selects.

    Keeps prompting until the input is one of the listed numbers. Input that
    is not a number is treated like any other invalid choice instead of
    raising ``ValueError``.
    """
    prompt = "1. Scan Daily Reddit Post\n2. Read Daily WSB Comments\n3. Check Stock Price 30d\n4: Check Yesterdays Tickers\n::"
    while True:
        try:
            choice = int(input(prompt))
        except ValueError:
            # Non-numeric input: ask again rather than crash.
            continue

        if choice == 1:
            hot_post_pull()
        elif choice == 2:
            scrape_comments()
        elif choice == 3:
            yahoo_test()
        elif choice == 4:
            scan_yesterday_tickers()
        else:
            # Unknown number: ask again (loop instead of recursing).
            continue
        return
# Script entry point: show the menu as soon as the file is executed.
quick_menu()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment