How to get stock tickers from Reddit using PRAW
""" | |
First, we need to create a Reddit account and get an API key to access the Reddit API. Once you have created an account, you can obtain an API key by following these steps: | |
Go to https://ssl.reddit.com/prefs/apps/ | |
Scroll down to "Developed Applications" and click "Create App" | |
Choose "Web app" and give your app a name and description. | |
Set "About url" and "Redirect uri" to "http://localhost:8000" | |
Click "Create app" and copy the "client_id" and "client_secret" values. | |
Once you have the API key, you can start coding. | |
you will also need to install praw package: pip install praw | |
""" | |
#import packages
import praw
import re
from collections import Counter

#config variables:
#create list of the subreddits you want to parse:
SUBREDDITS_TO_PARSE = ['wallstreetbets', 'stocks', 'investing', 'StockMarket']
#words that may be capitalized but are not stocks:
KNOWN_NOT_STOCKS = ['UPVOTE', 'SUPPORT', 'YOLO', 'CLASS', 'ACTION', 'LAWSUIT', 'AGAINST', 'VALHALLA', 'MOON', 'PE', 'COVID', 'IMO', 'IPO', 'BTC', 'PUT', 'CALL',
                    'ROBINHOOD', 'GAIN', 'LOSS', 'PORN', 'WSB', 'I', 'STILL', "DIDN'T", 'HEAR', 'EBITDA', 'SQUEEZE', 'BS', 'VIX', 'FUD', 'HUT', 'ITM', 'OTM',
                    'NO', 'BELL', 'CEO', 'CFO', 'Q1', 'DD', 'MOASS', 'STONK', 'MEME', 'DICK', 'FOMO', 'EV', 'PIPE', 'HOLD', 'OTC', 'NOKPF', 'TTM', 'SPY',
                    'TO', 'A', 'THE', 'FUCK', 'US', 'FUCKING', 'ARE', 'TLDR', 'EDIT', 'IV', 'SP500', 'SEC', 'GLOBE', 'NEWSWIRE', 'PT',
                    'NYSE', 'SPAC', 'FDA', 'DNA', 'HODL', 'USDA', 'PTSD', 'ETF', 'LLC', 'CSE', 'USA', 'EPS', 'BUY', 'B', 'AM', 'PM', 'SI', 'SP', 'TBA', 'TBD']
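
#helper functions used below to clean up words
#NOTE: the original gist calls remove_punc, remove_emoji and containsNumber without defining them;
#the implementations here are a minimal sketch of what those helpers likely do, not the author's originals
def remove_punc(word):
    #strip punctuation such as $, ., ! and commas, but keep apostrophes so words like "DIDN'T"
    #still match the exclusion list above
    return re.sub(r"[^\w']", "", word)

def remove_emoji(word):
    #drop any non-ASCII characters, which covers emoji
    return word.encode("ascii", "ignore").decode("ascii")

def containsNumber(word):
    #return True if the word contains at least one digit
    return any(ch.isdigit() for ch in word)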
#create a Reddit instance using the client_id and client_secret from the app we created earlier
#the 'user_agent' is just a unique identifier for your application that you can make up
reddit = praw.Reddit(
    client_id="your_client_id",
    client_secret="your_client_secret",
    username="your_reddit_username",
    password="your_reddit_password",
    user_agent="your_user_agent",
)
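
#optional sanity check (not part of the original gist): print the authenticated username
#to confirm the credentials above work; this call will fail if the login is wrong
print(reddit.user.me())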
#list of words in titles and comments
word_collection = []
#loop through subreddits
for sub in SUBREDDITS_TO_PARSE:
    #create instance of the subreddit class
    subreddit_instance = reddit.subreddit(sub)
    #get the first 50 submissions from the subreddit's hot listing
    submissions = subreddit_instance.hot(limit=50)
    #loop through submissions, split each title into words, add them to the list
    for submission in submissions:
        print(f"submission title: {submission.title}")
        title_words = submission.title.split()
        word_collection.extend(title_words)
        #get submission comments
        submission.comments.replace_more(limit=0)  # flatten the tree, dropping "load more comments" placeholders
        comments = submission.comments.list()  # all comments
        #we can also look through the comments to see what words they contain
        #(here we only scan the first comment of each submission; widen the slice to scan more)
        for top_level_comment in comments[:1]:
            print(f"top_level_comment: {top_level_comment}")
            comment_words = top_level_comment.body.split()
            word_collection.extend(comment_words)
#list to hold words that may be stock tickers
potential_stock_symbols = []
#loop through word_collection, clean up each word, and check whether it could be a stock ticker
for word in word_collection:
    cleaned_word = remove_punc(word)
    cleaned_word = remove_emoji(cleaned_word)
    if cleaned_word.isupper() and not containsNumber(cleaned_word) and cleaned_word not in KNOWN_NOT_STOCKS:
        potential_stock_symbols.append(cleaned_word)
#see which symbols appear more than 10 times in the list
cnt = Counter(potential_stock_symbols)
trending_tickers = [k for k, v in cnt.items() if v > 10]
print(trending_tickers)
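
#optional extra (not in the original gist): Counter.most_common shows how often each
#candidate symbol was mentioned, sorted from most to least frequent
print(cnt.most_common(20))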