Skip to content

Instantly share code, notes, and snippets.

@financial-python
Last active October 1, 2024 19:00
Show Gist options
  • Save financial-python/1fdd810266da876c274362fb046d7ebc to your computer and use it in GitHub Desktop.
Save financial-python/1fdd810266da876c274362fb046d7ebc to your computer and use it in GitHub Desktop.
How to get stock tickers from Reddit using PRAW
"""
First, we need to create a Reddit account and get an API key to access the Reddit API. Once you have created an account, you can obtain an API key by following these steps:
Go to https://ssl.reddit.com/prefs/apps/
Scroll down to "Developed Applications" and click "Create App"
Choose "script" (the username/password login used below only works with script-type apps) and give your app a name and description.
Set "About url" and "Redirect uri" to "http://localhost:8000"
Click "Create app" and copy the "client_id" and "client_secret" values.
Once you have the API key, you can start coding.
You will also need to install the praw package: pip install praw
"""
#import packages
import praw
import re
from collections import Counter
# Config variables.
# Subreddits whose hot submissions are scanned for ticker mentions.
SUBREDDITS_TO_PARSE = ['wallstreetbets', 'stocks', 'investing', 'StockMarket']

# Words that are commonly written in capitals but are not stock tickers.
# Kept as a list for backward compatibility; every word is listed exactly once
# (the earlier revision repeated 'DD' and 'US').
KNOWN_NOT_STOCKS = [
    'UPVOTE', 'SUPPORT', 'YOLO', 'CLASS', 'ACTION', 'LAWSUIT', 'AGAINST',
    'VALHALLA', 'MOON', 'PE', 'COVID', 'IMO', 'IPO', 'BTC', 'PUT', 'CALL',
    'ROBINHOOD', 'GAIN', 'LOSS', 'PORN', 'WSB', 'I', 'STILL', "DIDN'T",
    'HEAR', 'EBITDA', 'SQUEEZE', 'BS', 'VIX', 'FUD', 'HUT', 'ITM', 'OTM',
    'NO', 'BELL', 'CEO', 'CFO', 'Q1', 'DD', 'MOASS', 'STONK', 'MEME', 'DICK',
    'FOMO', 'EV', 'PIPE', 'HOLD', 'OTC', 'NOKPF', 'TTM', 'SPY',
    'TO', 'A', 'THE', 'FUCK', 'US', 'FUCKING', 'ARE', 'TLDR', 'EDIT', 'IV',
    'SP500', 'SEC', 'GLOBE', 'NEWSWIRE', 'PT',
    'NYSE', 'SPAC', 'FDA', 'DNA', 'HODL', 'USDA', 'PTSD', 'ETF', 'LLC', 'CSE',
    'USA', 'EPS', 'BUY', 'B', 'AM', 'PM', 'SI', 'SP', 'TBA', 'TBD',
]
# Credentials for the Reddit app registered earlier. Replace every placeholder
# with your own values; 'user_agent' is a free-form identifier you make up for
# your application.
reddit_credentials = {
    "client_id": "your_client_id",
    "client_secret": "your_client_secret",
    "username": "your_reddit_username",
    "password": "your_reddit_password",
    "user_agent": "your_user_agent",
}

# Reddit API client used by the rest of the script.
reddit = praw.Reddit(**reddit_credentials)
# Every whitespace-separated token taken from submission titles and from the
# sampled comment of each submission.
word_collection = []

for subreddit_name in SUBREDDITS_TO_PARSE:
    # Walk the first 50 submissions of this subreddit's "hot" listing.
    for post in reddit.subreddit(subreddit_name).hot(limit=50):
        print(f"submission title: {post.title}")
        word_collection += post.title.split()

        # limit=0 drops the "load more comments" placeholders instead of
        # fetching them; list() then flattens the remaining comment tree.
        post.comments.replace_more(limit=0)
        flattened_comments = post.comments.list()

        # Only the first comment of each submission is sampled.
        for first_comment in flattened_comments[:1]:
            print(f"top_level_comment: {first_comment}")
            word_collection += first_comment.body.split()
# NOTE(review): remove_punc, remove_emoji and containsNumber were called by this
# section but are not defined anywhere in the file, so the script stopped with a
# NameError. The definitions below are a best-effort reconstruction of what the
# names suggest -- confirm against the intended behaviour.

# ASCII punctuation except the apostrophe, which is kept so that contractions
# such as "DIDN'T" can still match their entry in KNOWN_NOT_STOCKS.
_PUNCTUATION_RE = re.compile(r'[!"#$%&()*+,\-./:;<=>?@\[\\\]^_`{|}~]')

# Unicode ranges covering the most common emoji and pictographs.
_EMOJI_RE = re.compile(
    "["
    "\U0001F600-\U0001F64F"  # emoticons
    "\U0001F300-\U0001F5FF"  # symbols & pictographs
    "\U0001F680-\U0001F6FF"  # transport & map symbols
    "\U0001F1E0-\U0001F1FF"  # regional-indicator flags
    "\U0001F900-\U0001F9FF"  # supplemental symbols & pictographs
    "\U00002600-\U000026FF"  # miscellaneous symbols
    "\U00002700-\U000027BF"  # dingbats
    "]+"
)


def remove_punc(word):
    """Return *word* without ASCII punctuation or surrounding apostrophes."""
    return _PUNCTUATION_RE.sub("", word).strip("'")


def remove_emoji(word):
    """Return *word* with emoji characters removed."""
    return _EMOJI_RE.sub("", word)


def containsNumber(word):
    """Return True if *word* contains at least one digit."""
    return any(char.isdigit() for char in word)


# A symbol must be mentioned MORE than this many times to count as trending.
MIN_MENTIONS = 10

# Set copy of the exclusion list so each membership test is O(1).
excluded_words = set(KNOWN_NOT_STOCKS)

# Words that may be stock tickers: all-caps, digit-free and not excluded.
potential_stock_symbols = []
for word in word_collection:
    cleaned_word = remove_emoji(remove_punc(word))
    if (
        cleaned_word.isupper()
        and not containsNumber(cleaned_word)
        and cleaned_word not in excluded_words
    ):
        potential_stock_symbols.append(cleaned_word)

# Count the mentions and keep the symbols that clear the threshold.
cnt = Counter(potential_stock_symbols)
trending_tickers = [
    symbol for symbol, mentions in cnt.items() if mentions > MIN_MENTIONS
]
print(trending_tickers)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment