How to get stock tickers from Reddit using PRAW
""" | |
First, we need to create a Reddit account and get an API key to access the Reddit API. Once you have created an account, you can obtain an API key by following these steps: | |
Go to https://ssl.reddit.com/prefs/apps/ | |
Scroll down to "Developed Applications" and click "Create App" | |
Choose "Web app" and give your app a name and description. | |
Set "About url" and "Redirect uri" to "http://localhost:8000" | |
Click "Create app" and copy the "client_id" and "client_secret" values. | |
Once you have the API key, you can start coding. | |
you will also need to install praw package: pip install praw | |
""" | |
#import packages
import praw
import re
from collections import Counter

#config variables:
#create list of the subreddits you want to parse:
SUBREDDITS_TO_PARSE = ['wallstreetbets', 'stocks', 'investing', 'StockMarket']
#words that may be capitalized but are not stocks:
KNOWN_NOT_STOCKS = ['UPVOTE', 'SUPPORT', 'YOLO', 'CLASS', 'ACTION', 'LAWSUIT', 'AGAINST', 'VALHALLA', 'MOON', 'PE', 'COVID', 'IMO', 'IPO', 'BTC', 'PUT', 'CALL',
                    'ROBINHOOD', 'GAIN', 'LOSS', 'PORN', 'WSB', 'I', 'STILL', "DIDN'T", 'HEAR', 'EBITDA', 'SQUEEZE', 'BS', 'VIX', 'FUD', 'HUT', 'ITM', 'OTM',
                    'NO', 'BELL', 'CEO', 'CFO', 'Q1', 'DD', 'MOASS', 'STONK', 'MEME', 'DICK', 'FOMO', 'EV', 'PIPE', 'HOLD', 'OTC', 'NOKPF', 'TTM', 'SPY',
                    'TO', 'A', 'THE', 'FUCK', 'US', 'FUCKING', 'ARE', 'TLDR', 'EDIT', 'IV', 'SP500', 'SEC', 'GLOBE', 'NEWSWIRE', 'PT',
                    'NYSE', 'SPAC', 'FDA', 'DNA', 'HODL', 'USDA', 'PTSD', 'ETF', 'LLC', 'CSE', 'USA', 'EPS', 'BUY', 'B', 'AM', 'PM', 'SI', 'SP', 'TBA', 'TBD']
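
#helper functions used below to clean up words
#NOTE: the original gist calls remove_punc, remove_emoji and containsNumber without defining them;
#the implementations here are a minimal sketch of what those helpers likely do, not the author's originals
def remove_punc(word):
    #strip punctuation such as $, ., ! and commas, but keep apostrophes so words like "DIDN'T"
    #still match the exclusion list above
    return re.sub(r"[^\w']", "", word)

def remove_emoji(word):
    #drop any non-ASCII characters, which covers emoji
    return word.encode("ascii", "ignore").decode("ascii")

def containsNumber(word):
    #return True if the word contains at least one digit
    return any(ch.isdigit() for ch in word)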
#create a Reddit instance using the client_id and client_secret from the app we created earlier
#the 'user_agent' is just a unique identifier for your application that you can make up
reddit = praw.Reddit(
    client_id="your_client_id",
    client_secret="your_client_secret",
    username="your_reddit_username",
    password="your_reddit_password",
    user_agent="your_user_agent",
)
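
#optional sanity check (not part of the original gist): print the authenticated username
#to confirm the credentials above work; this call will fail if the login is wrong
print(reddit.user.me())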
#list of words in titles and comments
word_collection = []
#loop through subreddits
for sub in SUBREDDITS_TO_PARSE:
    #create instance of the subreddit class
    subreddit_instance = reddit.subreddit(sub)
    #get the first 50 submissions from the subreddit's hot listing
    submissions = subreddit_instance.hot(limit=50)
    #loop through submissions, split each title into words, add them to the list
    for submission in submissions:
        print(f"submission title: {submission.title}")
        title_words = submission.title.split()
        word_collection.extend(title_words)
        #get submission comments
        submission.comments.replace_more(limit=0)  # flatten the tree, dropping "load more comments" placeholders
        comments = submission.comments.list()  # all comments
        #we can also look through the comments to see what words they contain
        #(here we only scan the first comment of each submission; widen the slice to scan more)
        for top_level_comment in comments[:1]:
            print(f"top_level_comment: {top_level_comment}")
            comment_words = top_level_comment.body.split()
            word_collection.extend(comment_words)
#list to hold words that may be stock tickers
potential_stock_symbols = []
#loop through word_collection, clean up each word, and check whether it could be a stock ticker
for word in word_collection:
    cleaned_word = remove_punc(word)
    cleaned_word = remove_emoji(cleaned_word)
    if cleaned_word.isupper() and not containsNumber(cleaned_word) and cleaned_word not in KNOWN_NOT_STOCKS:
        potential_stock_symbols.append(cleaned_word)
#see which symbols appear more than 10 times in the list
cnt = Counter(potential_stock_symbols)
trending_tickers = [k for k, v in cnt.items() if v > 10]
print(trending_tickers)
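
#optional extra (not in the original gist): Counter.most_common shows how often each
#candidate symbol was mentioned, sorted from most to least frequent
print(cnt.most_common(20))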