Skip to content

Instantly share code, notes, and snippets.

@georgefree123
georgefree123 / comments_example.csv
Last active March 5, 2021 10:16
comments_example.csv
id body likes awards created controversiality parent_id comment ticker a comment occurances a comment ticker b comment occurances b comment ticker c comment occurances c
<id of the comment> <comment body/content> <# of likes> <awards received> <comment creation datetime> <controversiality score> <parent id> <#1 ticker from comments> <# occurrences> <#2 ticker from comments> <# occurrences> <#3 ticker from comments> <# occurrences>
id title content author_id author flair likes created sentiment comment a a comment b b comment c c sum comment sentiment
<id of the post> <title of the post> <post body/content> <id of the author> <author name> <post flair> <# of likes> <post creation datetime> <post sentiment (+ve/-ve)> <#1 ticker from comments> <# occurrences> <#2 ticker from comments> <# occurrences> <#3 ticker from comments> <# occurrences> <sum sentiment of all comments>
import nltk
# used to get context
from nltk.corpus import twitter_samples
from nltk.tag import pos_tag
#from nltk import FreqDist
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet
from nltk.corpus import stopwords
# NASDAQ SOURCE:
# https://www.nasdaq.com/market-activity/stocks/screener?exchange=NASDAQ&render=download
class Ticker:
def list_files(self, folder='./tickers'):
    """Return the paths of the ``.csv`` files directly inside *folder*.

    The result is exactly what ``glob.glob`` yields for the pattern
    ``<folder>/*.csv``; no sorting or filtering is applied here.
    """
    csv_pattern = f"{folder}/*.csv"
    return glob.glob(csv_pattern)
def get_tickers(self, csv_path):
# just grab all of ticker symbols from
class ParseComments:
    """Reads individual fields out of a Reddit comment dictionary."""

    def convert_comment_dict(self, comment_dict):
        """Read the comment fields, substituting "-" for missing values.

        A value counts as missing when it is ``None`` or the empty string.
        Other falsy values (for example ``0`` likes) are kept unchanged.

        Args:
            comment_dict: mapping that must provide the keys ``id``,
                ``body``, ``likes``, ``total_awards_received``, ``created``,
                ``controversiality`` and ``parent_id``.

        Raises:
            KeyError: if any of those keys is absent from ``comment_dict``.
        """
        def or_dash(value):
            # Explicit comparison: ``x != None or ""`` only ever tested for
            # None, so empty strings slipped through un-replaced.
            return "-" if value is None or value == "" else value

        # NOTE(review): nothing below is used or returned in the visible part
        # of this method -- presumably the snippet is truncated; confirm
        # against the full file before relying on a return value.
        id = or_dash(comment_dict['id'])
        body = or_dash(comment_dict['body'])
        likes = or_dash(comment_dict['likes'])
        awards = or_dash(comment_dict['total_awards_received'])
        created = or_dash(comment_dict['created'])
        # Previously this copied comment_dict['id'] by mistake.
        controversiality = or_dash(comment_dict['controversiality'])
        parent_id = or_dash(comment_dict['parent_id'])
class ParsePost:
    """Reads individual fields out of a Reddit post JSON object."""

    @staticmethod
    def parse_post(post_json):
        """Read the post fields out of ``post_json``.

        Declared as a static method because the function takes no ``self``:
        without the decorator, ``ParsePost().parse_post(data)`` raised
        ``TypeError``. Both ``ParsePost.parse_post(data)`` and the instance
        form now work.

        Args:
            post_json: mapping that must provide the keys ``id``, ``title``,
                ``selftext``, ``author_fullname``, ``author``,
                ``link_flair_text`` and ``likes``.

        Raises:
            KeyError: if any of those keys is absent from ``post_json``.
        """
        # parse Reddit post json
        # NOTE(review): nothing below is used or returned in the visible part
        # of this method -- presumably the snippet is truncated; confirm
        # against the full file before relying on a return value.
        id = post_json['id']
        title = post_json['title']
        content = post_json['selftext']
        author_id = post_json['author_fullname']
        author = post_json['author']
        flair = post_json['link_flair_text']
        likes = post_json['likes']
@georgefree123
georgefree123 / reddit-api.py
Last active March 5, 2021 13:35
Reddit API example for scraping WallStreetBets
import requests
import json
class RedditAPI:
def request_data(self, url, timeout=10):
    """Send a GET request to *url* and return the decoded JSON body.

    Args:
        url: address to request.
        timeout: seconds to wait on the server, passed straight to
            ``requests.get``. Without a timeout the call can block
            indefinitely on an unresponsive server.

    Returns:
        Whatever ``Response.json()`` produces for the response body.

    Raises:
        requests.exceptions.RequestException: on connection failures or
            when the timeout elapses (see the ``requests`` documentation).
    """
    headers = {'User-Agent': 'python:1:1.0 (by /u/georgefree123)'}
    response = requests.get(url, headers=headers, timeout=timeout)
    return response.json()
def get_recent_posts(self, count=10, sort='new', subreddit='wallstreetbets'):