georgefree123

## comments_example.csv

          
            id
            body
            likes
            awards
            created
            controversiality
            parent_id
            comment ticker a
            comment occurances a
            comment ticker b
            comment occurances b
            comment ticker c
            comment occurances c

            
              <id of the comment>
              <comment body/content>
              <# of likes>
              <awards received>
              <comment creation datetime>
              <controversiality score>
              <parent id>
              <#1 ticker from comments>
              <# occurrences>
              <#2 ticker from comments>
              <# occurrences>
              <#3 ticker from comments>
              <# occurrences>

## post_example.csv

          
            id
            title
            content
            author_id
            author
            flair
            likes
            created
            sentiment
            comment a
            a
            comment b
            b
            comment c
            c
            sum comment sentiment

            
              <id of the post>
              <title of the post>
              <post body/content>
              <id of the author>
              <author name>
              <post flair>
              <# of likes>
              <post creation datetime>
              <post sentiment (+ve/-ve)>
              <#1 ticker from comments>
              <# occurrences>
              <#2 ticker from comments>
              <# occurrences>
              <#3 ticker from comments>
              <# occurrences>
              <sum sentiment of all comments>

## sentiment_analysis.py
import nltk
# used to get context
from nltk.corpus import twitter_samples
from nltk.tag import pos_tag
#from nltk import FreqDist
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet

from nltk.corpus import stopwords

## tickers.py
# NASDAQ SOURCE:
# https://www.nasdaq.com/market-activity/stocks/screener?exchange=NASDAQ&render=download

class Ticker:
   def list_files(self, folder='./tickers'):
      """Return the paths of the ``.csv`` files found directly in *folder*.

      Args:
         folder: directory to search; defaults to ``./tickers``.

      Returns:
         The list produced by ``glob.glob`` for the pattern
         ``<folder>/*.csv`` (empty when nothing matches).
      """
      csv_pattern = "{}/*.csv".format(folder)
      matching_paths = glob.glob(csv_pattern)
      return matching_paths

   def get_tickers(self, csv_path):
      # just grab all of ticker symbols from

## parse_comments.py
class ParseComments:
   def convert_comment_dict(self, comment_dict):
      """Pull the fields of interest out of a raw comment dict.

      Every field whose value is ``None`` or an empty string is replaced by
      the placeholder ``"-"``; any other value (including ``0``) is kept.

      Args:
         comment_dict: mapping that provides the keys ``id``, ``body``,
            ``likes``, ``total_awards_received``, ``created``,
            ``controversiality`` and ``parent_id``.

      Raises:
         KeyError: if one of those keys is missing from ``comment_dict``.
      """
      def _or_dash(value):
         # Only None and "" count as missing; a plain truthiness test would
         # wrongly discard legitimate values such as 0 likes.
         return "-" if value is None or value == "" else value

      # Local names (including ``id``, which shadows the builtin) are kept
      # unchanged because the remainder of this method is not visible here
      # and may refer to them.
      id = _or_dash(comment_dict['id'])
      body = _or_dash(comment_dict['body'])
      likes = _or_dash(comment_dict['likes'])
      awards = _or_dash(comment_dict['total_awards_received'])
      created = _or_dash(comment_dict['created'])
      # Read from the 'controversiality' key itself, not from 'id'.
      controversiality = _or_dash(comment_dict['controversiality'])
      parent_id = _or_dash(comment_dict['parent_id'])

## parse-posts.py
class ParsePost():
   def parse_post(post_json):
      """Read selected fields out of a Reddit post's JSON object.

      NOTE(review): there is no ``self`` parameter although this is defined
      inside ``ParsePost``. Called on an instance, the instance itself would
      be bound to ``post_json`` - confirm it is only ever called on the
      class.

      Args:
         post_json: mapping that provides the keys ``id``, ``title``,
            ``selftext``, ``author_fullname``, ``author``,
            ``link_flair_text`` and ``likes``; a missing key raises
            ``KeyError``.
      """
      # parse Reddit post json
      id=post_json['id']
      title=post_json['title']
      # the post body is stored under 'selftext'
      content=post_json['selftext']
      author_id=post_json['author_fullname']
      author=post_json['author']
      flair=post_json['link_flair_text']
      likes=post_json['likes']

## reddit-api.py
import requests
import json

class RedditAPI:
   def request_data(self, url):
      """Issue a GET request to *url* and return the decoded JSON body.

      The request is sent with a fixed custom ``User-Agent`` header.

      Args:
         url: address to fetch.

      Returns:
         Whatever ``Response.json()`` yields for the reply.
      """
      response = requests.get(
         url,
         headers={'User-Agent': 'python:1:1.0 (by /u/georgefree123)'},
      )
      return response.json()

   def get_recent_posts(self, count=10, sort='new', subreddit='wallstreetbets'):
	id	body	likes	awards	created	controversiality	parent_id	comment ticker a	comment occurances a	comment ticker b	comment occurances b	comment ticker c	comment occurances c
	<id of the comment>	<comment body/content>	<# of likes>	<awards received>	<post creation datetime>	<controversiality score>	<parent id>	<#1 ticker from comments>	<# occurances>	<#2 ticker from comments>	<# occurances>	<#3 ticker from comments>	<# occurances>
	id	title	content	author_id	author	flair	likes	created	sentiment	comment a	a	comment b	b	comment c	c	sum comment sentiment
	<id of the post>	<title of the post>	<post body/content>	<id of the author>	<author name>	<post flair>	<# of likes>	<post creation datetime>	<post sentiment (+ve/-ve)>	<#1 ticker from comments>	<# occurances>	<#2 ticker from comments>	<# occurances>	<#3 ticker from comments>	<# occurances>	<sum sentiment of all comments>
	import nltk
	# used to get context
	from nltk.corpus import twitter_samples
	from nltk.tag import pos_tag
	#from nltk import FreqDist
	from nltk.stem import WordNetLemmatizer
	from nltk.corpus import wordnet

	from nltk.corpus import stopwords
	# NASDAQ SOURCE:
	# https://www.nasdaq.com/market-activity/stocks/screener?exchange=NASDAQ&render=download

	class Ticker:
	def list_files(self, folder='./tickers'):
	# list all of the files in the ./tickers directory
	return glob.glob("{}/*.csv".format(folder))

	def get_tickers(self, csv_path):
	# just grab all of ticker symbols from
	class ParseComments:
	def convert_comment_dict(self, comment_dict):

	id=comment_dict['id'] if comment_dict['id'] != None or "" else "-"
	body=comment_dict['body'] if comment_dict['body'] != None or "" else "-"
	likes=comment_dict['likes'] if comment_dict['likes'] != None or "" else "-"
	awards=comment_dict['total_awards_received'] if comment_dict['total_awards_received'] != None or "" else "-"
	created=comment_dict['created'] if comment_dict['created'] != None or "" else "-"
	controversiality=comment_dict['id'] if comment_dict['controversiality'] != None or "" else "-"
	parent_id=comment_dict['parent_id'] if comment_dict['parent_id'] != None or comment_dict['parent_id'] != "" else "-"
	class ParsePost():
	def parse_post(post_json):
	# parse Reddit post json
	id=post_json['id']
	title=post_json['title']
	content=post_json['selftext']
	author_id=post_json['author_fullname']
	author=post_json['author']
	flair=post_json['link_flair_text']
	likes=post_json['likes']
	import requests
	import json

	class RedditAPI:
	def request_data(self, url):
	headers = {'User-Agent': 'python:1:1.0 (by /u/georgefree123)'}
	data = requests.get(url, headers=headers).json()
	return data

	def get_recent_posts(self, count=10, sort='new', subreddit='wallstreetbets'):