This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# import modules
import os
import requests  # to send our requests through the API
import pandas as pd  # to create a pandas DataFrame and save our data in CSV format

# Get client_id, secret_key, username and password from a text document in
# the current working directory (one value per line).
with open('key.txt') as f:
    # The original called `data.readlines()` on a name that was never bound
    # and threw the result away; the lines must be read from the open file
    # handle and kept.
    data = f.readlines()

client_id = data[0].strip()
secret_key = data[1].strip()
# NOTE(review): the comment above and the later payload dict both rely on
# `username` and `password`; they are assumed to follow on lines 3 and 4 of
# key.txt, in the order the comment lists them -- confirm against the file.
username = data[2].strip()
password = data[3].strip()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# With our secret key and client id we create our basic requests authentication.
auth = requests.auth.HTTPBasicAuth(client_id, secret_key)

# Dictionary with our username, password and type of authentication.
data = {
    'grant_type': 'password',  # mode of authentication
    'username': username,
    'password': password,
}
headers = { |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
api = 'https://oauth.reddit.com'

# Ask for the newest posts of r/wallstreetbets, 100 per request.
res = requests.get(
    f'{api}/r/wallstreetbets/new',
    headers=headers,
    params={'limit': '100'},
)
data = res.json()
# `data` now contains the 100 most recent posts from the subreddit in JSON format.
# JSON is a very popular format that resembles a dictionary.
# Feel free to check it out or trace the JSON document tree for what you want.
# For this tutorial we will be using a pandas DataFrame.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
while True: | |
res = requests.get(f'{api}/r/wallstreetbets/new', headers = headers, | |
params = {'limit': '100', 'after':df['name'].iloc[len(df) - 1]}) | |
if len(res.json()['data']['children']) == 0 : | |
break | |
for post in res.json()['data']['children']: | |
df = df.append({ | |
'name':post['data']['name'], | |
'created_utc': post['data']['created_utc'], | |
'subreddit':post['data']['subreddit'], |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import warnings

# The append method is deprecated, so we silence all warnings to keep your
# terminal looking clean and sharp. Be careful when using this in other
# scenarios though: knowing what the warnings say might help in debugging.
warnings.filterwarnings("ignore")

subreddits = ['investing', 'wallstreetbets', 'StockMarket']  # subreddits
all_df = pd.DataFrame({ #the final df | |
'name':[], | |
'created_utc': [], | |
'subreddit':[], |