Skip to content

Instantly share code, notes, and snippets.

@odunola499
Created September 10, 2022 14:18
Show Gist options
  • Save odunola499/50d71d0c284caff6eb614c5e1ff52696 to your computer and use it in GitHub Desktop.
Save odunola499/50d71d0c284caff6eb614c5e1ff52696 to your computer and use it in GitHub Desktop.
import warnings

# Silence every warning so the terminal output stays clean. Be careful when
# reusing this in other scenarios: knowing what warnings say often helps in
# debugging.
warnings.filterwarnings("ignore")

# Download the newest posts of several subreddits into one DataFrame (all_df).
# NOTE(review): `pd`, `requests`, `api` and `headers` are not defined in this
# snippet -- presumably they are set up earlier (imports, API base URL and
# auth headers); confirm before running it standalone.

subreddits = ['investing', 'wallstreetbets', 'StockMarket']  # subreddits

# Fields copied from each post's 'data' payload. They double as the column
# names (and column order) of the per-subreddit and final DataFrames.
post_fields = [
    'name',
    'created_utc',
    'subreddit',
    'title',
    'selftext',
    'upvote_ratio',
    'ups',
    'downs',
    'score',
]

frames = []  # one DataFrame per subreddit, concatenated once at the end
for sub in subreddits:
    print(f'downloading messages from {sub}')
    rows = []  # plain dicts; DataFrame.append is gone in pandas 2.0 and was O(n^2)
    after = None  # 'name' of the last post fetched; None asks for the first page
    while True:
        params = {'limit': '100'}
        if after is not None:
            params['after'] = after
        res = requests.get(f'{api}/r/{sub}/new', headers=headers, params=params)
        # Fail loudly on HTTP errors instead of silently restarting from page
        # one, which is what the previous bare `except:` did.
        res.raise_for_status()
        children = res.json()['data']['children']  # parse the body only once
        if not children:
            break  # an empty page means the listing is exhausted
        rows.extend(
            {field: post['data'][field] for field in post_fields}
            for post in children
        )
        after = rows[-1]['name']  # continue after the last post just received
    df = pd.DataFrame(rows, columns=post_fields)
    frames.append(df)

# the final df; pd.concat raises on an empty list, so fall back to an empty
# frame with the expected columns when there were no subreddits to fetch.
if frames:
    all_df = pd.concat(frames, ignore_index=True)
else:
    all_df = pd.DataFrame(columns=post_fields)
print('Done!')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment