# odunola499/final_script.py

## final_script.py
import warnings

# Blanket warning suppression, kept so the script's terminal output stays as
# quiet as before. It was originally added to hide the DataFrame.append
# deprecation warning; the loop below no longer calls append.
# NOTE(review): consider removing or narrowing this filter -- warnings are
# often the first hint when debugging.
warnings.filterwarnings("ignore")

subreddits = ['investing', 'wallstreetbets', 'StockMarket']  # subreddits to download

# Fields copied out of each post's 'data' payload; this is also the column
# order of every frame built below.
post_fields = [
    'name',
    'created_utc',
    'subreddit',
    'title',
    'selftext',
    'upvote_ratio',
    'ups',
    'downs',
    'score',
]

all_rows = []  # rows from every subreddit, in download order
all_df = pd.DataFrame(columns=post_fields)  # the final df
for sub in subreddits:
    print(f'downloading messages from {sub}')
    rows = []
    # 'name' of the last post fetched so far; None means "ask for the first
    # page". Tracking it explicitly replaces the old bare `except:`, which
    # also swallowed request failures and restarted paging from page one.
    after = None

    while True:
        params = {'limit': '100'}
        if after is not None:
            params['after'] = after
        res = requests.get(f'{api}/r/{sub}/new', headers=headers, params=params)
        # Surface HTTP errors here rather than failing later while indexing
        # the JSON body.
        res.raise_for_status()

        children = res.json()['data']['children']  # parse the body once per page
        if not children:
            break  # an empty page ends the listing for this subreddit

        for post in children:
            data = post['data']
            rows.append({field: data[field] for field in post_fields})
        after = rows[-1]['name']

    # Build the per-subreddit frame in one step; DataFrame.append (one copy
    # of the frame per row) no longer exists in pandas 2.x.
    df = pd.DataFrame(rows, columns=post_fields)
    all_rows.extend(rows)
    # Rebuilt after each subreddit so all_df always holds everything
    # downloaded so far, with a fresh 0..n-1 index.
    all_df = pd.DataFrame(all_rows, columns=post_fields)
print('Done!')
	# --- second copy of the download script, re-indented so it parses ---
# NOTE(review): this section repeats the download script that appears earlier
# in this file; its indentation had been flattened to one tab per line, which
# made it an IndentationError. Running both copies fetches every subreddit
# twice and overwrites all_df -- this looks like a paste artifact, so
# consider deleting one copy.
import warnings

# Blanket warning suppression, kept so terminal output stays as quiet as
# before. It was originally added to hide the DataFrame.append deprecation
# warning; the loop below no longer calls append.
# NOTE(review): consider removing or narrowing this filter.
warnings.filterwarnings("ignore")

subreddits = ['investing', 'wallstreetbets', 'StockMarket']  # subreddits to download

# Fields copied out of each post's 'data' payload; also the column order of
# every frame built below.
post_fields = [
    'name',
    'created_utc',
    'subreddit',
    'title',
    'selftext',
    'upvote_ratio',
    'ups',
    'downs',
    'score',
]

all_rows = []  # rows from every subreddit, in download order
all_df = pd.DataFrame(columns=post_fields)  # the final df
for sub in subreddits:
    print(f'downloading messages from {sub}')
    rows = []
    # 'name' of the last post fetched so far; None means "ask for the first
    # page". This replaces a bare `except:` that also hid request failures
    # and restarted paging from the beginning.
    after = None

    while True:
        params = {'limit': '100'}
        if after is not None:
            params['after'] = after
        res = requests.get(f'{api}/r/{sub}/new', headers=headers, params=params)
        # Surface HTTP errors here rather than failing later while indexing
        # the JSON body.
        res.raise_for_status()

        children = res.json()['data']['children']  # parse the body once per page
        if not children:
            break  # an empty page ends the listing for this subreddit

        for post in children:
            data = post['data']
            rows.append({field: data[field] for field in post_fields})
        after = rows[-1]['name']

    # One frame per subreddit, built in a single step instead of the removed
    # row-by-row DataFrame.append.
    df = pd.DataFrame(rows, columns=post_fields)
    all_rows.extend(rows)
    # Rebuilt after each subreddit so all_df always reflects everything
    # downloaded so far, with a fresh 0..n-1 index.
    all_df = pd.DataFrame(all_rows, columns=post_fields)
print('Done!')