Skip to content

Instantly share code, notes, and snippets.

@nikolak
Created April 12, 2013 21:33
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save nikolak/5375314 to your computer and use it in GitHub Desktop.
Save nikolak/5375314 to your computer and use it in GitHub Desktop.
#!/usr/bin/python
#
# Script Name: getRedditJSONSubmissionData.py
# Usage: ./getRedditJSONSubmissionData.py [-u USER -p PASS] [-r SUBREDDIT] [-m MODE] FILENAME
# ----------------------------------------------------------------------------
# This script will average one request every two seconds.
# If the servers return data faster, you might
# need to change the sleep time to avoid going over the API limits.
# Also, make sure you change the settings in your Reddit account
# to get 100 objects at a time, or use the URL parameter "limit=100"
# (the page-size parameter is "limit", not "count"; this script already sends it).
#
# Also, the code to handle errors if a non-status 200 response is
# received should be improved to
# eventually stop requesting after X amount of failures --
# this might happen if Reddit's servers go down
# for an extended time period.
# ---------------------------------------------------------------------------
import json
import time
import argparse
import requests
# ===== To modify ======
# Sent as the User-Agent header on every request made by main().
user_agent = "My Awesome Reddit Python Script"
# Number of consecutive non-200 responses tolerated before main() gives up
# on the current page cursor and restarts from the first page.
maximum_retries = 5
# NOTE(review): not read anywhere in the code visible here.
debug = False
# ======================
# Listing URL template: {sub} is the subreddit name and {aft} is the "after"
# cursor returned by the previous page ("" for the first page).
# HTTPS is used directly so requests are not bounced through an
# http -> https redirect on every page.
base_url = "https://www.reddit.com/r/{sub}/new/.json?limit=100&after={aft}"
def main(username, password, subreddit, output_file, mode):
    """Page through a subreddit's /new listing and record every submission.

    One page is requested every two seconds, following the listing's
    ``after`` cursor. For each submission a line of the form
    "Submission in <subreddit> by <author>" is written to ``output_file``
    and echoed to stdout. The function returns once the listing reports
    that there is no further page.

    Args:
        username: Reddit user name, or None to make anonymous requests.
        password: Reddit password, or None to make anonymous requests.
        subreddit: Name of the subreddit to read.
        output_file: Path of the file results are written to.
        mode: Mode string handed to open() for ``output_file``.

    Raises:
        IOError: If ``output_file`` cannot be opened.

    Exceptions raised by ``requests`` for network failures are not caught
    here and propagate to the caller.
    """
    after = ""
    session = requests.Session()
    session.headers.update({'User-Agent': user_agent})

    # Log in only when both credentials were supplied.
    if username is not None and password is not None:
        credentials = {'user': username,
                       'passwd': password,
                       'api_type': 'json'}
        # HTTPS so the password is not sent in clear text (and so the POST
        # is not turned into a GET by an http -> https redirect).
        login_response = session.post('https://www.reddit.com/api/login',
                                      data=credentials)
        if login_response.status_code != 200:
            print("Login request failed with HTTP status {}".format(
                login_response.status_code))

    current_retries = 0  # Count of consecutive failed requests.

    # Open the file once for the whole run. Re-opening it on every
    # iteration leaked handles and, with mode "w", truncated the data
    # written for earlier pages.
    with open(output_file, mode) as output:
        while True:
            time.sleep(2)  # Sleep for 2 seconds to avoid going over API limit
            url = base_url.format(sub=subreddit, aft=after)
            print("Getting data from: {} ...".format(url))
            response = session.get(url)  # Make request to Reddit API

            if response.status_code != 200:
                # Record the failing status code and URL in the output file.
                output.write(str(response.status_code) + '\n' + url + '\n')
                current_retries += 1
                print("Retrying {} | {}/{}".format(url, current_retries,
                                                   maximum_retries))
                if current_retries >= maximum_retries:
                    # Give up on this cursor and restart from the first page.
                    after = ""
                    current_retries = 0
                continue

            # A successful response ends any streak of failures.
            current_retries = 0
            try:
                url_data = json.loads(response.content)
                # Update the cursor to receive the next batch of
                # submissions on the following iteration.
                after = url_data['data']['after']
                for submission in url_data['data']['children']:
                    out_string = "Submission in {} by {}".format(
                        submission['data']['subreddit'],
                        submission['data']['author'])
                    output.write(out_string + "\n")
                    print(out_string)
            except (ValueError, KeyError, TypeError) as exc:
                # Malformed or unexpected JSON: report it and keep going
                # rather than swallowing every exception type.
                print("Error: {!r}".format(exc))

            if after is None:
                # The listing has no further page; requesting
                # "after=None" would never make progress.
                break
# Command-line entry point: parse the options and hand them to main().
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Python Reddit Script')
    parser.add_argument('-u', action="store", dest="username", default=None,
                        help='Your reddit username')
    parser.add_argument('-p', action="store", dest="password", default=None,
                        help='Your reddit password')
    parser.add_argument('-r', action="store", dest="subreddit", default="all",
                        help='Subreddit to fetch data from')
    # The mode is passed straight to open(); "w" (not "r") is the mode that
    # overwrites an existing file.
    parser.add_argument('-m', action="store", dest="mode", default="a",
                        help='File mode: "a" appends to an existing file, '
                             '"w" overwrites old data if there is any')
    parser.add_argument('filename', action="store",
                        help='Filename to save data to.')
    arg = parser.parse_args()
    main(arg.username, arg.password, arg.subreddit, arg.filename, arg.mode)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment