nikolak/l1.py

## l1.py
#!/usr/bin/python
#
# Script Name: getRedditJSONSubmissionData.py
# Usage: ./getRedditJSONSubmissionData.py [-u USERNAME] [-p PASSWORD]
#            [-r SUBREDDIT] [-m MODE] FILENAME
# ----------------------------------------------------------------------------
# This script will average one request every two seconds.
# If the servers return data faster, you might
# need to change the sleep time to avoid going over the API limits.
# Also, make sure you change the settings in your Reddit account
# to get 100 objects at a time.  The request URL built by this script
# already asks for that with the query parameter "limit=100".
#
# Also, the code to handle errors if a non-status 200 response is
# received should be improved to
# eventually stop requesting after X amount of failures --
# this might happen if Reddit's servers go down
# for an extended time period.
# ---------------------------------------------------------------------------
import json
import time
import argparse
import requests

# ===== To modify ======
# Sent as the User-Agent header on every request made by the session.
user_agent = "My Awesome Reddit Python Script"
# Consecutive failed requests tolerated before paging restarts from the top.
maximum_retries = 5
debug = False  # Not referenced anywhere else in the visible code.
# ======================

# Template for one page of a subreddit's newest submissions.  HTTPS keeps
# the session cookies off cleartext connections.
base_url = "https://www.reddit.com/r/{sub}/new/.json?limit=100&after={aft}"


def main(username, password, subreddit, output_file, mode):
    """Poll a subreddit's newest submissions forever and log each one.

    A session is created with the configured ``user_agent``; when both
    ``username`` and ``password`` are given they are first posted to
    Reddit's login endpoint.  The loop then requests one listing page every
    two seconds, following the ``after`` cursor, and writes a
    ``Submission in <subreddit> by <author>`` line to ``output_file`` and to
    stdout for every entry.  The function only ends when it is interrupted
    or an exception propagates.

    Args:
        username: Reddit account name, or None to browse anonymously.
        password: Reddit account password, or None to browse anonymously.
        subreddit: Name of the subreddit to read (e.g. "all").
        output_file: Path of the file that receives the log lines.
        mode: Mode string passed to ``open`` for ``output_file``.

    Raises:
        IOError: If ``output_file`` cannot be opened with ``mode``.
    """
    session = requests.Session()
    session.headers.update({'User-Agent': user_agent})
    if username is not None and password is not None:
        # HTTPS so the credentials are never sent in cleartext.
        login = session.post('https://www.reddit.com/api/login',
                             data={'user': username,
                                   'passwd': password,
                                   'api_type': 'json'})
        if login.status_code != 200:
            print("Login failed with HTTP status {}".format(
                login.status_code))

    after = ""
    current_retries = 0  # Count of consecutive failed requests.
    # Open the file once: reopening it on every iteration leaked handles
    # and, with mode "w", truncated what earlier batches had written.
    with open(output_file, mode) as output:
        while True:
            time.sleep(2)  # Sleep to avoid going over the API limit.
            url = base_url.format(sub=subreddit, aft=after)
            print("Getting data from: {} ...".format(url))
            html = session.get(url)  # Make request to Reddit API
            if html.status_code != 200:
                # Record the failing status code and URL in the output file.
                output.write(str(html.status_code) + '\n' + url + '\n')
                output.flush()
                current_retries += 1
                print("Retrying {} | {}/{}".format(url, current_retries,
                                                   maximum_retries))
                if current_retries >= maximum_retries:
                    # Give up on this cursor and start over from the
                    # newest submissions.
                    after = ""
                    current_retries = 0
                continue

            current_retries = 0
            try:
                listing = json.loads(html.content)['data']
                children = listing['children']
                next_after = listing['after']
            except (ValueError, KeyError, TypeError) as err:
                # Unparseable or unexpectedly shaped response: report it
                # and request the same page again.
                print("Error: {!r}".format(err))
                continue

            # A null cursor marks the end of the listing; restart from the
            # top instead of sending the literal text "None".
            after = next_after or ""
            for submission in children:
                try:
                    out_string = "Submission in {} by {}".format(
                        submission['data']['subreddit'],
                        submission['data']['author'])
                    output.write(out_string + "\n")
                    print(out_string)
                except (KeyError, TypeError, ValueError) as err:
                    # Skip a malformed or unencodable entry but keep the
                    # rest of the page.
                    print("Error: {!r}".format(err))
            # The loop never ends on its own, so flush after every page to
            # keep the file current.
            output.flush()

if __name__ == '__main__':
    # Command-line entry point: parse the options, then start polling.
    parser = argparse.ArgumentParser(description='Python Reddit Script')
    parser.add_argument('-u', action="store", dest="username", default=None,
                        help='Your reddit username')
    parser.add_argument('-p', action="store", dest="password", default=None,
                        help='Your reddit password')
    parser.add_argument('-r', action="store", dest="subreddit", default="all",
                        help='Subreddit to fetch data from')
    # Adjacent literals are joined by the parser, so the help text carries
    # no stray indentation the way a backslash continuation would.
    parser.add_argument('-m', action="store", dest="mode", default="a",
                        help='File mode: "a" will append to the previous '
                             'file, "w" will overwrite old data if there '
                             'is any')
    parser.add_argument('filename', action="store",
                        help='Filename to save data to.')

    arg = parser.parse_args()

    main(arg.username, arg.password, arg.subreddit, arg.filename, arg.mode)
	#!/usr/bin/python
	#
	# Script Name: getRedditJSONSubmissionData.py
	# Usage: ./getRedditJSONSubmissionData.py [-u USERNAME] [-p PASSWORD]
	#            [-r SUBREDDIT] [-m MODE] FILENAME
	# ----------------------------------------------------------------------------
	# This script will average one request every two seconds.
	# If the servers return data faster, you might
	# need to change the sleep time to avoid going over the API limits.
	# Also, make sure you change the settings in your Reddit account
	# to get 100 objects at a time. The request URL built by this script
	# already asks for that with the query parameter "limit=100".
	#
	# Also, the code to handle errors if a non-status 200 response is
	# received should be improved to
	# eventually stop requesting after X amount of failures --
	# this might happen if Reddit's servers go down
	# for an extended time period.
	# ---------------------------------------------------------------------------
	import json
	import time
	import argparse
	import requests

	# ===== To modify ======
	# Sent as the User-Agent header on every request made by the session.
	user_agent = "My Awesome Reddit Python Script"
	# Consecutive failed requests tolerated before paging restarts from the top.
	maximum_retries = 5
	debug = False  # Not referenced anywhere else in the visible code.
	# ======================

	# Template for one page of a subreddit's newest submissions.  HTTPS keeps
	# the session cookies off cleartext connections.
	base_url = "https://www.reddit.com/r/{sub}/new/.json?limit=100&after={aft}"


	def main(username, password, subreddit, output_file, mode):
	    """Poll a subreddit's newest submissions forever and log each one.

	    A session is created with the configured ``user_agent``; when both
	    ``username`` and ``password`` are given they are first posted to
	    Reddit's login endpoint.  The loop then requests one listing page
	    every two seconds, following the ``after`` cursor, and writes a
	    ``Submission in <subreddit> by <author>`` line to ``output_file`` and
	    to stdout for every entry.  The function only ends when it is
	    interrupted or an exception propagates.

	    Args:
	        username: Reddit account name, or None to browse anonymously.
	        password: Reddit account password, or None to browse anonymously.
	        subreddit: Name of the subreddit to read (e.g. "all").
	        output_file: Path of the file that receives the log lines.
	        mode: Mode string passed to ``open`` for ``output_file``.

	    Raises:
	        IOError: If ``output_file`` cannot be opened with ``mode``.
	    """
	    session = requests.Session()
	    session.headers.update({'User-Agent': user_agent})
	    if username is not None and password is not None:
	        # HTTPS so the credentials are never sent in cleartext.
	        login = session.post('https://www.reddit.com/api/login',
	                             data={'user': username,
	                                   'passwd': password,
	                                   'api_type': 'json'})
	        if login.status_code != 200:
	            print("Login failed with HTTP status {}".format(
	                login.status_code))

	    after = ""
	    current_retries = 0  # Count of consecutive failed requests.
	    # Open the file once: reopening it on every iteration leaked handles
	    # and, with mode "w", truncated what earlier batches had written.
	    with open(output_file, mode) as output:
	        while True:
	            time.sleep(2)  # Sleep to avoid going over the API limit.
	            url = base_url.format(sub=subreddit, aft=after)
	            print("Getting data from: {} ...".format(url))
	            html = session.get(url)  # Make request to Reddit API
	            if html.status_code != 200:
	                # Record the failing status code and URL in the file.
	                output.write(str(html.status_code) + '\n' + url + '\n')
	                output.flush()
	                current_retries += 1
	                print("Retrying {} | {}/{}".format(url, current_retries,
	                                                   maximum_retries))
	                if current_retries >= maximum_retries:
	                    # Give up on this cursor and start over from the
	                    # newest submissions.
	                    after = ""
	                    current_retries = 0
	                continue

	            current_retries = 0
	            try:
	                listing = json.loads(html.content)['data']
	                children = listing['children']
	                next_after = listing['after']
	            except (ValueError, KeyError, TypeError) as err:
	                # Unparseable or unexpectedly shaped response: report it
	                # and request the same page again.
	                print("Error: {!r}".format(err))
	                continue

	            # A null cursor marks the end of the listing; restart from
	            # the top instead of sending the literal text "None".
	            after = next_after or ""
	            for submission in children:
	                try:
	                    out_string = "Submission in {} by {}".format(
	                        submission['data']['subreddit'],
	                        submission['data']['author'])
	                    output.write(out_string + "\n")
	                    print(out_string)
	                except (KeyError, TypeError, ValueError) as err:
	                    # Skip a malformed or unencodable entry but keep the
	                    # rest of the page.
	                    print("Error: {!r}".format(err))
	            # The loop never ends on its own, so flush after every page
	            # to keep the file current.
	            output.flush()

	if __name__ == '__main__':
	    # Command-line entry point: parse the options, then start polling.
	    parser = argparse.ArgumentParser(description='Python Reddit Script')
	    parser.add_argument('-u', action="store", dest="username",
	                        default=None, help='Your reddit username')
	    parser.add_argument('-p', action="store", dest="password",
	                        default=None, help='Your reddit password')
	    parser.add_argument('-r', action="store", dest="subreddit",
	                        default="all", help='Subreddit to fetch data from')
	    # Adjacent literals are joined by the parser, so the help text carries
	    # no stray indentation the way a backslash continuation would.
	    parser.add_argument('-m', action="store", dest="mode", default="a",
	                        help='File mode: "a" will append to the previous '
	                             'file, "w" will overwrite old data if there '
	                             'is any')
	    parser.add_argument('filename', action="store",
	                        help='Filename to save data to.')

	    arg = parser.parse_args()

	    main(arg.username, arg.password, arg.subreddit, arg.filename, arg.mode)