Grabs the top 200 submission entries from the My Little Pony subreddit at reddit.com and throws them into a MySQL database where I can analyze the data.
# top_200_pony_entries.py
import urllib
import time
import json
import pickle

import MySQLdb
def store_to_database(submission_list):
    # connection credentials were left blank in the original gist
    conn = MySQLdb.connect(user='', db='', passwd='', host='')
    csr = conn.cursor()
    for submission in submission_list:
        permalink = submission['permalink']
        title = submission['title'].encode('cp1252')
        url = submission['url']
        author = submission['author'].encode('cp1252')
        num_comments = int(submission['num_comments'])
        sql_query = "insert into submissions values( %s, %s, %s, %s, %s )"
        try:
            csr.execute(sql_query, (permalink, title, url, author, str(num_comments)))
        except UnicodeEncodeError:
            print("unicode emergency: please hold")
    conn.commit()  # without a commit, transactional tables would silently drop the inserts
    csr.close()
    conn.close()
def grab_100_entries(after=None):
    base_url = "http://www.reddit.com/r/mylittlepony.json?limit=100"

    # url assembly: reddit pages its listings with an "after" token
    offset_count_arg = ""
    if after is not None:
        offset_count_arg = "&after=%s" % after
    full_url = base_url + offset_count_arg

    # get the data and tidy it up
    sock = urllib.urlopen(full_url)
    raw_data = sock.read()
    sock.close()
    string_data = raw_data.decode("cp1252")
    json_data = json.loads(string_data)
    return json_data
def store_data(json_coll):
    submission_list = []

    # clean it up into a nice simplified list.
    submissions = json_coll['data']['children']
    submissions = [sub['data'] for sub in submissions]
    submission_list.extend(submissions)

    # now we can dump it to file ("wb" so the pickle survives on any platform)
    pickle_file = open("pony_entries", "wb")
    pickle.dump(submission_list, pickle_file)
    pickle_file.close()
def clear_old_stuff(csr):
    # helper to wipe rows from a previous run; not invoked in the script body below
    csr.execute("delete from submissions")
# first page: the top 100 entries
json_coll = grab_100_entries()
store_data(json_coll)

# note: no need to re-grab entries when I can just reuse a previous fetch from the pickled version.
pickle_file = open("pony_entries", "rb")
submission_list = pickle.load(pickle_file)
pickle_file.close()
store_to_database(submission_list)

# second page: entries 101-200, keyed off the "after" token from the first response
after_arg = json_coll['data']['after']
time.sleep(2)  # brief pause between requests to stay friendly with reddit
json_coll = grab_100_entries(after=after_arg)
store_data(json_coll)

# note: no need to re-grab entries when I can just reuse a previous fetch from the pickled version.
pickle_file = open("pony_entries", "rb")
submission_list = pickle.load(pickle_file)
pickle_file.close()
store_to_database(submission_list)
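
The gist never shows the submissions table itself, so below is a minimal sketch of a schema that would satisfy the five-value INSERT above, plus a sample query for the "analyze the data" step. The file name, column names, and column types are all assumptions inferred from the values bound in store_to_database(), not part of the original.

# setup_schema.py -- hypothetical companion script, not part of the original gist.
# Column names and types are guesses based on the five values the INSERT binds:
# permalink, title, url, author, num_comments.
import MySQLdb

conn = MySQLdb.connect(user='', db='', passwd='', host='')
csr = conn.cursor()
csr.execute("""create table if not exists submissions (
                   permalink    varchar(255),
                   title        varchar(300),
                   url          text,
                   author       varchar(64),
                   num_comments int )""")

# sample analysis once the 200 rows are loaded: the ten most-discussed posts
csr.execute("select title, num_comments from submissions "
            "order by num_comments desc limit 10")
for title, num_comments in csr.fetchall():
    print("%5d  %s" % (num_comments, title))

conn.commit()
csr.close()
conn.close()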