#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import json
import urllib2
import base64
def post_getTweets(ruleQuery, fromDate, toDate, nextURL):
    url = 'https://gnip-api.twitter.com/search/fullarchive/accounts/Twitter-Marketing-Manifold/prod.json'
    UN = 'email@address'
    PWD = 'XXXXPasswordXXXX'
    # Build the request body with json.dumps so any quotes or backslashes in
    # the rule are escaped correctly (manual string concatenation would break).
    body = {'query': ruleQuery,
            'fromDate': str(fromDate),
            'toDate': str(toDate),
            'maxResults': '500'}
    if nextURL != '':
        body['next'] = nextURL  # pagination token from the previous response
    query = json.dumps(body)
    #print query
    base64string = base64.encodestring('%s:%s' % (UN, PWD)).replace('\n', '')
    req = urllib2.Request(url=url, data=query)
    req.add_header('Content-type', 'application/json')
    req.add_header('Authorization', 'Basic %s' % base64string)
    try:
        response = urllib2.urlopen(req)
        return response.read()
    except urllib2.HTTPError as e:
        # Read the error body once; a second e.read() would return ''.
        error_body = e.read()
        print error_body
        return error_body
def post_getCounts(ruleQuery, fromDate, toDate):
    url = 'https://gnip-api.twitter.com/search/fullarchive/accounts/Twitter-Marketing-Manifold/prod/counts.json'
    UN = 'email@address'
    PWD = 'XXXXPasswordXXXX'
    # Request daily count buckets for the rule over the given window.
    query = json.dumps({'query': ruleQuery,
                        'fromDate': str(fromDate),
                        'toDate': str(toDate),
                        'bucket': 'day'})
    #print query
    base64string = base64.encodestring('%s:%s' % (UN, PWD)).replace('\n', '')
    req = urllib2.Request(url=url, data=query)
    req.add_header('Content-type', 'application/json')
    req.add_header('Authorization', 'Basic %s' % base64string)
    try:
        response = urllib2.urlopen(req)
        return response.read()
    except urllib2.HTTPError as e:
        # Read the error body once; a second e.read() would return ''.
        error_body = e.read()
        print error_body
        return error_body
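# A minimal sketch of how the raw counts payload could be summarised. The
# 'results' / 'timePeriod' / 'count' keys are an assumption about the Gnip
# full-archive counts response shape, not something verified by this script.
def print_count_buckets(rawCounts):
    try:
        data = json.loads(rawCounts)
    except ValueError as e:
        print e
        return
    for bucket in data.get('results', []):  # 'results' is the assumed bucket list
        print bucket.get('timePeriod'), bucket.get('count')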
def get_tweets(prefix, rule, fromDate, toDate, nextUrl, startCount=0):
    counter = startCount
    moreTweets = True
    while moreTweets:
        print prefix + " page " + str(counter)
        results = post_getTweets(rule, fromDate, toDate, nextUrl)
        try:
            data = json.loads(results)
            # Each page holds up to 500 tweets; name the file by tweet range.
            tweetStart = str(500 * counter)
            tweetEnd = str(500 * (counter + 1))
            with open('data/' + prefix + '_' + tweetStart + '-' + tweetEnd + '.json', 'w') as outfile:
                json.dump(data, outfile, indent=1)
            if 'next' in data:
                # Follow the pagination token until the API stops returning one.
                nextUrl = data['next']
                counter = counter + 1
            else:
                moreTweets = False
        except Exception as e:
            print e
            moreTweets = False
    print "Done!"
def get_counts(rule, fromDate, toDate):
    results = post_getCounts(rule, fromDate, toDate)
    print results
query = "superbowl OR Super Bowl OR SB50 OR Broncos OR BRONCOS OR PANTHERS OR SuperBowl"
prefix = "superbowl"
get_tweets(prefix, query, 201602020000, 201602090000, "", 0)
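# Example: sanity-check daily volumes for the same rule and window before (or
# after) a full pull. Commented out so the script's behaviour is unchanged.
# get_counts(query, 201602020000, 201602090000)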