# philshem/twitter_search.py

## twitter_search.py
import json
import twitter # https://github.com/bear/python-twitter
import time

def main(api=None, search_strings=('advisor OR adviser',),
         out_path='tweets.json', loops=5000, pause=5):
    """Page backwards through Twitter search results and mirror them to disk.

    For every query the search is repeated with a decreasing ``max_id``
    until a page comes back empty or ``loops`` pages have been fetched.
    Each tweet is written to ``out_path`` as one JSON document per line.

    Args:
        api: Object exposing ``GetSearch(term, count=..., max_id=...)``.
            When omitted, a ``twitter.Api`` is built from the placeholder
            credentials below (replace the ``'INSERT'`` values).
        search_strings: Iterable of search queries, run one after another.
        out_path: File that is (over)written with the collected tweets.
        loops: Upper bound on the number of pages requested per query.
        pause: Seconds to wait between two consecutive requests.
    """
    if api is None:
        api = twitter.Api(consumer_key='INSERT',
                          consumer_secret='INSERT',
                          access_token_key='INSERT',
                          access_token_secret='INSERT')

    with open(out_path, 'wb') as outfile:  # json mirror
        for search_string in search_strings:
            # Start without an upper bound: a fixed numeric ceiling silently
            # excludes every tweet whose id is larger than that ceiling.
            maxid = None
            for i in range(loops):
                print('maxid= %s , twitter loop %s' % (maxid, i))

                results = api.GetSearch(search_string, count=100, max_id=maxid)
                if not results:
                    # Nothing older is available (the search index only
                    # reaches back about one week), so stop paging.
                    print('%s break' % maxid)
                    break

                oldest = None
                for tweet in results:
                    # Flatten raw line breaks before parsing, as the
                    # serialised status is expected to be a JSON object.
                    text = str(tweet).replace('\n', ' ').replace('\r', ' ')
                    record = json.loads(text)

                    tweet_id = int(record['id'])
                    if oldest is None or tweet_id < oldest:
                        oldest = tweet_id

                    # json.dumps escapes non-ASCII by default, so the line
                    # can be encoded as ASCII for the binary file handle.
                    outfile.write((json.dumps(record) + '\n').encode('ascii'))

                # max_id is inclusive; step just below the oldest tweet seen
                # so the next page does not return it again.
                maxid = oldest - 1

                time.sleep(pause)  # stay well inside the API rate limit

# Run the harvest only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
    # Parenthesised single-string form prints identically on Python 2 and 3.
    print('done with twitter.')
	import json
	import twitter # https://github.com/bear/python-twitter
	import time

	def main(api=None, search_strings=('advisor OR adviser',),
	         out_path='tweets.json', loops=5000, pause=5):
	    """Page backwards through Twitter search results and mirror them to disk.

	    For every query the search is repeated with a decreasing ``max_id``
	    until a page comes back empty or ``loops`` pages have been fetched.
	    Each tweet is written to ``out_path`` as one JSON document per line.

	    Args:
	        api: Object exposing ``GetSearch(term, count=..., max_id=...)``.
	            When omitted, a ``twitter.Api`` is built from the placeholder
	            credentials below (replace the ``'INSERT'`` values).
	        search_strings: Iterable of search queries, run one after another.
	        out_path: File that is (over)written with the collected tweets.
	        loops: Upper bound on the number of pages requested per query.
	        pause: Seconds to wait between two consecutive requests.
	    """
	    if api is None:
	        api = twitter.Api(consumer_key='INSERT',
	                          consumer_secret='INSERT',
	                          access_token_key='INSERT',
	                          access_token_secret='INSERT')

	    with open(out_path, 'wb') as outfile:  # json mirror
	        for search_string in search_strings:
	            # Start without an upper bound: a fixed numeric ceiling silently
	            # excludes every tweet whose id is larger than that ceiling.
	            maxid = None
	            for i in range(loops):
	                print('maxid= %s , twitter loop %s' % (maxid, i))

	                results = api.GetSearch(search_string, count=100, max_id=maxid)
	                if not results:
	                    # Nothing older is available (the search index only
	                    # reaches back about one week), so stop paging.
	                    print('%s break' % maxid)
	                    break

	                oldest = None
	                for tweet in results:
	                    # Flatten raw line breaks before parsing, as the
	                    # serialised status is expected to be a JSON object.
	                    text = str(tweet).replace('\n', ' ').replace('\r', ' ')
	                    record = json.loads(text)

	                    tweet_id = int(record['id'])
	                    if oldest is None or tweet_id < oldest:
	                        oldest = tweet_id

	                    # json.dumps escapes non-ASCII by default, so the line
	                    # can be encoded as ASCII for the binary file handle.
	                    outfile.write((json.dumps(record) + '\n').encode('ascii'))

	                # max_id is inclusive; step just below the oldest tweet seen
	                # so the next page does not return it again.
	                maxid = oldest - 1

	                time.sleep(pause)  # stay well inside the API rate limit

	# Run the harvest only when executed as a script, not when imported.
	if __name__ == "__main__":
	    main()
	    # Parenthesised single-string form prints identically on Python 2 and 3.
	    print('done with twitter.')