Skip to content

Instantly share code, notes, and snippets.

@vgoklani
Created November 11, 2011 18:31
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vgoklani/1358795 to your computer and use it in GitHub Desktop.
Save vgoklani/1358795 to your computer and use it in GitHub Desktop.
Twitter Streaming via Python and pycurl + filters by Klout score
#!/usr/bin/env python
import pycurl, urllib, json, sys, httplib2
'''
curl -d 'track=google' https://stream.twitter.com/1/statuses/filter.json -uusername:password
http://dev.twitter.com/pages/streaming_api_methods
https://gist.github.com/1046726
http://www.angryobjects.com/2011/10/15/http-with-python-pycurl-by-example/
'''
# URL template for the streaming endpoints; the %s placeholder is filled with
# an endpoint name by attach_nozzle().
FIREHOSE_URL = "https://stream.twitter.com/1/statuses/%s.json"
# API key appended to every Klout lookup in getKloutScore(); fill in before use.
KLOUT_API_KEY = ''
# Credentials handed to attach_nozzle() by the script entry point; fill in
# before use.
TWITTER_USERNAME = ''
TWITTER_PASSWORD = ''
# Endpoint names that can be substituted into FIREHOSE_URL. The script entry
# point only uses nozzle[0] ('filter').
nozzle = ['filter', 'firehose', 'links', 'retweet', 'sample']
# Placeholders for the two output files. The script entry point rebinds them
# to open file objects: to_file() writes compact JSON to `f` and
# pretty-printed JSON to `g`.
f = []
g = []
# Shared httplib2 client used by getKloutScore().
http = httplib2.Http()
def attach_nozzle(nozzle, hose, args, username, password):
    """Connect to a streaming endpoint and feed the response body to *hose*.

    Args:
        nozzle: Endpoint name substituted into FIREHOSE_URL (e.g. 'filter').
            Note that this parameter shadows the module-level ``nozzle`` list.
        hose: Callable registered as the curl write callback; it is invoked
            with each chunk of response data as it arrives.
        args: Mapping of request parameters; it is URL-encoded and sent as
            the POST body.
        username: Account name used for authentication.
        password: Password used for authentication.

    Transfer failures are not raised: they are reported on stderr
    ('ERROR -> 0' for pycurl errors, 'ERROR -> 1' for anything else) and the
    function then returns normally.
    """
    nozzle_url = FIREHOSE_URL % nozzle
    conn = pycurl.Curl()
    try:
        conn.setopt(pycurl.USERPWD, "%s:%s" % (username, password))
        conn.setopt(pycurl.URL, nozzle_url)
        conn.setopt(pycurl.WRITEFUNCTION, hose)
        conn.setopt(pycurl.POSTFIELDS, urllib.urlencode(args))
        # No CONNECTTIMEOUT/TIMEOUT is configured on purpose: perform()
        # blocks for as long as the stream stays open.
        try:
            conn.perform()
        except pycurl.error as err:
            sys.stderr.write('ERROR -> 0: %s\n' % str(err))
        except Exception as err:
            sys.stderr.write('ERROR -> 1: %s\n' % str(err))
    finally:
        # Always release the curl handle, including on KeyboardInterrupt.
        conn.close()
def to_screen(data):
    """Pretty-print one chunk of streamed JSON to stdout.

    Args:
        data: Raw text (or bytes) of a JSON document.

    The parsed document is printed with sorted keys and a one-space indent.
    If *data* is not valid JSON, 'ERROR -> 2' is written to stderr and the
    raw payload is echoed as a JSON string instead. Other failures are
    reported on stderr as 'ERROR -> 3' (TypeError) or 'ERROR -> 4'.
    Nothing is raised to the caller.
    """
    try:
        print(json.dumps(json.loads(data), sort_keys=True, indent=1))
    except ValueError as err:
        sys.stderr.write('ERROR -> 2: %s\n' % str(err))
        # Decode byte payloads leniently before echoing them: json.dumps()
        # rejects bytes (Python 3) and undecodable byte strings (Python 2),
        # and an exception raised here would escape this handler.
        if isinstance(data, bytes):
            data = data.decode('utf-8', 'replace')
        print(json.dumps(data, skipkeys=True))
    except TypeError as err:
        sys.stderr.write('ERROR -> 3: %s\n' % str(err))
    except Exception as err:
        sys.stderr.write('ERROR -> 4: %s\n' % str(err))
def to_file(data, min_score=40):
    """Persist and print a streamed tweet if its author's Klout score is high.

    Intended as the curl write callback, which calls it with a single
    argument.

    Args:
        data: Raw text (or bytes) of one JSON message from the stream.
        min_score: A tweet is kept only when the author's Klout score is
            strictly greater than this value. Defaults to 40.

    For a kept tweet, the compact JSON is appended to the module-level file
    ``f`` and a pretty-printed copy to ``g`` (one newline after each record
    so consecutive records stay separable), and a summary line is printed.
    If the tweet carries URLs, the resolved target of the first one is
    printed as well.

    Blank chunks and messages that are not tweets (no ``user`` or ``text``)
    are ignored. Failures are reported on stderr ('ERROR -> 2' invalid JSON,
    'ERROR -> 3' TypeError, 'ERROR -> 4' anything else) and never raised.
    """
    # NOTE(review): the curl write callback is not guaranteed to deliver
    # exactly one complete JSON message per call; a message split across
    # chunks will be reported as invalid JSON. Confirm whether buffering
    # up to the line delimiter is needed.
    try:
        if not data.strip():
            # Blank keep-alive chunk: nothing to parse.
            return
        j = json.loads(data)
        if not isinstance(j, dict):
            return
        user = j.get('user')
        if not user or 'screen_name' not in user or 'text' not in j:
            # Not a tweet (e.g. a control message without a user).
            return
        username = user['screen_name']
        kloutscore = getKloutScore(username)
        if float(kloutscore) > min_score:
            json.dump(j, f)
            f.write('\n')
            json.dump(j, g, sort_keys=True, indent=1)
            g.write('\n')
            print(str(username) + '\t' + str(kloutscore) + '\t' + ' \t=> ' + j['text'] + '\n')
            urls = (j.get('entities') or {}).get('urls') or []
            if urls:
                # Fall back to the short URL when no expanded form is given.
                target = urls[0].get('expanded_url') or urls[0].get('url')
                if target:
                    print('\n\t' + '==> ' + str(expandURL(target)))
    except ValueError as err:
        sys.stderr.write('ERROR -> 2: %s\n' % str(err))
        # Decode byte payloads leniently before echoing them: json.dumps()
        # rejects bytes (Python 3) and undecodable byte strings (Python 2),
        # and an exception raised here would escape this handler.
        if isinstance(data, bytes):
            data = data.decode('utf-8', 'replace')
        print(json.dumps(data, skipkeys=True))
    except TypeError as err:
        sys.stderr.write('ERROR -> 3: %s\n' % str(err))
    except Exception as err:
        sys.stderr.write('ERROR -> 4: %s\n' % str(err))
    finally:
        # Flush both outputs so data survives an abrupt stop of the stream.
        # The getattr guard keeps cleanup from raising while f/g are still
        # the module-level list placeholders.
        for sink in (f, g):
            flush = getattr(sink, 'flush', None)
            if flush is not None:
                flush()
def getKloutScore(username):
    """Look up the Klout score for *username*.

    Args:
        username: Screen name to query. Presumably ASCII, as Twitter screen
            names are -- TODO confirm if other callers are added.

    Returns:
        The ``kscore`` value of the first entry in the ``users`` list of the
        Klout API response.

    Raises:
        ValueError: If the response body is not valid JSON.
        KeyError, IndexError: If the response lacks the expected fields.
    """
    # URL-encode the parameters instead of concatenating them raw, so
    # reserved characters in either value cannot corrupt the query string.
    # A list of pairs keeps the original parameter order.
    query = urllib.urlencode([('users', username), ('key', KLOUT_API_KEY)])
    url = 'https://api.klout.com/1/klout.json?' + query
    _response, content = http.request(url)
    return json.loads(content)['users'][0]['kscore']
def expandURL(url):
    """Return the URL that *url* finally resolves to.

    Opens *url* and reports the address of the resource actually retrieved,
    which differs from *url* when the request was redirected.

    Args:
        url: The (possibly shortened) URL to resolve.

    Returns:
        The resolved URL as reported by the response's ``geturl()``.
    """
    response = urllib.urlopen(url)
    try:
        return response.geturl()
    finally:
        # Release the connection; the body is never read.
        response.close()
# Script entry point: `python firehose.py query` streams tweets matching
# `query` through to_file(), writing <query>.json (compact) and
# <query>_pp.json (pretty-printed).
if __name__ == '__main__':
    if len(sys.argv) != 2:
        print('python firehose.py query')
        # Wrong invocation is an error: exit non-zero so callers can tell.
        sys.exit(1)
    username = TWITTER_USERNAME
    password = TWITTER_PASSWORD
    args = {'track': sys.argv[1]}
    # f and g are rebound at module level so that to_file() sees the open
    # files. try/finally closes them however the stream ends, including
    # Ctrl-C.
    f = open(args['track'] + '.json', 'w+')
    try:
        g = open(args['track'] + '_pp' + '.json', 'w+')
        try:
            attach_nozzle(nozzle[0], to_file, args, username, password)
        finally:
            g.close()
    finally:
        f.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment