Skip to content

Instantly share code, notes, and snippets.

@kvanbere
Created August 18, 2012 15:00
Show Gist options
  • Save kvanbere/3387295 to your computer and use it in GitHub Desktop.
Save kvanbere/3387295 to your computer and use it in GitHub Desktop.
Loads a fixed number of lines (30) from the public Twitter sample stream
#
# by Kyle Van Berendonck
# kvanberendonck@gmail.com
#
from urllib2 import *
import sys
import json
import difflib
# Reads a fixed number of lines from the Twitter sample stream, keeps the
# lower-cased text of every tweet that is not a retweet, and prints them.
#
# NOTE: this is a Python 2 script (urllib2, unicode). Every print below takes
# a single parenthesised string, which the Python 2 print statement renders
# exactly like the unparenthesised form.

LINE_LIMIT = 30  # Number of lines to read off the stream before stopping


def _has_retweet_marker(text):
    """Return True when lower-cased *text* contains a standalone "rt " token.

    A bare substring test for "rt " also fires inside ordinary words
    ("start ", "heart ", "art "), so a match only counts when it sits at the
    very start of the text or directly after a non-alphanumeric character.
    """
    pos = text.find("rt ")
    while pos != -1:
        if pos == 0 or not text[pos - 1].isalnum():
            return True
        pos = text.find("rt ", pos + 1)
    return False


# Construct Authentication Manager
print("Initialising connection to Twitter (1/3) Setup")
pwd_mngr = HTTPPasswordMgrWithDefaultRealm()
twi_url = "https://stream.twitter.com/1/statuses/sample.json"
pwd_mngr.add_password(None, twi_url, 'username', 'password')  # Change these

# Do Pre-Authentication
print("Initialising connection to Twitter (2/3) Authentication")
twi_hndlr = HTTPBasicAuthHandler(pwd_mngr)

# Do Connection
print("Initialising connection to Twitter (3/3) Connection")
opn = build_opener(twi_hndlr)
pg = opn.open(twi_url)

tweets = []
try:
    # Loop over the first LINE_LIMIT lines of the stream
    for n in range(1, LINE_LIMIT + 1):
        # Read Line from JSON Stream
        print("Reading Line (" + str(n) + "/" + str(LINE_LIMIT) + ")")
        line = pg.readline()
        if not line:
            # readline() returns an empty string only at end of stream;
            # nothing more will arrive, so stop instead of spinning.
            break

        # Only the JSON decode is guarded: lines that are not valid JSON
        # (for example blank lines) are skipped, as before.
        try:
            tweet = json.loads(line)
        except ValueError:
            continue

        # Anything that is not a JSON object cannot be a tweet
        if not isinstance(tweet, dict):
            continue

        # Ignore Re-Tweets (1) and payloads that carry no text
        if "retweeted_status" in tweet or "text" not in tweet:
            continue

        # Fetch text from tweet. json.loads yields unicode; encode it
        # explicitly because str() on non-ASCII unicode raises
        # UnicodeEncodeError (a ValueError subclass), which previously made
        # every non-ASCII tweet vanish silently. lower() on the resulting
        # byte string only folds ASCII letters, which is all the retweet
        # check below needs.
        raw = tweet["text"]
        if isinstance(raw, unicode):
            raw = raw.encode("utf-8")
        text = unquote(str(raw)).lower()

        # Ignore Re-Tweets (2): textual "rt " marker
        if _has_retweet_marker(text):
            continue

        # Append to Array
        tweets.append(text)
finally:
    # Always release the HTTP connection, even if reading fails part-way
    pg.close()

print("Dumping Data...")
for tweet in tweets:
    print("Tweet: " + tweet)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment