Created
August 18, 2012 15:00
-
-
Save kvanbere/3387295 to your computer and use it in GitHub Desktop.
Loads X number of lines off the public Twitter stream
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#
# by Kyle Van Berendonck
# kvanberendonck@gmail.com
#
import difflib
import json
import re
import sys
from urllib2 import *
# Endpoint of the public sample stream; also used as the auth realm URI.
TWITTER_STREAM_URL = "https://stream.twitter.com/1/statuses/sample.json"

# Number of stream lines to read before stopping.
MAX_LINES = 30

# Matches a standalone "rt " token in lower-cased text.  The word boundary
# keeps ordinary words that merely end in "rt" ("start ", "heart ") from
# being mistaken for a manual re-tweet.
_RETWEET_MARKER = re.compile(r"\brt ", re.UNICODE)


def extract_tweet_text(line):
    """Return the lower-cased text of an original tweet, or None.

    line -- one raw line read from the JSON stream.

    None is returned when the line is not valid JSON (e.g. a blank
    keep-alive line), is not a JSON object, carries no "text" field,
    or is a re-tweet.
    """
    # Load Next JSON Line
    try:
        tweet = json.loads(line)
    except ValueError:
        return None
    if not isinstance(tweet, dict):
        return None

    # Ignore Re-Tweets (1)
    if "retweeted_status" in tweet or "text" not in tweet:
        return None

    # Fetch text from tweet.  The text is kept as unicode: forcing it
    # through str() raised UnicodeEncodeError (a ValueError subclass) for
    # every non-ASCII tweet, which then got silently discarded.
    text = tweet["text"]
    if text is None:
        return None
    text = unquote(text).lower()

    # Ignore Re-Tweets (2)
    if _RETWEET_MARKER.search(text):
        return None
    return text


def read_tweets(stream, max_lines=MAX_LINES):
    """Read up to max_lines lines from stream and collect tweet texts.

    stream    -- object with a readline() method yielding JSON lines.
    max_lines -- maximum number of lines to read (not tweets to keep).

    Returns a list with the lower-cased text of every line that
    extract_tweet_text() accepted, in stream order.
    """
    tweets = []
    for n in range(1, max_lines + 1):
        # Read Line from JSON Stream
        print("Reading Line (" + str(n) + "/" + str(max_lines) + ")")
        line = stream.readline()
        if not line:
            # readline() returns an empty string only at end of stream.
            break
        text = extract_tweet_text(line)
        if text is not None:
            # Append to Array
            tweets.append(text)
    return tweets


def main():
    """Connect to the sample stream, read MAX_LINES lines and print tweets."""
    # Construct Authentication Manager
    print("Initialising connection to Twitter (1/3) Setup")
    pwd_mngr = HTTPPasswordMgrWithDefaultRealm()
    # NOTE(review): placeholder credentials -- change these, and prefer
    # loading them from outside the source file rather than hard-coding.
    pwd_mngr.add_password(None, TWITTER_STREAM_URL, 'username', 'password')

    # Do Pre-Authentication
    print("Initialising connection to Twitter (2/3) Authentication")
    twi_hndlr = HTTPBasicAuthHandler(pwd_mngr)

    # Do Connection
    print("Initialising connection to Twitter (3/3) Connection")
    opn = build_opener(twi_hndlr)
    pg = opn.open(TWITTER_STREAM_URL)
    try:
        tweets = read_tweets(pg)
    finally:
        # Always release the HTTP connection, even if reading fails.
        pg.close()

    print("Dumping Data...")
    # Encode explicitly so non-ASCII tweets cannot crash the dump when
    # stdout has no usable encoding (e.g. when piped).
    encoding = sys.stdout.encoding or "utf-8"
    for tweet in tweets:
        print("Tweet: " + tweet.encode(encoding, "replace"))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment