Skip to content

Instantly share code, notes, and snippets.

@9b
Created March 21, 2012 03:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save 9b/2144144 to your computer and use it in GitHub Desktop.
Save 9b/2144144 to your computer and use it in GitHub Desktop.
Parse the Twitter stream for #tibet or #freetibet and identify potential spammers based on static traits.
import pycurl
import simplejson as json
import urllib
import time
import datetime
# Streaming endpoint that the script POSTs its keyword filter to.
STREAM_URL = "https://stream.twitter.com/1/statuses/filter.json"

# HTTP basic-auth credentials; replace both placeholders before running.
USER, PASS = "YOUR USERNAME", "YOUR PASSWORD"

# Screen names that were successfully reported during this run
# (handle_spam appends to it, on_receive consults it).
SPAMMERS = list()

# Form-encoded POST body carrying the comma-separated keywords to track.
data = urllib.urlencode(dict(track="tibet,freetibet"))
def spam_sent(data):
    """Discard a chunk of the spam-report response body.

    Installed by handle_spam as the pycurl WRITEFUNCTION; handle_spam judges
    success from the HTTP status code alone, so the body itself is unused.

    data -- response chunk handed over by pycurl (ignored).

    Returns None, which pycurl treats as "the whole chunk was consumed".
    """
    return None
def handle_spam(user):
    """Report *user* to Twitter as a spammer, at most once per run.

    user -- screen name of the account to report.

    Does nothing when the name is already in SPAMMERS. Otherwise the name is
    POSTed to the report_spam endpoint with the USER/PASS credentials; on an
    HTTP 200 response the name is appended to SPAMMERS and a confirmation is
    printed. Any other status leaves SPAMMERS untouched, so a later call for
    the same user tries again. Errors raised by pycurl propagate to the
    caller.
    """
    if user in SPAMMERS:
        return

    conn = pycurl.Curl()
    try:
        conn.setopt(pycurl.USERPWD, "%s:%s" % (USER, PASS))
        # NOTE(review): this endpoint is plain http while STREAM_URL is https,
        # so the basic-auth credentials travel unencrypted -- confirm whether
        # the https form of this URL should be used instead.
        conn.setopt(pycurl.URL, "http://api.twitter.com/1/report_spam.json")
        conn.setopt(pycurl.POSTFIELDS, urllib.urlencode({"screen_name": user}))
        # Swallow the response body; only the status code matters here.
        conn.setopt(pycurl.WRITEFUNCTION, spam_sent)
        conn.perform()
        status = conn.getinfo(pycurl.HTTP_CODE)
    finally:
        # Release the handle even when perform() raises; one handle is
        # created per report, so leaving them open accumulates connections.
        conn.close()

    if status == 200:
        SPAMMERS.append(user)
        # Single parenthesised argument: prints identically on Python 2 and 3.
        print("== ABUSE SENT for " + user + " ==")
def on_receive(data):
    """Classify one status received from the stream (pycurl WRITEFUNCTION).

    data -- raw chunk delivered by pycurl, expected to be one JSON status.

    The author is treated as legitimate when the tweet text is longer than
    10 characters and the account has at least 5 followers or was created at
    least 5 days ago; such tweets are printed with their timestamp. Any other
    author is printed as a potential spammer and passed to handle_spam.
    Authors already in SPAMMERS produce no output. Whitespace-only chunks are
    ignored. Any failure while decoding or handling a chunk prints
    "!TWITTER ERROR!" and is otherwise swallowed so the stream keeps running.

    Returns None (pycurl treats that as "chunk consumed").
    """
    # A chunk with no content cannot hold a status; skip it quietly instead
    # of reporting an error for it.
    if not data.strip():
        return

    # NOTE(review): assumes every callback invocation carries exactly one
    # complete JSON document; a status split across chunks ends up in the
    # error path below -- confirm against the stream's framing.
    stamp_format = '%a %b %d %H:%M:%S +0000 %Y'
    try:
        d = json.loads(data)
        author = d['user']
        followers = author['followers_count']
        user = author['screen_name']
        tweet = d['text']

        # created_at is stamped +0000, so measure the account age against the
        # current UTC date rather than local time. Only whole calendar days
        # matter, hence the date (not datetime) subtraction.
        born = datetime.datetime.strptime(author['created_at'], stamp_format)
        age_days = (datetime.datetime.utcnow().date() - born.date()).days

        if user in SPAMMERS:
            return

        if (followers >= 5 or age_days >= 5) and len(tweet) > 10:
            sent_at = datetime.datetime.strptime(d['created_at'], stamp_format)
            tweet_time = sent_at.strftime('%Y-%m-%d %H:%M:%S')
            print(user + ": " + tweet + " - " + tweet_time)
        else:
            print("!POTENTIAL SPAMMER - " + user + " flagged: FOLLOWERS - " + str(followers) + " ALIVE - " + str(age_days) + " days TWEET LENGTH - " + str(len(tweet)))
            handle_spam(user)
    except Exception:
        # Deliberately best-effort: one bad message (or a failed spam report)
        # must not abort the stream. Unlike a bare "except:", this still lets
        # KeyboardInterrupt and SystemExit stop the script.
        print("!TWITTER ERROR!")
# Open the filtered stream: authenticate with USER/PASS, POST the keyword
# filter held in `data` to STREAM_URL and hand every received chunk to
# on_receive. perform() blocks for as long as the connection stays open.
conn = pycurl.Curl()
for option, value in (
    (pycurl.USERPWD, "%s:%s" % (USER, PASS)),
    (pycurl.URL, STREAM_URL),
    (pycurl.WRITEFUNCTION, on_receive),
    (pycurl.POSTFIELDS, data),
):
    conn.setopt(option, value)
conn.perform()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment