public
Created

Parse the Twitter stream for #tibet or #freetibet and identify potential spammers based on static traits.

  • Download Gist
clean_tibet.py
Python
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56
import pycurl
import simplejson as json
import urllib
import time
import datetime
# Streaming endpoint that delivers statuses matching the tracked keywords.
STREAM_URL = "https://stream.twitter.com/1/statuses/filter.json"

# HTTP basic-auth credentials used for both the stream and the spam reports.
USER = "YOUR USERNAME"
PASS = "YOUR PASSWORD"

# Screen names whose spam report was accepted during this run.
SPAMMERS = []

# URL-encoded POST body selecting the keywords to track.
data = urllib.urlencode({"track": "tibet,freetibet"})
 
def spam_sent(data):
    """Discard the response body of a spam report.

    Installed as the pycurl write callback by ``handle_spam``, which only
    looks at the HTTP status code and has no use for the body. Returning
    ``None`` is pycurl's convention for "the whole chunk was consumed".

    Args:
        data: Chunk of response body handed over by pycurl (ignored).
    """
    return None
 
def handle_spam(user):
    """Report *user* to Twitter as a spammer, at most once per run.

    Posts the screen name to the ``report_spam`` endpoint with the
    module-level ``USER``/``PASS`` credentials. When the endpoint answers
    HTTP 200 the name is appended to ``SPAMMERS``, so later calls for the
    same user return without touching the network.

    Args:
        user: Twitter screen name to report.
    """
    if user in SPAMMERS:
        return

    conn = pycurl.Curl()
    try:
        conn.setopt(pycurl.USERPWD, "%s:%s" % (USER, PASS))
        # HTTPS, like STREAM_URL: the basic-auth credentials must not be
        # sent in clear text.
        conn.setopt(pycurl.URL, "https://api.twitter.com/1/report_spam.json")
        conn.setopt(pycurl.POSTFIELDS, urllib.urlencode({"screen_name": user}))
        # Only the status code matters; the body is thrown away.
        conn.setopt(pycurl.WRITEFUNCTION, spam_sent)
        conn.perform()
        reported = conn.getinfo(pycurl.HTTP_CODE) == 200
    finally:
        # This runs once per flagged user inside a long-lived stream, so the
        # handle has to be released even when perform() raises.
        conn.close()

    if reported:
        SPAMMERS.append(user)
        print("== ABUSE SENT for " + user + " ==")
 
def _parse_twitter_time(stamp):
    """Parse a Twitter timestamp (stamped ``+0000``) into a naive UTC datetime."""
    return datetime.datetime.strptime(stamp, '%a %b %d %H:%M:%S +0000 %Y')


def _account_age_days(created_at):
    """Return the number of calendar days (UTC) since *created_at*."""
    born = _parse_twitter_time(created_at).date()
    # created_at is UTC, so "today" must be taken in UTC as well.
    return (datetime.datetime.utcnow().date() - born).days


def _looks_legit(followers, age_days, tweet):
    """Return True when account and tweet pass the static spam heuristics."""
    established = followers >= 5 or age_days >= 5
    return established and len(tweet) > 10


def on_receive(data):
    """Handle one chunk of the Twitter stream (pycurl write callback).

    Decodes *data* as a JSON status. Statuses from users already listed in
    ``SPAMMERS`` are ignored. For everyone else the tweet is printed when the
    account has at least 5 followers or is at least 5 days old, and the text
    is longer than 10 characters; otherwise the user is flagged and passed to
    ``handle_spam``.

    Any failure while decoding or processing is reported as
    ``!TWITTER ERROR!`` and the stream keeps running.

    NOTE(review): libcurl does not promise one complete message per callback;
    a payload split across chunks ends up in the error branch.

    Args:
        data: Raw chunk received from the streaming endpoint.
    """
    # Whitespace-only chunks are keep-alives, not malformed statuses.
    if not data.strip():
        return

    try:
        d = json.loads(data)
        account = d['user']
        followers = account['followers_count']
        user = account['screen_name']
        age_days = _account_age_days(account['created_at'])
        tweet = d['text']

        if user in SPAMMERS:
            return

        if _looks_legit(followers, age_days, tweet):
            tweet_time = _parse_twitter_time(d['created_at']).strftime('%Y-%m-%d %H:%M:%S')
            print(user + ": " + tweet + " - " + tweet_time)
        else:
            print("!POTENTIAL SPAMMER - " + user + " flagged: FOLLOWERS - " + str(followers) + " ALIVE - " + str(age_days) + " days TWEET LENGTH - " + str(len(tweet)))
            handle_spam(user)
    except Exception:
        # Best effort: one bad message or failed report must not end the
        # stream. Unlike a bare except, Ctrl-C and SystemExit still propagate.
        print("!TWITTER ERROR!")
 
# Open the filtered stream; perform() blocks while every received chunk is
# handed to on_receive.
conn = pycurl.Curl()
conn.setopt(pycurl.URL, STREAM_URL)
conn.setopt(pycurl.USERPWD, "%s:%s" % (USER, PASS))
conn.setopt(pycurl.POSTFIELDS, data)  # keyword filter built at module top
conn.setopt(pycurl.WRITEFUNCTION, on_receive)
conn.perform()

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.