Skip to content

Instantly share code, notes, and snippets.

@marians
Created February 20, 2012 11:11
Show Gist options
  • Save marians/1868815 to your computer and use it in GitHub Desktop.
Save marians/1868815 to your computer and use it in GitHub Desktop.
Collects tweets that mention given keywords and stores the results in a MySQL table
-- Storage for collected tweets: one row per tweet, keyed by the tweet id.
-- The column list matches the INSERT issued by save_tweet() in the
-- collector script.
CREATE TABLE `tweets` (
-- Tweet id; as the primary key it lets INSERT IGNORE skip tweets that were
-- already stored.
`id` varchar(24) NOT NULL DEFAULT '',
-- Creation time, written by the collector as 'YYYY-MM-DD HH:MM:SS'.
`created_at` datetime NOT NULL,
-- Author details copied from the tweet's `user` object at collection time:
-- id, screen name, follower/friend/listed/status counts, free-text location
-- (NULL when empty) and UTC offset.
`user_id` bigint(20) unsigned NOT NULL,
`user_name` varchar(128) NOT NULL DEFAULT '',
`user_followers` int(11) unsigned NOT NULL,
`user_friends` int(10) unsigned DEFAULT NULL,
`user_listed` int(10) unsigned DEFAULT NULL,
`user_statuses` int(10) unsigned DEFAULT NULL,
`user_location` varchar(100) DEFAULT NULL,
`user_utc_offset` int(11) DEFAULT NULL,
-- 0/1 flags computed by the collector.
`is_retweet` tinyint(3) unsigned NOT NULL,
`is_reply` tinyint(3) unsigned NOT NULL,
-- Tweet body.
-- NOTE(review): limited to 200 characters; confirm this is enough for the
-- tweets being collected.
`text` varchar(200) NOT NULL DEFAULT '',
PRIMARY KEY (`id`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
import tweetstream
import MySQLdb
import sys
import datetime
# Keywords handed to the stream filter as its ``track`` argument.
words = ["keyword1", "keyword2"]

# Twitter account used to open the filter stream.
TWITTER_USER = 'YourUserName'
TWITTER_PASS = 'YourPassword'

# MySQL connection settings passed to MySQLdb.connect().
DB_HOST = "localhost"
DB_USER = "root"
DB_PASS = ""
DB_NAME = "tweets"
def twitter_to_iso_time(dt):
    """Convert a Twitter ``created_at`` string to ``YYYY-MM-DD HH:MM:SS`` in UTC.

    The expected input looks like ``"Mon Feb 20 11:11:00 +0000 2012"``:
    weekday, English month abbreviation, day, time, UTC offset, year.

    Differences from a fixed-column ``strptime('%b ...')`` parse:

    * fields are split on whitespace, so spacing variations do not shift
      the wrong characters into a field;
    * the month is resolved through a fixed English table, because
      ``%b`` follows the process locale;
    * the UTC offset is applied rather than discarded, so the result is
      always expressed in UTC (unchanged for ``+0000`` input).

    :param dt: timestamp string in Twitter's ``created_at`` format.
    :returns: the instant formatted as ``'%Y-%m-%d %H:%M:%S'``.
    :raises ValueError: if ``dt`` does not consist of the six expected
        fields or one of them cannot be parsed.
    """
    month_names = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                   'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')

    fields = dt.split()
    if len(fields) != 6:
        raise ValueError('unexpected Twitter timestamp: %r' % (dt,))
    _weekday, month_name, day, clock, offset, year = fields

    # capitalize() keeps the lookup case-insensitive, as '%b' parsing is.
    month_name = month_name.capitalize()
    if month_name not in month_names:
        raise ValueError('unknown month in Twitter timestamp: %r' % (dt,))
    month = month_names.index(month_name) + 1

    # Offset has the form +HHMM / -HHMM.
    if len(offset) != 5 or offset[0] not in '+-' or not offset[1:].isdigit():
        raise ValueError('bad UTC offset in Twitter timestamp: %r' % (dt,))
    shift = datetime.timedelta(hours=int(offset[1:3]),
                               minutes=int(offset[3:5]))
    if offset[0] == '-':
        shift = -shift

    # Only numeric directives are used here, so parsing is locale-independent.
    local = datetime.datetime.strptime(
        '%s %02d %s %s' % (year, month, day, clock),
        '%Y %m %d %H:%M:%S')
    return (local - shift).strftime('%Y-%m-%d %H:%M:%S')
def save_tweet(tweet):
    """Insert one streamed tweet into the ``tweets`` table.

    Uses the module-level ``cursor`` created in the ``__main__`` section.
    The statement is ``INSERT ... IGNORE``, so a tweet whose id is already
    stored is skipped by the database instead of raising.

    :param tweet: decoded tweet dictionary as yielded by the stream; must
        contain ``id``, ``created_at``, ``text`` and a ``user`` dictionary
        with ``id``, ``screen_name`` and the follower/friend/listed/status
        counts.
    """
    user = tweet['user']

    # Per Twitter's tweet-object documentation, a retweet carries the
    # original status under 'retweeted_status'; the 'retweeted' flag only
    # says whether the authenticating user retweeted it. The flag is still
    # honoured so previously detected cases keep working.
    if tweet.get('retweeted') or tweet.get('retweeted_status') is not None:
        is_retweet = 1
    else:
        is_retweet = 0

    is_reply = 0
    if tweet.get('in_reply_to_user_id_str') is not None:
        is_reply = 1

    # Empty or missing locations are stored as NULL.
    location = user.get('location') or None
    if location is not None:
        location = location.encode('utf-8')

    sql = """
        INSERT LOW_PRIORITY IGNORE INTO tweets
        (id, created_at, user_id, user_name, user_followers,
        user_friends, user_listed, user_statuses, user_location,
        user_utc_offset, is_retweet, is_reply, text) VALUES (
        %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
    """
    # Values are passed separately so the driver does the quoting.
    params = [
        tweet['id'],
        twitter_to_iso_time(tweet['created_at']),
        user['id'],
        user['screen_name'].encode('utf-8'),
        user['followers_count'],
        user['friends_count'],
        user['listed_count'],
        user['statuses_count'],
        location,
        user.get('utc_offset'),
        is_retweet,
        is_reply,
        tweet['text'].encode('utf-8'),
    ]
    cursor.execute(sql, params)
# Script entry point: connect to MySQL, then follow the Twitter filter stream
# and store every matching tweet until the stream disconnects.
if __name__ == '__main__':
    # ``cursor`` is deliberately a module-level name: save_tweet() reads it.
    try:
        conn = MySQLdb.connect(host=DB_HOST, user=DB_USER,
                               passwd=DB_PASS, db=DB_NAME)
        cursor = conn.cursor(MySQLdb.cursors.DictCursor)
        cursor.execute('SET CHARACTER SET utf8')
    except MySQLdb.Error as e:
        print("Error %d: %s" % (e.args[0], e.args[1]))
        sys.exit(1)

    try:
        with tweetstream.FilterStream(TWITTER_USER, TWITTER_PASS,
                                      track=words) as stream:
            for tweet in stream:
                # NOTE(review): the stream presumably also delivers
                # non-status messages (e.g. delete/limit notices) that have
                # no 'user'/'text'; skip them instead of failing with
                # KeyError. Confirm against the tweetstream documentation.
                if 'user' not in tweet or 'text' not in tweet:
                    continue
                print("From: %s (%d)\n%s" % (
                    tweet["user"]["screen_name"], stream.count, tweet['text']))
                print("")
                save_tweet(tweet)
    except tweetstream.ConnectionError as e:
        print("Disconnected from twitter. Reason: %s" % (e.reason,))
    finally:
        # Release the database resources however the loop ended
        # (disconnect, Ctrl-C, unexpected error).
        cursor.close()
        conn.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment