Skip to content

Instantly share code, notes, and snippets.

Created May 18, 2011 07:16
Show Gist options
  • Save pvieytes/978125 to your computer and use it in GitHub Desktop.
Save pvieytes/978125 to your computer and use it in GitHub Desktop.
Parsing Twitter's user timeline with Python
import Tweet
import simplejson
import urllib2
def read_tweets(user, num_tweets):
    """Download a user's latest tweets and return them as Tweet objects.

    Parameters:
        user: screen name whose timeline is requested.
        num_tweets: maximum number of tweets to ask the API for.

    Returns:
        A list of populated Tweet objects, in the order the API returned
        them.

    Raises:
        urllib2.URLError: if the HTTP request fails.
        KeyError: if a tweet lacks one of the expected JSON fields.

    NOTE(review): several fragments of this function (the endpoint host and
    path, the ``.read()`` call, the retweet branching and the
    ``set_tweet_url()`` / ``set_profile_url()`` calls) were missing from the
    text this was rebuilt from; they are reconstructed here -- confirm
    against the original script.
    """
    import urllib  # local import: only needed to encode the query string

    # ``import Tweet`` binds a *module*; calling it directly would raise
    # "module object is not callable".  Accept either the module (and take
    # its Tweet class) or the class itself.
    tweet_factory = getattr(Tweet, 'Tweet', Tweet)

    # Encode the values so a screen name with reserved characters cannot
    # corrupt the query string.
    query = urllib.urlencode([
        ('screen_name', user),
        ('count', num_tweets),
        ('include_rts', 'true'),
    ])
    # NOTE(review): historical v1 REST endpoint, reconstructed -- confirm it
    # is still the intended target before relying on it.
    url = "http://api.twitter.com/1/statuses/user_timeline.json?" + query

    response = urllib2.urlopen(url)
    try:
        content = response.read()
    finally:
        # Always release the connection, even if the read fails.
        response.close()

    tweets = []
    for js_tweet in simplejson.loads(content):
        tweet = tweet_factory()
        tweet.id = js_tweet['id']
        tweet.username = js_tweet['user']['screen_name']

        # A retweet carries the original status under 'retweeted_status';
        # test for the key instead of catching every exception.
        original = js_tweet.get('retweeted_status')
        tweet.retweeted = bool(original)
        author = original['user'] if original else js_tweet['user']
        if tweet.retweeted:
            tweet.retweet_user = author['screen_name']
        # Show the avatar of whoever actually wrote the text.
        tweet.user_avatar_url = author['profile_image_url']

        tweet.set_date(js_tweet['created_at'])
        tweet.set_tweet_url()    # tweet.id, tweet.username must exist
        # convert plain text to html text
        tweet.set_text(js_tweet['text'])
        tweet.set_profile_url()  # tweet.retweeted, tweet.username must exist
        tweets.append(tweet)
    return tweets
import time
from datetime import datetime
import re
class Tweet(object):
    """Store the information of a single tweet.

    Attributes start as None and are filled in by the caller or by the
    ``set_*`` helpers below.

    NOTE(review): the anchor markup and the profile/search/status URL
    templates were blank in the text this class was rebuilt from; the
    values used here are reconstructions -- confirm them.
    """

    id = None               # numeric id of the tweet
    username = None         # screen name of the timeline owner
    url = None
    user_avatar_url = None  # avatar of the author of the text
    tweet_url = None        # set by set_tweet_url()
    profile_url = None      # set by set_profile_url()
    html_text = None        # set by set_text()
    retweeted = None        # True when the tweet is a retweet
    retweet_user = None     # screen name of the original author (retweets)
    date = None             # set by set_date()

    # e.g. "Tue Apr 26 08:57:55 +0000 2011"
    _DATE_FORMAT = "%a %b %d %H:%M:%S +0000 %Y"
    _PROFILE_BASE = "http://twitter.com/"
    _SEARCH_BASE = "http://twitter.com/search?q="
    # One pattern, scanned once.  URLs come first so that an '@' or '#'
    # inside a URL is consumed by the URL and not linked a second time.
    # '+' (not '*') keeps a bare '@' or '#' from matching as an empty token.
    _TOKEN_RE = re.compile(
        r"(?P<url>https?://[^ ]+)"
        r"|(?P<user>@[0-9a-zA-Z+_]+)"
        r"|(?P<tag>#[0-9a-zA-Z+_]+)")

    def set_date(self, date_str):
        """Parse a ``created_at`` string and store it in ``self.date``.

        The result is a naive datetime holding exactly the fields written
        in the string (which is stamped ``+0000``).  Parsing directly avoids
        the round trip through local time (``mktime``/``fromtimestamp``),
        which can shift the value around local DST transitions.

        Raises:
            ValueError: if ``date_str`` does not match the expected format.
        """
        self.date = datetime.strptime(date_str, self._DATE_FORMAT)

    def set_text(self, plain_text):
        """Convert plain text into HTML with URL, user and hashtag links.

        The text is scanned in a single pass, so a mention or hashtag that
        appears more than once is linked each time without wrapping an
        already generated link again, and hashtags are linked wherever they
        occur rather than only before a space or at the end of the text.

        NOTE(review): the text itself is not HTML-escaped here; this assumes
        the caller supplies text that is already safe to embed -- confirm.
        """
        self.html_text = self._TOKEN_RE.sub(self._link_for_match, plain_text)

    @staticmethod
    def _anchor(href, label):
        """Return an ``<a>`` element; quotes in ``href`` are escaped so the
        value cannot break out of the attribute."""
        return '<a href="%s">%s</a>' % (href.replace('"', '&quot;'), label)

    @classmethod
    def _link_for_match(cls, match):
        """Build the link for one URL, @user or #hashtag match."""
        token = match.group(0)
        if match.group('url'):
            return cls._anchor(token, token)
        if match.group('user'):
            # Drop the leading '@' to get the screen name.
            return cls._anchor(cls._PROFILE_BASE + token[1:], token)
        # Hashtag: '#' must be percent-encoded inside a query string.
        return cls._anchor(cls._SEARCH_BASE + '%23' + token[1:], token)

    def set_profile_url(self):
        """Create the profile URL of the author of the text.

        For a retweet this is the original author (``retweet_user``),
        otherwise the timeline owner (``username``).
        """
        if self.retweeted:
            owner = self.retweet_user
        else:
            owner = self.username
        self.profile_url = "%s%s" % (self._PROFILE_BASE, owner)

    def set_tweet_url(self):
        """Create the URL of the tweet from ``username`` and ``id``."""
        self.tweet_url = "%s%s/status/%s" % (
            self._PROFILE_BASE, self.username, self.id)
Copy link

Will the +0000 in the date format of the created_at field in the tweet JSON always stay the same? And is this format consistent across all of Twitter's APIs? I have heard a lot of chatter that the Search API puts the +0000 at the end of the timestamp, rather than in the middle as in the format you've handled, which is the same one the Streaming API uses.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment