Get a Twitter user's timeline (up to 3,200 tweets) and write it to a CSV; uses the sixohsix Twitter module's retry facility (30-second sleep) if you're rate limited.
# don't keep this script in public_html!!!!
import twitter, sys, json, csv, time
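# NB: the 'twitter' import above is assumed to be sixohsix's "Python Twitter Tools"
# package (installed with `pip install twitter`), not the unrelated python-twitter library.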
def twitter_user_timeline(twitter_api, q):
    '''Get the user's most recent tweet ID, then paginate down from it
    200 tweets at a time, up to the API maximum of 3,200.'''
    user_timeline = twitter_api.statuses.user_timeline(screen_name=q, count=1)
    print user_timeline[0]['id']
    ids = [user_timeline[0]['id']]
    statuses = list(user_timeline)  # keep the newest tweet as well
    for i in range(0, 16):  ## iterate through all tweets available with this API = 3,200
        ## tweet extract method with the last list item as the max_id
        ## (a negative index counts from the end of the list, not the start;
        ##  subtract 1 because max_id is inclusive and would repeat the boundary tweet)
        user_timeline = twitter_api.statuses.user_timeline(screen_name=q,
                                                           count=200,
                                                           include_rts=False,  # Twitter's parameter name for excluding retweets
                                                           max_id=ids[-1] - 1)
        statuses += user_timeline
        # time.sleep(300)  ## 5 minute rest between api calls, uncomment this if you're being rate limited
        for tweet in user_timeline:
            ids.append(tweet['id'])  ## append those tweet ids
            print tweet['id']  # I like to watch
    return statuses
''' helper functions, clean data, unpack dictionaries '''
def getVal(val):
    clean = ""
    if isinstance(val, bool):
        return val
    if isinstance(val, int):
        return val
    if val:
        clean = val.encode('utf-8')
    return clean

def getLng(val):
    if isinstance(val, dict):
        return val['coordinates'][0]

def getLat(val):
    if isinstance(val, dict):
        return val['coordinates'][1]

def getPlace(val):
    if isinstance(val, dict):
        return val['full_name'].encode('utf-8')
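# For reference: a tweet's 'coordinates' field is a GeoJSON-style dict, e.g.
# {'type': 'Point', 'coordinates': [longitude, latitude]}, which is why
# getLng reads index 0 and getLat reads index 1 above.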
# == OAuth Authentication ==
# The consumer keys can be found on your application's Details
consumer_key=""
consumer_secret=""
# Create an access token under the "Your access token" section
access_token=""
access_token_secret=""
auth = twitter.oauth.OAuth(access_token,
                           access_token_secret,
                           consumer_key,
                           consumer_secret)
twitter_api = twitter.Twitter(auth=auth)
twitter_api.retry = True  # retrying should prevent rate-limit errors, but it only sleeps for 30 seconds at a time, so consider the time.sleep above as well
# Sample usage
q = "David_Cameron"
results = twitter_user_timeline(twitter_api, q)
print len(results)
# Show one sample search result by slicing the list...
# print json.dumps(results[0], indent=1)
csvfile = open(q + '_timeline.csv', 'wb')  # binary mode for the csv module under Python 2
csvwriter = csv.writer(csvfile)
csvwriter.writerow(['created_at',
                    'user-screen_name',
                    'text',
                    'coordinates lng',
                    'coordinates lat',
                    'place',
                    'user-location',
                    'user-geo_enabled',
                    'user-lang',
                    'user-time_zone',
                    'user-statuses_count',
                    'user-followers_count',
                    'user-created_at'])
for tweet in results:
    csvwriter.writerow([tweet['created_at'],
                        getVal(tweet['user']['screen_name']),
                        getVal(tweet['text']),
                        getLng(tweet['coordinates']),
                        getLat(tweet['coordinates']),
                        getPlace(tweet['place']),
                        getVal(tweet['user']['location']),
                        getVal(tweet['user']['geo_enabled']),
                        getVal(tweet['user']['lang']),
                        getVal(tweet['user']['time_zone']),
                        getVal(tweet['user']['statuses_count']),
                        getVal(tweet['user']['followers_count']),
                        getVal(tweet['user']['created_at'])
                        ])
csvfile.close()  # flush and close before anything reads the file back
print "done"