Skip to content

Instantly share code, notes, and snippets.

@kiliankoe
Last active October 14, 2017 21:09
Show Gist options
  • Save kiliankoe/65d7fd44fb84b291a1ce2b391ed69f32 to your computer and use it in GitHub Desktop.
Save kiliankoe/65d7fd44fb84b291a1ce2b391ed69f32 to your computer and use it in GitHub Desktop.
Download the last ~2k @DVBAG tweets. The Twitter API only returns the most recent ~3.2k tweets of a timeline; replies and "old-style" RTs are then filtered out of that set.
#!/usr/bin/env python
import csv
import re
import datetime
from time import mktime
from tweepy import OAuthHandler
from tweepy import API
# Twitter API credentials, empty by default -- presumably to be filled in
# before running the script.
# Application (consumer) key pair, passed to OAuthHandler below.
consumer_key = ''
consumer_secret = ''
# User access token pair, passed to auth.set_access_token() below.
access_token = ''
access_token_secret = ''
def is_reply_or_rt(tweet):
    """Return True if *tweet* is a reply or an 'old-style' retweet.

    A tweet counts as a reply when its ``in_reply_to_status_id`` attribute is
    not None, and as an 'old-style' retweet when its ``text`` begins with
    ``'RT @'``.

    Args:
        tweet: Object exposing ``in_reply_to_status_id`` and ``text``
            attributes (the script passes the items returned by
            ``api.user_timeline``).

    Returns:
        bool: True for replies and 'old-style' RTs, False otherwise.
    """
    if tweet.in_reply_to_status_id is not None:
        return True
    # Plain prefix test; equivalent to the anchored pattern r'^RT @'.
    return tweet.text.startswith('RT @')
if __name__ == '__main__':
    # Script entry point: page through the @DVBAG timeline, drop replies and
    # 'old-style' RTs, and write the remaining tweets to dump.csv.
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    # wait_on_rate_limit=True: presumably makes tweepy wait instead of
    # failing when the rate limit is hit -- confirm against the tweepy docs.
    api = API(auth, wait_on_rate_limit=True)

    all_tweets = []
    oldest = None  # no upper bound on the first request
    while True:
        new_tweets = api.user_timeline(screen_name='DVBAG', count=200, max_id=oldest)
        if not new_tweets:
            # An empty page ends the loop.
            break
        all_tweets.extend(new_tweets)
        # Ask for tweets strictly older than the last one received so the
        # next page does not repeat it (assumes max_id is inclusive).
        oldest = new_tweets[-1].id - 1
        # Report after extending so the count includes the page just fetched.
        print(f'...{len(all_tweets)} tweets downloaded so far')

    out_tweets = [
        [tweet.id_str, tweet.created_at, tweet.text]
        for tweet in all_tweets
        if not is_reply_or_rt(tweet)
    ]

    # newline='' is required by the csv module so that embedded newlines in
    # tweet text are quoted correctly and no blank rows appear on Windows.
    # UTF-8 keeps non-ASCII tweet text writable regardless of the locale.
    with open('dump.csv', 'w', newline='', encoding='utf-8') as dump:
        writer = csv.writer(dump)
        writer.writerow(['id', 'timestamp', 'text'])
        writer.writerows(out_tweets)

    print(f'Saved {len(out_tweets)} tweets excluding RTs and replies.')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment