-
-
Save onmyeoin/62c72a7d61fc840b2689b2cf106f583c to your computer and use it in GitHub Desktop.
A script to download all of a user's tweets into a csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# encoding: utf-8 | |
import tweepy | |
import csv | |
def get_all_tweets(screen_name):
    """Download a user's timeline and save the non-retweets to a CSV file.

    Pages backwards through the timeline of ``screen_name`` 200 tweets at a
    time (extended mode, so ``full_text`` is populated) until the API returns
    an empty page, then writes ``<screen_name>_tweets.csv`` to the current
    working directory with the columns ``id``, ``created_at`` and ``text``.

    The four credential strings below are blank placeholders and must be
    filled in before the function can authenticate.

    Args:
        screen_name: Twitter handle of the account to download.

    Returns:
        None. The result is the CSV file written to disk.
    """
    consumer_key = ""
    consumer_secret = ""
    access_key = ""
    access_secret = ""

    # Authorize against Twitter and build the tweepy client.
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_key, access_secret)
    api = tweepy.API(auth, wait_on_rate_limit=True)

    # Every tweepy Status object fetched so far, newest first.
    alltweets = []

    # Initial request for the most recent tweets; extended mode returns the
    # untruncated text in ``full_text``.
    new_tweets = api.user_timeline(
        screen_name=screen_name, tweet_mode='extended', count=200
    )
    alltweets.extend(new_tweets)

    # Keep paging until a request comes back empty. Looping on the page
    # itself (rather than on a fixed total such as 3200) guarantees
    # termination for accounts of any size, including empty timelines.
    while new_tweets:
        # max_id is inclusive, so ask for ids strictly below the oldest
        # tweet already seen to avoid fetching it twice.
        oldest = new_tweets[-1].id - 1
        print("getting tweets before {}".format(oldest))

        new_tweets = api.user_timeline(
            screen_name=screen_name,
            tweet_mode='extended',
            count=200,
            max_id=oldest,
        )
        alltweets.extend(new_tweets)
        print("...{} tweets downloaded so far".format(len(alltweets)))

    # Drop retweets. A native retweet carries a ``retweeted_status``
    # attribute; old-style manual retweets begin with "RT @". A plain
    # substring test for "RT" would also discard ordinary tweets that merely
    # contain those letters (e.g. "SMART").
    noRT = [
        [tweet.id_str, tweet.created_at, tweet.full_text]
        for tweet in alltweets
        if not (
            hasattr(tweet, 'retweeted_status')
            or tweet.full_text.startswith('RT @')
        )
    ]

    # Write the CSV. newline='' lets the csv module control line endings (no
    # blank rows on Windows); utf-8 keeps emoji and non-ASCII text writable
    # regardless of the platform's default encoding.
    with open('{}_tweets.csv'.format(screen_name), 'w',
              newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(["id", "created_at", "text"])
        writer.writerows(noRT)

    print('{}_tweets.csv was successfully created.'.format(screen_name))
if __name__ == "__main__":
    # Handle of the account whose timeline should be exported to CSV.
    target_account = "realDonaldTrump"
    get_all_tweets(target_account)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi,
I'm getting this error:
`Traceback (most recent call last):
File "Library/Python/3.7/lib/python/site-packages/tweepy/parsers.py", line 48, in parse
json = json_lib.loads(payload)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/json/__init__.py", line 348, in loads
return _default_decoder.decode(s)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/json/decoder.py", line 337, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/json/decoder.py", line 353, in raw_decode
obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Unterminated string starting at: line 1 column 643951 (char 643950)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "PycharmProjects/The Project/_test_2.py", line 65, in <module>
get_all_tweets("realDonaldTrump")
File "PycharmProjects/The Project/_test_2.py", line 24, in get_all_tweets
new_tweets = api.user_timeline(screen_name=screen_name, tweet_mode='extended', count=200)
File "Library/Python/3.7/lib/python/site-packages/tweepy/binder.py", line 250, in _call
return method.execute()
File "Library/Python/3.7/lib/python/site-packages/tweepy/binder.py", line 236, in execute
result = self.parser.parse(self, resp.text)
File "Library/Python/3.7/lib/python/site-packages/tweepy/parsers.py", line 91, in parse
json = JSONParser.parse(self, method, payload)
File "Library/Python/3.7/lib/python/site-packages/tweepy/parsers.py", line 50, in parse
raise TweepError('Failed to parse JSON payload: %s' % e)
tweepy.error.TweepError: Failed to parse JSON payload: Unterminated string starting at: line 1 column 643951 (char 643950)`
Do you have any idea how to fix this?