-
-
Save onmyeoin/62c72a7d61fc840b2689b2cf106f583c to your computer and use it in GitHub Desktop.
#!/usr/bin/env python | |
# encoding: utf-8 | |
import tweepy | |
import csv | |
def get_all_tweets(screen_name):
    """Download a user's recent non-retweet tweets and save them to a CSV file.

    Pages backwards through the timeline of ``screen_name`` 200 tweets at a
    time, filters out retweets, and writes the remaining tweets to
    ``<screen_name>_tweets.csv`` in the current working directory with the
    columns ``id``, ``created_at`` and ``text``.

    Paging stops as soon as the API returns an empty page or once more than
    3200 tweets have been collected, whichever happens first. If the account
    has no retrievable tweets, a CSV containing only the header row is
    written.

    Args:
        screen_name: Screen name of the account to download, e.g.
            ``"realDonaldTrump"``.

    Returns:
        None. The result is the CSV file written to disk.
    """
    # Twitter API credentials -- fill these in before running.
    consumer_key = ""
    consumer_secret = ""
    access_key = ""
    access_secret = ""

    # Upper bound on the number of collected tweets (same cap as before).
    max_tweets = 3200

    # authorize twitter, initialize tweepy
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_key, access_secret)
    api = tweepy.API(auth, wait_on_rate_limit=True)

    # list to hold all the tweepy Tweets
    alltweets = []

    # make initial request for most recent tweets, with extended mode
    # enabled to get the full (untruncated) tweet text
    new_tweets = api.user_timeline(
        screen_name=screen_name, tweet_mode='extended', count=200
    )

    # Keep paging while the API still returns tweets. Looping on the page
    # itself (rather than only on the running total) guarantees termination
    # for accounts with fewer tweets than the cap, and avoids indexing into
    # an empty list when the account has no tweets at all.
    while new_tweets:
        alltweets.extend(new_tweets)
        print("...{} tweets downloaded so far".format(len(alltweets)))

        if len(alltweets) > max_tweets:
            break

        # all subsequent requests use the max_id param to prevent duplicates:
        # ask only for tweets strictly older than the oldest one seen so far
        oldest = new_tweets[-1].id - 1
        print("getting tweets before {}".format(oldest))
        new_tweets = api.user_timeline(
            screen_name=screen_name,
            tweet_mode='extended',
            count=200,
            max_id=oldest,
        )

    # Remove retweets. A retweet either carries a ``retweeted_status``
    # attribute or has text starting with "RT @". Checking for the bare
    # substring "RT" would also discard ordinary tweets that merely contain
    # those letters (e.g. "ALERT").
    rows = [
        [tweet.id_str, tweet.created_at, tweet.full_text]
        for tweet in alltweets
        if not (
            hasattr(tweet, 'retweeted_status')
            or tweet.full_text.startswith('RT @')
        )
    ]

    # write to csv; newline='' is what the csv module expects (prevents blank
    # rows on Windows) and an explicit UTF-8 encoding keeps emoji and other
    # non-ASCII text from failing under a non-UTF-8 default locale
    with open('{}_tweets.csv'.format(screen_name), 'w',
              newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(["id", "created_at", "text"])
        writer.writerows(rows)

    print('{}_tweets.csv was successfully created.'.format(screen_name))
if __name__ == "__main__":
    # Account whose timeline is exported; change this handle to download a
    # different user's tweets.
    target_account = "realDonaldTrump"
    get_all_tweets(target_account)
Hi,
I'm getting this error:
`Traceback (most recent call last):
File "Library/Python/3.7/lib/python/site-packages/tweepy/parsers.py", line 48, in parse
json = json_lib.loads(payload)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/json/__init__.py", line 348, in loads
return _default_decoder.decode(s)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/json/decoder.py", line 337, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/json/decoder.py", line 353, in raw_decode
obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Unterminated string starting at: line 1 column 643951 (char 643950)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "PycharmProjects/The Project/_test_2.py", line 65, in <module>
get_all_tweets("realDonaldTrump")
File "PycharmProjects/The Project/_test_2.py", line 24, in get_all_tweets
new_tweets = api.user_timeline(screen_name=screen_name, tweet_mode='extended', count=200)
File "Library/Python/3.7/lib/python/site-packages/tweepy/binder.py", line 250, in _call
return method.execute()
File "Library/Python/3.7/lib/python/site-packages/tweepy/binder.py", line 236, in execute
result = self.parser.parse(self, resp.text)
File "Library/Python/3.7/lib/python/site-packages/tweepy/parsers.py", line 91, in parse
json = JSONParser.parse(self, method, payload)
File "Library/Python/3.7/lib/python/site-packages/tweepy/parsers.py", line 50, in parse
raise TweepError('Failed to parse JSON payload: %s' % e)
tweepy.error.TweepError: Failed to parse JSON payload: Unterminated string starting at: line 1 column 643951 (char 643950)`
Do you have any idea how to fix this?
Hi there
Thanks for creating this. I am completely new to GitHub and coding - apologies if this is a really simple/trivial question...
I have copied the script above and replaced all the instances of e.g. realDonaldTrump and screen_name on lines 7, 24, 54, and 59 with my specific search.
Am a Mac user. Do I now simply copy this script into 'Terminal', or does it go somewhere else, or... ?
And have I missed anything else vital please?
Thanks very much!