Last active
April 22, 2018 10:53
-
-
Save keithrozario/2cf05656335e604e1ec1896c8ff4cd44 to your computer and use it in GitHub Desktop.
Delete all tweets older than x days
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json
import logging
from datetime import datetime, timedelta

import tweepy
from tweepy import TweepError

from custom_config import consumer_key, consumer_secret, access_key, access_secret
# Tweets selected for deletion; the scan below appends one dict per tweet
# with the keys 'id', 'created_at' and 'json'.
tweets_delete = []
# Age threshold: tweets created more than this many days ago are deleted.
max_age = 90  # in days
# JSON-lines file the selected tweets are written to before deletion.
archive_file = 'archive.jsonl'
# Logging setup
# The root logger writes INFO and above to delete_tweets.log; filemode='w'
# truncates that file at the start of every run.
logging.basicConfig(filename='delete_tweets.log',
                    filemode='w',
                    level=logging.INFO,
                    format='%(asctime)s %(message)s',
                    datefmt='%m/%d/%Y %I:%M:%S %p')
logger = logging.getLogger(__name__)
# Also echo this module's messages through a stream handler so progress is
# visible while the script runs. No formatter is attached to this handler.
console = logging.StreamHandler()
console.setLevel(logging.INFO)
logger.addHandler(console)
# Connecting to Twitter API via tweepy
logger.info("Connecting to Twitter API")
# OAuth 1a: application credentials first, then the user's access token.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_key, access_secret)
# wait_on_rate_limit=True asks tweepy to wait out rate limits instead of
# raising, which matters when walking a long timeline.
api = tweepy.API(auth, wait_on_rate_limit=True)
# Getting Tweets over max_age days old
logger.info("GETTING TWEETS older than %d days", max_age)
# tweepy 3.x (the series that still exports TweepError) reports created_at as
# a naive UTC datetime, so the cutoff has to be naive UTC as well. Using
# datetime.now() (local time) would shift the cutoff by the machine's UTC
# offset. The cutoff is computed once so every tweet is judged against the
# same instant.
cutoff = datetime.utcnow() - timedelta(days=max_age)
# Start from zero so the totals logged below are the real number of tweets.
tweet_count = 0
for tweet in tweepy.Cursor(api.user_timeline).items():
    if tweet.created_at < cutoff:  # anything older than max_age days
        # Keep the raw JSON so the tweet can be archived before deletion.
        tweets_delete.append({'id': tweet.id,
                              'created_at': tweet.created_at,
                              'json': json.dumps(tweet._json)})
    tweet_count += 1
    if tweet_count % 100 == 0:
        logger.info("Processed %d tweets", tweet_count)
logger.info("%d Tweets Processed", tweet_count)
logger.info("Found %d tweets older than %d days", len(tweets_delete), max_age)
# Archive tweets before deletion
logger.info("Archiving tweets")
# Append instead of truncating: tweets archived by an earlier run have already
# been deleted from Twitter, so overwriting the file would destroy the only
# remaining copy of them.
with open(archive_file, 'a') as archive:
    # One JSON document per line (JSON-lines format).
    archive.writelines(tweet['json'] + "\n" for tweet in tweets_delete)
tweet_count = len(tweets_delete)
logger.info("Archived %d tweets to %s", tweet_count, archive_file)
# Deleting Tweets
# Count only successful deletions, starting from zero so the final total is
# accurate.
tweet_count = 0
for tweet in tweets_delete:
    try:
        api.destroy_status(tweet['id'])
    except TweepError as e:
        # A TweepError raised without an HTTP response (e.g. a connection
        # failure) has response set to None; fall back to the exception text
        # instead of crashing on e.response.text.
        response = getattr(e, 'response', None)
        detail = response.text if response is not None else str(e)
        logger.warning('Tweet ID:%d could not be deleted due to %s',
                       tweet['id'], detail)
    else:
        logger.info('Tweet ID:%d from %s deleted!',
                    tweet['id'], tweet['created_at'])
        tweet_count += 1
logger.info("%d Tweets Deleted", tweet_count)
logger.info("END")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment