Skip to content

Instantly share code, notes, and snippets.

@thendrix
Created January 9, 2021 12:31
Show Gist options
  • Save thendrix/c41eb72bf4a1d1887c9c8bf76bf381f1 to your computer and use it in GitHub Desktop.
Save thendrix/c41eb72bf4a1d1887c9c8bf76bf381f1 to your computer and use it in GitHub Desktop.
TwitterPurge removes all your tweets by reading your downloaded Twitter archive, which reaches tweets that are not accessible through the timeline API
#!/usr/bin/env python3
# TwitterPurge is a simplified python script to perform operations on twitter archives.
# I wrote this in a single day while reading an API doc for the first time.
#
# Any tips you can spare will go to fund alt social media and tools
# BTC: bc1qe0el876trjjuuu3zr729n3w3zp4t2k92smklkz
#
# Setup python
# ==========================================================
# Download and install python3 from https://www.python.org/downloads/
# Create a python3 sandbox with latest tweepy
# python3 -m venv sandbox
# source sandbox/bin/activate
# pip install tweepy
#
# Create Twitter API auth
# ==========================================================
# https://realpython.com/twitter-bot-python-tweepy/#creating-twitter-api-authentication-credentials
#
# Enable read + write permission (needed to post and delete tweets) and permission to write DMs.
# Then regenerate your "Access token & secret" so the tokens pick up the new permissions.
#
# Get an archive of your tweets via Twitter web interface
# ==========================================================
# 1. Request archive of your tweets
# 2. Wait until archived and download
# 3. Extract archive, and place this script into the same directory
# 4. Generate config/auth.json (run this script with --generate-auth) and add your keys to the file
# 5. Purge tweets, likes, DMs, etc using this script
import json, html
import os, sys
try:
    import tweepy
except ImportError:
    # tweepy is the only third-party dependency; the "Setup python" notes in
    # the header comment describe how to install it. Only ImportError is
    # caught so that unrelated failures are not mislabelled as "missing".
    print('Missing tweepy see installation comment')
    sys.exit(-1)
# Module-level state shared by the functions below.
g_config_filename = 'config/auth.json'  # path of the JSON file holding the API credentials
g_username = None  # overwritten by LoadTweepy() with the "username" entry of the config
g_rate_limit_exceeded = False  # flipped by RateLimitExceeded(); checked by ImportJSON()
# Ugly hack to expose global 'database' to ops for this script
db = {}
def RateLimitExceeded():
    """Record that the rate limit was hit by setting ``g_rate_limit_exceeded``."""
    global g_rate_limit_exceeded
    g_rate_limit_exceeded = True
def GenerateAuthTemplate():
    """Write an empty credentials template to ``g_config_filename``.

    Nothing happens when the file already exists, so keys that were already
    filled in are never overwritten.

    Returns:
        -1 when the directory or the file cannot be created, otherwise None.
    """
    if os.path.exists(g_config_filename):
        return None

    # Derive the directory from the configured path so both always agree.
    configPath = os.path.dirname(g_config_filename)
    text = '{\n\t"key" : "",\n\t"secret" : "",\n\t"access_token" : "",\n\t"access_token_secret" : "",\n\t"username" : "@jack"\n}\n'
    try:
        if configPath:
            # exist_ok tolerates an existing directory but still raises when
            # the path is occupied by something that is not a directory.
            os.makedirs(configPath, exist_ok=True)
        with open(g_config_filename, 'w', encoding='UTF-8') as fd:
            fd.write(text)
    except OSError as err:
        # Report the actual error instance, not the exception class.
        print(str(err))
        return -1
    return None
def LoadTweepy():
    """Build an authenticated tweepy API object from ``g_config_filename``.

    On success the module-level ``g_username`` is set to the "username" entry
    of the config file.

    Returns:
        The ``tweepy.API`` instance, or None when the credentials are missing,
        empty, or rejected while constructing the API object.

    Raises:
        SystemExit: when the config file cannot be read or is not valid JSON.
    """
    global g_username

    try:
        # Context manager so the handle is closed even if parsing fails.
        with open(g_config_filename, 'r', encoding='UTF-8') as fd:
            secret = json.load(fd)
    except (OSError, ValueError) as err:
        # JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.
        print(f'Failed to load "{g_config_filename}" ({err}); use --generate-auth')
        sys.exit(1)

    hint = f'Failed to load tweepy api. Did you add keys to "{g_config_filename}"?'
    try:
        credentials = [
            secret[name]
            for name in ("key", "secret", "access_token", "access_token_secret")
        ]
        username = secret["username"]
    except (KeyError, TypeError):
        print(hint)
        return None
    if not all(credentials):
        # The generated template ships with empty strings; catch an unfilled
        # file here instead of failing later on every API call.
        print(hint)
        return None

    try:
        auth = tweepy.OAuthHandler(credentials[0], credentials[1])
        auth.set_access_token(credentials[2], credentials[3])
        api = tweepy.API(auth, wait_on_rate_limit=True)
    except Exception as err:
        print(hint)
        print(err)
        return None

    g_username = username
    return api
def LogError(_msg):
    """Report an error message by printing it to standard output."""
    print(_msg)
def DebugStatus(_status):
    """Pretty-print the ``_json`` attribute of ``_status`` with sorted keys."""
    payload = _status._json
    rendered = json.dumps(payload, sort_keys=True, indent=4)
    print(rendered)
def ReadTextFile(_filename, _log=True):
    """Return the UTF-8 decoded contents of ``_filename``.

    Args:
        _filename: Path of the file to read.
        _log: When true, report a failure through ``LogError``.

    Returns:
        The file contents as a string, or None when the file cannot be opened
        or decoded.
    """
    try:
        # The context manager closes the handle even when read() raises.
        with open(_filename, 'r', encoding='UTF-8') as fd:
            return fd.read()
    except (OSError, ValueError):
        # UnicodeDecodeError is a ValueError subclass.
        if _log:
            LogError(f'Could not read file "{_filename}"')
        return None
## Import a JSON file to perform operations on filtered JSON objects
def ImportJSON(_filename, _filter, _op):
    """Run ``_op`` on the ``_filter`` member of every entry in an archive file.

    The file is expected to hold a JavaScript assignment: everything up to and
    including the first ``=`` is discarded and the remainder is parsed as JSON.

    Args:
        _filename: Path of the archive ``.js`` file.
        _filter: Key selected from every entry, e.g. ``"tweet"``.
        _op: Callable invoked with the selected object.

    Processing stops early once ``g_rate_limit_exceeded`` is set. An unreadable
    or unparsable file is reported through ``LogError`` and skipped.
    """
    text = ReadTextFile(_filename)
    if not text:
        return

    # Strip off the start of the text (the assignment prefix) to aid the parser
    marker = text.find('=', 1)
    if marker < 0:
        LogError(f'No "=" prefix found in "{_filename}", nothing to import')
        return

    # HTML entities are unescaped before parsing, as the original author did
    payload = html.unescape(text[marker + 1:])
    # Optionally purge unprintable characters besides CR if needed here
    try:
        entries = json.loads(payload)
    except ValueError as err:
        # json.JSONDecodeError is a ValueError subclass.
        LogError(f'Could not parse "{_filename}": {err}')
        return

    # Processing
    for entry in entries:
        _op(entry[_filter])
        if g_rate_limit_exceeded:
            print('Rate limit exceeded, so processing will stop')
            return
# Operations to perform on parsed JSON objects
def OpListFavoriteIDs(_json):
    """Print the ``tweetId`` value of one archived like entry."""
    tweet_id = _json['tweetId']
    print(tweet_id)
def OpDestroyFavorite(_json):
    """Remove the like described by one archive entry.

    Args:
        _json: Like entry from the archive; its ``tweetId`` is the tweet to
            unlike.

    A failed API call is reported and swallowed so the remaining entries are
    still processed.
    """
    api = LoadTweepy()
    if not api:
        LogError('API failed to load or auth')
        return

    tweet_id = _json['tweetId']
    print(f'Destroy like: {tweet_id}')
    try:
        api.destroy_favorite(tweet_id)
    except Exception as err:
        # `Exception` rather than a bare except, so Ctrl-C and SystemExit
        # still stop a long purge.
        print(f'Failed to destroy {tweet_id}: {err}')
def OpDestroyTweet(_json):
    """Delete the tweet described by one archive entry.

    Args:
        _json: Tweet entry from the archive; its ``id`` is the tweet to delete.

    A failed API call is reported and swallowed so the remaining entries are
    still processed.
    """
    api = LoadTweepy()
    if not api:
        LogError('API failed to load or auth')
        return

    tweet_id = _json['id']
    print(f'Destroy tweet: {tweet_id}')
    try:
        api.destroy_status(tweet_id)
    except Exception as err:
        # `Exception` rather than a bare except, so Ctrl-C and SystemExit
        # still stop a long purge.
        print(f'Failed to destroy {tweet_id}: {err}')
def OpListDirectMessageIDs(_json):
    """Print the id of every ``messageCreate`` entry in one conversation."""
    for entry in _json['messages']:
        created = entry['messageCreate']
        print(created['id'])
def OpDestroyDirectMessageIDs(_json):
    """Delete every direct message of one archived conversation.

    Ids that were deleted successfully are appended to
    ``db["removed-direct-messages"]`` so a later run can skip them.

    Args:
        _json: Conversation object from the archive; its ``messages`` list
            holds entries with a ``messageCreate`` payload.

    On a rate-limit error the global stop flag is raised through
    ``RateLimitExceeded`` and processing of this conversation ends.
    """
    api = LoadTweepy()
    if not api:
        LogError('API failed to load or auth')
        return

    # Entries without a 'messageCreate' payload carry no message id to delete.
    ids = [
        message['messageCreate']['id']
        for message in _json['messages']
        if 'messageCreate' in message
    ]

    # A fresh database has no such key yet; create the list instead of
    # failing with KeyError on the first run.
    removed = db.get("removed-direct-messages") or []
    db["removed-direct-messages"] = removed
    # Set for membership tests; the list stays because it is what gets saved.
    already_removed = set(removed)

    for message_id in ids:
        if message_id in already_removed:
            continue
        print(f'Destroy direct message: {message_id}')
        try:
            api.destroy_direct_message(message_id)
        except tweepy.RateLimitError:
            print(f'Failed to destroy {message_id} due to rate limiting')
            RateLimitExceeded()
            return
        except Exception as err:
            # Keep going: one undeletable message should not block the rest
            # of the conversation on every run.
            print(f'Failed to destroy {message_id}: {err}')
            continue
        removed.append(message_id)
        already_removed.add(message_id)
        # @todo - Write out db to disk as rate limiting will be slower anyway
# Higher level functions
def DestroyAllDirectMessages():
    """Delete every direct message listed in ``data/direct-messages.js``.

    The ids already deleted are loaded from ``db.json`` before processing and
    written back afterwards, so an interrupted or rate-limited run can be
    resumed without repeating work.
    """
    global db

    # Keep the archive path and the database path in separate variables:
    # reusing one name made ImportJSON parse db.json instead of the archive.
    archiveFilename = "data/direct-messages.js"
    typeFilter = "dmConversation"
    dbFilename = 'db.json'

    # Read previous 'database' state to help with DM rate limits/restarts
    try:
        with open(dbFilename, 'r', encoding='UTF-8') as fd:
            loaded = json.load(fd)
    except (OSError, ValueError):
        print(f'Failed to load "{dbFilename}"')
    else:
        if isinstance(loaded, dict):
            db = loaded
        else:
            print(f'Failed to load "{dbFilename}"')

    try:
        ImportJSON(archiveFilename, typeFilter, OpDestroyDirectMessageIDs)
    finally:
        # Write new 'database' state, even when processing was interrupted,
        # so completed deletions are not forgotten.
        try:
            with open(dbFilename, 'w', encoding='UTF-8') as fd:
                json.dump(db, fd)
        except (OSError, TypeError, ValueError):
            print(f'Failed to save "{dbFilename}"')
def DestroyAllLikes():
    """Apply ``OpDestroyFavorite`` to every ``like`` entry of ``data/like.js``."""
    ImportJSON("data/like.js", "like", OpDestroyFavorite)
def DumpLikeIds():
    """Apply ``OpListFavoriteIDs`` to every ``like`` entry of ``data/like.js``."""
    # Passing `print` as the operation instead dumps each whole entry.
    ImportJSON("data/like.js", "like", OpListFavoriteIDs)
def DumpDirectMessageIds():
    """Apply ``OpListDirectMessageIDs`` to every conversation of ``data/direct-messages.js``."""
    # Passing `print` as the operation instead dumps each whole conversation.
    ImportJSON("data/direct-messages.js", "dmConversation", OpListDirectMessageIDs)
def DestroyAllTweets():
    """Apply ``OpDestroyTweet`` to every ``tweet`` entry of ``data/tweet.js``."""
    ImportJSON("data/tweet.js", "tweet", OpDestroyTweet)
    # Original author's reminder, not implemented here: unretweet(id)
def TweetActionTest(_api, _status, _args):
    """Dry-run action: print ``id : text`` for a status with fewer than 10 likes.

    Nothing is deleted. ``_api`` and ``_args`` are not used by this action.
    """
    # @todo Filter... boolean test chain or just callbacks?
    #   - date range / date
    #   - contains string
    # @todo Object that has common checks like substr and date range;
    #   if the object is None then consider it as 'ALL'
    # @todo Callback for action, default 'delete'
    #
    # Alternative filters the author experimented with (all disabled):
    #   _status.retweet_count > 0
    #   _status.id == <some id>
    #   _status.in_reply_to_screen_name == replyto
    #   substr in _status.text
    #   _status.created_at.year == year
    if not _status.favorite_count < 10:
        # Disabled: print(f'Skipping tweet: {_status.id}')
        return

    print(f'{_status.id} : {_status.text}')
    # Disabled destructive variant:
    #   print(f'Destroy tweet: {_status.id}')
    #   _api.destroy_status(_status.id)
def TweetActionDeleteAll(_api, _status, _args):
    """Tweet action that announces and then deletes ``_status`` through ``_api``.

    ``_args`` is not used by this action.
    """
    tweet_id = _status.id
    print(f'Destroy tweet: {tweet_id}')
    _api.destroy_status(_status.id)
# Apply filtered action to tweets
def TimelineTweetAction(_user, _operation, _count=500, _args=None):
    """Run ``_operation`` on up to ``_count`` timeline tweets of ``_user``.

    Args:
        _user: Screen name or id passed to ``api.get_user``.
        _operation: Callable invoked as ``_operation(api, status, _args)``.
        _count: Maximum number of tweets to fetch.
        _args: Opaque value forwarded to ``_operation``.

    Failures of individual operations are reported and do not stop the loop.
    """
    api = LoadTweepy()
    if not api:
        LogError('API failed to load or auth')
        return

    try:
        user_id = api.get_user(_user).id
    except Exception:
        print('No user found')
        return
    print(f'User {_user} = {user_id}')

    statusList = []
    if _count > 0:
        try:
            # One user_timeline request returns at most 200 tweets, so page
            # through the timeline with a Cursor until _count is reached.
            pages = tweepy.Cursor(api.user_timeline, id=user_id, count=min(_count, 200))
            statusList = list(pages.items(_count))
        except Exception as err:
            print('timeline query failed')
            print(err)
            return

    print(f'Found {len(statusList)} tweets via API')
    for status in statusList:
        try:
            _operation(api, status, _args)
        except Exception as err:
            # `Exception` rather than a bare except, so Ctrl-C still works.
            print(f'Failed to destroy {status.id}: {err}')
# Uses API only instead of archives (limited reach)
def TimelineDestroyTweets(_user=None, _count=500):
    """Delete up to ``_count`` of the most recent timeline tweets of ``_user``.

    Args:
        _user: Screen name or id. When None, the "username" entry of the
            config file is used.
        _count: Maximum number of tweets to fetch and delete.
    """
    if _user is None:
        # A default of `g_username` in the signature is evaluated when the
        # function is defined, while the global is still None. Resolve it
        # here instead, after LoadTweepy() has read it from the config file.
        if not LoadTweepy():
            LogError('API failed to load or auth')
            return
        _user = g_username
        if isinstance(_user, str):
            # The config template writes the name as "@jack"; the leading
            # '@' is not part of a screen name.
            _user = _user.lstrip('@')

    # Same fetch-and-apply loop as every other timeline action.
    TimelineTweetAction(_user, TweetActionDeleteAll, _count)
def TimelineDestroyFavorites(_user=None, _count=500):
    """Remove likes page by page until the API returns no more favorites.

    Args:
        _user: Passed straight to ``api.favorites``. The previous default,
            ``g_username``, was evaluated at definition time and therefore was
            always None, so None is now spelled out.
            NOTE(review): tweepy presumably returns the authenticated
            account's likes for None - confirm.
        _count: Currently unused; kept for interface compatibility.
    """
    api = LoadTweepy()
    if not api:
        LogError('API failed to load or auth')
        return

    # Can only purge one page at a time, so keep asking for the next page.
    while True:
        try:
            favorites = api.favorites(_user)
        except Exception as e:
            print('favorites query failed')
            print(e)
            return

        print(f'Found {len(favorites)} favorites via API')
        if not favorites:
            return

        destroyed = 0
        for favorite in favorites:
            print(f'Destroy favorite: {favorite.id}')
            try:
                api.destroy_favorite(favorite.id)
            except Exception as err:
                print(f'Failed to destroy {favorite.id}: {err}')
            else:
                destroyed += 1

        if destroyed == 0:
            # The same undeletable favorites would be returned again on the
            # next query; stop instead of looping forever.
            print('No favorites could be destroyed, so processing will stop')
            return
def Post(_text):
    """Post ``_text`` as a status update; do nothing when the API cannot be loaded."""
    api = LoadTweepy()
    if not api:
        return
    api.update_status(_text)
def TestArgs():
    """Print a fixed placeholder message."""
    message = 'Yes, this is the third room.'
    print(message)
# Main entry
if __name__ == "__main__":
    # (flag, handler, help text)
    cmds = [
        # Archive based commands
        ('--archived-tweets', DestroyAllTweets, 'Delete archived tweets from Twitter'),
        ('--archived-likes', DestroyAllLikes, 'Delete archived likes from Twitter'),
        ('--archived-dms', DestroyAllDirectMessages, 'Delete archived DMs from Twitter'),
        # Timeline based commands
        ('--timeline-tweets', TimelineDestroyTweets, 'Delete latest tweets via API'),
        ('--timeline-likes', TimelineDestroyFavorites, 'Delete latest likes via API'),
        ('--generate-auth', GenerateAuthTemplate, 'Create a default auth.json to fill out')
    ]
    handlers = {flag: handler for flag, handler, _ in cmds}

    def _PrintUsage():
        # List every supported flag with its description.
        print('Delete tweets, likes, and DMs from Twitter')
        for cmd in cmds:
            print(f'\t{cmd[0]} \t{cmd[2]}')

    args = sys.argv[1:]
    # Only the first argument selects the command. `args` is a list, so it has
    # to be indexed before it can be compared with the help flags.
    if not args or args[0] in ('--help', '-h'):
        _PrintUsage()
    elif args[0] in handlers:
        handlers[args[0]]()
    else:
        # An unknown flag used to be ignored silently.
        print(f'Unknown command: {args[0]}')
        _PrintUsage()
        sys.exit(1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment