thendrix/TwitterPurge.py

## TwitterPurge.py
#!/usr/bin/env python3
# TwitterPurge is a simplified python script to perform operations on twitter archives.
# I wrote this in a single day while reading an API doc for the first time.
#
# Any tips you can spare will go to fund alt social media and tools
# BTC: bc1qe0el876trjjuuu3zr729n3w3zp4t2k92smklkz
#
# Setup python
# ==========================================================
# Download and install python3 from https://www.python.org/downloads/
# Create a python3 sandbox with latest tweepy
# python3 -m venv sandbox
# source sandbox/bin/activate
# pip install tweepy
#
# Create Twitter API auth
# ==========================================================
# https://realpython.com/twitter-bot-python-tweepy/#creating-twitter-api-authentication-credentials
#
# Enable permission to read+write (post tweets) and write DMs
# Now reset your "Access token & secret" to update permissions
#
# Get an archive of your tweets via Twitter web interface
# ==========================================================
# 1. Request archive of your tweets
# 2. Wait until archived and download
# 3. Extract archive, and place this script into the same directory
# 4. Generate config/auth.json (run this script with --generate-auth) and add your API keys to the file
# 5. Purge tweets, likes, DMs, etc using this script

import json, html
import os, sys


try:
	import tweepy
except ImportError:
	# Only a failed import means "tweepy is not installed"; any other error
	# (including Ctrl-C) should surface instead of being reported as missing.
	tweepy = None
	print('Missing tweepy see installation comment')
	# sys.exit is always available; the bare exit() helper is injected by the
	# site module and is not guaranteed to exist.
	sys.exit(-1)


# Ugly hack to expose global 'database' to ops for this script
# db is replaced with the parsed contents of db.json in DestroyAllDirectMessages
# and dumped back to disk there; OpDestroyDirectMessageIDs records destroyed
# DM ids under its "removed-direct-messages" key.
db = {}
# Set to True by RateLimitExceeded(); ImportJSON checks it after every op and stops.
g_rate_limit_exceeded = False
# Path of the JSON credentials file read by LoadTweepy and written by GenerateAuthTemplate.
g_config_filename = 'config/auth.json'
# Filled in by LoadTweepy from the "username" entry of the credentials file.
g_username = None

def RateLimitExceeded():
	"""Raise the module-wide flag that tells ImportJSON to stop processing."""
	global g_rate_limit_exceeded
	g_rate_limit_exceeded = True

def GenerateAuthTemplate(_filename=None):
	"""Write an empty credentials template unless the file already exists.

	_filename: path of the file to create; defaults to g_config_filename.
	           Its parent directory is created when missing.

	Returns -1 when the parent directory cannot be created, otherwise None
	(both when the template was written and when the file already existed).
	"""
	filename = g_config_filename if _filename is None else _filename
	if os.path.exists(filename):
		# Never overwrite a file that may already hold real keys
		return None

	configPath = os.path.dirname(filename)
	if configPath:
		try:
			# exist_ok tolerates an existing directory but still fails when
			# the path is occupied by a regular file or cannot be created
			os.makedirs(configPath, exist_ok=True)
		except OSError as err:
			# Report the caught error itself, not the OSError class
			print(str(err))
			return -1

	text = '{\n\t"key" : "",\n\t"secret" : "",\n\t"access_token" : "",\n\t"access_token_secret" : "",\n\t"username" : "@jack"\n}\n'
	with open(filename, 'w', encoding='UTF-8') as fd:
		fd.write(text)
	return None

def LoadTweepy():
	"""Build a tweepy API object from the credentials in g_config_filename.

	Reads the "key", "secret", "access_token", "access_token_secret" and
	"username" entries of the JSON credentials file and stores the username
	in the global g_username.

	Returns the tweepy.API instance, or None when the credentials are
	incomplete or the API object could not be created.
	Exits the process (non-zero status) when the file cannot be read or parsed.
	"""
	global g_username

	try:
		# The context manager closes the handle even when parsing fails
		with open(g_config_filename, 'r', encoding='UTF-8') as fd:
			secret = json.load(fd)
	except (OSError, ValueError):
		# ValueError covers json.JSONDecodeError and UnicodeDecodeError
		print(f'Failed to load "{g_config_filename}" use --generate-auth')
		# A failure must not report success to the calling shell
		sys.exit(1)

	try:
		auth = tweepy.OAuthHandler(secret["key"], secret["secret"])
		auth.set_access_token(secret["access_token"], secret["access_token_secret"])
		api = tweepy.API(auth, wait_on_rate_limit=True)
		g_username = secret["username"]
	except Exception:
		# Missing keys or a tweepy failure: report it and let the caller bail out
		print(f'Failed to load tweepy api. Did you add keys to "{g_config_filename}"?')
		api = None

	return api


def LogError(_msg):
	"""Report an error message; currently this just prints it to stdout."""
	print(_msg)


def DebugStatus(_status):
	"""Pretty-print the `_json` attribute of a status object for debugging."""
	dumped = json.dumps(_status._json, sort_keys=True, indent=4)
	print(dumped)


def ReadTextFile(_filename, _log=True):
	"""Return the whole UTF-8 text of _filename, or None when it cannot be read.

	_filename: path of the file to read.
	_log:      when true, a failure is reported through LogError.
	"""
	try:
		# The context manager closes the handle even when read() fails
		with open(_filename, 'r', encoding='UTF-8') as fd:
			return fd.read()
	except (OSError, ValueError):
		# OSError: missing/unreadable file; ValueError: undecodable bytes
		if _log:
			LogError(f'Could not read file "{_filename}"')
	return None

def _UnescapeJSON(_value):
	"""Recursively HTML-unescape every string (dict keys included) in parsed JSON data."""
	if isinstance(_value, str):
		return html.unescape(_value)
	if isinstance(_value, list):
		return [_UnescapeJSON(item) for item in _value]
	if isinstance(_value, dict):
		return {html.unescape(key): _UnescapeJSON(item) for key, item in _value.items()}
	return _value


## Import a JSON file to perform operations on filtered JSON objects
def ImportJSON(_filename, _filter, _op):
	"""Parse an archive file and call _op on the _filter member of each entry.

	_filename: file to read; everything up to and including the first '=' is
	           discarded before parsing, unless the text already starts with
	           a JSON array or object.
	_filter:   key looked up in every top-level entry.
	_op:       callable invoked with entry[_filter].

	Returns None. Nothing is processed when the file is unreadable or empty.
	Stops early once g_rate_limit_exceeded has been set by an operation.
	Raises ValueError (json.JSONDecodeError) when the payload is not valid
	JSON and KeyError when an entry has no _filter member.
	"""
	text = ReadTextFile(_filename)
	if not text:
		return

	# Strip off the start of the text to aid parser
	payload = text.lstrip()
	if not payload.startswith(('[', '{')):
		# Without any '=' the payload becomes empty and json.loads raises
		# ValueError, matching the failure mode of the previous str.index call
		payload = payload.partition('=')[2]

	# Parse first and unescape the decoded strings afterwards: unescaping the
	# raw text can turn entities such as &quot; or &#10; into characters that
	# are illegal inside a JSON string and break the parser.
	entries = _UnescapeJSON(json.loads(payload))

	# Processing
	for entry in entries:
		_op(entry[_filter])

		if g_rate_limit_exceeded:
			print('Rate limit exceeded, so processing will stop')
			return


# Operations to perform on parsed JSON objects

def OpListFavoriteIDs(_json):
	"""Print the 'tweetId' member of one like record."""
	tweet_id = _json['tweetId']
	print(tweet_id)

def OpDestroyFavorite(_json):
	"""Remove the like identified by _json['tweetId'] through the API.

	A failed API call is reported and swallowed so that a bulk run can go on
	with the next record. Returns None.
	"""
	api = LoadTweepy()
	if not api:
		LogError('API failed to load or auth')
		return

	tweet_id = _json['tweetId']
	print(f'Destroy like: {tweet_id}')
	try:
		api.destroy_favorite(tweet_id)
	except Exception as err:
		# "except Exception" instead of a bare except lets Ctrl-C abort a long
		# bulk run; the error text tells the user why the call failed
		print(f'Failed to destroy {tweet_id}: {err}')

def OpDestroyTweet(_json):
	"""Delete the tweet identified by _json['id'] through the API.

	A failed API call is reported and swallowed so that a bulk run can go on
	with the next record. Returns None.
	"""
	api = LoadTweepy()
	if not api:
		LogError('API failed to load or auth')
		return

	tweet_id = _json['id']
	print(f'Destroy tweet: {tweet_id}')
	try:
		api.destroy_status(tweet_id)
	except Exception as err:
		# "except Exception" instead of a bare except lets Ctrl-C abort a long
		# bulk run; the error text tells the user why the call failed
		print(f'Failed to destroy {tweet_id}: {err}')

def OpListDirectMessageIDs(_json):
	"""Print the id of every 'messageCreate' entry of one conversation record."""
	for entry in _json['messages']:
		created = entry['messageCreate']
		print(created['id'])

def OpDestroyDirectMessageIDs(_json):
	"""Destroy every direct message of one conversation record through the API.

	Ids already listed in db["removed-direct-messages"] are skipped, and each
	successfully destroyed id is appended to that list so that a restarted
	run does not repeat the call.

	Processing of this conversation stops at the first failure; a rate-limit
	failure additionally calls RateLimitExceeded(). Returns None.
	"""
	api = LoadTweepy()
	if not api:
		LogError('API failed to load or auth')
		return

	ids = [message['messageCreate']['id'] for message in _json['messages']]

	# .get() instead of indexing: on a first run db is empty and the key does
	# not exist yet, which used to raise KeyError
	removed = db.get("removed-direct-messages")
	if not removed:
		removed = db["removed-direct-messages"] = []

	# Set for O(1) membership tests; the list stays the persisted record
	already_removed = set(removed)
	for message_id in ids:
		if message_id in already_removed:
			# print(f'{message_id} marked as previously destroyed')
			continue

		try:
			print(f'Destroy direct message: {message_id}')
			api.destroy_direct_message(message_id)
		except tweepy.RateLimitError:
			print(f'Failed to destroy {message_id} due to rate limiting')
			RateLimitExceeded()
			return
		except Exception as err:
			# "except Exception" keeps Ctrl-C working during a long run
			print(f'Failed to destroy {message_id}: {err}')
			return

		removed.append(message_id)
		already_removed.add(message_id)
		# @todo - Write out db to disk as rate limiting will be slower anyway


# Higher level functions

def DestroyAllDirectMessages():
	"""Destroy every DM listed in data/direct-messages.js, tracking progress in db.json.

	The previous state is loaded from db.json into the global db (a missing or
	unreadable file is reported and the current in-memory state is kept), the
	archive is processed, and the state is written back even when processing
	is interrupted. Returns None.
	"""
	global db

	# Keep the archive path and the database path in separate variables: the
	# archive path used to be overwritten, so db.json was parsed as the archive
	archiveFilename = "data/direct-messages.js"
	typeFilter = "dmConversation"
	dbFilename = 'db.json'

	# Read previous 'database' state to help with DM rate limits/restarts
	try:
		with open(dbFilename, 'r', encoding='UTF-8') as fd:
			loaded = json.load(fd)
	except (OSError, ValueError):
		print(f'Failed to load "{dbFilename}"')
	else:
		if isinstance(loaded, dict):
			db = loaded
		else:
			print(f'Failed to load "{dbFilename}"')

	# Guarantee the list the DM operation indexes, also on a first run
	if not db.get("removed-direct-messages"):
		db["removed-direct-messages"] = []

	try:
		ImportJSON(archiveFilename, typeFilter, OpDestroyDirectMessageIDs)
	finally:
		# Write new 'database' state, also after an error or Ctrl-C, so the
		# ids destroyed so far are not retried on the next run
		try:
			with open(dbFilename, 'w', encoding='UTF-8') as fd:
				json.dump(db, fd)
		except (OSError, TypeError, ValueError):
			print(f'Failed to save "{dbFilename}"')

def DestroyAllLikes():
	"""Run OpDestroyFavorite on every "like" entry of data/like.js."""
	ImportJSON("data/like.js", "like", OpDestroyFavorite)

def DumpLikeIds():
	"""Run OpListFavoriteIDs on every "like" entry of data/like.js."""
	# Pass `print` as the operation instead to dump the whole records
	ImportJSON("data/like.js", "like", OpListFavoriteIDs)

def DumpDirectMessageIds():
	"""Run OpListDirectMessageIDs on every "dmConversation" entry of data/direct-messages.js."""
	# Pass `print` as the operation instead to dump the whole records
	ImportJSON("data/direct-messages.js", "dmConversation", OpListDirectMessageIDs)

def DestroyAllTweets():
	"""Run OpDestroyTweet on every "tweet" entry of data/tweet.js."""
	ImportJSON("data/tweet.js", "tweet", OpDestroyTweet)


# unretweet(id)

def TweetActionTest(_api, _status, _args):
	"""Experimental timeline action: print tweets with fewer than 10 favorites.

	_api and _args are accepted to match the action signature used by
	TimelineTweetAction but are not used. Returns None.
	"""
	# @todo Filter... boolean test chain or just callbacks?
	#   - date range
	#   - date
	#   - contains string
	# @todo Object that has common checks like substr and date range;
	#   if object is None then consider it as 'ALL'
	#   (e.g. substr = 'test', year = 2021, replyto = 'ReplyGuyParent')
	# @todo Callback for action, default 'delete'
	#
	# Other predicates that were tried in place of the favorite_count test:
	#   _status.retweet_count > 0
	#   _status.id == 1347381781826138115
	#   _status.id == 1347394733883027457
	#   _status.in_reply_to_screen_name and _status.in_reply_to_screen_name == replyto
	#   substr in _status.text
	#   _status.created_at.year == year
	if not _status.favorite_count < 10:
		# Tweet is not selected (optionally: print(f'Skipping tweet: {_status.id}'))
		return

	# Debug helpers: DebugStatus(_status) or
	# print(f'{_status.id} : {_status.in_reply_to_screen_name}')
	print(f'{_status.id} : {_status.text}')

	# To actually delete the selected tweet:
	#   print(f'Destroy tweet: {_status.id}')
	#   _api.destroy_status(_status.id)


def TweetActionDeleteAll(_api, _status, _args):
	"""Timeline action that announces and destroys the given status; _args is unused."""
	tweet_id = _status.id
	print(f'Destroy tweet: {tweet_id}')
	_api.destroy_status(tweet_id)


# Apply filtered action to tweets
def TimelineTweetAction(_user, _operation, _count=500, _args=None):
	"""Fetch a user's timeline through the API and apply _operation to each status.

	_user:      user passed to api.get_user().
	_operation: callable invoked as _operation(api, status, _args).
	_count:     value forwarded as `count` to api.user_timeline().
	_args:      opaque extra argument handed to _operation.

	Failures are reported on stdout; a failing operation does not stop the
	remaining statuses from being processed. Returns None.
	"""
	api = LoadTweepy()
	if not api:
		LogError('API failed to load or auth')
		return

	# "except Exception" (not a bare except) keeps Ctrl-C working, and the
	# error text shows whether the lookup or the credentials were the problem
	try:
		user_id = api.get_user(_user).id
	except Exception as err:
		print(f'No user found ({err})')
		return
	print(f'User {_user} = {user_id}')

	try:
		statusList = api.user_timeline(user_id, count=_count)
	except Exception as err:
		print(f'timeline query failed ({err})')
		return

	print(f'Found {len(statusList)} tweets via API')
	for status in statusList:
		try:
			_operation(api, status, _args)
		except Exception as err:
			# The operation is arbitrary, so do not claim it was a delete
			print(f'Failed to process {status.id}: {err}')

# Uses API only instead of archives (limited reach)
def TimelineDestroyTweets(_user=None, _count=500):
	"""Destroy the tweets returned by one timeline query for a user.

	_user:  user passed to api.get_user(); when None, the username from the
	        credentials file (g_username, set by LoadTweepy) is used.
	_count: value forwarded as `count` to api.user_timeline().

	Failures are reported on stdout; a failed delete does not stop the
	remaining tweets from being processed. Returns None.
	"""
	api = LoadTweepy()
	if not api:
		LogError('API failed to load or auth')
		return

	# The old default `_user=g_username` was evaluated when the function was
	# defined, i.e. while g_username was still None, so the configured name
	# was never used. Resolve it now that LoadTweepy has filled it in.
	if _user is None:
		_user = g_username
	if not _user:
		LogError('No user given and no "username" configured')
		return

	# "except Exception" (not a bare except) keeps Ctrl-C working
	try:
		user_id = api.get_user(_user).id
	except Exception as err:
		print(f'No user found ({err})')
		return
	print(f'User {_user} = {user_id}')

	try:
		statusList = api.user_timeline(user_id, count=_count)
	except Exception as err:
		print(f'timeline query failed ({err})')
		return

	print(f'Found {len(statusList)} tweets via API')
	for status in statusList:
		print(f'Destroy tweet: {status.id}')
		try:
			api.destroy_status(status.id)
		except Exception as err:
			print(f'Failed to destroy {status.id}: {err}')


def TimelineDestroyFavorites(_user=None, _count=500):
	"""Destroy favorites page by page until the API returns no more.

	_user:  value passed unchanged to api.favorites(). The previous default
	        `g_username` was evaluated at definition time, when it was still
	        None, so the effective default has always been None.
	_count: accepted for signature compatibility; not used.

	Stops when a query fails, when no favorites are left, or when a whole
	page could not be destroyed. Returns None.
	"""
	api = LoadTweepy()
	if not api:
		LogError('API failed to load or auth')
		return

	# Only one page of favorites is fetched per query, so keep asking
	while True:
		try:
			favorites = api.favorites(_user)
		except Exception as e:
			print('favorites query failed')
			print(e)
			return

		print(f'Found {len(favorites)} favorites via API')
		if not favorites:
			return

		destroyed = 0
		for favorite in favorites:
			print(f'Destroy favorite: {favorite.id}')
			try:
				api.destroy_favorite(favorite.id)
			except Exception as err:
				# "except Exception" keeps Ctrl-C working during a long run
				print(f'Failed to destroy {favorite.id}: {err}')
			else:
				destroyed += 1

		if destroyed == 0:
			# Nothing on this page could be removed; asking again would
			# presumably return the same page and loop forever
			print('No favorites could be destroyed, so processing will stop')
			return

def Post(_text):
	"""Post _text as a status update; does nothing when the API cannot be loaded."""
	api = LoadTweepy()
	if not api:
		return
	api.update_status(_text)


def TestArgs():
	"""Print a fixed test message."""
	message = 'Yes, this is the third room.'
	print(message)

# Main entry
if __name__ == "__main__":
	# (flag, handler, help text) for every supported command
	cmds = [
		# Archive based commands
		('--archived-tweets', DestroyAllTweets, 'Delete archived tweets from Twitter'),
		('--archived-likes', DestroyAllLikes, 'Delete archived likes from Twitter'),
		('--archived-dms', DestroyAllDirectMessages, 'Delete archived DMs from Twitter'),
		# Timeline based commands
		('--timeline-tweets', TimelineDestroyTweets, 'Delete latest tweets via API'),
		('--timeline-likes', TimelineDestroyFavorites, 'Delete latest likes via API'),
		('--generate-auth', GenerateAuthTemplate, 'Create a default auth.json to fill out')
		]

	args = sys.argv[1:]

	# Only the first argument selects a command. It has to be compared as
	# args[0]: comparing the whole list with '--help' was always False.
	selected = None
	if args and args[0] not in ('--help', '-h'):
		selected = next((cmd for cmd in cmds if cmd[0] == args[0]), None)
		if selected is None:
			# Do not exit silently on a typo; fall through to the help text
			print(f'Unknown command: {args[0]}')

	if selected is None:
		print('Delete tweets, likes, and DMs from Twitter')
		for cmd in cmds:
			print(f'\t{cmd[0]}  \t{cmd[2]}')
	else:
		selected[1]()
	#!/usr/bin/env python3
	# TwitterPurge is a simplified python script to perform operations on twitter archives.
	# I wrote this in a single day while reading an API doc for the first time.
	#
	# Any tips you can spare will go to fund alt social media and tools
	# BTC: bc1qe0el876trjjuuu3zr729n3w3zp4t2k92smklkz
	#
	# Setup python
	# ==========================================================
	# Download and install python3 from https://www.python.org/downloads/
	# Create a python3 sandbox with latest tweepy
	# python3 -m venv sandbox
	# source sandbox/bin/activate
	# pip install tweepy
	#
	# Create Twitter API auth
	# ==========================================================
	# https://realpython.com/twitter-bot-python-tweepy/#creating-twitter-api-authentication-credentials
	#
	# Enable permission to read+write (post tweets) and write DMs
	# Now reset your "Access token & secret" to update permissions
	#
	# Get an archive of your tweets via Twitter web interface
	# ==========================================================
	# 1. Request archive of your tweets
	# 2. Wait until archived and download
	# 3. Extract archive, and place this script into the same directory
	# 4. Generate config/auth.json and add keys to the file
	# 5. Purge tweets, likes, DMs, etc using this script

	import json, html
	import os, sys


	try:
	import tweepy
	except:
	tweepy = None
	print('Missing tweepy see installation comment')
	exit(-1)


	# Ugly hack to expose global 'database' to ops for this script
	db = {}
	g_rate_limit_exceeded = False
	g_config_filename = 'config/auth.json'
	g_username = None

	def RateLimitExceeded():
	global g_rate_limit_exceeded
	g_rate_limit_exceeded = True

	def GenerateAuthTemplate():
	if not os.path.exists(g_config_filename):
	configPath = 'config'
	try:
	os.makedirs(configPath)
	except OSError:
	if not os.path.isdir(configPath):
	print(str(OSError))
	# raise
	return -1

	text = '{\n\t"key" : "",\n\t"secret" : "",\n\t"access_token" : "",\n\t"access_token_secret" : "",\n\t"username" : "@jack"\n}\n'
	with open(g_config_filename, 'w') as fd:
	fd.write(text)
	fd.close()

	def LoadTweepy():
	try:
	fd = open(g_config_filename, 'r', encoding='UTF-8')
	secret = json.load(fd)
	except:
	print(f'Failed to load "{g_config_filename}" use --gen-auth-template')
	exit(0)

	try:
	auth = tweepy.OAuthHandler(secret["key"], secret["secret"])
	auth.set_access_token(secret["access_token"], secret["access_token_secret"])
	api = tweepy.API(auth, wait_on_rate_limit=True)
	global g_username
	g_username = secret["username"]
	except:
	print(f'Failed to load tweepy api. Did you add keys to "{g_config_filename}"?')
	api = None

	return api


	def LogError(_msg):
	print(_msg)


	def DebugStatus(_status):
	print(json.dumps(_status._json, indent=4, sort_keys=True))


	def ReadTextFile(_filename, _log=True):
	try:
	fd = open(_filename, 'r', encoding='UTF-8')
	text = fd.read()
	fd.close()
	return text
	except:
	if _log:
	LogError('Could not read file "' + _filename + '"')
	return None

	## Import a JSON file to perform operations on filtered JSON objects
	def ImportJSON(_filename, _filter, _op):
	text = ReadTextFile(_filename)
	if not text:
	return

	# Strip off the start of the text to aid parser
	idx = text.index('=', 1) + 1

	# Replace HTML escape characters as json module chokes on them
	text = html.unescape(text[idx:])

	# Optionally purge unprintable characters besides CR if needed here
	# text = "".join(c for c in text if c.isprintable() or '\n')

	# Alter JSON to be accepted by parser
	text = f'{{ "data" : {text} }}'
	# text = '{ "data" :' + text + ' }'

	# Convert text to json data
	tweets = json.loads(text)
	tweets = tweets['data']

	# Processing
	for tweet in tweets:
	select = tweet[_filter]
	_op(select)

	if g_rate_limit_exceeded:
	print('Rate limit exceeded, so processing will stop')
	return


	# Operations to perform on parsed JSON objects

	def OpListFavoriteIDs(_json):
	print(_json['tweetId'])

	def OpDestroyFavorite(_json):
	api = LoadTweepy()
	if not api:
	LogError('API failed to load or auth')
	return
	id = _json['tweetId']
	try:
	print(f'Destroy like: {id}')
	api.destroy_favorite(id)
	except:
	print(f'Failed to destroy {id}')

	def OpDestroyTweet(_json):
	api = LoadTweepy()
	if not api:
	LogError('API failed to load or auth')
	return
	id = _json['id']
	try:
	print(f'Destroy tweet: {id}')
	api.destroy_status(id)
	except:
	print(f'Failed to destroy {id}')

	def OpListDirectMessageIDs(_json):
	for message in _json['messages']:
	print(message['messageCreate']['id'])

	def OpDestroyDirectMessageIDs(_json):
	api = LoadTweepy()
	if not api:
	LogError('API failed to load or auth')
	return

	ids = []
	for message in _json['messages']:
	id = message['messageCreate']['id']
	ids.append(id)

	# Not efficient at all, but with rate limiting who cares
	if not db["removed-direct-messages"]:
	db["removed-direct-messages"] = []
	removed = db["removed-direct-messages"]
	for id in ids:
	if id in removed:
	# print(f'{id} marked as previously destroyed')
	continue

	try:
	print(f'Destroy direct message: {id}')
	api.destroy_direct_message(id)
	db["removed-direct-messages"].append(id)
	# @todo - Write out db to disk as rate limiting will be slower anyway

	# if api.get_direct_message(id):
	# print(f'Destroy direct message: {id}')
	# api.destroy_direct_message(id)
	# db["removed-direct-messages"].append(id)
	# # @todo - Write out db to disk as rate limiting will be slower anyway
	# else:
	# print(f'Direct message does not exist: {id}')

	except tweepy.RateLimitError:
	print(f'Failed to destroy {id} due to rate limiting')
	RateLimitExceeded()
	return

	except:
	print(f'Failed to destroy {id}')
	return


	# Higher level functions

	def DestroyAllDirectMessages():
	filename = "data/direct-messages.js"
	typeFilter = "dmConversation"

	global db

	# Read previous 'database' state to help with DM rate limits/restarts
	filename = 'db.json'
	try:
	fd = open(filename, 'r', encoding='UTF-8')
	db = json.load(fd)
	# print(db)
	except:
	print(f'Failed to load "{filename}"')

	ImportJSON(filename, typeFilter, OpDestroyDirectMessageIDs)

	# Write new 'database' state
	try:
	with open(filename, 'w') as fd:
	json.dump(db, fd)
	except:
	print(f'Failed to save "{filename}"')

	def DestroyAllLikes():
	filename = "data/like.js"
	typeFilter = "like"
	ImportJSON(filename, typeFilter, OpDestroyFavorite)

	def DumpLikeIds():
	filename = "data/like.js"
	typeFilter = "like"
	ImportJSON(filename, typeFilter, OpListFavoriteIDs)
	# ImportJSON(filename, typeFilter, print)

	def DumpDirectMessageIds():
	filename = "data/direct-messages.js"
	typeFilter = "dmConversation"
	ImportJSON(filename, typeFilter, OpListDirectMessageIDs)
	# ImportJSON(filename, typeFilter, print)

	def DestroyAllTweets():
	filename = "data/tweet.js"
	typeFilter = "tweet"
	ImportJSON(filename, typeFilter, OpDestroyTweet)


	# unretweet(id)

	def TweetActionTest(_api, _status, _args):
	# @todo Filter... boolean test chain or just callbacks?
	# Date range
	# Date
	# Contains string

	# @todo Object that has common checks like substr and date range
	# if object is None then consider it as 'ALL'
	# substr = 'test'
	# year = 2021
	# replyto = 'ReplyGuyParent'

	# @todo Callback for action, default 'delete'

	if _status.favorite_count < 10:
	# if _status.retweet_count > 0:
	# if _status.id == 1347381781826138115:
	# if _status.in_reply_to_screen_name and status.in_reply_to_screen_name == replyto:
	# if _status.id == 1347394733883027457:
	# if substr in _status.text:
	# if _status.created_at.year == year:
	# print(f'Processing tweet: {_status.id}')
	# DebugStatus(_status)
	# print(f'{_status.id} : {_status.in_reply_to_screen_name}')
	print(f'{_status.id} : {_status.text}')

	# print(f'Destroy tweet: {_status.id}')
	# _api.destroy_status(_status.id)
	# else:
	# print(f'Skipping tweet: {_status.id}')


	def TweetActionDeleteAll(_api, _status, _args):
	print(f'Destroy tweet: {_status.id}')
	_api.destroy_status(_status.id)


	# Apply filtered action to tweets
	def TimelineTweetAction(_user, _operation, _count=500, _args=None):
	api = LoadTweepy()
	if not api:
	LogError('API failed to load or auth')
	return

	try:
	user = api.get_user(_user)
	id = user.id
	print(f'User {_user} = {id}')
	except:
	print('No user found')
	return

	try:
	statusList = api.user_timeline(id, count=_count)
	except:
	print('timeline query failed')
	return
	print(f'Found {len(statusList)} tweets via API')
	for status in statusList:
	try:
	_operation(api, status, _args)
	except:
	print(f'Failed to destroy {status.id}')

	# Uses API only instead of archives (limited reach)
	def TimelineDestroyTweets(_user=g_username, _count=500):
	api = LoadTweepy()
	if not api:
	LogError('API failed to load or auth')
	return

	try:
	user = api.get_user(_user)
	id = user.id
	print(f'User {_user} = {id}')
	except:
	print('No user found')
	return

	try:
	statusList = api.user_timeline(id, count=_count)
	except:
	print('timeline query failed')
	return
	print(f'Found {len(statusList)} tweets via API')
	for status in statusList:
	try:
	print(f'Destroy tweet: {status.id}')
	api.destroy_status(status.id)
	except:
	print(f'Failed to destroy {status.id}')


	def TimelineDestroyFavorites(_user=g_username, _count=500):
	api = LoadTweepy()
	if not api:
	LogError('API failed to load or auth')
	return

	# Can only purge 19 at a time ('1 page')
	c = 1
	while c > 0:
	try:
	favorites = api.favorites(_user)
	except Exception as e:
	print('favorites query failed')
	print(e)
	return

	# Don't stop until no more to process
	c = len(favorites)

	print(f'Found {len(favorites)} favorites via API')
	for i in favorites:
	try:
	print(f'Destroy favorite: {i.id}')
	api.destroy_favorite(i.id)
	except:
	print(f'Failed to destroy {i.id}')

	def Post(_text):
	api = LoadTweepy()
	if api:
	api.update_status(_text)


	def TestArgs():
	print('Yes, this is the third room.')

	# Main entry
	if __name__ == "__main__":
	cmds = [
	# Archive based commands
	('--archived-tweets', DestroyAllTweets, 'Delete archived tweets from Twitter'),
	('--archived-likes', DestroyAllLikes, 'Delete archived likes from Twitter'),
	('--archived-dms', DestroyAllDirectMessages, 'Delete archived DMs from Twitter'),
	# Timeline based commands
	('--timeline-tweets', TimelineDestroyTweets, 'Delete latest tweets via API'),
	('--timeline-likes', TimelineDestroyFavorites, 'Delete latest likes via API'),
	('--generate-auth', GenerateAuthTemplate, 'Create a default auth.json to fill out')
	]

	args = sys.argv[1:]
	if not args or args == '--help' or args == '-h':
	print('Delete tweets, likes, and DMs from Twitter')
	for cmd in cmds:
	print(f'\t{cmd[0]} \t{cmd[2]}')
	else:
	for cmd in cmds:
	if cmd[0] == args[0]:
	cmd[1]()
	break