palevell/t-clean.py

## t-clean.py
#!/usr/bin/env python3
# t-clean.py v1.2.22 - Friday, September 15, 2017
# Cleanup Twitter Followings with Ruby Gem 't' and Python's language detect module

""""
Long Records (follow, unfollow, groupies, whoami, etc.)
 0 ID
 1 Since
 2 Last tweeted at
 3 Tweets
 4 Favorites
 5 Listed
 6 Following
 7 Followers
 8 Screen name
 9 Name
10 Verified
11 Protected
12 Bio
13 Status
14 Location
15 URL
"""

"""
Short Records (timeline)
 0 ID
 1 Posted at
 2 Screen name
 3 Text
"""

import csv, os, shutil, stat, sys, tempfile, time
from langdetect import detect, lang_detect_exception
from localnow import localnow
from random import randint

DEBUG = True
DRYRUN = True
logfile = 't-clean.log'
week_secs = 60 * 60 * 168
languages = { 'en': True,
              'ar': False,
              'fa': False,
              'hi': False
            }
rejected = ('ar', 'fa', 'hi')

"""
languages = list()
accept = list()
reject = list()
neutral = list()
"""

def file_age_in_seconds(pathname):
    """Return how many seconds have passed since ``pathname`` was last modified.

    The modification time comes from ``os.stat`` and is subtracted from the
    current ``time.time()``. Any ``OSError`` raised by ``os.stat`` (for
    example when the path does not exist) propagates to the caller.
    """
    now = time.time()
    modified_at = os.stat(pathname)[stat.ST_MTIME]
    return now - modified_at


# ToDo: move this to config.py
def taccts():
    """Return the sorted list of account names reported by ``t accounts``.

    The command is run through the shell with its output filtered by
    ``grep -v '^  '`` (lines starting with two spaces are dropped) and
    redirected into a temporary file, which is then read back one name
    per line and deleted.

    While DEBUG is set the command is still executed, but a fixed pair of
    placeholder names is returned instead of the real accounts.

    If reading or removing the temporary file raises OSError, the error is
    printed and the process exits with that error's errno.
    """
    fd, tmpfile = tempfile.mkstemp('.txt', 't-accounts', '/tmp')
    # mkstemp returns an already-open descriptor. The shell redirect below
    # reopens the file by path, so close ours now; otherwise every call
    # (run_t makes one per command) would leak a descriptor.
    os.close(fd)
    cmd = "t accounts |grep -v '^  ' >" + tmpfile
    accts = []
    try:
        try:
            os.system(cmd)
            with open(tmpfile, 'rt') as results:
                accts = [result.rstrip('\n') for result in results]
        finally:
            # Remove the temp file even when reading it failed.
            os.remove(tmpfile)
    except OSError as e:
        print(" *** OSError", e.strerror)
        sys.exit(e.errno)

    if DEBUG:
        return sorted(['Twit1', 'Twit2'])
    return sorted(accts)


def run_t(acct, cmd, args=None, timestamp=False):
    """Run one ``t`` subcommand for ``acct`` with stdout captured in a CSV file.

    Makes ``acct`` the active ``t`` account, then runs ``t <cmd>`` through
    the shell. ``--csv`` is added for whoami/followings/followers/timeline,
    and ``--dry-run`` is added to follow/unfollow while DRYRUN is set.
    Output is redirected to ``local/data/<acct>_<cmd>[_<timestamp>].csv``.
    The command line is printed and appended to the open log file ``fp``.
    After the command the function sleeps 30-60 seconds to stay under
    Twitter's rate limit.

    Args:
        acct: account name; must be one of the names returned by taccts().
        cmd: one of followings, followers, whoami, follow, unfollow, timeline.
        args: whitespace-separated extra arguments (required for
            follow/unfollow). Each token is shell-quoted before use.
        timestamp: when true, a ``_YYYYmmdd_HHMMSS`` suffix from localnow()
            is added to the output file name.

    Returns:
        The path of the CSV file if it exists after the command ran,
        otherwise None.

    Raises:
        ValueError: unknown account, unknown command, or missing arguments.
        OSError: re-raised unchanged if logging or running the command fails.
    """
    import shlex

    commands = ('followings', 'followers', 'whoami', 'follow', 'unfollow', 'timeline')
    needargs = ('follow', 'unfollow')
    usecsv = ('whoami', 'followings', 'followers', 'timeline')
    if acct not in taccts():
        raise ValueError("Unrecognized Twitter account: " + acct)
    if cmd not in commands:
        raise ValueError("Unrecognized command: " + cmd)
    if args is None:
        if cmd in needargs:
            raise ValueError(cmd + " requires argument(s)")
        args = ''
    csvopt = '--csv' if cmd in usecsv else ''
    ts = '_' + localnow().strftime("%Y%m%d_%H%M%S") if timestamp else ''
    # The file name uses the bare command, so build it before --dry-run is added.
    csvfile = os.path.join('local', 'data', acct + '_' + cmd + ts + '.csv')
    if cmd in needargs and DRYRUN:
        cmd += ' --dry-run'
    # Arguments include screen names read from CSV data; quote every token so
    # none of them can be interpreted by the shell.
    quoted_args = ' '.join(shlex.quote(arg) for arg in args.split())
    # We can replace 't' with the full path, stored in t_cmd
    oscmd = ('t set active ' + shlex.quote(acct) + '>/dev/null && t ' + cmd
             + ' ' + csvopt + ' ' + quoted_args + ' >' + shlex.quote(csvfile))
    try:
        stamp = str(localnow().replace(microsecond=0))
        print(stamp + '  ' + oscmd)
        fp.write(stamp + '  ' + oscmd + '\n')
        os.system(oscmd)
        # Try to abide by Twitter's rate limit . . .
        time.sleep(randint(30, 60))
    except OSError as e:
        print(" *** OSError:", e.strerror)
        # Re-raise the original error so errno/strerror reach the caller.
        raise
    return csvfile if os.path.exists(csvfile) else None

# Prerequisites
# Check for Ruby gem 't'
# Locate the `t` executable on PATH; everything below shells out to it.
t_cmd = shutil.which('t', mode=os.F_OK | os.X_OK, path=None)
if t_cmd is None:
    print("  Unmet dependency: Ruby Gem 't'.  Aborting.")
    # Exit non-zero so a calling shell or scheduler can see the run failed.
    sys.exit(1)

# Main Program
# For every account: refresh the followings list when it is at least a week
# old and unfollow profiles whose name/bio/location are mostly in a rejected
# language, then scan the latest 100 timeline tweets and unfollow authors
# tweeting in a rejected language. Names are unfollowed in batches.
_BATCH_SIZE = 20  # screen names passed to one `t unfollow` invocation


def _detect_lang(text):
    """Return the language code detect() reports for text, or None if it raises."""
    try:
        return detect(text)
    except lang_detect_exception.LangDetectException:
        return None


def _queue_twit(twit, pending):
    """Append twit to the pending batch and echo it, five names per output line."""
    pending.append(twit)
    print(twit, end='\n' if len(pending) % 5 == 0 else ' ')


def _unfollow_batch(acct, pending):
    """Record `whoami`, unfollow every name in pending, and return how many there were."""
    run_t(acct, 'whoami', timestamp=True)
    run_t(acct, 'unfollow', ' '.join(pending), timestamp=True)
    return len(pending)


def _scan_followings(acct, path, flagged):
    """Unfollow profiles in the followings CSV at path that score as rejected.

    A profile scores one point for each of name, bio and location detected
    as a rejected language; two or more points queues it for unfollowing.
    Rows where any of those three fields cannot be detected are skipped.
    Names queued here are added to ``flagged``. Returns the number of
    names submitted for unfollowing.
    """
    unfollowed = 0
    pending = []
    with open(path, 'rt', newline='') as csvfile:
        next(csvfile, None)  # Skip header row (tolerating an empty file)
        for row in csv.reader(csvfile):
            if len(row) < 15:
                continue  # short record: columns 8-14 are needed below
            twit = row[8]       # Screen Name (Twitter handle)
            name = row[9]       # User (Display Name)
            bio = row[12]       # bio
            status = row[13]    # Status (Last Tweet)
            loc = row[14]       # location
            try:
                name_lang = detect(name)
                bio_lang = detect(bio)
                loc_lang = detect(loc)
            except lang_detect_exception.LangDetectException:
                continue
            # These two are only displayed, so a failed detection on them
            # must not discard the row.
            twit_lang = _detect_lang(twit)
            status_lang = _detect_lang(status)
            print("Twit/Name/Location (%2s / %2s / %2s): %16s / %25s / %25s\n\tBio (%2s): %160s\n\tStatus (%2s): %140s\n\n" % (twit_lang, name_lang, loc_lang, twit, name, loc, bio_lang, bio, status_lang, status))
            score = sum(lang in rejected for lang in (name_lang, bio_lang, loc_lang))
            if score >= 2 and twit not in flagged:
                flagged.add(twit)
                _queue_twit(twit, pending)
            if len(pending) >= _BATCH_SIZE:
                # process 20 twits at a time
                unfollowed += _unfollow_batch(acct, pending)
                pending = []
    if pending:
        # unfollow remaining twits
        unfollowed += _unfollow_batch(acct, pending)
    return unfollowed


def _scan_timeline(acct, path, flagged):
    """Unfollow authors in the timeline CSV at path who tweet in a rejected language.

    English tweets and tweets whose language cannot be detected are ignored;
    tweets in other non-rejected languages are printed for review. Authors
    already in ``flagged`` are not queued again. Returns the number of
    names submitted for unfollowing.
    """
    unfollowed = 0
    pending = []
    with open(path, 'rt', newline='') as csvfile:
        next(csvfile, None)  # Skip header row (tolerating an empty file)
        for row in csv.reader(csvfile):
            if len(row) < 4:
                continue  # short record: columns 2-3 are needed below
            twit = row[2]   # Screen Name (Twitter handle)
            tweet = row[3]  # Tweet
            tweet_lang = _detect_lang(tweet)
            if tweet_lang is None or tweet_lang == 'en':
                continue
            if tweet_lang not in rejected:
                print("%15s: (%2s) %s" % (twit, tweet_lang, tweet))
            elif twit not in flagged:
                flagged.add(twit)
                _queue_twit(twit, pending)
            if len(pending) >= _BATCH_SIZE:
                # process 20 twits at a time
                unfollowed += _unfollow_batch(acct, pending)
                pending = []
    if pending:
        # unfollow remaining twits found in timeline
        unfollowed += _unfollow_batch(acct, pending)
    return unfollowed


grand_total = 0
# `fp` stays a module-level name because run_t() writes to it; the with-block
# closes the log even when an error handler below calls sys.exit().
with open(logfile, 'at', encoding='utf-8') as fp:
    for acct in taccts():
        print(acct, ". . . ", end='')
        followersCSV = os.path.join('local', 'data', acct + '_followings.csv')
        total = 0
        flagged = set()  # names already queued for this account, across both scans
        try:
            try:
                file_age = file_age_in_seconds(followersCSV)
            except OSError:
                # No readable followings file yet: treat it as stale.
                file_age = week_secs
            if file_age >= week_secs:
                run_t(acct, 'followings')
                total += _scan_followings(acct, followersCSV, flagged)

            timelineCSV = run_t(acct, 'timeline', '-n100', timestamp=True)
            print(timelineCSV)
            if timelineCSV is not None:
                # run_t returns None when no output file was produced.
                total += _scan_timeline(acct, timelineCSV, flagged)

            print("\tTotal twits:", str(total))
            grand_total += total
            if total > 0:
                run_t(acct, 'whoami', timestamp=True)
                os.rename(followersCSV, followersCSV + '~.1')

        except OSError as oe:
            print(" *** OSError", oe.strerror)
            sys.exit(oe.errno or 1)
        except TypeError as te:
            import traceback
            print(" *** TypeError:", te)
            traceback.print_exc()
            sys.exit(1)
        except Exception as e:
            print(" *** Exception:", e)
            sys.exit(1)

print("\nGrand total twits: ", str(grand_total))
sys.exit()
	#!/usr/bin/env python3
	# t-clean.py v1.2.22 - Friday, September 15, 2017
	# Cleanup Twitter Followings with Ruby Gem 't' and Python's language detect module

	""""
	Long Records (follow, unfollow, groupies, whoami, etc.)
	0 ID
	1 Since
	2 Last tweeted at
	3 Tweets
	4 Favorites
	5 Listed
	6 Following
	7 Followers
	8 Screen name
	9 Name
	10 Verified
	11 Protected
	12 Bio
	13 Status
	14 Location
	15 URL
	"""

	"""
	Short Records (timeline)
	0 ID
	1 Posted at
	2 Screen name
	3 Text
	"""

	import csv, os, shutil, stat, sys, tempfile, time
	from langdetect import detect, lang_detect_exception
	from localnow import localnow
	from random import randint

	DEBUG = True
	DRYRUN = True
	logfile = 't-clean.log'
	week_secs = 60 * 60 * 168
	languages = { 'en': True,
	'ar': False,
	'fa': False,
	'hi': False
	}
	rejected = ('ar', 'fa', 'hi')

	"""
	languages = list()
	accept = list()
	reject = list()
	neutral = list()
	"""

	def file_age_in_seconds(pathname):
		"""Return how many seconds have passed since ``pathname`` was last modified.

		Any ``OSError`` raised by ``os.stat`` (for example when the path does
		not exist) propagates to the caller.
		"""
		return time.time() - os.stat(pathname)[stat.ST_MTIME]


	# ToDo: move this to config.py
	def taccts():
		"""Return the sorted list of account names reported by ``t accounts``.

		The command is run through the shell with its output piped through
		``grep -v '^ '`` (lines starting with a space are dropped) and
		redirected into a temporary file, which is then read back one name
		per line and deleted.

		While DEBUG is set the command is still executed, but a fixed pair of
		placeholder names is returned instead of the real accounts.

		If reading or removing the temporary file raises OSError, the error is
		printed and the process exits with that error's errno.
		"""
		fd, tmpfile = tempfile.mkstemp('.txt', 't-accounts', '/tmp')
		# mkstemp returns an already-open descriptor. The shell redirect below
		# reopens the file by path, so close ours now to avoid leaking one
		# descriptor per call.
		os.close(fd)
		# A plain `|` is required here: an escaped `\|` would reach `t` as a
		# literal argument instead of starting a pipeline.
		cmd = "t accounts |grep -v '^ ' >" + tmpfile
		accts = []
		try:
			try:
				os.system(cmd)
				with open(tmpfile, 'rt') as results:
					accts = [result.rstrip('\n') for result in results]
			finally:
				# Remove the temp file even when reading it failed.
				os.remove(tmpfile)
		except OSError as e:
			print(" *** OSError", e.strerror)
			sys.exit(e.errno)

		if DEBUG:
			return sorted(['Twit1', 'Twit2'])
		return sorted(accts)


	def run_t(acct, cmd, args=None, timestamp=False):
		"""Run one ``t`` subcommand for ``acct`` with stdout captured in a CSV file.

		Makes ``acct`` the active ``t`` account, then runs ``t <cmd>`` through
		the shell. ``--csv`` is added for whoami/followings/followers/timeline,
		and ``--dry-run`` is added to follow/unfollow while DRYRUN is set.
		Output is redirected to ``local/data/<acct>_<cmd>[_<timestamp>].csv``.
		The command line is printed and appended to the open log file ``fp``.
		After the command the function sleeps 30-60 seconds to stay under
		Twitter's rate limit.

		Args:
			acct: account name; must be one of the names returned by taccts().
			cmd: one of followings, followers, whoami, follow, unfollow, timeline.
			args: whitespace-separated extra arguments (required for
				follow/unfollow). Each token is shell-quoted before use.
			timestamp: when true, a ``_YYYYmmdd_HHMMSS`` suffix from localnow()
				is added to the output file name.

		Returns:
			The path of the CSV file if it exists after the command ran,
			otherwise None.

		Raises:
			ValueError: unknown account, unknown command, or missing arguments.
			OSError: re-raised unchanged if logging or running the command fails.
		"""
		import shlex

		commands = ('followings', 'followers', 'whoami', 'follow', 'unfollow', 'timeline')
		needargs = ('follow', 'unfollow')
		usecsv = ('whoami', 'followings', 'followers', 'timeline')
		if acct not in taccts():
			raise ValueError("Unrecognized Twitter account: " + acct)
		if cmd not in commands:
			raise ValueError("Unrecognized command: " + cmd)
		if args is None:
			if cmd in needargs:
				raise ValueError(cmd + " requires argument(s)")
			args = ''
		csvopt = '--csv' if cmd in usecsv else ''
		ts = '_' + localnow().strftime("%Y%m%d_%H%M%S") if timestamp else ''
		# The file name uses the bare command, so build it before --dry-run is added.
		csvfile = os.path.join('local', 'data', acct + '_' + cmd + ts + '.csv')
		if cmd in needargs and DRYRUN:
			cmd += ' --dry-run'
		# Arguments include screen names read from CSV data; quote every token so
		# none of them can be interpreted by the shell.
		quoted_args = ' '.join(shlex.quote(arg) for arg in args.split())
		# We can replace 't' with the full path, stored in t_cmd
		oscmd = ('t set active ' + shlex.quote(acct) + '>/dev/null && t ' + cmd
			+ ' ' + csvopt + ' ' + quoted_args + ' >' + shlex.quote(csvfile))
		try:
			stamp = str(localnow().replace(microsecond=0))
			print(stamp + ' ' + oscmd)
			fp.write(stamp + ' ' + oscmd + '\n')
			os.system(oscmd)
			# Try to abide by Twitter's rate limit . . .
			time.sleep(randint(30, 60))
		except OSError as e:
			print(" *** OSError:", e.strerror)
			# Re-raise the original error so errno/strerror reach the caller.
			raise
		return csvfile if os.path.exists(csvfile) else None

	# Prerequisites
	# Check for Ruby gem 't'
	# Locate the `t` executable on PATH; everything below shells out to it.
	t_cmd = shutil.which('t', mode=os.F_OK | os.X_OK, path=None)
	if t_cmd is None:
		print(" Unmet dependency: Ruby Gem 't'. Aborting.")
		# Exit non-zero so a calling shell or scheduler can see the run failed.
		sys.exit(1)

	# Main Program
	# For every account: refresh the followings list when it is at least a week
	# old and unfollow profiles whose name/bio/location are mostly in a rejected
	# language, then scan the latest 100 timeline tweets and unfollow authors
	# tweeting in a rejected language. Names are unfollowed in batches.
	_BATCH_SIZE = 20  # screen names passed to one `t unfollow` invocation


	def _detect_lang(text):
		"""Return the language code detect() reports for text, or None if it raises."""
		try:
			return detect(text)
		except lang_detect_exception.LangDetectException:
			return None


	def _queue_twit(twit, pending):
		"""Append twit to the pending batch and echo it, five names per output line."""
		pending.append(twit)
		print(twit, end='\n' if len(pending) % 5 == 0 else ' ')


	def _unfollow_batch(acct, pending):
		"""Record `whoami`, unfollow every name in pending, and return how many there were."""
		run_t(acct, 'whoami', timestamp=True)
		run_t(acct, 'unfollow', ' '.join(pending), timestamp=True)
		return len(pending)


	def _scan_followings(acct, path, flagged):
		"""Unfollow profiles in the followings CSV at path that score as rejected.

		A profile scores one point for each of name, bio and location detected
		as a rejected language; two or more points queues it for unfollowing.
		Rows where any of those three fields cannot be detected are skipped.
		Names queued here are added to ``flagged``. Returns the number of
		names submitted for unfollowing.
		"""
		unfollowed = 0
		pending = []
		with open(path, 'rt', newline='') as csvfile:
			next(csvfile, None)  # Skip header row (tolerating an empty file)
			for row in csv.reader(csvfile):
				if len(row) < 15:
					continue  # short record: columns 8-14 are needed below
				twit = row[8]  # Screen Name (Twitter handle)
				name = row[9]  # User (Display Name)
				bio = row[12]  # bio
				status = row[13]  # Status (Last Tweet)
				loc = row[14]  # location
				try:
					name_lang = detect(name)
					bio_lang = detect(bio)
					loc_lang = detect(loc)
				except lang_detect_exception.LangDetectException:
					continue
				# These two are only displayed, so a failed detection on them
				# must not discard the row.
				twit_lang = _detect_lang(twit)
				status_lang = _detect_lang(status)
				print("Twit/Name/Location (%2s / %2s / %2s): %16s / %25s / %25s\n\tBio (%2s): %160s\n\tStatus (%2s): %140s\n\n" % (twit_lang, name_lang, loc_lang, twit, name, loc, bio_lang, bio, status_lang, status))
				score = sum(lang in rejected for lang in (name_lang, bio_lang, loc_lang))
				if score >= 2 and twit not in flagged:
					flagged.add(twit)
					_queue_twit(twit, pending)
				if len(pending) >= _BATCH_SIZE:
					# process 20 twits at a time
					unfollowed += _unfollow_batch(acct, pending)
					pending = []
		if pending:
			# unfollow remaining twits
			unfollowed += _unfollow_batch(acct, pending)
		return unfollowed


	def _scan_timeline(acct, path, flagged):
		"""Unfollow authors in the timeline CSV at path who tweet in a rejected language.

		English tweets and tweets whose language cannot be detected are ignored;
		tweets in other non-rejected languages are printed for review. Authors
		already in ``flagged`` are not queued again. Returns the number of
		names submitted for unfollowing.
		"""
		unfollowed = 0
		pending = []
		with open(path, 'rt', newline='') as csvfile:
			next(csvfile, None)  # Skip header row (tolerating an empty file)
			for row in csv.reader(csvfile):
				if len(row) < 4:
					continue  # short record: columns 2-3 are needed below
				twit = row[2]  # Screen Name (Twitter handle)
				tweet = row[3]  # Tweet
				tweet_lang = _detect_lang(tweet)
				if tweet_lang is None or tweet_lang == 'en':
					continue
				if tweet_lang not in rejected:
					print("%15s: (%2s) %s" % (twit, tweet_lang, tweet))
				elif twit not in flagged:
					flagged.add(twit)
					_queue_twit(twit, pending)
				if len(pending) >= _BATCH_SIZE:
					# process 20 twits at a time
					unfollowed += _unfollow_batch(acct, pending)
					pending = []
		if pending:
			# unfollow remaining twits found in timeline
			unfollowed += _unfollow_batch(acct, pending)
		return unfollowed


	grand_total = 0
	# `fp` stays a module-level name because run_t() writes to it; the with-block
	# closes the log even when an error handler below calls sys.exit().
	with open(logfile, 'at', encoding='utf-8') as fp:
		for acct in taccts():
			print(acct, ". . . ", end='')
			followersCSV = os.path.join('local', 'data', acct + '_followings.csv')
			total = 0
			flagged = set()  # names already queued for this account, across both scans
			try:
				try:
					file_age = file_age_in_seconds(followersCSV)
				except OSError:
					# No readable followings file yet: treat it as stale.
					file_age = week_secs
				if file_age >= week_secs:
					run_t(acct, 'followings')
					total += _scan_followings(acct, followersCSV, flagged)

				timelineCSV = run_t(acct, 'timeline', '-n100', timestamp=True)
				print(timelineCSV)
				if timelineCSV is not None:
					# run_t returns None when no output file was produced.
					total += _scan_timeline(acct, timelineCSV, flagged)

				print("\tTotal twits:", str(total))
				grand_total += total
				if total > 0:
					run_t(acct, 'whoami', timestamp=True)
					os.rename(followersCSV, followersCSV + '~.1')

			except OSError as oe:
				print(" *** OSError", oe.strerror)
				sys.exit(oe.errno or 1)
			except TypeError as te:
				import traceback
				print(" *** TypeError:", te)
				traceback.print_exc()
				sys.exit(1)
			except Exception as e:
				print(" *** Exception:", e)
				sys.exit(1)

	print("\nGrand total twits: ", str(grand_total))
	sys.exit()