Skip to content

Instantly share code, notes, and snippets.

@salty-horse
Created January 17, 2016 22:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save salty-horse/ddb8caaaa6775bdb801a to your computer and use it in GitHub Desktop.
Save salty-horse/ddb8caaaa6775bdb801a to your computer and use it in GitHub Desktop.
Process Twitter followers fetched with get_followers.py
#!/usr/bin/env python3
import re
import json
import glob
def _one_line(text):
    """Return *text* flattened so it fits in one tab-separated cell.

    ``None`` (a profile field that was never filled in) becomes an empty
    string.  Line breaks and tabs are replaced by a single space each so the
    value can neither start a new row nor shift the following columns.
    """
    if text is None:
        return ''
    return (
        text.replace('\r\n', ' ')
        .replace('\n', ' ')
        .replace('\r', ' ')
        .replace('\t', ' ')
    )


def print_user(user):
    """Print one follower as a single tab-separated row on stdout.

    Args:
        user: Mapping with the keys ``id``, ``screen_name``, ``name``,
            ``statuses_count``, ``followers_count``, ``friends_count``,
            ``description`` and ``default_profile_image``.

    Raises:
        KeyError: If any of the keys listed above is missing.
    """
    print(
        user['id'],
        user['screen_name'],
        _one_line(user['name']),
        user['statuses_count'],
        user['followers_count'],
        user['friends_count'],
        _one_line(user['description']),
        user['default_profile_image'],
        # This adds an embedded image to Google Spreadsheets, but may be slow
        # if the file is large.
        # '=image("{}")'.format(user['profile_image_url_https']),
        sep='\t')
def load_followers(pattern='followers*.txt'):
    """Load and concatenate the JSON follower dumps matching *pattern*.

    Each matching file is parsed with ``json.load`` and its items are
    appended to one list (each file is expected to hold a JSON array).

    Args:
        pattern: Glob pattern selecting the dump files to read.

    Returns:
        A list with the items of every matching file, in filename order.
        Empty when no file matches.
    """
    loaded = []
    # glob yields names in arbitrary order; sort so the output is reproducible.
    for fname in sorted(glob.glob(pattern)):
        # JSON text is UTF-8; do not depend on the platform's locale encoding.
        with open(fname, encoding='utf-8') as f:
            loaded.extend(json.load(f))
    return loaded


followers = load_followers()
# Single-character script detectors applied to follower names/descriptions.

# Hebrew block (U+0590-U+05FF).
HEBREW = re.compile('[\u0590-\u05FF]')

# Cyrillic (U+0400-U+04FF) plus the whole Cyrillic Supplement (U+0500-U+052F).
# The range used to stop at U+0500, i.e. one character into the supplement.
CYRILLIC = re.compile('[\u0400-\u052F]')

# Despite the name this matches Japanese/CJK text in general, not only kanji.
KANJI = re.compile(
    '['
    '\u3000-\u303f'  # CJK symbols and punctuation
    '\u3040-\u309f'  # hiragana
    '\u30a0-\u30ff'  # katakana
    '\uff00-\uffef'  # halfwidth and fullwidth forms
    '\u4e00-\u9fff'  # CJK unified ideographs (full block, was cut at U+9FAF)
    '\u3400-\u4dbf'  # CJK unified ideographs extension A
    ']'
)
# Header row: one title per column, in the same order print_user() emits them.
print('\t'.join((
    'User ID',
    'screen name',
    'name',
    'tweet count',
    'followers',
    'following',
    'description',
    'egg avatar',
)))
# Main pass over the loaded followers.
#
# As written, the first two statements of the loop body print every follower
# and then `continue`, so none of the checks below them ever run. They are
# kept as a template that is switched on by editing this file.
#
# NOTE(review): indentation was lost in the copy this was restored from. The
# nesting below (each `print_user` sitting after the `continue` inside its
# `if`) is a reconstruction - confirm against the original before enabling
# any of the filters.
for user in followers:
    print_user(user)
    continue # Stop here: remove these two lines to activate the filters below

    # Skip users you trust
    # NOTE(review): HEBREW.search() raises TypeError if the description is
    # None - confirm the data never contains null descriptions.
    if HEBREW.search(user['name']) or HEBREW.search(user['description']):
        continue

    # Enable/disable these as needed
    # Each check below has a `continue` directly followed by a `print_user`
    # call, so while the `continue` is present that `print_user` is unreachable.

    # More than 3 Cyrillic characters in the display name.
    if len(CYRILLIC.findall(user['name'])) > 3:
        continue
        print_user(user)

    # More than 3 Cyrillic characters in the description.
    if len(CYRILLIC.findall(user['description'])) > 3:
        continue
        print_user(user)

    # More than 5 characters matched by KANJI in the display name.
    if len(KANJI.findall(user['name'])) > 5:
        continue
        print_user(user)

    # More than 5 characters matched by KANJI in the description.
    if len(KANJI.findall(user['description'])) > 5:
        continue
        print_user(user)

    # Accounts whose statuses_count is zero.
    if user['statuses_count'] == 0:
        continue
        print_user(user)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment