zmwangx/README.md

## README.md

      
    Raw
  

              README.md
            
          
    To use this program, put your Twitter OAuth credentials in ~/.config/twitter/oauth.json (or customize this path in tw-init.py). oauth.json should look like this:
{
    "consumer_key": "...",
    "consumer_secret": "...",
    "access_token": "...",
    "access_token_secret": "..."
}

See tw-init.py -h for usage. There are several external dependencies:

colorama (via pip)
libmagic (via brew, for instance)
python-magic (via pip)
tweepy (via pip)

Automatic updating is not implemented yet, but it is already planned out and very easy to add: simply parse out the last traversed status id from the status.log written by tw-init.py and pass it to the next run via --since-id.
Note on concurrency

Currently downloads are processed one at a time. While this is super fast for me, I have to say good luck if you have a bad connection to Twitter's CDN. I could write a simple concurrent model with multiprocessing, but it's kind of awkward and I don't like it. You might want to DIY. I would use Go instead if I wanted to do concurrency right, but Go doesn't have tweepy (no tweepy.Cursor!), and I'm not familiar enough with Go.
If you really have a bad connection to Twitter, and don't want to write concurrency code yourself (it's really simple), the legacy version is for you. Browse the commit history. See "Note on legacy version" for details.
Note on legacy version

In a previous version, I used my own generic image downloader, which turned out to be a really bad decision. Now the program should be much faster, much less resource intensive, and easily adapted to Windows (without cygwin, mingw, etc.).

  
## tw-init
#!/usr/bin/env python

import argparse
import datetime
import json
import os
import subprocess
import sys
import time
import urllib

from colorama import Fore
import magic
from tweepy import *

### parse args
# Command line: a required screen name, an optional target directory and an
# optional status id to start from.
parser = argparse.ArgumentParser()
parser.add_argument('screen_name')
parser.add_argument(
    '-d', '--directory',
    default='',
    help='path to save images to; default is ~/Downloads/img/twitter/screen_name',
)
parser.add_argument('--since-id', default='1')
args = parser.parse_args()
screen_name, since_id = args.screen_name, args.since_id
# An empty --directory (the default) falls back to a per-screen-name folder
# under the user's home directory.
directory = args.directory or os.path.expanduser('~/Downloads/img/twitter/' + screen_name)
print(directory)
# Log of traversed status ids, kept next to the images.
status_log_file = '/'.join((directory, 'status.log'))
### finished parsing args

### utilities
def print_error(err):
    """Write an error line for *err* to stderr and flush it.

    The line is ``<sys.argv[0]> error: <err>``, wrapped in colorama's
    ``Fore.RED`` / ``Fore.RESET`` sequences and terminated by a newline.
    """
    message = ''.join([Fore.RED, sys.argv[0], ' error: ', err, Fore.RESET])
    sys.stderr.write(message + '\n')
    sys.stderr.flush()
### end of utilities

### set up oauth
# Credentials are read from a JSON file that must provide the four keys
# used below: consumer_key, consumer_secret, access_token and
# access_token_secret.
oauth_json_file = os.path.expanduser('~/.config/twitter/oauth.json')
if not os.path.exists(oauth_json_file):
    print_error('no ' + oauth_json_file)
    # sys.exit rather than the bare exit() helper, which is only injected by
    # the site module and is meant for interactive use.
    sys.exit(1)
# Read through a context manager so the file handle is closed right away
# instead of being left to the garbage collector.
with open(oauth_json_file, 'r') as oauth_fp:
    oauth = json.load(oauth_fp)
auth = OAuthHandler(oauth['consumer_key'], oauth['consumer_secret'])
auth.set_access_token(oauth['access_token'], oauth['access_token_secret'])
api = API(auth)
### finished oauth stuff

### traverse the timeline and save images
# File extension to attach for each MIME type reported by magic.from_file.
mime_extensions = {
    "image/gif": ".gif",
    "image/jpeg": ".jpg",
    "image/png": ".png",
}

if not os.path.exists(directory):
    os.makedirs(directory)
# save all statuses (at most 3200 -- we have enough resources)
statuses = list(Cursor(api.user_timeline, screen_name=screen_name, since_id=since_id).items())
# The log is opened in a with-block so that it is flushed and closed even if
# the traversal stops part-way through.
with open(status_log_file, 'a') as status_log:
    # replay backwards
    for status in reversed(statuses):
        status_id = str(status.id)
        # Every traversed status id is logged, whether or not it has media.
        status_log.write(status_id + '\n')
        date = status.created_at.strftime('%Y%m%d%H%M%S')
        # Retweets and statuses without extended entities are skipped.
        if hasattr(status, 'retweeted_status') or not hasattr(status, 'extended_entities'):
            continue
        if 'media' not in status.extended_entities:
            continue
        print('http://twitter.com/' + screen_name + '/status/' + status_id)  # print uri of status
        # count is the 1-based position among all media entries of the
        # status, so non-photo entries still use up a number.
        for count, media in enumerate(status.extended_entities['media'], 1):
            if media['type'] != 'photo':
                continue
            image_uri = media['media_url'] + ':large'
            print(image_uri)
            filename = date + '-twitter.com_' + screen_name + '-' + status_id + '-' + str(count)
            filepath = directory + '/' + filename
            # download image; an I/O failure on one image is reported and
            # skipped instead of aborting the whole traversal
            try:
                urllib.urlretrieve(image_uri, filepath)
            except IOError as exc:
                print_error(filename + ": failed to download " + image_uri + " (" + str(exc) + ")")
                continue
            if not os.path.exists(filepath):
                # download failed for whatever reason
                print_error(filename + ": failed to download " + image_uri)
                continue
            # identify mime type and attach extension
            mime = magic.from_file(filepath, mime=True)
            extension = mime_extensions.get(mime)
            if extension is None:
                # Unknown types are reported and left on disk without an extension.
                print_error(filepath + ": unrecgonized image type")
                continue
            os.rename(filepath, filepath + extension)
### finish traversing
	#!/usr/bin/env python

	import argparse
	import datetime
	import json
	import os
	import subprocess
	import sys
	import time
	import urllib

	from colorama import Fore
	import magic
	from tweepy import *

	### parse args
	# Command line: a required screen name, an optional target directory and an
	# optional status id to start from.
	parser = argparse.ArgumentParser()
	parser.add_argument('screen_name')
	parser.add_argument(
	    '-d', '--directory',
	    default='',
	    help='path to save images to; default is ~/Downloads/img/twitter/screen_name',
	)
	parser.add_argument('--since-id', default='1')
	args = parser.parse_args()
	screen_name, since_id = args.screen_name, args.since_id
	# An empty --directory (the default) falls back to a per-screen-name folder
	# under the user's home directory.
	directory = args.directory or os.path.expanduser('~/Downloads/img/twitter/' + screen_name)
	print(directory)
	# Log of traversed status ids, kept next to the images.
	status_log_file = '/'.join((directory, 'status.log'))
	### finished parsing args

	### utilities
	def print_error(err):
	    """Write an error line for *err* to stderr and flush it.

	    The line is ``<sys.argv[0]> error: <err>``, wrapped in colorama's
	    ``Fore.RED`` / ``Fore.RESET`` sequences and terminated by a newline.
	    """
	    message = ''.join([Fore.RED, sys.argv[0], ' error: ', err, Fore.RESET])
	    sys.stderr.write(message + '\n')
	    sys.stderr.flush()
	### end of utilities

	### set up oauth
	# Credentials are read from a JSON file that must provide the four keys
	# used below: consumer_key, consumer_secret, access_token and
	# access_token_secret.
	oauth_json_file = os.path.expanduser('~/.config/twitter/oauth.json')
	if not os.path.exists(oauth_json_file):
	    print_error('no ' + oauth_json_file)
	    # sys.exit rather than the bare exit() helper, which is only injected by
	    # the site module and is meant for interactive use.
	    sys.exit(1)
	# Read through a context manager so the file handle is closed right away
	# instead of being left to the garbage collector.
	with open(oauth_json_file, 'r') as oauth_fp:
	    oauth = json.load(oauth_fp)
	auth = OAuthHandler(oauth['consumer_key'], oauth['consumer_secret'])
	auth.set_access_token(oauth['access_token'], oauth['access_token_secret'])
	api = API(auth)
	### finished oauth stuff

	### traverse the timeline and save images
	# File extension to attach for each MIME type reported by magic.from_file.
	mime_extensions = {
	    "image/gif": ".gif",
	    "image/jpeg": ".jpg",
	    "image/png": ".png",
	}

	if not os.path.exists(directory):
	    os.makedirs(directory)
	# save all statuses (at most 3200 -- we have enough resources)
	statuses = list(Cursor(api.user_timeline, screen_name=screen_name, since_id=since_id).items())
	# The log is opened in a with-block so that it is flushed and closed even if
	# the traversal stops part-way through.
	with open(status_log_file, 'a') as status_log:
	    # replay backwards
	    for status in reversed(statuses):
	        status_id = str(status.id)
	        # Every traversed status id is logged, whether or not it has media.
	        status_log.write(status_id + '\n')
	        date = status.created_at.strftime('%Y%m%d%H%M%S')
	        # Retweets and statuses without extended entities are skipped.
	        if hasattr(status, 'retweeted_status') or not hasattr(status, 'extended_entities'):
	            continue
	        if 'media' not in status.extended_entities:
	            continue
	        print('http://twitter.com/' + screen_name + '/status/' + status_id)  # print uri of status
	        # count is the 1-based position among all media entries of the
	        # status, so non-photo entries still use up a number.
	        for count, media in enumerate(status.extended_entities['media'], 1):
	            if media['type'] != 'photo':
	                continue
	            image_uri = media['media_url'] + ':large'
	            print(image_uri)
	            filename = date + '-twitter.com_' + screen_name + '-' + status_id + '-' + str(count)
	            filepath = directory + '/' + filename
	            # download image; an I/O failure on one image is reported and
	            # skipped instead of aborting the whole traversal
	            try:
	                urllib.urlretrieve(image_uri, filepath)
	            except IOError as exc:
	                print_error(filename + ": failed to download " + image_uri + " (" + str(exc) + ")")
	                continue
	            if not os.path.exists(filepath):
	                # download failed for whatever reason
	                print_error(filename + ": failed to download " + image_uri)
	                continue
	            # identify mime type and attach extension
	            mime = magic.from_file(filepath, mime=True)
	            extension = mime_extensions.get(mime)
	            if extension is None:
	                # Unknown types are reported and left on disk without an extension.
	                print_error(filepath + ": unrecgonized image type")
	                continue
	            os.rename(filepath, filepath + extension)
	### finish traversing