Chaz6/tumblr-get.py

## tumblr-get.py
#!/usr/bin/python

import os, sys
import urlparse
from shutil import copyfileobj
from urllib import urlopen, unquote
from xml.etree import ElementTree as ET
from socket import error as socket_error
import socket

# Keep a handle on the stock resolver before it is swapped out below.
origGetAddrInfo = socket.getaddrinfo


def getAddrInfoWrapper(host, port, family=0, socktype=0, proto=0, flags=0):
    """Look up *host*/*port* through the saved resolver, always over IPv4.

    The ``family`` argument is accepted but ignored: ``socket.AF_INET`` is
    passed in its place. ``socktype``, ``proto`` and ``flags`` are forwarded
    unchanged, and the saved resolver's result is returned as-is.
    """
    ipv4_family = socket.AF_INET
    return origGetAddrInfo(host, port, ipv4_family, socktype, proto, flags)


# Install the wrapper so every later lookup of socket.getaddrinfo gets the
# IPv4-only version.
socket.getaddrinfo = getAddrInfoWrapper

# The tumblr name is a required first command-line argument.
if len(sys.argv) < 2:
    # sys.exit() with a string writes that string to stderr and exits with
    # status 1, so a missing argument is reported to the caller as a failure
    # instead of the status 0 a bare sys.exit() would produce.
    sys.exit("Pass tumblr name as argument")

def ensure_dir(f):
    """Make sure the directory that would contain the file path *f* exists.

    Only the directory part of *f* (``os.path.dirname(f)``) is created,
    including any missing parents; the file itself is never touched.

    Args:
        f: Path of a file. A bare filename with no directory part is
            accepted and results in no action.

    Raises:
        OSError: If the directory cannot be created for any reason other
            than the path already existing.
    """
    import errno  # local import keeps this helper self-contained

    d = os.path.dirname(f)
    if not d:
        # No directory component: the file lives in the current directory,
        # and os.makedirs('') would raise.
        return
    try:
        os.makedirs(d)
    except OSError as err:
        # An already-existing path is fine (as in the original exists()
        # check); attempting the creation first avoids the check-then-create
        # race with other processes.
        if err.errno != errno.EEXIST:
            raise

# Main script: page through the blog's photo posts via the /api/read endpoint
# and save every 1280-wide photo into the current directory as
# <tumblr>_<date>_<post id>_<n><ext>, skipping files that already exist.
#
# print(...) is always called with a single pre-formatted string so the
# output is the same whether it is parsed as a Python 2 print statement or a
# function call.

def _download_photo(photo_url, dest_path):
    """Download *photo_url* into the local file *dest_path*.

    Returns True when the transfer completed and False when opening the URL
    or copying the data failed (the error number is printed, as before).
    A partially written file is removed so that the "already downloaded"
    check does not skip the photo on the next run.
    """
    try:
        remote = urlopen(photo_url)
    except IOError as ierr:
        print("Error:  %s" % (ierr.errno,))
        # Nothing was opened, so there is nothing to copy or clean up.
        return False
    try:
        # Image data is binary; 'wb' avoids newline translation on platforms
        # that distinguish text and binary files. The with-block guarantees
        # the file is closed (the original never actually called close()).
        with open(dest_path, 'wb') as outfile:
            copyfileobj(remote, outfile)
    except socket_error as serr:
        print("Error:  %s" % (serr.errno,))
        if os.path.exists(dest_path):
            os.remove(dest_path)
        return False
    finally:
        remote.close()
    return True


tumblr_name = sys.argv[1]
api_endpoint = 'http://%s.tumblr.com/api/read' % tumblr_name
start = 0   # offset of the first post to request
num = 50    # number of posts requested per API call
post_count = 1  # non-zero so the loop runs at least once
if len(sys.argv) == 3:
    # Optional second argument: post offset to start from.
    start = int(sys.argv[2])

# Keep requesting pages until one comes back with no posts.
while post_count:
    myurl = "%s?type=photo&start=%s&num=%s" % (api_endpoint, start, num)
    print("url:  %s" % myurl)
    resp = urlopen(myurl)
    try:
        content = resp.read()
    finally:
        resp.close()
    tree = ET.fromstring(content)
    post_tags = tree.findall(".//post")
    post_count = len(post_tags)
    for post_tag in post_tags:
        post_id = post_tag.attrib['id']
        # 'date-gmt' is split on the first space; only the leading part is
        # used in the file name.
        post_date = post_tag.attrib['date-gmt'].split(" ")[0]
        pic_count = 1
        for photo_tag in post_tag.findall(".//photo-url"):
            # Only the 1280-wide rendition of each photo is downloaded;
            # elements without a max-width attribute are skipped.
            if photo_tag.attrib.get('max-width') != "1280":
                continue
            photo_url = photo_tag.text
            outnameext = "%s_%s_%s_%s%s" % (
                tumblr_name, post_date, post_id, pic_count,
                os.path.splitext(photo_url)[1])
            pic_count += 1
            if os.path.exists(outnameext):
                print("%s already downloaded" % outnameext)
                continue
            print("Downloading %s" % outnameext)
            _download_photo(photo_url, outnameext)
    start += num
	#!/usr/bin/python

	import os, sys
	import urlparse
	from shutil import copyfileobj
	from urllib import urlopen, unquote
	from xml.etree import ElementTree as ET
	from socket import error as socket_error
	import socket

	# Keep a handle on the current resolver before it is swapped out below.
	origGetAddrInfo = socket.getaddrinfo

	def getAddrInfoWrapper(host, port, family=0, socktype=0, proto=0, flags=0):
	    """Look up *host*/*port* through the saved resolver, always over IPv4.

	    The ``family`` argument is accepted but ignored: ``socket.AF_INET`` is
	    passed in its place. ``socktype``, ``proto`` and ``flags`` are
	    forwarded unchanged.
	    """
	    # The function body must be indented under its def; the indentation
	    # had been stripped from this copy.
	    return origGetAddrInfo(host, port, socket.AF_INET, socktype, proto, flags)

	# replace the original socket.getaddrinfo by our version

	socket.getaddrinfo = getAddrInfoWrapper

	# The tumblr name is a required first command-line argument.
	if len(sys.argv) < 2:
	    # sys.exit() with a string writes that string to stderr and exits
	    # with status 1, so a missing argument is reported as a failure
	    # instead of the status 0 a bare sys.exit() would produce.
	    sys.exit("Pass tumblr name as argument")

	def ensure_dir(f):
	    """Make sure the directory that would contain the file path *f* exists.

	    Only the directory part of *f* (``os.path.dirname(f)``) is created,
	    including any missing parents; the file itself is never touched.

	    Args:
	        f: Path of a file. A bare filename with no directory part is
	            accepted and results in no action.

	    Raises:
	        OSError: If the directory cannot be created for any reason other
	            than the path already existing.
	    """
	    import errno  # local import keeps this helper self-contained

	    d = os.path.dirname(f)
	    if not d:
	        # No directory component: os.makedirs('') would raise.
	        return
	    try:
	        os.makedirs(d)
	    except OSError as err:
	        # An already-existing path is fine; attempting the creation first
	        # avoids the check-then-create race with other processes.
	        if err.errno != errno.EEXIST:
	            raise

	# Main script: page through the blog's photo posts via the /api/read
	# endpoint and save every 1280-wide photo into the current directory as
	# <tumblr>_<date>_<post id>_<n><ext>, skipping files that already exist.
	#
	# print(...) is always called with a single pre-formatted string so the
	# output is the same whether it is parsed as a Python 2 print statement
	# or a function call.

	def _download_photo(photo_url, dest_path):
	    """Download *photo_url* into the local file *dest_path*.

	    Returns True when the transfer completed and False when opening the
	    URL or copying the data failed (the error number is printed). A
	    partially written file is removed so that the "already downloaded"
	    check does not skip the photo on the next run.
	    """
	    try:
	        remote = urlopen(photo_url)
	    except IOError as ierr:
	        print("Error:  %s" % (ierr.errno,))
	        # Nothing was opened, so there is nothing to copy or clean up.
	        return False
	    try:
	        # Image data is binary; the with-block guarantees the file is
	        # closed (the original never actually called close()).
	        with open(dest_path, 'wb') as outfile:
	            copyfileobj(remote, outfile)
	    except socket_error as serr:
	        print("Error:  %s" % (serr.errno,))
	        if os.path.exists(dest_path):
	            os.remove(dest_path)
	        return False
	    finally:
	        remote.close()
	    return True

	tumblr_name = sys.argv[1]
	api_endpoint = 'http://%s.tumblr.com/api/read' % tumblr_name
	start = 0   # offset of the first post to request
	num = 50    # number of posts requested per API call
	post_count = 1  # non-zero so the loop runs at least once
	if len(sys.argv) == 3:
	    # Optional second argument: post offset to start from.
	    start = int(sys.argv[2])

	# Keep requesting pages until one comes back with no posts.
	while post_count:
	    myurl = "%s?type=photo&start=%s&num=%s" % (api_endpoint, start, num)
	    print("url:  %s" % myurl)
	    resp = urlopen(myurl)
	    try:
	        content = resp.read()
	    finally:
	        resp.close()
	    tree = ET.fromstring(content)
	    post_tags = tree.findall(".//post")
	    post_count = len(post_tags)
	    for post_tag in post_tags:
	        post_id = post_tag.attrib['id']
	        # Only the part of 'date-gmt' before the first space is used in
	        # the file name.
	        post_date = post_tag.attrib['date-gmt'].split(" ")[0]
	        pic_count = 1
	        for photo_tag in post_tag.findall(".//photo-url"):
	            # Only the 1280-wide rendition of each photo is downloaded;
	            # elements without a max-width attribute are skipped.
	            if photo_tag.attrib.get('max-width') != "1280":
	                continue
	            photo_url = photo_tag.text
	            outnameext = "%s_%s_%s_%s%s" % (
	                tumblr_name, post_date, post_id, pic_count,
	                os.path.splitext(photo_url)[1])
	            pic_count += 1
	            if os.path.exists(outnameext):
	                print("%s already downloaded" % outnameext)
	                continue
	            print("Downloading %s" % outnameext)
	            _download_photo(photo_url, outnameext)
	    start += num