Skip to content

Instantly share code, notes, and snippets.

@Chaz6
Created August 27, 2016 12:37
Show Gist options
  • Save Chaz6/3b8e11d4b23d951fee457d05f3bfcd82 to your computer and use it in GitHub Desktop.
Save Chaz6/3b8e11d4b23d951fee457d05f3bfcd82 to your computer and use it in GitHub Desktop.
Download images from a Tumblr blog
#!/usr/bin/python
import os, sys
import urlparse
from shutil import copyfileobj
from urllib import urlopen, unquote
from xml.etree import ElementTree as ET
from socket import error as socket_error
import socket
# Keep a reference to the resolver that ships with the socket module so the
# wrapper below can still delegate to it after socket.getaddrinfo is replaced.
origGetAddrInfo = socket.getaddrinfo


def getAddrInfoWrapper(host, port, family=0, socktype=0, proto=0, flags=0):
    """Resolve ``host``/``port`` like ``socket.getaddrinfo``, but IPv4 only.

    ``family`` is accepted so the signature stays call-compatible with the
    function being replaced, but it is deliberately ignored: every lookup is
    performed with ``socket.AF_INET``.  All other arguments are passed
    through unchanged, and the return value is whatever the original
    ``socket.getaddrinfo`` returns.
    """
    return origGetAddrInfo(host, port, socket.AF_INET, socktype, proto, flags)


# replace the original socket.getaddrinfo by our version
socket.getaddrinfo = getAddrInfoWrapper
# The blog name is a required first argument.  Without it, print a usage hint
# on stderr and stop with a non-zero status so shells and calling scripts can
# tell the run failed (a bare sys.exit() would report success).
if len(sys.argv) < 2:
    sys.stderr.write("Pass tumblr name as argument\n")
    sys.exit(1)
def ensure_dir(f):
    """Create the parent directory of file path ``f`` if it does not exist.

    Does nothing when ``f`` has no directory component (a bare file name) or
    when the directory is already present.  Any ``OSError`` raised by
    ``os.makedirs`` is re-raised unless the directory exists afterwards.
    """
    d = os.path.dirname(f)
    # dirname() is '' for a bare file name; os.makedirs('') would raise.
    if not d or os.path.isdir(d):
        return
    try:
        os.makedirs(d)
    except OSError:
        # Another process may have created the directory between the check
        # above and makedirs(); only a genuinely missing directory is an error.
        if not os.path.isdir(d):
            raise
def _save_photo(photo_url, dest_path):
    """Download ``photo_url`` into ``dest_path``.

    Returns True when the file was written completely, and False when opening
    the URL or transferring the data failed (the error number is printed in
    that case and no file is left at ``dest_path``).
    """
    try:
        photo_resp = urlopen(photo_url)
    except IOError as ierr:
        print("Error:  %s" % ierr.errno)
        # Nothing was fetched: do not create an output file, otherwise the
        # next run would report this photo as already downloaded.
        return False

    # Write under a temporary name and rename on success, so an interrupted
    # transfer never leaves a truncated file under the final name.
    part_path = dest_path + ".part"
    try:
        # Binary mode: image bytes must not go through newline translation.
        with open(part_path, 'wb') as outfile:
            copyfileobj(photo_resp, outfile)
    except socket_error as serr:
        print("Error:  %s" % serr.errno)
        if os.path.exists(part_path):
            os.remove(part_path)
        return False
    finally:
        photo_resp.close()

    os.rename(part_path, dest_path)
    return True


# Usage: script.py <tumblr_name> [start]
# Pages through the blog's /api/read XML feed, `num` photo posts at a time,
# and saves the 1280px-wide variant of every photo into the current directory
# as <blog>_<date>_<post id>_<n><ext>.
tumblr_name = sys.argv[1]
api_endpoint = 'http://%s.tumblr.com/api/read' % tumblr_name
start = 0
num = 50
post_count = 1  # non-zero so the loop below runs at least once
if len(sys.argv) == 3:
    # Optional second argument: offset of the first post to fetch.
    start = int(sys.argv[2])

# Keep requesting pages until one comes back without any <post> elements.
while post_count:
    myurl = "%s?type=photo&start=%s&num=%s" % (api_endpoint, start, num)
    print("url:  %s" % myurl)
    resp = urlopen(myurl)
    try:
        content = resp.read()
    finally:
        resp.close()
    tree = ET.fromstring(content)
    post_tags = tree.findall(".//post")
    post_count = len(post_tags)
    for post_tag in post_tags:
        post_id = post_tag.attrib['id']
        # 'date-gmt' is split on the first space; only the leading part is used.
        post_date = post_tag.attrib['date-gmt'].split(" ")[0]
        pic_count = 1
        for photo_tag in post_tag.findall(".//photo-url"):
            # Only the variant whose max-width attribute is "1280" is saved.
            if photo_tag.attrib['max-width'] != "1280":
                continue
            photo_url = photo_tag.text
            outnameext = "%s_%s_%s_%s%s" % (tumblr_name, post_date, post_id, pic_count, os.path.splitext(photo_url)[1])
            pic_count += 1
            if os.path.exists(outnameext):
                print("%s already downloaded" % outnameext)
                continue
            print("Downloading %s" % outnameext)
            _save_photo(photo_url, outnameext)
    start += num
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment