azam/archive-twitpic-data.py

## archive-twitpic-data.py
# Archive your Twitpic photos and metadata
#
# A cleaned-up fork of Terence Eden's original archiver:
# http://shkspr.mobi/blog/2013/08/exporting-twitpic-images-python/
#
# License: MIT

import shutil
import urllib2
import socket
import json
import time
import os

USERNAME = "your_username_goes_here"
TMP_FILE_NAME = "tmpfile"
MAX_RETRIES = 5
SLEEP_TIME = 2
TIMEOUT = 5000

page = 1
has_more_page = True

# Target Page
api = "https://api.twitpic.com/2/users/show.json?username=%s&page=" % USERNAME

# Get the data about the target page
while has_more_page:
  print "Processing page: " + str(page)

  has_page_error = True
  for i in range(MAX_RETRIES):
    try:
      raw_data = urllib2.urlopen(api + str(page), timeout=TIMEOUT)
      has_page_error = False
      if i > 0:
        print "Retry successful page: " + str(page)
      break
    except urllib2.URLError, e:
      print "Failed retrieving page: " + str(page)
      time.sleep(SLEEP_TIME)
    except socket.timeout:
      print "Timeout retrieving page: " + str(page)
      time.sleep(SLEEP_TIME)
  if has_page_error:
    has_more_page = False
    break

  json_data = json.load(raw_data)

  # Save the page data
  page_file = open("page-%s.json" % page,"w")
  page_file.write(json.dumps(json_data, indent=2))
  page_file.close()

  # Get the info about each image on the page
  images = json_data["images"]

  page += 1

  for item in images:
    file_id = item["short_id"]
    file_type = item["type"]
    file_time = time.mktime(time.strptime(item["timestamp"], "%Y-%m-%d %H:%M:%S"))
    file_url = "https://twitpic.com/show/full/"+file_id
    file_name = file_id + "." + file_type

    if not os.path.exists(file_name):
      for i in range(MAX_RETRIES):
        # Remove temp file if exists
        try:
          os.remove(TMP_FILE_NAME)
        except OSError:
          pass
        try:
          # Save the file to temporary file
          req = urllib2.urlopen(file_url, timeout=TIMEOUT)
          with open(TMP_FILE_NAME, "wb") as tmp_file:
              shutil.copyfileobj(req, tmp_file)

          # Rename to actual file
          os.rename(TMP_FILE_NAME, file_name)

          # Set the file time
          os.utime(file_name,(file_time, file_time))

          if i > 0:
            print "Retry successful for image ID: " + file_id
          break
        except urllib2.URLError, e:
          print "Failed retrieving image ID: " + file_id
          time.sleep(SLEEP_TIME)
        except socket.timeout:
          print "Timeout retrieving image ID: " + file_id
          time.sleep(SLEEP_TIME)
	# Archive your Twitpic photos and metadata
	#
	# A cleaned-up fork of Terence Eden's original archiver:
	# http://shkspr.mobi/blog/2013/08/exporting-twitpic-images-python/
	#
	# License: MIT

	import shutil
	import urllib2
	import socket
	import json
	import time
	import os

	USERNAME = "your_username_goes_here"
	TMP_FILE_NAME = "tmpfile"
	MAX_RETRIES = 5
	SLEEP_TIME = 2
	TIMEOUT = 5000

	page = 1
	has_more_page = True

	# Target Page
	api = "https://api.twitpic.com/2/users/show.json?username=%s&page=" % USERNAME

	# Get the data about the target page
	while has_more_page:
	print "Processing page: " + str(page)

	has_page_error = True
	for i in range(MAX_RETRIES):
	try:
	raw_data = urllib2.urlopen(api + str(page), timeout=TIMEOUT)
	has_page_error = False
	if i > 0:
	print "Retry successful page: " + str(page)
	break
	except urllib2.URLError, e:
	print "Failed retrieving page: " + str(page)
	time.sleep(SLEEP_TIME)
	except socket.timeout:
	print "Timeout retrieving page: " + str(page)
	time.sleep(SLEEP_TIME)
	if has_page_error:
	has_more_page = False
	break

	json_data = json.load(raw_data)

	# Save the page data
	page_file = open("page-%s.json" % page,"w")
	page_file.write(json.dumps(json_data, indent=2))
	page_file.close()

	# Get the info about each image on the page
	images = json_data["images"]

	page += 1

	for item in images:
	file_id = item["short_id"]
	file_type = item["type"]
	file_time = time.mktime(time.strptime(item["timestamp"], "%Y-%m-%d %H:%M:%S"))
	file_url = "https://twitpic.com/show/full/"+file_id
	file_name = file_id + "." + file_type

	if not os.path.exists(file_name):
	for i in range(MAX_RETRIES):
	# Remove temp file if exists
	try:
	os.remove(TMP_FILE_NAME)
	except OSError:
	pass
	try:
	# Save the file to temporary file
	req = urllib2.urlopen(file_url, timeout=TIMEOUT)
	with open(TMP_FILE_NAME, "wb") as tmp_file:
	shutil.copyfileobj(req, tmp_file)

	# Rename to actual file
	os.rename(TMP_FILE_NAME, file_name)

	# Set the file time
	os.utime(file_name,(file_time, file_time))

	if i > 0:
	print "Retry successful for image ID: " + file_id
	break
	except urllib2.URLError, e:
	print "Failed retrieving image ID: " + file_id
	time.sleep(SLEEP_TIME)
	except socket.timeout:
	print "Timeout retrieving image ID: " + file_id
	time.sleep(SLEEP_TIME)