# rjmackay/export.py

## export.py
# Based on http://shkspr.mobi/blog/2013/08/exporting-twitpic-images-python/
# Modified to clean up filename

import urllib
import urllib2
import json
import collections
import HTMLParser
import time
import os
import re

# Create a parser for HTML entities
h = HTMLParser.HTMLParser()

# Longest file name accepted by the filesystem holding the current directory
name_max = os.statvfs('.').f_namemax

# Maximum length of the title part of a file name
# Last 4 characters will be .jpg or .png etc
max_length = name_max - 4

# Everything except ASCII letters and hyphens is dropped from titles
# (this also removes characters such as / that can't be used in a file name)
unsafe_chars = re.compile(r'[^A-Za-z-]')

# Target Page
twitpic_api = "http://api.twitpic.com/2/users/show.json?username=rjmackay&page="


def _build_file_name(title, short_id, file_type, taken):
  """Return the local file name to use for one image.

  title:     the image's message; may be None or empty
  short_id:  the image's short id, used when the title cleans up to nothing
             and to tell apart images whose cleaned titles are identical
  file_type: the extension to append (without the dot)
  taken:     collection of file names already used in this run
  """
  suffix = "." + file_type
  # Leave room for the real extension, which may be longer than 3 letters
  stem_limit = min(max_length, name_max - len(suffix))

  # Decode HTML entities first, then clean up, then cut to length, so the
  # length limit applies to the text that actually ends up in the name
  stem = h.unescape(title or "").lower().replace(" ", "-")
  stem = unsafe_chars.sub('', stem)[:stem_limit]
  if stem == "":
    stem = short_id

  if stem + suffix in taken:
    # Another image already produced this name: add the id instead of
    # overwriting the earlier download
    tag = "-" + short_id
    stem = stem[:max(0, stem_limit - len(tag))] + tag

  return stem + suffix


# File names written so far in this run
seen_names = set()

# Get the data about the target page
for page in range(1, 100):
  # page is an int, so it has to be converted before concatenation
  print("page " + str(page))
  twitpic_data = json.load(urllib2.urlopen(twitpic_api + str(page)))

  # Get the info about each image on the page
  twitpic_images = twitpic_data.get("images")
  if not twitpic_images:
    # Assumes pages past the last one carry no images - TODO confirm
    # against the API. Stop instead of requesting the remaining pages.
    break

  for item in twitpic_images:
    twitpic_id = item['short_id']
    twitpic_file_url = "http://twitpic.com/show/full/" + twitpic_id
    # NOTE(review): mktime treats the parsed timestamp as local time;
    # confirm which time zone the API reports.
    twitpic_time = time.mktime(time.strptime(item["timestamp"], "%Y-%m-%d %H:%M:%S"))

    twitpic_file_name = _build_file_name(item.get("message"), twitpic_id, item["type"], seen_names)
    seen_names.add(twitpic_file_name)
    print(twitpic_file_name)

    # Save the file
    urllib.urlretrieve(twitpic_file_url, twitpic_file_name)
    # Set the file time
    os.utime(twitpic_file_name, (twitpic_time, twitpic_time))
	# Based on http://shkspr.mobi/blog/2013/08/exporting-twitpic-images-python/
# Modified to clean up filename
#
# NOTE(review): this section repeats the export script that appears earlier
# in this file and looks like an accidental duplicate - confirm and keep
# only one copy. Its indentation has been restored so the file parses.

import urllib
import urllib2
import json
import collections
import HTMLParser
import time
import os
import re

# Create a parser for HTML entities
h = HTMLParser.HTMLParser()

# Longest file name accepted by the filesystem holding the current directory
name_max = os.statvfs('.').f_namemax

# Maximum length of the title part of a file name
# Last 4 characters will be .jpg or .png etc
max_length = name_max - 4

# Everything except ASCII letters and hyphens is dropped from titles
# (this also removes characters such as / that can't be used in a file name)
unsafe_chars = re.compile(r'[^A-Za-z-]')

# Target Page
twitpic_api = "http://api.twitpic.com/2/users/show.json?username=rjmackay&page="


def _build_file_name(title, short_id, file_type, taken):
  """Return the local file name to use for one image.

  title:     the image's message; may be None or empty
  short_id:  the image's short id, used when the title cleans up to nothing
             and to tell apart images whose cleaned titles are identical
  file_type: the extension to append (without the dot)
  taken:     collection of file names already used in this run
  """
  suffix = "." + file_type
  # Leave room for the real extension, which may be longer than 3 letters
  stem_limit = min(max_length, name_max - len(suffix))

  # Decode HTML entities first, then clean up, then cut to length, so the
  # length limit applies to the text that actually ends up in the name
  stem = h.unescape(title or "").lower().replace(" ", "-")
  stem = unsafe_chars.sub('', stem)[:stem_limit]
  if stem == "":
    stem = short_id

  if stem + suffix in taken:
    # Another image already produced this name: add the id instead of
    # overwriting the earlier download
    tag = "-" + short_id
    stem = stem[:max(0, stem_limit - len(tag))] + tag

  return stem + suffix


# File names written so far in this run
seen_names = set()

# Get the data about the target page
for page in range(1, 100):
  # page is an int, so it has to be converted before concatenation
  print("page " + str(page))
  twitpic_data = json.load(urllib2.urlopen(twitpic_api + str(page)))

  # Get the info about each image on the page
  twitpic_images = twitpic_data.get("images")
  if not twitpic_images:
    # Assumes pages past the last one carry no images - TODO confirm
    # against the API. Stop instead of requesting the remaining pages.
    break

  for item in twitpic_images:
    twitpic_id = item['short_id']
    twitpic_file_url = "http://twitpic.com/show/full/" + twitpic_id
    # NOTE(review): mktime treats the parsed timestamp as local time;
    # confirm which time zone the API reports.
    twitpic_time = time.mktime(time.strptime(item["timestamp"], "%Y-%m-%d %H:%M:%S"))

    twitpic_file_name = _build_file_name(item.get("message"), twitpic_id, item["type"], seen_names)
    seen_names.add(twitpic_file_name)
    print(twitpic_file_name)

    # Save the file
    urllib.urlretrieve(twitpic_file_url, twitpic_file_name)
    # Set the file time
    os.utime(twitpic_file_name, (twitpic_time, twitpic_time))