Skip to content

Instantly share code, notes, and snippets.

@rjmackay rjmackay/
Created Sep 4, 2014

What would you like to do?
Export photos from twitpic
# Based on
# Modified to clean up filename
import urllib
import urllib2
import json
import collections
import HTMLParser
import time
import os
import re
# Export script body: walks the paginated image listing, derives a
# filesystem-safe name from each picture's caption, downloads the picture and
# stamps the file with the picture's timestamp.

# Create a parser for HTML entities
h = HTMLParser.HTMLParser()
# Longest file name the filesystem holding the current directory accepts
name_max = os.statvfs('.').f_namemax
# Maximum filename length
# Last 4 characters will be .jpg or .png etc
max_length = name_max - 4
# Everything except ASCII letters and hyphens is dropped from a caption.
# This also removes "/" so no separate path-separator handling is needed.
unsafe_chars = re.compile(r'[^A-Za-z-]')
# Target Page
# NOTE(review): this is empty in the file as found; it must hold the listing
# URL prefix to which the page number is appended -- fill in before running.
twitpic_api = ""
# File names written during this run, used to avoid overwriting a picture
# whose cleaned caption matches an earlier one
used_file_names = set()
# Get the data about the target page
for page in range(1, 100):
    # page is an int; convert explicitly (str + int raises TypeError)
    print("page " + str(page))
    response = urllib2.urlopen(twitpic_api + str(page))
    try:
        twitpic_data = json.load(response)
    finally:
        # Always release the connection, even if the body is not valid JSON
        response.close()
    # Get the info about each image on the page
    twitpic_images = twitpic_data.get("images")
    if not twitpic_images:
        # An empty page means there is nothing further to fetch
        break
    for item in twitpic_images:
        twitpic_id = item['short_id']
        # Guard against a missing or null caption
        twitpic_title = item.get("message") or u""
        twitpic_file_type = item["type"]
        # NOTE(review): mktime interprets the parsed time as local time --
        # confirm which time zone the listing reports.
        twitpic_time = time.mktime(time.strptime(item["timestamp"], "%Y-%m-%d %H:%M:%S"))
        # NOTE(review): the URL prefix is empty in the file as found; it must
        # hold the download URL prefix for a picture id.
        twitpic_file_url = ""+twitpic_id
        suffix = "." + twitpic_file_type
        # Decode entities first, then clean, then truncate: truncating the raw
        # caption could cut an entity in half and wastes available length.
        twitpic_file_name = h.unescape(twitpic_title).lower().replace(" ", "-")
        twitpic_file_name = unsafe_chars.sub('', twitpic_file_name)
        # Leave room for the real extension, which may be longer than 4 chars
        stem_limit = min(max_length, name_max - len(suffix))
        twitpic_file_name = twitpic_file_name[:stem_limit]
        if not twitpic_file_name:
            # Nothing usable left in the caption: fall back to the picture id
            twitpic_file_name = twitpic_id
        elif twitpic_file_name + suffix in used_file_names:
            # Same cleaned caption as an earlier picture: append the id so the
            # earlier download is not overwritten
            keep = max(0, stem_limit - len(twitpic_id) - 1)
            twitpic_file_name = twitpic_file_name[:keep] + "-" + twitpic_id
        twitpic_file_name = twitpic_file_name + suffix
        used_file_names.add(twitpic_file_name)
        print(twitpic_file_name)
        # Save the file
        urllib.urlretrieve(twitpic_file_url, twitpic_file_name)
        # Set the file time
        os.utime(twitpic_file_name, (twitpic_time, twitpic_time))

This comment has been minimized.

Copy link

dnlknet commented Sep 5, 2014

It showed an error casting int -> str on line 25, but it works well, thanks.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.