Skip to content

Instantly share code, notes, and snippets.

@tanjo
Last active August 29, 2015 14:06
Show Gist options
  • Save tanjo/359504e2a0cf2575a5e1 to your computer and use it in GitHub Desktop.
Save tanjo/359504e2a0cf2575a5e1 to your computer and use it in GitHub Desktop.
Twitpic
# Based on https://gist.github.com/rjmackay/aa0dfcdf9eb313361db7
# Modified to clean up filename
# The pp / pp_str pretty-print helpers are adapted from https://github.com/taichino/prettyprint
# Configuration
# Replace the username in the twitpic_api URL below with your own Twitpic account user name
import urllib
import urllib2
import json
import collections
import HTMLParser
import time
import os
import re
try:
import json
except ImportError:
import simplejson as json
class MyEncoder(json.JSONEncoder):
    """JSON encoder that never rejects a value.

    Objects the stock encoder cannot serialise are converted as follows:
    anything iterable becomes a list of its items, and everything else
    falls back to its ``str()`` representation.
    """

    def default(self, o):
        """Return a JSON-serialisable stand-in for *o*."""
        # Only the call to iter() is guarded, so a TypeError raised while
        # the items are actually being consumed still propagates.
        try:
            members = iter(o)
        except TypeError:
            members = None
        if members is not None:
            return list(members)

        # Give the base class a chance first; it raises TypeError for
        # anything it does not know, in which case str() is the last resort.
        try:
            return json.JSONEncoder.default(self, o)
        except TypeError:
            return str(o)
def pp_str(obj):
    """Return *obj* pretty-printed as UTF-8 encoded JSON.

    The object is dumped with a 4-space indent and sorted keys; keys of
    unsupported types are skipped and unsupported values are converted by
    ``MyEncoder``. Non-ASCII characters are written as themselves rather
    than as ``\\uXXXX`` escapes so the output stays human readable.

    Returns:
        A byte string containing the UTF-8 encoded JSON text.
    """
    # SECURITY: this used to build a u'''...''' literal around the JSON text
    # and eval() it to turn \uXXXX escapes back into characters. Any value
    # containing ''' (e.g. a message fetched from a remote API) could break
    # out of that literal and run arbitrary code. ensure_ascii=False gives
    # readable output without evaluating anything.
    orig = json.dumps(obj,
                      indent=4,
                      sort_keys=True,
                      skipkeys=True,
                      cls=MyEncoder,
                      ensure_ascii=False)
    # On Python 2 json.dumps may already hand back a byte string when the
    # input held no unicode objects; encoding that again would force an
    # implicit ASCII decode, so it is returned as is.
    if isinstance(orig, bytes):
        return orig
    return orig.encode('utf-8')
def pp(obj):
    """Pretty-print *obj* to standard output using ``pp_str``."""
    rendered = pp_str(obj)
    print(rendered)
if __name__ == '__main__':
    # Download every image listed for the account into the current
    # directory, naming each file after its (sanitised) message plus its
    # timestamp, and stamping the file with the image's original time.

    # Create a parser for HTML entities
    h = HTMLParser.HTMLParser()
    # Maximum length of a single file name on the current filesystem
    name_max = os.statvfs('.').f_namemax
    # Everything except ASCII letters and hyphens is dropped from file names
    # (compiled once, outside the loops)
    unsafe_chars = re.compile(r'[^A-Za-z-]')
    # Target Page
    twitpic_api = "http://api.twitpic.com/2/users/show.json?username=makietan&page="
    # Get the data about the target page
    for page in range(1, 100):
        print("page " + str(page))
        response = urllib2.urlopen(twitpic_api + str(page))
        try:
            twitpic_data = json.load(response)
        finally:
            # Always release the connection, even if the body is not JSON
            response.close()
        # Get the info about each image on the page; a page without images
        # means we are past the end, so stop instead of raising KeyError
        twitpic_images = twitpic_data.get("images")
        if not twitpic_images:
            break
        for item in twitpic_images:
            twitpic_id = item['short_id']
            # A missing/None message must not become the literal name "none"
            twitpic_title = unicode(item.get("message") or u"")
            twitpic_file_type = item["type"]
            twitpic_time = time.mktime(time.strptime(item["timestamp"], "%Y-%m-%d %H:%M:%S"))
            twitpic_file_url = "http://twitpic.com/show/full/" + twitpic_id
            # Decode HTML entities, lower-case, hyphenate spaces, then strip
            # every remaining character that is not a letter or hyphen
            # (this also removes '/', so no separate replacement is needed)
            twitpic_file_name = h.unescape(twitpic_title).lower().replace(" ", "-")
            twitpic_file_name = unsafe_chars.sub('', twitpic_file_name)
            if not twitpic_file_name:
                twitpic_file_name = twitpic_id
            # The timestamp keeps images with the same message from
            # overwriting each other. Truncate the base name so that the
            # complete name, suffix included, fits the filesystem limit.
            suffix = "_" + str(twitpic_time) + "." + twitpic_file_type
            twitpic_file_name = twitpic_file_name[:name_max - len(suffix)] + suffix
            print(twitpic_file_name)
            # Save the file
            urllib.urlretrieve(twitpic_file_url, twitpic_file_name)
            # Set the file time
            os.utime(twitpic_file_name, (twitpic_time, twitpic_time))
            pp(item)
@tanjo
Copy link
Author

tanjo commented Sep 5, 2014

簡単に上書きされてしまうのでファイル名にタイムスタンプをつけた

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment