Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Twitpic downloader with Ruby
# Twitpic downloader with Ruby
#
# This tool enables you to save all your twitpic full-size images.
# Confirmed this tool working with Ruby 2.1.2.
#
# Usage
# $ mkdir work_dir
# $ ruby twitpic_downloader.rb user_name work_dir
#
# MIT License
# Copyright (c) 2014 Takafumi Yamano
require 'date'
require 'open-uri'
# prepare for saving images
#
# USER_NAME: twitpic account to mirror (ARGV[0])
# WORK_DIR:  pre-existing scratch directory (ARGV[1]); html/ and images/
#            subdirectories are created inside it on demand
USER_NAME = ARGV[0].to_s
WORK_DIR = ARGV[1].to_s
IMG_SAVE = 1
PREFIX = "twitpic-#{USER_NAME}"
# abort prints to stderr and exits with status 1, so a caller/shell script
# can detect failure (bare `exit` would report success, status 0)
abort "Error: You must supply your twitpic USER_NAME." if USER_NAME.empty?
abort "Error: You must create the WORK_DIR beforehand." unless Dir.exist?(WORK_DIR)
# Dir.exist? — the exists? alias is deprecated and removed in Ruby 3.2
Dir.mkdir "#{WORK_DIR}/images" unless Dir.exist?("#{WORK_DIR}/images")
Dir.mkdir "#{WORK_DIR}/html" unless Dir.exist?("#{WORK_DIR}/html")
# download twitpic html pages
# Walks the paginated gallery until a page no longer contains a "Next" link.
# Pages already on disk are not re-fetched, so the script is safely re-runnable.
page = 1
loop do
  puts "page: #{page}"
  input_url = "http://twitpic.com/photos/#{USER_NAME}?page=#{page}"
  output_file = "#{WORK_DIR}/html/#{PREFIX}-page-#{page}.html"
  unless File.exist?(output_file)
    puts "download html: #{input_url}"
    # URI#open (open-uri) instead of Kernel#open: Kernel#open treats a
    # leading "|" as a shell command and no longer accepts URLs in Ruby 3+
    URI.parse(input_url).open do |html_data|
      File.open(output_file, 'w') { |output| output.write(html_data.read) }
    end
  end
  # NOTE(review): assumes the literal text "Next" appears only while more
  # pages exist — TODO confirm against twitpic's markup
  break unless File.read(output_file) =~ /Next/
  page += 1
end
# extract all image ids from downloaded html pages
# Every gallery page links each photo as <a href="/ID">; collect the IDs,
# drop duplicates and twitpic's "sopapipa" placeholder, and sort them.
page_files = Dir.glob("#{WORK_DIR}/html/#{PREFIX}-page-*")
image_ids = page_files
  .flat_map { |page_file| File.read(page_file).scan(/<a href="\/([a-zA-Z0-9]+)">/).flatten }
  .uniq
  .delete_if { |candidate| candidate == "sopapipa" }
  .sort
# download twitpic html pages of full size images
# For each photo id, fetch http://twitpic.com/ID/full (the page that embeds
# the full-resolution image) unless it was already downloaded on a prior run.
image_ids.each_with_index do |id, index|
  puts "#{index + 1}: #{id}"
  full_url = "http://twitpic.com/#{id}/full"
  full_file = "#{WORK_DIR}/html/#{PREFIX}-#{id}-full.html"
  # guard clause: skip cached pages (File.exist? — exists? is deprecated)
  next if File.exist?(full_file)
  puts "download full url: #{full_url}"
  # URI#open avoids Kernel#open's "|cmd" command-execution behavior
  URI.parse(full_url).open do |html_data|
    File.open(full_file, 'w') { |output| output.write(html_data.read) }
  end
end
# extract all full image urls
# From each saved ".../full" page, take the first <img src=...> whose src
# contains an https:// url; the value may be nil when no match is found.
full_image_urls = {}
image_ids.each do |id|
  html = File.read("#{WORK_DIR}/html/#{PREFIX}-#{id}-full.html")
  img_srcs = html.scan(/<img src="([^"]*)"/).flatten
  https_urls = img_srcs.map { |src| src[%r{https://[^"]*}] }.compact
  full_image_urls[id] = https_urls.first
end
# download full images
# Saves each resolved full-size image under WORK_DIR/images, skipping files
# that already exist; individual download failures are logged and skipped.
unless IMG_SAVE == 1
  puts "Warning: Didn't save full size images yet."
  # fixed typo: "oreder" -> "order"
  puts "Warning: Change IMG_SAVE to 1 in order to save full images."
  exit
end
full_image_urls.each_with_index do |(id, url), index|
  puts "#{index + 1}: #{id}"
  next if url.to_s.empty?
  # file extension from the url; the "?12345" cache-buster suffix is now
  # optional, so urls ending in plain ".jpg" also yield an extension
  extension = url.scan(/\.([a-zA-Z]+)(?:\?[0-9]+)?\z/).flatten[0]
  full_image_file = "#{WORK_DIR}/images/#{PREFIX}-#{id}-full.#{extension}"
  next if File.exist?(full_image_file)
  puts "save full image: #{url}"
  begin
    # URI#open: the url comes from scraped HTML (untrusted input), and
    # Kernel#open would execute "|cmd" strings as shell commands
    URI.parse(url).open('rb') do |image_data|
      File.open(full_image_file, 'wb') { |output| output.write(image_data.read) }
    end
  rescue StandardError => e
    # log the failure instead of silently swallowing it, then keep going
    puts "Warning: failed to save #{url}: #{e.message}"
    next
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.