# Twitpic downloader with Ruby
#
# This tool saves all of your twitpic full-size images.
# Confirmed working with Ruby 2.1.2.
#
# Usage
# $ mkdir work_dir
# $ ruby twitpic_downloader.rb user_name work_dir
#
# MIT License
# Copyright (c) 2014 Takafumi Yamano
require 'date'
require 'open-uri'
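# open-uri lets Kernel#open read from URLs as well as local files
# (Ruby 2.1 style; newer Rubies use URI.open instead)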
# prepare for saving images
USER_NAME = ARGV[0].to_s
WORK_DIR = ARGV[1].to_s
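# set IMG_SAVE to 0 to skip the final image-download step (see the guard below)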
IMG_SAVE = 1
PREFIX = "twitpic-#{USER_NAME}"
if USER_NAME.empty?
  puts "Error: You must supply your twitpic USER_NAME."
  exit
end
unless Dir.exist?(WORK_DIR)
  puts "Error: You must create the WORK_DIR beforehand."
  exit
end
Dir.mkdir "#{WORK_DIR}/images" unless Dir.exist?("#{WORK_DIR}/images")
Dir.mkdir "#{WORK_DIR}/html"   unless Dir.exist?("#{WORK_DIR}/html")
# download twitpic html pages
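# Each listing page is fetched once and cached under WORK_DIR/html,
# so the script can resume if it is interrupted and re-run.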
page = 1
loop do
  puts "page: #{page}"
  input_url = "http://twitpic.com/photos/#{USER_NAME}?page=#{page}"
  output_file = "#{WORK_DIR}/html/#{PREFIX}-page-#{page}.html"
  unless File.exist?(output_file)
    puts "download html: #{input_url}"
    File.open(output_file, 'w') do |output|
      open(input_url, 'r') do |html_data|
        output.write(html_data.read)
      end
    end
  end
  # stop once the listing no longer links to a "Next" page
  break unless File.read(output_file) =~ /Next/
  page += 1
end
# extract all image ids from downloaded html pages
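# Image ids appear in the listing HTML as relative links like <a href="/abc123">.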
image_ids = []
Dir.glob("#{WORK_DIR}/html/#{PREFIX}-page-*").each do |file|
  image_ids.push File.read(file).scan(/<a href="\/([a-zA-Z0-9]+)">/).flatten
end
# "sopapipa" also matches the id pattern but is filtered out (not a photo page)
image_ids = image_ids.flatten.uniq.delete_if { |i| i == "sopapipa" }.sort
# download the twitpic html page of each full-size image
image_ids.each_with_index do |id, index|
  puts "#{index + 1}: #{id}"
  full_url = "http://twitpic.com/#{id}/full"
  full_file = "#{WORK_DIR}/html/#{PREFIX}-#{id}-full.html"
  unless File.exist?(full_file)
    puts "download full url: #{full_url}"
    File.open(full_file, 'w') do |output|
      open(full_url, 'r') do |html_data|
        output.write(html_data.read)
      end
    end
  end
end
# extract all full image urls
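# The /full page embeds the photo as the first <img src="https://..."> tag.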
full_image_urls = {}
image_ids.each do |id|
  file = "#{WORK_DIR}/html/#{PREFIX}-#{id}-full.html"
  full_image_urls[id] = File.read(file).scan(/<img src="([^"]*)"/).flatten.grep(/(https:\/\/[^"]*)/) { |i| $1 }[0]
end
# download full images
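# The file extension is taken from the image URL, which ends like ".jpg?1234567890".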
unless IMG_SAVE == 1
  puts "Warning: Didn't save full-size images."
  puts "Warning: Set IMG_SAVE to 1 in order to save full images."
  exit
end
full_image_urls.each_with_index do |(id, url), index|
  puts "#{index + 1}: #{id}"
  next if url.to_s.empty?
  extension = url.scan(/\.([a-zA-Z]+)\?[0-9]+\z/).flatten[0]
  full_image_file = "#{WORK_DIR}/images/#{PREFIX}-#{id}-full.#{extension}"
  unless File.exist?(full_image_file)
    puts "save full image: #{url}"
    begin
      File.open(full_image_file, 'wb') do |output|
        open(url, 'rb') do |image_data|
          output.write(image_data.read)
        end
      end
    rescue
      # skip images that fail to download and move on to the next one
      next
    end
  end
end