Skip to content

Instantly share code, notes, and snippets.

@deeeki
Created September 9, 2011 16:38
Show Gist options
  • Save deeeki/1206687 to your computer and use it in GitHub Desktop.
Save deeeki/1206687 to your computer and use it in GitHub Desktop.
Ameblo images downloader
# coding: utf-8
# Author: kenkiti (INOUE Tadashi)
# http://d.hatena.ne.jp/kenkitii/20081223/p1
# http://code.google.com/p/kenkiti/source/browse/trunk/ruby/ameblo_downloader.rb
require 'bundler/setup'
Bundler.require(:default) if defined?(Bundler)
require 'logger'
require 'timeout'
# Mechanize pluggable parser that can persist a fetched response body to disk.
class ImageSaver < Mechanize::File
  # Forwards every argument unchanged to Mechanize::File.
  def initialize(uri=nil, response=nil, body=nil, code=nil)
    super(uri, response, body, code)
  end

  # Writes the response body to +file_path+ in binary mode.
  # A file that already exists at +file_path+ is left untouched.
  #
  # @param file_path [String] destination path on disk
  def save(file_path)
    return if File.exist?(file_path)

    # IO#write emits the bytes verbatim; IO#puts would append a "\n" to the
    # image data whenever the body does not already end with one.
    File.open(file_path, 'wb') { |io| io.write(body) }
  end
end
# Walks the pages of an Ameba blog image listing, following the "next page"
# link, and saves each full-size image into a local directory.
class AmebloDownloader
  # Seconds to pause after every successful request made through #get_page.
  REQUEST_INTERVAL = 3

  # Configures the Mechanize agent and makes sure the output directory exists.
  #
  # @param opt [Hash] options; +:path+ is the output directory (default "image")
  def initialize(opt)
    @agent = Mechanize.new
    @agent.user_agent_alias = 'Windows IE 7'
    @agent.log = Logger.new($stdout)
    @agent.log.level = Logger::INFO
    @agent.redirect_ok = true
    @agent.max_history = 1
    # JPEG responses are parsed as ImageSaver so they can be written to disk.
    @agent.pluggable_parser['image/jpeg'] = ImageSaver
    @path = opt[:path] || "image"
    Dir.mkdir(@path) unless File.directory?(@path)
  end

  # Fetches +uri+, logging (instead of raising) timeouts and HTTP error codes.
  #
  # @param uri [String] address to fetch
  # @param reffer [Object, nil] referer handed to Mechanize#get
  # @return [Object, nil] the fetched page, or nil when the request failed
  def get_page(uri, reffer=nil)
    @agent.get(uri, [], reffer)
  rescue Timeout::Error
    # The bare top-level TimeoutError constant no longer exists in Ruby 3.
    @agent.log.warn 'Connection timeout.'
    nil
  rescue Mechanize::ResponseCodeError => e
    @agent.log.warn "#{e.message} #{uri}"
    nil
  else
    sleep REQUEST_INTERVAL
    @agent.page
  end

  # Downloads the image behind every "image-" link on the listing page at
  # +uri+, then follows the link whose text matches +to_next+ until there is
  # no such link left.
  #
  # @param uri [String] address of the first listing page
  # @param to_next [String] text (interpolated into a regexp) of the next-page link
  # @return [nil]
  def imagelist(uri, to_next)
    page = get_page(uri)
    while page
      page.root.search("html body a").each do |a|
        # Anchors without an href attribute yield nil; treat them as no match.
        href = a['href'].to_s
        download(href) if href.include?("image-")
      end
      next_link = page.link_with(:text => /#{to_next}/)
      page = next_link ? @agent.click(next_link) : nil
    end
  end

  # Fetches the image page at +uri+ and saves every <img id="centerImg"> it
  # contains into the output directory, skipping files that already exist.
  #
  # @param uri [String] address of a single image page
  def download(uri)
    page = get_page(uri)
    # get_page returns nil after a logged failure; nothing to do in that case.
    return unless page

    page.root.search("html body img#centerImg").each do |img|
      src = img['src']
      next unless src

      file_name = src.split("/").last
      next unless file_name

      file_path = File.join(@path, file_name)
      next if File.exist?(file_path)

      # Pass the image page itself as referer. Looking it up in the agent's
      # history fails after the first image because max_history is 1.
      image = get_page(src, page)
      next unless image

      image.save(file_path)
      @agent.log.info "Downloaded #{src}"
    end
  end

  # Convenience entry point: builds a downloader that saves into +path+ and
  # crawls from +uri+, following the "次ページ" (next page) link.
  #
  # @param uri [String] address of the first listing page
  # @param path [String, nil] output directory (nil selects the default)
  def self.get(uri, path)
    downloader = self.new(:path => path)
    downloader.imagelist(uri, "次ページ")
  end
end
# Command-line entry point: parses the options and starts the download.
if $0 == __FILE__
  require 'optparse'

  opt = {}
  parser = OptionParser.new
  parser.banner = "Usage: #{File.basename($0)} options"
  parser.on('-u URL', '--url URL', "Specify the URL of ameba-blog to download image.") { |url| opt[:url] = url }
  parser.on('-p PATH', '--path PATH', "Directory path name to save image.") { |dir| opt[:path] = dir }
  parser.on('-h', '--help', 'Prints this message and quit') do
    puts parser.help
    exit 0
  end

  # Invoked without any argument: show the usage text and stop.
  if ARGV.empty?
    puts parser.help
    exit 0
  end

  begin
    parser.parse!(ARGV)
  rescue OptionParser::ParseError => e
    $stderr.puts e.message
    $stderr.puts parser.help
    exit 1
  end

  # --url is mandatory; without this check nil would be handed to the downloader.
  if opt[:url].nil?
    $stderr.puts 'Missing required option: --url URL'
    $stderr.puts parser.help
    exit 1
  end

  AmebloDownloader.get(opt[:url], opt[:path])
end
# Gemfile: fetch gems over HTTPS rather than plain HTTP.
source "https://rubygems.org"
gem 'mechanize'
GEM
remote: http://rubygems.org/
specs:
mechanize (2.0.1)
net-http-digest_auth (~> 1.1, >= 1.1.1)
net-http-persistent (~> 1.8)
nokogiri (~> 1.4)
webrobots (~> 0.0, >= 0.0.9)
net-http-digest_auth (1.1.1)
net-http-persistent (1.9)
nokogiri (1.5.0)
webrobots (0.0.11)
nokogiri (>= 1.4.4)
PLATFORMS
ruby
DEPENDENCIES
mechanize
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment