Skip to content

Instantly share code, notes, and snippets.

@itkq
Last active August 29, 2015 14:24
Show Gist options
  • Save itkq/8fd57c100f0a71582a05 to your computer and use it in GitHub Desktop.
Save itkq/8fd57c100f0a71582a05 to your computer and use it in GitHub Desktop.
# Amebloから画像を全部とってくるやつ (Downloads every image from an Ameblo blog.)
require 'fileutils'
require 'io/console'
require 'open-uri'

require 'mechanize'
# File name, inside each target's directory, of the cached list of article
# URLs (one URL per line) that `archive` reads and `get_articles` writes.
$file = 'article_list'
# File name, inside each target's directory, of the cached list of amember
# article URLs that `archive_amember` reads and `get_amember_articles` writes.
$amember_file = 'amember_article_list'
# Interactive entry point.
#
# Asks for the target ameba id, optionally asks for login credentials to
# archive amember articles first, and then archives the public articles.
# Nothing is archived when the target id is blank (for example on EOF).
#
# @return [Object, nil] whatever `archive` returns, or nil when no target was given
def main
  target = prompt_for('Enter target ameba id: ')
  if target.empty?
    puts 'No target ameba id given.'
    return
  end

  answer = prompt_for('And archive from amember article? ')
  if answer =~ /\A(?:y|yes)\z/i
    ameba_id = prompt_for('Enter your ameba id: ')
    ameba_password = prompt_for('Enter your ameba password: ', hidden: true)
    # The newline typed after the password is not echoed, so emit one here.
    print "\n"
    archive_amember(target, ameba_id, ameba_password)
  end
  archive(target)
end

# Prints +message+ and reads one line from standard input.
#
# @param message [String] prompt text, printed without a trailing newline
# @param hidden [Boolean] when true, echo is disabled while reading, provided
#   standard input is a terminal that supports it
# @return [String] the line without its trailing newline; "" on EOF
def prompt_for(message, hidden: false)
  print message
  input = $stdin
  line =
    if hidden && input.respond_to?(:noecho) && input.tty?
      input.noecho(&:gets)
    else
      # Piped or redirected input has no echo to disable, and calling noecho
      # on a non-terminal raises.
      input.gets
    end
  line.to_s.chomp
end
# Downloads the images of every public article of +target+.
#
# Files live in a directory named after +target+ next to this script: the
# article URL list is cached in "<target>/#{$file}" (built with `get_articles`
# when it is missing) and images are stored under "<target>/img".
#
# Articles whose page cannot be fetched are skipped instead of aborting the
# whole run, and blank lines in the list are ignored.
#
# @param target [String] ameba id of the blog to archive
# @return [Boolean] false when no article list could be obtained, true otherwise
def archive(target)
  dir = "#{File.dirname(File.expand_path(__FILE__))}/#{target}"
  img_dir = "#{dir}/img"
  file = "#{dir}/#{$file}"
  FileUtils.mkdir_p(img_dir)

  get_articles(target, dir) unless File.exist?(file)
  unless File.exist?(file)
    puts 'No article list available.'
    return false
  end

  puts 'Start archiving.'
  File.foreach(file) do |line|
    url = line.chomp
    next if url.empty?

    print "#{url} ==> "
    page =
      begin
        get_page(url)
      rescue StandardError => e
        puts e.message
        nil
      end
    # get_page reports its own failures and returns nil, so a nil page must be
    # skipped here rather than handed to img_archive.
    next unless page

    puts img_archive(page, img_dir)
    sleep(1)
  end
  true
end
# Downloads the images of every amember article of +target+ using a
# logged-in session.
#
# The article URL list is cached in "<target>/#{$amember_file}" next to this
# script (built with `get_amember_articles` when missing) and images are
# stored under "<target>/amember_img".
#
# An article that fails to load is reported and skipped so that one bad URL
# does not abort the remaining downloads.
#
# @param target [String] ameba id of the blog to archive
# @param id [String] ameba id used to log in
# @param pass [String] password used to log in
# @return [Boolean] false when the article list could not be built or the
#   login failed, true otherwise
def archive_amember(target, id, pass)
  dir = "#{File.dirname(File.expand_path(__FILE__))}/#{target}"
  img_dir = "#{dir}/amember_img"
  file = "#{dir}/#{$amember_file}"
  FileUtils.mkdir_p(img_dir)

  return false unless File.exist?(file) || get_amember_articles(target, dir)

  agent = login(id, pass)
  return false unless agent

  puts 'Start Ameblo archiving'
  File.foreach(file) do |line|
    # Strip the line terminator so it is not sent as part of the URL.
    url = line.chomp
    next if url.empty?

    print "#{url} ==> "
    begin
      agent.get(url)
      page = Nokogiri::HTML.parse(agent.page.body)
    rescue StandardError => e
      puts e.message
      next
    end
    puts img_archive(page, img_dir, true)
    sleep(1)
  end
  true
end
# Saves every image matched by '.detailOn > img' in +page+ into +dir+.
#
# Files are named "<yyyy>_<mm>_<dd>_<NNN>.jpg", where the date is parsed from
# the start of the page's date element and NNN is the first sequence number
# not already taken in +dir+. When no date can be parsed the date part
# degenerates to "__".
#
# A failed download is reported on standard output and skipped.
#
# @param page [#css] parsed article page (a Nokogiri document in this script)
# @param dir [String] directory the images are written to
# @param amembar [Boolean] true for amember pages, whose date is in '#dateLeft'
# @return [Integer] number of images actually saved by this call
def img_archive(page, dir, amembar = false)
  date_class =
    if amembar
      '#dateLeft'
    elsif page.css('time').empty?
      '.date'
    else
      'time'
    end

  match = /^(?<yyyy>\d{4}).(?<mm>\d{2}).(?<dd>\d{2})/.match(page.css(date_class).text)
  date = match ? "#{match[:yyyy]}_#{match[:mm]}_#{match[:dd]}" : '__'
  path_for = ->(number) { "#{dir}/#{date}_#{format('%03d', number)}.jpg" }

  saved = 0
  seq = 1
  page.css('.detailOn > img').each do |img|
    src = img.attr('src')
    # Never overwrite images stored by an earlier article or an earlier run.
    seq += 1 while File.exist?(path_for.call(seq))
    path = path_for.call(seq)
    begin
      # URI#open only fetches real remote URLs and closes the handle when the
      # block ends; a bare Kernel#open no longer accepts URLs on Ruby 3.
      URI.parse(src).open do |remote|
        File.open(path, 'wb') { |output| output.write(remote.read) }
      end
      saved += 1
      seq += 1
    rescue StandardError => e
      puts e.message
    end
  end
  # Count successful downloads only; the sequence number also advances past
  # files that already existed.
  saved
end
# Crawls the entry list pages of +target+ and appends every article URL found
# to "#{dir}/#{$file}", one URL per line.
#
# The first page decides which selectors are used: pages without any
# '.contentTitle' element are treated as the old layout. Crawling stops at the
# first page that cannot be fetched or that has no "next page" element.
#
# When even the first page cannot be fetched nothing is written, so a later
# run can try again instead of finding an empty cached list.
#
# @param target [String] ameba id of the blog
# @param dir [String] existing directory the list file is written to
# @return [nil]
def get_articles(target, dir)
  print 'Getting articles ... '
  path = "#{dir}/#{$file}"
  page_seq = 1
  page = get_page("http://ameblo.jp/#{target}/entrylist-1-#{page_seq}.html")
  if page.nil?
    print "\n"
    return
  end

  # Create the list inside the target directory even if no article is found.
  FileUtils.touch(path)

  if page.css('.contentTitle').empty?
    # old format
    a_class = '.newentrytitle / a'
    next_class = '.nextPage'
  else
    # new format
    a_class = '.contentTitle'
    next_class = '.pagingNext'
  end

  while page
    articles = page.css(a_class).map { |c| c.attr('href') }.compact
    # Skip the write for an empty page so no blank line ends up in the list.
    File.open(path, 'a') { |f| f.puts(articles) } unless articles.empty?
    print "#{page_seq} "
    break if page.css(next_class).empty?

    page_seq += 1
    sleep(1)
    page = get_page("http://ameblo.jp/#{target}/entrylist-1-#{page_seq}.html")
  end
  print "\n"
end
# Collects the article URLs listed on the amember entry list pages of +target+
# and writes them, one per line, to "#{dir}/#{$amember_file}".
#
# Pages are requested in ascending order until one cannot be fetched or has no
# '.pagingNext' element.
#
# @param target [String] ameba id of the blog
# @param dir [String] directory the list file is written to
# @return [Integer, false] number of bytes written, or false when no article
#   URL was found (nothing is written in that case)
def get_amember_articles(target, dir)
  print 'Getting Amember articles ... '
  links = []
  index = 1
  loop do
    doc = get_page("http://ameblo.jp/#{target}/amemberentrylist-#{index}.html")
    break unless doc

    links.concat(doc.search('.contentTitleArea > h2 > a').map { |anchor| anchor.attr('href') })
    print "#{index} "
    index += 1
    break if doc.css('.pagingNext').empty?
  end
  print "\n"
  return false if links.empty?

  File.write("#{dir}/#{$amember_file}", "#{links.join("\n")}\n")
end
# Fetches +url+ and parses the response as HTML.
#
# The charset reported for the response is handed to the parser. Any
# StandardError raised while fetching or parsing is printed to standard output
# and turned into a nil result, so callers have to check for nil.
#
# @param url [String] absolute URL of the page
# @return [Nokogiri::HTML::Document, nil] the parsed page, or nil on failure
def get_page(url)
  charset = nil
  # URI#open (from open-uri) only exists for remote URLs, so a bad entry can
  # never be opened as a local file; a bare Kernel#open no longer accepts URLs
  # on Ruby 3.
  html = URI.parse(url).open do |f|
    charset = f.charset
    f.read
  end
  Nokogiri::HTML.parse(html, nil, charset)
rescue StandardError => e
  puts e.message
  nil
end
# Logs in to Ameba with a fresh Mechanize agent.
#
# Opens http://www.ameba.jp/, fills the `amebaId` and `password` fields of the
# form returned by the page, and clicks its button. The login counts as
# successful when the resulting page title contains "マイページ".
#
# @param id [String] ameba id
# @param pass [String] password
# @return [Mechanize, nil] the logged-in agent, or nil when the login failed
def login(id, pass)
  print "Logining ... "
  browser = Mechanize.new
  browser.get('http://www.ameba.jp/')
  login_form = browser.page.form do |form|
    form.amebaId = id
    form.password = pass
  end
  login_form.click_button
  return nil unless browser.page.title.match(/マイページ/)

  puts 'logined.'
  browser
end
# Start the interactive archiver as soon as this file is loaded.
main
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment