Last active
August 29, 2015 14:24
-
-
Save itkq/8fd57c100f0a71582a05 to your computer and use it in GitHub Desktop.
# Amebloから画像を全部とってくるやつ (Downloads every image from an Ameblo blog.)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'fileutils'
require 'io/console'
require 'mechanize'
require 'open-uri'
# Name of the file, inside the per-target directory, that caches the list of
# public article URLs (one URL per line).
$file = 'article_list'.freeze
# Name of the file, inside the per-target directory, that caches the list of
# Amember-only article URLs (one URL per line).
$amember_file = 'amember_article_list'.freeze
# Interactive entry point.
#
# Asks for the target Ameba ID, optionally archives the Amember-only articles
# (after prompting for the user's own ID and password), and finally archives
# the public articles of the target.
def main
  print 'Enter target ameba id: '
  target = read_stdin_line
  abort 'No target ameba id given.' if target.empty?

  print 'And archive from amember article? '
  if read_stdin_line.match?(/\A(y|yes)\z/i)
    print 'Enter your ameba id: '
    ameba_id = read_stdin_line
    print 'Enter your ameba password: '
    # noecho keeps the password off the terminal; it needs 'io/console'.
    ameba_password = $stdin.noecho(&:gets).to_s.chomp
    print "\n"
    archive_amember(target, ameba_id, ameba_password)
  end
  archive(target)
end

# Reads one line from standard input without its line terminator.
# Reads $stdin explicitly (Kernel#gets would read files named in ARGV) and
# returns '' at end of input instead of failing on nil.
def read_stdin_line
  $stdin.gets.to_s.chomp
end
# Downloads the images of every public article of +target+.
#
# Works inside "<directory of this script>/<target>": the article URL list is
# read from the file named by $file (built with get_articles when missing) and
# images are stored in the "img" subdirectory.
#
# @param target [String] Ameba ID of the blog to archive
def archive(target)
  dir = File.join(File.dirname(File.expand_path(__FILE__)), target)
  img_dir = File.join(dir, 'img')
  list_path = File.join(dir, $file)
  FileUtils.mkdir_p(img_dir)
  get_articles(target, dir) unless File.exist?(list_path)

  puts 'Start archiving.'
  File.foreach(list_path) do |line|
    url = line.chomp
    next if url.empty?

    print "#{url} ==> "
    begin
      page = get_page(url)
      # get_page prints its own error message and returns nil on failure,
      # so a nil page must be skipped here rather than handed to img_archive.
      next if page.nil?

      puts img_archive(page, img_dir)
    rescue => e
      # One broken article must not abort the whole run.
      puts e.message
      next
    end
    sleep(1) # be polite to the server between articles
  end
end
# Downloads the images of every Amember-only article of +target+.
#
# Works inside "<directory of this script>/<target>": the article URL list is
# read from the file named by $amember_file (built with get_amember_articles
# when missing) and images are stored in the "amember_img" subdirectory.
# Pages are fetched through the logged-in agent returned by login.
#
# @param target [String] Ameba ID of the blog to archive
# @param id [String] Ameba ID used to log in
# @param pass [String] password used to log in
# @return [Boolean] false when no article list could be built or the login
#   failed, true once the list has been processed
def archive_amember(target, id, pass)
  dir = File.join(File.dirname(File.expand_path(__FILE__)), target)
  img_dir = File.join(dir, 'amember_img')
  list_path = File.join(dir, $amember_file)
  FileUtils.mkdir_p(img_dir)
  return false unless File.exist?(list_path) || get_amember_articles(target, dir)

  agent = login(id, pass)
  return false unless agent

  puts 'Start Ameblo archiving'
  File.foreach(list_path) do |line|
    # Strip the line terminator so it does not end up inside the URL.
    url = line.chomp
    next if url.empty?

    print "#{url} ==> "
    begin
      fetched = agent.get(url)
      puts img_archive(Nokogiri::HTML.parse(fetched.body), img_dir, true)
    rescue => e
      # One broken article must not abort the whole run.
      puts e.message
      next
    end
    sleep(1) # be polite to the server between articles
  end
  true
end
# Saves every image matched by '.detailOn > img' on an article page.
#
# Files are named "<yyyy>_<mm>_<dd>_<NNN>.jpg" (the extension is always .jpg,
# whatever the real image type). The date comes from the article's date
# element and NNN is the first sequence number not yet used in +dir+, so
# existing files are never overwritten.
#
# @param page [#css] parsed article page (a Nokogiri document in this script)
# @param dir [String] directory the images are written to
# @param amembar [Boolean] true for Amember pages, whose date is read from
#   '#dateLeft' instead of 'time' / '.date'
# @return [Integer] number of images saved by this call
def img_archive(page, dir, amembar = false)
  date_selector =
    if amembar
      '#dateLeft'
    elsif page.css('time').empty?
      '.date'
    else
      'time'
    end
  stamp = /^(?<yyyy>\d{4}).(?<mm>\d{2}).(?<dd>\d{2})/.match(page.css(date_selector).text)
  # Without a recognisable date the prefix degrades to "__", as it always did.
  date = stamp ? stamp.captures.join('_') : '__'

  seq = 1
  saved = 0
  page.css('.detailOn > img').each do |img|
    src = img.attr('src')
    path = nil
    loop do
      path = "#{dir}/#{date}_#{format('%03d', seq)}.jpg"
      break unless File.exist?(path)

      seq += 1
    end
    begin
      # Read the whole image first so a failed download leaves no empty file;
      # the block form closes the remote handle.
      data = URI.open(src, &:read)
      File.binwrite(path, data)
      saved += 1
      seq += 1
    rescue => e
      puts e.message
    end
  end
  saved
end
# Builds the list of public article URLs of +target+.
#
# Walks the blog's "entrylist" pages starting at page 1, appending every
# article link to "<dir>/<$file>" (one URL per line), until a page has no
# "next" link or cannot be fetched. The list file is created even when
# nothing could be fetched.
#
# @param target [String] Ameba ID of the blog
# @param dir [String] existing directory the list file is written to
# @return [nil]
def get_articles(target, dir)
  print 'Getting articles ... '
  path = "#{dir}/#{$file}"
  # Create the list inside the target directory (not in the current
  # working directory).
  FileUtils.touch(path)
  page_seq = 1
  page = get_page("http://ameblo.jp/#{target}/entrylist-1-#{page_seq}.html")
  if page.nil?
    print "\n"
    return
  end

  if page.css('.contentTitle').empty?
    # old format
    a_class = '.newentrytitle / a'
    next_class = '.nextPage'
  else
    # new format
    a_class = '.contentTitle'
    next_class = '.pagingNext'
  end

  loop do
    hrefs = page.css(a_class).map { |c| c.attr('href') }.compact
    # Skip the write for an empty page so no blank line lands in the list.
    File.open(path, 'a') { |f| f.puts(hrefs) } unless hrefs.empty?
    print "#{page_seq} "
    break if page.css(next_class).empty?

    page_seq += 1
    sleep(1) # be polite to the server between list pages
    page = get_page("http://ameblo.jp/#{target}/entrylist-1-#{page_seq}.html")
    break if page.nil?
  end
  print "\n"
end
# Builds the list of Amember-only article URLs of +target+.
#
# Walks the blog's "amemberentrylist" pages starting at page 1, collecting the
# links matched by '.contentTitleArea > h2 > a', until a page has no
# '.pagingNext' element or cannot be fetched. The URLs are written to
# "<dir>/<$amember_file>", one per line.
#
# @param target [String] Ameba ID of the blog
# @param dir [String] existing directory the list file is written to
# @return [false, Integer] false when no article was found, otherwise the
#   number of bytes written (truthy)
def get_amember_articles(target, dir)
  print 'Getting Amember articles ... '
  articles = []
  page_seq = 1
  while (page = get_page("http://ameblo.jp/#{target}/amemberentrylist-#{page_seq}.html"))
    links = page.search('.contentTitleArea > h2 > a')
    # Anchors without an href would otherwise become blank lines in the list.
    articles.concat(links.map { |c| c.attr('href') }.compact)
    print "#{page_seq} "
    break if page.css('.pagingNext').empty?

    page_seq += 1
    sleep(1) # same delay between list pages as get_articles
  end
  print "\n"
  return false if articles.empty?

  File.write("#{dir}/#{$amember_file}", "#{articles.join("\n")}\n")
end
# Fetches +url+ and parses it as HTML.
#
# The charset reported for the response is passed to the parser. Any
# StandardError is reported on standard output and turned into a nil result,
# so callers must check for nil instead of rescuing.
#
# @param url [String] address of the page to fetch
# @return [Nokogiri::HTML::Document, nil] the parsed page, or nil on failure
def get_page(url)
  charset = nil
  # URI.open is the open-uri entry point; Kernel#open stopped handling URLs
  # in Ruby 3.0.
  html = URI.open(url) do |f|
    charset = f.charset
    f.read
  end
  Nokogiri::HTML.parse(html, nil, charset)
rescue => e
  puts e.message
  nil
end
# Logs in to Ameba through the form on the top page.
#
# The login counts as successful when the title of the resulting page
# contains "マイページ". Failures (wrong credentials, missing form, network
# errors) are reported on standard output and yield nil.
#
# NOTE(review): the top page is requested over plain http; whether the
# credentials are then submitted over https depends on the form's action —
# confirm before relying on this.
#
# @param id [String] Ameba ID
# @param pass [String] password
# @return [Mechanize, nil] the logged-in agent, or nil when login failed
def login(id, pass)
  print "Logining ... "
  agent = Mechanize.new
  agent.get('http://www.ameba.jp/')
  form = agent.page.form do |f|
    f.amebaId = id
    f.password = pass
  end
  form.click_button
  # title can be nil (e.g. a page without <title>), hence to_s.
  if agent.page.title.to_s.match?(/マイページ/)
    puts 'logined.'
    agent
  else
    # Terminate the "Logining ... " line so later output starts cleanly.
    puts 'failed.'
    nil
  end
rescue => e
  puts e.message
  nil
end
# Run the interactive archiver only when this file is executed directly,
# so it can also be loaded without side effects.
main if __FILE__ == $PROGRAM_NAME
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment