Skip to content

Instantly share code, notes, and snippets.

@june29
Created December 16, 2010 16:16
Show Gist options
  • Save june29/743581 to your computer and use it in GitHub Desktop.
Save june29/743581 to your computer and use it in GitHub Desktop.
# Downloads the image of every meigen yielded by Jigokuno::Misawa into the
# local "images" directory, one file per meigen, named after Meigen#to_s.
images = "images"
Dir.mkdir(images) unless Dir.exist?(images)

horesasu = Jigokuno::Misawa.new
horesasu.each do |meigen|
  # Binary mode plus `write`: the payload is image data, so it must be stored
  # byte-for-byte. (`puts` in text mode appends a newline and may translate
  # line endings, which corrupts the file.)
  File.open("%s/%s.gif" % [images, meigen], "wb") do |gif|
    # URI.open is the open-uri entry point; Kernel#open stopped accepting
    # URLs in Ruby 3.0. The block form closes the remote stream after reading.
    gif.write(URI.open(meigen.image, &:read))
  end
  # Pause between downloads so the remote server is not hammered.
  sleep 5
end
# -*- coding: utf-8 -*-
require "open-uri"
require "rubygems"
require "nokogiri"
# Scraper for the entries ("meigens") published on jigokuno.com.
#
# Misawa walks the paginated site, Scraper extracts the entries of a single
# parsed page, and Meigen is the value object describing one entry.
module Jigokuno
  # Front page of the site; crawling starts here.
  URL = "http://jigokuno.com/".freeze
  # XPath of the pager link that leads to the following page.
  NEXT_XPATH = "//div[@id='page_area']/div[@class='page_navi']/a[contains(text(), '>>')]".freeze

  # Enumerates every Meigen on the site, following the pager page by page.
  #
  # Iteration advances the internal cursor (see #current), so a given instance
  # walks the site only once.
  class Misawa
    include Enumerable

    # @return [String] URL of the page currently loaded
    attr_reader :current

    # Loads the front page.
    def initialize
      @current = URL
      @document = fetch(@current)
    end

    # Advances to the following page, if the pager offers one.
    #
    # @return [Misawa, nil] self after loading the next page, or nil when the
    #   current page has no "next" link
    def next
      next_link = @document.at(NEXT_XPATH)
      return nil if next_link.nil?

      # Resolve against the current URL so a relative href works as well as an
      # absolute one (URI.join returns an absolute href unchanged).
      @current = URI.join(@current, next_link["href"]).to_s
      @document = fetch(@current)
      self
    end

    # Yields every Meigen from the current page onwards.
    #
    # @yieldparam meigen [Meigen]
    # @return [Enumerator, nil] an Enumerator when called without a block
    def each(&block)
      return enum_for(:each) unless block_given?

      # Iterative rather than recursive, so stack depth does not grow with the
      # number of pages.
      page = self
      while page
        Scraper.new(@document).meigens(&block)
        page = self.next
      end
      nil
    end

    private

    # Downloads +url+ and parses it; the block form closes the HTTP stream.
    # URI.open is used because Kernel#open no longer opens URLs on Ruby 3.0+.
    def fetch(url)
      URI.open(url) { |io| Nokogiri::HTML(io) }
    end
  end

  # Extracts the Meigen entries from one parsed page.
  class Scraper
    # Matches an entry heading and captures the entry number and quoted title.
    HEADER_PATTERN = /惚れさせ(\d+).*「(.+)」/.freeze

    # @param html [#xpath] parsed document, e.g. the result of Nokogiri::HTML
    def initialize(html)
      @html = html
    end

    # Yields one Meigen per entry on the page.
    #
    # Entries without an image are skipped. A heading that does not match the
    # expected pattern yields id 0 and an empty title; a missing character
    # link yields an empty character.
    #
    # @yieldparam meigen [Meigen]
    # @return [Enumerator, Object] an Enumerator when called without a block
    def meigens
      return enum_for(:meigens) unless block_given?

      @html.xpath("//div[@class='entry_area']").each do |entry|
        picture = entry.at(".//img[@class='pict']")
        next if picture.nil?

        # Ruby's \d matches ASCII digits only, so full-width digits in the
        # heading are folded to ASCII before the number is captured.
        heading = entry.xpath("h2").text.tr("0-9", "0-9").chomp
        id, title = heading.scan(HEADER_PATTERN).first
        character = entry.at("center/ul/li[2]/a")&.text.to_s

        yield Meigen.new(id.to_i, title.to_s, character, picture["src"].to_s)
      end
    end
  end

  # One scraped entry: its number, title, character name and image URL.
  class Meigen
    attr_reader :id, :title, :character, :image

    # @param id [Integer] entry number
    # @param title [String] quoted title taken from the heading
    # @param character [String] name of the character the entry is filed under
    # @param image [String] URL of the entry image
    def initialize(id, title, character, image)
      @id = id
      @title = title
      @character = character
      @image = image
    end

    # @return [String] "<zero-padded id>_<title>_<character>"
    def to_s
      format("%03d_%s_%s", @id, @title, @character)
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment