Skip to content

Instantly share code, notes, and snippets.

@hooopo
Created January 12, 2010 10:57
Show Gist options
  • Save hooopo/275108 to your computer and use it in GitHub Desktop.
Save hooopo/275108 to your computer and use it in GitHub Desktop.
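=begin
Sample of the "next page" link in Douban's pagination markup
("后页" means "next page"):
<span class="next">
<a href="/group/douban_radio/discussion?start=525">后页></a>
</span>
Example URL of the first page of a group's discussion list:
http://www.douban.com/group/douban_radio/discussion?start=0
=end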
require 'open-uri'
require 'hpricot'
# Small scraper for Douban group discussion lists and topic pages.
module Douban
  #PER_PAGE = 25
  HOST = "http://www.douban.com/"

  # Private helpers shared by Topic and Group: downloading a page and
  # working out which paginated offsets have to be requested.
  module PageFetcher
    private

    # Downloads +url+ and returns the response body as a String.
    #
    # Kernel#open stopped handling URLs in Ruby 3.0, so URI.open is used when
    # open-uri provides it; older rubies fall back to Kernel#open. The block
    # form closes the underlying IO once the body has been read.
    #
    # @param url [String] absolute URL to download
    # @return [String] the response body
    def fetch(url)
      if URI.respond_to?(:open)
        URI.open(url) { |io| io.read }
      else
        open(url) { |io| io.read }
      end
    end

    # Reads the number shown by the last link directly inside
    # +div.paginator+. Evaluates to 0 when no such link is found
    # (nil.to_s.to_i) or when its text is not numeric.
    #
    # @param html [String] markup of a listing page
    # @return [Integer]
    def last_page_number(html)
      Hpricot.parse(html).search("div.paginator/a/*").last.to_s.to_i
    end

    # Builds the +start=+ offsets for every page of a listing.
    #
    # @param page_count [Integer] number of pages reported by the paginator
    # @param per_page [Integer] items shown on each page
    # @return [Array<Integer>] offsets 0, per_page, 2 * per_page, ...
    def page_offsets(page_count, per_page)
      # A listing without a paginator reports 0 pages but still has one page.
      pages = [page_count, 1].max
      (0...pages).map { |index| index * per_page }
    end
  end

  # A single discussion topic whose pages are scanned for e-mail addresses.
  class Topic
    include PageFetcher

    # Step between the +start=+ offsets of consecutive topic pages.
    COMMENTS_PER_PAGE = 100

    # Matches an address embedded anywhere in the page text. No capture
    # groups, so String#scan yields plain strings.
    EMAIL_PATTERN = /[a-z0-9._%+-]+@[a-z0-9-]+(?:\.[a-z0-9-]+)+/i

    attr_reader :topic_url
    attr_reader :total_pages

    # Fetches the first page of the topic to learn how many pages it has.
    #
    # @param url [String] topic URL; "?start=N" is appended to it verbatim
    def initialize(url)
      @topic_url = url
      @total_pages = last_page_number(fetch(topic_url + "?start=0"))
      @emails = []
    end

    # Downloads every page of the topic and collects the addresses found.
    # Each call re-downloads the pages and replaces the previous result.
    #
    # @return [Array<String>] addresses in page order (duplicates are kept)
    def emails
      @emails = page_offsets(total_pages, COMMENTS_PER_PAGE).flat_map do |offset|
        fetch("#{topic_url}?start=#{offset}").scan(EMAIL_PATTERN)
      end
    end
  end

  # A Douban group whose discussion list is crawled for topic links.
  class Group
    include PageFetcher

    # Step between the +start=+ offsets of consecutive discussion pages.
    TOPICS_PER_PAGE = 25

    attr_reader :group_url
    attr_reader :total_pages
    attr_reader :topic_urls

    # Fetches the first discussion page to learn how many pages exist.
    #
    # @param url [String] group URL, e.g. "http://www.douban.com/group/douban_radio/"
    def initialize(url)
      @group_url = url
      @total_pages = last_page_number(fetch(discussion_url(0)))
      @topic_urls = []
    end

    # Crawls every discussion page and stores the topic links in
    # +topic_urls+, replacing the result of any earlier call.
    #
    # @return [Group] self, so the call can be chained with +topic_urls+
    def topics
      @topic_urls = page_offsets(total_pages, TOPICS_PER_PAGE).flat_map do |offset|
        page = Hpricot.parse(fetch(discussion_url(offset)))
        page.search("div.article//tr/td[1]/a").map { |link| link["href"] }
      end
      self
    end

    private

    # URL of the discussion-list page starting at +offset+.
    def discussion_url(offset)
      File.join(group_url, "discussion?start=#{offset}")
    end
  end
end
# Demo run against the douban_radio group: print the page count reported for
# its discussion list, then inspect the topic links gathered by the crawl.
radio_group = Douban::Group.new("http://www.douban.com/group/douban_radio/")
puts radio_group.total_pages
p radio_group.topics.topic_urls
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment