Created
December 31, 2010 05:34
-
-
Save hitode909/760758 to your computer and use it in GitHub Desktop.
はてなダイアリーはてなブックマークランキング
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
require 'open-uri' | |
require 'nokogiri' | |
require 'cgi' | |
require 'digest/sha1' | |
# A single Hatena Diary entry, identified by its URL and title.
#
# Besides holding the two attributes, an entry can report how many Hatena
# Bookmarks it has (cached on disk between runs), the date encoded in its
# URL, and a Hatena-syntax link to itself.
class Entry
  # Captures the run of digits that follows the user id in a diary entry URL;
  # #date reads the first eight of them as YYYYMMDD.
  DATE_PATTERN = %r{https?://d\.hatena\.ne\.jp/.+?/(\d+)}

  attr_accessor :url, :title

  # @param url [String] URL of the entry
  # @param title [String] title of the entry
  def initialize(url, title)
    @url = url
    @title = title
  end

  # Number of bookmarks reported for this entry.
  #
  # The value is memoized on the instance. On first use the URL is printed to
  # stderr as a progress indicator, then the count is taken from the on-disk
  # cache if present, otherwise from the bookmark count API (and cached).
  #
  # @return [Integer]
  def bookmark_count
    return @bookmark_count if @bookmark_count

    warn url
    # A cached 0 is truthy in Ruby, so `||` only falls through on a real miss.
    @bookmark_count = cached_count || fetch_count
  end

  # Date encoded in the entry URL.
  #
  # @return [Time] local midnight of the entry's day
  # @raise [ArgumentError] if the URL does not look like a diary entry URL
  def date
    match = DATE_PATTERN.match(url)
    raise ArgumentError, "not a Hatena Diary entry URL: #{url}" unless match

    date_text = match[1]
    Time.local(date_text[0..3], date_text[4..5], date_text[6..7])
  end

  # @return [String] Hatena-syntax link showing the title and bookmark counter
  def as_hatena_syntax
    "[#{url}:title=#{title}:bookmark]"
  end

  private

  # Cache file for this entry: the SHA-1 hex digest of the URL, resolved
  # against the current working directory.
  def cache_path
    Digest::SHA1.hexdigest(url)
  end

  # Reads the cached count, or returns nil when there is no usable cache file.
  def cached_count
    File.read(cache_path).to_i if File.file?(cache_path)
  rescue SystemCallError
    # An unreadable cache file is treated like a cache miss.
    nil
  end

  # Asks the bookmark count API, stores the answer in the cache file and
  # pauses briefly so consecutive lookups do not hammer the API.
  def fetch_count
    api_url = "http://api.b.st-hatena.com/entry.count?url=#{CGI.escape(url)}"
    # Kernel#open no longer opens URLs on Ruby 3.0+; URI.open is the open-uri
    # entry point, and the block form closes the response after reading.
    count = URI.open(api_url, &:read).to_i
    File.write(cache_path, count.to_s)
    sleep 0.5
    count
  end
end
# Downloads one page of a user's Hatena Diary archive and returns its links.
#
# @param hatena_id [String] Hatena user id whose archive is requested
# @param offset [Integer] value sent as the archive page's `of` query parameter
# @return [Nokogiri::XML::NodeSet] the anchors matched by `.archive-section a`
def fetch(hatena_id, offset = 0)
  url = "http://d.hatena.ne.jp/#{hatena_id}/archive?of=#{offset}"
  # Kernel#open no longer opens URLs on Ruby 3.0+; URI.open is the open-uri
  # entry point. Parsing inside the block lets open-uri close the response
  # as soon as the document has been built.
  page = URI.open(url) { |io| Nokogiri::HTML(io) }
  page.search('.archive-section a')
end
# Script entry point: prints a Hatena-syntax table of one user's diary
# entries from a single year, ordered from most to least bookmarked.
hatena_id = ARGV.first
# A usage error is not a crash, so exit with the message only (no backtrace).
abort 'usage: bookmark_ranking.rb HATENA_ID' unless hatena_id

# Only entries whose URL date falls in this year are ranked.
target_year = 2011

# Four archive pages are requested, at offsets 0, 50, 100 and 150.
# This may not be enough when the diary has many entries.
page_offsets = (0..3).map { |page| page * 50 }
anchors = page_offsets.flat_map { |offset| fetch(hatena_id, offset).to_a }

ranked = anchors
  .map { |a| Entry.new(a['href'], a.content.strip) }
  # to_s keeps an anchor without an href from crashing the filter.
  .select { |entry| entry.url.to_s.include?('http') && entry.date.year == target_year }
  .sort_by(&:bookmark_count)

# sort_by is ascending, so walk it backwards to list the most bookmarked first.
ranked.reverse_each.with_index(1) do |entry, rank|
  puts "|*#{rank}位|#{entry.as_hatena_syntax}|"
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment