Pistos (owner)

Fork Of

Revisions

gist: 40396 Download_button fork
public
Public Clone URL: git://gist.github.com/40396.git
Embed All Files: show embed
feed.rb #
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
require 'hpricot'
require 'json'
require 'open-uri'
require 'time'
 
# Minimal RSS 2.0 / Atom reader.
#
# Feed.parse(uri) loads a document, decides which flavour it is and returns
# a Feed whose #meta holds the feed-level fields and whose #children holds
# one Item (RSS) or Entry (Atom) per item/entry element. Every field is
# stored under its element name as a stripped String unless the container
# class registers a handler for that name in its HANDLE table.
class Feed
  # Loads +uri+ and returns a populated Feed.
  #
  # The document is parsed as RSS 2.0 when it contains an "item" element,
  # otherwise as Atom when it contains an "entry" element.
  #
  # uri - whatever open-uri accepts (URL string, URI object, ...).
  #
  # Raises ArgumentError when the document contains neither element.
  def self.parse(uri)
    doc = read_document(uri)
    feed = new(uri)

    if doc.at(:item)
      feed.parse_rss2(doc)
    elsif doc.at(:entry)
      feed.parse_atom(doc)
    else
      raise ArgumentError, "Cannot parse this"
    end

    feed
  end

  # Opens +uri+, parses it as XML and closes the handle again.
  #
  # Kernel#open no longer handles URLs on Ruby 3.0+, so URI.open is used
  # whenever the interpreter provides it; older interpreters fall back to
  # the Kernel.open that open-uri patches.
  def self.read_document(uri)
    opener = URI.respond_to?(:open) ? URI : Kernel
    opener.open(uri) { |io| Hpricot.XML(io) }
  end
  private_class_method :read_document

  # uri      - the value this feed was constructed with.
  # meta     - Meta container with the feed-level fields.
  # children - Array of Item or Entry objects, in document order.
  attr_reader :uri, :meta, :children

  # uri - source location of the feed; stored for later reference only.
  def initialize(uri)
    @uri = uri
    @children = []
    @meta = Meta.new(self)
  end

  # Fills the feed from an Atom document: metadata from "feed",
  # one Entry per "entry" element.
  def parse_atom(doc)
    parse_meta(doc, :feed)
    parse_common(doc, :entry, Entry)
  end

  # Fills the feed from an RSS 2.0 document: metadata from "rss/channel",
  # one Item per "item" element.
  def parse_rss2(doc)
    parse_meta(doc, 'rss/channel')
    parse_common(doc, :item, Item)
  end

  # Appends one +klass+ instance to #children for every node matching
  # +selector+, copying each named child element into it.
  #
  # doc      - object answering #/ with a selector (an Hpricot document).
  # selector - selector passed straight to doc#/.
  # klass    - Child subclass to instantiate for each match.
  def parse_common(doc, selector, klass)
    (doc/selector).each do |node|
      obj = klass.new(self)
      @children << obj
      named_children(node).each { |child| obj[child.name] = child }
    end
  end

  # Copies the named children of every node matching +selector+ into #meta,
  # leaving out the "entry"/"item" elements that parse_common handles.
  def parse_meta(doc, selector)
    (doc/selector).each do |node|
      named_children(node).each do |child|
        next if child.name == 'entry' || child.name == 'item'
        @meta[child.name] = child
      end
    end
  end

  private

  # Returns the children of +node+ that respond to #name. A nil child list
  # is treated as empty.
  # NOTE(review): presumably the nameless nodes are text/whitespace and a nil
  # list comes from an empty element - confirm against Hpricot.
  def named_children(node)
    (node.children || []).select { |child| child.respond_to?(:name) }
  end

  public

  # Base container: a String-keyed field list plus a back-reference to the
  # owning Feed. Subclasses customise conversion through their own HANDLE.
  class Child
    # Converts a node's stripped text with Time.parse (which raises
    # ArgumentError on text it cannot interpret).
    HANDLE_TIME = lambda { |time| Time.parse(time.inner_text.strip) }
    # Reads the node's :href attribute.
    HANDLE_LINK = lambda { |link| link[:href] }
    # Flattens the node's named children into a { name => stripped text } Hash.
    HANDLE_AUTHOR = lambda { |author|
      hold = {}
      (author.children || []).each do |child|
        next unless child.respond_to?(:name)
        hold[child.name] = child.inner_text.strip
      end
      hold
    }
    # Default (empty) handler table so Child itself, and subclasses that do
    # not define their own HANDLE, store plain stripped text.
    HANDLE = {}

    # parent - the object passed to the constructor (the owning Feed).
    # list   - Hash of field name (String) => converted value.
    attr_reader :parent, :list

    def initialize(parent)
      @parent = parent
      @list = {}
    end

    # Stores +value+ (a node) under +key+. The key is normalised to a String
    # before both the handler lookup and the store, so Symbol and String keys
    # behave the same. A later assignment to the same key replaces the
    # earlier one.
    def []=(key, value)
      name = key.to_s
      handler = self.class::HANDLE[name]
      @list[name] = handler ? handler.call(value) : value.inner_text.strip
    end

    # Returns the stored value for +key+ (String or Symbol), or nil.
    def [](key)
      @list[key.to_s]
    end
  end

  # One RSS 2.0 "item"; "pubDate" is converted to a Time.
  class Item < Child
    HANDLE = { 'pubDate' => HANDLE_TIME }

    def author
      self['author']
    end

    def title
      self['title']
    end

    def link
      self['link']
    end

    def guid
      self['guid']
    end
  end

  # One Atom "entry"; link, author and the timestamps get converted.
  class Entry < Child
    HANDLE = { 'link' => HANDLE_LINK, 'author' => HANDLE_AUTHOR,
      'updated' => HANDLE_TIME, 'published' => HANDLE_TIME }
  end

  # Feed-level fields (from the Atom "feed" or RSS "channel" element).
  class Meta < Child
    HANDLE = { 'link' => HANDLE_LINK, 'author' => HANDLE_AUTHOR,
      'updated' => HANDLE_TIME }
  end
end