Skip to content

Instantly share code, notes, and snippets.

@olkeene
Created February 26, 2009 01:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save olkeene/70568 to your computer and use it in GitHub Desktop.
Save olkeene/70568 to your computer and use it in GitHub Desktop.
require 'net/http'
# Detects the RSS/Atom feed advertised by an HTML page through
# <link ... type="application/rss+xml" href="..."> autodiscovery tags.
class FeedDetector
  # One complete <link ...> tag. [^>]* keeps the match inside a single tag
  # (even when several tags share a line) and also spans line breaks.
  LINK_TAG = /<link\b[^>]*>/i

  # The href attribute inside a single tag; capture 1 is the attribute value.
  # The leading \s keeps attributes such as data-href from matching.
  HREF_ATTR = /\shref\s*=\s*['"]?([^\s'">]+)/i

  # A feed href that already carries an http or https scheme.
  ABSOLUTE_URL = %r{\Ahttps?://}i

  # A scheme-less href of the form //host/path.
  PROTOCOL_RELATIVE_URL = %r{\A//}

  RSS_MIME_TYPE = 'application/rss+xml'.freeze
  ATOM_MIME_TYPE = 'application/atom+xml'.freeze

  # Return the feed url for a page url.
  # For example: http://blog.dominiek.com/ => http://blog.dominiek.com/feed/atom.xml
  #
  # page_url    - absolute http/https URL of the page to inspect (String).
  # only_detect - :rss or :atom to force one kind of feed; any other value
  #               accepts both, preferring RSS.
  #
  # Returns the absolute feed URL, or page_url itself when the page does not
  # advertise a feed. Errors raised by URI.parse or Net::HTTP are not rescued.
  # Redirects are not followed: the body of the first response is inspected.
  def self.fetch_feed_url(page_url, only_detect=:any)
    url = URI.parse(page_url)
    response = Net::HTTP.start(url.host, url.port, :use_ssl => url.scheme == 'https') do |http|
      # request_uri is "/" for an empty path and keeps the query string.
      http.request(Net::HTTP::Get.new(url.request_uri))
    end
    feed_path = get_feed_path(response.body, only_detect)
    return page_url unless feed_path

    absolute_feed_url(url, feed_path)
  end

  ##
  # Get the feed href from an HTML document.
  # For example:
  # ...
  # <link href="/feed/atom.xml" rel="alternate" type="application/atom+xml" />
  # ...
  # => /feed/atom.xml
  #
  # html        - HTML source (String); nil is treated as an empty document.
  # only_detect - :rss or :atom to force one kind of feed; any other value
  #               tries RSS first and then Atom.
  #
  # Returns the href value as written in the document, or nil.
  def self.get_feed_path(html, only_detect=:any)
    md = case only_detect
         when :atom then get_atom(html)
         when :rss then get_rss(html)
         else get_rss(html) || get_atom(html)
         end
    md && md[1]
  end

  # Returns the MatchData for the href of the first RSS <link> tag
  # (capture 1 is the href value), or nil when there is none.
  def self.get_rss(html)
    find_feed_link(html, RSS_MIME_TYPE)
  end

  # Returns the MatchData for the href of the first Atom <link> tag
  # (capture 1 is the href value), or nil when there is none.
  def self.get_atom(html)
    find_feed_link(html, ATOM_MIME_TYPE)
  end

  # Scans html tag by tag and returns the href MatchData of the first <link>
  # tag that mentions mime_type and has a non-empty href, or nil.
  def self.find_feed_link(html, mime_type)
    html.to_s.scan(LINK_TAG) do |tag|
      next unless tag.downcase.include?(mime_type)

      md = HREF_ATTR.match(tag)
      return md if md
    end
    nil
  end
  private_class_method :find_feed_link

  # Turns the href found in the page into an absolute URL.
  # Absolute http(s) hrefs are returned unchanged, //host/path hrefs get the
  # page's scheme, and every other href is resolved against the root of the
  # page's site.
  def self.absolute_feed_url(page_uri, feed_path)
    return feed_path if feed_path =~ ABSOLUTE_URL
    return "#{page_uri.scheme}:#{feed_path}" if feed_path =~ PROTOCOL_RELATIVE_URL

    # Build a fresh string: appending to page_uri.host would modify the URI.
    authority = page_uri.host
    authority = "#{authority}:#{page_uri.port}" unless page_uri.port == page_uri.default_port
    "#{page_uri.scheme}://#{authority}/#{feed_path.sub(%r{\A/}, '')}"
  end
  private_class_method :absolute_feed_url
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment