@cheshire137 · Created June 28, 2015
Delicious and Pocket RSS to JSON
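A small CGI-style Ruby script that pulls your Pocket and Delicious RSS feeds, merges and sorts their items newest-first, fills in missing titles, and prints the most recent links as JSON. Feeds and scraped page titles are cached on disk so repeat requests stay fast.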
#!/usr/bin/env ruby
# encoding: utf-8
require 'rubygems'
# require 'nokogiri'
require 'json'
require 'rss'
require 'open-uri'
require 'uri'
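# open-uri patches Kernel#open so the bare open("http://...") calls below can
# fetch over HTTP as well as read local files. (Ruby 2.7+ deprecates this in
# favor of URI.open; this 2015-era script predates that.)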
class RSSFetcher
  attr_reader :urls, :link_limit, :url_cache_path, :should_write_url_cache

  def initialize urls, link_limit
    @urls = urls
    @link_limit = link_limit
    @should_write_url_cache = false
    @url_cache_path = File.join(File.dirname(__FILE__),
                                'rss-link-title-cache.json')
  end
  def print_json
    print "Content-type: application/json\r\n\r\n"
    begin
      feeds = get_rss_feeds
      all_links = merge_rss_feeds(feeds)
      recent_links = filter_links(all_links)
      write_url_title_cache(recent_links) if @should_write_url_cache
      print_json_object recent_links
    rescue => e
      # Report failures in the response body as {"error": "..."} JSON.
      print_json_object({error: e.message})
    end
  end
  private

  def filter_links all_links
    all_links.sort! {|a, b| b[:date] <=> a[:date] } # newest first
    all_links.each do |link|
      link[:title] = get_link_title(link[:title], link[:url])
    end
    all_links.select {|link|
      title = link[:title]
      title && title.strip != ''
    }.first(@link_limit)
  end
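  # Each feed is cached in a file named for the feed URL's host; the Pocket
  # feed configured at the bottom of this script, for example, lands in
  # "rss-cache-getpocket.com.xml".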
  def get_cache_path url
    domain = URI.parse(url).host
    File.join(File.dirname(__FILE__), "rss-cache-#{domain}.xml")
  end
  def get_link_title original_title, url
    # Pocket has Untitled for some links
    (original_title == 'Untitled') ? get_title_for_url(url) : original_title
  end
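  # Fetches and parses one feed, reusing the on-disk copy when it is less
  # than six hours old and rewriting the cache after any fresh fetch.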
  def get_rss_feed url
    rss = nil
    cache_path = get_cache_path(url)
    if File.exist?(cache_path)
      minutes_old = (Time.now - File.mtime(cache_path)).to_i / 60
      rss = open(cache_path) if minutes_old < 360 # 6 hours
    end
    write_cache = false
    unless rss
      rss = open(url)
      write_cache = true
    end
    parsed_rss = RSS::Parser.parse(rss)
    if write_cache
      File.open(cache_path, 'w:UTF-8') {|file| file.puts parsed_rss.to_s }
    end
    parsed_rss
  end
  def get_rss_feeds
    @urls.map {|url| get_rss_feed(url) }
  end
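  # Scrapes a page's <title>, consulting the JSON title cache first. Returns
  # nil when the request fails, which makes filter_links drop that link.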
  def get_title_for_url url
    if File.exist?(@url_cache_path)
      json_str = File.open(@url_cache_path, 'r:UTF-8') {|file| file.read }
      url_titles = JSON.parse(json_str)
      if (title = url_titles[url])
        return title
      end
    end
    @should_write_url_cache = true
    # page = Nokogiri::HTML(open(url))
    # page.css('title').text
    source = open(url).read
    source[/<title[^>]*>(.*?)<\/title>/mi, 1]
  rescue OpenURI::HTTPError
    nil
  end
  def merge_rss_feeds feeds
    links = []
    feeds.each do |feed|
      source = feed.channel.title
      feed.items.each do |item|
        links << {url: item.link, date: item.pubDate, source: source,
                  title: item.title}
      end
    end
    links
  end
  def print_json_object obj
    print JSON.generate(obj) + "\r\n"
  end
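  # Persists the URL => title map as a JSON object (e.g.
  # {"http://example.com/post": "Post Title"} for a hypothetical post) so
  # later runs can skip scraping titles in get_title_for_url.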
  def write_url_title_cache links
    url_titles = {}
    links.each do |link|
      url_titles[link[:url]] = link[:title]
    end
    File.open(@url_cache_path, 'w:UTF-8') do |file|
      file.puts JSON.generate(url_titles)
    end
  end
end

urls = ['http://getpocket.com/users/your_user_name/feed/all',
        'http://feeds.delicious.com/v2/rss/your_user_name']
RSSFetcher.new(urls, 30).print_json
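
# To try this out, substitute your Pocket and Delicious usernames into the two
# feed URLs above and run the script; the Content-type line it prints is a CGI
# header, so it can also be dropped into a web server's cgi-bin directory.
# A hypothetical invocation, assuming you saved the gist as rss_fetcher.rb:
#   ruby rss_fetcher.rb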