Skip to content

Instantly share code, notes, and snippets.

@ariejan
Created January 6, 2009 10:20
Show Gist options
  • Save ariejan/43755 to your computer and use it in GitHub Desktop.
Save ariejan/43755 to your computer and use it in GitHub Desktop.
require 'rss/1.0'
require 'rss/2.0'
require 'hpricot'
require 'open-uri'
require 'time'
# This is an abstract InputHandler.
#
# InputHandlers fetch, parse and store articles from an RSS feed.
# All InputHandlers should extend this class and set +@feed_name+ and
# +@feed_url+ (for example in +initialize+).
class InputHandler
  attr_accessor :feed_name, :feed_url

  # Return very generic info about the feed, formatted as "name (url)".
  def info
    "#{@feed_name} (#{@feed_url})"
  end

  # Run the InputHandler: retrieve the feed and process each of its items.
  # Does nothing when the feed could not be retrieved or has no item list.
  def run
    rss = retrieve_rss
    return if rss.nil? || rss.items.nil?

    rss.items.each { |item| process_item(item) }
  end

  # Download the document at +@feed_url+ and hand it to RSS::Parser.
  #
  # Returns the parser's result, or +nil+ (after printing a message) when
  # downloading or parsing raised an error.
  def retrieve_rss
    # NOTE: opening a URL through Kernel#open relies on open-uri patching it,
    # which is the case for the Ruby version this file targets; newer Rubies
    # need URI.open instead.
    content = open(@feed_url) { |stream| stream.read }
    RSS::Parser.parse(content, false)
  rescue StandardError => e
    puts "There was an error retrieving the RSS feed. (#{e.class}: #{e.message})"
    nil
  end

  # Build an Article from a single feed item and save it.
  #
  # item - a feed item responding to title, author, category, description,
  #        link and pubDate.
  #
  # Errors are reported on stdout and never propagate, so one bad item does
  # not abort the processing of the remaining items.
  def process_item(item)
    category  = item.category
    published = item.pubDate

    article_attributes = {
      :title        => item.title,
      :author       => item.author,
      # Items without a category are stored with a nil category instead of
      # being dropped by the rescue below.
      :category     => category && category.content,
      :body         => item.description,
      :link         => item.link,
      # item.pubDate reports to be a Time, but saving it directly raised
      # "ArgumentError: wrong number of arguments (1 for 0)" from `to_s`
      # while the date was being quoted. Re-parsing its string form yields
      # a Time that saves fine.
      :published_at => published && Time.parse(published.to_s),
      :feed_url     => @feed_url,
      :feed_name    => @feed_name
    }

    Article.new(article_attributes).save
  rescue StandardError => e
    puts "Error while processing item. (#{e.class}: #{e.message})"
  end

  # Run all InputHandlers.
  #
  # Loads every "*_input_handler.rb" file below the handlers directory
  # ("#{RAILS_ROOT}/app/handlers" when RAILS_ROOT is defined, "app/handlers"
  # otherwise), instantiates the class named after the file and calls +run+
  # on it.
  def self.run_all
    handlers_dir = (defined?(RAILS_ROOT) ? "#{RAILS_ROOT}/app/handlers" : "app/handlers")

    # Captures the path below handlers_dir without the ".rb" extension.
    extract = %r{\A#{Regexp.quote(handlers_dir)}/?(.*)\.rb\z}

    # Find all input handlers, load them and run them.
    Dir["#{handlers_dir}/**/*_input_handler.rb"].each do |handler|
      # Load the handler file. The path is expanded so a relative
      # handlers_dir still loads when "." is not on the load path.
      require File.expand_path(handler)

      # Find the class name (conventions are great!)
      handler_class = handler.sub(extract, '\1')

      # Create an instance of the input handler and call 'run'.
      handler_class.camelize.constantize.new.run
    end
  end
end
>> InputHandler.run_all
ArgumentError: wrong number of arguments (1 for 0)
from /opt/local/lib/ruby/gems/1.8/gems/activesupport-2.2.2/lib/active_support/time_with_zone.rb:146:in `to_s'
from /opt/local/lib/ruby/gems/1.8/gems/activesupport-2.2.2/lib/active_support/time_with_zone.rb:146:in `to_s'
from /opt/local/lib/ruby/gems/1.8/gems/activerecord-2.2.2/lib/active_record/connection_adapters/abstract/quoting.rb:61:in `quoted_date'
from /opt/local/lib/ruby/gems/1.8/gems/activerecord-2.2.2/lib/active_record/connection_adapters/abstract/quoting.rb:29:in `quote'
from /opt/local/lib/ruby/gems/1.8/gems/activerecord-2.2.2/lib/active_record/connection_adapters/mysql_adapter.rb:223:in `quote'
from /opt/local/lib/ruby/gems/1.8/gems/activerecord-2.2.2/lib/active_record/base.rb:2817:in `attributes_with_quotes'
from /opt/local/lib/ruby/gems/1.8/gems/activerecord-2.2.2/lib/active_record/base.rb:2808:in `each'
from /opt/local/lib/ruby/gems/1.8/gems/activerecord-2.2.2/lib/active_record/base.rb:2808:in `attributes_with_quotes'
from /opt/local/lib/ruby/gems/1.8/gems/activerecord-2.2.2/lib/active_record/base.rb:2723:in `create_without_callbacks'
from /opt/local/lib/ruby/gems/1.8/gems/activerecord-2.2.2/lib/active_record/callbacks.rb:237:in `create_without_timestamps'
from /opt/local/lib/ruby/gems/1.8/gems/activerecord-2.2.2/lib/active_record/timestamp.rb:29:in `create'
from /opt/local/lib/ruby/gems/1.8/gems/activerecord-2.2.2/lib/active_record/base.rb:2699:in `create_or_update_without_callbacks'
from /opt/local/lib/ruby/gems/1.8/gems/activerecord-2.2.2/lib/active_record/callbacks.rb:222:in `create_or_update'
from /opt/local/lib/ruby/gems/1.8/gems/activerecord-2.2.2/lib/active_record/base.rb:2383:in `save_without_validation'
from /opt/local/lib/ruby/gems/1.8/gems/activerecord-2.2.2/lib/active_record/validations.rb:1009:in `save_without_dirty'
from /opt/local/lib/ruby/gems/1.8/gems/activerecord-2.2.2/lib/active_record/dirty.rb:79:in `save_without_transactions'
... 1 levels...
from /opt/local/lib/ruby/gems/1.8/gems/activerecord-2.2.2/lib/active_record/transactions.rb:179:in `with_transaction_returning_status'
from /opt/local/lib/ruby/gems/1.8/gems/activerecord-2.2.2/lib/active_record/connection_adapters/abstract/database_statements.rb:66:in `transaction'
from /opt/local/lib/ruby/gems/1.8/gems/activerecord-2.2.2/lib/active_record/transactions.rb:129:in `transaction'
from /opt/local/lib/ruby/gems/1.8/gems/activerecord-2.2.2/lib/active_record/transactions.rb:138:in `transaction'
from /opt/local/lib/ruby/gems/1.8/gems/activerecord-2.2.2/lib/active_record/transactions.rb:178:in `with_transaction_returning_status'
from /opt/local/lib/ruby/gems/1.8/gems/activerecord-2.2.2/lib/active_record/transactions.rb:146:in `save'
from /opt/local/lib/ruby/gems/1.8/gems/activerecord-2.2.2/lib/active_record/transactions.rb:158:in `rollback_active_record_state!'
from /opt/local/lib/ruby/gems/1.8/gems/activerecord-2.2.2/lib/active_record/transactions.rb:146:in `save'
from /Users/ariejan/rails/rsstank/lib/input_handler.rb:64:in `process_item'
from /Users/ariejan/rails/rsstank/lib/input_handler.rb:25:in `run'
from /Users/ariejan/rails/rsstank/lib/input_handler.rb:25:in `each'
from /Users/ariejan/rails/rsstank/lib/input_handler.rb:25:in `run'
from /Users/ariejan/rails/rsstank/lib/input_handler.rb:91:in `run_all'
from /Users/ariejan/rails/rsstank/lib/input_handler.rb:81:in `each'
from /Users/ariejan/rails/rsstank/lib/input_handler.rb:81:in `run_all'
>>
# InputHandler for the Tweakers.net feed. It only supplies the feed name and
# feed URL; everything else comes from InputHandler.
class TweakersInputHandler < InputHandler
  # Store the feed's name and URL in +@feed_name+ and +@feed_url+ for later
  # reference.
  def initialize
    @feed_name, @feed_url = "Tweakers.net", "http://feedproxy.google.com/tweakers/mixed"
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment