Last active April 29, 2019 23:17
Example code
:001 > require 'feedjira'
:002 > require './janky_parser.rb'
:003 > feed_url = ''
# Add our custom parser to the top of the stack of feeds Feedjira uses.
:004 > Feedjira::Feed.add_feed_class Feedjira::Parser::Versa::JankyPublisher
# Fetch and parse our example feed
:005 > feed = Feedjira::Feed.fetch_and_parse feed_url
:006 > feed.class
=> Feedjira::Parser::Versa::JankyPublisher # Sweet! Our parser was chosen to parse the feed.
:007 > feed.entries.first.url
=> "" # The url doesn't have a query string!
:008 > feed.entries.first.published
=> 2009-09-06 16:20:00 UTC # The published date is the correct one
# Rails initializer for the Feedjira gem. It should live in
# config/initializers/feedjira.rb
# Our app/parsers folder is auto-loaded, so these classes should be available here.
#
# Every parser class listed in the array is registered with Feedjira via
# Feedjira::Feed.add_feed_class.
# NOTE(review): the opening "[" and the first (uncommented) parser entry appear
# to be missing from this listing -- confirm against the original initializer.
# Feedjira::Parser::Versa::AnotherPublisher,
# Feedjira::Parser::Versa::AThirdPublisher
].each{ |parser| Feedjira::Feed.add_feed_class parser }
<?xml version="1.0" encoding="UTF-8" ?>
<rss version="2.0">
<title>A Janky RSS Feed</title>
<description>This is an example of an RSS feed</description>
<lastBuildDate>Mon, 06 Sep 2010 00:01:00 +0000 </lastBuildDate>
<pubDate>Mon, 06 Sep 2009 16:20:00 +0000 </pubDate>
<title>Example entry 1</title>
<description>Here is some text containing an interesting description.</description>
<dc:created>2009/09/06 00/00/00</dc:created>
<pubDate>Mon, 06 Sep 2009 16:20:00 +0000 </pubDate>
<title>Example entry 2</title>
<description>Here is some text containing an interesting description.</description>
<dc:created>2009/09/06 00/00/00</dc:created>
<pubDate>Mon, 06 Sep 2009 16:22:00 +0000 </pubDate>
module Feedjira
  module Parser
    # It's good practice to namespace your parsers, so we'll put
    # this one in the Versa namespace.
    module Versa
      ### Entry Parser Class ###
      # This first class is for parsing an individual <item> in the feed.
      # We define it first because our top level parser needs to be able to
      # call it. By convention, this class name is the same as our top level
      # parser but with "Entry" appended.
      class JankyPublisherEntry
        include SAXMachine
        include FeedEntryUtilities

        # Declare the fields we want to parse out of the XML feed.
        element :title
        element :link, :as => :url
        element :description, :as => :summary
        element :pubDate, :as => :published
        element :guid, :as => :entry_id

        # We remove the query string from the url by overriding the 'url' method
        # originally defined by including FeedEntryUtilities in our class.
        #
        # Returns the url with everything from the first "?" to the end of the
        # line removed. Returns nil when @url is unset (e.g. an <item> with no
        # <link>), instead of raising NoMethodError by calling gsub on nil.
        def url
          # &&= only rewrites @url when it already holds a value.
          @url &&= @url.gsub(/\?.*$/, '')
        end
      end

      ### Feed Parser Class ###
      # This class is for parsing the top level feed fields.
      class JankyPublisher
        include SAXMachine
        include FeedUtilities

        # Define the fields we want to parse using SAX Machine declarations
        element :title
        element :link, :as => :url
        element :description

        # Parse all the <item>s in the feed with the class we just defined above
        elements :item, :as => :entries, :class => Versa::JankyPublisherEntry

        attr_accessor :feed_url

        # This method is required by all Feedjira parsers. To decide which
        # parser to use, Feedjira cycles through each parser it knows about
        # and passes the first 2000 characters of the feed to this method.
        # To make sure your parser is only used when it's supposed to be used,
        # test for something unique in those first 2000 characters. URLs seem
        # to be a good choice.
        #
        # This parser, for example, is looking for an occurrence of
        # '<link>http://www.jankybutlovablepublisher.com/' which we should
        # only really find in the feed we are targeting.
        #
        # Returns the match offset (truthy) when the marker is present, or nil
        # when it is not.
        def self.able_to_parse?(xml)
          %r{<link>http://www\.jankybutlovablepublisher\.com/} =~ xml
        end
      end
    end
  end
end
# Example spec for testing that your parser works as it's supposed to
require 'feedjira'
require './janky_parser'
Feedjira::Feed.add_feed_class Feedjira::Parser::Versa::JankyPublisher
# Checks that JankyPublisher recognises the sample feed and that parsing it
# yields JankyPublisherEntry instances.
describe Feedjira::Parser::Versa::JankyPublisher do
  before :each do
    # Load the sample feed stored alongside this spec file.
    @janky_feed = File.read(File.join(File.dirname(__FILE__), '/janky_feed.xml'))
  end

  describe '#able_to_parse?' do
    # NOTE(review): the quoted URL in this description is empty here --
    # presumably lost from the listing; confirm the intended text.
    it 'should return true if the <link> tag contains ""' do
      Feedjira::Parser::Versa::JankyPublisher.able_to_parse?(@janky_feed).should be_true
    end
  end

  describe 'the parser' do
    it 'should pull out the entries properly' do
      feed = Feedjira::Feed.parse(@janky_feed)
      feed.entries.first.class.should eq(Feedjira::Parser::Versa::JankyPublisherEntry)
    end
  end
end
# Checks the individual fields of the first entry parsed from the sample feed.
describe Feedjira::Parser::Versa::JankyPublisher do
  describe 'a parsed entry' do
    # The feed is read and parsed once, when the example group is defined;
    # the examples below close over these locals.
    janky_feed = File.read(File.join(File.dirname(__FILE__), '/janky_feed.xml'))
    feed = Feedjira::Feed.parse(janky_feed)
    entry = feed.entries.first

    it 'has the correct title' do entry.title.should == "Example entry 1" end
    # NOTE(review): the expected URL below is an empty string -- presumably the
    # real value was lost from the listing; confirm before relying on it.
    it 'has the correct url' do entry.url.should == "" end
    it 'has the correct entry_id' do entry.entry_id.should == "foo" end
    it 'has the correct published time' do entry.published.should == Time.parse("Mon, 06 Sep 2009 16:20:00 +0000") end
  end
end
