Skip to content

Instantly share code, notes, and snippets.

@slhck
Last active December 3, 2017 14:48
Show Gist options
  • Save slhck/1583350 to your computer and use it in GitHub Desktop.
Save slhck/1583350 to your computer and use it in GitHub Desktop.
Parses an online RSS feed and creates a PDF from it, using the Ruby Prawn library
#!/usr/bin/env ruby
# rss-to-pdf.rb
# Author: Werner Robitza
# Synopsis: A basic console RSS to PDF writer using the prawn library
# Usage: rss-to-pdf.rb <url>
# <url> being the URL of the RSS news feed
require 'open-uri'
require 'nokogiri'
require 'pp'
require 'prawn'
# Prints the two-line command-line synopsis to standard output and then
# terminates the script via Kernel#exit (default exit status).
def show_usage
  puts "Usage: rss-to-pdf.rb <url>",
       "<url> being the URL of the RSS news feed"
  exit
end
# -----------------------------------------------------------------------------
# Exactly one argument (the feed location) is expected.
show_usage if ARGV.size != 1

# assign parameters
remote_uri = ARGV[0]

begin
  # URI.open (provided by open-uri) is used instead of Kernel#open: since
  # Ruby 3.0 a plain `open` no longer fetches URLs.
  #
  # The feed is parsed with the HTML parser on purpose: the XPath expressions
  # used by this script ("body/feed", "body/rss", lower-case element names)
  # address the document the way that parser builds it.
  @doc = Nokogiri::HTML(URI.open(remote_uri))
  @doc.remove_namespaces!
rescue StandardError
  puts "Could not open URI, aborting."
  # Non-zero status so that calling shells/scripts can detect the failure.
  exit 1
end

# let's find out if this is RSS or Atom
feed_root = @doc.root # nil when the downloaded document is empty
@type = nil
if feed_root
  @type = :atom if feed_root.xpath("body/feed").size == 1
  @type = :rss if feed_root.xpath("body/rss").size == 1
end

# Without a recognised feed type every later XPath lookup would be handed
# nil, so stop here with a readable message instead.
unless @type
  puts "Could not detect an RSS or Atom feed, aborting."
  exit 1
end
# generate the PDF document; the file is named after the last path segment
# of the feed URI and written to the current working directory
file_name = "#{File.basename(remote_uri)}.pdf"

# XPath expressions that differ between the two supported feed flavours:
# feed title, list of items, and the per-item publication date.
title_xpath, item_xpath, date_xpath =
  case @type
  when :rss then ["//channel/title", "//channel/item", "pubdate"]
  when :atom then ["//feed/title", "//feed/entry", "published"]
  end

Prawn::Document.generate file_name do |pdf|
  # print the title
  pdf.font_size(25) do
    pdf.text @doc.xpath(title_xpath).text
  end

  # print each item
  @doc.xpath(item_xpath).each do |item|
    # Every item starts on a fresh page. The former `pdf.group` wrapper was
    # dropped: newer Prawn releases raise NotImplementedError from
    # Document#group, and on a fresh page it added little.
    pdf.start_new_page

    # heading
    pdf.font_size(18) do
      title = item.xpath("title").text
      # The link URL is read from the item's own text nodes. `strip` is used
      # because `chomp!` returns nil when there is nothing to remove, which
      # produced an empty href.
      link = item.xpath("text()").text.strip
      # Atom entries carry the URL in an attribute rather than as text.
      link = item.xpath("link/@href").text.strip if link.empty?
      pdf.text "<link href=\"#{link}\">#{title}</link>", inline_format: true
    end

    # metadata
    pdf.move_down 20
    pdf.text "Published at: ", style: :bold
    pdf.text item.xpath(date_xpath).text

    # embed the enclosure, if the item has one
    # NOTE(review): the enclosure is assumed to be an image Prawn can embed;
    # its media type is not checked here.
    image_link = item.xpath("enclosure/@url").text
    unless image_link.empty?
      # Downloaded files are cached in the working directory under the last
      # path segment of their URL and reused on later runs.
      image_name = File.basename image_link
      unless File.exist?(image_name)
        # Download first, then write: a failed download must not leave an
        # empty file behind that a later run would mistake for a cached image.
        # The block form of URI.open closes the handle once it has been read.
        image_data = URI.open(image_link, &:read)
        File.binwrite(image_name, image_data)
      end
      pdf.move_down 20
      pdf.image image_name, position: :left
    end

    pdf.move_down 20
    pdf.text "Description: ", style: :bold
    # strip leftover CDATA terminators from the extracted text
    pdf.text item.xpath("description").text.gsub("]]>", "")

    if @type == :rss
      pdf.move_down 20
      pdf.text "Content: ", style: :bold
      pdf.text item.xpath("encoded").text.gsub("]]>", "")
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment