Last active
December 3, 2017 14:48
-
-
Save slhck/1583350 to your computer and use it in GitHub Desktop.
Parses an online RSS feed and creates a PDF from it, using the Ruby Prawn library
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
# rss-to-pdf.rb
# Author: Werner Robitza
# Synopsis: A basic console RSS to PDF writer using the prawn library
# Usage: rss-to-pdf.rb <url>
#        <url> being the URL of the RSS news feed
require 'open-uri'
require 'pp'
require 'uri'

require 'nokogiri'
require 'prawn'
# Prints the command-line usage summary and terminates the script.
#
# The text goes to standard error and the process exits with status 1,
# because this is only reached when the script was invoked incorrectly;
# a zero status would make the failure invisible to calling shells.
#
# @return [void] never returns; raises SystemExit via Kernel#exit
def show_usage
  $stderr.puts "Usage: rss-to-pdf.rb <url>"
  $stderr.puts "<url> being the URL of the RSS news feed"
  exit 1
end
# ----------------------------------------------------------------------------- | |
# Script body: fetch the feed named on the command line, detect whether it is
# RSS or Atom, and write a PDF named after the last path segment of the URL
# into the current directory. The feed title is printed first, then every
# item/entry starts on a page of its own.
show_usage if ARGV.size != 1

# assign parameters
remote_uri = ARGV[0]

begin
  # Open through the parsed URI object instead of Kernel#open: open-uri no
  # longer hooks Kernel#open on Ruby 3, and Kernel#open would run a command
  # for an argument starting with "|". A string that is not a fetchable URL
  # raises here and is reported below.
  doc = URI.parse(remote_uri).open { |io| Nokogiri::HTML(io) }
  doc.remove_namespaces!
rescue StandardError
  # abort prints to standard error and exits with a non-zero status.
  abort "Could not open URI, aborting."
end

# let's find out if this is RSS or Atom (RSS wins if both are present)
root = doc.root
feed_type = nil
unless root.nil?
  feed_type = :atom if root.xpath("body/feed").size == 1
  feed_type = :rss if root.xpath("body/rss").size == 1
end
# Without a recognised feed type there is no XPath to query; stop with a
# clear message instead of failing later on a nil expression.
abort "Could not find an RSS or Atom feed at the given URI, aborting." if feed_type.nil?

if feed_type == :rss
  title_xpath = "//channel/title"
  item_xpath = "//channel/item"
  date_xpath = "pubdate"
else
  title_xpath = "//feed/title"
  item_xpath = "//feed/entry"
  date_xpath = "published"
end

# generate the PDF document
file_name = "#{File.basename(remote_uri)}.pdf"
Prawn::Document.generate file_name do |pdf|
  # print the title
  pdf.font_size(25) do
    pdf.text doc.xpath(title_xpath).text
  end

  # print each item
  # NOTE: the former pdf.group wrapper is gone. Document#group is disabled
  # (raises NotImplementedError) in current Prawn releases, and every item
  # already begins on a fresh page.
  doc.xpath(item_xpath).each do |item|
    pdf.start_new_page

    # heading
    pdf.font_size(18) do
      title = item.xpath("title").text
      # The link is read from the item's own text nodes. strip (rather than
      # chomp!, which returns nil when nothing is removed) also drops the
      # whitespace that surrounds the URL.
      link = item.xpath("text()").text.strip
      heading = link.empty? ? title : "<link href=\"#{link}\">#{title}</link>"
      pdf.text heading, :inline_format => true
    end

    # metadata
    pdf.move_down 20
    pdf.text "Published at: ", :style => :bold
    pdf.text item.xpath(date_xpath).text

    # embed the enclosure image, downloading it once into the current directory
    image_link = item.xpath("enclosure/@url").text
    unless image_link.empty?
      image_name = File.basename(image_link)
      unless File.exist?(image_name)
        # Download fully before writing so a failed request does not leave an
        # empty file behind; the block form closes the remote handle.
        # File.binwrite never interprets the feed-supplied name as a command.
        image_data = URI.parse(image_link).open { |remote| remote.read }
        File.binwrite(image_name, image_data)
      end
      pdf.move_down 20
      pdf.image image_name, :position => :left
    end

    pdf.move_down 20
    pdf.text "Description: ", :style => :bold
    pdf.text item.xpath("description").text.gsub("]]>", "")

    if feed_type == :rss
      pdf.move_down 20
      pdf.text "Content: ", :style => :bold
      pdf.text item.xpath("encoded").text.gsub("]]>", "")
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment