Skip to content

Instantly share code, notes, and snippets.

@etoyoda
Created October 6, 2019 00:47
Show Gist options
  • Save etoyoda/0ff2948353908307078f984556fabcce to your computer and use it in GitHub Desktop.
Save etoyoda/0ff2948353908307078f984556fabcce to your computer and use it in GitHub Desktop.
Make a diff of two files containing Atom feeds produced for JMA XML (not all Atom feed tags are handled).
#!/usr/bin/ruby
require 'rexml/parsers/baseparser'
require 'rexml/parsers/streamparser'
require 'rexml/streamlistener'
# REXML stream listener that flattens an Atom feed into one Hash per record.
#
# The <feed> header and every <entry> are each collected into a Hash and
# handed to the block given to +new+. Keys are:
#   * the tag name for the tags matched by TAGS ('id', 'title', ...),
#   * "<tag>.<type attribute>" for <content> and <rights>,
#   * "link.<rel attribute>" for <link> (value is the href attribute),
#   * :feed => true, on the feed-level record only.
# Any other tag raises Errno::EINVAL.
class AtomParse
  include REXML::StreamListener

  # Tags whose first text node is stored under the tag name itself.
  TAGS = /\A(name|author|id|title|subtitle|updated)\z/

  # The block is called once for the feed header and once per entry.
  #
  # The block is captured explicitly: a blockless +proc+ call raises
  # ArgumentError on Ruby 3.0 and later.
  #
  # @yieldparam rec [Hash] the collected record
  # @raise [ArgumentError] when no block is given
  def initialize(&callback)
    raise ArgumentError, 'AtomParse.new requires a block' unless callback
    @tag = nil
    @rec = nil
    @callback = callback
  end

  # Stores the first text node following a recognised start tag.
  # Text outside a record, or with no pending tag, is ignored.
  def text(str)
    return unless @tag && @rec
    @rec[@tag] = str
    # will not save following text nodes
    @tag = nil
  end
  alias cdata text

  # @param name [String] element name
  # @param attrs [Hash] element attributes
  # @raise [Errno::EINVAL] for a tag this parser does not handle
  def tag_start(name, attrs)
    case name
    when 'feed'
      @rec = { :feed => true }
    when 'entry'
      # The first entry marks the end of the feed-level header.
      flush_feed_header
      @rec = {}
    when TAGS
      @tag = name
    when 'content', 'rights'
      @tag = "#{name}.#{attrs['type']}"
    when 'link'
      @rec["link.#{attrs['rel']}"] = attrs['href'] if @rec
    else
      raise Errno::EINVAL, "unsupported tag #{name}"
    end
  end

  # Emits the finished entry. Also emits the feed header when the feed
  # closes without having contained any entry.
  def tag_end(name)
    # An element that ended without text must not claim later text nodes.
    @tag = nil
    case name
    when 'entry'
      @callback.call(@rec)
      @rec = nil
    when 'feed'
      flush_feed_header
      @rec = nil
    end
  end

  private

  # Hands the feed-level record to the callback if it is still pending.
  def flush_feed_header
    @callback.call(@rec) if @rec && @rec[:feed]
  end
end
# Writes an Atom feed to an IO from flat Hash records.
#
# Record keys understood by #dump: 'title', 'subtitle', 'updated', 'id',
# 'name' (written as <author><name>), the link keys in LINK_KEYS and the
# typed text keys in TYPED_KEYS. Other keys are ignored.
#
# Call order: #top once, then #entry any number of times, then #close.
class FeedBuild
  # Keys written as <key>text</key>, in output order.
  SIMPLE_KEYS = %w[title subtitle updated id].freeze
  # "link.<rel>" keys written as <link>; 'link.' is a link without rel.
  LINK_KEYS = %w[link.related link.self link.hub link.].freeze
  # "<tag>.<type>" keys written as <tag type="type">text</tag>.
  TYPED_KEYS = %w[rights.html content.text content.html].freeze

  class << self
    # @return [String] +str+ escaped for use as XML character data
    def xmltext(str)
      str.to_s.encode(Encoding::UTF_8, :xml => :text)
    end

    # @return [String] +str+ escaped and wrapped in double quotes, ready to
    #   follow "name=" in a start tag
    def xmlattr(str)
      str.to_s.encode(Encoding::UTF_8, :xml => :attr)
    end

    # Yields a builder writing to +io+ and closes it afterwards.
    #
    # When the block raises before #top was called, the builder is left
    # unclosed so that the "close without top()" error cannot replace the
    # exception that is already propagating.
    #
    # @return the value of the block
    def open(io)
      finished = false
      fb = new(io)
      result = yield fb
      finished = true
      result
    ensure
      fb.close if fb && (finished || fb.started?)
    end
  end

  # @param io [IO] output stream; switched to UTF-8 and closed by #close
  def initialize(io)
    @io = io
    @io.set_encoding 'UTF-8'
    @state = :init
  end

  # @return [Boolean] true once #top has been called
  def started?
    @state != :init
  end

  # Writes the elements for every recognised key present in +hash+.
  def dump(hash)
    SIMPLE_KEYS.each do |key|
      next unless hash.key?(key)
      @io.puts "<#{key}>#{FeedBuild.xmltext(hash[key])}</#{key}>"
    end
    if hash.key?('name')
      @io.puts "<author><name>#{FeedBuild.xmltext(hash['name'])}</name></author>"
    end
    LINK_KEYS.each do |key|
      next unless hash.key?(key)
      href = FeedBuild.xmlattr(hash[key])
      # 'link.' has nothing after the dot, so rel is nil for it.
      rel = key.split('.')[1]
      if rel
        @io.puts "<link rel=#{FeedBuild.xmlattr(rel)} href=#{href} />"
      else
        @io.puts %(<link type="application/xml" href=#{href} />)
      end
    end
    TYPED_KEYS.each do |key|
      next unless hash.key?(key)
      tag, type = key.split('.', 2)
      @io.puts "<#{tag} type=#{FeedBuild.xmlattr(type)}>#{FeedBuild.xmltext(hash[key])}</#{tag}>"
    end
  end

  # Writes the XML declaration, the opening <feed> tag and the feed header.
  #
  # @raise [RuntimeError] when called more than once
  def top(hash)
    raise "top() called twice" unless @state == :init
    @state = :top
    # An XML declaration is terminated by "?>".
    @io.puts '<?xml version="1.0" encoding="utf-8"?>'
    @io.puts '<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="ja">'
    dump(hash)
  end

  # Writes one <entry> element.
  #
  # @raise [RuntimeError] when #top has not been called yet
  def entry(hash)
    raise "entry() before top()" unless @state == :top
    @io.puts '<entry>'
    dump(hash)
    @io.puts '</entry>'
  end

  # Writes the closing </feed> tag and closes the underlying IO.
  #
  # @raise [RuntimeError] when #top has not been called yet
  def close
    raise "close without top()" if @state == :init
    @io.puts '</feed>'
    @io.close
  end
end
# main logic
#
# Usage: <script> OLD_FEED NEW_FEED
#
# Reads OLD_FEED and remembers every entry by its <id>, then streams NEW_FEED
# to stdout as an Atom feed holding only the entries whose id is absent from
# OLD_FEED. Exits with status 16 when either file is not well-formed XML.

# Runs the stream parser over the file at +path+, feeding +listener+.
# Prints the parse error and exits with status 16 on malformed XML.
def parse_feed(path, listener)
  File.open(path, 'r:UTF-8') do |fp|
    REXML::Parsers::StreamParser.new(fp.read, listener).parse
  end
rescue REXML::ParseException => e
  STDERR.puts "feed #{path} - #{e.message}"
  exit 16
end

abort "usage: #{$0} old-feed new-feed" if ARGV.size < 2
old_path = ARGV.shift
new_path = ARGV.shift

# Records of the old feed: the header under :feed, entries under their id.
db = {}
saver = AtomParse.new do |rec|
  # The key must be the string 'id'; a bare local named id is nil here.
  key = rec[:feed] ? :feed : rec['id']
  db[key] = rec
end
parse_feed(old_path, saver)

FeedBuild.open($stdout) do |builder|
  differ = AtomParse.new do |rec|
    if rec[:feed]
      builder.top(rec)
    else
      builder.entry(rec) unless db.key?(rec['id'])
    end
  end
  parse_feed(new_path, differ)
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment