Created
October 6, 2019 00:47
-
-
Save etoyoda/0ff2948353908307078f984556fabcce to your computer and use it in GitHub Desktop.
Makes a diff of two files containing Atom feeds produced for JMA XML (not all Atom feed tags are handled).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/ruby | |
require 'rexml/parsers/baseparser' | |
require 'rexml/parsers/streamparser' | |
require 'rexml/streamlistener' | |
# REXML stream listener that flattens an Atom feed into plain hashes.
#
# The block given to +new+ is called once with the feed-level record (a hash
# carrying the marker key +:feed+ => true) and once for every +entry+ element.
# Record keys are the element names listed in TAGS, "content.<type>" /
# "rights.<type>" for typed text constructs, and "link.<rel>" for links
# ("link." when the link has no +rel+ attribute).
#
# Only a subset of Atom is understood; any other element raises Errno::EINVAL.
class AtomParse
  include REXML::StreamListener

  # Elements whose first text node is stored under the element's own name.
  TAGS = /\A(name|author|id|title|subtitle|updated)\z/

  # The block receives each completed record (feed header or entry) as a Hash.
  #
  # Raises ArgumentError when no block is given.
  def initialize(&callback)
    # A bare `proc` without a block no longer captures the method's block
    # (it raises on Ruby 3.0+), so the block is taken explicitly.
    raise ArgumentError, 'AtomParse.new requires a block' unless callback

    @tag = nil
    @rec = nil
    @callback = callback
  end

  # Stores the text node under the pending key set by tag_start, if any.
  def text(str)
    return unless @tag

    # @rec is nil outside <feed>/<entry> (e.g. feed-level elements that follow
    # the last entry); such text is dropped instead of failing on nil.
    @rec[@tag] = str if @rec
    # will not save following text nodes
    @tag = nil
  end
  alias cdata text

  # Opens a record for feed/entry, arms the key for the next text node, or
  # records a link directly from its attributes.
  #
  # Raises Errno::EINVAL for any element this parser does not handle.
  def tag_start(name, attrs)
    case name
    when 'feed'
      @rec = { feed: true }
    when 'entry'
      # The first entry marks the end of the feed-level header.
      flush_feed
      @rec = {}
    when TAGS
      @tag = name
    when /\A(content|rights)\z/
      @tag = "#{name}.#{attrs['type']}"
    when 'link'
      @rec["link.#{attrs['rel']}"] = attrs['href'] if @rec
    else
      raise Errno::EINVAL, "unsupported tag #{name}"
    end
  end

  # Emits the entry record when an entry closes, and the feed record when the
  # feed closes without having contained any entry.
  def tag_end(name)
    # An element that closed without text must not claim a later text node.
    @tag = nil
    case name
    when 'entry'
      @callback.call(@rec)
      @rec = nil
    when 'feed'
      flush_feed
      @rec = nil
    end
  end

  private

  # Hands the feed-level record to the callback if it is still pending.
  def flush_feed
    @callback.call(@rec) if @rec && @rec[:feed]
  end
end
# Serialises record hashes (string keys as produced by AtomParse) into an
# Atom feed document written to an IO.
#
# Call order is top (once), then entry (any number of times), then close.
class FeedBuild
  class << self
    # Returns +str+ (via to_s) as UTF-8 escaped for use as XML character data.
    def xmltext(str)
      str.to_s.encode(Encoding::UTF_8, xml: :text)
    end

    # Returns +str+ (via to_s) as UTF-8 escaped for use as an XML attribute
    # value; the surrounding double quotes are part of the result.
    def xmlattr(str)
      str.to_s.encode(Encoding::UTF_8, xml: :attr)
    end

    # Yields a builder writing to +io+ and closes it afterwards; returns the
    # block's value.
    #
    # If the block raises before top() was called there is no document to
    # terminate, so close is skipped and the block's exception propagates
    # instead of being replaced by "close without top()".
    def open(io)
      fb = new(io)
      completed = false
      begin
        result = yield fb
        completed = true
        result
      ensure
        fb.close if completed || fb.started?
      end
    end
  end

  # +io+ must respond to set_encoding, write, puts and close.
  def initialize(io)
    @io = io
    @io.set_encoding 'UTF-8'
    @state = :init
  end

  # True once top() has written the document header.
  def started?
    @state != :init
  end

  # Writes the child elements for one record, in a fixed order: simple text
  # elements, author, links, then rights/content. Keys not listed are ignored.
  def dump(hash)
    %w[title subtitle updated id].each do |key|
      next unless hash.include?(key)

      @io.puts "<#{key}>#{FeedBuild.xmltext(hash[key])}</#{key}>"
    end
    if hash.include?('name')
      @io.puts "<author><name>#{FeedBuild.xmltext(hash['name'])}</name></author>"
    end
    %w[link.related link.self link.hub link.].each do |key|
      next unless hash.include?(key)

      href = FeedBuild.xmlattr(hash[key])
      # "link." (no rel) splits to a single element, so rel is nil there.
      rel = key.split('.')[1]
      if rel
        @io.puts "<link rel=#{FeedBuild.xmlattr(rel)} href=#{href} />"
      else
        @io.puts "<link type=\"application/xml\" href=#{href} />"
      end
    end
    %w[rights.html content.text content.html].each do |key|
      next unless hash.include?(key)

      tag, type = key.split('.', 2)
      @io.puts "<#{tag} type=#{FeedBuild.xmlattr(type)}>#{FeedBuild.xmltext(hash[key])}</#{tag}>"
    end
  end

  # Writes the XML declaration, the opening <feed> tag and the feed-level
  # elements from +hash+. Raises RuntimeError when called a second time.
  def top(hash)
    raise "top() called twice" unless @state == :init

    @state = :top
    # The declaration must end with "?>"; without it the output is not XML.
    @io.puts '<?xml version="1.0" encoding="utf-8"?>'
    @io.puts '<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="ja">'
    dump(hash)
  end

  # Writes one <entry> element from +hash+. Raises RuntimeError before top().
  def entry(hash)
    raise "entry() before top()" unless @state == :top

    @io.puts '<entry>'
    dump(hash)
    @io.puts '</entry>'
  end

  # Writes the closing </feed> tag and closes the underlying IO.
  # Raises RuntimeError if top() was never called.
  def close
    raise "close without top()" if @state == :init

    @io.puts '</feed>'
    @io.close
  end
end
# main logic
#
# Usage: <script> OLD_FEED NEW_FEED
#
# Reads OLD_FEED and remembers every entry by its <id>; then re-emits NEW_FEED
# on standard output, keeping the feed header and only those entries whose id
# does not occur in OLD_FEED.

# Runs the REXML stream parser over the file at +path+, feeding +listener+.
# On malformed XML, reports the file name and parser message on standard
# error and exits with status 16.
def parse_feed(path, listener)
  source = File.read(path, encoding: 'UTF-8')
  REXML::Parsers::StreamParser.new(source, listener).parse
rescue REXML::ParseException => e
  warn "feed #{path} - #{e.message}"
  exit 16
end

abort "usage: #{$PROGRAM_NAME} old-feed new-feed" if ARGV.size < 2

old_path = ARGV.shift
new_path = ARGV.shift

# Records of the old feed, keyed by entry id (the feed header under :feed).
db = {}
saver = AtomParse.new do |rec|
  # The id lives under the string key 'id'; indexing with the not-yet-assigned
  # local would store every entry under nil and defeat the lookup below.
  key = rec[:feed] ? :feed : rec['id']
  db[key] = rec
end
parse_feed(old_path, saver)

FeedBuild.open($stdout) do |builder|
  differ = AtomParse.new do |rec|
    if rec[:feed]
      builder.top(rec)
    else
      # Emit only entries that are new relative to the old feed.
      builder.entry(rec) unless db[rec['id']]
    end
  end
  parse_feed(new_path, differ)
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment