Skip to content

Instantly share code, notes, and snippets.

@masao
Last active November 9, 2020 14:19
Show Gist options
  • Save masao/ae3965bcdce6e11765150f334edd8f0b to your computer and use it in GitHub Desktop.
Save masao/ae3965bcdce6e11765150f334edd8f0b to your computer and use it in GitHub Desktop.
iss-identifyer.rb
#!/usr/bin/env ruby
require "uri"
require "open-uri"
require "nokogiri"
# XML namespace prefixes handed to every Nokogiri xpath call in this script.
# Frozen so the shared constant cannot be modified by accident.
NS = {
  dcndl: "http://ndl.go.jp/dcndl/terms/",
  dcterms: "http://purl.org/dc/terms/",
  foaf: "http://xmlns.com/foaf/0.1/",
}.freeze
# Converts the text of a date element into a year.
#
# @param text [String, nil] element content, or nil when the element is absent
# @return [Integer, nil] the leading integer of +text+, or nil when +text+ is
#   missing or does not start with a number (String#to_i yields 0 for those),
#   so the caller can fall back to another field instead of searching year 0
def iss_year(text)
  year = text.to_s.to_i
  year.zero? ? nil : year
end

# Builds the SRU searchRetrieve URL used to look a record up by title and,
# when known, publisher and publication year.
#
# @param title [String] record title; colons become spaces and runs of
#   whitespace are collapsed before the title is put into the query
# @param publisher [String, nil] publisher name, omitted from the query when nil
# @param year [Integer, nil] publication year, used as both from and until
# @return [String] the complete request URL
def iss_search_url(title, publisher, year)
  conditions = {
    dpid: "iss-ndl-opac",
    title: title.gsub(/:/, " ").gsub(/\s+/, " ").strip,
  }
  conditions[:publisher] = publisher if publisher
  if year
    conditions[:from] = year
    conditions[:until] = year
  end
  # NOTE(review): values are quoted as-is; a title or publisher containing a
  # double quote will produce a malformed query - confirm how the service
  # expects such characters to be escaped.
  params = {
    operation: "searchRetrieve",
    maximumRecords: 10,
    recordSchema: "dcndl",
    recordPacking: "xml",
    query: conditions.map { |key, value| "#{key}=\"#{value}\"" }.join(" AND "),
  }
  "https://iss.ndl.go.jp/api/sru?#{URI.encode_www_form(params)}"
end

# For each record file named on the command line: extract title, publisher and
# year, print them and the resulting search URL, then save the search response
# in the current directory under the input file's basename (unless a file of
# that name already exists there).
ARGV.each do |file|
  doc = File.open(file) { |io| Nokogiri::XML(io) }
  record = "//dcndl:BibResource"
  title = doc.xpath("#{record}/dcterms:title", NS).first&.content
  publisher = doc.xpath("#{record}/dcterms:publisher/foaf:Agent/foaf:name", NS).first&.content
  # Prefer dcterms:issued; fall back to dcterms:date when it is absent or
  # does not yield a usable year.
  year = iss_year(doc.xpath("#{record}/dcterms:issued", NS).first&.content) ||
         iss_year(doc.xpath("#{record}/dcterms:date", NS).first&.content)
  basename = File.basename(file)
  p [basename, title, publisher, year]

  # A record without a title cannot be searched for; report it and carry on
  # with the remaining files instead of aborting the whole run.
  if title.nil?
    warn "#{file}: no dcterms:title found, skipping"
    next
  end

  url = iss_search_url(title, publisher, year)
  puts url
  next if File.exist?(basename)

  # Fetch first and write afterwards: a failed request must not leave an empty
  # file behind, because the existence check above would then skip this record
  # on every later run. File.write also avoids Kernel#open's special handling
  # of names beginning with "|".
  body = URI.parse(url).open(&:read)
  File.write(basename, body)
  sleep 1 # pause between requests to the remote service
end
#!/usr/bin/env ruby
require "uri"
require "open-uri"
require "nokogiri"
# XML namespace prefixes handed to the Nokogiri xpath calls in this script.
# Frozen so the shared constant cannot be modified by accident.
NS = {
  dcndl: "http://ndl.go.jp/dcndl/terms/",
  dcterms: "http://purl.org/dc/terms/",
  foaf: "http://xmlns.com/foaf/0.1/",
  sru: "http://www.loc.gov/zing/srw/",
  rdf: "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
}.freeze
# Reports the NDLBibID identifiers found in each response file named on the
# command line, one tab-separated line per file:
#   <basename> TAB <numberOfRecords> TAB <identifier> TAB <identifier> ...
# Files with no numberOfRecords element, with a count of 0, or with a count
# above 10 produce no output.
ARGV.each do |file|
  response = File.open(file) { |io| Nokogiri::XML(io) }

  count_node = response.xpath("//sru:numberOfRecords", NS).first
  next unless count_node

  hits = count_node.content.to_i
  # NOTE(review): the upper bound of 10 presumably mirrors the page size used
  # when the responses were fetched - confirm before changing it.
  next if hits.zero? || hits > 10

  bibids = response.xpath('//dcterms:identifier[@rdf:datatype="http://ndl.go.jp/dcndl/terms/NDLBibID"]', NS).to_a
  columns = [File.basename(file), hits, bibids.join("\t")]
  puts columns.join("\t")
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment