Last active
November 9, 2020 14:19
-
-
Save masao/ae3965bcdce6e11765150f334edd8f0b to your computer and use it in GitHub Desktop.
iss-identifyer.rb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
require "uri" | |
require "open-uri" | |
require "nokogiri" | |
# XML namespace prefixes used by the XPath expressions in this script.
NS = {
  dcndl: "http://ndl.go.jp/dcndl/terms/",
  dcterms: "http://purl.org/dc/terms/",
  foaf: "http://xmlns.com/foaf/0.1/",
}.freeze

# Returns the text content of the first node matching +path+ in +doc+,
# or nil when nothing matches.
def first_text(doc, path)
  doc.xpath(path, NS).first&.content
end

# Returns the publication year as an Integer, or nil when none can be read.
#
# dcterms:issued is tried first, then dcterms:date. String#to_i yields 0 for
# text that does not start with digits, so a 0 from either element is treated
# as "no year" and the next element is tried instead of sending year 0.
def publication_year(doc)
  %w[issued date].each do |element|
    year = first_text(doc, "//dcndl:BibResource/dcterms:#{element}")&.to_i&.nonzero?
    return year if year
  end
  nil
end

# Prepares a title for use as a quoted query term: colons and double quotes
# become spaces, runs of whitespace collapse to one space, and the ends are
# trimmed.
def normalize_title(title)
  title.gsub(/[:"]/, " ").gsub(/\s+/, " ").strip
end

# Formats one key="value" clause of the query. Every value is wrapped in
# double quotes, so an embedded double quote would end the term early; it is
# replaced by a space.
def cql_clause(key, value)
  %(#{key}="#{value.to_s.tr('"', " ")}")
end

# Builds the SRU searchRetrieve URL for one record.
#
# title     - String, required.
# publisher - String or nil; adds a publisher clause when present.
# year      - Integer or nil; when present it is used as both from and until.
#
# Returns the URL as a String.
def build_sru_url(title, publisher, year)
  query = { dpid: "iss-ndl-opac", title: normalize_title(title) }
  query[:publisher] = publisher if publisher
  if year
    query[:from] = year
    query[:until] = year
  end
  params = {
    operation: "searchRetrieve",
    maximumRecords: 10,
    recordSchema: "dcndl",
    recordPacking: "xml",
    query: query.map { |key, value| cql_clause(key, value) }.join(" AND "),
  }
  "https://iss.ndl.go.jp/api/sru?#{URI.encode_www_form(params)}"
end

# For every XML file named on the command line: read title, publisher and
# year, print them and the resulting search URL, then save the search response
# in the current directory under the input file's basename.
ARGV.each do |file|
  basename = File.basename(file)
  doc = File.open(file) { |io| Nokogiri::XML(io) }
  title = first_text(doc, "//dcndl:BibResource/dcterms:title")
  publisher = first_text(doc, "//dcndl:BibResource/dcterms:publisher/foaf:Agent/foaf:name")
  year = publication_year(doc)
  p [basename, title, publisher, year]

  # A record without a title cannot be searched; report it and carry on with
  # the remaining files instead of aborting the whole run.
  if title.nil?
    warn "#{basename}: no dcterms:title found, skipping"
    next
  end

  url = build_sru_url(title, publisher, year)
  puts url

  # An existing output file means this record was already fetched.
  next if File.exist?(basename)

  # Download first and write afterwards, so a failed request does not leave
  # an empty file behind that would be skipped by the check above next time.
  body = URI.parse(url).open.read
  File.write(basename, body)

  # Pause one second after each request.
  sleep 1
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
require "uri" | |
require "open-uri" | |
require "nokogiri" | |
# XML namespace prefixes used by the XPath expressions in this script.
NS = {
  dcndl: "http://ndl.go.jp/dcndl/terms/",
  dcterms: "http://purl.org/dc/terms/",
  foaf: "http://xmlns.com/foaf/0.1/",
  sru: "http://www.loc.gov/zing/srw/",
  rdf: "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
}.freeze

# Returns true when a response should not be reported: the hit count is
# missing, zero, or greater than 10.
def skip_result?(hits)
  hits.nil? || hits.zero? || hits > 10
end

# Formats one tab-separated output line: file name, hit count, then each
# identifier in its own column.
def result_row(basename, hits, bibids)
  [basename, hits, bibids.join("\t")].join("\t")
end

# For every search-response file named on the command line, print its hit
# count and the NDLBibID identifiers it contains.
ARGV.each do |file|
  doc = File.open(file) { |io| Nokogiri::XML(io) }
  hits = doc.xpath("//sru:numberOfRecords", NS).first&.content&.to_i
  next if skip_result?(hits)

  # Take the text content of each identifier element; joining the nodes
  # themselves would print their serialized XML markup instead of the IDs.
  bibids = doc.xpath('//dcterms:identifier[@rdf:datatype="http://ndl.go.jp/dcndl/terms/NDLBibID"]', NS).map(&:content)
  puts result_row(File.basename(file), hits, bibids)
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment