Skip to content

Instantly share code, notes, and snippets.

@masao
Created March 4, 2019 13:21
Show Gist options
  • Save masao/0afab4595482aa3a8de6eae73f357d7a to your computer and use it in GitHub Desktop.
Save masao/0afab4595482aa3a8de6eae73f357d7a to your computer and use it in GitHub Desktop.
#!/usr/bin/env ruby
require "nokogiri"
# XML namespace prefixes, mapped to their URIs, that this script hands to its
# namespace-aware XPath queries.
# Frozen so the shared lookup table cannot be modified by accident at runtime.
NAMESPACES = {
  "education" => "http://www.orcid.org/ns/education",
  "employment" => "http://www.orcid.org/ns/employment",
  "common" => "http://www.orcid.org/ns/common",
  "person" => "http://www.orcid.org/ns/person",
  "personal-details" => "http://www.orcid.org/ns/personal-details",
}.freeze
# Returns the start/end period of the first "<namespace>-summary" element whose
# organization name matches "University of Tsukuba" (case-insensitive).
#
# doc       - object answering #xpath(path, namespaces), e.g. a parsed XML document.
# namespace - prefix used for both the element namespace and the element name,
#             e.g. "education" selects education:education-summary elements.
#
# Returns "<start>-<end>", where each side is the text of the child elements of
# common:start-date / common:end-date joined with "-", or an empty string when
# that date element is absent. Returns nil when no summary matches.
def extract_orgname_start_end(doc, namespace)
  doc.xpath("//#{namespace}:#{namespace}-summary", NAMESPACES).each do |summary|
    org_name = summary.xpath("./#{namespace}:organization/common:name", NAMESPACES).first
    # A summary without an organization name cannot match; skip it rather than
    # calling #content on nil.
    next unless org_name && org_name.content =~ /University of Tsukuba/i

    period = %w[start-date end-date].map do |tag|
      date = summary.xpath("./common:#{tag}", NAMESPACES).first
      # A missing date stays nil, which Array#join renders as an empty string.
      date.xpath("./*").map { |part| part.content }.join("-") if date
    end
    # Only the first matching summary is reported.
    return period.join("-")
  end
  nil
end
# Matches e-mail addresses on a host under tsukuba.ac.jp. Every dot is escaped
# so it only matches a literal ".", not an arbitrary character.
TSUKUBA_EMAIL_PATTERN = /\.tsukuba\.ac\.jp/

# For each XML file named on the command line, print one tab-separated line
# (ORCID path, name, e-mail, education period, employment period) when the
# record has a matching education entry, employment entry, or e-mail address.
ARGV.each do |f|
  # Block-form File.open closes the handle as soon as parsing is done and,
  # unlike Kernel#open, never runs a "|command" argument as a subprocess.
  doc = File.open(f) { |io| Nokogiri::XML(io) }
  data = {}
  data[:education] = extract_orgname_start_end(doc, "education")
  data[:employment] = extract_orgname_start_end(doc, "employment")
  # No namespace hash is passed here, so Nokogiri resolves the "email" prefix
  # from the namespace declarations on the document's root element.
  doc.xpath("//email:email").each do |node|
    address = node.content
    # When several addresses match, the last one is kept.
    data[:email] = address if address =~ TSUKUBA_EMAIL_PATTERN
  end
  next unless data[:education] || data[:employment] || data[:email]

  name = doc.at("//person:name")
  # The name element or either of its parts may be absent; a missing part is
  # rendered as an empty string instead of raising NoMethodError.
  given = name && name.at("./personal-details:given-names")
  given = given.content if given
  family = name && name.at("./personal-details:family-name")
  family = family.content if family
  data[:name] = "#{given} #{family}".strip
  id = doc.at("//common:orcid-identifier/common:path", NAMESPACES)
  data[:id] = id.content if id
  # nil fields are written as empty columns by Array#join.
  puts [ data[:id], data[:name], data[:email],
         data[:education], data[:employment] ].join("\t")
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment