Skip to content

Instantly share code, notes, and snippets.

@tfuji
Last active June 27, 2019 13:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tfuji/239806e44218889f9d9b3c8f3fe7254d to your computer and use it in GitHub Desktop.
Save tfuji/239806e44218889f9d9b3c8f3fe7254d to your computer and use it in GitHub Desktop.
#!/usr/bin/env ruby
require 'nokogiri'
#require 'erb'
require 'pp'
require 'json'
#require 'thor'
# Usage:
#   1. wget 'https://www.ncbi.nlm.nih.gov/biosample/docs/attributes/?format=xml' -O ncbi_biosample_attributes.xml
#   2. ruby ncbi_biosample_attributes_extend2ttl.rb ncbi_biosample_attributes.xml > biosample_attributes_extend.ttl
# Streams the <Attribute> records of a BioSample attribute-definition XML file.
#
# The file is scanned line by line instead of being parsed as a whole: every
# run of lines from an opening <Attribute ...> tag to the closing </Attribute>
# tag is wrapped into a small standalone XML document and handed to
# Attribute.new. The printing helpers below write to standard output.
class BioSampleAttributes
  include Enumerable

  # Lines belonging to the enclosing document (XML declaration and the
  # <BioSampleAttributes> root tags); they are dropped from every record.
  WRAPPER_LINE = /<\?xml|BioSampleAttributes/
  # First line of a record.
  RECORD_START = /^\s*<Attribute/
  # Last line of a record.
  RECORD_END = %r{</Attribute>}
  # Declaration prepended to each record so it forms a document of its own.
  XML_DECLARATION = '<?xml version="1.0" encoding="UTF-8"?>'

  # @param xml [String] path of the attribute-definition XML file
  def initialize(xml)
    @xml = xml
  end

  # Yields one Attribute per record found in the file.
  #
  # @yieldparam attribute [Attribute]
  # @return [Enumerator] when called without a block
  # @return [self] when called with a block
  def each
    return enum_for(:each) unless block_given?

    buffer = []
    # File.foreach (unlike IO.foreach) never interprets a leading "|" in the
    # path as a command to run.
    File.foreach(@xml) do |line|
      next if line =~ WRAPPER_LINE

      buffer.push(XML_DECLARATION) if line =~ RECORD_START
      buffer.push(line.chomp)
      next unless line =~ RECORD_END

      yield Attribute.new(buffer.join("\n"))
      buffer = []
    end
    self
  end

  # Prints the Turtle prefix header followed by the Turtle statements of
  # every attribute.
  #
  # @return [self]
  def to_ttl
    puts '@base <http://ddbj.nig.ac.jp/ontologies/biosample> .',
         '@prefix : <http://ddbj.nig.ac.jp/ontologies/biosample/> .',
         '@prefix skos: <http://www.w3.org/2004/02/skos/core#> .'
    each(&:to_ttl)
  end

  # Prints "<harmonized name>\t<format>" for every attribute.
  #
  # @return [self]
  def format
    each { |attr| puts "#{attr.harmonizedName}\t#{attr.format}" }
  end

  # Prints the tab-separated summary line of every attribute.
  # NOTE: despite its name this method prints and returns self; it does not
  # build a String.
  #
  # @return [self]
  def to_s
    each { |attr| puts attr.to_s }
  end

  # Prints one JSON object per line, one line per attribute.
  # NOTE: prints and returns self; it does not build a JSON String.
  #
  # @return [self]
  def to_json
    each { |attr| puts attr.to_json }
  end
end
# Read-only view of a single <Attribute> record, with text, JSON and Turtle
# renderings of its fields.
class Attribute
  # Harmonized names reported as "numeric" by #preferred_format.
  NUMERIC_NAMES = %w[
    air_temp_regm annual_season_temp host_body_temp ph
    samp_store_temp temp typ_occupant_dens water_temp_regm
  ].freeze
  # Harmonized names reported as "date" by #preferred_format.
  DATE_NAMES = %w[birth_date collection_date death_date].freeze
  # Characters that may not appear unescaped inside a double-quoted Turtle
  # string literal, mapped to their escape sequences.
  TURTLE_ESCAPES = {
    '\\' => '\\\\',
    '"' => '\\"',
    "\n" => '\\n',
    "\r" => '\\r'
  }.freeze

  # @param xml [String] XML text containing one <Attribute> element
  # @raise [NameError] when no <Attribute> element is present
  def initialize(xml)
    @attr = Nokogiri::XML(xml).css('Attribute')
    # css returns a node set (possibly empty) rather than nil, so the check
    # has to be on emptiness for the guard to ever fire.
    raise NameError, 'attribute element not found' if @attr.empty?
  end

  # @return [String] text of the <Name> element
  def name
    text_of('Name')
  end

  # @return [String] text of the <HarmonizedName> element
  def harmonizedName
    text_of('HarmonizedName')
  end

  # @return [String] text of the <Format> element
  def format
    text_of('Format')
  end

  # Classifies the attribute by its harmonized name.
  #
  # @return [String] "numeric", "date" or "text"
  def preferred_format
    key = harmonizedName
    return 'numeric' if NUMERIC_NAMES.include?(key)
    return 'date' if DATE_NAMES.include?(key)

    'text'
  end

  # @return [String] texts of all <Synonym> elements joined with "; "
  def synonym
    # Join the element texts, not the nodes themselves: joining nodes would
    # embed their serialized <Synonym>...</Synonym> markup.
    synonym_texts.join('; ')
  end

  # @return [String] text of the <Description> element
  def description
    text_of('Description')
  end

  # Prints the Turtle statements for this attribute: the name as
  # skos:altLabel, each synonym as skos:hiddenLabel, and the preferred format.
  #
  # @return [nil]
  def to_ttl
    subject = ":#{harmonizedName.capitalize}_Attribute"
    hidden = synonym_texts.map { |text| " skos:hiddenLabel \"#{turtle_escape(text)}\"" }

    print "#{subject}\tskos:altLabel\t\"#{turtle_escape(name)}\""
    if hidden.empty?
      puts '.'
    else
      puts ';'
      puts "#{hidden.join("; \n")}."
    end
    puts
    puts "#{subject}\t:preferred_format\t\"#{preferred_format}\"."
    puts
  end

  # @return [String] name and synonyms separated by a tab
  def to_s
    [name, synonym].join("\t")
  end

  # Serializes the attribute as a JSON object.
  #
  # @param args generator arguments; accepted and forwarded so that instances
  #   can be nested inside other structures being converted with to_json
  # @return [String]
  def to_json(*args)
    {
      name: name,
      harmonizedName: harmonizedName,
      synonym: synonym,
      format: format,
      description: description
    }.to_json(*args)
  end

  private

  # Concatenated text of every descendant element with the given tag name.
  def text_of(tag)
    @attr.css(tag).inner_text
  end

  # Text of each <Synonym> element, in document order.
  def synonym_texts
    @attr.css('Synonym').map(&:inner_text)
  end

  # Escapes text for use inside a double-quoted Turtle string literal.
  def turtle_escape(text)
    text.gsub(/[\\"\n\r]/, TURTLE_ESCAPES)
  end
end
# Command-line entry point: converts the attribute XML file named by the first
# argument (default: ncbi_biosample_attributes.xml) to Turtle on standard
# output. The guard keeps the conversion from running when this file is loaded
# by another script instead of being executed directly.
if __FILE__ == $PROGRAM_NAME
  xml = ARGV[0] || 'ncbi_biosample_attributes.xml'
  BioSampleAttributes.new(xml).to_ttl
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment