Skip to content

Instantly share code, notes, and snippets.

@tfuji
Last active May 16, 2018 00:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tfuji/5e884a6ce9e216ab9fd1f4dea0c3c1a2 to your computer and use it in GitHub Desktop.
Save tfuji/5e884a6ce9e216ab9fd1f4dea0c3c1a2 to your computer and use it in GitHub Desktop.
#require 'open-uri'
require 'rdf/raptor'
require 'json'
require 'json/ld'
require 'pp'
require "faraday"
require 'faraday_middleware'
require 'fileutils'
require 'shellwords'
require 'systemu'
# Checks whether a file parses as RDF.
#
# The file is first handed to the `rapper` command-line tool, whose
# diagnostic output (read from stderr) is scanned for the triple count, the
# guessed parser name and any error/warning lines. When no parser could be
# guessed, the file is retried as JSON-LD.
#
# @param filepath [String] path of the file to check
# @return [Array] `[count, error, type]` where `count` is the number of
#   triples found, `error` is the number of rapper error/warning lines (or 1
#   when the JSON-LD fallback fails or yields no triples) and `type` is the
#   guessed parser name, 'jsonld', or 'unknown'
def validate(filepath)
  count = 0
  error = 0
  type = "unknown"
  # The path ends up on a shell command line, so escape it: an unescaped
  # space or metacharacter would split the argument or run extra commands.
  cmd = %Q(rapper -g -c #{Shellwords.escape(filepath)})
  _status, _stdout, stderr = systemu cmd
  stderr.split("\n").each do |line|
    case line
    when /rapper: Parsing returned (\d+) triples/
      count = Regexp.last_match(1).to_i
    when /rapper: Parsing URI file:\/\/(.+) with parser guess/
      # Informational line only; nothing to record.
    when /rapper: Guessed parser name '(.+)'/
      type = Regexp.last_match(1)
    when /rapper: Failed to parse file .+ guess content/
      type = 'unknown'
    when /rapper: (Error|Warning)/
      error += 1
    end
  end
  if type == 'unknown'
    begin
      # File.read closes the file itself, unlike a bare File.open.
      input = JSON.parse(File.read(filepath))
      graph = RDF::Graph.new << JSON::LD::API.toRdf(input)
      triples = graph.count
      if triples > 0
        type = 'jsonld'
        count = triples
        error = 0
      else
        count = 0
        error = 1
      end
    rescue StandardError
      # Best effort: any read/parse/conversion failure just means the file
      # is not usable JSON-LD.
      count = 0
      error = 1
    end
  end
  [count, error, type]
end
# Requests a URI once per candidate media type, sending that type as the
# Accept header, and stops at the first successful response whose
# Content-Type contains the requested type. Redirects are followed.
#
# @param v [#to_s] the URI to request
# @return [Array] `[response, message]`: the matching response and a
#   "content_type: ...;" message on success; `[nil, message]` otherwise,
#   where the message lists the distinct content types and statuses seen, or
#   starts with "no_connection:" when an exception was raised
def fetch(v)
  connection = Faraday.new do |conn|
    conn.use FaradayMiddleware::FollowRedirects
    conn.adapter :net_http
  end
  content_types = %w(text/turtle application/rdf+xml application/ld+json application/n-triples text/n3 text/html)
  types = []
  status = []
  content_types.each do |type|
    res = connection.get(v.to_s) { |req| req.headers['Accept'] = type }
    content_type = res.headers['content-type'].to_s
    types.push(content_type)
    status.push(res.status)
    return [res, "content_type: #{content_type};"] if content_type.include?(type) && res.success?
  end
  # Use uniq, not uniq!: the bang form returns nil when there are no
  # duplicates, which would raise here and be misreported as no_connection.
  [nil, "content_type:(#{types.uniq.join(',')}); status: (#{status.uniq.join(',')}); "]
rescue => e
  [nil, "no_connection:#{e}; "]
end
# Script body: reads a JSON-LD context file (first command-line argument,
# default "prefix-cc.jsonld"), and for every prefix => URI entry of its
# '@context' downloads the URI (unless a local copy already exists),
# validates the local copy, prints one tab-separated report row, and
# symlinks usable files into "<basename>/_RDF".
file = ARGV.shift || "prefix-cc.jsonld"
# File.read closes the file itself, unlike a bare File.open.
input = JSON.parse(File.read(file))
path = File.basename(file, '.jsonld')
# Always ensure the link directory exists: mkdir_p does nothing for
# directories that are already there, whereas skipping it when `path` exists
# without `_RDF` would make the symlinking below fail.
FileUtils.mkdir_p("#{path}/_RDF")
puts %w(T/F prefix uri prefix_synonym parse_file_type triple_count message).join("\t")
# Maps each URI to the first prefix seen for it, so later prefixes for the
# same URI can be reported as synonyms of that first one.
first_prefix = {}
input['@context'].each do |k, v|
  synonym = first_prefix.key?(v) ? first_prefix[v] : ""
  first_prefix[v] = k unless first_prefix.key?(v)
  use, count, error, type, message = 'F', 0, 0, 'unknown', ""
  local_copy = "#{path}/#{k}"
  # Fetch only when there is no local copy yet.
  unless File.exist?(local_copy)
    res, message = fetch v
    File.open(local_copy, "w") { |out| out.puts res.body } if res
  end
  if File.exist?(local_copy)
    count, error, type = validate local_copy
    message += "parse_error(#{error})" if error > 0
    # A file is usable only if it yielded triples without any parse errors.
    use = 'T' if count > 0 && error == 0
  end
  puts [use, k, v, synonym, type, count, message].join("\t")
  FileUtils.ln_sf("../#{k}", "#{path}/_RDF/#{k}") if use == 'T'
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment