Last active
May 16, 2018 00:23
-
-
Save tfuji/5e884a6ce9e216ab9fd1f4dea0c3c1a2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#require 'open-uri' | |
require 'rdf/raptor' | |
require 'json' | |
require 'json/ld' | |
require 'pp' | |
require "faraday" | |
require 'faraday_middleware' | |
require 'fileutils' | |
require 'systemu' | |
# Check whether a local file parses as RDF and count its triples.
#
# First runs the external `rapper -g -c <filepath>` command and scans the
# lines it writes to stderr for the guessed parser name, the reported triple
# count, and any "Error"/"Warning" lines. If no parser type was recognised,
# falls back to reading the file as JSON and converting it with
# JSON::LD::API.toRdf.
#
# @param filepath [String] path of the file to check
# @return [Array(Integer, Integer, String)] `[count, error, type]` where
#   `count` is the number of triples, `error` is the number of
#   error/warning lines (or 1 when the JSON-LD fallback fails or yields no
#   triples), and `type` is the rapper parser name, 'jsonld', or 'unknown'
def validate(filepath)
  # Required here so the method is self-contained; `require` is a no-op
  # once the library is loaded.
  require 'shellwords'

  count = 0
  error = 0
  type = "unknown"

  # The command is handed over as a single string, so the path must be
  # quoted to keep spaces or shell metacharacters from being interpreted.
  cmd = "rapper -g -c #{Shellwords.escape(filepath)}"
  _status, _stdout, stderr = systemu cmd

  stderr.split("\n").each do |line|
    case line
    when /rapper: Parsing returned (\d+) triples/
      count = Regexp.last_match(1).to_i
    when /rapper: Parsing URI file:\/\/(.+) with parser guess/
      # Informational line only; nothing to record.
    when /rapper: Guessed parser name '(.+)'/
      type = Regexp.last_match(1)
    when /rapper: Failed to parse file .+ guess content/
      type = 'unknown'
    when /rapper: (Error|Warning)/
      error += 1
    end
  end

  return [count, error, type] unless type == 'unknown'

  # Fallback: rapper did not recognise the file, so try it as JSON-LD.
  begin
    # File.read closes the handle; the previous File.open never did.
    input = JSON.parse(File.read(filepath))
    graph = RDF::Graph.new << JSON::LD::API.toRdf(input)
    triples = graph.count
    if triples > 0
      type = 'jsonld'
      count = triples
      error = 0 # a successful JSON-LD parse overrides rapper's complaints
    else
      count = 0
      error = 1
    end
  rescue
    # Best effort: an unreadable or non-JSON file is reported as one error.
    count = 0
    error = 1
  end

  [count, error, type]
end
# Request a URI once per candidate media type until one matches.
#
# Sends a GET for `v` with each media type in turn as the `Accept` header
# (redirects are followed by the FollowRedirects middleware). The first
# response that is successful and whose Content-Type contains the requested
# type is returned immediately.
#
# @param v [#to_s] the URI to request
# @return [Array] one of:
#   - `[response, "content_type: <type>;"]` on the first match;
#   - `[nil, "content_type:(...); status: (...); "]` listing the distinct
#     content types and status codes seen when nothing matched;
#   - `[nil, "no_connection:<error>; "]` when any StandardError is raised.
def fetch(v)
  connection = Faraday.new do |conn|
    conn.use FaradayMiddleware::FollowRedirects
    conn.adapter :net_http
  end

  content_types = %w[text/turtle application/rdf+xml application/ld+json application/n-triples text/n3 text/html]
  seen_types = []
  seen_statuses = []

  content_types.each do |type|
    res = connection.get(v.to_s) { |req| req.headers['Accept'] = type }
    content_type = res.headers['content-type'].to_s
    seen_types << content_type
    seen_statuses << res.status
    return [res, "content_type: #{content_type};"] if content_type.include?(type) && res.success?
  end

  # Use non-destructive `uniq`: `uniq!` returns nil when there are no
  # duplicates, which used to raise here and be misreported by the rescue
  # below as "no_connection".
  [nil, "content_type:(#{seen_types.uniq.join(',')}); status: (#{seen_statuses.uniq.join(',')}); "]
rescue => e
  [nil, "no_connection:#{e}; "]
end
# Driver: read a JSON-LD context file (first command-line argument, default
# "prefix-cc.jsonld"), and for every prefix => URI pair in its '@context':
#   1. download the URI into "<path>/<prefix>" unless that file already exists,
#   2. validate the downloaded file,
#   3. print one tab-separated report row,
#   4. symlink files that parsed cleanly into "<path>/_RDF/".
# <path> is the input file name without its ".jsonld" extension.
file = ARGV.shift || "prefix-cc.jsonld"
# File.read closes the handle; the previous File.open never did.
input = JSON.parse(File.read(file))
path = File.basename(file, '.jsonld')

# mkdir_p is idempotent, so always call it. Guarding on `path` alone skipped
# creating "_RDF" whenever `path` already existed without it, which made the
# ln_sf below fail.
FileUtils.mkdir_p("#{path}/_RDF")

puts %w(T/F prefix uri prefix_synonym parse_file_type triple_count message).join("\t")

# Maps each URI to the first prefix seen for it, so later prefixes that
# share the URI can be reported with that first prefix as their synonym.
hash = {}
input['@context'].each do |k, v|
  synonym = hash.key?(v) ? hash[v] : ""
  hash[v] = k unless hash.key?(v)

  use, count, error, type, message = 'F', 0, 0, 'unknown', ""
  local_copy = "#{path}/#{k}"

  # Fetch only when there is no previously downloaded copy.
  unless File.exist?(local_copy)
    res, message = fetch v
    File.open(local_copy, "w") { |out| out.puts res.body } if res
  end

  if File.exist?(local_copy)
    count, error, type = validate local_copy
    message += "parse_error(#{error})" if error > 0
    use = 'T' if count > 0 && error == 0
  end

  puts [use, k, v, synonym, type, count, message].join("\t")
  # The link target is relative to the "_RDF" directory.
  FileUtils.ln_sf("../#{k}", "#{path}/_RDF/#{k}") if use == 'T'
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment