Last active
December 17, 2015 21:18
-
-
Save jindrichmynarz/5673305 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/ruby | |
require "rubygems" | |
require "rdf/rdfa" | |
require "rdf/turtle" | |
require "sparql/client" | |
require "sparql" | |
# Script-wide settings: the Fuseki dataset name, its SPARQL query/update/data
# endpoints, the directory holding the validation queries, and the files to
# load into named graphs (graph URI => path) when the server starts.
# Frozen so the shared configuration cannot be modified by accident.
# TODO: Make this into YAML?
# NOTE(review): the upper-case "CONFIG" segment in :test_dir looks unusual --
# confirm it matches the real directory name.
CONFIG = {
  :dataset => "oplzz",
  :sparql_endpoint => "http://localhost:3030/oplzz/sparql",
  :sparql_update_endpoint => "http://localhost:3030/oplzz/update",
  :sparql_data_endpoint => "http://localhost:3030/oplzz/data",
  :test_dir => "../pickwick-api/CONFIG/validation-rules",
  :files_to_load => {
    "http://vocab.damepraci.eu" => "../data-modelling/complete.ttl",
    "http://id.loc.gov/vocabulary/iso639-1" => "data/langs.ttl",
    "http://data.damepraci.cz/resource/currency-codes" => "../data-modelling/codelists/currencyCodes.ttl"
  }.freeze
}.freeze
# Validates RDFa-annotated HTML with SPARQL-based tests.
#
# The input is parsed into an RDF graph and inserted into the default graph
# behind the update endpoint. Every query file found in the test directory is
# then run against the query endpoint; each result is collected into a graph
# from which spin:ConstraintViolation resources are extracted and printed.
class Validator
  # Selects constraint violations (with a Czech label and the optional root,
  # path and invalid value) from the graph produced by a single test.
  # TODO: read query from file
  #       add other bindings
  QUERY = %Q(
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX spin: <http://spinrdf.org/spin#>
SELECT ?label ?violationRoot ?violationPath ?invalidValue
WHERE {
?violation a spin:ConstraintViolation ;
rdfs:label ?label .
FILTER (lang(?label) = "cs")
OPTIONAL {
?violation spin:violationRoot ?violationRoot .
}
OPTIONAL {
?violation spin:violationPath ?violationPath .
}
OPTIONAL {
?violation spin:invalidValue ?invalidValue .
}
}
)

  # Variables read from every QUERY solution, in output column order.
  VARIABLES = [:label, :violationRoot, :violationPath, :invalidValue].freeze

  # Turns a string into a single-line, double-quoted value: newlines and tabs
  # become spaces, and runs of spaces are collapsed into one.
  #
  # @param literal [String]
  # @return [String] the normalized text wrapped in double quotes
  def self.normalizeLiteral(literal)
    "\"#{literal.gsub(/\n|\t/, " ").squeeze(" ")}\""
  end

  # @param sparql_endpoint [String] URL used for running the test queries
  # @param sparql_update_endpoint [String] URL used for clearing/inserting data
  # @param test_dir [String] directory whose files are the test queries
  # @param strict [Boolean] passed to the RDFa reader as its :validate option
  def initialize(sparql_endpoint, sparql_update_endpoint, test_dir, strict = false)
    @sparql = SPARQL::Client.new(sparql_endpoint)
    # Options are passed brace-less so they are accepted both by an options-hash
    # parameter and by a keyword-argument signature.
    @sparql_update = SPARQL::Client.new(sparql_update_endpoint,
                                        :method => :post,
                                        :protocol => "1.1")
    # Only regular files are tests; sorting keeps the report order stable.
    @tests = Dir[File.join(test_dir, "*")].select { |path| File.file?(path) }.sort
    @strict = strict
  end

  # Parses HTML with RDFa into an RDF graph.
  # Aborts the program with the reader's message if parsing fails.
  #
  # @param data [String] HTML+RDFa source
  # @return [RDF::Graph]
  def parse(data)
    graph = RDF::Graph.new
    begin
      graph << RDF::RDFa::Reader.new(data, :validate => @strict)
    rescue RDF::ReaderError => error
      abort error.message
    end
    graph
  end

  # Runs a single test, formalized as a SPARQL query stored in a file, on the
  # validated data and extracts the violations from its result.
  #
  # @param test [String] path of the query file
  # @return [Array<Array<String>>] one row per violation, with one string per
  #   entry of VARIABLES (an empty string when the variable is unbound)
  def run_test(test)
    graph = RDF::Graph.new
    graph << @sparql.query(File.read(test))
    SPARQL.execute(QUERY, graph).map do |binding|
      VARIABLES.map do |variable|
        value = binding[variable]
        if value.respond_to? :value
          self.class.normalizeLiteral value.value
        else
          value.to_s # nil (unbound OPTIONAL variable) becomes ""
        end
      end
    end
  end

  # Validates input data with the SPARQL-based tests and prints both the
  # extracted data and the violations found, one violation per line.
  #
  # @param data [String] HTML+RDFa source
  # @return [nil]
  def validate(data)
    @sparql_update.clear(:default) # CLEAR default graph from data validated in the past.
    parsed_data = parse(data)
    puts "Extrahovaná data:\n=================\n"
    puts parsed_data.dump(:turtle)
    @sparql_update.insert_data parsed_data # TODO: Catch 400 Parse error
    results = @tests.map { |test| run_test(test) }
    puts "\nChyby:\n======\n"
    puts format_violations(results)
  end

  private

  # Renders the per-test results as text: every violation row becomes one
  # comma-separated line; tests without violations contribute nothing.
  #
  # @param results [Array<Array<Array<String>>>] return values of #run_test
  # @return [String]
  def format_violations(results)
    results.flatten(1).map { |row| row.join(", ") }.join("\n")
  end
end
# Starts a Fuseki in-memory server in the background, records its process id
# in ".pid.tmp" and loads the files listed in CONFIG[:files_to_load] into
# their named graphs through the data endpoint.
def start
  puts "Starting..."
  pid = spawn "fuseki-server --update --mem /#{CONFIG[:dataset]}"
  File.open(".pid.tmp", "w") { |file| file.write pid } # Light-weight persistence :-)
  sleep 5 # Let Fuseki take a deep breath before sending data in.
  CONFIG[:files_to_load].each do |named_graph, path|
    # Argument-vector form: no shell is involved, so the values are passed verbatim.
    loaded = system("s-put", CONFIG[:sparql_data_endpoint], named_graph, path)
    warn "Failed to load #{path} into <#{named_graph}>" unless loaded
  end
end
# Stops the Fuseki server whose process id was recorded in ".pid.tmp" by
# sending it SIGHUP. The pid file is removed before the signal is sent.
#
# Raises Errno::ENOENT (from File.read) when the pid file is missing and
# ArgumentError when it does not contain a positive decimal integer.
def stop
  puts "Stopping..."
  recorded = File.read(".pid.tmp").strip
  File.delete(".pid.tmp")
  # Parse strictly: String#to_i would turn an empty or corrupt file into 0,
  # and signalling pid 0 targets the caller's whole process group.
  pid = Integer(recorded, 10)
  raise ArgumentError, "Invalid PID in .pid.tmp: #{pid}" unless pid > 0
  Process.kill("HUP", pid)
end
# Validates an RDFa-annotated file on a given path using a Validator built
# from the endpoints and test directory in CONFIG.
#
# Raises IOError when no file exists at +path+.
def validate(path)
  puts "Validating..."
  # File.exist? replaces the deprecated File.exists? alias, which is no longer
  # available in current Ruby versions.
  raise IOError, "File doesn't exists" unless File.exist? path
  data = File.read path
  validator = Validator.new(CONFIG[:sparql_endpoint], CONFIG[:sparql_update_endpoint], CONFIG[:test_dir])
  validator.validate data
end
# Command-line entry point: "start" and "stop" control the Fuseki server; any
# other argument is treated as the path of a file to validate.
param = ARGV[0]
case param
when "start"
  start
when "stop"
  stop
when nil
  # Without an argument there is nothing to validate; explain the usage
  # instead of failing inside the file check.
  abort "Usage: #{$PROGRAM_NAME} start|stop|<path to RDFa file>"
else
  validate param
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment