Skip to content

Instantly share code, notes, and snippets.

@jindrichmynarz
Last active December 17, 2015 21:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jindrichmynarz/5673305 to your computer and use it in GitHub Desktop.
Save jindrichmynarz/5673305 to your computer and use it in GitHub Desktop.
#!/usr/bin/ruby
require "rubygems"
require "rdf/rdfa"
require "rdf/turtle"
require "sparql/client"
require "sparql"
# Script-wide settings: the Fuseki dataset name, its SPARQL endpoints, the
# directory holding the validation queries, and the files loaded into named
# graphs on start-up (named graph URI => path to the file).
# TODO: Make this into YAML?
# Frozen so that the constant cannot be modified by accident at runtime.
CONFIG = {
  dataset: "oplzz",
  sparql_endpoint: "http://localhost:3030/oplzz/sparql",
  sparql_update_endpoint: "http://localhost:3030/oplzz/update",
  sparql_data_endpoint: "http://localhost:3030/oplzz/data",
  test_dir: "../pickwick-api/CONFIG/validation-rules",
  files_to_load: {
    "http://vocab.damepraci.eu" => "../data-modelling/complete.ttl",
    "http://id.loc.gov/vocabulary/iso639-1" => "data/langs.ttl",
    "http://data.damepraci.cz/resource/currency-codes" => "../data-modelling/codelists/currencyCodes.ttl"
  }.freeze
}.freeze
# Validates RDFa-annotated HTML with SPARQL-based tests.
#
# The input is parsed into an RDF graph and inserted into the default graph
# behind the SPARQL update endpoint. Every query file found in the test
# directory is then run against the query endpoint; each result is loaded
# into a local graph from which spin:ConstraintViolation resources are
# extracted with QUERY and printed.
class Validator
  # TODO: read query from file
  # add other bindings
  # Selects the Czech ("cs") label and the optional root, path and invalid
  # value of every spin:ConstraintViolation in a test's result graph.
  QUERY = %Q(
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX spin: <http://spinrdf.org/spin#>
SELECT ?label ?violationRoot ?violationPath ?invalidValue
WHERE {
?violation a spin:ConstraintViolation ;
rdfs:label ?label .
FILTER (lang(?label) = "cs")
OPTIONAL {
?violation spin:violationRoot ?violationRoot .
}
OPTIONAL {
?violation spin:violationPath ?violationPath .
}
OPTIONAL {
?violation spin:invalidValue ?invalidValue .
}
}
)

  # Variables read from every QUERY solution, in output order.
  VARIABLES = [:label, :violationRoot, :violationPath, :invalidValue].freeze

  # Collapse a literal onto a single line and wrap it in double quotes.
  #
  # Newlines and tabs become spaces and runs of spaces are squeezed into one.
  #
  # @param literal [String] lexical value to normalize
  # @return [String] the normalized value surrounded by double quotes
  def self.normalizeLiteral(literal)
    "\"#{literal.tr("\n\t", " ").squeeze(" ")}\""
  end

  # @param sparql_endpoint [String] URL of the SPARQL query endpoint
  # @param sparql_update_endpoint [String] URL of the SPARQL update endpoint
  # @param test_dir [String] directory whose files are the test queries
  # @param strict [Boolean] passed to the RDFa reader as its :validate option
  def initialize(sparql_endpoint, sparql_update_endpoint, test_dir, strict = false)
    @sparql = SPARQL::Client.new(sparql_endpoint)
    # Options are passed without braces so the call works both with libraries
    # taking an options Hash and with those taking keyword arguments (a
    # positional Hash is no longer converted to keywords on Ruby 3).
    @sparql_update = SPARQL::Client.new(sparql_update_endpoint, method: :post, protocol: "1.1")
    # Only regular files are tests (File.read raises on a directory); sorting
    # makes the order of the reported violations deterministic.
    @tests = Dir[File.join(test_dir, "*")].select { |path| File.file?(path) }.sort
    @strict = strict
  end

  # Parse HTML with RDFa into an RDF graph.
  #
  # Aborts the process with the reader's error message if parsing fails.
  #
  # @param data [String] the HTML+RDFa document
  # @return [RDF::Graph] the extracted statements
  def parse(data)
    graph = RDF::Graph.new
    begin
      graph << RDF::RDFa::Reader.new(data, validate: @strict)
    rescue RDF::ReaderError => error
      abort error.message
    end
    graph
  end

  # Run a single test, formalized as a SPARQL query, on the validated data.
  #
  # The query result is loaded into a local graph and QUERY is executed on
  # it to list the constraint violations the test produced.
  #
  # @param test [String] path to the file containing the test query
  # @return [Array<Array<String>>] one row per violation, with values in
  #   VARIABLES order; a value responding to #value is normalized and quoted,
  #   anything else (including an unbound variable) is converted with #to_s
  def run_test(test)
    violations = RDF::Graph.new
    violations << @sparql.query(File.read(test))
    SPARQL.execute(QUERY, violations).map do |solution|
      VARIABLES.map do |variable|
        value = solution[variable]
        if value.respond_to?(:value)
          self.class.normalizeLiteral(value.value)
        else
          value.to_s
        end
      end
    end
  end

  # Validate input data with the SPARQL-based tests and print the outcome.
  #
  # Prints the extracted data as Turtle, followed by the violations found,
  # one violation per line with its values separated by ", ".
  #
  # @param data [String] the HTML+RDFa document to validate
  # @return [nil]
  def validate(data)
    @sparql_update.clear(:default) # CLEAR default graph from data validated in the past.
    parsed_data = parse(data)
    puts "Extrahovaná data:\n=================\n"
    puts parsed_data.dump(:turtle)
    @sparql_update.insert_data parsed_data # TODO: Catch 400 Parse error
    # Flatten by one level only: joining a whole test's rows at once would
    # merge all of its violations into a single comma-separated line.
    violations = @tests.flat_map { |test| run_test(test) }
    puts "\nChyby:\n======\n"
    puts violations.map { |row| row.join(", ") }.join("\n")
  end
end
# Start the Fuseki in-memory server in the background and load the data model
# files listed in CONFIG[:files_to_load] into their named graphs.
#
# The server's PID is written to ".pid.tmp" in the current directory so that
# a later `stop` invocation can find the process.
def start
  puts "Starting..."
  # Arguments are passed as a list, so no shell is involved and the returned
  # PID is that of the launched command itself.
  pid = spawn("fuseki-server", "--update", "--mem", "/#{CONFIG[:dataset]}")
  File.write(".pid.tmp", pid.to_s) # Light-weight persistence :-)
  sleep 5 # Let Fuseki take a deep breath before sending data in.
  CONFIG[:files_to_load].each do |named_graph, path|
    loaded = system("s-put", CONFIG[:sparql_data_endpoint], named_graph, path)
    # system returns false on a non-zero exit and nil if the command could
    # not be run; report it instead of failing silently.
    warn "Failed to load #{path} into <#{named_graph}>" unless loaded
  end
end
# Stop the Fuseki server whose PID was recorded in ".pid.tmp".
#
# The PID file is removed before the signal is sent, so it never outlives a
# stop attempt.
#
# @raise [Errno::ENOENT] if the PID file does not exist
# @raise [ArgumentError] if the PID file does not hold a positive integer
def stop
  puts "Stopping..."
  pid_file = ".pid.tmp"
  recorded = File.read(pid_file)
  File.delete(pid_file)
  # Parse strictly: String#to_i would turn an empty or garbled file into 0,
  # and Process.kill with PID 0 signals this script's own process group.
  pid = Integer(recorded.strip, 10)
  raise ArgumentError, "Invalid PID in #{pid_file}: #{pid}" unless pid > 0
  begin
    Process.kill("HUP", pid)
  rescue Errno::ESRCH
    # The server is already gone; there is nothing left to stop.
    warn "No process with PID #{pid} is running"
  end
end
# Validate an RDFa-annotated file on a given path.
#
# Builds a Validator from the endpoints and test directory in CONFIG and
# runs it on the file's contents.
#
# @param path [String] path to the HTML+RDFa file
# @raise [IOError] if no file exists at the given path
def validate(path)
  puts "Validating..."
  # File.exist? is the supported spelling; File.exists? was deprecated and
  # has been removed in Ruby 3.2.
  raise IOError, "File doesn't exists" unless File.exist?(path)
  validator = Validator.new(CONFIG[:sparql_endpoint], CONFIG[:sparql_update_endpoint], CONFIG[:test_dir])
  validator.validate(File.read(path))
end
# Command-line entry point: "start" launches Fuseki, "stop" shuts it down,
# and any other argument is treated as the path of a file to validate.
param = ARGV[0]
# Without an argument there is nothing to validate; exit with a usage
# message rather than passing nil on as a path.
abort "Usage: #{$PROGRAM_NAME} start|stop|PATH_TO_RDFA_FILE" if param.nil?
case param
when "start" then start
when "stop" then stop
else validate(param)
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment