Skip to content

Instantly share code, notes, and snippets.

@evren
Last active December 1, 2017 03:59
Show Gist options
  • Star 5 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save evren/5612900 to your computer and use it in GitHub Desktop.
Save evren/5612900 to your computer and use it in GitHub Desktop.
Example of using Stardog integrity constraint validation (ICV) to validate the example data described at http://www.w3.org/2012/12/rdf-val/SOTA.
# Stardog commands and their output for the RDF validation example
# First create the Stardog database and load data
$ ./stardog-admin db create -n sota sota-data.ttl
Bulk loading data to new database.
Loading data completed...Loaded 25 triples in 00:00:00 @ 0.4K triples/sec.
Successfully created database 'sota'.
# Then add the constraints to the database
$ ./stardog-admin icv add sota sota-constraints.ttl
Successfully added constraints in 00:00:00.
# Now run the validation command
# This command just prints which constraints are violated
$ ./stardog icv validate sota
Data is NOT valid.
The following constraints were violated:
AxiomConstraint{:reportedOn rdfs:domain :Issue}
AxiomConstraint{:related rdfs:range :Issue}
AxiomConstraint{:Issue rdfs:subClassOf (:reportedBy exactly 1 owl:Thing)}
AxiomConstraint{:reproducedBy rdfs:range foaf:Person}
AxiomConstraint{:reportedBy rdfs:range foaf:Person}
AxiomConstraint{:state rdfs:domain :Issue}
AxiomConstraint{:state rdfs:range :ValidState}
# Now run the explanation command to get details about violations
# We use the --merge option to group related violations together
# By default only one explanation is printed so we increase the limit to 10
$ ./stardog icv explain --limit 10 --merge sota
VIOLATED :reportedOn rdfs:domain :Issue
ASSERTED :issue4 :reportedOn "x0"
NOT_INFERRED :issue4 a :Issue
1.1) VIOLATED :related rdfs:range :Issue
ASSERTED :issue7 :related :issue4
NOT_INFERRED :issue4 a :Issue
1.2) VIOLATED :related rdfs:range :Issue
ASSERTED :issue7 :related :issue3
NOT_INFERRED :issue3 a :Issue
1.3) VIOLATED :related rdfs:range :Issue
ASSERTED :issue7 :related :issue2
NOT_INFERRED :issue2 a :Issue
VIOLATED :Issue rdfs:subClassOf (:reportedBy exactly 1 owl:Thing)
ASSERTED :issue7 :reportedBy :user2
ASSERTED :issue7 a :Issue
ASSERTED :issue7 a owl:Thing
ASSERTED :issue7 :reportedBy :user6
NOT_INFERRED :issue7 :reportedBy <tag:stardog:api:variable:x0>
VIOLATED :reproducedBy rdfs:range foaf:Person
ASSERTED :issue7 :reproducedBy :user1
NOT_INFERRED :user1 a foaf:Person
VIOLATED :reportedBy rdfs:range foaf:Person
ASSERTED :issue7 :reportedBy :user6
NOT_INFERRED :user6 a foaf:Person
VIOLATED :state rdfs:domain :Issue
ASSERTED :issue4 :state :unsinged
NOT_INFERRED :issue4 a :Issue
VIOLATED :state rdfs:range :ValidState
ASSERTED :issue4 :state :unsinged
NOT_INFERRED :unsinged a :ValidState
# We can also add SPARQL queries as constraints
$ ./stardog-admin icv add sota sota-query.sparql
# We can run validation with a mixture of OWL constraints and SPARQL constraints
$ ./stardog icv validate sota
Data is NOT valid.
...
# ---------------------------------------------------------------------------
# sota-constraints.ttl
#
# OWL axioms that are loaded as integrity constraints for the issue-tracker
# example. Each axiom below shows up as one "AxiomConstraint" in the
# validation output when it is violated.
#
# Fix: the foaf namespace IRI previously ended with a stray apostrophe
# (<http://xmlns.com/foaf/0.1/'>), so foaf:Person did not denote the real
# FOAF term. The constraints and the data must use the same IRI, so it is
# corrected in both documents here.
# ---------------------------------------------------------------------------
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix : <http://www.w3.org/2012/12/rdf-val/SOTA-ex#> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .

# Every :Issue has exactly one state, reporter and report date; the
# reproduction and relation properties are optional (minCardinality 0).
:Issue a owl:Class ;
    rdfs:subClassOf
        [ owl:onProperty :state ; owl:cardinality 1 ] ,
        [ owl:onProperty :reportedBy ; owl:cardinality 1 ] ,
        [ owl:onProperty :reportedOn ; owl:cardinality 1 ] ,
        [ owl:onProperty :reproducedBy ; owl:minCardinality 0 ] ,
        [ owl:onProperty :reproducedOn ; owl:minCardinality 0 ] ,
        [ owl:onProperty :related ; owl:minCardinality 0 ] .

# Domain and range declarations for each property used on issues.
:state a owl:ObjectProperty ,
        owl:FunctionalProperty ; rdfs:domain :Issue ; rdfs:range :ValidState .
:related a owl:ObjectProperty ; rdfs:domain :Issue ; rdfs:range :Issue .
:reportedBy a owl:ObjectProperty ; rdfs:domain :Issue ; rdfs:range foaf:Person .
:reportedOn a owl:DatatypeProperty ; rdfs:domain :Issue ; rdfs:range xsd:dateTime .
:reproducedBy a owl:ObjectProperty ; rdfs:domain :Issue ; rdfs:range foaf:Person .
:reproducedOn a owl:DatatypeProperty ; rdfs:domain :Issue ; rdfs:range xsd:dateTime .

# ---------------------------------------------------------------------------
# sota-data.ttl
#
# Instance data for the example. The errors flagged in the trailing comments
# are deliberate: they are what the validation run is expected to report.
# ---------------------------------------------------------------------------
@prefix : <http://www.w3.org/2012/12/rdf-val/SOTA-ex#> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@base <http://www.w3.org/2012/12/rdf-val/SOTA-ex#> .

<#issue7> a :Issue , :SecurityIssue ;
    :state :unassigned ;
    :reportedBy <#user6> , <#user2> ; # only one reportedBy permitted
    :reportedOn "2012-12-31T23:57:00Z"^^xsd:dateTime ;
    :reproducedBy <#user2>, <#user1> ;
    :reproducedOn "2012-10-31T23:57:00Z"^^xsd:dateTime ; # reproduced before being reported
    :related <#issue4>, <#issue3>, <#issue2> . # referenced issues not included

<#issue4> # a ??? - missing type arc
    :state :unsinged ; # misspelled term in value set.
    # :reportedBy ??? - missing required property
    :reportedOn "2012-12-31T23:57:00Z"^^xsd:dateTime .

<#user2> a foaf:Person ;
    foaf:givenName "Alice" ;
    foaf:familyName "Smith" ;
    foaf:phone <tel:+1.555.222.2222> ;
    foaf:mbox <mailto:alice@example.com> .

<#user6> a foaf:Agent ; # should be foaf:Person
    foaf:givenName "Bob" ; # foaf:familyName "???" - missing required property
    foaf:phone <tel:+.555.222.2222> ; # malformed tel: URL
    foaf:mbox <mailto:alice@example.com> .

# The only two members of the :ValidState value set.
:assigned a :ValidState .
:unassigned a :ValidState .
// Copyright (c) 2010 - 2015, Clark & Parsia, LLC. <http://www.clarkparsia.com>
// For more information about licensing and copyright of this software, please contact
// inquiries@clarkparsia.com or visit http://stardog.com
package com.clarkparsia.pellet.examples;
import java.io.File;
import com.complexible.common.rdf.model.Namespaces;
import com.complexible.stardog.api.Connection;
import com.complexible.stardog.api.ConnectionConfiguration;
import com.complexible.stardog.api.admin.AdminConnection;
import com.complexible.stardog.api.admin.AdminConnectionConfiguration;
import com.complexible.stardog.icv.api.ICVConnection;
import com.complexible.stardog.reasoning.Proof;
import com.complexible.stardog.reasoning.ProofWriter;
import org.openrdf.rio.RDFFormat;
/**
 * Validates the example data described at http://www.w3.org/2012/12/rdf-val/SOTA
 * using Stardog integrity constraints.
 *
 * <p>The program (re)creates a database named "sota" from the given data file, adds the
 * constraints from the given Turtle file, checks validity and, if the data is not valid,
 * prints up to 10 merged explanations of the violations.</p>
 *
 * @author Evren Sirin
 */
public class SOTAExample {
    /**
     * Entry point.
     *
     * @param args exactly two arguments: the data file to load and the Turtle file
     *             containing the constraints
     * @throws Exception if any Stardog operation fails; the connections opened so far
     *                   are closed before the exception propagates
     */
    public static void main(String[] args) throws Exception {
        if (args.length != 2) {
            System.err.println("Usage: " + SOTAExample.class.getName() + " <data-file> <constraints-file>");
            System.exit(1);
        }

        // the db name
        String sota = "sota";

        String dataFile = args[0];
        String constraintsFile = args[1];

        // first create a temporary database to use
        // (if there is already a database with such a name, drop it first)
        // Stardog should be running on the same machine locally for this example
        AdminConnection aAdminConn = AdminConnectionConfiguration.toServer("snarl://localhost:5820").credentials("admin", "admin").connect();
        try {
            if (aAdminConn.list().contains(sota)) {
                aAdminConn.drop(sota);
            }

            // Load the data in the db while creating it
            ConnectionConfiguration aConfig = aAdminConn.memory(sota).create(new File(dataFile));

            // obtain a connection to the database
            Connection aConn = aConfig.connect();
            try {
                // ok, we have a database, now need the validator
                ICVConnection aValidator = aConn.as(ICVConnection.class);

                // add the constraints, must do this in a transaction
                aValidator.begin();
                aValidator.addConstraints().format(RDFFormat.TURTLE).file(new File(constraintsFile));
                aValidator.commit();

                // use namespaces to pretty print results
                Namespaces aNamespaces = aValidator.namespaces();

                // check validity
                boolean isValid = aValidator.isValid();

                // print validation result
                System.out.format("Data is%s valid%n", isValid ? "" : " NOT");

                // if not valid print explanations
                if (!isValid) {
                    Iterable<Proof> aViolationProofs = aValidator.explain().countLimit(10).mergeExplanations().proofs();
                    for (Proof aProof : aViolationProofs) {
                        System.out.println(ProofWriter.toString(aNamespaces, aProof));
                    }
                }
            }
            finally {
                // close the database connection even if validation threw
                aConn.close();
            }
        }
        finally {
            // the admin connection is opened first, so it is closed last
            aAdminConn.close();
        }
    }
}
Bulk loading data to new database sota.
Creating indexes...finished in 00:00:00.001
Loaded 25 triples to sota from 1 file(s) in 00:00:00.009 @ 2.8K triples/sec.
Successfully created database 'sota'.
Data is NOT valid
VIOLATED :reportedOn rdfs:domain :Issue
ASSERTED :issue4 :reportedOn "x0"
NOT_INFERRED :issue4 a :Issue
1.1) VIOLATED :related rdfs:range :Issue
ASSERTED :issue7 :related :issue4
NOT_INFERRED :issue4 a :Issue
1.2) VIOLATED :related rdfs:range :Issue
ASSERTED :issue7 :related :issue3
NOT_INFERRED :issue3 a :Issue
1.3) VIOLATED :related rdfs:range :Issue
ASSERTED :issue7 :related :issue2
NOT_INFERRED :issue2 a :Issue
VIOLATED :Issue rdfs:subClassOf (:reportedBy exactly 1 owl:Thing)
ASSERTED :issue7 :reportedBy :user2
ASSERTED :issue7 a :Issue
ASSERTED :issue7 a owl:Thing
ASSERTED :issue7 :reportedBy :user6
NOT_INFERRED :issue7 :reportedBy <tag:stardog:api:variable:x0>
VIOLATED :reproducedBy rdfs:range foaf:Person
ASSERTED :issue7 :reproducedBy :user1
NOT_INFERRED :user1 a foaf:Person
VIOLATED :reportedBy rdfs:range foaf:Person
ASSERTED :issue7 :reportedBy :user6
NOT_INFERRED :user6 a foaf:Person
VIOLATED :state rdfs:domain :Issue
ASSERTED :issue4 :state :unsinged
NOT_INFERRED :issue4 a :Issue
VIOLATED :state rdfs:range :ValidState
ASSERTED :issue4 :state :unsinged
NOT_INFERRED :unsinged a :ValidState
// Copyright (c) 2010 - 2015, Clark & Parsia, LLC. <http://www.clarkparsia.com>
// For more information about licensing and copyright of this software, please contact
// inquiries@clarkparsia.com or visit http://stardog.com
package com.clarkparsia.pellet.examples;
import java.io.File;
import java.util.List;
import java.util.Set;
import com.complexible.common.iterations.Iteration;
import com.complexible.common.rdf.query.resultio.TextTableQueryResultWriter;
import com.complexible.stardog.StardogException;
import com.complexible.stardog.api.Connection;
import com.complexible.stardog.api.ConnectionConfiguration;
import com.complexible.stardog.api.admin.AdminConnection;
import com.complexible.stardog.api.admin.AdminConnectionConfiguration;
import com.complexible.stardog.icv.Constraint;
import com.complexible.stardog.icv.ConstraintFactory;
import com.complexible.stardog.icv.ConstraintViolation;
import com.complexible.stardog.icv.api.ICVConnection;
import com.google.common.collect.Sets;
import org.openrdf.query.BindingSet;
import com.google.common.base.Charsets;
import com.google.common.collect.Lists;
import com.google.common.io.Files;
import org.openrdf.query.TupleQueryResult;
import org.openrdf.query.impl.TupleQueryResultImpl;
import org.openrdf.query.resultio.QueryResultIO;
/**
* Example of using Stardog Integrity Constraint functionality for data validation example described at http://www.w3.org/2012/12/rdf-val/SOTA
*
* @author Evren Sirin
*/
public class SOTAQueryExample {
public static void main(String[] args) throws Exception {
if (args.length != 2) {
System.err.println("Usage: " + SOTAQueryExample.class.getName() + " <data-file> <constraints-file>");
System.exit(1);
}
// the db name
String sota = "sota";
String dataFile = args[0];
String constraintFile = args[1];
// first create a temporary database to use
// (if there is already a database with such a name, drop it first)
// Stardog should be running on the same machine locally for this example
AdminConnection aAdminConn = AdminConnectionConfiguration.toServer("snarl://localhost:5820").credentials("admin", "admin").connect();
if (aAdminConn.list().contains(sota)) {
aAdminConn.drop(sota);
}
// Load the data in the db while creating it
ConnectionConfiguration aConfig = aAdminConn.memory(sota).create(new File(dataFile));
// obtain a connection to the database
Connection aConn = aConfig.connect();
// ok, we have a database, now need the validator
ICVConnection aValidator = aConn.as(ICVConnection.class);
// read the SPARQL constraint from the file
Constraint aConstraint = ConstraintFactory.constraint(Files.toString(new File(constraintFile), Charsets.UTF_8));
// validate the constraint
Iteration<ConstraintViolation<BindingSet>, StardogException> aViolations = aValidator.getViolationBindings(aConstraint);
// we should have a single violation since we validated a single constraint
Iteration<BindingSet, StardogException> aBindings = aViolations.next().getViolations();
// there might be multiple different bindings in a constraint violation so we'll print them all
TupleQueryResult aResult = convertToQueryResult(aBindings);
QueryResultIO.write(aResult, TextTableQueryResultWriter.FORMAT, System.out);
// ALWAYS close iterations and connections when you're done with them!
aBindings.close();
aViolations.close();
aConn.close();
aAdminConn.close();
}
private static TupleQueryResult convertToQueryResult(Iteration<BindingSet, StardogException> theBindings) throws StardogException {
Set<String> aVars = Sets.newLinkedHashSet();
List<BindingSet> aBindingsList = Lists.newArrayList();
while (theBindings.hasNext()) {
BindingSet aBindingSet = theBindings.next();
aVars.addAll(aBindingSet.getBindingNames());
aBindingsList.add(aBindingSet);
}
return new TupleQueryResultImpl(Lists.newArrayList(aVars), aBindingsList);
}
}
Bulk loading data to new database sota.
Creating indexes...finished in 00:00:00.001
Loaded 25 triples to sota from 1 file(s) in 00:00:00.009 @ 2.8K triples/sec.
Successfully created database 'sota'.
+--------------------------------------------------+-----------+------------+---------------+---------------+---------------------+----------------------+----------------------------------------------------------------------------------+
| issue | typeArc | stateValue | reportedByArc | reportedOnArc | reportedByArcCount | reproducedOnSequence | missingRelatedIssues |
+--------------------------------------------------+-----------+------------+---------------+---------------+---------------------+----------------------+----------------------------------------------------------------------------------+
| http://www.w3.org/2012/12/rdf-val/SOTA-ex#issue7 | "passed" | "passed" | "passed" | "passed" | "expected 1, got 2" | "bad sequence" | "<http://www.w3.org/2012/12/rdf-val/SOTA-ex#issue3> |
| | | | | | | | <http://www.w3.org/2012/12/rdf-val/SOTA-ex#issue2>" |
| http://www.w3.org/2012/12/rdf-val/SOTA-ex#issue4 | "missing" | "invalid" | "missing" | "passed" | "expected 1, got 0" | "passed" | "passed" |
+--------------------------------------------------+-----------+------------+---------------+---------------+---------------------+----------------------+----------------------------------------------------------------------------------+
Query returned 2 results in 00:00:00.012
# Validation query for the issue-tracker example.
#
# Returns one row per candidate issue with one column per check. A check
# column holds "passed" or a short description of what is wrong.
#
# Fix: the foaf namespace IRI previously ended with a stray apostrophe
# (<http://xmlns.com/foaf/0.1/'>). The prefix is not used in the query body,
# so correcting it does not change the results.
PREFIX : <http://www.w3.org/2012/12/rdf-val/SOTA-ex#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT DISTINCT ?issue
  # "passed" when the issue is explicitly typed as :Issue.
  (if(BOUND(?t), "passed", "missing") AS ?typeArc)
  # "passed" only when a state is present and is one of the two allowed values.
  (if(BOUND(?state) && (?state=:unassigned || ?state=:assigned),
      "passed", "invalid") AS ?stateValue)
  (if(BOUND(?reportedBy), "passed", "missing") AS ?reportedByArc)
  (if(BOUND(?reportedOn), "passed", "missing") AS ?reportedOnArc)
  # ?reportedByCount is unbound when the issue has no :reportedBy at all,
  # because the counting subquery then produces no row for it.
  (if(!BOUND(?reportedByCount), "expected 1, got 0",
      if(?reportedByCount=1, "passed",
         CONCAT("expected 1, got ", STR(?reportedByCount)))) AS ?reportedByArcCount)
  # "passed" when there is no reproduction date or it is later than the report date.
  (if(!BOUND(?reproducedOn) || ?reproducedOn > ?reportedOn,
      "passed", "bad sequence") AS ?reproducedOnSequence)
  (if(BOUND(?missingRelatedIssuesStr), ?missingRelatedIssuesStr, "passed")
      AS ?missingRelatedIssues)
WHERE {
  # Get all viable :Issues by use of related predicates.
  # (:state is not in this list, so a node with only a :state is not a candidate.)
  { SELECT DISTINCT ?issue WHERE {
      { ?issue a :Issue }
      UNION { ?issue :reportedBy|:reportedOn|:reproducedBy|:reproducedOn|:related ?rprt }
    }
  }

  # Test for a type arc and state.
  OPTIONAL { ?issue a ?t FILTER (?t = :Issue) }
  OPTIONAL { ?issue :state ?state }

  # Must have 1 reportedBy.
  OPTIONAL { SELECT ?issue
      (SAMPLE(?reportedBy1) AS ?reportedBy)
      (COUNT(?reportedBy1) AS ?reportedByCount)
    WHERE {
      OPTIONAL { ?issue :reportedBy ?reportedBy1 }
    } GROUP BY ?issue
  }

  OPTIONAL { ?issue :reportedOn ?reportedOn }
  # ?reproducedBy is matched but not projected; DISTINCT collapses the extra rows.
  OPTIONAL { ?issue :reproducedBy ?reproducedBy }
  OPTIONAL { ?issue :reproducedOn ?reproducedOn }

  # All :related issues must be known entities.
  OPTIONAL {
    SELECT ?issue
      (GROUP_CONCAT(CONCAT("<", STR(?referent), ">"))
        AS ?missingRelatedIssuesStr) {
      # List of missing issues related to ?issue.
      # A referent counts as missing when it is the subject of no triple.
      SELECT ?issue ?referent
        (SUM(if(BOUND(?referentP), 1, 0)) AS ?referentCount)
      WHERE {
        ?issue :related ?referent
        OPTIONAL { ?referent ?referentP ?referentO }
      } GROUP BY ?issue ?referent
      HAVING (SUM(if(BOUND(?referentP), 1, 0)) = 0)
    } GROUP BY ?issue
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment