Created
January 11, 2021 15:52
-
-
Save EmmanuelOga/e2511d0093c254695e4db203bcd25fdc to your computer and use it in GitHub Desktop.
Run SPARQL queries from Saxon using Rdf4J
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.emmanueloga.xml | |
import com.emmanueloga.Config | |
import com.emmanueloga.FISHX | |
import com.emmanueloga.NAMESPACES | |
import com.emmanueloga.logger | |
import net.sf.saxon.s9api.* | |
import net.sf.saxon.s9api.ItemType.* | |
import net.sf.saxon.s9api.OccurrenceIndicator.* | |
import net.sf.saxon.s9api.SequenceType.makeSequenceType | |
import org.eclipse.rdf4j.model.* | |
import org.eclipse.rdf4j.model.impl.CalendarLiteral | |
import org.eclipse.rdf4j.model.impl.DecimalLiteral | |
import org.eclipse.rdf4j.model.impl.IntegerLiteral | |
import org.eclipse.rdf4j.repository.sail.SailRepository | |
import java.math.BigDecimal | |
import java.math.BigInteger | |
import java.util.* | |
/**
 * Saxon extension function that executes a named SPARQL tuple query against an RDF4J repository.
 *
 * Arguments, as declared by [getArgumentTypes]:
 * 1. the query name (a string); the query text is read through [config] from `lib/sparql/<name>.sparql`;
 * 2. an optional map from binding name to value; every value is converted with [xdmToRdf];
 * 3. an optional array of binding names in priority order, used to return a single value.
 *
 * Both [repo] and [config] must be assigned before the function is called.
 */
class XsltSparql : ExtensionFunction {
    override fun getName() = QName(FISHX.NAMESPACE, "sparql")

    var repo: SailRepository? = null
    var config: Config? = null

    override fun getArgumentTypes() = arrayOf(
        makeSequenceType(STRING, ONE),
        makeSequenceType(ANY_MAP, ZERO_OR_ONE),
        makeSequenceType(ANY_ARRAY, ZERO_OR_ONE)
    )

    override fun getResultType() = makeSequenceType(ANY_ITEM, ZERO_OR_MORE)!!

    // Incremented on every call and prefixed to each log line so related lines can be correlated.
    private var callId = 0

    /**
     * Runs the query and converts its result to XDM.
     *
     * @return when the third argument is present and non-empty: the value of the first listed binding that is
     *   present in the first result row and has a non-blank string value, or the empty sequence if there is
     *   no such binding or no row at all; otherwise one map per result row.
     * @throws IllegalStateException if [repo] or [config] has not been assigned.
     */
    override fun call(arguments: Array<XdmValue>): XdmValue {
        // Fail with a descriptive message instead of a bare NullPointerException.
        val repository = checkNotNull(repo) { "XsltSparql.repo must be set before the function is called" }
        val configuration = checkNotNull(config) { "XsltSparql.config must be set before the function is called" }

        callId++
        arguments.forEachIndexed { i, arg -> logger.info("$callId ARG$i: $arg") }

        return repository.connection.use { conn ->
            val queryName = (arguments[0].itemAt(0) as XdmAtomicValue).stringValue
            val query = conn.prepareTupleQuery(configuration.readFile("lib/sparql/$queryName.sparql"))

            if (arguments.size >= 2 && !arguments[1].isEmpty) {
                val bindings = arguments[1].itemAt(0) as XdmMap
                for ((key, xdmValue) in bindings.entrySet()) {
                    val name = "${key.value}"
                    val value = xdmToRdf(xdmValue, repository)
                    logger.info("$callId BINDING $name to $value")
                    query.setBinding(name, value)
                }
            }

            // Materialize every row and close the query result even if iteration fails part-way.
            val rows = query.evaluate().use { queryResult -> queryResult.toList() }
            logger.info("$callId RESULT $rows")

            if (arguments.size == 3 && !arguments[2].isEmpty) {
                // * The 3rd argument can be used to return a single value. It should be an Array of bindings
                //   in priority order. The first binding present is returned, or the empty sequence otherwise.
                rows.firstOrNull()?.let { row ->
                    (arguments[2].itemAt(0) as XdmArray).asList()
                        .map { item -> (item as XdmAtomicValue).stringValue }
                        .mapNotNull { name -> row.getValue(name) }
                        .firstOrNull { candidate -> candidate.stringValue().isNotBlank() }
                        ?.let { candidate -> rdfToXdm(candidate) }
                } ?: XdmEmptySequence.getInstance()
            } else {
                // * If the 3rd argument is missing, return every row as a sequence of maps.
                XdmValue(rows.map { row ->
                    row.fold(XdmMap()) { map, binding ->
                        map.put(XdmAtomicValue(binding.name), rdfToXdm(binding.value))
                    }
                })
            }
        }
    }
}
/**
 * Converts an XDM value coming from XSLT into an RDF4J [Value] usable as a SPARQL query binding.
 *
 * Atomic values are delegated to [primitiveToRdf], arrays to [xdmArrayToRdf], and nodes become plain
 * literals holding the node's string value.
 *
 * @throws RuntimeException for any other kind of XDM value.
 */
fun xdmToRdf(xdmVal: XdmValue, repo: SailRepository): Value {
    if (xdmVal is XdmAtomicValue) return primitiveToRdf(xdmVal, repo)
    if (xdmVal is XdmArray) return xdmArrayToRdf(xdmVal, repo)
    if (xdmVal is XdmNode) {
        // NOTE: for passing arbitrary attributes.
        return repo.valueFactory.createLiteral(xdmVal.stringValue)
    }
    throw RuntimeException("Don't know how to map $xdmVal / ${xdmVal.javaClass} to RDF4J binding.")
}
/**
 * Converts a tagged [XdmArray] into an RDF4J [Value].
 *
 * Arrays let the caller state explicitly what kind of RDF term a binding is, for cases where a bare
 * primitive would be ambiguous. One of the following formats is expected:
 *  * `["url", url]` - an IRI built from `url`.
 *  * `["pre", prefix, localName]` - an IRI built from the namespace registered for `prefix` in [NAMESPACES].
 *  * `["str", value, optionalLang]` - a string literal, language-tagged when `optionalLang` is given.
 *
 * The third element is only read when the array has exactly 3 elements.
 *
 * @throws RuntimeException if the array has fewer than 2 elements, the tag is unknown, or a `"pre"` array
 *   names an unregistered prefix or lacks the local name.
 */
fun xdmArrayToRdf(value: XdmArray, repo: SailRepository): Value {
    if (value.arrayLength() < 2) throw RuntimeException("Array $value should have at least 2 elements")

    val tag = (value[0] as XdmAtomicValue).stringValue
    val arg1 = (value[1] as XdmAtomicValue).stringValue
    val arg2 = if (value.arrayLength() == 3) (value[2] as XdmAtomicValue).stringValue else null

    return when (tag) {
        "url" -> repo.valueFactory.createIRI(arg1)
        "pre" -> {
            // Validate both parts up front (as strToRdf does for prefixes) rather than handing nulls to RDF4J.
            val namespace = NAMESPACES[arg1] ?: throw RuntimeException("Unknown prefix: $arg1")
            val localName = arg2
                ?: throw RuntimeException("Array $value with tag 'pre' should have exactly 3 elements")
            repo.valueFactory.createIRI(namespace, localName)
        }
        "str" -> when (arg2) {
            null -> repo.valueFactory.createLiteral(arg1)
            else -> repo.valueFactory.createLiteral(arg1, arg2)
        }
        else -> throw RuntimeException("Array $value first element should be either 'pre', 'url' or 'str'")
    }
}
/**
 * Converts a simple atomic XDM value into an RDF4J [Value].
 *
 * Strings typed `xs:anyURI` become IRIs; every other string goes through [strToRdf]. Booleans, numbers
 * and dates become literals created by the repository's value factory.
 *
 * @throws RuntimeException if the underlying Java value is of none of the handled types.
 */
fun primitiveToRdf(xdmVal: XdmAtomicValue, repo: SailRepository): Value {
    val raw = xdmVal.value

    if (raw is String) {
        val isAnyUri = "${xdmVal.typeName}" == "xs:anyURI"
        return if (isAnyUri) repo.valueFactory.createIRI(raw) else strToRdf(raw, repo)
    }

    return when (raw) {
        is Boolean -> repo.valueFactory.createLiteral(raw)
        is Byte -> repo.valueFactory.createLiteral(raw)
        is Short -> repo.valueFactory.createLiteral(raw)
        is Int -> repo.valueFactory.createLiteral(raw)
        is Long -> repo.valueFactory.createLiteral(raw)
        is Float -> repo.valueFactory.createLiteral(raw)
        is Double -> repo.valueFactory.createLiteral(raw)
        is BigDecimal -> repo.valueFactory.createLiteral(raw)
        is BigInteger -> repo.valueFactory.createLiteral(raw)
        is Date -> repo.valueFactory.createLiteral(raw)
        else -> throw RuntimeException("Unknown type for binding value: $raw")
    }
}
// Matches `scheme://rest`; such strings are turned into IRIs as-is. Compiled once, not on every call.
private val ABSOLUTE_IRI_PATTERN = Regex("^[^:]+://.+$")

// Matches `prefix:localName` with exactly one colon; the prefix part may be empty.
private val PREFIXED_NAME_PATTERN = Regex("^([^:]*):([^:]+)$")

/**
 * Converts a plain string into an RDF4J [Value].
 *
 *  * If the string looks like `scheme://...`, it is returned as an IRI.
 *  * If the string looks like a prefixed name (`prefix:localName`), the prefix is resolved through
 *    [NAMESPACES] and the expanded IRI is returned.
 *  * Otherwise a string literal is returned.
 *
 * @throws RuntimeException if the string looks like a prefixed name but the prefix is not in [NAMESPACES].
 */
fun strToRdf(value: String, repo: SailRepository): Value {
    if (ABSOLUTE_IRI_PATTERN.matches(value)) return repo.valueFactory.createIRI(value)

    val match = PREFIXED_NAME_PATTERN.matchEntire(value)
        ?: return repo.valueFactory.createLiteral(value)

    val (prefix, localName) = match.destructured
    // Single lookup instead of containsKey followed by get.
    val namespace = NAMESPACES[prefix] ?: throw RuntimeException("Unknown prefix: $prefix")
    return repo.valueFactory.createIRI(namespace, localName)
}
/**
 * Converts an RDF4J [Value] into the corresponding XDM value.
 *
 * `null` maps to the empty sequence, a [Triple] maps to a three-member array (subject, predicate, object,
 * each converted recursively), and anything else becomes an atomic value built from [rdfToPrimitive].
 */
fun rdfToXdm(value: Value?): XdmValue {
    if (value == null) return XdmEmptySequence.getInstance()

    if (value is Triple) {
        val components = listOf(value.subject, value.predicate, value.`object`)
        return XdmArray(components.map(::rdfToXdm))
    }

    return XdmAtomicValue.makeAtomicValue(rdfToPrimitive(value))
}
/**
 * Converts an RDF4J [Value] into a plain JVM value.
 *
 *  * `null` stays `null`.
 *  * [DecimalLiteral] and [IntegerLiteral] become `BigDecimal`; [CalendarLiteral] becomes a `ZonedDateTime`.
 *  * A [Triple] becomes a list of its converted subject, predicate and object.
 *  * Blank nodes and IRIs become their string value.
 *  * Any other [Literal] is converted according to its XML Schema datatype; literals with a datatype that
 *    is not handled below are returned as their string value.
 *
 * Unsigned XML Schema types are read with the next wider signed accessor, because their upper range does
 * not fit the signed type of the same width.
 *
 * @throws RuntimeException if the value is of an unknown RDF4J type.
 */
fun rdfToPrimitive(value: Value?): Any? = when (value) {
    null -> null
    is DecimalLiteral -> value.decimalValue()
    is IntegerLiteral -> value.integerValue().toBigDecimal()
    is CalendarLiteral -> value.calendarValue().toGregorianCalendar().toZonedDateTime()
    is Triple -> listOf(value.subject, value.predicate, value.`object`).map(::rdfToPrimitive)
    is BNode, is IRI -> value.stringValue()
    is Literal -> {
        val datatype = value.datatype
        // Only XML Schema datatypes are interpreted. Matching the exact local name (rather than a suffix)
        // keeps unrelated datatypes whose name merely ends in e.g. "int" from being parsed as numbers.
        val xsdLocalName = if (datatype.namespace == "http://www.w3.org/2001/XMLSchema#") datatype.localName else null
        // NOTE(review): xsd:integer / xsd:decimal literals that are not IntegerLiteral / DecimalLiteral
        // instances still fall through to the string branch, as before - confirm whether that is intended.
        when (xsdLocalName) {
            "byte" -> value.byteValue()
            "short", "unsignedByte" -> value.shortValue()
            "int", "unsignedShort" -> value.intValue()
            "long", "unsignedInt" -> value.longValue()
            "unsignedLong" -> value.integerValue().toBigDecimal()
            "float" -> value.floatValue()
            "double" -> value.doubleValue()
            "boolean" -> value.booleanValue()
            else -> value.stringValue() // NOTE: ignores language tag for now.
        }
    }
    else -> throw RuntimeException("Unknown RDF4J value type $value")
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment