Skip to content

Instantly share code, notes, and snippets.

@EmmanuelOga
Created January 11, 2021 15:52
Show Gist options
  • Save EmmanuelOga/e2511d0093c254695e4db203bcd25fdc to your computer and use it in GitHub Desktop.
Save EmmanuelOga/e2511d0093c254695e4db203bcd25fdc to your computer and use it in GitHub Desktop.
Run SPARQL queries from Saxon using Rdf4J
package com.emmanueloga.xml
import com.emmanueloga.Config
import com.emmanueloga.FISHX
import com.emmanueloga.NAMESPACES
import com.emmanueloga.logger
import net.sf.saxon.s9api.*
import net.sf.saxon.s9api.ItemType.*
import net.sf.saxon.s9api.OccurrenceIndicator.*
import net.sf.saxon.s9api.SequenceType.makeSequenceType
import org.eclipse.rdf4j.model.*
import org.eclipse.rdf4j.model.impl.CalendarLiteral
import org.eclipse.rdf4j.model.impl.DecimalLiteral
import org.eclipse.rdf4j.model.impl.IntegerLiteral
import org.eclipse.rdf4j.repository.sail.SailRepository
import java.math.BigDecimal
import java.math.BigInteger
import java.util.*
/**
 * Saxon extension function that executes a named SPARQL tuple query through RDF4J.
 *
 * Arguments, as declared by [getArgumentTypes]:
 * 1. the query name; the query text is read with [Config.readFile] from `lib/sparql/<name>.sparql`;
 * 2. an optional map of binding names to values, each value converted with [xdmToRdf];
 * 3. an optional array of binding names in priority order, used to return a single value.
 *
 * [repo] and [config] must both be assigned before [call] is invoked.
 */
class XsltSparql : ExtensionFunction {
    override fun getName() = QName(FISHX.NAMESPACE, "sparql")

    /** Repository the queries run against; a new connection is opened (and closed) per call. */
    var repo: SailRepository? = null

    /** Used to read the query text for a given query name. */
    var config: Config? = null

    override fun getArgumentTypes() = arrayOf(
        makeSequenceType(STRING, ONE),
        makeSequenceType(ANY_MAP, ZERO_OR_ONE),
        makeSequenceType(ANY_ARRAY, ZERO_OR_ONE)
    )

    override fun getResultType() = makeSequenceType(ANY_ITEM, ZERO_OR_MORE)!!

    // For better logging.
    private var callId = 0

    /**
     * Runs the query named by the first argument and converts the result rows to XDM.
     *
     * @return when the third argument is present: the value of the first listed binding that is
     *   present and non-blank in the first result row, or the empty sequence; otherwise one map
     *   per result row, keyed by binding name.
     * @throws IllegalStateException if [repo] or [config] has not been assigned.
     */
    override fun call(arguments: Array<XdmValue>): XdmValue {
        callId++
        arguments.forEachIndexed { i, arg -> logger.info("$callId ARG$i: $arg") }

        // Fail with an explicit message (and before opening a connection) instead of a bare NPE.
        val repository = checkNotNull(repo) { "XsltSparql.repo must be set before the function is called" }
        val configuration = checkNotNull(config) { "XsltSparql.config must be set before the function is called" }

        return repository.connection.use { conn ->
            val queryName = (arguments[0].itemAt(0) as XdmAtomicValue).stringValue
            val queryString = configuration.readFile("lib/sparql/$queryName.sparql")
            val query = conn.prepareTupleQuery(queryString)

            if (arguments.size >= 2 && !arguments[1].isEmpty) {
                val bindings = arguments[1].itemAt(0) as XdmMap
                bindings.entrySet().forEach { (k, v) ->
                    val name = "${k.value}"
                    val value = xdmToRdf(v, repository)
                    logger.info("$callId BINDING $name to $value")
                    query.setBinding(name, value)
                }
            }

            // Materialize all rows while the connection is still open.
            val result = query.evaluate().toList()
            logger.info("$callId RESULT $result")

            if (arguments.size == 3 && !arguments[2].isEmpty) {
                // * The 3rd argument can be used to return a single value. It should be an Array of bindings
                //   in priority order. The first binding present is returned, or the empty sequence otherwise.
                result.firstOrNull()?.let { row ->
                    (arguments[2].itemAt(0) as XdmArray).asList()
                        .map { item -> (item as XdmAtomicValue).stringValue }
                        .filter { name -> row.bindingNames.contains(name) }
                        .find { name -> row.getBinding(name).value.stringValue().isNotBlank() }
                        ?.let { name -> rdfToXdm(row.getBinding(name).value) }
                } ?: XdmEmptySequence.getInstance()
            } else {
                // * If the 3rd argument is missing, return every row as a sequence of maps.
                XdmValue(result.map { row ->
                    // XdmMap.put returns the updated map, so fold threads it through the bindings.
                    row.fold(XdmMap()) { map, binding ->
                        map.put(XdmAtomicValue(binding.name), rdfToXdm(binding.value))
                    }
                })
            }
        }
    }
}
/**
 * Converts an [XdmValue] coming from XSLT into the RDF4J [Value] used as a SPARQL binding.
 *
 * Atomic values are delegated to [primitiveToRdf], arrays to [xdmArrayToRdf], and nodes become
 * plain literals holding the node's string value. Any other kind of value is rejected.
 */
fun xdmToRdf(xdmVal: XdmValue, repo: SailRepository): Value {
    if (xdmVal is XdmAtomicValue) return primitiveToRdf(xdmVal, repo)
    if (xdmVal is XdmArray) return xdmArrayToRdf(xdmVal, repo)
    if (xdmVal is XdmNode) {
        // NOTE: for passing arbitrary attributes.
        return repo.valueFactory.createLiteral(xdmVal.stringValue)
    }
    throw RuntimeException("Don't know how to map $xdmVal / ${xdmVal.javaClass} to RDF4J binding.")
}
/**
 * Builds an RDF4J [Value] from a tagged [XdmArray].
 *
 * This is used to parameterize the type of value passed as a binding to RDF. URLs and prefixed
 * names need a tag to be told apart from plain strings; values such as numbers need no tag and
 * should be passed as primitives instead.
 *
 * Expected array formats:
 * * `["url", url]`
 * * `["pre", prefix, localName]`
 * * `["str", value, optionalLang]`
 *
 * @throws RuntimeException if the array has fewer than 2 elements, the tag is not one of the
 *   above, or a `"pre"` array names an unknown prefix or has no local name.
 */
fun xdmArrayToRdf(value: XdmArray, repo: SailRepository): Value {
    if (value.arrayLength() < 2) throw RuntimeException("Array $value should have at least 2 elements")
    val tag = (value[0] as XdmAtomicValue).stringValue
    val arg1 = (value[1] as XdmAtomicValue).stringValue
    // The third element is only read when the array has exactly 3 elements.
    val arg2 = if (value.arrayLength() == 3) (value[2] as XdmAtomicValue).stringValue else null
    return when (tag) {
        "url" -> repo.valueFactory.createIRI(arg1)
        "pre" -> {
            // Reject unknown prefixes and missing local names up front rather than handing
            // nulls to createIRI; the prefix message matches the one used by strToRdf.
            val namespace = NAMESPACES[arg1] ?: throw RuntimeException("Unknown prefix: $arg1")
            val localName = arg2
                ?: throw RuntimeException("Array $value with tag 'pre' should have a local name as third element")
            repo.valueFactory.createIRI(namespace, localName)
        }
        "str" -> when (arg2) {
            null -> repo.valueFactory.createLiteral(arg1)
            else -> repo.valueFactory.createLiteral(arg1, arg2) // arg2 is the language tag
        }
        else -> throw RuntimeException("Array $value first element should be either 'pre', 'url' or 'str'")
    }
}
/**
 * Converts a simple atomic value to an RDF4J value.
 *
 * Booleans, numbers and dates become typed literals. A string typed as `xs:anyURI` becomes an IRI;
 * any other string is handed to [strToRdf]. Values of any other Java type are rejected.
 */
fun primitiveToRdf(xdmVal: XdmAtomicValue, repo: SailRepository): Value {
    val raw = xdmVal.value
    return when (raw) {
        is Boolean -> repo.valueFactory.createLiteral(raw)
        is Byte -> repo.valueFactory.createLiteral(raw)
        is Short -> repo.valueFactory.createLiteral(raw)
        is Int -> repo.valueFactory.createLiteral(raw)
        is Long -> repo.valueFactory.createLiteral(raw)
        is Float -> repo.valueFactory.createLiteral(raw)
        is Double -> repo.valueFactory.createLiteral(raw)
        is BigDecimal -> repo.valueFactory.createLiteral(raw)
        is BigInteger -> repo.valueFactory.createLiteral(raw)
        is Date -> repo.valueFactory.createLiteral(raw)
        is String -> when ("${xdmVal.typeName}") {
            "xs:anyURI" -> repo.valueFactory.createIRI(raw)
            else -> strToRdf(raw, repo)
        }
        else -> throw RuntimeException("Unknown type for binding value: $raw")
    }
}
/**
 * Maps a string to an RDF4J value:
 * * a string of the form `scheme://rest` becomes an IRI as-is;
 * * a string of the form `prefix:localName` (exactly one colon) is resolved against the known
 *   namespaces, and an unknown prefix raises a [RuntimeException];
 * * anything else becomes a string literal.
 */
fun strToRdf(value: String, repo: SailRepository): Value {
    val looksLikeUrl = Regex("^[^:]+://.+$").matchEntire(value) != null
    if (looksLikeUrl) return repo.valueFactory.createIRI(value)

    val prefixed = Regex("^([^:]*):([^:]+)$").matchEntire(value)
        ?: return repo.valueFactory.createLiteral(value)

    val (prefix, localName) = prefixed.destructured
    if (!NAMESPACES.containsKey(prefix)) throw RuntimeException("Unknown prefix: $prefix")
    return repo.valueFactory.createIRI(NAMESPACES[prefix], localName)
}
/**
 * Converts an RDF4J [Value] to an Xdm value.
 *
 * `null` maps to the empty sequence, a [Triple] to a three-member array (subject, predicate,
 * object, each converted recursively), and everything else to an atomic value built from
 * [rdfToPrimitive].
 */
fun rdfToXdm(value: Value?): XdmValue {
    if (value == null) return XdmEmptySequence.getInstance()
    if (value is Triple) {
        val members = listOf(value.subject, value.predicate, value.`object`).map { part -> rdfToXdm(part) }
        return XdmArray(members)
    }
    return XdmAtomicValue.makeAtomicValue(rdfToPrimitive(value))
}
/**
 * Converts an RDF4J [Value] to a plain JVM value.
 *
 * `null` stays `null`; decimal, integer and calendar literals use their dedicated accessors;
 * a [Triple] becomes a list of its three converted parts; blank nodes and IRIs become their
 * string value. Any other literal is converted according to the suffix of its datatype's local
 * name, falling back to the string value.
 */
fun rdfToPrimitive(value: Value?): Any? {
    if (value == null) return null
    return when (value) {
        // The specialised literal classes must be tested before the generic Literal case.
        is DecimalLiteral -> value.decimalValue()
        is IntegerLiteral -> value.integerValue().toBigDecimal()
        is CalendarLiteral -> value.calendarValue().toGregorianCalendar().toZonedDateTime()
        is Triple -> listOf(value.subject, value.predicate, value.`object`).map { part -> rdfToPrimitive(part) }
        is BNode, is IRI -> value.stringValue()
        is Literal -> {
            val datatypeName = value.datatype.localName
            // Suffixes are tried in this order; the first one that matches decides the conversion.
            val suffixes = listOf("byte", "short", "int", "long", "float", "double", "boolean")
            when (suffixes.firstOrNull { suffix -> datatypeName.endsWith(suffix) }) {
                "byte" -> value.byteValue()
                "short" -> value.shortValue()
                "int" -> value.intValue()
                "long" -> value.longValue()
                "float" -> value.floatValue()
                "double" -> value.doubleValue()
                "boolean" -> value.booleanValue()
                else -> value.stringValue() // NOTE: ignores language tag for now.
            }
        }
        else -> throw RuntimeException("Unknown RDF4J value type $value")
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment