Skip to content

Instantly share code, notes, and snippets.

@kasei
Created August 7, 2022 00:26
Show Gist options
  • Save kasei/580f6b407915d5b091531c853dfc08a8 to your computer and use it in GitHub Desktop.
Save kasei/580f6b407915d5b091531c853dfc08a8 to your computer and use it in GitHub Desktop.
#!/usr/bin/env perl
=head1 NAME
attean-sparql-list-operators.pl - Example of using list functions and a new EXPLODE operator in SPARQL
=head1 DESCRIPTION
New extension functions operating over literals with datatype C<ex:List>:
=over 4
=item C<< ex:split(xsd:string, xsd:string) -> ex:List >>
=item C<< ex:zip(ex:List, ex:List) -> ex:List >>
=item C<< ex:listGet(ex:List, xsd:integer) -> RDFTerm >>
=back
A new C<EXPLODE> operator which syntactically mirrors C<BIND>, but which produces any number of results:
=over 4
=item C<< EXPLODE(expr AS ?var) >>
expr evaluating to ex:List, produces one result for each element of the encoded list
=back
=cut
use v5.14;
use autodie;
use utf8;
use Attean;
use Attean::RDF;
use Attean::SimpleQueryEvaluator;
use AtteanX::Functions::CompositeLists;
# Enable the extension functions: call the CompositeLists class's register()
# method before the query below is parsed and evaluated.
# NOTE(review): presumably this is what makes ex:split, ex:zip and ex:listGet
# resolvable by the evaluator -- confirm against the module's documentation.
AtteanX::Functions::CompositeLists->register();
# The CONSTRUCT query to run. It turns one row of "; "-separated, positionally
# aligned id and name strings into one ex:Researcher per (name, id) pair and
# links each researcher to the project, either as the contact PI or as a
# regular PI.
#
# The heredoc is deliberately NON-interpolating (<<'END'): the query is a fixed
# literal, and it contains the regex "; *$" whose `$"` Perl would otherwise
# expand as a special variable.
my $sparql = <<'END';
PREFIX ex: <http://example.org/>
CONSTRUCT {
?project ex:principalInvestigatorContact ?piContact ;
ex:principalInvestigator ?pi .
?researcher a ex:Researcher ;
ex:name ?name .
}
WHERE {
# Original data
VALUES (?project_id ?ids ?names) {
(
"123"
"1858722 (contact); 1883064; 3150248;"
"BUCK, JOCHEN (contact); LEVIN, LONNY R; VISCONTI, PABLO E.;"
)
}
# Split names and ids into individual records, contained in an ex:List-typed
# literal. The terminating ";" is stripped first so that the last element
# does not keep it (otherwise the last id would be "3150248;").
BIND(ex:split(REPLACE(?ids, "; *$", ""), "; ") AS ?idList)
BIND(ex:split(REPLACE(?names, "; *$", ""), "; ") AS ?nameList)
# Make a single list of (name, id) pairs
BIND(ex:zip(?nameList, ?idList) AS ?pairs)
# Make one result per (name, id) pair
EXPLODE(?pairs AS ?pair)
# Extract the name and id from the pair ("with annotation" because they
# might contain the trailing " (contact)" string)
BIND(ex:listGet(?pair, 0) AS ?nameWithAnnotation)
BIND(ex:listGet(?pair, 1) AS ?idWithAnnotation)
# Strip off the " (contact)" annotation, if present
BIND(REPLACE(?nameWithAnnotation, " [(]contact[)]", "") AS ?name)
BIND(REPLACE(?idWithAnnotation, " [(]contact[)]", "") AS ?id)
# Set a flag if this record is marked as the contact
BIND(STRENDS(?idWithAnnotation, " (contact)") AS ?isContact)
# Construct the ?researcher IRI
BIND(URI(CONCAT("researcher/", ?id)) AS ?researcher)
# Construct the ?project
BIND(URI(CONCAT("project/", ?project_id)) AS ?project)
# Using IRI() with either the bound ?researcher value or the (necessarily)
# unbound ?undef will result in ?piContact (?pi, respectively) being bound
# only if (not if, respectively) the ?isContact variable is true (false).
# The `false` value will cause a type error and result in the variable
# being unbound.
BIND(IRI(IF(?isContact, ?researcher, ?undef)) AS ?piContact)
BIND(IRI(IF(?isContact, ?undef, ?researcher)) AS ?pi)
}
END
# Build the SPARQL parser up front.
my $sparql_parser = Attean->get_parser('SPARQL')->new();

# The query runs against a freshly created (hence empty) in-memory quad model;
# nothing is loaded into it before evaluation.
my $quad_model = Attean::MutableQuadModel->new(
    store => Attean->get_store('Memory')->new(),
);
my $default_graph = Attean::IRI->new('http://example.org/graph');
my $evaluator     = Attean::SimpleQueryEvaluator->new(
    model         => $quad_model,
    default_graph => $default_graph,
);

# parse() is called in list context; only its first return value is used.
my ($query_algebra) = $sparql_parser->parse($sparql);

# Evaluate the query and write the result iterator to STDOUT as Turtle.
my $result_iter = $evaluator->evaluate($query_algebra, $default_graph);
Attean->get_serializer('turtle')->new()
    ->serialize_iter_to_io(\*STDOUT, $result_iter);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment