Last active October 3, 2016
# This SPARQL query requires SCRY, the scry-blast service, locally accessible BLAST binaries,
# and a sequence database containing the sequence of every protein in the Human Protein Atlas.
# It combines this with a sequence downloaded from UniProt and the RDF dump available from the HPA
# to determine 1) which tissues express the query protein (P68871) and 2) how many of its homologs
# are coexpressed in those tissues.
# Namespace prefixes.
# NOTE(review): every namespace IRI below is empty (<>); the actual IRIs must be
# filled in before the prefixes can address the intended vocabularies.
PREFIX enriched: <>
PREFIX uniprot: <>
PREFIX bfo: <>
PREFIX nif: <>
PREFIX tissue: <>
PREFIX scry: <>
PREFIX blast: <>
PREFIX in: <>
PREFIX out: <>

# Result: one row per tissue name, with the number of distinct homologs of the
# query protein that are also expressed in that tissue.
SELECT ?tissue_name (COUNT(DISTINCT ?homolog) AS ?homologs)
WHERE {
  # Bind the UniProt ID URI of the query protein
  BIND (uniprot:P68871 AS ?uniprot)

  # Invoke BLAST through SCRY and retrieve the ID lines of hits
  SERVICE <http://my.scry.endpoint> {
    GRAPH ?g1 { blast:fetch_sequence in: ?uniprot ; out: ?seq }
    GRAPH ?g2 {
      blast:blast in: ?seq ;
                  in:evalue "1e-20" ;
                  out:id ?id .
    }
  }

  # Parse the UniProt ID section out of the ID lines (the text between the first
  # and second '|') -- this relies on using a properly formatted database at the
  # SCRY instance
  BIND (STRBEFORE(STRAFTER(?id, '|'), '|') AS ?up_id)

  # Cast the ID into a URI. The namespace is taken from the uniprot: prefix so the
  # homolog URI is always comparable with ?uniprot, whatever IRI the prefix declares.
  BIND (URI(CONCAT(STR(uniprot:), ?up_id)) AS ?homolog)

  # The query protein is a BLAST hit of itself; do not count it as a homolog
  FILTER (?homolog != ?uniprot)

  # Find the query protein's expression levels in all known tissues; filter
  # tissues where it is not detected
  ?ensg enriched:xref ?uniprot .
  GRAPH ?nanopub {
    ?ensg bfo:BFO_0000066 ?hpa_tissue ;
          nif:nlx_qual_1010003 ?level .
    ?hpa_tissue a ?tissue .
    FILTER (!regex(?level, "Not"))
  }

  # Same for the homologs, but only check for tissues which are already bound...
  ?hom_ensg enriched:xref ?homolog .
  GRAPH ?hom_nanopub {
    ?hom_ensg bfo:BFO_0000066 ?hpa_tissue ;
              nif:nlx_qual_1010003 ?hom_level .
    FILTER (!regex(?hom_level, "Not"))
  }

  # Last but not least, name all the tissues
  GRAPH enriched:CALOHA {
    ?tissue tissue:name ?tissue_name .
  }
}
GROUP BY ?tissue_name
# Install SCRY and the scry-math service, used in the statistics use case
# (Requires pip and internet access to the Python Package Index)
# The leading "$" on each command line marks the shell prompt; type only what follows it.
# First command: install the "scry" package with pip.
$ pip install scry
# Second command: use the scry command-line tool to install the scry-math service.
$ scry service install scry-math
# Launch SCRY (at localhost:5000 by default) so that it can receive queries
$ scry start
# Adding this Python script to SCRY's "service" directory will register a procedure that inserts
# the literal "Hello, world!" in SCRY's queried graph in the position of '?greeting', if queries
# contain the graph pattern:
# { <> <> ?greeting }
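# Import the appropriate classes from SCRY and RDFlib
# NOTE: the SCRY module path is missing from the "from ... import" line below and must be restored before this script can run.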
from import Argument, Procedure
from rdflib.term import URIRef, Literal
# Instantiate the Procedure, with the associated URI as argument
# NOTE(review): the URI string below is empty; it must be set to the URI that
# queries use to refer to this procedure.
proc = Procedure(URIRef(''))
# Define the function SCRY should execute upon encountering the URI above
# SCRY will invoke the function with three arguments:
# 1. a dictionary with inputs parsed from the SPARQL query
# 2. a list with which outputs are expected based on the graph pattern
# 3. a pointer to the handler instance executing the procedure
# For simplicity's sake, regardless of these arguments, this procedure just returns the "Hello, world!" literal
proc.function = lambda inputs, outputs, query_env: Literal('Hello, world!')
# Describe which outputs can be generated by this procedure
proc.add_output(Argument('message', description='A friendly greeting.'))
Online footnotes for the publication titled:
"SCRY: extending SPARQL with custom data
processing methods for the life sciences"
submitted to the Semantic Web Applications and
Tools for Life Science conference of 2016.
