Last active
October 3, 2016 20:18
-
-
Save bas-stringer/4b484eb2d78afc5329fa455a6ca1b8c5 to your computer and use it in GitHub Desktop.
SCRY @ SWAT4LS 2016
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This SPARQL query requires SCRY, the scry-blast service, locally accessible BLAST binaries,
# and a sequence database containing the sequence of every protein in the Human Protein Atlas (HPA).
# It combines this with a sequence downloaded from UniProt and the RDF dump available from the HPA
# to determine 1) which tissues express the query protein (P68871) and 2) how many of its homologs
# are coexpressed in those tissues.
#
# Result: one row per tissue name, with the number of distinct homologs found in that tissue.
PREFIX enriched: <http://www.scry.com/enriched/>
PREFIX uniprot: <http://www.uniprot.org/uniprot/>
PREFIX bfo: <http://purl.obolibrary.org/obo/>
PREFIX nif: <http://ontology.neuinfo.org/NIF/Backend/NIF-Quality.owl#>
PREFIX tissue: <http://purl.obolibrary.org/obo/caloha.obo#>
PREFIX scry: <http://scry.rocks/>
PREFIX blast: <http://scry.rocks/blast/>
PREFIX in: <http://scry.rocks/input?>
PREFIX out: <http://scry.rocks/output?>
SELECT ?tissue_name (COUNT(DISTINCT ?homolog) AS ?homologs) {
  # Bind the UniProt ID URI of the query protein
  BIND (uniprot:P68871 as ?uniprot)
  # Invoke BLAST through SCRY and retrieve the ID lines of hits
  # (replace <http://my.scry.endpoint> with the address of your own SCRY instance)
  SERVICE <http://my.scry.endpoint> {
    GRAPH ?g1 {blast:fetch_sequence in: ?uniprot ; out: ?seq }
    GRAPH ?g2 {blast:blast in: ?seq ;
                           in:evalue "1e-20" ;
                           out:id ?id .}
  }
  # Parse the UniProt ID section out of the ID lines -- this relies on using a properly formatted database at the SCRY instance
  # (the ID is taken to be the text between the first and second '|' of each ID line)
  BIND(STRBEFORE(STRAFTER(?id,'|'),'|') AS ?up_id)
  # Cast the ID into a URI
  BIND(URI(CONCAT("http://www.uniprot.org/uniprot/",?up_id)) AS ?homolog)
  # Do not count the query protein as its own homolog
  FILTER(?homolog != ?uniprot)
  # Find the query protein's expression levels in all known tissues; filter out tissues where it is not detected
  ?ensg enriched:xref ?uniprot .
  GRAPH ?nanopub {
    ?ensg bfo:BFO_0000066 ?hpa_tissue ;
          nif:nlx_qual_1010003 ?level .
    ?hpa_tissue a ?tissue
  }
  # Drop any expression level whose text contains "Not"
  FILTER (!regex(?level, "Not"))
  # Same for the homologs, but only check for tissues which are already bound...
  ?hom_ensg enriched:xref ?homolog .
  GRAPH ?hom_nanopub {
    ?hom_ensg bfo:BFO_0000066 ?hpa_tissue ;
              nif:nlx_qual_1010003 ?hom_level .
  }
  FILTER (!regex(?hom_level, "Not"))
  # Last but not least, name all the tissues
  GRAPH enriched:CALOHA {
    ?tissue tissue:name ?tissue_name .
  }
} GROUP BY ?tissue_name
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Install SCRY and the scry-math service, used in the statistics use case
# (Requires pip and internet access to the Python Package Index)
$ pip install scry
$ scry service install scry-math
# Launch SCRY (at localhost:5000 by default) so that it can receive queries
$ scry start
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Adding this Python script to SCRY's "service" directory will register a procedure that inserts
# the literal "Hello, world!" in SCRY's queried graph in the position of '?greeting', if queries
# contain the graph pattern:
#
#     { <http://scry.rocks/hello_world> <http://scry.rocks/output> ?greeting }

# Import the appropriate classes from SCRY and RDFlib
from scry.services.classes import Argument, Procedure
from rdflib.term import URIRef, Literal

# Instantiate the Procedure, with the associated URI as argument
proc = Procedure(URIRef('http://scry.rocks/hello_world'))

# Define the function SCRY should execute upon encountering the URI above
# SCRY will invoke the function with three arguments:
#   1. a dictionary with inputs parsed from the SPARQL query
#   2. a list with which outputs are expected based on the graph pattern
#   3. a pointer to the handler instance executing the procedure
# For simplicity's sake, regardless of these arguments, this procedure just returns the "Hello, world!" literal
proc.function = lambda inputs, outputs, query_env: Literal('Hello, world!')

# Describe which outputs can be generated by this procedure
proc.add_output(Argument('message', description='A friendly greeting.'))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Online footnotes for the publication titled:
"SCRY: extending SPARQL with custom data
processing methods for the life sciences"
submitted to the Semantic Web Applications and
Tools for Life Sciences (SWAT4LS) conference of 2016.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment