Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save SandyRogers/36f04c12739a0bf7a644c4cf218232c7 to your computer and use it in GitHub Desktop.
Save SandyRogers/36f04c12739a0bf7a644c4cf218232c7 to your computer and use it in GitHub Desktop.
Package SANNTIS annotations as RO-Crates for MGnify
<!DOCTYPE html>
{# Preview page for an RO-Crate. Rendered with Jinja2; expects `crate`, `data`
   and `context` variables plus the `stringify`, `is_object_list` and `details`
   helper functions in the template globals. #}
{# NOTE(review): lang="en" matches the fixed English labels in this template;
   crate-supplied text may be in another language. #}
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>{{ crate.name or "New RO Crate" }}</title>
  <meta name="keywords" content="RO Crate">
  <style>
    html {
      margin: 0;
      padding: 0;
    }
    body {
      font-family: Arial, sans-serif;
      color: #333;
      font-size: 14px;
      background: #eee;
      margin: 0;
      padding: 0;
    }
    /* Centered white page column. */
    .main {
      max-width: 900px;
      margin: auto;
      background: #fff;
      padding: 4em;
    }
    /* Cards for data entities (green accent) and context entities (purple accent). */
    .data-entity, .context-entity {
      margin: 0.5em 0;
      background: #fafafa;
      border-left: 3px solid #18974c;
      padding: 0.5em;
      border-radius: 4px;
    }
    .context-entity {
      border-left: 3px solid #734595;
    }
    dt {
      font-weight: bold;
    }
    dd {
      margin-bottom: 10px;
    }
    h1 {
      font-size: 40px;
    }
    .wf_image {
      padding: 10px 0 10px 0;
      width: 100%;
    }
    .RO_crate_logo {
      display: block;
      margin-left: auto;
      margin-right: auto;
      width: 150px;
      padding: 20px;
    }
  </style>
</head>
<body>
<div class="main">
{# Inline RO-Crate logo. The XML declaration and SVG DOCTYPE that preceded the
   <svg> element were removed: they are not valid inside an HTML body. #}
<svg width="100%" height="100%" viewBox="0 0 100 100" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xml:space="preserve" xmlns:serif="http://www.serif.com/" style="fill-rule:evenodd;clip-rule:evenodd;" class="RO_crate_logo" role="img" aria-label="RO-Crate logo">
  <g transform="matrix(1.21845,0,0,1.21845,-10.9223,-13.4809)">
    <g transform="matrix(1,0,0,1,-40.4006,31.1723)">
      <path d="M94.866,31.747L94.889,23.304L110.569,23.304L110.569,43.265C108.45,45.316 105.379,47.122 101.357,48.683C97.335,50.244 93.262,51.024 89.138,51.024C83.897,51.024 79.328,49.925 75.432,47.726C71.536,45.527 68.607,42.382 66.648,38.292C64.688,34.202 63.708,29.753 63.708,24.945C63.708,19.727 64.802,15.09 66.99,11.034C69.177,6.978 72.379,3.868 76.594,1.703C79.807,0.039 83.806,-0.792 88.591,-0.792C94.812,-0.792 103.169,3.121 103.169,3.121L95.889,9.991C95.889,9.991 91.508,7.855 88.591,7.855C84.171,7.855 80.656,9.257 78.047,12.059C75.438,14.862 74.133,19.021 74.133,24.535C74.133,30.482 75.455,34.943 78.098,37.916C80.741,40.89 84.205,42.377 88.489,42.377C90.608,42.377 92.733,41.961 94.863,41.129C96.994,40.297 98.822,39.289 100.349,38.104L100.349,31.747L94.866,31.747Z" style="fill:rgb(26,28,26);fill-rule:nonzero;"/>
    </g>
    <g transform="matrix(0.850731,0,0,1.02684,-17.0584,15.0796)">
      <rect x="82.3" y="38.322" width="27.9" height="8.467" style="fill:rgb(26,28,26);"/>
    </g>
    <g id="g8171" transform="matrix(-0.282039,0,0,0.282039,93.5815,-1.03103)">
      <g id="rect8173" transform="matrix(0.912195,0.409756,-0.409756,0.912195,0,0)">
        <rect x="151.738" y="6.821" width="15.036" height="131.925" style="fill:rgb(26,28,26);stroke:white;stroke-width:0.78px;"/>
      </g>
      <g id="rect8175" transform="matrix(0.900702,0.434438,-0.402871,0.915257,0,0)">
        <rect x="144.088" y="33.443" width="28.964" height="86.68" style="fill:rgb(26,28,26);stroke:white;stroke-width:0.98px;"/>
      </g>
      <circle id="circle8177" cx="143.08" cy="132.429" r="22.553" style="fill:rgb(26,28,26);stroke:white;stroke-width:0.78px;"/>
    </g>
  </g>
  <g transform="matrix(1,0,0,1,-7.44699,-6.09141)">
    <g transform="matrix(85.3333,0,0,85.3333,5.42212,79.8384)">
      <path d="M0.028,-0.299C0.053,-0.299 0.073,-0.306 0.089,-0.319C0.105,-0.332 0.115,-0.35 0.121,-0.372C0.126,-0.395 0.129,-0.433 0.129,-0.487C0.129,-0.542 0.13,-0.577 0.132,-0.595C0.135,-0.622 0.14,-0.644 0.148,-0.661C0.156,-0.677 0.166,-0.691 0.178,-0.7C0.189,-0.71 0.204,-0.718 0.223,-0.723C0.235,-0.726 0.255,-0.728 0.283,-0.728L0.311,-0.728L0.311,-0.651L0.295,-0.651C0.262,-0.651 0.239,-0.645 0.228,-0.633C0.217,-0.621 0.211,-0.594 0.211,-0.551C0.211,-0.466 0.21,-0.412 0.206,-0.39C0.2,-0.355 0.19,-0.328 0.176,-0.309C0.162,-0.29 0.14,-0.273 0.109,-0.259C0.145,-0.244 0.171,-0.221 0.187,-0.19C0.203,-0.159 0.211,-0.109 0.211,-0.039C0.211,0.024 0.212,0.062 0.213,0.074C0.216,0.096 0.223,0.112 0.233,0.121C0.244,0.129 0.264,0.134 0.295,0.134L0.311,0.134L0.311,0.21L0.283,0.21C0.251,0.21 0.228,0.208 0.214,0.203C0.193,0.195 0.176,0.183 0.162,0.166C0.148,0.149 0.14,0.128 0.135,0.103C0.131,0.077 0.129,0.035 0.129,-0.024C0.129,-0.083 0.126,-0.123 0.121,-0.146C0.115,-0.168 0.105,-0.186 0.089,-0.199C0.073,-0.212 0.053,-0.219 0.028,-0.219L0.028,-0.299Z" style="fill:rgb(26,28,26);fill-rule:nonzero;"/>
    </g>
  </g>
  <g transform="matrix(-1,0,0,1,107.306,-6.09141)">
    <g transform="matrix(85.3333,0,0,85.3333,5.42212,79.8384)">
      <path d="M0.028,-0.299C0.053,-0.299 0.073,-0.306 0.089,-0.319C0.105,-0.332 0.115,-0.35 0.121,-0.372C0.126,-0.395 0.129,-0.433 0.129,-0.487C0.129,-0.542 0.13,-0.577 0.132,-0.595C0.135,-0.622 0.14,-0.644 0.148,-0.661C0.156,-0.677 0.166,-0.691 0.178,-0.7C0.189,-0.71 0.204,-0.718 0.223,-0.723C0.235,-0.726 0.255,-0.728 0.283,-0.728L0.311,-0.728L0.311,-0.651L0.295,-0.651C0.262,-0.651 0.239,-0.645 0.228,-0.633C0.217,-0.621 0.211,-0.594 0.211,-0.551C0.211,-0.466 0.21,-0.412 0.206,-0.39C0.2,-0.355 0.19,-0.328 0.176,-0.309C0.162,-0.29 0.14,-0.273 0.109,-0.259C0.145,-0.244 0.171,-0.221 0.187,-0.19C0.203,-0.159 0.211,-0.109 0.211,-0.039C0.211,0.024 0.212,0.062 0.213,0.074C0.216,0.096 0.223,0.112 0.233,0.121C0.244,0.129 0.264,0.134 0.295,0.134L0.311,0.134L0.311,0.21L0.283,0.21C0.251,0.21 0.228,0.208 0.214,0.203C0.193,0.195 0.176,0.183 0.162,0.166C0.148,0.149 0.14,0.128 0.135,0.103C0.131,0.077 0.129,0.035 0.129,-0.024C0.129,-0.083 0.126,-0.123 0.121,-0.146C0.115,-0.168 0.105,-0.186 0.089,-0.199C0.073,-0.212 0.053,-0.219 0.028,-0.219L0.028,-0.299Z" style="fill:rgb(26,28,26);fill-rule:nonzero;"/>
    </g>
  </g>
</svg>
{# Page heading: crate name, optional description and optional crate image. #}
<h1>{{ crate.name or "New RO Crate" }}</h1>
{# The paragraph is only emitted when there is a description, so no empty <p> is rendered. #}
{% if crate.description %}
<p>{{ crate.description }}</p>
{% endif %}
{% if crate.image %}
{# NOTE(review): the alt text is derived from the crate name because the template
   has no description of the image itself. #}
<img class="wf_image" src="{{ crate.image }}" alt="{{ crate.name or 'New RO Crate' }} image">
{% endif %}
{# Summary of the crate's top-level properties. Each property is shown only
   when set. Creator/publisher use a plural heading with one <dd> per object
   when is_object_list() reports a list of objects. #}
<dl>
  {% if crate.creator %}
    {% if is_object_list(crate.creator) %}
      <dt>Creators</dt>
      {% for obj in crate.creator %}
        <dd>{{ stringify(obj) }}</dd>
      {% endfor %}
    {% else %}
      <dt>Creator</dt>
      <dd>{{ stringify(crate.creator) }}</dd>
    {% endif %}
  {% endif %}
  {% if crate.publisher %}
    {% if is_object_list(crate.publisher) %}
      <dt>Publishers</dt>
      {% for obj in crate.publisher %}
        <dd>{{ stringify(obj) }}</dd>
      {% endfor %}
    {% else %}
      <dt>Publisher</dt>
      <dd>{{ stringify(crate.publisher) }}</dd>
    {% endif %}
  {% endif %}
  {% if crate.url %}
    <dt>URL</dt>
    {# The link previously had no text (so it was invisible) and used the URL as
       a window name in `target`; it now shows the URL as its text. #}
    <dd><a href="{{ crate.url }}">{{ crate.url }}</a></dd>
  {% endif %}
  {% if crate.license %}
    <dt>License</dt>
    <dd>{{ crate.license }}</dd>
  {% endif %}
  {% if crate.keywords %}
    <dt>Keyword(s)</dt>
    <dd>{{ stringify(crate.keywords) }}</dd>
  {% endif %}
  {% if crate.isBasedOn %}
    <dt>isBasedOn</dt>
    <dd>{{ crate.isBasedOn }}</dd>
  {% endif %}
  {% if crate.datePublished %}
    <dt>datePublished</dt>
    <dd>{{ crate.datePublished }}</dd>
  {% endif %}
  {% if crate.CreativeWorkStatus %}
    <dt>CreativeWorkStatus</dt>
    <dd>{{ crate.CreativeWorkStatus }}</dd>
  {% endif %}
</dl>
{# One card per data entity in `data`: a link to its @id, its @type and,
   when present, its programming language. #}
<h2>Contents</h2>
<div id="contents">
  {% for entry in data %}
    {# The empty id="" attribute was dropped: an id must not be empty. #}
    <div class="data-entity">
      <strong>Data entity</strong>
      <a class="data-entity-link" href="{{ entry['@id'] }}">{{ entry['@id'] }}</a>
      <p>Type: {{ stringify(entry['@type']) }}</p>
      {% if entry['programmingLanguage'] %}
        <p>ProgrammingLanguage: {{ entry['programmingLanguage']['@id'] }}</p>
      {% endif %}
    </div>
  {% endfor %}
</div>
{# "Dataset variables": lists the root dataset's variableMeasured values.
   A list of objects gets one link per variable, pointing at the element whose
   id is the variable's @id; a single value is shown as plain text.
   Fixes: the <div> is now closed, <dt>/<dd> sit inside a <dl>, and the
   single-value branch shows the variable instead of crate.publisher. #}
{% set variables = crate.root_dataset.get("variableMeasured") %}
{% if variables %}
<h2>Dataset variables</h2>
<div id="variables">
  <dl>
    <dt>Variable definitions</dt>
    {% if is_object_list(variables) %}
      {% for obj in variables %}
        <dd><a href="#{{ obj['@id'] }}">{{ stringify(obj) }}</a></dd>
      {% endfor %}
    {% else %}
      <dd>{{ stringify(variables) }}</dd>
    {% endif %}
  </dl>
</div>
{% endif %}
{# One card per contextual entity in `context`. The card's id is the entity's
   @id so that "#<@id>" links can target it. #}
<h2>Metadata</h2>
<div id="metadata">
  {% for entry in context %}
    {# The id used to live on a self-closed <a id=".."/>; <a> is not a void
       element, so that anchor stayed open and wrapped the following content.
       The id is now carried by the card itself. #}
    <div class="context-entity" id="{{ entry['@id'] }}">
      <strong>{{ stringify(entry['@type']) }}</strong>
      {# NOTE(review): entities elsewhere use "name" rather than "@name";
         confirm which key is intended here. #}
      {% if entry['@name'] %}
        {% if entry['@name'].startswith('http') %}
          <a class="data-entity-link" href="{{ entry['@name'] }}">{{ entry['@name'] }}</a>
        {% else %}
          {{ entry['@name'] }}
        {% endif %}
      {% else %}
        {% if entry['@id'].startswith('http') %}
          <a class="data-entity-link" href="{{ entry['@id'] }}">{{ entry['@id'] }}</a>
        {% else %}
          {{ entry['@id'] }}
        {% endif %}
      {% endif %}
      {# <dt>/<dd> belong in a <dl>, not a <p>. #}
      <dl>
        {% for detail in details(entry) %}
          <dt>{{ stringify(detail) }}</dt>
          <dd>{{ stringify(entry[detail]) }}</dd>
        {% endfor %}
      </dl>
    </div>
  {% endfor %}
</div>
</div>
</body>
</html>
from rocrate.rocrate import ROCrate
from rocrate.model.dataset import Dataset
from rocrate.model.preview import Preview
from rocrate.model.contextentity import ContextEntity
from uuid import uuid4
import os
from datetime import datetime
from jinja2 import Template
class MGnifyPreview(Preview):
    """Preview entity whose HTML is rendered from the MGnify Jinja2 template.

    NOTE(review): presumably ``generate_html`` overrides the method of the same
    name on ``rocrate``'s ``Preview`` -- confirm against the library.
    """

    def generate_html(self):
        """Render ``self.crate`` with ``mgnify-rocrate-preview-template.html.j2``.

        The template file is looked up relative to the current working
        directory. The template receives ``crate`` (the crate object),
        ``context`` and ``data`` (the ``_jsonld`` dicts of the contextual and
        data entities), plus the helper functions ``stringify``,
        ``is_object_list`` and ``details`` as template globals.

        Returns:
            str: the rendered HTML document.
        """
        # `with` guarantees the file is closed even if reading/parsing fails.
        # autoescape=True so crate-supplied text containing <, > or & cannot
        # break (or inject into) the generated markup.
        with open('mgnify-rocrate-preview-template.html.j2', encoding='utf-8') as template:
            src = Template(template.read(), autoescape=True)

        def template_function(func):
            """Register ``func`` as a global callable inside the template."""
            src.globals[func.__name__] = func
            return func

        @template_function
        def stringify(a):
            """Flatten a value (str, list, dict or crate entity) to display text."""
            if isinstance(a, list):
                return ', '.join(stringify(item) for item in a)
            if isinstance(a, str):
                return a
            if hasattr(a, '_jsonld') and a._jsonld.get('name'):
                return a._jsonld['name']
            if isinstance(a, dict):
                return stringify(list(a.values()))
            # Always return text so the join above cannot fail on ints, None, etc.
            return str(a)

        @template_function
        def is_object_list(a):
            """True when ``a`` is a list holding at least one non-string item.

            The previous check ``obj is not str`` compared each item with the
            ``str`` type itself and was therefore true for every item.
            """
            return isinstance(a, list) and any(not isinstance(obj, str) for obj in a)

        @template_function
        def details(a):
            """Return the entries of dict ``a`` other than ``@id`` and ``@type``.

            Non-dict input yields an empty dict so template loops stay valid.
            """
            if isinstance(a, dict):
                return {k: v for k, v in a.items() if k not in ('@id', '@type')}
            return {}

        context_entities = [entity._jsonld for entity in self.crate.contextual_entities]
        data_entities = [entity._jsonld for entity in self.crate.data_entities]
        return src.render(crate=self.crate, context=context_entities, data=data_entities)
def _assembly_accession(gff_path):
    """Return the first ``ERZ<digits>`` accession found in ``gff_path``, or None."""
    import re  # local import: the file's import block does not include `re`

    match = re.search(r'ERZ\d+', gff_path)
    return match.group(0) if match else None


def create_sanntis_rocrate(gff_path: str):
    """Package one SanntiS GFF output file as an RO-Crate zip.

    The assembly accession (``ERZ`` followed by digits) is taken from
    ``gff_path``. The crate holds the GFF file, a ``CreateAction`` describing
    the run, the SanntiS source code and its funding, and two ``PropertyValue``
    entities describing GFF attributes. It is written to
    ``./crates/sanntis_<version>_<assembly>.zip``.

    Args:
        gff_path: Path to the GFF file; must contain an ``ERZ<digits>`` accession.

    Returns:
        None. If no accession can be found, a message is printed and nothing
        is written.
    """
    # A regex replaces the earlier chain of split() calls, which produced bogus
    # accessions (e.g. "ERZ" alone, or text containing "/") for some paths and
    # relied on a bare `except:` to detect failure.
    assembly = _assembly_accession(gff_path)
    if assembly is None:
        print(f'Could not determine assembly accession from path {gff_path}')
        return

    # gen_preview=False: the custom MGnify preview is added instead.
    crate = ROCrate(gen_preview=False)
    crate.add(MGnifyPreview(crate))

    # Conform to the WFRUN profile
    PC_PROFILE_ID = "https://w3id.org/ro/wfrun/process/0.1"
    pc_profile = crate.add(ContextEntity(crate, PC_PROFILE_ID, properties={
        "@type": "CreativeWork",
        "name": "Process Run Crate",
        "version": "0.1"
    }))
    crate.root_dataset["conformsTo"] = pc_profile

    crate.name = f'SANNTIS predictions for assembly {assembly}'
    crate.description = f"""SanntiS (SMBGC Annotation using Neural Networks Trained on Interpro Signatures) predicts secondary metabolite biosynthetic gene clusters.
This is the output (a GFF feature file) of SanntiS being run on the MGnify assembly {assembly}."""

    # Workflow Provenance
    sourcecode = crate.add(ContextEntity(crate, "https://github.com/Finn-Lab/SanntiS", properties={
        "@type": "SoftwareSourceCode",
        "name": "SanntiS: SMBGC Annotation using Neural Networks Trained on Interpro Signatures",
        "alternateName": "emeraldBGC",
        "url": "https://github.com/Finn-Lab/SanntiS",
        "codeRepository": "https://github.com/Finn-Lab/SanntiS",
        "version": "0.2.3",
    }))
    bbsrc = crate.add(ContextEntity(crate, "https://ror.org/00cwqg982", properties={
        "@type": "Organization",
        "name": "BBSRC",
        "alternateName": "Biotechnology and Biological Sciences Research Council",
        "url": "http://www.bbsrc.ac.uk/"
    }))
    emerald_grant = crate.add(ContextEntity(crate, "BB/S009043/1", properties={
        "@type": "Grant",
        "name": "EMERALD - Enriching MEtagenomics Results using Artificial intelligence and Literature Data",
        "url": "https://gtr.ukri.org/projects?ref=BB%2FS009043%2F1"
    }))
    emerald_grant.append_to("funder", bbsrc)
    sourcecode.append_to("funding", emerald_grant)

    # The run
    agent = crate.add(ContextEntity(crate, "https://ror.org/02catss52", properties={
        "@type": "Organization",
        "name": "EMBL-EBI",
        "url": "https://www.ebi.ac.uk/metagenomics"
    }))
    crate.creator = agent

    # NOTE(review): getctime is the metadata-change time on Unix and the
    # creation time on Windows; confirm this is the intended "run finished"
    # timestamp (getmtime may be closer).
    fin = os.path.getctime(gff_path)

    ## Add GFF output file
    gff = crate.add_file(
        gff_path,
        properties={
            "name": "annotations gff",
            "encodingFormat": "text/x-gff3"
        }
    )

    ## Add link
    run_id = uuid4().hex
    run = crate.add(ContextEntity(crate, run_id, properties={
        "@type": "CreateAction",
        "name": f"SanntiS run on {assembly}",
        "endTime": datetime.fromtimestamp(fin).isoformat(),
        "description": "",
    }))
    run.append_to("result", gff)
    run.append_to("agent", agent)
    run.append_to("instrument", sourcecode)

    ## Describe the GFF columns of interest
    gff_cols = [
        crate.add(ContextEntity(crate, 'gff_attribute_nearest_mibig', properties={
            "@type": "PropertyValue",
            "name": "Nearest MiBIG",
            "url": "https://mibig.secondarymetabolites.org/repository",
            "description": "The nearest_MiBIG attribute in the GFF column 9 is the closest predicted BGC from the MiBIG ontology.",
            "value": "nearest_MiBIG",
            "propertyId": "https://mibig.secondarymetabolites.org/repository/@value",
        })),
        crate.add(ContextEntity(crate, 'gff_attribute_nearest_mibig_class', properties={
            "@type": "PropertyValue",
            "name": "Nearest MiBIG class",
            "url": "https://mibig.secondarymetabolites.org",
            "description": "The nearest_MiBIG_class attribute in the GFF column 9 is one of the 6 (or other) BGC types from the MiBIG ontology.",
            "value": "nearest_MiBIG_class",
            "propertyId": "https://mibig.secondarymetabolites.org/",
        }))
    ]
    for col in gff_cols:
        crate.root_dataset.append_to("variableMeasured", col)

    # Make sure the output directory exists before writing the archive.
    os.makedirs("./crates", exist_ok=True)
    crate.write_zip(f"./crates/sanntis_{sourcecode['version']}_{assembly}.zip")
# Build one crate per GFF path listed (one path per line) in gffPaths.txt.
with open('gffPaths.txt', 'r') as paths:
    for line in paths:
        path = line.strip()
        if not path:
            # Blank lines (e.g. a trailing newline at end of file) are not paths.
            continue
        print(path)
        create_sanntis_rocrate(path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment