Skip to content

Instantly share code, notes, and snippets.

@labra
Last active September 3, 2019 11:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save labra/82318c5bad57adb7cd19221bdf9cd3b0 to your computer and use it in GitHub Desktop.
Save labra/82318c5bad57adb7cd19221bdf9cd3b0 to your computer and use it in GitHub Desktop.
Examples at BioHackathon'19
prefix schema: <http://schema.org/>
prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
prefix bts: <http://discovery.biothings.io/bts/>
prefix xsd: <http://www.w3.org/2001/XMLSchema#>
prefix dct: <http://purl.org/dc/terms/>
# Shape of a CTSA dataset description.
# A triple constraint with no cardinality marker must occur exactly once;
# "?" means optional and "*" means zero or more.
bts:CTSADatasetShape {
  schema:name xsd:string // dct:description "The name of the CTSA Dataset" ;
  schema:description xsd:string // dct:description "A description of the CTSA Dataset" ;
  schema:url IRI ;
  schema:sameAs IRI* ;
  schema:keywords xsd:string * ;
  # Both dates accept a date or a date-time, like the JSON Schema version of
  # this model, which lists "date" and "date-time" for both properties.
  schema:datePublished schema:Date OR schema:DateTime ;
  schema:dateModified schema:Date OR schema:DateTime ;
  # schema:citation is constrained once only: exactly one value conforming to
  # bts:Citation. Constraining the same predicate a second time with ". *"
  # would make the effective cardinality ambiguous.
  schema:citation @bts:Citation ;
  schema:version xsd:string ;
  schema:identifier xsd:string ;
  # Closed value set: only these three literals are accepted.
  schema:measurementTechnique [
    "whole-genome sequencing"
    "whole-exome sequencing"
    "mass spectrum"
  ] ;
  schema:creator @bts:Person * ;
  schema:contributor @bts:Person * ;
  schema:publisher @bts:Publisher ;
  # Placeholders: "." accepts any value until dedicated shapes are written.
  schema:license . ? ; # TODO
  schema:samples . * ; # TODO
  schema:funder . ?; # TODO
  schema:includedInDataCatalog @bts:Catalog
}
# Person shape referenced by schema:creator and schema:contributor:
# exactly one string name and exactly one IRI-valued url.
bts:Person {
  schema:name xsd:string ;
  schema:url  IRI
} // dct:description "Reusable person definition"
# Publisher shape referenced by schema:publisher:
# exactly one string name and exactly one IRI-valued url.
bts:Publisher {
  schema:name xsd:string ;
  schema:url  IRI
}
# Citation shape referenced by schema:citation:
# exactly one schema:identifier, whose value may be any RDF term (".").
bts:Citation {
  schema:identifier .
}
# Catalog shape referenced by schema:includedInDataCatalog.
# Both properties are pinned to a single allowed value by one-element value sets.
bts:Catalog {
  schema:name [ "CTSA Datasets" ] ;
  schema:url  [ <https://ctsa.ncats.nih.gov/cd2h/> ]
}
{
"@context": {
"schema": "http://schema.org/",
"rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
"rdfs": "http://www.w3.org/2000/01/rdf-schema#",
"bts": "http://discovery.biothings.io/bts/"
},
"@graph": [{
"@id": "bts:CTSADataset",
"@type": "rdfs:Class",
"rdfs:comment": "A schema describing Dataset from CTSA center",
"rdfs:label": "CTSADataset",
"rdfs:subClassOf": {
"@id": "schema:Dataset"
},
"$validation": {
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"properties": {
"name": {
"$comment": "the starting of inherented fields from schema:Dataset",
"description": "The name of the CTSA Dataset",
"type": "string"
},
"description": {
"description": "A description of the CTSA Dataset",
"type": "string"
},
"url": {
"description": "URL of the Dataset page if available.",
"type": "string",
"format": "uri"
},
"sameAs": {
"description": "URL of a reference Web page that unambiguously indicates the item's identity. E.g. the URL of the item's Wikipedia page, Wikidata entry, or official website.",
"type": "uri"
},
"keywords": {
"description": "Keywords or tags used to describe this content. Multiple entries in a keywords list are typically delimited by commas.",
"type": "array",
"items": {
"type": "string"
}
},
"datePublished": {
"description": "Date of first publication of this dataset.",
"oneOf": [{
"type": "string",
"format": "date-time"
},
{
"type": "string",
"format": "date"
}
]
},
"dateModified": {
"description": "The date on which the dataset was most recently modified.",
"oneOf": [{
"type": "string",
"format": "date-time"
},
{
"type": "string",
"format": "date"
}
]
},
"version": {
"description": "The version of the dataset.",
"type": "string"
},
"identifier": {
"description": "The identifier of the dataset if available, e.g. ID from repositories GEO, EGA.",
"type": "string"
},
"measurementTechnique": {
"description": "The technique or method used to produce the Dataset.",
"enum": [
"whole-genome sequencing",
"whole-exome sequencing",
"mass spectrum"
]
},
"creator": {
"description": "The creater (often the primary contact) of this dataset.",
"$ref": "#/definitions/person"
},
"contributor": {
"description": "The other contributor(s) of this dataset, besides the creator",
"oneOf": [{
"$ref": "#/definitions/person"
},
{
"type": "array",
"items": {
"$ref": "#/definitions/person"
}
}
]
},
"publisher": {
"description": "The organization publish this dataset, usually the host institute.",
"type": "object",
"properties": {
"name": {
"type": "string"
},
"url": {
"type": "string",
"format": "uri"
}
},
"required": ["name"]
},
"license": {
"description": "Specify the data access license",
"type": "object",
"properties": {
"text": {
"type": "string"
},
"url": {
"type": "string",
"format": "uri"
}
},
"required": ["url"]
},
"citation": {
"description": "The citation of this dataset.",
"type": "object",
"properties": {
"text": {
"type": "string"
},
"url": {
"type": "string",
"format": "uri"
},
"identifier": {
"type": "string",
"description": "use PubMed id here"
}
},
"required": ["text"]
},
"samples": {
"description": "sample info of this dataset."
},
"funder": {
"description": "funder info of this dataset."
},
"includedInDataCatalog": {
"description": "This is a fixed CTSA DataCatalog.",
"type": "object",
"properties": {
"name": {
"const": "CTSA Datasets"
},
"url": {
"const": "https://ctsa.ncats.nih.gov/cd2h/"
}
}
}
},
"required": [
"name",
"description",
"creator",
"publisher",
"identifier"
],
"definitions": {
"person": {
"description": "Reusable person definition",
"type": "object",
"properties": {
"name": {
"type": "string"
},
"url": {
"type": "string",
"format": "uri"
}
},
"required": ["name"]
}
}
}
},
{
"@id": "bts:samples",
"@type": "rdf:Property",
"rdfs:comment": "Sample information including size, organism, tissus, etc.",
"rdfs:label": "samples",
"schema:domainIncludes": {
"@id": "bts:CTSADataset"
},
"schema:rangeIncludes": {
"@id": "bts:DataSamples"
}
},
{
"@id": "bts:funder",
"@type": "rdf:Property",
"rdfs:comment": "Funder information",
"rdfs:label": "funder",
"schema:domainIncludes": {
"@id": "bts:CTSADataset"
},
"schema:rangeIncludes": {
"@id": "bts:BioMedicalGrant"
}
},
{
"@id": "bts:DataSamples",
"@type": "rdfs:Class",
"rdfs:comment": "Sample information including size, organism, tissue, etc.",
"rdfs:label": "DataSamples",
"rdfs:subClassOf": {
"@id": "schema:Thing"
},
"$validation": {
"type": "object",
"properties": {
"size": {
"description": "the number of samples",
"type": "integer",
"minimum": 1
},
"organism": {
"description": "the organism of samples come from",
"oneOf": [{
"type": "string"
},
{
"type": "array",
"items": {
"type": "string"
}
}
]
},
"tissue": {
"description": "the tissue of samples come from",
"oneOf": [{
"type": "string"
},
{
"type": "array",
"items": {
"type": "string"
}
}
]
}
}
}
},
{
"@id": "bts:size",
"@type": "rdf:Property",
"rdfs:comment": "Sample size",
"rdfs:label": "size",
"schema:domainIncludes": {
"@id": "bts:DataSamples"
},
"schema:rangeIncludes": {
"@id": "schema:Integer"
}
},
{
"@id": "bts:organism",
"@type": "rdf:Property",
"rdfs:comment": "Sample organism",
"rdfs:label": "organism",
"schema:domainIncludes": {
"@id": "bts:DataSamples"
},
"schema:rangeIncludes": {
"@id": "schema:Text"
}
},
{
"@id": "bts:tissue",
"@type": "rdf:Property",
"rdfs:comment": "Sample tissue",
"rdfs:label": "tissue",
"schema:domainIncludes": {
"@id": "bts:DataSamples"
},
"schema:rangeIncludes": {
"@id": "schema:Text"
}
},
{
"@id": "bts:BioMedicalGrant",
"@type": "rdfs:Class",
"rdfs:comment": "BioMedical Grant information, typically from NIH or NSF",
"rdfs:label": "BioMedicalGrant",
"rdfs:subClassOf": {
"@id": "schema:Grant"
},
"$validation": {
"type": "object",
"properties": {
"identifier": {
"description": "the grant identifier, grant number or FAIN",
"type": "string"
},
"institute": {
"description": "The name of the funding institute, e.g. NIH or NSF",
"oneOf": [{
"type": "string"
},
{
"type": "array",
"items": {
"type": "string"
}
}
]
},
"agency": {
"description": "The agency under the funding institute, e.g. NCATS, NHGRI, NCI etc.",
"oneOf": [{
"type": "string"
},
{
"type": "array",
"items": {
"type": "string"
}
}
]
}
}
}
},
{
"@id": "bts:institute",
"@type": "rdf:Property",
"rdfs:comment": "The name of the funding institute, e.g. NIH or NSF",
"rdfs:label": "institute",
"schema:domainIncludes": {
"@id": "bts:BioMedicalGrant"
},
"schema:rangeIncludes": {
"@id": "schema:Text"
}
},
{
"@id": "bts:agency",
"@type": "rdf:Property",
"rdfs:comment": "The agency under the funding institute, e.g. NCATS, NHGRI, NCI etc.",
"rdfs:label": "agency",
"schema:domainIncludes": {
"@id": "bts:BioMedicalGrant"
},
"schema:rangeIncludes": {
"@id": "schema:Text"
}
}
]
}
{
"@context": "http://schema.org/",
"@type": "Dataset",
"identifier": "EGAD00001003941",
"name": "Wellderly Dataset from Scripps CTSA center",
"description": "Whole genome sequences of 511 individuals of a cohort of more than 1,400 healthy elderly individuals recruited from across the United States. Ages range from 80 to 105. Participants have not developed any common chronic medical conditions or diseases. Whole genome sequences generated on the Complete Genomics platform.",
"keywords": [
"genomics",
"genetics",
"healthy",
"aging",
"elderly",
"longevity",
"cardiovascular",
"alzheimer's",
"cognition"
],
"url": "https://www.scripps.edu/science-and-medicine/translational-institute/translational-research/genomic-medicine/wellderly/",
"version": "1.0",
"datePublished": "2016-04-16",
"dateModified": "2018-06-27",
"license": {
"@type": "URL",
"text": "Custom Data Access Agreement",
"url": "https://redcapstsi.scripps.edu/redcap/surveys/?s=NT4N7A3KJD"
},
"sameAs": "https://www.ebi.ac.uk/ega/datasets/EGAD00001003941",
"measurementTechnique": "whole-genome sequencing",
"publisher": {
"@type": "Organization",
"name": "Scripps Research",
"url": "http://scripps.edu"
},
"creator": {
"@type": "Person",
"name": "Ali Torkamani",
"url": "https://www.scripps.edu/faculty/torkamani/"
},
"citation": {
"text": "Galina A. Erikson, Dale L. Bodian, Manuel Rueda, Bhuvan Molparia, Erick R. Scott, Ashley A. Scott-Van Zeeland, Sarah E. Topol, Nathan E. Wineinger, John E. Niederhuber, Eric J. Topol, Ali Torkamani, Whole-Genome Sequencing of a Healthy Aging Cohort, Cell, Volume 165, Issue 4, 2016, Pages 1002-1011, ISSN 0092-8674.",
"url": "https://doi.org/10.1016/j.cell.2016.03.022",
"identifier": "27114037"
},
"samples": {
"size": 511,
"organism": "human",
"tissue": "peripheral blood"
},
"funder": {
"identifier": "UL1TR002550",
"institute": "NIH",
"agency": "NCATS"
},
"includedInDataCatalog": {
"@type": "DataCatalog",
"name": "CTSA Datasets",
"url": "https://ctsa.ncats.nih.gov/cd2h/"
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment