Skip to content

Instantly share code, notes, and snippets.

@jsstevenson
Last active August 23, 2021 18:27
Show Gist options
  • Save jsstevenson/9dcaa69f21e9563890e4f1cc75a5ea49 to your computer and use it in GitHub Desktop.
Save jsstevenson/9dcaa69f21e9563890e4f1cc75a5ea49 to your computer and use it in GitHub Desktop.
//
// Top-level questions:
// * Do any Array-style components require members?
// * Do any scalar-style components require values (i.e., can they be Optional)?
/**
 * Draft example of a fusion object, annotated with open schema-design
 * questions. This is a discussion aid: the inline comments record unresolved
 * questions and decisions, not a finalized specification.
 */
const example = {
  // ** Reading frame preserved
  // Open questions:
  //   * Is this field mandatory?
  //   * How (if needed) should 'unknown' be represented?
  // Notes:
  //   * fusor: not required to be included -- null
  //   * curation tool: should be yes or no
  "r_frame_preserved": true,

  // ** Regulatory elements
  "regulatory_elements": [
    {
      "type": "promoter",
      "value_id": "hgnc:100",
      "label": "ASIC1",
    },
    {
      "type": "enhancer",
      "value_id": "hgnc:200",
      "label": "G1",
    },
    // DO THIS:
    //   * either fill in gene descriptor or gene descriptor ID
    //   * fill in optional
    // What if it does not normalize?
    //   * have to provide a valid gene
    {
      "type": "enhancer",
      "gene_descriptor_id": "fusion.gene:G1",
    },
    // Should we do something like this instead?
    // Alternative: just include the gene descriptor ID and have a
    // GeneDescriptors property below that includes them.
    {
      "type": "enhancer",
      "gene": { // name the field 'gene'?
        "type": "GeneDescriptor",
        // id should be "fusion.gene:..." (the original note was cut off here)
        "id": "fusion.gene:G1",
        "value_id": "hgnc:200",
        "label": "G1",
      },
    },
  ],

  // ** Protein domains
  // Open questions:
  //   * What if the domain name doesn't normalize to an InterPro ID? Leave as unknown?
  //   * What if the gene symbol doesn't normalize to a gene concept ID?
  //   * What should we put for `id`? Something like "fusion.gene:NTRK1"?
  //     (gene descriptor id is just "gene:NTRK1")
  // For a protein domain, we should already ask for the gene and give a
  // preloaded list of possible domain names.
  "protein_domains": [
    {
      "status": "preserved",
      "name": "tyrosine kinase catalytic domain",
      "id": "interpro:IPR020635",
      "gene": {
        // id field here --- "id": "fusion.gene:NTRK1"?
        "value_id": "hgnc:8031",
        "label": "NTRK1",
        "type": "GeneDescriptor",
      },
    },
  ],

  // ** Causative event
  // Option for 'unknown'? No need.
  "causative_event": {
    "event_type": "rearrangement", // enum: {'rearrangement', 'read-through', 'trans-splicing'}
  },

  // ** Transcript components
  //
  // Components to add/fit?
  //   * gene (just known gene symbol)
  //   * unknown
  //
  // Always at least:
  //   gene component: make it a descriptor
  //   unknown_gene: unknown
  // Must be length >= 2.
  "transcript_components": [
    // Unknown genes
    //   * front end: user chooses either ChromosomeLocation or SequenceLocation?
    {
      "component_type": "unknown_gene",
      "region": {
        "sequence_id": "ncbi:NC_000001.11",
        "interval": {
          "type": "SimpleInterval",
          "start": 1000,
          "end": 5000,
        },
        "type": "SequenceLocation",
      },
    },
    {
      "component_type": "unknown_gene",
      "region": {
        "species_id": "<whatever>", // placeholder value
        "interval": {
          "type": "CytobandInterval",
          "start": "p12.1",
          "end": "p12.16",
        },
        "chr": "12",
        "type": "ChromosomeLocation",
      },
    },
    {
      "component_type": "unknown_gene",
      "region": null, // use for "unknown" component?
    },
    // Linker
    {
      "component_type": "linker_sequence",
      "linker_sequence": "ATTATTA", // rename property to "sequence"?
    },
    // Transcript segment component
    {
      "component_type": "transcript_segment",
      // NOTE(review): RefSeq accessions are usually written with an underscore
      // ("NM_002529.3"); the colon here may be a typo -- confirm.
      "transcript": "NM:002529.3",
      "exon_start": 10,
      "exon_start_offset": -8,
      "exon_end": 17,
      "exon_end_offset": 8,
      "gene": {
        "id": "fusion.gene:NTRK1",
        "value_id": "hgnc:8031",
        "label": "NTRK1",
        "type": "GeneDescriptor",
      },
      "component_genomic_region": {
        "type": "LocationDescription",
        "description": null, // ??
        "value": {
          // NOTE(review): this uses "seq_id" while the unknown_gene example
          // above uses "sequence_id" -- confirm which key is intended.
          "seq_id": "ncbi:NC_000001.13",
          "start": 154170401,
          // NOTE(review): `end` has one more digit than `start`; possibly a
          // typo -- confirm.
          "end": 1556346348,
          "type": "SequenceLocation", // any case where this is ever a ChromosomeLocation? NO
        },
      },
    },
  ],
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment