Last active
August 23, 2021 18:27
-
-
Save jsstevenson/9dcaa69f21e9563890e4f1cc75a5ea49 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//
// Top-level questions:
// * Do any Array-style components require members?
// * Do any scalar-style components require values (i.e. can they be Optional)?
/**
 * Worked example of a fusion object, annotated with open schema-design
 * questions. The inline comments are design notes, not settled decisions.
 *
 * Top-level properties shown here:
 *   - r_frame_preserved     boolean
 *   - regulatory_elements   array of objects (several alternative shapes shown)
 *   - protein_domains       array of objects
 *   - causative_event       object with an `event_type`
 *   - transcript_components array of objects discriminated by `component_type`
 */
const example = {
  // ** reading frame preserved
  // mandatory?
  // how to (if needed?) represent 'unknown'?
  //   not required to be included -- fusor // null
  //   curation tool -- should be yes or no
  "r_frame_preserved": true,

  // ** regulatory elements
  "regulatory_elements": [
    {
      "type": "promoter",
      "value_id": "hgnc:100",
      "label": "ASIC1",
    },
    {
      "type": "enhancer",
      "value_id": "hgnc:200",
      "label": "G1",
    },
    // DO THIS:
    // * either fill in gene descriptor or gene descriptor ID
    // * fill in optional
    // what if it does not normalize?
    // * have to provide valid gene
    {
      "type": "enhancer",
      "gene_descriptor_id": "fusion.gene:G1",
    },
    // should do something like this instead?
    // just include gene descriptor id and have a GeneDescriptors property
    // below including them
    {
      "type": "enhancer",
      "gene": { // name the field 'gene'?
        "type": "GeneDescriptor",
        "id": "fusion.gene:G1", // id should be "fusion.gene:..." (note was truncated in the original)
        "value_id": "hgnc:200",
        "label": "G1",
      },
    },
  ],

  // ** protein domains
  // what if domain name doesn't normalize to an interpro ID? leave as unknown?
  // what if gene symbol doesn't normalize to a gene concept ID?
  // what should we put for `id`? something like "fusion.gene:NTRK1"?
  // gene descriptor id is just "gene:NTRK1"
  // for protein domain, we should already ask for gene, give preloaded list
  // of possible domain names
  "protein_domains": [
    {
      "status": "preserved",
      "name": "tyrosine kinase catalytic domain",
      "id": "interpro:IPR020635",
      "gene": {
        // id field here --- "id": "fusion.gene:NTRK1"?
        "value_id": "hgnc:8031",
        "label": "NTRK1",
        "type": "GeneDescriptor",
      },
    },
  ],

  // ** causative event
  // option for 'unknown'? no need
  "causative_event": {
    "event_type": "rearrangement", // enum: {'rearrangement', 'read-through', 'trans-splicing'}
  },

  // ** transcript components
  //
  // components to add/fit?
  // * gene (just known gene symbol)
  // * unknown
  //
  // always at least
  // gene component:
  //   make it a descriptor
  // unknown_gene: unknown
  // must be length >= 2
  "transcript_components": [
    // unknown genes
    // * front end: user chooses either ChromosomeLocation or SequenceLocation?
    {
      "component_type": "unknown_gene",
      "region": {
        "sequence_id": "ncbi:NC_000001.11",
        "interval": {
          "type": "SimpleInterval",
          "start": 1000,
          "end": 5000,
        },
        "type": "SequenceLocation",
      },
    },
    {
      "component_type": "unknown_gene",
      "region": {
        "species_id": "<whatever>",
        "interval": {
          "type": "CytobandInterval",
          "start": "p12.1",
          "end": "p12.16",
        },
        "chr": "12",
        "type": "ChromosomeLocation",
      },
    },
    {
      "component_type": "unknown_gene",
      "region": null, // use for "unknown" component?
    },
    // linker
    {
      "component_type": "linker_sequence",
      "linker_sequence": "ATTATTA", // rename property to "sequence"?
    },
    // transcript segment component
    {
      "component_type": "transcript_segment",
      "transcript": "NM:002529.3",
      "exon_start": 10,
      "exon_start_offset": -8,
      "exon_end": 17,
      "exon_end_offset": 8,
      "gene": {
        "id": "fusion.gene:NTRK1",
        "value_id": "hgnc:8031",
        "label": "NTRK1",
        "type": "GeneDescriptor",
      },
      "component_genomic_region": {
        "type": "LocationDescription",
        "description": null, // ??
        "value": {
          // NOTE(review): this location uses `seq_id` with flat `start`/`end`,
          // whereas the unknown_gene SequenceLocation above uses `sequence_id`
          // with a nested `interval` -- confirm which shape is intended.
          "seq_id": "ncbi:NC_000001.13",
          "start": 154170401,
          // NOTE(review): `end` is roughly 10x `start`; possibly a typo -- confirm.
          "end": 1556346348,
          "type": "SequenceLocation", // any case where this is ever a ChromosomeLocation? NO
        },
      },
    },
  ],
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment