Skip to content

Instantly share code, notes, and snippets.

@magaliruffier
Last active August 29, 2015 14:03
Show Gist options
  • Save magaliruffier/dc82aa402afee95d7177 to your computer and use it in GitHub Desktop.
Save magaliruffier/dc82aa402afee95d7177 to your computer and use it in GitHub Desktop.
{
{
"input": "1 230845794 230845794 A G . . .",
"colocated_variants": [
{
"aa_maf": 0.173173,
"ea_maf": 0.425814,
"amr_maf": 0.36,
"strand": 1,
"id": "rs699",
"asn_maf": 0.16,
"allele_string": "A/G",
"minor_allele_freq": 0.3384,
"afr_maf": 0.13,
"clin_sig": [
"other"
],
"eur_maf": 0.41,
"end": 230845794,
"minor_allele": "A",
"start": 230845794,
"pubmed": [
18513389,
19131662,
19263529,
19330901,
19559392,
18603647,
23021345,
20570668,
18069999,
18248681,
18279468,
18637188,
18653189,
18698212,
18953568,
19108684,
19932491,
20029521,
20061926,
20185782,
20486282,
20592051,
21261619,
21304999,
21306748,
21438754,
21988197,
22531885,
22569109,
22858200,
23132613,
23251296,
23333443,
23354977,
23681449,
23716723
]
},
{
"strand": 1,
"id": "CM920010",
"allele_string": "HGMD_MUTATION",
"end": 230845794,
"start": 230845794
},
{
"strand": 1,
"id": "COSM425562",
"allele_string": "A/G",
"end": 230845794,
"start": 230845794
}
],
"end": 230845794,
"seq_region_name": "1",
"transcript_consequences": [
{
"hgvsp": "ENSP00000355627.4:p.Met268Thr",
"variant_allele": "G",
"polyphen_score": 0,
"cdna_end": 1018,
"codons": "aTg/aCg",
"hgvsc": "ENST00000366667.4:c.803N>C",
"protein_end": 268,
"amino_acids": "M/T",
"strand": -1,
"gene_symbol": "AGT",
"transcript_id": "ENST00000366667",
"cdna_start": 1018,
"gene_id": "ENSG00000135744",
"cds_start": 803,
"canonical": 1,
"polyphen_prediction": "benign",
"sift_prediction": "tolerated",
"protein_start": 268,
"biotype": "protein_coding",
"gene_symbol_source": "HGNC",
"sift_score": 1,
"cds_end": 803,
"consequence_terms": [
"missense_variant"
]
},
{
"variant_allele": "G",
"distance": 650,
"strand": -1,
"gene_symbol": "RP11-99J16__A.2",
"transcript_id": "ENST00000412344",
"gene_id": "ENSG00000244137",
"biotype": "antisense",
"gene_symbol_source": "Clone_based_vega_gene",
"consequence_terms": [
"downstream_gene_variant"
]
}
],
"strand": 1,
"regulatory_feature_consequences": [
{
"variant_allele": "G",
"consequence_terms": [
"regulatory_region_variant"
],
"regulatory_feature_id": "ENSR00001529861"
}
],
"id": "230845794",
"most_severe_consequence": "missense_variant",
"allele_string": "A/G",
"start": 230845794
},
{
"input": "11 128496 128496 T G . . .",
"colocated_variants": [
{
"strand": 1,
"id": "rs111606699",
"allele_string": "T/G",
"end": 128496,
"start": 128496
}
],
"end": 128496,
"seq_region_name": "11",
"motif_feature_consequences": [
{
"variant_allele": "G",
"motif_feature_id": "MA0162.1",
"motif_name": "Jaspar_Matrix_Egr1:MA0162.1",
"high_inf_pos": "N",
"consequence_terms": [
"TF_binding_site_variant"
],
"motif_pos": 5,
"motif_score_change": -0.027,
"strand": 1
}
],
"transcript_consequences": [
{
"variant_allele": "G",
"distance": 2712,
"strand": -1,
"gene_symbol": "RP11-304M2.6",
"transcript_id": "ENST00000529266",
"gene_id": "ENSG00000254468",
"canonical": 1,
"biotype": "lincRNA",
"gene_symbol_source": "Clone_based_vega_gene",
"consequence_terms": [
"upstream_gene_variant"
]
},
{
"variant_allele": "G",
"distance": 783,
"strand": 1,
"gene_symbol": "RP11-304M2.3",
"transcript_id": "ENST00000527297",
"gene_id": "ENSG00000255229",
"canonical": 1,
"biotype": "antisense",
"gene_symbol_source": "Clone_based_vega_gene",
"consequence_terms": [
"upstream_gene_variant"
]
},
{
"variant_allele": "G",
"hgvsc": "ENST00000526704.2:n.2160-848N>C",
"strand": -1,
"gene_symbol": "LINC01001",
"transcript_id": "ENST00000526704",
"gene_id": "ENSG00000230724",
"biotype": "lincRNA",
"gene_symbol_source": "HGNC",
"consequence_terms": [
"intron_variant",
"nc_transcript_variant"
]
},
{
"variant_allele": "G",
"distance": 2373,
"strand": 1,
"gene_symbol": "CICP23",
"transcript_id": "ENST00000605013",
"gene_id": "ENSG00000270921",
"canonical": 1,
"biotype": "processed_pseudogene",
"gene_symbol_source": "HGNC",
"consequence_terms": [
"downstream_gene_variant"
]
},
{
"variant_allele": "G",
"hgvsc": "ENST00000540375.1:n.1760-480N>C",
"strand": -1,
"gene_symbol": "LINC01001",
"transcript_id": "ENST00000540375",
"gene_id": "ENSG00000230724",
"canonical": 1,
"biotype": "lincRNA",
"gene_symbol_source": "HGNC",
"consequence_terms": [
"intron_variant",
"nc_transcript_variant"
]
}
],
"strand": 1,
"regulatory_feature_consequences": [
{
"variant_allele": "G",
"consequence_terms": [
"regulatory_region_variant"
],
"regulatory_feature_id": "ENSR00000556729"
}
],
"id": "128496",
"most_severe_consequence": "intron_variant",
"allele_string": "T/G",
"start": 128496
},
{
"input": "21 9462266 9462266 G T . . .",
"colocated_variants": [
{
"strand": 1,
"id": "rs71252742",
"allele_string": "G/T",
"end": 9462266,
"start": 9462266
},
{
"strand": 1,
"id": "rs373173311",
"allele_string": "G/T",
"end": 9462266,
"start": 9462266
}
],
"end": 9462266,
"seq_region_name": "21",
"strand": 1,
"id": "9462266",
"most_severe_consequence": "intergenic_variant",
"intergenic_consequences": [
{
"variant_allele": "T",
"consequence_terms": [
"intergenic_variant"
]
}
],
"allele_string": "G/T",
"start": 9462266
},
{
"input": "1 160283 sv1 . <DUP> . . SVTYPE=DUP;END=471362",
"end": 471362,
"seq_region_name": "1",
"variant_class": "duplication",
"strand": 1,
"transcript_consequences": [
{
"variant_allele": "duplication",
"percentage_overlap": 100,
"bp_overlap": 895,
"strand": 1,
"gene_symbol": "RP4-669L17.10",
"transcript_id": "ENST00000432964",
"gene_id": "ENSG00000237094",
"biotype": "lincRNA",
"gene_symbol_source": "Clone_based_vega_gene",
"consequence_terms": [
"transcript_amplification"
]
},
{
"variant_allele": "duplication",
"percentage_overlap": 100,
"bp_overlap": 91,
"strand": -1,
"gene_symbol": "AL732372.1",
"transcript_id": "ENST00000540477",
"gene_id": "ENSG00000256186",
"canonical": 1,
"biotype": "pseudogene",
"gene_symbol_source": "Clone_based_ensembl_gene",
"consequence_terms": [
"transcript_amplification"
]
},
{
"variant_allele": "duplication",
"percentage_overlap": 100,
"bp_overlap": 6870,
"strand": -1,
"gene_symbol": "AP006222.2",
"transcript_id": "ENST00000442116",
"gene_id": "ENSG00000228463",
"biotype": "lincRNA",
"gene_symbol_source": "Clone_based_vega_gene",
"consequence_terms": [
"transcript_amplification"
]
},
{
"variant_allele": "duplication",
"percentage_overlap": 100,
"bp_overlap": 180,
"strand": 1,
"gene_symbol": "RP4-669L17.4",
"transcript_id": "ENST00000445840",
"gene_id": "ENSG00000224813",
"canonical": 1,
"biotype": "transcribed_unprocessed_pseudogene",
"gene_symbol_source": "Clone_based_vega_gene",
"consequence_terms": [
"transcript_amplification"
]
},
{
"variant_allele": "duplication",
"percentage_overlap": 100,
"bp_overlap": 3698,
"strand": 1,
"gene_symbol": "RP4-669L17.10",
"transcript_id": "ENST00000425496",
"gene_id": "ENSG00000237094",
"canonical": 1,
"biotype": "lincRNA",
"gene_symbol_source": "Clone_based_vega_gene",
"consequence_terms": [
"transcript_amplification"
]
},
{
"variant_allele": "duplication",
"percentage_overlap": 100,
"bp_overlap": 31142,
"strand": -1,
"gene_symbol": "AP006222.2",
"transcript_id": "ENST00000424587",
"gene_id": "ENSG00000228463",
"canonical": 1,
"biotype": "lincRNA",
"gene_symbol_source": "Clone_based_vega_gene",
"consequence_terms": [
"transcript_amplification"
]
},
{
"variant_allele": "duplication",
"percentage_overlap": 100,
"bp_overlap": 363,
"strand": -1,
"gene_symbol": "WBP1LP7",
"transcript_id": "ENST00000437905",
"gene_id": "ENSG00000269732",
"canonical": 1,
"biotype": "processed_pseudogene",
"gene_symbol_source": "HGNC",
"consequence_terms": [
"transcript_amplification"
]
},
{
"variant_allele": "duplication",
"percentage_overlap": 100,
"bp_overlap": 732,
"strand": 1,
"gene_symbol": "RP4-669L17.10",
"transcript_id": "ENST00000431321",
"gene_id": "ENSG00000237094",
"biotype": "lincRNA",
"gene_symbol_source": "Clone_based_vega_gene",
"consequence_terms": [
"transcript_amplification"
]
},
{
"variant_allele": "duplication",
"percentage_overlap": 100,
"bp_overlap": 1323,
"strand": 1,
"gene_symbol": "RP4-669L17.10",
"transcript_id": "ENST00000599771",
"gene_id": "ENSG00000237094",
"biotype": "lincRNA",
"gene_symbol_source": "Clone_based_vega_gene",
"consequence_terms": [
"transcript_amplification"
]
},
{
"variant_allele": "duplication",
"percentage_overlap": 100,
"bp_overlap": 4300,
"strand": 1,
"gene_symbol": "RP4-669L17.10",
"transcript_id": "ENST00000423728",
"gene_id": "ENSG00000237094",
"biotype": "lincRNA",
"gene_symbol_source": "Clone_based_vega_gene",
"consequence_terms": [
"transcript_amplification"
]
},
{
"variant_allele": "duplication",
"percentage_overlap": 100,
"bp_overlap": 2879,
"strand": -1,
"gene_symbol": "RP11-34P13.13",
"transcript_id": "ENST00000491962",
"gene_id": "ENSG00000241860",
"biotype": "lincRNA",
"gene_symbol_source": "Clone_based_vega_gene",
"consequence_terms": [
"transcript_amplification"
]
},
{
"variant_allele": "duplication",
"percentage_overlap": 100,
"bp_overlap": 1080,
"strand": 1,
"gene_symbol": "RP11-34P13.9",
"transcript_id": "ENST00000496488",
"gene_id": "ENSG00000241599",
"canonical": 1,
"biotype": "lincRNA",
"gene_symbol_source": "Clone_based_vega_gene",
"consequence_terms": [
"transcript_amplification"
]
},
{
"variant_allele": "duplication",
"percentage_overlap": 100,
"bp_overlap": 6848,
"strand": -1,
"gene_symbol": "RP4-669L17.2",
"transcript_id": "ENST00000450983",
"gene_id": "ENSG00000236601",
"canonical": 1,
"biotype": "lincRNA",
"gene_symbol_source": "Clone_based_vega_gene",
"consequence_terms": [
"transcript_amplification"
]
},
{
"variant_allele": "duplication",
"percentage_overlap": 100,
"bp_overlap": 385,
"strand": 1,
"gene_symbol": "RP4-669L17.1",
"transcript_id": "ENST00000458203",
"gene_id": "ENSG00000236679",
"canonical": 1,
"biotype": "processed_pseudogene",
"gene_symbol_source": "Clone_based_vega_gene",
"consequence_terms": [
"transcript_amplification"
]
},
{
"variant_allele": "duplication",
"percentage_overlap": 100,
"bp_overlap": 14358,
"strand": 1,
"gene_symbol": "RP4-669L17.10",
"transcript_id": "ENST00000440163",
"gene_id": "ENSG00000237094",
"biotype": "lincRNA",
"gene_symbol_source": "Clone_based_vega_gene",
"consequence_terms": [
"transcript_amplification"
]
},
{
"variant_allele": "duplication",
"percentage_overlap": 100,
"bp_overlap": 10645,
"strand": 1,
"gene_symbol": "RP4-669L17.10",
"transcript_id": "ENST00000601814",
"gene_id": "ENSG00000237094",
"biotype": "lincRNA",
"gene_symbol_source": "Clone_based_vega_gene",
"consequence_terms": [
"transcript_amplification"
]
},
{
"variant_allele": "duplication",
"percentage_overlap": 100,
"bp_overlap": 1763,
"strand": 1,
"gene_symbol": "RP4-669L17.10",
"transcript_id": "ENST00000601486",
"gene_id": "ENSG00000237094",
"biotype": "lincRNA",
"gene_symbol_source": "Clone_based_vega_gene",
"consequence_terms": [
"transcript_amplification"
]
},
{
"variant_allele": "duplication",
"percentage_overlap": 100,
"bp_overlap": 1854,
"strand": -1,
"gene_symbol": "AP006222.2",
"transcript_id": "ENST00000335577",
"gene_id": "ENSG00000228463",
"biotype": "lincRNA",
"gene_symbol_source": "Clone_based_vega_gene",
"consequence_terms": [
"transcript_amplification"
]
},
{
"variant_allele": "duplication",
"percentage_overlap": 100,
"bp_overlap": 2624,
"strand": 1,
"gene_symbol": "RP4-669L17.10",
"transcript_id": "ENST00000608420",
"gene_id": "ENSG00000237094",
"biotype": "lincRNA",
"gene_symbol_source": "Clone_based_vega_gene",
"consequence_terms": [
"transcript_amplification"
]
},
{
"variant_allele": "duplication",
"percentage_overlap": 100,
"bp_overlap": 2299,
"strand": 1,
"gene_symbol": "RP5-857K21.15",
"transcript_id": "ENST00000441866",
"gene_id": "ENSG00000236743",
"canonical": 1,
"biotype": "lincRNA",
"gene_symbol_source": "Clone_based_vega_gene",
"consequence_terms": [
"transcript_amplification"
]
},
{
"variant_allele": "duplication",
"percentage_overlap": 100,
"bp_overlap": 10664,
"strand": -1,
"gene_symbol": "AP006222.2",
"transcript_id": "ENST00000448958",
"gene_id": "ENSG00000228463",
"biotype": "lincRNA",
"gene_symbol_source": "Clone_based_vega_gene",
"consequence_terms": [
"transcript_amplification"
]
},
{
"variant_allele": "duplication",
"percentage_overlap": 100,
"bp_overlap": 4488,
"strand": 1,
"gene_symbol": "RP4-669L17.10",
"transcript_id": "ENST00000431812",
"gene_id": "ENSG00000237094",
"biotype": "lincRNA",
"gene_symbol_source": "Clone_based_vega_gene",
"consequence_terms": [
"transcript_amplification"
]
},
{
"variant_allele": "duplication",
"percentage_overlap": 100,
"bp_overlap": 112027,
"strand": 1,
"gene_symbol": "RP4-669L17.10",
"transcript_id": "ENST00000455207",
"gene_id": "ENSG00000237094",
"biotype": "lincRNA",
"gene_symbol_source": "Clone_based_vega_gene",
"consequence_terms": [
"transcript_amplification"
]
},
{
"variant_allele": "duplication",
"percentage_overlap": 100,
"bp_overlap": 995,
"strand": 1,
"gene_symbol": "OR4F29",
"transcript_id": "ENST00000426406",
"gene_id": "ENSG00000235249",
"canonical": 1,
"biotype": "protein_coding",
"gene_symbol_source": "HGNC",
"consequence_terms": [
"transcript_amplification"
]
},
{
"variant_allele": "duplication",
"percentage_overlap": 100,
"bp_overlap": 6639,
"strand": -1,
"gene_symbol": "RP4-669L17.2",
"transcript_id": "ENST00000412666",
"gene_id": "ENSG00000236601",
"biotype": "lincRNA",
"gene_symbol_source": "Clone_based_vega_gene",
"consequence_terms": [
"transcript_amplification"
]
},
{
"variant_allele": "duplication",
"distance": 2397,
"strand": -1,
"gene_symbol": "RNU6-1100P",
"transcript_id": "ENST00000410691",
"gene_id": "ENSG00000222623",
"canonical": 1,
"biotype": "snRNA",
"gene_symbol_source": "HGNC",
"consequence_terms": [
"upstream_gene_variant"
]
},
{
"variant_allele": "duplication",
"percentage_overlap": 100,
"bp_overlap": 20729,
"strand": 1,
"gene_symbol": "RP4-669L17.10",
"transcript_id": "ENST00000455464",
"gene_id": "ENSG00000237094",
"biotype": "lincRNA",
"gene_symbol_source": "Clone_based_vega_gene",
"consequence_terms": [
"transcript_amplification"
]
},
{
"variant_allele": "duplication",
"percentage_overlap": 100,
"bp_overlap": 3056,
"strand": 1,
"gene_symbol": "RP4-669L17.10",
"transcript_id": "ENST00000453935",
"gene_id": "ENSG00000237094",
"biotype": "lincRNA",
"gene_symbol_source": "Clone_based_vega_gene",
"consequence_terms": [
"transcript_amplification"
]
},
{
"variant_allele": "duplication",
"percentage_overlap": 100,
"bp_overlap": 363,
"strand": -1,
"gene_symbol": "AP006222.1",
"transcript_id": "ENST00000424429",
"gene_id": "ENSG00000241670",
"biotype": "processed_pseudogene",
"gene_symbol_source": "Clone_based_vega_gene",
"consequence_terms": [
"transcript_amplification"
]
},
{
"variant_allele": "duplication",
"percentage_overlap": 100,
"bp_overlap": 7154,
"strand": 1,
"gene_symbol": "RP4-669L17.10",
"transcript_id": "ENST00000440038",
"gene_id": "ENSG00000237094",
"biotype": "lincRNA",
"gene_symbol_source": "Clone_based_vega_gene",
"consequence_terms": [
"transcript_amplification"
]
},
{
"variant_allele": "duplication",
"percentage_overlap": 49.42,
"bp_overlap": 13579,
"strand": -1,
"gene_symbol": "RP11-34P13.13",
"transcript_id": "ENST00000466557",
"gene_id": "ENSG00000241860",
"biotype": "lincRNA",
"gene_symbol_source": "Clone_based_vega_gene",
"consequence_terms": [
"non_coding_exon_variant",
"intron_variant",
"nc_transcript_variant"
]
},
{
"variant_allele": "duplication",
"percentage_overlap": 100,
"bp_overlap": 2806,
"strand": -1,
"gene_symbol": "CICP7",
"transcript_id": "ENST00000432723",
"gene_id": "ENSG00000233653",
"canonical": 1,
"biotype": "processed_pseudogene",
"gene_symbol_source": "HGNC",
"consequence_terms": [
"transcript_amplification"
]
},
{
"variant_allele": "duplication",
"percentage_overlap": 100,
"bp_overlap": 2284,
"strand": 1,
"gene_symbol": "RP4-669L17.10",
"transcript_id": "ENST00000419160",
"gene_id": "ENSG00000237094",
"biotype": "lincRNA",
"gene_symbol_source": "Clone_based_vega_gene",
"consequence_terms": [
"transcript_amplification"
]
},
{
"variant_allele": "duplication",
"percentage_overlap": 100,
"bp_overlap": 2017,
"strand": 1,
"gene_symbol": "RP4-669L17.8",
"transcript_id": "ENST00000514436",
"gene_id": "ENSG00000250575",
"canonical": 1,
"biotype": "unprocessed_pseudogene",
"gene_symbol_source": "Clone_based_vega_gene",
"consequence_terms": [
"transcript_amplification"
]
},
{
"variant_allele": "duplication",
"percentage_overlap": 100,
"bp_overlap": 457,
"strand": -1,
"gene_symbol": "AP006222.1",
"transcript_id": "ENST00000450734",
"gene_id": "ENSG00000241670",
"canonical": 1,
"biotype": "processed_pseudogene",
"gene_symbol_source": "Clone_based_vega_gene",
"consequence_terms": [
"transcript_amplification"
]
}
],
"regulatory_feature_consequences": [
{
"variant_allele": "duplication",
"percentage_overlap": 87.67,
"consequence_terms": [
"regulatory_region_variant"
],
"bp_overlap": 576,
"regulatory_feature_id": "ENSR00000278273"
}
],
"id": "sv1",
"most_severe_consequence": "transcript_amplification",
"start": 471362
}
}
@heuermh
Copy link

heuermh commented Jul 6, 2014

Great, thanks!

Would it be possible to include the reference allele as a separate field? It is usually present in "allele_string": "A/G", but not always, e.g. "allele_string": "HGMD_MUTATION" (although that is an odd one).

@andrewyatz
Copy link

Numbers which are strings are a problem with Perl and any text-based transfer protocol. Perl says it doesn't care if it's a number until we want to eval it as one, and then it'll do the checking/conversion. As @magaliruffier says, it's a simple fix: just apply either +0 or *1 to any number.

As for the Boolean situation: yes, true is the correct way to represent this information. Our JSON library does handle this via Types::Serialiser and its true and false values, though this does mean rewriting the output for a number of endpoints. I'm in favour of the change, but its impact will have to be evaluated first. If it's too large, we should schedule the change for the next major bump.

@magaliruffier
Copy link
Author

The reference allele should correspond to the first allele in your input (if searching by region), as well as the first allele in the allele_string field.
In cases where the allele_string does not contain that information, we don't always know what the reference allele is.
HGMD_MUTATION tells us there is a known mutation at the location, but we don't know what reference sequence they used, which might be different from our reference (RefSeq gene vs GRCh37 assembly, for example).
We will look into adding the reference_allele field, but not in this implementation.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment