Skip to content

Instantly share code, notes, and snippets.

@theferrit32
Created March 14, 2024 17:37
Show Gist options
  • Save theferrit32/c9347dd7a5db88b986055bfde7f434c4 to your computer and use it in GitHub Desktop.
Save theferrit32/c9347dd7a5db88b986055bfde7f434c4 to your computer and use it in GitHub Desktop.
vrs-python-issue-363
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"https://github.com/ga4gh/vrs-python/issues/363\n",
"\n",
"```md\n",
"NC_000016.9:g.89306725_89339913del\n",
"curl -X 'GET' \\\n",
" 'https://normalize.cancervariants.org/variation/translate_from?variation=NC_000016.9%3Ag.89306725_89339913del&fmt=hgvs' \\\n",
" -H 'accept: application/json'\n",
"```\n",
"\n",
"```json\n",
"{\n",
" \"query\": {\n",
" \"variation\": \"NC_000016.9:g.89306725_89339913del\",\n",
" \"fmt\": \"hgvs\"\n",
" },\n",
" \"warnings\": [],\n",
" \"service_meta_\": {\n",
" \"name\": \"variation-normalizer\",\n",
" \"version\": \"0.8.1\",\n",
" \"response_datetime\": \"2024-03-14T16:06:49.593965Z\",\n",
" \"url\": \"https://github.com/cancervariants/variation-normalization\"\n",
" },\n",
" \"vrs_python_meta_\": {\n",
" \"name\": \"vrs-python\",\n",
" \"version\": \"2.0.0a2\",\n",
" \"url\": \"https://github.com/ga4gh/vrs-python\"\n",
" },\n",
" \"variation\": {\n",
" \"id\": \"ga4gh:VA.jamp0iMJTRCvYcbawsyBODCIAkwldCM7\",\n",
" \"type\": \"Allele\",\n",
" \"location\": {\n",
" \"id\": \"ga4gh:SL.hwDN-Cy3-sBsiuMvCl-X50U4rGeCLWAI\",\n",
" \"type\": \"SequenceLocation\",\n",
" \"sequenceReference\": {\n",
" \"type\": \"SequenceReference\",\n",
" \"refgetAccession\": \"SQ.W6wLoIFOn4G7cjopxPxYNk2lcEqhLQFb\"\n",
" },\n",
" \"start\": 89306723,\n",
" \"end\": 89339913\n",
" },\n",
" \"state\": {\n",
" \"type\": \"ReferenceLengthExpression\",\n",
" \"length\": 1,\n",
" \"sequence\": \"T\",\n",
" \"repeatSubunitLength\": 33189\n",
" }\n",
" }\n",
"}\n",
"\n",
"```\n",
"\n",
"```md\n",
"NC_000016.9:g.89306725_89339913del\n",
"\n",
"--- 1-indexed positional coordinates (hgvs)\n",
" 89306725....89339913\n",
" |---------| \n",
" G T T C ... A T C ...\n",
"--- 0-based interbase coordinates (vrs)\n",
" G T T C ... A T C ...\n",
" | |\n",
" 89306723.....89339913\n",
"\n",
"RLE.length = 1, sequence = \"T\"\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"ga4gh:VA.mROPdv6I2EsC8qwKdiBj4EedteLTRctP\",\n",
" \"type\": \"Allele\",\n",
" \"digest\": \"mROPdv6I2EsC8qwKdiBj4EedteLTRctP\",\n",
" \"location\": {\n",
" \"id\": \"ga4gh:SL.BqujMxSzNcw-N3LntN8YSYrFqx8Sx2ig\",\n",
" \"type\": \"SequenceLocation\",\n",
" \"digest\": \"BqujMxSzNcw-N3LntN8YSYrFqx8Sx2ig\",\n",
" \"sequenceReference\": {\n",
" \"type\": \"SequenceReference\",\n",
" \"refgetAccession\": \"SQ.W6wLoIFOn4G7cjopxPxYNk2lcEqhLQFb\"\n",
" },\n",
" \"start\": 89306723,\n",
" \"end\": 89339913\n",
" },\n",
" \"state\": {\n",
" \"type\": \"ReferenceLengthExpression\",\n",
" \"length\": 1,\n",
" \"sequence\": \"T\",\n",
" \"repeatSubunitLength\": 33189\n",
" }\n",
"}\n",
"Original sequence length: 33188\n",
"Normalized sequence length: 33190\n",
"VRS repeatSubunitLength: 33189\n",
"original_sequence[:20]='TCGAGACCAGCCTGGCCAAC'\n",
"normalized_sequence[:20]='TTCGAGACCAGCCTGGCCAA'\n",
"original_sequence[-20:]='AGGTCAAGAGATCGAGACCA'\n",
"normalized_sequence[-20:]='GGTCAAGAGATCGAGACCAT'\n"
]
}
],
"source": [
"import json\n",
"import os\n",
"\n",
"from ga4gh.vrs.extras.translator import AlleleTranslator\n",
"from biocommons.seqrepo import SeqRepo\n",
"from biocommons.seqrepo.dataproxy import SeqRepoDataProxy\n",
"\n",
"# Local SeqRepo snapshot. Set the SEQREPO_PATH environment variable to point\n",
"# at a different instance; the default is the path this notebook was run with.\n",
"seqrepo_path = os.environ.get(\n",
"    \"SEQREPO_PATH\", \"/Users/kferrite/dev/biocommons.seqrepo/seqrepo/2021-01-29\"\n",
")\n",
"dataproxy = SeqRepoDataProxy(SeqRepo(seqrepo_path))\n",
"\n",
"hgvs_expr = \"NC_000016.9:g.89306725_89339913del\"\n",
"\n",
"# HGVS positions from hgvs_expr: 1-based, inclusive on both ends.\n",
"hgvs_start = 89306725\n",
"hgvs_end = 89339913\n",
"\n",
"# get_sequence() is called below with VRS start/end, i.e. 0-based interbase\n",
"# (half-open) coordinates. Converting an inclusive 1-based range to that\n",
"# system shifts only the start: the exclusive interbase end equals the\n",
"# inclusive 1-based end. Subtracting 1 from both would drop the last base.\n",
"start_0 = hgvs_start - 1\n",
"end_0 = hgvs_end\n",
"original_sequence = dataproxy.get_sequence(\"NC_000016.9\", start_0, end_0)\n",
"\n",
"# The fetched sequence must cover every deleted base.\n",
"assert len(original_sequence) == hgvs_end - hgvs_start + 1\n",
"\n",
"translator = AlleleTranslator(dataproxy)\n",
"\n",
"allele = translator._from_hgvs(hgvs_expr)\n",
"print(json.dumps(allele.model_dump(exclude_none=True), indent=2))\n",
"\n",
"# Fetch the reference sequence spanned by the normalized VRS location so it\n",
"# can be compared with the originally deleted sequence.\n",
"normalized_start = allele.location.start\n",
"normalized_end = allele.location.end\n",
"normalized_sequence = dataproxy.get_sequence(\n",
"    f\"ga4gh:{allele.location.sequenceReference.refgetAccession}\",\n",
"    normalized_start,\n",
"    normalized_end,\n",
")\n",
"print(f\"Original sequence length: {len(original_sequence)}\")\n",
"print(f\"Normalized sequence length: {len(normalized_sequence)}\")\n",
"print(f\"VRS repeatSubunitLength: {allele.state.repeatSubunitLength}\")\n",
"print(f\"{original_sequence[:20]=}\")\n",
"print(f\"{normalized_sequence[:20]=}\")\n",
"print(f\"{original_sequence[-20:]=}\")\n",
"print(f\"{normalized_sequence[-20:]=}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment