Created
April 3, 2018 21:44
-
-
Save DSuveges/c2759cc001a2b3c3100b4d87b65b6c87 to your computer and use it in GitHub Desktop.
Get COSMIC variants overlapping with a region using the Ensembl REST API
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Bash functions to retrieve COSMIC variants (ID, location (b38), consequence and phenotype) based on
# genomic location using Ensembl's REST API.
# The first solution uses only wget and Perl with the JSON module,
# while the second, more compact solution uses jq: https://stedolan.github.io/jq/manual/
# The outputs of the two solutions are expected to be identical.
# Without jq:
#
# get_COSMIC REGION
#   REGION  genomic region understood by the Ensembl REST API,
#           e.g. "7:140424943-140624564".
#
# Queries the Ensembl overlap endpoint for somatic variations in REGION,
# keeps the records whose source is COSMIC, then fetches each variant
# (with phenotypes) and prints one tab-separated line per variant:
#   name <TAB> most severe consequence <TAB> traits joined by "|" <TAB> location
# Runs of spaces inside the trait column are replaced by "_".
# Requires wget and Perl with the JSON module.
function get_COSMIC {
    local region="${1}"
    local id

    wget -q "http://rest.ensembl.org/overlap/region/human/${region}?feature=somatic_variation;content-type=application/json" -O - \
        | perl -MJSON -lne '
            # String comparison: a numeric == on "COSMIC" compares 0 == 0
            # and would let every source through.
            for my $v (@{ decode_json($_) }) {
                print $v->{id} if ($v->{source} // "") eq "COSMIC";
            }' \
        | while read -r id; do
            wget -q "http://rest.ensembl.org/variation/human/${id}?content-type=application/json&phenotypes=1" -O - \
                | perl -MJSON -lne '
                    my $v     = decode_json($_);
                    my $pheno = join "|", map { $_->{trait} } @{ $v->{phenotypes} || [] };
                    $pheno =~ s/ +/_/g;
                    print join "\t", $v->{name}, $v->{most_severe_consequence}, $pheno, $v->{mappings}->[0]->{location};'
        done
}
# With jq (v.1.5 or higher):
#
# get_COSMIC REGION
#   REGION  genomic region understood by the Ensembl REST API,
#           e.g. "7:140424943-140624564".
#
# Same contract as the wget + Perl variant: only variations whose source is
# COSMIC are looked up, and each one is printed as a tab-separated line:
#   name <TAB> most severe consequence <TAB> traits joined by "|" <TAB> location
# Runs of spaces inside the trait column are replaced by "_".
# Requires wget and jq.
function get_COSMIC {
    local region="${1}"
    local id

    wget -q "http://rest.ensembl.org/overlap/region/human/${region}?feature=somatic_variation;content-type=application/json" -O - \
        | jq -r '.[] | select(.source == "COSMIC") | .id' \
        | while read -r id; do
            wget -q "http://rest.ensembl.org/variation/human/${id}?content-type=application/json&phenotypes=1" -O - \
                | jq -r '"\(.name)\t\(.most_severe_consequence)\t\([.phenotypes[] | .trait] | join("|") | gsub(" +"; "_"))\t\(.mappings[0].location)"'
        done
}
# Sample input: chromosome:start-end, in the form accepted by the Ensembl REST API.
region="7:140424943-140624564"
# Usage (quoted so the region is always passed as a single argument):
get_COSMIC "${region}"
## Sample output:
# COSM5904881 splice_region_variant Skin_tumour 7:140425874-140425874
# COSM5482096 coding_sequence_variant Large_intestine_tumour 7:140425889-140425889
# COSM4492612 coding_sequence_variant Skin_tumour 7:140425896-140425896
# COSM2861240 coding_sequence_variant Large_intestine_tumour 7:140425908-140425908
# COSM4804782 coding_sequence_variant Liver_tumour 7:140425914-140425914
# COSM1312749 coding_sequence_variant Urinary_tract_tumour 7:140425923-140425923
# COSM356225 coding_sequence_variant Lung_tumour 7:140425957-140425957
# COSM3634220 coding_sequence_variant Large_intestine_tumour|Skin_tumour 7:140425965-140425965
# COSM4498272 coding_sequence_variant Skin_tumour|Upper_aerodigestive_tract_tumour 7:140426012-140426012
# COSM1086225 coding_sequence_variant Endometrium_tumour 7:140426013-140426013
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment