Created
January 20, 2018 01:40
-
-
Save neksa/2a648c98fef01630dc61f78e97276512 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json

import requests
# Base URL of the MutaGene service; API endpoint paths are appended to it.
MUTAGENE_URL = "https://www.ncbi.nlm.nih.gov/research/mutagene"
def get_profile(fname, assembly=37):
    """
    Call the MutaGene REST API to convert a VCF file into a mutational profile.

    The file is uploaded to the ``/pub/api/identify/profile`` endpoint and the
    ``result_counts`` entry of the JSON response is returned (counts of
    mutations for each of the 96 context-dependent mutations).

    It is important to specify the genome assembly correctly.
    Currently 19, 37 and 38 will work.

    :param fname: path of the VCF file to upload
    :param assembly: genome assembly identifier sent with the request
    :return: the ``result_counts`` value of the response, or ``None`` when the
        server answers with a status code other than 200
    """
    url = MUTAGENE_URL + '/pub/api/identify/profile'
    # Use a context manager so the uploaded file handle is always closed,
    # even when the request raises.
    with open(fname, 'rb') as vcf_file:
        r = requests.post(url, files={'file': vcf_file}, data={'assembly': assembly})
    if r.status_code == 200:
        return r.json()['result_counts']
    # Any other status is reported to the caller as "no profile".
    return None
def get_decomposition(profile_counts, signatures='COSMIC30', others_threshold=0.0):
    """
    Decompose a mutational profile into a combination of signatures.

    It is highly recommended to use profile_counts instead of profile in order
    to use the Maximum Likelihood method.

    :param profile_counts: mutation counts as returned by ``get_profile``;
        sent to the server JSON-encoded
    :param signatures: should be one of COSMIC30, MUTAGENE5, MUTAGENE10
    :param others_threshold: signatures with exposure less than or equal to
        this threshold are not reported individually; the sum of their
        exposures is reported as "Other signatures". Set to 0 if not needed.
        The MutaGene website uses others_threshold = 0.05 by default.
    :return: the ``decomposition`` value of the JSON response, or ``None``
        when the server answers with a status code other than 200
    """
    url = MUTAGENE_URL + '/pub/api/identify/decomposition'
    payload = {
        'profile_counts': json.dumps(profile_counts),
        'signatures': signatures,
        'others_threshold': others_threshold,
    }
    r = requests.post(url, data=payload)
    if r.status_code == 200:
        return r.json()['decomposition']
    # Any other status is reported to the caller as "no decomposition".
    return None
def print_profile_counts(profile_counts):
    """
    Print a context-dependent mutational profile.

    Each entry is written as ``<mutation>\\t<count>`` with the count rounded
    to zero decimals, followed by one empty line after the last entry.

    :param profile_counts: mapping of mutation name to count; ``None``
        (no profile available) prints nothing
    """
    if profile_counts is None:
        return
    # Iterate over the argument itself, not a module-level variable, so the
    # function works when imported and always prints the data it was given.
    for mutation, value in profile_counts.items():
        print("{}\t{:.0f}".format(mutation, value))
    print()
def print_decomposition(decomposition):
    """
    Print the results of a decomposition.

    Each component is written as ``<name>\\t<score>\\t<mutations>`` (score with
    two decimals, mutations with none), followed by one empty line after the
    last component.

    :param decomposition: iterable of mappings with the keys ``name``,
        ``score`` and ``mutations``; ``None`` (no decomposition available)
        prints nothing
    """
    if decomposition is None:
        return
    for component in decomposition:
        print("{}\t{:.2f}\t{:.0f}".format(
            component['name'], component['score'], component['mutations']))
    print()
if __name__ == '__main__':
    # Example run: profile each VCF file, then decompose the profile against
    # every supported signature set.
    vcf_files = ['test.vcf', ]
    for file_name in vcf_files:
        profile = get_profile(file_name, assembly=37)
        if profile is None:
            # The profile request failed; there is nothing to print or decompose.
            continue
        print_profile_counts(profile)
        for signature_type in ('COSMIC30', 'MUTAGENE5', 'MUTAGENE10'):
            decomposition = get_decomposition(profile, signature_type)
            # get_decomposition returns None on a failed request; skip those.
            if decomposition is not None:
                print_decomposition(decomposition)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Test output for my VCF is