Skip to content

Instantly share code, notes, and snippets.

@neksa
Created January 20, 2018 01:40
Show Gist options
  • Save neksa/2a648c98fef01630dc61f78e97276512 to your computer and use it in GitHub Desktop.
Save neksa/2a648c98fef01630dc61f78e97276512 to your computer and use it in GitHub Desktop.
import requests
import json
MUTAGENE_URL = "https://www.ncbi.nlm.nih.gov/research/mutagene"
def get_profile(fname, assembly=37):
"""
Calling MutaGene REST API to convert a VCF file into a mutational profile (96 context-dependent mutational probabilities)
and profile_counts (counts of mutations for each of the 96 context-dependent mutations)
It is important to specify genome assembly correctly. Curently 19, 37 and 38 will work
"""
url = MUTAGENE_URL + '/pub/api/identify/profile'
files = {'file': open(fname, 'rb')}
r = requests.post(url, files=files, data={'assembly': assembly})
# print("STATUS", r.status_code)
if r.status_code == 200:
return r.json()['result_counts']
def get_decomposition(profile_counts, signatures='COSMIC30'):
"""
Decomposition of mutational profiles into a combination of signatures.
It is highly recommended to use profile_counts instead of profile in order to use Maximum Likelihood method
*signatures* should be one of COSMIC30 MUTAGENE5 MUTAGENE10
*others_threshold* is used for not reporting signatures with exposure less or equal than the threshold and reporting the sum of their exposures as "Other signatures".
Set *others_threshold* to 0 if not needed. The MutaGene website uses others_threshold = 0.05 by default.
"""
url = MUTAGENE_URL + '/pub/api/identify/decomposition'
r = requests.post(url, data={'profile_counts': json.dumps(profile_counts), 'signatures': signatures, 'others_threshold': 0.0})
# print("STATUS", r.status_code)
if r.status_code == 200:
return r.json()['decomposition']
def print_profile_counts(profile_counts):
"""
Printing context-dependent mutational profile
"""
for mutation, value in profile.items():
print("{}\t{:.0f}".format(mutation, value))
print()
def print_decomposition(decomposition):
"""
Printing the results of decomposition
"""
for component in decomposition:
print("{}\t{:.2f}\t{:.0f}".format(component['name'], component['score'], component['mutations']))
print()
if __name__ == '__main__':
vcf_files = ['test.vcf', ]
for file_name in vcf_files:
profile = get_profile(file_name, assembly=37)
print_profile_counts(profile)
if profile is not None:
for signature_type in ('COSMIC30', 'MUTAGENE5', 'MUTAGENE10'):
decomposition = get_decomposition(profile, signature_type)
print_decomposition(decomposition)
@neksa
Copy link
Author

neksa commented Jan 20, 2018

Test output for my VCF is

            A[C>A]A 119
            A[C>A]C 114
            A[C>A]G 86
            A[C>A]T 64
            A[C>G]A 174
            A[C>G]C 90
            A[C>G]G 123
            A[C>G]T 125
            A[C>T]A 475
            A[C>T]C 372
            A[C>T]G 1423
            A[C>T]T 480
            A[T>A]A 53
            A[T>A]C 74
            A[T>A]G 68
            A[T>A]T 58
            A[T>C]A 466
            A[T>C]C 330
            A[T>C]G 1344
            A[T>C]T 512
            A[T>G]A 69
            A[T>G]C 59
            A[T>G]G 95
            A[T>G]T 67
            C[C>A]A 126
            C[C>A]C 136
            C[C>A]G 157
            C[C>A]T 96
            C[C>G]A 129
            C[C>G]C 147
            C[C>G]G 197
            C[C>G]T 165
            C[C>T]A 372
            C[C>T]C 397
            C[C>T]G 1594
            C[C>T]T 538
            C[T>A]A 25
            C[T>A]C 60
            C[T>A]G 114
            C[T>A]T 81
            C[T>C]A 316
            C[T>C]C 389
            C[T>C]G 1356
            C[T>C]T 542
            C[T>G]A 45
            C[T>G]C 153
            C[T>G]G 170
            C[T>G]T 90
            G[C>A]A 123
            G[C>A]C 110
            G[C>A]G 163
            G[C>A]T 77
            G[C>G]A 145
            G[C>G]C 144
            G[C>G]G 143
            G[C>G]T 129
            G[C>T]A 330
            G[C>T]C 396
            G[C>T]G 1067
            G[C>T]T 481
            G[T>A]A 42
            G[T>A]C 54
            G[T>A]G 81
            G[T>A]T 52
            G[T>C]A 295
            G[T>C]C 359
            G[T>C]G 912
            G[T>C]T 441
            G[T>G]A 67
            G[T>G]C 90
            G[T>G]G 154
            G[T>G]T 84
            T[C>A]A 75
            T[C>A]C 87
            T[C>A]G 48
            T[C>A]T 60
            T[C>G]A 157
            T[C>G]C 137
            T[C>G]G 130
            T[C>G]T 181
            T[C>T]A 290
            T[C>T]C 331
            T[C>T]G 728
            T[C>T]T 399
            T[T>A]A 18
            T[T>A]C 52
            T[T>A]G 28
            T[T>A]T 54
            T[T>C]A 251
            T[T>C]C 332
            T[T>C]G 633
            T[T>C]T 344
            T[T>G]A 60
            T[T>G]C 124
            T[T>G]G 109
            T[T>G]T 88

            COSMIC 1    0.29    7113
            COSMIC 26   0.21    5195
            COSMIC 3    0.18    4526
            COSMIC 20   0.15    3743
            COSMIC 12   0.10    2432
            COSMIC 30   0.06    1591
            COSMIC 24   0.01    265

            MUTAGENE A.3    0.52    12979
            MUTAGENE A.5    0.25    6347
            MUTAGENE A.4    0.11    2621
            MUTAGENE A.1    0.07    1745
            MUTAGENE A.2    0.05    1201

            MUTAGENE B.6    0.35    8676
            MUTAGENE B.1    0.15    3855
            MUTAGENE B.2    0.13    3302
            MUTAGENE B.7    0.11    2836
            MUTAGENE B.9    0.06    1545
            MUTAGENE B.3    0.05    1300
            MUTAGENE B.10   0.05    1206
            MUTAGENE B.4    0.04    985
            MUTAGENE B.5    0.03    863
            MUTAGENE B.8    0.01    328

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment