Skip to content

Instantly share code, notes, and snippets.

@carolinecunningham
Created August 9, 2018 21:41
Show Gist options
  • Save carolinecunningham/bcd5895c67b85226d94cda80fe5d8869 to your computer and use it in GitHub Desktop.
Save carolinecunningham/bcd5895c67b85226d94cda80fe5d8869 to your computer and use it in GitHub Desktop.
API for motifs and signatures
import requests
import json
import csv
import glob
# MUTAGENE_URL = "https://www.ncbi.nlm.nih.gov/research/mutagene"
#MUTAGENE_URL = "https://dev.ncbi.nlm.nih.gov/research/mutagene"
MUTAGENE_URL_M = "http://mwebdev2/research/mutagene"
# MUTAGENE_URL = "http://localhost:5000"
MUTAGENE_URL = "https://www.ncbi.nlm.nih.gov/research/mutagene"
path = '/Users/cunninghamck/PycharmProjects/motifs/*.txt'
def get_motifs(fname, assembly=37):
"""
Identify mutational motifs in a VCF or MAF sample
"""
url = MUTAGENE_URL_M + '/pub/api/identify/motifs'
files = {'file': open(fname, 'rb')}
r = requests.post(url, files=files, data={'assembly': assembly})
if r.status_code == 200:
return r.json()['motifs']
def print_motifs(motifs):
"""
Printing the results of motif identification
"""
if motifs is None:
#print("Empty")
return
#for m in motifs:
#print(m['motif'], m['mutations'])
def return_motifs(motifs):
"""
Printing the results of motif identification
"""
listed = {"\t":"\t"}
if motifs is None:
listed['APOBEC'] = 0
listed['AGEING'] = 0
listed['Tobacco'] = 0
listed['AID'] = 0
listed['UV Light'] = 0
listed['Aristolochic Acid (AA)'] = 0
return listed
for m in motifs:
listed[m['name']] = int(m['mutations'])
return listed
def get_profile(fname, assembly=37):
"""
Calling MutaGene REST API to convert a VCF file into a mutational profile (96 context-dependent mutational probabilities)
and profile_counts (counts of mutations for each of the 96 context-dependent mutations)
It is important to specify genome assembly correctly. Curently 19, 37 and 38 will work
"""
url = MUTAGENE_URL + '/pub/api/identify/profile'
files = {'file': open(fname, 'rb')}
r = requests.post(url, files=files, data={'assembly': assembly})
# print("STATUS", r.status_code)
if r.status_code == 200:
return r.json()['result_counts']
def get_decomposition(profile_counts, signatures='COSMIC30'):
"""
Decomposition of mutational profiles into a combination of signatures.
It is highly recommended to use profile_counts instead of profile in order to use Maximum Likelihood method
*signatures* should be one of COSMIC30 MUTAGENE5 MUTAGENE10
*others_threshold* is used for not reporting signatures with exposure less or equal than the threshold and reporting the sum of their exposures as "Other signatures".
Set *others_threshold* to 0 if not needed. The MutaGene website uses others_threshold = 0.05 by default.
"""
url = MUTAGENE_URL + '/pub/api/identify/decomposition'
r = requests.post(url, data={'profile_counts': json.dumps(profile_counts), 'signatures': signatures, 'others_threshold': 0.0})
# print("STATUS", r.status_code)
if r.status_code == 200:
return r.json()['decomposition']
def print_profile_counts(profile_counts):
"""
Printing context-dependent mutational profile
"""
for mutation, value in profile.items():
print("{}\t{:.0f}".format(mutation, value))
print()
def print_decomposition(decomposition):
"""
Printing the results of decomposition
"""
for component in decomposition:
print("{}\t{:.2f}\t{:.0f}".format(component['name'], component['score'], component['mutations']))
print()
def return_decomposition(decomposition):
"""
Printing the results of decomposition
"""
listed = {}
for component in decomposition:
name = str(component['name'])
mut = component['mutations']
listed[name] = mut
return listed
if __name__ == '__main__':
for filename in glob.glob(path):
vcf_files = [filename, ]
with open("further_analysis.csv", "a") as csvfile: # create csv file
for file_name in vcf_files:
motif_name = get_motifs(file_name, assembly=37)
value_list = return_motifs(motif_name)
mut = return_motifs(motif_name)
if len(mut) == 1:
continue
profile = get_profile(file_name, assembly=37)
if profile is not None:
for signature_type in ('MUTAGENE5', 'MUTAGENE10'):
decomposition = get_decomposition(profile, signature_type)
if signature_type in 'MUTAGENE5':
result_a = return_decomposition(decomposition)
if signature_type in 'MUTAGENE10':
result_b = return_decomposition(decomposition)
filewriter = csv.writer(csvfile, delimiter=",", quotechar="|", quoting=csv.QUOTE_MINIMAL)
filewriter.writerow(["File name", "\t", "T[C>K]W", "[C>T]G", "Y[C>T]", "N[T>S]W", "[C>R]G", "C[T>A]G]", "\t", "A.1 Tobacco", 'A.2 AID/APOBEC', "A.3 Unknown Etiology ",
"A.4 UV Light", "A.5 Deanimation of Methyl-Cytosine",
"B.1 Deanimation of Methyl-Cytosine", 'B.2 Unknown Etiology',
"B.3 Aristolochic Acid", "B.4 Tobacco or Aflotoxin", "B.5 AID/APOBEC",
"B.6 Unknown Etiology", "B.7 Unknown Etiology", "B.8 DNA Polymerase Epsilon",
"B.9 UV Radiation", "B.10 Defective DNA Mismatch Repair"])
filewriter.writerow([filename[43:], mut["\t"], mut['APOBEC'], mut['AGEING'], mut['UV Light'], mut['AID'],
mut['Tobacco'], mut['Aristolochic Acid (AA)'], mut["\t"],
result_a["MUTAGENE A.1"], result_a["MUTAGENE A.2"], result_a["MUTAGENE A.3"],
result_a["MUTAGENE A.4"], result_a["MUTAGENE A.5"], result_b["MUTAGENE B.1"],
result_b["MUTAGENE B.2"], result_b["MUTAGENE B.3"], result_b["MUTAGENE B.4"],
result_b["MUTAGENE B.5"], result_b["MUTAGENE B.6"], result_b["MUTAGENE B.7"],
result_b["MUTAGENE B.8"], result_b["MUTAGENE B.9"], result_b["MUTAGENE B.10"]])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment