carolinecunningham/test_motif_sig_api.py

## test_motif_sig_api.py
import requests
import json
import csv
import glob

# MUTAGENE_URL = "https://www.ncbi.nlm.nih.gov/research/mutagene"
#MUTAGENE_URL = "https://dev.ncbi.nlm.nih.gov/research/mutagene"
MUTAGENE_URL_M = "http://mwebdev2/research/mutagene"
# MUTAGENE_URL = "http://localhost:5000"
MUTAGENE_URL = "https://www.ncbi.nlm.nih.gov/research/mutagene"

path = '/Users/cunninghamck/PycharmProjects/motifs/*.txt'

def get_motifs(fname, assembly=37):
    """
    Identify mutational motifs in a VCF or MAF sample
    """
    url = MUTAGENE_URL_M + '/pub/api/identify/motifs'
    files = {'file': open(fname, 'rb')}
    r = requests.post(url, files=files, data={'assembly': assembly})
    if r.status_code == 200:
        return r.json()['motifs']


def print_motifs(motifs):
    """
    Printing the results of motif identification
    """
    if motifs is None:
        #print("Empty")
        return

    #for m in motifs:
        #print(m['motif'], m['mutations'])

def return_motifs(motifs):
    """
    Printing the results of motif identification
    """
    listed = {"\t":"\t"}
    if motifs is None:
        listed['APOBEC'] = 0
        listed['AGEING'] = 0
        listed['Tobacco'] = 0
        listed['AID'] = 0
        listed['UV Light'] = 0
        listed['Aristolochic Acid (AA)'] = 0
        return listed
    for m in motifs:
        listed[m['name']] = int(m['mutations'])
    return listed

def get_profile(fname, assembly=37):
    """
    Calling MutaGene REST API to convert a VCF file into a mutational profile (96 context-dependent mutational probabilities)
    and profile_counts (counts of mutations for each of the 96 context-dependent mutations)
    It is important to specify genome assembly correctly. Curently 19, 37 and 38 will work
    """
    url = MUTAGENE_URL + '/pub/api/identify/profile'
    files = {'file': open(fname, 'rb')}
    r = requests.post(url, files=files, data={'assembly': assembly})
    # print("STATUS", r.status_code)
    if r.status_code == 200:
        return r.json()['result_counts']


def get_decomposition(profile_counts, signatures='COSMIC30'):
    """
    Decomposition of mutational profiles into a combination of signatures.
    It is highly recommended to use profile_counts instead of profile in order to use Maximum Likelihood method
    *signatures* should be one of COSMIC30  MUTAGENE5 MUTAGENE10
    *others_threshold* is used for not reporting signatures with exposure less or equal than the threshold and reporting the sum of their exposures as "Other signatures".
    Set *others_threshold* to 0 if not needed. The MutaGene website uses others_threshold = 0.05 by default.
    """
    url = MUTAGENE_URL + '/pub/api/identify/decomposition'
    r = requests.post(url, data={'profile_counts': json.dumps(profile_counts), 'signatures': signatures, 'others_threshold': 0.0})
    # print("STATUS", r.status_code)
    if r.status_code == 200:
        return r.json()['decomposition']


def print_profile_counts(profile_counts):
    """
    Printing context-dependent mutational profile
    """
    for mutation, value in profile.items():
        print("{}\t{:.0f}".format(mutation, value))
    print()


def print_decomposition(decomposition):
    """
    Printing the results of decomposition
    """
    for component in decomposition:
        print("{}\t{:.2f}\t{:.0f}".format(component['name'], component['score'], component['mutations']))
    print()

def return_decomposition(decomposition):
    """
    Printing the results of decomposition
    """
    listed = {}
    for component in decomposition:
        name = str(component['name'])
        mut = component['mutations']
        listed[name] = mut
    return listed


if __name__ == '__main__':
    for filename in glob.glob(path):
        vcf_files = [filename, ]
        with open("further_analysis.csv", "a") as csvfile:  # create csv file
            for file_name in vcf_files:
                motif_name = get_motifs(file_name, assembly=37)
                value_list = return_motifs(motif_name)
                mut = return_motifs(motif_name)
                if len(mut) == 1:
                    continue
                profile = get_profile(file_name, assembly=37)
                if profile is not None:
                    for signature_type in ('MUTAGENE5', 'MUTAGENE10'):
                        decomposition = get_decomposition(profile, signature_type)
                        if signature_type in 'MUTAGENE5':
                            result_a = return_decomposition(decomposition)
                        if signature_type in 'MUTAGENE10':
                            result_b = return_decomposition(decomposition)
                filewriter = csv.writer(csvfile, delimiter=",", quotechar="|", quoting=csv.QUOTE_MINIMAL)
                filewriter.writerow(["File name", "\t", "T[C>K]W", "[C>T]G", "Y[C>T]", "N[T>S]W", "[C>R]G", "C[T>A]G]", "\t", "A.1 Tobacco", 'A.2 AID/APOBEC', "A.3 Unknown Etiology ",
                                         "A.4 UV Light", "A.5 Deanimation of Methyl-Cytosine",
                                         "B.1 Deanimation of Methyl-Cytosine", 'B.2 Unknown Etiology',
                                         "B.3 Aristolochic Acid", "B.4 Tobacco or Aflotoxin", "B.5 AID/APOBEC",
                                         "B.6 Unknown Etiology", "B.7 Unknown Etiology", "B.8 DNA Polymerase Epsilon",
                                         "B.9 UV Radiation", "B.10 Defective DNA Mismatch Repair"])
                filewriter.writerow([filename[43:], mut["\t"], mut['APOBEC'], mut['AGEING'], mut['UV Light'], mut['AID'],
                                         mut['Tobacco'], mut['Aristolochic Acid (AA)'], mut["\t"],
                                         result_a["MUTAGENE A.1"], result_a["MUTAGENE A.2"], result_a["MUTAGENE A.3"],
                                         result_a["MUTAGENE A.4"], result_a["MUTAGENE A.5"], result_b["MUTAGENE B.1"],
                                         result_b["MUTAGENE B.2"], result_b["MUTAGENE B.3"], result_b["MUTAGENE B.4"],
                                         result_b["MUTAGENE B.5"], result_b["MUTAGENE B.6"], result_b["MUTAGENE B.7"],
                                         result_b["MUTAGENE B.8"], result_b["MUTAGENE B.9"], result_b["MUTAGENE B.10"]])
	import requests
	import json
	import csv
	import glob

	# MUTAGENE_URL = "https://www.ncbi.nlm.nih.gov/research/mutagene"
	#MUTAGENE_URL = "https://dev.ncbi.nlm.nih.gov/research/mutagene"
	MUTAGENE_URL_M = "http://mwebdev2/research/mutagene"
	# MUTAGENE_URL = "http://localhost:5000"
	MUTAGENE_URL = "https://www.ncbi.nlm.nih.gov/research/mutagene"

	path = '/Users/cunninghamck/PycharmProjects/motifs/*.txt'

	def get_motifs(fname, assembly=37):
	"""
	Identify mutational motifs in a VCF or MAF sample
	"""
	url = MUTAGENE_URL_M + '/pub/api/identify/motifs'
	files = {'file': open(fname, 'rb')}
	r = requests.post(url, files=files, data={'assembly': assembly})
	if r.status_code == 200:
	return r.json()['motifs']


	def print_motifs(motifs):
	"""
	Printing the results of motif identification
	"""
	if motifs is None:
	#print("Empty")
	return

	#for m in motifs:
	#print(m['motif'], m['mutations'])

	def return_motifs(motifs):
	"""
	Printing the results of motif identification
	"""
	listed = {"\t":"\t"}
	if motifs is None:
	listed['APOBEC'] = 0
	listed['AGEING'] = 0
	listed['Tobacco'] = 0
	listed['AID'] = 0
	listed['UV Light'] = 0
	listed['Aristolochic Acid (AA)'] = 0
	return listed
	for m in motifs:
	listed[m['name']] = int(m['mutations'])
	return listed

	def get_profile(fname, assembly=37):
	"""
	Calling MutaGene REST API to convert a VCF file into a mutational profile (96 context-dependent mutational probabilities)
	and profile_counts (counts of mutations for each of the 96 context-dependent mutations)
	It is important to specify genome assembly correctly. Curently 19, 37 and 38 will work
	"""
	url = MUTAGENE_URL + '/pub/api/identify/profile'
	files = {'file': open(fname, 'rb')}
	r = requests.post(url, files=files, data={'assembly': assembly})
	# print("STATUS", r.status_code)
	if r.status_code == 200:
	return r.json()['result_counts']


	def get_decomposition(profile_counts, signatures='COSMIC30'):
	"""
	Decomposition of mutational profiles into a combination of signatures.
	It is highly recommended to use profile_counts instead of profile in order to use Maximum Likelihood method
	signatures should be one of COSMIC30 MUTAGENE5 MUTAGENE10
	others_threshold is used for not reporting signatures with exposure less or equal than the threshold and reporting the sum of their exposures as "Other signatures".
	Set others_threshold to 0 if not needed. The MutaGene website uses others_threshold = 0.05 by default.
	"""
	url = MUTAGENE_URL + '/pub/api/identify/decomposition'
	r = requests.post(url, data={'profile_counts': json.dumps(profile_counts), 'signatures': signatures, 'others_threshold': 0.0})
	# print("STATUS", r.status_code)
	if r.status_code == 200:
	return r.json()['decomposition']


	def print_profile_counts(profile_counts):
	"""
	Printing context-dependent mutational profile
	"""
	for mutation, value in profile.items():
	print("{}\t{:.0f}".format(mutation, value))
	print()


	def print_decomposition(decomposition):
	"""
	Printing the results of decomposition
	"""
	for component in decomposition:
	print("{}\t{:.2f}\t{:.0f}".format(component['name'], component['score'], component['mutations']))
	print()

	def return_decomposition(decomposition):
	"""
	Printing the results of decomposition
	"""
	listed = {}
	for component in decomposition:
	name = str(component['name'])
	mut = component['mutations']
	listed[name] = mut
	return listed


	if __name__ == '__main__':
	for filename in glob.glob(path):
	vcf_files = [filename, ]
	with open("further_analysis.csv", "a") as csvfile: # create csv file
	for file_name in vcf_files:
	motif_name = get_motifs(file_name, assembly=37)
	value_list = return_motifs(motif_name)
	mut = return_motifs(motif_name)
	if len(mut) == 1:
	continue
	profile = get_profile(file_name, assembly=37)
	if profile is not None:
	for signature_type in ('MUTAGENE5', 'MUTAGENE10'):
	decomposition = get_decomposition(profile, signature_type)
	if signature_type in 'MUTAGENE5':
	result_a = return_decomposition(decomposition)
	if signature_type in 'MUTAGENE10':
	result_b = return_decomposition(decomposition)
	filewriter = csv.writer(csvfile, delimiter=",", quotechar="\|", quoting=csv.QUOTE_MINIMAL)
	filewriter.writerow(["File name", "\t", "T[C>K]W", "[C>T]G", "Y[C>T]", "N[T>S]W", "[C>R]G", "C[T>A]G]", "\t", "A.1 Tobacco", 'A.2 AID/APOBEC', "A.3 Unknown Etiology ",
	"A.4 UV Light", "A.5 Deanimation of Methyl-Cytosine",
	"B.1 Deanimation of Methyl-Cytosine", 'B.2 Unknown Etiology',
	"B.3 Aristolochic Acid", "B.4 Tobacco or Aflotoxin", "B.5 AID/APOBEC",
	"B.6 Unknown Etiology", "B.7 Unknown Etiology", "B.8 DNA Polymerase Epsilon",
	"B.9 UV Radiation", "B.10 Defective DNA Mismatch Repair"])
	filewriter.writerow([filename[43:], mut["\t"], mut['APOBEC'], mut['AGEING'], mut['UV Light'], mut['AID'],
	mut['Tobacco'], mut['Aristolochic Acid (AA)'], mut["\t"],
	result_a["MUTAGENE A.1"], result_a["MUTAGENE A.2"], result_a["MUTAGENE A.3"],
	result_a["MUTAGENE A.4"], result_a["MUTAGENE A.5"], result_b["MUTAGENE B.1"],
	result_b["MUTAGENE B.2"], result_b["MUTAGENE B.3"], result_b["MUTAGENE B.4"],
	result_b["MUTAGENE B.5"], result_b["MUTAGENE B.6"], result_b["MUTAGENE B.7"],
	result_b["MUTAGENE B.8"], result_b["MUTAGENE B.9"], result_b["MUTAGENE B.10"]])