mwalzer/LigandoQC.py

## LigandoQC.py
#!/usr/bin/env python
__author__ = 'walzer'
import sys
import os
import subprocess
from datetime import datetime
import logging
import argparse
#import pickle

VERSION = "0.2"

#QC:0000044  -> ms2s.csv
#QC:0000038 -> ids.csv

EXPMAP_RPLOT = {'R': "ProduceQCFigures_precursormap.R", 'qp_att_acc': 'QC:0000004', 'cv_acc': 'QC:0000055', 'qpcv_export_list': ['QC:0000044']}
IDMAP_RPLOT = {'R': "ProduceQCFigures_idmap.R", 'qp_att_acc': 'QC:0000035', 'cv_acc': 'QC:0000052', 'qpcv_export_list': ['QC:0000038', 'QC:0000044']}
ERRORDISTR_RPLOT = {'R': "ProduceQCFigures_acc.R", 'qp_att_acc': 'QC:0000041', 'cv_acc': 'QC:0000053', 'qpcv_export_list': ['QC:0000038']}
ERRORTIME_RPLOT = {'R': "ProduceQCFigures_acc_time.R", 'qp_att_acc': 'QC:0000041', 'cv_acc': 'QC:0000054', 'qpcv_export_list': ['QC:0000038', 'QC:0000044']}
TIC_RPLOT = {'R': "ProduceQCFigures_precursor_histogram.R", 'qp_att_acc': 'QC:0000023', 'cv_acc': 'MS:1000235', 'qpcv_export_list': ['QC:0000044']}
CHARGE_RPLOT = {'R': "ProduceQCFigures_charge_histogram.R", 'qp_att_acc': 'QC:0000025', 'cv_acc': 'QC:0000051', 'qpcv_export_list': ['QC:0000038']}


def create_qp(qcml, temp_dir, qc_metric, rdir):
    parent_cv = qc_metric['qp_att_acc']
    plot_cv = qc_metric['cv_acc']
    rscript = qc_metric['R']
    qpcv_export_list = qc_metric['qpcv_export_list']
    export_files = list()
    try:
        for export_qccv in qpcv_export_list:
            export_path = str(temp_dir) + '/' + str(export_qccv).replace(':', '_') + '.csv'
            eexe = "QCExtractor -in '{inpu}' -out_csv '{out}' -qp '{qp}'".format(inpu=qcml, out=export_path, qp=str(export_qccv))
            o = subprocess.check_output(eexe, stderr=subprocess.STDOUT, shell=True)
            # TODO possible to check if output is legit?!
            if o:
                export_files.append(export_path)
            else:
                raise('no ' + export_qccv)
    except Exception as e:
        logging.warning("Could not execute QCExtractor successfully for " + str(qcml) +
                        " - no QC metric " + str(export_qccv) + "(.." + str(e) + ")")
        return None

    plot_file = str(temp_dir) + '/' + str(plot_cv).replace(':', '_') + ".png"
    try:
        rexe = "Rscript {rdir}/{inpu} {out}".format(inpu=' '.join([rscript] + export_files), out=plot_file, rdir=rdir)
        o = subprocess.check_output(rexe, stderr=subprocess.STDOUT, shell=True)
        if not o:
            raise('no ' + plot_file)

    except Exception as e:
        logging.warning("Could not execute R " + str(rscript) + ' successfully for ' + str(qcml) +
                        " - no " + str(rscript) + " QC metric" + "(.." + str(e) + ")")
        return None

    try:
        eexe = "QCEmbedder -in '{inpu}' -out '{out}' -plot '{plot}' -qp_att_acc {parent} -cv_acc {type}".format(
            inpu=qcml, out=qcml, plot=plot_file, parent=parent_cv, type=plot_cv)
        o = subprocess.check_output(eexe, stderr=subprocess.STDOUT, shell=True)
        if not o:
            raise('no ' + qcml)

    except Exception as e:
        logging.warning("Could not execute QCEmbedder successfully for " + str(plot_file) +
                        " - no " + str(rscript) + " QC metric" + "(.." + str(e) + ")")
        return None
    return qcml


def create_qcml(mzid, spectra, feature, directory):
    qcml = str(directory) + '/' + os.path.splitext(os.path.basename(spectra))[0] + '.qcML'
    exe = "QCCalculator -in '{inpu}' -out '{out}' -id '{id}' -feature '{feat}'".format(inpu=spectra, out=qcml, id=mzid, feat=feature)
    logging.warning("Starting to create qcml for  " + str(mzid) + ', ' + str(spectra) + " ...")
    try:
        o = subprocess.check_output(exe, stderr=subprocess.STDOUT, shell=True)
    except Exception as e:
        logging.warning("Could not create qcml for  " + str(mzid) + ', ' + str(spectra) + ', ' + str(feature) +
                        " - skipping. (" + str(e) + ")")
    logging.warning("Created qcml for  " + str(mzid) + ', ' + str(spectra) + ', ' + str(feature) + " - " + qcml)
    return qcml


def image_qcml(qcml, spectra, temp_dir):
    img = str(temp_dir) + '/' + 'img.png'
    exe = "ImageCreator -in '{inpu}' -out '{out}' -out_type png -width 1024 -height 1024".format(inpu=spectra, out=img)
    logging.warning("Create run map image for " + str(spectra) + " ...")
    try:
        o = subprocess.check_output(exe, stderr=subprocess.STDOUT, shell=True)
    except Exception as e:
        logging.warning("Could not create image for  " + str(spectra) + " - skipping. (" + str(e) + ")")

    try:
        eexe = "QCEmbedder -in '{inpu}' -out '{out}' -plot '{plot}' -qp_att_acc {parent} -cv_acc {type}".format(
            inpu=qcml, out=qcml, plot=img, parent="QC:0000004", type="QC:0000055")
        o = subprocess.check_output(eexe, stderr=subprocess.STDOUT, shell=True)
        if not o:
            raise('no ' + qcml)

    except Exception as e:
        logging.warning("Could not execute QCEmbedder successfully for " + str(img) )
        return None

    logging.warning("Embedded run map image for  " + str(spectra) + " - " + qcml)
    return qcml


def filter_mzid(mzid, temp_dir):
    fmzid = str(temp_dir) + '/' + os.path.basename(mzid)
    exe = "IDFilter -delete_hits_beneath_file_threshold -in '{inpu}' -out '{out}'".format(inpu=mzid, out=fmzid)
    logging.warning("Filtering for passed threshold in " + str(mzid) + " ...")
    try:
        o = subprocess.check_output(exe, stderr=subprocess.STDOUT, shell=True)
    except Exception as e:
        logging.warning("Could not do filtering for passed threshold for  " + str(mzid) + " - skipping. (" + str(e) + ")")
    return fmzid


def __main__():
    parser = argparse.ArgumentParser(version=VERSION)
    parser.add_argument('-id', '--mzid', dest="mzid_file", help='<Required> full path to the input mzid', required=True)
    parser.add_argument('-sp', '--spectras', nargs='+', dest="spectra_list",
                        help='<Required> list of the spectra carrying files referenced in the given mzid - each full path', required=True)
    parser.add_argument('-od', "--outdir", dest="outfiles_directory", help="<Required> Outfile for qcml", required=True)
    parser.add_argument('-rd', "--Rdir", dest="Rscripts_directory", help="<Required> Rscript directory", required=True)
    parser.add_argument('-of', "--outfile", dest="outfile_path", help="Outfile for final qcml.")
    parser.add_argument('-temp', "--tempdir", dest="temp_path", help="Temporary directory for intermediate files.")

    #parser.add_option("--outdir", "-od", action="store", dest="outfile_directory", help="Outfile for predictions")
    options = parser.parse_args()
    if len(sys.argv) <= 1:
        parser.print_help()
        sys.exit(1)

    if not (options.mzid_file or options.spectra_list or options.outfile_directory):
        parser.print_help()
        sys.exit(1)
    #~ else:
    #~ print dir(options)

    if options.temp_path:
        temp_dir = options.temp_path
    else:
        temp_dir = "/tmp/"

    logging.basicConfig(filename=options.outfiles_directory + "/qcml{:%d-%m-%Y_%H-%M-%S}".format(datetime.now()) + '.log',
                        filemode='w+', level=logging.DEBUG)  #, format='%(levelname)s:%(message)s'
    logging.info("Starting Ligando-QC for " + options.mzid_file + " at " + str(datetime.now()))
    args = parser.parse_args()
    logging.warning("verbosity turned on")

    #print options.mzid_file, options.spectra_list, options.outfile_directory
    # TODO go through all spectra files and create one by one the qcmls
    metrics_list = [
       ERRORDISTR_RPLOT,
       ERRORTIME_RPLOT,
       TIC_RPLOT
    ]
    for mse in options.spectra_list:
        fmzid = filter_mzid(mzid=options.mzid_file, temp_dir=temp_dir)
        qcml = create_qcml(mzid=fmzid, spectra=options.ms, feature=options.feats, directory=options.outfiles_directory)
        image_qcml(qcml=qcml, spectra=options.ms, temp_dir=temp_dir)
        for metric in metrics_list:
            create_qp(qcml, temp_dir, metric, options.Rscripts_directory)

    logging.info("Finishing Ligando-QC for " + options.mzid_file + " at " + str(datetime.now()))


if __name__ == '__main__':
    __main__()
	#!/usr/bin/env python
	__author__ = 'walzer'
	import sys
	import os
	import subprocess
	from datetime import datetime
	import logging
	import argparse
	#import pickle

	VERSION = "0.2"

	#QC:0000044 -> ms2s.csv
	#QC:0000038 -> ids.csv

	EXPMAP_RPLOT = {'R': "ProduceQCFigures_precursormap.R", 'qp_att_acc': 'QC:0000004', 'cv_acc': 'QC:0000055', 'qpcv_export_list': ['QC:0000044']}
	IDMAP_RPLOT = {'R': "ProduceQCFigures_idmap.R", 'qp_att_acc': 'QC:0000035', 'cv_acc': 'QC:0000052', 'qpcv_export_list': ['QC:0000038', 'QC:0000044']}
	ERRORDISTR_RPLOT = {'R': "ProduceQCFigures_acc.R", 'qp_att_acc': 'QC:0000041', 'cv_acc': 'QC:0000053', 'qpcv_export_list': ['QC:0000038']}
	ERRORTIME_RPLOT = {'R': "ProduceQCFigures_acc_time.R", 'qp_att_acc': 'QC:0000041', 'cv_acc': 'QC:0000054', 'qpcv_export_list': ['QC:0000038', 'QC:0000044']}
	TIC_RPLOT = {'R': "ProduceQCFigures_precursor_histogram.R", 'qp_att_acc': 'QC:0000023', 'cv_acc': 'MS:1000235', 'qpcv_export_list': ['QC:0000044']}
	CHARGE_RPLOT = {'R': "ProduceQCFigures_charge_histogram.R", 'qp_att_acc': 'QC:0000025', 'cv_acc': 'QC:0000051', 'qpcv_export_list': ['QC:0000038']}


	def create_qp(qcml, temp_dir, qc_metric, rdir):
	parent_cv = qc_metric['qp_att_acc']
	plot_cv = qc_metric['cv_acc']
	rscript = qc_metric['R']
	qpcv_export_list = qc_metric['qpcv_export_list']
	export_files = list()
	try:
	for export_qccv in qpcv_export_list:
	export_path = str(temp_dir) + '/' + str(export_qccv).replace(':', '_') + '.csv'
	eexe = "QCExtractor -in '{inpu}' -out_csv '{out}' -qp '{qp}'".format(inpu=qcml, out=export_path, qp=str(export_qccv))
	o = subprocess.check_output(eexe, stderr=subprocess.STDOUT, shell=True)
	# TODO possible to check if output is legit?!
	if o:
	export_files.append(export_path)
	else:
	raise('no ' + export_qccv)
	except Exception as e:
	logging.warning("Could not execute QCExtractor successfully for " + str(qcml) +
	" - no QC metric " + str(export_qccv) + "(.." + str(e) + ")")
	return None

	plot_file = str(temp_dir) + '/' + str(plot_cv).replace(':', '_') + ".png"
	try:
	rexe = "Rscript {rdir}/{inpu} {out}".format(inpu=' '.join([rscript] + export_files), out=plot_file, rdir=rdir)
	o = subprocess.check_output(rexe, stderr=subprocess.STDOUT, shell=True)
	if not o:
	raise('no ' + plot_file)

	except Exception as e:
	logging.warning("Could not execute R " + str(rscript) + ' successfully for ' + str(qcml) +
	" - no " + str(rscript) + " QC metric" + "(.." + str(e) + ")")
	return None

	try:
	eexe = "QCEmbedder -in '{inpu}' -out '{out}' -plot '{plot}' -qp_att_acc {parent} -cv_acc {type}".format(
	inpu=qcml, out=qcml, plot=plot_file, parent=parent_cv, type=plot_cv)
	o = subprocess.check_output(eexe, stderr=subprocess.STDOUT, shell=True)
	if not o:
	raise('no ' + qcml)

	except Exception as e:
	logging.warning("Could not execute QCEmbedder successfully for " + str(plot_file) +
	" - no " + str(rscript) + " QC metric" + "(.." + str(e) + ")")
	return None
	return qcml


	def create_qcml(mzid, spectra, feature, directory):
	qcml = str(directory) + '/' + os.path.splitext(os.path.basename(spectra))[0] + '.qcML'
	exe = "QCCalculator -in '{inpu}' -out '{out}' -id '{id}' -feature '{feat}'".format(inpu=spectra, out=qcml, id=mzid, feat=feature)
	logging.warning("Starting to create qcml for " + str(mzid) + ', ' + str(spectra) + " ...")
	try:
	o = subprocess.check_output(exe, stderr=subprocess.STDOUT, shell=True)
	except Exception as e:
	logging.warning("Could not create qcml for " + str(mzid) + ', ' + str(spectra) + ', ' + str(feature) +
	" - skipping. (" + str(e) + ")")
	logging.warning("Created qcml for " + str(mzid) + ', ' + str(spectra) + ', ' + str(feature) + " - " + qcml)
	return qcml


	def image_qcml(qcml, spectra, temp_dir):
	img = str(temp_dir) + '/' + 'img.png'
	exe = "ImageCreator -in '{inpu}' -out '{out}' -out_type png -width 1024 -height 1024".format(inpu=spectra, out=img)
	logging.warning("Create run map image for " + str(spectra) + " ...")
	try:
	o = subprocess.check_output(exe, stderr=subprocess.STDOUT, shell=True)
	except Exception as e:
	logging.warning("Could not create image for " + str(spectra) + " - skipping. (" + str(e) + ")")

	try:
	eexe = "QCEmbedder -in '{inpu}' -out '{out}' -plot '{plot}' -qp_att_acc {parent} -cv_acc {type}".format(
	inpu=qcml, out=qcml, plot=img, parent="QC:0000004", type="QC:0000055")
	o = subprocess.check_output(eexe, stderr=subprocess.STDOUT, shell=True)
	if not o:
	raise('no ' + qcml)

	except Exception as e:
	logging.warning("Could not execute QCEmbedder successfully for " + str(img) )
	return None

	logging.warning("Embedded run map image for " + str(spectra) + " - " + qcml)
	return qcml


	def filter_mzid(mzid, temp_dir):
	fmzid = str(temp_dir) + '/' + os.path.basename(mzid)
	exe = "IDFilter -delete_hits_beneath_file_threshold -in '{inpu}' -out '{out}'".format(inpu=mzid, out=fmzid)
	logging.warning("Filtering for passed threshold in " + str(mzid) + " ...")
	try:
	o = subprocess.check_output(exe, stderr=subprocess.STDOUT, shell=True)
	except Exception as e:
	logging.warning("Could not do filtering for passed threshold for " + str(mzid) + " - skipping. (" + str(e) + ")")
	return fmzid


	def __main__():
	parser = argparse.ArgumentParser(version=VERSION)
	parser.add_argument('-id', '--mzid', dest="mzid_file", help='<Required> full path to the input mzid', required=True)
	parser.add_argument('-sp', '--spectras', nargs='+', dest="spectra_list",
	help='<Required> list of the spectra carrying files referenced in the given mzid - each full path', required=True)
	parser.add_argument('-od', "--outdir", dest="outfiles_directory", help="<Required> Outfile for qcml", required=True)
	parser.add_argument('-rd', "--Rdir", dest="Rscripts_directory", help="<Required> Rscript directory", required=True)
	parser.add_argument('-of', "--outfile", dest="outfile_path", help="Outfile for final qcml.")
	parser.add_argument('-temp', "--tempdir", dest="temp_path", help="Temporary directory for intermediate files.")

	#parser.add_option("--outdir", "-od", action="store", dest="outfile_directory", help="Outfile for predictions")
	options = parser.parse_args()
	if len(sys.argv) <= 1:
	parser.print_help()
	sys.exit(1)

	if not (options.mzid_file or options.spectra_list or options.outfile_directory):
	parser.print_help()
	sys.exit(1)
	#~ else:
	#~ print dir(options)

	if options.temp_path:
	temp_dir = options.temp_path
	else:
	temp_dir = "/tmp/"

	logging.basicConfig(filename=options.outfiles_directory + "/qcml{:%d-%m-%Y_%H-%M-%S}".format(datetime.now()) + '.log',
	filemode='w+', level=logging.DEBUG) #, format='%(levelname)s:%(message)s'
	logging.info("Starting Ligando-QC for " + options.mzid_file + " at " + str(datetime.now()))
	args = parser.parse_args()
	logging.warning("verbosity turned on")

	#print options.mzid_file, options.spectra_list, options.outfile_directory
	# TODO go through all spectra files and create one by one the qcmls
	metrics_list = [
	ERRORDISTR_RPLOT,
	ERRORTIME_RPLOT,
	TIC_RPLOT
	]
	for mse in options.spectra_list:
	fmzid = filter_mzid(mzid=options.mzid_file, temp_dir=temp_dir)
	qcml = create_qcml(mzid=fmzid, spectra=options.ms, feature=options.feats, directory=options.outfiles_directory)
	image_qcml(qcml=qcml, spectra=options.ms, temp_dir=temp_dir)
	for metric in metrics_list:
	create_qp(qcml, temp_dir, metric, options.Rscripts_directory)

	logging.info("Finishing Ligando-QC for " + options.mzid_file + " at " + str(datetime.now()))


	if __name__ == '__main__':
	__main__()