Created
January 21, 2020 09:58
-
-
Save mwalzer/6437f418775701e6129b89dbe05f26eb to your computer and use it in GitHub Desktop.
old version of a ligandomics qc script (approx. 2014)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
__author__ = 'walzer' | |
import sys | |
import os | |
import subprocess | |
from datetime import datetime | |
import logging | |
import argparse | |
#import pickle | |
VERSION = "0.2" | |
#QC:0000044 -> ms2s.csv | |
#QC:0000038 -> ids.csv | |
EXPMAP_RPLOT = {'R': "ProduceQCFigures_precursormap.R", 'qp_att_acc': 'QC:0000004', 'cv_acc': 'QC:0000055', 'qpcv_export_list': ['QC:0000044']} | |
IDMAP_RPLOT = {'R': "ProduceQCFigures_idmap.R", 'qp_att_acc': 'QC:0000035', 'cv_acc': 'QC:0000052', 'qpcv_export_list': ['QC:0000038', 'QC:0000044']} | |
ERRORDISTR_RPLOT = {'R': "ProduceQCFigures_acc.R", 'qp_att_acc': 'QC:0000041', 'cv_acc': 'QC:0000053', 'qpcv_export_list': ['QC:0000038']} | |
ERRORTIME_RPLOT = {'R': "ProduceQCFigures_acc_time.R", 'qp_att_acc': 'QC:0000041', 'cv_acc': 'QC:0000054', 'qpcv_export_list': ['QC:0000038', 'QC:0000044']} | |
TIC_RPLOT = {'R': "ProduceQCFigures_precursor_histogram.R", 'qp_att_acc': 'QC:0000023', 'cv_acc': 'MS:1000235', 'qpcv_export_list': ['QC:0000044']} | |
CHARGE_RPLOT = {'R': "ProduceQCFigures_charge_histogram.R", 'qp_att_acc': 'QC:0000025', 'cv_acc': 'QC:0000051', 'qpcv_export_list': ['QC:0000038']} | |
def create_qp(qcml, temp_dir, qc_metric, rdir): | |
parent_cv = qc_metric['qp_att_acc'] | |
plot_cv = qc_metric['cv_acc'] | |
rscript = qc_metric['R'] | |
qpcv_export_list = qc_metric['qpcv_export_list'] | |
export_files = list() | |
try: | |
for export_qccv in qpcv_export_list: | |
export_path = str(temp_dir) + '/' + str(export_qccv).replace(':', '_') + '.csv' | |
eexe = "QCExtractor -in '{inpu}' -out_csv '{out}' -qp '{qp}'".format(inpu=qcml, out=export_path, qp=str(export_qccv)) | |
o = subprocess.check_output(eexe, stderr=subprocess.STDOUT, shell=True) | |
# TODO possible to check if output is legit?! | |
if o: | |
export_files.append(export_path) | |
else: | |
raise('no ' + export_qccv) | |
except Exception as e: | |
logging.warning("Could not execute QCExtractor successfully for " + str(qcml) + | |
" - no QC metric " + str(export_qccv) + "(.." + str(e) + ")") | |
return None | |
plot_file = str(temp_dir) + '/' + str(plot_cv).replace(':', '_') + ".png" | |
try: | |
rexe = "Rscript {rdir}/{inpu} {out}".format(inpu=' '.join([rscript] + export_files), out=plot_file, rdir=rdir) | |
o = subprocess.check_output(rexe, stderr=subprocess.STDOUT, shell=True) | |
if not o: | |
raise('no ' + plot_file) | |
except Exception as e: | |
logging.warning("Could not execute R " + str(rscript) + ' successfully for ' + str(qcml) + | |
" - no " + str(rscript) + " QC metric" + "(.." + str(e) + ")") | |
return None | |
try: | |
eexe = "QCEmbedder -in '{inpu}' -out '{out}' -plot '{plot}' -qp_att_acc {parent} -cv_acc {type}".format( | |
inpu=qcml, out=qcml, plot=plot_file, parent=parent_cv, type=plot_cv) | |
o = subprocess.check_output(eexe, stderr=subprocess.STDOUT, shell=True) | |
if not o: | |
raise('no ' + qcml) | |
except Exception as e: | |
logging.warning("Could not execute QCEmbedder successfully for " + str(plot_file) + | |
" - no " + str(rscript) + " QC metric" + "(.." + str(e) + ")") | |
return None | |
return qcml | |
def create_qcml(mzid, spectra, feature, directory): | |
qcml = str(directory) + '/' + os.path.splitext(os.path.basename(spectra))[0] + '.qcML' | |
exe = "QCCalculator -in '{inpu}' -out '{out}' -id '{id}' -feature '{feat}'".format(inpu=spectra, out=qcml, id=mzid, feat=feature) | |
logging.warning("Starting to create qcml for " + str(mzid) + ', ' + str(spectra) + " ...") | |
try: | |
o = subprocess.check_output(exe, stderr=subprocess.STDOUT, shell=True) | |
except Exception as e: | |
logging.warning("Could not create qcml for " + str(mzid) + ', ' + str(spectra) + ', ' + str(feature) + | |
" - skipping. (" + str(e) + ")") | |
logging.warning("Created qcml for " + str(mzid) + ', ' + str(spectra) + ', ' + str(feature) + " - " + qcml) | |
return qcml | |
def image_qcml(qcml, spectra, temp_dir): | |
img = str(temp_dir) + '/' + 'img.png' | |
exe = "ImageCreator -in '{inpu}' -out '{out}' -out_type png -width 1024 -height 1024".format(inpu=spectra, out=img) | |
logging.warning("Create run map image for " + str(spectra) + " ...") | |
try: | |
o = subprocess.check_output(exe, stderr=subprocess.STDOUT, shell=True) | |
except Exception as e: | |
logging.warning("Could not create image for " + str(spectra) + " - skipping. (" + str(e) + ")") | |
try: | |
eexe = "QCEmbedder -in '{inpu}' -out '{out}' -plot '{plot}' -qp_att_acc {parent} -cv_acc {type}".format( | |
inpu=qcml, out=qcml, plot=img, parent="QC:0000004", type="QC:0000055") | |
o = subprocess.check_output(eexe, stderr=subprocess.STDOUT, shell=True) | |
if not o: | |
raise('no ' + qcml) | |
except Exception as e: | |
logging.warning("Could not execute QCEmbedder successfully for " + str(img) ) | |
return None | |
logging.warning("Embedded run map image for " + str(spectra) + " - " + qcml) | |
return qcml | |
def filter_mzid(mzid, temp_dir): | |
fmzid = str(temp_dir) + '/' + os.path.basename(mzid) | |
exe = "IDFilter -delete_hits_beneath_file_threshold -in '{inpu}' -out '{out}'".format(inpu=mzid, out=fmzid) | |
logging.warning("Filtering for passed threshold in " + str(mzid) + " ...") | |
try: | |
o = subprocess.check_output(exe, stderr=subprocess.STDOUT, shell=True) | |
except Exception as e: | |
logging.warning("Could not do filtering for passed threshold for " + str(mzid) + " - skipping. (" + str(e) + ")") | |
return fmzid | |
def __main__(): | |
parser = argparse.ArgumentParser(version=VERSION) | |
parser.add_argument('-id', '--mzid', dest="mzid_file", help='<Required> full path to the input mzid', required=True) | |
parser.add_argument('-sp', '--spectras', nargs='+', dest="spectra_list", | |
help='<Required> list of the spectra carrying files referenced in the given mzid - each full path', required=True) | |
parser.add_argument('-od', "--outdir", dest="outfiles_directory", help="<Required> Outfile for qcml", required=True) | |
parser.add_argument('-rd', "--Rdir", dest="Rscripts_directory", help="<Required> Rscript directory", required=True) | |
parser.add_argument('-of', "--outfile", dest="outfile_path", help="Outfile for final qcml.") | |
parser.add_argument('-temp', "--tempdir", dest="temp_path", help="Temporary directory for intermediate files.") | |
#parser.add_option("--outdir", "-od", action="store", dest="outfile_directory", help="Outfile for predictions") | |
options = parser.parse_args() | |
if len(sys.argv) <= 1: | |
parser.print_help() | |
sys.exit(1) | |
if not (options.mzid_file or options.spectra_list or options.outfile_directory): | |
parser.print_help() | |
sys.exit(1) | |
#~ else: | |
#~ print dir(options) | |
if options.temp_path: | |
temp_dir = options.temp_path | |
else: | |
temp_dir = "/tmp/" | |
logging.basicConfig(filename=options.outfiles_directory + "/qcml{:%d-%m-%Y_%H-%M-%S}".format(datetime.now()) + '.log', | |
filemode='w+', level=logging.DEBUG) #, format='%(levelname)s:%(message)s' | |
logging.info("Starting Ligando-QC for " + options.mzid_file + " at " + str(datetime.now())) | |
args = parser.parse_args() | |
logging.warning("verbosity turned on") | |
#print options.mzid_file, options.spectra_list, options.outfile_directory | |
# TODO go through all spectra files and create one by one the qcmls | |
metrics_list = [ | |
ERRORDISTR_RPLOT, | |
ERRORTIME_RPLOT, | |
TIC_RPLOT | |
] | |
for mse in options.spectra_list: | |
fmzid = filter_mzid(mzid=options.mzid_file, temp_dir=temp_dir) | |
qcml = create_qcml(mzid=fmzid, spectra=options.ms, feature=options.feats, directory=options.outfiles_directory) | |
image_qcml(qcml=qcml, spectra=options.ms, temp_dir=temp_dir) | |
for metric in metrics_list: | |
create_qp(qcml, temp_dir, metric, options.Rscripts_directory) | |
logging.info("Finishing Ligando-QC for " + options.mzid_file + " at " + str(datetime.now())) | |
if __name__ == '__main__': | |
__main__() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment