# mwormleonhard/lowResMass.py

## lowResMass.py
# -*- coding: utf-8 -*-
"""
Created on 2016-03-05
@author: Martin Worm-Leonhard

----------------------------------------------------------------------------
"THE BEER-WARE LICENSE" (Revision 42):
<mwormleonhard@gmail.com> wrote this file. As long as you retain this notice you
can do whatever you want with this stuff. If we meet some day, and you think
this stuff is worth it, you can buy me a beer in return.   Martin Worm-Leonhard
----------------------------------------------------------------------------
"""
import csv
import sys
import re
import tkFileDialog
from os.path import dirname, basename, join

def downsampleFile(filename):
    """Write a low-resolution copy of a tab-separated peak table.

    The result is written next to the input as ``LOWRES_<basename>``. The
    first line of the input is discarded and replaced by a fixed 14-column
    header. Column names below are those of that output header; the input is
    presumed to use the same layout. For every data row:

    * column 1 (``R.T. (s)``) is split on ``,``; the first part is rounded
      and the field is written back wrapped in literal double quotes,
    * column 5 (``Quant Masses``) is replaced by the letter ``T``,
    * column 6 (``UniqueMass``) and column 10 (``Weight``) are rounded; an
      empty Weight is left alone,
    * column 13 (``Spectra``) is rebuilt: every ``mass:intensity`` pair
      (both written with a decimal point) is binned to the nearest integer
      mass, intensities falling in the same bin are summed, and the bins are
      emitted in ascending mass order as ``mass:intensity`` pairs, each
      followed by a single space.

    Blank lines in the input are skipped, and an input without any lines
    produces an output that holds only the header. Rows with fewer than 14
    fields or with non-numeric values are not handled here and raise
    ``IndexError`` / ``ValueError``.

    Args:
        filename: Path of the tab-separated input file.

    Raises:
        SystemExit: If the csv module reports an error while reading or
            writing; the message names the file and the reader line number.
    """
    outheader = 'Name\tR.T. (s)\tRetention Index\tCAS\tSample Concentration\tQuant Masses\tUniqueMass\tQuant S/N\tArea\tSimilarity\tWeight\tClassifications\tType\tSpectra\r\n'

    outfilename = join(dirname(filename), "LOWRES_" + basename(filename))

    # Compiled once instead of being looked up again for every row.
    pair_pattern = re.compile(r'(\d+\.\d+):(\d+\.\d+)')

    # The csv module wants binary files on Python 2 and text files opened
    # with newline='' on Python 3. latin-1 maps every byte to exactly one
    # code point, so non-ASCII bytes pass through unchanged on Python 3 just
    # as they do with Python 2 byte strings.
    if sys.version_info[0] < 3:
        read_mode, write_mode, open_kwargs = 'rb', 'wb', {}
    else:
        read_mode, write_mode = 'r', 'w'
        open_kwargs = {'newline': '', 'encoding': 'latin-1'}

    with open(filename, read_mode, **open_kwargs) as infile:
        with open(outfilename, write_mode, **open_kwargs) as outfile:
            reader = csv.reader(infile, delimiter='\t')
            # QUOTE_NONE plus an unrelated quotechar keeps the writer from
            # touching the literal double quotes put around the R.T. field.
            writer = csv.writer(outfile, delimiter='\t', quoting=csv.QUOTE_NONE, quotechar='~')
            try:
                outfile.write(outheader)
                # Skip the input header; the default keeps an empty input
                # from raising StopIteration.
                next(reader, None)
                for row in reader:
                    if not row:
                        continue  # csv.reader yields [] for a blank line

                    # Rounding of RT1
                    rt1, rt2 = row[1].split(',')
                    row[1] = '"' + str(_round_half_away(float(rt1))) + ',' + rt2 + '"'
                    row[5] = 'T'  # Quant Masses replaced by the letter "T"
                    row[6] = str(_round_half_away(float(row[6])))  # Rounding of Unique mass
                    if row[10] != '':  # If the Weight field is empty, leave it alone
                        row[10] = str(_round_half_away(float(row[10])))

                    # Rounding of spectrum: sum intensities per integer mass.
                    binned = {}
                    for match in pair_pattern.finditer(row[13]):
                        mass = int(_round_half_away(float(match.group(1))))
                        binned[mass] = binned.get(mass, 0.0) + float(match.group(2))
                    # Intensities need to be integers as well.
                    row[13] = ''.join(
                        '%d:%d ' % (mass, int(_round_half_away(binned[mass])))
                        for mass in sorted(binned))
                    writer.writerow(row)
            except csv.Error as e:
                sys.exit('file %s, line %d: %s' % (filename, reader.line_num, e))


def _round_half_away(value):
    """Round *value* to a whole number, ties away from zero, as a float.

    This is what Python 2's built-in round() does. Python 3's round()
    returns an int and rounds ties to even, which would change the text
    written to the output file (``73`` instead of ``73.0``, and ``72.5``
    becoming ``72``), so the rounding is spelled out here.
    """
    import math  # local import keeps this block independent of the file header

    magnitude = abs(value)
    whole = math.floor(magnitude)
    # The fractional part of a float is computed exactly, so this
    # comparison has no rounding error of its own.
    if magnitude - whole >= 0.5:
        whole += 1
    return math.copysign(whole, value)


infilenames = tkFileDialog.askopenfilenames(defaultextension='.txt',
                                          filetypes=[('TXT file', '.txt')],
                                          title='Select files to be downsampled')

if len(infilenames) > 0:
    for name in infilenames:
        downsampleFile(name)
        print "Done processing", name
else:
    sys.exit('No files selected. Exiting.')
	# -- coding: utf-8 --
	"""
	Created on 2016-03-05
	@author: Martin Worm-Leonhard

	----------------------------------------------------------------------------
	"THE BEER-WARE LICENSE" (Revision 42):
	<mwormleonhard@gmail.com> wrote this file. As long as you retain this notice you
	can do whatever you want with this stuff. If we meet some day, and you think
	this stuff is worth it, you can buy me a beer in return. Martin Worm-Leonhard
	----------------------------------------------------------------------------
	"""
	import csv
	import sys
	import re
	import tkFileDialog
	from os.path import dirname, basename, join

	def downsampleFile(filename):
	    """Write a low-resolution copy of a tab-separated peak table.

	    The result is written next to the input as ``LOWRES_<basename>``. The
	    first line of the input is discarded and replaced by a fixed 14-column
	    header. Column names below are those of that output header; the input is
	    presumed to use the same layout. For every data row:

	    * column 1 (``R.T. (s)``) is split on ``,``; the first part is rounded
	      and the field is written back wrapped in literal double quotes,
	    * column 5 (``Quant Masses``) is replaced by the letter ``T``,
	    * column 6 (``UniqueMass``) and column 10 (``Weight``) are rounded; an
	      empty Weight is left alone,
	    * column 13 (``Spectra``) is rebuilt: every ``mass:intensity`` pair
	      (both written with a decimal point) is binned to the nearest integer
	      mass, intensities falling in the same bin are summed, and the bins are
	      emitted in ascending mass order as ``mass:intensity`` pairs, each
	      followed by a single space.

	    Blank lines in the input are skipped, and an input without any lines
	    produces an output that holds only the header. Rows with fewer than 14
	    fields or with non-numeric values are not handled here and raise
	    ``IndexError`` / ``ValueError``.

	    Args:
	        filename: Path of the tab-separated input file.

	    Raises:
	        SystemExit: If the csv module reports an error while reading or
	            writing; the message names the file and the reader line number.
	    """
	    outheader = 'Name\tR.T. (s)\tRetention Index\tCAS\tSample Concentration\tQuant Masses\tUniqueMass\tQuant S/N\tArea\tSimilarity\tWeight\tClassifications\tType\tSpectra\r\n'

	    outfilename = join(dirname(filename), "LOWRES_" + basename(filename))

	    # Compiled once instead of being looked up again for every row.
	    pair_pattern = re.compile(r'(\d+\.\d+):(\d+\.\d+)')

	    # The csv module wants binary files on Python 2 and text files opened
	    # with newline='' on Python 3. latin-1 maps every byte to exactly one
	    # code point, so non-ASCII bytes pass through unchanged on Python 3 just
	    # as they do with Python 2 byte strings.
	    if sys.version_info[0] < 3:
	        read_mode, write_mode, open_kwargs = 'rb', 'wb', {}
	    else:
	        read_mode, write_mode = 'r', 'w'
	        open_kwargs = {'newline': '', 'encoding': 'latin-1'}

	    with open(filename, read_mode, **open_kwargs) as infile:
	        with open(outfilename, write_mode, **open_kwargs) as outfile:
	            reader = csv.reader(infile, delimiter='\t')
	            # QUOTE_NONE plus an unrelated quotechar keeps the writer from
	            # touching the literal double quotes put around the R.T. field.
	            writer = csv.writer(outfile, delimiter='\t', quoting=csv.QUOTE_NONE, quotechar='~')
	            try:
	                outfile.write(outheader)
	                # Skip the input header; the default keeps an empty input
	                # from raising StopIteration.
	                next(reader, None)
	                for row in reader:
	                    if not row:
	                        continue  # csv.reader yields [] for a blank line

	                    # Rounding of RT1
	                    rt1, rt2 = row[1].split(',')
	                    row[1] = '"' + str(_round_half_away(float(rt1))) + ',' + rt2 + '"'
	                    row[5] = 'T'  # Quant Masses replaced by the letter "T"
	                    row[6] = str(_round_half_away(float(row[6])))  # Rounding of Unique mass
	                    if row[10] != '':  # If the Weight field is empty, leave it alone
	                        row[10] = str(_round_half_away(float(row[10])))

	                    # Rounding of spectrum: sum intensities per integer mass.
	                    binned = {}
	                    for match in pair_pattern.finditer(row[13]):
	                        mass = int(_round_half_away(float(match.group(1))))
	                        binned[mass] = binned.get(mass, 0.0) + float(match.group(2))
	                    # Intensities need to be integers as well.
	                    row[13] = ''.join(
	                        '%d:%d ' % (mass, int(_round_half_away(binned[mass])))
	                        for mass in sorted(binned))
	                    writer.writerow(row)
	            except csv.Error as e:
	                sys.exit('file %s, line %d: %s' % (filename, reader.line_num, e))


	def _round_half_away(value):
	    """Round *value* to a whole number, ties away from zero, as a float.

	    This is what Python 2's built-in round() does. Python 3's round()
	    returns an int and rounds ties to even, which would change the text
	    written to the output file (``73`` instead of ``73.0``, and ``72.5``
	    becoming ``72``), so the rounding is spelled out here.
	    """
	    import math  # local import keeps this block independent of the file header

	    magnitude = abs(value)
	    whole = math.floor(magnitude)
	    # The fractional part of a float is computed exactly, so this
	    # comparison has no rounding error of its own.
	    if magnitude - whole >= 0.5:
	        whole += 1
	    return math.copysign(whole, value)


	infilenames = tkFileDialog.askopenfilenames(defaultextension='.txt',
	filetypes=[('TXT file', '.txt')],
	title='Select files to be downsampled')

	if len(infilenames) > 0:
	for name in infilenames:
	downsampleFile(name)
	print "Done processing", name
	else:
	sys.exit('No files selected. Exiting.')