Skip to content

Instantly share code, notes, and snippets.

@mwormleonhard
Created July 14, 2016 17:24
Show Gist options
  • Save mwormleonhard/ec0903bf70e04418c73be962926c5842 to your computer and use it in GitHub Desktop.
Save mwormleonhard/ec0903bf70e04418c73be962926c5842 to your computer and use it in GitHub Desktop.
Script for downsampling of accurate mass data before peak alignment
# -*- coding: utf-8 -*-
"""
Created on 2016-03-05
@author: Martin Worm-Leonhard
----------------------------------------------------------------------------
"THE BEER-WARE LICENSE" (Revision 42):
<mwormleonhard@gmail.com> wrote this file. As long as you retain this notice you
can do whatever you want with this stuff. If we meet some day, and you think
this stuff is worth it, you can buy me a beer in return. Martin Worm-Leonhard
----------------------------------------------------------------------------
"""
import csv
import sys
import re
import tkFileDialog
from os.path import dirname, basename, join
# Header line written to every output file, whatever the input's own header is.
_OUT_HEADER = (
    'Name\tR.T. (s)\tRetention Index\tCAS\tSample Concentration\t'
    'Quant Masses\tUniqueMass\tQuant S/N\tArea\tSimilarity\tWeight\t'
    'Classifications\tType\tSpectra\r\n'
)

# One "mass:intensity" pair of the Spectra column.
# NOTE(review): both numbers must contain a decimal point; a pair whose mass
# or intensity is written without one is not matched and is silently left out
# of the low-resolution spectrum.
_SPECTRUM_PEAK = re.compile(r'(\d+\.\d+):(\d+\.\d+)')


def _round_to_text(value):
    """Round the decimal string *value* to a whole number and return it as text.

    The result keeps a trailing ".0" (e.g. "73.0"): that is what
    ``str(round(x))`` produces on Python 2, where round() returns a float.
    The outer float() is a no-op there and keeps the same text form on
    Python 3, where round() returns an int.
    """
    return str(float(round(float(value))))


def _downsample_spectrum(spectrum):
    """Reduce an accurate-mass spectrum string to integer masses.

    Each "mass:intensity" pair found in *spectrum* has its mass rounded to the
    nearest integer; intensities of pairs that end up on the same mass are
    summed and the sum is rounded to an integer. The result lists the
    "mass:intensity" entries in ascending mass order, each one followed by a
    single space (so a non-empty result ends with a space).
    """
    totals = {}
    for peak in _SPECTRUM_PEAK.finditer(spectrum):
        mass = int(round(float(peak.group(1))))
        totals[mass] = totals.get(mass, 0.0) + float(peak.group(2))
    return ''.join('%d:%d ' % (mass, int(round(totals[mass])))
                   for mass in sorted(totals))


def downsampleFile(filename):
    """Write a low-resolution copy of the tab-separated peak table *filename*.

    The copy is stored next to the input as "LOWRES_<original name>". The
    first input line is treated as a header and replaced by a fixed header.
    For every following row:

    * column 1 (R.T.) holds two comma-separated values; the first is rounded
      and the field is written back wrapped in literal double quotes,
    * column 5 (Quant Masses) is replaced by the letter "T",
    * column 6 (UniqueMass) is rounded,
    * column 10 (Weight) is rounded unless it is empty,
    * column 13 (Spectra) is reduced to integer masses and intensities.

    Blank lines are skipped, and an empty input file yields an output file
    that contains only the header.

    Raises:
        SystemExit: if the csv module reports an error while reading or
            writing; the message names the file and the input line number.
        ValueError, IndexError: not handled here; they propagate for rows
            with non-numeric values, an R.T. field that does not split into
            exactly two parts, or fewer than 14 columns.
    """
    outfilename = join(dirname(filename), "LOWRES_" + basename(filename))

    # The csv module wants binary files on Python 2 and text files opened with
    # newline='' on Python 3. latin-1 maps every byte to exactly one
    # character, so non-ASCII bytes pass through unchanged on Python 3 just as
    # they do with Python 2 byte strings.
    if sys.version_info[0] < 3:
        read_mode, write_mode, open_kwargs = 'rb', 'wb', {}
    else:
        read_mode, write_mode = 'r', 'w'
        open_kwargs = {'newline': '', 'encoding': 'latin-1'}

    with open(filename, read_mode, **open_kwargs) as infile:
        with open(outfilename, write_mode, **open_kwargs) as outfile:
            reader = csv.reader(infile, delimiter='\t')
            # Quoting is switched off and the quote character moved to '~' so
            # that the double quotes placed around the R.T. field below are
            # written as they are instead of being quoted again.
            writer = csv.writer(outfile, delimiter='\t',
                                quoting=csv.QUOTE_NONE, quotechar='~')
            try:
                outfile.write(_OUT_HEADER)
                # Skip the input header; the default keeps an empty file from
                # raising StopIteration.
                next(reader, None)
                for row in reader:
                    if not row:
                        # The csv reader returns [] for a blank line.
                        continue
                    rt1, rt2 = row[1].split(',')
                    row[1] = '"' + _round_to_text(rt1) + ',' + rt2 + '"'
                    row[5] = 'T'
                    row[6] = _round_to_text(row[6])
                    if row[10] != '':  # An empty Weight field is left alone
                        row[10] = _round_to_text(row[10])
                    row[13] = _downsample_spectrum(row[13])
                    writer.writerow(row)
            except csv.Error as e:
                sys.exit('file %s, line %d: %s'
                         % (filename, reader.line_num, e))
infilenames = tkFileDialog.askopenfilenames(defaultextension='.txt',
filetypes=[('TXT file', '.txt')],
title='Select files to be downsampled')
if len(infilenames) > 0:
for name in infilenames:
downsampleFile(name)
print "Done processing", name
else:
sys.exit('No files selected. Exiting.')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment