Created
July 14, 2016 17:24
-
-
Save mwormleonhard/ec0903bf70e04418c73be962926c5842 to your computer and use it in GitHub Desktop.
Script for downsampling of accurate mass data before peak alignment
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
""" | |
Created on 2016-03-05 | |
@author: Martin Worm-Leonhard | |
---------------------------------------------------------------------------- | |
"THE BEER-WARE LICENSE" (Revision 42): | |
<mwormleonhard@gmail.com> wrote this file. As long as you retain this notice you | |
can do whatever you want with this stuff. If we meet some day, and you think | |
this stuff is worth it, you can buy me a beer in return. Martin Worm-Leonhard | |
---------------------------------------------------------------------------- | |
""" | |
import csv | |
import sys | |
import re | |
import tkFileDialog | |
from os.path import dirname, basename, join | |
# Header line written verbatim at the top of every output file.
_OUT_HEADER = 'Name\tR.T. (s)\tRetention Index\tCAS\tSample Concentration\tQuant Masses\tUniqueMass\tQuant S/N\tArea\tSimilarity\tWeight\tClassifications\tType\tSpectra\r\n'

# One "mass:intensity" peak of a spectrum; both numbers must have a decimal part.
_PEAK_RE = re.compile(r'(\d+\.\d+):(\d+\.\d+)')


def _open_for_csv(path, mode):
    """Open *path* for use with the csv module; *mode* is 'r' or 'w'.

    On Python 2 the csv module works on binary files.  On Python 3 it needs
    text files opened with ``newline=''``; latin-1 is used there because it
    maps every byte to exactly one character, so file content passes through
    unchanged, just like Python 2 byte strings do.
    """
    if sys.version_info[0] < 3:
        return open(path, mode + 'b')
    return open(path, mode, newline='', encoding='latin-1')


def _round_to_unit(text):
    """Round the number in *text* to the nearest whole number, e.g. '73.0'.

    Python 2's round() returns a float while Python 3's returns an int; the
    float() call keeps the float formatting on both.
    """
    return str(float(round(float(text))))


def _downsample_spectrum(spectrum):
    """Collapse a 'mass:intensity ...' spectrum string to integer masses.

    Each mass is rounded to the nearest integer and the intensities of peaks
    that land on the same integer mass are summed.  The result lists the
    masses in ascending order as 'mass:intensity ' (note the trailing space
    after every peak), with the summed intensities rounded to integers.
    Text that does not match the peak pattern is ignored.
    """
    summed = {}
    for peak in _PEAK_RE.finditer(spectrum):
        mass = int(round(float(peak.group(1))))
        summed[mass] = summed.get(mass, 0.0) + float(peak.group(2))
    # Intensities need to be integers as well.
    return ''.join('%d:%d ' % (mass, int(round(summed[mass])))
                   for mass in sorted(summed))


def downsampleFile(filename):
    """Write a low-resolution copy of the tab-separated file *filename*.

    The copy is created next to the input as ``LOWRES_<basename>``.  The
    input's first line is discarded and replaced by a fixed header.  In every
    following row:

    * column 1 holds two comma-separated values; the first is rounded, the
      second is kept, and the pair is wrapped in double quotes again,
    * column 5 (Quant Masses) is replaced by the letter "T",
    * column 6 (UniqueMass) is rounded,
    * column 10 (Weight) is rounded unless it is empty,
    * column 13 (Spectra) is reduced to integer masses and intensities.

    Blank lines are skipped, and an empty input file produces an output file
    that contains only the header.

    Exits the program via ``sys.exit`` with a message naming the file and
    line if the csv module reports an error.  Rows with too few columns or
    non-numeric values raise IndexError / ValueError.
    """
    outfilename = join(dirname(filename), "LOWRES_" + basename(filename))
    with _open_for_csv(filename, 'r') as infile, \
            _open_for_csv(outfilename, 'w') as outfile:
        reader = csv.reader(infile, delimiter='\t')
        # Quotechar hack: with QUOTE_NONE the writer refuses fields containing
        # the quotechar, so move it off '"' to let the quotes added below pass.
        writer = csv.writer(outfile, delimiter='\t',
                            quoting=csv.QUOTE_NONE, quotechar='~')
        try:
            outfile.write(_OUT_HEADER)
            next(reader, None)  # Skip header line; tolerate an empty file
            for row in reader:
                if not row:  # csv.reader yields [] for blank lines
                    continue
                rt1, rt2 = row[1].split(',')
                row[1] = '"' + _round_to_unit(rt1) + ',' + rt2 + '"'  # Rounding of RT1
                row[5] = 'T'  # Quant Masses replaced by the letter "T"
                row[6] = _round_to_unit(row[6])  # Rounding of Unique mass
                if row[10] != '':  # If Weight field is empty, then leave it alone
                    row[10] = _round_to_unit(row[10])  # Rounding of Weight
                row[13] = _downsample_spectrum(row[13])  # Rounding of spectrum
                writer.writerow(row)
        except csv.Error as e:
            sys.exit('file %s, line %d: %s' % (filename, reader.line_num, e))
infilenames = tkFileDialog.askopenfilenames(defaultextension='.txt', | |
filetypes=[('TXT file', '.txt')], | |
title='Select files to be downsampled') | |
if len(infilenames) > 0: | |
for name in infilenames: | |
downsampleFile(name) | |
print "Done processing", name | |
else: | |
sys.exit('No files selected. Exiting.') | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment