Skip to content

Instantly share code, notes, and snippets.

@adiamb
Created October 14, 2017 01:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save adiamb/971433fe5ddfa4d37ad43b969945ddbc to your computer and use it in GitHub Desktop.
Save adiamb/971433fe5ddfa4d37ad43b969945ddbc to your computer and use it in GitHub Desktop.
update protein counts
import sys
import time
import subprocess
from subprocess import PIPE
import re
from itertools import chain
import pandas as pd
import numpy as np
import copy
## import filenames as a list
## each entry is opened as a text file whose lines are paths of result files
f_names = ['ar_chy_list', 'ar_try_list', 'pan_chy_list', 'pan_try_list']

## rows whose protein-name field matches this pattern are ignored
_contaminant_re = re.compile(r'(>Reverse|Common contaminant protein)')
## protein names containing one of these prefixes are keyed by the text
## before their first '-'
_segment_re = re.compile(r'(>PB2-|>HA-|>NA-|>MA1-|>NP-)')

## every result file that was scanned (the value is always 1); the original
## wrote into this name without ever creating it, which raised NameError
filelist = {}
## make a template dictionary for all possible proteins found (all counts 0)
temp_dic = {}
for list_name in f_names:
    with open(list_name) as f_:
        for item in f_:
            ms_path = item.strip('\r\n')
            if not ms_path:
                continue  # blank line in the list file: nothing to open
            filelist[ms_path] = 1
            with open(ms_path) as ms_in:
                for n, line in enumerate(ms_in, 1):
                    if n <= 13:
                        continue  # the first 13 lines of each file are skipped
                    fields = line.split(';')
                    if len(fields) < 19:
                        # no protein-name field (index 18), e.g. a blank line
                        continue
                    protein = str(fields[18])
                    if _contaminant_re.search(protein):
                        continue  ### remove contaminants
                    if _segment_re.search(protein):
                        makekey = protein.split('-')[0]
                    else:
                        makekey = protein
                    temp_dic.setdefault(makekey, 0)
protein_dic = copy.deepcopy(temp_dic) ### make a copy of the protein_dic
## make a dic with files: result-file path -> {protein key -> number of rows}
## rows whose protein-name field matches this pattern are ignored
_contaminant_re = re.compile(r'(>Reverse|Common contaminant protein)')
## protein names containing one of these prefixes are keyed by the text
## before their first '-'
_segment_re = re.compile(r'(>PB2-|>HA-|>NA-|>MA1-|>NP-)')

main_counts = {}
for list_name in f_names:
    with open(list_name) as f_:
        for item in f_:
            parse_item = item.strip('\r\n')
            if not parse_item:
                continue  # blank line in the list file: nothing to open
            with open(parse_item) as ms_in:
                # Start every file from the zeroed template *before* reading
                # rows. The original created the entry on the first matching
                # row and did not count that row, so each file was one short,
                # and a file without matching rows got no entry at all.
                if parse_item not in main_counts:
                    main_counts[parse_item] = copy.deepcopy(protein_dic)
                file_counts = main_counts[parse_item]
                for n, line in enumerate(ms_in, 1):
                    if n <= 13:
                        continue  # the first 13 lines of each file are skipped
                    fields = line.split(';')
                    if len(fields) < 19:
                        # no protein-name field (index 18), e.g. a blank line
                        continue
                    protein = str(fields[18])
                    if _contaminant_re.search(protein):
                        continue  ### remove contaminants
                    if _segment_re.search(protein):
                        makekey = protein.split('-')[0]
                    else:
                        makekey = protein
                    # progress output kept from the original script
                    print(makekey)
                    file_counts[makekey] = int(file_counts.get(makekey, 0)) + 1
## fix the file order once so the header and every row share the same columns
_file_order = list(main_counts)
file_headers = ';'.join(_file_order) ### make file headers
## protein key -> its counts as text, one per file in header order, joined
## with ';'. Values are always strings (the original kept a bare int when
## only one file was present, which broke the write below) and a protein
## missing from a file is written as 0 so columns never shift.
final_dic = {}
for _counts in main_counts.values():
    for _protein in _counts:
        if _protein not in final_dic:
            final_dic[_protein] = ';'.join(
                str(main_counts[_name].get(_protein, 0))
                for _name in _file_order)
with open('COUNTS_PROTEIN_CONTENT_Oct13.csv', 'w') as f_out:
    f_out.write('PROTEIN' + ';' + file_headers + '\n')
    # sorted so that repeated runs produce the rows in the same order
    for _protein in sorted(final_dic):
        f_out.write(_protein + ';' + final_dic[_protein] + '\n')
## scratch expressions; the last two compute a value and discard it
## NOTE(review): `t1` is not assigned anywhere in the visible file -- confirm
## where it is meant to come from before relying on these lines.
## Each substring needs its own `in` test: the original
## `'Reverse' or '...' in t1` was always true because the non-empty literal
## 'Reverse' is truthy on its own.
if 'Reverse' in t1 or 'Common contaminant protein' in t1:
    print('yeay')
## text between the first and last '.' of field 2 of the current line
''.join(re.findall(r'\..*\.', line.split(';')[2]))
## t1 with everything except the capital letters A-Z removed
re.sub(pattern='[^A-Z]*', repl='', string=t1)
## 0 Query
## 1 ProteinRank
## 2 Peptide
## 3 GlycansNHFAGNa
## 4 Modification
## 5 Observed
## 6 z
## 7 Observed
## 8 Calc.mass
## 9 Off-by-xerror
## 10 Mass error(ppm)
## 11 Startingposition
## 12 Cleavage
## 13 Score
## 14 Delta
## 15 DeltaMod
## 16 |Log Prob|
## 17 # of uniquepeptides
## 18 Protein Name
## 19 ProteinDB number
## 20 Comment
## 21 Scan #
## 22 Scan Time
## column titles in positional order (23 entries, indices 0-22);
## note that "Observed" appears twice, at positions 5 and 7
headers = [
    "Query",                # 0
    "ProteinRank",          # 1
    "Peptide",              # 2
    "GlycansNHFAGNa",       # 3
    "Modification",         # 4
    "Observed",             # 5
    "z",                    # 6
    "Observed",             # 7
    "Calc.mass",            # 8
    "Off-by-xerror",        # 9
    "Mass error(ppm)",      # 10
    "Startingposition",     # 11
    "Cleavage",             # 12
    "Score",                # 13
    "Delta",                # 14
    "DeltaMod",             # 15
    "|Log Prob|",           # 16
    "# of uniquepeptides",  # 17
    "Protein Name",         # 18
    "ProteinDB number",     # 19
    "Comment",              # 20
    "Scan #",               # 21
    "Scan Time",            # 22
]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment