Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@adiamb
Created October 27, 2017 23:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save adiamb/4e901428f22762732970e1b0b78be5a6 to your computer and use it in GitHub Desktop.
Save adiamb/4e901428f22762732970e1b0b78be5a6 to your computer and use it in GitHub Desktop.
make sample.txt file given a list of TCRalpha and TCRBeta barcodes
from __future__ import print_function
import re
import subprocess
import sys
# Path of the input sheet, taken from the first command-line argument.
# This runs at import time, so the script needs at least one argument.
file_in = sys.argv[1]
#file_out = sys.argv[2]
### define functions
def num_ret(str=''):
    """Return the digit characters of ``str``, joined in their original order.

    Every character for which ``isdigit()`` is true is kept; everything
    else is dropped. For example ``num_ret('BC12a3')`` gives ``'123'``.

    The parameter keeps its historical name (which shadows the builtin
    inside this function only) so existing keyword callers keep working.
    The default is an empty string: the previous default was the builtin
    ``str`` type itself, which made a call without arguments fail with a
    ``TypeError`` because a type cannot be iterated.

    Returns an empty string when ``str`` contains no digits.
    """
    return ''.join(ch for ch in str if ch.isdigit())
################ Takes a file whose lines have the format DBID;TCR_A;TCR_A_BC;TCR_B;TCR_B_BC and builds a dictionary with one entry per HS run
def HS_spool(file_in):
    """Group the rows of a semicolon-delimited sheet by run name.

    Each line of ``file_in`` is expected to hold at least five fields:
    ``DBID;TCR_A;TCR_A_BC;TCR_B;TCR_B_BC``. The ``'TCR-'`` text is removed
    from the two run-name fields and the two barcode fields are reduced to
    their digits with ``num_ret``.

    Returns a dict that maps each run name to a string of
    ``'DBID;RUN;BARCODE'`` records joined with ``'$'``. A row that names
    both runs contributes one record to each run.

    Blank lines are skipped. A non-blank line with fewer than five fields
    still raises ``IndexError``.
    """
    HS_calls = {}
    with open(file_in) as f_in:
        for line in f_in:
            row = line.strip('\r\n')
            # A trailing or stray empty line would otherwise fail on the
            # field indexing below.
            if not row.strip():
                continue
            line_parse = row.split(';')
            DBID = line_parse[0]
            TCR_A = re.sub('TCR-', '', line_parse[1])
            TCR_A_BC = num_ret(line_parse[2])
            TCR_B = re.sub('TCR-', '', line_parse[3])
            TCR_B_BC = num_ret(line_parse[4])

            # The B record is stored before the A record so the dictionary
            # keeps the same insertion order as before. A row that names
            # neither run is still filed under the (empty) B run name.
            records = []
            if TCR_B or not TCR_A:
                records.append((TCR_B, TCR_B_BC))
            if TCR_A:
                records.append((TCR_A, TCR_A_BC))

            for run, barcode in records:
                make_value = ';'.join((DBID, run, barcode))
                if run in HS_calls:
                    HS_calls[run] = HS_calls[run] + '$' + make_value
                else:
                    HS_calls[run] = make_value
    print('IDENTIFIED ', len(HS_calls), ' RUNS')
    return HS_calls
################# Takes the dictionary created by HS_spool and writes out the barcodes that share a DBID and an HS run on one line, separated by commas
def out_f(Dic, file_out):
    """Write the non-empty values of ``Dic`` to ``<file_out>_SAMPLES.txt``.

    Each value is written on its own line, in the dictionary's iteration
    order. Empty values are skipped. The output file is created (or
    truncated) even when nothing is written.

    ``file_out`` is the path prefix; ``'_SAMPLES.txt'`` is appended to it.
    """
    # The context manager closes the file even if a write fails.
    with open(file_out + '_SAMPLES.txt', 'w') as f_out:
        # values() exists on both Python 2 and Python 3 dictionaries.
        for value in Dic.values():
            if value:
                f_out.write(value + '\n')
## main call from python
def main(argv):
    """Write one ``<run>_SAMPLES.txt`` file for every run in the input sheet.

    The input path is ``argv[1]`` when it is present; otherwise the
    module-level ``file_in`` is used. The sheet is grouped by run with
    ``HS_spool``. Within each run, the barcodes that share a DBID are
    joined with commas and the per-DBID strings are handed to ``out_f``
    with the run name as the file prefix.

    Records with an empty barcode are ignored. ``out_f`` is still called
    for a run in which every barcode is empty.
    """
    source_path = argv[1] if argv and len(argv) > 1 else file_in
    HS_calls = HS_spool(source_path)
    # items() exists on both Python 2 and Python 3 dictionaries.
    for key, value in HS_calls.items():
        DBID_DIC = {}
        # Each record has the form 'DBID;RUN;BARCODE'.
        for item in value.split('$'):
            item_parse = item.split(';')
            dbid, barcode = item_parse[0], item_parse[2]
            if not barcode:
                continue
            if dbid in DBID_DIC:
                DBID_DIC[dbid] = DBID_DIC[dbid] + ',' + barcode
            else:
                DBID_DIC[dbid] = barcode
        print('IDENTIFIED ', len(DBID_DIC), ' Individuals from ', key)
        out_f(DBID_DIC, key)
# Run the conversion only when this file is executed as a script.
if __name__ == '__main__':
    main(sys.argv)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment