Created
October 27, 2017 23:41
-
-
Save adiamb/4e901428f22762732970e1b0b78be5a6 to your computer and use it in GitHub Desktop.
Make a sample.txt file given a list of TCR-alpha and TCR-beta barcodes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import print_function | |
import re | |
import subprocess | |
import sys | |
# Path of the ';'-separated input sheet, taken from the first command-line
# argument.  Evaluated at import time, so running or importing this module
# without an argument raises IndexError.
file_in = sys.argv[1]
# No output path is read from the command line (sys.argv[2] is unused).
### define functions
def num_ret(str=str):
    """Return only the digit characters of ``str``, in their original order.

    The parameter name (which shadows the builtin) and its default are kept
    unchanged so existing positional and keyword callers keep working.
    Calling the function without an argument is not meaningful: the default
    is the ``str`` type itself, which cannot be iterated.

    Args:
        str: Text to filter, for example ``"a1b2"``.

    Returns:
        A string containing just the digits of the input (``""`` if none).
    """
    return ''.join(ch for ch in str if ch.isdigit())
################ this will take a file in the format DBID;TCR_A;TCR_A_BC;TCR_B;TCR_B_BC and process it by creating a dictionary entry for each HS run
def HS_spool(file_in):
    """Group the records of a ';'-separated sheet by run name.

    Every non-blank line must hold at least five fields, read as
    ``DBID;TCR_A;TCR_A_BC;TCR_B;TCR_B_BC``.  The text ``TCR-`` is removed
    from the TCR_A and TCR_B fields and the results are used as dictionary
    keys (reported as "runs").  The barcode fields are reduced to their
    digits with ``num_ret``.

    For each non-empty run name on a line, the entry ``DBID;run;barcode`` is
    stored under that run; entries for the same run are joined with ``$``.
    A line whose TCR_A and TCR_B fields are both empty contributes nothing.

    Args:
        file_in: Path of the input file.

    Returns:
        dict mapping run name to a ``$``-joined string of
        ``DBID;run;barcode`` entries.

    Raises:
        IndexError: If a non-blank line has fewer than five fields.
    """
    HS_calls = {}
    with open(file_in) as f_in:
        for line_no, line in enumerate(f_in, 1):
            if not line.strip():
                # Blank lines (e.g. a trailing newline) carry no record.
                continue
            line_parse = line.strip('\r\n').split(';')
            if len(line_parse) < 5:
                raise IndexError(
                    'line %d of %s: expected 5 ";"-separated fields, got %d'
                    % (line_no, file_in, len(line_parse))
                )
            DBID = line_parse[0]
            TCR_A = re.sub('TCR-', '', line_parse[1])
            TCR_A_BC = num_ret(line_parse[2])
            TCR_B = re.sub('TCR-', '', line_parse[3])
            TCR_B_BC = num_ret(line_parse[4])
            # B is handled before A so that, when both are present, keys are
            # created in the same order as before.
            for run, barcode in ((TCR_B, TCR_B_BC), (TCR_A, TCR_A_BC)):
                if not run:
                    # Never register an entry under an empty run name.
                    continue
                entry = str(DBID) + ';' + run + ';' + barcode
                if run in HS_calls:
                    HS_calls[run] = HS_calls[run] + '$' + entry
                else:
                    HS_calls[run] = entry
    print('IDENTIFIED ', len(HS_calls), ' RUNS')
    return HS_calls
################# this will take the per-run dictionary built in main and write out the barcodes of each DBID on one line separated by commas
def out_f(Dic, file_out):
    """Write the non-empty values of ``Dic`` to ``<file_out>_SAMPLES.txt``.

    Each non-empty value is written on its own line, followed by a newline.
    The file is created (or truncated) even when nothing is written.

    Args:
        Dic: Mapping whose values are the strings to write; keys are ignored.
        file_out: Prefix of the output path; ``_SAMPLES.txt`` is appended.
    """
    # ``values()`` works on both Python 2 and 3 (``itervalues`` is 2-only);
    # the ``with`` block closes the file even if a write fails.
    with open(file_out + '_SAMPLES.txt', 'w') as f_out:
        for value in Dic.values():
            if value:
                f_out.write(value + '\n')
## main call from python
def main(argv):
    """Split the input sheet into one ``<run>_SAMPLES.txt`` file per run.

    The sheet is parsed with ``HS_spool``.  For every run, the barcodes of
    its entries are grouped by DBID (entries with an empty barcode are
    dropped) and joined with commas; ``out_f`` then writes one line per DBID
    to ``<run>_SAMPLES.txt``.

    Args:
        argv: Command-line style argument list.  ``argv[1]`` is used as the
            input path when present; otherwise the module-level ``file_in``
            is used.
    """
    input_path = argv[1] if argv and len(argv) > 1 else file_in
    HS_calls = HS_spool(input_path)
    # ``items()`` works on both Python 2 and 3 (``iteritems`` is 2-only).
    for key, value in HS_calls.items():
        DBID_DIC = {}
        for item in value.split('$'):
            # Each item has the form "DBID;run;barcode".
            item_parse = item.split(';')
            dbid, barcode = item_parse[0], item_parse[2]
            if not barcode:
                continue
            if dbid in DBID_DIC:
                DBID_DIC[dbid] = DBID_DIC[dbid] + ',' + barcode
            else:
                DBID_DIC[dbid] = barcode
        print('IDENTIFIED ', len(DBID_DIC), ' Individuals from ', key)
        out_f(DBID_DIC, key)


if __name__ == '__main__':
    main(sys.argv)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment