Skip to content

Instantly share code, notes, and snippets.

View jamessdixon's full-sized avatar

Jamie Dixon jamessdixon

View GitHub Profile
# Dump every cluster in `y_clusters` to a tab-separated file named after the
# flavor (the extension is .csv, but the delimiter is a tab).
# NOTE(review): `flavor` and `y_clusters` are not defined in this fragment;
# presumably they are parameters of an enclosing function whose header is not
# visible here - confirm against the full file.
file_name = 'clusters_' + flavor + '.csv'
cluster_columns = ('score', 'pid', 'start', 'end', 'mz', 'charge')
# Header and rows share one handle, so the file is opened once instead of
# once per cluster.
with open(file_name, 'w') as t:
    t.write('\t'.join(cluster_columns) + '\n')
    for y_cluster in y_clusters:
        cluster_fields = (
            y_cluster.score,
            y_cluster.pid,
            y_cluster.start,
            y_cluster.end,
            y_cluster.mz,
            y_cluster.charge,
        )
        t.write('\t'.join(str(field) for field in cluster_fields) + '\n')
# Dump every post-processed alignment to a tab-separated file (the extension
# is .csv, but the delimiter is a tab).
# NOTE(review): `postprocessed_alignments` is not defined in this fragment;
# presumably it is a parameter of an enclosing function whose header is not
# visible here - confirm against the full file.
file_name = 'postprocessed_alignments.csv'
postprocessed_columns = (
    'label',
    'left_protein',
    'right_protein',
    'sequence',
    'b_score',
    'y_score',
    'total_score',
    'precursor_distance',
    'extended_sequence',
    'alignment',
)
# Header and rows share one handle, so the file is opened once instead of
# once per alignment.
with open(file_name, 'w') as t:
    t.write('\t'.join(postprocessed_columns) + '\n')
    for postprocessed_alignment in postprocessed_alignments:
        # Explicit unpacking keeps the ValueError on records that do not
        # carry exactly ten items.
        (label, left_protein, right_protein, sequence, b_score, y_score,
         total_score, precursor_distance, extended_sequence,
         alignment) = postprocessed_alignment
        row_fields = (
            label, left_protein, right_protein, sequence, b_score, y_score,
            total_score, precursor_distance, extended_sequence, alignment,
        )
        # Joining guarantees exactly one tab between neighbouring fields and
        # none after the last one, so each row lines up with the ten header
        # columns (sequence and b_score stay separate cells).
        t.write('\t'.join(str(field) for field in row_fields) + '\n')
# Dump every rescored alignment to a tab-separated file (the extension is
# .csv, but the delimiter is a tab).
# NOTE(review): `rescored_alignments` is not defined in this fragment;
# presumably it is a parameter of an enclosing function whose header is not
# visible here - confirm against the full file.
file_name = 'rescored_alignments.csv'
rescored_columns = ('score', 'inv_dist', 'comb_seq', 'flavor')
# Header and rows share one handle, so the file is opened once instead of
# once per alignment.
with open(file_name, 'w') as t:
    t.write('\t'.join(rescored_columns) + '\n')
    for rescored_alignment in rescored_alignments:
        # Explicit unpacking keeps the ValueError on records that do not
        # carry exactly four items; the per-row flavor gets its own name so
        # it does not rebind any surrounding `flavor` variable.
        score, inv_dist, comb_seq, row_flavor = rescored_alignment
        row_fields = (score, inv_dist, comb_seq, row_flavor)
        t.write('\t'.join(str(field) for field in row_fields) + '\n')
def write_rescored_to_disk(rescoreds, flavor):
    """Write rescored records to ``rescored_naturals_<flavor>.csv``.

    The file lands in the current working directory and is tab-separated
    despite its ``.csv`` extension: one header line, then one line per
    record. An existing file of the same name is overwritten.

    Args:
        rescoreds: Iterable of 4-item records ``(score, inv_dist, comb_seq,
            flavor)``. Every item is written through ``str()``.
        flavor: String inserted into the output file name.

    Raises:
        ValueError: If a record does not unpack into exactly four items.
    """
    file_name = 'rescored_naturals_' + flavor + '.csv'
    columns = ('score', 'inv_dist', 'comb_seq', 'flavor')
    # Header and rows share one handle, so the file is opened once instead of
    # once per record.
    with open(file_name, 'w') as t:
        t.write('\t'.join(columns) + '\n')
        for rescored in rescoreds:
            # The per-record flavor gets its own name so it cannot shadow the
            # `flavor` parameter.
            score, inv_dist, comb_seq, row_flavor = rescored
            row_fields = (score, inv_dist, comb_seq, row_flavor)
            t.write('\t'.join(str(field) for field in row_fields) + '\n')
# Dump every (b, y) alignment pair to a tab-separated file named after the
# flavor (the extension is .csv, but the delimiter is a tab).
# NOTE(review): `flavor` and `alignments` are not defined in this fragment;
# presumably they are parameters of an enclosing function whose header is not
# visible here - confirm against the full file.
file_name = 'alignments_' + flavor + '.csv'
ion_columns = ('mz', 'start', 'end', 'ion', 'charge', 'pid', 'score')
# Seven b_* columns followed by the matching seven y_* columns.
alignment_columns = [prefix + column for prefix in ('b_', 'y_') for column in ion_columns]
# Header and rows share one handle, so the file is opened once instead of
# once per alignment.
with open(file_name, 'w') as t:
    # Joining puts a tab between every pair of names, so b_pid and b_score
    # stay separate header cells.
    t.write('\t'.join(alignment_columns) + '\n')
    for alignment in alignments:
        b, y = alignment
        # Explicit unpacking raises ValueError unless each side carries
        # exactly seven items.
        b_mass, b_start, b_end, b_ion, b_charge, b_pid, b_score = b
        y_mass, y_start, y_end, y_ion, y_charge, y_pid, y_score = y
        row_fields = (
            b_mass, b_start, b_end, b_ion, b_charge, b_pid, b_score,
            y_mass, y_start, y_end, y_ion, y_charge, y_pid, y_score,
        )
        # Fourteen tab-separated fields per row; b_score and y_mass are no
        # longer fused into one cell.
        t.write('\t'.join(str(field) for field in row_fields) + '\n')
def write_merged_hybrids_to_disk(merged_hybrids):
    """Write merged hybrids to ``merged_hybrids_A.csv``.

    The file lands in the current working directory and is tab-separated
    despite its ``.csv`` extension: one header line, then one line per
    record. An existing file of the same name is overwritten.

    Args:
        merged_hybrids: Iterable of 3-item records ``(total_score, b, y)``
            where ``b`` unpacks into ``(pid, start, end, score, mz, charge,
            extensions, seq)``. Only ``total_score`` and the ``b`` side are
            written; ``y`` is unpacked but not emitted. Every written item
            goes through ``str()``.

    Raises:
        ValueError: If a record is not a 3-item sequence or its ``b`` part
            does not unpack into exactly eight items.
    """
    file_name = 'merged_hybrids_A.csv'
    columns = (
        'total_score', 'pid', 'start', 'end', 'score',
        'mz', 'charge', 'extensions', 'seq',
    )
    # Header and rows share one handle; each row is actually written to the
    # file rather than only being built in memory.
    with open(file_name, 'w') as t:
        t.write('\t'.join(columns) + '\n')
        for merged_hybrid in merged_hybrids:
            total_score, b, _unused_y = merged_hybrid
            b_pid, b_start, b_end, b_score, b_mz, b_charge, b_extensions, b_seq = b
            row_fields = (
                total_score, b_pid, b_start, b_end, b_score,
                b_mz, b_charge, b_extensions, b_seq,
            )
            t.write('\t'.join(str(field) for field in row_fields) + '\n')
def write_merged_seqs_to_disk(merged_seqs):
    """Write merged sequences to ``merged_seqs_.csv``.

    The file lands in the current working directory and is tab-separated
    despite its ``.csv`` extension: one header line, then one line per
    record. An existing file of the same name is overwritten.

    Args:
        merged_seqs: Iterable of 3-item records ``(total_score, b, y)`` where
            ``b`` unpacks into ``(pid, start, end, score, mz, charge,
            extensions, seq)``. Only ``total_score`` and the ``b`` side are
            written; ``y`` is unpacked but not emitted. Every written item
            goes through ``str()``.

    Raises:
        ValueError: If a record is not a 3-item sequence or its ``b`` part
            does not unpack into exactly eight items.
    """
    file_name = 'merged_seqs_.csv'
    columns = (
        'total_score', 'pid', 'start', 'end', 'score',
        'mz', 'charge', 'extensions', 'seq',
    )
    # Header and rows share one handle; each row is actually written to the
    # file rather than only being built in memory.
    with open(file_name, 'w') as t:
        t.write('\t'.join(columns) + '\n')
        for merged_seq in merged_seqs:
            total_score, b, _unused_y = merged_seq
            b_pid, b_start, b_end, b_score, b_mz, b_charge, b_extensions, b_seq = b
            row_fields = (
                total_score, b_pid, b_start, b_end, b_score,
                b_mz, b_charge, b_extensions, b_seq,
            )
            t.write('\t'.join(str(field) for field in row_fields) + '\n')
def write_sorted_clusters_to_disk(sorted_clusters, flavor):
    """Write clusters to ``sorted_clusters_<flavor>.csv``.

    The file lands in the current working directory and is tab-separated
    despite its ``.csv`` extension: one header line, then one line per
    cluster. An existing file of the same name is overwritten.

    Args:
        sorted_clusters: Iterable of objects exposing the attributes
            ``score``, ``pid``, ``start``, ``end``, ``mz``, ``charge``,
            ``extensions`` and ``seq``. Every attribute is written through
            ``str()``; clusters are written in iteration order.
        flavor: String inserted into the output file name.

    Raises:
        AttributeError: If a cluster lacks one of the attributes above.
    """
    file_name = 'sorted_clusters_' + flavor + '.csv'
    columns = ('score', 'pid', 'start', 'end', 'mz', 'charge', 'extensions', 'seq')
    # Header and rows share one handle, so the file is opened once instead of
    # once per cluster.
    with open(file_name, 'w') as t:
        t.write('\t'.join(columns) + '\n')
        for sorted_cluster in sorted_clusters:
            row_fields = (
                sorted_cluster.score,
                sorted_cluster.pid,
                sorted_cluster.start,
                sorted_cluster.end,
                sorted_cluster.mz,
                sorted_cluster.charge,
                sorted_cluster.extensions,
                sorted_cluster.seq,
            )
            t.write('\t'.join(str(field) for field in row_fields) + '\n')
@jamessdixon
jamessdixon / high_order_hypedsearch
Created May 30, 2023 12:50
Using Higher Order Functions In Hypedsearch
#original
def do_fourth_thing(spectrum, b_sorted_clusters, y_sorted_clusters, prec_tol):
    """Run ``clustering.get_hybrid_matches`` and log how long the call took.

    The wall-clock duration of the call (measured with ``time.time()``) is
    appended as one line to ``Timing_data.txt`` in the current working
    directory.

    Args:
        spectrum: Object providing ``precursor_mass`` and
            ``precursor_charge``; both are forwarded to the matcher.
        b_sorted_clusters: Forwarded unchanged as the first matcher argument.
        y_sorted_clusters: Forwarded unchanged as the second matcher argument.
        prec_tol: Forwarded unchanged as the fourth matcher argument
            (presumably a precursor tolerance - confirm against
            ``clustering``).

    Returns:
        Whatever ``clustering.get_hybrid_matches`` returns.
    """
    began_at = time.time()
    matches = clustering.get_hybrid_matches(
        b_sorted_clusters,
        y_sorted_clusters,
        spectrum.precursor_mass,
        prec_tol,
        spectrum.precursor_charge,
    )
    elapsed = time.time() - began_at

    timing_line = "Finding hybrid merges took:" + '\t' + str(elapsed) + "\n"
    with open('Timing_data.txt', 'a') as timing_log:
        timing_log.write(timing_line)
    return matches
#reduces to
@jamessdixon
jamessdixon / matched_hits_print
Created May 29, 2023 01:53
print out matched_hits
def write_masses_to_disk(matched_masses, flavor):
file_name = 'matched_masses_' + flavor + '.csv'
with open(file_name, 'w') as t:
header = 'precursor_weight'+ '\t'+ 'matched_weight'+ '\t'+ 'start_index'+ '\t'+ 'end_index'+ '\t'+ 'b_ion'+ '\t'+ 'single_charge'+ '\t'+ 'protein_number'+ "\n"
t.write(header)
for key, values in matched_masses.items():
for value in values:
first,second,third,fourth,fifth,sixth = value
item = str(key) + '\t' + str(first)+ '\t' + str(second)+ '\t' + str(third)+ '\t' + str(fourth)+ '\t' + str(fifth)+ '\t' + str(sixth) + "\n"
with open(file_name, 'a') as t: