Created
April 29, 2020 14:01
-
-
Save rohitsuratekar/68183e689f31f029b6145ec77f880fc5 to your computer and use it in GitHub Desktop.
Searches through given peptide sequences to generate unique fragments and their counts.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Copyright (c) 2020 | |
# Author: Rohit Suratekar | |
# | |
# This program finds unique peptides and their frequency in given peptide | |
# fragment data. | |
# | |
# IMPORTANT NOTE:
# This assumes there are NO gaps within any peptide sequence:
# every peptide must be a contiguous run of positions.
# Your INPUT peptide data: one entry per peptide, listing its positions.
# Remember that Python ranges exclude the stop value: range(1, 4) => 1, 2, 3
peptides = [
    range(1, 11),
    range(6, 14),
    range(9, 12),
    range(11, 17)
]
# Use "count_fragments" function to get unique fragments and their counts | |
def get_points(data):
    """
    Reduce every peptide to the pair of its lowest and highest position.

    :param data: Iterable of peptides, each a non-empty collection of
        positions (for example a ``range``)
    :return: List of (lowest, highest) tuples, one per peptide, in input order
    """
    bounds = []
    for peptide in data:
        lowest = min(peptide)
        highest = max(peptide)
        bounds.append((lowest, highest))
    return bounds
def get_first_fragment(data):
    """
    Find the left-most fragment over which the set of covering ranges is
    constant.

    The fragment starts at the smallest start point in ``data`` and runs
    until coverage changes: either just before the next range begins or at
    the position where the first range ends, whichever comes first.

    :param data: Non-empty list of (start, end) tuples with inclusive bounds
    :return: (start, end) tuple describing the first fragment
    :raises ValueError: If ``data`` is empty
    """
    start = min(first for first, _ in data)

    # Coverage changes one position before any later range begins ...
    breakpoints = [first - 1 for first, _ in data if first > start]
    # ... and at the last position of any range. Previously the ends were
    # consulted only when no later start existed, so a range ending before
    # the next start produced a fragment that overshot that range.
    # Ends below ``start`` are skipped, matching the old filtering.
    breakpoints.extend(last for _, last in data if last >= start)

    # The earliest breakpoint closes the fragment; with none available the
    # fragment collapses to the single position ``start``.
    end = min(breakpoints) if breakpoints else start
    return start, end
def remove_fragment(data, fragment):
    """
    Remove an already extracted fragment from every range.

    Ranges that lie completely inside the fragment disappear, ranges that
    extend past the fragment's end are trimmed to start right after it, and
    ranges that begin after the fragment are returned unchanged.

    :param data: List of (start, end) tuples with inclusive bounds
    :param fragment: (start, end) tuple of the fragment to remove
    :return: New list of the (start, end) tuples that remain
    """
    frag_start, frag_end = fragment
    remaining = []
    for start, end in data:
        # Nothing is left of a range the fragment fully covers. Dropping
        # only exact matches (the old behaviour) kept shorter covered
        # ranges forever, so the caller's loop could never finish.
        if frag_start <= start and end <= frag_end:
            continue
        # The range reaches beyond the fragment: keep the part after it.
        if start <= frag_end < end:
            start = frag_end + 1
        remaining.append((start, end))
    return remaining
def extract_fragments(data):
    """
    Split the given peptides into their consecutive fragments.

    The peptides are first reduced to (start, end) ranges; the left-most
    fragment is then taken off repeatedly until no range is left.

    :param data: Peptide sequence list
    :return: List of (start, end) fragments in the order they were found
    """
    remaining = get_points(data)
    fragments = []
    while remaining:
        head = get_first_fragment(remaining)
        fragments.append(head)
        remaining = remove_fragment(remaining, head)
    return fragments
def count_fragments(data):
    """
    Count how many peptides cover each unique fragment and print a report.

    A peptide counts towards a fragment when its range contains the whole
    fragment.

    :param data: Peptide sequence list
    :return: Dict mapping each (start, end) fragment to the number of
        peptides containing it, in the order the fragments were extracted
    """
    fragments = extract_fragments(data)
    points = get_points(data)

    counts = {}
    for start, end in fragments:
        counts[(start, end)] = sum(
            1 for low, high in points if low <= start and high >= end
        )

    print("Range ==> Count")
    for (start, end), total in counts.items():
        print(f"{start} to {end} ==> {total}")

    # Hand the result back as well, so callers are not limited to reading
    # the printed report.
    return counts
# Script entry point: report the fragments of the module-level peptides list.
if __name__ == "__main__":
    count_fragments(peptides)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment