Skip to content

Instantly share code, notes, and snippets.

@rohitsuratekar
Created April 29, 2020 14:01
Show Gist options
  • Save rohitsuratekar/68183e689f31f029b6145ec77f880fc5 to your computer and use it in GitHub Desktop.
Save rohitsuratekar/68183e689f31f029b6145ec77f880fc5 to your computer and use it in GitHub Desktop.
Searches through given peptide sequences to generate unique fragments and their counts.
# Copyright (c) 2020
# Author: Rohit Suratekar
#
# This program finds unique peptides and their frequency in given peptide
# fragment data.
#
# IMPORTANT NOTE:
# This assumes there are NO gaps inside any peptide sequence:
# every peptide has to be a sequential run of positions without any gap.
# Input peptide data: one range of positions per peptide.
# A Python range excludes its stop value, so range(1, 4) covers 1, 2, 3.
peptides = [
    range(1, 11),
    range(6, 14),
    range(9, 12),
    range(11, 17),
]
# Use "count_fragments" function to get unique fragments and their counts
def get_points(data):
    """
    Convert each peptide into its inclusive ``(first, last)`` position pair.

    :param data: Iterable of peptides, each a non-empty iterable of
        positions (for example ``range(1, 11)``)
    :return: List of ``(min, max)`` tuples, one per peptide, in input order
    :raises ValueError: If a peptide contains no positions
    """
    points = []
    for index, peptide in enumerate(data):
        # Materialize once so one-shot iterators survive both min() and max().
        positions = list(peptide)
        if not positions:
            raise ValueError(f"peptide at index {index} has no positions")
        points.append((min(positions), max(positions)))
    return points
def get_first_fragment(data):
    """
    Return the left-most fragment of the given ranges.

    The fragment begins at the smallest start position and stops at the
    first boundary after it: either the position just before the next
    distinct start, or the smallest end position, whichever comes first.

    :param data: Non-empty list of inclusive ``(start, end)`` ranges, each
        with ``start <= end``
    :return: ``(start, end)`` tuple describing the fragment
    :raises ValueError: If ``data`` is empty
    """
    if not data:
        raise ValueError("data must contain at least one (start, end) range")
    start = min(x[0] for x in data)
    # A range that ends early closes the fragment even when another range
    # starts later; ignoring the ends here would produce a fragment that
    # overshoots that range, which could then never be removed.
    end = min(x[1] for x in data)
    later_starts = [x[0] for x in data if x[0] > start]
    if later_starts:
        # The fragment must stop just before the next range begins.
        end = min(end, min(later_starts) - 1)
    return start, end
def remove_fragment(data, fragment):
    """
    Remove an already extracted fragment from every range.

    A range that starts at or before the fragment's end and extends past it
    is trimmed so that it begins right after the fragment. A range equal to
    the fragment is dropped. Every other range is kept unchanged.

    :param data: List of inclusive ``(start, end)`` ranges, each with
        ``start <= end``
    :param fragment: ``(start, end)`` tuple of the fragment to remove
    :return: New list of ranges; ``data`` itself is not modified
    """
    cut = fragment[1]
    new_data = []
    for start, end in data:
        if start <= cut < end:
            # cut < end guarantees the trimmed range still has start <= end.
            start = cut + 1
        if (start, end) != fragment:
            new_data.append((start, end))
    return new_data
def extract_fragments(data):
    """
    Extract all fragments from the given peptides.

    The peptides are converted to ranges, then the first fragment is taken
    and removed from the ranges repeatedly until no range is left.

    :param data: Peptide sequence list
    :return: List of ``(start, end)`` fragments in the order they were taken
    :raises ValueError: If removing a fragment leaves the ranges unchanged
    """
    all_fragments = []
    points = get_points(data)
    while points:
        fg = get_first_fragment(points)
        remaining = remove_fragment(points, fg)
        if remaining == points:
            # Nothing was trimmed or dropped, so the next pass would be
            # identical to this one and the loop would never finish.
            raise ValueError(
                f"fragment {fg} could not be removed from ranges {points}"
            )
        all_fragments.append(fg)
        points = remaining
    return all_fragments
def count_fragments(data):
    """
    Count and print how many peptides fully contain each fragment.

    :param data: Peptide sequence list
    :return: Dictionary mapping each ``(start, end)`` fragment to the number
        of peptides whose range covers it completely
    """
    ranges = get_points(data)
    fms = {}
    for fragment in extract_fragments(data):
        start, end = fragment
        # A peptide counts only when it spans the whole fragment.
        fms[fragment] = sum(
            1 for low, high in ranges if low <= start and high >= end
        )
    print("Range ==> Count")
    for (start, end), count in fms.items():
        print(f"{start} to {end} ==> {count}")
    return fms
# Script entry point: report the fragments of the sample peptides.
if __name__ == "__main__":
    count_fragments(peptides)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment