Created
June 25, 2020 03:13
-
-
Save chrisjurich/7ba0aa8af2a241b37220e201c4b2437a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def blended_eterna_score(row, scale_max=10):
    """Return the blended, non-binary Eterna score for one structure.

    Every scored position contributes a value between 0 and 1 taken from a
    smooth curve: unpaired positions (``"."`` in the target structure) are
    rewarded for high reactivity, every other position for low reactivity.
    The result is the mean contribution multiplied by 100.

    Args:
        row: Mapping-like record (for example a pandas DataFrame row) with
            the keys ``"target_structure"``, ``"sequence"``,
            ``"SHAPE_data"`` and ``"SHAPE_error"``.
        scale_max: SHAPE values and errors are multiplied by
            ``scale_max / max(SHAPE_data)`` before being scored.

    Returns:
        ``100 * mean(per-position score)``, or NaN when the score is
        undefined: no position can be scored (empty input or no positive
        SHAPE value) or all SHAPE values are identical, so they cannot be
        normalised.

    Raises:
        AssertionError: If the target structure and the sequence differ in
            length.
    """
    # Imported here so the function is self-contained; no module-level
    # imports are visible in this file.
    import math
    import re

    structure = row["target_structure"]
    shape_data = row["SHAPE_data"]
    shape_error = row["SHAPE_error"]

    # Raised explicitly (instead of a bare ``assert``) so the check survives
    # ``python -O`` while callers still see the same exception type.
    if len(structure) != len(row["sequence"]):
        raise AssertionError(
            "target_structure and sequence must have the same length"
        )

    # Sometimes there is a fingerprint sequence at the end of the structure.
    # If that is the case it needs to be removed.
    sequence = re.sub("AAAGAAACAACAACAACAAC$", "", row["sequence"])

    # data_len is the number of leading positions that will be reviewed.
    data_len = min(
        len(structure),
        len(shape_data),
        len(shape_error),  # the loop below indexes SHAPE_error as well
        len(sequence),
        # Count of strictly positive SHAPE values (caps the number of
        # positions scored; it does not skip individual positions).
        sum(1 for val in shape_data if val > 0),
    )
    if data_len == 0:
        # Nothing to score. Checked before max()/min() below so empty or
        # all-non-positive data cannot raise or divide by zero.
        return math.nan

    def unpaired_blend(pct):
        """Helper method that determines the score for an unpaired nt position"""
        if 0 <= pct <= 0.25:
            return 0.9 * math.exp((pct - 0.25) * 30)
        elif 0.25 < pct <= 1:
            return 1 - 0.1 * math.exp((-pct + 0.25) * 20)
        else:
            raise TypeError("the value of {VAL} is invalid. unpaired_blend() takes a value on the range [0,1]".format(VAL=pct))

    def paired_blend(pct):
        """Helper method that determines the score for a paired nt position"""
        if 0 <= pct <= 0.50:
            return 1 - 0.1 * math.exp((pct - 0.50) * 10)
        elif 0.50 < pct <= 1:
            return 0.9 * math.exp((-pct + 0.50) * 10)
        else:
            raise TypeError("The value of {VAL} is invalid. paired_blend() takes a values on the range [0,1]".format(VAL=pct))

    def scale(value, val_min, val_max):
        """Map value onto [0, 1], where val_min is 0 and val_max is 1, clamping outliers"""
        scaled_val = (value - val_min) / (val_max - val_min)
        return min(max(0, scaled_val), 1)

    shape_max = max(shape_data)
    shape_min = min(shape_data)
    if shape_max == shape_min:
        # A zero-width range cannot be normalised (scale() would divide by 0).
        return math.nan

    # data_len > 0 guarantees at least one positive value, so shape_max > 0.
    scale_factor = scale_max / shape_max
    # NOTE(review): values are rescaled by scale_factor but then normalised
    # against the *unscaled* shape_min/shape_max. Kept as in the original;
    # confirm this is intended.

    score = 0
    for index in range(data_len):
        shape = shape_data[index] * scale_factor
        error = shape_error[index] * scale_factor
        if structure[index] == ".":
            # Unpaired: give the benefit of the doubt towards high reactivity.
            score += unpaired_blend(scale(shape + error, shape_min, shape_max))
        else:
            # Paired: give the benefit of the doubt towards low reactivity.
            score += paired_blend(scale(shape - error, shape_min, shape_max))
    return 100 * score / data_len
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment