Skip to content

Instantly share code, notes, and snippets.

@chrisjurich
Created June 25, 2020 03:13
Show Gist options
  • Save chrisjurich/7ba0aa8af2a241b37220e201c4b2437a to your computer and use it in GitHub Desktop.
Save chrisjurich/7ba0aa8af2a241b37220e201c4b2437a to your computer and use it in GitHub Desktop.
def blended_eterna_score(row, scale_max=10):
    """Return the blended (non-binary) Eterna score for one structure.

    Every scored position contributes a value between 0 and 1 taken from a
    smooth curve rather than a hard pass/fail threshold; the result is the
    mean contribution multiplied by 100.

    Args:
        row: Mapping-like record (the original author passes a pandas
            DataFrame row) providing ``"target_structure"``, ``"sequence"``,
            ``"SHAPE_data"`` and ``"SHAPE_error"``. In the structure string
            ``"."`` is treated as an unpaired position and any other
            character as a paired one.
        scale_max: Value the largest SHAPE reading is rescaled to before
            scoring.

    Returns:
        The mean per-position score scaled to the range 0-100.

    Raises:
        AssertionError: If the structure and the sequence differ in length.
        ValueError: If ``SHAPE_data`` is empty (``max()`` of an empty
            sequence).
        ZeroDivisionError: With plain Python numbers, if no position is
            scored, the largest SHAPE value is 0, or all SHAPE values are
            equal.
    """
    # Imported locally so the function also works when it is pasted on its
    # own, without relying on module-level imports.
    import math
    import re

    structure = row["target_structure"]
    assert len(structure) == len(row["sequence"])

    # Sometimes there is a fingerprint sequence at the end of the structure;
    # strip it so those trailing positions are not scored.
    sequence = re.sub("AAAGAAACAACAACAACAAC$", "", row["sequence"])

    shape_data = row["SHAPE_data"]

    # data_len is the number of leading positions that will be reviewed.
    # NOTE(review): the last term is the *count* of positive SHAPE values,
    # not their positions, so a zero reading shortens the scored prefix
    # instead of being skipped individually -- confirm this is intended.
    data_len = min(
        len(structure),
        len(shape_data),  # can probably get rid of this one
        len(sequence),
        sum(1 for val in shape_data if val > 0),
    )

    def unpaired_blend(pct):
        """Score an unpaired nt position; ``pct`` must lie in [0, 1]."""
        if 0 <= pct <= 0.25:
            return 0.9 * math.exp((pct - 0.25) * 30)
        if 0.25 < pct <= 1:
            return 1 - 0.1 * math.exp((-pct + 0.25) * 20)
        raise TypeError("the value of {VAL} is invalid. unpaired_blend() takes a value on the range [0,1]".format(VAL=pct))

    def paired_blend(pct):
        """Score a paired nt position; ``pct`` must lie in [0, 1]."""
        if 0 <= pct <= 0.50:
            return 1 - 0.1 * math.exp((pct - 0.50) * 10)
        # The lower bound is explicit so that a negative value is rejected
        # like any other out-of-range input instead of being scored.
        if 0.50 < pct <= 1:
            return 0.9 * math.exp((-pct + 0.50) * 10)
        raise TypeError("The value of {VAL} is invalid. paired_blend() takes a values on the range [0,1]".format(VAL=pct))

    def scale(value, val_min, val_max):
        """Map value onto [0, 1], where val_min is 0 and val_max is 1."""
        val_range = val_max - val_min
        scaled_val = (value - val_min) / val_range
        # Clamp so the blend helpers always receive an in-range fraction.
        return min(max(0, scaled_val), 1)

    shape_max = max(shape_data)
    scale_factor = scale_max / shape_max
    shape_min = min(shape_data)
    # NOTE(review): readings are multiplied by scale_factor below, but are
    # then normalised against the *unscaled* min/max. Unless scale_max equals
    # the raw maximum, most values get clamped by scale() -- confirm whether
    # the bounds were meant to be rescaled as well.
    shape_error = row["SHAPE_error"]

    score = 0
    for index in range(data_len):
        shape = shape_data[index] * scale_factor
        error = shape_error[index] * scale_factor
        if structure[index] == ".":
            # Unpaired: the error bar is added before scoring.
            score += unpaired_blend(scale(shape + error, shape_min, shape_max))
        else:
            # Paired: the error bar is subtracted before scoring.
            score += paired_blend(scale(shape - error, shape_min, shape_max))
    return 100 * score / data_len
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment