chrisjurich/eterna_blended.py

## eterna_blended.py
def blended_eterna_score(row, scale_max=10):
    """Return the blended (non-binary) Eterna score for one structure.

    Each scored position contributes a value between 0 and 1 that depends on
    how well its rescaled SHAPE reactivity agrees with the target structure:
    positions marked "." are scored with the unpaired curve, every other
    character with the paired curve. The result is the mean contribution
    multiplied by 100.

    Args:
        row: Mapping-like record (for example one row of a pandas DataFrame)
            providing "target_structure", "sequence", "SHAPE_data" and
            "SHAPE_error". The SHAPE entries must be indexable sequences of
            numbers.
        scale_max: Value the largest SHAPE reactivity is rescaled to before
            scoring.

    Returns:
        The blended score, ``100 * mean(per-position score)``.

    Raises:
        AssertionError: If "target_structure" and "sequence" differ in length.
        ValueError: If "SHAPE_data" is empty (raised by ``max``).
        ZeroDivisionError: If no position can be scored (no positive SHAPE
            value, or nothing left after removing the fingerprint), if the
            largest SHAPE value is 0, or if all SHAPE values are identical.
    """
    # Local imports keep the function self-contained: no module-level import
    # of ``re`` or ``math`` is visible in this file.
    import math
    import re

    structure = row["target_structure"]
    shape_data = row["SHAPE_data"]
    assert len(structure) == len(row["sequence"]), (
        "target_structure and sequence must have the same length"
    )

    # Sometimes a fingerprint sequence is appended to the end of the
    # structure; when present it must not be scored, so strip it here.
    sequence = re.sub(r"AAAGAAACAACAACAACAAC$", "", row["sequence"])

    # data_len is the number of leading positions that will be scored. The
    # count of strictly positive SHAPE values is used as an upper bound as
    # well, so that positions without SHAPE signal are not counted.
    positive_count = sum(1 for value in shape_data if value > 0)
    data_len = min(
        len(structure),
        len(shape_data),
        len(sequence),
        positive_count,
    )

    def unpaired_blend(pct):
        """Score an unpaired position from its scaled reactivity in [0, 1]."""
        if 0 <= pct <= 0.25:
            return 0.9 * math.exp((pct - 0.25) * 30)
        elif 0.25 < pct <= 1:
            return 1 - 0.1 * math.exp((-pct + 0.25) * 20)
        else:
            raise TypeError("the value of {VAL} is invalid. unpaired_blend() takes a value on the range [0,1]".format(VAL=pct))

    def paired_blend(pct):
        """Score a paired position from its scaled reactivity in [0, 1]."""
        if 0 <= pct <= 0.50:
            return 1 - 0.1 * math.exp((pct - 0.50) * 10)
        elif pct <= 1:
            return 0.9 * math.exp((-pct + 0.50) * 10)
        else:
            raise TypeError("The value of {VAL} is invalid. paired_blend() takes a values on the range [0,1]".format(VAL=pct))

    def scale(value, val_min, val_max):
        """Map value linearly onto [val_min, val_max] and clamp to [0, 1]."""
        val_range = val_max - val_min
        scaled_val = (value - val_min) / val_range
        return min(max(0, scaled_val), 1)

    shape_max = max(shape_data)
    shape_min = min(shape_data)
    scale_factor = scale_max / shape_max
    shape_error = row["SHAPE_error"]

    # NOTE(review): reactivities are multiplied by scale_factor but are then
    # normalised against the min/max of the *unscaled* data. This mirrors the
    # original computation and is kept unchanged - confirm it is intended.
    score = 0
    for index in range(data_len):
        reactivity = shape_data[index] * scale_factor
        error = shape_error[index] * scale_factor
        if structure[index] == ".":
            # Unpaired: give the benefit of the doubt by adding the error.
            score += unpaired_blend(
                scale(reactivity + error, shape_min, shape_max)
            )
        else:
            # Paired: give the benefit of the doubt by subtracting the error.
            score += paired_blend(
                scale(reactivity - error, shape_min, shape_max)
            )
    return 100 * score / data_len