Gormador/percentileComputations.py

## percentileComputations.py
##
#  For both approaches, "sortedDensities" (given as arg "densitiesList" in the dichotomy-based solution)
#    is a list of the reference dataset, from which are computed the corresponding percentile of each
#      "density" of another dataset.
#  The second function is ill-named, as it doens't return a percentile but the index of the "densitiesList" array
#    used to compute the "density" percentile.
##

##
#  Naive, brute-force approach.
#    You simply count how many values are smaller than the one for which you want to compute the percentile.
#
def getPercentileOfDensity(density):
    nbInf = 0

    for d in sortedDensities:
        if d < density:
            nbInf+=1
        else:
            break

    return (nbInf / len(sortedDensities)) * 100

##
#  Dichotomy-based, tail-recursive solution.
#
def getPercentileOfDensityRecTerm(density, densitiesList, lowerBound, higherBound):

    if (higherBound - lowerBound) <= 1:
        return lowerBound

    step = (lowerBound + higherBound)//2

    if densitiesList[step] == density:
        return step + 1

    elif densitiesList[step] < density:
        return getPercentileOfDensityRecTerm(density, densitiesList, step+1, higherBound)
    else:
        return getPercentileOfDensityRecTerm(density, densitiesList, lowerBound, step-1)
	##
	# For both approaches, "sortedDensities" (given as arg "densitiesList" in the dichotomy-based solution)
	# is a list of the reference dataset, from which are computed the corresponding percentile of each
	# "density" of another dataset.
	# The second function is ill-named, as it doens't return a percentile but the index of the "densitiesList" array
	# used to compute the "density" percentile.
	##

	##
	# Naive, brute-force approach.
	# You simply count how many values are smaller than the one for which you want to compute the percentile.
	#
	def getPercentileOfDensity(density):
	nbInf = 0

	for d in sortedDensities:
	if d < density:
	nbInf+=1
	else:
	break

	return (nbInf / len(sortedDensities)) * 100

	##
	# Dichotomy-based, tail-recursive solution.
	#
	def getPercentileOfDensityRecTerm(density, densitiesList, lowerBound, higherBound):

	if (higherBound - lowerBound) <= 1:
	return lowerBound

	step = (lowerBound + higherBound)//2

	if densitiesList[step] == density:
	return step + 1

	elif densitiesList[step] < density:
	return getPercentileOfDensityRecTerm(density, densitiesList, step+1, higherBound)
	else:
	return getPercentileOfDensityRecTerm(density, densitiesList, lowerBound, step-1)