CreamyCookie/smoothed_duration_weighted_average_rating.py

## smoothed_duration_weighted_average_rating.py
from dataclasses import dataclass


def hyperbolic_smoother(x, n=10.0):
    """Return x pushed along the hyperbola (n + 1) * x / (n * x + 1).

    The curve leaves 0 at 0 and 1 at 1 (for n != -1). For positive n,
    every x strictly between them is raised above itself; n determines
    how strongly smaller values will be magnified.

    Raises ZeroDivisionError when n * x + 1 evaluates to zero.
    """
    numerator = (n + 1) * x
    denominator = n * x + 1
    return numerator / denominator


@dataclass
class Track:
    """One album track: how long it plays and how it was rated."""

    # Playing time of the track, in seconds.
    duration_seconds: int
    # Star rating given to the track; the script divides it by MAX_RATING.
    star_rating: int


# Highest star rating a track can get. Ratings are whole numbers from 0 up
# to this value, so with 4 the valid ratings are 0, 1, 2, 3 and 4.
MAX_RATING = 4


# Blend factors for the interpolation experiment (see the notes in the
# report loop); the two ratios add up to 1.
WEIGHTED_INTERPOLATION_RATIO = 0.3
UNWEIGHTED_INTERPOLATION_RATIO = 1 - WEIGHTED_INTERPOLATION_RATIO

# Limits a weight to this value * (1 / len(tracks)); the minimum factor is
# simply the reciprocal of the maximum one.
MAX_WEIGHT_FACTOR = 1.2
MIN_WEIGHT_FACTOR = 1 / MAX_WEIGHT_FACTOR


# Sample albums. Every entry is Track(duration_seconds, star_rating).
examples = [
    [
        Track(10, 4), Track(100, 2), Track(1000, 1), Track(120, 1),
        Track(160, 3), Track(9000, 4), Track(200, 0),
    ],
    [
        Track(120, 2), Track(240, 4), Track(600, 0),
    ],
    [
        Track(120, 4), Track(140, 2), Track(210, 3), Track(200, 4),
        Track(180, 4), Track(35 * 60, 0),
    ],
    [
        Track(120, 4), Track(240, 4), Track(1200, 0), Track(1600, 0),
    ],
    [
        Track(299, 4), Track(265, 3), Track(391, 4), Track(250, 3),
        Track(325, 4), Track(195, 3), Track(346, 4), Track(347, 3),
        Track(317, 2), Track(407, 3), Track(346, 3), Track(239, 4),
    ],
]


# One-off explanation printed before the per-album tables.
intro = (
    "The following (duration-based) weights represent the ratio / percentage"
    " a tracks'\nrating makes up of the album's average rating (which is in "
    "the TOTAL row)."
)
print(intro)
print()


# Column captions shown above every album table; the padding lines them up
# with the fixed-width row format used further down.
TABLE_HEADER = (
    "                                                                          simple",
    "                                      smoothed                             ratio",
    "                                      partial             simple         partial",
    "                          smoothed    weighted            ratio         weighted",
    "rating      minutes   ➜   weight      rating    VERSUS    weight          rating",
)
SEPARATOR = '-' * 80

for tracks in examples:
    track_count = len(tracks)
    total_duration = float(sum(t.duration_seconds for t in tracks))

    # Share each track would get if every track counted the same.
    mean_weight = 1 / track_count
    mean_duration = total_duration / track_count
    # Only the abandoned clamping experiment (below) used this value.
    max_duration = mean_duration * MAX_WEIGHT_FACTOR

    # Pass 1: smooth every track's share of the total running time and add
    # the results up, in track order, so they can be normalised in pass 2.
    #
    # Abandoned experiments, kept as notes:
    #   * clamp each duration to max_duration before taking its ratio
    #     -> fail: often results in very flat weights (and feels wrong)
    #   * interpolate between mean_weight and duration / total using
    #     UNWEIGHTED_INTERPOLATION_RATIO / WEIGHTED_INTERPOLATION_RATIO
    #     -> fail: when long track exists, the rest look very similar
    smoothed_duration_weights = []
    total_weight = 0
    for t in tracks:
        smoothed = hyperbolic_smoother(t.duration_seconds / total_duration)
        smoothed_duration_weights.append(smoothed)
        total_weight += smoothed

    for caption in TABLE_HEADER:
        print(caption)
    print(SEPARATOR)

    # Pass 2: print one row per track while accumulating both averages.
    average_smoothed_rating = 0
    average_simple_rating = 0
    sum_of_ratings = 0

    for track, raw_weight in zip(tracks, smoothed_duration_weights):
        seconds = track.duration_seconds
        stars = track.star_rating

        # Smoothed weights are divided by their sum so they add up to 1,
        # like the plain duration shares do.
        smoothed_share = raw_weight / total_weight
        simple_share = seconds / total_duration

        # Rating expressed as a fraction of MAX_RATING.
        rating_fraction = stars / MAX_RATING
        sum_of_ratings += rating_fraction

        smoothed_part = smoothed_share * rating_fraction
        simple_part = simple_share * rating_fraction
        average_simple_rating += simple_part
        average_smoothed_rating += smoothed_part

        row = (
            f"{stars}★ = {rating_fraction:1.2f}   {seconds / 60:>6.1f}        "
            f"{smoothed_share:>6.1%}      {smoothed_part:.4f}              "
            f"{simple_share:>6.1%}          {simple_part:.4f}"
        )
        print(row)

    print(SEPARATOR)
    total_row = (
        f"TOTAL:      {total_duration / 60:>6.1f}                    "
        f"{average_smoothed_rating:.4f}                              "
        f"{average_simple_rating:.4f}"
    )
    print(total_row)

    # For comparison: the plain mean, where every track counts 1 / len.
    print()
    old_rating = sum_of_ratings / track_count
    print(f"old (unweighted) average rating: {old_rating:.4f}")
    print(f"old (unweighted) weight (1/len): {mean_weight:6.1%}")
    print()

## terminal output.txt
The following (duration-based) weights represent the ratio / percentage a tracks'
rating makes up of the album's average rating (which is in the TOTAL row).

                                                                          simple
                                      smoothed                             ratio
                                      partial             simple         partial
                          smoothed    weighted            ratio         weighted
rating      minutes   ➜   weight      rating    VERSUS    weight          rating
--------------------------------------------------------------------------------
4★ = 1.00      0.2          0.5%      0.0050                0.1%          0.0009
2★ = 0.50      1.7          4.6%      0.0231                0.9%          0.0047
1★ = 0.25     16.7         26.0%      0.0650                9.4%          0.0236
1★ = 0.25      2.0          5.4%      0.0136                1.1%          0.0028
3★ = 0.75      2.7          7.0%      0.0527                1.5%          0.0113
4★ = 1.00    150.0         47.9%      0.4790               85.0%          0.8499
0★ = 0.00      3.3          8.5%      0.0000                1.9%          0.0000
--------------------------------------------------------------------------------
TOTAL:       176.5                    0.6384                              0.8933

old (unweighted) average rating: 0.5357
old (unweighted) weight (1/len):  14.3%

                                                                          simple
                                      smoothed                             ratio
                                      partial             simple         partial
                          smoothed    weighted            ratio         weighted
rating      minutes   ➜   weight      rating    VERSUS    weight          rating
--------------------------------------------------------------------------------
2★ = 0.50      2.0         26.1%      0.1303               12.5%          0.0625
4★ = 1.00      4.0         33.5%      0.3350               25.0%          0.2500
0★ = 0.00     10.0         40.4%      0.0000               62.5%          0.0000
--------------------------------------------------------------------------------
TOTAL:        16.0                    0.4653                              0.3125

old (unweighted) average rating: 0.5000
old (unweighted) weight (1/len):  33.3%

                                                                          simple
                                      smoothed                             ratio
                                      partial             simple         partial
                          smoothed    weighted            ratio         weighted
rating      minutes   ➜   weight      rating    VERSUS    weight          rating
--------------------------------------------------------------------------------
4★ = 1.00      2.0         10.8%      0.1076                4.1%          0.0407
2★ = 0.50      2.3         12.0%      0.0599                4.7%          0.0237
3★ = 0.75      3.5         15.5%      0.1161                7.1%          0.0534
4★ = 1.00      3.3         15.0%      0.1504                6.8%          0.0678
4★ = 1.00      3.0         14.1%      0.1410                6.1%          0.0610
0★ = 0.00     35.0         32.6%      0.0000               71.2%          0.0000
--------------------------------------------------------------------------------
TOTAL:        49.2                    0.5750                              0.2466

old (unweighted) average rating: 0.7083
old (unweighted) weight (1/len):  16.7%

                                                                          simple
                                      smoothed                             ratio
                                      partial             simple         partial
                          smoothed    weighted            ratio         weighted
rating      minutes   ➜   weight      rating    VERSUS    weight          rating
--------------------------------------------------------------------------------
4★ = 1.00      2.0         11.8%      0.1179                3.8%          0.0380
4★ = 1.00      4.0         18.5%      0.1850                7.6%          0.0759
0★ = 0.00     20.0         33.9%      0.0000               38.0%          0.0000
0★ = 0.00     26.7         35.8%      0.0000               50.6%          0.0000
--------------------------------------------------------------------------------
TOTAL:        52.7                    0.3029                              0.1139

old (unweighted) average rating: 0.5000
old (unweighted) weight (1/len):  25.0%

                                                                          simple
                                      smoothed                             ratio
                                      partial             simple         partial
                          smoothed    weighted            ratio         weighted
rating      minutes   ➜   weight      rating    VERSUS    weight          rating
--------------------------------------------------------------------------------
4★ = 1.00      5.0          8.2%      0.0824                8.0%          0.0802
3★ = 0.75      4.4          7.7%      0.0577                7.1%          0.0533
4★ = 1.00      6.5          9.5%      0.0948               10.5%          0.1049
3★ = 0.75      4.2          7.4%      0.0558                6.7%          0.0503
4★ = 1.00      5.4          8.6%      0.0862                8.7%          0.0872
3★ = 0.75      3.2          6.4%      0.0477                5.2%          0.0392
4★ = 1.00      5.8          8.9%      0.0891                9.3%          0.0928
3★ = 0.75      5.8          8.9%      0.0670                9.3%          0.0698
2★ = 0.50      5.3          8.5%      0.0426                8.5%          0.0425
3★ = 0.75      6.8          9.7%      0.0725               10.9%          0.0819
3★ = 0.75      5.8          8.9%      0.0669                9.3%          0.0696
4★ = 1.00      4.0          7.2%      0.0723                6.4%          0.0641
--------------------------------------------------------------------------------
TOTAL:        62.1                    0.8350                              0.8361

old (unweighted) average rating: 0.8333
old (unweighted) weight (1/len):   8.3%
	from dataclasses import dataclass


	def hyperbolic_smoother(x, n=10.0):
	    """Return (n + 1) * x / (n * x + 1), a curve that lifts small ratios.

	    The curve keeps 0 at 0 and 1 at 1 (for n != -1). For positive n,
	    values strictly in between come out larger than they went in; n
	    determines how strongly smaller values will be magnified.

	    Raises ZeroDivisionError when n * x + 1 evaluates to zero.
	    """
	    return (n + 1) * x / (n * x + 1)


	@dataclass
	class Track:
	    """One album track: how long it plays and how it was rated."""

	    # Playing time of the track, in seconds.
	    duration_seconds: int
	    # Star rating given to the track; the script divides it by MAX_RATING.
	    star_rating: int


	# Highest star rating a track can get. Ratings are whole numbers from 0 up
	# to this value, so with 4 the valid ratings are 0, 1, 2, 3 and 4.
	MAX_RATING = 4


	# Blend factors for the interpolation experiment; the two ratios add up
	# to 1.
	WEIGHTED_INTERPOLATION_RATIO = 0.3
	UNWEIGHTED_INTERPOLATION_RATIO = 1 - WEIGHTED_INTERPOLATION_RATIO

	# Limits a weight to this value * (1 / len(tracks)); the minimum factor is
	# simply the reciprocal of the maximum one.
	MAX_WEIGHT_FACTOR = 1.2
	MIN_WEIGHT_FACTOR = 1 / MAX_WEIGHT_FACTOR


	# Sample albums. Every entry is Track(duration_seconds, star_rating).
	examples = [
	    [
	        Track(10, 4), Track(100, 2), Track(1000, 1), Track(120, 1),
	        Track(160, 3), Track(9000, 4), Track(200, 0),
	    ],
	    [
	        Track(120, 2), Track(240, 4), Track(600, 0),
	    ],
	    [
	        Track(120, 4), Track(140, 2), Track(210, 3), Track(200, 4),
	        Track(180, 4), Track(35 * 60, 0),
	    ],
	    [
	        Track(120, 4), Track(240, 4), Track(1200, 0), Track(1600, 0),
	    ],
	    [
	        Track(299, 4), Track(265, 3), Track(391, 4), Track(250, 3),
	        Track(325, 4), Track(195, 3), Track(346, 4), Track(347, 3),
	        Track(317, 2), Track(407, 3), Track(346, 3), Track(239, 4),
	    ],
	]


	# One-off explanation printed before the per-album tables.
	intro = (
	    "The following (duration-based) weights represent the ratio / percentage"
	    " a tracks'\nrating makes up of the album's average rating (which is in "
	    "the TOTAL row)."
	)
	print(intro)
	print()


	# Print one comparison table per example album: duration-smoothed weights
	# versus plain duration ratios, followed by the unweighted average.
	for tracks in examples:
	    total_duration = float(sum(i.duration_seconds for i in tracks))

	    # Share each track would get if every track counted the same.
	    mean_weight = 1 / len(tracks)
	    mean_duration = total_duration / len(tracks)
	    # Only referenced by the commented-out clamping experiment below.
	    max_duration = mean_duration * MAX_WEIGHT_FACTOR

	    # Pass 1: smooth each track's share of the total running time and
	    # keep the running sum so the weights can be normalised afterwards.
	    smoothed_duration_weights = []
	    total_weight = 0

	    for t in tracks:
	        dur = t.duration_seconds

	        # experiment: directly limit the weight a single track can have
	        # fail: often results in very flat weights (and feels wrong)
	        #if dur > max_duration:
	        #    dur = max_duration

	        duration_ratio = dur / total_duration
	        weight = hyperbolic_smoother(duration_ratio)

	        # experiment: interpolate between 1 / len(tracks) and duration / total
	        # fail: when long track exists, the rest look very similar
	        #weight = (UNWEIGHTED_INTERPOLATION_RATIO * mean_weight) + (
	        #    WEIGHTED_INTERPOLATION_RATIO * duration_ratio)

	        total_weight += weight
	        smoothed_duration_weights.append(weight)

	    # Column captions; the padding lines them up with the fixed-width
	    # row format printed in pass 2.
	    print("                                                                          simple")
	    print("                                      smoothed                             ratio")
	    print("                                      partial             simple         partial")
	    print("                          smoothed    weighted            ratio         weighted")
	    print("rating      minutes   ➜   weight      rating    VERSUS    weight          rating")
	    print('-' * 80)

	    # Pass 2: print one row per track while accumulating both averages.
	    average_smoothed_rating = 0
	    average_simple_rating = 0
	    sum_of_ratings = 0

	    for track, weight in zip(tracks, smoothed_duration_weights):
	        star_rating = track.star_rating
	        dur = track.duration_seconds

	        # Smoothed weights are divided by their sum so they add up to 1,
	        # like the plain duration shares do.
	        smoothed_ratio = weight / total_weight
	        simple_ratio = dur / total_duration

	        # Rating expressed as a fraction of MAX_RATING.
	        float_rating = star_rating / MAX_RATING
	        sum_of_ratings += float_rating

	        # Each track's contribution to the two weighted averages.
	        wr = smoothed_ratio * float_rating
	        sr = simple_ratio * float_rating

	        average_simple_rating += sr
	        average_smoothed_rating += wr

	        print(f"{star_rating}★ = {float_rating:1.2f}   {dur / 60:>6.1f}        "
	            f"{smoothed_ratio:>6.1%}      {wr:.4f}              "
	            f"{simple_ratio:>6.1%}          {sr:.4f}")

	    print('-' * 80)
	    print(f"TOTAL:      {total_duration / 60:>6.1f}                    "
	        f"{average_smoothed_rating:.4f}                              "
	        f"{average_simple_rating:.4f}")

	    # For comparison: the plain mean, where every track counts 1 / len.
	    print()
	    old_rating = sum_of_ratings / len(tracks)
	    print(f"old (unweighted) average rating: {old_rating:.4f}")
	    print(f"old (unweighted) weight (1/len): {mean_weight:6.1%}")
	    print()
	The following (duration-based) weights represent the ratio / percentage a tracks'
	rating makes up of the album's average rating (which is in the TOTAL row).

	simple
	smoothed ratio
	partial simple partial
	smoothed weighted ratio weighted
	rating minutes ➜ weight rating VERSUS weight rating
	--------------------------------------------------------------------------------
	4★ = 1.00 0.2 0.5% 0.0050 0.1% 0.0009
	2★ = 0.50 1.7 4.6% 0.0231 0.9% 0.0047
	1★ = 0.25 16.7 26.0% 0.0650 9.4% 0.0236
	1★ = 0.25 2.0 5.4% 0.0136 1.1% 0.0028
	3★ = 0.75 2.7 7.0% 0.0527 1.5% 0.0113
	4★ = 1.00 150.0 47.9% 0.4790 85.0% 0.8499
	0★ = 0.00 3.3 8.5% 0.0000 1.9% 0.0000
	--------------------------------------------------------------------------------
	TOTAL: 176.5 0.6384 0.8933

	old (unweighted) average rating: 0.5357
	old (unweighted) weight (1/len): 14.3%

	simple
	smoothed ratio
	partial simple partial
	smoothed weighted ratio weighted
	rating minutes ➜ weight rating VERSUS weight rating
	--------------------------------------------------------------------------------
	2★ = 0.50 2.0 26.1% 0.1303 12.5% 0.0625
	4★ = 1.00 4.0 33.5% 0.3350 25.0% 0.2500
	0★ = 0.00 10.0 40.4% 0.0000 62.5% 0.0000
	--------------------------------------------------------------------------------
	TOTAL: 16.0 0.4653 0.3125

	old (unweighted) average rating: 0.5000
	old (unweighted) weight (1/len): 33.3%

	simple
	smoothed ratio
	partial simple partial
	smoothed weighted ratio weighted
	rating minutes ➜ weight rating VERSUS weight rating
	--------------------------------------------------------------------------------
	4★ = 1.00 2.0 10.8% 0.1076 4.1% 0.0407
	2★ = 0.50 2.3 12.0% 0.0599 4.7% 0.0237
	3★ = 0.75 3.5 15.5% 0.1161 7.1% 0.0534
	4★ = 1.00 3.3 15.0% 0.1504 6.8% 0.0678
	4★ = 1.00 3.0 14.1% 0.1410 6.1% 0.0610
	0★ = 0.00 35.0 32.6% 0.0000 71.2% 0.0000
	--------------------------------------------------------------------------------
	TOTAL: 49.2 0.5750 0.2466

	old (unweighted) average rating: 0.7083
	old (unweighted) weight (1/len): 16.7%

	simple
	smoothed ratio
	partial simple partial
	smoothed weighted ratio weighted
	rating minutes ➜ weight rating VERSUS weight rating
	--------------------------------------------------------------------------------
	4★ = 1.00 2.0 11.8% 0.1179 3.8% 0.0380
	4★ = 1.00 4.0 18.5% 0.1850 7.6% 0.0759
	0★ = 0.00 20.0 33.9% 0.0000 38.0% 0.0000
	0★ = 0.00 26.7 35.8% 0.0000 50.6% 0.0000
	--------------------------------------------------------------------------------
	TOTAL: 52.7 0.3029 0.1139

	old (unweighted) average rating: 0.5000
	old (unweighted) weight (1/len): 25.0%

	simple
	smoothed ratio
	partial simple partial
	smoothed weighted ratio weighted
	rating minutes ➜ weight rating VERSUS weight rating
	--------------------------------------------------------------------------------
	4★ = 1.00 5.0 8.2% 0.0824 8.0% 0.0802
	3★ = 0.75 4.4 7.7% 0.0577 7.1% 0.0533
	4★ = 1.00 6.5 9.5% 0.0948 10.5% 0.1049
	3★ = 0.75 4.2 7.4% 0.0558 6.7% 0.0503
	4★ = 1.00 5.4 8.6% 0.0862 8.7% 0.0872
	3★ = 0.75 3.2 6.4% 0.0477 5.2% 0.0392
	4★ = 1.00 5.8 8.9% 0.0891 9.3% 0.0928
	3★ = 0.75 5.8 8.9% 0.0670 9.3% 0.0698
	2★ = 0.50 5.3 8.5% 0.0426 8.5% 0.0425
	3★ = 0.75 6.8 9.7% 0.0725 10.9% 0.0819
	3★ = 0.75 5.8 8.9% 0.0669 9.3% 0.0696
	4★ = 1.00 4.0 7.2% 0.0723 6.4% 0.0641
	--------------------------------------------------------------------------------
	TOTAL: 62.1 0.8350 0.8361

	old (unweighted) average rating: 0.8333
	old (unweighted) weight (1/len): 8.3%