Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
from dataclasses import dataclass
def hyperbolic_smoother(x: float, n: float = 10.0) -> float:
    """Magnify small values of *x* along a hyperbolic curve.

    Evaluates ``(n + 1) * x / (n * x + 1)``. The curve leaves 0 at 0 and
    1 at 1, and for a positive *n* lifts every value strictly between 0
    and 1 above itself; ``n = 0`` is the identity.

    Args:
        x: Value to smooth.
        n: Determines how strongly smaller values will be magnified.

    Returns:
        The smoothed value.

    Raises:
        ZeroDivisionError: If ``n * x + 1`` equals zero.
    """
    return (n + 1) * x / (n * x + 1)
@dataclass
class Track:
    """A single album track: how long it plays and how it was rated."""

    # Playing time of the track in seconds.
    duration_seconds: int
    # Star rating given to the track (whole stars).
    star_rating: int
# Highest possible star rating; with 4 the valid ratings are 0, 1, 2, 3 and 4.
MAX_RATING = 4

# Experimental blend factors: the share taken from the duration-weighted
# value and the remaining share taken from the unweighted value.
WEIGHTED_INTERPOLATION_RATIO = 0.3
UNWEIGHTED_INTERPOLATION_RATIO = 1.0 - WEIGHTED_INTERPOLATION_RATIO

# Experimental cap: a weight is limited to this value * (1 / len(tracks)).
# The minimum factor is simply the reciprocal of the maximum factor.
MAX_WEIGHT_FACTOR = 1.2
MIN_WEIGHT_FACTOR = 1.0 / MAX_WEIGHT_FACTOR
# Sample albums; every Track is (duration in seconds, star rating).
examples = [
    # Seven tracks, one of them 9000 seconds long and rated 4.
    [
        Track(10, 4),
        Track(100, 2),
        Track(1000, 1),
        Track(120, 1),
        Track(160, 3),
        Track(9000, 4),
        Track(200, 0),
    ],
    # Three tracks; the longest one is rated 0.
    [
        Track(120, 2),
        Track(240, 4),
        Track(600, 0),
    ],
    # Five short tracks followed by a 35-minute track rated 0.
    [
        Track(120, 4),
        Track(140, 2),
        Track(210, 3),
        Track(200, 4),
        Track(180, 4),
        Track(35 * 60, 0),
    ],
    # Two short tracks rated 4 and two long tracks rated 0.
    [
        Track(120, 4),
        Track(240, 4),
        Track(1200, 0),
        Track(1600, 0),
    ],
    # Twelve tracks of between 195 and 407 seconds.
    [
        Track(299, 4),
        Track(265, 3),
        Track(391, 4),
        Track(250, 3),
        Track(325, 4),
        Track(195, 3),
        Track(346, 4),
        Track(347, 3),
        Track(317, 2),
        Track(407, 3),
        Track(346, 3),
        Track(239, 4),
    ],
]
# Explain once how to read the per-album tables printed below.
print("The following (duration-based) weights represent the ratio / percentage"
      " a tracks'\nrating makes up of the album's average rating (which is in "
      "the TOTAL row).")
print()

# For every example album, print one table comparing the smoothed
# duration weighting with the plain duration weighting, followed by the
# old unweighted average for reference.
for tracks in examples:
    total_duration = float(sum(i.duration_seconds for i in tracks))
    if total_duration <= 0:
        # An empty album (or one without any playing time) has no
        # meaningful weights; every ratio below would divide by zero.
        print("skipping album without any playing time")
        print()
        continue

    mean_weight = 1 / len(tracks)

    # Notes on experiments that were tried here and rejected:
    # - limiting each duration to mean duration * MAX_WEIGHT_FACTOR often
    #   resulted in very flat weights (and felt wrong);
    # - interpolating between 1 / len(tracks) and duration / total made
    #   the remaining tracks look very similar once a long track exists.
    smoothed_duration_weights = []
    total_weight = 0
    for t in tracks:
        weight = hyperbolic_smoother(t.duration_seconds / total_duration)
        total_weight += weight
        smoothed_duration_weights.append(weight)

    print(" simple")
    print(" smoothed ratio")
    print(" partial simple partial")
    print(" smoothed weighted ratio weighted")
    print("rating minutes ➜ weight rating VERSUS weight rating")
    print('-' * 80)

    average_smoothed_rating = 0
    average_simple_rating = 0
    sum_of_ratings = 0
    for track, weight in zip(tracks, smoothed_duration_weights):
        star_rating = track.star_rating
        dur = track.duration_seconds
        # Renormalise so the smoothed weights of one album sum to 1.
        smoothed_ratio = weight / total_weight
        simple_ratio = dur / total_duration
        # Rating scaled by MAX_RATING (a MAX_RATING-star track gives 1.0).
        float_rating = star_rating / MAX_RATING
        sum_of_ratings += float_rating
        # Each track's partial contribution to the two weighted averages.
        wr = smoothed_ratio * float_rating
        sr = simple_ratio * float_rating
        average_simple_rating += sr
        average_smoothed_rating += wr
        print(f"{star_rating}★ = {float_rating:1.2f} {dur / 60:>6.1f} "
              f"{smoothed_ratio:>6.1%} {wr:.4f} "
              f"{simple_ratio:>6.1%} {sr:.4f}")

    print('-' * 80)
    print(f"TOTAL: {total_duration / 60:>6.1f} "
          f"{average_smoothed_rating:.4f} "
          f"{average_simple_rating:.4f}")
    print()

    # Reference values: every track counts equally.
    old_rating = sum_of_ratings / len(tracks)
    print(f"old (unweighted) average rating: {old_rating:.4f}")
    print(f"old (unweighted) weight (1/len): {mean_weight:6.1%}")
    print()
The following (duration-based) weights represent the ratio / percentage a tracks'
rating makes up of the album's average rating (which is in the TOTAL row).
simple
smoothed ratio
partial simple partial
smoothed weighted ratio weighted
rating minutes ➜ weight rating VERSUS weight rating
--------------------------------------------------------------------------------
4★ = 1.00 0.2 0.5% 0.0050 0.1% 0.0009
2★ = 0.50 1.7 4.6% 0.0231 0.9% 0.0047
1★ = 0.25 16.7 26.0% 0.0650 9.4% 0.0236
1★ = 0.25 2.0 5.4% 0.0136 1.1% 0.0028
3★ = 0.75 2.7 7.0% 0.0527 1.5% 0.0113
4★ = 1.00 150.0 47.9% 0.4790 85.0% 0.8499
0★ = 0.00 3.3 8.5% 0.0000 1.9% 0.0000
--------------------------------------------------------------------------------
TOTAL: 176.5 0.6384 0.8933
old (unweighted) average rating: 0.5357
old (unweighted) weight (1/len): 14.3%
simple
smoothed ratio
partial simple partial
smoothed weighted ratio weighted
rating minutes ➜ weight rating VERSUS weight rating
--------------------------------------------------------------------------------
2★ = 0.50 2.0 26.1% 0.1303 12.5% 0.0625
4★ = 1.00 4.0 33.5% 0.3350 25.0% 0.2500
0★ = 0.00 10.0 40.4% 0.0000 62.5% 0.0000
--------------------------------------------------------------------------------
TOTAL: 16.0 0.4653 0.3125
old (unweighted) average rating: 0.5000
old (unweighted) weight (1/len): 33.3%
simple
smoothed ratio
partial simple partial
smoothed weighted ratio weighted
rating minutes ➜ weight rating VERSUS weight rating
--------------------------------------------------------------------------------
4★ = 1.00 2.0 10.8% 0.1076 4.1% 0.0407
2★ = 0.50 2.3 12.0% 0.0599 4.7% 0.0237
3★ = 0.75 3.5 15.5% 0.1161 7.1% 0.0534
4★ = 1.00 3.3 15.0% 0.1504 6.8% 0.0678
4★ = 1.00 3.0 14.1% 0.1410 6.1% 0.0610
0★ = 0.00 35.0 32.6% 0.0000 71.2% 0.0000
--------------------------------------------------------------------------------
TOTAL: 49.2 0.5750 0.2466
old (unweighted) average rating: 0.7083
old (unweighted) weight (1/len): 16.7%
simple
smoothed ratio
partial simple partial
smoothed weighted ratio weighted
rating minutes ➜ weight rating VERSUS weight rating
--------------------------------------------------------------------------------
4★ = 1.00 2.0 11.8% 0.1179 3.8% 0.0380
4★ = 1.00 4.0 18.5% 0.1850 7.6% 0.0759
0★ = 0.00 20.0 33.9% 0.0000 38.0% 0.0000
0★ = 0.00 26.7 35.8% 0.0000 50.6% 0.0000
--------------------------------------------------------------------------------
TOTAL: 52.7 0.3029 0.1139
old (unweighted) average rating: 0.5000
old (unweighted) weight (1/len): 25.0%
simple
smoothed ratio
partial simple partial
smoothed weighted ratio weighted
rating minutes ➜ weight rating VERSUS weight rating
--------------------------------------------------------------------------------
4★ = 1.00 5.0 8.2% 0.0824 8.0% 0.0802
3★ = 0.75 4.4 7.7% 0.0577 7.1% 0.0533
4★ = 1.00 6.5 9.5% 0.0948 10.5% 0.1049
3★ = 0.75 4.2 7.4% 0.0558 6.7% 0.0503
4★ = 1.00 5.4 8.6% 0.0862 8.7% 0.0872
3★ = 0.75 3.2 6.4% 0.0477 5.2% 0.0392
4★ = 1.00 5.8 8.9% 0.0891 9.3% 0.0928
3★ = 0.75 5.8 8.9% 0.0670 9.3% 0.0698
2★ = 0.50 5.3 8.5% 0.0426 8.5% 0.0425
3★ = 0.75 6.8 9.7% 0.0725 10.9% 0.0819
3★ = 0.75 5.8 8.9% 0.0669 9.3% 0.0696
4★ = 1.00 4.0 7.2% 0.0723 6.4% 0.0641
--------------------------------------------------------------------------------
TOTAL: 62.1 0.8350 0.8361
old (unweighted) average rating: 0.8333
old (unweighted) weight (1/len): 8.3%
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment