Last active
December 16, 2015 22:09
-
-
Save morr/5504626 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# encoding: utf-8
# Base class for recommendation metrics. From a table of other users'
# ("samplers") scores it predicts scores for a target user as a
# similarity-weighted average of the samplers' scores.
#
# `calculate` calls `compare(user_id, user_rates, sampler_id, sampler_rates)`,
# which is not defined in this class; presumably concrete metric subclasses
# provide it (verify against the subclasses). Its result is treated as a
# number: it is compared with `<= 0` and used as a weight.
class Recommendations::Metrics::MetricBase
  # Score value of an item that is in a list but has not been rated.
  NotRated = -1

  # @param klass stored in @klass; it is not read anywhere in this class
  #   (presumably used by subclasses - TODO confirm)
  def initialize(klass)
    @klass = klass
  end

  # Stores the data that later `predict` calls work from.
  #
  # @param user_id not used by this implementation
  # @param user_rates the target user's rates; must respond to `include?(id)`
  # @param all_rates collection whose `each` yields `sampler_id, scores`,
  #   where `scores.each` yields `id, score` (assumed to be a Hash of
  #   Hashes - TODO confirm against callers)
  def learn(user_id, user_rates, all_rates)
    @all_rates = all_rates
    @user_rates = user_rates
  end

  # Predicts scores for the user from the data given to `learn`.
  #
  # The raw sums are taken from Rails.cache (computed by `calculate` on a
  # cache miss and kept for two weeks); the `minimum_votes` filter is applied
  # after the cache lookup, on every call.
  #
  # @param user_id id of the user to predict for
  # @param minimum_votes an item is kept only if it received strictly more
  #   votes than this
  # @param without_user_rates when truthy, the user's own row in the rates
  #   table and the items already in the user's rates are excluded
  # @return [Hash] item id => sum(score * similarity) / sum(similarity)
  def predict(user_id, minimum_votes, without_user_rates)
    totals, votes, similarities_sum =
      Rails.cache.fetch(cache_key(user_id, without_user_rates), expires_in: 2.weeks) do
        calculate(user_id, minimum_votes, without_user_rates)
      end

    # Normalized list of items: weighted total divided by the sum of weights.
    # NOTE(review): if both operands are Integers this is integer division;
    # assumes `compare` returns a Float - TODO confirm.
    totals.each_with_object({}) do |(id, total), rankings|
      next unless votes[id] > minimum_votes

      rankings[id] = total / similarities_sum[id]
    end
  end

  private

  # Accumulates, per item id, the weighted score total, the number of votes
  # and the sum of similarities over all samplers with positive similarity.
  #
  # @param user_id id of the user the samplers are compared with
  # @param minimum_votes not used here; the filter is applied in `predict`
  # @param without_user_rates see `predict`
  # @return [Array(Hash, Hash, Hash)] totals, votes, similarities_sum
  def calculate(user_id, minimum_votes, without_user_rates)
    totals = {}
    votes = {}
    similarities_sum = {}

    @all_rates.each do |sampler_id, scores|
      # The user must not be counted as their own sampler.
      next if without_user_rates && sampler_id == user_id

      # `scores` is the value the iteration yields for `sampler_id`, so no
      # second lookup in @all_rates is needed.
      similarity = compare(user_id, @user_rates, sampler_id, scores)

      # Zero similarity is ignored (and negative too - Pearson).
      next if similarity <= 0

      scores.each do |id, score|
        # Recommend only what is not already in the user's list.
        next if without_user_rates && @user_rates.include?(id)
        # Entries without an actual score do not vote.
        next if score == NotRated

        votes[id] = votes.fetch(id, 0) + 1
        totals[id] = totals.fetch(id, 0) + score * similarity
        similarities_sum[id] = similarities_sum.fetch(id, 0) + similarity
      end
    end

    [totals, votes, similarities_sum]
  end

  # Builds the cache key from the metric class name, the user id and the
  # `without_user_rates` flag.
  #
  # NOTE(review): the key contains neither @klass nor anything derived from
  # the data passed to `learn`, so a cached result is reused for up to two
  # weeks whatever was learned in between - confirm this is intended.
  def cache_key(user_id, without_user_rates)
    "#{self.class.name}_#{user_id}_#{without_user_rates}"
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment