Skip to content

Instantly share code, notes, and snippets.

@bnsh
Last active December 4, 2018 07:46
Show Gist options
  • Save bnsh/e405fd323410883871bea5cc5de42b3d to your computer and use it in GitHub Desktop.
#! /usr/bin/env python3
"""Is this a bug in lightfm's precision_at_k function?
If there are #entries < k for a particular user, it seems as if that user can _never_ have a precision
> (#positive_interactions / k) for that user. So, the maximum precision is _not_ in fact 1.0, it's whatever the average
of _all_ the (#positive_interactions / k) values are throughout the entire training set.
I have a proposed solution at the end. I wonder if it works.
"""
import numpy as np
from scipy.sparse import csr_matrix
from lightfm import LightFM
from lightfm.evaluation import precision_at_k
def main():
    """Demonstrate the apparent precision_at_k ceiling and a proposed fix.

    Builds a tiny 6x6 interaction matrix where user i has i+1 positive
    interactions, fits a WARP LightFM model on it, and prints:
      1. lightfm's own precision_at_k (which caps each user's precision at
         #positive_interactions / k when the user has fewer than k items), and
      2. a proposed per-user precision whose denominator is clipped to the
         user's actual interaction count, so a perfect ranking scores 1.0.

    No parameters, no return value; all results go to stdout.
    """
    # User i (0-based) has exactly i+1 positive interactions.
    realdata = [
        [5, 0, 0, 0, 0, 0],
        [5, 5, 0, 0, 0, 0],
        [5, 5, 5, 0, 0, 0],
        [5, 5, 5, 5, 0, 0],
        [5, 5, 5, 5, 5, 0],
        [5, 5, 5, 5, 5, 5],
    ]
    data = {
        "train": csr_matrix(realdata),
        "test": csr_matrix(realdata),
    }
    model = LightFM(loss="warp")
    model.fit(data["train"], epochs=300, num_threads=2)

    # Show the model's predicted ranks next to the ground truth.
    rank = model.predict_rank(data["train"])
    print(rank.todense())
    print(data["train"].todense())

    # lightfm's built-in metric: divides by k for every user, even those
    # with fewer than k positive interactions.
    precision = precision_at_k(model, data["train"], k=5)
    print(precision)
    print("Train precision: %.2f" % (precision.mean(),))

    # Proposed solution: clip each user's denominator to his own
    # interaction count, so precision can actually reach 1.0.
    k = 5
    ranks = model.predict_rank(data["train"])
    # Mark items ranked in the top k. `out=` writes the boolean result
    # in place over the sparse matrix's data array (was passed
    # positionally before, which is the same `out` argument of np.less).
    ranks.data = np.less(ranks.data, k, out=ranks.data)
    # Denominator: min(#positive_interactions, k), floored at 1.
    # NOTE: the original used np.float, which was deprecated in NumPy 1.20
    # and removed in 1.24; the builtin float is the drop-in replacement.
    denominator = np.clip((data["train"] != 0).astype(float).sum(axis=1), 1, k)
    print(denominator)
    precision = np.squeeze(np.array(ranks.sum(axis=1)) / denominator)
    print(precision)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment