# sa-/pandas_ndcg.py

## pandas_ndcg.py
# Compute NDCG@k (normalized discounted cumulative gain at rank k) per customer.

def ndcg_at_k(predictions_df, k):
    """
    Compute NDCG@k for every customer.

    Rows are ranked within each customer by descending `estimate` (ties are
    broken by row order), only the rows ranked in the top `k` are kept, and
    each kept row contributes ``label / ln(1 + rank)`` to its customer's DCG.

    The DCG is divided by one ideal DCG shared by all customers:
    ``sum(1 / ln(1 + i) for i in 1..k)``, i.e. the DCG of `k` results that
    all carry a label of 1. It is NOT derived from each customer's own
    labels, so a customer with fewer than `k` positive labels cannot
    reach a score of 1.0.

    Parameters
    ----------
    predictions_df : pandas.DataFrame
        Must contain the columns "customer_id", "estimate", and "label".
        `estimate` is a recommendation score that can be sorted in
        descending order; `label` is the ground truth. The frame is left
        unmodified.
    k : int
        Number of top-ranked recommendations to score per customer.

    Returns
    -------
    pandas.Series
        NDCG@k values named "discounted_gain", indexed by "customer_id".
    """
    # Local import keeps the function usable even when the surrounding module
    # does not import numpy itself.
    import numpy as np

    reco_rank = (predictions_df
                 .groupby("customer_id")["estimate"]
                 .rank(method="first", ascending=False))

    # `assign` returns a new frame, so the caller's DataFrame never gains the
    # helper columns and no chained-assignment warning is triggered.
    ranked = predictions_df.assign(reco_rank=reco_rank)
    top_k = ranked[ranked["reco_rank"] <= k]
    top_k = top_k.assign(
        discounted_gain=top_k["label"] / np.log1p(top_k["reco_rank"])
    )

    # The log base cancels between DCG and ideal DCG, so natural log is fine.
    ideal_dcg = np.sum(1 / np.log1p(np.arange(1, k + 1)))

    ndcg = top_k.groupby("customer_id")["discounted_gain"].sum() / ideal_dcg

    return ndcg
	# Compute NDCG@k (normalized discounted cumulative gain at rank k) per customer.

	def ndcg_at_k(predictions_df, k):
		"""
		Compute NDCG@k for every customer.

		Rows are ranked within each customer by descending `estimate` (ties are
		broken by row order), only the rows ranked in the top `k` are kept, and
		each kept row contributes ``label / ln(1 + rank)`` to its customer's DCG.

		The DCG is divided by one ideal DCG shared by all customers:
		``sum(1 / ln(1 + i) for i in 1..k)``, i.e. the DCG of `k` results that
		all carry a label of 1. It is NOT derived from each customer's own
		labels, so a customer with fewer than `k` positive labels cannot
		reach a score of 1.0.

		Parameters
		----------
		predictions_df : pandas.DataFrame
			Must contain the columns "customer_id", "estimate", and "label".
			`estimate` is a recommendation score that can be sorted in
			descending order; `label` is the ground truth. The frame is left
			unmodified.
		k : int
			Number of top-ranked recommendations to score per customer.

		Returns
		-------
		pandas.Series
			NDCG@k values named "discounted_gain", indexed by "customer_id".
		"""
		# Local import keeps the function usable even when the surrounding module
		# does not import numpy itself.
		import numpy as np

		reco_rank = (
			predictions_df
			.groupby("customer_id")["estimate"]
			.rank(method="first", ascending=False)
		)

		# `assign` returns a new frame, so the caller's DataFrame never gains the
		# helper columns and no chained-assignment warning is triggered.
		ranked = predictions_df.assign(reco_rank=reco_rank)
		top_k = ranked[ranked["reco_rank"] <= k]
		top_k = top_k.assign(
			discounted_gain=top_k["label"] / np.log1p(top_k["reco_rank"])
		)

		# The log base cancels between DCG and ideal DCG, so natural log is fine.
		ideal_dcg = np.sum(1 / np.log1p(np.arange(1, k + 1)))

		ndcg = top_k.groupby("customer_id")["discounted_gain"].sum() / ideal_dcg

		return ndcg