import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
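# Assumed inputs (schemas inferred from the code below; not shown in this snippet):
#   testing_result  - DataFrame with columns ['qid', 'docid', 'rel'] (relevance judgements)
#   testing_corpus  - DataFrame with columns ['docid', 'vector'] (document embeddings)
#   testing_queries - DataFrame with columns ['qid', 'vector'] (query embeddings)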
# Average precision for a single query: the mean of the precision values
# at each rank (within the top 10) where a relevant document appears
def average_precision(qid, qvector):
    # Get the ground-truth relevance judgements and document vectors for this query
    qresult = testing_result.loc[testing_result['qid'] == qid, ['docid', 'rel']]
    qcorpus = testing_corpus.loc[testing_corpus['docid'].isin(qresult['docid']), ['docid', 'vector']]
    qresult = pd.merge(qresult, qcorpus, on='docid')
    # Rank the documents by cosine similarity to the query vector
    qresult['similarity'] = qresult['vector'].apply(
        lambda x: cosine_similarity(np.array(qvector).reshape(1, -1),
                                    np.array(x).reshape(1, -1)).item())
    qresult.sort_values(by='similarity', ascending=False, inplace=True)
    # Evaluate only the top 10 ranked documents
    ranking = qresult.head(10)['rel'].values
    # Precision at each rank where the document is relevant
    # (iterate over len(ranking), not a fixed 10, in case fewer documents matched)
    precision = []
    for i in range(1, len(ranking) + 1):
        if ranking[i - 1]:
            precision.append(np.sum(ranking[:i]) / i)
    # If no relevant document appears in the top 10, the score is 0
    if not precision:
        return 0.0
    return np.mean(precision)
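# Illustrative check of the formula above (hypothetical numbers, not from the original):
# for a top-10 relevance vector starting [1, 0, 1, 0, ...], precision is taken at
# ranks 1 and 3 (1/1 and 2/3), so average_precision returns (1 + 2/3) / 2 ≈ 0.833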
# Average precision for every query in the test set
testing_queries['AP'] = testing_queries.apply(
    lambda x: average_precision(x['qid'], x['vector']), axis=1)
# Mean Average Precision over all queries
print('Mean Average Precision =>', testing_queries['AP'].mean())
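# Note: this is a MAP@10-style variant that averages precision only over the
# relevant documents retrieved in the top 10; formulations that divide by the
# total number of relevant documents per query will generally give lower scores.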