Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
def ranking_ir(query, top_k=10):
    """Rank the documents in ``testing_corpus`` against a free-text query.

    The query is lower-cased, passed through ``expand_contractions`` and
    ``clean_text``, has runs of spaces collapsed to a single space, and is
    then split on whitespace and embedded with ``get_embedding_w2v``.  Every
    row of ``testing_corpus`` is scored by ``cosine_similarity`` between its
    ``vector`` value and the query embedding.

    Args:
        query: Raw query string.
        top_k: Maximum number of rows to return.  Defaults to 10, the value
            that used to be hard-coded.

    Returns:
        A new DataFrame with the columns ``docid``, ``title``, ``body`` and
        ``similarity``, sorted by descending similarity, truncated to the
        first ``top_k`` rows and given a fresh 0-based index.
    """
    # Pre-process the query.
    # NOTE(review): presumably this mirrors how the corpus text was cleaned
    # before its vectors were built -- confirm against the indexing code.
    query = query.lower()
    query = expand_contractions(query)
    query = clean_text(query)
    query = re.sub(r' +', ' ', query)

    # Embed the query once and reshape it to a single-row 2-D array up front,
    # so this invariant work is not repeated for every document.
    query_vector = np.array(get_embedding_w2v(query.split())).reshape(1, -1)

    def _score(doc_vector):
        # Both operands are passed as 1 x N arrays; .item() unwraps the
        # resulting single-element array into a plain scalar.
        doc_row = np.array(doc_vector).reshape(1, -1)
        return cosine_similarity(query_vector, doc_row).item()

    # Rank documents: work on a copy so the shared corpus is never mutated.
    documents = testing_corpus[['docid', 'title', 'body']].copy()
    documents['similarity'] = testing_corpus['vector'].apply(_score)
    documents = documents.sort_values(by='similarity', ascending=False)
    return documents.head(top_k).reset_index(drop=True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment