Skip to content

Instantly share code, notes, and snippets.

@ChicagoDev
Created March 5, 2019 20:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ChicagoDev/b2260c6fd2e97af58ef8fc52044a2e8e to your computer and use it in GitHub Desktop.
Save ChicagoDev/b2260c6fd2e97af58ef8fc52044a2e8e to your computer and use it in GitHub Desktop.
from collections import namedtuple
# Record for one casting decision: the role's name, the cast key chosen for
# it, and the score that won the comparison.
CastMatch = namedtuple(
    'CastMatch',
    ['role', 'playedby', 'cosine_dist'],
)
def best_match(got_roles_list, clust_dict, vectorizer, topic_model, cluster_model):
    """Pick, for every role, the top-scoring cast member from its cluster.

    Each role's ``clean_doc`` text is vectorized, projected with
    ``topic_model`` and assigned a cluster by ``cluster_model``.  Every cast
    key listed under that cluster in ``clust_dict`` is then scored against
    the role with ``cosine_distance``; the key with the largest score wins.

    Relies on the module-level names ``model_a`` (its ``model.loc[cast]`` row
    is what gets projected for a cast member), ``cosine_distance`` and
    ``CastMatch``.

    Args:
        got_roles_list: Iterable of mappings with ``'name'`` and
            ``'clean_doc'`` keys.
        clust_dict: Mapping from a cluster label to an iterable of cast keys
            that can be looked up with ``model_a.model.loc``.
        vectorizer: Object whose ``transform([text])`` result supports
            ``.toarray()``.
        topic_model: Object with a ``transform`` method.
        cluster_model: Object with a ``predict`` method.

    Returns:
        A list with one ``CastMatch`` per role, in input order.  When the
        role's cluster has no cast members, ``playedby`` is ``None`` and
        ``cosine_dist`` is ``-inf``.

    Note:
        Indexing ``clust_dict`` with a cluster label it does not contain
        propagates whatever error the mapping raises (``KeyError`` for a
        plain dict).
    """
    matches = []
    for person in got_roles_list:
        top_cast = None
        # Same value as -np.inf, without depending on numpy being imported.
        top_score = float("-inf")

        role_counts = vectorizer.transform([person['clean_doc']]).toarray()
        role_topics = topic_model.transform(role_counts)
        # predict() returns one label per input row; only one row was passed.
        role_cluster = cluster_model.predict(role_topics)[0]

        for cast in clust_dict[role_cluster]:
            cast_topics = topic_model.transform([model_a.model.loc[cast]])
            score = cosine_distance(role_topics[0], cast_topics[0])
            # NOTE(review): this keeps the LARGEST value. If cosine_distance
            # is a true distance (0 == identical) this selects the least
            # similar cast member -- confirm against the helper's definition.
            if score > top_score:
                top_score = score
                top_cast = cast

        matches.append(
            CastMatch(role=person['name'], playedby=top_cast, cosine_dist=top_score)
        )
    return matches
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment