Skip to content

Instantly share code, notes, and snippets.

Created January 2, 2018 20:27
Show Gist options
  • Save mayhewsw/5f38ddeb59e154c277bbef7f8ff3d5f9 to your computer and use it in GitHub Desktop.
Save mayhewsw/5f38ddeb59e154c277bbef7f8ff3d5f9 to your computer and use it in GitHub Desktop.
Codenames clue giver
# coding: utf-8
from gensim.models import KeyedVectors
# Pretrained word vectors, downloaded from fastText.
# The file loaded here is a word2vec binary-format copy (binary=True) of those
# vectors, converted for faster loading.
# NOTE(review): the download URL and the "see ..." pointer were truncated in
# this copy; presumably the latter refers to the conversion snippet — confirm.
vec = KeyedVectors.load_word2vec_format("~/data/wiki-news-300d-1M.vec.bin", binary=True)
from itertools import combinations
from nltk.stem import WordNetLemmatizer,PorterStemmer
# Shared NLTK helpers: wnl supplies noun lemmas for isplural, and stemmer
# supplies Porter stems for isSame.
wnl = WordNetLemmatizer()
stemmer = PorterStemmer()
def isplural(word):
    """Return True when ``word`` differs from its WordNet noun lemma.

    The word is lemmatized as a noun (part-of-speech tag ``'n'``) with the
    module-level ``wnl`` lemmatizer; a word whose lemma is a different string
    is reported as plural.

    Args:
        word: The word to check.

    Returns:
        bool: True if the noun lemma is not equal to ``word``, else False.
    """
    lemma = wnl.lemmatize(word, 'n')
    # Compare by value, not identity: an equal lemma may come back as a
    # distinct str object, which an ``is not`` test would misreport as plural.
    return word != lemma
def isCap(word):
    """Report whether lowercasing ``word`` would change it.

    Returns True when ``word`` is not equal to its lowercased form, i.e. it
    contains at least one character that ``str.lower`` alters.
    """
    lowered = word.lower()
    return lowered != word
def isSame(a,b):
    """Return True if ``a`` and ``b`` share a Porter stem or either has uppercase.

    The stems are compared first using the module-level ``stemmer``. If they
    differ, the result is True when lowercasing would change ``a`` or ``b``,
    and False otherwise.
    """
    if stemmer.stem(a) == stemmer.stem(b):
        return True
    # Checked in the order a, then b.
    for token in (a, b):
        if token != token.lower():
            return True
    return False
# Interactive loop: reads a list of team words and one word to avoid, then
# queries the embedding model for combinations of the team words.
# NOTE(review): indentation was lost in this copy, so the nesting of the
# statements below is not visible here — confirm against the original.
while True:
# words is all the words on your team (red or blue words)
words = input("words >> ").strip().split()
# give one negative word (usually the black word)
negword = input("neg >> ").strip()
neg = [negword]
# Best pair and score seen so far; updated below when a higher score appears.
bestpair = None
# NOTE(review): bestcand is assigned here but never read in the visible code.
bestcand = ""
bestscore = -1
# Size of each combination of team words (pairs).
num = 2
for p in combinations(words,num):
# Query with the pair as positive words and the avoid-word as negative.
sim = vec.most_similar(positive=p, negative=neg)
# Only the first five results are examined; each is indexed as (word, score).
for s in sim[:5]:
cand = s[0]
score = s[1]
# isSame is true when the stems match or either word contains uppercase.
# NOTE(review): presumably this condition skipped such candidates (e.g. via
# `continue`) and that line was lost in this copy — confirm.
if isSame(cand, p[0]) or isSame(cand, p[1]):
print(cand, score)
if score > bestscore:
bestpair = p
bestscore = score
# Query again using the best pair recorded above.
sim = vec.most_similar(positive=bestpair, negative=neg)
for s in sim[:5]:
cand = s[0]
# NOTE(review): no statement follows this condition in this copy; its body
# (and anything after it) is missing — confirm against the original.
if isSame(bestpair[0], cand) or isSame(bestpair[1], cand):
from gensim.models.keyedvectors import KeyedVectors
# One-off conversion: load the text-format vectors and write them back out in
# word2vec binary format alongside the original, with ".bin" appended.
fname = "~/data/wiki-news-300d-1M.vec"
binary_fname = fname + ".bin"
model = KeyedVectors.load_word2vec_format(fname, binary=False)
model.save_word2vec_format(binary_fname, binary=True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment