@eliotl
Last active May 5, 2021 21:11
hollowPM - Hollow Phonetic Matrix module
"""
This is my Hollow Phonetics Matrix module: It is for handling different poetic
word search operations for my Flask website (https://puns.plus). It is a handler
for three large 45,000 x 45,000 nearest neighbor matrices that are too big for
me to want to load into memory. (One for overall sound similarity, one for
word2vec meaning similarity, and one for presence of individual phonetic features.)
Given different search requests, this translates from search word to vector
number, unpickles individual vectors from the disk as needed, performs the
necessary math, and then renders the results as words.
>>> pm = HollowPM(basepath, vocabName)
Words that sound the most like 'clam'. (Phonetic similarity.)
>>> v1 = pm.ph('clam')
>>> v1
<1x45829 sparse matrix of type '<class 'numpy.float64'>'
with 5000 stored elements in Compressed Sparse Row format>
>>> pm.render(v1, n=8)
['Klamm', 'clam', 'clammy', 'clams', 'clamp', 'clamor', 'clamps', 'clamped']
Words that are most similar in meaning to 'clam'. (word2vec semantic similarity.)
>>> v2 = pm.w2('clam')
>>> pm.render(v2, n=8)
['clam', 'clams', 'oyster', 'lobster', 'crab', 'crabs', 'scallop', 'oysters']
Words that start with the sounds "kl".
>>> v3 = pm.gr('#kl')
>>> pm.render(v3, n=8)
['Clo', 'clay', 'Clay', 'claw', 'clue', 'Cleo', 'Chloe', 'claws']
Words that sound like 'clam' AND have a similar meaning to 'clam'
>>> vCombo = v1.multiply(v2)
>>> pm.render(vCombo, n=8)
['clam', 'clams', 'clambake', 'lamb', 'ham', 'yam', 'calamari', 'scampi']
Words that sound like 'clam' AND have a similar meaning to 'clam' AND start with
"kl"
>>> vComboAll = vCombo.multiply(v3)
>>> pm.render(vComboAll, n=10)
['clam', 'clams', 'clambake', 'clove', 'claws', 'cloves', 'clump', 'clover',
'clownfish', 'clementine']
Words that are most similar in sound to both 'clam' AND 'lobster'
>>> vv = pm.ph('clam*lobster')
>>> pm.render(vv, n=8)
['Kloster', 'clatter', 'clobber', 'clamber', 'cloister', 'cluster', 'clamor',
'clobbered']
Words that have a "kl" sound but don't start with it
>>> pm.render(pm.gr('kl') - pm.gr('#kl'), n=8)
['cackling', 'quickly', 'Buckley', 'thickly', 'sickly', 'acclaim', 'psychically',
'sackcloth']
"""
# phVecs is another module in this project which handles generating
# phonetic feature vectors and associated math.
from phVecs import FeatureBank, double_prune_N
import numpy as np
from scipy import sparse as sps
import sklearn as skl
import sklearn.preprocessing  # makes skl.preprocessing.normalize available
import pickle
from collections import defaultdict  # used by render()'s sort_render helper
class HollowPM:
""" Hollow Phonetics Matrix wrapper. A handler for three large nearest-
neighbor matrices for a given vocabulary: Phonetic similarity (ph),
Semantic similarity (w2), and phonetic features (gr).
"""
def __init__(self, basepath, vocabName):
# Set the paths for opening the needed vectors
self._basepath = basepath
self._vocabName = vocabName
self._phNaybsPath = "phv_naybs/" + self._vocabName + "_"
self._w2NaybsPath = "w2v_naybs/" + self._vocabName + "_"
self._grNaybsPath = "grm_naybs/" + self._vocabName + "_"
self._posPath = "posDict" + self._vocabName
self._meterPath = "meterDict" + self._vocabName
# keyDict is a dictionary that maps words to indices and indices to words.
self.keyDict = self._open_pickle("keyDict" + self._vocabName)
# FeatureBank maps all of the individual phonetic feature vectors to their index.
self.fb = FeatureBank()
xShape = len(self.keyDict) // 2 # The number of words in the vocabulary
yShape = self.fb.maxLen_ # The number of phonetic features in the model
self.shape = (xShape, yShape)
def __getitem__(self, item):
"""Return the corresponding index or word for a given word or index."""
return self.keyDict.get(item)
def _ix(self, item):
"""Return the index for a given word. If given an index, returns the
same index.
"""
if isinstance(item, str):
i = self.keyDict.get(item)
elif item in self.keyDict:
i = item
elif int(item) in self.keyDict:
i = int(item)
else:
i = None
return i
def _ix_gr(self, item):
"""Return the index for a given phonetic feature. If given an index,
returns the same index.
"""
if isinstance(item, str):
i = self.fb[item]
elif isinstance(item, list):
i = self.fb[item]
elif isinstance(item, int):
if item in range(self.fb.maxLen_):
i = item
else:
i = None
return i
def _open_sum(self, indices, open_function):
"""Opens a list of nearest-neighbor vectors by index, adds and
normalizes them.
"""
vecs = [open_function(i) for i in indices]
vecs = [v for v in vecs if v.count_nonzero()]
summed = self.normalize(sum(vecs))
return summed
def _open_prod(self, indices, open_function):
"""Opens a list of nearest-neighbor vectors by index, multiplies and
normalizes them.
"""
vecs = [open_function(i) for i in indices]
vecs = [v for v in vecs if v.count_nonzero()]
prodded = self.multiply_list(vecs, NORM=True)
return prodded
def _open_vector(self, prefix, i):
"""Opens a vector from the disk, given a prefix and index."""
try:
return self._open_pickle(prefix + str(i))
except:
return self.empty_nayb()
def _open_split_vector(self, prefix, i):
"""Opens a vector from the disk, if the vector is split into separate
numpy arrays of its data and indices
"""
data = self._open_pickle(prefix + str(i) + "_data")
inds = self._open_pickle(prefix + str(i) + "_inds")
return data, inds
def _n_range_handle(self, array, n):
"""
A protocol for handling requests for different slices as a function input.
Used in HollowPM.render() and HollowSS.print_nearest_crosses()
"""
if isinstance(n, tuple):
array = array[n[0] : n[1]]
elif n < 0:
array = array[n:]
else:
array = array[:n]
return array
def _open_pickle(self, path):
"""Opens a pickled object off of the disk in the path where the
HollowPM's data is stored.
"""
with open(self._basepath + path + ".pickle", "rb") as f:
item = pickle.load(f)
return item
def ph(self, i):
"""Open a word's phonetic nearest-neighbor vector,
or combine multiple words' nearest-neighbor vectors.
Uses cosine similarity of the phonetic feature vectors from phVecs to determine similarity.
Parameters
----------
i: int or str
if int: returns a single word's nearest-neighbor vector by index.
if str: returns a single word's nearest-neighbor vector, or combines
multiple words' nearest-neighbor vectors.
To combine multiple words, input a string with the words separated by '*' or '+'
self.ph('clam+lobster') returns the sum of the vectors for 'clam' and 'lobster'
self.ph('clam*lobster') returns the product of the vectors for 'clam' and 'lobster'
Use '+' if you want to return neighbors for "clam OR lobster"
Use '*' if you want to return neighbors for "clam AND lobster"
If you mix '+' and '*', the order of operations is backwards: + resolves before *
Returns
-------
naybVec: CSR matrix
A nearest neighbors vector for the given word or combination of words.
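Example (illustrative; assumes 'mussel' is also in the vocabulary):
>>> v = pm.ph('clam*mussel+lobster')
The string above is evaluated as ph('clam') * (ph('mussel') + ph('lobster')).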
"""
if "*" in i:
# Return the product of all of the words in the input string
return self._open_prod(i.split("*"), self.ph)
if "+" in i:
# Return the sum of all of the words in the input string
return self._open_sum(i.split("+"), self.ph)
i = self._ix(i)
naybVec = self._open_vector(self._phNaybsPath, i)
return naybVec
def w2(self, i):
"""Open a word's semantic nearest-neighbor vector,
or combine multiple words' nearest-neighbor vectors.
Uses word2vec cosine distance to determine semantic (meaning) similarity.
Parameters
----------
i: int or str
if int: returns a single word's nearest-neighbor vector by index.
if str: returns a single word's nearest-neighbor vector, or combines
multiple words' nearest-neighbor vectors.
To combine multiple words, input a string with words separated by '*' or '+':
self.w2('China+France') returns the sum of the vectors for 'China' and 'France'
self.w2('China*France') returns the product of the vectors for 'China' and 'France'
Use '+' if you want to return neighbors of 'China' OR 'France'
Use '*' if you want to return neighbors of 'China' AND 'France'
If you mix '+' and '*', the order of operations is backwards: '+' resolves before '*'
Returns
-------
naybVec: CSR matrix
A nearest neighbors vector for the given word or combination of words.
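Example (illustrative): an index can be passed in place of a word.
>>> i = pm['China']   # keyDict lookup: word -> index
>>> v = pm.w2(i)      # same vector as pm.w2('China')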
"""
if "*" in i:
# Return the product of all of the words in the input string
return self._open_prod(i.split("*"), self.w2)
if "+" in i:
# Return the product of all of the words in the input string
return self._open_sum(i.split("+"), self.w2)
i = self._ix(i)
naybVec = self._open_vector(self._w2NaybsPath, i)
return naybVec
def gr(self, i):
"""Open a phonetic feature's nearest-neighbor vector,
or combine multiple phonetic features' nearest-neighbor vectors.
Uses normalized phonetic feature vectors from phVecs to determine similarity.
Parameters
----------
i: int or str
if int: returns a single feature's nearest-neighbor vector by index.
if str: returns a single feature's nearest-neighbor vector, or combines
multiple features' nearest-neighbor vectors.
To combine multiple features, input a string with the features separated by '*' or '+'
self.gr('b+v') returns the sum of the vectors for 'b' and 'v'
self.gr('b*v') returns the product of the vectors for 'b' and 'v'
Use '+' if you want to return words with 'b' OR 'v'
Use '*' if you want to return words with 'b' AND 'v'
If you mix '+' and '*', the order of operations is backwards: '+' resolves before '*'
Returns
-------
naybVec: CSR matrix
A nearest neighbors vector for the given word or combination of words.
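Example (illustrative; 'b' and 'v' are feature strings as in the examples above):
>>> v = pm.gr('b*v')            # words containing both a 'b' and a 'v' sound
>>> words = pm.render(v, n=8)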
"""
if "*" in i:
# Return the product of all of the words in the input string
return self._open_prod(i.split("*"), self.gr)
if "+" in i:
# Return the sum of all of the words in the input string
return self._open_sum(i.split("+"), self.gr)
i = self._ix_gr(i)
naybVec = self._open_vector(self._grNaybsPath, i)
return naybVec
def render(
self,
naybVec,
n=20,
iList=None,
VALUES=False,
rounding=3,
POSITIVE=True,
BYMETER=False,
BYPOS=False,
):
"""Converts a nearest-neighbors vector into a sorted list of the words
it represents.
Parameters
----------
naybVec: CSR Matrix
A nearest-neighbors vector corresponding to samples in the
HollowPM's vocabulary.
n: int or tuple
Determines which slice of neighbors to return:
If n is a positive int: Returns the top n values
If n is a negative int: Returns the bottom -n values
If n is a tuple: Returns the top n[0] to n[1] values
iList: array
A mapping for indices if naybVec represents a subset of the full
vocabulary (e.g. the rows of a HollowSS matrix).
VALUES: bool
If True, render() will return a list of (word, value) tuples,
instead of just words.
rounding: int
Determines what place to round values to.
POSITIVE: bool
Whether to only return neighbors with positive scores.
BYMETER: bool
Whether to return the results in a dict, sorted by meter.
BYPOS: bool
Whether to return the results in a dict, sorted by part-of-speech.
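Example (illustrative, output not shown):
>>> v = pm.ph('clam')
>>> middle = pm.render(v, n=(10, 20))        # the slice of neighbors ranked 10 to 20
>>> scored = pm.render(v, n=5, VALUES=True)  # list of (word, rounded score) tuples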
"""
def sort_render(indices, sortDict, PRINT=True):
"""
Sorts a list of indices by a trait mapped in sortDict.
Prints the results or returns them in a defaultdict.
"""
sortedInds = defaultdict(list)
for i in indices:
sortedInds[sortDict[i]].append(self[i])
if PRINT:
# Print the words to output
for k, items in sortedInds.items():
print(k)
print(", ".join(items))
else:
# Return as a defaultdict
return sortedInds
sorts = np.argsort(-naybVec.data)
if POSITIVE:
sorts = sorts[np.where(naybVec.data[sorts] > 0)]
sorts = self._n_range_handle(sorts, n)
if iList is not None:
indices = iList[naybVec.indices[sorts]]
else:
indices = naybVec.indices[sorts]
if BYMETER:
# Sort the words by their meter
sortDict = self._open_pickle(self._meterPath)
return sort_render(indices, sortDict, PRINT=False)
if BYPOS:
# Sort the words by their part of speech
sortDict = self._open_pickle(self._posPath)
return sort_render(indices, sortDict, PRINT=False)
if VALUES:
# Return the list of words with their values from the vector
outList = [
(self[i], np.round(float(v), rounding))
for i, v in zip(indices, naybVec.data[sorts])
]
else:
# Return as a list of words
outList = [self[i] for i in indices]
return outList
def multiply_list(self, vecs, NORM=True):
"""Perform vector multiplication a list of vectors.
Normalizes them by default.
"""
runningVec = vecs[0]
for vec in vecs[1:]:
runningVec = runningVec.multiply(vec)
if NORM:
runningVec = self.normalize(runningVec)
return runningVec
def empty_nayb(self):
"""Return an empty nearest-neighbors vector"""
return sps.csr_matrix(
(np.array([]), np.array([]), np.array([0, 0])), shape=(1, self.shape[0])
)
def count_to_nayb(self, count):
"""Converts a Counter object of phonetic features to a nearest-neighbor
vector. Can be used for performing phonetic searches for words outside
of the vocabulary.
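Illustrative sketch (these feature keys are hypothetical; real keys come
from phVecs.FeatureBank):
>>> from collections import Counter
>>> count = Counter({'#k': 1, 'l': 1, 'm': 1})
>>> v = pm.count_to_nayb(count)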
"""
nayb = self.empty_nayb()
l2Norm = 0
for k, v in count.items():
nayb = nayb + self.gr(k) * v
l2Norm += v ** 2
l2Norm **= 0.5
nayb = nayb / l2Norm
return nayb
def normalize(self, vec):
"""Normalizes a vector using the L2 Norm."""
return skl.preprocessing.normalize(vec)
def slml(self, sl, ml, m=1, vecN=None, n=None):
"""Runs a "Sounds like, means like" search: returning words which sound
like one word and have similar meanings to another.
Parameters
----------
sl: str
A word or combination of words to search for phonetic similarity.
(Words separated by '+' have their vectors added)
(Words separated by '*' have their vectors multiplied)
ml: str
A word or combination of words to search for semantic similarity.
(Words separated by '+' have their vectors added)
(Words separated by '*' have their vectors multiplied)
m: float
The amount of weight to give to semantic similarity rather than
phonetic similarity. An exponent the values of the "sounds-like"
component are raised to.
vecN: int or None
The number of neighbors to retain for each search word. Defaults
to all.
n: int or None
The number of neighbors to render. By default, slml returns the
full neighbor vector as a vector.
If an int is given for n, slml returns a list of strings
Returns
-------
By default, the nearest-neighbor vector of the search.
If n is an int, this returns a list of strings of the top words from
the search.
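Example (illustrative, hypothetical query; output not shown):
>>> words = pm.slml('clam', 'ocean', n=8)   # sounds like 'clam', means like 'ocean'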
"""
a = self.ph(sl)
b = self.w2(ml)
if vecN:
a = self.sort_vec(a, vecN, AS_VECTOR=True)
b = self.sort_vec(b, vecN, AS_VECTOR=True)
if m != 1:
a.data = a.data ** m
c = a.multiply(b)
if n is not None:
return self.render(c, n=n)
else:
return c
def pairs(self, aWord, bWord, m=0.5, n=25, vecN=None):
"""Runs a "perfect pairs" search on two words or categories.
Parameters
----------
aWord: str
The first word or group of words to pull "synonyms" for.
(Words separated by '+' have their vectors added)
(Words separated by '*' have their vectors multiplied)
bWord: str
The second word or group of words to pull "synonyms" for.
(Words separated by '+' have their vectors added)
(Words separated by '*' have their vectors multiplied)
m: float
The amount of weight to give to semantic similarity rather than
phonetic similarity. Defaults to equal weights.
n: int
The number of neighbors to retain in the nearest-neighbors matrix
vecN: int
The number of "synonyms" to retain for each search word. Defaults
to all.
Returns
-------
A HollowSS object, which can be queried for overall top matches, or top
matches for a given word.
"""
aVec = self.w2(aWord)
bVec = self.w2(bWord)
if vecN is not None:
aVec = self.sort_vec(aVec, n=vecN, AS_VECTOR=True)
bVec = self.sort_vec(bVec, n=vecN, AS_VECTOR=True)
return self.hollow_pairs(aVec, bVec, self._phNaybsPath, m=m, n=n)
def flip_pairs(self, aWord, bWord, m=0.5, n=25, vecN=None):
"""Runs a "meaning mates" search on two words or groups of words.
(The converse of self.pairs())
Parameters
----------
aWord: str
The first word or group of words to pull phonetic neighbors for.
(Words separated by '+' have their vectors added)
(Words separated by '*' have their vectors multiplied)
bWord: str
The second word or group of words to pull phonetic neighbors for.
(Words separated by '+' have their vectors added)
(Words separated by '*' have their vectors multiplied)
m: float
The amount of weight to give to phonetic similarity rather than
semantic similarity. Defaults to equal weights.
n: int
The number of neighbors to retain in the nearest-neighbors matrix.
vecN: int
The number of phonetic neighbors to retain for each search word.
Defaults to all.
Returns
-------
A HollowSS object, which can be queried for overall top matches, or top
matches for a given word.
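Example (illustrative, hypothetical query; output not shown):
>>> ss = pm.flip_pairs('clam', 'lobster', vecN=500)
>>> ss.print_nearest_crosses(10)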
"""
aVec = self.ph(aWord)
bVec = self.ph(bWord)
if vecN is not None:
aVec = self.sort_vec(aVec, n=vecN, AS_VECTOR=True)
bVec = self.sort_vec(bVec, n=vecN, AS_VECTOR=True)
return self.hollow_pairs(aVec, bVec, self._w2NaybsPath, m=m, n=n)
def hollow_pairs(self, aVec, bVec, prefix, m=0.5, n=25):
"""
Does the math for generating a HollowSS search object.
Find all pairwise cosine distances between the elements of two
nearest-neighbor vectors.
N is a nearest-neighbors matrix, the result of the operation
x.T • A • y, where A is an adjacency matrix for the entire vocabulary,
and x and y are two input vectors. To save compute, only the vectors
which correspond to a nonzero value in aVec are loaded into memory.
N is then thresholded to only retain the top n neighbors in each row
and column. (This not only saves memory, but improves the salience of
the results.)
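Roughly, in dense terms (a sketch of the weighting, not the exact code path):
N[j, k] ≈ (aVec[iList[j]] ** m) * sim(iList[j], k) * (bVec[k] ** m)
where sim(i, k) is the stored nearest-neighbor score between items i and k.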
"""
# Make the vectors orthogonal so a word can only be in one category or
# the other.
aVec, bVec = self.deredund_vectors(aVec, bVec)
# Build the nearest-neighbors matrix as a scipy CSR matrix by compiling
# each row's data and indices.
dataList = []
indsList = []
indptrList = [0]
iList = aVec.indices
# For each index in aVec, open its nearest-neighbor vector and compile
# it into a matrix.
for i in iList:
vec = self._open_vector(prefix, i)
dataList.append(vec.data)
indsList.append(vec.indices)
indptrList.append(len(vec.indices))
data = np.concatenate(dataList)
indices = np.concatenate(indsList)
indptr = np.cumsum(indptrList)
N = sps.csr_matrix((data, indices, indptr), shape=(len(iList), self.shape[0]))
# Scale the rows of N by the aVec weights and the columns by the bVec
# weights (each raised to the exponent m).
if m is not None:
teenyAvec = sps.csr_matrix(aVec.data ** m)
bbVec = bVec.tocsr()
if m != 1:
bbVec.data = bbVec.data ** m
N = teenyAvec.T.multiply(N).multiply(bbVec)
# Threshold N to retain only the top n neighbors for each row and
# column.
if n is not None:
N = double_prune_N(N, n=n)
# Convert the nearest-neighbors matrix and query vectors to a
# HollowSS object.
ss = HollowSS(self, N, iList, aVec, bVec)
return ss
def deredund_vectors(self, aVec, bVec):
"""Make two vectors orthogonal by eliminating any points which are more
highly expressed in the other vector."""
uniqueA = self.cull_distal_words(aVec, bVec)
uniqueB = self.cull_distal_words(bVec, aVec)
return uniqueA, uniqueB
def cull_distal_words(self, aVec, bVec):
"""Return a copy of aVec with only the values that are higher in aVec
than bVec."""
uniqueA = aVec.multiply(aVec > bVec)
return uniqueA
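# Illustrative example (dense view): with aVec = [0.9, 0.2, 0.0] and
# bVec = [0.1, 0.5, 0.3], only the 0.9 entry survives, since that is the
# only position where aVec exceeds bVec.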
def sort_vec(self, vec, n=100, AS_VECTOR=False):
"""Return the top n indices of a vector, or returns a culled version
of the vector containing only the top n values.
"""
try:
# Pull the indices of the top n values from a vector's data.
topInds = np.argpartition(vec.data, -n)[-n:]
except:
topInds = range(len(vec.data))
if AS_VECTOR:
vec = sps.csr_matrix(
(vec.data[topInds], vec.indices[topInds], np.array([0, len(topInds)])),
shape=vec.shape,
)
outVec = vec
else:
sorts = np.argsort(-vec.data[topInds])
indices = vec.indices[topInds[sorts]]
outVec = indices
return outVec
"""
Container for a SynonymSearch matrix, a nearest-neighbors matrix for two given
clusters of synonyms. Used for the "pairs" and "flip_pairs" searches.
e.g. "Find first names and countries that sound alike"
(Categories are approximated here by returning w2v neighbors for a small sample
of terms.)
(For w2v reasons, cuing first names in this way seems to bias it towards
women's names.)
(The scores are the product of the words' semantic distances from the search
words and their phonetic distance from each other.)
>>> ss = pm.pairs('Jason*Mary', 'France*China', vecN=500)
>>> ss.print_nearest_crosses(10)
('Francie', 'France', 0.06555291557083659)
('Fran', 'France', 0.06401958667751825)
('Frances', 'France', 0.06169720053114153)
('Brittany', 'Britain', 0.05366786355699321)
('Mollie', 'Mali', 0.045043048421428575)
('Molly', 'Mali', 0.04318341143473779)
('Candace', 'Canada', 0.036466033543298616)
('Joanne', 'Japan', 0.035450886042190224)
('Bridget', 'Britain', 0.03468668989727991)
('Cindy', 'India', 0.03356244630824604)
Return the top matches for the "first name" category
>>> ss.a_render(8)
['Julie', 'Christine', 'Jody', 'Angela', 'Rebecca', 'Jennifer', 'Melissa',
'Valerie']
Return the countries that sound the most like "Diane"
>>> ss.neighborest('Diane', n=4)
['Sudan', 'Japan', 'japan', 'Bretagne']
Return the countries that sound the most like "Jody"
>>> ss.neighborest('Jody', n=4)
['Cambodia', 'Japan', 'Germany', 'Saudi']
Return the countries that sound the most like "Melissa"
>>> ss.neighborest('Melissa', n=4)
['Malaysia', 'Bolivia', 'Macao', 'Switzerland']
"""
class HollowSS:
"""
A search object for querying the pairwise nearest neighbors between two
lists of words. The lists of words are represented by aVec and bVec.
Parameters
----------
pm: HollowPM
The Phonetics Matrix handler object for the vocabulary this search
was performed in.
N: CSR Matrix
The nearest neighbors matrix for the given pairs search.
iList: array
The indices in aVec; it maps the rows of N back onto HollowPM's
whole vocabulary.
aVec: 1-D CSR Matrix
The values of Category A. The nearest-neighbor vector of the first
input word or category.
bVec: 1-D CSR Matrix
The values of Category B. The nearest-neighbor vector of the second
input word or category.
"""
def __init__(self, pm, N, iList, aVec, bVec):
self.pm = pm
self.N = N
self.iList = iList
self.aVec = aVec
self.bVec = bVec
self.cross_self()
def __getitem__(self, i):
return self.neighborest(i)
def _get_i(self, word):
# If a word's index is in iList then it is in Category A and needs to
# be mapped to N.
pmI = self.pm._ix(word)
i = np.where(self.iList == pmI)[0]
if i.size:
i = int(i[0])
else:
i = pmI
return i
def _a_neighborest(self, word, n=30):
"""Returns the best matches for a given word or index in Category A."""
pmI = self.pm._ix(word)
if pmI not in self.iList:
v = self.pm.empty_nayb()
else:
i = self._get_i(pmI)
v = self.N[i]
return self.pm.render(v, n=n)
def _b_neighborest(self, word, n=30):
"""Returns the best matches for a given word or index in Category B."""
i = self.pm._ix(word)
v = self.N[:, i].T.tocsr()
return self.pm.render(v, iList=self.iList, n=n)
def cross_self(self, n=10000):
"""Convert the nearest-neighbors matrix into a list of the top overall
pairs
"""
# Pull the top overall pairs in the search by their value in N.
cooN = self.N.tocoo()
# sort the values of N as a COOrdinate Matrix
sorts = np.argsort(-cooN.data)
if n > 0:
sorts = sorts[:n]
# Organize them into ((index_A, index_B), score) tuples.
crosses = [
((self.iList[int(a)], int(b)), s)
for a, b, s in zip(cooN.row[sorts], cooN.col[sorts], cooN.data[sorts])
]
self.crosses = crosses
def print_nearest_crosses(self, n=40):
"""Print the overall top pairs from the search"""
crosses = self.pm._n_range_handle(self.crosses, n)
for (a, b), c in crosses:
print((self.pm[a], self.pm[b], c))
def neighborest(self, word, n=20):
"""Return the best matches for a given word in the search.
Parameters
----------
word: str
A word resulting from the search: one of the words from either
Category A or Category B.
"""
i = self.pm._ix(word)
if i in self.iList:
return self._a_neighborest(word, n)
else:
return self._b_neighborest(word, n)
def a_render(self, n=30):
"""Returns a list of the top matches for Category A."""
return self.pm.render(self.aVec, n)
def b_render(self, n=30):
"""Returns a list of the top matches for Category B."""
return self.pm.render(self.bVec, n)
def top_aNaybs(self, n=8, nn=10):
"""Prints out the top nn matches for the top n words in Category A."""
inds = self.pm.sort_vec(self.aVec, n)
for i in inds:
print(self.pm[i])
print(self._a_neighborest(i, nn))
def top_bNaybs(self, n=8, nn=10):
"""Prints out the top nn matches for the top n words in Category B."""
inds = self.pm.sort_vec(self.bVec, n)
for i in inds:
print(self.pm[i])
print(self._b_neighborest(i, nn))