eliotl/hollowpm.py

## hollowpm.py
"""
This is my Hollow Phonetics Matrix module: It is for handling different poetic
word search operations for my Flask website (https://puns.plus) It is a handler
for three large 45,000 x 45,000 nearest neighbor matrices that are too big for
me to want to load into memory. (One for overall sound similarity, one for
word2vec meaning similarity, and one for presence of individual phonetic features.)

Given different search requests, this translates from search word to vector
number, unpickles individual vectors from the disk as needed, performs the
necessary math, and then renders the results as words.


>>> pm = HollowPM(basepath)

Words that sound the most like 'clam'. (Phonetic similarity.)
>>> v1 = pm.ph('clam')
>>> v1
<1x45829 sparse matrix of type '<class 'numpy.float64'>'
    with 5000 stored elements in Compressed Sparse Row format>
>>> pm.render(v1, n=8)
['Klamm', 'clam', 'clammy', 'clams', 'clamp', 'clamor', 'clamps', 'clamped']

Words that are most similar in meaning to 'clam'. (word2vec semantic similarity.)
>>> v2 = pm.w2('clam')
>>> pm.render(v2, n=8)
['clam', 'clams', 'oyster', 'lobster', 'crab', 'crabs', 'scallop', 'oysters']

Words that start with the sounds "kl".
>>> v3 = pm.gr('#kl')
>>> pm.render(v3, n=8)
['Clo', 'clay', 'Clay', 'claw', 'clue', 'Cleo', 'Chloe', 'claws']

Words that sound like 'clam' AND have a similar meaning to 'clam'
>>> vCombo = v1.multiply(v2)
>>> pm.render(vCombo, n=8)
['clam', 'clams', 'clambake', 'lamb', 'ham', 'yam', 'calamari', 'scampi']

Words that sound like 'clam' AND have a similar meaning to 'clam' AND start with
"kl"
>>> vComboAll = vCombo.multiply(v3)
>>> pm.render(vComboAll, n=10)
['clam', 'clams', 'clambake', 'clove', 'claws', 'cloves', 'clump', 'clover',
 'clownfish', 'clementine']

Words that are most similar in sound to both 'clam' AND 'lobster'
>>> vv = pm.ph('clam*lobster')
>>> pm.render(vv, n=8)
['Kloster', 'clatter', 'clobber', 'clamber', 'cloister', 'cluster', 'clamor',
 'clobbered']

Words that have a "kl" sound but don't start with it
>>> pm.render(pm.gr('kl') - pm.gr('#kl'), n=8)
['cackling', 'quickly', 'Buckley', 'thickly', 'sickly', 'acclaim', 'psychically',
 'sackcloth']

"""

# phVecs is another module in this project which handles generating
# phonetic feature vectors and associated math.
from phVecs import FeatureBank, double_prune_N
import numpy as np
from scipy import sparse as sps
import sklearn as skl
import pickle


class HollowPM:
    """ Hollow Phonetics Matrix wrapper. A handler for three large nearest-
    neighbor matrices for a given vocabulary: Phonetic similarity (ph),
    Semantic similarity (w2), and phonetic features (gr).
    """

    def __init__(self, basepath, vocabName):
        # Set the paths for opening the needed vectors
        self._basepath = basepath
        self._vocabName = vocabName
        self._phNaybsPath = "phv_naybs/" + self._vocabName + "_"
        self._w2NaybsPath = "w2v_naybs/" + self._vocabName + "_"
        self._grNaybsPath = "grm_naybs/" + self._vocabName + "_"
        self._posPath = "posDict" + self._vocabName
        self._meterPath = "meterDict" + self._vocabName
        # keyDict is a dictionary that maps words to indices and indices to words.
        self.keyDict = self._open_pickle("keyDict" + self._vocabName)
        # FeatureDeposit maps all of the individual phonetic feature vectors to their index.
        self.fb = FeatureBank()
        xShape = len(self.keyDict) // 2  # The number of words in the vocabulary
        yShape = self.fb.maxLen_  # The number of phonetic features in the model
        self.shape = (xShape, yShape)

    def __getitem__(self, item):
        """Return the corresponding index or word for a given word or index."""
        return self.keyDict.get(item)

    def _ix(self, item):
        """Return the index for a given word. If given an index, returns the
        same index.
        """
        if isinstance(item, str):
            i = self.keyDict.get(item)
        elif item in self.keyDict:
            i = item
        elif int(item) in self.keyDict:
            i = int(item)
        return i

    def _ix_gr(self, item):
        """Return the index for a given phonetic feature. If given an index,
        returns the same index.
        """
        if isinstance(item, str):
            i = self.fb[item]
        elif isinstance(item, list):
            i = self.fb[item]
        elif isinstance(item, int):
            if item in range(nd.maxLen_):
                i = item
            else:
                i = None
        return i

    def _open_sum(self, indices, open_function):
        """Opens a list of nearest-neighbor vectors by index, adds and
        normalizes them.
        """
        vecs = [open_function(i) for i in indices]
        vecs = [v for v in vecs if v.count_nonzero()]
        summed = self.normalize(sum(vecs))
        return summed

    def _open_prod(self, indices, open_function):
        """Opens a list of nearest-neighbor vectors by index, multiplies and
        normalizes them.
        """
        vecs = [open_function(i) for i in indices]
        vecs = [v for v in vecs if v.count_nonzero()]
        prodded = self.multiply_list(vecs, NORM=True)
        return prodded

    def _open_vector(self, prefix, i):
        """Opens a vector from the disk, given a prefix and index."""
        try:
            return self._open_pickle(prefix + str(i))
        except:
            return self.empty_nayb()

    def _open_split_vector(self, prefix, i):
        """Opens a vector from the disk, if the vector is split into separate
        numpy arrays of its data and indices
        """
        data = self._open_pickle(prefix + str(i) + "_data")
        inds = self._open_pickle(prefix + str(i) + "_inds")
        return data, inds

    def _n_range_handle(self, array, n):
        """
        A protocol for handling requests for different slices as a function input.
        Used in HollowPM.render() and HollowSS.print_nearest_crosses()
        """
        if isinstance(n, tuple):
            array = array[n[0] : n[1]]
        elif n < 0:
            array = array[n:]
        else:
            array = array[:n]
        return array

    def _open_pickle(self, path):
        """Opens a pickled object off of the disk in the path where the
        HollowPM's data is stored.
        """
        with open(self._basepath + path + ".pickle", "rb") as f:
            item = pickle.load(f)
        return item

    def ph(self, i):
        """Open a word's phonetic nearest-neighbor vector,
        or combine multiple words' nearest-neighbor vectors.
        Uses cosine similarity of the phonetic feature vectors from phVecs to determine similarity.

        Parameters
        ----------
        i: int or str
            if int: returns a single word's nearest-neighbor vector by index.
            if str: returns a single word's nearest-neighbor vector, or combines
            multiple words' nearest-neighbor vectors.

        To combine multiple words, input a string with the words separated by '*' or '+'
          self.ph('clam+lobster') returns the sum of the vectors for 'clam' and 'lobster'
          self.ph('clam*lobster') returns the product of the vectors for 'clam' and 'lobster'
          Use '+' if you want to return neighbors for "clam OR lobster"
          Use '*' if you want to return neighbors for "clam AND lobster"
          If you mix '+' and '*', the order of operations is backwards: + resolves before *

        Returns
        -------
        naybVec: CSR matrix
            A nearest neighbors vector for the given word or combination of words.
        """
        if "*" in i:
            # Return the product of all of the words in the input string
            return self._open_prod(i.split("*"), self.ph)
        if "+" in i:
            # Return the sum of all of the words in the input string
            return self._open_sum(i.split("+"), self.ph)
        i = self._ix(i)
        naybVec = self._open_vector(self._phNaybsPath, i)
        return naybVec

    def w2(self, i):
        """Open a word's semantic nearest-neighbor vector,
        or combine multiple words' nearest-neighbor vectors.
        Uses word2vec cosine distance to determine semantic (meaning) similarity.

        Parameters
        ----------
        i: int or str
            if int: returns a single word's nearest-neighbor vector by index.
            if str: returns a single word's nearest-neighbor vector, or combines
            multiple words' nearest-neighbor vectors.

        To combine multiple words, input a string with words separated by '*' or '+':
          self.w2('China+France') returns the sum of the vectors for 'China' and 'France'
          self.w2('China*France') returns the product of the vectors for 'China' and 'France'
          Use '+' if you want to return neighbors of 'China' OR 'France'
          Use '*' if you want to return neighbors of 'China' AND 'France'
          If you mix '+' and '*', the order of operations is backwards: '+' resolves before '*'

        Returns
        -------
        naybVec: CSR matrix
            A nearest neighbors vector for the given word or combination of words.
        """
        if "*" in i:
            # Return the product of all of the words in the input string
            return self._open_prod(i.split("*"), self.w2)
        if "+" in i:
            # Return the product of all of the words in the input string
            return self._open_sum(i.split("+"), self.w2)
        i = self._ix(i)
        naybVec = self._open_vector(self._w2NaybsPath, i)
        return naybVec

    def gr(self, i):
        """Open a phonetic feature's nearest-neighbor vector,
        or combine multiple phonetic features' nearest-neighbor vectors.
        Uses normalized phonetic feature vectors from phVecs to determine similarity.

        Parameters
        ----------
        i: int or str
            if int: returns a single feature's nearest-neighbor vector by index.
            if str: returns a single feature's nearest-neighbor vector, or combines
            multiple features' nearest-neighbor vectors.

        To combine multiple words, input a string with the words separated by '*' or '+'
          self.gr('b+v') returns the sum of the vectors for 'b' and 'v'
          self.gr('b*v') returns the product of the vectors for 'b' and 'v'
          Use '+' if you want to return words with 'b' OR 'v'
          Use '*' if you want to return words with 'b' AND 'v'
          If you mix '+' and '*', the order of operations is backwards: '+' resolves before '*'

        Returns
        -------
        naybVec: CSR matrix
            A nearest neighbors vector for the given word or combination of words.

        """
        if "*" in i:
            # Return the product of all of the words in the input string
            return self._open_prod(i.split("*"), self.gr)
        if "+" in i:
            # Return the sum of all of the words in the input string
            return self._open_sum(i.split("+"), self.gr)
        i = self._ix_gr(i)
        naybVec = self._open_vector(self._grNaybsPath, i)
        return naybVec

    def render(
        self,
        naybVec,
        n=20,
        iList=None,
        VALUES=False,
        rounding=3,
        POSITIVE=True,
        BYMETER=False,
        BYPOS=False,
    ):
        """Converts a nearest-neighbors vector into a sorted list of the words
        it represents.

        Parameters
        ----------
        naybVec: CSR Matrix
            A nearest-neighbors vector corresponding to samples in the
            HollowPM's vocabulary.
        n: int or tuple
            Determines which slice of neighbors to return:
            If n is a positive int: Returns the top n values
            If n is a negative int: Returns the bottom -n values
            If n is a tuple:        Returns the top n[0] to n[1] values
        iList: array
            A mapping for indices if naybVec represents a subset of
        VALUES: bool
            If True, render() will return a list of (word, value) tuples,
            instead of just words.
        rounding: int
            Determines what place to round values to.
        POSITIVE: bool
            Whether to only return neighbors with positive scores.
        BYMETER: bool
            Whether to return the results in a dict, sorted by meter.
        BYPOS: bool
            Whether to return the results in a dict, sorted by part-of-speech.
        """

        def sort_render(self, indices, sortDict, PRINT=True):
            """
            Sorts a list of indices by a trait mapped in sortDict.
            Prints the results or returns them in a defaultdict.
            """
            sortedInds = defaultdict(list)
            for i in indices:
                sortedInds[sortDict[i]].append(self[i])
            if PRINT:
                # Print the words to output
                for k, v in sortedInds.item():
                    print(k)
                    print(", ".join(items))
            else:
                # Return as a defaultdict
                return sortedInds

        sorts = np.argsort(-naybVec.data)
        if POSITIVE:
            sorts = sorts[np.where(naybVec.data[sorts] > 0)]
        sorts = self._n_range_handle(sorts, n)
        if iList is not None:
            indices = iList[naybVec.indices[sorts]]
        else:
            indices = naybVec.indices[sorts]
        if BYMETER:
            # Sort the words by their meter
            sortDict = self._open_pickle(self.meterPath)
            return self.sort_render(indices, sortDict, PRINT=False)
        if BYPOS:
            # Sort the words by their part of speech
            sortDict = self._open_pickle(self.posPath)
            return self.sort_render(indices, sortDict, PRINT=False)
        if VALUES:
            # Return the list of words with their values from the vector
            outList = [
                (self[i], np.round(float(v), rounding))
                for i, v in zip(indices, naybVec.data[sorts])
            ]
        else:
            # Return as a list of words
            outList = [self[i] for i in indices]
        return outList

    def multiply_list(self, vecs, NORM=True):
        """Perform vector multiplication a list of vectors.
        Normalizes them by default.
        """
        runningVec = vecs[0]
        for vec in vecs[1:]:
            runningVec = runningVec.multiply(vec)
        if NORM:
            runningVec = self.normalize(runningVec)
        return runningVec

    def empty_nayb(self):
        """Return an empty nearest-neighbors vector"""
        return sps.csr_matrix(
            (np.array([]), np.array([]), np.array([0, 0])), shape=(1, self.shape[0])
        )

    def count_to_nayb(self, count):
        """Converts a Counter object of phonetic features to a nearest-neighbor
        vector. Can be used for performing phonetic searches for words outside
        of the vocabulary.
        """
        nayb = self.empty_nayb()
        l2Norm = 0
        for k, v in count.items():
            nayb = nayb + self.gr(k) * v
            l2Norm += v ** 2
        l2Norm **= 0.5
        nayb = nayb / l2Norm
        return nayb

    def normalize(self, vec):
        """Normalizes a vector using the L2 Norm."""
        return skl.preprocessing.normalize(vec)

    def slml(self, sl, ml, m=1, vecN=None, n=None):
        """Runs a "Sounds like, means like" search: returning words which sound
        like one word and have similar meanings to another.

        Parameters
        ----------
        sl: str
            A word or combination of words to search for phonetic similarity.
            (Words separated by '+' have their vectors added)
            (Words separated by '*' have their vectors multiplied)

        ml: str
            A word or combination of words to search for semantic similarity.
            (Words separated by '+' have their vectors added)
            (Words separated by '*' have their vectors multiplied)

        m: float
            The amount of weight to give to semantic similarity rather than
            phonetic similarity. An exponent the values of the "sounds-like"
            component are raised to.

        vecN: int or None
            The number of neighbors to retain for each search word. Defaults
            to all.

        n: int or None
            The number of neighbors to render. By default, slml returns the
            full neighbor vector as a vector.
            If an int is given for n, slml returns a list of strings

        Returns
        -------
        By default, the nearest-neighbor vector of the search.
        If n is an int, this returns a list of strings of the top words from
        the search.
        """
        a = self.ph(sl)
        b = self.w2(ml)
        if vecN:
            a = self.sort_vec(a, vecN, AS_VECTOR=True)
            b = self.sort_vec(b, vecN, AS_VECTOR=True)
        if m != 1:
            a.data = a.data ** m
        c = a.multiply(b)
        if n is not None:
            return self.render(c, n=n)
        else:
            return c

    def pairs(self, aWord, bWord, m=0.5, n=25, vecN=None):
        """Runs a "perfect pairs" search on two words or categories.

        Parameters
        ----------
        aWord:  str
            The first word or group of words to pull "synonyms" for.
            (Words separated by '+' have their vectors added)
            (Words separated by '*' have their vectors multiplied)
        bWord:  str
            The second word or group of words to pull "synonyms" for.
            (Words separated by '+' have their vectors added)
            (Words separated by '*' have their vectors multiplied)
        m: float
            The amount of weight to give to semantic similarity rather than
            phonetic similarity. Defaults to equal weights.
        n: int
            The number of neighbors to retain in the nearest-neighbors matrix
        vecN: int
            The number of "synonyms" to retain for each search word. Defaults
            to all.

        Returns
        -------
        A HollowSS object, which can be queried for overall top matches, or top
        matches for a given word.

        """
        aVec = self.w2(aWord)
        bVec = self.w2(bWord)
        if vecN is not None:
            aVec = self.sort_vec(aVec, n=vecN, AS_VECTOR=True)
            bVec = self.sort_vec(bVec, n=vecN, AS_VECTOR=True)
        return self.hollow_pairs(aVec, bVec, self._phNaybsPath, m=m)

    def flip_pairs(self, aWord, bWord, m=0.5, n=25, vecN=None):
        """Runs a "meaning mates" search on two words or groups of words.
        (The converse of self.pairs())

        Parameters
        ----------
        aWord:  str
            The first word or group of words to pull phonetic neighbors for.
            (Words separated by '+' have their vectors added)
            (Words separated by '*' have their vectors multiplied)
        bWord:  str
            The second word or group of words to pull phonetic neighbors for.
            (Words separated by '+' have their vectors added)
            (Words separated by '*' have their vectors multiplied)
        m: float
            The amount of weight to give to phonetic similarity rather than
            semantic similarity. Defaults to equal weights.
        n: int
            The number of neighbors to retain in the nearest-neighbors matrix.
        vecN: int
            The number of phonetic neighbors to retain for each search word.
            Defaults to all.

        Returns
        -------
        A HollowSS object, which can be queried for overall top matches, or top
        matches for a given word.

        """
        aVec = self.ph(aWord)
        bVec = self.ph(bWord)
        if vecN is not None:
            aVec = self.sort_vec(aVec, n=vecN, AS_VECTOR=True)
            bVec = self.sort_vec(bVec, n=vecN, AS_VECTOR=True)
        return self.hollow_pairs(aVec, bVec, self._w2NaybsPath, m=m)

    def hollow_pairs(self, aVec, bVec, prefix, m=0.5, n=25):
        """
        Does the math for generating a HollowSS search object.
        Find all pairwise cosine distances between the elements of two
        nearest-neighbor vectors.

        N is a nearest-neighbors matrix, the result of the operation
        x.T • A • y, where A is an adjacency matrix for the entire vocabulary,
        and x and y are two input vectors. To save compute, only the vectors
        which correspond to a nonzero value in aVec are loaded into memory.
        N is then thresholded to only retain the top n neighbors in each row
        and column. (This not only saves memory, but improves the salience of
        the results.)
        """
        # Make the vectors orthogonal so a word can only be in one category or
        # the other.
        aVec, bVec = self.deredund_vectors(aVec, bVec)
        # Build the nearest-neighbors matrix as a scipy CSR matrix by compiling
        # each row's data and indices.
        dataList = []
        indsList = []
        indptrList = [0]
        iList = aVec.indices
        # For each index in aVec, open its nearest-neighbor vector and compile
        # it into a matrix.
        for i in iList:
            vec = self._open_vector(prefix, i)
            dataList.append(vec.data)
            indsList.append(vec.indices)
            indptrList.append(len(vec.indices))
        data = np.concatenate(dataList)
        indices = np.concatenate(indsList)
        indptr = np.cumsum(indptrList)
        N = sps.csr_matrix((data, indices, indptr), shape=(len(iList), self.shape[0]))
        # Compute the dot product of the two query vectors and N, to scale N by
        # their weights.
        if m is not None:
            teenyAvec = sps.csr_matrix(aVec.data ** m)
            bbVec = bVec.tocsr()
            if m != 1:
                bbVec.data = bbVec.data ** m
            N = teenyAvec.T.multiply(N).multiply(bbVec)
        # Threshold N to retain only the top n neighbors for each row and
        # column.
        if n is not None:
            N = double_prune_N(N, n=n)
        # Convert the nearest-neighbors matrix and query vectors to a
        # HollowSS object.
        ss = HollowSS(self, N, iList, aVec, bVec)
        return ss

    def deredund_vectors(self, aVec, bVec):
        """Make two vectors orthogonal by eliminating any points which are more
        highly expressed in the other vector."""
        uniqueA = self.cull_distal_words(aVec, bVec)
        uniqueB = self.cull_distal_words(bVec, aVec)
        return uniqueA, uniqueB

    def cull_distal_words(self, aVec, bVec):
        """Return a copy of aVec with only the values that are higher in aVec
        than bVec."""
        uniqueA = aVec.multiply(aVec > bVec)
        return uniqueA

    def sort_vec(self, vec, n=100, AS_VECTOR=False):
        """Return the top n indices of a vector, or returns a culled version
        of the vector containing only the top n values.
        """
        try:
            # Pull the indices of the top n values from a vector's data.
            topInds = np.argpartition(vec.data, -n)[-n:]
        except:
            topInds = range(len(vec.data))
        if AS_VECTOR:
            vec = sps.csr_matrix(
                (vec.data[topInds], vec.indices[topInds], np.array([0, len(topInds)])),
                shape=vec.shape,
            )
            outVec = vec
        else:
            sorts = np.argsort(-vec.data[topInds])
            indices = vec.indices[topInds[sorts]]
            outVec = indices
        return outVec


"""
Container for a SynonymSearch matrix, a nearest-neighbors matrix for two given
clusters of synonyms. Used for the "pairs" and "flip_pairs" searches.

e.g. "Find first names and countries that sound alike"
(Categories are approximated here by returning w2v neighbors for a small sample
of terms.)
(For w2v reasons, cuing first names in this way seems to bias it towards
women's names.)
(The scores are the product of the words' semantic distances from the search
words and their phonetic distance from each other.)

>>> ss = pm.pairs('Jason*Mary', 'France*China', vecN=500)
>>> ss.print_nearest_crosses(10)
('Francie', 'France', 0.06555291557083659)
('Fran', 'France', 0.06401958667751825)
('Frances', 'France', 0.06169720053114153)
('Brittany', 'Britain', 0.05366786355699321)
('Mollie', 'Mali', 0.045043048421428575)
('Molly', 'Mali', 0.04318341143473779)
('Candace', 'Canada', 0.036466033543298616)
('Joanne', 'Japan', 0.035450886042190224)
('Bridget', 'Britain', 0.03468668989727991)
('Cindy', 'India', 0.03356244630824604)

Return the top matches for the "first name" category
>>> ss.a_render(8)
['Julie', 'Christine', 'Jody', 'Angela', 'Rebecca', 'Jennifer', 'Melissa',
'Valerie']

Return the countries that sound the most like "Diane"
>>> ss.neighborest('Diane', n=4)
['Sudan', 'Japan', 'japan', 'Bretagne']

Return the countries that sound the most like "Jody"
>>> ss.neighborest('Jody', n=4)
['Cambodia', 'Japan', 'Germany', 'Saudi']

Return the countries that sound the most like "Melissa"
>>> ss.neighborest('Melissa', n=4)
['Malaysia', 'Bolivia', 'Macao', 'Switzerland']

"""


class HollowSS:
    """
    A search object for querying the pairwise nearest neighbors between two
    lists of words. The lists of words are represented by aVec and bVec.

    Parameters
    ----------
    pm: HollowPM
        The Phonetics Matrix handler object for the vocabulary this search
        was performed in.
    N: CSR Matrix
        The nearest neighbors matrix for the given pairs search.
    iList: array
        The indices in aVec; it maps the rows of N back onto HollowPM's
        whole vocabulary.
    aVec: 1-D CSR Matrix
        The values of Category A. The nearest-neighbor vector of the first
        input word or category.
    bVec: 1-D CSR Matrix
        The values of Category B. The nearest-neighbor vector of the second
        input word or category.
    """

    def __init__(self, pm, N, iList, aVec, bVec):
        self.pm = pm
        self.N = N
        self.iList = iList
        self.aVec = aVec
        self.bVec = bVec
        self.cross_self()

    def __getitem__(self, i):
        return self.neighborest(i)

    def _get_i(self, word):
        # If a word's index is in iList then it is in Category A and needs to
        # be mapped to N.
        pmI = self.pm._ix(word)
        i = np.where(self.iList == pmI)[0]
        if i:
            i = int(i)
        else:
            i = pmI
        return i

    def _a_neighborest(self, word, n=30):
        """Returns the best matches for a given word or index in Category A."""
        pmI = self.pm._ix(word)
        if not pmI in self.iList:
            v = self.pm.empty_nayb()
        else:
            i = self._get_i(pmI)
            v = self.N[i]
        return self.pm.render(v, n=n)

    def _b_neighborest(self, word, n=30):
        """Returns the best matches for a given word or index in Category B."""
        i = self.pm._ix(word)
        v = self.N[:, i].T.tocsr()
        return self.pm.render(v, iList=self.iList, n=n)

    def cross_self(self, n=10000):
        """Convert the nearest-neighbors matrix into a list of the top overall
        pairs
        """
        # Pull the top overall pairs in the search by their value in N.
        cooN = self.N.tocoo()
        # sort the values of N as a COOrdinate Matrix
        sorts = np.argsort(-cooN.data)
        if n > 0:
            sorts = sorts[:n]
        # Organize them into ((index_A, index_B), score) tuples.
        crosses = [
            ((self.iList[int(a)], int(b)), s)
            for a, b, s in zip(cooN.row[sorts], cooN.col[sorts], cooN.data[sorts])
        ]
        self.crosses = crosses

    def print_nearest_crosses(self, n=40):
        """Print the overall top pairs from the search"""
        crosses = self.pm._n_range_handle(self.crosses, n)
        for (a, b), c in crosses:
            print((self.pm[a], self.pm[b], c))

    def neighborest(self, word, n=20):
        """Return the best matches for a given word in the search.

        Parameters
        ----------
        word: str
            A word resulting from the search: One

        """
        i = self.pm._ix(word)
        if i in self.iList:
            return self._a_neighborest(word, n)
        else:
            return self._b_neighborest(word, n)

    def a_render(self, n=30):
        """Returns a list of the top matches for Category A."""
        return self.pm.render(self.aVec, n)

    def b_render(self, n=30):
        """Returns a list of the top matches for Category B."""
        return self.pm.render(self.bVec, n)

    def top_aNaybs(self, n=8, nn=10):
        """Prints out the top nn matches for the top n words in Category A."""
        inds = self.pm.sort_vec(self.aVec, n)
        for i in inds:
            print(self.pm[i])
            print(self.a_neighborest(i, n))

    def top_bNaybs(self, n=8, nn=10):
        """Prints out the top nn matches for the top n words in Category B."""
        inds = self.pm.sort_vec(self.bVec, n)
        for i in inds:
            print(self.pm[i])
            print(self.b_neighborest(i, nn))