# oborchers/SIF_Variant_2.py

## SIF_Variant_2.py
def sif_embeddings(sentences, model, alpha=1e-3):
    """Compute SIF-weighted average embeddings, one row per sentence.

    For each sentence the in-vocabulary words are first mapped to their row
    indices; NumPy integer-array indexing then gathers the word vectors and
    their weights so the multiply-and-sum happens in one vectorized step.

    Args:
        sentences: Iterable of sentences, each an iterable of word tokens.
        model: Object exposing ``wv.vocab`` (mapping from token to an entry
            with an ``index`` attribute), ``wv.vectors`` (word-vector
            matrix) and ``wv.sif`` (pre-computed NumPy array holding the
            weight of every word vector, indexed like ``wv.vectors``).
        alpha: Not read by this variant, because the weights come from the
            pre-computed ``model.wv.sif``; retained so the signature stays
            unchanged for callers.

    Returns:
        Array of dtype ``REAL`` with one row per sentence. Tokens missing
        from the vocabulary are skipped, and a sentence without any known
        token yields an all-zero row. If ``sentences`` is empty the result
        has shape ``(0, dim)`` instead of raising.
    """
    vectors = model.wv
    vlookup = vectors.vocab
    word_vectors = vectors.vectors
    weights = vectors.sif

    output = []
    for s in sentences:
        # Pre-compute the row indices of all known words in the sentence.
        idx = [vlookup[w].index for w in s if w in vlookup]
        # Weight every gathered vector, then sum over the words.
        v = np.sum(word_vectors[idx] * weights[idx][:, None], axis=0)
        # Guard the averaging: an empty index list would divide by zero.
        if idx:
            v *= 1 / len(idx)
        output.append(v)

    # np.vstack rejects an empty sequence, so build the empty result here.
    if not output:
        return np.zeros((0, word_vectors.shape[1]), dtype=REAL)
    return np.vstack(output).astype(REAL)