Skip to content

Instantly share code, notes, and snippets.

@oborchers
Last active June 8, 2019 11:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save oborchers/9092adf7f01e5b0e081f7c531116ac47 to your computer and use it in GitHub Desktop.
Save oborchers/9092adf7f01e5b0e081f7c531116ac47 to your computer and use it in GitHub Desktop.
def sif_embeddings(sentences, model):
    """Build one averaged embedding row per sentence.

    Parameters
    ----------
    sentences
        Indexable, sized collection of NumPy arrays of word indices. Each
        array's raw buffer is reinterpreted as ``INT_t`` without any dtype or
        contiguity check, so callers must supply matching, contiguous arrays.
        NOTE(review): indices are not bounds-checked against the vector
        table -- confirm the caller guarantees valid indices.
    model
        Object exposing ``vector_size`` and ``wv.sif_vectors``; the latter's
        buffer is reinterpreted as ``REAL_t`` (presumably float32 rows of
        length ``vector_size`` -- TODO confirm).

    Returns
    -------
    numpy.ndarray
        ``float32`` array of shape ``(len(sentences), vector_size)``. Rows
        that belong to empty sentences stay all-zero.
    """
    cdef int size = model.vector_size

    # Unsafe access via pointers. Each array is bound to a local name first so
    # the object (and therefore its buffer) is guaranteed to stay alive for as
    # long as the raw pointer is in use, even if the attribute lookup or
    # indexing operation hands back a freshly created array.
    word_vectors = model.wv.sif_vectors
    cdef REAL_t *vectors = <REAL_t *>(np.PyArray_DATA(word_vectors))
    output = np.zeros((len(sentences), size), dtype=np.float32)
    cdef REAL_t *sv = <REAL_t *>(np.PyArray_DATA(output))
    cdef INT_t *sentence_view

    for i in xrange(len(sentences)):
        # Index once and keep the reference until the C loop has finished.
        sentence = sentences[i]
        sentence_len = len(sentence)
        if sentence_len:
            sentence_view = <INT_t *>(np.PyArray_DATA(sentence))
            sif_embeddings_cloop(size, sentence_view, sentence_len, i, vectors, sv)
    return output
cdef void sif_embeddings_cloop(const int size, const INT_t *sentence_view, const int sentence_len,
                               const int sentence_idx, const REAL_t *vectors, REAL_t *summary_vectors) nogil:
    # Accumulate the word vectors of one sentence into its row of
    # `summary_vectors`, then scale that row by 1 / sentence_len.
    #
    #   size            -- number of elements handed to each BLAS call
    #   sentence_view   -- word indices of the sentence (sentence_len entries)
    #   sentence_len    -- number of word indices to read
    #   sentence_idx    -- row of `summary_vectors` to write to
    #   vectors         -- flat word-vector table, addressed as index * size
    #   summary_vectors -- flat output buffer, addressed as sentence_idx * size
    #
    # NOTE(review): ONEF and ONE are defined elsewhere in the file; presumably
    # 1.0 and a unit stride -- confirm.
    cdef int i
    cdef REAL_t inv
    cdef Py_ssize_t word_offset
    # Offsets are computed in Py_ssize_t: the product of two 32-bit ints
    # overflows silently for large corpora or vocabularies.
    cdef Py_ssize_t row_offset = (<Py_ssize_t> sentence_idx) * size

    # Nothing to average; also avoids dividing by zero below. The output row
    # is left untouched in that case.
    if sentence_len <= 0:
        return

    for i in range(sentence_len):
        word_offset = (<Py_ssize_t> sentence_view[i]) * size
        # Use single y = a*x+y for adding each word to the sentence matrix
        # Pass the memory addresses of the pointers to SAXPY
        saxpy(&size, &ONEF, &vectors[word_offset], &ONE, &summary_vectors[row_offset], &ONE)

    # Scale the sentence vector by 1/count, where count is the sentence length
    inv = ONEF / <REAL_t> sentence_len
    sscal(&size, &inv, &summary_vectors[row_offset], &ONE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment