Last active
June 8, 2019 11:48
-
-
Save oborchers/9092adf7f01e5b0e081f7c531116ac47 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def sif_embeddings(sentences, model):
    """Compute one averaged word-vector embedding per sentence.

    For every non-empty sentence, the rows of ``model.wv.sif_vectors``
    selected by the sentence's word indices are summed and divided by the
    number of indices (the work is done in ``sif_embeddings_cloop``).
    Empty sentences keep an all-zero row.

    Parameters
    ----------
    sentences : indexable sequence supporting ``len()``
        ``sentences[i]`` is read through a raw ``INT_t`` pointer, so each
        entry is assumed to be a C-contiguous NumPy array whose dtype
        matches ``INT_t`` and whose values are valid row indices into the
        word-vector matrix -- none of this is checked here.
    model : object
        Must expose ``vector_size`` and ``wv.sif_vectors``.

    Returns
    -------
    numpy.ndarray
        float32 array of shape ``(len(sentences), model.vector_size)``.
    """
    cdef int size = model.vector_size
    cdef int sentence_len
    cdef Py_ssize_t i
    cdef Py_ssize_t n_sentences = len(sentences)
    cdef REAL_t *vectors
    cdef REAL_t *sv
    cdef INT_t *sentence_view

    # The matrix is accessed through a raw REAL_t pointer below. The output
    # buffer is float32 and shares that pointer type, so make sure the word
    # vectors are C-contiguous float32 as well (no copy if they already are).
    # Holding the result in a local also keeps the buffer alive for the
    # whole loop.
    word_vectors = np.ascontiguousarray(model.wv.sif_vectors, dtype=np.float32)
    vectors = <REAL_t *>(np.PyArray_DATA(word_vectors))

    output = np.zeros((n_sentences, size), dtype=np.float32)
    sv = <REAL_t *>(np.PyArray_DATA(output))

    for i in range(n_sentences):
        # Bind the sentence to a local: one lookup instead of three, and the
        # array cannot be released while its data pointer is in use.
        sentence = sentences[i]
        sentence_len = len(sentence)
        if sentence_len:
            sentence_view = <INT_t *>(np.PyArray_DATA(sentence))
            # The C loop takes the sentence index as a C int.
            sif_embeddings_cloop(size, sentence_view, sentence_len, <int> i, vectors, sv)
    return output
cdef void sif_embeddings_cloop(const int size, const INT_t *sentence_view, const int sentence_len,
                               const int sentence_idx, const REAL_t *vectors, REAL_t *summary_vectors) nogil:
    # Add the word vectors of one sentence into its row of the summary
    # matrix and scale that row by 1 / sentence_len (i.e. take the mean).
    #
    #   size            -- number of components per vector (row length)
    #   sentence_view   -- word indices of the sentence
    #   sentence_len    -- number of entries in sentence_view
    #   sentence_idx    -- row of summary_vectors that receives the result
    #   vectors         -- word-vector matrix, rows of `size` values
    #   summary_vectors -- output matrix, rows of `size` values; the target
    #                      row is accumulated into, not overwritten
    #
    # No bounds checking is performed on any index.
    cdef int i
    cdef Py_ssize_t word_offset
    # Offsets are computed in Py_ssize_t: index * size can exceed the range
    # of a C int for large vocabularies or large numbers of sentences.
    cdef Py_ssize_t row_offset = (<Py_ssize_t> sentence_idx) * size
    cdef REAL_t inv

    # Nothing to average; also avoids dividing by zero below.
    if sentence_len <= 0:
        return

    for i in range(sentence_len):
        word_offset = (<Py_ssize_t> sentence_view[i]) * size
        # Use a single y = a*x + y (a = 1) to add each word vector to the
        # sentence row. Pass the memory addresses of the pointers to SAXPY.
        saxpy(&size, &ONEF, &vectors[word_offset], &ONE, &summary_vectors[row_offset], &ONE)

    # Scale the sentence vector by 1/count.
    inv = ONEF / <REAL_t> sentence_len
    sscal(&size, &inv, &summary_vectors[row_offset], &ONE)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment