This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from tensorflow.keras.layers import Input, Dense, Concatenate | |
# Hyper-parameters of the mixture-density-network trunk.
neurons = 500        # Width of each fully-connected hidden layer
components = 2       # Number of components in the Gaussian mixture
no_parameters = 3    # Parameter families per component (alpha, mu, sigma)

# NOTE(review): x_train is defined elsewhere in the notebook — assumes a
# 2-D array of shape (samples, features); confirm against the data-prep cell.
inputs = Input(shape=(x_train.shape[1],))
h1 = Dense(neurons, activation="relu")(inputs)
h2 = Dense(neurons, activation="relu")(h1)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from tensorflow_probability import distributions as tfd | |
def slice_parameter_vectors(parameter_vector, components=2, no_parameters=3):
    """Split a flat mixture-parameter vector into per-family slices.

    The network emits all mixture parameters concatenated along the last
    axis as [alpha_1..alpha_k, mu_1..mu_k, sigma_1..sigma_k]; this slices
    them back apart.

    Parameters
    ----------
    parameter_vector : array-like, shape (batch, no_parameters * components)
        Concatenated mixture parameters.
    components : int, optional
        Number of mixture components (defaults to the module's value, 2).
    no_parameters : int, optional
        Parameter families per component (defaults to 3: alpha, mu, sigma).

    Returns
    -------
    list
        `no_parameters` slices, each of shape (batch, components).
    """
    return [parameter_vector[:, i * components:(i + 1) * components]
            for i in range(no_parameters)]
def gnll_loss(y, parameter_vector): | |
""" Computes the mean negative log-likelihood loss of y given the mixture parameters. | |
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import tensorflow as tf | |
from tensorflow_probability import distributions as tfd | |
# TF1-only API: enables eager evaluation so tensors print as values.
# NOTE(review): removed in TF2 (eager is the default there) — this line
# pins the snippet to TensorFlow 1.x.
tf.enable_eager_execution()

# Ground-truth parameters of the 3-component Gaussian mixture to sample from.
alphas = [0.6, 0.3, 0.1]   # mixing weights (sum to 1)
means = [30, 60, 120]      # component means
sigmas = [5, 3, 1]         # component standard deviations
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tensorflow as tf | |
def nnelu(input):
    """Non-Negative Exponential Linear Unit: 1 + ELU(x).

    ELU has range (-1, inf), so adding 1 maps any real input to (0, inf).
    Useful as an activation for parameters that must stay strictly
    positive (e.g. mixture weights/sigmas before normalisation).
    """
    # `input` shadows the builtin, but renaming it would break callers
    # that pass it by keyword — left as-is.
    return tf.add(tf.constant(1, dtype=tf.float32), tf.nn.elu(input))
from tensorflow.keras.layers import Activation  # was referenced but never imported in this snippet

# Register nnelu under the string name 'nnelu' so layers can use
# activation='nnelu' in configs and during model deserialization.
tf.keras.utils.get_custom_objects().update({'nnelu': Activation(nnelu)})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
# Word-vector dtype used throughout the SIF-embedding helpers.
REAL = np.float32
def sif_embeddings(sentences, model, alpha=1e-3): | |
"""Compute the SIF embeddings for a list of sentences | |
Parameters | |
---------- | |
sentences : list | |
The sentences to compute the embeddings for | |
model : `~gensim.models.base_any2vec.BaseAny2VecModel` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
for w in s: | |
if w in vlookup: | |
# The loop over the the vector dimensions is completely unecessary and extremely slow | |
v += ( alpha / (alpha + (vlookup[w].count / Z))) * vectors[w] | |
count += 1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def sif_embeddings(sentences, model, alpha=1e-3): | |
""" Precomputes the indices of the sentences and uses the numpy indexing to directly multiply and sum the vectors | |
""" | |
vlookup = model.wv.vocab | |
vectors = model.wv | |
output = [] | |
for s in sentences: | |
# Pre-compute sentence indices | |
idx = [vlookup[w].index for w in s if w in vlookup] | |
# Note: vectors.sif is a pre-computed numpy array containing the weights for all the word-vectors. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def sif_embeddings(sentences, model): | |
""" Precomputes the sif_vectors in a separate matrix | |
""" | |
vlookup = model.wv.vocab | |
vectors = model.wv.sif_vectors | |
# The sif_vectors are pre-computed as: | |
# sif_vectors = (model.wv.vectors * model.wv.sif[:, None]) | |
output = [] | |
for s in sentences: | |
idx = [vlookup[w].index for w in s if w in vlookup] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def sif_embeddings(sentences, model, dtype=np.float32):
    """Average pre-weighted word vectors using pre-computed index lists.

    Uses a pre-computed list of indices per sentence and skips the use of
    strings altogether: each sentence is a list of integer rows into
    ``model.wv.sif_vectors`` (the SIF-weighted word-vector matrix).

    Parameters
    ----------
    sentences : list of list of int
        Pre-computed vocabulary indices for each sentence.
    model : object
        Provides ``wv.sif_vectors`` (2-D array) and ``vector_size``.
        NOTE(review): assumed to be a gensim word2vec-style model with a
        pre-computed ``sif_vectors`` attribute — confirm upstream.
    dtype : numpy dtype, optional
        Output dtype; defaults to float32 (the module's REAL).

    Returns
    -------
    numpy.ndarray, shape (len(sentences), model.vector_size)
        One embedding per sentence; empty sentences yield a zero row.
    """
    vectors = model.wv.sif_vectors
    output = np.zeros(shape=(len(sentences), model.vector_size), dtype=dtype)
    for i, s in enumerate(sentences):
        # Sum the selected rows, then normalise by sentence length
        # (guarding against division by zero for empty sentences).
        output[i] = np.sum(vectors[s], axis=0) * ((1 / len(s)) if len(s) > 0 else 1)
    return output.astype(dtype)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def sif_embeddings(sentences, model): | |
cdef int size = model.vector_size | |
cdef float[:,:] vectors = model.wv.sif_vectors | |
cdef int sentence_index, word_index, d, count = 0 | |
cdef float inv = 1. | |
np_sum = np.sum | |
output = np.zeros((len(sentences), size), dtype=np.float32) | |
cdef float[:,:] sv = output |
OlderNewer