Oliver Borchers (oborchers)
oborchers / mdn_parameter_vectors.py
Last active February 12, 2019 17:13
Create the parameter vectors for the MDN
from tensorflow.keras.layers import Input, Dense, Concatenate

neurons = 500      # Neurons of the DNN hidden layers
components = 2     # Number of components in the mixture
no_parameters = 3  # Parameters of the mixture (alpha, mu, sigma)

inputs = Input(shape=(x_train.shape[1],))  # x_train is assumed to be defined upstream
h1 = Dense(neurons, activation="relu")(inputs)
h2 = Dense(neurons, activation="relu")(h1)

# Completion (assumed from the gist title): one output head per mixture parameter,
# concatenated into a single parameter vector
alphas = Dense(components, activation="softmax", name="alphas")(h2)
mus = Dense(components, name="mus")(h2)
sigmas = Dense(components, activation="nnelu", name="sigmas")(h2)  # nnelu keeps sigma positive (defined below)
pvector = Concatenate(name="pvector")([alphas, mus, sigmas])
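A minimal sketch (assuming the standard Keras functional API, not shown in the gist preview) of wiring the heads into a trainable model; gnll_loss is defined in the next gist:

from tensorflow.keras.models import Model

# Hypothetical wiring (assumption, not part of the gist)
model = Model(inputs=inputs, outputs=pvector)
model.compile(optimizer="adam", loss=gnll_loss)  # gnll_loss from the next gist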
oborchers / mdn_loss_function.py
Created February 12, 2019 17:30
Computation of the MDN Loss Function
import tensorflow as tf
from tensorflow_probability import distributions as tfd

def slice_parameter_vectors(parameter_vector):
    """Returns an unpacked list of parameter vectors."""
    return [parameter_vector[:, i * components:(i + 1) * components] for i in range(no_parameters)]

def gnll_loss(y, parameter_vector):
    """Computes the mean negative log-likelihood loss of y given the mixture parameters."""
    # Completion (assumed): assemble the mixture from the sliced parameters and score y under it
    alpha, mu, sigma = slice_parameter_vectors(parameter_vector)
    gm = tfd.MixtureSameFamily(
        mixture_distribution=tfd.Categorical(probs=alpha),
        components_distribution=tfd.Normal(loc=mu, scale=sigma))
    log_likelihood = gm.log_prob(tf.transpose(y))
    return -tf.reduce_mean(log_likelihood, axis=-1)
oborchers / gmm_tfprob.py
Last active February 15, 2019 09:48
Simple Gaussian Mixture with TF Eager / Probability
import numpy as np
import tensorflow as tf
from tensorflow_probability import distributions as tfd

tf.enable_eager_execution()

# Set values for the mixture (floats, so tfd.Normal gets a float dtype)
alphas = [0.6, 0.3, 0.1]
means = [30.0, 60.0, 120.0]
sigmas = [5.0, 3.0, 1.0]

# Completion (assumed from the gist title): assemble the Gaussian mixture
gmm = tfd.MixtureSameFamily(
    mixture_distribution=tfd.Categorical(probs=alphas),
    components_distribution=tfd.Normal(loc=means, scale=sigmas))
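With the mixture assembled, sampling and density evaluation are one-liners; a sketch of standard tfd usage under eager execution:

samples = gmm.sample(1000)    # draw 1000 values from the mixture
density = gmm.log_prob(45.0)  # log-density of a single point
print(samples.shape, float(density))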
oborchers / mdn_nnelu.py
Last active May 23, 2019 08:14
Computes the Non-Negative Exponential Linear Unit
import tensorflow as tf
from tensorflow.keras.layers import Activation

def nnelu(input):
    """Computes the Non-Negative Exponential Linear Unit."""
    return tf.add(tf.constant(1, dtype=tf.float32), tf.nn.elu(input))

tf.keras.utils.get_custom_objects().update({'nnelu': Activation(nnelu)})
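Once registered, the activation can be referenced by name like any built-in; a quick sketch:

sigma_head = tf.keras.layers.Dense(2, activation="nnelu")  # resolved via the custom objects registry
print(nnelu(tf.constant([-10.0, 0.0, 10.0])))  # strictly positive outputs: ~[4.5e-05, 1, 11]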
oborchers / sif_baseline.py
Created June 7, 2019 16:16
Baseline implementation for SIF embeddings
import numpy as np

REAL = np.float32

def sif_embeddings(sentences, model, alpha=1e-3):
    """Compute the SIF embeddings for a list of sentences.

    Parameters
    ----------
    sentences : list
        The sentences to compute the embeddings for.
    model : `~gensim.models.base_any2vec.BaseAny2VecModel`
        A gensim model containing the word vectors and vocabulary.
    """
    vlookup = model.wv.vocab  # Gives us access to word index and count
    vectors = model.wv        # Gives us access to the word vectors
    Z = sum(vlookup[w].count for w in vlookup)  # Total word count (assumed normalizer)
    output = []
    # Reconstruction of the truncated gist preview (assumed): SIF-weighted average
    for s in sentences:
        v = np.zeros(model.vector_size, dtype=REAL)
        count = 0
        for w in s:
            if w in vlookup:
                # Note: a loop over the vector dimensions here is completely unnecessary and extremely slow
                v += (alpha / (alpha + (vlookup[w].count / Z))) * vectors[w]
                count += 1
        if count > 0:
            v /= count
        output.append(v)
    return np.vstack(output).astype(REAL)
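A usage sketch for the baseline, assuming gensim < 4.0 (where model.wv.vocab exists) and a toy Word2Vec model trained on the spot:

from gensim.models import Word2Vec

sentences = [["hello", "world"], ["sif", "embeddings", "are", "fast"]]
w2v = Word2Vec(sentences, min_count=1)  # toy model for illustration
emb = sif_embeddings(sentences, w2v)    # one row per sentence
print(emb.shape)                        # (2, w2v.vector_size)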
def sif_embeddings(sentences, model, alpha=1e-3):
    """Pre-computes the indices of the sentences and uses numpy indexing to directly multiply and sum the vectors."""
    vlookup = model.wv.vocab
    vectors = model.wv
    output = []
    for s in sentences:
        # Pre-compute sentence indices
        idx = [vlookup[w].index for w in s if w in vlookup]
        # Note: vectors.sif is a pre-computed numpy array containing the weights for all the word-vectors
        # (a sketch of how it can be computed follows below)
        v = np.sum(vectors.vectors[idx] * vectors.sif[idx][:, None], axis=0)  # Completion (assumed)
        if len(idx) > 0:
            v /= len(idx)
        output.append(v)
    return np.vstack(output).astype(REAL)
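The gists reference model.wv.sif and model.wv.sif_vectors without showing how they are filled. A minimal sketch, assuming a pre-4.0 gensim KeyedVectors (with index2word and per-word count), of how these attributes could be pre-computed; the helper name is hypothetical:

import numpy as np

def precompute_sif(model, alpha=1e-3):
    # Hypothetical helper (not part of the gists): attach SIF weights to the model
    counts = np.array([model.wv.vocab[w].count for w in model.wv.index2word], dtype=np.float32)
    Z = counts.sum()  # Total word count
    model.wv.sif = (alpha / (alpha + counts / Z)).astype(np.float32)
    # Pre-weighted word vectors, matching the formula quoted in the next snippet
    model.wv.sif_vectors = (model.wv.vectors * model.wv.sif[:, None]).astype(np.float32)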
def sif_embeddings(sentences, model):
    """Pre-computes the sif_vectors in a separate matrix."""
    vlookup = model.wv.vocab
    vectors = model.wv.sif_vectors
    # The sif_vectors are pre-computed as:
    # sif_vectors = (model.wv.vectors * model.wv.sif[:, None])
    output = []
    for s in sentences:
        idx = [vlookup[w].index for w in s if w in vlookup]
        # Completion (assumed): sum the pre-weighted vectors and average
        v = np.sum(vectors[idx], axis=0)
        if len(idx) > 0:
            v /= len(idx)
        output.append(v)
    return np.vstack(output).astype(REAL)
def sif_embeddings(sentences, model):
    """Uses a pre-computed list of indices and skips the use of strings altogether."""
    vectors = model.wv.sif_vectors
    output = np.zeros(shape=(len(sentences), model.vector_size), dtype=REAL)
    for i, s in enumerate(sentences):
        output[i] = np.sum(vectors[s], axis=0) * ((1 / len(s)) if len(s) > 0 else 1)
    return output.astype(REAL)
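This version expects each sentence as a list of vocabulary indices rather than tokens. A usage sketch, assuming the same gensim model as above:

# Convert token sentences to index lists once, then embed
sentences_idx = [[model.wv.vocab[w].index for w in s if w in model.wv.vocab]
                 for s in sentences]
embeddings = sif_embeddings(sentences_idx, model)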
def sif_embeddings(sentences, model):
    cdef int size = model.vector_size
    cdef float[:,:] vectors = model.wv.sif_vectors
    cdef int sentence_index, word_index, d, count = 0
    cdef float inv = 1.
    np_sum = np.sum
    output = np.zeros((len(sentences), size), dtype=np.float32)
    cdef float[:,:] sv = output
    # Completion (assumed, mirroring the numpy version; sentences are index lists as above)
    for sentence_index in range(len(sentences)):
        s = sentences[sentence_index]
        if len(s) > 0:
            output[sentence_index] = np_sum(vectors.base[s], axis=0) * (1. / len(s))
    return output
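This last variant is Cython, so it must be compiled before use. A minimal sketch, assuming the code lives in a file named sif_cython.pyx (a hypothetical filename), using pyximport to build it on import:

import numpy as np
import pyximport
pyximport.install(setup_args={"include_dirs": np.get_include()})  # compile .pyx modules on import

from sif_cython import sif_embeddings
embeddings = sif_embeddings(sentences_idx, model)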