This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from tensorflow.keras.layers import Input, Dense, Concatenate | |
# Hyper-parameters of the mixture-density-network trunk.
neurons = 500        # Width of each fully-connected hidden layer
components = 2       # Number of components in the Gaussian mixture
no_parameters = 3    # Parameter families per component (alpha, mu, sigma)

# NOTE(review): x_train is defined elsewhere in the notebook — assumes a
# 2-D array of shape (samples, features); confirm against the data-prep cell.
inputs = Input(shape=(x_train.shape[1],))
h1 = Dense(neurons, activation="relu")(inputs)
h2 = Dense(neurons, activation="relu")(h1)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from tensorflow_probability import distributions as tfd | |
def slice_parameter_vectors(parameter_vector, components=2, no_parameters=3):
    """Split a flat mixture-parameter vector into per-family slices.

    The network emits all mixture parameters concatenated along the last
    axis as [alpha_1..alpha_k, mu_1..mu_k, sigma_1..sigma_k]; this slices
    them back apart.

    Parameters
    ----------
    parameter_vector : array-like, shape (batch, no_parameters * components)
        Concatenated mixture parameters.
    components : int, optional
        Number of mixture components (defaults to the module's value, 2).
    no_parameters : int, optional
        Parameter families per component (defaults to 3: alpha, mu, sigma).

    Returns
    -------
    list
        `no_parameters` slices, each of shape (batch, components).
    """
    return [parameter_vector[:, i * components:(i + 1) * components]
            for i in range(no_parameters)]
def gnll_loss(y, parameter_vector): | |
""" Computes the mean negative log-likelihood loss of y given the mixture parameters. | |
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import tensorflow as tf | |
from tensorflow_probability import distributions as tfd | |
# TF1-only API: enables eager evaluation so tensors print as values.
# NOTE(review): removed in TF2 (eager is the default there) — this line
# pins the snippet to TensorFlow 1.x.
tf.enable_eager_execution()

# Ground-truth parameters of the 3-component Gaussian mixture to sample from.
alphas = [0.6, 0.3, 0.1]   # mixing weights (sum to 1)
means = [30, 60, 120]      # component means
sigmas = [5, 3, 1]         # component standard deviations
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tensorflow as tf | |
def nnelu(input):
    """Non-Negative Exponential Linear Unit: 1 + ELU(x).

    ELU has range (-1, inf), so adding 1 maps any real input to (0, inf).
    Useful as an activation for parameters that must stay strictly
    positive (e.g. mixture weights/sigmas before normalisation).
    """
    # `input` shadows the builtin, but renaming it would break callers
    # that pass it by keyword — left as-is.
    return tf.add(tf.constant(1, dtype=tf.float32), tf.nn.elu(input))
from tensorflow.keras.layers import Activation  # was referenced but never imported in this snippet

# Register nnelu under the string name 'nnelu' so layers can use
# activation='nnelu' in configs and during model deserialization.
tf.keras.utils.get_custom_objects().update({'nnelu': Activation(nnelu)})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
# Word-vector dtype used throughout the SIF-embedding helpers.
REAL = np.float32
def sif_embeddings(sentences, model, alpha=1e-3): | |
"""Compute the SIF embeddings for a list of sentences | |
Parameters | |
---------- | |
sentences : list | |
The sentences to compute the embeddings for | |
model : `~gensim.models.base_any2vec.BaseAny2VecModel` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
for w in s: | |
if w in vlookup: | |
# The loop over the the vector dimensions is completely unecessary and extremely slow | |
v += ( alpha / (alpha + (vlookup[w].count / Z))) * vectors[w] | |
count += 1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def sif_embeddings(sentences, model, alpha=1e-3): | |
""" Precomputes the indices of the sentences and uses the numpy indexing to directly multiply and sum the vectors | |
""" | |
vlookup = model.wv.vocab | |
vectors = model.wv | |
output = [] | |
for s in sentences: | |
# Pre-compute sentence indices | |
idx = [vlookup[w].index for w in s if w in vlookup] | |
# Note: vectors.sif is a pre-computed numpy array containing the weights for all the word-vectors. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def sif_embeddings(sentences, model): | |
""" Precomputes the sif_vectors in a separate matrix | |
""" | |
vlookup = model.wv.vocab | |
vectors = model.wv.sif_vectors | |
# The sif_vectors are pre-computed as: | |
# sif_vectors = (model.wv.vectors * model.wv.sif[:, None]) | |
output = [] | |
for s in sentences: | |
idx = [vlookup[w].index for w in s if w in vlookup] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def sif_embeddings(sentences, model, dtype=np.float32):
    """Average pre-weighted word vectors using pre-computed index lists.

    Uses a pre-computed list of indices per sentence and skips the use of
    strings altogether: each sentence is a list of integer rows into
    ``model.wv.sif_vectors`` (the SIF-weighted word-vector matrix).

    Parameters
    ----------
    sentences : list of list of int
        Pre-computed vocabulary indices for each sentence.
    model : object
        Provides ``wv.sif_vectors`` (2-D array) and ``vector_size``.
        NOTE(review): assumed to be a gensim word2vec-style model with a
        pre-computed ``sif_vectors`` attribute — confirm upstream.
    dtype : numpy dtype, optional
        Output dtype; defaults to float32 (the module's REAL).

    Returns
    -------
    numpy.ndarray, shape (len(sentences), model.vector_size)
        One embedding per sentence; empty sentences yield a zero row.
    """
    vectors = model.wv.sif_vectors
    output = np.zeros(shape=(len(sentences), model.vector_size), dtype=dtype)
    for i, s in enumerate(sentences):
        # Sum the selected rows, then normalise by sentence length
        # (guarding against division by zero for empty sentences).
        output[i] = np.sum(vectors[s], axis=0) * ((1 / len(s)) if len(s) > 0 else 1)
    return output.astype(dtype)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def sif_embeddings(sentences, model): | |
cdef int size = model.vector_size | |
cdef float[:,:] vectors = model.wv.sif_vectors | |
cdef int sentence_index, word_index, d, count = 0 | |
cdef float inv = 1. | |
np_sum = np.sum | |
output = np.zeros((len(sentences), size), dtype=np.float32) | |
cdef float[:,:] sv = output |
OlderNewer