# egslava/keras-embedding.py

## keras-embedding.py
"""
Created on Fri Mar 30 17:23:45 2018

Embeds one-hot encoder vector
i.e. [0] -> [1 0 0 0], [1] -> [0 1 0 0], [2] -> [0 0 1 0], so on

Using three approaches:
    1. Keras `Embedding` layer
    2. Keras `Dense` layer
    3. TensorFlow matmul

This shows how the same mapping _could_ be implemented in different ways, and what performance penalty each of them carries.

The script measures training time and memory usage for each of those approaches.
@author: egslava@gmail.com
"""
import keras, tensorflow as tf, numpy as np, matplotlib.pyplot as plt, keras.backend as K
import timeit, os, psutil, sys, gc
from keras.layers import Dense, Reshape, Embedding
from keras.losses import mean_squared_error
from memory_profiler import profile

# Number of classes; make_report() trains towards a NUM_CLASSES x NUM_CLASSES
# identity matrix, i.e. one one-hot row per class.
NUM_CLASSES = 500

# Print floats in fixed-point notation with 4 decimals,
# e.g. 9.97123123123e-10 -> 0.0000
np.set_printoptions(precision=4, suppress=True)

# Expose only GPU #0 to CUDA (per the original author: a workaround for a
# TensorFlow-on-GPU bug).
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

def make_report(mode='dense'):
    """Benchmark one way of learning a one-hot mapping and summarise the run.

    A fresh TensorFlow graph and session are created, a model of the requested
    kind is built on top of the row vector ``arange(NUM_CLASSES)`` and trained
    with Adam on a mean-squared-error loss against the
    ``NUM_CLASSES x NUM_CLASSES`` identity matrix.

    Args:
        mode: Implementation to benchmark:
            ``'embed'``  - Keras ``Embedding`` layer fed with the int32 vector;
            ``'matmul'`` - ``tf.matmul`` of the float32 vector with a
            ``tf.get_variable`` weight matrix, reshaped to a square matrix;
            ``'dense'``  - Keras ``Dense`` layer followed by ``Reshape``.

    Returns:
        A one-line string containing the mode, the best wall-clock time of
        3 repeats of 10 optimizer steps, the loss evaluated after those steps
        and the growth of the process RSS (in MB) since the start of the call.

    Raises:
        ValueError: If ``mode`` is not ``'embed'``, ``'matmul'`` or ``'dense'``.
    """
    # Fail fast, before any graph/session resources are allocated.
    if mode not in ('embed', 'matmul', 'dense'):
        raise ValueError('Invalid param mode: %s. Can be: embed|matmul|dense' % mode)

    # Reset BEFORE opening the device scope: in graph mode tf.device() attaches
    # to the graph that is the default at call time, so resetting inside the
    # scope would build the model in a new graph that is not pinned to the CPU.
    tf.reset_default_graph()
    sess = tf.Session()
    K.set_session(sess)

    try:
        # we use CPU to be able to access amount of memory
        with tf.device('/cpu:0'), sess.as_default():
            gc.collect()
            start_mem = psutil.Process(os.getpid()).memory_info().rss

            # Row vector [[0, 1, ..., NUM_CLASSES - 1]], shape (1, NUM_CLASSES).
            class_ids = np.arange(NUM_CLASSES).reshape(1, -1)

            if mode == 'embed':
                ids_np = class_ids.astype(np.int32)
                X_embed = tf.placeholder_with_default(ids_np, shape=ids_np.shape)
                preds = Embedding(NUM_CLASSES, output_dim=NUM_CLASSES)(X_embed)
            else:
                feats_np = class_ids.astype(np.float32)
                X_dense = tf.placeholder_with_default(feats_np, shape=feats_np.shape)
                if mode == 'matmul':
                    W = tf.get_variable('W', shape=(NUM_CLASSES, NUM_CLASSES**2))
                    matmul_preds = tf.matmul(X_dense, W)
                    preds = tf.reshape(matmul_preds, (NUM_CLASSES, NUM_CLASSES))
                else:  # mode == 'dense'
                    dense = Dense(NUM_CLASSES**2, input_shape=(None,))(X_dense)
                    preds = Reshape((NUM_CLASSES, NUM_CLASSES))(dense)

            # Target: the identity matrix, i.e. one one-hot row per class.
            labels_np = np.eye(NUM_CLASSES).astype(np.float32)
            labels = tf.placeholder_with_default(labels_np, labels_np.shape)

            loss = tf.reduce_sum(mean_squared_error(labels, preds))
            minimizer = tf.train.AdamOptimizer().minimize(loss)

            sess.run(tf.global_variables_initializer())

            # report: best of 3 repeats, each running 10 optimizer steps
            benchmark_results = timeit.Timer(lambda: minimizer.run()).repeat(3, 10)
            used_time = min(benchmark_results)
            used_mem = psutil.Process(os.getpid()).memory_info().rss - start_mem
            return '%s time to train: %0.4f sec, loss: %0.4f, used mem: %0.4f MB' % (
                mode, used_time, loss.eval(), used_mem / 1024. / 1024.)
    finally:
        # Previously unreachable (it followed the return): always release the
        # session and detach it from Keras, even when building/training fails.
        sess.close()
        K.set_session(None)


# Benchmark every implementation in turn and print its one-line summary.
for report_mode in ('embed', 'matmul', 'dense'):
    print(make_report(report_mode))
	"""
	Created on Fri Mar 30 17:23:45 2018

	Embeds one-hot encoder vector
	i.e. [0] -> [1 0 0 0], [1] -> [0 1 0 0], [2] -> [0 0 1 0], so on

	Using three approaches:
	1. Keras `Embedding` layer
	2. Keras `Dense` layer
	3. TensorFlow matmul

	This shows how the same mapping _could_ be implemented in different ways, and what performance penalty each of them carries.

	The script measures training time and memory usage for each of those approaches.
	@author: egslava@gmail.com
	"""
	import keras, tensorflow as tf, numpy as np, matplotlib.pyplot as plt, keras.backend as K
	import timeit, os, psutil, sys, gc
	from keras.layers import Dense, Reshape, Embedding
	from keras.losses import mean_squared_error
	from memory_profiler import profile

	# Number of classes; make_report() trains towards a NUM_CLASSES x NUM_CLASSES
	# identity matrix, i.e. one one-hot row per class.
	NUM_CLASSES = 500

	# Print floats in fixed-point notation with 4 decimals,
	# e.g. 9.97123123123e-10 -> 0.0000
	np.set_printoptions(precision=4, suppress=True)

	# Expose only GPU #0 to CUDA (per the original author: a workaround for a
	# TensorFlow-on-GPU bug).
	os.environ["CUDA_VISIBLE_DEVICES"] = "0"

	def make_report(mode='dense'):
		"""Benchmark one way of learning a one-hot mapping and summarise the run.

		A fresh TensorFlow graph and session are created, a model of the requested
		kind is built on top of the row vector ``arange(NUM_CLASSES)`` and trained
		with Adam on a mean-squared-error loss against the
		``NUM_CLASSES x NUM_CLASSES`` identity matrix.

		Args:
			mode: Implementation to benchmark:
				``'embed'``  - Keras ``Embedding`` layer fed with the int32 vector;
				``'matmul'`` - ``tf.matmul`` of the float32 vector with a
				``tf.get_variable`` weight matrix, reshaped to a square matrix;
				``'dense'``  - Keras ``Dense`` layer followed by ``Reshape``.

		Returns:
			A one-line string containing the mode, the best wall-clock time of
			3 repeats of 10 optimizer steps, the loss evaluated after those steps
			and the growth of the process RSS (in MB) since the start of the call.

		Raises:
			ValueError: If ``mode`` is not ``'embed'``, ``'matmul'`` or ``'dense'``.
		"""
		# Fail fast, before any graph/session resources are allocated.
		# (The message is restored without the stray markdown backslashes.)
		if mode not in ('embed', 'matmul', 'dense'):
			raise ValueError('Invalid param mode: %s. Can be: embed|matmul|dense' % mode)

		# Reset BEFORE opening the device scope: in graph mode tf.device() attaches
		# to the graph that is the default at call time, so resetting inside the
		# scope would build the model in a new graph that is not pinned to the CPU.
		tf.reset_default_graph()
		sess = tf.Session()
		K.set_session(sess)

		try:
			# we use CPU to be able to access amount of memory
			with tf.device('/cpu:0'), sess.as_default():
				gc.collect()
				start_mem = psutil.Process(os.getpid()).memory_info().rss

				# Row vector [[0, 1, ..., NUM_CLASSES - 1]], shape (1, NUM_CLASSES).
				class_ids = np.arange(NUM_CLASSES).reshape(1, -1)

				if mode == 'embed':
					ids_np = class_ids.astype(np.int32)
					X_embed = tf.placeholder_with_default(ids_np, shape=ids_np.shape)
					preds = Embedding(NUM_CLASSES, output_dim=NUM_CLASSES)(X_embed)
				else:
					feats_np = class_ids.astype(np.float32)
					X_dense = tf.placeholder_with_default(feats_np, shape=feats_np.shape)
					if mode == 'matmul':
						W = tf.get_variable('W', shape=(NUM_CLASSES, NUM_CLASSES**2))
						matmul_preds = tf.matmul(X_dense, W)
						preds = tf.reshape(matmul_preds, (NUM_CLASSES, NUM_CLASSES))
					else:  # mode == 'dense'
						dense = Dense(NUM_CLASSES**2, input_shape=(None,))(X_dense)
						preds = Reshape((NUM_CLASSES, NUM_CLASSES))(dense)

				# Target: the identity matrix, i.e. one one-hot row per class.
				labels_np = np.eye(NUM_CLASSES).astype(np.float32)
				labels = tf.placeholder_with_default(labels_np, labels_np.shape)

				loss = tf.reduce_sum(mean_squared_error(labels, preds))
				minimizer = tf.train.AdamOptimizer().minimize(loss)

				sess.run(tf.global_variables_initializer())

				# report: best of 3 repeats, each running 10 optimizer steps
				benchmark_results = timeit.Timer(lambda: minimizer.run()).repeat(3, 10)
				used_time = min(benchmark_results)
				used_mem = psutil.Process(os.getpid()).memory_info().rss - start_mem
				return '%s time to train: %0.4f sec, loss: %0.4f, used mem: %0.4f MB' % (
					mode, used_time, loss.eval(), used_mem / 1024. / 1024.)
		finally:
			# Previously unreachable (it followed the return): always release the
			# session and detach it from Keras, even when building/training fails.
			sess.close()
			K.set_session(None)


	# Benchmark every implementation in turn and print its one-line summary.
	for report_mode in ('embed', 'matmul', 'dense'):
		print(make_report(report_mode))