Last active
August 4, 2018 13:31
-
-
Save patricoferris/912fd16632e5c778fb67df015784369c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# band2vec: builds a TF1 skip-gram embedding graph trained with NCE loss.
# (Imports hoisted to the top of the script per PEP 8.)
import math

import tensorflow as tf

# Number of distinct artists — one embedding row per artist.
# NOTE(review): assumes `artist_lookup` is defined earlier in the file — confirm.
vocabulary_size = len(artist_lookup)
# Dimensionality of each learned embedding vector.
embedding_size = 64
# Training samples fed to the network per step.
batch_size = 64
# Negative samples drawn per batch for NCE [see below].
num_sampled = 16

graph = tf.Graph()
with graph.as_default():
    # Name scopes prefix the contained op names — good practice, keeps the
    # graph (e.g. in TensorBoard) readable.
    with tf.name_scope('inputs'):
        # Placeholders are the mouths of the network: each step they are fed a
        # fresh batch of (input index, label index) pairs.
        training_inputs = tf.placeholder(tf.int32, shape=[batch_size])
        training_labels = tf.placeholder(tf.int32, shape=[batch_size, 1])

    # Pin the embedding ops to the CPU.
    with tf.device('/cpu:0'):
        with tf.name_scope('embeddings'):
            # Variables persist across runs and need an initial shape/value.
            # One row per artist, each a vector of length `embedding_size`,
            # initialised uniformly in [-1.0, 1.0).
            embeddings = tf.Variable(
                tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))
            # Like indexing a numpy array with multiple indices: gathers the
            # embedding rows for this batch's input indices.
            embed = tf.nn.embedding_lookup(embeddings, training_inputs)

        with tf.name_scope('weights'):
            # NCE output-layer weights, scaled by 1/sqrt(embedding_size) so
            # initial logits stay at roughly unit variance.
            nce_weights = tf.Variable(
                tf.truncated_normal(
                    [vocabulary_size, embedding_size],
                    stddev=1.0 / math.sqrt(embedding_size)))

        with tf.name_scope('biases'):
            # NCE output-layer biases, one per artist, initialised to zero.
            nce_biases = tf.Variable(tf.zeros([vocabulary_size]))

    with tf.name_scope('loss'):
        # Noise Contrastive Estimation: approximates the full softmax by
        # learning to discriminate the true label from `num_sampled` sampled
        # noise labels — see below for a fuller explanation.
        loss = tf.reduce_mean(
            tf.nn.nce_loss(
                weights=nce_weights,
                biases=nce_biases,
                labels=training_labels,
                inputs=embed,
                num_sampled=num_sampled,
                num_classes=vocabulary_size))

    with tf.name_scope('optimizer'):
        # Plain SGD with a fixed learning rate of 1.0.
        optimizer = tf.train.GradientDescentOptimizer(1.0).minimize(loss)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment