import numpy as np
import tensorflow as tf

sentences = [
    "snoopy dog",
    "milo dog",
    "dumbo elephant",
    "portugal country",
    "brazil country",
]

tokeniser = tf.keras.preprocessing.text.Tokenizer()
tokeniser.fit_on_texts(sentences)
print(tokeniser.word_index)

sequences = tokeniser.texts_to_sequences(sentences)
for x in sequences:
    print(x)

# word indices start at 1 (0 is reserved for padding), so the vocabulary
# size is the largest assigned index plus one
VOCAB_SIZE = max(tokeniser.index_word) + 1
print(f"VOCAB_SIZE: {VOCAB_SIZE}")
def make_skipgrams():
    """Build (target, context) pairs with 1/0 labels for positive/negative samples."""
    train_x, all_labels = [], []
    for sequence in sequences:
        pairs, labels = tf.keras.preprocessing.sequence.skipgrams(
            sequence, VOCAB_SIZE, negative_samples=1.0, window_size=1, shuffle=True
        )
        train_x.extend(pairs)
        all_labels.extend(labels)
    train_x = np.array(train_x)
    # split the pairs into target and context columns for the two model inputs
    return train_x[:, 0], train_x[:, 1], np.array(all_labels)
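# A quick peek at the generated training triples (values vary run to run,
# since skipgrams() shuffles and samples negatives at random):
content_words, context_words, labels = make_skipgrams()
for target, context, label in list(zip(content_words, context_words, labels))[:5]:
    print(tokeniser.index_word[target], tokeniser.index_word[context], label)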
# inputs
content_input = tf.keras.layers.Input(shape=(1,), dtype=tf.int32, name='content_word')
context_input = tf.keras.layers.Input(shape=(1,), dtype=tf.int32, name='context_word')

# layers
embeddings = tf.keras.layers.Embedding(input_dim=VOCAB_SIZE, output_dim=2, name='embeddings')
dot_prod = tf.keras.layers.Dot(axes=2, normalize=True, name='dot_product')

# graph
content_embedding = embeddings(content_input)
context_embedding = embeddings(context_input)
similarity = dot_prod([content_embedding, context_embedding])
# the head below is an assumption: squash the cosine similarity into (0, 1)
# so binary cross-entropy applies to the 0/1 skip-gram labels
output = tf.keras.layers.Dense(1, activation='sigmoid', name='output')(tf.keras.layers.Flatten()(similarity))

model = tf.keras.Model(inputs=[content_input, context_input], outputs=output)
model.compile(optimizer='adam', loss='binary_crossentropy')
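# Optional: confirm the wiring and parameter counts before training
model.summary()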
loss_hist = []
for i in range(20):
    # draw a fresh set of skip-gram pairs (and negatives) each round
    content_words, context_words, labels = make_skipgrams()
    hist = model.fit([content_words, context_words], labels, epochs=1, verbose=0)
    loss_hist.append(hist.history['loss'][-1])
    print(f"loss: {loss_hist[-1]:.4f}")
import itertools
import random

import matplotlib.pyplot as plt
import numpy as np

random.seed(1)
np.random.seed(1)
plt.style.use('ggplot')
objects_to_rank = {'dress', 'shirt', 'pants'}
all_permutations = list(itertools.permutations(objects_to_rank))

# sets have no guaranteed iteration order, so sort for deterministic output
for x in sorted(all_permutations):
    print(x)
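# Three items give 3! = 6 orderings; a quick self-check:
import math

assert len(all_permutations) == math.factorial(len(objects_to_rank))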