import numpy as np
import tensorflow as tf

sentences = [
    "snoopy dog",
    "milo dog",
    "dumbo elephant",
    "portugal country",
    "brazil country",
]

tokeniser = tf.keras.preprocessing.text.Tokenizer()
tokeniser.fit_on_texts(sentences)
print(tokeniser.word_index)

sequences = tokeniser.texts_to_sequences(sentences)
for x in sequences:
    print(x)

# word indices start at 1 (0 is reserved for padding), so the vocabulary
# size is the largest assigned index plus one
VOCAB_SIZE = max(tokeniser.index_word) + 1
print(f"VOCAB_SIZE: {VOCAB_SIZE}")
def make_skipgrams():
    """Build (target, context) pairs with 1/0 labels for positive/negative samples."""
    train_x, all_labels = [], []
    for sequence in sequences:
        pairs, labels = tf.keras.preprocessing.sequence.skipgrams(
            sequence, VOCAB_SIZE, negative_samples=1.0, window_size=1, shuffle=True
        )
        train_x.extend(pairs)
        all_labels.extend(labels)
    train_x = np.array(train_x)
    # split the pairs into target and context columns for the two model inputs
    return train_x[:, 0], train_x[:, 1], np.array(all_labels)
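# A quick peek at the generated training triples (values vary run to run,
# since skipgrams() shuffles and samples negatives at random):
content_words, context_words, labels = make_skipgrams()
for target, context, label in list(zip(content_words, context_words, labels))[:5]:
    print(tokeniser.index_word[target], tokeniser.index_word[context], label)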
# inputs
content_input = tf.keras.layers.Input(shape=(1,), dtype=tf.int32, name='content_word')
context_input = tf.keras.layers.Input(shape=(1,), dtype=tf.int32, name='context_word')

# layers
embeddings = tf.keras.layers.Embedding(input_dim=VOCAB_SIZE, output_dim=2, name='embeddings')
dot_prod = tf.keras.layers.Dot(axes=2, normalize=True, name='dot_product')

# graph
content_embedding = embeddings(content_input)
context_embedding = embeddings(context_input)
similarity = dot_prod([content_embedding, context_embedding])
# the head below is an assumption: squash the cosine similarity into (0, 1)
# so binary cross-entropy applies to the 0/1 skip-gram labels
output = tf.keras.layers.Dense(1, activation='sigmoid', name='output')(tf.keras.layers.Flatten()(similarity))

model = tf.keras.Model(inputs=[content_input, context_input], outputs=output)
model.compile(optimizer='adam', loss='binary_crossentropy')
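# Optional: confirm the wiring and parameter counts before training
model.summary()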
loss_hist = []
for i in range(20):
    # draw a fresh set of skip-gram pairs (and negatives) each round
    content_words, context_words, labels = make_skipgrams()
    hist = model.fit([content_words, context_words], labels, epochs=1, verbose=0)
    loss_hist.append(hist.history['loss'][-1])
    print(f"loss: {loss_hist[-1]:.4f}")
import itertools
import random

import matplotlib.pyplot as plt
import numpy as np

random.seed(1)
np.random.seed(1)
plt.style.use('ggplot')
objects_to_rank = {'dress', 'shirt', 'pants'}
all_permutations = list(itertools.permutations(objects_to_rank))

# sets have no guaranteed iteration order, so sort for deterministic output
for x in sorted(all_permutations):
    print(x)
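# Three items give 3! = 6 orderings; a quick self-check:
import math

assert len(all_permutations) == math.factorial(len(objects_to_rank))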