Skip to content

Instantly share code, notes, and snippets.

@gauravbansal98
Created May 13, 2020 12:45
Show Gist options
  • Save gauravbansal98/77a96ed1f99583a2ea929c2fd9f14ad3 to your computer and use it in GitHub Desktop.
Save gauravbansal98/77a96ed1f99583a2ea929c2fd9f14ad3 to your computer and use it in GitHub Desktop.
# build per-word training samples (photo entry, padded caption prefix, one-hot next word)
def create_sequences(tokenizer, max_length, descriptions, photos, vocab_size):
    """Expand every caption into one training sample per predicted word.

    Each description is encoded with ``tokenizer.texts_to_sequences``. For
    every split point ``i`` from 1 to ``len(seq) - 1`` the first ``i`` tokens
    form the input and token ``i`` is the target, so a caption of ``n``
    encoded tokens yields ``n - 1`` samples.

    Args:
        tokenizer: object exposing ``texts_to_sequences`` (presumably a
            fitted Keras ``Tokenizer`` -- confirm against the caller).
        max_length: passed as ``maxlen`` to ``pad_sequences`` for every
            input prefix.
        descriptions: mapping of image identifier to an iterable of
            description strings.
        photos: mapping of image identifier to an indexable entry; element
            ``[0]`` of that entry is stored for each sample.
        vocab_size: passed as ``num_classes`` to ``to_categorical``.

    Returns:
        A 3-tuple ``(array(X1), array(X2), array(y))`` holding, in order,
        the photo entries, the padded input prefixes and the encoded
        target words, one row per generated sample.
    """
    photo_inputs = []
    prefix_inputs = []
    next_word_targets = []

    for image_id, captions in descriptions.items():
        for caption in captions:
            # the tokenizer works on batches; wrap the caption and unwrap the result
            encoded = tokenizer.texts_to_sequences([caption])[0]

            # token at position `cut` is predicted from the tokens before it
            for cut, next_token in enumerate(encoded[1:], start=1):
                prefix = encoded[:cut]

                # bring the prefix to max_length (single-item batch, unwrapped)
                padded_prefix = pad_sequences([prefix], maxlen=max_length)[0]
                # categorical encoding of the target token over vocab_size classes
                target = to_categorical([next_token], num_classes=vocab_size)[0]

                photo_inputs.append(photos[image_id][0])
                prefix_inputs.append(padded_prefix)
                next_word_targets.append(target)

    return array(photo_inputs), array(prefix_inputs), array(next_word_targets)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment