Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
# create overlapping token sequences of length seq_len + 1
def create_seq(text: str, seq_len: int = 5) -> list:
    """Split *text* into overlapping windows of ``seq_len + 1`` tokens.

    Tokens are obtained with ``str.split()`` (any run of whitespace is a
    separator). When *text* has more than ``seq_len`` tokens, a window of
    ``seq_len + 1`` consecutive tokens is slid over it one token at a time
    and each window is returned as a single space-joined string.

    Args:
        text: The input string to tokenize.
        seq_len: Number of tokens preceding the last token of each window;
            every returned window therefore holds ``seq_len + 1`` tokens.

    Returns:
        A list of space-joined windows. If *text* has ``seq_len`` tokens or
        fewer, the list contains only *text* itself, unmodified (its
        original whitespace is kept).

    Example:
        >>> create_seq("a b c d e f g")
        ['a b c d e f', 'b c d e f g']
    """
    # Tokenize once: the text does not change, so splitting it again on
    # every iteration would make the whole function quadratic.
    tokens = text.split()

    # Too short to form even one full window: hand the text back as-is.
    if len(tokens) <= seq_len:
        return [text]

    # `end` is the index of the last token of each window.
    return [
        " ".join(tokens[end - seq_len:end + 1])
        for end in range(seq_len, len(tokens))
    ]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment