Last active May 22, 2020 19:25
# Learn up to `num_merges` BPE merge operations: on every pass, pick the
# highest-scoring adjacent pair reported by get_stats() and fuse it in the corpus.
# NOTE(review): `corpus`, `merges`, `get_stats` and `merge_vocab` are not defined
# in this snippet; they are expected to exist before this point - confirm.
num_merges = 10
for _ in range(num_merges):
    # compute frequency of bigrams in a corpus
    # (presumably a mapping of pair -> count; it is used via `pairs.get` below)
    pairs = get_stats(corpus)
    # stop early when there is nothing left to merge: max() raises ValueError
    # on an empty mapping
    if not pairs:
        break
    # compute the best pair
    best = max(pairs, key=pairs.get)
    # merge the frequent pair in corpus
    corpus = merge_vocab(best, corpus)
    # append to merge list so the operations can be reported after the loop
    merges.append(best)
    # NOTE(review): the original comment also mentions updating the vocabulary,
    # but no vocabulary object is visible here - confirm whether one must be
    # updated alongside `merges`.
# convert each merged pair (a tuple of symbols) to a single string
merges_in_string = ["".join(pair) for pair in merges]
print("BPE Merge Operations:", merges_in_string)
