{{ message }}

Instantly share code, notes, and snippets.

# Aravind Pai aravindpai

Created May 22, 2020
View bpe_8.py
 # Merge the most frequent pair throughout the corpus.
corpus = merge_vocab(best, corpus)
# The label and the value are separate print() arguments; the original had
# a colon between them, which is a syntax error.
print("Updated Corpus (After Merge operation):", corpus)

# Convert the pair tuple into a single merged-symbol string.
best = "".join(best)

# Start the list of merges with the newly merged symbol.
merges = [best]
Last active May 22, 2020
View bpe_7.py
 # Pick the pair with the highest count in `pairs`.
# max() iterates the keys of `pairs` and ranks them by their stored value.
best = max(pairs, key=pairs.get)
print("Most Frequent pair:", best)
Created May 22, 2020
View bpe_6.py
 # Compute the frequency of bigrams (adjacent symbol pairs) in the corpus.
pairs = get_stats(corpus)
print(pairs)
Created May 22, 2020
View bpe_5.py
 #merges the most frequent pair in the corpus #accepts the corpus and best pair #returns the modified corpus import re def merge_vocab(pair, corpus_in): corpus_out = {} bigram = re.escape(' '.join(pair)) p = re.compile(r'(?
Created May 22, 2020
View bpe_4.py
 # Compute the frequency of each pair of adjacent symbols in a corpus.
def get_stats(corpus):
    """Return the frequency of every adjacent symbol pair in *corpus*.

    Args:
        corpus: Mapping from a word, written as whitespace-separated
            symbols (for example ``"l o w"``), to its frequency.

    Returns:
        A ``collections.defaultdict(int)`` keyed by ``(left, right)``
        symbol tuples. Each occurrence of a pair inside a word adds that
        word's frequency to the pair's total.
    """
    pairs = collections.defaultdict(int)
    for word, freq in corpus.items():
        symbols = word.split()
        # Pair every symbol with its right-hand neighbour; words with
        # fewer than two symbols contribute nothing.
        for left, right in zip(symbols, symbols[1:]):
            pairs[left, right] += freq
    return pairs
Last active May 22, 2020
View bpe_3.py
 # Count how often each word occurs in the corpus.
import collections

# Counter tallies the words; dict() then turns the Counter into a plain
# dictionary mapping each word to its frequency.
corpus = dict(collections.Counter(corpus))
print("Corpus:", corpus)
Last active May 22, 2020
View bpe_2.py
 # Initialize the vocabulary with every distinct character in the corpus.
# The space used to join the words is dropped via set difference, which
# also works when the joined text contains no space at all (an empty or
# single-word corpus); list.remove(' ') would raise ValueError there.
vocab = list(set(" ".join(corpus)) - {" "})

# Split each word into space-separated characters.
corpus = [" ".join(token) for token in corpus]

# Append a trailing space to every word.
# NOTE(review): the original comment here reads only "appending", so a
# marker may have been lost from both the comment and the literal (BPE
# implementations commonly append an end-of-word token) - confirm.
corpus = [token + ' ' for token in corpus]
Created May 22, 2020
View bpe_1.py
 #importing library import pandas as pd #reading .txt file text = pd.read_csv("sample.txt",header=None) #converting a dataframe into a single list corpus=[] for row in text.values: tokens = row[0].split(" ")
Created May 17, 2020
View 10_26.py
 # Store the result of compute_distance(midpoints, num) in `dist`.
 # NOTE(review): presumably distances between the entries of `midpoints`,
 # with `num` as their count - confirm against compute_distance.
 dist= compute_distance(midpoints,num)
Created May 17, 2020
View 10_26.py
 # Unpack the first entry of `person` into four values and print them.
# NOTE(review): the names suggest two (x, y) corner points of a box - confirm.
x1, y1, x2, y2 = person[0]
print(x1, y1, x2, y2)