Skip to content

Instantly share code, notes, and snippets.

@edumunozsala
Last active September 25, 2020 18:20
Show Gist options
  • Save edumunozsala/cce0574fbabe957a943a0e255491bddb to your computer and use it in GitHub Desktop.
Save edumunozsala/cce0574fbabe957a943a0e255491bddb to your computer and use it in GitHub Desktop.
Code to create dictionaries for Char-level text generator
class CharVocab:
    """Character-level vocabulary mapping characters to integer ids and back.

    Attributes:
        type: Caller-supplied label describing the kind of vocabulary.
        int2char: List of tokens; a token's position in the list is its id.
            Special tokens (pad, eos, unk) come first, in that order.
        char2int: Dict mapping each token to its id. It stays empty until
            the instance is called with some text.
    """

    def __init__(self, type_vocab, pad_token='<PAD>', eos_token='<EOS>', unk_token='<UNK>'):
        """Reserve the leading ids for the special tokens.

        Args:
            type_vocab: Label stored on the instance as ``type``.
            pad_token: Padding token, or None to leave it out.
            eos_token: End-of-sequence token, or None to leave it out.
            unk_token: Unknown-character token, or None to leave it out.
        """
        self.type = type_vocab
        # Only the tokens that were actually provided get an id.
        self.int2char = [
            token
            for token in (pad_token, eos_token, unk_token)
            if token is not None
        ]
        self.char2int = {}

    def __call__(self, text):
        """Extend the vocabulary with every character found in ``text``.

        Args:
            text: Iterable of strings (a single string also works); all
                items are joined and their unique characters collected.

        Characters already in the vocabulary keep their ids, so calling the
        instance several times never creates duplicate entries.
        """
        # Skip anything already registered (including special tokens) so a
        # repeated call cannot append the same character twice.
        known = set(self.int2char)
        new_chars = set(''.join(text)) - known
        # Sort so that ids are reproducible across interpreter runs; raw set
        # order depends on string hash randomization.
        self.int2char += sorted(new_chars)
        # Rebuild the reverse mapping from the full, updated token list.
        self.char2int = {char: ind for ind, char in enumerate(self.int2char)}
# Build a character vocabulary that reserves only the unknown token, then
# populate it from the sentences and show the resulting mappings.
vocab = CharVocab('char', pad_token=None, eos_token=None, unk_token='<UNK>')
vocab(sentences)
for label, value in (
    ('Length of vocabulary: ', len(vocab.int2char)),
    ('Int to Char: ', vocab.int2char),
    ('Char to Int: ', vocab.char2int),
):
    print(label, value)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment