Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save deelipku23/2d5a84262672c608cf2ed0af455dcfb1 to your computer and use it in GitHub Desktop.
def tokenize(sentences, tokenizer, max_length=128):
    """Encode sentences into fixed-length BERT-style model inputs.

    Parameters
    ----------
    sentences : iterable of str
        Raw sentences to encode; iterated once with a tqdm progress bar.
    tokenizer : object
        A Hugging Face-style tokenizer exposing ``encode_plus`` that
        returns a mapping with ``'input_ids'``, ``'attention_mask'`` and
        ``'token_type_ids'`` keys.
    max_length : int, optional
        Fixed sequence length to pad/truncate to (default 128, matching
        the original hard-coded value).

    Returns
    -------
    tuple of numpy.ndarray
        ``(input_ids, attention_masks, token_type_ids)``, each of dtype
        int32 and shape ``(len(sentences), max_length)``.
    """
    input_ids, input_masks, input_segments = [], [], []
    for sentence in tqdm(sentences):
        # padding='max_length' + truncation=True replaces the deprecated
        # pad_to_max_length=True (removed in recent transformers releases);
        # the old flag truncated implicitly, so behavior is preserved.
        inputs = tokenizer.encode_plus(
            sentence,
            add_special_tokens=True,
            max_length=max_length,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_token_type_ids=True,
        )
        input_ids.append(inputs['input_ids'])
        input_masks.append(inputs['attention_mask'])
        input_segments.append(inputs['token_type_ids'])
    return (np.asarray(input_ids, dtype='int32'),
            np.asarray(input_masks, dtype='int32'),
            np.asarray(input_segments, dtype='int32'))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment