# prateekjoshi565/tokenize_bert.py

## tokenize_bert.py
# Encoding options shared by all three splits: every sequence is truncated
# or padded to a fixed length of 25 tokens. `padding="max_length"` replaces
# the deprecated `pad_to_max_length=True` flag and pads to `max_length` in
# the same way.
_encode_kwargs = dict(
    max_length=25,
    padding="max_length",
    truncation=True,
)

# tokenize and encode sequences in the training set
# (assumes train_text is array-like with .tolist() yielding strings — TODO confirm)
tokens_train = tokenizer.batch_encode_plus(
    train_text.tolist(),
    **_encode_kwargs,
)

# tokenize and encode sequences in the validation set
tokens_val = tokenizer.batch_encode_plus(
    val_text.tolist(),
    **_encode_kwargs,
)

# tokenize and encode sequences in the test set
tokens_test = tokenizer.batch_encode_plus(
    test_text.tolist(),
    **_encode_kwargs,
)
	# NOTE(review): this tab-indented section repeats the top-level
	# tokenization calls earlier in the file — confirm whether it is an
	# intentional nested copy or a paste/extraction artifact.
	#
	# Encoding options shared by all three splits: every sequence is truncated
	# or padded to a fixed length of 25 tokens. `padding="max_length"` replaces
	# the deprecated `pad_to_max_length=True` flag and pads to `max_length` in
	# the same way.
	_encode_kwargs = dict(
		max_length=25,
		padding="max_length",
		truncation=True,
	)

	# tokenize and encode sequences in the training set
	tokens_train = tokenizer.batch_encode_plus(
		train_text.tolist(),
		**_encode_kwargs,
	)

	# tokenize and encode sequences in the validation set
	tokens_val = tokenizer.batch_encode_plus(
		val_text.tolist(),
		**_encode_kwargs,
	)

	# tokenize and encode sequences in the test set
	tokens_test = tokenizer.batch_encode_plus(
		test_text.tolist(),
		**_encode_kwargs,
	)