David Mraz (a7v8x)
🎮 maximizing reward function in the game of life
a7v8x / array_cheat_sheet.js
Created November 1, 2022 21:24
JS Array Cheat Sheet
// * marks methods that mutate the array in place; the comment shows the
// resulting array, with the return value noted where it differs
[1,2,3].length;      // 3
[1,2,3].push(4);     // [1,2,3,4], returns new length 4 *
[1,2,3].unshift(0);  // [0,1,2,3], returns new length 4 *
[1,2,3].pop();       // [1,2], returns removed element 3 *
[1,2,3].shift();     // [2,3], returns removed element 1 *
[1,2,3].at(2);       // 3
[1,2,3].indexOf(3);  // 2
[1,2,3].includes(3); // true
[1,2,3].map((num) => Math.pow(num, 2)); // [1, 4, 9]
[1,2,3].filter((num) => num % 2);       // [1, 3]
Fine-tuning BERT for sentiment classification (TensorFlow + Transformers)

from transformers import TFBertForSequenceClassification
import tensorflow as tf

# recommended learning rates for Adam when fine-tuning BERT: 5e-5, 3e-5, 2e-5
learning_rate = 2e-5
# just one epoch for illustration; more epochs can help as long as the model does not overfit
number_of_epochs = 1
# batch size is not defined in this excerpt; any small value that fits in memory works
batch_size = 32

# model initialization
model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased')

# train dataset
ds_train_encoded = encode_examples(ds_train).shuffle(10000).batch(batch_size)
# test dataset
ds_test_encoded = encode_examples(ds_test).batch(batch_size)
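The excerpt stops before the training step. A minimal sketch of the compile-and-fit step under the settings above (the optimizer epsilon, loss, and metric choices are standard for this model, not taken from this excerpt):

# the classification head outputs raw logits, so the loss needs from_logits=True
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate, epsilon=1e-08)
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')
model.compile(optimizer=optimizer, loss=loss, metrics=[metric])

# fine-tune on the encoded IMDB splits prepared above
bert_history = model.fit(ds_train_encoded, epochs=number_of_epochs, validation_data=ds_test_encoded)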
# map to the input format expected by TFBertForSequenceClassification
def map_example_to_dict(input_ids, attention_masks, token_type_ids, label):
  return {
      "input_ids": input_ids,
      "token_type_ids": token_type_ids,
      "attention_mask": attention_masks,
  }, label
def encode_examples(ds, limit=-1):
  # prepare lists so that we can build up the final TensorFlow dataset from slices
  input_ids_list = []
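  # --- the excerpt is cut off here; what follows is a sketch of how the function
  # --- typically continues; convert_example_to_feature is an assumed helper that
  # --- mirrors the encode_plus call shown further below, not part of this excerpt
  token_type_ids_list = []
  attention_mask_list = []
  label_list = []

  if limit > 0:
    ds = ds.take(limit)

  for review, label in tfds.as_numpy(ds):
    bert_input = convert_example_to_feature(review.decode())
    input_ids_list.append(bert_input['input_ids'])
    token_type_ids_list.append(bert_input['token_type_ids'])
    attention_mask_list.append(bert_input['attention_mask'])
    label_list.append([label])

  return tf.data.Dataset.from_tensor_slices(
      (input_ids_list, attention_mask_list, token_type_ids_list, label_list)
  ).map(map_example_to_dict)

def convert_example_to_feature(review):
  # assumed helper: encodes one review with the same settings as the tokenizer gist below
  return tokenizer.encode_plus(review,
                               add_special_tokens=True,
                               max_length=160,  # assumption, not from this excerpt
                               pad_to_max_length=True,
                               return_attention_mask=True)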
# peek at a few raw examples from the training split
for review, label in tfds.as_numpy(ds_train.take(5)):
  print('review', review.decode()[0:50], label)

Output:
review This was an absolutely terrible movie. Don't be lu 0
review I have been known to fall asleep during films, but 0
review Mann photographs the Alberta Rocky Mountains in a 0
review This is the kind of film for a snowy Sunday aftern 1
review As others have mentioned, all the women that go nu 1
tfds.core.DatasetInfo(
    name='imdb_reviews',
    version=1.0.0,
    description='Large Movie Review Dataset. This is a dataset for binary sentiment classification containing substantially more data than previous benchmark datasets. We provide a set of 25,000 highly polar movie reviews for training, and 25,000 for testing. There is additional unlabeled data for use as well.',
    homepage='http://ai.stanford.edu/~amaas/data/sentiment/',
    features=FeaturesDict({
        'label': ClassLabel(shape=(), dtype=tf.int64, num_classes=2),
        'text': Text(shape=(), dtype=tf.string),
    }),
    ...
)
import tensorflow_datasets as tfds

(ds_train, ds_test), ds_info = tfds.load('imdb_reviews',
                                         split=(tfds.Split.TRAIN, tfds.Split.TEST),
                                         as_supervised=True,
                                         with_info=True)
print('info', ds_info)
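ds_info also exposes the split sizes programmatically; a quick usage sketch (standard tfds API, not from this excerpt):

# 25,000 labelled reviews in each split for imdb_reviews
print('train size', ds_info.splits['train'].num_examples)
print('test size', ds_info.splits['test'].num_examples)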
bert_input = tokenizer.encode_plus(
    test_sentence,
    add_special_tokens=True,      # add [CLS] and [SEP]
    max_length=max_length_test,   # maximum length of the text that goes to BERT
    pad_to_max_length=True,       # pad with [PAD] tokens up to max_length
    return_attention_mask=True,   # mask so attention ignores the [PAD] tokens
)
print('encoded', bert_input)
Output:
tokenized ['[CLS]', 'test', 'token', '##ization', 'sentence', '.', 'followed', 'by', 'another', 'sentence', '[SEP]']
encoded {
  'input_ids': [101, 3231, 19204, 3989, 6251, 1012, 2628, 2011, 2178, 6251, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0],
  'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
  'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]
}
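As a sanity check, the ids can be decoded back into text; a small sketch using the tokenizer's decode method (not part of this excerpt):

# decode() merges wordpieces back together and keeps the special and [PAD] tokens
print(tokenizer.decode(bert_input['input_ids']))
# [CLS] test tokenization sentence. followed by another sentence [SEP] [PAD] [PAD] ...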
from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)
max_length_test = 20
test_sentence = 'Test tokenization sentence. Followed by another sentence'
# add special tokens manually (encode_plus above adds them automatically)
test_sentence_with_special_tokens = '[CLS] ' + test_sentence + ' [SEP]'
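The call that produces the 'tokenized' output above is missing from the excerpt; it is the tokenizer's standard tokenize method:

# wordpiece-tokenize; do_lower_case lowercases first, '##' marks subword continuations
tokenized = tokenizer.tokenize(test_sentence_with_special_tokens)
print('tokenized', tokenized)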