gorkemozkaya/run_tf_glue.ipynb

## run_tf_glue.ipynb
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Adapted from [transformers/examples/run_tf_glue.py](https://github.com/huggingface/transformers/blob/master/examples/run_tf_glue.py)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import json\n",
    "import math\n",
    "\n",
    "import tensorflow as tf\n",
    "import tensorflow_datasets\n",
    "\n",
    "from transformers import (\n",
    "    BertConfig,\n",
    "    BertTokenizer,\n",
    "    TFBertForSequenceClassification,\n",
    "    glue_convert_examples_to_features,\n",
    "    glue_processors\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Set up the TPU"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Export the TPU name before the resolver cell runs. Presumably\n",
    "# TPUClusterResolver() falls back to TPU_NAME when called without arguments\n",
    "# (the initialization log reports \"kaggle-tpu\") -- TODO confirm.\n",
    "os.environ[\"TPU_NAME\"] = \"kaggle-tpu\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Running on TPU  ['10.166.101.2:8470']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:absl:Entering into master device scope: /job:worker/replica:0/task:0/device:CPU:0\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Initializing the TPU system: kaggle-tpu\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Initializing the TPU system: kaggle-tpu\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Clearing out eager caches\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Clearing out eager caches\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Finished initializing TPU system.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Finished initializing TPU system.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Found TPU system:\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Found TPU system:\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:*** Num TPU Cores: 8\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:*** Num TPU Cores: 8\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:*** Num TPU Workers: 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:*** Num TPU Workers: 1\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:*** Num TPU Cores Per Worker: 8\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:*** Num TPU Cores Per Worker: 8\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "REPLICAS:  8\n"
     ]
    }
   ],
   "source": [
    "# Try to locate a TPU. A ValueError from the resolver is treated as \"no TPU\n",
    "# available\", in which case the default strategy below stays in effect.\n",
    "try:\n",
    "    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection\n",
    "    print('Running on TPU ', tpu.cluster_spec().as_dict()['worker'])\n",
    "except ValueError:\n",
    "    tpu = None\n",
    "# Default strategy; replaced by a TPU strategy when a TPU was found.\n",
    "strategy = tf.distribute.get_strategy()\n",
    "if tpu:\n",
    "    # Connect to the TPU cluster and initialize it before building the strategy.\n",
    "    tf.config.experimental_connect_to_cluster(tpu)\n",
    "    tf.tpu.experimental.initialize_tpu_system(tpu)\n",
    "    strategy = tf.distribute.experimental.TPUStrategy(tpu)\n",
    "# The replica count drives the batch-size choice in the next cell.\n",
    "print(\"REPLICAS: \", strategy.num_replicas_in_sync)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2\n"
     ]
    }
   ],
   "source": [
    "# Global batch sizes: 16 examples per replica on a single 8-replica TPU,\n",
    "# otherwise a fixed batch of 32. Evaluation uses a multiple of the training batch.\n",
    "EPOCHS = 3\n",
    "if strategy.num_replicas_in_sync == 8:  # single TPU\n",
    "    BATCH_SIZE = 16 * strategy.num_replicas_in_sync\n",
    "    EVAL_BATCH_SIZE = 4 * BATCH_SIZE\n",
    "else:\n",
    "    BATCH_SIZE = 32\n",
    "    EVAL_BATCH_SIZE = 2 * BATCH_SIZE\n",
    "\n",
    "# GLUE task key used to index `glue_processors`. TFDS_TASK is the same name with\n",
    "# the hyphen removed for the two hyphenated tasks.\n",
    "TASK = \"mrpc\"\n",
    "TFDS_TASK = {\"sst-2\": \"sst2\", \"sts-b\": \"stsb\"}.get(TASK, TASK)\n",
    "\n",
    "# Number of labels reported by the task's processor (1 would mean regression).\n",
    "num_labels = len(glue_processors[TASK]().get_labels())\n",
    "print(num_labels)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Dataset"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Reference: [tensorflow/models/official/nlp/bert](https://github.com/tensorflow/models/blob/master/official/nlp/bert/run_classifier.py)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Bucket prefix holding the pre-built GLUE files read further down:\n",
    "# `{TASK}_meta_data`, `{TASK}_train.tf_record` and `{TASK}_eval.tf_record`.\n",
    "GLUE_DIR = \"gs://cloud-tpu-checkpoints/bert/classification\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def single_file_dataset(input_file, name_to_features):\n",
    "    \"\"\"Builds a decoded dataset from one or more TFRecord files.\n",
    "\n",
    "    Args:\n",
    "        input_file: Path to a TFRecord file, or a list of such paths.\n",
    "        name_to_features: Feature spec forwarded to `decode_record` for every\n",
    "            serialized record.\n",
    "\n",
    "    Returns:\n",
    "        The dataset of decoded examples. When `input_file` names exactly one\n",
    "        file, auto sharding is turned off on the returned dataset.\n",
    "    \"\"\"\n",
    "    dataset = tf.data.TFRecordDataset(input_file)\n",
    "    dataset = dataset.map(lambda record: decode_record(record, name_to_features))\n",
    "\n",
    "    # When `input_file` is a path to a single file or a list containing a\n",
    "    # single path, disable auto sharding so that the same input file is sent\n",
    "    # to all workers. The options object only exists on this branch, so it is\n",
    "    # applied here as well; applying it unconditionally raised\n",
    "    # UnboundLocalError for multi-file inputs.\n",
    "    if isinstance(input_file, str) or len(input_file) == 1:\n",
    "        options = tf.data.Options()\n",
    "        options.experimental_distribute.auto_shard_policy = (\n",
    "            tf.data.experimental.AutoShardPolicy.OFF)\n",
    "        dataset = dataset.with_options(options)\n",
    "    return dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def decode_record(record, name_to_features):\n",
    "    \"\"\"Parses one serialized tf.Example and narrows its integer features.\n",
    "\n",
    "    Args:\n",
    "        record: Serialized example to parse.\n",
    "        name_to_features: Feature spec handed to `tf.io.parse_single_example`.\n",
    "\n",
    "    Returns:\n",
    "        Dict of parsed tensors in which every tf.int64 tensor has been cast to\n",
    "        tf.int32; tensors of any other dtype are passed through unchanged.\n",
    "    \"\"\"\n",
    "    parsed = tf.io.parse_single_example(record, name_to_features)\n",
    "\n",
    "    # tf.Example only supports tf.int64, but the TPU only supports tf.int32,\n",
    "    # so every int64 feature is cast down here.\n",
    "    return {\n",
    "        name: tf.cast(tensor, tf.int32) if tensor.dtype == tf.int64 else tensor\n",
    "        for name, tensor in parsed.items()\n",
    "    }"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_classifier_dataset(file_path,\n",
    "                              seq_length,\n",
    "                              batch_size,\n",
    "                              is_training=True,\n",
    "                              input_pipeline_context=None):\n",
    "    \"\"\"Builds the batched (features, label) dataset for training or evaluation.\n",
    "\n",
    "    Args:\n",
    "        file_path: TFRecord path(s) passed on to `single_file_dataset`.\n",
    "        seq_length: Fixed length of the three per-token features.\n",
    "        batch_size: Number of examples per emitted batch.\n",
    "        is_training: When true the data is shuffled (buffer of 100), repeated\n",
    "            indefinitely, and the last partial batch is dropped.\n",
    "        input_pipeline_context: Optional context; when it reports more than one\n",
    "            input pipeline the dataset is sharded by pipeline id.\n",
    "\n",
    "    Returns:\n",
    "        Dataset of `(features, label)` batches, where `features` is a dict with\n",
    "        keys 'input_ids', 'attention_mask' and 'token_type_ids'.\n",
    "    \"\"\"\n",
    "    sequence_feature = tf.io.FixedLenFeature([seq_length], tf.int64)\n",
    "    scalar_feature = tf.io.FixedLenFeature([], tf.int64)\n",
    "    feature_spec = {\n",
    "        'input_ids': sequence_feature,\n",
    "        'input_mask': sequence_feature,\n",
    "        'segment_ids': sequence_feature,\n",
    "        'label_ids': scalar_feature,\n",
    "        'is_real_example': scalar_feature,\n",
    "    }\n",
    "\n",
    "    def _to_features_and_label(record):\n",
    "        \"\"\"Renames the TFRecord fields and splits off the label.\"\"\"\n",
    "        features = {\n",
    "            'input_ids': record['input_ids'],\n",
    "            'attention_mask': record['input_mask'],\n",
    "            'token_type_ids': record['segment_ids'],\n",
    "        }\n",
    "        return (features, record['label_ids'])\n",
    "\n",
    "    examples = single_file_dataset(file_path, feature_spec)\n",
    "\n",
    "    # The dataset is always sharded by number of hosts:\n",
    "    # num_input_pipelines is the number of hosts rather than number of cores.\n",
    "    ctx = input_pipeline_context\n",
    "    if ctx and ctx.num_input_pipelines > 1:\n",
    "        examples = examples.shard(ctx.num_input_pipelines, ctx.input_pipeline_id)\n",
    "\n",
    "    examples = examples.map(_to_features_and_label)\n",
    "\n",
    "    if is_training:\n",
    "        examples = examples.shuffle(100).repeat()\n",
    "\n",
    "    batches = examples.batch(batch_size, drop_remainder=is_training)\n",
    "    return batches.prefetch(1024)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_dataset_fn(input_file_pattern, max_seq_length, global_batch_size,\n",
    "                   is_training):\n",
    "    \"\"\"Returns a factory that builds the classifier dataset on demand.\n",
    "\n",
    "    Args:\n",
    "        input_file_pattern: TFRecord path(s) for the split.\n",
    "        max_seq_length: Sequence length of the stored features.\n",
    "        global_batch_size: Batch size across all replicas.\n",
    "        is_training: Forwarded to `create_classifier_dataset`.\n",
    "\n",
    "    Returns:\n",
    "        A function `_dataset_fn(ctx=None)` producing the dataset.\n",
    "    \"\"\"\n",
    "\n",
    "    def _dataset_fn(ctx=None):\n",
    "        \"\"\"Builds the dataset; with a `ctx` the per-replica batch size is used.\"\"\"\n",
    "        if ctx:\n",
    "            batch_size = ctx.get_per_replica_batch_size(global_batch_size)\n",
    "        else:\n",
    "            batch_size = global_batch_size\n",
    "        return create_classifier_dataset(\n",
    "            input_file_pattern,\n",
    "            max_seq_length,\n",
    "            batch_size,\n",
    "            is_training=is_training,\n",
    "            input_pipeline_context=ctx)\n",
    "\n",
    "    return _dataset_fn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the task's JSON metadata; later cells take `max_seq_length`,\n",
    "# `train_data_size` and `eval_data_size` from it.\n",
    "with tf.io.gfile.GFile(f'{GLUE_DIR}/{TASK}_meta_data', 'rb') as reader:\n",
    "    input_meta_data = json.loads(reader.read().decode('utf-8'))\n",
    "\n",
    "max_seq_length = input_meta_data['max_seq_length']\n",
    "# Dataset factories for the two splits; nothing is read until they are called.\n",
    "train_input_fn = get_dataset_fn(\n",
    "    f\"{GLUE_DIR}/{TASK}_train.tf_record\",\n",
    "    max_seq_length,\n",
    "    BATCH_SIZE,\n",
    "    is_training=True)\n",
    "eval_input_fn = get_dataset_fn(\n",
    "    f\"{GLUE_DIR}/{TASK}_eval.tf_record\",\n",
    "    max_seq_length,\n",
    "    EVAL_BATCH_SIZE,\n",
    "    is_training=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load tokenizer and model from pretrained model/vocabulary. Specify the number of labels to classify (2+: classification, 1: regression)\n",
    "config = BertConfig.from_pretrained(\"bert-base-cased\", num_labels=num_labels)\n",
    "# NOTE(review): `tokenizer` is not referenced again in the visible cells (the\n",
    "# TFRecords already carry `input_ids`) -- confirm it is still needed.\n",
    "tokenizer = BertTokenizer.from_pretrained(\"bert-base-cased\")\n",
    "# Datasets, model, optimizer, loss and metric are all created under the strategy scope.\n",
    "with strategy.scope():\n",
    "    # Called without a context, so both datasets are batched with the global batch size.\n",
    "    training_dataset = train_input_fn()\n",
    "    evaluation_dataset = eval_input_fn()\n",
    "    \n",
    "    model = TFBertForSequenceClassification.from_pretrained(\"bert-base-cased\", config=config)\n",
    "    # Prepare training: Compile tf.keras model with optimizer, loss and learning rate schedule\n",
    "    opt = tf.keras.optimizers.Adam(learning_rate=3e-5, epsilon=1e-08)\n",
    "\n",
    "    # A single label means regression (squared error); otherwise cross-entropy on raw logits.\n",
    "    if num_labels == 1:\n",
    "        loss = tf.keras.losses.MeanSquaredError()\n",
    "    else:\n",
    "        loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)\n",
    "\n",
    "    # NOTE(review): accuracy is attached in the regression branch too, where it is\n",
    "    # presumably not meaningful -- confirm before running a regression task.\n",
    "    metric = tf.keras.metrics.SparseCategoricalAccuracy(\"accuracy\")\n",
    "    model.compile(optimizer=opt, loss=loss, metrics=[metric])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_data_size = input_meta_data['train_data_size']\n",
    "# Training batches are full-size only (drop_remainder), so round the step count down.\n",
    "steps_per_epoch = int(train_data_size / BATCH_SIZE)\n",
    "# Evaluation keeps the final partial batch, so round up to cover every example.\n",
    "eval_steps = int(math.ceil(input_meta_data['eval_data_size'] / EVAL_BATCH_SIZE))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train for 28 steps, validate for 1 steps\n",
      "Epoch 1/3\n",
      "28/28 [==============================] - 80s 3s/step - loss: 0.6127 - accuracy: 0.6733 - val_loss: 0.5566 - val_accuracy: 0.7157\n",
      "Epoch 2/3\n",
      "28/28 [==============================] - 3s 110ms/step - loss: 0.5024 - accuracy: 0.7656 - val_loss: 0.4951 - val_accuracy: 0.7328\n",
      "Epoch 3/3\n",
      "28/28 [==============================] - 3s 103ms/step - loss: 0.3980 - accuracy: 0.8245 - val_loss: 0.5141 - val_accuracy: 0.7574\n"
     ]
    }
   ],
   "source": [
    "# Train and evaluate using tf.keras.Model.fit()\n",
    "# The training dataset repeats indefinitely, so `steps_per_epoch` is what ends\n",
    "# an epoch; `validation_steps` bounds each evaluation pass.\n",
    "history = model.fit(\n",
    "    training_dataset,\n",
    "    epochs=EPOCHS,\n",
    "    steps_per_epoch=steps_per_epoch,\n",
    "    validation_data=evaluation_dataset,\n",
    "    validation_steps=eval_steps,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save TF2 model\n",
    "# Create the output directory first (no error if it already exists), then\n",
    "# write the fine-tuned model into it.\n",
    "os.makedirs(\"./save/\", exist_ok=True)\n",
    "model.save_pretrained(\"./save/\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Adapted from [transformers/examples/run_tf_glue.py](https://github.com/huggingface/transformers/blob/master/examples/run_tf_glue.py)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [],
	"source": [
	"import os\n",
	"import json\n",
	"import math\n",
	"\n",
	"import tensorflow as tf\n",
	"import tensorflow_datasets\n",
	"\n",
	"from transformers import (\n",
	" BertConfig,\n",
	" BertTokenizer,\n",
	" TFBertForSequenceClassification,\n",
	" glue_convert_examples_to_features,\n",
	" glue_processors\n",
	")"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Set up the TPU"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Export the TPU name before the resolver cell runs. Presumably\n",
	"# TPUClusterResolver() falls back to TPU_NAME when called without arguments\n",
	"# (the initialization log reports \"kaggle-tpu\") -- TODO confirm.\n",
	"os.environ[\"TPU_NAME\"] = \"kaggle-tpu\""
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Running on TPU ['10.166.101.2:8470']\n"
	]
	},
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"INFO:absl:Entering into master device scope: /job:worker/replica:0/task:0/device:CPU:0\n"
	]
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"INFO:tensorflow:Initializing the TPU system: kaggle-tpu\n"
	]
	},
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"INFO:tensorflow:Initializing the TPU system: kaggle-tpu\n"
	]
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"INFO:tensorflow:Clearing out eager caches\n"
	]
	},
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"INFO:tensorflow:Clearing out eager caches\n"
	]
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"INFO:tensorflow:Finished initializing TPU system.\n"
	]
	},
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"INFO:tensorflow:Finished initializing TPU system.\n"
	]
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"INFO:tensorflow:Found TPU system:\n"
	]
	},
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"INFO:tensorflow:Found TPU system:\n"
	]
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"INFO:tensorflow:*** Num TPU Cores: 8\n"
	]
	},
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"INFO:tensorflow:*** Num TPU Cores: 8\n"
	]
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"INFO:tensorflow:*** Num TPU Workers: 1\n"
	]
	},
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"INFO:tensorflow:*** Num TPU Workers: 1\n"
	]
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"INFO:tensorflow:*** Num TPU Cores Per Worker: 8\n"
	]
	},
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"INFO:tensorflow:*** Num TPU Cores Per Worker: 8\n"
	]
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)\n"
	]
	},
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)\n"
	]
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)\n"
	]
	},
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)\n"
	]
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)\n"
	]
	},
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)\n"
	]
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)\n"
	]
	},
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)\n"
	]
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)\n"
	]
	},
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)\n"
	]
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)\n"
	]
	},
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)\n"
	]
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)\n"
	]
	},
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)\n"
	]
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)\n"
	]
	},
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)\n"
	]
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)\n"
	]
	},
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)\n"
	]
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)\n"
	]
	},
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)\n"
	]
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)\n"
	]
	},
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)\n"
	]
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)\n"
	]
	},
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)\n"
	]
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)\n"
	]
	},
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)\n"
	]
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"REPLICAS: 8\n"
	]
	}
	],
	"source": [
	"# Try to locate a TPU. A ValueError from the resolver is treated as \"no TPU\n",
	"# available\", in which case the default strategy below stays in effect.\n",
	"try:\n",
	" tpu = tf.distribute.cluster_resolver.TPUClusterResolver() # TPU detection\n",
	" print('Running on TPU ', tpu.cluster_spec().as_dict()['worker'])\n",
	"except ValueError:\n",
	" tpu = None\n",
	"# Default strategy; replaced by a TPU strategy when a TPU was found.\n",
	"strategy = tf.distribute.get_strategy()\n",
	"if tpu:\n",
	" # Connect to the TPU cluster and initialize it before building the strategy.\n",
	" tf.config.experimental_connect_to_cluster(tpu)\n",
	" tf.tpu.experimental.initialize_tpu_system(tpu)\n",
	" strategy = tf.distribute.experimental.TPUStrategy(tpu)\n",
	"# The replica count drives the batch-size choice in the next cell.\n",
	"print(\"REPLICAS: \", strategy.num_replicas_in_sync)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"2\n"
	]
	}
	],
	"source": [
	"# Global batch sizes: 16 examples per replica on a single 8-replica TPU,\n",
	"# otherwise a fixed batch of 32. Evaluation uses a multiple of the training batch.\n",
	"EPOCHS = 3\n",
	"if strategy.num_replicas_in_sync == 8:  # single TPU\n",
	"    BATCH_SIZE = 16 * strategy.num_replicas_in_sync\n",
	"    EVAL_BATCH_SIZE = 4 * BATCH_SIZE\n",
	"else:\n",
	"    BATCH_SIZE = 32\n",
	"    EVAL_BATCH_SIZE = 2 * BATCH_SIZE\n",
	"\n",
	"# GLUE task key used to index `glue_processors`. TFDS_TASK is the same name with\n",
	"# the hyphen removed for the two hyphenated tasks.\n",
	"TASK = \"mrpc\"\n",
	"TFDS_TASK = {\"sst-2\": \"sst2\", \"sts-b\": \"stsb\"}.get(TASK, TASK)\n",
	"\n",
	"# Number of labels reported by the task's processor (1 would mean regression).\n",
	"num_labels = len(glue_processors[TASK]().get_labels())\n",
	"print(num_labels)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Dataset"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Reference: [tensorflow/models/official/nlp/bert](https://github.com/tensorflow/models/blob/master/official/nlp/bert/run_classifier.py)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Bucket prefix holding the pre-built GLUE files read further down:\n",
	"# `{TASK}_meta_data`, `{TASK}_train.tf_record` and `{TASK}_eval.tf_record`.\n",
	"GLUE_DIR = \"gs://cloud-tpu-checkpoints/bert/classification\""
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {},
	"outputs": [],
	"source": [
	"def single_file_dataset(input_file, name_to_features):\n",
	"    \"\"\"Builds a decoded dataset from one or more TFRecord files.\n",
	"\n",
	"    Args:\n",
	"        input_file: Path to a TFRecord file, or a list of such paths.\n",
	"        name_to_features: Feature spec forwarded to `decode_record` for every\n",
	"            serialized record.\n",
	"\n",
	"    Returns:\n",
	"        The dataset of decoded examples. When `input_file` names exactly one\n",
	"        file, auto sharding is turned off on the returned dataset.\n",
	"    \"\"\"\n",
	"    dataset = tf.data.TFRecordDataset(input_file)\n",
	"    dataset = dataset.map(lambda record: decode_record(record, name_to_features))\n",
	"\n",
	"    # When `input_file` is a path to a single file or a list containing a\n",
	"    # single path, disable auto sharding so that the same input file is sent\n",
	"    # to all workers. The options object only exists on this branch, so it is\n",
	"    # applied here as well rather than unconditionally after it.\n",
	"    if isinstance(input_file, str) or len(input_file) == 1:\n",
	"        options = tf.data.Options()\n",
	"        options.experimental_distribute.auto_shard_policy = (\n",
	"            tf.data.experimental.AutoShardPolicy.OFF)\n",
	"        dataset = dataset.with_options(options)\n",
	"    return dataset"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {},
	"outputs": [],
	"source": [
	"def decode_record(record, name_to_features):\n",
	"    \"\"\"Parses one serialized tf.Example and narrows its integer features.\n",
	"\n",
	"    Args:\n",
	"        record: Serialized example to parse.\n",
	"        name_to_features: Feature spec handed to `tf.io.parse_single_example`.\n",
	"\n",
	"    Returns:\n",
	"        Dict of parsed tensors in which every tf.int64 tensor has been cast to\n",
	"        tf.int32; tensors of any other dtype are passed through unchanged.\n",
	"    \"\"\"\n",
	"    example = tf.io.parse_single_example(record, name_to_features)\n",
	"\n",
	"    # tf.Example only supports tf.int64, but the TPU only supports tf.int32.\n",
	"    # So cast all int64 to int32.\n",
	"    for name in list(example.keys()):\n",
	"        tensor = example[name]\n",
	"        if tensor.dtype == tf.int64:\n",
	"            tensor = tf.cast(tensor, tf.int32)\n",
	"        example[name] = tensor\n",
	"\n",
	"    return example"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 8,
	"metadata": {},
	"outputs": [],
	"source": [
	"def create_classifier_dataset(file_path,\n",
	"                              seq_length,\n",
	"                              batch_size,\n",
	"                              is_training=True,\n",
	"                              input_pipeline_context=None):\n",
	"    \"\"\"Builds the batched (features, label) dataset for training or evaluation.\n",
	"\n",
	"    Args:\n",
	"        file_path: TFRecord path(s) passed on to `single_file_dataset`.\n",
	"        seq_length: Fixed length of the three per-token features.\n",
	"        batch_size: Number of examples per emitted batch.\n",
	"        is_training: When true the data is shuffled (buffer of 100), repeated\n",
	"            indefinitely, and the last partial batch is dropped.\n",
	"        input_pipeline_context: Optional context; when it reports more than one\n",
	"            input pipeline the dataset is sharded by pipeline id.\n",
	"\n",
	"    Returns:\n",
	"        Dataset of `(features, label)` batches, where `features` is a dict with\n",
	"        keys 'input_ids', 'attention_mask' and 'token_type_ids'.\n",
	"    \"\"\"\n",
	"    name_to_features = {\n",
	"        'input_ids': tf.io.FixedLenFeature([seq_length], tf.int64),\n",
	"        'input_mask': tf.io.FixedLenFeature([seq_length], tf.int64),\n",
	"        'segment_ids': tf.io.FixedLenFeature([seq_length], tf.int64),\n",
	"        'label_ids': tf.io.FixedLenFeature([], tf.int64),\n",
	"        'is_real_example': tf.io.FixedLenFeature([], tf.int64),\n",
	"    }\n",
	"    dataset = single_file_dataset(file_path, name_to_features)\n",
	"\n",
	"    # The dataset is always sharded by number of hosts.\n",
	"    # num_input_pipelines is the number of hosts rather than number of cores.\n",
	"    if input_pipeline_context and input_pipeline_context.num_input_pipelines > 1:\n",
	"        dataset = dataset.shard(input_pipeline_context.num_input_pipelines,\n",
	"                                input_pipeline_context.input_pipeline_id)\n",
	"\n",
	"    def _select_data_from_record(record):\n",
	"        \"\"\"Renames the TFRecord fields and splits off the label.\"\"\"\n",
	"        x = {\n",
	"            'input_ids': record['input_ids'],\n",
	"            'attention_mask': record['input_mask'],\n",
	"            'token_type_ids': record['segment_ids']\n",
	"        }\n",
	"        y = record['label_ids']\n",
	"        return (x, y)\n",
	"\n",
	"    dataset = dataset.map(_select_data_from_record)\n",
	"\n",
	"    if is_training:\n",
	"        dataset = dataset.shuffle(100)\n",
	"        dataset = dataset.repeat()\n",
	"\n",
	"    dataset = dataset.batch(batch_size, drop_remainder=is_training)\n",
	"    dataset = dataset.prefetch(1024)\n",
	"    return dataset"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 9,
	"metadata": {},
	"outputs": [],
	"source": [
	"def get_dataset_fn(input_file_pattern, max_seq_length, global_batch_size,\n",
	"                   is_training):\n",
	"    \"\"\"Returns a function that builds the classifier dataset on demand.\n",
	"\n",
	"    The returned function accepts an optional context object. With a context,\n",
	"    the batch size is the context's per-replica share of `global_batch_size`;\n",
	"    without one, `global_batch_size` is used as is.\n",
	"    \"\"\"\n",
	"\n",
	"    def _dataset_fn(ctx=None):\n",
	"        \"\"\"Builds the dataset via `create_classifier_dataset`.\"\"\"\n",
	"        if ctx:\n",
	"            batch_size = ctx.get_per_replica_batch_size(global_batch_size)\n",
	"        else:\n",
	"            batch_size = global_batch_size\n",
	"        return create_classifier_dataset(\n",
	"            input_file_pattern,\n",
	"            max_seq_length,\n",
	"            batch_size,\n",
	"            is_training=is_training,\n",
	"            input_pipeline_context=ctx)\n",
	"\n",
	"    return _dataset_fn"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 10,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Read the JSON metadata file stored next to the task's tf_record files.\n",
	"with tf.io.gfile.GFile(f'{GLUE_DIR}/{TASK}_meta_data', 'rb') as reader:\n",
	"    raw_meta_data = reader.read()\n",
	"input_meta_data = json.loads(raw_meta_data.decode('utf-8'))\n",
	"\n",
	"max_seq_length = input_meta_data['max_seq_length']\n",
	"\n",
	"# Dataset factories; the datasets themselves are only built when called.\n",
	"train_input_fn = get_dataset_fn(\n",
	"    f\"{GLUE_DIR}/{TASK}_train.tf_record\",\n",
	"    max_seq_length,\n",
	"    BATCH_SIZE,\n",
	"    is_training=True)\n",
	"eval_input_fn = get_dataset_fn(\n",
	"    f\"{GLUE_DIR}/{TASK}_eval.tf_record\",\n",
	"    max_seq_length,\n",
	"    EVAL_BATCH_SIZE,\n",
	"    is_training=False)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 11,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Load tokenizer and model from pretrained model/vocabulary.\n",
	"# num_labels selects the task type (2+: classification, 1: regression).\n",
	"config = BertConfig.from_pretrained(\"bert-base-cased\", num_labels=num_labels)\n",
	"tokenizer = BertTokenizer.from_pretrained(\"bert-base-cased\")\n",
	"with strategy.scope():\n",
	"    training_dataset = train_input_fn()\n",
	"    evaluation_dataset = eval_input_fn()\n",
	"\n",
	"    model = TFBertForSequenceClassification.from_pretrained(\"bert-base-cased\", config=config)\n",
	"    # Prepare training: compile the tf.keras model with an optimizer (constant\n",
	"    # learning rate), a loss and a metric that match the task type.\n",
	"    opt = tf.keras.optimizers.Adam(learning_rate=3e-5, epsilon=1e-08)\n",
	"\n",
	"    if num_labels == 1:\n",
	"        loss = tf.keras.losses.MeanSquaredError()\n",
	"        # Categorical accuracy takes an argmax over the outputs, which is\n",
	"        # always 0 for a single regression output, so report an error\n",
	"        # metric instead.\n",
	"        metric = tf.keras.metrics.RootMeanSquaredError(\"rmse\")\n",
	"    else:\n",
	"        loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)\n",
	"        metric = tf.keras.metrics.SparseCategoricalAccuracy(\"accuracy\")\n",
	"\n",
	"    model.compile(optimizer=opt, loss=loss, metrics=[metric])"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 12,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Training only runs whole batches per epoch; evaluation rounds up so the\n",
	"# final, possibly smaller, batch is included.\n",
	"train_data_size = input_meta_data['train_data_size']\n",
	"steps_per_epoch = train_data_size // BATCH_SIZE\n",
	"eval_steps = math.ceil(input_meta_data['eval_data_size'] / EVAL_BATCH_SIZE)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 13,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Train for 28 steps, validate for 1 steps\n",
	"Epoch 1/3\n",
	"28/28 [==============================] - 80s 3s/step - loss: 0.6127 - accuracy: 0.6733 - val_loss: 0.5566 - val_accuracy: 0.7157\n",
	"Epoch 2/3\n",
	"28/28 [==============================] - 3s 110ms/step - loss: 0.5024 - accuracy: 0.7656 - val_loss: 0.4951 - val_accuracy: 0.7328\n",
	"Epoch 3/3\n",
	"28/28 [==============================] - 3s 103ms/step - loss: 0.3980 - accuracy: 0.8245 - val_loss: 0.5141 - val_accuracy: 0.7574\n"
	]
	}
	],
	"source": [
	"# Train and evaluate using tf.keras.Model.fit().\n",
	"# Explicit step counts bound each epoch and each validation pass.\n",
	"history = model.fit(\n",
	"    training_dataset,\n",
	"    validation_data=evaluation_dataset,\n",
	"    epochs=EPOCHS,\n",
	"    steps_per_epoch=steps_per_epoch,\n",
	"    validation_steps=eval_steps,\n",
	")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 14,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Save the fine-tuned TF2 model, creating the target directory if needed.\n",
	"save_dir = \"./save/\"\n",
	"os.makedirs(save_dir, exist_ok=True)\n",
	"model.save_pretrained(save_dir)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.7.5"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 4
	}