jshirius/銅メダル検証-bert-base-tf2-0-minimalistic-iii_2.ipynb

## 銅メダル検証-bert-base-tf2-0-minimalistic-iii_2.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn.model_selection import GroupKFold\n",
    "import matplotlib.pyplot as plt\n",
    "from tqdm.notebook import tqdm\n",
    "import tensorflow_hub as hub\n",
    "import tensorflow as tf\n",
    "import bert_tokenization as tokenization\n",
    "import tensorflow.keras.backend as K\n",
    "import gc\n",
    "import os\n",
    "from scipy.stats import spearmanr\n",
    "from math import floor, ceil\n",
    "from tensorflow.keras.models import load_model\n",
    "\n",
    "np.set_printoptions(suppress=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Abuout this kernel\n",
    "\n",
    "In this kernel, an example of adding features other than character strings is described with reference to \"l Bert-base TF2.0 (minimalistic) III\".\n",
    "\n",
    "Referenced kernel Bert-base TF2.0 (minimalistic) III<br>\n",
    "https://www.kaggle.com/bibek777/bert-base-tf2-0-minimalistic-iii\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "train shape = (6079, 41)\n",
      "test shape = (476, 11)\n",
      "\n",
      "output categories:\n",
      "\t ['question_asker_intent_understanding', 'question_body_critical', 'question_conversational', 'question_expect_short_answer', 'question_fact_seeking', 'question_has_commonly_accepted_answer', 'question_interestingness_others', 'question_interestingness_self', 'question_multi_intent', 'question_not_really_a_question', 'question_opinion_seeking', 'question_type_choice', 'question_type_compare', 'question_type_consequence', 'question_type_definition', 'question_type_entity', 'question_type_instructions', 'question_type_procedure', 'question_type_reason_explanation', 'question_type_spelling', 'question_well_written', 'answer_helpful', 'answer_level_of_information', 'answer_plausible', 'answer_relevance', 'answer_satisfaction', 'answer_type_instructions', 'answer_type_procedure', 'answer_type_reason_explanation', 'answer_well_written']\n",
      "\n",
      "input categories:\n",
      "\t ['question_title', 'question_body', 'answer']\n"
     ]
    }
   ],
   "source": [
    "PATH = '../input/google-quest-challenge/'\n",
    "\n",
    "\n",
    "#事前学習のモデルをLargeに変更する\n",
    "BERT_PATH = '../input/bert-base-from-tfhub/bert_en_uncased_L-12_H-768_A-12'\n",
    "\n",
    "#現状、Largeだとメモリーエラーになる\n",
    "#BERT_PATH = '../input/bert-base-from-tfhub-l24-h1024-a16/tf_bert_en_cased_L-24_H-1024_A-16'\n",
    "\n",
    "\n",
    "tokenizer = tokenization.FullTokenizer(BERT_PATH+'/assets/vocab.txt', True)\n",
    "MAX_SEQUENCE_LENGTH = 512\n",
    "\n",
    "df_train = pd.read_csv(PATH+'train.csv')\n",
    "df_test = pd.read_csv(PATH+'test.csv')\n",
    "df_sub = pd.read_csv(PATH+'sample_submission.csv')\n",
    "\n",
    "TARGET_COLUMNS = df_sub.columns.values[1:].tolist()\n",
    "\n",
    "print('train shape =', df_train.shape)\n",
    "print('test shape =', df_test.shape)\n",
    "\n",
    "output_categories = list(df_train.columns[11:])\n",
    "input_categories = list(df_train.columns[[1,2,5]])\n",
    "print('\\noutput categories:\\n\\t', output_categories)\n",
    "print('\\ninput categories:\\n\\t', input_categories)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'\\nprint(\"データ追加前\"), print(df_train.shape)\\n\\n#データを追加する\\n#/kaggle/input/to-google-quest-qa-labeling/train_append.csv\\ndf_train_append = pd.read_csv(\\'../input/to-google-quest-qa-labeling/train_append_id800up.csv\\')\\ndf_train_append = df_train_append.drop(\"index\", axis=1)\\nprint(\"追加するデータ量\"), print(df_train_append.shape)\\n\\n#追加後の行数\\ndf_train = df_train.append(df_train_append)\\ndf_train = df_train.reset_index(drop=True)\\nprint(\"データ追加後\"), print(df_train.shape)\\n'"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#データ追加前の形状\n",
    "#結局、やくたたずだった\n",
    "\"\"\"\n",
    "print(\"データ追加前\"), print(df_train.shape)\n",
    "\n",
    "#データを追加する\n",
    "#/kaggle/input/to-google-quest-qa-labeling/train_append.csv\n",
    "df_train_append = pd.read_csv('../input/to-google-quest-qa-labeling/train_append_id800up.csv')\n",
    "df_train_append = df_train_append.drop(\"index\", axis=1)\n",
    "print(\"追加するデータ量\"), print(df_train_append.shape)\n",
    "\n",
    "#追加後の行数\n",
    "df_train = df_train.append(df_train_append)\n",
    "df_train = df_train.reset_index(drop=True)\n",
    "print(\"データ追加後\"), print(df_train.shape)\n",
    "\"\"\"\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "#トレーニングデータ、テストデータをを少なく(100個くらい)する\n",
    "#コードチェック用の\n",
    "\n",
    "#df_train = pd.read_csv(PATH+'train.csv')\n",
    "#df_test = pd.read_csv(PATH+'test.csv')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def _get_masks(tokens, max_seq_length):\n",
    "    \"\"\"Mask for padding\"\"\"\n",
    "    if len(tokens)>max_seq_length:\n",
    "        raise IndexError(\"Token length more than max seq length!\")\n",
    "    return [1]*len(tokens) + [0] * (max_seq_length - len(tokens))\n",
    "\n",
    "def _get_segments(tokens, max_seq_length):\n",
    "    \"\"\"Segments: 0 for the first sequence, 1 for the second\"\"\"\n",
    "    if len(tokens)>max_seq_length:\n",
    "        raise IndexError(\"Token length more than max seq length!\")\n",
    "    segments = []\n",
    "    first_sep = True\n",
    "    current_segment_id = 0\n",
    "    for token in tokens:\n",
    "        segments.append(current_segment_id)\n",
    "        if token == \"[SEP]\":\n",
    "            if first_sep:\n",
    "                first_sep = False \n",
    "            else:\n",
    "                current_segment_id = 1\n",
    "    return segments + [0] * (max_seq_length - len(tokens))\n",
    "\n",
    "def _get_ids(tokens, tokenizer, max_seq_length):\n",
    "    \"\"\"Token ids from Tokenizer vocab\"\"\"\n",
    "    token_ids = tokenizer.convert_tokens_to_ids(tokens)\n",
    "    input_ids = token_ids + [0] * (max_seq_length-len(token_ids))\n",
    "    return input_ids\n",
    "\n",
    "def _trim_input(title, question, answer, max_sequence_length, \n",
    "                t_max_len=30, q_max_len=239, a_max_len=239):\n",
    "\n",
    "    t = tokenizer.tokenize(title)\n",
    "    q = tokenizer.tokenize(question)\n",
    "    a = tokenizer.tokenize(answer)\n",
    "    \n",
    "    t_len = len(t)\n",
    "    q_len = len(q)\n",
    "    a_len = len(a)\n",
    "\n",
    "    if (t_len+q_len+a_len+4) > max_sequence_length:\n",
    "        \n",
    "        if t_max_len > t_len:\n",
    "            t_new_len = t_len\n",
    "            a_max_len = a_max_len + floor((t_max_len - t_len)/2)\n",
    "            q_max_len = q_max_len + ceil((t_max_len - t_len)/2)\n",
    "        else:\n",
    "            t_new_len = t_max_len\n",
    "      \n",
    "        if a_max_len > a_len:\n",
    "            a_new_len = a_len \n",
    "            q_new_len = q_max_len + (a_max_len - a_len)\n",
    "        elif q_max_len > q_len:\n",
    "            a_new_len = a_max_len + (q_max_len - q_len)\n",
    "            q_new_len = q_len\n",
    "        else:\n",
    "            a_new_len = a_max_len\n",
    "            q_new_len = q_max_len\n",
    "            \n",
    "            \n",
    "        if t_new_len+a_new_len+q_new_len+4 != max_sequence_length:\n",
    "            raise ValueError(\"New sequence length should be %d, but is %d\" \n",
    "                             % (max_sequence_length, (t_new_len+a_new_len+q_new_len+4)))\n",
    "        \n",
    "        t = t[:t_new_len]\n",
    "        q = q[:q_new_len]\n",
    "        a = a[:a_new_len]\n",
    "    \n",
    "    return t, q, a\n",
    "\n",
    "def _convert_to_bert_inputs(title, question, answer, tokenizer, max_sequence_length):\n",
    "    \"\"\"Converts tokenized input to ids, masks and segments for BERT\"\"\"\n",
    "    \n",
    "    stoken = [\"[CLS]\"] + title + [\"[SEP]\"] + question + [\"[SEP]\"] + answer + [\"[SEP]\"]\n",
    "\n",
    "    input_ids = _get_ids(stoken, tokenizer, max_sequence_length)\n",
    "    input_masks = _get_masks(stoken, max_sequence_length)\n",
    "    input_segments = _get_segments(stoken, max_sequence_length)\n",
    "\n",
    "    return [input_ids, input_masks, input_segments]\n",
    "\n",
    "def compute_input_arays(df, columns, tokenizer, max_sequence_length):\n",
    "    input_ids, input_masks, input_segments = [], [], []\n",
    "    for _, instance in tqdm(df[columns].iterrows()):\n",
    "        t, q, a = instance.question_title, instance.question_body, instance.answer\n",
    "\n",
    "        t, q, a = _trim_input(t, q, a, max_sequence_length)\n",
    "\n",
    "        ids, masks, segments = _convert_to_bert_inputs(t, q, a, tokenizer, max_sequence_length)\n",
    "        input_ids.append(ids)\n",
    "        input_masks.append(masks)\n",
    "        input_segments.append(segments)\n",
    "        \n",
    "    return [np.asarray(input_ids, dtype=np.int32), \n",
    "            np.asarray(input_masks, dtype=np.int32), \n",
    "            np.asarray(input_segments, dtype=np.int32)]\n",
    "\n",
    "\n",
    "def compute_output_arrays(df, columns):\n",
    "    return np.asarray(df[columns])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def compute_spearmanr(trues, preds):\n",
    "    rhos = []\n",
    "    for col_trues, col_pred in zip(trues.T, preds.T):\n",
    "        rhos.append(\n",
    "            spearmanr(col_trues, col_pred + np.random.normal(0, 1e-7, col_pred.shape[0])).correlation)\n",
    "    return np.mean(rhos)\n",
    "\n",
    "\n",
    "class CustomCallback(tf.keras.callbacks.Callback):\n",
    "    \n",
    "    def __init__(self, valid_data, test_data, batch_size=16, fold=None):\n",
    "\n",
    "        self.valid_inputs = valid_data[0]\n",
    "        self.valid_outputs = valid_data[1]\n",
    "        self.test_inputs = test_data\n",
    "        \n",
    "        self.batch_size = batch_size\n",
    "        self.fold = fold\n",
    "        \n",
    "    def on_train_begin(self, logs={}):\n",
    "        self.valid_predictions = []\n",
    "        self.test_predictions = []\n",
    "        \n",
    "    def on_epoch_end(self, epoch, logs={}):\n",
    "        self.valid_predictions.append(\n",
    "            self.model.predict(self.valid_inputs, batch_size=self.batch_size))\n",
    "        \n",
    "        rho_val = compute_spearmanr(\n",
    "            self.valid_outputs, np.average(self.valid_predictions, axis=0))\n",
    "        \n",
    "        print(\"\\nvalidation rho: %.4f\" % rho_val)\n",
    "        \n",
    "        if self.fold is not None:\n",
    "            self.model.save_weights(f'bert-base-{fold}-{epoch}.h5py')\n",
    "        \n",
    "        self.test_predictions.append(\n",
    "            self.model.predict(self.test_inputs, batch_size=self.batch_size)\n",
    "        )\n",
    "\n",
    "def bert_model():\n",
    "    \n",
    "    input_word_ids = tf.keras.layers.Input(\n",
    "        (MAX_SEQUENCE_LENGTH,), dtype=tf.int32, name='input_word_ids')\n",
    "    input_masks = tf.keras.layers.Input(\n",
    "        (MAX_SEQUENCE_LENGTH,), dtype=tf.int32, name='input_masks')\n",
    "    input_segments = tf.keras.layers.Input(\n",
    "        (MAX_SEQUENCE_LENGTH,), dtype=tf.int32, name='input_segments')\n",
    "    \n",
    "    bert_layer = hub.KerasLayer(BERT_PATH, trainable=True)\n",
    "    \n",
    "    _, x = bert_layer([input_word_ids, input_masks, input_segments])\n",
    "    \n",
    "    #x = tf.keras.layers.GlobalAveragePooling1D()(sequence_output)\n",
    "    x = tf.keras.layers.Lambda(lambda x: x[:, 0])(x)\n",
    "    x = tf.keras.layers.Dropout(0.2)(x)\n",
    "    out = tf.keras.layers.Dense(30, activation=\"sigmoid\", name=\"dense_output\")(x)\n",
    "\n",
    "    model = tf.keras.models.Model(\n",
    "        inputs=[input_word_ids, input_masks, input_segments], outputs=out)\n",
    "    \n",
    "    return model    \n",
    "        \n",
    "def train_and_predict(model, train_data, valid_data, test_data, \n",
    "                      learning_rate, epochs, batch_size, loss_function, fold):\n",
    "        \n",
    "    custom_callback = CustomCallback(\n",
    "        valid_data=(valid_data[0], valid_data[1]), \n",
    "        test_data=test_data,\n",
    "        batch_size=batch_size,\n",
    "        fold=None)\n",
    "\n",
    "    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)\n",
    "    model.compile(loss=loss_function, optimizer=optimizer)\n",
    "    model.fit(train_data[0], train_data[1], epochs=epochs, \n",
    "              batch_size=batch_size, callbacks=[custom_callback])\n",
    "    \n",
    "    return custom_callback\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "59527e1231e1400795cea7f4c477ab21",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "9e5ed41366914a33a6b1fcbd992e2df0",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "gkf = GroupKFold(n_splits=10).split(X=df_train.question_body, groups=df_train.question_body) ############## originaln_splits=5\n",
    "\n",
    "outputs = compute_output_arrays(df_train, output_categories)\n",
    "inputs = compute_input_arays(df_train, input_categories, tokenizer, MAX_SEQUENCE_LENGTH)\n",
    "test_inputs = compute_input_arays(df_test, input_categories, tokenizer, MAX_SEQUENCE_LENGTH)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "inputの形状は？\n",
      "<class 'list'>\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "3"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print(\"inputの形状は？\")\n",
    "print(type(inputs))\n",
    "len(inputs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1回目のループ\n",
      "[   0    3    4 ... 6076 6077 6078]\n",
      "train_inputsの形状\n",
      "3\n",
      "5471\n",
      "Model: \"model\"\n",
      "__________________________________________________________________________________________________\n",
      "Layer (type)                    Output Shape         Param #     Connected to                     \n",
      "==================================================================================================\n",
      "input_word_ids (InputLayer)     [(None, 512)]        0                                            \n",
      "__________________________________________________________________________________________________\n",
      "input_masks (InputLayer)        [(None, 512)]        0                                            \n",
      "__________________________________________________________________________________________________\n",
      "input_segments (InputLayer)     [(None, 512)]        0                                            \n",
      "__________________________________________________________________________________________________\n",
      "keras_layer (KerasLayer)        [(None, 768), (None, 109482241   input_word_ids[0][0]             \n",
      "                                                                 input_masks[0][0]                \n",
      "                                                                 input_segments[0][0]             \n",
      "__________________________________________________________________________________________________\n",
      "lambda (Lambda)                 (None, 768)          0           keras_layer[0][1]                \n",
      "__________________________________________________________________________________________________\n",
      "dropout (Dropout)               (None, 768)          0           lambda[0][0]                     \n",
      "__________________________________________________________________________________________________\n",
      "dense_output (Dense)            (None, 30)           23070       dropout[0][0]                    \n",
      "==================================================================================================\n",
      "Total params: 109,505,311\n",
      "Trainable params: 109,505,310\n",
      "Non-trainable params: 1\n",
      "__________________________________________________________________________________________________\n",
      "Train on 5471 samples\n",
      "Epoch 1/5\n",
      "5464/5471 [============================>.] - ETA: 0s - loss: 0.3947"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/conda/lib/python3.6/site-packages/numpy/lib/function_base.py:2530: RuntimeWarning: invalid value encountered in true_divide\n",
      "  c /= stddev[:, None]\n",
      "/opt/conda/lib/python3.6/site-packages/numpy/lib/function_base.py:2531: RuntimeWarning: invalid value encountered in true_divide\n",
      "  c /= stddev[None, :]\n",
      "/opt/conda/lib/python3.6/site-packages/scipy/stats/_distn_infrastructure.py:877: RuntimeWarning: invalid value encountered in greater\n",
      "  return (self.a < x) & (x < self.b)\n",
      "/opt/conda/lib/python3.6/site-packages/scipy/stats/_distn_infrastructure.py:877: RuntimeWarning: invalid value encountered in less\n",
      "  return (self.a < x) & (x < self.b)\n",
      "/opt/conda/lib/python3.6/site-packages/scipy/stats/_distn_infrastructure.py:1831: RuntimeWarning: invalid value encountered in less_equal\n",
      "  cond2 = cond0 & (x <= self.a)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "validation rho: nan\n",
      "5471/5471 [==============================] - 412s 75ms/sample - loss: 0.3947\n",
      "Epoch 2/5\n",
      "5464/5471 [============================>.] - ETA: 0s - loss: 0.3638\n",
      "validation rho: nan\n",
      "5471/5471 [==============================] - 379s 69ms/sample - loss: 0.3638\n",
      "Epoch 3/5\n",
      "5464/5471 [============================>.] - ETA: 0s - loss: 0.3426\n",
      "validation rho: nan\n",
      "5471/5471 [==============================] - 379s 69ms/sample - loss: 0.3426\n",
      "Epoch 4/5\n",
      "5464/5471 [============================>.] - ETA: 0s - loss: 0.3175\n",
      "validation rho: nan\n",
      "5471/5471 [==============================] - 380s 69ms/sample - loss: 0.3175\n",
      "Epoch 5/5\n",
      "5464/5471 [============================>.] - ETA: 0s - loss: 0.2930\n",
      "validation rho: nan\n",
      "5471/5471 [==============================] - 379s 69ms/sample - loss: 0.2930\n",
      "2回目のループ\n",
      "[   1    2    3 ... 6076 6077 6078]\n",
      "train_inputsの形状\n",
      "3\n",
      "5471\n",
      "Model: \"model\"\n",
      "__________________________________________________________________________________________________\n",
      "Layer (type)                    Output Shape         Param #     Connected to                     \n",
      "==================================================================================================\n",
      "input_word_ids (InputLayer)     [(None, 512)]        0                                            \n",
      "__________________________________________________________________________________________________\n",
      "input_masks (InputLayer)        [(None, 512)]        0                                            \n",
      "__________________________________________________________________________________________________\n",
      "input_segments (InputLayer)     [(None, 512)]        0                                            \n",
      "__________________________________________________________________________________________________\n",
      "keras_layer (KerasLayer)        [(None, 768), (None, 109482241   input_word_ids[0][0]             \n",
      "                                                                 input_masks[0][0]                \n",
      "                                                                 input_segments[0][0]             \n",
      "__________________________________________________________________________________________________\n",
      "lambda (Lambda)                 (None, 768)          0           keras_layer[0][1]                \n",
      "__________________________________________________________________________________________________\n",
      "dropout (Dropout)               (None, 768)          0           lambda[0][0]                     \n",
      "__________________________________________________________________________________________________\n",
      "dense_output (Dense)            (None, 30)           23070       dropout[0][0]                    \n",
      "==================================================================================================\n",
      "Total params: 109,505,311\n",
      "Trainable params: 109,505,310\n",
      "Non-trainable params: 1\n",
      "__________________________________________________________________________________________________\n",
      "Train on 5471 samples\n",
      "Epoch 1/5\n",
      "5464/5471 [============================>.] - ETA: 0s - loss: 0.3919\n",
      "validation rho: 0.3691\n",
      "5471/5471 [==============================] - 413s 75ms/sample - loss: 0.3920\n",
      "Epoch 2/5\n",
      "5464/5471 [============================>.] - ETA: 0s - loss: 0.3622\n",
      "validation rho: 0.3899\n",
      "5471/5471 [==============================] - 380s 69ms/sample - loss: 0.3622\n",
      "Epoch 3/5\n",
      "5464/5471 [============================>.] - ETA: 0s - loss: 0.3414\n",
      "validation rho: 0.3952\n",
      "5471/5471 [==============================] - 380s 69ms/sample - loss: 0.3413\n",
      "Epoch 4/5\n",
      "5464/5471 [============================>.] - ETA: 0s - loss: 0.3160\n",
      "validation rho: 0.3957\n",
      "5471/5471 [==============================] - 380s 69ms/sample - loss: 0.3160\n",
      "Epoch 5/5\n",
      "5464/5471 [============================>.] - ETA: 0s - loss: 0.2928\n",
      "validation rho: 0.3970\n",
      "5471/5471 [==============================] - 380s 69ms/sample - loss: 0.2928\n",
      "3回目のループ\n",
      "[   0    1    2 ... 6076 6077 6078]\n",
      "train_inputsの形状\n",
      "3\n",
      "5471\n",
      "Model: \"model\"\n",
      "__________________________________________________________________________________________________\n",
      "Layer (type)                    Output Shape         Param #     Connected to                     \n",
      "==================================================================================================\n",
      "input_word_ids (InputLayer)     [(None, 512)]        0                                            \n",
      "__________________________________________________________________________________________________\n",
      "input_masks (InputLayer)        [(None, 512)]        0                                            \n",
      "__________________________________________________________________________________________________\n",
      "input_segments (InputLayer)     [(None, 512)]        0                                            \n",
      "__________________________________________________________________________________________________\n",
      "keras_layer (KerasLayer)        [(None, 768), (None, 109482241   input_word_ids[0][0]             \n",
      "                                                                 input_masks[0][0]                \n",
      "                                                                 input_segments[0][0]             \n",
      "__________________________________________________________________________________________________\n",
      "lambda (Lambda)                 (None, 768)          0           keras_layer[0][1]                \n",
      "__________________________________________________________________________________________________\n",
      "dropout (Dropout)               (None, 768)          0           lambda[0][0]                     \n",
      "__________________________________________________________________________________________________\n",
      "dense_output (Dense)            (None, 30)           23070       dropout[0][0]                    \n",
      "==================================================================================================\n",
      "Total params: 109,505,311\n",
      "Trainable params: 109,505,310\n",
      "Non-trainable params: 1\n",
      "__________________________________________________________________________________________________\n",
      "Train on 5471 samples\n",
      "Epoch 1/5\n",
      "5464/5471 [============================>.] - ETA: 0s - loss: 0.3921"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/conda/lib/python3.6/site-packages/numpy/lib/function_base.py:2530: RuntimeWarning: invalid value encountered in true_divide\n",
      "  c /= stddev[:, None]\n",
      "/opt/conda/lib/python3.6/site-packages/numpy/lib/function_base.py:2531: RuntimeWarning: invalid value encountered in true_divide\n",
      "  c /= stddev[None, :]\n",
      "/opt/conda/lib/python3.6/site-packages/scipy/stats/_distn_infrastructure.py:877: RuntimeWarning: invalid value encountered in greater\n",
      "  return (self.a < x) & (x < self.b)\n",
      "/opt/conda/lib/python3.6/site-packages/scipy/stats/_distn_infrastructure.py:877: RuntimeWarning: invalid value encountered in less\n",
      "  return (self.a < x) & (x < self.b)\n",
      "/opt/conda/lib/python3.6/site-packages/scipy/stats/_distn_infrastructure.py:1831: RuntimeWarning: invalid value encountered in less_equal\n",
      "  cond2 = cond0 & (x <= self.a)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "validation rho: nan\n",
      "5471/5471 [==============================] - 412s 75ms/sample - loss: 0.3921\n",
      "Epoch 2/5\n",
      "5464/5471 [============================>.] - ETA: 0s - loss: 0.3618\n",
      "validation rho: nan\n",
      "5471/5471 [==============================] - 380s 69ms/sample - loss: 0.3619\n",
      "Epoch 3/5\n",
      "5464/5471 [============================>.] - ETA: 0s - loss: 0.3412\n",
      "validation rho: nan\n",
      "5471/5471 [==============================] - 380s 69ms/sample - loss: 0.3412\n",
      "Epoch 4/5\n",
      "5464/5471 [============================>.] - ETA: 0s - loss: 0.3169\n",
      "validation rho: nan\n",
      "5471/5471 [==============================] - 380s 69ms/sample - loss: 0.3169\n",
      "Epoch 5/5\n",
      "5464/5471 [============================>.] - ETA: 0s - loss: 0.2938\n",
      "validation rho: nan\n",
      "5471/5471 [==============================] - 380s 69ms/sample - loss: 0.2939\n",
      "4回目のループ\n",
      "[   0    1    2 ... 6076 6077 6078]\n",
      "4回目のループ\n",
      "[   0    1    2 ... 6076 6077 6078]\n",
      "4回目のループ\n",
      "[   0    1    2 ... 6076 6077 6078]\n",
      "4回目のループ\n",
      "[   0    1    2 ... 6076 6077 6078]\n",
      "4回目のループ\n",
      "[   0    1    2 ... 6074 6076 6078]\n",
      "4回目のループ\n",
      "[   0    1    2 ... 6076 6077 6078]\n",
      "4回目のループ\n",
      "[   0    1    2 ... 6074 6075 6077]\n"
     ]
    }
   ],
   "source": [
    "histories = []\n",
    "count = 1\n",
    "for fold, (train_idx, valid_idx) in enumerate(gkf):\n",
    "    print(str(count) + \"回目のループ\")\n",
    "    print(train_idx)\n",
    "  \n",
    "    # will actually only do 3 folds (out of 5) to manage < 2h\n",
    "    if fold < 3:\n",
    "        K.clear_session()\n",
    "        model = bert_model()\n",
    "\n",
    "\n",
    "        \n",
    "        train_inputs = [inputs[i][train_idx] for i in range(3)]\n",
    "        train_outputs = outputs[train_idx]\n",
    "\n",
    "        valid_inputs = [inputs[i][valid_idx] for i in range(3)]\n",
    "        valid_outputs = outputs[valid_idx]\n",
    "\n",
    "        \n",
    "         \n",
    "        print(\"train_inputsの形状\")\n",
    "        print(len(train_inputs)) #一周目の答えは\n",
    "        print(len(train_inputs[0])) #\n",
    "        \n",
    "        \n",
    "        model.summary()\n",
    "        name = \"model_\" + str(count) + \".png\"\n",
    "        tf.keras.utils.plot_model(model, to_file=name)\n",
    "        count +=1\n",
    "        \n",
    "        \n",
    "        # history contains two lists of valid and test preds respectively:\n",
    "        #  [valid_predictions_{fold}, test_predictions_{fold}]\n",
    "        # Largeでbatch_sizeを8だとメモリ不足になるため6に調整 epochsを5から３にした→元の戻した\n",
    "        history = train_and_predict(model, \n",
    "                          train_data=(train_inputs, train_outputs), \n",
    "                          valid_data=(valid_inputs, valid_outputs),\n",
    "                          test_data=test_inputs, \n",
    "                            #learning_rateから「1e-5」→「 3e-5」\n",
    "                          learning_rate=3e-5, epochs=5, batch_size=8,\n",
    "                          loss_function='binary_crossentropy', fold=fold)\n",
    "        \n",
    "        \n",
    "\n",
    "        histories.append(history)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.preprocessing import MinMaxScaler\n",
    "\n",
    "\n",
    "\n",
    "def postprocessing(oof_df):\n",
    "   \n",
    "    scaler = MinMaxScaler()\n",
    "\n",
    "    type_one_column_list = [\n",
    "       'question_conversational', \\\n",
    "       'question_has_commonly_accepted_answer', \\\n",
    "       'question_not_really_a_question', \\\n",
    "       'question_type_choice', \\\n",
    "       'question_type_compare', \\\n",
    "       'question_type_consequence', \\\n",
    "       'question_type_definition', \\\n",
    "       'question_type_entity', \\\n",
    "       'question_type_instructions', \n",
    "    ]\n",
    "    \n",
    "    oof_df[type_one_column_list] = scaler.fit_transform(oof_df[type_one_column_list])\n",
    "    \n",
    "    tmp = oof_df.copy(deep=True)\n",
    "    \n",
    "    for column in type_one_column_list:\n",
    "        \n",
    "        oof_df.loc[tmp[column] <= 0.16667, column] = 0\n",
    "        oof_df.loc[(tmp[column] > 0.16667) & (tmp[column] <= 0.41667), column] = 0.333333\n",
    "        oof_df.loc[(tmp[column] > 0.41667) & (tmp[column] <= 0.58333), column] = 0.500000\n",
    "        oof_df.loc[(tmp[column] > 0.58333) & (tmp[column] <= 0.73333), column] = 0.666667\n",
    "        oof_df.loc[(tmp[column] > 0.73333), column] = 1\n",
    "    \n",
    "    \n",
    "   \n",
    "    \n",
    "    \n",
    "    ################################################# handle type 3 columns      \n",
    "    type_three_column_list = [\n",
    "       'question_interestingness_self', \n",
    "    ]\n",
    "    scaler = MinMaxScaler(feature_range=(0, 1))\n",
    "    oof_df[type_three_column_list] = scaler.fit_transform(oof_df[type_three_column_list])\n",
    "    tmp[type_three_column_list] = scaler.fit_transform(tmp[type_three_column_list])\n",
    "    \n",
    "    for column in type_three_column_list:\n",
    "        oof_df.loc[tmp[column] <= 0.385, column] = 0.333333\n",
    "        oof_df.loc[(tmp[column] > 0.385) & (tmp[column] <= 0.47), column] = 0.444444\n",
    "        oof_df.loc[(tmp[column] > 0.47) & (tmp[column] <= 0.525), column] = 0.5\n",
    "        oof_df.loc[(tmp[column] > 0.525) & (tmp[column] <= 0.605), column] = 0.555556\n",
    "        oof_df.loc[(tmp[column] > 0.605) & (tmp[column] <= 0.715), column] = 0.666667\n",
    "        oof_df.loc[(tmp[column] > 0.715) & (tmp[column] <= 0.8), column] = 0.833333\n",
    "        oof_df.loc[(tmp[column] > 0.8) & (tmp[column] <= 0.94), column] = 0.888889\n",
    "        oof_df.loc[(tmp[column] > 0.94), column] = 1\n",
    "        \n",
    "        \n",
    "        \n",
    "    type_four_column_list = [\n",
    "        'answer_satisfaction'\n",
    "    ]\n",
    "    scaler = MinMaxScaler(feature_range=(0.2, 1))\n",
    "    oof_df[type_four_column_list] = scaler.fit_transform(oof_df[type_four_column_list])\n",
    "    tmp[type_four_column_list] = scaler.fit_transform(tmp[type_four_column_list])\n",
    "    \n",
    "    for column in type_four_column_list:\n",
    "        \n",
    "        oof_df.loc[tmp[column] <= 0.233, column] = 0.200000\n",
    "        oof_df.loc[(tmp[column] > 0.233) & (tmp[column] <= 0.283), column] = 0.266667\n",
    "        oof_df.loc[(tmp[column] > 0.283) & (tmp[column] <= 0.315), column] = 0.300000\n",
    "        oof_df.loc[(tmp[column] > 0.315) & (tmp[column] <= 0.365), column] = 0.333333\n",
    "        oof_df.loc[(tmp[column] > 0.365) & (tmp[column] <= 0.433), column] = 0.400000\n",
    "        oof_df.loc[(tmp[column] > 0.433) & (tmp[column] <= 0.483), column] = 0.466667\n",
    "        oof_df.loc[(tmp[column] > 0.483) & (tmp[column] <= 0.517), column] = 0.500000\n",
    "        oof_df.loc[(tmp[column] > 0.517) & (tmp[column] <= 0.567), column] = 0.533333\n",
    "        oof_df.loc[(tmp[column] > 0.567) & (tmp[column] <= 0.633), column] = 0.600000\n",
    "        oof_df.loc[(tmp[column] > 0.633) & (tmp[column] <= 0.683), column] = 0.666667\n",
    "        oof_df.loc[(tmp[column] > 0.683) & (tmp[column] <= 0.715), column] = 0.700000\n",
    "        oof_df.loc[(tmp[column] > 0.715) & (tmp[column] <= 0.767), column] = 0.733333\n",
    "        oof_df.loc[(tmp[column] > 0.767) & (tmp[column] <= 0.833), column] = 0.800000\n",
    "        oof_df.loc[(tmp[column] > 0.883) & (tmp[column] <= 0.915), column] = 0.900000\n",
    "        oof_df.loc[(tmp[column] > 0.915) & (tmp[column] <= 0.967), column] = 0.933333\n",
    "        oof_df.loc[(tmp[column] > 0.967), column] = 1\n",
    "    \n",
    "    \n",
    "    oof_values = oof_df[TARGET_COLUMNS].values\n",
    "    DEGREE = len(oof_df)//45*9\n",
    "    oof_values = np.around(oof_values * DEGREE) / DEGREE  \n",
    "    oof_df[TARGET_COLUMNS] = oof_values\n",
    "    \n",
    "    return oof_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_predictions = [histories[i].test_predictions for i in range(len(histories))]\n",
    "test_predictions = [np.average(test_predictions[i], axis=0) for i in range(len(test_predictions))]\n",
    "test_predictions = np.mean(test_predictions, axis=0)\n",
    "\n",
    "#post process \n",
    "\n",
    "df_sub.iloc[:, 1:] = test_predictions\n",
    "\n",
    "#提出用に加工\n",
    "df_sub = postprocessing(df_sub) \n",
    "\n",
    "df_sub.to_csv('submission.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "#df_sub = postprocessing(df_sub) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  },
  "widgets": {
   "application/vnd.jupyter.widget-state+json": {
    "state": {
     "310411ea7eb0474284a449db736aa803": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "1.5.0",
      "model_name": "DescriptionStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "1.5.0",
       "_model_name": "DescriptionStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "1.2.0",
       "_view_name": "StyleView",
       "description_width": ""
      }
     },
     "425a789d044140ac9e5b492882e2a5f7": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "1.2.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "1.2.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "1.2.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "overflow_x": null,
       "overflow_y": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "51573fdd88b646119b4224ea99aea29b": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "1.2.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "1.2.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "1.2.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "overflow_x": null,
       "overflow_y": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "59527e1231e1400795cea7f4c477ab21": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "1.5.0",
      "model_name": "HBoxModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "1.5.0",
       "_model_name": "HBoxModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "1.5.0",
       "_view_name": "HBoxView",
       "box_style": "",
       "children": [
        "IPY_MODEL_f5e02903db024c87b7793931efcd2770",
        "IPY_MODEL_f573014b23344654a6c3447294e5bead"
       ],
       "layout": "IPY_MODEL_e7b563eb2e584dca8bd046b327040b68"
      }
     },
     "6ce4a4332bea45ccbf6b6c3df20cb4ac": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "1.5.0",
      "model_name": "ProgressStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "1.5.0",
       "_model_name": "ProgressStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "1.2.0",
       "_view_name": "StyleView",
       "bar_color": null,
       "description_width": "initial"
      }
     },
     "90cb37ef2c834e2aabc9ae268bd77cc2": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "1.5.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "1.5.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "1.5.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_tooltip": null,
       "layout": "IPY_MODEL_425a789d044140ac9e5b492882e2a5f7",
       "placeholder": "",
       "style": "IPY_MODEL_cc7781cb82db4bb294428f0483ebb176",
       "value": " 476/? [00:08&lt;00:00, 57.12it/s]"
      }
     },
     "9d6ad2a884fd43fb98121b15077d15ef": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "1.2.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "1.2.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "1.2.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "overflow_x": null,
       "overflow_y": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "9e5ed41366914a33a6b1fcbd992e2df0": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "1.5.0",
      "model_name": "HBoxModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "1.5.0",
       "_model_name": "HBoxModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "1.5.0",
       "_view_name": "HBoxView",
       "box_style": "",
       "children": [
        "IPY_MODEL_d4286c52a36e4821974598b32b5d87ab",
        "IPY_MODEL_90cb37ef2c834e2aabc9ae268bd77cc2"
       ],
       "layout": "IPY_MODEL_9d6ad2a884fd43fb98121b15077d15ef"
      }
     },
     "afa0da908a964e8eabbf673a669275b9": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "1.5.0",
      "model_name": "ProgressStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "1.5.0",
       "_model_name": "ProgressStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "1.2.0",
       "_view_name": "StyleView",
       "bar_color": null,
       "description_width": "initial"
      }
     },
     "bba2fc23f48b42208ac796e85148a3e3": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "1.2.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "1.2.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "1.2.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "overflow_x": null,
       "overflow_y": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "bf184a4683f14d21aa21f905350a4bd2": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "1.2.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "1.2.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "1.2.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "overflow_x": null,
       "overflow_y": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "cc7781cb82db4bb294428f0483ebb176": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "1.5.0",
      "model_name": "DescriptionStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "1.5.0",
       "_model_name": "DescriptionStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "1.2.0",
       "_view_name": "StyleView",
       "description_width": ""
      }
     },
     "d4286c52a36e4821974598b32b5d87ab": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "1.5.0",
      "model_name": "IntProgressModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "1.5.0",
       "_model_name": "IntProgressModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "1.5.0",
       "_view_name": "ProgressView",
       "bar_style": "success",
       "description": "",
       "description_tooltip": null,
       "layout": "IPY_MODEL_bf184a4683f14d21aa21f905350a4bd2",
       "max": 1,
       "min": 0,
       "orientation": "horizontal",
       "style": "IPY_MODEL_6ce4a4332bea45ccbf6b6c3df20cb4ac",
       "value": 1
      }
     },
     "e7b563eb2e584dca8bd046b327040b68": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "1.2.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "1.2.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "1.2.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "overflow_x": null,
       "overflow_y": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "f573014b23344654a6c3447294e5bead": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "1.5.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "1.5.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "1.5.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_tooltip": null,
       "layout": "IPY_MODEL_bba2fc23f48b42208ac796e85148a3e3",
       "placeholder": "",
       "style": "IPY_MODEL_310411ea7eb0474284a449db736aa803",
       "value": " 6079/? [01:15&lt;00:00, 81.02it/s]"
      }
     },
     "f5e02903db024c87b7793931efcd2770": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "1.5.0",
      "model_name": "IntProgressModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "1.5.0",
       "_model_name": "IntProgressModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "1.5.0",
       "_view_name": "ProgressView",
       "bar_style": "success",
       "description": "",
       "description_tooltip": null,
       "layout": "IPY_MODEL_51573fdd88b646119b4224ea99aea29b",
       "max": 1,
       "min": 0,
       "orientation": "horizontal",
       "style": "IPY_MODEL_afa0da908a964e8eabbf673a669275b9",
       "value": 1
      }
     }
    },
    "version_major": 2,
    "version_minor": 0
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}