@AmalVijayan
Created December 3, 2019 10:56
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "bert-for-tf2.ipynb",
"provenance": [],
"collapsed_sections": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"cells": [
{
"cell_type": "code",
"metadata": {
"id": "hml6o8_YNYWq",
"colab_type": "code",
"outputId": "14e8b058-19c0-4d0c-9c0c-907c7bc1f4b0",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
}
},
"source": [
"from google.colab import drive\n",
"drive.mount(\"/GD\")"
],
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"text": [
"Drive already mounted at /GD; to attempt to forcibly remount, call drive.mount(\"/GD\", force_remount=True).\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "SdAHqf6RNNa7",
"colab_type": "code",
"outputId": "41b6d9ea-766d-4aa9-9f8e-054a773f4606",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 102
}
},
"source": [
"try:\n",
" %tensorflow_version 2.x #gpu\n",
"except Exception:\n",
" pass\n",
"\n",
"import tensorflow as tf"
],
"execution_count": 2,
"outputs": [
{
"output_type": "stream",
"text": [
"`%tensorflow_version` only switches the major version: `1.x` or `2.x`.\n",
"You set: `2.x #gpu`. This will be interpreted as: `2.x`.\n",
"\n",
"\n",
"TensorFlow 2.x selected.\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "VIsetAbCam6y",
"colab_type": "code",
"colab": {}
},
"source": [
"#from tensorflow import keras\n",
"import os\n",
"import re\n",
"import pandas as pd\n",
"train = pd.read_excel(\"/GD/My Drive/Colab Notebooks/News_category/Datasets/Data_Train.xlsx\")\n",
"pred_set = pd.read_excel(\"/GD/My Drive/Colab Notebooks/News_category/Datasets/Data_Test.xlsx\")\n",
"\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"train, test = train_test_split(train, test_size = 0.2, random_state = 120)"
],
"execution_count": 0,
"outputs": []
},
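{
"cell_type": "markdown",
"metadata": {
"id": "ed_note_labels",
"colab_type": "text"
},
"source": [
"A quick sanity check on the labels before modelling: the previews below show SECTION as small integers (0, 2 and 3 are visible), so this sketch assumes it is an integer class id and simply counts the classes and their distribution."
]
},
{
"cell_type": "code",
"metadata": {
"id": "ed_check_labels",
"colab_type": "code",
"colab": {}
},
"source": [
"# Assumes SECTION holds integer class ids, as the previews suggest.\n",
"num_classes = train['SECTION'].nunique()\n",
"print(\"classes:\", num_classes)\n",
"print(train['SECTION'].value_counts())"
],
"execution_count": 0,
"outputs": []
},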
{
"cell_type": "code",
"metadata": {
"id": "F10cSOtzYEbK",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"outputId": "6989d57b-a31c-4315-809c-829761de815c"
},
"source": [
"train.head()"
],
"execution_count": 4,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>STORY</th>\n",
" <th>SECTION</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1481</th>\n",
" <td>At 10:01 a.m. ET, the Dow Jones Industrial Ave...</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>630</th>\n",
" <td>With the introduction of algorithmic ranking w...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6182</th>\n",
" <td>Prior to this, two sitting MPs and a sitting M...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>743</th>\n",
" <td>Senior advocate Abhishek Manu Singhvi, appeari...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3947</th>\n",
" <td>The proportion of candidates of the Bharatiya ...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" STORY SECTION\n",
"1481 At 10:01 a.m. ET, the Dow Jones Industrial Ave... 3\n",
"630 With the introduction of algorithmic ranking w... 0\n",
"6182 Prior to this, two sitting MPs and a sitting M... 0\n",
"743 Senior advocate Abhishek Manu Singhvi, appeari... 0\n",
"3947 The proportion of candidates of the Bharatiya ... 0"
]
},
"metadata": {
"tags": []
},
"execution_count": 4
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "GFQX8ONWX_db",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"outputId": "ca3266f4-d8b4-40cb-d172-82cd529446da"
},
"source": [
"test.head()"
],
"execution_count": 5,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>STORY</th>\n",
" <th>SECTION</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>5232</th>\n",
" <td>Netflix has released the first look of upcomin...</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6534</th>\n",
" <td>Upper castes, mostly Rajputs, dominate the sta...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1926</th>\n",
" <td>In 2017-18, the company reported a revenue of ...</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3956</th>\n",
" <td>Sonia Gandhi's confidence comes from the peopl...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3959</th>\n",
" <td>“The Ganga is an emotive issue for people here...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" STORY SECTION\n",
"5232 Netflix has released the first look of upcomin... 2\n",
"6534 Upper castes, mostly Rajputs, dominate the sta... 0\n",
"1926 In 2017-18, the company reported a revenue of ... 3\n",
"3956 Sonia Gandhi's confidence comes from the peopl... 0\n",
"3959 “The Ganga is an emotive issue for people here... 0"
]
},
"metadata": {
"tags": []
},
"execution_count": 5
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "7Z0y5_sjwMjT",
"colab_type": "code",
"outputId": "458c2a04-adb9-4d29-dd15-2220e0181eb5",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 374
}
},
"source": [
"!pip install bert-for-tf2"
],
"execution_count": 6,
"outputs": [
{
"output_type": "stream",
"text": [
"Collecting bert-for-tf2\n",
" Downloading https://files.pythonhosted.org/packages/c2/d8/14e0cfa03bbeb72c314f0648267c490bcceec5e8fb25081ec31307b5509c/bert-for-tf2-0.12.6.tar.gz\n",
"Collecting py-params>=0.7.3\n",
" Downloading https://files.pythonhosted.org/packages/4d/d1/55d228c9a8fa565c518f410efdcb23baaf09a95e2ad637c012f64d5d1133/py-params-0.7.4.tar.gz\n",
"Collecting params-flow>=0.7.1\n",
" Downloading https://files.pythonhosted.org/packages/2b/44/eb1414c6c201bf2bdaa3f037d2f7f35d13f3242003278cef47bf8b3aa681/params-flow-0.7.2.tar.gz\n",
"Requirement already satisfied: numpy in /tensorflow-2.0.0/python3.6 (from params-flow>=0.7.1->bert-for-tf2) (1.17.4)\n",
"Requirement already satisfied: tqdm in /usr/local/lib/python3.6/dist-packages (from params-flow>=0.7.1->bert-for-tf2) (4.28.1)\n",
"Building wheels for collected packages: bert-for-tf2, py-params, params-flow\n",
" Building wheel for bert-for-tf2 (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for bert-for-tf2: filename=bert_for_tf2-0.12.6-cp36-none-any.whl size=29115 sha256=c29d820ccf02c2ab9a663f2f0926418cea9e209321c6ca5d60a25d3ec5942d85\n",
" Stored in directory: /root/.cache/pip/wheels/24/19/54/51eeca468b219a1bc910c54aff87f0648b28a1fb71c115ba0f\n",
" Building wheel for py-params (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for py-params: filename=py_params-0.7.4-cp36-none-any.whl size=4352 sha256=a5d2d3fbff846032e1c2dcfbca3bbf35c619e73cd84ed13d00df5aa1d38859c2\n",
" Stored in directory: /root/.cache/pip/wheels/89/12/f9/07461c9970813d0452e4459e9d8f8bc0a1b951e140abf74301\n",
" Building wheel for params-flow (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for params-flow: filename=params_flow-0.7.2-cp36-none-any.whl size=16141 sha256=e7897bc116c8ddedff009150fff94733652ec80d99c691d478d9cd9f5b1376ec\n",
" Stored in directory: /root/.cache/pip/wheels/e5/11/f0/cf35bb79050bd7ad8e058c98afeb3ac23c149060776c4283cf\n",
"Successfully built bert-for-tf2 py-params params-flow\n",
"Installing collected packages: py-params, params-flow, bert-for-tf2\n",
"Successfully installed bert-for-tf2-0.12.6 params-flow-0.7.2 py-params-0.7.4\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "8gNpx6Mcwdo1",
"colab_type": "code",
"outputId": "d6c10cfa-8bd1-4d03-9ca7-ed5b302fa278",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 102
}
},
"source": [
"!pip install sentencepiece"
],
"execution_count": 7,
"outputs": [
{
"output_type": "stream",
"text": [
"Collecting sentencepiece\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/14/3d/efb655a670b98f62ec32d66954e1109f403db4d937c50d779a75b9763a29/sentencepiece-0.1.83-cp36-cp36m-manylinux1_x86_64.whl (1.0MB)\n",
"\r\u001b[K |▎ | 10kB 18.8MB/s eta 0:00:01\r\u001b[K |▋ | 20kB 7.0MB/s eta 0:00:01\r\u001b[K |█ | 30kB 9.7MB/s eta 0:00:01\r\u001b[K |█▎ | 40kB 6.3MB/s eta 0:00:01\r\u001b[K |█▋ | 51kB 7.6MB/s eta 0:00:01\r\u001b[K |██ | 61kB 8.9MB/s eta 0:00:01\r\u001b[K |██▏ | 71kB 10.1MB/s eta 0:00:01\r\u001b[K |██▌ | 81kB 11.2MB/s eta 0:00:01\r\u001b[K |██▉ | 92kB 12.3MB/s eta 0:00:01\r\u001b[K |███▏ | 102kB 10.1MB/s eta 0:00:01\r\u001b[K |███▌ | 112kB 10.1MB/s eta 0:00:01\r\u001b[K |███▉ | 122kB 10.1MB/s eta 0:00:01\r\u001b[K |████ | 133kB 10.1MB/s eta 0:00:01\r\u001b[K |████▍ | 143kB 10.1MB/s eta 0:00:01\r\u001b[K |████▊ | 153kB 10.1MB/s eta 0:00:01\r\u001b[K |█████ | 163kB 10.1MB/s eta 0:00:01\r\u001b[K |█████▍ | 174kB 10.1MB/s eta 0:00:01\r\u001b[K |█████▊ | 184kB 10.1MB/s eta 0:00:01\r\u001b[K |██████ | 194kB 10.1MB/s eta 0:00:01\r\u001b[K |██████▎ | 204kB 10.1MB/s eta 0:00:01\r\u001b[K |██████▋ | 215kB 10.1MB/s eta 0:00:01\r\u001b[K |███████ | 225kB 10.1MB/s eta 0:00:01\r\u001b[K |███████▎ | 235kB 10.1MB/s eta 0:00:01\r\u001b[K |███████▋ | 245kB 10.1MB/s eta 0:00:01\r\u001b[K |███████▉ | 256kB 10.1MB/s eta 0:00:01\r\u001b[K |████████▏ | 266kB 10.1MB/s eta 0:00:01\r\u001b[K |████████▌ | 276kB 10.1MB/s eta 0:00:01\r\u001b[K |████████▉ | 286kB 10.1MB/s eta 0:00:01\r\u001b[K |█████████▏ | 296kB 10.1MB/s eta 0:00:01\r\u001b[K |█████████▌ | 307kB 10.1MB/s eta 0:00:01\r\u001b[K |█████████▊ | 317kB 10.1MB/s eta 0:00:01\r\u001b[K |██████████ | 327kB 10.1MB/s eta 0:00:01\r\u001b[K |██████████▍ | 337kB 10.1MB/s eta 0:00:01\r\u001b[K |██████████▊ | 348kB 10.1MB/s eta 0:00:01\r\u001b[K |███████████ | 358kB 10.1MB/s eta 0:00:01\r\u001b[K |███████████▍ | 368kB 10.1MB/s eta 0:00:01\r\u001b[K |███████████▋ | 378kB 10.1MB/s eta 0:00:01\r\u001b[K |████████████ | 389kB 10.1MB/s eta 0:00:01\r\u001b[K |████████████▎ | 399kB 10.1MB/s eta 0:00:01\r\u001b[K |████████████▋ | 409kB 10.1MB/s eta 0:00:01\r\u001b[K |█████████████ | 419kB 10.1MB/s eta 0:00:01\r\u001b[K |█████████████▎ | 430kB 10.1MB/s eta 0:00:01\r\u001b[K |█████████████▌ | 440kB 10.1MB/s eta 0:00:01\r\u001b[K |█████████████▉ | 450kB 10.1MB/s eta 0:00:01\r\u001b[K |██████████████▏ | 460kB 10.1MB/s eta 0:00:01\r\u001b[K |██████████████▌ | 471kB 10.1MB/s eta 0:00:01\r\u001b[K |██████████████▉ | 481kB 10.1MB/s eta 0:00:01\r\u001b[K |███████████████▏ | 491kB 10.1MB/s eta 0:00:01\r\u001b[K |███████████████▍ | 501kB 10.1MB/s eta 0:00:01\r\u001b[K |███████████████▊ | 512kB 10.1MB/s eta 0:00:01\r\u001b[K |████████████████ | 522kB 10.1MB/s eta 0:00:01\r\u001b[K |████████████████▍ | 532kB 10.1MB/s eta 0:00:01\r\u001b[K |████████████████▊ | 542kB 10.1MB/s eta 0:00:01\r\u001b[K |█████████████████ | 552kB 10.1MB/s eta 0:00:01\r\u001b[K |█████████████████▎ | 563kB 10.1MB/s eta 0:00:01\r\u001b[K |█████████████████▋ | 573kB 10.1MB/s eta 0:00:01\r\u001b[K |██████████████████ | 583kB 10.1MB/s eta 0:00:01\r\u001b[K |██████████████████▎ | 593kB 10.1MB/s eta 0:00:01\r\u001b[K |██████████████████▋ | 604kB 10.1MB/s eta 0:00:01\r\u001b[K |███████████████████ | 614kB 10.1MB/s eta 0:00:01\r\u001b[K |███████████████████▏ | 624kB 10.1MB/s eta 0:00:01\r\u001b[K |███████████████████▌ | 634kB 10.1MB/s eta 0:00:01\r\u001b[K |███████████████████▉ | 645kB 10.1MB/s eta 0:00:01\r\u001b[K |████████████████████▏ | 655kB 10.1MB/s eta 0:00:01\r\u001b[K |████████████████████▌ | 665kB 10.1MB/s eta 0:00:01\r\u001b[K |████████████████████▉ | 675kB 10.1MB/s eta 0:00:01\r\u001b[K |█████████████████████▏ | 686kB 10.1MB/s eta 0:00:01\r\u001b[K |█████████████████████▍ | 696kB 10.1MB/s eta 0:00:01\r\u001b[K 
|█████████████████████▊ | 706kB 10.1MB/s eta 0:00:01\r\u001b[K |██████████████████████ | 716kB 10.1MB/s eta 0:00:01\r\u001b[K |██████████████████████▍ | 727kB 10.1MB/s eta 0:00:01\r\u001b[K |██████████████████████▊ | 737kB 10.1MB/s eta 0:00:01\r\u001b[K |███████████████████████ | 747kB 10.1MB/s eta 0:00:01\r\u001b[K |███████████████████████▎ | 757kB 10.1MB/s eta 0:00:01\r\u001b[K |███████████████████████▋ | 768kB 10.1MB/s eta 0:00:01\r\u001b[K |████████████████████████ | 778kB 10.1MB/s eta 0:00:01\r\u001b[K |████████████████████████▎ | 788kB 10.1MB/s eta 0:00:01\r\u001b[K |████████████████████████▋ | 798kB 10.1MB/s eta 0:00:01\r\u001b[K |█████████████████████████ | 808kB 10.1MB/s eta 0:00:01\r\u001b[K |█████████████████████████▏ | 819kB 10.1MB/s eta 0:00:01\r\u001b[K |█████████████████████████▌ | 829kB 10.1MB/s eta 0:00:01\r\u001b[K |█████████████████████████▉ | 839kB 10.1MB/s eta 0:00:01\r\u001b[K |██████████████████████████▏ | 849kB 10.1MB/s eta 0:00:01\r\u001b[K |██████████████████████████▌ | 860kB 10.1MB/s eta 0:00:01\r\u001b[K |██████████████████████████▉ | 870kB 10.1MB/s eta 0:00:01\r\u001b[K |███████████████████████████ | 880kB 10.1MB/s eta 0:00:01\r\u001b[K |███████████████████████████▍ | 890kB 10.1MB/s eta 0:00:01\r\u001b[K |███████████████████████████▊ | 901kB 10.1MB/s eta 0:00:01\r\u001b[K |████████████████████████████ | 911kB 10.1MB/s eta 0:00:01\r\u001b[K |████████████████████████████▍ | 921kB 10.1MB/s eta 0:00:01\r\u001b[K |████████████████████████████▊ | 931kB 10.1MB/s eta 0:00:01\r\u001b[K |█████████████████████████████ | 942kB 10.1MB/s eta 0:00:01\r\u001b[K |█████████████████████████████▎ | 952kB 10.1MB/s eta 0:00:01\r\u001b[K |█████████████████████████████▋ | 962kB 10.1MB/s eta 0:00:01\r\u001b[K |██████████████████████████████ | 972kB 10.1MB/s eta 0:00:01\r\u001b[K |██████████████████████████████▎ | 983kB 10.1MB/s eta 0:00:01\r\u001b[K |██████████████████████████████▋ | 993kB 10.1MB/s eta 0:00:01\r\u001b[K |██████████████████████████████▉ | 1.0MB 10.1MB/s eta 0:00:01\r\u001b[K |███████████████████████████████▏| 1.0MB 10.1MB/s eta 0:00:01\r\u001b[K |███████████████████████████████▌| 1.0MB 10.1MB/s eta 0:00:01\r\u001b[K |███████████████████████████████▉| 1.0MB 10.1MB/s eta 0:00:01\r\u001b[K |████████████████████████████████| 1.0MB 10.1MB/s \n",
"\u001b[?25hInstalling collected packages: sentencepiece\n",
"Successfully installed sentencepiece-0.1.83\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "lvHyP3N0vEyw",
"colab_type": "code",
"colab": {}
},
"source": [
"import tensorflow_hub as hub\n",
"from tensorflow.keras.models import Model"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "7HjVGwopwtkz",
"colab_type": "code",
"colab": {}
},
"source": [
"from bert.tokenization.bert_tokenization import FullTokenizer"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "uIpL0ymoXfPG",
"colab_type": "code",
"colab": {}
},
"source": [
"def get_masks(tokens, max_seq_length):\n",
" \"\"\"Mask for padding\"\"\"\n",
" if len(tokens)>max_seq_length:\n",
" #raise IndexError(\"Token length more than max seq l ength!\")\n",
" tokens = tokens[0:max_seq_length]\n",
" return [1]*len(tokens)\n",
" else :\n",
" return [1]*len(tokens) + [0] * (max_seq_length - len(tokens))\n",
"\n",
"def get_segments(tokens, max_seq_length):\n",
" \"\"\"Segments: 0 for the first sequence, 1 for the second\"\"\"\n",
" if len(tokens)>max_seq_length:\n",
" #raise IndexError(\"Token length more than max seq length!\")\n",
" tokens = tokens[:max_seq_length]\n",
" segments = []\n",
" current_segment_id = 0\n",
" for token in tokens:\n",
" segments.append(current_segment_id)\n",
" if token == \"[SEP]\":\n",
" current_segment_id = 1\n",
" return segments\n",
" else:\n",
" segments = []\n",
" current_segment_id = 0\n",
" for token in tokens:\n",
" segments.append(current_segment_id)\n",
" if token == \"[SEP]\":\n",
" current_segment_id = 1\n",
" return segments + [0] * (max_seq_length - len(tokens))\n",
"\n",
"\n",
"def get_ids(tokens, tokenizer, max_seq_length):\n",
" \"\"\"Token ids from Tokenizer vocab\"\"\"\n",
" \n",
" if len(tokens)>max_seq_length:\n",
" tokens = tokens[:max_seq_length]\n",
" token_ids = tokenizer.convert_tokens_to_ids(tokens)\n",
" return token_ids\n",
" else:\n",
" token_ids = tokenizer.convert_tokens_to_ids(tokens)\n",
" input_ids = token_ids + [0] * (max_seq_length-len(token_ids))\n",
" return input_ids"
],
"execution_count": 0,
"outputs": []
},
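{
"cell_type": "markdown",
"metadata": {
"id": "ed_note_segments",
"colab_type": "text"
},
"source": [
"The segment ids only differ from zero for sentence-pair inputs: everything up to and including the first [SEP] belongs to segment 0, the rest to segment 1. A minimal sketch on a toy token list (no tokenizer needed):"
]
},
{
"cell_type": "code",
"metadata": {
"id": "ed_demo_segments",
"colab_type": "code",
"colab": {}
},
"source": [
"toy = [\"[CLS]\", \"how\", \"are\", \"you\", \"[SEP]\", \"fine\", \"[SEP]\"]\n",
"print(get_masks(toy, 10))     # 7 ones, then 3 zeros of padding\n",
"print(get_segments(toy, 10))  # 0 up to the first [SEP], 1 after it, 0 for padding"
],
"execution_count": 0,
"outputs": []
},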
{
"cell_type": "code",
"metadata": {
"id": "X-xxH8EHbY2v",
"colab_type": "code",
"colab": {}
},
"source": [
"def prep(s, get = 'id'):\n",
" stokens = tokenizer.tokenize(s)\n",
" stokens = [\"[CLS]\"] + stokens + [\"[SEP]\"]\n",
" if get == 'id':\n",
" input_ids = get_ids(stokens, tokenizer, max_seq_length)\n",
" return input_ids\n",
" elif get == 'mask':\n",
" input_masks = get_masks(stokens, max_seq_length)\n",
" return input_masks\n",
" else:\n",
" input_segments = get_segments(stokens, max_seq_length)\n",
" return input_segments"
],
"execution_count": 0,
"outputs": []
},
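{
"cell_type": "markdown",
"metadata": {
"id": "ed_note_prep",
"colab_type": "text"
},
"source": [
"prep re-tokenizes the string on every call, so building ids, masks and segments for one row tokenizes it three times. A sketch of a single-pass variant that reuses the same helpers and globals and returns all three at once:"
]
},
{
"cell_type": "code",
"metadata": {
"id": "ed_prep_all",
"colab_type": "code",
"colab": {}
},
"source": [
"def prep_all(s):\n",
"    # Tokenize once, derive ids, masks and segments from the same tokens.\n",
"    stokens = [\"[CLS]\"] + tokenizer.tokenize(s) + [\"[SEP]\"]\n",
"    return (get_ids(stokens, tokenizer, max_seq_length),\n",
"            get_masks(stokens, max_seq_length),\n",
"            get_segments(stokens, max_seq_length))"
],
"execution_count": 0,
"outputs": []
},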
{
"cell_type": "code",
"metadata": {
"id": "mMWxG4q4YUug",
"colab_type": "code",
"colab": {}
},
"source": [
"bert_layer = hub.KerasLayer(\"https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/1\",trainable=True)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "csIOK88KvvJI",
"colab_type": "code",
"colab": {}
},
"source": [
"vocab_file = bert_layer.resolved_object.vocab_file.asset_path.numpy()\n",
"do_lower_case = bert_layer.resolved_object.do_lower_case.numpy()\n",
"tokenizer = FullTokenizer(vocab_file, do_lower_case)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "zAsxhVgCfS4o",
"colab_type": "code",
"outputId": "9a0b1c51-f731-4343-94c5-138e3c6381dc",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 88
}
},
"source": [
"#TESTING\n",
"max_seq_length = 128 # Your choice here.\n",
"\n",
"s1 = train['STORY'].iloc[0]\n",
"stokens1 = tokenizer.tokenize(s1)\n",
"stokens1 = [\"[CLS]\"] + stokens1 + [\"[SEP]\"]\n",
"\n",
"input_ids1 = get_ids(stokens1, tokenizer, max_seq_length)\n",
"input_masks1 = get_masks(stokens1, max_seq_length)\n",
"input_segments1 = get_segments(stokens1, max_seq_length)\n",
"\n",
"print(\"IDS # len:\" , len(input_ids1), \" ::: \",input_ids1)\n",
"print(\"MASKS # len:\" , len(input_masks1), \" ::: \",input_masks1)\n",
"print(\"SEGEMNTS # len:\" , len(input_segments1), \" ::: \",input_segments1)"
],
"execution_count": 14,
"outputs": [
{
"output_type": "stream",
"text": [
"IDS # len: 128 ::: [101, 2012, 2184, 1024, 5890, 1037, 1012, 1049, 1012, 3802, 1010, 1996, 23268, 3557, 3919, 2779, 2001, 2091, 4229, 1012, 2538, 2685, 1010, 2030, 1014, 1012, 2403, 1003, 1010, 2012, 2656, 1010, 4261, 2549, 1012, 5641, 1012, 1996, 1055, 1004, 1052, 3156, 2001, 2091, 1017, 1012, 5757, 2685, 1010, 2030, 1014, 1012, 2340, 1003, 1010, 2012, 1016, 1010, 3938, 2549, 1012, 3486, 1998, 1996, 17235, 2850, 4160, 12490, 2001, 2091, 1023, 1012, 4466, 2685, 1010, 2030, 1014, 1012, 2260, 1003, 1010, 2012, 1021, 1010, 5989, 2549, 1012, 6273, 1012, 1997, 1996, 2756, 1055, 1004, 1052, 3156, 3316, 7316, 3463, 2061, 2521, 1010, 6535, 1012, 1017, 1003, 2031, 15602, 2034, 1011, 4284, 16565, 10197, 1010, 2682, 1996, 2779, 1997, 2627, 2176, 7728, 1010, 2429, 2000, 25416, 5498, 29068, 2951]\n",
"MASKS # len: 128 ::: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]\n",
"SEGEMNTS # len: 128 ::: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"colab_type": "code",
"id": "ZOoTerQiOLQG",
"colab": {}
},
"source": [
"input_word_ids = tf.keras.layers.Input(shape=(max_seq_length,), dtype=tf.int32,\n",
" name=\"input_word_ids\")\n",
"input_mask = tf.keras.layers.Input(shape=(max_seq_length,), dtype=tf.int32,\n",
" name=\"input_mask\")\n",
"segment_ids = tf.keras.layers.Input(shape=(max_seq_length,), dtype=tf.int32,\n",
" name=\"segment_ids\")\n",
"bert_inputs = [input_word_ids, input_mask, segment_ids]\n",
"\n",
"pooled_output, _ = bert_layer(bert_inputs) \n",
"\n",
"dense = tf.keras.layers.Dense(256, activation='relu')(pooled_output)\n",
"\n",
"pred = tf.keras.layers.Dense(1, activation='sigmoid')(dense)\n",
"\n",
"model = Model(inputs=bert_inputs, outputs=pred)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "5Y6pEy3_2J97",
"colab_type": "code",
"outputId": "2b3a038d-23ff-496e-dcca-13e7a7b5fff5",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 391
}
},
"source": [
"model.summary()"
],
"execution_count": 16,
"outputs": [
{
"output_type": "stream",
"text": [
"Model: \"model\"\n",
"__________________________________________________________________________________________________\n",
"Layer (type) Output Shape Param # Connected to \n",
"==================================================================================================\n",
"input_word_ids (InputLayer) [(None, 128)] 0 \n",
"__________________________________________________________________________________________________\n",
"input_mask (InputLayer) [(None, 128)] 0 \n",
"__________________________________________________________________________________________________\n",
"segment_ids (InputLayer) [(None, 128)] 0 \n",
"__________________________________________________________________________________________________\n",
"keras_layer (KerasLayer) [(None, 768), (None, 109482241 input_word_ids[0][0] \n",
" input_mask[0][0] \n",
" segment_ids[0][0] \n",
"__________________________________________________________________________________________________\n",
"dense (Dense) (None, 256) 196864 keras_layer[0][0] \n",
"__________________________________________________________________________________________________\n",
"dense_1 (Dense) (None, 1) 257 dense[0][0] \n",
"==================================================================================================\n",
"Total params: 109,679,362\n",
"Trainable params: 109,679,361\n",
"Non-trainable params: 1\n",
"__________________________________________________________________________________________________\n"
],
"name": "stdout"
}
]
},
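{
"cell_type": "markdown",
"metadata": {
"id": "ed_note_head",
"colab_type": "text"
},
"source": [
"The head above is a single sigmoid unit, which fits binary classification, while SECTION takes several integer values. A sketch of a multi-class head on the same BERT inputs, assuming num_classes from the label check earlier:"
]
},
{
"cell_type": "code",
"metadata": {
"id": "ed_model_mc",
"colab_type": "code",
"colab": {}
},
"source": [
"# Softmax over num_classes instead of one sigmoid unit; reuses bert_inputs\n",
"# and pooled_output from the model-building cell above. num_classes is\n",
"# assumed to come from the earlier label check.\n",
"dense_mc = tf.keras.layers.Dense(256, activation='relu')(pooled_output)\n",
"pred_mc = tf.keras.layers.Dense(num_classes, activation='softmax')(dense_mc)\n",
"model_mc = Model(inputs=bert_inputs, outputs=pred_mc)"
],
"execution_count": 0,
"outputs": []
},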
{
"cell_type": "code",
"metadata": {
"id": "b1TH-1VNnneq",
"colab_type": "code",
"colab": {}
},
"source": [
"model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])"
],
"execution_count": 0,
"outputs": []
},
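{
"cell_type": "markdown",
"metadata": {
"id": "ed_note_loss",
"colab_type": "text"
},
"source": [
"categorical_crossentropy expects one-hot targets, but train.SECTION is a plain integer column (and the sigmoid head has a single unit). With the softmax head sketched above, sparse_categorical_crossentropy matches integer labels directly:"
]
},
{
"cell_type": "code",
"metadata": {
"id": "ed_compile_mc",
"colab_type": "code",
"colab": {}
},
"source": [
"# Integer labels + softmax head -> sparse categorical crossentropy.\n",
"model_mc.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])"
],
"execution_count": 0,
"outputs": []
},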
{
"cell_type": "code",
"metadata": {
"id": "_9cujFyv3aqU",
"colab_type": "code",
"colab": {}
},
"source": [
"INPUT_IDS = list(train['STORY'].apply(lambda x: prep(x, get = 'id')))"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "Elk1yYaN4pHh",
"colab_type": "code",
"colab": {}
},
"source": [
"INPUT_MASKS = list(train['STORY'].apply(lambda x: prep(x, get = 'mask')))"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "-qgbq7mH49i3",
"colab_type": "code",
"colab": {}
},
"source": [
"INPUT_SEGS = list(train['STORY'].apply(lambda x: prep(x, get = 'seg')))"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "Q2KSXgVA4d0A",
"colab_type": "code",
"outputId": "a1ff2513-ec1b-4849-dc4d-348328ba4630",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 358
}
},
"source": [
"model.fit([INPUT_IDS,INPUT_MASKS,INPUT_SEGS], list(train.SECTION))"
],
"execution_count": 31,
"outputs": [
{
"output_type": "error",
"ename": "ValueError",
"evalue": "ignored",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-31-d9f382cba5d4>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mINPUT_IDS\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mINPUT_MASKS\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mINPUT_SEGS\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrain\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSECTION\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m/tensorflow-2.0.0/python3.6/tensorflow_core/python/keras/engine/training.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)\u001b[0m\n\u001b[1;32m 726\u001b[0m \u001b[0mmax_queue_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmax_queue_size\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 727\u001b[0m \u001b[0mworkers\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mworkers\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 728\u001b[0;31m use_multiprocessing=use_multiprocessing)\n\u001b[0m\u001b[1;32m 729\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 730\u001b[0m def evaluate(self,\n",
"\u001b[0;32m/tensorflow-2.0.0/python3.6/tensorflow_core/python/keras/engine/training_v2.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, **kwargs)\u001b[0m\n\u001b[1;32m 222\u001b[0m \u001b[0mvalidation_data\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mvalidation_data\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 223\u001b[0m \u001b[0mvalidation_steps\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mvalidation_steps\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 224\u001b[0;31m distribution_strategy=strategy)\n\u001b[0m\u001b[1;32m 225\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 226\u001b[0m \u001b[0mtotal_samples\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_get_total_number_of_samples\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtraining_data_adapter\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/tensorflow-2.0.0/python3.6/tensorflow_core/python/keras/engine/training_v2.py\u001b[0m in \u001b[0;36m_process_training_inputs\u001b[0;34m(model, x, y, batch_size, epochs, sample_weights, class_weights, steps_per_epoch, validation_split, validation_data, validation_steps, shuffle, distribution_strategy, max_queue_size, workers, use_multiprocessing)\u001b[0m\n\u001b[1;32m 545\u001b[0m \u001b[0mmax_queue_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmax_queue_size\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 546\u001b[0m \u001b[0mworkers\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mworkers\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 547\u001b[0;31m use_multiprocessing=use_multiprocessing)\n\u001b[0m\u001b[1;32m 548\u001b[0m \u001b[0mval_adapter\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 549\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mvalidation_data\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/tensorflow-2.0.0/python3.6/tensorflow_core/python/keras/engine/training_v2.py\u001b[0m in \u001b[0;36m_process_inputs\u001b[0;34m(model, x, y, batch_size, epochs, sample_weights, class_weights, shuffle, steps, distribution_strategy, max_queue_size, workers, use_multiprocessing)\u001b[0m\n\u001b[1;32m 604\u001b[0m \u001b[0mmax_queue_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmax_queue_size\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 605\u001b[0m \u001b[0mworkers\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mworkers\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 606\u001b[0;31m use_multiprocessing=use_multiprocessing)\n\u001b[0m\u001b[1;32m 607\u001b[0m \u001b[0;31m# As a fallback for the data type that does not work with\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 608\u001b[0m \u001b[0;31m# _standardize_user_data, use the _prepare_model_with_inputs.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/tensorflow-2.0.0/python3.6/tensorflow_core/python/keras/engine/data_adapter.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, x, y, sample_weights, batch_size, shuffle, **kwargs)\u001b[0m\n\u001b[1;32m 477\u001b[0m self._internal_adapter = TensorLikeDataAdapter(\n\u001b[1;32m 478\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msample_weights\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msample_weights\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 479\u001b[0;31m batch_size=batch_size, shuffle=shuffle, **kwargs)\n\u001b[0m\u001b[1;32m 480\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 481\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mget_dataset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/tensorflow-2.0.0/python3.6/tensorflow_core/python/keras/engine/data_adapter.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, x, y, sample_weights, batch_size, epochs, steps, shuffle, **kwargs)\u001b[0m\n\u001b[1;32m 243\u001b[0m label, \", \".join([str(i.shape[0]) for i in nest.flatten(data)]))\n\u001b[1;32m 244\u001b[0m \u001b[0mmsg\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;34m\"Please provide data which shares the same first dimension.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 245\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmsg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 246\u001b[0m \u001b[0mnum_samples\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnum_samples\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 247\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mValueError\u001b[0m: Data cardinality is ambiguous:\n x sizes: 3\n y sizes: 6102\nPlease provide data which shares the same first dimension."
]
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "HDN_CHmlZY23",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 358
},
"outputId": "ab939bc3-6b25-43f4-b995-ae4cfc9d26e6"
},
"source": [
"model.fit([[INPUT_IDS],[INPUT_MASKS],[INPUT_SEGS]], list(train.SECTION))"
],
"execution_count": 32,
"outputs": [
{
"output_type": "error",
"ename": "ValueError",
"evalue": "ignored",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-32-901afbd62100>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mINPUT_IDS\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mINPUT_MASKS\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mINPUT_SEGS\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrain\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSECTION\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m/tensorflow-2.0.0/python3.6/tensorflow_core/python/keras/engine/training.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)\u001b[0m\n\u001b[1;32m 726\u001b[0m \u001b[0mmax_queue_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmax_queue_size\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 727\u001b[0m \u001b[0mworkers\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mworkers\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 728\u001b[0;31m use_multiprocessing=use_multiprocessing)\n\u001b[0m\u001b[1;32m 729\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 730\u001b[0m def evaluate(self,\n",
"\u001b[0;32m/tensorflow-2.0.0/python3.6/tensorflow_core/python/keras/engine/training_v2.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, **kwargs)\u001b[0m\n\u001b[1;32m 222\u001b[0m \u001b[0mvalidation_data\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mvalidation_data\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 223\u001b[0m \u001b[0mvalidation_steps\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mvalidation_steps\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 224\u001b[0;31m distribution_strategy=strategy)\n\u001b[0m\u001b[1;32m 225\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 226\u001b[0m \u001b[0mtotal_samples\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_get_total_number_of_samples\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtraining_data_adapter\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/tensorflow-2.0.0/python3.6/tensorflow_core/python/keras/engine/training_v2.py\u001b[0m in \u001b[0;36m_process_training_inputs\u001b[0;34m(model, x, y, batch_size, epochs, sample_weights, class_weights, steps_per_epoch, validation_split, validation_data, validation_steps, shuffle, distribution_strategy, max_queue_size, workers, use_multiprocessing)\u001b[0m\n\u001b[1;32m 545\u001b[0m \u001b[0mmax_queue_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmax_queue_size\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 546\u001b[0m \u001b[0mworkers\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mworkers\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 547\u001b[0;31m use_multiprocessing=use_multiprocessing)\n\u001b[0m\u001b[1;32m 548\u001b[0m \u001b[0mval_adapter\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 549\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mvalidation_data\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/tensorflow-2.0.0/python3.6/tensorflow_core/python/keras/engine/training_v2.py\u001b[0m in \u001b[0;36m_process_inputs\u001b[0;34m(model, x, y, batch_size, epochs, sample_weights, class_weights, shuffle, steps, distribution_strategy, max_queue_size, workers, use_multiprocessing)\u001b[0m\n\u001b[1;32m 604\u001b[0m \u001b[0mmax_queue_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmax_queue_size\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 605\u001b[0m \u001b[0mworkers\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mworkers\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 606\u001b[0;31m use_multiprocessing=use_multiprocessing)\n\u001b[0m\u001b[1;32m 607\u001b[0m \u001b[0;31m# As a fallback for the data type that does not work with\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 608\u001b[0m \u001b[0;31m# _standardize_user_data, use the _prepare_model_with_inputs.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/tensorflow-2.0.0/python3.6/tensorflow_core/python/keras/engine/data_adapter.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, x, y, sample_weights, batch_size, shuffle, **kwargs)\u001b[0m\n\u001b[1;32m 477\u001b[0m self._internal_adapter = TensorLikeDataAdapter(\n\u001b[1;32m 478\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msample_weights\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msample_weights\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 479\u001b[0;31m batch_size=batch_size, shuffle=shuffle, **kwargs)\n\u001b[0m\u001b[1;32m 480\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 481\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mget_dataset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/tensorflow-2.0.0/python3.6/tensorflow_core/python/keras/engine/data_adapter.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, x, y, sample_weights, batch_size, epochs, steps, shuffle, **kwargs)\u001b[0m\n\u001b[1;32m 243\u001b[0m label, \", \".join([str(i.shape[0]) for i in nest.flatten(data)]))\n\u001b[1;32m 244\u001b[0m \u001b[0mmsg\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;34m\"Please provide data which shares the same first dimension.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 245\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmsg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 246\u001b[0m \u001b[0mnum_samples\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnum_samples\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 247\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mValueError\u001b[0m: Data cardinality is ambiguous:\n x sizes: 3\n y sizes: 6102\nPlease provide data which shares the same first dimension."
]
}
]
}
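,
{
"cell_type": "markdown",
"metadata": {
"id": "ed_note_fit",
"colab_type": "text"
},
"source": [
"Both fit calls above fail the same way: x arrives as a plain Python list of three lists, so Keras reads a first dimension of 3 against 6102 labels. Stacking each input into a NumPy array of shape (n_samples, max_seq_length) resolves the cardinality mismatch. A sketch using the multi-class model from above:"
]
},
{
"cell_type": "code",
"metadata": {
"id": "ed_fit_fix",
"colab_type": "code",
"colab": {}
},
"source": [
"import numpy as np\n",
"\n",
"# One array per named input, shaped (n_samples, max_seq_length).\n",
"X_ids = np.array(INPUT_IDS)\n",
"X_masks = np.array(INPUT_MASKS)\n",
"X_segs = np.array(INPUT_SEGS)\n",
"y = np.array(train['SECTION'])\n",
"\n",
"model_mc.fit([X_ids, X_masks, X_segs], y, epochs=1, batch_size=32)"
],
"execution_count": 0,
"outputs": []
}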
]
}