Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save MikeOfZen/abadf58b9c68acd1b33c6e39af7b3f7a to your computer and use it in GitHub Desktop.
Save MikeOfZen/abadf58b9c68acd1b33c6e39af7b3f7a to your computer and use it in GitHub Desktop.
TPU training error .ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "TPU training error .ipynb",
"provenance": [],
"collapsed_sections": [],
"toc_visible": true,
"machine_shape": "hm",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"accelerator": "TPU"
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/MikeOfZen/abadf58b9c68acd1b33c6e39af7b3f7a/catsdogs-transfer-learning-inception-tpu-tensorboard.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"metadata": {
"id": "P8HTG5Ejqqrq",
"colab_type": "code",
"colab": {}
},
"source": [
"USING_TPU=True #Change this to switch between GPU and TPU (the accelerator must also be changed in the notebook settings, of course)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "gP7aBI2b-EQx",
"colab_type": "code",
"colab": {}
},
"source": [
"from google.colab import auth\n",
"auth.authenticate_user()"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "eALMi4wB9fRO",
"colab_type": "code",
"colab": {}
},
"source": [
"%tensorflow_version 2.x\n",
"#NOTE: TF 1.x was needed for the TPU to work properly; 2.x is selected above, so TPU training may fail\n",
"import tensorflow as tf\n",
"import matplotlib.pyplot as plt\n",
"import tensorflow_datasets as tfds\n",
"import os\n",
"import IPython\n",
"print(\"TF version:\",tf.version.GIT_VERSION, tf.version.VERSION)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "Z0fIuiLOL--R",
"colab_type": "code",
"colab": {}
},
"source": [
"#constants\n",
"IMG_HEIGHT=IMG_WIDTH=299\n",
"\n",
"DATASET_SIZE=23262\n",
"BATCH_SIZE=32\n"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "x2O14Njm-90k",
"colab_type": "text"
},
"source": [
"Dataset init"
]
},
{
"cell_type": "code",
"metadata": {
"id": "H561tB1H_P1R",
"colab_type": "code",
"colab": {}
},
"source": [
"@tf.function\n",
"def convert(image,label):\n",
"  \"\"\"Return the image converted to float32 and the label cast to float32 with a new leading axis.\"\"\"\n",
"  float_image=tf.image.convert_image_dtype(image, tf.float32)\n",
"  float_label=tf.cast(label,tf.float32)\n",
"  return (float_image,tf.expand_dims(float_label,0))\n",
"\n",
"@tf.function\n",
"def resize(image,label):\n",
"  \"\"\"Resize the image to (IMG_HEIGHT, IMG_WIDTH); the label is passed through untouched.\"\"\"\n",
"  target_size=(IMG_HEIGHT,IMG_WIDTH)\n",
"  return (tf.image.resize(image,target_size),label)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "iKEc3TzT-WND",
"colab_type": "code",
"colab": {}
},
"source": [
"#THE DATASET BUCKET MUST BE CHANGED FOR IT TO RUN!!!!!!!!!!!\n",
"DATA_DIR=\"gs://datasets_bucket_a/tmp/\"\n",
"\n",
"#10% test / 10% validation / 80% train, all carved out of the single \"train\" split.\n",
"#String slicing replaces the legacy tfds.Split.TRAIN.subsplit([10, 10, 80]) call,\n",
"#which is not available in current TFDS releases.\n",
"test_split, valid_split, train_split = \"train[:10%]\", \"train[10%:20%]\", \"train[20%:]\"\n",
"\n",
"def load_split(split):\n",
"  \"\"\"Load one slice of cats_vs_dogs as batches of (converted and resized image, label).\n",
"\n",
"  Args:\n",
"    split: TFDS split specification, e.g. one of the *_split values above.\n",
"\n",
"  Returns:\n",
"    A dataset mapped through convert() and resize() and batched by BATCH_SIZE.\n",
"  \"\"\"\n",
"  return (tfds.load(\"cats_vs_dogs\", split=split, data_dir=DATA_DIR, as_supervised=True)\n",
"          .map(convert)\n",
"          .map(resize)\n",
"          .batch(BATCH_SIZE))\n",
"\n",
"train_ds = load_split(train_split)\n",
"validation_ds = load_split(valid_split)#.cache()\n",
"test_ds = load_split(test_split)#.cache()\n"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "nx-smnX_hgf5",
"colab_type": "code",
"colab": {}
},
"source": [
"training_batches=int(DATASET_SIZE*0.8/BATCH_SIZE)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "ENcdVMAP-xpt",
"colab_type": "code",
"colab": {}
},
"source": [
"CLASSES=[\"Cat\",\"Dog\"]"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "Qh8yrs0s_Qn2",
"colab_type": "code",
"colab": {}
},
"source": [
"@tf.function\n",
"def augment(images,labels):\n",
"  \"\"\"Randomly perturb the images (flip, contrast, brightness, hue, saturation); labels are returned unchanged.\"\"\"\n",
"  flipped=tf.image.random_flip_left_right(images) #mirror\n",
"  #colour jitter, applied in a fixed order\n",
"  contrasted=tf.image.random_contrast(flipped, lower=0.5, upper=1.5)\n",
"  brightened=tf.image.random_brightness(contrasted, max_delta=0.2)\n",
"  hue_shifted=tf.image.random_hue(brightened,0.1)\n",
"  saturated=tf.image.random_saturation(hue_shifted,0.8,1.2)\n",
"  #some of the ops above seem to leave the [0..1] range, so clamp before returning\n",
"  return tf.clip_by_value(saturated,0,1),labels"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "aYeI4RPL_YYC",
"colab_type": "text"
},
"source": [
"Prepare training set"
]
},
{
"cell_type": "code",
"metadata": {
"id": "bofEKw7W_dJS",
"colab_type": "code",
"colab": {}
},
"source": [
"train_ds_aug=(\n",
" train_ds\n",
" .take(training_batches) #an attempt to solve the dataset cardinality problem; it doesn't affect the issue\n",
" #.cache() #must be disabled for TF 2 to work in a TPU setting\n",
" .repeat()\n",
" #.shuffle(30)\n",
" .map(augment)\n",
" .prefetch(2))"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "m-P__gd-_AAY",
"colab_type": "text"
},
"source": [
"Helper functions"
]
},
{
"cell_type": "code",
"metadata": {
"id": "sTbcgKJC-8FE",
"colab_type": "code",
"colab": {}
},
"source": [
"def show_item(ds):\n",
"  \"\"\"Plot the first element of ds (an image and its label), titled with the class name.\"\"\"\n",
"  item=next(iter(ds.take(1)))\n",
"  plt.imshow(item[0])\n",
"  #labels may be integer scalars or, after convert(), 1-element float vectors;\n",
"  #flatten and cast so either form is a valid index into CLASSES\n",
"  class_index=int(item[1].numpy().reshape(-1)[0])\n",
"  _=plt.title(f\"It's a {CLASSES[class_index]}\")\n",
"\n",
"def show_batch(ds):\n",
"  \"\"\"Plot up to 25 images from the first batch of ds on a 5x5 grid.\"\"\"\n",
"  image_batch, _=next(iter(ds))\n",
"  plt.figure(figsize=(10,10))\n",
"  #a batch can hold fewer than 25 images, so never index past its end\n",
"  for n in range(min(25,image_batch.shape[0])):\n",
"    plt.subplot(5,5,n+1)\n",
"    plt.imshow(image_batch[n])\n",
"    plt.axis('off')"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "YgdaK1Sw_IiJ",
"colab_type": "text"
},
"source": [
"# Model definition"
]
},
{
"cell_type": "code",
"metadata": {
"id": "pZcHLuunwdlU",
"colab_type": "code",
"colab": {}
},
"source": [
"#snippet to select layers above chosen layer.\n",
"def get_layers_above(cutoff_layer,model):\n",
"  \"\"\"Collect cutoff_layer and every layer of model reachable downstream of it.\n",
"\n",
"  A layer counts as downstream when one of its input tensors has the same name as\n",
"  an output tensor of a layer that was already collected.\n",
"\n",
"  Args:\n",
"    cutoff_layer: layer to start from; it is included in the result.\n",
"    model: object whose .layers are searched for consumers.\n",
"\n",
"  Returns:\n",
"    A list (in no particular order) of the collected layers.\n",
"  \"\"\"\n",
"  def wrap_list(val):\n",
"    #layer.input / layer.output may be a single tensor or a sequence of tensors\n",
"    return list(val) if isinstance(val,(list,tuple)) else [val]\n",
"\n",
"  def get_next_level(layer):\n",
"    #direct consumers: layers with an input tensor named like one of this layer's outputs\n",
"    output_names={t.name for t in wrap_list(layer.output)}\n",
"    return [x for x in model.layers\n",
"            if any(y.name in output_names for y in wrap_list(x.input))]\n",
"\n",
"  visited=set()\n",
"  to_visit={cutoff_layer}\n",
"\n",
"  while to_visit:\n",
"    layer=to_visit.pop()\n",
"    visited.add(layer)\n",
"    #only queue layers not seen yet, so a layer reached along several branches is expanded once\n",
"    to_visit.update(x for x in get_next_level(layer) if x not in visited)\n",
"  return list(visited)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "h292y488KUgU",
"colab_type": "code",
"colab": {}
},
"source": [
"def create_model():\n",
"  \"\"\"Build the classifier: ImageNet-weighted InceptionV3 without its top, plus a conv/dense head ending in one sigmoid unit.\n",
"\n",
"  Only the base layers returned by get_layers_above() for 'mixed8' are left trainable.\n",
"  \"\"\"\n",
"  input_shape=(IMG_HEIGHT,IMG_WIDTH,3)\n",
"  backbone = tf.keras.applications.InceptionV3(include_top=False,weights='imagenet',input_shape=input_shape)\n",
"  trainable_layers=get_layers_above(backbone.get_layer('mixed8'),backbone)\n",
"\n",
"  #freeze the whole backbone first, then re-enable the selected upper layers\n",
"  for layer in backbone.layers:\n",
"    layer.trainable = False\n",
"  for layer in trainable_layers:\n",
"    layer.trainable = True\n",
"\n",
"  #classification head stacked on the backbone output\n",
"  features = tf.keras.layers.Conv2D(filters=448,kernel_size=4,strides=2)(backbone.output)\n",
"  flat = tf.keras.layers.Flatten()(features)\n",
"  hidden = tf.keras.layers.Dense(units=128, activation='relu')(flat)\n",
"  hidden = tf.keras.layers.Dropout(rate=0.2)(hidden)\n",
"  prediction = tf.keras.layers.Dense(units=1, activation='sigmoid')(hidden)\n",
"\n",
"  return tf.keras.Model(inputs=backbone.input, outputs=prediction)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "IyyQ_xEIQ7KD",
"colab_type": "code",
"colab": {}
},
"source": [
"#cpu_model=create_model()\n",
"#cpu_model.summary()"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "2aYipz5OJgVB",
"colab_type": "code",
"colab": {}
},
"source": [
"#_=tf.keras.utils.plot_model(cpu_model, to_file=\"full_model.png\", show_shapes=True)\n",
"#IPython.display.Image(\"full_model.png\")"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "0MWlLSLpR0DS",
"colab_type": "text"
},
"source": [
"# TPU config"
]
},
{
"cell_type": "code",
"metadata": {
"id": "WN_0FujNM6Rb",
"colab_type": "code",
"colab": {}
},
"source": [
"if USING_TPU:\n",
"  #the TPU address is read from this environment variable; report whether it is set\n",
"  tpu_addr=os.environ.get('COLAB_TPU_ADDR')\n",
"  if tpu_addr is None:\n",
"    print(\"Must load TPU\")\n",
"  else:\n",
"    print(\"TPU Found: \"+tpu_addr)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "cqQbM792R7P4",
"colab_type": "code",
"colab": {}
},
"source": [
"if USING_TPU:\n",
"  #resolve the TPU at COLAB_TPU_ADDR, connect to it, initialise it, then build the distribution strategy\n",
"  tpu_address='grpc://' + os.environ['COLAB_TPU_ADDR']\n",
"  resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu=tpu_address)\n",
"  tf.config.experimental_connect_to_cluster(resolver)\n",
"  tf.tpu.experimental.initialize_tpu_system(resolver)\n",
"  strategy = tf.distribute.experimental.TPUStrategy(resolver)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "jHk1qIhqKb0b",
"colab_type": "code",
"colab": {}
},
"source": [
"def build_compiled_model():\n",
"  \"\"\"Create the model and compile it with Adam, binary cross-entropy loss and the 'acc' metric.\n",
"\n",
"  Kept in one place so the TPU and non-TPU paths cannot drift apart.\n",
"  \"\"\"\n",
"  compiled=create_model()\n",
"  compiled.compile(\n",
"    optimizer=tf.keras.optimizers.Adam(),\n",
"    loss=\"binary_crossentropy\",\n",
"    metrics=['acc'])\n",
"  return compiled\n",
"\n",
"if USING_TPU:\n",
"  #on TPU the model is created and compiled inside the strategy scope\n",
"  with strategy.scope():\n",
"    model=build_compiled_model()\n",
"else:\n",
"  model=build_compiled_model()"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "58scjZ2eSFgx",
"colab_type": "text"
},
"source": [
"# TRAINING"
]
},
{
"cell_type": "code",
"metadata": {
"id": "KoTruG99SEhX",
"colab_type": "code",
"colab": {}
},
"source": [
"model.fit(train_ds_aug,epochs=4,steps_per_epoch=training_batches)#,validation_data=validation_ds,validation_steps=10,validation_freq=5)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "n3JoQXmETqSc",
"colab_type": "code",
"colab": {}
},
"source": [
"model.evaluate(test_ds)"
],
"execution_count": 0,
"outputs": []
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment