devmvrborges/tensorflow-detectar-objetos.ipynb Secret

## tensorflow-detectar-objetos.ipynb
{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "accelerator": "GPU",
    "colab": {
      "name": "TensorFlow - Detectar objetos",
      "provenance": [],
      "private_outputs": true,
      "collapsed_sections": [],
      "toc_visible": true,
      "include_colab_link": true
    },
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/gist/devmvrborges/f00910f62da88a149a2f5e2aa5e7ebfe/tensorflow-detectar-objetos.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "cellView": "both",
        "colab_type": "code",
        "id": "KUu4vOt5zI9d",
        "colab": {}
      },
      "source": [
        "#--------------\n",
        "#\n",
        "# ummaker.com\n",
        "# TensorFlow - https://www.tensorflow.org/?hl=pt-br\n",
        "# Colab - https://colab.research.google.com/\n",
        "#\n",
        "#--------------"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "colab_type": "text",
        "id": "v4XGxDrCkeip"
      },
      "source": [
        "## Setup\n"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "cellView": "both",
        "colab_type": "code",
        "id": "6cPY9Ou4sWs_",
        "colab": {}
      },
      "source": [
        "# Nosso cérebro, TensorFlow para utilizar o módulo TensorFlow HUB\n",
        "import tensorflow as tf\n",
        "import tensorflow_hub as hub\n",
        "\n",
        "# para realizar downloads de imagens\n",
        "import matplotlib.pyplot as plt\n",
        "import tempfile\n",
        "from six.moves.urllib.request import urlopen\n",
        "from six import BytesIO\n",
        "\n",
        "# para exibir imagens\n",
        "import numpy as np\n",
        "from PIL import Image\n",
        "from PIL import ImageColor\n",
        "from PIL import ImageDraw\n",
        "from PIL import ImageFont\n",
        "from PIL import ImageOps\n",
        "\n",
        "# para medir o tempo de execução\n",
        "import time\n",
        "\n",
        "# Versão TensorFlow\n",
        "print(\"Versão TensorFlow: %s\" % tf.__version__)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "9QArrZZ0WciH",
        "colab_type": "text"
      },
      "source": [
        "## Funções necessárias"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "CuRnbgZ5VVFp",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "def display_image(image):\n",
        "  \"\"\"Show `image` with matplotlib on a new 20x15-inch figure, grid off.\"\"\"\n",
        "  plt.figure(figsize=(20, 15))\n",
        "  plt.grid(False)\n",
        "  plt.imshow(image)\n",
        "\n",
        "\n",
        "def download_and_resize_image(url, new_width=256, new_height=256,\n",
        "                              display=False):\n",
        "  \"\"\"Download an image, fit it to the requested size and save it as a JPEG.\n",
        "\n",
        "  Args:\n",
        "    url: Address of the image to fetch.\n",
        "    new_width: Width of the output image in pixels.\n",
        "    new_height: Height of the output image in pixels.\n",
        "    display: If true, show the resized image with display_image().\n",
        "\n",
        "  Returns:\n",
        "    Path of the temporary \".jpg\" file that holds the resized RGB image.\n",
        "    The file is not removed by this function.\n",
        "  \"\"\"\n",
        "  import os  # Local import: only needed to close the mkstemp descriptor.\n",
        "\n",
        "  fd, filename = tempfile.mkstemp(suffix=\".jpg\")\n",
        "  # mkstemp() hands back an already-open descriptor; the image is written\n",
        "  # through the path instead, so close it here rather than leak it.\n",
        "  os.close(fd)\n",
        "  response = urlopen(url)\n",
        "  try:\n",
        "    image_data = BytesIO(response.read())\n",
        "  finally:\n",
        "    response.close()\n",
        "  pil_image = Image.open(image_data)\n",
        "  # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.\n",
        "  pil_image = ImageOps.fit(pil_image, (new_width, new_height), Image.LANCZOS)\n",
        "  pil_image_rgb = pil_image.convert(\"RGB\")\n",
        "  pil_image_rgb.save(filename, format=\"JPEG\", quality=90)\n",
        "  print(\"Image downloaded to %s.\" % filename)\n",
        "  if display:\n",
        "    display_image(pil_image)\n",
        "  return filename\n",
        "\n",
        "\n",
        "def draw_bounding_box_on_image(image,\n",
        "                               ymin,\n",
        "                               xmin,\n",
        "                               ymax,\n",
        "                               xmax,\n",
        "                               color,\n",
        "                               font,\n",
        "                               thickness=4,\n",
        "                               display_str_list=()):\n",
        "  \"\"\"Adds a bounding box, with optional stacked labels, to an image.\n",
        "\n",
        "  The image is drawn on in place. Box coordinates are multiplied by the\n",
        "  image width/height, i.e. they are treated as fractions of the image size.\n",
        "\n",
        "  Args:\n",
        "    image: PIL image to draw on.\n",
        "    ymin, xmin, ymax, xmax: Box edges as fractions of the image size.\n",
        "    color: Colour of the box outline and of the label background.\n",
        "    font: PIL font used to measure and draw the labels.\n",
        "    thickness: Line width of the box outline.\n",
        "    display_str_list: Label strings; the last one is drawn closest to the box.\n",
        "  \"\"\"\n",
        "\n",
        "  def text_size(text):\n",
        "    \"\"\"Returns (width, height) of `text` measured from the drawing origin.\"\"\"\n",
        "    # font.getsize() was removed in Pillow 10. The right/bottom edges of\n",
        "    # getbbox() are used in its place; fonts without getbbox() keep using\n",
        "    # getsize().\n",
        "    if hasattr(font, \"getbbox\"):\n",
        "      return font.getbbox(text)[2:4]\n",
        "    return font.getsize(text)\n",
        "\n",
        "  draw = ImageDraw.Draw(image)\n",
        "  im_width, im_height = image.size\n",
        "  (left, right, top, bottom) = (xmin * im_width, xmax * im_width,\n",
        "                                ymin * im_height, ymax * im_height)\n",
        "  draw.line([(left, top), (left, bottom), (right, bottom), (right, top),\n",
        "             (left, top)],\n",
        "            width=thickness,\n",
        "            fill=color)\n",
        "\n",
        "  # If the total height of the display strings added to the top of the bounding\n",
        "  # box exceeds the top of the image, stack the strings below the bounding box\n",
        "  # instead of above.\n",
        "  display_str_heights = [text_size(ds)[1] for ds in display_str_list]\n",
        "  # Each display_str has a top and bottom margin of 0.05x.\n",
        "  total_display_str_height = (1 + 2 * 0.05) * sum(display_str_heights)\n",
        "\n",
        "  if top > total_display_str_height:\n",
        "    text_bottom = top\n",
        "  else:\n",
        "    text_bottom = top + total_display_str_height\n",
        "  # Reverse list and print from bottom to top.\n",
        "  for display_str in display_str_list[::-1]:\n",
        "    text_width, text_height = text_size(display_str)\n",
        "    margin = np.ceil(0.05 * text_height)\n",
        "    draw.rectangle([(left, text_bottom - text_height - 2 * margin),\n",
        "                    (left + text_width, text_bottom)],\n",
        "                   fill=color)\n",
        "    draw.text((left + margin, text_bottom - text_height - margin),\n",
        "              display_str,\n",
        "              fill=\"black\",\n",
        "              font=font)\n",
        "    # The label background is text_height + 2 * margin tall, so step up by\n",
        "    # that full amount; stepping by less makes stacked labels overlap.\n",
        "    text_bottom -= text_height + 2 * margin\n",
        "\n",
        "\n",
        "def draw_boxes(image, boxes, class_names, scores, max_boxes=10, min_score=0.1):\n",
        "  \"\"\"Overlay labeled boxes on an image with formatted scores and label names.\n",
        "\n",
        "  Args:\n",
        "    image: Numpy image array; it is drawn on in place and also returned.\n",
        "    boxes: Array whose rows are (ymin, xmin, ymax, xmax).\n",
        "    class_names: Bytes labels (decoded as ASCII), indexed like `boxes`.\n",
        "    scores: Scores indexed like `boxes`.\n",
        "    max_boxes: Only the first `max_boxes` rows are considered.\n",
        "    min_score: Rows scoring below this value are not drawn.\n",
        "\n",
        "  Returns:\n",
        "    The same `image` array, with the boxes drawn into it.\n",
        "  \"\"\"\n",
        "  colors = list(ImageColor.colormap.values())\n",
        "\n",
        "  try:\n",
        "    font = ImageFont.truetype(\"/usr/share/fonts/truetype/liberation/LiberationSansNarrow-Regular.ttf\",\n",
        "                              25)\n",
        "  except IOError:\n",
        "    print(\"Font not found, using default font.\")\n",
        "    font = ImageFont.load_default()\n",
        "\n",
        "  # Convert to PIL lazily and only once, instead of round-tripping the whole\n",
        "  # array for every box; all boxes are drawn on the same PIL image.\n",
        "  image_pil = None\n",
        "  for i in range(min(boxes.shape[0], max_boxes)):\n",
        "    if scores[i] < min_score:\n",
        "      continue\n",
        "    if image_pil is None:\n",
        "      image_pil = Image.fromarray(np.uint8(image)).convert(\"RGB\")\n",
        "    ymin, xmin, ymax, xmax = tuple(boxes[i])\n",
        "    display_str = \"{}: {}%\".format(class_names[i].decode(\"ascii\"),\n",
        "                                   int(100 * scores[i]))\n",
        "    color = colors[hash(class_names[i]) % len(colors)]\n",
        "    draw_bounding_box_on_image(\n",
        "        image_pil,\n",
        "        ymin,\n",
        "        xmin,\n",
        "        ymax,\n",
        "        xmax,\n",
        "        color,\n",
        "        font,\n",
        "        display_str_list=[display_str])\n",
        "  if image_pil is not None:\n",
        "    # Write the drawn pixels back into the caller's array.\n",
        "    np.copyto(image, np.array(image_pil))\n",
        "  return image\n"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "colab_type": "text",
        "id": "D19UCu9Q2-_8"
      },
      "source": [
        "## Aplicar o modelo\n",
        "\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "colab_type": "text",
        "id": "t-VdfLbC1w51"
      },
      "source": [
        "Base de modelos matemáticos\n",
        "\n",
        "* **FasterRCNN+InceptionResNet V2**: high accuracy,\n",
        "* **ssd+mobilenet V2**: small and fast."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "uazJ5ASc2_QE",
        "colab": {}
      },
      "source": [
        "# TF Hub handle of the detection model. The trailing #@param comment lists\n",
        "# the two selectable handles (presumably rendered by Colab as a form field).\n",
        "module_handle = \"https://tfhub.dev/google/faster_rcnn/openimages_v4/inception_resnet_v2/1\" #@param [\"https://tfhub.dev/google/openimages_v4/ssd/mobilenet_v2/1\", \"https://tfhub.dev/google/faster_rcnn/openimages_v4/inception_resnet_v2/1\"]\n",
        "\n",
        "# Load the module and keep its 'default' signature as the callable detector.\n",
        "detector = hub.load(module_handle).signatures['default']"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "kwGJV96WWBLH",
        "colab": {}
      },
      "source": [
        "def load_img(path):\n",
        "  \"\"\"Read the file at `path` and decode it as a 3-channel JPEG tensor.\"\"\"\n",
        "  raw_bytes = tf.io.read_file(path)\n",
        "  return tf.image.decode_jpeg(raw_bytes, channels=3)\n",
        "  \n",
        "def run_detector(detector, path):\n",
        "  \"\"\"Run `detector` on the JPEG at `path`, print stats and show the boxes.\"\"\"\n",
        "  img = load_img(path)\n",
        "\n",
        "  # float32 version of the image with a leading batch axis.\n",
        "  batch = tf.image.convert_image_dtype(img, tf.float32)[tf.newaxis, ...]\n",
        "\n",
        "  # Time only the detector call itself.\n",
        "  started = time.time()\n",
        "  raw_result = detector(batch)\n",
        "  elapsed = time.time() - started\n",
        "\n",
        "  # Turn every output tensor into a numpy array.\n",
        "  result = {}\n",
        "  for key, value in raw_result.items():\n",
        "    result[key] = value.numpy()\n",
        "\n",
        "  print(\"Found %d objects.\" % len(result[\"detection_scores\"]))\n",
        "  print(\"Inference time: \", elapsed)\n",
        "\n",
        "  annotated = draw_boxes(\n",
        "      img.numpy(),\n",
        "      result[\"detection_boxes\"],\n",
        "      result[\"detection_class_entities\"],\n",
        "      result[\"detection_scores\"])\n",
        "\n",
        "  display_image(annotated)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "RaREWWJVaKl5",
        "colab_type": "text"
      },
      "source": [
        "## Resultado"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "rubdr2JXfsa1",
        "colab": {}
      },
      "source": [
        "# Images to run the detector on.\n",
        "image_urls = [\n",
        "  \"http://ummaker.com/wp-content/uploads/2020/09/mesadetrabalho_278018948.jpg\",\n",
        "  ]\n",
        "\n",
        "def detect_img(image_url):\n",
        "  \"\"\"Download `image_url` as a 640x480 JPEG, run the detector and time it.\"\"\"\n",
        "  start_time = time.time()\n",
        "  image_path = download_and_resize_image(image_url, 640, 480)\n",
        "  run_detector(detector, image_path)\n",
        "  end_time = time.time()\n",
        "  # This span covers download, resize, inference and drawing, so it is\n",
        "  # labelled as total time; run_detector prints the inference time itself.\n",
        "  print(\"Total time:\", end_time-start_time)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "otPnrxMKIrj5",
        "colab": {}
      },
      "source": [
        "# Run the whole pipeline on the first URL in image_urls.\n",
        "detect_img(image_urls[0])"
      ],
      "execution_count": null,
      "outputs": []
    }
  ]
}
	{
	"nbformat": 4,
	"nbformat_minor": 0,
	"metadata": {
	"accelerator": "GPU",
	"colab": {
	"name": "TensorFlow - Detectar objetos",
	"provenance": [],
	"private_outputs": true,
	"collapsed_sections": [],
	"toc_visible": true,
	"include_colab_link": true
	},
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	}
	},
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "view-in-github",
	"colab_type": "text"
	},
	"source": [
	"<a href=\"https://colab.research.google.com/gist/devmvrborges/f00910f62da88a149a2f5e2aa5e7ebfe/tensorflow-detectar-objetos.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"cellView": "both",
	"colab_type": "code",
	"id": "KUu4vOt5zI9d",
	"colab": {}
	},
	"source": [
	"#--------------\n",
	"#\n",
	"# ummaker.com\n",
	"# TensorFlow - https://www.tensorflow.org/?hl=pt-br\n",
	"# Colab - https://colab.research.google.com/\n",
	"#\n",
	"#--------------"
	],
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"colab_type": "text",
	"id": "v4XGxDrCkeip"
	},
	"source": [
	"## Setup\n"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"cellView": "both",
	"colab_type": "code",
	"id": "6cPY9Ou4sWs_",
	"colab": {}
	},
	"source": [
	"# Nosso cérebro, TensorFlow para utilizar o módulo TensorFlow HUB\n",
	"import tensorflow as tf\n",
	"import tensorflow_hub as hub\n",
	"\n",
	"# para realizar downloads de imagens\n",
	"import matplotlib.pyplot as plt\n",
	"import tempfile\n",
	"from six.moves.urllib.request import urlopen\n",
	"from six import BytesIO\n",
	"\n",
	"# para exibir imagens\n",
	"import numpy as np\n",
	"from PIL import Image\n",
	"from PIL import ImageColor\n",
	"from PIL import ImageDraw\n",
	"from PIL import ImageFont\n",
	"from PIL import ImageOps\n",
	"\n",
	"# para medir o tempo de execução\n",
	"import time\n",
	"\n",
	"# Versão TensorFlow\n",
	"print(\"Versão TensorFlow: %s\" % tf.__version__)"
	],
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "9QArrZZ0WciH",
	"colab_type": "text"
	},
	"source": [
	"## Funções necessárias"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "CuRnbgZ5VVFp",
	"colab_type": "code",
	"colab": {}
	},
	"source": [
	"def display_image(image):\n",
	"  \"\"\"Show `image` with matplotlib on a new 20x15-inch figure, grid off.\"\"\"\n",
	"  plt.figure(figsize=(20, 15))\n",
	"  plt.grid(False)\n",
	"  plt.imshow(image)\n",
	"\n",
	"\n",
	"def download_and_resize_image(url, new_width=256, new_height=256,\n",
	"                              display=False):\n",
	"  \"\"\"Download an image, fit it to the requested size and save it as a JPEG.\n",
	"\n",
	"  Args:\n",
	"    url: Address of the image to fetch.\n",
	"    new_width: Width of the output image in pixels.\n",
	"    new_height: Height of the output image in pixels.\n",
	"    display: If true, show the resized image with display_image().\n",
	"\n",
	"  Returns:\n",
	"    Path of the temporary \".jpg\" file that holds the resized RGB image.\n",
	"    The file is not removed by this function.\n",
	"  \"\"\"\n",
	"  import os  # Local import: only needed to close the mkstemp descriptor.\n",
	"\n",
	"  fd, filename = tempfile.mkstemp(suffix=\".jpg\")\n",
	"  # mkstemp() hands back an already-open descriptor; the image is written\n",
	"  # through the path instead, so close it here rather than leak it.\n",
	"  os.close(fd)\n",
	"  response = urlopen(url)\n",
	"  try:\n",
	"    image_data = BytesIO(response.read())\n",
	"  finally:\n",
	"    response.close()\n",
	"  pil_image = Image.open(image_data)\n",
	"  # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.\n",
	"  pil_image = ImageOps.fit(pil_image, (new_width, new_height), Image.LANCZOS)\n",
	"  pil_image_rgb = pil_image.convert(\"RGB\")\n",
	"  pil_image_rgb.save(filename, format=\"JPEG\", quality=90)\n",
	"  print(\"Image downloaded to %s.\" % filename)\n",
	"  if display:\n",
	"    display_image(pil_image)\n",
	"  return filename\n",
	"\n",
	"\n",
	"def draw_bounding_box_on_image(image,\n",
	"                               ymin,\n",
	"                               xmin,\n",
	"                               ymax,\n",
	"                               xmax,\n",
	"                               color,\n",
	"                               font,\n",
	"                               thickness=4,\n",
	"                               display_str_list=()):\n",
	"  \"\"\"Adds a bounding box, with optional stacked labels, to an image.\n",
	"\n",
	"  The image is drawn on in place. Box coordinates are multiplied by the\n",
	"  image width/height, i.e. they are treated as fractions of the image size.\n",
	"\n",
	"  Args:\n",
	"    image: PIL image to draw on.\n",
	"    ymin, xmin, ymax, xmax: Box edges as fractions of the image size.\n",
	"    color: Colour of the box outline and of the label background.\n",
	"    font: PIL font used to measure and draw the labels.\n",
	"    thickness: Line width of the box outline.\n",
	"    display_str_list: Label strings; the last one is drawn closest to the box.\n",
	"  \"\"\"\n",
	"\n",
	"  def text_size(text):\n",
	"    \"\"\"Returns (width, height) of `text` measured from the drawing origin.\"\"\"\n",
	"    # font.getsize() was removed in Pillow 10. The right/bottom edges of\n",
	"    # getbbox() are used in its place; fonts without getbbox() keep using\n",
	"    # getsize().\n",
	"    if hasattr(font, \"getbbox\"):\n",
	"      return font.getbbox(text)[2:4]\n",
	"    return font.getsize(text)\n",
	"\n",
	"  draw = ImageDraw.Draw(image)\n",
	"  im_width, im_height = image.size\n",
	"  (left, right, top, bottom) = (xmin * im_width, xmax * im_width,\n",
	"                                ymin * im_height, ymax * im_height)\n",
	"  draw.line([(left, top), (left, bottom), (right, bottom), (right, top),\n",
	"             (left, top)],\n",
	"            width=thickness,\n",
	"            fill=color)\n",
	"\n",
	"  # If the total height of the display strings added to the top of the bounding\n",
	"  # box exceeds the top of the image, stack the strings below the bounding box\n",
	"  # instead of above.\n",
	"  display_str_heights = [text_size(ds)[1] for ds in display_str_list]\n",
	"  # Each display_str has a top and bottom margin of 0.05x.\n",
	"  total_display_str_height = (1 + 2 * 0.05) * sum(display_str_heights)\n",
	"\n",
	"  if top > total_display_str_height:\n",
	"    text_bottom = top\n",
	"  else:\n",
	"    text_bottom = top + total_display_str_height\n",
	"  # Reverse list and print from bottom to top.\n",
	"  for display_str in display_str_list[::-1]:\n",
	"    text_width, text_height = text_size(display_str)\n",
	"    margin = np.ceil(0.05 * text_height)\n",
	"    draw.rectangle([(left, text_bottom - text_height - 2 * margin),\n",
	"                    (left + text_width, text_bottom)],\n",
	"                   fill=color)\n",
	"    draw.text((left + margin, text_bottom - text_height - margin),\n",
	"              display_str,\n",
	"              fill=\"black\",\n",
	"              font=font)\n",
	"    # The label background is text_height + 2 * margin tall, so step up by\n",
	"    # that full amount; stepping by less makes stacked labels overlap.\n",
	"    text_bottom -= text_height + 2 * margin\n",
	"\n",
	"\n",
	"def draw_boxes(image, boxes, class_names, scores, max_boxes=10, min_score=0.1):\n",
	"  \"\"\"Overlay labeled boxes on an image with formatted scores and label names.\n",
	"\n",
	"  Args:\n",
	"    image: Numpy image array; it is drawn on in place and also returned.\n",
	"    boxes: Array whose rows are (ymin, xmin, ymax, xmax).\n",
	"    class_names: Bytes labels (decoded as ASCII), indexed like `boxes`.\n",
	"    scores: Scores indexed like `boxes`.\n",
	"    max_boxes: Only the first `max_boxes` rows are considered.\n",
	"    min_score: Rows scoring below this value are not drawn.\n",
	"\n",
	"  Returns:\n",
	"    The same `image` array, with the boxes drawn into it.\n",
	"  \"\"\"\n",
	"  colors = list(ImageColor.colormap.values())\n",
	"\n",
	"  try:\n",
	"    font = ImageFont.truetype(\"/usr/share/fonts/truetype/liberation/LiberationSansNarrow-Regular.ttf\",\n",
	"                              25)\n",
	"  except IOError:\n",
	"    print(\"Font not found, using default font.\")\n",
	"    font = ImageFont.load_default()\n",
	"\n",
	"  # Convert to PIL lazily and only once, instead of round-tripping the whole\n",
	"  # array for every box; all boxes are drawn on the same PIL image.\n",
	"  image_pil = None\n",
	"  for i in range(min(boxes.shape[0], max_boxes)):\n",
	"    if scores[i] < min_score:\n",
	"      continue\n",
	"    if image_pil is None:\n",
	"      image_pil = Image.fromarray(np.uint8(image)).convert(\"RGB\")\n",
	"    ymin, xmin, ymax, xmax = tuple(boxes[i])\n",
	"    display_str = \"{}: {}%\".format(class_names[i].decode(\"ascii\"),\n",
	"                                   int(100 * scores[i]))\n",
	"    color = colors[hash(class_names[i]) % len(colors)]\n",
	"    draw_bounding_box_on_image(\n",
	"        image_pil,\n",
	"        ymin,\n",
	"        xmin,\n",
	"        ymax,\n",
	"        xmax,\n",
	"        color,\n",
	"        font,\n",
	"        display_str_list=[display_str])\n",
	"  if image_pil is not None:\n",
	"    # Write the drawn pixels back into the caller's array.\n",
	"    np.copyto(image, np.array(image_pil))\n",
	"  return image\n"
	],
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"colab_type": "text",
	"id": "D19UCu9Q2-_8"
	},
	"source": [
	"## Aplicar o modelo\n",
	"\n"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"colab_type": "text",
	"id": "t-VdfLbC1w51"
	},
	"source": [
	"Base de modelos matemáticos\n",
	"\n",
	"* FasterRCNN+InceptionResNet V2: high accuracy,\n",
	"* ssd+mobilenet V2: small and fast."
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"colab_type": "code",
	"id": "uazJ5ASc2_QE",
	"colab": {}
	},
	"source": [
	"# TF Hub handle of the detection model. The trailing #@param comment lists\n",
	"# the two selectable handles (presumably rendered by Colab as a form field).\n",
	"module_handle = \"https://tfhub.dev/google/faster_rcnn/openimages_v4/inception_resnet_v2/1\" #@param [\"https://tfhub.dev/google/openimages_v4/ssd/mobilenet_v2/1\", \"https://tfhub.dev/google/faster_rcnn/openimages_v4/inception_resnet_v2/1\"]\n",
	"\n",
	"# Load the module and keep its 'default' signature as the callable detector.\n",
	"detector = hub.load(module_handle).signatures['default']"
	],
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"metadata": {
	"colab_type": "code",
	"id": "kwGJV96WWBLH",
	"colab": {}
	},
	"source": [
	"def load_img(path):\n",
	"  \"\"\"Read the file at `path` and decode it as a 3-channel JPEG tensor.\"\"\"\n",
	"  raw_bytes = tf.io.read_file(path)\n",
	"  return tf.image.decode_jpeg(raw_bytes, channels=3)\n",
	" \n",
	"def run_detector(detector, path):\n",
	"  \"\"\"Run `detector` on the JPEG at `path`, print stats and show the boxes.\"\"\"\n",
	"  img = load_img(path)\n",
	"\n",
	"  # float32 version of the image with a leading batch axis.\n",
	"  batch = tf.image.convert_image_dtype(img, tf.float32)[tf.newaxis, ...]\n",
	"\n",
	"  # Time only the detector call itself.\n",
	"  started = time.time()\n",
	"  raw_result = detector(batch)\n",
	"  elapsed = time.time() - started\n",
	"\n",
	"  # Turn every output tensor into a numpy array.\n",
	"  result = {}\n",
	"  for key, value in raw_result.items():\n",
	"    result[key] = value.numpy()\n",
	"\n",
	"  print(\"Found %d objects.\" % len(result[\"detection_scores\"]))\n",
	"  print(\"Inference time: \", elapsed)\n",
	"\n",
	"  annotated = draw_boxes(\n",
	"      img.numpy(),\n",
	"      result[\"detection_boxes\"],\n",
	"      result[\"detection_class_entities\"],\n",
	"      result[\"detection_scores\"])\n",
	"\n",
	"  display_image(annotated)"
	],
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "RaREWWJVaKl5",
	"colab_type": "text"
	},
	"source": [
	"## Resultado"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"colab_type": "code",
	"id": "rubdr2JXfsa1",
	"colab": {}
	},
	"source": [
	"# Images to run the detector on.\n",
	"image_urls = [\n",
	"  \"http://ummaker.com/wp-content/uploads/2020/09/mesadetrabalho_278018948.jpg\",\n",
	"  ]\n",
	"\n",
	"def detect_img(image_url):\n",
	"  \"\"\"Download `image_url` as a 640x480 JPEG, run the detector and time it.\"\"\"\n",
	"  start_time = time.time()\n",
	"  image_path = download_and_resize_image(image_url, 640, 480)\n",
	"  run_detector(detector, image_path)\n",
	"  end_time = time.time()\n",
	"  # This span covers download, resize, inference and drawing, so it is\n",
	"  # labelled as total time; run_detector prints the inference time itself.\n",
	"  print(\"Total time:\", end_time-start_time)"
	],
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"metadata": {
	"colab_type": "code",
	"id": "otPnrxMKIrj5",
	"colab": {}
	},
	"source": [
	"# Run the whole pipeline on the first URL in image_urls.\n",
	"detect_img(image_urls[0])"
	],
	"execution_count": null,
	"outputs": []
	}
	]
	}