Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Object Detection
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# IPython Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# dotenv
.env
# virtualenv
venv/
ENV/
# Spyder project settings
.spyderproject
# Rope project settings
.ropeproject
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6.1 (/Library/Frameworks/Python.framework/Versions/3.6/bin/python3.6)" project-jdk-type="Python SDK" />
</project>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/Object-Detector-App.iml" filepath="$PROJECT_DIR$/.idea/Object-Detector-App.iml" />
</modules>
</component>
</project>
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="Python 3.5.3 (~/anaconda/envs/object-detection/bin/python)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ChangeListManager">
<option name="TRACKING_ENABLED" value="true" />
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
<option name="LAST_RESOLUTION" value="IGNORE" />
</component>
<component name="ShelveChangesManager" show_recycled="false">
<option name="remove_strategy" value="false" />
</component>
<component name="TaskManager">
<task active="true" id="Default" summary="Default task">
<created>1499365852136</created>
<option name="number" value="Default" />
<option name="presentableId" value="Default" />
<updated>1499365852136</updated>
</task>
<servers />
</component>
<component name="VcsContentAnnotationSettings">
<option name="myLimit" value="2678400000" />
</component>
<component name="XDebuggerManager">
<breakpoint-manager />
<watches-manager />
</component>
</project>
# Conda environment for the object detection app.
# Recreate with: conda env create -f environment.yml
name: object-detection
# Plain YAML sequence: the `!!python/tuple` tag written by `conda env export`
# is Python-specific and is rejected by safe YAML loaders.
channels:
- menpo
- defaults
dependencies:
- freetype=2.5.5=2
- jbig=2.1=0
- jlaura::opencv3=3.0.0=py35_0
- jpeg=9b=0
- libpng=1.6.27=0
- libtiff=4.0.6=3
- menpo::tbb=4.3_20141023=0
- mkl=2017.0.1=0
- numpy=1.13.0=py35_0
- olefile=0.44=py35_0
- openssl=1.0.2l=0
- pillow=4.1.1=py35_0
- pip=9.0.1=py35_1
- python=3.5.3=1
- readline=6.2=2
- setuptools=27.2.0=py35_0
- sqlite=3.13.0=0
- tk=8.5.18=0
- wheel=0.29.0=py35_0
- xz=5.2.2=1
- zlib=1.2.8=3
# Packages installed through pip; they must be nested under the `pip` key so
# they are not treated as conda package specs.
- pip:
  - backports.weakref==1.0rc1
  - bleach==1.5.0
  - html5lib==0.9999999
  - markdown==2.2.0
  - protobuf==3.3.0
  - six==1.10.0
  - tensorflow==1.2.0
  - werkzeug==0.12.2
# Machine-specific install location recorded by the export.
prefix: /Users/datitran/anaconda/envs/object-detection
MIT License
Copyright (c) 2017 Dat Tran
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectLevelVcsManager" settingsEditedManually="false">
<OptionsSetting value="true" id="Add" />
<OptionsSetting value="true" id="Remove" />
<OptionsSetting value="true" id="Checkout" />
<OptionsSetting value="true" id="Update" />
<OptionsSetting value="true" id="Status" />
<OptionsSetting value="true" id="Edit" />
<ConfirmationsSetting value="0" id="Add" />
<ConfirmationsSetting value="0" id="Remove" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.5.2 (~/anaconda/bin/python)" project-jdk-type="Python SDK" />
</project>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/object_detection.iml" filepath="$PROJECT_DIR$/.idea/object_detection.iml" />
</modules>
</component>
</project>
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="TestRunnerService">
<option name="projectConfiguration" value="Nosetests" />
<option name="PROJECT_TEST_RUNNER" value="Nosetests" />
</component>
</module>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ChangeListManager">
<list default="true" id="afa8758a-48f3-4f65-8fa7-5a1beb5bcd6b" name="Default" comment="" />
<ignored path="object_detection.iws" />
<ignored path=".idea/workspace.xml" />
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
<option name="TRACKING_ENABLED" value="true" />
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
<option name="LAST_RESOLUTION" value="IGNORE" />
</component>
<component name="CreatePatchCommitExecutor">
<option name="PATCH_PATH" value="" />
</component>
<component name="ExecutionTargetManager" SELECTED_TARGET="default_target" />
<component name="FavoritesManager">
<favorites_list name="object_detection" />
</component>
<component name="FileEditorManager">
<leaf>
<file leaf-file-name="object_detection.py" pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/object_detection.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="195">
<caret line="13" column="0" selection-start-line="13" selection-start-column="0" selection-end-line="13" selection-end-column="32" />
<folding />
</state>
</provider>
</entry>
</file>
</leaf>
</component>
<component name="IdeDocumentHistory">
<option name="CHANGED_PATHS">
<list>
<option value="$PROJECT_DIR$/object_detection.py" />
</list>
</option>
</component>
<component name="ProjectFrameBounds">
<option name="y" value="23" />
<option name="width" value="1280" />
<option name="height" value="1351" />
</component>
<component name="ProjectLevelVcsManager" settingsEditedManually="false">
<OptionsSetting value="true" id="Add" />
<OptionsSetting value="true" id="Remove" />
<OptionsSetting value="true" id="Checkout" />
<OptionsSetting value="true" id="Update" />
<OptionsSetting value="true" id="Status" />
<OptionsSetting value="true" id="Edit" />
<ConfirmationsSetting value="0" id="Add" />
<ConfirmationsSetting value="0" id="Remove" />
</component>
<component name="ProjectView">
<navigator currentView="ProjectPane" proportions="" version="1">
<flattenPackages />
<showMembers />
<showModules />
<showLibraryContents />
<hideEmptyPackages />
<abbreviatePackageNames />
<autoscrollToSource />
<autoscrollFromSource />
<sortByType />
<manualOrder />
<foldersAlwaysOnTop value="true" />
</navigator>
<panes>
<pane id="Scratches" />
<pane id="ProjectPane">
<subPane>
<PATH>
<PATH_ELEMENT>
<option name="myItemId" value="object_detection" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
</PATH_ELEMENT>
</PATH>
<PATH>
<PATH_ELEMENT>
<option name="myItemId" value="object_detection" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="object_detection" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
</PATH>
</subPane>
</pane>
<pane id="Scope" />
</panes>
</component>
<component name="PropertiesComponent">
<property name="last_opened_file_path" value="$PROJECT_DIR$" />
</component>
<component name="ShelveChangesManager" show_recycled="false">
<option name="remove_strategy" value="false" />
</component>
<component name="TaskManager">
<task active="true" id="Default" summary="Default task">
<changelist id="afa8758a-48f3-4f65-8fa7-5a1beb5bcd6b" name="Default" comment="" />
<created>1497876882068</created>
<option name="number" value="Default" />
<option name="presentableId" value="Default" />
<updated>1497876882068</updated>
</task>
<servers />
</component>
<component name="ToolWindowManager">
<frame x="0" y="23" width="1280" height="1351" extended-state="0" />
<editor active="false" />
<layout>
<window_info id="Project" active="true" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" show_stripe_button="true" weight="0.2494043" sideWeight="0.5" order="0" side_tool="false" content_ui="combo" />
<window_info id="TODO" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="6" side_tool="false" content_ui="tabs" />
<window_info id="Event Log" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="true" content_ui="tabs" />
<window_info id="Version Control" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
<window_info id="Python Console" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
<window_info id="Structure" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
<window_info id="Terminal" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
<window_info id="Favorites" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="true" content_ui="tabs" />
<window_info id="Cvs" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="4" side_tool="false" content_ui="tabs" />
<window_info id="Hierarchy" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="2" side_tool="false" content_ui="combo" />
<window_info id="Message" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
<window_info id="Commander" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
<window_info id="Find" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
<window_info id="Inspection" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="5" side_tool="false" content_ui="tabs" />
<window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
<window_info id="Ant Build" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
<window_info id="Debug" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
</layout>
</component>
<component name="Vcs.Log.UiProperties">
<option name="RECENTLY_FILTERED_USER_GROUPS">
<collection />
</option>
<option name="RECENTLY_FILTERED_BRANCH_GROUPS">
<collection />
</option>
</component>
<component name="VcsContentAnnotationSettings">
<option name="myLimit" value="2678400000" />
</component>
<component name="XDebuggerManager">
<breakpoint-manager />
<watches-manager />
</component>
<component name="editorHistoryManager">
<entry file="file://$PROJECT_DIR$/object_detection.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="195">
<caret line="13" column="0" selection-start-line="13" selection-start-column="0" selection-end-line="13" selection-end-column="32" />
<folding />
</state>
</provider>
</entry>
</component>
</project>
# Tensorflow Object Detection API: Anchor Generator implementations.
# All targets declared in this package are visible to every other package.
package(
default_visibility = ["//visibility:public"],
)
licenses(["notice"])
# Apache 2.0
# Library target for the single-grid anchor generator (grid_anchor_generator.py).
py_library(
name = "grid_anchor_generator",
srcs = [
"grid_anchor_generator.py",
],
deps = [
"//tensorflow",
"//tensorflow_models/object_detection/core:anchor_generator",
"//tensorflow_models/object_detection/core:box_list",
"//tensorflow_models/object_detection/utils:ops",
],
)
# Unit tests for the grid_anchor_generator library.
py_test(
name = "grid_anchor_generator_test",
srcs = [
"grid_anchor_generator_test.py",
],
deps = [
":grid_anchor_generator",
"//tensorflow",
],
)
# Library target for the multi-layer anchor generator; it depends on the
# single-grid generator declared above.
# NOTE(review): multiple_grid_anchor_generator.py imports numpy, but numpy is
# only listed in the test target's deps below -- confirm whether
# "//third_party/py/numpy" should also be listed here.
py_library(
name = "multiple_grid_anchor_generator",
srcs = [
"multiple_grid_anchor_generator.py",
],
deps = [
":grid_anchor_generator",
"//tensorflow",
"//tensorflow_models/object_detection/core:anchor_generator",
"//tensorflow_models/object_detection/core:box_list_ops",
],
)
# Unit tests for the multiple_grid_anchor_generator library.
py_test(
name = "multiple_grid_anchor_generator_test",
srcs = [
"multiple_grid_anchor_generator_test.py",
],
deps = [
":multiple_grid_anchor_generator",
"//third_party/py/numpy",
],
)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Generates grid anchors on the fly as used in Faster RCNN.
Generates grid anchors on the fly as described in:
"Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks"
Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun.
"""
import tensorflow as tf
from object_detection.core import anchor_generator
from object_detection.core import box_list
from object_detection.utils import ops
class GridAnchorGenerator(anchor_generator.AnchorGenerator):
  """Anchor generator that lays anchors over a single feature-map grid.

  The configured scales and aspect ratios are combined with each other, so
  each grid location is reported to carry len(scales) * len(aspect_ratios)
  anchors.
  """

  def __init__(self,
               scales=(0.5, 1.0, 2.0),
               aspect_ratios=(0.5, 1.0, 2.0),
               base_anchor_size=None,
               anchor_stride=None,
               anchor_offset=None):
    """Stores the anchor configuration, filling in defaults where omitted.

    Args:
      scales: sequence of (float) anchor scales, default=(0.5, 1.0, 2.0).
      aspect_ratios: sequence of (float) anchor aspect ratios,
        default=(0.5, 1.0, 2.0).
      base_anchor_size: base anchor size as [height, width]
        (length-2 float list, default=[256, 256]).
      anchor_stride: difference in centers between base anchors for adjacent
        grid positions (length-2 float list, default=[16, 16]).
      anchor_offset: center of the anchor with scale and aspect ratio 1 for
        the upper left element of the grid. This should be zero for feature
        networks with only VALID padding and even receptive field size, but
        may need additional calculation if other padding is used
        (length-2 float list, default=[0, 0]).
    """
    self._scales = scales
    self._aspect_ratios = aspect_ratios
    # The three size arguments are stored as float32 constants; they are
    # created in the same order as the arguments are declared.
    self._base_anchor_size = tf.constant(
        [256, 256] if base_anchor_size is None else base_anchor_size,
        dtype=tf.float32)
    self._anchor_stride = tf.constant(
        [16, 16] if anchor_stride is None else anchor_stride,
        dtype=tf.float32)
    self._anchor_offset = tf.constant(
        [0, 0] if anchor_offset is None else anchor_offset,
        dtype=tf.float32)

  def name_scope(self):
    """Returns the name scope string used for this generator."""
    return 'GridAnchorGenerator'

  def num_anchors_per_location(self):
    """Returns the number of anchors placed at each spatial location.

    Returns:
      a single-element list of integers, since this generator expects exactly
      one feature map to be passed to the `generate` function.
    """
    anchors_per_cell = len(self._scales) * len(self._aspect_ratios)
    return [anchors_per_cell]

  def _generate(self, feature_map_shape_list):
    """Builds the anchor boxes for the single requested feature map.

    Args:
      feature_map_shape_list: list holding exactly one (height, width) tuple
        with the resolution of the convnet layer. For example,
        feature_map_shape_list=[(8, 8)] asks for anchors that correspond to
        an 8x8 layer.

    Returns:
      boxes: a BoxList holding a collection of N anchor boxes

    Raises:
      ValueError: if feature_map_shape_list is not a list of length 1.
      ValueError: if the entry of feature_map_shape_list is not a tuple of
        length 2.
    """
    if (not isinstance(feature_map_shape_list, list)
        or len(feature_map_shape_list) != 1):
      raise ValueError('feature_map_shape_list must be a list of length 1.')
    for shape in feature_map_shape_list:
      if not isinstance(shape, tuple) or len(shape) != 2:
        raise ValueError('feature_map_shape_list must be a list of pairs.')

    height, width = feature_map_shape_list[0]
    # Combine the scales with the aspect ratios and flatten both grids so that
    # each position describes one basis box.
    scale_mesh, ratio_mesh = ops.meshgrid(self._scales, self._aspect_ratios)
    flat_scales = tf.reshape(scale_mesh, [-1])
    flat_ratios = tf.reshape(ratio_mesh, [-1])
    return tile_anchors(grid_height=height,
                        grid_width=width,
                        scales=flat_scales,
                        aspect_ratios=flat_ratios,
                        base_anchor_size=self._base_anchor_size,
                        anchor_stride=self._anchor_stride,
                        anchor_offset=self._anchor_offset)
def tile_anchors(grid_height,
                 grid_width,
                 scales,
                 aspect_ratios,
                 base_anchor_size,
                 anchor_stride,
                 anchor_offset):
  """Create a tiled set of anchors strided along a grid in image space.

  This op creates a set of anchor boxes by placing a "basis" collection of
  boxes with user-specified scales and aspect ratios centered at evenly
  distributed points along a grid. The basis collection is specified via the
  scale and aspect_ratios arguments. For example, setting scales=[.1, .2, .2]
  and aspect ratios = [2,2,1/2] means that we create three boxes: one with
  scale .1, aspect ratio 2, one with scale .2, aspect ratio 2, and one with
  scale .2 and aspect ratio 1/2. Each box is multiplied by "base_anchor_size"
  before placing it over its respective center.

  Grid points are specified via grid_height, grid_width parameters as well as
  the anchor_stride and anchor_offset parameters.

  Args:
    grid_height: size of the grid in the y direction (int or int scalar tensor)
    grid_width: size of the grid in the x direction (int or int scalar tensor)
    scales: a 1-d (float) tensor representing the scale of each box in the
      basis set.
    aspect_ratios: a 1-d (float) tensor representing the aspect ratio of each
      box in the basis set. The length of the scales and aspect_ratios tensors
      must be equal.
    base_anchor_size: base anchor size as [height, width]
      (float tensor of shape [2])
    anchor_stride: difference in centers between base anchors for adjacent grid
      positions (float tensor of shape [2])
    anchor_offset: center of the anchor with scale and aspect ratio 1 for the
      upper left element of the grid, this should be zero for
      feature networks with only VALID padding and even receptive
      field size, but may need some additional calculation if other
      padding is used (float tensor of shape [2])

  Returns:
    a BoxList holding a collection of N anchor boxes
  """
  # Box height is scale / sqrt(ratio) and width is scale * sqrt(ratio), each
  # multiplied by the matching base anchor dimension.
  ratio_sqrts = tf.sqrt(aspect_ratios)
  heights = scales / ratio_sqrts * base_anchor_size[0]
  widths = scales * ratio_sqrts * base_anchor_size[1]

  # Get a grid of box centers. tf.cast is used instead of the deprecated
  # tf.to_float, which is no longer available in newer TensorFlow releases.
  y_centers = tf.cast(tf.range(grid_height), dtype=tf.float32)
  y_centers = y_centers * anchor_stride[0] + anchor_offset[0]
  x_centers = tf.cast(tf.range(grid_width), dtype=tf.float32)
  x_centers = x_centers * anchor_stride[1] + anchor_offset[1]
  x_centers, y_centers = ops.meshgrid(x_centers, y_centers)

  # Pair every basis box size with every grid center.
  # NOTE(review): stacking along axis=3 below presumes ops.meshgrid returns
  # rank-3 tensors here -- confirm against ops.meshgrid.
  widths_grid, x_centers_grid = ops.meshgrid(widths, x_centers)
  heights_grid, y_centers_grid = ops.meshgrid(heights, y_centers)
  bbox_centers = tf.stack([y_centers_grid, x_centers_grid], axis=3)
  bbox_sizes = tf.stack([heights_grid, widths_grid], axis=3)

  # Flatten to one (y, x) center and one (height, width) size per row.
  bbox_centers = tf.reshape(bbox_centers, [-1, 2])
  bbox_sizes = tf.reshape(bbox_sizes, [-1, 2])
  bbox_corners = _center_size_bbox_to_corners_bbox(bbox_centers, bbox_sizes)
  return box_list.BoxList(bbox_corners)
def _center_size_bbox_to_corners_bbox(centers, sizes):
  """Turns center/size box descriptions into corner coordinates.

  Args:
    centers: a tensor with shape [N, 2] representing bounding box centers
    sizes: a tensor with shape [N, 2] representing bounding box sizes

  Returns:
    corners: tensor with shape [N, 4] whose first two columns are
      centers - sizes / 2 and whose last two columns are centers + sizes / 2
  """
  half_sizes = .5 * sizes
  min_corners = centers - half_sizes
  max_corners = centers + half_sizes
  return tf.concat([min_corners, max_corners], 1)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.grid_anchor_generator."""
import tensorflow as tf
from object_detection.anchor_generators import grid_anchor_generator
class GridAnchorGeneratorTest(tf.test.TestCase):
  """Checks the corner coordinates produced by GridAnchorGenerator."""

  def test_construct_single_anchor(self):
    """Builds a 1x1 anchor grid to test the size of the output boxes."""
    expected_corners = [[-121, -35, 135, 29], [-249, -67, 263, 61],
                        [-505, -131, 519, 125], [-57, -67, 71, 61],
                        [-121, -131, 135, 125], [-249, -259, 263, 253],
                        [-25, -131, 39, 125], [-57, -259, 71, 253],
                        [-121, -515, 135, 509]]
    # Base anchor size and stride are left at the generator's defaults; only
    # the offset of the anchor centers is overridden.
    generator = grid_anchor_generator.GridAnchorGenerator(
        scales=[0.5, 1.0, 2.0],
        aspect_ratios=[0.25, 1.0, 4.0],
        anchor_offset=[7, -3])
    corners_tensor = generator.generate(
        feature_map_shape_list=[(1, 1)]).get()
    with self.test_session():
      actual_corners = corners_tensor.eval()
      self.assertAllClose(actual_corners, expected_corners)

  def test_construct_anchor_grid(self):
    """Builds a 2x2 anchor grid with three scales and one aspect ratio."""
    expected_corners = [[-2.5, -2.5, 2.5, 2.5], [-5., -5., 5., 5.],
                        [-10., -10., 10., 10.], [-2.5, 16.5, 2.5, 21.5],
                        [-5., 14., 5, 24], [-10., 9., 10, 29],
                        [16.5, -2.5, 21.5, 2.5], [14., -5., 24, 5],
                        [9., -10., 29, 10], [16.5, 16.5, 21.5, 21.5],
                        [14., 14., 24, 24], [9., 9., 29, 29]]
    generator = grid_anchor_generator.GridAnchorGenerator(
        scales=[0.5, 1.0, 2.0],
        aspect_ratios=[1.0],
        base_anchor_size=[10, 10],
        anchor_stride=[19, 19],
        anchor_offset=[0, 0])
    corners_tensor = generator.generate(
        feature_map_shape_list=[(2, 2)]).get()
    with self.test_session():
      actual_corners = corners_tensor.eval()
      self.assertAllClose(actual_corners, expected_corners)
# Hand control to TensorFlow's test runner when this file is run as a script.
if __name__ == '__main__':
  tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Generates grid anchors on the fly corresponding to multiple CNN layers.
Generates grid anchors on the fly corresponding to multiple CNN layers as
described in:
"SSD: Single Shot MultiBox Detector"
Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed,
Cheng-Yang Fu, Alexander C. Berg
(see Section 2.2: Choosing scales and aspect ratios for default boxes)
"""
import numpy as np
import tensorflow as tf
from object_detection.anchor_generators import grid_anchor_generator
from object_detection.core import anchor_generator
from object_detection.core import box_list_ops
class MultipleGridAnchorGenerator(anchor_generator.AnchorGenerator):
"""Generate a grid of anchors for multiple CNN layers."""
def __init__(self,
box_specs_list,
base_anchor_size=None,
clip_window=None):
"""Constructs a MultipleGridAnchorGenerator.
To construct anchors, at multiple grid resolutions, one must provide a
list of feature_map_shape_list (e.g., [(8, 8), (4, 4)]), and for each grid
size, a corresponding list of (scale, aspect ratio) box specifications.
For example:
box_specs_list = [[(.1, 1.0), (.1, 2.0)], # for 8x8 grid
[(.2, 1.0), (.3, 1.0), (.2, 2.0)]] # for 4x4 grid
To support the fully convolutional setting, we pass grid sizes in at
generation time, while scale and aspect ratios are fixed at construction
time.
Args:
box_specs_list: list of list of (scale, aspect ratio) pairs with the
outside list having the same number of entries as feature_map_shape_list
(which is passed in at generation time).
base_anchor_size: base anchor size as [height, width]
(length-2 float tensor, default=[256, 256]).
clip_window: a tensor of shape [4] specifying a window to which all
anchors should be clipped. If clip_window is None, then no clipping
is performed.
Raises:
ValueError: if box_specs_list is not a list of list of pairs
ValueError: if clip_window is not either None or a tensor of shape [4]
"""
if isinstance(box_specs_list, list) and all(
[isinstance(list_item, list) for list_item in box_specs_list]):
self._box_specs = box_specs_list
else:
raise ValueError('box_specs_list is expected to be a '
'list of lists of pairs')
if base_anchor_size is None:
base_anchor_size = tf.constant([256, 256], dtype=tf.float32)
self._base_anchor_size = base_anchor_size
if clip_window is not None and clip_window.get_shape().as_list() != [4]:
raise ValueError('clip_window must either be None or a shape [4] tensor')
self._clip_window = clip_window
self._scales = []
self._aspect_ratios = []
for box_spec in self._box_specs:
if not all([isinstance(entry, tuple) and len(entry) == 2
for entry in box_spec]):
raise ValueError('box_specs_list is expected to be a '
'list of lists of pairs')
scales, aspect_ratios = zip(*box_spec)
self._scales.append(scales)
self._aspect_ratios.append(aspect_ratios)
def name_scope(self):
return 'MultipleGridAnchorGenerator'
def num_anchors_per_location(self):
"""Returns the number of anchors per spatial location.
Returns:
a list of integers, one for each expected feature map to be passed to
the Generate function.
"""
return [len(box_specs) for box_specs in self._box_specs]
def _generate(self,
              feature_map_shape_list,
              im_height=1,
              im_width=1,
              anchor_strides=None,
              anchor_offsets=None):
  """Tiles anchors over every feature map and returns them as one BoxList.

  A grid of shape MxM with k boxes per grid center yields k*M^2 anchors; the
  result holds the anchors of all grids concatenated. With the box_specs_list
  example from the constructor docstring (two boxes on an 8x8 grid, three on
  a 4x4 grid) this gives 2*8^2 + 3*4^2 = 176 anchors. Output order follows the
  order in which grids and box specs were given: box_spec index varies
  fastest, then width index, then height index, then grid index.

  Args:
    feature_map_shape_list: list of (height, width) tuples, one per convnet
      layer, e.g. [(8, 8), (7, 7)] for an 8x8 layer followed by a 7x7 layer.
      Must have as many entries as there are box-spec lists.
    im_height: height of the image the grid is generated for. When both
      im_height and im_width are 1 the anchors are in normalized coordinates,
      otherwise in absolute coordinates.
    im_width: width of the image the grid is generated for (see im_height).
    anchor_strides: list of (y, x) stride tuples, one per layer, e.g.
      [(.25, .25), (.5, .5)]. If None (or empty), each stride defaults to the
      image size divided by the corresponding grid size. Entries may be
      dynamic tf.int or tf.float values, e.g. for variable shape inputs.
    anchor_offsets: list of (y, x) offset tuples, one per layer, giving the
      center of the (0, 0)-th anchor in image space, e.g.
      [(.125, .125), (.25, .25)]. If None (or empty), each offset defaults to
      half of the corresponding stride. Entries may be dynamic tf.int or
      tf.float values.

  Returns:
    boxes: a BoxList holding a collection of N anchor boxes, with a 'stddev'
      field of shape [N, 4] filled with 0.01.

  Raises:
    ValueError: if feature_map_shape_list, anchor_strides or anchor_offsets
      is not a list with the same length as the box specs.
    ValueError: if any of those lists contains an entry that is not a
      2-tuple.
  """
  num_layers = len(self._box_specs)

  def _all_pairs(candidates):
    # True when every entry is a tuple of exactly two items.
    return all(isinstance(item, tuple) and len(item) == 2
               for item in candidates)

  if (not isinstance(feature_map_shape_list, list)
      or len(feature_map_shape_list) != num_layers):
    raise ValueError('feature_map_shape_list must be a list with the same '
                     'length as self._box_specs')
  if not _all_pairs(feature_map_shape_list):
    raise ValueError('feature_map_shape_list must be a list of pairs.')

  # Default strides: image extent divided by the grid extent of each layer.
  if not anchor_strides:
    anchor_strides = []
    for grid_shape in feature_map_shape_list:
      stride_y = tf.to_float(im_height) / tf.to_float(grid_shape[0])
      stride_x = tf.to_float(im_width) / tf.to_float(grid_shape[1])
      anchor_strides.append((stride_y, stride_x))
  # Default offsets: centre the first anchor half a stride into the image.
  if not anchor_offsets:
    anchor_offsets = [(0.5 * layer_stride[0], 0.5 * layer_stride[1])
                      for layer_stride in anchor_strides]

  for arg_name, arg in (('anchor_strides', anchor_strides),
                        ('anchor_offsets', anchor_offsets)):
    if not isinstance(arg, list) or len(arg) != num_layers:
      raise ValueError('%s must be a list with the same length '
                       'as self._box_specs' % arg_name)
    if not _all_pairs(arg):
      raise ValueError('%s must be a list of pairs.' % arg_name)

  # The base anchor size is scaled by the shorter image side.
  min_im_shape = tf.to_float(tf.minimum(im_height, im_width))
  base_anchor_size = min_im_shape * self._base_anchor_size

  per_layer_settings = zip(feature_map_shape_list, self._scales,
                           self._aspect_ratios, anchor_strides,
                           anchor_offsets)
  anchor_grid_list = [
      grid_anchor_generator.tile_anchors(
          grid_height=grid_size[0],
          grid_width=grid_size[1],
          scales=scales,
          aspect_ratios=aspect_ratios,
          base_anchor_size=base_anchor_size,
          anchor_stride=stride,
          anchor_offset=offset)
      for grid_size, scales, aspect_ratios, stride, offset
      in per_layer_settings]

  all_anchors = box_list_ops.concatenate(anchor_grid_list)
  # Prefer the statically known anchor count; fall back to the dynamic one.
  num_anchors = all_anchors.num_boxes_static()
  if num_anchors is None:
    num_anchors = all_anchors.num_boxes()

  if self._clip_window is not None:
    # Scale the configured window to the image size before clipping.
    image_extent = tf.to_float([im_height, im_width, im_height, im_width])
    clip_window = tf.multiply(image_extent, self._clip_window)
    all_anchors = box_list_ops.clip_to_window(
        all_anchors, clip_window, filter_nonoverlapping=False)
    # TODO: make reshape an option for the clip_to_window op
    all_anchors.set(tf.reshape(all_anchors.get(), [num_anchors, 4]))

  stddevs_tensor = 0.01 * tf.ones(
      [num_anchors, 4], dtype=tf.float32, name='stddevs')
  all_anchors.add_field('stddev', stddevs_tensor)
  return all_anchors
def create_ssd_anchors(num_layers=6,
                       min_scale=0.2,
                       max_scale=0.95,
                       aspect_ratios=(1.0, 2.0, 3.0, 1.0/2, 1.0/3),
                       base_anchor_size=None,
                       reduce_boxes_in_lowest_layer=True):
  """Creates MultipleGridAnchorGenerator for SSD anchors.

  This function instantiates a MultipleGridAnchorGenerator that reproduces
  ``default box`` construction proposed by Liu et al in the SSD paper.
  See Section 2.2 for details. Grid sizes are assumed to be passed in
  at generation time from finest resolution to coarsest resolution --- this is
  used to (linearly) interpolate scales of anchor boxes corresponding to the
  intermediate grid sizes.

  Anchors that are returned by calling the `generate` method on the returned
  MultipleGridAnchorGenerator object are always in normalized coordinates
  and clipped to the unit square: (i.e. all coordinates lie in [0, 1]x[0, 1]).

  Args:
    num_layers: integer number of grid layers to create anchors for (actual
      grid sizes passed in at generation time). With a single layer there is
      nothing to interpolate, so that layer uses min_scale.
    min_scale: scale of anchors corresponding to finest resolution (float)
    max_scale: scale of anchors corresponding to coarsest resolution (float)
    aspect_ratios: list or tuple of (float) aspect ratios to place on each
      grid point.
    base_anchor_size: base anchor size as [height, width]. Defaults to
      [1.0, 1.0] when None.
    reduce_boxes_in_lowest_layer: a boolean to indicate whether the fixed 3
      boxes per location is used in the lowest layer.

  Returns:
    a MultipleGridAnchorGenerator
  """
  if base_anchor_size is None:
    base_anchor_size = [1.0, 1.0]
  base_anchor_size = tf.constant(base_anchor_size, dtype=tf.float32)

  if num_layers == 1:
    # The interpolation below would divide by (num_layers - 1) == 0.
    scales = [min_scale]
  else:
    # float() keeps the interpolation exact even if integer scales are passed
    # under Python 2 division semantics.
    scales = [min_scale + (max_scale - min_scale) * i / float(num_layers - 1)
              for i in range(num_layers)]
  # Sentinel "next" scale for the coarsest layer's extra square box.
  scales = scales + [1.0]

  box_specs_list = []
  for layer, scale, scale_next in zip(
      range(num_layers), scales[:-1], scales[1:]):
    layer_box_specs = []
    if layer == 0 and reduce_boxes_in_lowest_layer:
      layer_box_specs = [(0.1, 1.0), (scale, 2.0), (scale, 0.5)]
    else:
      for aspect_ratio in aspect_ratios:
        layer_box_specs.append((scale, aspect_ratio))
        if aspect_ratio == 1.0:
          # Extra square box at the geometric mean of this and the next scale.
          layer_box_specs.append((np.sqrt(scale*scale_next), 1.0))
    box_specs_list.append(layer_box_specs)
  return MultipleGridAnchorGenerator(box_specs_list, base_anchor_size)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for anchor_generators.multiple_grid_anchor_generator_test.py."""
import numpy as np
import tensorflow as tf
from object_detection.anchor_generators import multiple_grid_anchor_generator as ag
class MultipleGridAnchorGeneratorTest(tf.test.TestCase):
  """Tests for ag.MultipleGridAnchorGenerator construction and generate()."""

  def test_construct_single_anchor_grid(self):
    """Builds a 1x1 anchor grid to test the size of the output boxes."""
    exp_anchor_corners = [[-121, -35, 135, 29], [-249, -67, 263, 61],
                          [-505, -131, 519, 125], [-57, -67, 71, 61],
                          [-121, -131, 135, 125], [-249, -259, 263, 253],
                          [-25, -131, 39, 125], [-57, -259, 71, 253],
                          [-121, -515, 135, 509]]

    base_anchor_size = tf.constant([256, 256], dtype=tf.float32)
    box_specs_list = [[(.5, .25), (1.0, .25), (2.0, .25),
                       (.5, 1.0), (1.0, 1.0), (2.0, 1.0),
                       (.5, 4.0), (1.0, 4.0), (2.0, 4.0)]]
    anchor_generator = ag.MultipleGridAnchorGenerator(
        box_specs_list, base_anchor_size)
    anchors = anchor_generator.generate(feature_map_shape_list=[(1, 1)],
                                        anchor_strides=[(16, 16)],
                                        anchor_offsets=[(7, -3)])
    anchor_corners = anchors.get()
    with self.test_session():
      anchor_corners_out = anchor_corners.eval()
      self.assertAllClose(anchor_corners_out, exp_anchor_corners)

  def test_construct_anchor_grid(self):
    """Checks anchor corners on a 2x2 grid with explicit stride and offset."""
    base_anchor_size = tf.constant([10, 10], dtype=tf.float32)
    box_specs_list = [[(0.5, 1.0), (1.0, 1.0), (2.0, 1.0)]]

    exp_anchor_corners = [[-2.5, -2.5, 2.5, 2.5], [-5., -5., 5., 5.],
                          [-10., -10., 10., 10.], [-2.5, 16.5, 2.5, 21.5],
                          [-5., 14., 5, 24], [-10., 9., 10, 29],
                          [16.5, -2.5, 21.5, 2.5], [14., -5., 24, 5],
                          [9., -10., 29, 10], [16.5, 16.5, 21.5, 21.5],
                          [14., 14., 24, 24], [9., 9., 29, 29]]

    anchor_generator = ag.MultipleGridAnchorGenerator(
        box_specs_list, base_anchor_size)
    anchors = anchor_generator.generate(feature_map_shape_list=[(2, 2)],
                                        anchor_strides=[(19, 19)],
                                        anchor_offsets=[(0, 0)])
    anchor_corners = anchors.get()

    with self.test_session():
      anchor_corners_out = anchor_corners.eval()
      self.assertAllClose(anchor_corners_out, exp_anchor_corners)

  def test_construct_anchor_grid_non_square(self):
    """Checks a 1x2 grid given as tensor dimensions, default strides/offsets."""
    base_anchor_size = tf.constant([1, 1], dtype=tf.float32)
    box_specs_list = [[(1.0, 1.0)]]

    exp_anchor_corners = [[0., -0.25, 1., 0.75], [0., 0.25, 1., 1.25]]

    anchor_generator = ag.MultipleGridAnchorGenerator(box_specs_list,
                                                      base_anchor_size)
    anchors = anchor_generator.generate(feature_map_shape_list=[(tf.constant(
        1, dtype=tf.int32), tf.constant(2, dtype=tf.int32))])
    anchor_corners = anchors.get()

    with self.test_session():
      anchor_corners_out = anchor_corners.eval()
      self.assertAllClose(anchor_corners_out, exp_anchor_corners)

  def test_construct_anchor_grid_unnormalized(self):
    """Checks absolute-coordinate anchors for a 320x640 image."""
    base_anchor_size = tf.constant([1, 1], dtype=tf.float32)
    box_specs_list = [[(1.0, 1.0)]]

    exp_anchor_corners = [[0., 0., 320., 320.], [0., 320., 320., 640.]]

    anchor_generator = ag.MultipleGridAnchorGenerator(box_specs_list,
                                                      base_anchor_size)
    anchors = anchor_generator.generate(
        feature_map_shape_list=[(tf.constant(1, dtype=tf.int32), tf.constant(
            2, dtype=tf.int32))],
        im_height=320,
        im_width=640)
    anchor_corners = anchors.get()

    with self.test_session():
      anchor_corners_out = anchor_corners.eval()
      self.assertAllClose(anchor_corners_out, exp_anchor_corners)

  def test_construct_multiple_grids(self):
    """Checks anchor count and selected corners for a 4x4 plus a 2x2 grid."""
    base_anchor_size = tf.constant([1.0, 1.0], dtype=tf.float32)
    box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)],
                      [(1.0, 1.0), (1.0, 0.5)]]

    # height and width of box with .5 aspect ratio
    h = np.sqrt(2)
    w = 1.0/np.sqrt(2)
    exp_small_grid_corners = [[-.25, -.25, .75, .75],
                              [.25-.5*h, .25-.5*w, .25+.5*h, .25+.5*w],
                              [-.25, .25, .75, 1.25],
                              [.25-.5*h, .75-.5*w, .25+.5*h, .75+.5*w],
                              [.25, -.25, 1.25, .75],
                              [.75-.5*h, .25-.5*w, .75+.5*h, .25+.5*w],
                              [.25, .25, 1.25, 1.25],
                              [.75-.5*h, .75-.5*w, .75+.5*h, .75+.5*w]]
    # only test first entry of larger set of anchors
    exp_big_grid_corners = [[.125-.5, .125-.5, .125+.5, .125+.5],
                            [.125-1.0, .125-1.0, .125+1.0, .125+1.0],
                            [.125-.5*h, .125-.5*w, .125+.5*h, .125+.5*w],]

    anchor_generator = ag.MultipleGridAnchorGenerator(
        box_specs_list, base_anchor_size)
    anchors = anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)],
                                        anchor_strides=[(.25, .25), (.5, .5)],
                                        anchor_offsets=[(.125, .125),
                                                        (.25, .25)])
    anchor_corners = anchors.get()

    with self.test_session():
      anchor_corners_out = anchor_corners.eval()
      # assertEqual replaces the deprecated assertEquals alias.
      self.assertEqual(anchor_corners_out.shape, (56, 4))
      big_grid_corners = anchor_corners_out[0:3, :]
      small_grid_corners = anchor_corners_out[48:, :]
      self.assertAllClose(small_grid_corners, exp_small_grid_corners)
      self.assertAllClose(big_grid_corners, exp_big_grid_corners)

  def test_construct_multiple_grids_with_clipping(self):
    """Checks that anchors are clipped to the [0, 0, 1, 1] window."""
    base_anchor_size = tf.constant([1.0, 1.0], dtype=tf.float32)
    box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)],
                      [(1.0, 1.0), (1.0, 0.5)]]

    # height and width of box with .5 aspect ratio
    h = np.sqrt(2)
    w = 1.0/np.sqrt(2)
    exp_small_grid_corners = [[0, 0, .75, .75],
                              [0, 0, .25+.5*h, .25+.5*w],
                              [0, .25, .75, 1],
                              [0, .75-.5*w, .25+.5*h, 1],
                              [.25, 0, 1, .75],
                              [.75-.5*h, 0, 1, .25+.5*w],
                              [.25, .25, 1, 1],
                              [.75-.5*h, .75-.5*w, 1, 1]]

    clip_window = tf.constant([0, 0, 1, 1], dtype=tf.float32)
    anchor_generator = ag.MultipleGridAnchorGenerator(
        box_specs_list, base_anchor_size, clip_window=clip_window)
    anchors = anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)])
    anchor_corners = anchors.get()

    with self.test_session():
      anchor_corners_out = anchor_corners.eval()
      small_grid_corners = anchor_corners_out[48:, :]
      self.assertAllClose(small_grid_corners, exp_small_grid_corners)

  def test_invalid_box_specs(self):
    """Checks that malformed box_specs_list values raise ValueError."""
    # not all box specs are pairs
    box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)],
                      [(1.0, 1.0), (1.0, 0.5, .3)]]
    with self.assertRaises(ValueError):
      ag.MultipleGridAnchorGenerator(box_specs_list)

    # box_specs_list is not a list of lists
    box_specs_list = [(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)]
    with self.assertRaises(ValueError):
      ag.MultipleGridAnchorGenerator(box_specs_list)

  def test_invalid_generate_arguments(self):
    """Checks that inconsistent generate() arguments raise ValueError."""
    base_anchor_size = tf.constant([1.0, 1.0], dtype=tf.float32)
    box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)],
                      [(1.0, 1.0), (1.0, 0.5)]]
    anchor_generator = ag.MultipleGridAnchorGenerator(
        box_specs_list, base_anchor_size)

    # incompatible lengths with box_specs_list
    with self.assertRaises(ValueError):
      anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)],
                                anchor_strides=[(.25, .25)],
                                anchor_offsets=[(.125, .125), (.25, .25)])
    with self.assertRaises(ValueError):
      anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2), (1, 1)],
                                anchor_strides=[(.25, .25), (.5, .5)],
                                anchor_offsets=[(.125, .125), (.25, .25)])
    with self.assertRaises(ValueError):
      anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)],
                                anchor_strides=[(.5, .5)],
                                anchor_offsets=[(.25, .25)])

    # not pairs
    with self.assertRaises(ValueError):
      anchor_generator.generate(feature_map_shape_list=[(4, 4, 4), (2, 2)],
                                anchor_strides=[(.25, .25), (.5, .5)],
                                anchor_offsets=[(.125, .125), (.25, .25)])
    with self.assertRaises(ValueError):
      anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)],
                                anchor_strides=[(.25, .25, .1), (.5, .5)],
                                anchor_offsets=[(.125, .125),
                                                (.25, .25)])
    # Note: (4), (.125) and (.25) are plain scalars, not 1-tuples; they are
    # rejected because they are not tuples at all.
    with self.assertRaises(ValueError):
      anchor_generator.generate(feature_map_shape_list=[(4), (2, 2)],
                                anchor_strides=[(.25, .25), (.5, .5)],
                                anchor_offsets=[(.125), (.25)])
class CreateSSDAnchorsTest(tf.test.TestCase):
  """Tests for the ag.create_ssd_anchors factory."""

  def _generated_anchor_shape(self, reduce_boxes_in_lowest_layer):
    """Builds the six-layer SSD anchors and returns the evaluated shape."""
    anchor_generator = ag.create_ssd_anchors(
        num_layers=6, min_scale=0.2, max_scale=0.95,
        aspect_ratios=(1.0, 2.0, 3.0, 1.0/2, 1.0/3),
        reduce_boxes_in_lowest_layer=reduce_boxes_in_lowest_layer)

    feature_map_shape_list = [(38, 38), (19, 19), (10, 10),
                              (5, 5), (3, 3), (1, 1)]
    anchors = anchor_generator.generate(
        feature_map_shape_list=feature_map_shape_list)
    anchor_corners = anchors.get()
    with self.test_session():
      return anchor_corners.eval().shape

  def test_create_ssd_anchors_returns_correct_shape(self):
    """Checks the total anchor count with and without the reduced layer."""
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(
        self._generated_anchor_shape(reduce_boxes_in_lowest_layer=True),
        (7308, 4))
    self.assertEqual(
        self._generated_anchor_shape(reduce_boxes_in_lowest_layer=False),
        (11640, 4))
if __name__ == '__main__':
  # Hand control to the TensorFlow test runner when executed as a script.
  tf.test.main()
import os
import cv2
import time
import argparse
import multiprocessing
import numpy as np
import tensorflow as tf
from utils import FPS, WebcamVideoStream
from multiprocessing import Process, Queue, Pool
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util
# All model and label paths are resolved relative to the directory the script
# is launched from.
CWD_PATH = os.getcwd()
# Path to the frozen detection graph, i.e. the model file that is loaded for
# object detection.
MODEL_NAME = 'ssd_mobilenet_v1_coco_11_06_2017'
PATH_TO_CKPT = os.path.join(CWD_PATH, 'object_detection', MODEL_NAME, 'frozen_inference_graph.pb')
# Path to the label map file used to attach a label to each detected box.
PATH_TO_LABELS = os.path.join(CWD_PATH, 'object_detection', 'data', 'mscoco_label_map.pbtxt')
# Upper bound on the number of classes taken from the label map.
NUM_CLASSES = 90
# Load the label map once at import time and build the category index that
# detect_objects() passes to the visualization helper.
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES,
use_display_name=True)
category_index = label_map_util.create_category_index(categories)
def detect_objects(image_np, sess, detection_graph):
    """Runs the detector on one frame and draws the detections onto it.

    Args:
        image_np: image array for a single frame; it is annotated in place by
            the visualization helper.
        sess: session used to run the detection graph.
        detection_graph: graph providing the 'image_tensor:0' input and the
            'detection_boxes:0', 'detection_scores:0', 'detection_classes:0'
            and 'num_detections:0' outputs.

    Returns:
        The same image_np array, with boxes and labels drawn on it.
    """
    graph_input = detection_graph.get_tensor_by_name('image_tensor:0')
    # Boxes locate detected objects; scores are the confidences shown next to
    # the class label on the result image.
    output_names = ('detection_boxes:0', 'detection_scores:0',
                    'detection_classes:0', 'num_detections:0')
    fetches = [detection_graph.get_tensor_by_name(tensor_name)
               for tensor_name in output_names]

    # The model expects a batch dimension: [1, None, None, 3].
    batched_image = np.expand_dims(image_np, axis=0)

    # Actual detection.
    det_boxes, det_scores, det_classes, _ = sess.run(
        fetches, feed_dict={graph_input: batched_image})

    # Visualization of the results of a detection.
    vis_util.visualize_boxes_and_labels_on_image_array(
        image_np,
        np.squeeze(det_boxes),
        np.squeeze(det_classes).astype(np.int32),
        np.squeeze(det_scores),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=8)
    return image_np
def worker(input_q, output_q):
    """Loads the frozen model and processes frames from input_q forever.

    Each frame taken from input_q is passed through detect_objects() and the
    annotated frame is put on output_q. The loop never terminates on its own;
    it only ends when an exception (e.g. KeyboardInterrupt) propagates, at
    which point the FPS counter is stopped and the session is closed.

    Args:
        input_q: queue yielding frames to process.
        output_q: queue receiving the annotated frames.
    """
    # Load a (frozen) Tensorflow model into memory.
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

        sess = tf.Session(graph=detection_graph)

    fps = FPS().start()
    try:
        while True:
            fps.update()
            frame = input_q.get()
            output_q.put(detect_objects(frame, sess, detection_graph))
    finally:
        # Previously placed after the infinite loop and therefore unreachable;
        # the finally clause makes sure the session is released on exit.
        fps.stop()
        sess.close()
if __name__ == '__main__':
    # Command-line options for the camera source, frame size and worker pool.
    parser = argparse.ArgumentParser()
    parser.add_argument('-src', '--source', dest='video_source', type=int,
                        default=0, help='Device index of the camera.')
    parser.add_argument('-wd', '--width', dest='width', type=int,
                        default=480, help='Width of the frames in the video stream.')
    parser.add_argument('-ht', '--height', dest='height', type=int,
                        default=360, help='Height of the frames in the video stream.')
    parser.add_argument('-num-w', '--num-workers', dest='num_workers', type=int,
                        default=2, help='Number of workers.')
    parser.add_argument('-q-size', '--queue-size', dest='queue_size', type=int,
                        default=5, help='Size of the queue.')
    args = parser.parse_args()

    logger = multiprocessing.log_to_stderr()
    logger.setLevel(multiprocessing.SUBDEBUG)

    # Bounded queues carry raw frames to the workers and annotated frames back.
    input_q = Queue(maxsize=args.queue_size)
    output_q = Queue(maxsize=args.queue_size)

    # The worker loop runs as the pool initializer in each pool process.
    # (The Process object that used to be created here was never started.)
    pool = Pool(args.num_workers, worker, (input_q, output_q))

    video_capture = WebcamVideoStream(src=args.video_source,
                                      width=args.width,
                                      height=args.height).start()
    fps = FPS().start()

    try:
        while True:  # fps._numFrames < 120
            frame = video_capture.read()
            input_q.put(frame)

            t = time.time()

            cv2.imshow('Video', output_q.get())
            fps.update()

            print('[INFO] elapsed time: {:.2f}'.format(time.time() - t))

            # Quit when the user presses 'q' in the video window.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        fps.stop()
        print('[INFO] elapsed time (total): {:.2f}'.format(fps.elapsed()))
        print('[INFO] approx. FPS: {:.2f}'.format(fps.fps()))
    finally:
        # Release the workers, camera and windows even if the loop raised.
        pool.terminate()
        video_capture.stop()
        cv2.destroyAllWindows()
#!/bin/bash
# conda "activate" script: must be sourced (bash or zsh). Validates the
# requested environment, deactivates any previous one, then updates PATH,
# CONDA_DEFAULT_ENV, CONDA_PREFIX and PS1 for the new environment.

# Determine the directory containing this script
if [[ -n $BASH_VERSION ]]; then
    _SCRIPT_LOCATION=${BASH_SOURCE[0]}
    _SHELL="bash"
elif [[ -n $ZSH_VERSION ]]; then
    _SCRIPT_LOCATION=${funcstack[1]}
    _SHELL="zsh"
else
    echo "Only bash and zsh are supported"
    return 1
fi
_CONDA_DIR=$(dirname "$_SCRIPT_LOCATION")

# At most one argument (the environment name or path) is accepted.
if [ $# -gt 1 ]; then
    (>&2 echo "Error: did not expect more than one argument.")
    (>&2 echo " (Got $@)")
    return 1
fi

case "$(uname -s)" in
    CYGWIN*|MINGW*|MSYS*)
        EXT=".exe"
        export MSYS2_ENV_CONV_EXCL=CONDA_PATH
        # ignore any windows backup paths from bat-based activation
        if [ "${CONDA_PATH_BACKUP:0:1}" != "/" ]; then
            unset CONDA_PATH_BACKUP
        fi
        export _CONDA_PYTHON="$_CONDA_DIR/../python"
        ;;
    *)
        EXT=""
        export _CONDA_PYTHON="$_CONDA_DIR/python"
        ;;
esac

# Ensure that this script is sourced, not executed
# Also note that errors are ignored as `activate foo` doesn't generate a bad
# value for $0 which would cause errors.
if [[ -n $BASH_VERSION ]] && [[ "$(basename "$0" 2> /dev/null)" == "activate" ]]; then
    (>&2 echo "Error: activate must be sourced. Run 'source activate envname'
instead of 'activate envname'.
")
    "$_CONDA_DIR/conda" ..activate $_SHELL$EXT -h
    exit 1
fi

# Default to the root environment when no argument is given.
if [ "$#" -eq "0" ]; then
    args=('root')
else
    args=$@
fi

"$_CONDA_DIR/conda" ..checkenv $_SHELL$EXT "$args"
if (( $? != 0 )); then
    return 1
fi

# Ensure we deactivate any scripts from the old env
_CONDA_HOLD=true
source "$_CONDA_DIR/deactivate"
unset _CONDA_HOLD

_NEW_PART=$("$_CONDA_DIR/conda" ..activate $_SHELL$EXT "$args")
# Capture the status immediately: inside the else branch below `$?` would be
# the status of the test (and then of `unset`), so the original
# `unset ...; return $?` always returned 0 and hid activation failures.
_CONDA_ACTIVATE_STATUS=$?
if (( _CONDA_ACTIVATE_STATUS == 0 )); then
    unset _CONDA_ACTIVATE_STATUS
    export CONDA_PATH_BACKUP="$PATH"
    # export this to restore it upon deactivation
    export CONDA_PS1_BACKUP="$PS1"

    # look if the deactivate script left a placeholder for us
    if [[ $PATH == *"CONDA_PATH_PLACEHOLDER"* ]]; then
        # If it did, replace it with our _NEW_PART
        export PATH="$($_CONDA_PYTHON -c "import re; print(re.sub(r'CONDA_PATH_PLACEHOLDER', r'$_NEW_PART', '$PATH', 1))")"
    else
        export PATH="$_NEW_PART:$PATH"
    fi

    # CONDA_DEFAULT_ENV is the shortest representation of how conda recognizes your env.
    # It can be an env name, or a full path.
    # Last date of change: 2016-06-21
    # If the string contains / it's a path
    if [[ "$@" == */* ]]; then
        # NOTE(review): get_abs_filename is not defined in this script;
        # confirm it is provided elsewhere before relying on this branch.
        export CONDA_DEFAULT_ENV=$(get_abs_filename "$args")
    else
        export CONDA_DEFAULT_ENV="$args"
    fi

    # CONDA_PREFIX is always the full path to the activated environment. It is not set
    # when no environment is active.
    # Legacy support: CONDA_DEFAULT_ENV is either env name or full path if given as path.
    # CONDA_PREFIX is always the full path, for consistency.
    # Last date of change: 2016-06-21
    firstpath=${_NEW_PART%%:*}
    export CONDA_PREFIX="$(echo ${firstpath} | sed "s|/bin$||")" &>/dev/null

    # if CONDA_DEFAULT_ENV not in PS1, prepend it with parentheses
    if [ $("$_CONDA_DIR/conda" ..changeps1) = "1" ]; then
        if ! $(grep -q CONDA_DEFAULT_ENV <<<$PS1); then
            if ! $(grep -q "POWERLINE" <<<$PS1); then
                export PS1="(${CONDA_DEFAULT_ENV}) $PS1"
            fi
        fi
    fi

    # Load any of the scripts found $PREFIX/etc/conda/activate.d AFTER activation
    _CONDA_D="${CONDA_PREFIX}/etc/conda/activate.d"
    if [[ -d "$_CONDA_D" ]]; then
        eval $(find "$_CONDA_D" -iname "*.sh" -exec echo source \'{}\'';' \;)
    fi
    unset _CONDA_PYTHON
else
    unset _CONDA_PYTHON
    # Propagate the real failure status of `conda ..activate`.
    return $_CONDA_ACTIVATE_STATUS
fi

unset CONDA_PATH

# Make the shell forget cached command locations so the new PATH takes effect.
if [[ -n $BASH_VERSION ]]; then
    hash -r
elif [[ -n $ZSH_VERSION ]]; then
    rehash
else
    echo "Only bash and zsh are supported"
    return 1
fi
#!/Users/tarrysingh/anaconda/bin/python
if __name__ == '__main__':
    # Run conda's command-line entry point and exit with its return value.
    import sys
    import conda.cli
    sys.exit(conda.cli.main())
#!/bin/bash
# conda "deactivate" script: must be sourced (bash or zsh). Runs the
# environment's deactivate.d hooks, restores PATH and PS1, and unsets the
# CONDA_* variables that activation exported.

# Determine the directory containing this script
if [[ -n $BASH_VERSION ]]; then
_SCRIPT_LOCATION=${BASH_SOURCE[0]}
_SHELL="bash"
elif [[ -n $ZSH_VERSION ]]; then
_SCRIPT_LOCATION=${funcstack[1]}
_SHELL="zsh"
else
echo "Only bash and zsh are supported"
return 1
fi
_CONDA_DIR=$(dirname "$_SCRIPT_LOCATION")
# On Windows-hosted shells the conda entry point carries an .exe suffix.
case "$(uname -s)" in
CYGWIN*|MINGW*|MSYS*)
EXT=".exe"
export MSYS2_ENV_CONV_EXCL=CONDA_PATH
;;
*)
EXT=""
;;
esac
# shift over all args. We don't accept any, so it's OK that we ignore them all here.
# NOTE(review): inside [[ ]] the `>` operator compares strings, not numbers;
# it appears to behave as intended for "$# > 0" but `-gt` would be clearer.
while [[ $# > 0 ]]
do
key="$1"
case $key in
-h|--help)
"$_CONDA_DIR/conda" ..deactivate $_SHELL$EXT -h
# Use exit when executed directly, return when sourced.
if [[ -n $BASH_VERSION ]] && [[ "$(basename "$0" 2> /dev/null)" == "deactivate" ]]; then
exit 0
else
return 0
fi
;;
esac
shift # past argument or value
done
# Ensure that this script is sourced, not executed
# Note that if the script was executed, we're running inside bash!
# Also note that errors are ignored as `activate foo` doesn't generate a bad
# value for $0 which would cause errors.
if [[ -n $BASH_VERSION ]] && [[ "$(basename "$0" 2> /dev/null)" == "deactivate" ]]; then
(>&2 echo "Error: deactivate must be sourced. Run 'source deactivate'
instead of 'deactivate'.
")
"$_CONDA_DIR/conda" ..deactivate $_SHELL$EXT -h
exit 1
fi
# Nothing to do when no PATH backup exists, i.e. no environment was activated
# by the activate script.
if [[ -z "$CONDA_PATH_BACKUP" ]]; then
if [[ -n $BASH_VERSION ]] && [[ "$(basename "$0" 2> /dev/null)" == "deactivate" ]]; then
exit 0
else
return 0
fi
fi
# NOTE(review): `$?` here is the status of the preceding `if` compound
# command rather than of any conda invocation, so the else branch below looks
# unreachable -- confirm the intended check.
if (( $? == 0 )); then
# Inverse of activation: run deactivate scripts prior to deactivating env
_CONDA_D="${CONDA_PREFIX}/etc/conda/deactivate.d"
if [[ -d $_CONDA_D ]]; then
eval $(find "$_CONDA_D" -iname "*.sh" -exec echo source \'{}\'';' \;)
fi
# # get the activation path that would have been provided for this prefix
# _LAST_ACTIVATE_PATH=$("$_CONDA_DIR/conda" ..activate $_SHELL$EXT "$CONDA_PREFIX")
#
# # in activate, we replace a placeholder so that conda keeps its place in the PATH order
# # The activate script sets _CONDA_HOLD here to activate that behavior.
# # Otherwise, PATH is simply removed.
# if [ -n "$_CONDA_HOLD" ]; then
# export PATH="$($_CONDA_PYTHON2 -c "import re; print(re.sub(r'$_LAST_ACTIVATE_PATH(:?)', r'CONDA_PATH_PLACEHOLDER\1', '$PATH', 1))")"
# else
# export PATH="$($_CONDA_PYTHON2 -c "import re; print(re.sub(r'$_LAST_ACTIVATE_PATH(:?)', r'', '$PATH', 1))")"
# fi
#
# unset _LAST_ACTIVATE_PATH
# Ask conda for the PATH with this environment's entries removed.
export PATH=$("$_CONDA_DIR/conda" ..deactivate.path $_SHELL$EXT "$CONDA_PREFIX")
unset CONDA_DEFAULT_ENV
unset CONDA_PREFIX
unset CONDA_PATH_BACKUP
# Restore the prompt saved by the activate script.
export PS1="$CONDA_PS1_BACKUP"
unset CONDA_PS1_BACKUP
unset _CONDA_PYTHON2
else
unset _CONDA_PYTHON2
# NOTE(review): `$?` at this point is the exit status of `unset`, not the
# original failure status.
return $?
fi
# Make the shell forget cached command locations after PATH changed.
if [[ -n $BASH_VERSION ]]; then
hash -r
elif [[ -n $ZSH_VERSION ]]; then
rehash
fi
# Tensorflow Object Detection API: Box Coder implementations.
# Build rules for the box coder implementations: one py_library plus one
# py_test per coder. All targets in this package are publicly visible.
package(
default_visibility = ["//visibility:public"],
)
licenses(["notice"])
# Apache 2.0

# Faster RCNN box coder library.
py_library(
name = "faster_rcnn_box_coder",
srcs = [
"faster_rcnn_box_coder.py",
],
deps = [
"//tensorflow_models/object_detection/core:box_coder",
"//tensorflow_models/object_detection/core:box_list",
],
)
# Test target for the Faster RCNN box coder.
py_test(
name = "faster_rcnn_box_coder_test",
srcs = [
"faster_rcnn_box_coder_test.py",
],
deps = [
":faster_rcnn_box_coder",
"//tensorflow",
"//tensorflow_models/object_detection/core:box_list",
],
)
# Keypoint box coder library; additionally depends on standard_fields.
py_library(
name = "keypoint_box_coder",
srcs = [
"keypoint_box_coder.py",
],
deps = [
"//tensorflow_models/object_detection/core:box_coder",
"//tensorflow_models/object_detection/core:box_list",
"//tensorflow_models/object_detection/core:standard_fields",
],
)
# Test target for the keypoint box coder.
py_test(
name = "keypoint_box_coder_test",
srcs = [
"keypoint_box_coder_test.py",
],
deps = [
":keypoint_box_coder",
"//tensorflow",
"//tensorflow_models/object_detection/core:box_list",
"//tensorflow_models/object_detection/core:standard_fields",
],
)
# Mean/stddev box coder library.
py_library(
name = "mean_stddev_box_coder",
srcs = [
"mean_stddev_box_coder.py",
],
deps = [
"//tensorflow_models/object_detection/core:box_coder",
"//tensorflow_models/object_detection/core:box_list",
],
)
# Test target for the mean/stddev box coder.
py_test(
name = "mean_stddev_box_coder_test",
srcs = [
"mean_stddev_box_coder_test.py",
],
deps = [
":mean_stddev_box_coder",
"//tensorflow",
"//tensorflow_models/object_detection/core:box_list",
],
)
# Square box coder library.
py_library(
name = "square_box_coder",
srcs = [
"square_box_coder.py",
],
deps = [
"//tensorflow_models/object_detection/core:box_coder",
"//tensorflow_models/object_detection/core:box_list",
],
)
# Test target for the square box coder.
py_test(
name = "square_box_coder_test",
srcs = [
"square_box_coder_test.py",
],
deps = [
":square_box_coder",
"//tensorflow",
"//tensorflow_models/object_detection/core:box_list",
],
)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Faster RCNN box coder.
Faster RCNN box coder follows the coding schema described below:
ty = (y - ya) / ha
tx = (x - xa) / wa
th = log(h / ha)
tw = log(w / wa)
where x, y, w, h denote the box's center coordinates, width and height
respectively. Similarly, xa, ya, wa, ha denote the anchor's center
coordinates, width and height. tx, ty, tw and th denote the anchor-encoded
center, width and height respectively.
See http://arxiv.org/abs/1506.01497 for details.
"""
import tensorflow as tf
from object_detection.core import box_coder
from object_detection.core import box_list
EPSILON = 1e-8
class FasterRcnnBoxCoder(box_coder.BoxCoder):
  """Box coder using the Faster RCNN center/size parameterization.

  Boxes are encoded relative to anchors as [ty, tx, th, tw], where ty/tx are
  center offsets normalized by the anchor size and th/tw are log size ratios.
  """

  def __init__(self, scale_factors=None):
    """Constructor for FasterRcnnBoxCoder.

    Args:
      scale_factors: List of 4 positive scalars to scale ty, tx, th and tw.
        If set to None, does not perform scaling. For Faster RCNN,
        the open-source implementation recommends using [10.0, 10.0, 5.0, 5.0].
    """
    if scale_factors:
      assert len(scale_factors) == 4
      assert all(factor > 0 for factor in scale_factors)
    self._scale_factors = scale_factors

  @property
  def code_size(self):
    """Number of values in one encoded box."""
    return 4

  def _encode(self, boxes, anchors):
    """Encodes a box collection relative to an anchor collection.

    Args:
      boxes: BoxList holding N boxes to be encoded.
      anchors: BoxList of anchors.

    Returns:
      a tensor representing N anchor-encoded boxes of the format
      [ty, tx, th, tw].
    """
    # Work in the center/size representation for both anchors and boxes.
    anchor_cy, anchor_cx, anchor_h, anchor_w = (
        anchors.get_center_coordinates_and_sizes())
    box_cy, box_cx, box_h, box_w = boxes.get_center_coordinates_and_sizes()

    # Pad every size with EPSILON so the divisions and logs stay finite.
    anchor_h += EPSILON
    anchor_w += EPSILON
    box_h += EPSILON
    box_w += EPSILON

    tx = (box_cx - anchor_cx) / anchor_w
    ty = (box_cy - anchor_cy) / anchor_h
    tw = tf.log(box_w / anchor_w)
    th = tf.log(box_h / anchor_h)

    # Scales location targets as used in paper for joint training.
    factors = self._scale_factors
    if factors:
      ty *= factors[0]
      tx *= factors[1]
      th *= factors[2]
      tw *= factors[3]

    stacked_codes = tf.stack([ty, tx, th, tw])
    return tf.transpose(stacked_codes)

  def _decode(self, rel_codes, anchors):
    """Decodes anchor-relative codes back into boxes.

    Args:
      rel_codes: a tensor representing N anchor-encoded boxes.
      anchors: BoxList of anchors.

    Returns:
      boxes: BoxList holding N bounding boxes.
    """
    anchor_cy, anchor_cx, anchor_h, anchor_w = (
        anchors.get_center_coordinates_and_sizes())

    ty, tx, th, tw = tf.unstack(tf.transpose(rel_codes))
    # Undo the scaling applied by _encode, if any.
    factors = self._scale_factors
    if factors:
      ty /= factors[0]
      tx /= factors[1]
      th /= factors[2]
      tw /= factors[3]

    box_w = tf.exp(tw) * anchor_w
    box_h = tf.exp(th) * anchor_h
    box_cy = ty * anchor_h + anchor_cy
    box_cx = tx * anchor_w + anchor_cx

    # Convert center/size back to corner coordinates.
    ymin = box_cy - box_h / 2.
    xmin = box_cx - box_w / 2.
    ymax = box_cy + box_h / 2.
    xmax = box_cx + box_w / 2.
    corners = tf.transpose(tf.stack([ymin, xmin, ymax, xmax]))
    return box_list.BoxList(corners)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.box_coders.faster_rcnn_box_coder."""
import tensorflow as tf
from object_detection.box_coders import faster_rcnn_box_coder
from object_detection.core import box_list
class FasterRcnnBoxCoderTest(tf.test.TestCase):
  """Checks FasterRcnnBoxCoder encoding and decoding against known values."""

  def test_get_correct_relative_codes_after_encoding(self):
    """Encoding without scale factors yields the expected codes."""
    box_corners = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
    anchor_corners = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
    expected_rel_codes = [[-0.5, -0.416666, -0.405465, -0.182321],
                          [-0.083333, -0.222222, -0.693147, -1.098612]]
    coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
    rel_codes = coder.encode(
        box_list.BoxList(tf.constant(box_corners)),
        box_list.BoxList(tf.constant(anchor_corners)))
    with self.test_session() as sess:
      rel_codes_out = sess.run(rel_codes)
      self.assertAllClose(rel_codes_out, expected_rel_codes)

  def test_get_correct_relative_codes_after_encoding_with_scaling(self):
    """Encoding with scale factors multiplies each code by its factor."""
    box_corners = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
    anchor_corners = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
    scale_factors = [2, 3, 4, 5]
    expected_rel_codes = [[-1., -1.25, -1.62186, -0.911608],
                          [-0.166667, -0.666667, -2.772588, -5.493062]]
    coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
        scale_factors=scale_factors)
    rel_codes = coder.encode(
        box_list.BoxList(tf.constant(box_corners)),
        box_list.BoxList(tf.constant(anchor_corners)))
    with self.test_session() as sess:
      rel_codes_out = sess.run(rel_codes)
      self.assertAllClose(rel_codes_out, expected_rel_codes)

  def test_get_correct_boxes_after_decoding(self):
    """Decoding without scale factors recovers the original boxes."""
    anchor_corners = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
    rel_codes = [[-0.5, -0.416666, -0.405465, -0.182321],
                 [-0.083333, -0.222222, -0.693147, -1.098612]]
    expected_boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
    coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
    decoded = coder.decode(
        rel_codes, box_list.BoxList(tf.constant(anchor_corners)))
    with self.test_session() as sess:
      boxes_out = sess.run(decoded.get())
      self.assertAllClose(boxes_out, expected_boxes)

  def test_get_correct_boxes_after_decoding_with_scaling(self):
    """Decoding with scale factors divides each code by its factor first."""
    anchor_corners = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
    rel_codes = [[-1., -1.25, -1.62186, -0.911608],
                 [-0.166667, -0.666667, -2.772588, -5.493062]]
    scale_factors = [2, 3, 4, 5]
    expected_boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
    coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
        scale_factors=scale_factors)
    decoded = coder.decode(
        rel_codes, box_list.BoxList(tf.constant(anchor_corners)))
    with self.test_session() as sess:
      boxes_out = sess.run(decoded.get())
      self.assertAllClose(boxes_out, expected_boxes)

  def test_very_small_Width_nan_after_encoding(self):
    """A nearly degenerate box still encodes to finite values.

    The first and third box coordinates differ by only 1e-7. Despite the
    test's name, the expected codes put the large negative value at index 2,
    which is the position of th in the coder's [ty, tx, th, tw] layout.
    """
    box_corners = [[10.0, 10.0, 10.0000001, 20.0]]
    anchor_corners = [[15.0, 12.0, 30.0, 18.0]]
    expected_rel_codes = [[-0.833333, 0., -21.128731, 0.510826]]
    coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
    rel_codes = coder.encode(
        box_list.BoxList(tf.constant(box_corners)),
        box_list.BoxList(tf.constant(anchor_corners)))
    with self.test_session() as sess:
      rel_codes_out = sess.run(rel_codes)
      self.assertAllClose(rel_codes_out, expected_rel_codes)
# Run every test case in this module when the file is executed directly.
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Keypoint box coder.
The keypoint box coder follows the coding schema described below (this is
similar to the FasterRcnnBoxCoder, except that it encodes keypoints in addition
to box coordinates):
ty = (y - ya) / ha
tx = (x - xa) / wa
th = log(h / ha)
tw = log(w / wa)
tky0 = (ky0 - ya) / ha
tkx0 = (kx0 - xa) / wa
tky1 = (ky1 - ya) / ha
tkx1 = (kx1 - xa) / wa
...
where x, y, w, h denote the box's center coordinates, width and height
respectively. Similarly, xa, ya, wa, ha denote the anchor's center
coordinates, width and height. tx, ty, tw and th denote the anchor-encoded
center, width and height respectively. ky0, kx0, ky1, kx1, ... denote the
keypoints' coordinates, and tky0, tkx0, tky1, tkx1, ... denote the
anchor-encoded keypoint coordinates.
"""
import tensorflow as tf
from object_detection.core import box_coder
from object_detection.core import box_list
from object_detection.core import standard_fields as fields
# Small constant added to box and anchor heights/widths before they are used
# in divisions and logarithms, so that zero-sized boxes do not produce NaNs.
EPSILON = 1e-8
class KeypointBoxCoder(box_coder.BoxCoder):
  """Keypoint box coder.

  Encodes boxes as [ty, tx, th, tw] relative to their anchors and, in addition,
  encodes every keypoint as its offset from the anchor center divided by the
  anchor size (y offsets by the anchor height, x offsets by the anchor width).
  """

  def __init__(self, num_keypoints, scale_factors=None):
    """Constructor for KeypointBoxCoder.

    Args:
      num_keypoints: Number of keypoints to encode/decode.
      scale_factors: List of 4 positive scalars to scale ty, tx, th and tw.
        In addition to scaling ty and tx, the first 2 scalars are used to scale
        the y and x coordinates of the keypoints as well. If set to None (or
        any other falsy value, such as an empty list), does not perform
        scaling.

    Raises:
      AssertionError: If scale_factors is non-empty but does not hold exactly
        4 positive scalars.
    """
    self._num_keypoints = num_keypoints
    self._scale_factors = scale_factors
    self._keypoint_scale_factors = None
    # Validate and build the keypoint scale tensor under ONE guard, the same
    # truthiness check _encode/_decode use. Previously the tensor was built
    # under `is not None`, so an empty list skipped validation and then raised
    # IndexError on scale_factors[0].
    if scale_factors:
      assert len(scale_factors) == 4
      for scalar in scale_factors:
        assert scalar > 0
      # Column vector [sy, sx, sy, sx, ...] with one entry per encoded keypoint
      # coordinate; it is tiled across boxes at encode/decode time.
      # NOTE(review): these ops are created when the coder is constructed, so
      # presumably the coder must be built in the graph it is used in; confirm.
      self._keypoint_scale_factors = tf.expand_dims(tf.tile(
          [tf.to_float(scale_factors[0]), tf.to_float(scale_factors[1])],
          [num_keypoints]), 1)

  @property
  def code_size(self):
    """Length of one code: 4 box values plus (y, x) for every keypoint."""
    return 4 + self._num_keypoints * 2

  def _encode(self, boxes, anchors):
    """Encode a box and keypoint collection with respect to anchor collection.

    Args:
      boxes: BoxList holding N boxes and keypoints to be encoded. Boxes are
        tensors with the shape [N, 4], and keypoints are tensors with the shape
        [N, num_keypoints, 2].
      anchors: BoxList of anchors.

    Returns:
      a tensor representing N anchor-encoded boxes of the format
      [ty, tx, th, tw, tky0, tkx0, tky1, tkx1, ...] where tky0 and tkx0
      represent the y and x coordinates of the first keypoint, tky1 and tkx1
      represent the y and x coordinates of the second keypoint, and so on.
    """
    # Convert anchors to the center coordinate representation.
    ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
    ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes()
    keypoints = boxes.get_field(fields.BoxListFields.keypoints)
    # Flatten each box's keypoints to (y0, x0, y1, x1, ...) and transpose so
    # that every row holds one keypoint coordinate across all boxes, matching
    # the row-per-coordinate layout of the stacked box codes below.
    keypoints = tf.transpose(tf.reshape(keypoints,
                                        [-1, self._num_keypoints * 2]))
    num_boxes = boxes.num_boxes()

    # Avoid NaN in division and log below.
    ha += EPSILON
    wa += EPSILON
    h += EPSILON
    w += EPSILON

    tx = (xcenter - xcenter_a) / wa
    ty = (ycenter - ycenter_a) / ha
    tw = tf.log(w / wa)
    th = tf.log(h / ha)

    # Repeat the (y, x) anchor centers and (height, width) anchor sizes once
    # per keypoint so they line up row-for-row with `keypoints`.
    tiled_anchor_centers = tf.tile(
        tf.stack([ycenter_a, xcenter_a]), [self._num_keypoints, 1])
    tiled_anchor_sizes = tf.tile(
        tf.stack([ha, wa]), [self._num_keypoints, 1])
    tkeypoints = (keypoints - tiled_anchor_centers) / tiled_anchor_sizes

    # Scales location targets as used in paper for joint training.
    if self._scale_factors:
      ty *= self._scale_factors[0]
      tx *= self._scale_factors[1]
      th *= self._scale_factors[2]
      tw *= self._scale_factors[3]
      tkeypoints *= tf.tile(self._keypoint_scale_factors, [1, num_boxes])

    tboxes = tf.stack([ty, tx, th, tw])
    # Rows are coordinates here; transpose to one row per box.
    return tf.transpose(tf.concat([tboxes, tkeypoints], 0))

  def _decode(self, rel_codes, anchors):
    """Decode relative codes to boxes and keypoints.

    Args:
      rel_codes: a tensor with shape [N, 4 + 2 * num_keypoints] representing N
        anchor-encoded boxes and keypoints
      anchors: BoxList of anchors.

    Returns:
      boxes: BoxList holding N bounding boxes and keypoints.
    """
    ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()

    num_codes = tf.shape(rel_codes)[0]
    result = tf.unstack(tf.transpose(rel_codes))
    ty, tx, th, tw = result[:4]
    # Stack the per-coordinate keypoint rows into one tensor explicitly rather
    # than relying on a Python list being converted implicitly by the
    # arithmetic operators below.
    tkeypoints = tf.stack(result[4:])
    if self._scale_factors:
      ty /= self._scale_factors[0]
      tx /= self._scale_factors[1]
      th /= self._scale_factors[2]
      tw /= self._scale_factors[3]
      tkeypoints /= tf.tile(self._keypoint_scale_factors, [1, num_codes])

    w = tf.exp(tw) * wa
    h = tf.exp(th) * ha
    ycenter = ty * ha + ycenter_a
    xcenter = tx * wa + xcenter_a
    ymin = ycenter - h / 2.
    xmin = xcenter - w / 2.
    ymax = ycenter + h / 2.
    xmax = xcenter + w / 2.
    decoded_boxes_keypoints = box_list.BoxList(
        tf.transpose(tf.stack([ymin, xmin, ymax, xmax])))

    # Same per-keypoint tiling of anchor centers and sizes as in _encode.
    tiled_anchor_centers = tf.tile(
        tf.stack([ycenter_a, xcenter_a]), [self._num_keypoints, 1])
    tiled_anchor_sizes = tf.tile(
        tf.stack([ha, wa]), [self._num_keypoints, 1])
    keypoints = tkeypoints * tiled_anchor_sizes + tiled_anchor_centers
    # Back to one row per box, then regroup the flat coordinates into (y, x)
    # pairs per keypoint.
    keypoints = tf.reshape(tf.transpose(keypoints),
                           [-1, self._num_keypoints, 2])
    decoded_boxes_keypoints.add_field(fields.BoxListFields.keypoints, keypoints)
    return decoded_boxes_keypoints
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.box_coders.keypoint_box_coder."""
import tensorflow as tf
from object_detection.box_coders import keypoint_box_coder
from object_detection.core import box_list
from object_detection.core import standard_fields as fields
class KeypointBoxCoderTest(tf.test.TestCase):
def test_get_correct_relative_codes_after_encoding(self):
  """Encoding boxes with keypoints, unscaled, yields the expected codes."""
  box_corners = [[10., 10., 20., 15.],
                 [0.2, 0.1, 0.5, 0.4]]
  keypoint_coords = [[[15., 12.], [10., 15.]],
                     [[0.5, 0.3], [0.2, 0.4]]]
  anchor_corners = [[15., 12., 30., 18.],
                    [0.1, 0.0, 0.7, 0.9]]
  # Each row: the four box codes followed by (tky, tkx) for every keypoint.
  expected_rel_codes = [
      [-0.5, -0.416666, -0.405465, -0.182321,
       -0.5, -0.5, -0.833333, 0.],
      [-0.083333, -0.222222, -0.693147, -1.098612,
       0.166667, -0.166667, -0.333333, -0.055556]
  ]
  num_keypoints = len(keypoint_coords[0])

  boxes = box_list.BoxList(tf.constant(box_corners))
  boxes.add_field(fields.BoxListFields.keypoints,
                  tf.constant(keypoint_coords))
  anchors = box_list.BoxList(tf.constant(anchor_corners))
  coder = keypoint_box_coder.KeypointBoxCoder(num_keypoints)
  rel_codes = coder.encode(boxes, anchors)
  with self.test_session() as sess:
    rel_codes_out = sess.run(rel_codes)
    self.assertAllClose(rel_codes_out, expected_rel_codes)
def test_get_correct_relative_codes_after_encoding_with_scaling(self):
  """Encoding with scale factors scales the box and keypoint codes."""
  box_corners = [[10., 10., 20., 15.],
                 [0.2, 0.1, 0.5, 0.4]]
  keypoint_coords = [[[15., 12.], [10., 15.]],
                     [[0.5, 0.3], [0.2, 0.4]]]
  anchor_corners = [[15., 12., 30., 18.],
                    [0.1, 0.0, 0.7, 0.9]]
  scale_factors = [2, 3, 4, 5]
  # Keypoint y/x codes are scaled by the first two factors (2 and 3).
  expected_rel_codes = [
      [-1., -1.25, -1.62186, -0.911608,
       -1.0, -1.5, -1.666667, 0.],
      [-0.166667, -0.666667, -2.772588, -5.493062,
       0.333333, -0.5, -0.666667, -0.166667]
  ]
  num_keypoints = len(keypoint_coords[0])

  boxes = box_list.BoxList(tf.constant(box_corners))
  boxes.add_field(fields.BoxListFields.keypoints,
                  tf.constant(keypoint_coords))
  anchors = box_list.BoxList(tf.constant(anchor_corners))
  coder = keypoint_box_coder.KeypointBoxCoder(
      num_keypoints, scale_factors=scale_factors)
  rel_codes = coder.encode(boxes, anchors)
  with self.test_session() as sess:
    rel_codes_out = sess.run(rel_codes)
    self.assertAllClose(rel_codes_out, expected_rel_codes)
def test_get_correct_boxes_after_decoding(self):
  """Decoding unscaled codes recovers both the boxes and the keypoints."""
  anchor_corners = [[15., 12., 30., 18.],
                    [0.1, 0.0, 0.7, 0.9]]
  rel_codes = [
      [-0.5, -0.416666, -0.405465, -0.182321,
       -0.5, -0.5, -0.833333, 0.],
      [-0.083333, -0.222222, -0.693147, -1.098612,
       0.166667, -0.166667, -0.333333, -0.055556]
  ]
  expected_boxes = [[10., 10., 20., 15.],
                    [0.2, 0.1, 0.5, 0.4]]
  expected_keypoints = [[[15., 12.], [10., 15.]],
                        [[0.5, 0.3], [0.2, 0.4]]]
  num_keypoints = len(expected_keypoints[0])

  anchors = box_list.BoxList(tf.constant(anchor_corners))
  coder = keypoint_box_coder.KeypointBoxCoder(num_keypoints)
  decoded = coder.decode(rel_codes, anchors)
  # Fetch the box corners and the keypoints field in a single run.
  fetches = [decoded.get(),
             decoded.get_field(fields.BoxListFields.keypoints)]
  with self.test_session() as sess:
    boxes_out, keypoints_out = sess.run(fetches)
    self.assertAllClose(boxes_out, expected_boxes)
    self.assertAllClose(keypoints_out, expected_keypoints)
def test_get_correct_boxes_after_decoding_with_scaling(self):
anchors = [[15., 12., 30., 18.],
[0.1, 0.0, 0.7, 0.9]]
rel_codes = [
[-1., -1.25, -1.62186, -0.911608,
-1.0, -1.5, -1.666667, 0.],
[-0.166667, -0.666667, -2.772588, -5.493062,
0.333333, -0.5, -0.666667, -0.166667]
]
scale_factors = [2, 3, 4, 5]
expected_boxes = [[10., 10., 20., 15.],
[0.2, 0.1, 0.5, 0.4]]
expected_keypoints = [[[15., 12.], [10., 15.]],
[[0.5, 0.3], [0.2, 0.4]]]
num_keypoints = len(expected_keypoints[0])
anchors = box_list.BoxList(tf.constant(anchors))