Last active April 5, 2021 00:49
README: TF Object Detection


  1. Install tensorflow-gpu by running
  conda install -c anaconda tensorflow-gpu
  1. Install the NVIDIA dependencies required for GPU support (GPU driver, CUDA Toolkit, cuDNN) as described at https://www.tensorflow.org/install/gpu

  2. You need the following libraries

sudo apt-get install protobuf-compiler python-pil python-lxml python-tk
sudo pip install jupyter
sudo pip install matplotlib
  1. Make sure your annotations are in PASCAL VOC format.


  1. Get the Tensorflow Object Detection API
git clone https://github.com/tensorflow/models.git
  1. The Tensorflow Object Detection API uses Protobufs to configure model and training parameters; compile Protobuf libraries
cd models/research
protoc object_detection/protos/*.proto --python_out=.
  1. Add Libraries to PYTHONPATH
# From models/research/
export PYTHONPATH=$PYTHONPATH:`pwd`:`pwd`/slim
  1. Test Installation
python object_detection/builders/model_builder_test.py

Train your own object detector

Proposed directory tree

  ├── images
  │   └── train
  │   |   └── IMG1.jpg
  │   |   └── IMG1.xml
  │   |   └── IMG2.jpg
  │   |   └── IMG2.xml
  │   └── test
  │   |   └── IMG_A.jpg
  │   |   └── IMG_A.xml
  │   |   └── IMG_B.jpg
  │   |   └── IMG_B.xml
  ├── data
  │   └── train_labels.csv
  │   └── train.record
  │   └── test_labels.csv
  │   └── test.record
  │   └── tf_object_detection.pbtxt
  ├── training
  │   └── ssd_mobilenet_v1_coco.config
  ├── ssd_mobilenet_v1_coco_2017_11_17
  │   └── checkpoint
  │   └── frozen_inference_graph.pb
  │   └── ...

Data Preparation

  1. Create necessary folders
mkdir tf_object_detection
cd tf_object_detection/
mkdir images data
  1. Split the data into train and test sets
# from tf_object_detection/images/
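# save the split script listed at the end of this README as split_dataset.py (any file name works)
python split_dataset.py --annotations=Annotations/ --images=JPEGImages/ --testsize=0.1 --outputdir=.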
  1. Convert annotations from xml to csv
cd tf_object_detection
# get the following script for conversion

Make the following changes to xml_to_csv.py:

def main():
    """Write one labels CSV per dataset split ('train' and 'test').

    For each split, the annotations under ``images/<split>`` are converted
    with ``xml_to_csv`` and saved to ``data/<split>_labels.csv``.
    """
    for split_name in ('train', 'test'):
        annotations_dir = 'images/{}'.format(split_name)
        labels_csv = 'data/{}_labels.csv'.format(split_name)
        # index=None keeps the DataFrame index out of the CSV file.
        xml_to_csv(annotations_dir).to_csv(labels_csv, index=None)

    print('Successfully converted xml to csv.')

and run:

python xml_to_csv.py

  1. Convert dataset to TFRecords
cd tf_object_detection
# get the following script for conversion

Make the following changes to generate_tfrecord.py:

# NOTE: If you have multiple categories..
def class_text_to_int(row_label):
    """Return the integer class id for a class name.

    Args:
        row_label: class name to look up.

    Returns:
        1 for 'boat', 2 for 'buoy', 3 for 'other'; ``None`` for any other
        value.
    """
    known_classes = (('boat', 1), ('buoy', 2), ('other', 3))
    for class_name, class_id in known_classes:
        if row_label == class_name:
            return class_id
    return None
def main(_):
    # Reads the labels CSV named by FLAGS.csv_input, groups its rows by
    # 'filename' and builds a tf.Example for each group.
    # NOTE(review): this excerpt looks truncated. The inconsistent indentation
    # below suggests the conditional lines that chose between images/train and
    # images/test were lost, and no writer.write()/writer.close() call is
    # visible. Compare against the complete script before using it.
    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    examples = pd.read_csv(FLAGS.csv_input)
    grouped = split(examples, 'filename')
    for group in grouped:
          # Build the example with images/train as the image directory.
          path = os.getcwd() + '/images/train'
          tf_example = create_tf_example(group, path)

          # As written, this overwrites the example built just above.
          # NOTE(review): presumably these were alternative branches - confirm.
          path = os.getcwd() + '/images/test'
          tf_example = create_tf_example(group, path)

        # NOTE(review): presumably the fallback branch for a file found in
        # neither directory - confirm.
        print('Check the path to {}'.format(group.filename))


    # Report the output location as a path under the current working directory.
    output_path = os.path.join(os.getcwd(), FLAGS.output_path)
    print('Successfully created the TFRecords: {}'.format(output_path))

and run..

python generate_tfrecord.py --csv_input=data/train_labels.csv  --output_path=data/train.record
python generate_tfrecord.py --csv_input=data/test_labels.csv  --output_path=data/test.record

Model Selection and Preparation

  1. Download the model (.tar.gz) from the table here and extract it to tf_object_detection/

  2. Download the model config file (.config) from here

  • change num_classes to the number of objects in your dataset
  • edit the following chunk:
fine_tune_checkpoint: "ssd_mobilenet_v1_coco_2017_11_17/model.ckpt" # NOTE: enter your model name
train_input_reader: {
  tf_record_input_reader {
    input_path: "data/train.record" # NOTE: path to train.record
  }
  label_map_path: "data/tf_object_detection.pbtxt" # NOTE: path to label map
}
eval_input_reader: {
  tf_record_input_reader {
    input_path: "data/test.record" # NOTE: path to test.record
  }
  label_map_path: "data/tf_object_detection.pbtxt" # NOTE: path to label map
  shuffle: false
  num_readers: 1
}
  1. Create labelmap
# from tf_object_detection/data/
nano tf_object_detection.pbtxt

# add the following to tf_object_detection.pbtxt
# NOTE: the ids and names here must match the ones returned by class_text_to_int
item {
  id: 1
  name: 'boat'
}

item {
  id: 2
  name: 'land'
}

item {
  id: 3
  name: 'miscellaneous'
}
  1. Training preparation
# from tf_object_detection
cp -r data/ models/research/object_detection/.
cp -r images/ models/research/object_detection/.
cp -r ssd_mobilenet_v1_coco_2017_11_17/ models/research/object_detection/.
cp ssd_mobilenet_v1_coco.config models/research/object_detection/.


  1. Start training!
# from models/research/object_detection/
mkdir training/
python train.py --logtostderr --train_dir=training/ --pipeline_config_path=ssd_mobilenet_v1_coco.config --num_clones=2 --ps_tasks=1
  1. Stop Training
    Stop training once there is no significant decrease in error over the past hour or so. Or add EarlyStopping

  2. Export inference graph

# For trained_checkpoint_prefix, get the latest checkpoint file that has all three files (meta, index, ckpt)
# Change the output_directory name as you wish
python export_inference_graph.py --input_type image_tensor --pipeline_config_path training/ssd_mobilenet_v1_coco.config --trained_checkpoint_prefix training/model.ckpt-9858 --output_directory boat_detection_graph
  1. Test model
# get script for evaluation


# example
# python -i GOPR0293.MP4 -n 7 -m detection_graph_rfcn_resnet101 -l tf_object_detection.pbtxt -o out.MP4
  1. For later use
    Move the events.out*(files used by tensorboard), model.ckpt.*(checkpoint) files, and checkpoint to detection_graph_$XYZ to save it for later use.
mv training/model.ckpt* detection_graph_$XYZ/.
mv training/events.out* detection_graph_$XYZ/.
mv training/checkpoint detection_graph_$XYZ/.
  1. Extras

    • check GPU usage: watch -n 0.5 nvidia-smi
    • check TensorBoard
    cd models/research/object_detection/
    tensorboard --logdir=training/
    # go to the link shown
  2. You can continue training by modifying step 13 by changing the train_dir to detection_graph_$XYZ

python train.py --logtostderr --train_dir=detection_graph_rfcn_resnet101/ --pipeline_config_path=training/rfcn_resnet101_coco.config

To check TensorBoard:

cd models/research/object_detection/
tensorboard --logdir=detection_graph_$XYZ/
# go to the link shown

Fast iterations

  1. If you have got new data, generate the .csv and .record files, repeat steps 2, 4, 5 (edit def class_text_to_int), 6, 10 (change num_classes), 11 and 12 from above.
  2. If you want to try new model, repeat steps 7, 8, 9, 11 (change fine_tune_checkpoint), 12 (copy just the model and .config file in training to models/research/object_detection/), clean up models/research/object_detection/training/ so that it only contains .config files, and 13 (start training with your new config file).
  3. Make sure you have different name for output_directory in step 16 for every new dataset/model


  1. MemoryError or ResourceExhaustedError: change the batch size to something smaller
  2. ImportError: No module named 'deployment' or No module named 'nets'; Fix:
    cd models/research/
    export PYTHONPATH=$PYTHONPATH:`pwd`:`pwd`/slim
  3. ValueError: not enough values to unpack: change the batch_size in the .config file to match the num_clones
  4. ValueError: Tried to convert 't' to a tensor and failed.:
  • In models/research/object_detection/utils/learning_schedules.py, change
    rate_index = tf.reduce_max(tf.where(tf.greater_equal(global_step, boundaries), range(num_boundaries), [0] * num_boundaries))
    to (in Python 3, range() is not a list and must be converted explicitly)
    rate_index = tf.reduce_max(tf.where(tf.greater_equal(global_step, boundaries), list(range(num_boundaries)),
    [0] * num_boundaries))
  • Verify data (xml, csv, tfrecords) thoroughly for malformed dataset
Purpose: Split the dataset into train and test sets
import argparse
import os
from shutil import copyfile

from tqdm import tqdm

try:
    from sklearn.model_selection import train_test_split
except ImportError:  # scikit-learn < 0.18 only ships the old module
    from sklearn.cross_validation import train_test_split
def build_parser():
    """Build the command-line parser for the dataset splitter.

    Returns:
        argparse.ArgumentParser: parser exposing ``--annotations``,
        ``--images``, ``--outputdir`` and ``--testsize``.
    """
    parser = argparse.ArgumentParser(description='Split dataset into train and test set')
    parser.add_argument('-a', '--annotations', required=True,
                        help='path to annotations\' directory')
    parser.add_argument('-i', '--images', required=True,
                        help='path to images\' directory')
    parser.add_argument('-o', '--outputdir', required=True,
                        help='where do you want your train and test directories?')
    # '%' is doubled because argparse %-formats help strings when rendering
    # --help; a bare '%' makes the help output fail.
    # NOTE(review): the original default was lost in the paste; 0.1 mirrors the
    # value used in the README example - confirm.
    parser.add_argument('-s', '--testsize', type=float, default=0.1,
                        help='test set size %% (0 to 1)')
    return parser


def ensure_dir(path):
    """Create *path* (including parents) if it is missing and report it."""
    if not os.path.isdir(path):
        os.makedirs(path)
        print("\nCreated {} directory\n".format(path))


def list_annotations(annotations_dir):
    """Return the sorted names of the '.xml' files inside *annotations_dir*."""
    return sorted(name for name in os.listdir(annotations_dir) if name.endswith('.xml'))


def copy_split(annotation_files, annotations_dir, images_dir, target_dir, position, desc):
    """Copy every annotation and its '.jpg' image into *target_dir*.

    Args:
        annotation_files: annotation file names ('.xml') to copy.
        annotations_dir: directory that holds the annotation files.
        images_dir: directory that holds the matching '.jpg' images.
        target_dir: destination directory for both files.
        position: line offset of the tqdm progress bar.
        desc: label shown next to the progress bar.

    Returns:
        int: number of annotation/image pairs that could not be copied.
    """
    bad_files = 0
    with tqdm(total=len(annotation_files), position=position, desc=desc) as pbar:
        for annotation in annotation_files:
            # Swap only the extension, not every '.xml' inside the name.
            image = os.path.splitext(annotation)[0] + ".jpg"
            try:
                # Image first: if it is missing, no orphan annotation is left
                # behind in the target directory.
                copyfile(os.path.join(images_dir, image), os.path.join(target_dir, image))
                copyfile(os.path.join(annotations_dir, annotation),
                         os.path.join(target_dir, annotation))
            except OSError:
                bad_files += 1
            pbar.update(1)
    return bad_files


def main():
    """Split the annotated images into 'train' and 'test' directories.

    Reads the command-line arguments, creates ``<outputdir>/train`` and
    ``<outputdir>/test``, randomly splits the annotation files and copies each
    annotation together with its image into the matching directory.

    Raises:
        SystemExit: if the annotations directory holds no '.xml' file.
    """
    args = build_parser().parse_args()

    # create train and test directories
    train_dir = os.path.join(args.outputdir, "train")
    test_dir = os.path.join(args.outputdir, "test")
    ensure_dir(train_dir)
    ensure_dir(test_dir)

    # get annotations only ending with '.xml'
    annots = list_annotations(args.annotations)
    if not annots:
        raise SystemExit("No '.xml' annotations found in {}".format(args.annotations))

    # split the data into test and train
    X_train, X_test = train_test_split(annots, test_size=args.testsize, random_state=None)
    print("\nTraining set size: ", len(X_train), "\nTest set size: ", len(X_test))

    # copy the files according to the split
    bad_files = copy_split(X_train, args.annotations, args.images, train_dir,
                           position=1, desc="Copying train set..")
    bad_files += copy_split(X_test, args.annotations, args.images, test_dir,
                            position=3, desc="Copying test set..")
    print("\n\nBad files count: ", bad_files)


if __name__ == "__main__":
    main()
