Skip to content

Instantly share code, notes, and snippets.

@harshilpatel312
Last active October 3, 2023 02:24
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 3 You must be signed in to fork a gist
  • Save harshilpatel312/ff08b49fd71a3eeaeb209c91de3dfde1 to your computer and use it in GitHub Desktop.
Save harshilpatel312/ff08b49fd71a3eeaeb209c91de3dfde1 to your computer and use it in GitHub Desktop.
Multiple object tracker (OpenCV + Python)

Prerequisites

  1. Install opencv
pip install opencv-python
pip install opencv-contrib-python

Usage

  1. Clone multiobjecttracker.py and pascal_voc_io.py.
wget https://gist.githubusercontent.com/harshilpatel312/ff08b49fd71a3eeaeb209c91de3dfde1/raw/726e0b2086068d959d79902e49663191533a66e7/multiobjecttracker.py

wget https://gist.githubusercontent.com/harshilpatel312/ff08b49fd71a3eeaeb209c91de3dfde1/raw/726e0b2086068d959d79902e49663191533a66e7/pascal_voc_io.py
  1. To start the tracker, run
python multiobjecttracker.py --input=$(PATH_TO_VIDEO) --dir=$(PATH_TO_OUTPUT_DIR) --frame=$(FRAME_NUMBER_TO_START_DETECTION_FROM)
# example: `python multiobjecttracker.py --input=/home/hkp/projects/tracker/data/video.MP4 --dir=data_tracker/ --frame=1`
  1. The script will open the video frame mentioned in the --frame argument above. You can annotate as many objects as you want to.

  2. The annotation procedure is as follows:

    4.1 Draw bbox around an object

    4.2 Switch to terminal

    4.3 Label the object

    4.4 Switch back to the OpenCV video window

    4.5 Repeat 4.1 to 4.4 to label more objects. If there are no more objects to label, press ESC/ENTER key on opencv window, and press ENTER key on terminal when it asks you to label.

  3. To track, switch to opencv window, keep pressing ENTER key to track the object frame by frame. (Toggle between fast and slow tracking by pressing 'e' and 'w' respectively).

  4. If the tracker is misbehaving, press '0' (zero) key and relabel the objects as shown in steps 4.1 to 4.4. It is recommended that you slow down the tracker by pressing 'w' and then press '0' to relabel.

  5. To exit the tracking interface, press ESC on opencv window and type 'y' on terminal.

Verification

  1. Clone verification.py.
wget https://gist.githubusercontent.com/harshilpatel312/ff08b49fd71a3eeaeb209c91de3dfde1/raw/73c05f1df86e5d4743265ffb4c13a4be3b00572a/verification.py
  1. Run:
python verification.py --directory=$(DATA_DIRECTORY) --output=something.mp4
  1. Check something.mp4 to see if data is acceptable.
'''
Purpose: Easy video labeling using tracking
'''
import numpy as np
import cv2
import os
from collections import defaultdict
from pascal_voc_io import XMLWriter
import argparse
from tqdm import tqdm

argparser = argparse.ArgumentParser(description='Multitracker for objects in the video')
argparser.add_argument('-i', '--input',
                       help='input video (.mp4)')
argparser.add_argument('-f', '--frame',
                       help='frame number to start annotation from',
                       default='1')
argparser.add_argument('-d', '--dir',
                       help='relative path to output directory')
args = argparser.parse_args()

INPUT_VIDEO = args.input
OUTPUT_DIR = args.dir
FRAME_NUMBER = int(args.frame)

#############################################
# Open the video and advance to the frame annotation should start from.
frame = FRAME_NUMBER
cap = cv2.VideoCapture(INPUT_VIDEO)
if not cap.isOpened():
    # Fix: fail fast instead of passing a None image to selectROI below.
    raise SystemExit("Could not open video: {}".format(INPUT_VIDEO))
image = None
for i in tqdm(range(frame)):
    ret, image = cap.read()
    if not ret:
        # Fix: the original ignored `ret`, so a too-large --frame left
        # `image` as None and crashed later in cv2.selectROI.
        raise SystemExit("Video ended before frame {}".format(frame))

# Make sure the output directory exists before any .jpg/.xml is written.
if not os.path.isdir(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)
    print("Created {} directory\n".format(OUTPUT_DIR))

# initialize multitracker
tracker = cv2.MultiTracker_create()
tracker_list = []   # list of [label, bbox] pairs currently being tracked
init_once = False

# Draw a bbox around each object in the OpenCV window, then type its label
# on the terminal; an empty label ends the annotation phase.
while True:
    bbox = cv2.selectROI('tracking', image)
    label = input("Label the object: ")
    if not label:
        break
    tracker_list.append([label, bbox])
# Main tracking loop: step through the video frame by frame, updating the
# trackers and writing one .jpg + PASCAL-VOC .xml per frame.
waitkey = 0     # cv2.waitKey delay: 0 = block until a key (slow), 1 = 1 ms (fast)
while cap.isOpened():
    ret, image = cap.read()
    if not ret:
        # Fix: the original had no else-branch for a failed read, so the
        # loop spun forever at end of video. Stop cleanly instead.
        break

    # Fix: read the key ONCE per frame. The original called cv2.waitKey
    # separately for every check, so with waitkey=0 each frame consumed
    # several key presses instead of the single ENTER the docs describe.
    key = cv2.waitKey(waitkey)
    if key == 27:  # ESC: confirm on the terminal before quitting
        ans = input("\nDo you want to quit? [y/N] ")
        if ans == 'y' or ans == 'Y':
            break
        # Declined: skip processing this frame, matching the original flow.
        continue
    elif key == ord('f'):
        # press 'f' to print out the current frame number
        print("Frame number: ", frame)
    elif key == ord('w'):
        # press 'w' to switch waitkey to 0 (slow, step-by-step tracking)
        print("\nChanging waitkey to 0\n")
        waitkey = 0
    elif key == ord('e'):
        # press 'e' to switch waitkey to 1 (fast tracking)
        print("\nChanging waitkey to 1\n")
        waitkey = 1
    elif key == ord('0'):
        # press '0' to reinitialize the tracker (+ bboxes and labels)
        tracker_list = []
        tracker = cv2.MultiTracker_create()
        init_once = False
        while True:
            bbox = cv2.selectROI('tracking', image)
            label = input("Label the object: ")
            if not label:
                break
            tracker_list.append([label, bbox])

    # Register the labelled bboxes with the multitracker (once per labelling).
    if not init_once:
        for label, bbox in tracker_list:
            tracker.add(cv2.TrackerMIL_create(), image, bbox)
        init_once = True

    # update tracker
    ret, boxes = tracker.update(image)
    try:
        for idx, item in enumerate(tracker_list):
            item[1] = list(boxes[idx])
    except IndexError:
        # Fix: narrowed the bare `except:` — only the boxes/labels length
        # mismatch is the expected failure here.
        print('tracker_list problem')

    # write .jpg and .xml to disk
    filename = str(frame).rjust(5, '0') + '.jpg'
    cv2.imwrite(OUTPUT_DIR + filename, image)
    XMLWriter(folder=OUTPUT_DIR,
              filename=filename,
              imgSize=image.shape,
              localImgPath=OUTPUT_DIR + filename,
              detection_info=tracker_list)

    # draw tracker bboxes on the displayed image
    for newbox in boxes:
        p1 = (int(newbox[0]), int(newbox[1]))
        p2 = (int(newbox[0] + newbox[2]), int(newbox[1] + newbox[3]))
        cv2.rectangle(image, p1, p2, (200, 0, 0))
    cv2.imshow('tracking', image)
    frame += 1
#!/usr/bin/env python
# -*- coding: utf8 -*-
import codecs
import os
import sys
from xml.etree import ElementTree
from xml.etree.ElementTree import Element, SubElement

from lxml import etree

XML_EXT = '.xml'
ENCODE_METHOD = 'utf-8'
class PascalVocReader:
    """Reads a PASCAL VOC .xml annotation file back into a list of shapes.

    Each shape is a tuple:
        (label, [(x1, y1), (x2, y2), (x3, y3), (x4, y4)], color, color, difficult)
    where the four points are the bbox corners in clockwise order starting at
    the top-left, and both color slots are always None here.
    """

    def __init__(self):
        # Fix: initialise `shapes` here so addShape() can be called before
        # parseXML() without raising AttributeError.
        self.shapes = []
        self.verified = False

    def addShape(self, label, bndbox, difficult):
        """Append one labelled box (an ElementTree <bndbox> node) to shapes."""
        xmin = int(bndbox.find('xmin').text)
        ymin = int(bndbox.find('ymin').text)
        xmax = int(bndbox.find('xmax').text)
        ymax = int(bndbox.find('ymax').text)
        points = [(xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax)]
        self.shapes.append((label, points, None, None, difficult))

    def parseXML(self, filepath):
        """Parse a VOC .xml file and return the list of shapes found in it.

        Also sets self.verified from the optional root-level 'verified'
        attribute. Raises AssertionError for non-.xml paths.
        """
        self.shapes = []
        assert filepath.endswith(XML_EXT), "Unsupported file format"
        parser = etree.XMLParser(encoding=ENCODE_METHOD)
        xmltree = ElementTree.parse(filepath, parser=parser).getroot()
        filename = xmltree.find('filename').text
        # The 'verified' attribute is optional on the root element.
        try:
            verified = xmltree.attrib['verified']
            if verified == 'yes':
                self.verified = True
        except KeyError:
            self.verified = False
        for object_iter in xmltree.findall('object'):
            bndbox = object_iter.find("bndbox")
            label = object_iter.find('name').text
            # 'difficult' is optional; default to False when absent.
            difficult = False
            if object_iter.find('difficult') is not None:
                difficult = bool(int(object_iter.find('difficult').text))
            self.addShape(label, bndbox, difficult)
        return self.shapes
class PascalVocWriter:
    """Builds and saves a PASCAL VOC .xml annotation file for one image."""

    def __init__(self, foldername, filename, imgSize, databaseSrc='Unknown', localImgPath=None):
        # imgSize is the numpy-style shape tuple: (height, width[, depth]).
        self.foldername = foldername
        self.filename = filename
        self.databaseSrc = databaseSrc
        self.imgSize = imgSize
        self.boxlist = []       # queued bbox dicts, see addBndBox()
        self.localImgPath = localImgPath
        self.verified = False

    def prettify(self, elem):
        """
        Return a pretty-printed XML byte string for the Element,
        with tab indentation (spaces replaced by tabs).
        """
        rough_string = ElementTree.tostring(elem, 'utf8')
        root = etree.fromstring(rough_string)
        return etree.tostring(root, pretty_print=True, encoding=ENCODE_METHOD).replace(" ".encode(), "\t".encode())

    def genXML(self):
        """
        Return the <annotation> XML root element, or None if any of
        filename / foldername / imgSize is missing.
        """
        if self.filename is None or \
                self.foldername is None or \
                self.imgSize is None:
            return None
        top = Element('annotation')
        if self.verified:
            top.set('verified', 'yes')
        folder = SubElement(top, 'folder')
        folder.text = self.foldername
        filename = SubElement(top, 'filename')
        filename.text = self.filename
        if self.localImgPath is not None:
            localImgPath = SubElement(top, 'path')
            localImgPath.text = self.localImgPath
        source = SubElement(top, 'source')
        database = SubElement(source, 'database')
        database.text = self.databaseSrc
        size_part = SubElement(top, 'size')
        width = SubElement(size_part, 'width')
        height = SubElement(size_part, 'height')
        depth = SubElement(size_part, 'depth')
        width.text = str(self.imgSize[1])
        height.text = str(self.imgSize[0])
        # Grayscale images have a 2-element shape; depth defaults to 1.
        if len(self.imgSize) == 3:
            depth.text = str(self.imgSize[2])
        else:
            depth.text = '1'
        segmented = SubElement(top, 'segmented')
        segmented.text = '0'
        return top

    def addBndBox(self, xmin, ymin, xmax, ymax, name, difficult):
        """Queue one labelled bounding box for the next genXML/appendObjects."""
        bndbox = {'xmin': xmin, 'ymin': ymin, 'xmax': xmax, 'ymax': ymax,
                  'name': name, 'difficult': difficult}
        self.boxlist.append(bndbox)

    def appendObjects(self, top):
        """Append one <object> element per queued bbox to the root `top`."""
        for each_object in self.boxlist:
            object_item = SubElement(top, 'object')
            name = SubElement(object_item, 'name')
            # Fix: dropped the Python-2 `unicode()` fallback — on Python 3 the
            # `except NameError` branch always ran, so this is identical.
            name.text = each_object['name']
            pose = SubElement(object_item, 'pose')
            pose.text = "Unspecified"
            truncated = SubElement(object_item, 'truncated')
            # A box touching the image border counts as truncated.
            if int(each_object['ymax']) == int(self.imgSize[0]) or (int(each_object['ymin']) == 1):
                truncated.text = "1"  # max == height or min
            elif (int(each_object['xmax']) == int(self.imgSize[1])) or (int(each_object['xmin']) == 1):
                truncated.text = "1"  # max == width or min
            else:
                truncated.text = "0"
            difficult = SubElement(object_item, 'difficult')
            # Normalise any truthy value to '1'/'0' (same output as the
            # original `bool(...) & 1` trick, written plainly).
            difficult.text = str(int(bool(each_object['difficult'])))
            bndbox = SubElement(object_item, 'bndbox')
            for tag in ('xmin', 'ymin', 'xmax', 'ymax'):
                SubElement(bndbox, tag).text = str(each_object[tag])

    def save(self, targetFile=None):
        """Serialize the annotation to targetFile (or filename + '.xml')."""
        root = self.genXML()
        self.appendObjects(root)
        target = self.filename + XML_EXT if targetFile is None else targetFile
        # Fix: `with` guarantees the handle is closed even if the write raises;
        # the original leaked it on error.
        with codecs.open(target, 'w', encoding=ENCODE_METHOD) as out_file:
            out_file.write(self.prettify(root).decode('utf8'))
def XMLWriter(folder, filename, imgSize, localImgPath, detection_info):
    '''
    Write a PASCAL VOC .xml annotation file for one image to disk.

    :param folder: directory to save the generated xml file
    :param filename: image file's name (e.g. '00001.jpg')
    :param imgSize: image's shape tuple (height, width[, depth])
    :param localImgPath: full path to the image file
    :param detection_info: list of [category, bbox] pairs, bbox given as
                           (x, y, width, height)
    '''
    # instantiate PascalVocWriter with params
    writer = PascalVocWriter(folder, filename, imgSize, localImgPath=localImgPath)
    # bboxes arrive as (x, y, w, h); VOC wants (xmin, ymin, xmax, ymax)
    difficult = 0
    for category, bbox in detection_info:
        writer.addBndBox(int(bbox[0]), int(bbox[1]),
                         int(bbox[0]) + int(bbox[2]),
                         int(bbox[1]) + int(bbox[3]),
                         category, difficult)
    # Fix: os.path.join works whether or not `folder` ends with a separator;
    # the original plain concatenation produced a broken path without one.
    writer.save(os.path.join(folder, filename.replace('.jpg', '.xml')))
'''
Purpose: Given images and corresponding annotations, this script makes a video
of the labelled data so it's easy to visualize all your data.
'''
from pascal_voc_io import PascalVocReader
import cv2
import os
from glob import glob
from tqdm import tqdm
import numpy as np
import argparse

argparser = argparse.ArgumentParser(description='Test data validity')
argparser.add_argument('-d', '--directory',
                       help='path to directory containing images + annotations')
argparser.add_argument('-o', '--output',
                       help='output file (.mp4)')
args = argparser.parse_args()

DIR = args.directory
OUTPUT_FILE = args.output

# instantiate the reader
reader = PascalVocReader()

# get all annotations (DIR is expected to end with a path separator)
xmls = glob(DIR + '*.xml')

# Set width and height for the output video.
# NOTE(review): frames are written as-is; this assumes the source images are
# 1920x1080 — confirm, or resize before writing.
frame_w = 1920
frame_h = 1080
video_writer = cv2.VideoWriter(OUTPUT_FILE,
                               cv2.VideoWriter_fourcc(*'MPEG'),
                               50.0,
                               (frame_w, frame_h))

# display the images+annotations and write them to a video
for f in tqdm(sorted(xmls)):
    # get bboxes from the xml
    bboxs = reader.parseXML(f)
    # Fix: derive the image path with os.path.splitext — the original
    # f.split(".")[0] broke whenever the directory path contained a dot.
    img_file = os.path.splitext(f)[0] + ".jpg"
    img = cv2.imread(img_file)
    if img is None:
        # Fix: a missing/unreadable image made cv2.line crash; skip instead.
        print("Skipping missing image:", img_file)
        continue
    # for each bbox in the xml, draw the bbox + label on the image
    for bbox in bboxs:
        box = bbox[1]
        cv2.line(img, box[0], box[1], (0, 255, 0), 2)
        cv2.line(img, box[1], box[2], (0, 255, 0), 2)
        cv2.line(img, box[2], box[3], (0, 255, 0), 2)
        cv2.line(img, box[3], box[0], (0, 255, 0), 2)
        cv2.putText(img, bbox[0], box[0], cv2.FONT_HERSHEY_SIMPLEX,
                    0.5, (255, 255, 255), 2, cv2.LINE_AA)
    video_writer.write(np.uint8(img))
video_writer.release()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment