// Last active August 15, 2018 07:34
// Wiki page about how to create .pbtxt files: (link missing from this copy)
// That page refers to a single script. (link missing from this copy)
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
using namespace cv;
using namespace dnn;
// Human-readable label for each class id, indexed directly by the id
// (main() looks labels up as classes[classId]). Index 0 is "background".
// NOTE(review): the "unknown" entries look like placeholders for ids the
// label map does not use — presumably the COCO label map, given the model
// file name; confirm against the model's label file.
// One row per ten ids; the comment on each row gives its id range.
const char* classes[] = {
    /*  0- 9 */ "background", "person", "bicycle", "car", "motorcycle",
                "airplane", "bus", "train", "truck", "boat",
    /* 10-19 */ "traffic light", "fire hydrant", "unknown", "stop sign", "parking meter",
                "bench", "bird", "cat", "dog", "horse",
    /* 20-29 */ "sheep", "cow", "elephant", "bear", "zebra",
                "giraffe", "unknown", "backpack", "umbrella", "unknown",
    /* 30-39 */ "unknown", "handbag", "tie", "suitcase", "frisbee",
                "skis", "snowboard", "sports ball", "kite", "baseball bat",
    /* 40-49 */ "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
                "unknown", "wine glass", "cup", "fork", "knife",
    /* 50-59 */ "spoon", "bowl", "banana", "apple", "sandwich",
                "orange", "broccoli", "carrot", "hot dog", "pizza",
    /* 60-69 */ "donut", "cake", "chair", "couch", "potted plant",
                "bed", "unknown", "dining table", "unknown", "unknown",
    /* 70-79 */ "toilet", "unknown", "tv", "laptop", "mouse",
                "remote", "keyboard", "cell phone", "microwave", "oven",
    /* 80-89 */ "toaster", "sink", "refrigerator", "unknown", "book",
                "clock", "vase", "scissors", "teddy bear", "hair drier",
    /* 90    */ "toothbrush"
};
int main(int argc, char** argv) {
Net net = readNetFromTensorflow("ssd_mobilenet_v1_coco.pb", "ssd_mobilenet_v1_coco.pbtxt");
VideoCapture cap(0);
Mat frame, blob;
while (waitKey(1) < 0) {
cap >> frame;
if (frame.empty())
blobFromImage(frame, blob, 1.0 / 127.5, Size(300, 300),
Scalar(127.5, 127.5, 127.5), /*swapRB*/ true, /*crop*/ false);
Mat out = net.forward();
float* detections = (float*);
for (int i = 0; i <; i += 7) {
float confidence = detections[i + 2]; // A value in range [0, 1]
if (confidence > 0.2) {
int classId = int(detections[i + 1]);
int left = (int)(frame.cols * detections[i + 3]);
int top = (int)(frame.rows * detections[i + 4]);
int right = (int)(frame.cols * detections[i + 5]);
int bottom = (int)(frame.rows * detections[i + 6]);
rectangle(frame, Point(left, top), Point(right, bottom), Scalar(0, 255, 0));
std::string label = format("%s: %.2f", classes[classId], confidence);
int baseLine = 0;
Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
top = max(top, labelSize.height);
rectangle(frame, Point(left, top - labelSize.height),
Point(left + labelSize.width, top + baseLine),
Scalar(255, 255, 255), FILLED);
putText(frame, label, Point(left, top), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0,0,0));
imshow("MobileNet-SSD using OpenCV", frame);
return 0;
