From 3659d84f24ddc95102483cca430e01dc05568cbb Mon Sep 17 00:00:00 2001
From: AlexeyAB <alexeyab84@gmail.com>
Date: Fri, 31 Mar 2017 18:42:51 +0000
Subject: [PATCH] Added tracking: numerating the detected objects on video

---
 src/yolo_v2_class.cpp    |    3 +
 src/yolo_console_dll.cpp |   44 ++++++++++++++++-----
 src/yolo_v2_class.hpp    |   57 ++++++++++++++++++++++++++++
 3 files changed, 92 insertions(+), 12 deletions(-)

diff --git a/src/yolo_console_dll.cpp b/src/yolo_console_dll.cpp
index 4938ba8..5172b5e 100644
--- a/src/yolo_console_dll.cpp
+++ b/src/yolo_console_dll.cpp
@@ -1,9 +1,10 @@
 #include <iostream>
+#include <iomanip> 
 #include <string>
 #include <vector>
 #include <fstream>
 
-//#define OPENCV
+#define OPENCV
 
 #include "yolo_v2_class.hpp"	// imported functions from DLL
 
@@ -13,21 +14,27 @@
 #pragma comment(lib, "opencv_core249.lib")
 #pragma comment(lib, "opencv_imgproc249.lib")
 #pragma comment(lib, "opencv_highgui249.lib")
-void draw_boxes(cv::Mat mat_img, std::vector<bbox_t> result_vec) {
+void draw_boxes(cv::Mat mat_img, std::vector<bbox_t> const& result_vec, std::vector<std::string> const& obj_names, unsigned int wait_msec = 0) {	// draws every box with its class name and track id, shows the image, then calls cv::waitKey(wait_msec)
 	for (auto &i : result_vec) {
-		cv::rectangle(mat_img, cv::Rect(i.x, i.y, i.w, i.h), cv::Scalar(50, 200, 50), 3);
+		cv::Scalar const color(60, 160, 255);	// keep every channel inside the 8-bit range [0, 255]
+		cv::rectangle(mat_img, cv::Rect(i.x, i.y, i.w, i.h), color, 3);
+		if(obj_names.size() > i.obj_id)	// i.y is unsigned: clamp, so that (i.y - 10) cannot wrap around for boxes near the top edge
+			putText(mat_img, obj_names[i.obj_id], cv::Point2f(i.x, i.y > 10 ? i.y - 10 : 0), cv::FONT_HERSHEY_COMPLEX_SMALL, 1, color);
+		if(i.track_id > 0)	// track_id == 0 means the box is untracked - nothing to print
+			putText(mat_img, std::to_string(i.track_id), cv::Point2f(i.x+5, i.y + 15), cv::FONT_HERSHEY_COMPLEX_SMALL, 1, color);
 	}
 	cv::imshow("window name", mat_img);
-	cv::waitKey(0);
+	cv::waitKey(wait_msec);
 }
 #endif	// OPENCV
 
+
 void show_result(std::vector<bbox_t> const result_vec, std::vector<std::string> const obj_names) {
 	for (auto &i : result_vec) {
 		if (obj_names.size() > i.obj_id) std::cout << obj_names[i.obj_id] << " - ";
-		std::cout << "obj_id = " << i.obj_id << " - x = " << i.x << ", y = " << i.y 
+		std::cout << "obj_id = " << i.obj_id << ",  x = " << i.x << ", y = " << i.y 
 			<< ", w = " << i.w << ", h = " << i.h
-			<< ", prob = " << i.prob << std::endl;
+			<< std::setprecision(3) << ", prob = " << i.prob << std::endl;	// NOTE: setprecision(3) stays in effect on std::cout after this call
 	}
 }
 
@@ -50,23 +57,38 @@
 	while (true) 
 	{
 		std::string filename;
-		std::cout << "input image filename: ";
+		std::cout << "input image or video filename: ";
 		std::cin >> filename;
 		if (filename.size() == 0) break;
 		
 		try {
 #ifdef OPENCV
-			cv::Mat mat_img = cv::imread(filename);
-			std::vector<bbox_t> result_vec = detector.detect(mat_img);
-			draw_boxes(mat_img, result_vec);
+			std::string const file_ext = filename.substr(filename.find_last_of(".") + 1);	// text after the last '.' (the whole name if there is none)
+			if (file_ext == "avi" || file_ext == "mp4" || file_ext == "mjpg" || file_ext == "mov") {	// video file
+				cv::Mat frame;
+				detector.nms = 0.02;	// comment this out if track_id is not required
+				for(cv::VideoCapture cap(filename); cap >> frame, cap.isOpened() && !frame.empty();) {	// stop on the first empty frame instead of passing it to detect()
+					std::vector<bbox_t> result_vec = detector.detect(frame, 0.2);
+					result_vec = detector.tracking(result_vec);	// comment this out if track_id is not required
+
+					draw_boxes(frame, result_vec, obj_names, 3);
+					show_result(result_vec, obj_names);
+				}
+			}
+			else {	// image file
+				cv::Mat mat_img = cv::imread(filename);
+				std::vector<bbox_t> result_vec = detector.detect(mat_img);
+				draw_boxes(mat_img, result_vec, obj_names);
+				show_result(result_vec, obj_names);
+			}
 #else
 			//std::vector<bbox_t> result_vec = detector.detect(filename);
 
 			auto img = detector.load_image(filename);
 			std::vector<bbox_t> result_vec = detector.detect(img);
 			detector.free_image(img);
-#endif
 			show_result(result_vec, obj_names);
+#endif			
 		}
 		catch (std::exception &e) { std::cerr << "exception: " << e.what() << "\n"; getchar(); }
 		catch (...) { std::cerr << "unknown exception \n"; getchar(); }
diff --git a/src/yolo_v2_class.cpp b/src/yolo_v2_class.cpp
index ea13ea3..8643a22 100644
--- a/src/yolo_v2_class.cpp
+++ b/src/yolo_v2_class.cpp
@@ -154,7 +154,7 @@
 	cudaSetDevice(net.gpu_index);
 	//std::cout << "net.gpu_index = " << net.gpu_index << std::endl;
 
-	float nms = .4;
+	//float nms = .4;
 
 	image im;
 	im.c = img.c;
@@ -189,6 +189,7 @@
 			bbox.h = b.h*im.h;
 			bbox.obj_id = obj_id;
 			bbox.prob = prob;
+			bbox.track_id = 0;
 
 			bbox_vec.push_back(bbox);
 		}
diff --git a/src/yolo_v2_class.hpp b/src/yolo_v2_class.hpp
index e3d7933..37fcd61 100644
--- a/src/yolo_v2_class.hpp
+++ b/src/yolo_v2_class.hpp
@@ -1,6 +1,8 @@
 #pragma once
 #include <memory>
 #include <vector>
+#include <deque>
+#include <algorithm>
 
 #ifdef OPENCV
 #include <opencv2/opencv.hpp>			// C++
@@ -18,6 +20,7 @@
 	unsigned int x, y, w, h;	// (x,y) - top-left corner, (w, h) - width & height of bounded box
 	float prob;					// confidence - probability that the object was found correctly
 	unsigned int obj_id;		// class of object - from range [0, classes-1]
+	unsigned int track_id;		// tracking id for video (0 - untracked, 1 - inf - tracked object)
 };
 
 struct image_t {
@@ -31,6 +34,7 @@
 class Detector {
 	std::shared_ptr<void> detector_gpu_ptr;
 public:
+	float nms = .4;
 
 	YOLODLL_API Detector(std::string cfg_filename, std::string weight_filename, int gpu_id = 0);
 	YOLODLL_API ~Detector();
@@ -107,6 +111,59 @@
 	}
 
 #endif	// OPENCV
+
+	std::deque<std::vector<bbox_t>> prev_bbox_vec_deque;
+
+public:
+	// Numbers the boxes of the current frame: a box takes over the track_id of a box with the same obj_id whose
+	// top-left corner was less than 100 away (in box coordinates) in one of the remembered frames; boxes that are
+	// still unnumbered get a fresh id. frames_story - number of past frames kept for matching (<= 0 keeps none).
+	// Returns the numbered boxes. The id counter is a function-local static: shared by all Detector objects, never reset.
+	std::vector<bbox_t> tracking(std::vector<bbox_t> cur_bbox_vec, int const frames_story = 4)
+	{
+		static unsigned int track_id = 1;
+
+		bool prev_track_id_present = false;
+		for (auto &i : prev_bbox_vec_deque)
+			if (i.size() > 0) prev_track_id_present = true;
+		// nothing to match against: drop any incoming ids, so that every box is numbered from the counter below
+		if (!prev_track_id_present)
+			for (auto &b : cur_bbox_vec) b.track_id = 0;
+
+		std::vector<unsigned int> dist_vec(cur_bbox_vec.size(), std::numeric_limits<unsigned int>::max());
+
+		for (auto &prev_bbox_vec : prev_bbox_vec_deque) {	// newest remembered frame first
+			for (auto &i : prev_bbox_vec) {
+				int cur_index = -1;
+				for (size_t m = 0; m < cur_bbox_vec.size(); ++m) {
+					bbox_t const& k = cur_bbox_vec[m];
+					if (i.obj_id == k.obj_id) {
+						unsigned int cur_dist = sqrt(((float)i.x - k.x)*((float)i.x - k.x) + ((float)i.y - k.y)*((float)i.y - k.y));
+						if (cur_dist < 100 && (k.track_id == 0 || dist_vec[m] > cur_dist)) {
+							dist_vec[m] = cur_dist;
+							cur_index = m;
+						}
+					}
+				}
+
+				bool track_id_absent = std::none_of(cur_bbox_vec.begin(), cur_bbox_vec.end(), [&](bbox_t const& b) { return b.track_id == i.track_id; });	// skip ids already used in this frame
+				if (cur_index >= 0 && track_id_absent)
+					cur_bbox_vec[cur_index].track_id = i.track_id;
+			}
+		}
+
+		for (size_t i = 0; i < cur_bbox_vec.size(); ++i)
+			if (cur_bbox_vec[i].track_id == 0)
+				cur_bbox_vec[i].track_id = track_id++;
+
+		// frames_story is signed: clamp it before comparing with size(), otherwise a negative value converts to
+		// a huge unsigned number and the history is never trimmed; 'while' also shrinks it after a smaller request
+		size_t const history_len = frames_story > 0 ? static_cast<size_t>(frames_story) : 0;
+		prev_bbox_vec_deque.push_front(cur_bbox_vec);
+		while (prev_bbox_vec_deque.size() > history_len) prev_bbox_vec_deque.pop_back();
+
+		return cur_bbox_vec;
+	}
 };
 
 

--
Gitblit v1.10.0