~speedprog/mtg/mtg_card_detector.git

parent: b202eaee | patch | commit | show whitespace

Fixed - use individual track_id for each class of object

AlexeyAB

2017-08-08 4528f9b4b49dc701d3de7b38fa59c17c41702679

Fixed - use individual track_id for each class of object

3 files modified

	src/yolo_console_dll.cpp	13 ●●●●● patch | view | raw | blame | history
	src/yolo_v2_class.cpp	14 ●●●●● patch | view | raw | blame | history
	src/yolo_v2_class.hpp	2 ●●●●● patch | view | raw | blame | history

 src/yolo_console_dll.cpp

@@ -31,9 +31,10 @@
            std::string obj_name = obj_names[i.obj_id];
            if (i.track_id > 0) obj_name += " - " + std::to_string(i.track_id);
            cv::Size const text_size = getTextSize(obj_name, cv::FONT_HERSHEY_COMPLEX_SMALL, 1.2, 2, 0);
            size_t const max_width = (text_size.width > i.w + 2) ? text_size.width : (i.w + 2);
            if(i.x > 3 && (i.x + max_width) < mat_img.cols && i.y > 30 && i.y < mat_img.rows)
                cv::rectangle(mat_img, cv::Point2f(i.x - 3, i.y - 30), cv::Point2f(i.x + max_width, i.y), color, CV_FILLED, 8, 0);
            int const max_width = (text_size.width > i.w + 2) ? text_size.width : (i.w + 2);
            cv::rectangle(mat_img, cv::Point2f(std::max((int)i.x - 3, 0), std::max((int)i.y - 30, 0)), 
                cv::Point2f(std::min((int)i.x + max_width, mat_img.cols-1), std::min((int)i.y, mat_img.rows-1)), 
                color, CV_FILLED, 8, 0);
            putText(mat_img, obj_name, cv::Point2f(i.x, i.y - 10), cv::FONT_HERSHEY_COMPLEX_SMALL, 1.2, cv::Scalar(0, 0, 0), 2);
        }
    }
@@ -79,7 +80,9 @@
#ifdef OPENCV
            std::string const file_ext = filename.substr(filename.find_last_of(".") + 1);
            std::string const protocol = filename.substr(0, 4);
            if (file_ext == "avi" || file_ext == "mp4" || file_ext == "mjpg" || file_ext == "mov" || protocol == "rtsp") {  // video file
            if (file_ext == "avi" || file_ext == "mp4" || file_ext == "mjpg" || file_ext == "mov" ||    // video file
                protocol == "rtsp" || protocol == "http")   // video network stream
            {
                cv::Mat frame, prev_frame, det_frame;
                std::vector<bbox_t> result_vec, thread_result_vec;
                detector.nms = 0.02;    // comment it - if track_id is not required
@@ -98,7 +101,7 @@
                        show_result(result_vec, obj_names);
                    }
                    prev_frame = frame;
                    //if(protocol == "rtsp") while (!ready_flag) cap.grab();    // use if cam-fps 2x or more than dnn-fps
                    //if (protocol == "rtsp" || protocol == "http") do { cap.grab(); } while (!ready_flag); // use if cam-fps 2x or more than dnn-fps
                    ready_flag = false;
                }
            }

 src/yolo_v2_class.cpp

@@ -30,6 +30,7 @@
    float *avg;
    float *predictions[FRAMES];
    int demo_index;
    unsigned int *track_id;
};


@@ -71,6 +72,9 @@
    detector_gpu.probs = (float **)calloc(l.w*l.h*l.n, sizeof(float *));
    for (j = 0; j < l.w*l.h*l.n; ++j) detector_gpu.probs[j] = (float *)calloc(l.classes, sizeof(float));

    detector_gpu.track_id = (unsigned int *)calloc(l.classes, sizeof(unsigned int));
    for (j = 0; j < l.classes; ++j) detector_gpu.track_id[j] = 1;

#ifdef GPU
    cudaSetDevice(old_gpu_index);
#endif
@@ -82,6 +86,8 @@
    detector_gpu_t &detector_gpu = *reinterpret_cast<detector_gpu_t *>(detector_gpu_ptr.get());
    layer l = detector_gpu.net.layers[detector_gpu.net.n - 1];

    free(detector_gpu.track_id);

    free(detector_gpu.avg);
    for (int j = 0; j < FRAMES; ++j) free(detector_gpu.predictions[j]);
    for (int j = 0; j < FRAMES; ++j) if(detector_gpu.images[j].data) free(detector_gpu.images[j].data);
@@ -244,16 +250,16 @@

YOLODLL_API std::vector<bbox_t> Detector::tracking(std::vector<bbox_t> cur_bbox_vec, int const frames_story)
{
    detector_gpu_t &det_gpu = *reinterpret_cast<detector_gpu_t *>(detector_gpu_ptr.get());

    bool prev_track_id_present = false;
    for (auto &i : prev_bbox_vec_deque)
        if (i.size() > 0) prev_track_id_present = true;

    static unsigned int track_id = 1;

    if (!prev_track_id_present) {
        //track_id = 1;
        for (size_t i = 0; i < cur_bbox_vec.size(); ++i)
            cur_bbox_vec[i].track_id = track_id++;
            cur_bbox_vec[i].track_id = det_gpu.track_id[cur_bbox_vec[i].obj_id]++;
        prev_bbox_vec_deque.push_front(cur_bbox_vec);
        if (prev_bbox_vec_deque.size() > frames_story) prev_bbox_vec_deque.pop_back();
        return cur_bbox_vec;
@@ -287,7 +293,7 @@

    for (size_t i = 0; i < cur_bbox_vec.size(); ++i)
        if (cur_bbox_vec[i].track_id == 0)
            cur_bbox_vec[i].track_id = track_id++;
            cur_bbox_vec[i].track_id = det_gpu.track_id[cur_bbox_vec[i].obj_id]++;

    prev_bbox_vec_deque.push_front(cur_bbox_vec);
    if (prev_bbox_vec_deque.size() > frames_story) prev_bbox_vec_deque.pop_back();

 src/yolo_v2_class.hpp

@@ -54,7 +54,7 @@
    YOLODLL_API int get_net_width();
    YOLODLL_API int get_net_height();

    YOLODLL_API std::vector<bbox_t> tracking(std::vector<bbox_t> cur_bbox_vec, int const frames_story = 4);
    YOLODLL_API std::vector<bbox_t> tracking(std::vector<bbox_t> cur_bbox_vec, int const frames_story = 6);

#ifdef OPENCV
    std::vector<bbox_t> detect(cv::Mat mat, float thresh = 0.2, bool use_mean = false)

			@@ -31,9 +31,10 @@
			std::string obj_name = obj_names[i.obj_id];
			if (i.track_id > 0) obj_name += " - " + std::to_string(i.track_id);
			cv::Size const text_size = getTextSize(obj_name, cv::FONT_HERSHEY_COMPLEX_SMALL, 1.2, 2, 0);
			size_t const max_width = (text_size.width > i.w + 2) ? text_size.width : (i.w + 2);
			if(i.x > 3 && (i.x + max_width) < mat_img.cols && i.y > 30 && i.y < mat_img.rows)
			cv::rectangle(mat_img, cv::Point2f(i.x - 3, i.y - 30), cv::Point2f(i.x + max_width, i.y), color, CV_FILLED, 8, 0);
			int const max_width = (text_size.width > i.w + 2) ? text_size.width : (i.w + 2);
			cv::rectangle(mat_img, cv::Point2f(std::max((int)i.x - 3, 0), std::max((int)i.y - 30, 0)),
			cv::Point2f(std::min((int)i.x + max_width, mat_img.cols-1), std::min((int)i.y, mat_img.rows-1)),
			color, CV_FILLED, 8, 0);
			putText(mat_img, obj_name, cv::Point2f(i.x, i.y - 10), cv::FONT_HERSHEY_COMPLEX_SMALL, 1.2, cv::Scalar(0, 0, 0), 2);
			}
			}
			@@ -79,7 +80,9 @@
			#ifdef OPENCV
			std::string const file_ext = filename.substr(filename.find_last_of(".") + 1);
			std::string const protocol = filename.substr(0, 4);
			if (file_ext == "avi" || file_ext == "mp4" || file_ext == "mjpg" || file_ext == "mov" || protocol == "rtsp") { // video file
			if (file_ext == "avi" || file_ext == "mp4" || file_ext == "mjpg" || file_ext == "mov" || // video file
			protocol == "rtsp" || protocol == "http") // video network stream
			{
			cv::Mat frame, prev_frame, det_frame;
			std::vector<bbox_t> result_vec, thread_result_vec;
			detector.nms = 0.02; // comment it - if track_id is not required
			@@ -98,7 +101,7 @@
			show_result(result_vec, obj_names);
			}
			prev_frame = frame;
			//if(protocol == "rtsp") while (!ready_flag) cap.grab(); // use if cam-fps 2x or more than dnn-fps
			//if (protocol == "rtsp" || protocol == "http") do { cap.grab(); } while (!ready_flag); // use if cam-fps 2x or more than dnn-fps
			ready_flag = false;
			}
			}

			@@ -30,6 +30,7 @@
			float *avg;
			float *predictions[FRAMES];
			int demo_index;
			unsigned int *track_id;
			};


			@@ -71,6 +72,9 @@
			detector_gpu.probs = (float **)calloc(l.w*l.h*l.n, sizeof(float *));
			for (j = 0; j < l.w*l.h*l.n; ++j) detector_gpu.probs[j] = (float *)calloc(l.classes, sizeof(float));

			detector_gpu.track_id = (unsigned int *)calloc(l.classes, sizeof(unsigned int));
			for (j = 0; j < l.classes; ++j) detector_gpu.track_id[j] = 1;

			#ifdef GPU
			cudaSetDevice(old_gpu_index);
			#endif
			@@ -82,6 +86,8 @@
			detector_gpu_t &detector_gpu = *reinterpret_cast<detector_gpu_t *>(detector_gpu_ptr.get());
			layer l = detector_gpu.net.layers[detector_gpu.net.n - 1];

			free(detector_gpu.track_id);

			free(detector_gpu.avg);
			for (int j = 0; j < FRAMES; ++j) free(detector_gpu.predictions[j]);
			for (int j = 0; j < FRAMES; ++j) if(detector_gpu.images[j].data) free(detector_gpu.images[j].data);
			@@ -244,16 +250,16 @@

			YOLODLL_API std::vector<bbox_t> Detector::tracking(std::vector<bbox_t> cur_bbox_vec, int const frames_story)
			{
			detector_gpu_t &det_gpu = *reinterpret_cast<detector_gpu_t *>(detector_gpu_ptr.get());

			bool prev_track_id_present = false;
			for (auto &i : prev_bbox_vec_deque)
			if (i.size() > 0) prev_track_id_present = true;

			static unsigned int track_id = 1;

			if (!prev_track_id_present) {
			//track_id = 1;
			for (size_t i = 0; i < cur_bbox_vec.size(); ++i)
			cur_bbox_vec[i].track_id = track_id++;
			cur_bbox_vec[i].track_id = det_gpu.track_id[cur_bbox_vec[i].obj_id]++;
			prev_bbox_vec_deque.push_front(cur_bbox_vec);
			if (prev_bbox_vec_deque.size() > frames_story) prev_bbox_vec_deque.pop_back();
			return cur_bbox_vec;
			@@ -287,7 +293,7 @@

			for (size_t i = 0; i < cur_bbox_vec.size(); ++i)
			if (cur_bbox_vec[i].track_id == 0)
			cur_bbox_vec[i].track_id = track_id++;
			cur_bbox_vec[i].track_id = det_gpu.track_id[cur_bbox_vec[i].obj_id]++;

			prev_bbox_vec_deque.push_front(cur_bbox_vec);
			if (prev_bbox_vec_deque.size() > frames_story) prev_bbox_vec_deque.pop_back();

			@@ -54,7 +54,7 @@
			YOLODLL_API int get_net_width();
			YOLODLL_API int get_net_height();

			YOLODLL_API std::vector<bbox_t> tracking(std::vector<bbox_t> cur_bbox_vec, int const frames_story = 4);
			YOLODLL_API std::vector<bbox_t> tracking(std::vector<bbox_t> cur_bbox_vec, int const frames_story = 6);

			#ifdef OPENCV
			std::vector<bbox_t> detect(cv::Mat mat, float thresh = 0.2, bool use_mean = false)