AlexeyAB
2017-09-12 8fa9f44211e5b06a5e4c0859eb31af6a45492222
Improved speed of yolo_console_dll.cpp - 40 FPS on 4K using GeForce GTX 960
3 files modified
47 ■■■■■ changed files
src/yolo_console_dll.cpp 14 ●●●●● patch | view | raw | blame | history
src/yolo_v2_class.cpp 4 ●●●● patch | view | raw | blame | history
src/yolo_v2_class.hpp 29 ●●●● patch | view | raw | blame | history
src/yolo_console_dll.cpp
@@ -101,6 +101,7 @@
                protocol == "rtsp://" || protocol == "http://" || protocol == "https:/")    // video network stream
            {
                cv::Mat cap_frame, cur_frame, det_frame, write_frame;
                std::shared_ptr<image_t> det_image;
                std::vector<bbox_t> result_vec, thread_result_vec;
                detector.nms = 0.02;    // comment it - if track_id is not required
                std::atomic<bool> consumed, videowrite_ready;
@@ -116,9 +117,10 @@
                std::chrono::steady_clock::time_point steady_start, steady_end;
                cv::VideoCapture cap(filename); cap >> cur_frame;
                int const video_fps = cap.get(CV_CAP_PROP_FPS);
                cv::Size const frame_size = cur_frame.size();
                cv::VideoWriter output_video;
                if (save_output_videofile)
                    output_video.open(out_videofile, CV_FOURCC('D', 'I', 'V', 'X'), std::max(35, video_fps), cur_frame.size(), true);
                    output_video.open(out_videofile, CV_FOURCC('D', 'I', 'V', 'X'), std::max(35, video_fps), frame_size, true);
                while (!cur_frame.empty()) {
                    if (t_cap.joinable()) {
@@ -132,7 +134,7 @@
                    if(consumed)
                    {
                        std::unique_lock<std::mutex> lock(mtx);
                        cur_frame.copyTo(det_frame);
                        det_image = detector.mat_to_image_resize(cur_frame);
                        result_vec = thread_result_vec;
                        result_vec = detector.tracking(result_vec); // comment it - if track_id is not required
                        consumed = false;
@@ -140,14 +142,14 @@
                    // launch thread once
                    if (!t_detect.joinable()) {
                        t_detect = std::thread([&]() {
                            cv::Mat current_mat = det_frame.clone();
                            auto current_image = det_image;
                            consumed = true;
                            while (!current_mat.empty()) {
                                auto result = detector.detect(current_mat, 0.24, true);
                            while (current_image.use_count() > 0) {
                                auto result = detector.detect_resized(*current_image, frame_size, 0.24, true);
                                ++fps_det_counter;
                                std::unique_lock<std::mutex> lock(mtx);
                                thread_result_vec = result;
                                current_mat = det_frame.clone();
                                current_image = det_image;
                                consumed = true;
                                cv.notify_all();
                            }
src/yolo_v2_class.cpp
@@ -109,11 +109,11 @@
#endif
}
// Returns the `w` field of the network held in the detector's GPU state,
// which is reached by casting detector_gpu_ptr to detector_gpu_t.
// NOTE(review): two signature lines follow (non-const, then const); they look
// like the before/after lines of a diff hunk - confirm that only the const
// version exists in the real source file.
YOLODLL_API int Detector::get_net_width() {
YOLODLL_API int Detector::get_net_width() const {
    detector_gpu_t &detector_gpu = *reinterpret_cast<detector_gpu_t *>(detector_gpu_ptr.get());
    return detector_gpu.net.w;
}
// Returns the `h` field of the network held in the detector's GPU state,
// which is reached by casting detector_gpu_ptr to detector_gpu_t.
// NOTE(review): two signature lines follow (non-const, then const); they look
// like the before/after lines of a diff hunk - confirm that only the const
// version exists in the real source file.
YOLODLL_API int Detector::get_net_height() {
YOLODLL_API int Detector::get_net_height() const {
    detector_gpu_t &detector_gpu = *reinterpret_cast<detector_gpu_t *>(detector_gpu_ptr.get());
    return detector_gpu.net.h;
}
src/yolo_v2_class.hpp
@@ -51,8 +51,8 @@
    YOLODLL_API std::vector<bbox_t> detect(image_t img, float thresh = 0.2, bool use_mean = false);
    static YOLODLL_API image_t load_image(std::string image_filename);
    static YOLODLL_API void free_image(image_t m);
    YOLODLL_API int get_net_width();
    YOLODLL_API int get_net_height();
    YOLODLL_API int get_net_width() const;
    YOLODLL_API int get_net_height() const;
    YOLODLL_API std::vector<bbox_t> tracking(std::vector<bbox_t> cur_bbox_vec, int const frames_story = 6);
@@ -60,14 +60,27 @@
    // Runs detection on an OpenCV matrix.
    // The matrix is converted with mat_to_image_resize() and the result is
    // handed to detect_resized() together with the matrix's own size
    // (mat.size()); thresh and use_mean are forwarded unchanged.
    // Throws std::runtime_error when mat.data is NULL.
    std::vector<bbox_t> detect(cv::Mat mat, float thresh = 0.2, bool use_mean = false)
    {
        if(mat.data == NULL)
            // NOTE(review): two throw statements follow; they look like the
            // before/after lines of a diff hunk ("file not found" replaced by
            // "Image is empty") - confirm only one exists in the real file.
            throw std::runtime_error("file not found");
            throw std::runtime_error("Image is empty");
        auto image_ptr = mat_to_image_resize(mat);
        return detect_resized(*image_ptr, mat.size(), thresh, use_mean);
    }
    std::vector<bbox_t> detect_resized(image_t img, cv::Size init_size, float thresh = 0.2, bool use_mean = false)
    {
        if (img.data == NULL)
            throw std::runtime_error("Image is empty");
        auto detection_boxes = detect(img, thresh, use_mean);
        float wk = (float)init_size.width / img.w, hk = (float)init_size.height / img.h;
        for (auto &i : detection_boxes) i.x *= wk, i.w *= wk, i.y *= hk, i.h *= hk;
        return detection_boxes;
    }
    // Resizes `mat` with cv::resize to get_net_width() x get_net_height() and
    // converts the resized matrix with mat_to_image().
    // Returns an empty shared_ptr when mat.data is NULL.
    std::shared_ptr<image_t> mat_to_image_resize(cv::Mat mat) const
    {
        if (mat.data == NULL) return std::shared_ptr<image_t>(NULL);
        cv::Mat det_mat;
        cv::resize(mat, det_mat, cv::Size(get_net_width(), get_net_height()));
        // NOTE(review): the lines from here through `return detection_boxes;`
        // use thresh/use_mean, which are not parameters of this function, and
        // return a type that differs from the declared one. They look like
        // removed lines of a diff hunk interleaved with the new body - confirm
        // that only `return mat_to_image(det_mat);` remains in the real file.
        auto image_ptr = mat_to_image(det_mat);
        auto detection_boxes = detect(*image_ptr, thresh, use_mean);
        float wk = (float)mat.cols / det_mat.cols, hk = (float)mat.rows / det_mat.rows;
        for (auto &i : detection_boxes) i.x*=wk, i.w*= wk, i.y*=hk, i.h*=hk;
        return detection_boxes;
        return mat_to_image(det_mat);
    }
    static std::shared_ptr<image_t> mat_to_image(cv::Mat img)