AlexeyAB
2018-01-07 0cb81e5f50ab396726a334784db308025d74c2b7
Some stream fixes
5 files modified
27 ■■■■■ changed files
src/cuda.h 1 ●●●● patch | view | raw | blame | history
src/network_kernels.cu 2 ●●●●● patch | view | raw | blame | history
src/yolo_console_dll.cpp 3 ●●●● patch | view | raw | blame | history
src/yolo_v2_class.cpp 4 ●●● patch | view | raw | blame | history
src/yolo_v2_class.hpp 17 ●●●●● patch | view | raw | blame | history
src/cuda.h
@@ -26,6 +26,7 @@
/* GPU helper prototypes. The definitions are not part of this hunk, so the
 * notes below are limited to what the signatures and visible call sites show. */

/* NOTE(review): presumably transfer `n` floats between the host buffer `x` and
 * the device buffer `x_gpu` (push = host->device, pull = device->host), with
 * `n` an element count rather than a byte count -- confirm against the
 * definitions. */
void cuda_push_array(float *x_gpu, float *x, size_t n);
void cuda_pull_array(float *x_gpu, float *x, size_t n);
/* Takes a device index; network_predict_gpu calls it with net.gpu_index. */
void cuda_set_device(int n);
/* Returns an int that network_predict_gpu compares against net.gpu_index so it
 * only calls cuda_set_device() when the two differ. */
int cuda_get_device();
/* NOTE(review): presumably releases a device buffer -- confirm. */
void cuda_free(float *x_gpu);
/* NOTE(review): presumably fills `n` device floats with random values; the
 * distribution is not visible here -- confirm. */
void cuda_random(float *x_gpu, size_t n);
/* NOTE(review): presumably compares a device buffer with a host buffer and
 * returns an error measure, with `s` used as a label -- confirm. */
float cuda_compare(float *x_gpu, float *x, size_t n, char *s);
src/network_kernels.cu
@@ -51,6 +51,7 @@
            fill_ongpu(l.outputs * l.batch, 0, l.delta_gpu, 1);
        }
        l.forward_gpu(l, state);
        cudaStreamSynchronize(get_cuda_stream());
        state.input = l.output_gpu;
    }
}
@@ -392,6 +393,7 @@
float *network_predict_gpu(network net, float *input)
{
    if (net.gpu_index != cuda_get_device())
    cuda_set_device(net.gpu_index);
    int size = get_network_input_size(net) * net.batch;
    network_state state;
src/yolo_console_dll.cpp
@@ -158,7 +158,6 @@
                        det_image = detector.mat_to_image_resize(cur_frame);
                        result_vec = thread_result_vec;
                        result_vec = detector.tracking(result_vec); // comment this line out if track_id is not required
#ifdef TRACK_OPTFLOW
                        // track optical flow
                        if (track_optflow_queue.size() > 0) {
@@ -189,7 +188,7 @@
                                //std::vector<bbox_t> result;
                                auto result = detector.detect_resized(*current_image, frame_size, 0.24, false); // true
                                //Sleep(200);
                                Sleep(50);
                                //Sleep(50);
                                ++fps_det_counter;
                                std::unique_lock<std::mutex> lock(mtx);
                                thread_result_vec = result;
src/yolo_v2_class.cpp
@@ -34,7 +34,7 @@
};
YOLODLL_API Detector::Detector(std::string cfg_filename, std::string weight_filename, int gpu_id)
YOLODLL_API Detector::Detector(std::string cfg_filename, std::string weight_filename, int gpu_id) : cur_gpu_id(gpu_id)
{
    int old_gpu_index;
#ifdef GPU
@@ -178,6 +178,7 @@
    int old_gpu_index;
#ifdef GPU
    cudaGetDevice(&old_gpu_index);
    if(cur_gpu_id != old_gpu_index)
    cudaSetDevice(net.gpu_index);
#endif
    //std::cout << "net.gpu_index = " << net.gpu_index << std::endl;
@@ -242,6 +243,7 @@
        free(sized.data);
#ifdef GPU
    if (cur_gpu_id != old_gpu_index)
    cudaSetDevice(old_gpu_index);
#endif
src/yolo_v2_class.hpp
@@ -47,6 +47,7 @@
class Detector {
    std::shared_ptr<void> detector_gpu_ptr;
    std::deque<std::vector<bbox_t>> prev_bbox_vec_deque;
    const int cur_gpu_id;
public:
    float nms = .4;
@@ -170,8 +171,8 @@
        sync_PyrLKOpticalFlow_gpu = cv::cuda::SparsePyrLKOpticalFlow::create();
        sync_PyrLKOpticalFlow_gpu->setWinSize(cv::Size(21, 21));    // 15, 21, 31
        sync_PyrLKOpticalFlow_gpu->setMaxLevel(5);      // +- 50 ptx
        sync_PyrLKOpticalFlow_gpu->setNumIters(2000);   // def: 30
        sync_PyrLKOpticalFlow_gpu->setMaxLevel(3);      // +- 5 ptx
        sync_PyrLKOpticalFlow_gpu->setNumIters(1000);   // def: 30
        cv::cuda::setDevice(old_gpu_id);
    }
@@ -190,10 +191,9 @@
    void update_tracking_flow(cv::Mat src_mat)
    {
        int const old_gpu_id = cv::cuda::getDevice();
        if (old_gpu_id != gpu_id)
        cv::cuda::setDevice(gpu_id);
        //cv::cuda::Stream stream;
        if (src_mat.channels() == 3) {
            if (src_mat_gpu.cols == 0) {
                src_mat_gpu = cv::cuda::GpuMat(src_mat.size(), src_mat.type());
@@ -203,6 +203,7 @@
            src_mat_gpu.upload(src_mat, stream);
            cv::cuda::cvtColor(src_mat_gpu, src_grey_gpu, CV_BGR2GRAY, 0, stream);
        }
        if (old_gpu_id != gpu_id)
        cv::cuda::setDevice(old_gpu_id);
    }
@@ -215,19 +216,18 @@
        }
        int const old_gpu_id = cv::cuda::getDevice();
        if(old_gpu_id != gpu_id)
        cv::cuda::setDevice(gpu_id);
        //cv::cuda::Stream stream;
        if (dst_mat_gpu.cols == 0) {
            dst_mat_gpu = cv::cuda::GpuMat(dst_mat.size(), dst_mat.type());
            dst_grey_gpu = cv::cuda::GpuMat(dst_mat.size(), CV_8UC1);
            tmp_grey_gpu = cv::cuda::GpuMat(dst_mat.size(), CV_8UC1);
        }
        dst_mat_gpu.upload(dst_mat, stream);
        cv::cuda::cvtColor(dst_mat_gpu, dst_grey_gpu, CV_BGR2GRAY, 0, stream);
        if (src_grey_gpu.rows != dst_grey_gpu.rows || src_grey_gpu.cols != dst_grey_gpu.cols) {
@@ -237,6 +237,8 @@
            return cur_bbox_vec;
        }
        //return cur_bbox_vec;
        cv::Mat prev_pts, prev_pts_flow_cpu, cur_pts_flow_cpu;
        for (auto &i : cur_bbox_vec) {
@@ -298,6 +300,7 @@
                    }
        }
        if (old_gpu_id != gpu_id)
        cv::cuda::setDevice(old_gpu_id);
        return result_bbox_vec;