From 0cb81e5f50ab396726a334784db308025d74c2b7 Mon Sep 17 00:00:00 2001
From: AlexeyAB <alexeyab84@gmail.com>
Date: Sun, 07 Jan 2018 17:44:04 +0000
Subject: [PATCH] Some stream fixes
---
src/yolo_v2_class.cpp | 8 +++++---
src/network_kernels.cu | 4 +++-
src/yolo_console_dll.cpp | 3 +--
src/yolo_v2_class.hpp | 25 ++++++++++++++-----------
src/cuda.h | 1 +
5 files changed, 24 insertions(+), 17 deletions(-)
diff --git a/src/cuda.h b/src/cuda.h
index 31f9092..31577ff 100644
--- a/src/cuda.h
+++ b/src/cuda.h
@@ -26,6 +26,7 @@
void cuda_push_array(float *x_gpu, float *x, size_t n);
void cuda_pull_array(float *x_gpu, float *x, size_t n);
void cuda_set_device(int n);
+int cuda_get_device();
void cuda_free(float *x_gpu);
void cuda_random(float *x_gpu, size_t n);
float cuda_compare(float *x_gpu, float *x, size_t n, char *s);
diff --git a/src/network_kernels.cu b/src/network_kernels.cu
index 7a261c5..341c7f8 100644
--- a/src/network_kernels.cu
+++ b/src/network_kernels.cu
@@ -51,6 +51,7 @@
fill_ongpu(l.outputs * l.batch, 0, l.delta_gpu, 1);
}
l.forward_gpu(l, state);
+ cudaStreamSynchronize(get_cuda_stream());
state.input = l.output_gpu;
}
}
@@ -392,7 +393,8 @@
float *network_predict_gpu(network net, float *input)
{
- cuda_set_device(net.gpu_index);
+ if (net.gpu_index != cuda_get_device())
+ cuda_set_device(net.gpu_index);
int size = get_network_input_size(net) * net.batch;
network_state state;
state.index = 0;
diff --git a/src/yolo_console_dll.cpp b/src/yolo_console_dll.cpp
index 0b47554..abf4bdc 100644
--- a/src/yolo_console_dll.cpp
+++ b/src/yolo_console_dll.cpp
@@ -158,7 +158,6 @@
det_image = detector.mat_to_image_resize(cur_frame);
result_vec = thread_result_vec;
result_vec = detector.tracking(result_vec); // comment it - if track_id is not required
-
#ifdef TRACK_OPTFLOW
// track optical flow
if (track_optflow_queue.size() > 0) {
@@ -189,7 +188,7 @@
//std::vector<bbox_t> result;
auto result = detector.detect_resized(*current_image, frame_size, 0.24, false); // true
//Sleep(200);
- Sleep(50);
+ //Sleep(50);
++fps_det_counter;
std::unique_lock<std::mutex> lock(mtx);
thread_result_vec = result;
diff --git a/src/yolo_v2_class.cpp b/src/yolo_v2_class.cpp
index 1777aa2..88d2bc1 100644
--- a/src/yolo_v2_class.cpp
+++ b/src/yolo_v2_class.cpp
@@ -34,7 +34,7 @@
};
-YOLODLL_API Detector::Detector(std::string cfg_filename, std::string weight_filename, int gpu_id)
+YOLODLL_API Detector::Detector(std::string cfg_filename, std::string weight_filename, int gpu_id) : cur_gpu_id(gpu_id)
{
int old_gpu_index;
#ifdef GPU
@@ -178,7 +178,8 @@
int old_gpu_index;
#ifdef GPU
cudaGetDevice(&old_gpu_index);
- cudaSetDevice(net.gpu_index);
+ if(cur_gpu_id != old_gpu_index)
+ cudaSetDevice(net.gpu_index);
#endif
//std::cout << "net.gpu_index = " << net.gpu_index << std::endl;
@@ -242,7 +243,8 @@
free(sized.data);
#ifdef GPU
- cudaSetDevice(old_gpu_index);
+ if (cur_gpu_id != old_gpu_index)
+ cudaSetDevice(old_gpu_index);
#endif
return bbox_vec;
diff --git a/src/yolo_v2_class.hpp b/src/yolo_v2_class.hpp
index edb24cb..392b71a 100644
--- a/src/yolo_v2_class.hpp
+++ b/src/yolo_v2_class.hpp
@@ -47,6 +47,7 @@
class Detector {
std::shared_ptr<void> detector_gpu_ptr;
std::deque<std::vector<bbox_t>> prev_bbox_vec_deque;
+ const int cur_gpu_id;
public:
float nms = .4;
@@ -170,8 +171,8 @@
sync_PyrLKOpticalFlow_gpu = cv::cuda::SparsePyrLKOpticalFlow::create();
sync_PyrLKOpticalFlow_gpu->setWinSize(cv::Size(21, 21)); // 15, 21, 31
- sync_PyrLKOpticalFlow_gpu->setMaxLevel(5); // +- 50 ptx
- sync_PyrLKOpticalFlow_gpu->setNumIters(2000); // def: 30
+ sync_PyrLKOpticalFlow_gpu->setMaxLevel(3); // +- 5 ptx
+ sync_PyrLKOpticalFlow_gpu->setNumIters(1000); // def: 30
cv::cuda::setDevice(old_gpu_id);
}
@@ -190,9 +191,8 @@
void update_tracking_flow(cv::Mat src_mat)
{
int const old_gpu_id = cv::cuda::getDevice();
- cv::cuda::setDevice(gpu_id);
-
- //cv::cuda::Stream stream;
+ if (old_gpu_id != gpu_id)
+ cv::cuda::setDevice(gpu_id);
if (src_mat.channels() == 3) {
if (src_mat_gpu.cols == 0) {
@@ -203,7 +203,8 @@
src_mat_gpu.upload(src_mat, stream);
cv::cuda::cvtColor(src_mat_gpu, src_grey_gpu, CV_BGR2GRAY, 0, stream);
}
- cv::cuda::setDevice(old_gpu_id);
+ if (old_gpu_id != gpu_id)
+ cv::cuda::setDevice(old_gpu_id);
}
@@ -215,9 +216,8 @@
}
int const old_gpu_id = cv::cuda::getDevice();
- cv::cuda::setDevice(gpu_id);
-
- //cv::cuda::Stream stream;
+ if(old_gpu_id != gpu_id)
+ cv::cuda::setDevice(gpu_id);
if (dst_mat_gpu.cols == 0) {
dst_mat_gpu = cv::cuda::GpuMat(dst_mat.size(), dst_mat.type());
@@ -225,9 +225,9 @@
tmp_grey_gpu = cv::cuda::GpuMat(dst_mat.size(), CV_8UC1);
}
-
dst_mat_gpu.upload(dst_mat, stream);
+
cv::cuda::cvtColor(dst_mat_gpu, dst_grey_gpu, CV_BGR2GRAY, 0, stream);
if (src_grey_gpu.rows != dst_grey_gpu.rows || src_grey_gpu.cols != dst_grey_gpu.cols) {
@@ -237,6 +237,8 @@
return cur_bbox_vec;
}
+ //return cur_bbox_vec;
+
cv::Mat prev_pts, prev_pts_flow_cpu, cur_pts_flow_cpu;
for (auto &i : cur_bbox_vec) {
@@ -298,7 +300,8 @@
}
}
- cv::cuda::setDevice(old_gpu_id);
+ if (old_gpu_id != gpu_id)
+ cv::cuda::setDevice(old_gpu_id);
return result_bbox_vec;
}
--
Gitblit v1.10.0