From 8fa9f44211e5b06a5e4c0859eb31af6a45492222 Mon Sep 17 00:00:00 2001
From: AlexeyAB <alexeyab84@gmail.com>
Date: Tue, 12 Sep 2017 14:16:21 +0000
Subject: [PATCH] Improved speed of yolo_console_dll.cpp - 40 FPS on 4K using GeForce GTX 960

---
 src/blas_kernels.cu |    4 ++++
 1 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/src/blas_kernels.cu b/src/blas_kernels.cu
index d940176..79fc1c1 100644
--- a/src/blas_kernels.cu
+++ b/src/blas_kernels.cu
@@ -223,6 +223,7 @@
             local[id] += (i+id < spatial) ? delta[index] : 0;
         }
     }
+	__syncthreads();
 
     if(id == 0){
         mean_delta[filter] = 0;
@@ -251,6 +252,7 @@
             local[id] += (i+id < spatial) ? delta[index]*(x[index] - mean[filter]) : 0;
         }
     }
+	__syncthreads();
 
     if(id == 0){
         variance_delta[filter] = 0;
@@ -446,6 +448,7 @@
             local[id] += (i+id < spatial) ? x[index] : 0;
         }
     }
+	__syncthreads();
 
     if(id == 0){
         mean[filter] = 0;
@@ -474,6 +477,7 @@
             local[id] += (i+id < spatial) ? pow((x[index] - mean[filter]), 2) : 0;
         }
     }
+	__syncthreads();
 
     if(id == 0){
         variance[filter] = 0;

--
Gitblit v1.10.0