From 51477ab274bbb40be259844bdaab9685f693b028 Mon Sep 17 00:00:00 2001
From: AlexeyAB <alexeyab84@gmail.com>
Date: Mon, 24 Jul 2017 23:40:23 +0000
Subject: [PATCH] Increased performance of Yolo on CPU by 2.3 times by using OpenMP in both darknet_no_gpu and yolo_cpp_dll_no_gpu

---
 build/darknet/yolo_cpp_dll_no_gpu.vcxproj |    1 +
 src/gemm.c                                |   21 +++++++++++++--------
 build/darknet/darknet_no_gpu.vcxproj      |    2 ++
 3 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/build/darknet/darknet_no_gpu.vcxproj b/build/darknet/darknet_no_gpu.vcxproj
index 3f4681d..6da02a2 100644
--- a/build/darknet/darknet_no_gpu.vcxproj
+++ b/build/darknet/darknet_no_gpu.vcxproj
@@ -89,6 +89,7 @@
       <AdditionalIncludeDirectories>C:\opencv_2.4.9\opencv\build\include;..\..\3rdparty\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>_MBCS;OPENCV;_TIMESPEC_DEFINED;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <UndefinePreprocessorDefinitions>CUDNN</UndefinePreprocessorDefinitions>
+      <OpenMPSupport>true</OpenMPSupport>
     </ClCompile>
     <Link>
       <GenerateDebugInformation>true</GenerateDebugInformation>
@@ -133,6 +134,7 @@
       <PrecompiledHeaderCompileAs>CompileAsCpp</PrecompiledHeaderCompileAs>
       <CompileAs>Default</CompileAs>
       <UndefinePreprocessorDefinitions>CUDNN</UndefinePreprocessorDefinitions>
+      <OpenMPSupport>true</OpenMPSupport>
     </ClCompile>
     <Link>
       <GenerateDebugInformation>true</GenerateDebugInformation>
diff --git a/build/darknet/yolo_cpp_dll_no_gpu.vcxproj b/build/darknet/yolo_cpp_dll_no_gpu.vcxproj
index 96afb0d..2b746f5 100644
--- a/build/darknet/yolo_cpp_dll_no_gpu.vcxproj
+++ b/build/darknet/yolo_cpp_dll_no_gpu.vcxproj
@@ -139,6 +139,7 @@
       <UndefinePreprocessorDefinitions>
       </UndefinePreprocessorDefinitions>
       <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <OpenMPSupport>true</OpenMPSupport>
     </ClCompile>
     <Link>
       <GenerateDebugInformation>true</GenerateDebugInformation>
diff --git a/src/gemm.c b/src/gemm.c
index 3003be0..a4db8a4 100644
--- a/src/gemm.c
+++ b/src/gemm.c
@@ -151,14 +151,19 @@
             C[i*ldc + j] *= BETA;
         }
     }
-    if(!TA && !TB)
-        gemm_nn(M, N, K, ALPHA,A,lda, B, ldb,C,ldc);
-    else if(TA && !TB)
-        gemm_tn(M, N, K, ALPHA,A,lda, B, ldb,C,ldc);
-    else if(!TA && TB)
-        gemm_nt(M, N, K, ALPHA,A,lda, B, ldb,C,ldc);
-    else
-        gemm_tt(M, N, K, ALPHA,A,lda, B, ldb,C,ldc);
+
+	int t;
+	#pragma omp parallel for
+	for (t = 0; t < M; ++t) {
+		if (!TA && !TB)
+			gemm_nn(1, N, K, ALPHA, A + t*lda, lda, B, ldb, C + t*ldc, ldc);
+		else if (TA && !TB)
+			gemm_tn(1, N, K, ALPHA, A + t, lda, B, ldb, C + t*ldc, ldc);
+		else if (!TA && TB)
+			gemm_nt(1, N, K, ALPHA, A + t*lda, lda, B, ldb, C + t*ldc, ldc);
+		else
+			gemm_tt(1, N, K, ALPHA, A + t, lda, B, ldb, C + t*ldc, ldc);
+	}
 }
 
 #ifdef GPU

--
Gitblit v1.10.0