Increased the performance of Yolo on CPU by 2.3 times by using OpenMP in both darknet_no_gpu and yolo_cpp_dll_no_gpu
| | |
| | | <AdditionalIncludeDirectories>C:\opencv_2.4.9\opencv\build\include;..\..\3rdparty\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> |
| | | <PreprocessorDefinitions>_MBCS;OPENCV;_TIMESPEC_DEFINED;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions> |
| | | <UndefinePreprocessorDefinitions>CUDNN</UndefinePreprocessorDefinitions> |
| | | <OpenMPSupport>true</OpenMPSupport> |
| | | </ClCompile> |
| | | <Link> |
| | | <GenerateDebugInformation>true</GenerateDebugInformation> |
| | |
| | | <PrecompiledHeaderCompileAs>CompileAsCpp</PrecompiledHeaderCompileAs> |
| | | <CompileAs>Default</CompileAs> |
| | | <UndefinePreprocessorDefinitions>CUDNN</UndefinePreprocessorDefinitions> |
| | | <OpenMPSupport>true</OpenMPSupport> |
| | | </ClCompile> |
| | | <Link> |
| | | <GenerateDebugInformation>true</GenerateDebugInformation> |
| | |
| | | <UndefinePreprocessorDefinitions> |
| | | </UndefinePreprocessorDefinitions> |
| | | <MultiProcessorCompilation>true</MultiProcessorCompilation> |
| | | <OpenMPSupport>true</OpenMPSupport> |
| | | </ClCompile> |
| | | <Link> |
| | | <GenerateDebugInformation>true</GenerateDebugInformation> |
| | |
| | | C[i*ldc + j] *= BETA; |
| | | } |
| | | } |
| | | |
| | | int t; |
| | | #pragma omp parallel for |
| | | for (t = 0; t < M; ++t) { |
| | | if(!TA && !TB) |
| | | gemm_nn(M, N, K, ALPHA,A,lda, B, ldb,C,ldc); |
| | | gemm_nn(1, N, K, ALPHA, A + t*lda, lda, B, ldb, C + t*ldc, ldc); |
| | | else if(TA && !TB) |
| | | gemm_tn(M, N, K, ALPHA,A,lda, B, ldb,C,ldc); |
| | | gemm_tn(1, N, K, ALPHA, A + t, lda, B, ldb, C + t*ldc, ldc); |
| | | else if(!TA && TB) |
| | | gemm_nt(M, N, K, ALPHA,A,lda, B, ldb,C,ldc); |
| | | gemm_nt(1, N, K, ALPHA, A + t*lda, lda, B, ldb, C + t*ldc, ldc); |
| | | else |
| | | gemm_tt(M, N, K, ALPHA,A,lda, B, ldb,C,ldc); |
| | | gemm_tt(1, N, K, ALPHA, A + t, lda, B, ldb, C + t*ldc, ldc); |
| | | } |
| | | } |
| | | |
| | | #ifdef GPU |