From 04c0c4fdf84bb7b9e89604e8b9a6079016d5968c Mon Sep 17 00:00:00 2001
From: Alexey <AlexeyAB@users.noreply.github.com>
Date: Wed, 04 Jul 2018 16:06:41 +0000
Subject: [PATCH] Merge pull request #1132 from tinohager/master

---
 src/gemm.c |   20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/src/gemm.c b/src/gemm.c
index 4eb36fb..506687b 100644
--- a/src/gemm.c
+++ b/src/gemm.c
@@ -92,6 +92,7 @@
 #include <ammintrin.h>
 #include <immintrin.h>
 #include <smmintrin.h>
+#include <cpuid.h>
 
 void asm_cpuid(uint32_t* abcd, uint32_t eax)
 {
@@ -109,17 +110,18 @@
 	abcd[2] = ecx;
 	abcd[3] = edx;
 }
+
 #endif
 
-inline int simd_detect_x86(unsigned int idFeature)
+int simd_detect_x86(unsigned int idFeature)
 {
 	uint32_t regs[4];	// EAX, EBX, ECX, EDX;
 #ifdef _WIN32
 	__cpuid(regs, 0);
 	if (regs[0] > 1U) __cpuid(regs, 1);
 #else
-	asm_cpuid(regs, 0);
-	if (regs[0] > 1U) asm_cpuid(regs, 0);
+	__get_cpuid(0, &regs[0], &regs[1], &regs[2], &regs[3]);
+	if(regs[0] > 1U) __get_cpuid(1, &regs[0], &regs[1], &regs[2], &regs[3]);
 #endif
 
 	if ((regs[2] & idFeature) != idFeature)
@@ -127,7 +129,7 @@
 	return 1;
 }
 
-inline int is_fma_avx() {
+int is_fma_avx() {
 	static int result = -1;
 	if (result == -1) {
 		result = simd_detect_x86(AVXFlag);
@@ -271,10 +273,12 @@
         float *C, int ldc)
 {
     //printf("cpu: %d %d %d %d %d %f %d %d %f %d\n",TA, TB, M, N, K, ALPHA, lda, ldb, BETA, ldc);
-    int i, j;
-    for(i = 0; i < M; ++i){
-        for(j = 0; j < N; ++j){
-            C[i*ldc + j] *= BETA;
+    if (BETA != 1){
+        int i, j;
+        for(i = 0; i < M; ++i){
+            for(j = 0; j < N; ++j){
+                C[i*ldc + j] *= BETA;
+            }
         }
     }
 

--
Gitblit v1.10.0