From 1e9d1fcedf1a361bcdb384f15b5b14bdb526576d Mon Sep 17 00:00:00 2001
From: AlexeyAB <alexeyab84@gmail.com>
Date: Sat, 30 Jun 2018 20:12:25 +0000
Subject: [PATCH] Fixed arch=compute_53,code=[sm_53,compute_53] for Jetson TX1
---
src/gemm.c | 16 ++++++++++------
1 files changed, 10 insertions(+), 6 deletions(-)
diff --git a/src/gemm.c b/src/gemm.c
index 0efefd0..506687b 100644
--- a/src/gemm.c
+++ b/src/gemm.c
@@ -92,6 +92,7 @@
#include <ammintrin.h>
#include <immintrin.h>
#include <smmintrin.h>
+#include <cpuid.h>
void asm_cpuid(uint32_t* abcd, uint32_t eax)
{
@@ -109,6 +110,7 @@
abcd[2] = ecx;
abcd[3] = edx;
}
+
#endif
int simd_detect_x86(unsigned int idFeature)
@@ -118,8 +120,8 @@
__cpuid(regs, 0);
if (regs[0] > 1U) __cpuid(regs, 1);
#else
- asm_cpuid(regs, 0);
- if (regs[0] > 1U) asm_cpuid(regs, 0);
+ __get_cpuid(0, &regs[0], &regs[1], &regs[2], &regs[3]);
+ if(regs[0] > 1U) __get_cpuid(1, &regs[0], &regs[1], &regs[2], &regs[3]);
#endif
if ((regs[2] & idFeature) != idFeature)
@@ -271,10 +273,12 @@
float *C, int ldc)
{
//printf("cpu: %d %d %d %d %d %f %d %d %f %d\n",TA, TB, M, N, K, ALPHA, lda, ldb, BETA, ldc);
- int i, j;
- for(i = 0; i < M; ++i){
- for(j = 0; j < N; ++j){
- C[i*ldc + j] *= BETA;
+ if (BETA != 1){
+ int i, j;
+ for(i = 0; i < M; ++i){
+ for(j = 0; j < N; ++j){
+ C[i*ldc + j] *= BETA;
+ }
}
}
--
Gitblit v1.10.0