From edbccdfcaf46f11e631afe98796f3e6e170da5d0 Mon Sep 17 00:00:00 2001
From: Joseph Redmon <pjreddie@gmail.com>
Date: Sun, 26 Oct 2014 05:04:34 +0000
Subject: [PATCH] gemm.cl: swap row/col work-item axes, guard tile loads, apply ALPHA/BETA
---
src/gemm.cl | 41 +++++++----------------------------------
1 files changed, 7 insertions(+), 34 deletions(-)
diff --git a/src/gemm.cl b/src/gemm.cl
index 7c868f4..c5a0698 100644
--- a/src/gemm.cl
+++ b/src/gemm.cl
@@ -1,5 +1,4 @@
-
__kernel void gemm(int TA, int TB, int M, int N, int K, float ALPHA,
__global float *A, int lda,
__global float *B, int ldb,
@@ -11,11 +10,11 @@
float val = 0;
- int row_block = get_group_id(0);
- int col_block = get_group_id(1);
+ int row_block = get_group_id(1);
+ int col_block = get_group_id(0);
- int sub_row = get_local_id(0);
- int sub_col = get_local_id(1);
+ int sub_row = get_local_id(1);
+ int sub_col = get_local_id(0);
int row = row_block*BLOCK + sub_row;
int col = col_block*BLOCK + sub_col;
@@ -28,8 +27,8 @@
int brow = i + sub_row;
int bcol = col_block*BLOCK + sub_col;
- Asub[sub_row][sub_col] = TA ? A[arow + acol*lda] : A[arow*lda + acol];
- Bsub[sub_row][sub_col] = TB ? B[brow + bcol*ldb] : B[brow*ldb + bcol];
+ if(arow < M && acol < K)Asub[sub_row][sub_col] = TA ? A[arow + acol*lda] : A[arow*lda + acol];
+ if(brow < K && bcol < N)Bsub[sub_row][sub_col] = TB ? B[brow + bcol*ldb] : B[brow*ldb + bcol];
barrier(CLK_LOCAL_MEM_FENCE);
@@ -40,33 +39,7 @@
}
if(row < M && col < N){
- C[row*ldc+col] = val;
+ C[row*ldc+col] = ALPHA*val + BETA*C[row*ldc+col];
}
}
-/*
-__kernel void gemm_slow(int TA, int TB, int M, int N, int K, float ALPHA,
- __global float *A, int lda,
- __global float *B, int ldb,
- float BETA,
- __global float *C, int ldc)
-{
- float val = 0;
- int row = get_global_id(0);
- int col = get_global_id(1);
- int i;
- for(i = 0; i < K; ++i){
- float Aval;
- if(TA) Aval = A[i*lda+row];
- else Aval = A[row*lda+i];
-
- float Bval;
- if(TB) Bval = B[col*ldb+i];
- else Bval = B[col+i*ldb];
-
- val += Aval*Bval;
- }
- C[row*ldc+col] = val;
-}
-
-*/
--
Gitblit v1.10.0