From 5c9a773bb6e994889354c181de5a872e867aa35b Mon Sep 17 00:00:00 2001
From: Joseph Redmon <pjreddie@gmail.com>
Date: Sun, 26 Oct 2014 00:35:25 +0000
Subject: [PATCH] gemm.cl: swap row/col work-item dimensions and bounds-check tile loads
---
src/gemm.cl | 12 ++++++------
1 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/src/gemm.cl b/src/gemm.cl
index 91375a7..c5a0698 100644
--- a/src/gemm.cl
+++ b/src/gemm.cl
@@ -10,11 +10,11 @@
float val = 0;
- int row_block = get_group_id(0);
- int col_block = get_group_id(1);
+ int row_block = get_group_id(1);
+ int col_block = get_group_id(0);
- int sub_row = get_local_id(0);
- int sub_col = get_local_id(1);
+ int sub_row = get_local_id(1);
+ int sub_col = get_local_id(0);
int row = row_block*BLOCK + sub_row;
int col = col_block*BLOCK + sub_col;
@@ -27,8 +27,8 @@
int brow = i + sub_row;
int bcol = col_block*BLOCK + sub_col;
- Asub[sub_row][sub_col] = TA ? A[arow + acol*lda] : A[arow*lda + acol];
- Bsub[sub_row][sub_col] = TB ? B[brow + bcol*ldb] : B[brow*ldb + bcol];
+ if(arow < M && acol < K)Asub[sub_row][sub_col] = TA ? A[arow + acol*lda] : A[arow*lda + acol];
+ if(brow < K && bcol < N)Bsub[sub_row][sub_col] = TB ? B[brow + bcol*ldb] : B[brow*ldb + bcol];
barrier(CLK_LOCAL_MEM_FENCE);
--
Gitblit v1.10.0