From 5c9a773bb6e994889354c181de5a872e867aa35b Mon Sep 17 00:00:00 2001
From: Joseph Redmon <pjreddie@gmail.com>
Date: Sun, 26 Oct 2014 00:35:25 +0000
Subject: [PATCH] Maybe some stuff changed: swap gemm.cl row/col id dimensions, bounds-check tile loads

---
 src/gemm.cl |   12 ++++++------
 1 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/gemm.cl b/src/gemm.cl
index 91375a7..c5a0698 100644
--- a/src/gemm.cl
+++ b/src/gemm.cl
@@ -10,11 +10,11 @@
 
     float val = 0;
     
-    int row_block = get_group_id(0);
-    int col_block = get_group_id(1);
+    int row_block = get_group_id(1);
+    int col_block = get_group_id(0);
 
-    int sub_row = get_local_id(0);
-    int sub_col = get_local_id(1);
+    int sub_row = get_local_id(1);
+    int sub_col = get_local_id(0);
 
     int row = row_block*BLOCK + sub_row;
     int col = col_block*BLOCK + sub_col;
@@ -27,8 +27,8 @@
         int brow = i + sub_row;
         int bcol = col_block*BLOCK + sub_col;
 
-        Asub[sub_row][sub_col] = TA ? A[arow + acol*lda] : A[arow*lda + acol];
-        Bsub[sub_row][sub_col] = TB ? B[brow + bcol*ldb] : B[brow*ldb + bcol];
+        if(arow < M && acol < K)Asub[sub_row][sub_col] = TA ? A[arow + acol*lda] : A[arow*lda + acol];
+        if(brow < K && bcol < N)Bsub[sub_row][sub_col] = TB ? B[brow + bcol*ldb] : B[brow*ldb + bcol];
 
         barrier(CLK_LOCAL_MEM_FENCE);
 

--
Gitblit v1.10.0