From d9f1b0b16edeb59281355a855e18a8be343fc33c Mon Sep 17 00:00:00 2001
From: Joseph Redmon <pjreddie@gmail.com>
Date: Fri, 08 Aug 2014 19:04:15 +0000
Subject: [PATCH] probably how maxpool layers should be

---
 src/gemm.cl |   33 +++------------------------------
 1 files changed, 3 insertions(+), 30 deletions(-)

diff --git a/src/gemm.cl b/src/gemm.cl
index 7c868f4..9e45783 100644
--- a/src/gemm.cl
+++ b/src/gemm.cl
@@ -1,5 +1,4 @@
 
-
 __kernel void gemm(int TA, int TB, int M, int N, int K, float ALPHA, 
                     __global float *A, int lda, 
                     __global float *B, int ldb,
@@ -28,8 +27,8 @@
         int brow = i + sub_row;
         int bcol = col_block*BLOCK + sub_col;
 
-        Asub[sub_row][sub_col] = TA ? A[arow + acol*lda] : A[arow*lda + acol];
-        Bsub[sub_row][sub_col] = TB ? B[brow + bcol*ldb] : B[brow*ldb + bcol];
+        if(arow < M && acol < K)Asub[sub_row][sub_col] = TA ? A[arow + acol*lda] : A[arow*lda + acol];
+        if(brow < K && bcol < N)Bsub[sub_row][sub_col] = TB ? B[brow + bcol*ldb] : B[brow*ldb + bcol];
 
         barrier(CLK_LOCAL_MEM_FENCE);
 
@@ -40,33 +39,7 @@
     }
 
     if(row < M && col < N){
-        C[row*ldc+col] = val;
+        C[row*ldc+col] = ALPHA*val + BETA*C[row*ldc+col];
     }
 }
 
-/*
-__kernel void gemm_slow(int TA, int TB, int M, int N, int K, float ALPHA, 
-                    __global float *A, int lda, 
-                    __global float *B, int ldb,
-                    float BETA,
-                    __global float *C, int ldc)
-{
-    float val = 0;
-    int row = get_global_id(0);
-    int col = get_global_id(1);
-    int i;
-    for(i = 0; i < K; ++i){
-        float Aval;
-        if(TA) Aval = A[i*lda+row]; 
-        else Aval = A[row*lda+i];
-
-        float Bval;
-        if(TB) Bval = B[col*ldb+i];
-        else Bval = B[col+i*ldb];
-
-        val += Aval*Bval;
-    }
-    C[row*ldc+col] = val;
-}
-
-*/

--
Gitblit v1.10.0