From aa5996d58e68edfbefe51061856aecd549dd09c4 Mon Sep 17 00:00:00 2001
From: Joseph Redmon <pjreddie@gmail.com>
Date: Tue, 13 Jan 2015 01:27:08 +0000
Subject: [PATCH] Faster

---
 src/convolutional_layer.cl |   16 +++++++++++-----
 1 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/src/convolutional_layer.cl b/src/convolutional_layer.cl
index 92c9d29..3b091cf 100644
--- a/src/convolutional_layer.cl
+++ b/src/convolutional_layer.cl
@@ -11,15 +11,21 @@
 
 __kernel void learn_bias(int batch, int n, int size, __global float *delta, __global float *bias_updates)
 {
+    __local float part[BLOCK]; // one partial sum per work-item; assumes local work size == BLOCK (confirm at launch site)
     int i,b;
-    int filter = get_global_id(0);
+    int filter = get_group_id(0); // the work-group id selects the filter to reduce
+    int p = get_local_id(0); // this work-item's position inside its work-group
     float sum = 0;
     for(b = 0; b < batch; ++b){
-        for(i = 0; i < size; ++i){
-            int index = i + size*(filter + n*b);
-            sum += delta[index];
+        for(i = 0; i < size; i += BLOCK){
+            int index = p + i + size*(filter + n*b);
+            sum += (p + i < size) ? delta[index] : 0; // guard the in-slice offset; index itself includes the slice base
         }
     }
-    bias_updates[filter] += sum;
+    part[p] = sum;
+    barrier(CLK_LOCAL_MEM_FENCE); // every partial sum must be written before work-item 0 reads them
+    if(p == 0){
+        for(i = 0; i < BLOCK; ++i) bias_updates[filter] += part[i]; // serial final reduction by a single work-item
+    }
 }
 

--
Gitblit v1.10.0