From aa5996d58e68edfbefe51061856aecd549dd09c4 Mon Sep 17 00:00:00 2001
From: Joseph Redmon <pjreddie@gmail.com>
Date: Tue, 13 Jan 2015 01:27:08 +0000
Subject: [PATCH] Faster
---
src/convolutional_layer.cl | 16 +++++++++++-----
1 files changed, 11 insertions(+), 5 deletions(-)
diff --git a/src/convolutional_layer.cl b/src/convolutional_layer.cl
index 92c9d29..3b091cf 100644
--- a/src/convolutional_layer.cl
+++ b/src/convolutional_layer.cl
@@ -11,15 +11,21 @@
__kernel void learn_bias(int batch, int n, int size, __global float *delta, __global float *bias_updates)
{
+ __local float part[BLOCK]; // one partial-sum slot per local work-item id
int i,b;
- int filter = get_global_id(0);
+ int filter = get_group_id(0); // the work-group id selects the filter
+ int p = get_local_id(0);
float sum = 0;
for(b = 0; b < batch; ++b){
- for(i = 0; i < size; ++i){
- int index = i + size*(filter + n*b);
- sum += delta[index];
+ for(i = 0; i < size; i += BLOCK){
+ int index = p + i + size*(filter + n*b);
+ sum += (p + i < size) ? delta[index] : 0; // guard the offset inside this filter's slice; the absolute index is >= size for every slice but the first
}
}
- bias_updates[filter] += sum;
+ part[p] = sum;
+ barrier(CLK_LOCAL_MEM_FENCE); // every part[] slot must be written before work-item 0 reads them
+ if(p == 0){
+ for(i = 0; i < BLOCK; ++i) bias_updates[filter] += part[i]; // NOTE(review): reads all BLOCK slots - assumes the local work size equals BLOCK; confirm at the launch site
+ }
}
--
Gitblit v1.10.0