From 08b757a0bf76efe8c76b453063a1bb19315bcaa6 Mon Sep 17 00:00:00 2001
From: Joseph Redmon <pjreddie@gmail.com>
Date: Wed, 14 Jan 2015 20:18:57 +0000
Subject: [PATCH] Stable, needs to be way faster
---
src/convolutional_layer.cl | 22 ++++++++++++++--------
1 files changed, 14 insertions(+), 8 deletions(-)
diff --git a/src/convolutional_layer.cl b/src/convolutional_layer.cl
index 6393c37..3b091cf 100644
--- a/src/convolutional_layer.cl
+++ b/src/convolutional_layer.cl
@@ -1,25 +1,31 @@
__kernel void bias(int n, int size, __global float *biases, __global float *output)
{
- int batch = get_global_id(0);
- int id = get_global_id(1);
+ int id = get_global_id(0); // dim 0: offset within one batch item's n*size outputs (filter = id/size below)
+ int batch = get_global_id(1); // dim 1: batch item; its outputs start at batch*n*size
int filter = id/size;
- int position = id%size;
+ //int position = id%size;
output[batch*n*size + id] = biases[filter];
}
__kernel void learn_bias(int batch, int n, int size, __global float *delta, __global float *bias_updates)
{
+ __local float part[BLOCK]; // one partial sum per work-item of the group
int i,b;
- int filter = get_global_id(0);
+ int filter = get_group_id(0); // each work-group accumulates the bias update of one filter
+ int p = get_local_id(0); // lane within the group; assumes local size == BLOCK (TODO confirm at launch site)
float sum = 0;
for(b = 0; b < batch; ++b){
- for(i = 0; i < size; ++i){
- int index = i + size*(filter + n*b);
- sum += delta[index];
+ for(i = 0; i < size; i += BLOCK){ // lanes walk this filter's `size` deltas in steps of BLOCK
+ int index = p + i + size*(filter + n*b);
+ sum += (p + i < size) ? delta[index] : 0; // bound the in-slice offset, not the absolute index, so every filter/batch item is summed
}
}
- bias_updates[filter] += sum;
+ part[p] = sum;
+ barrier(CLK_LOCAL_MEM_FENCE); // all part[] writes must be visible before lane 0 reduces them
+ if(p == 0){
+ for(i = 0; i < BLOCK; ++i) bias_updates[filter] += part[i];
+ }
}
--
Gitblit v1.10.0