From a6b2511a566f77a0838dc1dd0d5f3e3c49a8faa0 Mon Sep 17 00:00:00 2001
From: Joseph Redmon <pjreddie@gmail.com>
Date: Sat, 25 Jun 2016 23:13:54 +0000
Subject: [PATCH] idk
---
src/convolutional_kernels.cu | 38 +++++++++++++++++++-------------------
1 files changed, 19 insertions(+), 19 deletions(-)
diff --git a/src/convolutional_kernels.cu b/src/convolutional_kernels.cu
index cb50561..96398df 100644
--- a/src/convolutional_kernels.cu
+++ b/src/convolutional_kernels.cu
@@ -71,12 +71,6 @@
void forward_convolutional_layer_gpu(convolutional_layer l, network_state state)
{
- int i;
- int m = l.n;
- int k = l.size*l.size*l.c;
- int n = convolutional_out_height(l)*
- convolutional_out_width(l);
-
fill_ongpu(l.outputs*l.batch, 0, l.output_gpu, 1);
if(l.binary){
binarize_filters_gpu(l.filters_gpu, l.n, l.c*l.size*l.size, l.binary_filters_gpu);
@@ -86,9 +80,7 @@
if(l.xnor){
binarize_filters_gpu(l.filters_gpu, l.n, l.c*l.size*l.size, l.binary_filters_gpu);
swap_binary(&l);
- for(i = 0; i < l.batch; ++i){
- binarize_input_gpu(state.input + i*l.inputs, l.c, l.h*l.w, l.binary_input_gpu + i*l.inputs);
- }
+ binarize_gpu(state.input, l.c*l.h*l.w*l.batch, l.binary_input_gpu);
state.input = l.binary_input_gpu;
}
@@ -109,6 +101,10 @@
l.output_gpu);
#else
+ int i;
+ int m = l.n;
+ int k = l.size*l.size*l.c;
+ int n = l.out_w*l.out_h;
for(i = 0; i < l.batch; ++i){
im2col_ongpu(state.input + i*l.c*l.h*l.w, l.c, l.h, l.w, l.size, l.stride, l.pad, state.workspace);
float * a = l.filters_gpu;
@@ -121,27 +117,23 @@
if (l.batch_normalize) {
forward_batchnorm_layer_gpu(l, state);
}
- add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.n, n);
+ add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.n, l.out_w*l.out_h);
- activate_array_ongpu(l.output_gpu, m*n*l.batch, l.activation);
+ activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation);
//if(l.dot > 0) dot_error_gpu(l);
if(l.binary || l.xnor) swap_binary(&l);
}
void backward_convolutional_layer_gpu(convolutional_layer l, network_state state)
{
- int m = l.n;
- int n = l.size*l.size*l.c;
- int k = convolutional_out_height(l)*
- convolutional_out_width(l);
+ gradient_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu);
- gradient_array_ongpu(l.output_gpu, m*k*l.batch, l.activation, l.delta_gpu);
-
- backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.n, k);
+ backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.n, l.out_w*l.out_h);
if(l.batch_normalize){
backward_batchnorm_layer_gpu(l, state);
}
+ float *original_input = state.input;
if(l.xnor) state.input = l.binary_input_gpu;
#ifdef CUDNN
@@ -176,9 +168,14 @@
l.dsrcTensorDesc,
state.delta);
if(l.binary || l.xnor) swap_binary(&l);
+ if(l.xnor) gradient_array_ongpu(original_input, l.batch*l.c*l.h*l.w, HARDTAN, state.delta);
}
#else
+ int m = l.n;
+ int n = l.size*l.size*l.c;
+ int k = l.out_w*l.out_h;
+
int i;
for(i = 0; i < l.batch; ++i){
float * a = l.delta_gpu;
@@ -197,7 +194,10 @@
gemm_ongpu(1,0,n,k,m,1,a,n,b + i*k*m,k,0,c,k);
col2im_ongpu(state.workspace, l.c, l.h, l.w, l.size, l.stride, l.pad, state.delta + i*l.c*l.h*l.w);
- if(l.binary || l.xnor) swap_binary(&l);
+ if(l.binary || l.xnor) {
+ swap_binary(&l);
+ }
+ if(l.xnor) gradient_array_ongpu(original_input + i*l.c*l.h*l.w, l.c*l.h*l.w, HARDTAN, state.delta + i*l.c*l.h*l.w);
}
}
#endif
--
Gitblit v1.10.0