From 033e934ce82826c73d851098baf7ce4b1a27c89a Mon Sep 17 00:00:00 2001
From: AlexeyAB <alexeyab84@gmail.com>
Date: Wed, 21 Feb 2018 16:14:01 +0000
Subject: [PATCH] If there is excessive GPU-RAM consumption by CUDNN then do not use Workspace
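
If the convolution algorithm cuDNN selects needs more workspace than
the GPU has free, fall back to a zero-workspace algorithm rather than
over-allocating GPU RAM. A minimal sketch of that selection logic,
assuming the cuDNN v7 API (pick_fwd_algo, its descriptor arguments,
and free_mem are illustrative placeholders, not code from this patch):

    #include <cudnn.h>

    static cudnnConvolutionFwdAlgo_t pick_fwd_algo(
        cudnnHandle_t h, cudnnTensorDescriptor_t x,
        cudnnFilterDescriptor_t w, cudnnConvolutionDescriptor_t conv,
        cudnnTensorDescriptor_t y, size_t free_mem)
    {
        cudnnConvolutionFwdAlgo_t algo;
        /* First ask for the fastest algorithm, ignoring memory use. */
        cudnnGetConvolutionForwardAlgorithm(h, x, w, conv, y,
            CUDNN_CONVOLUTION_FWD_PREFER_FASTEST, 0, &algo);

        size_t ws = 0;
        cudnnGetConvolutionForwardWorkspaceSize(h, x, w, conv, y,
            algo, &ws);
        if (ws > free_mem) {
            /* Excessive GPU-RAM consumption: use no workspace at all. */
            cudnnGetConvolutionForwardAlgorithm(h, x, w, conv, y,
                CUDNN_CONVOLUTION_FWD_NO_WORKSPACE, 0, &algo);
        }
        return algo;
    }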

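The shortcut_cpu()/shortcut_gpu() call sites below pass both tensors'
dimensions explicitly instead of a precomputed stride. For reference,
a sketch of a shortcut_cpu body consistent with the new call sites
(the real definition lives in src/blas.c and is not part of this
patch):

    void shortcut_cpu(int batch, int w1, int h1, int c1, float *add,
                      int w2, int h2, int c2, float *out)
    {
        /* Derive stride/sample from the two spatial sizes, so the
           tensors may differ in resolution in either direction. */
        int stride = w1 / w2; if (stride < 1) stride = 1;
        int sample = w2 / w1; if (sample < 1) sample = 1;
        int minw = (w1 < w2) ? w1 : w2;
        int minh = (h1 < h2) ? h1 : h2;
        int minc = (c1 < c2) ? c1 : c2;

        int b, k, j, i;
        for (b = 0; b < batch; ++b)
        for (k = 0; k < minc; ++k)
        for (j = 0; j < minh; ++j)
        for (i = 0; i < minw; ++i) {
            int out_index = i*sample + w2*(j*sample + h2*(k + c2*b));
            int add_index = i*stride + w1*(j*stride + h1*(k + c1*b));
            out[out_index] += add[add_index];
        }
    }

The forward pass now also applies the layer activation, and the
backward pass correspondingly runs gradient_array() on the deltas
before routing them back through axpy_cpu() and shortcut_cpu().
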
---
 src/shortcut_layer.c |   37 ++++++++++++++++++++-----------------
 1 file changed, 20 insertions(+), 17 deletions(-)

diff --git a/src/shortcut_layer.c b/src/shortcut_layer.c
index ff1d50f..8bca50f 100644
--- a/src/shortcut_layer.c
+++ b/src/shortcut_layer.c
@@ -10,27 +10,26 @@
     layer l = {0};
     l.type = SHORTCUT;
     l.batch = batch;
-    l.w = w;
-    l.h = h;
-    l.c = c;
+    l.w = w2;   // input dims: taken from the layer being shortcut from
+    l.h = h2;
+    l.c = c2;
     l.out_w = w;
     l.out_h = h;
     l.out_c = c;
     l.outputs = w*h*c;
-    l.inputs = w*h*c;
-    int stride = w2 / w;
+    l.inputs = l.outputs;
 
-    assert(stride * w == w2);
-    assert(stride * h == h2);
-    assert(c >= c2);
-
-    l.stride = stride;
-    l.n = c2;
     l.index = index;
 
     l.delta =  calloc(l.outputs*batch, sizeof(float));
     l.output = calloc(l.outputs*batch, sizeof(float));
+
+    l.forward = forward_shortcut_layer;
+    l.backward = backward_shortcut_layer;
     #ifdef GPU
+    l.forward_gpu = forward_shortcut_layer_gpu;
+    l.backward_gpu = backward_shortcut_layer_gpu;
+
     l.delta_gpu =  cuda_make_array(l.delta, l.outputs*batch);
     l.output_gpu = cuda_make_array(l.output, l.outputs*batch);
     #endif
@@ -40,25 +39,29 @@
 void forward_shortcut_layer(const layer l, network_state state)
 {
     copy_cpu(l.outputs*l.batch, state.input, 1, l.output, 1);
-    shortcut_cpu(l.output, l.w, l.h, l.c, l.batch, 1, state.net.layers[l.index].output, l.stride, l.n);
+    shortcut_cpu(l.batch, l.w, l.h, l.c, state.net.layers[l.index].output, l.out_w, l.out_h, l.out_c, l.output);
+    activate_array(l.output, l.outputs*l.batch, l.activation);
 }
 
 void backward_shortcut_layer(const layer l, network_state state)
 {
-    copy_cpu(l.outputs*l.batch, l.delta, 1, state.delta, 1);
-    shortcut_cpu(state.net.layers[l.index].delta, l.w*l.stride, l.h*l.stride, l.n, l.batch, l.stride, l.delta, 1, l.c);
+    gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);
+    axpy_cpu(l.outputs*l.batch, 1, l.delta, 1, state.delta, 1);
+    shortcut_cpu(l.batch, l.out_w, l.out_h, l.out_c, l.delta, l.w, l.h, l.c, state.net.layers[l.index].delta);
 }
 
 #ifdef GPU
 void forward_shortcut_layer_gpu(const layer l, network_state state)
 {
     copy_ongpu(l.outputs*l.batch, state.input, 1, l.output_gpu, 1);
-    shortcut_gpu(l.output_gpu, l.w, l.h, l.c, l.batch, 1, state.net.layers[l.index].output_gpu, l.stride, l.n);
+    shortcut_gpu(l.batch, l.w, l.h, l.c, state.net.layers[l.index].output_gpu, l.out_w, l.out_h, l.out_c, l.output_gpu);
+    activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation);
 }
 
 void backward_shortcut_layer_gpu(const layer l, network_state state)
 {
-    copy_ongpu(l.outputs*l.batch, l.delta_gpu, 1, state.delta, 1);
-    shortcut_gpu(state.net.layers[l.index].delta_gpu, l.w*l.stride, l.h*l.stride, l.n, l.batch, l.stride, l.delta_gpu, 1, l.c);
+    gradient_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu);
+    axpy_ongpu(l.outputs*l.batch, 1, l.delta_gpu, 1, state.delta, 1);
+    shortcut_gpu(l.batch, l.out_w, l.out_h, l.out_c, l.delta_gpu, l.w, l.h, l.c, state.net.layers[l.index].delta_gpu);
 }
 #endif

--
Gitblit v1.10.0