From 73f7aacf35ec9b1d0f9de9ddf38af0889f213e99 Mon Sep 17 00:00:00 2001
From: Joseph Redmon <pjreddie@gmail.com>
Date: Tue, 20 Sep 2016 18:34:49 +0000
Subject: [PATCH] better multigpu

---
 src/shortcut_layer.c |   31 ++++++++++++++-----------------
 1 files changed, 14 insertions(+), 17 deletions(-)

diff --git a/src/shortcut_layer.c b/src/shortcut_layer.c
index ff1d50f..bf45516 100644
--- a/src/shortcut_layer.c
+++ b/src/shortcut_layer.c
@@ -10,22 +10,15 @@
     layer l = {0};
     l.type = SHORTCUT;
     l.batch = batch;
-    l.w = w;
-    l.h = h;
-    l.c = c;
+    l.w = w2;
+    l.h = h2;
+    l.c = c2;
     l.out_w = w;
     l.out_h = h;
     l.out_c = c;
     l.outputs = w*h*c;
-    l.inputs = w*h*c;
-    int stride = w2 / w;
+    l.inputs = l.outputs;
 
-    assert(stride * w == w2);
-    assert(stride * h == h2);
-    assert(c >= c2);
-
-    l.stride = stride;
-    l.n = c2;
     l.index = index;
 
     l.delta =  calloc(l.outputs*batch, sizeof(float));
@@ -40,25 +33,29 @@
 void forward_shortcut_layer(const layer l, network_state state)
 {
     copy_cpu(l.outputs*l.batch, state.input, 1, l.output, 1);
-    shortcut_cpu(l.output, l.w, l.h, l.c, l.batch, 1, state.net.layers[l.index].output, l.stride, l.n);
+    shortcut_cpu(l.batch, l.w, l.h, l.c, state.net.layers[l.index].output, l.out_w, l.out_h, l.out_c, l.output);
+    activate_array(l.output, l.outputs*l.batch, l.activation);
 }
 
 void backward_shortcut_layer(const layer l, network_state state)
 {
-    copy_cpu(l.outputs*l.batch, l.delta, 1, state.delta, 1);
-    shortcut_cpu(state.net.layers[l.index].delta, l.w*l.stride, l.h*l.stride, l.n, l.batch, l.stride, l.delta, 1, l.c);
+    gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);
+    axpy_cpu(l.outputs*l.batch, 1, l.delta, 1, state.delta, 1);
+    shortcut_cpu(l.batch, l.out_w, l.out_h, l.out_c, l.delta, l.w, l.h, l.c, state.net.layers[l.index].delta);
 }
 
 #ifdef GPU
 void forward_shortcut_layer_gpu(const layer l, network_state state)
 {
     copy_ongpu(l.outputs*l.batch, state.input, 1, l.output_gpu, 1);
-    shortcut_gpu(l.output_gpu, l.w, l.h, l.c, l.batch, 1, state.net.layers[l.index].output_gpu, l.stride, l.n);
+    shortcut_gpu(l.batch, l.w, l.h, l.c, state.net.layers[l.index].output_gpu, l.out_w, l.out_h, l.out_c, l.output_gpu);
+    activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation);
 }
 
 void backward_shortcut_layer_gpu(const layer l, network_state state)
 {
-    copy_ongpu(l.outputs*l.batch, l.delta_gpu, 1, state.delta, 1);
-    shortcut_gpu(state.net.layers[l.index].delta_gpu, l.w*l.stride, l.h*l.stride, l.n, l.batch, l.stride, l.delta_gpu, 1, l.c);
+    gradient_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu);
+    axpy_ongpu(l.outputs*l.batch, 1, l.delta_gpu, 1, state.delta, 1);
+    shortcut_gpu(l.batch, l.out_w, l.out_h, l.out_c, l.delta_gpu, l.w, l.h, l.c, state.net.layers[l.index].delta_gpu);
 }
 #endif

--
Gitblit v1.10.0