From dda993f3dd3c753dfd580d485b39c1001830fee4 Mon Sep 17 00:00:00 2001
From: AlexeyAB <alexeyab84@gmail.com>
Date: Thu, 22 Feb 2018 19:54:40 +0000
Subject: [PATCH] Use half_float16 instead of float32 if both CUDNN and CUDNN_HALF are defined. Use Tensor Cores.

---
 src/region_layer.c |   14 +++++++++-----
 1 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/src/region_layer.c b/src/region_layer.c
index f7eaef6..e8bbd5c 100644
--- a/src/region_layer.c
+++ b/src/region_layer.c
@@ -53,6 +53,8 @@
 
 void resize_region_layer(layer *l, int w, int h)
 {
+	int old_w = l->w;
+	int old_h = l->h;
     l->w = w;
     l->h = h;
 
@@ -63,11 +65,13 @@
     l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float));
 
 #ifdef GPU
-    cuda_free(l->delta_gpu);
-    cuda_free(l->output_gpu);
+	if (old_w < w || old_h < h) {
+		cuda_free(l->delta_gpu);
+		cuda_free(l->output_gpu);
 
-    l->delta_gpu =     cuda_make_array(l->delta, l->batch*l->outputs);
-    l->output_gpu =    cuda_make_array(l->output, l->batch*l->outputs);
+		l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs);
+		l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs);
+	}
 #endif
 }
 
@@ -170,7 +174,7 @@
         for (b = 0; b < l.batch; ++b){
             for(i = 0; i < l.h*l.w*l.n; ++i){
                 int index = size*i + b*l.outputs;
-                softmax(l.output + index + 5, l.classes, 1, l.output + index + 5);
+                softmax(l.output + index + 5, l.classes, 1, l.output + index + 5, 1);
             }
         }
     }

--
Gitblit v1.10.0