From e4ab47dfcedb4c87e5eddf484caa4ac0c020fc9b Mon Sep 17 00:00:00 2001
From: AlexeyAB <alexeyab84@gmail.com>
Date: Wed, 21 Feb 2018 12:35:09 +0000
Subject: [PATCH] Optimized resizing of region_layer for random=1
---
src/convolutional_layer.c | 32 ++++++++++++++++++++------------
1 file changed, 20 insertions(+), 12 deletions(-)
diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c
index cf5d252..ca83486 100644
--- a/src/convolutional_layer.c
+++ b/src/convolutional_layer.c
@@ -146,8 +146,12 @@
cudnnSetTensor4dDescriptor(l->srcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->c, l->h, l->w);
cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w);
cudnnSetFilter4dDescriptor(l->weightDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, l->n, l->c, l->size, l->size);
- cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION);
- cudnnGetConvolutionForwardAlgorithm(cudnn_handle(),
+#if(CUDNN_MAJOR >= 6)
+ cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT); // cudnn 6.0
+#else
+ cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION); // cudnn 5.1
+#endif
+ cudnnGetConvolutionForwardAlgorithm(cudnn_handle(),
l->srcTensorDesc,
l->weightDesc,
l->convDesc,
@@ -355,6 +359,8 @@
void resize_convolutional_layer(convolutional_layer *l, int w, int h)
{
+ int old_w = l->w;
+ int old_h = l->h;
l->w = w;
l->h = h;
int out_w = convolutional_out_width(*l);
@@ -374,19 +380,21 @@
}
#ifdef GPU
- cuda_free(l->delta_gpu);
- cuda_free(l->output_gpu);
+ if (old_w < w || old_h < h) {
+ cuda_free(l->delta_gpu);
+ cuda_free(l->output_gpu);
- l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs);
- l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs);
+ l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs);
+ l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs);
- if(l->batch_normalize){
- cuda_free(l->x_gpu);
- cuda_free(l->x_norm_gpu);
+ if (l->batch_normalize) {
+ cuda_free(l->x_gpu);
+ cuda_free(l->x_norm_gpu);
- l->x_gpu = cuda_make_array(l->output, l->batch*l->outputs);
- l->x_norm_gpu = cuda_make_array(l->output, l->batch*l->outputs);
- }
+ l->x_gpu = cuda_make_array(l->output, l->batch*l->outputs);
+ l->x_norm_gpu = cuda_make_array(l->output, l->batch*l->outputs);
+ }
+ }
#ifdef CUDNN
cudnn_convolutional_setup(l);
#endif
--
Gitblit v1.10.0