From 033e934ce82826c73d851098baf7ce4b1a27c89a Mon Sep 17 00:00:00 2001
From: AlexeyAB <alexeyab84@gmail.com>
Date: Wed, 21 Feb 2018 16:14:01 +0000
Subject: [PATCH] If there is excessive GPU-RAM consumption by CUDNN then do not use Workspace
---
src/convolutional_layer.c | 33 +++++++++++++++++++++++++++------
1 files changed, 27 insertions(+), 6 deletions(-)
diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c
index ca83486..801270a 100644
--- a/src/convolutional_layer.c
+++ b/src/convolutional_layer.c
@@ -137,7 +137,7 @@
#ifdef GPU
#ifdef CUDNN
-void cudnn_convolutional_setup(layer *l)
+void cudnn_convolutional_setup(layer *l, int cudnn_preference)
{
cudnnSetTensor4dDescriptor(l->dsrcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->c, l->h, l->w);
cudnnSetTensor4dDescriptor(l->ddstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w);
@@ -151,12 +151,21 @@
#else
cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION); // cudnn 5.1
#endif
+ int forward_algo = CUDNN_CONVOLUTION_FWD_PREFER_FASTEST;
+ int backward_algo = CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST;
+ int backward_filter = CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST;
+ if (cudnn_preference == cudnn_smallest) {
+ forward_algo = CUDNN_CONVOLUTION_FWD_NO_WORKSPACE;
+ backward_algo = CUDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE;
+ backward_filter = CUDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE;
+ }
+
cudnnGetConvolutionForwardAlgorithm(cudnn_handle(),
l->srcTensorDesc,
l->weightDesc,
l->convDesc,
l->dstTensorDesc,
- CUDNN_CONVOLUTION_FWD_PREFER_FASTEST,
+ forward_algo,
0,
&l->fw_algo);
cudnnGetConvolutionBackwardDataAlgorithm(cudnn_handle(),
@@ -164,7 +173,7 @@
l->ddstTensorDesc,
l->convDesc,
l->dsrcTensorDesc,
- CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST,
+ backward_algo,
0,
&l->bd_algo);
cudnnGetConvolutionBackwardFilterAlgorithm(cudnn_handle(),
@@ -172,7 +181,7 @@
l->ddstTensorDesc,
l->convDesc,
l->dweightDesc,
- CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST,
+ backward_filter,
0,
&l->bf_algo);
}
@@ -306,7 +315,7 @@
cudnnCreateTensorDescriptor(&l.ddstTensorDesc);
cudnnCreateFilterDescriptor(&l.dweightDesc);
cudnnCreateConvolutionDescriptor(&l.convDesc);
- cudnn_convolutional_setup(&l);
+ cudnn_convolutional_setup(&l, cudnn_fastest);
#endif
}
#endif
@@ -396,10 +405,22 @@
}
}
#ifdef CUDNN
- cudnn_convolutional_setup(l);
+ cudnn_convolutional_setup(l, cudnn_fastest);
#endif
#endif
l->workspace_size = get_workspace_size(*l);
+
+#ifdef CUDNN
+ // check for excessive memory consumption
+ size_t free_byte;
+ size_t total_byte;
+ check_error(cudaMemGetInfo(&free_byte, &total_byte));
+ if (l->workspace_size > free_byte || l->workspace_size >= total_byte / 2) {
+ printf(" used slow CUDNN algo without Workspace! \n");
+ cudnn_convolutional_setup(l, cudnn_smallest);
+ l->workspace_size = get_workspace_size(*l);
+ }
+#endif
}
void add_bias(float *output, float *biases, int batch, int n, int size)
--
Gitblit v1.10.0