| | |
| | | |
| | | #ifdef GPU |
| | | #ifdef CUDNN |
| | | void cudnn_convolutional_setup(layer *l) |
| | | void cudnn_convolutional_setup(layer *l, int cudnn_preference) |
| | | { |
| | | cudnnSetTensor4dDescriptor(l->dsrcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->c, l->h, l->w); |
| | | cudnnSetTensor4dDescriptor(l->ddstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w); |
| | |
| | | #else |
| | | cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION); // cudnn 5.1 |
| | | #endif |
| | | int forward_algo = CUDNN_CONVOLUTION_FWD_PREFER_FASTEST; |
| | | int backward_algo = CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST; |
| | | int backward_filter = CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST; |
| | | if (cudnn_preference == cudnn_smallest) { |
| | | forward_algo = CUDNN_CONVOLUTION_FWD_NO_WORKSPACE; |
| | | backward_algo = CUDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE; |
| | | backward_filter = CUDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE; |
| | | } |
| | | |
| | | cudnnGetConvolutionForwardAlgorithm(cudnn_handle(), |
| | | l->srcTensorDesc, |
| | | l->weightDesc, |
| | | l->convDesc, |
| | | l->dstTensorDesc, |
| | | CUDNN_CONVOLUTION_FWD_PREFER_FASTEST, |
| | | forward_algo, |
| | | 0, |
| | | &l->fw_algo); |
| | | cudnnGetConvolutionBackwardDataAlgorithm(cudnn_handle(), |
| | |
| | | l->ddstTensorDesc, |
| | | l->convDesc, |
| | | l->dsrcTensorDesc, |
| | | CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST, |
| | | backward_algo, |
| | | 0, |
| | | &l->bd_algo); |
| | | cudnnGetConvolutionBackwardFilterAlgorithm(cudnn_handle(), |
| | |
| | | l->ddstTensorDesc, |
| | | l->convDesc, |
| | | l->dweightDesc, |
| | | CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST, |
| | | backward_filter, |
| | | 0, |
| | | &l->bf_algo); |
| | | } |
| | |
| | | cudnnCreateTensorDescriptor(&l.ddstTensorDesc); |
| | | cudnnCreateFilterDescriptor(&l.dweightDesc); |
| | | cudnnCreateConvolutionDescriptor(&l.convDesc); |
| | | cudnn_convolutional_setup(&l); |
| | | cudnn_convolutional_setup(&l, cudnn_fastest); |
| | | #endif |
| | | } |
| | | #endif |
| | |
| | | |
| | | void resize_convolutional_layer(convolutional_layer *l, int w, int h) |
| | | { |
| | | int old_w = l->w; |
| | | int old_h = l->h; |
| | | l->w = w; |
| | | l->h = h; |
| | | int out_w = convolutional_out_width(*l); |
| | |
| | | } |
| | | |
| | | #ifdef GPU |
| | | cuda_free(l->delta_gpu); |
| | | cuda_free(l->output_gpu); |
| | | if (old_w < w || old_h < h) { |
| | | cuda_free(l->delta_gpu); |
| | | cuda_free(l->output_gpu); |
| | | |
| | | l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs); |
| | | l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs); |
| | | l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs); |
| | | l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs); |
| | | |
| | | if(l->batch_normalize){ |
| | | cuda_free(l->x_gpu); |
| | | cuda_free(l->x_norm_gpu); |
| | | if (l->batch_normalize) { |
| | | cuda_free(l->x_gpu); |
| | | cuda_free(l->x_norm_gpu); |
| | | |
| | | l->x_gpu = cuda_make_array(l->output, l->batch*l->outputs); |
| | | l->x_norm_gpu = cuda_make_array(l->output, l->batch*l->outputs); |
| | | } |
| | | l->x_gpu = cuda_make_array(l->output, l->batch*l->outputs); |
| | | l->x_norm_gpu = cuda_make_array(l->output, l->batch*l->outputs); |
| | | } |
| | | } |
| | | #ifdef CUDNN |
| | | cudnn_convolutional_setup(l); |
| | | cudnn_convolutional_setup(l, cudnn_fastest); |
| | | #endif |
| | | #endif |
| | | l->workspace_size = get_workspace_size(*l); |
| | | |
| | | #ifdef CUDNN |
| | | // check for excessive memory consumption |
| | | size_t free_byte; |
| | | size_t total_byte; |
| | | check_error(cudaMemGetInfo(&free_byte, &total_byte)); |
| | | if (l->workspace_size > free_byte || l->workspace_size >= total_byte / 2) { |
| | | printf(" used slow CUDNN algo without Workspace! \n"); |
| | | cudnn_convolutional_setup(l, cudnn_smallest); |
| | | l->workspace_size = get_workspace_size(*l); |
| | | } |
| | | #endif |
| | | } |
| | | |
| | | void add_bias(float *output, float *biases, int batch, int n, int size) |