~speedprog/mtg/mtg_card_detector.git

			@@ -8,6 +8,10 @@
			#include <stdio.h>
			#include <time.h>

			#ifdef CUDNN
			#pragma comment(lib, "cudnn.lib")
			#endif

			#ifdef AI2
			#include "xnor_layer.h"
			#endif
			@@ -133,7 +137,7 @@

			#ifdef GPU
			#ifdef CUDNN
			// Configure the cuDNN descriptors of layer `l` and select its convolution algorithms.
			//
			// The tensor, filter and convolution descriptors are filled from the layer's
			// batch / channel / spatial fields, then cuDNN is asked for a forward, a
			// backward-data and a backward-filter algorithm, which are written to
			// l->fw_algo, l->bd_algo and l->bf_algo.
			//
			// l                 layer whose descriptors and algorithm fields are updated in place.
			// cudnn_preference  when equal to cudnn_smallest the *_NO_WORKSPACE preferences are
			//                   requested; any other value keeps the *_PREFER_FASTEST defaults.
			//
			// None of the cuDNN calls below has its return status checked.
			//
			// NOTE(review): this text looks like a patch listing with its +/- markers stripped
			// (see the "@@ ... @@" hunk headers), so some lines appear in both their old and
			// new form and some body lines are hidden behind hunk headers - confirm against
			// the real file before relying on exact statement order.
			// NOTE(review): two signatures follow; the body reads cudnn_preference, which only
			// the second one declares.
			void cudnn_convolutional_setup(layer *l)
			void cudnn_convolutional_setup(layer *l, int cudnn_preference)
			{
			// NCHW float descriptors that are later passed to the backward-algorithm queries.
			cudnnSetTensor4dDescriptor(l->dsrcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->c, l->h, l->w);
			cudnnSetTensor4dDescriptor(l->ddstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w);
			@@ -142,13 +146,26 @@
			// NCHW float descriptors for the layer input/output and an l->n x l->c x size x size filter.
			cudnnSetTensor4dDescriptor(l->srcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->c, l->h, l->w);
			cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w);
			cudnnSetFilter4dDescriptor(l->weightDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, l->n, l->c, l->size, l->size);
			// NOTE(review): the next two lines duplicate calls that appear again further down
			// (the version-guarded descriptor setup and the forward query) - presumably the
			// removed side of the patch; confirm.
			cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION);
			cudnnGetConvolutionForwardAlgorithm(cudnn_handle(),
			// From cuDNN 6 on the call is made with an extra trailing CUDNN_DATA_FLOAT argument.
			#if(CUDNN_MAJOR >= 6)
			cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT); // cudnn 6.0
			#else
			cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION); // cudnn 5.1
			#endif
			// Default: request the fastest algorithm for each of the three directions.
			int forward_algo = CUDNN_CONVOLUTION_FWD_PREFER_FASTEST;
			int backward_algo = CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST;
			int backward_filter = CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST;
			// cudnn_smallest switches all three to the no-workspace preference.
			if (cudnn_preference == cudnn_smallest) {
			forward_algo = CUDNN_CONVOLUTION_FWD_NO_WORKSPACE;
			backward_algo = CUDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE;
			backward_filter = CUDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE;
			}

			// Forward algorithm query; the result lands in l->fw_algo.
			cudnnGetConvolutionForwardAlgorithm(cudnn_handle(),
			l->srcTensorDesc,
			l->weightDesc,
			l->convDesc,
			l->dstTensorDesc,
			// NOTE(review): a constant and forward_algo both occupy the preference position -
			// presumably the old and new patch lines; confirm.
			CUDNN_CONVOLUTION_FWD_PREFER_FASTEST,
			forward_algo,
			0,
			&l->fw_algo);
			// Backward-data algorithm query; the result lands in l->bd_algo.
			cudnnGetConvolutionBackwardDataAlgorithm(cudnn_handle(),
			@@ -156,7 +173,7 @@
			l->ddstTensorDesc,
			l->convDesc,
			l->dsrcTensorDesc,
			// NOTE(review): same old/new duplication as in the forward query - confirm.
			CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST,
			backward_algo,
			0,
			&l->bd_algo);
			// Backward-filter algorithm query; the result lands in l->bf_algo.
			cudnnGetConvolutionBackwardFilterAlgorithm(cudnn_handle(),
			@@ -164,7 +181,7 @@
			l->ddstTensorDesc,
			l->convDesc,
			l->dweightDesc,
			// NOTE(review): same old/new duplication as in the forward query - confirm.
			CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST,
			backward_filter,
			0,
			&l->bf_algo);
			}
			@@ -298,7 +315,7 @@
			cudnnCreateTensorDescriptor(&l.ddstTensorDesc);
			cudnnCreateFilterDescriptor(&l.dweightDesc);
			cudnnCreateConvolutionDescriptor(&l.convDesc);
			cudnn_convolutional_setup(&l);
			cudnn_convolutional_setup(&l, cudnn_fastest);
			#endif
			}
			#endif
			@@ -351,6 +368,8 @@

			// Resize convolutional layer `l` to a new input width `w` and height `h`.
			//
			// Stores the new size in l->w / l->h and recomputes the output width (the rest of
			// the host-side work is hidden behind the hunk header below). Under GPU the device
			// buffers delta_gpu / output_gpu - and x_gpu / x_norm_gpu when l->batch_normalize is
			// set - are freed and re-created with l->batch*l->outputs elements. Under CUDNN the
			// descriptors and algorithms are set up again with cudnn_fastest, l->workspace_size
			// is recomputed, and if that size exceeds the free device memory or is at least
			// half of the total, setup is repeated with cudnn_smallest.
			//
			// NOTE(review): this text looks like a patch listing with its +/- markers stripped,
			// so several statements appear twice (old form, then new form) - confirm against
			// the real file.
			void resize_convolutional_layer(convolutional_layer *l, int w, int h)
			{
			// Keep the previous size; it is compared with the new one in the GPU section.
			int old_w = l->w;
			int old_h = l->h;
			l->w = w;
			l->h = h;
			int out_w = convolutional_out_width(*l);
			@@ -370,24 +389,38 @@
			}

			#ifdef GPU
			// NOTE(review): these two frees are repeated inside the `if` below - presumably the
			// removed side of the patch. If both copies were live the buffers would be freed
			// twice; confirm.
			cuda_free(l->delta_gpu);
			cuda_free(l->output_gpu);
			// Reallocate only when at least one dimension grew.
			// NOTE(review): `\|\|` looks like an escaped `||` left over from the listing - confirm.
			if (old_w < w \|\| old_h < h) {
			cuda_free(l->delta_gpu);
			cuda_free(l->output_gpu);

			// NOTE(review): each allocation pair below appears twice - presumably old/new lines.
			l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs);
			l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs);
			l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs);
			l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs);

			// With batch normalization, x_gpu and x_norm_gpu are rebuilt as well, both
			// created from l->output with the same element count.
			if(l->batch_normalize){
			cuda_free(l->x_gpu);
			cuda_free(l->x_norm_gpu);
			if (l->batch_normalize) {
			cuda_free(l->x_gpu);
			cuda_free(l->x_norm_gpu);

			l->x_gpu = cuda_make_array(l->output, l->batch*l->outputs);
			l->x_norm_gpu = cuda_make_array(l->output, l->batch*l->outputs);
			}
			l->x_gpu = cuda_make_array(l->output, l->batch*l->outputs);
			l->x_norm_gpu = cuda_make_array(l->output, l->batch*l->outputs);
			}
			}
			#ifdef CUDNN
			// Re-run descriptor/algorithm setup for the resized layer.
			// NOTE(review): the one-argument call is presumably the removed patch line - confirm.
			cudnn_convolutional_setup(l);
			cudnn_convolutional_setup(l, cudnn_fastest);
			#endif
			#endif
			// Refresh the workspace size now that the layer has its new dimensions.
			l->workspace_size = get_workspace_size(*l);

			#ifdef CUDNN
			// check for excessive memory consumption
			size_t free_byte;
			size_t total_byte;
			// Read the currently free and the total device memory, in bytes.
			check_error(cudaMemGetInfo(&free_byte, &total_byte));
			// If the workspace does not fit in free memory, or would take at least half of
			// the total, redo the setup with cudnn_smallest and recompute the workspace size.
			if (l->workspace_size > free_byte \|\| l->workspace_size >= total_byte / 2) {
			printf(" used slow CUDNN algo without Workspace! \n");
			cudnn_convolutional_setup(l, cudnn_smallest);
			l->workspace_size = get_workspace_size(*l);
			}
			#endif
			}

			void add_bias(float output, float biases, int batch, int n, int size)