If there is excessive GPU-RAM consumption by CUDNN, then do not use a workspace: fall back to the convolution algorithms that need no workspace.
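In short, the fix boils down to the following logic. This is only a condensed sketch, not the actual patch: the helper name choose_cudnn_algos is made up for illustration, and it assumes darknet's layer struct together with the cudnn_convolutional_setup(), get_workspace_size() and check_error() helpers and the cudnn_fastest / cudnn_smallest values that the diff introduces.

#include <cuda_runtime.h>          /* cudaMemGetInfo() */
#include "convolutional_layer.h"   /* assumed darknet header: layer, cudnn_convolutional_setup(), ... */
#include "cuda.h"                  /* assumed darknet header: check_error(), cudnn_handle() */

/* Hypothetical helper, only to illustrate the order of the fallback. */
void choose_cudnn_algos(layer *l)
{
    /* Ask cuDNN for the fastest algorithms first; they may need a large workspace. */
    cudnn_convolutional_setup(l, cudnn_fastest);
    l->workspace_size = get_workspace_size(*l);

    size_t free_byte, total_byte;
    check_error(cudaMemGetInfo(&free_byte, &total_byte));

    /* If the workspace would not fit into free VRAM, or would take at least half
       of the card, fall back to the slower algorithms that need no workspace. */
    if (l->workspace_size > free_byte || l->workspace_size >= total_byte / 2) {
        cudnn_convolutional_setup(l, cudnn_smallest);
        l->workspace_size = get_workspace_size(*l);
    }
}

In the diff that follows, lines prefixed with - are the previous code and lines prefixed with + are their replacements; unprefixed lines are unchanged context.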
#ifdef GPU
#ifdef CUDNN
-void cudnn_convolutional_setup(layer *l)
+void cudnn_convolutional_setup(layer *l, int cudnn_preference)
{
    cudnnSetTensor4dDescriptor(l->dsrcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->c, l->h, l->w);
    cudnnSetTensor4dDescriptor(l->ddstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w);

#else
    cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION); // cudnn 5.1
#endif
+    int forward_algo = CUDNN_CONVOLUTION_FWD_PREFER_FASTEST;
+    int backward_algo = CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST;
+    int backward_filter = CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST;
+    if (cudnn_preference == cudnn_smallest) {
+        forward_algo = CUDNN_CONVOLUTION_FWD_NO_WORKSPACE;
+        backward_algo = CUDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE;
+        backward_filter = CUDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE;
+    }

    cudnnGetConvolutionForwardAlgorithm(cudnn_handle(),
            l->srcTensorDesc,
            l->weightDesc,
            l->convDesc,
            l->dstTensorDesc,
-            CUDNN_CONVOLUTION_FWD_PREFER_FASTEST,
+            forward_algo,
            0,
            &l->fw_algo);
    cudnnGetConvolutionBackwardDataAlgorithm(cudnn_handle(),
            l->weightDesc,
            l->ddstTensorDesc,
            l->convDesc,
            l->dsrcTensorDesc,
-            CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST,
+            backward_algo,
            0,
            &l->bd_algo);
    cudnnGetConvolutionBackwardFilterAlgorithm(cudnn_handle(),
            l->srcTensorDesc,
            l->ddstTensorDesc,
            l->convDesc,
            l->dweightDesc,
-            CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST,
+            backward_filter,
            0,
            &l->bf_algo);
}

    cudnnCreateTensorDescriptor(&l.ddstTensorDesc);
    cudnnCreateFilterDescriptor(&l.dweightDesc);
    cudnnCreateConvolutionDescriptor(&l.convDesc);
-    cudnn_convolutional_setup(&l);
+    cudnn_convolutional_setup(&l, cudnn_fastest);
#endif
}
#endif

        }
    }
#ifdef CUDNN
-    cudnn_convolutional_setup(l);
+    cudnn_convolutional_setup(l, cudnn_fastest);
#endif
#endif
    l->workspace_size = get_workspace_size(*l);

+#ifdef CUDNN
+    // check for excessive memory consumption
+    size_t free_byte;
+    size_t total_byte;
+    check_error(cudaMemGetInfo(&free_byte, &total_byte));
+    if (l->workspace_size > free_byte || l->workspace_size >= total_byte / 2) {
+        printf(" used slow CUDNN algo without Workspace! \n");
+        cudnn_convolutional_setup(l, cudnn_smallest);
+        l->workspace_size = get_workspace_size(*l);
+    }
+#endif
}
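The workspace size compared against free VRAM above comes from cuDNN itself. For reference, here is a sketch of what a darknet-style get_workspace_size() typically does (the exact implementation in your revision may differ): it asks cuDNN how much scratch memory each chosen algorithm needs and returns the maximum, so after switching to the *_NO_WORKSPACE preferences the reported size drops to (near) zero.

/* Sketch only: the workspace requirement is the maximum of what cuDNN reports
   for the three algorithms picked in cudnn_convolutional_setup(). */
static size_t get_workspace_size(layer l)
{
#if defined(GPU) && defined(CUDNN)
    size_t most = 0, s = 0;
    cudnnGetConvolutionForwardWorkspaceSize(cudnn_handle(), l.srcTensorDesc, l.weightDesc,
            l.convDesc, l.dstTensorDesc, l.fw_algo, &s);
    if (s > most) most = s;
    cudnnGetConvolutionBackwardFilterWorkspaceSize(cudnn_handle(), l.srcTensorDesc, l.ddstTensorDesc,
            l.convDesc, l.dweightDesc, l.bf_algo, &s);
    if (s > most) most = s;
    cudnnGetConvolutionBackwardDataWorkspaceSize(cudnn_handle(), l.weightDesc, l.ddstTensorDesc,
            l.convDesc, l.dsrcTensorDesc, l.bd_algo, &s);
    if (s > most) most = s;
    return most;
#else
    /* CPU path: size of the im2col buffer. */
    return (size_t)l.out_h * l.out_w * l.size * l.size * l.c * sizeof(float);
#endif
}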

void add_bias(float *output, float *biases, int batch, int n, int size)

void add_bias_gpu(float *output, float *biases, int batch, int n, int size);
void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size);
#ifdef CUDNN
-void cudnn_convolutional_setup(layer *l);
+void cudnn_convolutional_setup(layer *l, int cudnn_preference);
#endif
#endif

#ifdef CUDNN
cudnnHandle_t cudnn_handle();
+enum {cudnn_fastest, cudnn_smallest};
#endif

#endif

        net->layers[i].batch = b;
#ifdef CUDNN
        if(net->layers[i].type == CONVOLUTIONAL){
-            cudnn_convolutional_setup(net->layers + i);
+            layer *l = net->layers + i;
+            cudnn_convolutional_setup(l, cudnn_fastest);
+            // check for excessive memory consumption
+            size_t free_byte;
+            size_t total_byte;
+            check_error(cudaMemGetInfo(&free_byte, &total_byte));
+            if (l->workspace_size > free_byte || l->workspace_size >= total_byte / 2) {
+                printf(" used slow CUDNN algo without Workspace! \n");
+                cudnn_convolutional_setup(l, cudnn_smallest);
+                l->workspace_size = get_workspace_size(*l);
+            }
        }
#endif
    }

    }
#ifdef GPU
    if(gpu_index >= 0){
-        printf(" try to allocate workspace, ");
+        printf(" try to allocate workspace = %zu * sizeof(float), ", (workspace_size - 1) / sizeof(float) + 1);
        net->workspace = cuda_make_array(0, (workspace_size-1)/sizeof(float)+1);
        printf(" CUDA allocate done! \n");
    }else {