From 2a9f7e44ce1b73d3d56ef83f83e94f074ecac3f9 Mon Sep 17 00:00:00 2001
From: AlexeyAB <alexeyab84@gmail.com>
Date: Fri, 06 Apr 2018 17:34:38 +0000
Subject: [PATCH] Added automatic AVX support - speedup +20% on CPU x86_64 Intel Skylake

---
 src/convolutional_layer.c |    4 ++++
 1 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c
index 7c0c00b..fb606ae 100644
--- a/src/convolutional_layer.c
+++ b/src/convolutional_layer.c
@@ -174,6 +174,9 @@
     cudnnSetTensor4dDescriptor(l->srcTensorDesc, CUDNN_TENSOR_NCHW, data_type, l->batch, l->c, l->h, l->w);
     cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, data_type, l->batch, l->out_c, l->out_h, l->out_w);
     cudnnSetFilter4dDescriptor(l->weightDesc, data_type, CUDNN_TENSOR_NCHW, l->n, l->c, l->size, l->size);
+
+	// batch norm
+	cudnnSetTensor4dDescriptor(l->normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, 1, 1);
 #if(CUDNN_MAJOR >= 6)
 	cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT);	// cudnn >= 6.0
 #else
@@ -341,6 +344,7 @@
             l.x_norm_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);
         }
 #ifdef CUDNN
+		cudnnCreateTensorDescriptor(&l.normTensorDesc);
         cudnnCreateTensorDescriptor(&l.srcTensorDesc);
         cudnnCreateTensorDescriptor(&l.dstTensorDesc);
         cudnnCreateFilterDescriptor(&l.weightDesc);

--
Gitblit v1.10.0