src/convolutional_kernels.cu
@@ -2,6 +2,10 @@ #include "curand.h" #include "cublas_v2.h" #ifdef CUDNN #pragma comment(lib, "cudnn.lib") #endif extern "C" { #include "convolutional_layer.h" #include "batchnorm_layer.h" @@ -123,6 +127,7 @@ activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation); //if(l.dot > 0) dot_error_gpu(l); if(l.binary || l.xnor) swap_binary(&l); //cudaDeviceSynchronize(); // for correct profiling of performance } void backward_convolutional_layer_gpu(convolutional_layer l, network_state state)