| | |
| | | #include "curand.h" |
| | | #include "cublas_v2.h" |
| | | |
| | | #ifdef CUDNN |
| | | #pragma comment(lib, "cudnn.lib") |
| | | #endif |
| | | |
| | | extern "C" { |
| | | #include "convolutional_layer.h" |
| | | #include "batchnorm_layer.h" |
| | |
| | | activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation); |
| | | //if(l.dot > 0) dot_error_gpu(l); |
| | | if(l.binary || l.xnor) swap_binary(&l); |
| | | //cudaDeviceSynchronize(); // for correct profiling of performance |
| | | } |
| | | |
| | | void backward_convolutional_layer_gpu(convolutional_layer l, network_state state) |
| | |
| | | |
| | | if(l.batch_normalize){ |
| | | backward_batchnorm_layer_gpu(l, state); |
| | | //axpy_ongpu(l.outputs*l.batch, -state.net.decay, l.x_gpu, 1, l.delta_gpu, 1); |
| | | } else { |
| | | //axpy_ongpu(l.outputs*l.batch, -state.net.decay, l.output_gpu, 1, l.delta_gpu, 1); |
| | | } |
| | | float *original_input = state.input; |
| | | |