src/network_kernels.cu
@@ -113,6 +113,13 @@
    // Prepare the shared state for a training pass: no incoming delta,
    // ground truth read from *net.truth_gpu, train flag set.
    state.delta = 0;
    state.truth = *net.truth_gpu;
    state.train = 1;
#ifdef CUDNN_HALF
    // Refresh each layer's half-precision weight copy from its float weights
    // before the forward pass runs.
    int i;
    for (i = 0; i < net.n; ++i) {
        layer l = net.layers[i];
        // Only convert when both the float source and the half destination
        // exist; otherwise the conversion would be handed a null pointer.
        // NOTE(review): presumably only some layer kinds allocate
        // weights_gpu16 -- confirm against the layer constructors.
        if (!l.weights_gpu || !l.weights_gpu16) continue;
        cuda_convert_f32_to_f16(l.weights_gpu, l.c*l.n*l.size*l.size, (half *)l.weights_gpu16);
    }
#endif
    forward_network_gpu(net, state);
    // Block until the work queued on get_cuda_stream() has finished before
    // the backward pass is started.
    cudaStreamSynchronize(get_cuda_stream());
    backward_network_gpu(net, state);