From cd2bdec09030edf7da79ecdeb38d908c106850b3 Mon Sep 17 00:00:00 2001
From: AlexeyAB <alexeyab84@gmail.com>
Date: Fri, 23 Feb 2018 12:05:31 +0000
Subject: [PATCH] Updated to CUDA 9.1. And fixed no_gpu dependencies.

---
 src/convolutional_kernels.cu |    5 +++--
 1 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/convolutional_kernels.cu b/src/convolutional_kernels.cu
index ee9b534..3b2a349 100644
--- a/src/convolutional_kernels.cu
+++ b/src/convolutional_kernels.cu
@@ -78,7 +78,7 @@
 {
 	int idx = blockIdx.x * blockDim.x + threadIdx.x;
 	if (idx < size) output_f16[idx] = __float2half(input_f32[idx]);
-	//if (idx < size) *((unsigned int *)output_f16 + idx) = __float2half(input_f32[idx]);
+	//if (idx < size) *((unsigned short *)output_f16 + idx) = __float2half(input_f32[idx]);
 }
 
 void cuda_convert_f32_to_f16(float* input_f32, size_t size, half *output_f16) {
@@ -89,7 +89,7 @@
 {
 	int idx = blockIdx.x * blockDim.x + threadIdx.x;
 	if (idx < size) output_f32[idx] = __half2float(input_f16[idx]);
-	//if (idx < size) output_f32[idx] = __half2float(*((unsigned int *)input_f16 + idx));
+	//if (idx < size) output_f32[idx] = __half2float(*((unsigned short *)input_f16 + idx));
 }
 
 void cuda_convert_f16_to_f32(half* input_f16, size_t size, float *output_f32) {
@@ -247,6 +247,7 @@
 
     if(state.delta){
         if(l.binary || l.xnor) swap_binary(&l);
+		// http://docs.nvidia.com/deeplearning/sdk/cudnn-developer-guide/index.html#cudnnConvolutionBackwardData
         cudnnConvolutionBackwardData(cudnn_handle(),
                 &one,
                 l.weightDesc,

--
Gitblit v1.10.0