From 85eafd3d590c449b35627af3e0f2a007c5a27d15 Mon Sep 17 00:00:00 2001
From: AlexeyAB <alexeyab84@gmail.com>
Date: Sun, 25 Feb 2018 12:23:11 +0000
Subject: [PATCH] Added partial.cmd
---
src/convolutional_kernels.cu | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/src/convolutional_kernels.cu b/src/convolutional_kernels.cu
index 135a2ea..3b2a349 100644
--- a/src/convolutional_kernels.cu
+++ b/src/convolutional_kernels.cu
@@ -77,7 +77,8 @@
__global__ void cuda_f32_to_f16(float* input_f32, size_t size, half *output_f16)
{
int idx = blockIdx.x * blockDim.x + threadIdx.x;
- if (idx < size) output_f16[idx] = input_f32[idx];
+ if (idx < size) output_f16[idx] = __float2half(input_f32[idx]);
+ //if (idx < size) *((unsigned short *)output_f16 + idx) = __float2half(input_f32[idx]);
}
void cuda_convert_f32_to_f16(float* input_f32, size_t size, half *output_f16) {
@@ -87,7 +88,8 @@
__global__ void cuda_f16_to_f32(half* input_f16, size_t size, float *output_f32)
{
int idx = blockIdx.x * blockDim.x + threadIdx.x;
- if (idx < size) output_f32[idx] = input_f16[idx];
+ if (idx < size) output_f32[idx] = __half2float(input_f16[idx]);
+ //if (idx < size) output_f32[idx] = __half2float(*((unsigned short *)input_f16 + idx));
}
void cuda_convert_f16_to_f32(half* input_f16, size_t size, float *output_f32) {
@@ -245,6 +247,7 @@
if(state.delta){
if(l.binary || l.xnor) swap_binary(&l);
+ // http://docs.nvidia.com/deeplearning/sdk/cudnn-developer-guide/index.html#cudnnConvolutionBackwardData
cudnnConvolutionBackwardData(cudnn_handle(),
&one,
l.weightDesc,
--
Gitblit v1.10.0