From cad4d1618fee74471d335314cb77070fee951a42 Mon Sep 17 00:00:00 2001
From: AlexeyAB <alexeyab84@gmail.com>
Date: Sun, 25 Feb 2018 13:29:44 +0000
Subject: [PATCH] Added support for Tensor Cores CC >= 7.0 (V100). For FP16/32 (mixed precision), the CUDNN_HALF define should be used.

---
 src/cuda.h |    2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/src/cuda.h b/src/cuda.h
index 31f9092..0bc0557 100644
--- a/src/cuda.h
+++ b/src/cuda.h
@@ -26,6 +26,7 @@
 void cuda_push_array(float *x_gpu, float *x, size_t n);
 void cuda_pull_array(float *x_gpu, float *x, size_t n);
 void cuda_set_device(int n);
+int cuda_get_device();
 void cuda_free(float *x_gpu);
 void cuda_random(float *x_gpu, size_t n);
 float cuda_compare(float *x_gpu, float *x, size_t n, char *s);
@@ -34,6 +35,7 @@
 
 #ifdef CUDNN
 cudnnHandle_t cudnn_handle();
+enum {cudnn_fastest, cudnn_smallest};
 #endif
 
 #endif

--
Gitblit v1.10.0