From cad4d1618fee74471d335314cb77070fee951a42 Mon Sep 17 00:00:00 2001
From: AlexeyAB <alexeyab84@gmail.com>
Date: Sun, 25 Feb 2018 13:29:44 +0000
Subject: [PATCH] Added support for Tensor Cores CC >= 7.0 (V100). For FP16/32 (mixed precision), the CUDNN_HALF define should be used.
---
src/cuda.c | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)
diff --git a/src/cuda.c b/src/cuda.c
index f168e4e..d8db851 100644
--- a/src/cuda.c
+++ b/src/cuda.c
@@ -96,6 +96,7 @@
int i = cuda_get_device();
if(!init[i]) {
cublasCreate(&handle[i]);
+ cublasStatus_t status = cublasSetStream(handle[i], get_cuda_stream());
init[i] = 1;
}
return handle[i];
--
Gitblit v1.10.0