From 1e9d1fcedf1a361bcdb384f15b5b14bdb526576d Mon Sep 17 00:00:00 2001
From: AlexeyAB <alexeyab84@gmail.com>
Date: Sat, 30 Jun 2018 20:12:25 +0000
Subject: [PATCH] Fixed arch=compute_53,code=[sm_53,compute_53] for Jetson TX1

---
 src/cuda.h |   67 +++++++++++++++++++++++++++------
 1 files changed, 55 insertions(+), 12 deletions(-)

diff --git a/src/cuda.h b/src/cuda.h
index 08c0340..e167d36 100644
--- a/src/cuda.h
+++ b/src/cuda.h
@@ -1,21 +1,64 @@
 #ifndef CUDA_H
 #define CUDA_H
 
-#define BLOCK 256
+#if defined(_MSC_VER) && _MSC_VER < 1900 // only for MSVC with _MSC_VER below 1900; maps inline to the vendor spelling __inline
+	#define inline __inline
+#endif // _MSC_VER < 1900
 
-#include "cuda_runtime.h"
-#include "cublas_v2.h"
+#ifdef YOLODLL_EXPORTS // presumably defined when building the shared library itself -- confirm in the build files
+#if defined(_MSC_VER)
+#define YOLODLL_API __declspec(dllexport) // MSVC: export the symbol
+#else
+#define YOLODLL_API __attribute__((visibility("default"))) // non-MSVC: default symbol visibility
+#endif // _MSC_VER
+#else // !YOLODLL_EXPORTS: YOLODLL_API expands to nothing
+#if defined(_MSC_VER) // NOTE(review): both branches below define the same empty macro
+#define YOLODLL_API
+#else
+#define YOLODLL_API
+#endif // _MSC_VER
+#endif // YOLODLL_EXPORTS
 
 extern int gpu_index;
 
-void check_error(cudaError_t status);
-cublasHandle_t blas_handle();
-float *cuda_make_array(float *x, int n);
-int *cuda_make_int_array(int n);
-void cuda_push_array(float *x_gpu, float *x, int n);
-void cuda_pull_array(float *x_gpu, float *x, int n);
-void cuda_free(float *x_gpu);
-float cuda_compare(float *x_gpu, float *x, int n, char *s);
-dim3 cuda_gridsize(size_t n);
+#ifdef GPU
 
+#define BLOCK 512 // NOTE(review): presumably the threads-per-block count for kernel launches -- confirm against cuda_gridsize()
+
+#include "cuda_runtime.h"
+#include "curand.h"
+#include "cublas_v2.h"
+
+#ifdef CUDNN
+#include "cudnn.h"
+#endif // CUDNN
+
+#ifdef __cplusplus
+extern "C" { // C linkage for everything up to the closing brace when compiled as C++
+#endif // __cplusplus
+	void check_error(cudaError_t status); // NOTE(review): presumably reports a non-success status -- confirm in the implementation
+	cublasHandle_t blas_handle(void); // (void) makes this a real prototype in C; same meaning as before in C++
+	float *cuda_make_array(float *x, size_t n); // NOTE(review): n is presumably an element count, not bytes -- confirm
+	int *cuda_make_int_array(size_t n);
+	void cuda_push_array(float *x_gpu, float *x, size_t n); // NOTE(review): presumably host to device -- confirm
+	void cuda_pull_array(float *x_gpu, float *x, size_t n); // NOTE(review): presumably device to host -- confirm
+	YOLODLL_API void cuda_set_device(int n); // the only declaration in this group that carries YOLODLL_API
+	int cuda_get_device(void); // (void): prototype form, takes no arguments
+	void cuda_free(float *x_gpu);
+	void cuda_random(float *x_gpu, size_t n);
+	float cuda_compare(float *x_gpu, float *x, size_t n, char *s);
+	dim3 cuda_gridsize(size_t n);
+	cudaStream_t get_cuda_stream(void); // (void): prototype form, takes no arguments
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#ifdef CUDNN // same guard that controls the cudnn.h include earlier in this header
+cudnnHandle_t cudnn_handle(void); // (void): prototype form. NOTE(review): not inside an extern "C" block -- confirm how C++ files include this header
+enum {cudnn_fastest, cudnn_smallest}; // anonymous enum: cudnn_fastest is 0, cudnn_smallest is 1
 #endif
+
+#else // GPU
+YOLODLL_API void cuda_set_device(int n); // non-GPU build still declares this. NOTE(review): no extern "C" wrapper here, unlike the GPU branch -- confirm linkage for C++ includers
+#endif // GPU
+#endif // CUDA_H

--
Gitblit v1.10.0