From 9bae70b22549b68f5cdeece8b6c3b3de00c22714 Mon Sep 17 00:00:00 2001
From: AlexeyAB <alexeyab84@gmail.com>
Date: Mon, 16 Apr 2018 23:51:11 +0000
Subject: [PATCH] Accelerated by another 5% using FP16/32 Batch-norm for Tensor Cores.
---
src/gru_layer.c | 8 ++++++++
1 files changed, 8 insertions(+), 0 deletions(-)
diff --git a/src/gru_layer.c b/src/gru_layer.c
index 4c720ce..b78e868 100644
--- a/src/gru_layer.c
+++ b/src/gru_layer.c
@@ -85,7 +85,15 @@
l.z_cpu = calloc(outputs*batch, sizeof(float));
l.h_cpu = calloc(outputs*batch, sizeof(float));
+ l.forward = forward_gru_layer;
+ l.backward = backward_gru_layer;
+ l.update = update_gru_layer;
+
#ifdef GPU
+ l.forward_gpu = forward_gru_layer_gpu;
+ l.backward_gpu = backward_gru_layer_gpu;
+ l.update_gpu = update_gru_layer_gpu;
+
l.forgot_state_gpu = cuda_make_array(l.output, batch*outputs);
l.forgot_delta_gpu = cuda_make_array(l.output, batch*outputs);
l.prev_state_gpu = cuda_make_array(l.output, batch*outputs);
--
Gitblit v1.10.0