From a284a7da8d1facbf984a22302665a2a50295a687 Mon Sep 17 00:00:00 2001
From: AlexeyAB <alexeyab84@gmail.com>
Date: Wed, 08 Aug 2018 16:08:58 +0000
Subject: [PATCH] Try to use avx_hs() - slow, and requires 4096-bit alignment when 4096 bits < (l.size*l.size*l.c). May be faster only from 8192 bits and more.
---
src/convolutional_layer.c | 2 ++
1 files changed, 2 insertions(+), 0 deletions(-)
diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c
index bbc4807..0bde97a 100644
--- a/src/convolutional_layer.c
+++ b/src/convolutional_layer.c
@@ -684,6 +684,8 @@
// transpose B from NxK to KxN (x-axis (ldb = l.size*l.size*l.c) - should be multiple of 8 bits)
{
size_t ldb_align = 256;// 8;
+ if (k > 4096)ldb_align = 4096;
+
size_t new_ldb = k + (ldb_align - k%ldb_align); // (k / 8 + 1) * 8;
size_t t_intput_size = new_ldb * n;
size_t t_bit_input_size = t_intput_size / 8;// +1;
--
Gitblit v1.10.0