From 564877ad6a3f53d3d866b0015237d07f4af2eaa2 Mon Sep 17 00:00:00 2001
From: vinjn <vinjn.z@gmail.com>
Date: Sat, 07 Jul 2018 04:30:45 +0000
Subject: [PATCH] softmax_layer.c - extract softmax_tree() and batch the GPU softmax path

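Extract the per-batch tree softmax loop from forward_softmax_layer()
into a new softmax_tree() helper, switch the CPU path to the strided
softmax() call, and replace the per-group cudaStream_t loop in
forward_softmax_layer_gpu() with batched softmax_gpu() calls.

For reference, the updated call sites assume strided CPU/GPU softmax
routines roughly along these lines (sketched from the calls in this
patch; the real declarations live elsewhere in the tree and the
parameter names below are guesses):

    /* softmax with temperature `temp` over n values, reading and
     * writing elements spaced `stride` floats apart */
    void softmax(float *input, int n, float temp, float *output, int stride);

    /* softmax with temperature `temp` over n values for each of
     * `groups` groups, consecutive groups offset by `offset` floats */
    void softmax_gpu(float *input, int n, int offset, int groups, float temp, float *output);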
---
 src/softmax_layer.c |   63 ++++++++++++-------------------
 1 file changed, 24 insertions(+), 39 deletions(-)

diff --git a/src/softmax_layer.c b/src/softmax_layer.c
index 2a34cae..27f73fd 100644
--- a/src/softmax_layer.c
+++ b/src/softmax_layer.c
@@ -10,7 +10,7 @@
 softmax_layer make_softmax_layer(int batch, int inputs, int groups)
 {
     assert(inputs%groups == 0);
-    fprintf(stderr, "Softmax Layer: %d inputs\n", inputs);
+    fprintf(stderr, "softmax                                        %4d\n",  inputs);
     softmax_layer l = {0};
     l.type = SOFTMAX;
     l.batch = batch;
@@ -32,24 +32,30 @@
     return l;
 }
 
+void softmax_tree(float *input, int batch, int inputs, float temp, tree *hierarchy, float *output)
+{
+    int b;
+    for(b = 0; b < batch; ++b){
+        int i;
+        int count = 0;
+        for(i = 0; i < hierarchy->groups; ++i){
+            int group_size = hierarchy->group_size[i];
+            softmax(input+b*inputs + count, group_size, temp, output+b*inputs + count, 1);
+            count += group_size;
+        }
+    }
+}
+
 void forward_softmax_layer(const softmax_layer l, network_state state)
 {
     int b;
     int inputs = l.inputs / l.groups;
     int batch = l.batch * l.groups;
     if(l.softmax_tree){
-        for(b = 0; b < batch; ++b){
-            int i;
-            int count = 0;
-            for(i = 0; i < l.softmax_tree->groups; ++i){
-                int group_size = l.softmax_tree->group_size[i];
-                softmax(state.input+b*inputs + count, group_size, l.temperature, l.output+b*inputs + count);
-                count += group_size;
-            }
-        }
+        softmax_tree(state.input, batch, inputs, l.temperature, l.softmax_tree, l.output);
     } else {
         for(b = 0; b < batch; ++b){
-            softmax(state.input+b*inputs, inputs, l.temperature, l.output+b*inputs);
+            softmax(state.input+b*inputs, inputs, l.temperature, l.output+b*inputs, 1);
         }
     }
 }
@@ -73,37 +79,16 @@
 {
     int inputs = l.inputs / l.groups;
     int batch = l.batch * l.groups;
-    int b;
     if(l.softmax_tree){
-        if(0){
-            float *buff = calloc(inputs * batch, sizeof(float));
-            cuda_pull_array(state.input, buff, batch * inputs);
-            state.input = buff;
-            forward_softmax_layer(l, state);
-            cuda_push_array(l.output_gpu, l.output, batch*inputs);
-            free(buff);
-        } else {
-            int i;
-            const int nstreams = 32;
-            cudaStream_t streams[nstreams];
-            for (i = 0; i < nstreams; ++i) {
-                cudaStreamCreate(&streams[i]);
-            }
-            for (b = 0; b < batch; ++b) {
-                int i;
-                int count = 0;
-                for (i = 0; i < l.softmax_tree->groups; ++i) {
-                    int group_size = l.softmax_tree->group_size[i];
-                    softmax_gpu(state.input+b*inputs + count, group_size, 1, l.temperature, l.output_gpu+b*inputs + count, streams[(b*l.softmax_tree->groups + i) % nstreams]);
-                    count += group_size;
-                }
-            }
-            for(i = 0; i < nstreams; ++i){
-                cudaStreamDestroy(streams[i]);
-            }
+        int i;
+        int count = 0;
+        for (i = 0; i < l.softmax_tree->groups; ++i) {
+            int group_size = l.softmax_tree->group_size[i];
+            softmax_gpu(state.input+count, group_size, inputs, batch, l.temperature, l.output_gpu + count);
+            count += group_size;
         }
     } else {
-        softmax_gpu(state.input, inputs, batch, l.temperature, l.output_gpu, 0);
+        softmax_gpu(state.input, inputs, inputs, batch, l.temperature, l.output_gpu);
     }
 }
 

--
Gitblit v1.10.0