From 158bb1bee9951875dbe3474d84c6663431e18301 Mon Sep 17 00:00:00 2001
From: Joseph Redmon <pjreddie@gmail.com>
Date: Tue, 21 Oct 2014 21:49:18 +0000
Subject: [PATCH] softmax on gpu

---
 src/network.c |   71 ++++++++++++++++++++++++-----------
 1 file changed, 49 insertions(+), 22 deletions(-)

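Note: forward_softmax_layer_gpu() and backward_softmax_layer_gpu() are defined
outside network.c, so their bodies do not appear in this diff. As a rough
reference for what the forward path is expected to compute, here is a minimal
CPU-side sketch of a numerically stable softmax. The function name
softmax_reference and its flat float* interface are illustrative assumptions
only; the actual GPU path in this patch operates on cl_mem buffers through an
OpenCL kernel that is not shown here.

    /* Hedged sketch, not the darknet GPU implementation: a CPU reference
     * for the softmax the GPU forward pass is expected to produce.
     * Assumes a flat float buffer of n inputs; the real code uses cl_mem. */
    #include <math.h>

    static void softmax_reference(const float *input, int n, float *output)
    {
        /* subtract the max before exponentiating, for numerical stability */
        float largest = input[0];
        for (int i = 1; i < n; ++i) {
            if (input[i] > largest) largest = input[i];
        }
        float sum = 0;
        for (int i = 0; i < n; ++i) {
            output[i] = expf(input[i] - largest);
            sum += output[i];
        }
        for (int i = 0; i < n; ++i) {
            output[i] /= sum;
        }
    }

The signature changes to the CPU backward calls below (dropping prev_input
from backward_maxpool_layer() and backward_softmax_layer()) mirror the new
GPU versions, which only receive the previous layer's delta buffer.
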
diff --git a/src/network.c b/src/network.c
index f9b4667..6696769 100644
--- a/src/network.c
+++ b/src/network.c
@@ -1,4 +1,5 @@
 #include <stdio.h>
+#include <time.h>
 #include "network.h"
 #include "image.h"
 #include "data.h"
@@ -31,8 +32,10 @@
 }
 
 #ifdef GPU
+
 void forward_network_gpu(network net, cl_mem input, cl_mem truth, int train)
 {
+    //printf("start\n");
     int i;
     for(i = 0; i < net.n; ++i){
         if(net.types[i] == CONVOLUTIONAL){
@@ -49,28 +52,28 @@
             forward_connected_layer_gpu(layer, input);
             input = layer.output_cl;
         }
-        /*
-        else if(net.types[i] == SOFTMAX){
-            softmax_layer layer = *(softmax_layer *)net.layers[i];
-            forward_softmax_layer(layer, input);
-            input = layer.output;
-        }
-        else if(net.types[i] == CROP){
-            crop_layer layer = *(crop_layer *)net.layers[i];
-            forward_crop_layer(layer, input);
-            input = layer.output;
-        }
         else if(net.types[i] == MAXPOOL){
             maxpool_layer layer = *(maxpool_layer *)net.layers[i];
-            forward_maxpool_layer(layer, input);
-            input = layer.output;
+            forward_maxpool_layer_gpu(layer, input);
+            input = layer.output_cl;
         }
-        else if(net.types[i] == NORMALIZATION){
-            normalization_layer layer = *(normalization_layer *)net.layers[i];
-            forward_normalization_layer(layer, input);
-            input = layer.output;
+        else if(net.types[i] == SOFTMAX){
+            softmax_layer layer = *(softmax_layer *)net.layers[i];
+            forward_softmax_layer_gpu(layer, input);
+            input = layer.output_cl;
         }
-        */
+        /*
+           else if(net.types[i] == CROP){
+           crop_layer layer = *(crop_layer *)net.layers[i];
+           forward_crop_layer(layer, input);
+           input = layer.output;
+           }
+           else if(net.types[i] == NORMALIZATION){
+           normalization_layer layer = *(normalization_layer *)net.layers[i];
+           forward_normalization_layer(layer, input);
+           input = layer.output;
+           }
+         */
     }
 }
 
@@ -99,6 +102,14 @@
             connected_layer layer = *(connected_layer *)net.layers[i];
             backward_connected_layer_gpu(layer, prev_input, prev_delta);
         }
+        else if(net.types[i] == MAXPOOL){
+            maxpool_layer layer = *(maxpool_layer *)net.layers[i];
+            backward_maxpool_layer_gpu(layer, prev_delta);
+        }
+        else if(net.types[i] == SOFTMAX){
+            softmax_layer layer = *(softmax_layer *)net.layers[i];
+            backward_softmax_layer_gpu(layer, prev_delta);
+        }
     }
 }
 
@@ -127,6 +138,14 @@
         connected_layer layer = *(connected_layer *)net.layers[i];
         return layer.output_cl;
     }
+    else if(net.types[i] == MAXPOOL){
+        maxpool_layer layer = *(maxpool_layer *)net.layers[i];
+        return layer.output_cl;
+    }
+    else if(net.types[i] == SOFTMAX){
+        softmax_layer layer = *(softmax_layer *)net.layers[i];
+        return layer.output_cl;
+    }
     return 0;
 }
 
@@ -140,6 +159,14 @@
         connected_layer layer = *(connected_layer *)net.layers[i];
         return layer.delta_cl;
     }
+    else if(net.types[i] == MAXPOOL){
+        maxpool_layer layer = *(maxpool_layer *)net.layers[i];
+        return layer.delta_cl;
+    }
+    else if(net.types[i] == SOFTMAX){
+        softmax_layer layer = *(softmax_layer *)net.layers[i];
+        return layer.delta_cl;
+    }
     return 0;
 }
 
@@ -330,7 +357,7 @@
         }
         else if(net.types[i] == MAXPOOL){
             maxpool_layer layer = *(maxpool_layer *)net.layers[i];
-            if(i != 0) backward_maxpool_layer(layer, prev_input, prev_delta);
+            if(i != 0) backward_maxpool_layer(layer, prev_delta);
         }
         else if(net.types[i] == NORMALIZATION){
             normalization_layer layer = *(normalization_layer *)net.layers[i];
@@ -338,7 +365,7 @@
         }
         else if(net.types[i] == SOFTMAX){
             softmax_layer layer = *(softmax_layer *)net.layers[i];
-            if(i != 0) backward_softmax_layer(layer, prev_input, prev_delta);
+            if(i != 0) backward_softmax_layer(layer, prev_delta);
         }
         else if(net.types[i] == CONNECTED){
             connected_layer layer = *(connected_layer *)net.layers[i];
@@ -351,6 +378,7 @@
     }
 }
 
+
 #ifdef GPU
 float train_network_datum_gpu(network net, float *x, float *y)
 {
@@ -364,13 +392,12 @@
         cl_write_array(*net.truth_cl, y, y_size);
     }
     forward_network_gpu(net, *net.input_cl, *net.truth_cl, 1);
-    //int class = get_predicted_class_network(net);
     backward_network_gpu(net, *net.input_cl);
     float error = get_network_cost(net);
     update_network_gpu(net);
-    //return (y[class]?1:0);
     return error;
 }
+
 float train_network_sgd_gpu(network net, data d, int n)
 {
     int batch = net.batch;

--
Gitblit v1.10.0