From 08b757a0bf76efe8c76b453063a1bb19315bcaa6 Mon Sep 17 00:00:00 2001
From: Joseph Redmon <pjreddie@gmail.com>
Date: Wed, 14 Jan 2015 20:18:57 +0000
Subject: [PATCH] Stable, needs to be way faster
---
src/network_gpu.c | 127 +++++++-----------------------------------
1 files changed, 21 insertions(+), 106 deletions(-)
diff --git a/src/network_gpu.c b/src/network_gpu.c
index d09aa71..c958056 100644
--- a/src/network_gpu.c
+++ b/src/network_gpu.c
@@ -17,14 +17,13 @@
#include "dropout_layer.h"
#ifdef GPU
+cl_mem get_network_output_cl_layer(network net, int i);
+cl_mem get_network_delta_cl_layer(network net, int i);
void forward_network_gpu(network net, cl_mem input, cl_mem truth, int train)
{
- //printf("start\n");
int i;
- // printf("Truth: %f\n", cl_checksum(truth, 1000*net.batch));
for(i = 0; i < net.n; ++i){
- //printf("Truth %i: %f\n", i, cl_checksum(truth, 1000*net.batch));
//clock_t time = clock();
if(net.types[i] == CONVOLUTIONAL){
convolutional_layer layer = *(convolutional_layer *)net.layers[i];
@@ -54,20 +53,15 @@
if(!train) continue;
dropout_layer layer = *(dropout_layer *)net.layers[i];
forward_dropout_layer_gpu(layer, input);
+ input = layer.output_cl;
}
- //printf("%d %f\n", i, sec(clock()-time));
- /*
- else if(net.types[i] == CROP){
- crop_layer layer = *(crop_layer *)net.layers[i];
- forward_crop_layer(layer, input);
- input = layer.output;
- }
- else if(net.types[i] == NORMALIZATION){
- normalization_layer layer = *(normalization_layer *)net.layers[i];
- forward_normalization_layer(layer, input);
- input = layer.output;
- }
- */
+ else if(net.types[i] == CROP){
+ crop_layer layer = *(crop_layer *)net.layers[i];
+ forward_crop_layer_gpu(layer, input);
+ input = layer.output_cl;
+ }
+ check_error(cl);
+ //printf("Forward %d %s %f\n", i, get_layer_string(net.types[i]), sec(clock() - time));
}
}
@@ -109,7 +103,8 @@
softmax_layer layer = *(softmax_layer *)net.layers[i];
backward_softmax_layer_gpu(layer, prev_delta);
}
- //printf("back: %d %f\n", i, sec(clock()-time));
+ check_error(cl);
+ //printf("Backward %d %s %f\n", i, get_layer_string(net.types[i]), sec(clock() - time));
}
}
@@ -142,11 +137,16 @@
maxpool_layer layer = *(maxpool_layer *)net.layers[i];
return layer.output_cl;
}
+ else if(net.types[i] == CROP){
+ crop_layer layer = *(crop_layer *)net.layers[i];
+ return layer.output_cl;
+ }
else if(net.types[i] == SOFTMAX){
softmax_layer layer = *(softmax_layer *)net.layers[i];
return layer.output_cl;
} else if(net.types[i] == DROPOUT){
- return get_network_output_cl_layer(net, i-1);
+ dropout_layer layer = *(dropout_layer *)net.layers[i];
+ return layer.output_cl;
}
return 0;
}
@@ -169,6 +169,7 @@
softmax_layer layer = *(softmax_layer *)net.layers[i];
return layer.delta_cl;
} else if(net.types[i] == DROPOUT){
+ if(i == 0) return 0;
return get_network_delta_cl_layer(net, i-1);
}
return 0;
@@ -178,7 +179,6 @@
{
int x_size = get_network_input_size(net)*net.batch;
int y_size = get_network_output_size(net)*net.batch;
- //clock_t time = clock();
if(!*net.input_cl){
*net.input_cl = cl_make_array(x, x_size);
*net.truth_cl = cl_make_array(y, y_size);
@@ -193,42 +193,6 @@
return error;
}
-float train_network_sgd_gpu(network net, data d, int n)
-{
- int batch = net.batch;
- float *X = calloc(batch*d.X.cols, sizeof(float));
- float *y = calloc(batch*d.y.cols, sizeof(float));
-
- int i;
- float sum = 0;
- for(i = 0; i < n; ++i){
- get_random_batch(d, batch, X, y);
- float err = train_network_datum_gpu(net, X, y);
- sum += err;
- }
- free(X);
- free(y);
- return (float)sum/(n*batch);
-}
-
-float train_network_data_gpu(network net, data d, int n)
-{
- int batch = net.batch;
- float *X = calloc(batch*d.X.cols, sizeof(float));
- float *y = calloc(batch*d.y.cols, sizeof(float));
-
- int i;
- float sum = 0;
- for(i = 0; i < n; ++i){
- get_next_batch(d, batch, i*batch, X, y);
- float err = train_network_datum_gpu(net, X, y);
- sum += err;
- }
- free(X);
- free(y);
- return (float)sum/(n*batch);
-}
-
float *get_network_output_layer_gpu(network net, int i)
{
if(net.types[i] == CONVOLUTIONAL){
@@ -237,6 +201,7 @@
}
else if(net.types[i] == CONNECTED){
connected_layer layer = *(connected_layer *)net.layers[i];
+ cl_read_array(layer.output_cl, layer.output, layer.outputs*layer.batch);
return layer.output;
}
else if(net.types[i] == MAXPOOL){
@@ -260,7 +225,7 @@
float *network_predict_gpu(network net, float *input)
{
-
+
int size = get_network_input_size(net) * net.batch;
cl_mem input_cl = cl_make_array(input, size);
forward_network_gpu(net, input_cl, 0, 0);
@@ -269,54 +234,4 @@
return out;
}
-matrix network_predict_data_gpu(network net, data test)
-{
- int i,j,b;
- int k = get_network_output_size(net);
- matrix pred = make_matrix(test.X.rows, k);
- float *X = calloc(net.batch*test.X.cols, sizeof(float));
- for(i = 0; i < test.X.rows; i += net.batch){
- for(b = 0; b < net.batch; ++b){
- if(i+b == test.X.rows) break;
- memcpy(X+b*test.X.cols, test.X.vals[i+b], test.X.cols*sizeof(float));
- }
- float *out = network_predict_gpu(net, X);
- for(b = 0; b < net.batch; ++b){
- if(i+b == test.X.rows) break;
- for(j = 0; j < k; ++j){
- pred.vals[i+b][j] = out[j+b*k];
- }
- }
- }
- free(X);
- return pred;
-}
-float network_accuracy_gpu(network net, data d)
-{
- matrix guess = network_predict_data_gpu(net, d);
- float acc = matrix_topk_accuracy(d.y, guess,1);
- free_matrix(guess);
- return acc;
-}
-
-float *network_accuracies_gpu(network net, data d)
-{
- static float acc[2];
- matrix guess = network_predict_data_gpu(net, d);
- acc[0] = matrix_topk_accuracy(d.y, guess,1);
- acc[1] = matrix_topk_accuracy(d.y, guess,5);
- free_matrix(guess);
- return acc;
-}
-
-
-#else
-void forward_network_gpu(network net, cl_mem input, cl_mem truth, int train){}
-void backward_network_gpu(network net, cl_mem input){}
-void update_network_gpu(network net){}
-float train_network_sgd_gpu(network net, data d, int n){return 0;}
-float train_network_data_gpu(network net, data d, int n){return 0;}
-float *network_predict_gpu(network net, float *input){return 0;}
-float network_accuracy_gpu(network net, data d){return 0;}
-
#endif
--
Gitblit v1.10.0