From aa5996d58e68edfbefe51061856aecd549dd09c4 Mon Sep 17 00:00:00 2001
From: Joseph Redmon <pjreddie@gmail.com>
Date: Tue, 13 Jan 2015 01:27:08 +0000
Subject: [PATCH] Faster
---
src/network_gpu.c | 120 ++++++-----------------------------------------------------
1 file changed, 13 insertions(+), 107 deletions(-)
diff --git a/src/network_gpu.c b/src/network_gpu.c
index c3f22d3..b53d534 100644
--- a/src/network_gpu.c
+++ b/src/network_gpu.c
@@ -17,15 +17,14 @@
#include "dropout_layer.h"
#ifdef GPU
+cl_mem get_network_output_cl_layer(network net, int i);
+cl_mem get_network_delta_cl_layer(network net, int i);
void forward_network_gpu(network net, cl_mem input, cl_mem truth, int train)
{
- //printf("start\n");
int i;
- // printf("Truth: %f\n", cl_checksum(truth, 1000*net.batch));
for(i = 0; i < net.n; ++i){
- //printf("Truth %i: %f\n", i, cl_checksum(truth, 1000*net.batch));
- //clock_t time = clock();
+ clock_t time = clock();
if(net.types[i] == CONVOLUTIONAL){
convolutional_layer layer = *(convolutional_layer *)net.layers[i];
forward_convolutional_layer_gpu(layer, input);
@@ -54,25 +53,15 @@
if(!train) continue;
dropout_layer layer = *(dropout_layer *)net.layers[i];
forward_dropout_layer_gpu(layer, input);
+ input = layer.output_cl;
}
else if(net.types[i] == CROP){
crop_layer layer = *(crop_layer *)net.layers[i];
forward_crop_layer_gpu(layer, input);
input = layer.output_cl;
}
- //printf("%d %f\n", i, sec(clock()-time));
- /*
- else if(net.types[i] == CROP){
- crop_layer layer = *(crop_layer *)net.layers[i];
- forward_crop_layer(layer, input);
- input = layer.output;
- }
- else if(net.types[i] == NORMALIZATION){
- normalization_layer layer = *(normalization_layer *)net.layers[i];
- forward_normalization_layer(layer, input);
- input = layer.output;
- }
- */
+ check_error(cl);
+ //printf("Forw %d %f\n", i, sec(clock() - time));
}
}
@@ -82,7 +71,7 @@
cl_mem prev_input;
cl_mem prev_delta;
for(i = net.n-1; i >= 0; --i){
- //clock_t time = clock();
+ clock_t time = clock();
if(i == 0){
prev_input = input;
prev_delta = 0;
@@ -114,7 +103,8 @@
softmax_layer layer = *(softmax_layer *)net.layers[i];
backward_softmax_layer_gpu(layer, prev_delta);
}
- //printf("back: %d %f\n", i, sec(clock()-time));
+ check_error(cl);
+ //printf("Back %d %f\n", i, sec(clock() - time));
}
}
@@ -155,7 +145,8 @@
softmax_layer layer = *(softmax_layer *)net.layers[i];
return layer.output_cl;
} else if(net.types[i] == DROPOUT){
- return get_network_output_cl_layer(net, i-1);
+ dropout_layer layer = *(dropout_layer *)net.layers[i];
+ return layer.output_cl;
}
return 0;
}
@@ -178,6 +169,7 @@
softmax_layer layer = *(softmax_layer *)net.layers[i];
return layer.delta_cl;
} else if(net.types[i] == DROPOUT){
+ if(i == 0) return 0;
return get_network_delta_cl_layer(net, i-1);
}
return 0;
@@ -187,7 +179,6 @@
{
int x_size = get_network_input_size(net)*net.batch;
int y_size = get_network_output_size(net)*net.batch;
- //clock_t time = clock();
if(!*net.input_cl){
*net.input_cl = cl_make_array(x, x_size);
*net.truth_cl = cl_make_array(y, y_size);
@@ -202,42 +193,6 @@
return error;
}
-float train_network_sgd_gpu(network net, data d, int n)
-{
- int batch = net.batch;
- float *X = calloc(batch*d.X.cols, sizeof(float));
- float *y = calloc(batch*d.y.cols, sizeof(float));
-
- int i;
- float sum = 0;
- for(i = 0; i < n; ++i){
- get_random_batch(d, batch, X, y);
- float err = train_network_datum_gpu(net, X, y);
- sum += err;
- }
- free(X);
- free(y);
- return (float)sum/(n*batch);
-}
-
-float train_network_data_gpu(network net, data d, int n)
-{
- int batch = net.batch;
- float *X = calloc(batch*d.X.cols, sizeof(float));
- float *y = calloc(batch*d.y.cols, sizeof(float));
-
- int i;
- float sum = 0;
- for(i = 0; i < n; ++i){
- get_next_batch(d, batch, i*batch, X, y);
- float err = train_network_datum_gpu(net, X, y);
- sum += err;
- }
- free(X);
- free(y);
- return (float)sum/(n*batch);
-}
-
float *get_network_output_layer_gpu(network net, int i)
{
if(net.types[i] == CONVOLUTIONAL){
@@ -246,6 +201,7 @@
}
else if(net.types[i] == CONNECTED){
connected_layer layer = *(connected_layer *)net.layers[i];
+ cl_read_array(layer.output_cl, layer.output, layer.outputs*layer.batch);
return layer.output;
}
else if(net.types[i] == MAXPOOL){
@@ -278,54 +234,4 @@
return out;
}
-matrix network_predict_data_gpu(network net, data test)
-{
- int i,j,b;
- int k = get_network_output_size(net);
- matrix pred = make_matrix(test.X.rows, k);
- float *X = calloc(net.batch*test.X.cols, sizeof(float));
- for(i = 0; i < test.X.rows; i += net.batch){
- for(b = 0; b < net.batch; ++b){
- if(i+b == test.X.rows) break;
- memcpy(X+b*test.X.cols, test.X.vals[i+b], test.X.cols*sizeof(float));
- }
- float *out = network_predict_gpu(net, X);
- for(b = 0; b < net.batch; ++b){
- if(i+b == test.X.rows) break;
- for(j = 0; j < k; ++j){
- pred.vals[i+b][j] = out[j+b*k];
- }
- }
- }
- free(X);
- return pred;
-}
-float network_accuracy_gpu(network net, data d)
-{
- matrix guess = network_predict_data_gpu(net, d);
- float acc = matrix_topk_accuracy(d.y, guess,1);
- free_matrix(guess);
- return acc;
-}
-
-float *network_accuracies_gpu(network net, data d)
-{
- static float acc[2];
- matrix guess = network_predict_data_gpu(net, d);
- acc[0] = matrix_topk_accuracy(d.y, guess,1);
- acc[1] = matrix_topk_accuracy(d.y, guess,5);
- free_matrix(guess);
- return acc;
-}
-
-
-#else
-void forward_network_gpu(network net, cl_mem input, cl_mem truth, int train){}
-void backward_network_gpu(network net, cl_mem input){}
-void update_network_gpu(network net){}
-float train_network_sgd_gpu(network net, data d, int n){return 0;}
-float train_network_data_gpu(network net, data d, int n){return 0;}
-float *network_predict_gpu(network net, float *input){return 0;}
-float network_accuracy_gpu(network net, data d){return 0;}
-
#endif
--
Gitblit v1.10.0