| | |
| | | |
| | | #include "crop_layer.h" |
| | | #include "connected_layer.h" |
| | | #include "detection_layer.h" |
| | | #include "convolutional_layer.h" |
| | | #include "deconvolutional_layer.h" |
| | | #include "maxpool_layer.h" |
| | | #include "cost_layer.h" |
| | | #include "normalization_layer.h" |
| | |
| | | |
| | | extern "C" float * get_network_output_gpu_layer(network net, int i); |
| | | extern "C" float * get_network_delta_gpu_layer(network net, int i); |
| | | float *get_network_output_gpu(network net); |
| | | |
| | | void forward_network_gpu(network net, float * input, float * truth, int train) |
| | | { |
| | |
| | | forward_convolutional_layer_gpu(layer, input); |
| | | input = layer.output_gpu; |
| | | } |
| | | else if(net.types[i] == DECONVOLUTIONAL){ |
| | | deconvolutional_layer layer = *(deconvolutional_layer *)net.layers[i]; |
| | | forward_deconvolutional_layer_gpu(layer, input); |
| | | input = layer.output_gpu; |
| | | } |
| | | else if(net.types[i] == COST){ |
| | | cost_layer layer = *(cost_layer *)net.layers[i]; |
| | | forward_cost_layer_gpu(layer, input, truth); |
| | |
| | | forward_connected_layer_gpu(layer, input); |
| | | input = layer.output_gpu; |
| | | } |
| | | else if(net.types[i] == DETECTION){ |
| | | detection_layer layer = *(detection_layer *)net.layers[i]; |
| | | forward_detection_layer_gpu(layer, input, truth); |
| | | input = layer.output_gpu; |
| | | } |
| | | else if(net.types[i] == MAXPOOL){ |
| | | maxpool_layer layer = *(maxpool_layer *)net.layers[i]; |
| | | forward_maxpool_layer_gpu(layer, input); |
| | |
| | | } |
| | | else if(net.types[i] == CROP){ |
| | | crop_layer layer = *(crop_layer *)net.layers[i]; |
| | | forward_crop_layer_gpu(layer, input); |
| | | forward_crop_layer_gpu(layer, train, input); |
| | | input = layer.output_gpu; |
| | | } |
| | | //cudaDeviceSynchronize(); |
| | | //printf("Forward %d %s %f\n", i, get_layer_string(net.types[i]), sec(clock() - time)); |
| | | } |
| | | } |
| | | |
| | | void backward_network_gpu(network net, float * input) |
| | | void backward_network_gpu(network net, float * input, float *truth) |
| | | { |
| | | int i; |
| | | float * prev_input; |
| | |
| | | convolutional_layer layer = *(convolutional_layer *)net.layers[i]; |
| | | backward_convolutional_layer_gpu(layer, prev_input, prev_delta); |
| | | } |
| | | else if(net.types[i] == DECONVOLUTIONAL){ |
| | | deconvolutional_layer layer = *(deconvolutional_layer *)net.layers[i]; |
| | | backward_deconvolutional_layer_gpu(layer, prev_input, prev_delta); |
| | | } |
| | | else if(net.types[i] == COST){ |
| | | cost_layer layer = *(cost_layer *)net.layers[i]; |
| | | backward_cost_layer_gpu(layer, prev_input, prev_delta); |
| | |
| | | connected_layer layer = *(connected_layer *)net.layers[i]; |
| | | backward_connected_layer_gpu(layer, prev_input, prev_delta); |
| | | } |
| | | else if(net.types[i] == DETECTION){ |
| | | detection_layer layer = *(detection_layer *)net.layers[i]; |
| | | backward_detection_layer_gpu(layer, prev_input, prev_delta); |
| | | } |
| | | else if(net.types[i] == MAXPOOL){ |
| | | maxpool_layer layer = *(maxpool_layer *)net.layers[i]; |
| | | backward_maxpool_layer_gpu(layer, prev_delta); |
| | |
| | | convolutional_layer layer = *(convolutional_layer *)net.layers[i]; |
| | | update_convolutional_layer_gpu(layer); |
| | | } |
| | | else if(net.types[i] == DECONVOLUTIONAL){ |
| | | deconvolutional_layer layer = *(deconvolutional_layer *)net.layers[i]; |
| | | update_deconvolutional_layer_gpu(layer); |
| | | } |
| | | else if(net.types[i] == CONNECTED){ |
| | | connected_layer layer = *(connected_layer *)net.layers[i]; |
| | | update_connected_layer_gpu(layer); |
| | |
| | | convolutional_layer layer = *(convolutional_layer *)net.layers[i]; |
| | | return layer.output_gpu; |
| | | } |
| | | else if(net.types[i] == DECONVOLUTIONAL){ |
| | | deconvolutional_layer layer = *(deconvolutional_layer *)net.layers[i]; |
| | | return layer.output_gpu; |
| | | } |
| | | else if(net.types[i] == DETECTION){ |
| | | detection_layer layer = *(detection_layer *)net.layers[i]; |
| | | return layer.output_gpu; |
| | | } |
| | | else if(net.types[i] == CONNECTED){ |
| | | connected_layer layer = *(connected_layer *)net.layers[i]; |
| | | return layer.output_gpu; |
| | |
| | | convolutional_layer layer = *(convolutional_layer *)net.layers[i]; |
| | | return layer.delta_gpu; |
| | | } |
| | | else if(net.types[i] == DETECTION){ |
| | | detection_layer layer = *(detection_layer *)net.layers[i]; |
| | | return layer.delta_gpu; |
| | | } |
| | | else if(net.types[i] == DECONVOLUTIONAL){ |
| | | deconvolutional_layer layer = *(deconvolutional_layer *)net.layers[i]; |
| | | return layer.delta_gpu; |
| | | } |
| | | else if(net.types[i] == CONNECTED){ |
| | | connected_layer layer = *(connected_layer *)net.layers[i]; |
| | | return layer.delta_gpu; |
| | |
| | | |
| | | float train_network_datum_gpu(network net, float *x, float *y) |
| | | { |
| | | //clock_t time = clock(); |
| | | int x_size = get_network_input_size(net)*net.batch; |
| | | int y_size = get_network_output_size(net)*net.batch; |
| | | if(!*net.input_gpu){ |
| | |
| | | cuda_push_array(*net.input_gpu, x, x_size); |
| | | cuda_push_array(*net.truth_gpu, y, y_size); |
| | | } |
| | | //printf("trans %f\n", sec(clock() - time)); |
| | | //time = clock(); |
| | | forward_network_gpu(net, *net.input_gpu, *net.truth_gpu, 1); |
| | | backward_network_gpu(net, *net.input_gpu); |
| | | //printf("forw %f\n", sec(clock() - time)); |
| | | //time = clock(); |
| | | backward_network_gpu(net, *net.input_gpu, *net.truth_gpu); |
| | | //printf("back %f\n", sec(clock() - time)); |
| | | //time = clock(); |
| | | update_network_gpu(net); |
| | | float error = get_network_cost(net); |
| | | |
| | | //print_letters(y, 50); |
| | | //float *out = get_network_output_gpu(net); |
| | | //print_letters(out, 50); |
| | | //printf("updt %f\n", sec(clock() - time)); |
| | | //time = clock(); |
| | | return error; |
| | | } |
| | | |
| | |
| | | convolutional_layer layer = *(convolutional_layer *)net.layers[i]; |
| | | return layer.output; |
| | | } |
| | | else if(net.types[i] == DECONVOLUTIONAL){ |
| | | deconvolutional_layer layer = *(deconvolutional_layer *)net.layers[i]; |
| | | return layer.output; |
| | | } |
| | | else if(net.types[i] == CONNECTED){ |
| | | connected_layer layer = *(connected_layer *)net.layers[i]; |
| | | cuda_pull_array(layer.output_gpu, layer.output, layer.outputs*layer.batch); |
| | | return layer.output; |
| | | } |
| | | else if(net.types[i] == DETECTION){ |
| | | detection_layer layer = *(detection_layer *)net.layers[i]; |
| | | int outputs = get_detection_layer_output_size(layer); |
| | | cuda_pull_array(layer.output_gpu, layer.output, outputs*layer.batch); |
| | | return layer.output; |
| | | } |
| | | else if(net.types[i] == MAXPOOL){ |
| | | maxpool_layer layer = *(maxpool_layer *)net.layers[i]; |
| | | return layer.output; |