refactoring and added DARK ZONE
34 files modified
1 file added
2 files deleted
| | |
| | | printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); |
| | | int imgs = 1024; |
| | | int i = net.seen/imgs; |
| | | list *plist = get_paths("/data/captcha/train.list"); |
| | | list *plist = get_paths("/data/captcha/train.base"); |
| | | char **paths = (char **)list_to_array(plist); |
| | | printf("%d\n", plist->size); |
| | | clock_t time; |
| | |
| | | #include <stdlib.h> |
| | | #include <string.h> |
| | | |
| | | connected_layer *make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation, float learning_rate, float momentum, float decay) |
| | | connected_layer *make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation) |
| | | { |
| | | int i; |
| | | connected_layer *layer = calloc(1, sizeof(connected_layer)); |
| | | |
| | | layer->learning_rate = learning_rate; |
| | | layer->momentum = momentum; |
| | | layer->decay = decay; |
| | | |
| | | layer->inputs = inputs; |
| | | layer->outputs = outputs; |
| | | layer->batch=batch; |
| | |
| | | return layer; |
| | | } |
| | | |
| | | void secret_update_connected_layer(connected_layer *layer) /* NOTE(review): this view interleaves two versions of the update routine; the lines using layer-> only fit this pointer signature - confirm against the real diff */ |
| | | void update_connected_layer(connected_layer layer, float learning_rate, float momentum, float decay) /* by-value variant: the lines using layer. with bare learning_rate/momentum/decay fit this signature */ |
| | | { |
| | | int n = layer->outputs*layer->inputs; /* element count passed for the weight arrays below */ |
| | | float dot = dot_cpu(n, layer->weight_updates, 1, layer->weight_prev, 1); /* presumably the dot product of weight_updates and weight_prev - confirm dot_cpu */ |
| | | float mag = sqrt(dot_cpu(n, layer->weight_updates, 1, layer->weight_updates, 1)) |
| | | * sqrt(dot_cpu(n, layer->weight_prev, 1, layer->weight_prev, 1)); /* product of the square roots of each array's self dot_cpu */ |
| | | float cos = dot/mag; /* NOTE(review): mag is not checked for zero before the division */ |
| | | if(cos > .3) layer->learning_rate *= 1.1; /* above .3: learning rate is multiplied by 1.1 */ |
| | | else if (cos < -.3) layer-> learning_rate /= 1.1; /* below -.3: learning rate is divided by 1.1 */ |
| | | axpy_cpu(layer.outputs, learning_rate, layer.bias_updates, 1, layer.biases, 1); /* presumably biases += learning_rate * bias_updates (BLAS-style axpy) - confirm axpy_cpu */ |
| | | scal_cpu(layer.outputs, momentum, layer.bias_updates, 1); /* presumably bias_updates *= momentum - confirm scal_cpu */ |
| | | |
| | | scal_cpu(n, layer->momentum, layer->weight_prev, 1); |
| | | axpy_cpu(n, 1, layer->weight_updates, 1, layer->weight_prev, 1); |
| | | scal_cpu(n, 0, layer->weight_updates, 1); /* scale factor 0: presumably clears weight_updates - confirm scal_cpu */ |
| | | |
| | | scal_cpu(layer->outputs, layer->momentum, layer->bias_prev, 1); |
| | | axpy_cpu(layer->outputs, 1, layer->bias_updates, 1, layer->bias_prev, 1); |
| | | scal_cpu(layer->outputs, 0, layer->bias_updates, 1); /* same three-call sequence as above, applied to the bias arrays */ |
| | | |
| | | axpy_cpu(layer->outputs, layer->learning_rate, layer->bias_prev, 1, layer->biases, 1); |
| | | |
| | | axpy_cpu(layer->inputs*layer->outputs, -layer->decay, layer->weights, 1, layer->weight_prev, 1); |
| | | axpy_cpu(layer->inputs*layer->outputs, layer->learning_rate, layer->weight_prev, 1, layer->weights, 1); |
| | | axpy_cpu(layer.inputs*layer.outputs, -decay, layer.weights, 1, layer.weight_updates, 1); /* presumably weight_updates -= decay * weights - confirm axpy_cpu */ |
| | | axpy_cpu(layer.inputs*layer.outputs, learning_rate, layer.weight_updates, 1, layer.weights, 1); /* presumably weights += learning_rate * weight_updates */ |
| | | scal_cpu(layer.inputs*layer.outputs, momentum, layer.weight_updates, 1); /* presumably weight_updates *= momentum, carried into the next call */ |
| | | } |
| | | |
| | | void update_connected_layer(connected_layer layer) /* NOTE(review): variant that reads learning_rate, momentum and decay from the layer struct; a four-argument function of the same name also appears in this view, so this is probably the removed side of the diff - confirm */ |
| | | { |
| | | axpy_cpu(layer.outputs, layer.learning_rate, layer.bias_updates, 1, layer.biases, 1); /* presumably biases += learning_rate * bias_updates - confirm axpy_cpu */ |
| | | scal_cpu(layer.outputs, layer.momentum, layer.bias_updates, 1); /* presumably bias_updates *= momentum - confirm scal_cpu */ |
| | | |
| | | axpy_cpu(layer.inputs*layer.outputs, -layer.decay, layer.weights, 1, layer.weight_updates, 1); /* presumably weight_updates -= decay * weights */ |
| | | axpy_cpu(layer.inputs*layer.outputs, layer.learning_rate, layer.weight_updates, 1, layer.weights, 1); /* presumably weights += learning_rate * weight_updates */ |
| | | scal_cpu(layer.inputs*layer.outputs, layer.momentum, layer.weight_updates, 1); /* presumably weight_updates *= momentum */ |
| | | } |
| | | |
| | | void forward_connected_layer(connected_layer layer, float *input) |
| | | void forward_connected_layer(connected_layer layer, network_state state) |
| | | { |
| | | int i; |
| | | for(i = 0; i < layer.batch; ++i){ |
| | |
| | | int m = layer.batch; |
| | | int k = layer.inputs; |
| | | int n = layer.outputs; |
| | | float *a = input; |
| | | float *a = state.input; |
| | | float *b = layer.weights; |
| | | float *c = layer.output; |
| | | gemm(0,0,m,n,k,1,a,k,b,n,1,c,n); |
| | | activate_array(layer.output, layer.outputs*layer.batch, layer.activation); |
| | | } |
| | | |
| | | void backward_connected_layer(connected_layer layer, float *input, float *delta) |
| | | void backward_connected_layer(connected_layer layer, network_state state) |
| | | { |
| | | int i; |
| | | float alpha = 1./layer.batch; |
| | |
| | | int m = layer.inputs; |
| | | int k = layer.batch; |
| | | int n = layer.outputs; |
| | | float *a = input; |
| | | float *a = state.input; |
| | | float *b = layer.delta; |
| | | float *c = layer.weight_updates; |
| | | gemm(1,0,m,n,k,alpha,a,m,b,n,1,c,n); |
| | |
| | | |
| | | a = layer.delta; |
| | | b = layer.weights; |
| | | c = delta; |
| | | c = state.delta; |
| | | |
| | | if(c) gemm(0,1,m,n,k,1,a,k,b,k,0,c,n); |
| | | } |
| | |
| | | cuda_push_array(layer.bias_updates_gpu, layer.bias_updates, layer.outputs); |
| | | } |
| | | |
| | | void update_connected_layer_gpu(connected_layer layer) /* NOTE(review): two versions are interleaved in this view; lines reading layer.learning_rate / layer.momentum / layer.decay fit this signature - confirm against the real diff */ |
| | | void update_connected_layer_gpu(connected_layer layer, float learning_rate, float momentum, float decay) /* variant taking the three hyper-parameters as arguments */ |
| | | { |
| | | /* |
| | | cuda_pull_array(layer.weights_gpu, layer.weights, layer.inputs*layer.outputs); |
| | | cuda_pull_array(layer.weight_updates_gpu, layer.weight_updates, layer.inputs*layer.outputs); |
| | | printf("Weights: %f updates: %f\n", mag_array(layer.weights, layer.inputs*layer.outputs), layer.learning_rate*mag_array(layer.weight_updates, layer.inputs*layer.outputs)); |
| | | */ |
| | | axpy_ongpu(layer.outputs, learning_rate, layer.bias_updates_gpu, 1, layer.biases_gpu, 1); /* presumably biases_gpu += learning_rate * bias_updates_gpu - confirm axpy_ongpu */ |
| | | scal_ongpu(layer.outputs, momentum, layer.bias_updates_gpu, 1); /* presumably bias_updates_gpu *= momentum - confirm scal_ongpu */ |
| | | |
| | | axpy_ongpu(layer.outputs, layer.learning_rate, layer.bias_updates_gpu, 1, layer.biases_gpu, 1); |
| | | scal_ongpu(layer.outputs, layer.momentum, layer.bias_updates_gpu, 1); |
| | | |
| | | axpy_ongpu(layer.inputs*layer.outputs, -layer.decay, layer.weights_gpu, 1, layer.weight_updates_gpu, 1); |
| | | axpy_ongpu(layer.inputs*layer.outputs, layer.learning_rate, layer.weight_updates_gpu, 1, layer.weights_gpu, 1); |
| | | scal_ongpu(layer.inputs*layer.outputs, layer.momentum, layer.weight_updates_gpu, 1); |
| | | axpy_ongpu(layer.inputs*layer.outputs, -decay, layer.weights_gpu, 1, layer.weight_updates_gpu, 1); /* presumably weight_updates_gpu -= decay * weights_gpu */ |
| | | axpy_ongpu(layer.inputs*layer.outputs, learning_rate, layer.weight_updates_gpu, 1, layer.weights_gpu, 1); /* presumably weights_gpu += learning_rate * weight_updates_gpu */ |
| | | scal_ongpu(layer.inputs*layer.outputs, momentum, layer.weight_updates_gpu, 1); /* presumably weight_updates_gpu *= momentum */ |
| | | } |
| | | |
| | | void forward_connected_layer_gpu(connected_layer layer, float * input) |
| | | void forward_connected_layer_gpu(connected_layer layer, network_state state) |
| | | { |
| | | int i; |
| | | for(i = 0; i < layer.batch; ++i){ |
| | |
| | | int m = layer.batch; |
| | | int k = layer.inputs; |
| | | int n = layer.outputs; |
| | | float * a = input; |
| | | float * a = state.input; |
| | | float * b = layer.weights_gpu; |
| | | float * c = layer.output_gpu; |
| | | gemm_ongpu(0,0,m,n,k,1,a,k,b,n,1,c,n); |
| | | activate_array_ongpu(layer.output_gpu, layer.outputs*layer.batch, layer.activation); |
| | | } |
| | | |
| | | void backward_connected_layer_gpu(connected_layer layer, float * input, float * delta) |
| | | void backward_connected_layer_gpu(connected_layer layer, network_state state) |
| | | { |
| | | float alpha = 1./layer.batch; |
| | | int i; |
| | |
| | | int m = layer.inputs; |
| | | int k = layer.batch; |
| | | int n = layer.outputs; |
| | | float * a = input; |
| | | float * a = state.input; |
| | | float * b = layer.delta_gpu; |
| | | float * c = layer.weight_updates_gpu; |
| | | gemm_ongpu(1,0,m,n,k,alpha,a,m,b,n,1,c,n); |
| | |
| | | |
| | | a = layer.delta_gpu; |
| | | b = layer.weights_gpu; |
| | | c = delta; |
| | | c = state.delta; |
| | | |
| | | if(c) gemm_ongpu(0,1,m,n,k,1,a,k,b,k,0,c,n); |
| | | } |
| | |
| | | #define CONNECTED_LAYER_H |
| | | |
| | | #include "activations.h" |
| | | #include "params.h" |
| | | |
| | | typedef struct{ |
| | | float learning_rate; |
| | | float momentum; |
| | | float decay; |
| | | |
| | | int batch; |
| | | int inputs; |
| | | int outputs; |
| | |
| | | |
| | | } connected_layer; |
| | | |
| | | void secret_update_connected_layer(connected_layer *layer); |
| | | connected_layer *make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation, float learning_rate, float momentum, float decay); |
| | | connected_layer *make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation); |
| | | |
| | | void forward_connected_layer(connected_layer layer, float *input); |
| | | void backward_connected_layer(connected_layer layer, float *input, float *delta); |
| | | void update_connected_layer(connected_layer layer); |
| | | void forward_connected_layer(connected_layer layer, network_state state); |
| | | void backward_connected_layer(connected_layer layer, network_state state); |
| | | void update_connected_layer(connected_layer layer, float learning_rate, float momentum, float decay); |
| | | |
| | | #ifdef GPU |
| | | void forward_connected_layer_gpu(connected_layer layer, float * input); |
| | | void backward_connected_layer_gpu(connected_layer layer, float * input, float * delta); |
| | | void update_connected_layer_gpu(connected_layer layer); |
| | | void forward_connected_layer_gpu(connected_layer layer, network_state state); |
| | | void backward_connected_layer_gpu(connected_layer layer, network_state state); |
| | | void update_connected_layer_gpu(connected_layer layer, float learning_rate, float momentum, float decay); |
| | | void push_connected_layer(connected_layer layer); |
| | | void pull_connected_layer(connected_layer layer); |
| | | #endif |
| | |
| | | check_error(cudaPeekAtLastError()); |
| | | } |
| | | |
| | | extern "C" void forward_convolutional_layer_gpu(convolutional_layer layer, float *in) |
| | | extern "C" void forward_convolutional_layer_gpu(convolutional_layer layer, network_state state) |
| | | { |
| | | int i; |
| | | int m = layer.n; |
| | |
| | | bias_output_gpu(layer.output_gpu, layer.biases_gpu, layer.batch, layer.n, n); |
| | | |
| | | for(i = 0; i < layer.batch; ++i){ |
| | | im2col_ongpu(in + i*layer.c*layer.h*layer.w, layer.c, layer.h, layer.w, layer.size, layer.stride, layer.pad, layer.col_image_gpu); |
| | | im2col_ongpu(state.input + i*layer.c*layer.h*layer.w, layer.c, layer.h, layer.w, layer.size, layer.stride, layer.pad, layer.col_image_gpu); |
| | | float * a = layer.filters_gpu; |
| | | float * b = layer.col_image_gpu; |
| | | float * c = layer.output_gpu; |
| | |
| | | activate_array_ongpu(layer.output_gpu, m*n*layer.batch, layer.activation); |
| | | } |
| | | |
| | | extern "C" void backward_convolutional_layer_gpu(convolutional_layer layer, float *in, float *delta_gpu) |
| | | extern "C" void backward_convolutional_layer_gpu(convolutional_layer layer, network_state state) |
| | | { |
| | | float alpha = 1./layer.batch; |
| | | int i; |
| | |
| | | gradient_array_ongpu(layer.output_gpu, m*k*layer.batch, layer.activation, layer.delta_gpu); |
| | | backward_bias_gpu(layer.bias_updates_gpu, layer.delta_gpu, layer.batch, layer.n, k); |
| | | |
| | | if(delta_gpu) scal_ongpu(layer.batch*layer.h*layer.w*layer.c, 0, delta_gpu, 1); |
| | | if(state.delta) scal_ongpu(layer.batch*layer.h*layer.w*layer.c, 0, state.delta, 1); |
| | | |
| | | for(i = 0; i < layer.batch; ++i){ |
| | | float * a = layer.delta_gpu; |
| | | float * b = layer.col_image_gpu; |
| | | float * c = layer.filter_updates_gpu; |
| | | |
| | | im2col_ongpu(in + i*layer.c*layer.h*layer.w, layer.c, layer.h, layer.w, layer.size, layer.stride, layer.pad, layer.col_image_gpu); |
| | | im2col_ongpu(state.input + i*layer.c*layer.h*layer.w, layer.c, layer.h, layer.w, layer.size, layer.stride, layer.pad, layer.col_image_gpu); |
| | | gemm_ongpu(0,1,m,n,k,alpha,a + i*m*k,k,b,k,1,c,n); |
| | | |
| | | if(delta_gpu){ |
| | | if(state.delta){ |
| | | |
| | | float * a = layer.filters_gpu; |
| | | float * b = layer.delta_gpu; |
| | |
| | | |
| | | gemm_ongpu(1,0,n,k,m,1,a,n,b + i*k*m,k,0,c,k); |
| | | |
| | | col2im_ongpu(layer.col_image_gpu, layer.c, layer.h, layer.w, layer.size, layer.stride, layer.pad, delta_gpu + i*layer.c*layer.h*layer.w); |
| | | col2im_ongpu(layer.col_image_gpu, layer.c, layer.h, layer.w, layer.size, layer.stride, layer.pad, state.delta + i*layer.c*layer.h*layer.w); |
| | | } |
| | | } |
| | | } |
| | |
| | | cuda_push_array(layer.bias_updates_gpu, layer.bias_updates, layer.n); |
| | | } |
| | | |
| | | extern "C" void update_convolutional_layer_gpu(convolutional_layer layer) /* NOTE(review): two versions are interleaved in this view; lines reading layer.learning_rate / layer.momentum / layer.decay fit this signature - confirm against the real diff */ |
| | | extern "C" void update_convolutional_layer_gpu(convolutional_layer layer, float learning_rate, float momentum, float decay) /* variant taking the three hyper-parameters as arguments */ |
| | | { |
| | | int size = layer.size*layer.size*layer.c*layer.n; /* element count passed for the filter arrays below */ |
| | | |
| | | /* |
| | | cuda_pull_array(layer.filter_updates_gpu, layer.filter_updates, size); |
| | | cuda_pull_array(layer.filters_gpu, layer.filters, size); |
| | | printf("Filter: %f updates: %f\n", mag_array(layer.filters, size), layer.learning_rate*mag_array(layer.filter_updates, size)); |
| | | */ |
| | | axpy_ongpu(layer.n, learning_rate, layer.bias_updates_gpu, 1, layer.biases_gpu, 1); /* presumably biases_gpu += learning_rate * bias_updates_gpu - confirm axpy_ongpu */ |
| | | scal_ongpu(layer.n, momentum, layer.bias_updates_gpu, 1); /* presumably bias_updates_gpu *= momentum - confirm scal_ongpu */ |
| | | |
| | | axpy_ongpu(layer.n, layer.learning_rate, layer.bias_updates_gpu, 1, layer.biases_gpu, 1); |
| | | scal_ongpu(layer.n,layer.momentum, layer.bias_updates_gpu, 1); |
| | | |
| | | axpy_ongpu(size, -layer.decay, layer.filters_gpu, 1, layer.filter_updates_gpu, 1); |
| | | axpy_ongpu(size, layer.learning_rate, layer.filter_updates_gpu, 1, layer.filters_gpu, 1); |
| | | scal_ongpu(size, layer.momentum, layer.filter_updates_gpu, 1); |
| | | //pull_convolutional_layer(layer); |
| | | axpy_ongpu(size, -decay, layer.filters_gpu, 1, layer.filter_updates_gpu, 1); /* presumably filter_updates_gpu -= decay * filters_gpu */ |
| | | axpy_ongpu(size, learning_rate, layer.filter_updates_gpu, 1, layer.filters_gpu, 1); /* presumably filters_gpu += learning_rate * filter_updates_gpu */ |
| | | scal_ongpu(size, momentum, layer.filter_updates_gpu, 1); /* presumably filter_updates_gpu *= momentum */ |
| | | } |
| | | |
| | |
| | | return float_to_image(h,w,c,layer.delta); |
| | | } |
| | | |
| | | convolutional_layer *make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation, float learning_rate, float momentum, float decay) |
| | | convolutional_layer *make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation) |
| | | { |
| | | int i; |
| | | convolutional_layer *layer = calloc(1, sizeof(convolutional_layer)); |
| | | |
| | | layer->learning_rate = learning_rate; |
| | | layer->momentum = momentum; |
| | | layer->decay = decay; |
| | | |
| | | layer->h = h; |
| | | layer->w = w; |
| | | layer->c = c; |
| | |
| | | } |
| | | |
| | | |
| | | void forward_convolutional_layer(const convolutional_layer layer, float *in) |
| | | void forward_convolutional_layer(const convolutional_layer layer, network_state state) |
| | | { |
| | | int out_h = convolutional_out_height(layer); |
| | | int out_w = convolutional_out_width(layer); |
| | |
| | | float *c = layer.output; |
| | | |
| | | for(i = 0; i < layer.batch; ++i){ |
| | | im2col_cpu(in, layer.c, layer.h, layer.w, |
| | | im2col_cpu(state.input, layer.c, layer.h, layer.w, |
| | | layer.size, layer.stride, layer.pad, b); |
| | | gemm(0,0,m,n,k,1,a,k,b,n,1,c,n); |
| | | c += n*m; |
| | | in += layer.c*layer.h*layer.w; |
| | | state.input += layer.c*layer.h*layer.w; |
| | | } |
| | | activate_array(layer.output, m*n*layer.batch, layer.activation); |
| | | } |
| | | |
| | | void backward_convolutional_layer(convolutional_layer layer, float *in, float *delta) |
| | | void backward_convolutional_layer(convolutional_layer layer, network_state state) |
| | | { |
| | | float alpha = 1./layer.batch; |
| | | int i; |
| | |
| | | gradient_array(layer.output, m*k*layer.batch, layer.activation, layer.delta); |
| | | backward_bias(layer.bias_updates, layer.delta, layer.batch, layer.n, k); |
| | | |
| | | if(delta) memset(delta, 0, layer.batch*layer.h*layer.w*layer.c*sizeof(float)); |
| | | if(state.delta) memset(state.delta, 0, layer.batch*layer.h*layer.w*layer.c*sizeof(float)); |
| | | |
| | | for(i = 0; i < layer.batch; ++i){ |
| | | float *a = layer.delta + i*m*k; |
| | | float *b = layer.col_image; |
| | | float *c = layer.filter_updates; |
| | | |
| | | float *im = in+i*layer.c*layer.h*layer.w; |
| | | float *im = state.input+i*layer.c*layer.h*layer.w; |
| | | |
| | | im2col_cpu(im, layer.c, layer.h, layer.w, |
| | | layer.size, layer.stride, layer.pad, b); |
| | | gemm(0,1,m,n,k,alpha,a,k,b,k,1,c,n); |
| | | |
| | | if(delta){ |
| | | if(state.delta){ |
| | | a = layer.filters; |
| | | b = layer.delta + i*m*k; |
| | | c = layer.col_image; |
| | | |
| | | gemm(1,0,n,k,m,1,a,n,b,k,0,c,k); |
| | | |
| | | col2im_cpu(layer.col_image, layer.c, layer.h, layer.w, layer.size, layer.stride, layer.pad, delta+i*layer.c*layer.h*layer.w); |
| | | col2im_cpu(layer.col_image, layer.c, layer.h, layer.w, layer.size, layer.stride, layer.pad, state.delta+i*layer.c*layer.h*layer.w); |
| | | } |
| | | } |
| | | } |
| | | |
| | | void update_convolutional_layer(convolutional_layer layer) /* NOTE(review): two versions are interleaved in this view; lines reading layer.learning_rate / layer.momentum / layer.decay fit this signature - confirm against the real diff */ |
| | | void update_convolutional_layer(convolutional_layer layer, float learning_rate, float momentum, float decay) /* variant taking the three hyper-parameters as arguments */ |
| | | { |
| | | int size = layer.size*layer.size*layer.c*layer.n; /* element count passed for the filter arrays below */ |
| | | axpy_cpu(layer.n, layer.learning_rate, layer.bias_updates, 1, layer.biases, 1); |
| | | scal_cpu(layer.n, layer.momentum, layer.bias_updates, 1); |
| | | axpy_cpu(layer.n, learning_rate, layer.bias_updates, 1, layer.biases, 1); /* presumably biases += learning_rate * bias_updates - confirm axpy_cpu */ |
| | | scal_cpu(layer.n, momentum, layer.bias_updates, 1); /* presumably bias_updates *= momentum - confirm scal_cpu */ |
| | | |
| | | axpy_cpu(size, -layer.decay, layer.filters, 1, layer.filter_updates, 1); |
| | | axpy_cpu(size, layer.learning_rate, layer.filter_updates, 1, layer.filters, 1); |
| | | scal_cpu(size, layer.momentum, layer.filter_updates, 1); |
| | | axpy_cpu(size, -decay, layer.filters, 1, layer.filter_updates, 1); /* presumably filter_updates -= decay * filters */ |
| | | axpy_cpu(size, learning_rate, layer.filter_updates, 1, layer.filters, 1); /* presumably filters += learning_rate * filter_updates */ |
| | | scal_cpu(size, momentum, layer.filter_updates, 1); /* presumably filter_updates *= momentum */ |
| | | } |
| | | |
| | | |
| | |
| | | #define CONVOLUTIONAL_LAYER_H |
| | | |
| | | #include "cuda.h" |
| | | #include "params.h" |
| | | #include "image.h" |
| | | #include "activations.h" |
| | | |
| | | typedef struct { |
| | | float learning_rate; |
| | | float momentum; |
| | | float decay; |
| | | |
| | | int batch; |
| | | int h,w,c; |
| | | int n; |
| | |
| | | } convolutional_layer; |
| | | |
| | | #ifdef GPU |
| | | void forward_convolutional_layer_gpu(convolutional_layer layer, float * in); |
| | | void backward_convolutional_layer_gpu(convolutional_layer layer, float * in, float * delta_gpu); |
| | | void update_convolutional_layer_gpu(convolutional_layer layer); |
| | | void forward_convolutional_layer_gpu(convolutional_layer layer, network_state state); |
| | | void backward_convolutional_layer_gpu(convolutional_layer layer, network_state state); |
| | | void update_convolutional_layer_gpu(convolutional_layer layer, float learning_rate, float momentum, float decay); |
| | | |
| | | void push_convolutional_layer(convolutional_layer layer); |
| | | void pull_convolutional_layer(convolutional_layer layer); |
| | |
| | | void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size); |
| | | #endif |
| | | |
| | | convolutional_layer *make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation, float learning_rate, float momentum, float decay); |
| | | convolutional_layer *make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation); |
| | | void resize_convolutional_layer(convolutional_layer *layer, int h, int w); |
| | | void forward_convolutional_layer(const convolutional_layer layer, float *in); |
| | | void update_convolutional_layer(convolutional_layer layer); |
| | | void forward_convolutional_layer(const convolutional_layer layer, network_state state); |
| | | void update_convolutional_layer(convolutional_layer layer, float learning_rate, float momentum, float decay); |
| | | image *visualize_convolutional_layer(convolutional_layer layer, char *window, image *prev_filters); |
| | | |
| | | void backward_convolutional_layer(convolutional_layer layer, float *in, float *delta); |
| | | void backward_convolutional_layer(convolutional_layer layer, network_state state); |
| | | |
| | | void bias_output(float *output, float *biases, int batch, int n, int size); |
| | | void backward_bias(float *bias_updates, float *delta, int batch, int n, int size); |
| | |
| | | cuda_push_array(layer.delta_gpu, layer.delta, layer.batch*layer.inputs); |
| | | } |
| | | |
| | | void forward_cost_layer(cost_layer layer, float *input, float *truth) /* NOTE(review): two versions are interleaved in this view; lines using bare input/truth fit this signature - confirm against the real diff */ |
| | | void forward_cost_layer(cost_layer layer, network_state state) /* variant reading input and truth from the state argument */ |
| | | { |
| | | if (!truth) return; |
| | | copy_cpu(layer.batch*layer.inputs, truth, 1, layer.delta, 1); |
| | | axpy_cpu(layer.batch*layer.inputs, -1, input, 1, layer.delta, 1); |
| | | if (!state.truth) return; /* nothing is computed when no truth pointer is supplied */ |
| | | copy_cpu(layer.batch*layer.inputs, state.truth, 1, layer.delta, 1); /* presumably copies truth into layer.delta - confirm copy_cpu argument order */ |
| | | axpy_cpu(layer.batch*layer.inputs, -1, state.input, 1, layer.delta, 1); /* presumably delta -= input, leaving truth - input - confirm axpy_cpu */ |
| | | *(layer.output) = dot_cpu(layer.batch*layer.inputs, layer.delta, 1, layer.delta, 1); /* stores dot_cpu of layer.delta with itself in *layer.output */ |
| | | //printf("cost: %f\n", *layer.output); |
| | | } |
| | | |
| | | void backward_cost_layer(const cost_layer layer, float *input, float *delta) /* NOTE(review): two versions are interleaved in this view; the line using bare delta fits this signature - confirm against the real diff */ |
| | | void backward_cost_layer(const cost_layer layer, network_state state) /* variant writing to state.delta */ |
| | | { |
| | | copy_cpu(layer.batch*layer.inputs, layer.delta, 1, delta, 1); |
| | | copy_cpu(layer.batch*layer.inputs, layer.delta, 1, state.delta, 1); /* presumably copies layer.delta into state.delta - confirm copy_cpu argument order; state.delta is not checked for NULL here */ |
| | | } |
| | | |
| | | #ifdef GPU |
| | | |
| | | void forward_cost_layer_gpu(cost_layer layer, float * input, float * truth) /* NOTE(review): two versions are interleaved in this view; lines using bare input/truth fit this signature - confirm against the real diff */ |
| | | void forward_cost_layer_gpu(cost_layer layer, network_state state) /* variant reading input and truth from the state argument */ |
| | | { |
| | | if (!truth) return; |
| | | if (!state.truth) return; /* nothing is computed when no truth pointer is supplied */ |
| | | |
| | | /* |
| | | float *in = calloc(layer.inputs*layer.batch, sizeof(float)); |
| | | float *t = calloc(layer.inputs*layer.batch, sizeof(float)); |
| | | cuda_pull_array(input, in, layer.batch*layer.inputs); |
| | | cuda_pull_array(truth, t, layer.batch*layer.inputs); |
| | | forward_cost_layer(layer, in, t); |
| | | cuda_push_array(layer.delta_gpu, layer.delta, layer.batch*layer.inputs); |
| | | free(in); |
| | | free(t); |
| | | */ |
| | | |
| | | copy_ongpu(layer.batch*layer.inputs, truth, 1, layer.delta_gpu, 1); |
| | | axpy_ongpu(layer.batch*layer.inputs, -1, input, 1, layer.delta_gpu, 1); |
| | | copy_ongpu(layer.batch*layer.inputs, state.truth, 1, layer.delta_gpu, 1); /* presumably copies truth into layer.delta_gpu - confirm copy_ongpu argument order */ |
| | | axpy_ongpu(layer.batch*layer.inputs, -1, state.input, 1, layer.delta_gpu, 1); /* presumably delta_gpu -= input - confirm axpy_ongpu */ |
| | | |
| | | cuda_pull_array(layer.delta_gpu, layer.delta, layer.batch*layer.inputs); /* presumably transfers delta_gpu into the host array layer.delta so dot_cpu can read it - confirm cuda_pull_array */ |
| | | *(layer.output) = dot_cpu(layer.batch*layer.inputs, layer.delta, 1, layer.delta, 1); /* stores dot_cpu of layer.delta with itself in *layer.output */ |
| | | //printf("cost: %f\n", *layer.output); |
| | | } |
| | | |
| | | void backward_cost_layer_gpu(const cost_layer layer, float * input, float * delta) /* NOTE(review): two versions are interleaved in this view; the line using bare delta fits this signature - confirm against the real diff */ |
| | | void backward_cost_layer_gpu(const cost_layer layer, network_state state) /* variant writing to state.delta */ |
| | | { |
| | | copy_ongpu(layer.batch*layer.inputs, layer.delta_gpu, 1, delta, 1); |
| | | copy_ongpu(layer.batch*layer.inputs, layer.delta_gpu, 1, state.delta, 1); /* presumably copies layer.delta_gpu into state.delta - confirm copy_ongpu argument order */ |
| | | } |
| | | #endif |
| | | |
| | |
| | | #ifndef COST_LAYER_H |
| | | #define COST_LAYER_H |
| | | #include "params.h" |
| | | |
| | | typedef enum{ |
| | | SSE |
| | |
| | | COST_TYPE get_cost_type(char *s); |
| | | char *get_cost_string(COST_TYPE a); |
| | | cost_layer *make_cost_layer(int batch, int inputs, COST_TYPE type); |
| | | void forward_cost_layer(const cost_layer layer, float *input, float *truth); |
| | | void backward_cost_layer(const cost_layer layer, float *input, float *delta); |
| | | void forward_cost_layer(const cost_layer layer, network_state state); |
| | | void backward_cost_layer(const cost_layer layer, network_state state); |
| | | |
| | | #ifdef GPU |
| | | void forward_cost_layer_gpu(cost_layer layer, float * input, float * truth); |
| | | void backward_cost_layer_gpu(const cost_layer layer, float * input, float * delta); |
| | | void forward_cost_layer_gpu(cost_layer layer, network_state state); |
| | | void backward_cost_layer_gpu(const cost_layer layer, network_state state); |
| | | #endif |
| | | |
| | | #endif |
| | |
| | | return layer; |
| | | } |
| | | |
| | | void forward_crop_layer(const crop_layer layer, int train, float *input) |
| | | void forward_crop_layer(const crop_layer layer, network_state state) |
| | | { |
| | | int i,j,c,b,row,col; |
| | | int index; |
| | |
| | | int flip = (layer.flip && rand()%2); |
| | | int dh = rand()%(layer.h - layer.crop_height + 1); |
| | | int dw = rand()%(layer.w - layer.crop_width + 1); |
| | | if(!train){ |
| | | if(!state.train){ |
| | | flip = 0; |
| | | dh = (layer.h - layer.crop_height)/2; |
| | | dw = (layer.w - layer.crop_width)/2; |
| | |
| | | } |
| | | row = i + dh; |
| | | index = col+layer.w*(row+layer.h*(c + layer.c*b)); |
| | | layer.output[count++] = input[index]; |
| | | layer.output[count++] = state.input[index]; |
| | | } |
| | | } |
| | | } |
| | |
| | | #define CROP_LAYER_H |
| | | |
| | | #include "image.h" |
| | | #include "params.h" |
| | | |
| | | typedef struct { |
| | | int batch; |
| | |
| | | |
| | | image get_crop_image(crop_layer layer); |
| | | crop_layer *make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip); |
| | | void forward_crop_layer(const crop_layer layer, int train, float *input); |
| | | void forward_crop_layer(const crop_layer layer, network_state state); |
| | | |
| | | #ifdef GPU |
| | | void forward_crop_layer_gpu(crop_layer layer, int train, float *input); |
| | | void forward_crop_layer_gpu(crop_layer layer, network_state state); |
| | | #endif |
| | | |
| | | #endif |
| | |
| | | output[count] = input[index]; |
| | | } |
| | | |
| | | extern "C" void forward_crop_layer_gpu(crop_layer layer, int train, float *input) |
| | | extern "C" void forward_crop_layer_gpu(crop_layer layer, network_state state) |
| | | { |
| | | int flip = (layer.flip && rand()%2); |
| | | int dh = rand()%(layer.h - layer.crop_height + 1); |
| | | int dw = rand()%(layer.w - layer.crop_width + 1); |
| | | if(!train){ |
| | | if(!state.train){ |
| | | flip = 0; |
| | | dh = (layer.h - layer.crop_height)/2; |
| | | dw = (layer.w - layer.crop_width)/2; |
| | |
| | | dim3 dimBlock(BLOCK, 1, 1); |
| | | dim3 dimGrid((size-1)/BLOCK + 1, 1, 1); |
| | | |
| | | forward_crop_layer_kernel<<<cuda_gridsize(size), BLOCK>>>(input, size, layer.c, layer.h, layer.w, |
| | | forward_crop_layer_kernel<<<cuda_gridsize(size), BLOCK>>>(state.input, size, layer.c, layer.h, layer.w, |
| | | layer.crop_height, layer.crop_width, dh, dw, flip, layer.output_gpu); |
| | | check_error(cudaPeekAtLastError()); |
| | | } |
| | |
| | | int nw; |
| | | int jitter; |
| | | int classes; |
| | | int background; |
| | | data *d; |
| | | }; |
| | | |
| | |
| | | return X; |
| | | } |
| | | |
| | | void fill_truth_detection(char *path, float *truth, int classes, int height, int width, int num_height, int num_width, int dy, int dx, int jitter, int flip) |
| | | typedef struct box{ /* one label record: an integer id plus four floats, filled by read_boxes from "%d %f %f %f %f" input */ |
| | | int id; /* indexes the class slot in fill_truth_detection (truth[index+id]) */ |
| | | float x,y,w,h; /* NOTE(review): fill_truth_detection scales x by width and y by height, so these look like fractions of the image size - confirm */ |
| | | } box; |
| | | |
| | | box *read_boxes(char *filename, int *n) /* Reads "id x y w h" records from filename into a heap-allocated array, stores the record count in *n and returns the array */ |
| | | { |
| | | box *boxes = calloc(1, sizeof(box)); /* initial one-element allocation; returned as-is when no record is read */ |
| | | FILE *file = fopen(filename, "r"); |
| | | if(!file) file_error(filename); /* NOTE(review): presumably file_error does not return, otherwise fscanf below receives a NULL stream - confirm */ |
| | | float x, y, h, w; |
| | | int id; |
| | | int count = 0; |
| | | while(fscanf(file, "%d %f %f %f %f", &id, &x, &y, &w, &h) == 5){ /* stops at the first read that does not convert all five fields */ |
| | | boxes = realloc(boxes, (count+1)*sizeof(box)); /* grows by one element per record; NOTE(review): the result is not checked and overwrites boxes directly */ |
| | | boxes[count].id = id; |
| | | boxes[count].x = x; |
| | | boxes[count].y = y; |
| | | boxes[count].h = h; |
| | | boxes[count].w = w; |
| | | ++count; |
| | | } |
| | | fclose(file); |
| | | *n = count; /* number of records stored in the array */ |
| | | return boxes; /* not freed here; the fill_truth_detection lines in this view call free(boxes) */ |
| | | } |
| | | |
| | | void randomize_boxes(box *b, int n) /* Reorders b[0..n-1] in place by swapping each element with one at a rand()-chosen index */ |
| | | { |
| | | int i; |
| | | for(i = 0; i < n; ++i){ /* body never runs for n <= 0, so rand()%n is only evaluated with n > 0 */ |
| | | box swap = b[i]; |
| | | int index = rand()%n; /* drawn from the whole range 0..n-1, including positions already visited */ |
| | | b[i] = b[index]; |
| | | b[index] = swap; |
| | | } |
| | | } |
| | | |
| | | void fill_truth_detection(char *path, float *truth, int classes, int height, int width, int num_height, int num_width, int dy, int dx, int jitter, int flip, int background) |
| | | { |
| | | int box_height = height/num_height; |
| | | int box_width = width/num_width; |
| | | char *labelpath = find_replace(path, "VOC2012/JPEGImages", "labels"); |
| | | labelpath = find_replace(labelpath, ".jpg", ".txt"); |
| | | FILE *file = fopen(labelpath, "r"); |
| | | if(!file) file_error(labelpath); |
| | | int count = 0; |
| | | box *boxes = read_boxes(labelpath, &count); |
| | | randomize_boxes(boxes, count); |
| | | float x, y, h, w; |
| | | int id; |
| | | while(fscanf(file, "%d %f %f %f %f", &id, &x, &y, &w, &h) == 5){ |
| | | int i, j; |
| | | for(i = 0; i < count; ++i){ |
| | | x = boxes[i].x; |
| | | y = boxes[i].y; |
| | | w = boxes[i].w; |
| | | h = boxes[i].h; |
| | | id = boxes[i].id; |
| | | if(flip) x = 1-x; |
| | | x *= width + jitter; |
| | | y *= height + jitter; |
| | |
| | | |
| | | float dw = (x - i*box_width)/box_width; |
| | | float dh = (y - j*box_height)/box_height; |
| | | //printf("%d %d %d %f %f\n", id, i, j, dh, dw); |
| | | int index = (i+j*num_width)*(4+classes); |
| | | if(truth[index+classes]) continue; |
| | | |
| | | int index = (i+j*num_width)*(4+classes+background); |
| | | if(truth[index+classes+background]) continue; |
| | | truth[index+id] = 1; |
| | | index += classes; |
| | | index += classes+background; |
| | | truth[index++] = dh; |
| | | truth[index++] = dw; |
| | | truth[index++] = h*(height+jitter)/height; |
| | | truth[index++] = w*(width+jitter)/width; |
| | | } |
| | | int i, j; |
| | | for(i = 0; i < num_height*num_width*(4+classes); i += 4+classes){ |
| | | int background = 1; |
| | | for(j = i; j < i+classes; ++j) if (truth[j]) background = 0; |
| | | truth[i+classes-1] = background; |
| | | free(boxes); |
| | | if(background){ |
| | | for(i = 0; i < num_height*num_width*(4+classes+background); i += 4+classes+background){ |
| | | int object = 0; |
| | | for(j = i; j < i+classes; ++j) if (truth[j]) object = 1; |
| | | truth[i+classes] = !object; |
| | | } |
| | | } |
| | | fclose(file); |
| | | } |
| | | |
| | | #define NUMCHARS 37 |
| | |
| | | } |
| | | } |
| | | |
| | | data load_data_detection_jitter_random(int n, char **paths, int m, int classes, int h, int w, int nh, int nw, int jitter) |
| | | data load_data_detection_jitter_random(int n, char **paths, int m, int classes, int h, int w, int nh, int nw, int jitter, int background) |
| | | { |
| | | char **random_paths = get_random_paths(paths, n, m); |
| | | int i; |
| | | data d; |
| | | d.shallow = 0; |
| | | d.X = load_image_paths(random_paths, n, h, w); |
| | | int k = nh*nw*(4+classes); |
| | | int k = nh*nw*(4+classes+background); |
| | | d.y = make_matrix(n, k); |
| | | for(i = 0; i < n; ++i){ |
| | | int dx = rand()%jitter; |
| | | int dy = rand()%jitter; |
| | | int flip = rand()%2; |
| | | fill_truth_detection(random_paths[i], d.y.vals[i], classes, h-jitter, w-jitter, nh, nw, dy, dx, jitter, flip); |
| | | fill_truth_detection(random_paths[i], d.y.vals[i], classes, h-jitter, w-jitter, nh, nw, dy, dx, jitter, flip, background); |
| | | image a = float_to_image(h, w, 3, d.X.vals[i]); |
| | | if(flip) flip_image(a); |
| | | jitter_image(a,h-jitter,w-jitter,dy,dx); |
| | |
| | | { |
| | | printf("Loading data: %d\n", rand()); |
| | | struct load_args a = *(struct load_args*)ptr; |
| | | *a.d = load_data_detection_jitter_random(a.n, a.paths, a.m, a.classes, a.h, a.w, a.nh, a.nw, a.jitter); |
| | | *a.d = load_data_detection_jitter_random(a.n, a.paths, a.m, a.classes, a.h, a.w, a.nh, a.nw, a.jitter, a.background); |
| | | translate_data_rows(*a.d, -128); |
| | | scale_data_rows(*a.d, 1./128); |
| | | free(ptr); |
| | | return 0; |
| | | } |
| | | |
| | | pthread_t load_data_detection_thread(int n, char **paths, int m, int classes, int h, int w, int nh, int nw, int jitter, data *d) |
| | | pthread_t load_data_detection_thread(int n, char **paths, int m, int classes, int h, int w, int nh, int nw, int jitter, int background, data *d) |
| | | { |
| | | pthread_t thread; |
| | | struct load_args *args = calloc(1, sizeof(struct load_args)); |
| | |
| | | args->nw = nw; |
| | | args->classes = classes; |
| | | args->jitter = jitter; |
| | | args->background = background; |
| | | args->d = d; |
| | | if(pthread_create(&thread, 0, load_detection_thread, args)) { |
| | | error("Thread creation failed"); |
| | |
| | | data load_data(char **paths, int n, int m, char **labels, int k, int h, int w); |
| | | pthread_t load_data_thread(char **paths, int n, int m, char **labels, int k, int h, int w, data *d); |
| | | |
| | | pthread_t load_data_detection_thread(int n, char **paths, int m, int classes, int h, int w, int nh, int nw, int jitter, data *d); |
| | | data load_data_detection_jitter_random(int n, char **paths, int m, int classes, int h, int w, int nh, int nw, int jitter); |
| | | pthread_t load_data_detection_thread(int n, char **paths, int m, int classes, int h, int w, int nh, int nw, int jitter, int background, data *d); |
| | | data load_data_detection_jitter_random(int n, char **paths, int m, int classes, int h, int w, int nh, int nw, int jitter, int background); |
| | | |
| | | data load_data_image_pathfile(char *filename, char **labels, int k, int h, int w); |
| | | data load_cifar10_data(char *filename); |
| | |
| | | #include "cuda.h" |
| | | } |
| | | |
| | | extern "C" void forward_deconvolutional_layer_gpu(deconvolutional_layer layer, float *in) |
| | | extern "C" void forward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state) |
| | | { |
| | | int i; |
| | | int out_h = deconvolutional_out_height(layer); |
| | |
| | | |
| | | for(i = 0; i < layer.batch; ++i){ |
| | | float *a = layer.filters_gpu; |
| | | float *b = in + i*layer.c*layer.h*layer.w; |
| | | float *b = state.input + i*layer.c*layer.h*layer.w; |
| | | float *c = layer.col_image_gpu; |
| | | |
| | | gemm_ongpu(1,0,m,n,k,1,a,m,b,n,0,c,n); |
| | |
| | | activate_array(layer.output_gpu, layer.batch*layer.n*size, layer.activation); |
| | | } |
| | | |
| | | extern "C" void backward_deconvolutional_layer_gpu(deconvolutional_layer layer, float *in, float *delta_gpu) |
| | | extern "C" void backward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state) |
| | | { |
| | | float alpha = 1./layer.batch; |
| | | int out_h = deconvolutional_out_height(layer); |
| | |
| | | gradient_array(layer.output_gpu, size*layer.n*layer.batch, layer.activation, layer.delta_gpu); |
| | | backward_bias(layer.bias_updates_gpu, layer.delta, layer.batch, layer.n, size); |
| | | |
| | | if(delta_gpu) memset(delta_gpu, 0, layer.batch*layer.h*layer.w*layer.c*sizeof(float)); |
| | | if(state.delta) memset(state.delta, 0, layer.batch*layer.h*layer.w*layer.c*sizeof(float)); |
| | | |
| | | for(i = 0; i < layer.batch; ++i){ |
| | | int m = layer.c; |
| | | int n = layer.size*layer.size*layer.n; |
| | | int k = layer.h*layer.w; |
| | | |
| | | float *a = in + i*m*n; |
| | | float *a = state.input + i*m*n; |
| | | float *b = layer.col_image_gpu; |
| | | float *c = layer.filter_updates_gpu; |
| | | |
| | |
| | | layer.size, layer.stride, 0, b); |
| | | gemm_ongpu(0,1,m,n,k,alpha,a,k,b,k,1,c,n); |
| | | |
| | | if(delta_gpu){ |
| | | if(state.delta){ |
| | | int m = layer.c; |
| | | int n = layer.h*layer.w; |
| | | int k = layer.size*layer.size*layer.n; |
| | | |
| | | float *a = layer.filters_gpu; |
| | | float *b = layer.col_image_gpu; |
| | | float *c = delta_gpu + i*n*m; |
| | | float *c = state.delta + i*n*m; |
| | | |
| | | gemm(0,0,m,n,k,1,a,k,b,n,1,c,n); |
| | | } |
| | |
| | | cuda_push_array(layer.bias_updates_gpu, layer.bias_updates, layer.n); |
| | | } |
| | | |
| | | extern "C" void update_deconvolutional_layer_gpu(deconvolutional_layer layer) |
| | | extern "C" void update_deconvolutional_layer_gpu(deconvolutional_layer layer, float learning_rate, float momentum, float decay) |
| | | { |
| | | int size = layer.size*layer.size*layer.c*layer.n; |
| | | |
| | | axpy_ongpu(layer.n, layer.learning_rate, layer.bias_updates_gpu, 1, layer.biases_gpu, 1); |
| | | scal_ongpu(layer.n,layer.momentum, layer.bias_updates_gpu, 1); |
| | | axpy_ongpu(layer.n, learning_rate, layer.bias_updates_gpu, 1, layer.biases_gpu, 1); |
| | | scal_ongpu(layer.n, momentum, layer.bias_updates_gpu, 1); |
| | | |
| | | axpy_ongpu(size, -layer.decay, layer.filters_gpu, 1, layer.filter_updates_gpu, 1); |
| | | axpy_ongpu(size, layer.learning_rate, layer.filter_updates_gpu, 1, layer.filters_gpu, 1); |
| | | scal_ongpu(size, layer.momentum, layer.filter_updates_gpu, 1); |
| | | axpy_ongpu(size, -decay, layer.filters_gpu, 1, layer.filter_updates_gpu, 1); |
| | | axpy_ongpu(size, learning_rate, layer.filter_updates_gpu, 1, layer.filters_gpu, 1); |
| | | scal_ongpu(size, momentum, layer.filter_updates_gpu, 1); |
| | | } |
| | | |
| | |
| | | return float_to_image(h,w,c,layer.delta); |
| | | } |
| | | |
| | | deconvolutional_layer *make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, ACTIVATION activation, float learning_rate, float momentum, float decay) |
| | | deconvolutional_layer *make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, ACTIVATION activation) |
| | | { |
| | | int i; |
| | | deconvolutional_layer *layer = calloc(1, sizeof(deconvolutional_layer)); |
| | | |
| | | layer->learning_rate = learning_rate; |
| | | layer->momentum = momentum; |
| | | layer->decay = decay; |
| | | |
| | | layer->h = h; |
| | | layer->w = w; |
| | | layer->c = c; |
| | |
| | | #endif |
| | | } |
| | | |
| | | void forward_deconvolutional_layer(const deconvolutional_layer layer, float *in) |
| | | void forward_deconvolutional_layer(const deconvolutional_layer layer, network_state state) |
| | | { |
| | | int i; |
| | | int out_h = deconvolutional_out_height(layer); |
| | |
| | | |
| | | for(i = 0; i < layer.batch; ++i){ |
| | | float *a = layer.filters; |
| | | float *b = in + i*layer.c*layer.h*layer.w; |
| | | float *b = state.input + i*layer.c*layer.h*layer.w; |
| | | float *c = layer.col_image; |
| | | |
| | | gemm(1,0,m,n,k,1,a,m,b,n,0,c,n); |
| | |
| | | activate_array(layer.output, layer.batch*layer.n*size, layer.activation); |
| | | } |
| | | |
| | | void backward_deconvolutional_layer(deconvolutional_layer layer, float *in, float *delta) |
| | | void backward_deconvolutional_layer(deconvolutional_layer layer, network_state state) |
| | | { |
| | | float alpha = 1./layer.batch; |
| | | int out_h = deconvolutional_out_height(layer); |
| | |
| | | gradient_array(layer.output, size*layer.n*layer.batch, layer.activation, layer.delta); |
| | | backward_bias(layer.bias_updates, layer.delta, layer.batch, layer.n, size); |
| | | |
| | | if(delta) memset(delta, 0, layer.batch*layer.h*layer.w*layer.c*sizeof(float)); |
| | | if(state.delta) memset(state.delta, 0, layer.batch*layer.h*layer.w*layer.c*sizeof(float)); |
| | | |
| | | for(i = 0; i < layer.batch; ++i){ |
| | | int m = layer.c; |
| | | int n = layer.size*layer.size*layer.n; |
| | | int k = layer.h*layer.w; |
| | | |
| | | float *a = in + i*m*n; |
| | | float *a = state.input + i*m*n; |
| | | float *b = layer.col_image; |
| | | float *c = layer.filter_updates; |
| | | |
| | |
| | | layer.size, layer.stride, 0, b); |
| | | gemm(0,1,m,n,k,alpha,a,k,b,k,1,c,n); |
| | | |
| | | if(delta){ |
| | | if(state.delta){ |
| | | int m = layer.c; |
| | | int n = layer.h*layer.w; |
| | | int k = layer.size*layer.size*layer.n; |
| | | |
| | | float *a = layer.filters; |
| | | float *b = layer.col_image; |
| | | float *c = delta + i*n*m; |
| | | float *c = state.delta + i*n*m; |
| | | |
| | | gemm(0,0,m,n,k,1,a,k,b,n,1,c,n); |
| | | } |
| | | } |
| | | } |
| | | |
| | | void update_deconvolutional_layer(deconvolutional_layer layer) |
| | | void update_deconvolutional_layer(deconvolutional_layer layer, float learning_rate, float momentum, float decay) |
| | | { |
| | | int size = layer.size*layer.size*layer.c*layer.n; |
| | | axpy_cpu(layer.n, layer.learning_rate, layer.bias_updates, 1, layer.biases, 1); |
| | | scal_cpu(layer.n, layer.momentum, layer.bias_updates, 1); |
| | | axpy_cpu(layer.n, learning_rate, layer.bias_updates, 1, layer.biases, 1); |
| | | scal_cpu(layer.n, momentum, layer.bias_updates, 1); |
| | | |
| | | axpy_cpu(size, -layer.decay, layer.filters, 1, layer.filter_updates, 1); |
| | | axpy_cpu(size, layer.learning_rate, layer.filter_updates, 1, layer.filters, 1); |
| | | scal_cpu(size, layer.momentum, layer.filter_updates, 1); |
| | | axpy_cpu(size, -decay, layer.filters, 1, layer.filter_updates, 1); |
| | | axpy_cpu(size, learning_rate, layer.filter_updates, 1, layer.filters, 1); |
| | | scal_cpu(size, momentum, layer.filter_updates, 1); |
| | | } |
| | | |
| | | |
| | |
| | | #define DECONVOLUTIONAL_LAYER_H |
| | | |
| | | #include "cuda.h" |
| | | #include "params.h" |
| | | #include "image.h" |
| | | #include "activations.h" |
| | | |
| | | typedef struct { |
| | | float learning_rate; |
| | | float momentum; |
| | | float decay; |
| | | |
| | | int batch; |
| | | int h,w,c; |
| | | int n; |
| | |
| | | } deconvolutional_layer; |
| | | |
| | | #ifdef GPU |
| | | void forward_deconvolutional_layer_gpu(deconvolutional_layer layer, float * in); |
| | | void backward_deconvolutional_layer_gpu(deconvolutional_layer layer, float * in, float * delta_gpu); |
| | | void update_deconvolutional_layer_gpu(deconvolutional_layer layer); |
| | | void forward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state); |
| | | void backward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state); |
| | | void update_deconvolutional_layer_gpu(deconvolutional_layer layer, float learning_rate, float momentum, float decay); |
| | | void push_deconvolutional_layer(deconvolutional_layer layer); |
| | | void pull_deconvolutional_layer(deconvolutional_layer layer); |
| | | #endif |
| | | |
| | | deconvolutional_layer *make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, ACTIVATION activation, float learning_rate, float momentum, float decay); |
| | | deconvolutional_layer *make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, ACTIVATION activation); |
| | | void resize_deconvolutional_layer(deconvolutional_layer *layer, int h, int w); |
| | | void forward_deconvolutional_layer(const deconvolutional_layer layer, float *in); |
| | | void update_deconvolutional_layer(deconvolutional_layer layer); |
| | | void backward_deconvolutional_layer(deconvolutional_layer layer, float *in, float *delta); |
| | | void forward_deconvolutional_layer(const deconvolutional_layer layer, network_state state); |
| | | void update_deconvolutional_layer(deconvolutional_layer layer, float learning_rate, float momentum, float decay); |
| | | void backward_deconvolutional_layer(deconvolutional_layer layer, network_state state); |
| | | |
| | | image get_deconvolutional_image(deconvolutional_layer layer); |
| | | image get_deconvolutional_delta(deconvolutional_layer layer); |
| | |
| | | data train, buffer; |
| | | int im_dim = 512; |
| | | int jitter = 64; |
| | | int classes = 21; |
| | | pthread_t load_thread = load_data_detection_thread(imgs, paths, plist->size, classes, im_dim, im_dim, 7, 7, jitter, &buffer); |
| | | int classes = 20; |
| | | int background = 1; |
| | | pthread_t load_thread = load_data_detection_thread(imgs, paths, plist->size, classes, im_dim, im_dim, 7, 7, jitter, background, &buffer); |
| | | clock_t time; |
| | | while(1){ |
| | | i += 1; |
| | | time=clock(); |
| | | pthread_join(load_thread, 0); |
| | | train = buffer; |
| | | load_thread = load_data_detection_thread(imgs, paths, plist->size, classes, im_dim, im_dim, 7, 7, jitter, &buffer); |
| | | load_thread = load_data_detection_thread(imgs, paths, plist->size, classes, im_dim, im_dim, 7, 7, jitter, background, &buffer); |
| | | |
| | | /* |
| | | image im = float_to_image(im_dim - jitter, im_dim-jitter, 3, train.X.vals[0]); |
| | |
| | | srand(time(0)); |
| | | |
| | | list *plist = get_paths("/home/pjreddie/data/voc/val.txt"); |
| | | //list *plist = get_paths("/home/pjreddie/data/voc/train.txt"); |
| | | char **paths = (char **)list_to_array(plist); |
| | | int num_output = 1225; |
| | | int im_size = 448; |
| | | int classes = 21; |
| | | int classes = 20; |
| | | int background = 0; |
| | | int num_output = 7*7*(4+classes+background); |
| | | |
| | | int m = plist->size; |
| | | int i = 0; |
| | |
| | | matrix pred = network_predict_data(net, val); |
| | | int j, k, class; |
| | | for(j = 0; j < pred.rows; ++j){ |
| | | for(k = 0; k < pred.cols; k += classes+4){ |
| | | |
| | | /* |
| | | int z; |
| | | for(z = 0; z < 25; ++z) printf("%f, ", pred.vals[j][k+z]); |
| | | printf("\n"); |
| | | */ |
| | | |
| | | //if (pred.vals[j][k] > .001){ |
| | | for(class = 0; class < classes-1; ++class){ |
| | | int index = (k)/(classes+4); |
| | | for(k = 0; k < pred.cols; k += classes+4+background){ |
| | | for(class = 0; class < classes; ++class){ |
| | | int index = (k)/(classes+4+background); |
| | | int r = index/7; |
| | | int c = index%7; |
| | | float y = (r + pred.vals[j][k+0+classes])/7.; |
| | | float x = (c + pred.vals[j][k+1+classes])/7.; |
| | | float h = pred.vals[j][k+2+classes]; |
| | | float w = pred.vals[j][k+3+classes]; |
| | | int ci = k+classes+background; |
| | | float y = (r + pred.vals[j][ci + 0])/7.; |
| | | float x = (c + pred.vals[j][ci + 1])/7.; |
| | | float h = pred.vals[j][ci + 2]; |
| | | float w = pred.vals[j][ci + 3]; |
| | | printf("%d %d %f %f %f %f %f\n", (i-1)*m/splits + j, class, pred.vals[j][k+class], y, x, h, w); |
| | | } |
| | | //} |
| | | } |
| | | } |
| | | |
| | |
| | | return layer; |
| | | } |
| | | |
| | | void forward_detection_layer(const detection_layer layer, float *in, float *truth) |
| | | |
| | | void forward_detection_layer(const detection_layer layer, network_state state) |
| | | { |
| | | int in_i = 0; |
| | | int out_i = 0; |
| | | int locations = get_detection_layer_locations(layer); |
| | | int i,j; |
| | | for(i = 0; i < layer.batch*locations; ++i){ |
| | | int mask = (!truth || !truth[out_i + layer.classes - 1]); |
| | | int mask = (!state.truth || state.truth[out_i + layer.classes + 2]); |
| | | float scale = 1; |
| | | if(layer.rescore) scale = in[in_i++]; |
| | | if(layer.rescore) scale = state.input[in_i++]; |
| | | for(j = 0; j < layer.classes; ++j){ |
| | | layer.output[out_i++] = scale*in[in_i++]; |
| | | layer.output[out_i++] = scale*state.input[in_i++]; |
| | | } |
| | | softmax_array(layer.output + out_i - layer.classes, layer.classes, layer.output + out_i - layer.classes); |
| | | activate_array(in+in_i, layer.coords, LOGISTIC); |
| | | if(!layer.rescore){ |
| | | softmax_array(layer.output + out_i - layer.classes, layer.classes, layer.output + out_i - layer.classes); |
| | | activate_array(state.input+in_i, layer.coords, LOGISTIC); |
| | | } |
| | | for(j = 0; j < layer.coords; ++j){ |
| | | layer.output[out_i++] = mask*in[in_i++]; |
| | | layer.output[out_i++] = mask*state.input[in_i++]; |
| | | } |
| | | } |
| | | } |
| | | |
| | | void backward_detection_layer(const detection_layer layer, float *in, float *delta) |
| | | void dark_zone(detection_layer layer, int index, network_state state) |
| | | { |
| | | int size = layer.classes+layer.rescore+layer.coords; |
| | | int location = (index%(7*7*size)) / size ; |
| | | int r = location / 7; |
| | | int c = location % 7; |
| | | int class = index%size; |
| | | if(layer.rescore) --class; |
| | | int dr, dc; |
| | | for(dr = -1; dr <= 1; ++dr){ |
| | | for(dc = -1; dc <= 1; ++dc){ |
| | | if(!(dr || dc)) continue; |
| | | if((r + dr) > 6 || (r + dr) < 0) continue; |
| | | if((c + dc) > 6 || (c + dc) < 0) continue; |
| | | int di = (dr*7 + dc) * size; |
| | | if(state.truth[index+di]) continue; |
| | | layer.delta[index + di] = 0; |
| | | } |
| | | } |
| | | } |
| | | |
| | | void backward_detection_layer(const detection_layer layer, network_state state) |
| | | { |
| | | int locations = get_detection_layer_locations(layer); |
| | | int i,j; |
| | |
| | | for(i = 0; i < layer.batch*locations; ++i){ |
| | | float scale = 1; |
| | | float latent_delta = 0; |
| | | if(layer.rescore) scale = in[in_i++]; |
| | | if(layer.rescore) scale = state.input[in_i++]; |
| | | if(!layer.rescore){ |
| | | for(j = 0; j < layer.classes-1; ++j){ |
| | | if(state.truth[out_i + j]) dark_zone(layer, out_i+j, state); |
| | | } |
| | | } |
| | | for(j = 0; j < layer.classes; ++j){ |
| | | latent_delta += in[in_i]*layer.delta[out_i]; |
| | | delta[in_i++] = scale*layer.delta[out_i++]; |
| | | latent_delta += state.input[in_i]*layer.delta[out_i]; |
| | | state.delta[in_i++] = scale*layer.delta[out_i++]; |
| | | } |
| | | |
| | | gradient_array(layer.output + out_i, layer.coords, LOGISTIC, layer.delta + out_i); |
| | | |
| | | if (!layer.rescore) gradient_array(layer.output + out_i, layer.coords, LOGISTIC, layer.delta + out_i); |
| | | for(j = 0; j < layer.coords; ++j){ |
| | | delta[in_i++] = layer.delta[out_i++]; |
| | | state.delta[in_i++] = layer.delta[out_i++]; |
| | | } |
| | | if(layer.rescore) delta[in_i-layer.coords-layer.classes-layer.rescore] = latent_delta; |
| | | if(layer.rescore) state.delta[in_i-layer.coords-layer.classes-layer.rescore] = latent_delta; |
| | | } |
| | | } |
| | | |
| | | #ifdef GPU |
| | | |
| | | void forward_detection_layer_gpu(const detection_layer layer, float *in, float *truth) |
| | | void forward_detection_layer_gpu(const detection_layer layer, network_state state) |
| | | { |
| | | int outputs = get_detection_layer_output_size(layer); |
| | | float *in_cpu = calloc(layer.batch*layer.inputs, sizeof(float)); |
| | | float *truth_cpu = 0; |
| | | if(truth){ |
| | | if(state.truth){ |
| | | truth_cpu = calloc(layer.batch*outputs, sizeof(float)); |
| | | cuda_pull_array(truth, truth_cpu, layer.batch*outputs); |
| | | cuda_pull_array(state.truth, truth_cpu, layer.batch*outputs); |
| | | } |
| | | cuda_pull_array(in, in_cpu, layer.batch*layer.inputs); |
| | | forward_detection_layer(layer, in_cpu, truth_cpu); |
| | | cuda_pull_array(state.input, in_cpu, layer.batch*layer.inputs); |
| | | network_state cpu_state; |
| | | cpu_state.train = state.train; |
| | | cpu_state.truth = truth_cpu; |
| | | cpu_state.input = in_cpu; |
| | | forward_detection_layer(layer, cpu_state); |
| | | cuda_push_array(layer.output_gpu, layer.output, layer.batch*outputs); |
| | | free(in_cpu); |
| | | if(truth_cpu) free(truth_cpu); |
| | | free(cpu_state.input); |
| | | if(cpu_state.truth) free(cpu_state.truth); |
| | | } |
| | | |
| | | void backward_detection_layer_gpu(detection_layer layer, float *in, float *delta) |
| | | void backward_detection_layer_gpu(detection_layer layer, network_state state) |
| | | { |
| | | int outputs = get_detection_layer_output_size(layer); |
| | | |
| | | float *in_cpu = calloc(layer.batch*layer.inputs, sizeof(float)); |
| | | float *delta_cpu = calloc(layer.batch*layer.inputs, sizeof(float)); |
| | | float *truth_cpu = 0; |
| | | if(state.truth){ |
| | | truth_cpu = calloc(layer.batch*outputs, sizeof(float)); |
| | | cuda_pull_array(state.truth, truth_cpu, layer.batch*outputs); |
| | | } |
| | | network_state cpu_state; |
| | | cpu_state.train = state.train; |
| | | cpu_state.input = in_cpu; |
| | | cpu_state.truth = truth_cpu; |
| | | cpu_state.delta = delta_cpu; |
| | | |
| | | cuda_pull_array(in, in_cpu, layer.batch*layer.inputs); |
| | | cuda_pull_array(state.input, in_cpu, layer.batch*layer.inputs); |
| | | cuda_pull_array(layer.delta_gpu, layer.delta, layer.batch*outputs); |
| | | backward_detection_layer(layer, in_cpu, delta_cpu); |
| | | cuda_push_array(delta, delta_cpu, layer.batch*layer.inputs); |
| | | backward_detection_layer(layer, cpu_state); |
| | | cuda_push_array(state.delta, delta_cpu, layer.batch*layer.inputs); |
| | | |
| | | free(in_cpu); |
| | | free(delta_cpu); |
| | |
| | | #ifndef DETECTION_LAYER_H |
| | | #define DETECTION_LAYER_H |
| | | |
| | | #include "params.h" |
| | | |
| | | typedef struct { |
| | | int batch; |
| | | int inputs; |
| | |
| | | } detection_layer; |
| | | |
| | | detection_layer *make_detection_layer(int batch, int inputs, int classes, int coords, int rescore); |
| | | void forward_detection_layer(const detection_layer layer, float *in, float *truth); |
| | | void backward_detection_layer(const detection_layer layer, float *in, float *delta); |
| | | void forward_detection_layer(const detection_layer layer, network_state state); |
| | | void backward_detection_layer(const detection_layer layer, network_state state); |
| | | int get_detection_layer_output_size(detection_layer layer); |
| | | |
| | | #ifdef GPU |
| | | void forward_detection_layer_gpu(const detection_layer layer, float *in, float *truth); |
| | | void backward_detection_layer_gpu(detection_layer layer, float *in, float *delta); |
| | | void forward_detection_layer_gpu(const detection_layer layer, network_state state); |
| | | void backward_detection_layer_gpu(detection_layer layer, network_state state); |
| | | #endif |
| | | |
| | | #endif |
| | |
| | | #include "dropout_layer.h" |
| | | #include "params.h" |
| | | #include "utils.h" |
| | | #include "cuda.h" |
| | | #include <stdlib.h> |
| | |
| | | layer->probability = probability; |
| | | layer->inputs = inputs; |
| | | layer->batch = batch; |
| | | layer->output = calloc(inputs*batch, sizeof(float)); |
| | | layer->rand = calloc(inputs*batch, sizeof(float)); |
| | | layer->scale = 1./(1.-probability); |
| | | #ifdef GPU |
| | | layer->output_gpu = cuda_make_array(layer->output, inputs*batch); |
| | | layer->rand_gpu = cuda_make_array(layer->rand, inputs*batch); |
| | | #endif |
| | | return layer; |
| | |
| | | |
| | | void resize_dropout_layer(dropout_layer *layer, int inputs) |
| | | { |
| | | layer->output = realloc(layer->output, layer->inputs*layer->batch*sizeof(float)); |
| | | layer->rand = realloc(layer->rand, layer->inputs*layer->batch*sizeof(float)); |
| | | #ifdef GPU |
| | | cuda_free(layer->output_gpu); |
| | | cuda_free(layer->rand_gpu); |
| | | |
| | | layer->output_gpu = cuda_make_array(layer->output, inputs*layer->batch); |
| | | layer->rand_gpu = cuda_make_array(layer->rand, inputs*layer->batch); |
| | | #endif |
| | | } |
| | | |
| | | void forward_dropout_layer(dropout_layer layer, float *input) |
| | | void forward_dropout_layer(dropout_layer layer, network_state state) |
| | | { |
| | | int i; |
| | | if (!state.train) return; |
| | | for(i = 0; i < layer.batch * layer.inputs; ++i){ |
| | | float r = rand_uniform(); |
| | | layer.rand[i] = r; |
| | | if(r < layer.probability) layer.output[i] = 0; |
| | | else layer.output[i] = input[i]*layer.scale; |
| | | if(r < layer.probability) state.input[i] = 0; |
| | | else state.input[i] *= layer.scale; |
| | | } |
| | | } |
| | | |
| | | void backward_dropout_layer(dropout_layer layer, float *delta) |
| | | void backward_dropout_layer(dropout_layer layer, network_state state) |
| | | { |
| | | int i; |
| | | if(!delta) return; |
| | | if(!state.delta) return; |
| | | for(i = 0; i < layer.batch * layer.inputs; ++i){ |
| | | float r = layer.rand[i]; |
| | | if(r < layer.probability) delta[i] = 0; |
| | | else delta[i] *= layer.scale; |
| | | if(r < layer.probability) state.delta[i] = 0; |
| | | else state.delta[i] *= layer.scale; |
| | | } |
| | | } |
| | | |
| | |
| | | #ifndef DROPOUT_LAYER_H |
| | | #define DROPOUT_LAYER_H |
| | | #include "params.h" |
| | | |
| | | typedef struct{ |
| | | int batch; |
| | |
| | | float probability; |
| | | float scale; |
| | | float *rand; |
| | | float *output; |
| | | #ifdef GPU |
| | | float * rand_gpu; |
| | | float * output_gpu; |
| | | #endif |
| | | } dropout_layer; |
| | | |
| | | dropout_layer *make_dropout_layer(int batch, int inputs, float probability); |
| | | |
| | | void forward_dropout_layer(dropout_layer layer, float *input); |
| | | void backward_dropout_layer(dropout_layer layer, float *delta); |
| | | void forward_dropout_layer(dropout_layer layer, network_state state); |
| | | void backward_dropout_layer(dropout_layer layer, network_state state); |
| | | void resize_dropout_layer(dropout_layer *layer, int inputs); |
| | | |
| | | #ifdef GPU |
| | | void forward_dropout_layer_gpu(dropout_layer layer, float * input); |
| | | void backward_dropout_layer_gpu(dropout_layer layer, float * delta); |
| | | void forward_dropout_layer_gpu(dropout_layer layer, network_state state); |
| | | void backward_dropout_layer_gpu(dropout_layer layer, network_state state); |
| | | |
| | | #endif |
| | | #endif |
| | |
| | | #include "dropout_layer.h" |
| | | #include "cuda.h" |
| | | #include "utils.h" |
| | | #include "params.h" |
| | | } |
| | | |
| | | __global__ void yoloswag420blazeit360noscope(float *input, int size, float *rand, float prob, float scale, float *output) |
| | | __global__ void yoloswag420blazeit360noscope(float *input, int size, float *rand, float prob, float scale) |
| | | { |
| | | int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; |
| | | if(id < size) output[id] = (rand[id] < prob) ? 0 : input[id]*scale; |
| | | if(id < size) input[id] = (rand[id] < prob) ? 0 : input[id]*scale; |
| | | } |
| | | |
| | | extern "C" void forward_dropout_layer_gpu(dropout_layer layer, float * input) |
| | | extern "C" void forward_dropout_layer_gpu(dropout_layer layer, network_state state) |
| | | { |
| | | if (!state.train) return; |
| | | int j; |
| | | int size = layer.inputs*layer.batch; |
| | | for(j = 0; j < size; ++j) layer.rand[j] = rand_uniform(); |
| | | cuda_push_array(layer.rand_gpu, layer.rand, layer.inputs*layer.batch); |
| | | |
| | | yoloswag420blazeit360noscope<<<cuda_gridsize(size), BLOCK>>>(input, size, layer.rand_gpu, layer.probability, |
| | | layer.scale, layer.output_gpu); |
| | | yoloswag420blazeit360noscope<<<cuda_gridsize(size), BLOCK>>>(state.input, size, layer.rand_gpu, layer.probability, layer.scale); |
| | | check_error(cudaPeekAtLastError()); |
| | | } |
| | | |
| | | extern "C" void backward_dropout_layer_gpu(dropout_layer layer, float *delta) |
| | | extern "C" void backward_dropout_layer_gpu(dropout_layer layer, network_state state) |
| | | { |
| | | if(!delta) return; |
| | | if(!state.delta) return; |
| | | int size = layer.inputs*layer.batch; |
| | | |
| | | yoloswag420blazeit360noscope<<<cuda_gridsize(size), BLOCK>>>(delta, size, layer.rand_gpu, layer.probability, |
| | | layer.scale, delta); |
| | | yoloswag420blazeit360noscope<<<cuda_gridsize(size), BLOCK>>>(state.delta, size, layer.rand_gpu, layer.probability, layer.scale); |
| | | check_error(cudaPeekAtLastError()); |
| | | } |
| | |
| | | #endif |
| | | } |
| | | |
| | | void forward_maxpool_layer(const maxpool_layer layer, float *input) |
| | | void forward_maxpool_layer(const maxpool_layer layer, network_state state) |
| | | { |
| | | int b,i,j,k,l,m; |
| | | int w_offset = (-layer.size-1)/2 + 1; |
| | |
| | | int index = cur_w + layer.w*(cur_h + layer.h*(k + b*layer.c)); |
| | | int valid = (cur_h >= 0 && cur_h < layer.h && |
| | | cur_w >= 0 && cur_w < layer.w); |
| | | float val = (valid != 0) ? input[index] : -FLT_MAX; |
| | | float val = (valid != 0) ? state.input[index] : -FLT_MAX; |
| | | max_i = (val > max) ? index : max_i; |
| | | max = (val > max) ? val : max; |
| | | } |
| | |
| | | } |
| | | } |
| | | |
| | | void backward_maxpool_layer(const maxpool_layer layer, float *delta) |
| | | void backward_maxpool_layer(const maxpool_layer layer, network_state state) |
| | | { |
| | | int i; |
| | | int h = (layer.h-1)/layer.stride + 1; |
| | | int w = (layer.w-1)/layer.stride + 1; |
| | | int c = layer.c; |
| | | memset(delta, 0, layer.batch*layer.h*layer.w*layer.c*sizeof(float)); |
| | | memset(state.delta, 0, layer.batch*layer.h*layer.w*layer.c*sizeof(float)); |
| | | for(i = 0; i < h*w*c*layer.batch; ++i){ |
| | | int index = layer.indexes[i]; |
| | | delta[index] += layer.delta[i]; |
| | | state.delta[index] += layer.delta[i]; |
| | | } |
| | | } |
| | | |
| | |
| | | #define MAXPOOL_LAYER_H |
| | | |
| | | #include "image.h" |
| | | #include "params.h" |
| | | #include "cuda.h" |
| | | |
| | | typedef struct { |
| | |
| | | image get_maxpool_image(maxpool_layer layer); |
| | | maxpool_layer *make_maxpool_layer(int batch, int h, int w, int c, int size, int stride); |
| | | void resize_maxpool_layer(maxpool_layer *layer, int h, int w); |
| | | void forward_maxpool_layer(const maxpool_layer layer, float *input); |
| | | void backward_maxpool_layer(const maxpool_layer layer, float *delta); |
| | | void forward_maxpool_layer(const maxpool_layer layer, network_state state); |
| | | void backward_maxpool_layer(const maxpool_layer layer, network_state state); |
| | | |
| | | #ifdef GPU |
| | | void forward_maxpool_layer_gpu(maxpool_layer layer, float * input); |
| | | void backward_maxpool_layer_gpu(maxpool_layer layer, float * delta); |
| | | void forward_maxpool_layer_gpu(maxpool_layer layer, network_state state); |
| | | void backward_maxpool_layer_gpu(maxpool_layer layer, network_state state); |
| | | #endif |
| | | |
| | | #endif |
| | |
| | | prev_delta[index] = d; |
| | | } |
| | | |
| | | extern "C" void forward_maxpool_layer_gpu(maxpool_layer layer, float *input) |
| | | extern "C" void forward_maxpool_layer_gpu(maxpool_layer layer, network_state state) |
| | | { |
| | | int h = (layer.h-1)/layer.stride + 1; |
| | | int w = (layer.w-1)/layer.stride + 1; |
| | |
| | | |
| | | size_t n = h*w*c*layer.batch; |
| | | |
| | | forward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, input, layer.output_gpu, layer.indexes_gpu); |
| | | forward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, state.input, layer.output_gpu, layer.indexes_gpu); |
| | | check_error(cudaPeekAtLastError()); |
| | | } |
| | | |
| | | extern "C" void backward_maxpool_layer_gpu(maxpool_layer layer, float * delta) |
| | | extern "C" void backward_maxpool_layer_gpu(maxpool_layer layer, network_state state) |
| | | { |
| | | size_t n = layer.h*layer.w*layer.c*layer.batch; |
| | | |
| | | backward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.delta_gpu, delta, layer.indexes_gpu); |
| | | backward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.delta_gpu, state.delta, layer.indexes_gpu); |
| | | check_error(cudaPeekAtLastError()); |
| | | } |
| | | |
| | |
| | | #include "image.h" |
| | | #include "data.h" |
| | | #include "utils.h" |
| | | #include "params.h" |
| | | |
| | | #include "crop_layer.h" |
| | | #include "connected_layer.h" |
| | |
| | | #include "maxpool_layer.h" |
| | | #include "cost_layer.h" |
| | | #include "normalization_layer.h" |
| | | #include "freeweight_layer.h" |
| | | #include "softmax_layer.h" |
| | | #include "dropout_layer.h" |
| | | |
| | |
| | | return "normalization"; |
| | | case DROPOUT: |
| | | return "dropout"; |
| | | case FREEWEIGHT: |
| | | return "freeweight"; |
| | | case CROP: |
| | | return "crop"; |
| | | case COST: |
| | |
| | | return "none"; |
| | | } |
| | | |
| | | network make_network(int n, int batch) |
| | | network make_network(int n) |
| | | { |
| | | network net; |
| | | net.n = n; |
| | | net.batch = batch; |
| | | net.layers = calloc(net.n, sizeof(void *)); |
| | | net.types = calloc(net.n, sizeof(LAYER_TYPE)); |
| | | net.outputs = 0; |
| | | net.output = 0; |
| | | net.seen = 0; |
| | | net.batch = 0; |
| | | net.inputs = 0; |
| | | net.h = net.w = net.c = 0; |
| | | #ifdef GPU |
| | | net.input_gpu = calloc(1, sizeof(float *)); |
| | | net.truth_gpu = calloc(1, sizeof(float *)); |
| | |
| | | return net; |
| | | } |
| | | |
// forward_network: walks layers 0 .. net.n-1 and dispatches on net.types[i] to
// the matching forward_*_layer routine.
// NOTE(review): rendered diff hunk; both forms of each changed statement are shown.
//  - Rows that copy the layer struct, pass `input`/`truth`/`train`, and then do
//    `input = layer.output` belong to the (network, float*, float*, int) signature.
//  - Rows that pass `*(type *)net.layers[i]` together with `state` belong to the
//    (network, network_state) signature.
// Which rows survive is presumed from the signatures; confirm against the repository.
| | | void forward_network(network net, float *input, float *truth, int train) |
| | | void forward_network(network net, network_state state) |
| | | { |
| | | int i; |
| | | for(i = 0; i < net.n; ++i){ |
| | | if(net.types[i] == CONVOLUTIONAL){ |
| | | convolutional_layer layer = *(convolutional_layer *)net.layers[i]; |
| | | forward_convolutional_layer(layer, input); |
| | | input = layer.output; |
| | | forward_convolutional_layer(*(convolutional_layer *)net.layers[i], state); |
| | | } |
| | | else if(net.types[i] == DECONVOLUTIONAL){ |
| | | deconvolutional_layer layer = *(deconvolutional_layer *)net.layers[i]; |
| | | forward_deconvolutional_layer(layer, input); |
| | | input = layer.output; |
| | | forward_deconvolutional_layer(*(deconvolutional_layer *)net.layers[i], state); |
| | | } |
| | | else if(net.types[i] == DETECTION){ |
| | | detection_layer layer = *(detection_layer *)net.layers[i]; |
| | | forward_detection_layer(layer, input, truth); |
| | | input = layer.output; |
| | | forward_detection_layer(*(detection_layer *)net.layers[i], state); |
| | | } |
| | | else if(net.types[i] == CONNECTED){ |
| | | connected_layer layer = *(connected_layer *)net.layers[i]; |
| | | forward_connected_layer(layer, input); |
| | | input = layer.output; |
| | | forward_connected_layer(*(connected_layer *)net.layers[i], state); |
| | | } |
| | | else if(net.types[i] == CROP){ |
| | | crop_layer layer = *(crop_layer *)net.layers[i]; |
| | | forward_crop_layer(layer, train, input); |
| | | input = layer.output; |
| | | forward_crop_layer(*(crop_layer *)net.layers[i], state); |
| | | } |
// The cost layer has no `input = layer.output` row in either form.
| | | else if(net.types[i] == COST){ |
| | | cost_layer layer = *(cost_layer *)net.layers[i]; |
| | | forward_cost_layer(layer, input, truth); |
| | | forward_cost_layer(*(cost_layer *)net.layers[i], state); |
| | | } |
| | | else if(net.types[i] == SOFTMAX){ |
| | | softmax_layer layer = *(softmax_layer *)net.layers[i]; |
| | | forward_softmax_layer(layer, input); |
| | | input = layer.output; |
| | | forward_softmax_layer(*(softmax_layer *)net.layers[i], state); |
| | | } |
| | | else if(net.types[i] == MAXPOOL){ |
| | | maxpool_layer layer = *(maxpool_layer *)net.layers[i]; |
| | | forward_maxpool_layer(layer, input); |
| | | input = layer.output; |
| | | forward_maxpool_layer(*(maxpool_layer *)net.layers[i], state); |
| | | } |
| | | else if(net.types[i] == NORMALIZATION){ |
| | | normalization_layer layer = *(normalization_layer *)net.layers[i]; |
| | | forward_normalization_layer(layer, input); |
| | | input = layer.output; |
| | | forward_normalization_layer(*(normalization_layer *)net.layers[i], state); |
| | | } |
// NOTE(review): `if(!train) continue;` names `train`, which only the float*-argument
// signature declares; presumably that guard goes away with the old signature.
| | | else if(net.types[i] == DROPOUT){ |
| | | if(!train) continue; |
| | | dropout_layer layer = *(dropout_layer *)net.layers[i]; |
| | | forward_dropout_layer(layer, input); |
| | | input = layer.output; |
| | | forward_dropout_layer(*(dropout_layer *)net.layers[i], state); |
| | | } |
| | | else if(net.types[i] == FREEWEIGHT){ |
| | | if(!train) continue; |
| | | //freeweight_layer layer = *(freeweight_layer *)net.layers[i]; |
| | | //forward_freeweight_layer(layer, input); |
| | | } |
| | | //char buff[256]; |
| | | //sprintf(buff, "layer %d", i); |
| | | //cuda_compare(get_network_output_gpu_layer(net, i), input, get_network_output_size_layer(net, i)*net.batch, buff); |
// Hands this layer's output to the next iteration as state.input.
| | | state.input = get_network_output_layer(net, i); |
| | | } |
| | | } |
| | | |
| | |
| | | for(i = 0; i < net.n; ++i){ |
| | | if(net.types[i] == CONVOLUTIONAL){ |
| | | convolutional_layer layer = *(convolutional_layer *)net.layers[i]; |
| | | update_convolutional_layer(layer); |
| | | update_convolutional_layer(layer, net.learning_rate, net.momentum, net.decay); |
| | | } |
| | | else if(net.types[i] == DECONVOLUTIONAL){ |
| | | deconvolutional_layer layer = *(deconvolutional_layer *)net.layers[i]; |
| | | update_deconvolutional_layer(layer); |
| | | update_deconvolutional_layer(layer, net.learning_rate, net.momentum, net.decay); |
| | | } |
| | | else if(net.types[i] == CONNECTED){ |
| | | connected_layer layer = *(connected_layer *)net.layers[i]; |
| | | update_connected_layer(layer); |
| | | update_connected_layer(layer, net.learning_rate, net.momentum, net.decay); |
| | | } |
| | | } |
| | | } |
| | |
// get_network_output_layer: returns the `output` pointer of layer i, selected by
// net.types[i]; returns 0 when no branch matches.
// NOTE(review): rendered diff hunk. Most branches show two forms: one that copies
// the layer struct and returns `layer.output`, and one that returns `->output`
// through a cast pointer without copying. Presumably the latter replaces the former.
| | | float *get_network_output_layer(network net, int i) |
| | | { |
| | | if(net.types[i] == CONVOLUTIONAL){ |
| | | convolutional_layer layer = *(convolutional_layer *)net.layers[i]; |
| | | return layer.output; |
| | | return ((convolutional_layer *)net.layers[i]) -> output; |
| | | } else if(net.types[i] == DECONVOLUTIONAL){ |
| | | deconvolutional_layer layer = *(deconvolutional_layer *)net.layers[i]; |
| | | return layer.output; |
| | | return ((deconvolutional_layer *)net.layers[i]) -> output; |
| | | } else if(net.types[i] == MAXPOOL){ |
| | | maxpool_layer layer = *(maxpool_layer *)net.layers[i]; |
| | | return layer.output; |
| | | return ((maxpool_layer *)net.layers[i]) -> output; |
| | | } else if(net.types[i] == DETECTION){ |
| | | detection_layer layer = *(detection_layer *)net.layers[i]; |
| | | return layer.output; |
| | | return ((detection_layer *)net.layers[i]) -> output; |
| | | } else if(net.types[i] == SOFTMAX){ |
| | | softmax_layer layer = *(softmax_layer *)net.layers[i]; |
| | | return layer.output; |
| | | return ((softmax_layer *)net.layers[i]) -> output; |
// The dropout branch appears in one form only (struct copy, returns layer.output).
| | | } else if(net.types[i] == DROPOUT){ |
| | | dropout_layer layer = *(dropout_layer *)net.layers[i]; |
| | | return layer.output; |
// A freeweight layer reports the output of the layer before it (index i-1).
| | | } else if(net.types[i] == FREEWEIGHT){ |
| | | return get_network_output_layer(net, i-1); |
| | | } else if(net.types[i] == CONNECTED){ |
| | | connected_layer layer = *(connected_layer *)net.layers[i]; |
| | | return layer.output; |
| | | return ((connected_layer *)net.layers[i]) -> output; |
| | | } else if(net.types[i] == CROP){ |
| | | crop_layer layer = *(crop_layer *)net.layers[i]; |
| | | return layer.output; |
| | | return ((crop_layer *)net.layers[i]) -> output; |
| | | } else if(net.types[i] == NORMALIZATION){ |
| | | normalization_layer layer = *(normalization_layer *)net.layers[i]; |
| | | return layer.output; |
| | | return ((normalization_layer *)net.layers[i]) -> output; |
| | | } |
| | | return 0; |
| | | } |
| | | |
| | | float *get_network_output(network net) |
| | | { |
| | | int i; |
| | |
| | | } else if(net.types[i] == DROPOUT){ |
| | | if(i == 0) return 0; |
| | | return get_network_delta_layer(net, i-1); |
| | | } else if(net.types[i] == FREEWEIGHT){ |
| | | return get_network_delta_layer(net, i-1); |
| | | } else if(net.types[i] == CONNECTED){ |
| | | connected_layer layer = *(connected_layer *)net.layers[i]; |
| | | return layer.delta; |
| | |
| | | return max_index(out, k); |
| | | } |
| | | |
// backward_network: walks layers from net.n-1 down to 0 and dispatches on
// net.types[i] to the matching backward_*_layer routine.
// For layer 0 the input is the network's own input and the delta pointer is 0;
// for any other layer both come from layer i-1 via get_network_output_layer /
// get_network_delta_layer.
// NOTE(review): rendered diff hunk. Rows using prev_input/prev_delta go with the
// (network, float*, float*) signature; rows using `state` go with the
// (network, network_state) signature. Presumed old-then-new; confirm upstream.
| | | void backward_network(network net, float *input, float *truth) |
| | | void backward_network(network net, network_state state) |
| | | { |
| | | int i; |
| | | float *prev_input; |
| | | float *prev_delta; |
// state.input is overwritten inside the loop, so the caller's input is saved here
// for use when i == 0.
| | | float *original_input = state.input; |
| | | for(i = net.n-1; i >= 0; --i){ |
| | | if(i == 0){ |
| | | prev_input = input; |
| | | prev_delta = 0; |
| | | state.input = original_input; |
| | | state.delta = 0; |
| | | }else{ |
| | | prev_input = get_network_output_layer(net, i-1); |
| | | prev_delta = get_network_delta_layer(net, i-1); |
| | | state.input = get_network_output_layer(net, i-1); |
| | | state.delta = get_network_delta_layer(net, i-1); |
| | | } |
| | | |
| | | if(net.types[i] == CONVOLUTIONAL){ |
| | | convolutional_layer layer = *(convolutional_layer *)net.layers[i]; |
| | | backward_convolutional_layer(layer, prev_input, prev_delta); |
| | | backward_convolutional_layer(layer, state); |
| | | } else if(net.types[i] == DECONVOLUTIONAL){ |
| | | deconvolutional_layer layer = *(deconvolutional_layer *)net.layers[i]; |
| | | backward_deconvolutional_layer(layer, prev_input, prev_delta); |
| | | backward_deconvolutional_layer(layer, state); |
| | | } |
// Maxpool, normalization and softmax skip their backward call when i == 0,
// i.e. when the delta pointer set above is 0.
| | | else if(net.types[i] == MAXPOOL){ |
| | | maxpool_layer layer = *(maxpool_layer *)net.layers[i]; |
| | | if(i != 0) backward_maxpool_layer(layer, prev_delta); |
| | | if(i != 0) backward_maxpool_layer(layer, state); |
| | | } |
| | | else if(net.types[i] == DROPOUT){ |
| | | dropout_layer layer = *(dropout_layer *)net.layers[i]; |
| | | backward_dropout_layer(layer, prev_delta); |
| | | backward_dropout_layer(layer, state); |
| | | } |
| | | else if(net.types[i] == DETECTION){ |
| | | detection_layer layer = *(detection_layer *)net.layers[i]; |
| | | backward_detection_layer(layer, prev_input, prev_delta); |
| | | backward_detection_layer(layer, state); |
| | | } |
| | | else if(net.types[i] == NORMALIZATION){ |
| | | normalization_layer layer = *(normalization_layer *)net.layers[i]; |
| | | if(i != 0) backward_normalization_layer(layer, prev_input, prev_delta); |
| | | if(i != 0) backward_normalization_layer(layer, state); |
| | | } |
| | | else if(net.types[i] == SOFTMAX){ |
| | | softmax_layer layer = *(softmax_layer *)net.layers[i]; |
| | | if(i != 0) backward_softmax_layer(layer, prev_delta); |
| | | if(i != 0) backward_softmax_layer(layer, state); |
| | | } |
| | | else if(net.types[i] == CONNECTED){ |
| | | connected_layer layer = *(connected_layer *)net.layers[i]; |
| | | backward_connected_layer(layer, prev_input, prev_delta); |
| | | backward_connected_layer(layer, state); |
| | | } |
| | | else if(net.types[i] == COST){ |
| | | cost_layer layer = *(cost_layer *)net.layers[i]; |
| | | backward_cost_layer(layer, prev_input, prev_delta); |
| | | backward_cost_layer(layer, state); |
| | | } |
| | | } |
| | | } |
| | |
| | | #ifdef GPU |
| | | if(gpu_index >= 0) return train_network_datum_gpu(net, x, y); |
| | | #endif |
| | | forward_network(net, x, y, 1); |
| | | backward_network(net, x, y); |
| | | network_state state; |
| | | state.input = x; |
| | | state.truth = y; |
| | | state.train = 1; |
| | | forward_network(net, state); |
| | | backward_network(net, state); |
| | | float error = get_network_cost(net); |
| | | update_network(net); |
| | | return error; |
| | |
| | | float train_network_batch(network net, data d, int n) |
| | | { |
| | | int i,j; |
| | | network_state state; |
| | | state.train = 1; |
| | | float sum = 0; |
| | | int batch = 2; |
| | | for(i = 0; i < n; ++i){ |
| | | for(j = 0; j < batch; ++j){ |
| | | int index = rand()%d.X.rows; |
| | | float *x = d.X.vals[index]; |
| | | float *y = d.y.vals[index]; |
| | | forward_network(net, x, y, 1); |
| | | backward_network(net, x, y); |
| | | state.input = d.X.vals[index]; |
| | | state.truth = d.y.vals[index]; |
| | | forward_network(net, state); |
| | | backward_network(net, state); |
| | | sum += get_network_cost(net); |
| | | } |
| | | update_network(net); |
| | |
| | | return (float)sum/(n*batch); |
| | | } |
| | | |
// set_learning_network: stores rate, momentum and decay on the network and copies
// the same three values into every convolutional and connected layer.
// NOTE(review): this diff rendering does not show whether the function is kept or
// removed. Elsewhere on the page the layer update calls receive
// net.learning_rate / net.momentum / net.decay as arguments, and the
// make_connected_layer hunk drops its per-layer assignments -- so the per-layer
// copies below may be obsolete. Confirm upstream.
| | | void set_learning_network(network *net, float rate, float momentum, float decay) |
| | | { |
| | | int i; |
| | | net->learning_rate=rate; |
| | | net->momentum = momentum; |
| | | net->decay = decay; |
| | | for(i = 0; i < net->n; ++i){ |
| | | if(net->types[i] == CONVOLUTIONAL){ |
| | | convolutional_layer *layer = (convolutional_layer *)net->layers[i]; |
| | | layer->learning_rate=rate; |
| | | layer->momentum = momentum; |
| | | layer->decay = decay; |
| | | } |
| | | else if(net->types[i] == CONNECTED){ |
| | | connected_layer *layer = (connected_layer *)net->layers[i]; |
| | | layer->learning_rate=rate; |
| | | layer->momentum = momentum; |
| | | layer->decay = decay; |
| | | } |
| | | } |
| | | } |
| | | |
| | | void set_batch_network(network *net, int b) |
| | | { |
| | | net->batch = b; |
| | |
| | | detection_layer *layer = (detection_layer *) net->layers[i]; |
| | | layer->batch = b; |
| | | } |
| | | else if(net->types[i] == FREEWEIGHT){ |
| | | freeweight_layer *layer = (freeweight_layer *) net->layers[i]; |
| | | layer->batch = b; |
| | | } |
| | | else if(net->types[i] == SOFTMAX){ |
| | | softmax_layer *layer = (softmax_layer *)net->layers[i]; |
| | | layer->batch = b; |
| | |
| | | crop_layer layer = *(crop_layer *) net.layers[i]; |
| | | return layer.c*layer.h*layer.w; |
| | | } |
| | | else if(net.types[i] == FREEWEIGHT){ |
| | | freeweight_layer layer = *(freeweight_layer *) net.layers[i]; |
| | | return layer.inputs; |
| | | } |
| | | else if(net.types[i] == SOFTMAX){ |
| | | softmax_layer layer = *(softmax_layer *)net.layers[i]; |
| | | return layer.inputs; |
| | | } |
| | | printf("Can't find input size\n"); |
| | | fprintf(stderr, "Can't find input size\n"); |
| | | return 0; |
| | | } |
| | | |
| | |
| | | image output = get_maxpool_image(layer); |
| | | return output.h*output.w*output.c; |
| | | } |
| | | else if(net.types[i] == CROP){ |
| | | else if(net.types[i] == CROP){ |
| | | crop_layer layer = *(crop_layer *) net.layers[i]; |
| | | return layer.c*layer.crop_height*layer.crop_width; |
| | | } |
| | |
| | | dropout_layer layer = *(dropout_layer *) net.layers[i]; |
| | | return layer.inputs; |
| | | } |
| | | else if(net.types[i] == FREEWEIGHT){ |
| | | freeweight_layer layer = *(freeweight_layer *) net.layers[i]; |
| | | return layer.inputs; |
| | | } |
| | | else if(net.types[i] == SOFTMAX){ |
| | | softmax_layer layer = *(softmax_layer *)net.layers[i]; |
| | | return layer.inputs; |
| | | } |
| | | printf("Can't find output size\n"); |
| | | fprintf(stderr, "Can't find output size\n"); |
| | | return 0; |
| | | } |
| | | |
| | |
| | | |
// network_predict: runs a forward pass on `input` and returns get_network_output(net).
// When built with GPU and gpu_index >= 0 it returns network_predict_gpu(net, input)
// instead.
// NOTE(review): rendered diff hunk. The doubled #ifdef/#endif rows are presumably
// the same directive shown before and after a whitespace-only change. The
// four-argument forward_network call is the pre-change form; the network_state
// rows (train = 0, truth = 0, delta = 0) are the post-change form.
| | | float *network_predict(network net, float *input) |
| | | { |
| | | #ifdef GPU |
| | | #ifdef GPU |
| | | if(gpu_index >= 0) return network_predict_gpu(net, input); |
| | | #endif |
| | | #endif |
| | | |
| | | forward_network(net, input, 0, 0); |
| | | network_state state; |
| | | state.input = input; |
| | | state.truth = 0; |
| | | state.train = 0; |
| | | state.delta = 0; |
| | | forward_network(net, state); |
| | | float *out = get_network_output(net); |
| | | return out; |
| | | } |
| | |
| | | #define NETWORK_H |
| | | |
| | | #include "image.h" |
| | | #include "params.h" |
| | | #include "data.h" |
| | | |
| | | typedef enum { |
| | |
| | | DETECTION, |
| | | NORMALIZATION, |
| | | DROPOUT, |
| | | FREEWEIGHT, |
| | | CROP, |
| | | COST |
| | | } LAYER_TYPE; |
| | |
| | | int outputs; |
| | | float *output; |
| | | |
| | | int inputs; |
| | | int h, w, c; |
| | | |
| | | #ifdef GPU |
| | | float **input_gpu; |
| | | float **truth_gpu; |
| | |
| | | void compare_networks(network n1, network n2, data d); |
| | | char *get_layer_string(LAYER_TYPE a); |
| | | |
| | | network make_network(int n, int batch); |
| | | void forward_network(network net, float *input, float *truth, int train); |
| | | void backward_network(network net, float *input, float *truth); |
| | | network make_network(int n); |
| | | void forward_network(network net, network_state state); |
| | | void backward_network(network net, network_state state); |
| | | void update_network(network net); |
| | | |
| | | float train_network(network net, data d); |
| | |
| | | void visualize_network(network net); |
| | | int resize_network(network net, int h, int w, int c); |
| | | void set_batch_network(network *net, int b); |
| | | void set_learning_network(network *net, float rate, float momentum, float decay); |
| | | int get_network_input_size(network net); |
| | | float get_network_cost(network net); |
| | | |
| | |
| | | #include "image.h" |
| | | #include "data.h" |
| | | #include "utils.h" |
| | | #include "params.h" |
| | | |
| | | #include "crop_layer.h" |
| | | #include "connected_layer.h" |
| | |
| | | #include "maxpool_layer.h" |
| | | #include "cost_layer.h" |
| | | #include "normalization_layer.h" |
| | | #include "freeweight_layer.h" |
| | | #include "softmax_layer.h" |
| | | #include "dropout_layer.h" |
| | | } |
| | |
| | | extern "C" float * get_network_delta_gpu_layer(network net, int i); |
| | | float *get_network_output_gpu(network net); |
| | | |
// forward_network_gpu: walks layers 0 .. net.n-1 and dispatches on net.types[i]
// to the matching forward_*_layer_gpu routine.
// NOTE(review): rendered diff hunk. Rows that copy the layer struct, pass
// `input`/`truth`/`train` and then do `input = layer.output_gpu` go with the
// (network, float*, float*, int) signature; rows that pass `state` go with the
// (network, network_state) signature. Presumed old-then-new; confirm upstream.
| | | void forward_network_gpu(network net, float * input, float * truth, int train) |
| | | void forward_network_gpu(network net, network_state state) |
| | | { |
| | | int i; |
| | | for(i = 0; i < net.n; ++i){ |
| | | //clock_t time = clock(); |
| | | if(net.types[i] == CONVOLUTIONAL){ |
| | | convolutional_layer layer = *(convolutional_layer *)net.layers[i]; |
| | | forward_convolutional_layer_gpu(layer, input); |
| | | input = layer.output_gpu; |
| | | forward_convolutional_layer_gpu(*(convolutional_layer *)net.layers[i], state); |
| | | } |
| | | else if(net.types[i] == DECONVOLUTIONAL){ |
| | | deconvolutional_layer layer = *(deconvolutional_layer *)net.layers[i]; |
| | | forward_deconvolutional_layer_gpu(layer, input); |
| | | input = layer.output_gpu; |
| | | forward_deconvolutional_layer_gpu(*(deconvolutional_layer *)net.layers[i], state); |
| | | } |
| | | else if(net.types[i] == COST){ |
| | | cost_layer layer = *(cost_layer *)net.layers[i]; |
| | | forward_cost_layer_gpu(layer, input, truth); |
| | | forward_cost_layer_gpu(*(cost_layer *)net.layers[i], state); |
| | | } |
| | | else if(net.types[i] == CONNECTED){ |
| | | connected_layer layer = *(connected_layer *)net.layers[i]; |
| | | forward_connected_layer_gpu(layer, input); |
| | | input = layer.output_gpu; |
| | | forward_connected_layer_gpu(*(connected_layer *)net.layers[i], state); |
| | | } |
| | | else if(net.types[i] == DETECTION){ |
| | | detection_layer layer = *(detection_layer *)net.layers[i]; |
| | | forward_detection_layer_gpu(layer, input, truth); |
| | | input = layer.output_gpu; |
| | | forward_detection_layer_gpu(*(detection_layer *)net.layers[i], state); |
| | | } |
| | | else if(net.types[i] == MAXPOOL){ |
| | | maxpool_layer layer = *(maxpool_layer *)net.layers[i]; |
| | | forward_maxpool_layer_gpu(layer, input); |
| | | input = layer.output_gpu; |
| | | forward_maxpool_layer_gpu(*(maxpool_layer *)net.layers[i], state); |
| | | } |
| | | else if(net.types[i] == SOFTMAX){ |
| | | softmax_layer layer = *(softmax_layer *)net.layers[i]; |
| | | forward_softmax_layer_gpu(layer, input); |
| | | input = layer.output_gpu; |
| | | forward_softmax_layer_gpu(*(softmax_layer *)net.layers[i], state); |
| | | } |
// NOTE(review): `if(!train) continue;` names `train`, which only the float*-argument
// signature declares; presumably that guard goes away with the old signature.
| | | else if(net.types[i] == DROPOUT){ |
| | | if(!train) continue; |
| | | dropout_layer layer = *(dropout_layer *)net.layers[i]; |
| | | forward_dropout_layer_gpu(layer, input); |
| | | input = layer.output_gpu; |
| | | forward_dropout_layer_gpu(*(dropout_layer *)net.layers[i], state); |
| | | } |
| | | else if(net.types[i] == CROP){ |
| | | crop_layer layer = *(crop_layer *)net.layers[i]; |
| | | forward_crop_layer_gpu(layer, train, input); |
| | | input = layer.output_gpu; |
| | | forward_crop_layer_gpu(*(crop_layer *)net.layers[i], state); |
| | | } |
| | | //cudaDeviceSynchronize(); |
| | | //printf("Forward %d %s %f\n", i, get_layer_string(net.types[i]), sec(clock() - time)); |
// Hands this layer's GPU output to the next iteration as state.input.
| | | state.input = get_network_output_gpu_layer(net, i); |
| | | } |
| | | } |
| | | |
// backward_network_gpu: walks layers from net.n-1 down to 0 and dispatches on
// net.types[i] to the matching backward_*_layer_gpu routine.
// For layer 0 the input is the network's own input and the delta pointer is 0;
// for any other layer both come from layer i-1 via get_network_output_gpu_layer /
// get_network_delta_gpu_layer.
// NOTE(review): rendered diff hunk. Rows using prev_input/prev_delta go with the
// (network, float*, float*) signature; rows using `state` go with the
// (network, network_state) signature. Presumed old-then-new; confirm upstream.
| | | void backward_network_gpu(network net, float * input, float *truth) |
| | | void backward_network_gpu(network net, network_state state) |
| | | { |
| | | int i; |
| | | float * prev_input; |
| | | float * prev_delta; |
// state.input is overwritten inside the loop, so the caller's input is saved here
// for use when i == 0.
| | | float * original_input = state.input; |
| | | for(i = net.n-1; i >= 0; --i){ |
| | | //clock_t time = clock(); |
| | | if(i == 0){ |
| | | prev_input = input; |
| | | prev_delta = 0; |
| | | state.input = original_input; |
| | | state.delta = 0; |
| | | }else{ |
| | | prev_input = get_network_output_gpu_layer(net, i-1); |
| | | prev_delta = get_network_delta_gpu_layer(net, i-1); |
| | | state.input = get_network_output_gpu_layer(net, i-1); |
| | | state.delta = get_network_delta_gpu_layer(net, i-1); |
| | | } |
| | | if(net.types[i] == CONVOLUTIONAL){ |
| | | convolutional_layer layer = *(convolutional_layer *)net.layers[i]; |
| | | backward_convolutional_layer_gpu(layer, prev_input, prev_delta); |
| | | backward_convolutional_layer_gpu(*(convolutional_layer *)net.layers[i], state); |
| | | } |
| | | else if(net.types[i] == DECONVOLUTIONAL){ |
| | | deconvolutional_layer layer = *(deconvolutional_layer *)net.layers[i]; |
| | | backward_deconvolutional_layer_gpu(layer, prev_input, prev_delta); |
| | | backward_deconvolutional_layer_gpu(*(deconvolutional_layer *)net.layers[i], state); |
| | | } |
| | | else if(net.types[i] == COST){ |
| | | cost_layer layer = *(cost_layer *)net.layers[i]; |
| | | backward_cost_layer_gpu(layer, prev_input, prev_delta); |
| | | backward_cost_layer_gpu(*(cost_layer *)net.layers[i], state); |
| | | } |
| | | else if(net.types[i] == CONNECTED){ |
| | | connected_layer layer = *(connected_layer *)net.layers[i]; |
| | | backward_connected_layer_gpu(layer, prev_input, prev_delta); |
| | | backward_connected_layer_gpu(*(connected_layer *)net.layers[i], state); |
| | | } |
| | | else if(net.types[i] == DETECTION){ |
| | | detection_layer layer = *(detection_layer *)net.layers[i]; |
| | | backward_detection_layer_gpu(layer, prev_input, prev_delta); |
| | | backward_detection_layer_gpu(*(detection_layer *)net.layers[i], state); |
| | | } |
// NOTE(review): unlike the backward_network hunk shown earlier on this page, the
// maxpool and softmax calls here carry no `if(i != 0)` guard, so they run with a
// 0 delta pointer when such a layer is first. Confirm the callee tolerates that.
| | | else if(net.types[i] == MAXPOOL){ |
| | | maxpool_layer layer = *(maxpool_layer *)net.layers[i]; |
| | | backward_maxpool_layer_gpu(layer, prev_delta); |
| | | backward_maxpool_layer_gpu(*(maxpool_layer *)net.layers[i], state); |
| | | } |
| | | else if(net.types[i] == DROPOUT){ |
| | | dropout_layer layer = *(dropout_layer *)net.layers[i]; |
| | | backward_dropout_layer_gpu(layer, prev_delta); |
| | | backward_dropout_layer_gpu(*(dropout_layer *)net.layers[i], state); |
| | | } |
| | | else if(net.types[i] == SOFTMAX){ |
| | | softmax_layer layer = *(softmax_layer *)net.layers[i]; |
| | | backward_softmax_layer_gpu(layer, prev_delta); |
| | | backward_softmax_layer_gpu(*(softmax_layer *)net.layers[i], state); |
| | | } |
| | | //printf("Backward %d %s %f\n", i, get_layer_string(net.types[i]), sec(clock() - time)); |
| | | } |
| | | } |
| | | |
| | |
| | | for(i = 0; i < net.n; ++i){ |
| | | if(net.types[i] == CONVOLUTIONAL){ |
| | | convolutional_layer layer = *(convolutional_layer *)net.layers[i]; |
| | | update_convolutional_layer_gpu(layer); |
| | | update_convolutional_layer_gpu(layer, net.learning_rate, net.momentum, net.decay); |
| | | } |
| | | else if(net.types[i] == DECONVOLUTIONAL){ |
| | | deconvolutional_layer layer = *(deconvolutional_layer *)net.layers[i]; |
| | | update_deconvolutional_layer_gpu(layer); |
| | | update_deconvolutional_layer_gpu(layer, net.learning_rate, net.momentum, net.decay); |
| | | } |
| | | else if(net.types[i] == CONNECTED){ |
| | | connected_layer layer = *(connected_layer *)net.layers[i]; |
| | | update_connected_layer_gpu(layer); |
| | | update_connected_layer_gpu(layer, net.learning_rate, net.momentum, net.decay); |
| | | } |
| | | } |
| | | } |
| | |
// get_network_output_gpu_layer: returns the `output_gpu` pointer of layer i,
// selected by net.types[i]; returns 0 when no branch matches.
// NOTE(review): rendered diff hunk. Each branch shows a struct-copy form
// (`layer.output_gpu`) and a cast-pointer form (`->output_gpu`); presumably the
// latter replaces the former.
| | | float * get_network_output_gpu_layer(network net, int i) |
| | | { |
| | | if(net.types[i] == CONVOLUTIONAL){ |
| | | convolutional_layer layer = *(convolutional_layer *)net.layers[i]; |
| | | return layer.output_gpu; |
| | | return ((convolutional_layer *)net.layers[i]) -> output_gpu; |
| | | } |
| | | else if(net.types[i] == DECONVOLUTIONAL){ |
| | | deconvolutional_layer layer = *(deconvolutional_layer *)net.layers[i]; |
| | | return layer.output_gpu; |
| | | return ((deconvolutional_layer *)net.layers[i]) -> output_gpu; |
| | | } |
| | | else if(net.types[i] == DETECTION){ |
| | | detection_layer layer = *(detection_layer *)net.layers[i]; |
| | | return layer.output_gpu; |
| | | return ((detection_layer *)net.layers[i]) -> output_gpu; |
| | | } |
| | | else if(net.types[i] == CONNECTED){ |
| | | connected_layer layer = *(connected_layer *)net.layers[i]; |
| | | return layer.output_gpu; |
| | | return ((connected_layer *)net.layers[i]) -> output_gpu; |
| | | } |
| | | else if(net.types[i] == MAXPOOL){ |
| | | maxpool_layer layer = *(maxpool_layer *)net.layers[i]; |
| | | return layer.output_gpu; |
| | | return ((maxpool_layer *)net.layers[i]) -> output_gpu; |
| | | } |
| | | else if(net.types[i] == CROP){ |
| | | crop_layer layer = *(crop_layer *)net.layers[i]; |
| | | return layer.output_gpu; |
| | | return ((crop_layer *)net.layers[i]) -> output_gpu; |
| | | } |
// NOTE(review): the dropout branch appears twice below. One form returns the
// dropout layer's own output_gpu; the other returns the output of layer i-1.
// Presumably the second replaces the first. It recurses with i-1 and has no
// visible guard for i == 0 -- confirm a dropout layer can never be first.
| | | else if(net.types[i] == SOFTMAX){ |
| | | softmax_layer layer = *(softmax_layer *)net.layers[i]; |
| | | return layer.output_gpu; |
| | | } else if(net.types[i] == DROPOUT){ |
| | | dropout_layer layer = *(dropout_layer *)net.layers[i]; |
| | | return layer.output_gpu; |
| | | return ((softmax_layer *)net.layers[i]) -> output_gpu; |
| | | } |
| | | else if(net.types[i] == DROPOUT){ |
| | | return get_network_output_gpu_layer(net, i-1); |
| | | } |
| | | return 0; |
| | | } |
| | |
| | | float train_network_datum_gpu(network net, float *x, float *y) |
| | | { |
| | | //clock_t time = clock(); |
| | | network_state state; |
| | | int x_size = get_network_input_size(net)*net.batch; |
| | | int y_size = get_network_output_size(net)*net.batch; |
| | | if(!*net.input_gpu){ |
| | |
| | | cuda_push_array(*net.input_gpu, x, x_size); |
| | | cuda_push_array(*net.truth_gpu, y, y_size); |
| | | } |
| | | state.input = *net.input_gpu; |
| | | state.truth = *net.truth_gpu; |
| | | state.train = 1; |
| | | //printf("trans %f\n", sec(clock() - time)); |
| | | //time = clock(); |
| | | forward_network_gpu(net, *net.input_gpu, *net.truth_gpu, 1); |
| | | forward_network_gpu(net, state); |
| | | //printf("forw %f\n", sec(clock() - time)); |
| | | //time = clock(); |
| | | backward_network_gpu(net, *net.input_gpu, *net.truth_gpu); |
| | | backward_network_gpu(net, state); |
| | | //printf("back %f\n", sec(clock() - time)); |
| | | //time = clock(); |
| | | update_network_gpu(net); |
| | |
| | | { |
| | | |
| | | int size = get_network_input_size(net) * net.batch; |
| | | float * input_gpu = cuda_make_array(input, size); |
| | | forward_network_gpu(net, input_gpu, 0, 0); |
| | | network_state state; |
| | | state.input = cuda_make_array(input, size); |
| | | state.truth = 0; |
| | | state.train = 0; |
| | | state.delta = 0; |
| | | forward_network_gpu(net, state); |
| | | float *out = get_network_output_gpu(net); |
| | | cuda_free(input_gpu); |
| | | cuda_free(state.input); |
| | | return out; |
| | | } |
| | | |
| | |
| | | } |
| | | } |
| | | |
// forward_normalization_layer: divides every input element by
// pow(kappa + alpha * sums[i], beta), where sums[i] is a running sum of squared
// inputs over neighbouring channels at spatial position i.
// Steps: zero layer.sums (h*w floats); add the squares of the first size/2
// channels; then, per channel k, add channel k + size/2 and subtract channel
// k - size/2 - 1 before writing the output for channel k.
// NOTE(review): rendered diff hunk. Rows reading `in` go with the float *in
// signature; rows reading `state.input` go with the network_state signature.
| | | void forward_normalization_layer(const normalization_layer layer, float *in) |
| | | void forward_normalization_layer(const normalization_layer layer, network_state state) |
| | | { |
| | | int i,j,k; |
| | | memset(layer.sums, 0, layer.h*layer.w*sizeof(float)); |
| | | int imsize = layer.h*layer.w; |
| | | for(j = 0; j < layer.size/2; ++j){ |
| | | if(j < layer.c) add_square_array(in+j*imsize, layer.sums, imsize); |
| | | if(j < layer.c) add_square_array(state.input+j*imsize, layer.sums, imsize); |
| | | } |
| | | for(k = 0; k < layer.c; ++k){ |
| | | int next = k+layer.size/2; |
| | | int prev = k-layer.size/2-1; |
// NOTE(review): with `prev > 0`, channel 0 is never subtracted when prev == 0, so
// it appears to stay in the running sum for all later channels. Confirm whether
// `prev >= 0` was intended. Only one image (c*h*w values) is processed here;
// layer.batch is not consulted in this function.
| | | if(next < layer.c) add_square_array(in+next*imsize, layer.sums, imsize); |
| | | if(prev > 0) sub_square_array(in+prev*imsize, layer.sums, imsize); |
| | | if(next < layer.c) add_square_array(state.input+next*imsize, layer.sums, imsize); |
| | | if(prev > 0) sub_square_array(state.input+prev*imsize, layer.sums, imsize); |
| | | for(i = 0; i < imsize; ++i){ |
| | | layer.output[k*imsize + i] = in[k*imsize+i] / pow(layer.kappa + layer.alpha * layer.sums[i], layer.beta); |
| | | layer.output[k*imsize + i] = state.input[k*imsize+i] / pow(layer.kappa + layer.alpha * layer.sums[i], layer.beta); |
| | | } |
| | | } |
| | | } |
| | | |
// backward_normalization_layer: intentionally empty -- no gradient is computed or
// propagated for this layer in either form of the signature shown.
// NOTE(review): rendered diff hunk; the two signatures and the TODO comment rows
// are the before/after forms of the same lines.
| | | void backward_normalization_layer(const normalization_layer layer, float *in, float *delta) |
| | | void backward_normalization_layer(const normalization_layer layer, network_state state) |
| | | { |
| | | //TODO! |
| | | // TODO! |
| | | // OR NOT TODO!! |
| | | } |
| | | |
| | | void visualize_normalization_layer(normalization_layer layer, char *window) |
| | |
| | | #define NORMALIZATION_LAYER_H |
| | | |
| | | #include "image.h" |
| | | #include "params.h" |
| | | |
| | | typedef struct { |
| | | int batch; |
| | |
| | | image get_normalization_image(normalization_layer layer); |
| | | normalization_layer *make_normalization_layer(int batch, int h, int w, int c, int size, float alpha, float beta, float kappa); |
| | | void resize_normalization_layer(normalization_layer *layer, int h, int w); |
| | | void forward_normalization_layer(const normalization_layer layer, float *in); |
| | | void backward_normalization_layer(const normalization_layer layer, float *in, float *delta); |
| | | void forward_normalization_layer(const normalization_layer layer, network_state state); |
| | | void backward_normalization_layer(const normalization_layer layer, network_state state); |
| | | void visualize_normalization_layer(normalization_layer layer, char *window); |
| | | |
| | | #endif |
| New file |
| | |
// params.h: declares network_state, the bundle passed by value to the forward_*
// and backward_* routines on this page in place of separate input/truth/train
// arguments.
| | | #ifndef PARAMS_H |
| | | #define PARAMS_H |
| | | |
| | | typedef struct { |
// truth: target values; callers on this page set it to the label pointer when
// training and to 0 in network_predict.
| | | float *truth; |
// input: data for the current layer; the forward loops overwrite it with each
// layer's output after that layer runs.
| | | float *input; |
// delta: set by the backward loops to the previous layer's delta, or 0 for
// layer 0. The visible training callers leave it unassigned before the forward pass.
| | | float *delta; |
// train: 1 in the training callers, 0 in network_predict.
| | | int train; |
| | | } network_state; |
| | | |
| | | #endif |
| | | |
| | |
| | | #include "softmax_layer.h" |
| | | #include "dropout_layer.h" |
| | | #include "detection_layer.h" |
| | | #include "freeweight_layer.h" |
| | | #include "list.h" |
| | | #include "option_list.h" |
| | | #include "utils.h" |
| | |
| | | list *options; |
| | | }section; |
| | | |
| | | int is_network(section *s); |
| | | int is_convolutional(section *s); |
| | | int is_deconvolutional(section *s); |
| | | int is_connected(section *s); |
| | | int is_maxpool(section *s); |
| | | int is_dropout(section *s); |
| | | int is_freeweight(section *s); |
| | | int is_softmax(section *s); |
| | | int is_crop(section *s); |
| | | int is_cost(section *s); |
| | |
| | | } |
| | | } |
| | | |
| | | deconvolutional_layer *parse_deconvolutional(list *options, network *net, int count) |
// size_params: dimensions handed to the parse_* layer constructors.
// parse_deconvolutional reads batch, h, w and c from it and raises an error when
// any of h, w, c is 0. `inputs` is not read in the code visible here.
| | | typedef struct size_params{ |
| | | int batch; |
| | | int inputs; |
| | | int h; |
| | | int w; |
| | | int c; |
| | | } size_params; |
| | | |
| | | deconvolutional_layer *parse_deconvolutional(list *options, size_params params) |
| | | { |
| | | int h,w,c; |
| | | float learning_rate, momentum, decay; |
| | | int n = option_find_int(options, "filters",1); |
| | | int size = option_find_int(options, "size",1); |
| | | int stride = option_find_int(options, "stride",1); |
| | | char *activation_s = option_find_str(options, "activation", "logistic"); |
| | | ACTIVATION activation = get_activation(activation_s); |
| | | if(count == 0){ |
| | | learning_rate = option_find_float(options, "learning_rate", .001); |
| | | momentum = option_find_float(options, "momentum", .9); |
| | | decay = option_find_float(options, "decay", .0001); |
| | | h = option_find_int(options, "height",1); |
| | | w = option_find_int(options, "width",1); |
| | | c = option_find_int(options, "channels",1); |
| | | net->batch = option_find_int(options, "batch",1); |
| | | net->learning_rate = learning_rate; |
| | | net->momentum = momentum; |
| | | net->decay = decay; |
| | | net->seen = option_find_int(options, "seen",0); |
| | | }else{ |
| | | learning_rate = option_find_float_quiet(options, "learning_rate", net->learning_rate); |
| | | momentum = option_find_float_quiet(options, "momentum", net->momentum); |
| | | decay = option_find_float_quiet(options, "decay", net->decay); |
| | | image m = get_network_image_layer(*net, count-1); |
| | | h = m.h; |
| | | w = m.w; |
| | | c = m.c; |
| | | if(h == 0) error("Layer before deconvolutional layer must output image."); |
| | | } |
| | | deconvolutional_layer *layer = make_deconvolutional_layer(net->batch,h,w,c,n,size,stride,activation,learning_rate,momentum,decay); |
| | | |
| | | int batch,h,w,c; |
| | | h = params.h; |
| | | w = params.w; |
| | | c = params.c; |
| | | batch=params.batch; |
| | | if(!(h && w && c)) error("Layer before deconvolutional layer must output image."); |
| | | |
| | | deconvolutional_layer *layer = make_deconvolutional_layer(batch,h,w,c,n,size,stride,activation); |
| | | |
| | | char *weights = option_find_str(options, "weights", 0); |
| | | char *biases = option_find_str(options, "biases", 0); |
| | | parse_data(weights, layer->filters, c*n*size*size); |
| | |
| | | return layer; |
| | | } |
| | | |
| | | convolutional_layer *parse_convolutional(list *options, network *net, int count) |
| | | convolutional_layer *parse_convolutional(list *options, size_params params) |
| | | { |
| | | int h,w,c; |
| | | float learning_rate, momentum, decay; |
| | | int n = option_find_int(options, "filters",1); |
| | | int size = option_find_int(options, "size",1); |
| | | int stride = option_find_int(options, "stride",1); |
| | | int pad = option_find_int(options, "pad",0); |
| | | char *activation_s = option_find_str(options, "activation", "logistic"); |
| | | ACTIVATION activation = get_activation(activation_s); |
| | | if(count == 0){ |
| | | learning_rate = option_find_float(options, "learning_rate", .001); |
| | | momentum = option_find_float(options, "momentum", .9); |
| | | decay = option_find_float(options, "decay", .0001); |
| | | h = option_find_int(options, "height",1); |
| | | w = option_find_int(options, "width",1); |
| | | c = option_find_int(options, "channels",1); |
| | | net->batch = option_find_int(options, "batch",1); |
| | | net->learning_rate = learning_rate; |
| | | net->momentum = momentum; |
| | | net->decay = decay; |
| | | net->seen = option_find_int(options, "seen",0); |
| | | }else{ |
| | | learning_rate = option_find_float_quiet(options, "learning_rate", net->learning_rate); |
| | | momentum = option_find_float_quiet(options, "momentum", net->momentum); |
| | | decay = option_find_float_quiet(options, "decay", net->decay); |
| | | image m = get_network_image_layer(*net, count-1); |
| | | h = m.h; |
| | | w = m.w; |
| | | c = m.c; |
| | | if(h == 0) error("Layer before convolutional layer must output image."); |
| | | } |
| | | convolutional_layer *layer = make_convolutional_layer(net->batch,h,w,c,n,size,stride,pad,activation,learning_rate,momentum,decay); |
| | | |
| | | int batch,h,w,c; |
| | | h = params.h; |
| | | w = params.w; |
| | | c = params.c; |
| | | batch=params.batch; |
| | | if(!(h && w && c)) error("Layer before convolutional layer must output image."); |
| | | |
| | | convolutional_layer *layer = make_convolutional_layer(batch,h,w,c,n,size,stride,pad,activation); |
| | | |
| | | char *weights = option_find_str(options, "weights", 0); |
| | | char *biases = option_find_str(options, "biases", 0); |
| | | parse_data(weights, layer->filters, c*n*size*size); |
| | |
| | | return layer; |
| | | } |
| | | |
| | | connected_layer *parse_connected(list *options, network *net, int count) |
| | | connected_layer *parse_connected(list *options, size_params params) |
| | | { |
| | | int input; |
| | | float learning_rate, momentum, decay; |
| | | int output = option_find_int(options, "output",1); |
| | | char *activation_s = option_find_str(options, "activation", "logistic"); |
| | | ACTIVATION activation = get_activation(activation_s); |
| | | if(count == 0){ |
| | | input = option_find_int(options, "input",1); |
| | | net->batch = option_find_int(options, "batch",1); |
| | | learning_rate = option_find_float(options, "learning_rate", .001); |
| | | momentum = option_find_float(options, "momentum", .9); |
| | | decay = option_find_float(options, "decay", .0001); |
| | | net->learning_rate = learning_rate; |
| | | net->momentum = momentum; |
| | | net->decay = decay; |
| | | }else{ |
| | | learning_rate = option_find_float_quiet(options, "learning_rate", net->learning_rate); |
| | | momentum = option_find_float_quiet(options, "momentum", net->momentum); |
| | | decay = option_find_float_quiet(options, "decay", net->decay); |
| | | input = get_network_output_size_layer(*net, count-1); |
| | | } |
| | | connected_layer *layer = make_connected_layer(net->batch, input, output, activation,learning_rate,momentum,decay); |
| | | |
| | | connected_layer *layer = make_connected_layer(params.batch, params.inputs, output, activation); |
| | | |
| | | char *weights = option_find_str(options, "weights", 0); |
| | | char *biases = option_find_str(options, "biases", 0); |
| | | parse_data(biases, layer->biases, output); |
| | | parse_data(weights, layer->weights, input*output); |
| | | parse_data(weights, layer->weights, params.inputs*output); |
| | | #ifdef GPU |
| | | if(weights || biases) push_connected_layer(*layer); |
| | | #endif |
| | |
| | | return layer; |
| | | } |
| | | |
| | | softmax_layer *parse_softmax(list *options, network *net, int count) |
| | | softmax_layer *parse_softmax(list *options, size_params params) |
| | | { |
| | | int input; |
| | | int groups = option_find_int(options, "groups",1); |
| | | if(count == 0){ |
| | | input = option_find_int(options, "input",1); |
| | | net->batch = option_find_int(options, "batch",1); |
| | | net->seen = option_find_int(options, "seen",0); |
| | | }else{ |
| | | input = get_network_output_size_layer(*net, count-1); |
| | | } |
| | | softmax_layer *layer = make_softmax_layer(net->batch, groups, input); |
| | | softmax_layer *layer = make_softmax_layer(params.batch, params.inputs, groups); |
| | | option_unused(options); |
| | | return layer; |
| | | } |
| | | |
| | | detection_layer *parse_detection(list *options, network *net, int count) |
| | | detection_layer *parse_detection(list *options, size_params params) |
| | | { |
| | | int input; |
| | | if(count == 0){ |
| | | input = option_find_int(options, "input",1); |
| | | net->batch = option_find_int(options, "batch",1); |
| | | net->seen = option_find_int(options, "seen",0); |
| | | }else{ |
| | | input = get_network_output_size_layer(*net, count-1); |
| | | } |
| | | int coords = option_find_int(options, "coords", 1); |
| | | int classes = option_find_int(options, "classes", 1); |
| | | int rescore = option_find_int(options, "rescore", 1); |
| | | detection_layer *layer = make_detection_layer(net->batch, input, classes, coords, rescore); |
| | | detection_layer *layer = make_detection_layer(params.batch, params.inputs, classes, coords, rescore); |
| | | option_unused(options); |
| | | return layer; |
| | | } |
| | | |
| | | cost_layer *parse_cost(list *options, network *net, int count) |
| | | cost_layer *parse_cost(list *options, size_params params) |
| | | { |
| | | int input; |
| | | if(count == 0){ |
| | | input = option_find_int(options, "input",1); |
| | | net->batch = option_find_int(options, "batch",1); |
| | | net->seen = option_find_int(options, "seen",0); |
| | | }else{ |
| | | input = get_network_output_size_layer(*net, count-1); |
| | | } |
| | | char *type_s = option_find_str(options, "type", "sse"); |
| | | COST_TYPE type = get_cost_type(type_s); |
| | | cost_layer *layer = make_cost_layer(net->batch, input, type); |
| | | cost_layer *layer = make_cost_layer(params.batch, params.inputs, type); |
| | | option_unused(options); |
| | | return layer; |
| | | } |
| | | |
| | | crop_layer *parse_crop(list *options, network *net, int count) |
| | | crop_layer *parse_crop(list *options, size_params params) |
| | | { |
| | | float learning_rate, momentum, decay; |
| | | int h,w,c; |
| | | int crop_height = option_find_int(options, "crop_height",1); |
| | | int crop_width = option_find_int(options, "crop_width",1); |
| | | int flip = option_find_int(options, "flip",0); |
| | | if(count == 0){ |
| | | h = option_find_int(options, "height",1); |
| | | w = option_find_int(options, "width",1); |
| | | c = option_find_int(options, "channels",1); |
| | | net->batch = option_find_int(options, "batch",1); |
| | | learning_rate = option_find_float(options, "learning_rate", .001); |
| | | momentum = option_find_float(options, "momentum", .9); |
| | | decay = option_find_float(options, "decay", .0001); |
| | | net->learning_rate = learning_rate; |
| | | net->momentum = momentum; |
| | | net->decay = decay; |
| | | net->seen = option_find_int(options, "seen",0); |
| | | }else{ |
| | | image m = get_network_image_layer(*net, count-1); |
| | | h = m.h; |
| | | w = m.w; |
| | | c = m.c; |
| | | if(h == 0) error("Layer before crop layer must output image."); |
| | | } |
| | | crop_layer *layer = make_crop_layer(net->batch,h,w,c,crop_height,crop_width,flip); |
| | | |
| | | int batch,h,w,c; |
| | | h = params.h; |
| | | w = params.w; |
| | | c = params.c; |
| | | batch=params.batch; |
| | | if(!(h && w && c)) error("Layer before crop layer must output image."); |
| | | |
| | | crop_layer *layer = make_crop_layer(batch,h,w,c,crop_height,crop_width,flip); |
| | | option_unused(options); |
| | | return layer; |
| | | } |
| | | |
| | | maxpool_layer *parse_maxpool(list *options, network *net, int count) |
| | | maxpool_layer *parse_maxpool(list *options, size_params params) |
| | | { |
| | | int h,w,c; |
| | | int stride = option_find_int(options, "stride",1); |
| | | int size = option_find_int(options, "size",stride); |
| | | if(count == 0){ |
| | | h = option_find_int(options, "height",1); |
| | | w = option_find_int(options, "width",1); |
| | | c = option_find_int(options, "channels",1); |
| | | net->batch = option_find_int(options, "batch",1); |
| | | net->seen = option_find_int(options, "seen",0); |
| | | }else{ |
| | | image m = get_network_image_layer(*net, count-1); |
| | | h = m.h; |
| | | w = m.w; |
| | | c = m.c; |
| | | if(h == 0) error("Layer before convolutional layer must output image."); |
| | | } |
| | | maxpool_layer *layer = make_maxpool_layer(net->batch,h,w,c,size,stride); |
| | | |
| | | int batch,h,w,c; |
| | | h = params.h; |
| | | w = params.w; |
| | | c = params.c; |
| | | batch=params.batch; |
| | | if(!(h && w && c)) error("Layer before maxpool layer must output image."); |
| | | |
| | | maxpool_layer *layer = make_maxpool_layer(batch,h,w,c,size,stride); |
| | | option_unused(options); |
| | | return layer; |
| | | } |
| | | |
| | | /* |
| | | freeweight_layer *parse_freeweight(list *options, network *net, int count) |
| | | dropout_layer *parse_dropout(list *options, size_params params) |
| | | { |
| | | int input; |
| | | if(count == 0){ |
| | | net->batch = option_find_int(options, "batch",1); |
| | | input = option_find_int(options, "input",1); |
| | | }else{ |
| | | input = get_network_output_size_layer(*net, count-1); |
| | | } |
| | | freeweight_layer *layer = make_freeweight_layer(net->batch,input); |
| | | option_unused(options); |
| | | return layer; |
| | | } |
| | | */ |
| | | |
| | | dropout_layer *parse_dropout(list *options, network *net, int count) |
| | | { |
| | | int input; |
| | | float probability = option_find_float(options, "probability", .5); |
| | | if(count == 0){ |
| | | net->batch = option_find_int(options, "batch",1); |
| | | input = option_find_int(options, "input",1); |
| | | float learning_rate = option_find_float(options, "learning_rate", .001); |
| | | float momentum = option_find_float(options, "momentum", .9); |
| | | float decay = option_find_float(options, "decay", .0001); |
| | | net->learning_rate = learning_rate; |
| | | net->momentum = momentum; |
| | | net->decay = decay; |
| | | net->seen = option_find_int(options, "seen",0); |
| | | }else{ |
| | | input = get_network_output_size_layer(*net, count-1); |
| | | } |
| | | dropout_layer *layer = make_dropout_layer(net->batch,input,probability); |
| | | dropout_layer *layer = make_dropout_layer(params.batch, params.inputs, probability); |
| | | option_unused(options); |
| | | return layer; |
| | | } |
| | | |
| | | normalization_layer *parse_normalization(list *options, network *net, int count) |
| | | normalization_layer *parse_normalization(list *options, size_params params) |
| | | { |
| | | int h,w,c; |
| | | int size = option_find_int(options, "size",1); |
| | | float alpha = option_find_float(options, "alpha", 0.); |
| | | float beta = option_find_float(options, "beta", 1.); |
| | | float kappa = option_find_float(options, "kappa", 1.); |
| | | if(count == 0){ |
| | | h = option_find_int(options, "height",1); |
| | | w = option_find_int(options, "width",1); |
| | | c = option_find_int(options, "channels",1); |
| | | net->batch = option_find_int(options, "batch",1); |
| | | net->seen = option_find_int(options, "seen",0); |
| | | }else{ |
| | | image m = get_network_image_layer(*net, count-1); |
| | | h = m.h; |
| | | w = m.w; |
| | | c = m.c; |
| | | if(h == 0) error("Layer before convolutional layer must output image."); |
| | | } |
| | | normalization_layer *layer = make_normalization_layer(net->batch,h,w,c,size, alpha, beta, kappa); |
| | | |
| | | int batch,h,w,c; |
| | | h = params.h; |
| | | w = params.w; |
| | | c = params.c; |
| | | batch=params.batch; |
| | | if(!(h && w && c)) error("Layer before normalization layer must output image."); |
| | | |
| | | normalization_layer *layer = make_normalization_layer(batch,h,w,c,size, alpha, beta, kappa); |
| | | option_unused(options); |
| | | return layer; |
| | | } |
| | | |
| | | void parse_net_options(list *options, network *net) |
| | | { |
| | | net->batch = option_find_int(options, "batch",1); |
| | | net->learning_rate = option_find_float(options, "learning_rate", .001); |
| | | net->momentum = option_find_float(options, "momentum", .9); |
| | | net->decay = option_find_float(options, "decay", .0001); |
| | | net->seen = option_find_int(options, "seen",0); |
| | | |
| | | net->h = option_find_int_quiet(options, "height",0); |
| | | net->w = option_find_int_quiet(options, "width",0); |
| | | net->c = option_find_int_quiet(options, "channels",0); |
| | | net->inputs = option_find_int_quiet(options, "inputs", net->h * net->w * net->c); |
| | | if(!net->inputs && !(net->h && net->w && net->c)) error("No input parameters supplied"); |
| | | } |
| | | |
| | | network parse_network_cfg(char *filename) |
| | | { |
| | | list *sections = read_cfg(filename); |
| | | network net = make_network(sections->size, 0); |
| | | |
| | | node *n = sections->front; |
| | | if(!n) error("Config file has no sections"); |
| | | network net = make_network(sections->size - 1); |
| | | size_params params; |
| | | |
| | | section *s = (section *)n->val; |
| | | list *options = s->options; |
| | | if(!is_network(s)) error("First section must be [net] or [network]"); |
| | | parse_net_options(options, &net); |
| | | |
| | | params.h = net.h; |
| | | params.w = net.w; |
| | | params.c = net.c; |
| | | params.inputs = net.inputs; |
| | | params.batch = net.batch; |
| | | |
| | | n = n->next; |
| | | int count = 0; |
| | | while(n){ |
| | | section *s = (section *)n->val; |
| | | list *options = s->options; |
| | | fprintf(stderr, "%d: ", count); |
| | | s = (section *)n->val; |
| | | options = s->options; |
| | | if(is_convolutional(s)){ |
| | | convolutional_layer *layer = parse_convolutional(options, &net, count); |
| | | convolutional_layer *layer = parse_convolutional(options, params); |
| | | net.types[count] = CONVOLUTIONAL; |
| | | net.layers[count] = layer; |
| | | }else if(is_deconvolutional(s)){ |
| | | deconvolutional_layer *layer = parse_deconvolutional(options, &net, count); |
| | | deconvolutional_layer *layer = parse_deconvolutional(options, params); |
| | | net.types[count] = DECONVOLUTIONAL; |
| | | net.layers[count] = layer; |
| | | }else if(is_connected(s)){ |
| | | connected_layer *layer = parse_connected(options, &net, count); |
| | | connected_layer *layer = parse_connected(options, params); |
| | | net.types[count] = CONNECTED; |
| | | net.layers[count] = layer; |
| | | }else if(is_crop(s)){ |
| | | crop_layer *layer = parse_crop(options, &net, count); |
| | | crop_layer *layer = parse_crop(options, params); |
| | | net.types[count] = CROP; |
| | | net.layers[count] = layer; |
| | | }else if(is_cost(s)){ |
| | | cost_layer *layer = parse_cost(options, &net, count); |
| | | cost_layer *layer = parse_cost(options, params); |
| | | net.types[count] = COST; |
| | | net.layers[count] = layer; |
| | | }else if(is_detection(s)){ |
| | | detection_layer *layer = parse_detection(options, &net, count); |
| | | detection_layer *layer = parse_detection(options, params); |
| | | net.types[count] = DETECTION; |
| | | net.layers[count] = layer; |
| | | }else if(is_softmax(s)){ |
| | | softmax_layer *layer = parse_softmax(options, &net, count); |
| | | softmax_layer *layer = parse_softmax(options, params); |
| | | net.types[count] = SOFTMAX; |
| | | net.layers[count] = layer; |
| | | }else if(is_maxpool(s)){ |
| | | maxpool_layer *layer = parse_maxpool(options, &net, count); |
| | | maxpool_layer *layer = parse_maxpool(options, params); |
| | | net.types[count] = MAXPOOL; |
| | | net.layers[count] = layer; |
| | | }else if(is_normalization(s)){ |
| | | normalization_layer *layer = parse_normalization(options, &net, count); |
| | | normalization_layer *layer = parse_normalization(options, params); |
| | | net.types[count] = NORMALIZATION; |
| | | net.layers[count] = layer; |
| | | }else if(is_dropout(s)){ |
| | | dropout_layer *layer = parse_dropout(options, &net, count); |
| | | dropout_layer *layer = parse_dropout(options, params); |
| | | net.types[count] = DROPOUT; |
| | | net.layers[count] = layer; |
| | | }else if(is_freeweight(s)){ |
| | | //freeweight_layer *layer = parse_freeweight(options, &net, count); |
| | | //net.types[count] = FREEWEIGHT; |
| | | //net.layers[count] = layer; |
| | | fprintf(stderr, "Type not recognized: %s\n", s->type); |
| | | }else{ |
| | | fprintf(stderr, "Type not recognized: %s\n", s->type); |
| | | } |
| | | free_section(s); |
| | | ++count; |
| | | n = n->next; |
| | | if(n){ |
| | | image im = get_network_image_layer(net, count); |
| | | params.h = im.h; |
| | | params.w = im.w; |
| | | params.c = im.c; |
| | | params.inputs = get_network_output_size_layer(net, count); |
| | | } |
| | | ++count; |
| | | } |
| | | free_list(sections); |
| | | net.outputs = get_network_output_size(net); |
| | |
| | | return (strcmp(s->type, "[conv]")==0 |
| | | || strcmp(s->type, "[convolutional]")==0); |
| | | } |
| | | int is_network(section *s) |
| | | { |
| | | return (strcmp(s->type, "[net]")==0 |
| | | || strcmp(s->type, "[network]")==0); |
| | | } |
| | | int is_connected(section *s) |
| | | { |
| | | return (strcmp(s->type, "[conn]")==0 |
| | |
| | | { |
| | | return (strcmp(s->type, "[dropout]")==0); |
| | | } |
| | | int is_freeweight(section *s) |
| | | { |
| | | return (strcmp(s->type, "[freeweight]")==0); |
| | | } |
| | | |
| | | int is_softmax(section *s) |
| | | { |
| | |
| | | |
| | | void print_convolutional_cfg(FILE *fp, convolutional_layer *l, network net, int count) |
| | | { |
| | | #ifdef GPU |
| | | #ifdef GPU |
| | | if(gpu_index >= 0) pull_convolutional_layer(*l); |
| | | #endif |
| | | #endif |
| | | int i; |
| | | fprintf(fp, "[convolutional]\n"); |
| | | if(count == 0) { |
| | | fprintf(fp, "batch=%d\n" |
| | | "height=%d\n" |
| | | "width=%d\n" |
| | | "channels=%d\n" |
| | | "learning_rate=%g\n" |
| | | "momentum=%g\n" |
| | | "decay=%g\n" |
| | | "seen=%d\n", |
| | | l->batch,l->h, l->w, l->c, l->learning_rate, l->momentum, l->decay, net.seen); |
| | | } else { |
| | | if(l->learning_rate != net.learning_rate) |
| | | fprintf(fp, "learning_rate=%g\n", l->learning_rate); |
| | | if(l->momentum != net.momentum) |
| | | fprintf(fp, "momentum=%g\n", l->momentum); |
| | | if(l->decay != net.decay) |
| | | fprintf(fp, "decay=%g\n", l->decay); |
| | | } |
| | | fprintf(fp, "filters=%d\n" |
| | | "size=%d\n" |
| | | "stride=%d\n" |
| | |
| | | |
| | | void print_deconvolutional_cfg(FILE *fp, deconvolutional_layer *l, network net, int count) |
| | | { |
| | | #ifdef GPU |
| | | #ifdef GPU |
| | | if(gpu_index >= 0) pull_deconvolutional_layer(*l); |
| | | #endif |
| | | #endif |
| | | int i; |
| | | fprintf(fp, "[deconvolutional]\n"); |
| | | if(count == 0) { |
| | | fprintf(fp, "batch=%d\n" |
| | | "height=%d\n" |
| | | "width=%d\n" |
| | | "channels=%d\n" |
| | | "learning_rate=%g\n" |
| | | "momentum=%g\n" |
| | | "decay=%g\n" |
| | | "seen=%d\n", |
| | | l->batch,l->h, l->w, l->c, l->learning_rate, l->momentum, l->decay, net.seen); |
| | | } else { |
| | | if(l->learning_rate != net.learning_rate) |
| | | fprintf(fp, "learning_rate=%g\n", l->learning_rate); |
| | | if(l->momentum != net.momentum) |
| | | fprintf(fp, "momentum=%g\n", l->momentum); |
| | | if(l->decay != net.decay) |
| | | fprintf(fp, "decay=%g\n", l->decay); |
| | | } |
| | | fprintf(fp, "filters=%d\n" |
| | | "size=%d\n" |
| | | "stride=%d\n" |
| | |
| | | fprintf(fp, "\n\n"); |
| | | } |
| | | |
| | | void print_freeweight_cfg(FILE *fp, freeweight_layer *l, network net, int count) |
| | | { |
| | | fprintf(fp, "[freeweight]\n"); |
| | | if(count == 0){ |
| | | fprintf(fp, "batch=%d\ninput=%d\n",l->batch, l->inputs); |
| | | } |
| | | fprintf(fp, "\n"); |
| | | } |
| | | |
| | | void print_dropout_cfg(FILE *fp, dropout_layer *l, network net, int count) |
| | | { |
| | | fprintf(fp, "[dropout]\n"); |
| | | if(count == 0){ |
| | | fprintf(fp, "batch=%d\ninput=%d\n", l->batch, l->inputs); |
| | | } |
| | | fprintf(fp, "probability=%g\n\n", l->probability); |
| | | } |
| | | |
| | | void print_connected_cfg(FILE *fp, connected_layer *l, network net, int count) |
| | | { |
| | | #ifdef GPU |
| | | #ifdef GPU |
| | | if(gpu_index >= 0) pull_connected_layer(*l); |
| | | #endif |
| | | #endif |
| | | int i; |
| | | fprintf(fp, "[connected]\n"); |
| | | if(count == 0){ |
| | | fprintf(fp, "batch=%d\n" |
| | | "input=%d\n" |
| | | "learning_rate=%g\n" |
| | | "momentum=%g\n" |
| | | "decay=%g\n" |
| | | "seen=%d\n", |
| | | l->batch, l->inputs, l->learning_rate, l->momentum, l->decay, net.seen); |
| | | } else { |
| | | if(l->learning_rate != net.learning_rate) |
| | | fprintf(fp, "learning_rate=%g\n", l->learning_rate); |
| | | if(l->momentum != net.momentum) |
| | | fprintf(fp, "momentum=%g\n", l->momentum); |
| | | if(l->decay != net.decay) |
| | | fprintf(fp, "decay=%g\n", l->decay); |
| | | } |
| | | fprintf(fp, "output=%d\n" |
| | | "activation=%s\n", |
| | | l->outputs, |
| | |
| | | void print_crop_cfg(FILE *fp, crop_layer *l, network net, int count) |
| | | { |
| | | fprintf(fp, "[crop]\n"); |
| | | if(count == 0) { |
| | | fprintf(fp, "batch=%d\n" |
| | | "height=%d\n" |
| | | "width=%d\n" |
| | | "channels=%d\n" |
| | | "learning_rate=%g\n" |
| | | "momentum=%g\n" |
| | | "decay=%g\n" |
| | | "seen=%d\n", |
| | | l->batch,l->h, l->w, l->c, net.learning_rate, net.momentum, net.decay, net.seen); |
| | | } |
| | | fprintf(fp, "crop_height=%d\ncrop_width=%d\nflip=%d\n\n", l->crop_height, l->crop_width, l->flip); |
| | | } |
| | | |
| | | void print_maxpool_cfg(FILE *fp, maxpool_layer *l, network net, int count) |
| | | { |
| | | fprintf(fp, "[maxpool]\n"); |
| | | if(count == 0) fprintf(fp, "batch=%d\n" |
| | | "height=%d\n" |
| | | "width=%d\n" |
| | | "channels=%d\n", |
| | | l->batch,l->h, l->w, l->c); |
| | | fprintf(fp, "size=%d\nstride=%d\n\n", l->size, l->stride); |
| | | } |
| | | |
| | | void print_normalization_cfg(FILE *fp, normalization_layer *l, network net, int count) |
| | | { |
| | | fprintf(fp, "[localresponsenormalization]\n"); |
| | | if(count == 0) fprintf(fp, "batch=%d\n" |
| | | "height=%d\n" |
| | | "width=%d\n" |
| | | "channels=%d\n", |
| | | l->batch,l->h, l->w, l->c); |
| | | fprintf(fp, "size=%d\n" |
| | | "alpha=%g\n" |
| | | "beta=%g\n" |
| | |
| | | void print_softmax_cfg(FILE *fp, softmax_layer *l, network net, int count) |
| | | { |
| | | fprintf(fp, "[softmax]\n"); |
| | | if(count == 0) fprintf(fp, "batch=%d\ninput=%d\n", l->batch, l->inputs); |
| | | fprintf(fp, "\n"); |
| | | } |
| | | |
| | |
| | | void print_cost_cfg(FILE *fp, cost_layer *l, network net, int count) |
| | | { |
| | | fprintf(fp, "[cost]\ntype=%s\n", get_cost_string(l->type)); |
| | | if(count == 0) fprintf(fp, "batch=%d\ninput=%d\n", l->batch, l->inputs); |
| | | fprintf(fp, "\n"); |
| | | } |
| | | |
| | |
| | | for(i = 0; i < net.n; ++i){ |
| | | if(net.types[i] == CONVOLUTIONAL){ |
| | | convolutional_layer layer = *(convolutional_layer *) net.layers[i]; |
| | | #ifdef GPU |
| | | #ifdef GPU |
| | | if(gpu_index >= 0){ |
| | | pull_convolutional_layer(layer); |
| | | } |
| | | #endif |
| | | #endif |
| | | int num = layer.n*layer.c*layer.size*layer.size; |
| | | fwrite(layer.biases, sizeof(float), layer.n, fp); |
| | | fwrite(layer.filters, sizeof(float), num, fp); |
| | | } |
| | | if(net.types[i] == DECONVOLUTIONAL){ |
| | | deconvolutional_layer layer = *(deconvolutional_layer *) net.layers[i]; |
| | | #ifdef GPU |
| | | #ifdef GPU |
| | | if(gpu_index >= 0){ |
| | | pull_deconvolutional_layer(layer); |
| | | } |
| | | #endif |
| | | #endif |
| | | int num = layer.n*layer.c*layer.size*layer.size; |
| | | fwrite(layer.biases, sizeof(float), layer.n, fp); |
| | | fwrite(layer.filters, sizeof(float), num, fp); |
| | | } |
| | | if(net.types[i] == CONNECTED){ |
| | | connected_layer layer = *(connected_layer *) net.layers[i]; |
| | | #ifdef GPU |
| | | #ifdef GPU |
| | | if(gpu_index >= 0){ |
| | | pull_connected_layer(layer); |
| | | } |
| | | #endif |
| | | #endif |
| | | fwrite(layer.biases, sizeof(float), layer.outputs, fp); |
| | | fwrite(layer.weights, sizeof(float), layer.outputs*layer.inputs, fp); |
| | | } |
| | |
| | | fread(&net->momentum, sizeof(float), 1, fp); |
| | | fread(&net->decay, sizeof(float), 1, fp); |
| | | fread(&net->seen, sizeof(int), 1, fp); |
| | | set_learning_network(net, net->learning_rate, net->momentum, net->decay); |
| | | |
| | | |
| | | int i; |
| | | for(i = 0; i < net->n && i < cutoff; ++i){ |
| | | if(net->types[i] == CONVOLUTIONAL){ |
| | |
| | | int num = layer.n*layer.c*layer.size*layer.size; |
| | | fread(layer.biases, sizeof(float), layer.n, fp); |
| | | fread(layer.filters, sizeof(float), num, fp); |
| | | #ifdef GPU |
| | | #ifdef GPU |
| | | if(gpu_index >= 0){ |
| | | push_convolutional_layer(layer); |
| | | } |
| | | #endif |
| | | #endif |
| | | } |
| | | if(net->types[i] == DECONVOLUTIONAL){ |
| | | deconvolutional_layer layer = *(deconvolutional_layer *) net->layers[i]; |
| | | int num = layer.n*layer.c*layer.size*layer.size; |
| | | fread(layer.biases, sizeof(float), layer.n, fp); |
| | | fread(layer.filters, sizeof(float), num, fp); |
| | | #ifdef GPU |
| | | #ifdef GPU |
| | | if(gpu_index >= 0){ |
| | | push_deconvolutional_layer(layer); |
| | | } |
| | | #endif |
| | | #endif |
| | | } |
| | | if(net->types[i] == CONNECTED){ |
| | | connected_layer layer = *(connected_layer *) net->layers[i]; |
| | | fread(layer.biases, sizeof(float), layer.outputs, fp); |
| | | fread(layer.weights, sizeof(float), layer.outputs*layer.inputs, fp); |
| | | #ifdef GPU |
| | | #ifdef GPU |
| | | if(gpu_index >= 0){ |
| | | push_connected_layer(layer); |
| | | } |
| | | #endif |
| | | #endif |
| | | } |
| | | } |
| | | fclose(fp); |
| | |
| | | print_crop_cfg(fp, (crop_layer *)net.layers[i], net, i); |
| | | else if(net.types[i] == MAXPOOL) |
| | | print_maxpool_cfg(fp, (maxpool_layer *)net.layers[i], net, i); |
| | | else if(net.types[i] == FREEWEIGHT) |
| | | print_freeweight_cfg(fp, (freeweight_layer *)net.layers[i], net, i); |
| | | else if(net.types[i] == DROPOUT) |
| | | print_dropout_cfg(fp, (dropout_layer *)net.layers[i], net, i); |
| | | else if(net.types[i] == NORMALIZATION) |
| | |
| | | #include <stdio.h> |
| | | #include <assert.h> |
| | | |
| | | softmax_layer *make_softmax_layer(int batch, int groups, int inputs) |
| | | softmax_layer *make_softmax_layer(int batch, int inputs, int groups) |
| | | { |
| | | assert(inputs%groups == 0); |
| | | fprintf(stderr, "Softmax Layer: %d inputs\n", inputs); |
| | |
| | | } |
| | | } |
| | | |
| | | void forward_softmax_layer(const softmax_layer layer, float *input) |
| | | void forward_softmax_layer(const softmax_layer layer, network_state state) |
| | | { |
| | | int b; |
| | | int inputs = layer.inputs / layer.groups; |
| | | int batch = layer.batch * layer.groups; |
| | | for(b = 0; b < batch; ++b){ |
| | | softmax_array(input+b*inputs, inputs, layer.output+b*inputs); |
| | | softmax_array(state.input+b*inputs, inputs, layer.output+b*inputs); |
| | | } |
| | | } |
| | | |
| | | void backward_softmax_layer(const softmax_layer layer, float *delta) |
| | | void backward_softmax_layer(const softmax_layer layer, network_state state) |
| | | { |
| | | int i; |
| | | for(i = 0; i < layer.inputs*layer.batch; ++i){ |
| | | delta[i] = layer.delta[i]; |
| | | state.delta[i] = layer.delta[i]; |
| | | } |
| | | } |
| | | |
| | |
| | | #ifndef SOFTMAX_LAYER_H |
| | | #define SOFTMAX_LAYER_H |
| | | #include "params.h" |
| | | |
| | | typedef struct { |
| | | int inputs; |
| | |
| | | } softmax_layer; |
| | | |
| | | void softmax_array(float *input, int n, float *output); |
| | | softmax_layer *make_softmax_layer(int batch, int groups, int inputs); |
| | | void forward_softmax_layer(const softmax_layer layer, float *input); |
| | | void backward_softmax_layer(const softmax_layer layer, float *delta); |
| | | softmax_layer *make_softmax_layer(int batch, int inputs, int groups); |
| | | void forward_softmax_layer(const softmax_layer layer, network_state state); |
| | | void backward_softmax_layer(const softmax_layer layer, network_state state); |
| | | |
| | | #ifdef GPU |
| | | void pull_softmax_layer_output(const softmax_layer layer); |
| | | void forward_softmax_layer_gpu(const softmax_layer layer, float *input); |
| | | void backward_softmax_layer_gpu(const softmax_layer layer, float *delta); |
| | | void forward_softmax_layer_gpu(const softmax_layer layer, network_state state); |
| | | void backward_softmax_layer_gpu(const softmax_layer layer, network_state state); |
| | | #endif |
| | | |
| | | #endif |
| | |
| | | cuda_pull_array(layer.output_gpu, layer.output, layer.inputs*layer.batch); |
| | | } |
| | | |
| | | extern "C" void forward_softmax_layer_gpu(const softmax_layer layer, float *input) |
| | | extern "C" void forward_softmax_layer_gpu(const softmax_layer layer, network_state state) |
| | | { |
| | | int inputs = layer.inputs / layer.groups; |
| | | int batch = layer.batch * layer.groups; |
| | | forward_softmax_layer_kernel<<<cuda_gridsize(batch), BLOCK>>>(inputs, batch, input, layer.output_gpu); |
| | | forward_softmax_layer_kernel<<<cuda_gridsize(batch), BLOCK>>>(inputs, batch, state.input, layer.output_gpu); |
| | | check_error(cudaPeekAtLastError()); |
| | | |
| | | /* |
| | | cl_read_array(layer.output_cl, layer.output, layer.inputs*layer.batch); |
| | | int z; |
| | | for(z = 0; z < layer.inputs*layer.batch; ++z) printf("%f,",layer.output[z]); |
| | | */ |
| | | } |
| | | |
| | | extern "C" void backward_softmax_layer_gpu(const softmax_layer layer, float *delta) |
| | | extern "C" void backward_softmax_layer_gpu(const softmax_layer layer, network_state state) |
| | | { |
| | | copy_ongpu(layer.batch*layer.inputs, layer.delta_gpu, 1, delta, 1); |
| | | copy_ongpu(layer.batch*layer.inputs, layer.delta_gpu, 1, state.delta, 1); |
| | | } |
| | | |
| | | /* This is if you want softmax w/o log-loss classification. You probably don't. |