Maxpool fixed; good results on MNIST
| | |
| | | void test_nist_single() |
| | | { |
| | | srand(222222); |
| | | network net = parse_network_cfg("cfg/nist.cfg"); |
| | | network net = parse_network_cfg("cfg/nist_single.cfg"); |
| | | data train = load_categorical_data_csv("data/mnist/mnist_tiny.csv", 0, 10); |
| | | normalize_data_rows(train); |
| | | float loss = train_network_sgd(net, train, 5); |
| | | float loss = train_network_sgd(net, train, 1); |
| | | printf("Loss: %f, LR: %f, Momentum: %f, Decay: %f\n", loss, net.learning_rate, net.momentum, net.decay); |
| | | |
| | | } |
| | |
| | | data train = load_categorical_data_csv("data/mnist/mnist_train.csv", 0, 10); |
| | | data test = load_categorical_data_csv("data/mnist/mnist_test.csv",0,10); |
| | | translate_data_rows(train, -144); |
| | | scale_data_rows(train, 1./128); |
| | | //scale_data_rows(train, 1./128); |
| | | translate_data_rows(test, -144); |
| | | scale_data_rows(test, 1./128); |
| | | //scale_data_rows(test, 1./128); |
| | | //randomize_data(train); |
| | | int count = 0; |
| | | //clock_t start = clock(), end; |
| | | int iters = 10000/net.batch; |
| | | while(++count <= 100){ |
| | | while(++count <= 2000){ |
| | | clock_t start = clock(), end; |
| | | float loss = train_network_sgd(net, train, iters); |
| | | end = clock(); |
| | | float test_acc = network_accuracy(net, test); |
| | | //float test_acc = 0; |
| | | printf("%d: Loss: %f, Test Acc: %f, Time: %lf seconds, LR: %f, Momentum: %f, Decay: %f\n", count, loss, test_acc,(float)(end-start)/CLOCKS_PER_SEC, net.learning_rate, net.momentum, net.decay); |
| | | /*printf("%f %f %f %f %f\n", mean_array(get_network_output_layer(net,0), 100), |
| | | mean_array(get_network_output_layer(net,1), 100), |
| | | mean_array(get_network_output_layer(net,2), 100), |
| | | mean_array(get_network_output_layer(net,3), 100), |
| | | mean_array(get_network_output_layer(net,4), 100)); |
| | | */ |
| | | //save_network(net, "cfg/nist_basic_trained.cfg"); |
| | | |
| | | //printf("%5d Training Loss: %lf, Params: %f %f %f, ",count*1000, loss, lr, momentum, decay); |
| | |
| | | { |
| | | //train_full(); |
| | | //test_distribution(); |
| | | //feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW); |
| | | feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW); |
| | | |
| | | //test_blas(); |
| | | //test_visualize(); |
| | |
| | | layer->weight_momentum = calloc(inputs*outputs, sizeof(float)); |
| | | layer->weights = calloc(inputs*outputs, sizeof(float)); |
| | | float scale = 1./inputs; |
| | | //scale = .01; |
| | | scale = .05; |
| | | for(i = 0; i < inputs*outputs; ++i) |
| | | layer->weights[i] = scale*(rand_uniform()-.5); |
| | | layer->weights[i] = scale*2*(rand_uniform()-.5); |
| | | |
| | | layer->bias_updates = calloc(outputs, sizeof(float)); |
| | | layer->bias_adapt = calloc(outputs, sizeof(float)); |
| | |
| | | layer->bias_updates = calloc(n, sizeof(float)); |
| | | layer->bias_momentum = calloc(n, sizeof(float)); |
| | | float scale = 1./(size*size*c); |
| | | //scale = .0001; |
| | | for(i = 0; i < c*n*size*size; ++i) layer->filters[i] = scale*(rand_uniform()-.5); |
| | | scale = .05; |
| | | for(i = 0; i < c*n*size*size; ++i) layer->filters[i] = scale*2*(rand_uniform()-.5); |
| | | for(i = 0; i < n; ++i){ |
| | | //layer->biases[i] = rand_normal()*scale + scale; |
| | | layer->biases[i] = .5; |
| | |
| | | layer->c = c; |
| | | layer->size = size; |
| | | layer->stride = stride; |
| | | layer->max_indexes = calloc(((h-1)/stride+1) * ((w-1)/stride+1) * c*batch, sizeof(int)); |
| | | layer->output = calloc(((h-1)/stride+1) * ((w-1)/stride+1) * c*batch, sizeof(float)); |
| | | layer->delta = calloc(((h-1)/stride+1) * ((w-1)/stride+1) * c*batch, sizeof(float)); |
| | | return layer; |
| | |
| | | layer->delta = realloc(layer->delta, ((h-1)/layer->stride+1) * ((w-1)/layer->stride+1) * c * layer->batch*sizeof(float)); |
| | | } |
| | | |
| | | float get_max_region(image im, int h, int w, int c, int size) |
| | | { |
| | | int i,j; |
| | | int lower = (-size-1)/2 + 1; |
| | | int upper = size/2 + 1; |
| | | |
| | | int lh = (h-lower < 0) ? 0 : h-lower; |
| | | int uh = (h+upper > im.h) ? im.h : h+upper; |
| | | |
| | | int lw = (w-lower < 0) ? 0 : w-lower; |
| | | int uw = (w+upper > im.w) ? im.w : w+upper; |
| | | |
| | | //printf("%d\n", -3/2); |
| | | //printf("%d %d\n", lower, upper); |
| | | //printf("%d %d %d %d\n", lh, uh, lw, uw); |
| | | |
| | | float max = -FLT_MAX; |
| | | for(i = lh; i < uh; ++i){ |
| | | for(j = lw; j < uw; ++j){ |
| | | float val = get_pixel(im, i, j, c); |
| | | if (val > max) max = val; |
| | | } |
| | | } |
| | | return max; |
| | | } |
| | | |
| | | void forward_maxpool_layer(const maxpool_layer layer, float *in) |
| | | void forward_maxpool_layer(const maxpool_layer layer, float *input) |
| | | { |
| | | int b; |
| | | for(b = 0; b < layer.batch; ++b){ |
| | | image input = float_to_image(layer.h, layer.w, layer.c, in+b*layer.h*layer.w*layer.c); |
| | | |
| | | int h = (layer.h-1)/layer.stride + 1; |
| | | int w = (layer.w-1)/layer.stride + 1; |
| | | int c = layer.c; |
| | | image output = float_to_image(h,w,c,layer.output+b*h*w*c); |
| | | |
| | | int i,j,k; |
| | | for(k = 0; k < input.c; ++k){ |
| | | for(i = 0; i < input.h; i += layer.stride){ |
| | | for(j = 0; j < input.w; j += layer.stride){ |
| | | float max = get_max_region(input, i, j, k, layer.size); |
| | | set_pixel(output, i/layer.stride, j/layer.stride, k, max); |
| | | int i,j,k,l,m; |
| | | for(k = 0; k < layer.c; ++k){ |
| | | for(i = 0; i < layer.h; i += layer.stride){ |
| | | for(j = 0; j < layer.w; j += layer.stride){ |
| | | int out_index = j/layer.stride + w*(i/layer.stride + h*(k + c*b)); |
| | | layer.output[out_index] = -FLT_MAX; |
| | | int lower = (-layer.size-1)/2 + 1; |
| | | int upper = layer.size/2 + 1; |
| | | |
| | | int lh = (i+lower < 0) ? 0 : i+lower; |
| | | int uh = (i+upper > layer.h) ? layer.h : i+upper; |
| | | |
| | | int lw = (j+lower < 0) ? 0 : j+lower; |
| | | int uw = (j+upper > layer.w) ? layer.w : j+upper; |
| | | for(l = lh; l < uh; ++l){ |
| | | for(m = lw; m < uw; ++m){ |
| | | //printf("%d %d\n", l, m); |
| | | int index = m + layer.w*(l + layer.h*(k + b*layer.c)); |
| | | if(input[index] > layer.output[out_index]){ |
| | | layer.output[out_index] = input[index]; |
| | | layer.max_indexes[out_index] = index; |
| | | } |
| | | } |
| | | } |
| | | } |
| | | } |
| | | } |
| | | } |
| | | } |
| | | |
| | | float set_max_region_delta(image im, image delta, int h, int w, int c, int size, float max, float error) |
| | | void backward_maxpool_layer(const maxpool_layer layer, float *input, float *delta) |
| | | { |
| | | int i,j; |
| | | int lower = (-size-1)/2 + 1; |
| | | int upper = size/2 + 1; |
| | | |
| | | int lh = (h-lower < 0) ? 0 : h-lower; |
| | | int uh = (h+upper > im.h) ? im.h : h+upper; |
| | | |
| | | int lw = (w-lower < 0) ? 0 : w-lower; |
| | | int uw = (w+upper > im.w) ? im.w : w+upper; |
| | | |
| | | for(i = lh; i < uh; ++i){ |
| | | for(j = lw; j < uw; ++j){ |
| | | float val = get_pixel(im, i, j, c); |
| | | if (val == max){ |
| | | add_pixel(delta, i, j, c, error); |
| | | } |
| | | } |
| | | } |
| | | return max; |
| | | } |
| | | |
| | | void backward_maxpool_layer(const maxpool_layer layer, float *in, float *delta) |
| | | { |
| | | int b; |
| | | for(b = 0; b < layer.batch; ++b){ |
| | | image input = float_to_image(layer.h, layer.w, layer.c, in+b*layer.h*layer.w*layer.c); |
| | | image input_delta = float_to_image(layer.h, layer.w, layer.c, delta+b*layer.h*layer.w*layer.c); |
| | | int i; |
| | | int h = (layer.h-1)/layer.stride + 1; |
| | | int w = (layer.w-1)/layer.stride + 1; |
| | | int c = layer.c; |
| | | image output = float_to_image(h,w,c,layer.output+b*h*w*c); |
| | | image output_delta = float_to_image(h,w,c,layer.delta+b*h*w*c); |
| | | zero_image(input_delta); |
| | | |
| | | int i,j,k; |
| | | for(k = 0; k < input.c; ++k){ |
| | | for(i = 0; i < input.h; i += layer.stride){ |
| | | for(j = 0; j < input.w; j += layer.stride){ |
| | | float max = get_pixel(output, i/layer.stride, j/layer.stride, k); |
| | | float error = get_pixel(output_delta, i/layer.stride, j/layer.stride, k); |
| | | set_max_region_delta(input, input_delta, i, j, k, layer.size, max, error); |
| | | } |
| | | } |
| | | } |
| | | memset(delta, 0, layer.batch*layer.h*layer.w*layer.c*sizeof(float)); |
| | | for(i = 0; i < h*w*c*layer.batch; ++i){ |
| | | int index = layer.max_indexes[i]; |
| | | delta[index] += layer.delta[i]; |
| | | } |
| | | } |
| | | |
| | |
| | | int h,w,c; |
| | | int stride; |
| | | int size; |
| | | int *max_indexes; |
| | | float *delta; |
| | | float *output; |
| | | } maxpool_layer; |
| | |
| | | image get_maxpool_image(maxpool_layer layer); |
| | | maxpool_layer *make_maxpool_layer(int batch, int h, int w, int c, int size, int stride); |
| | | void resize_maxpool_layer(maxpool_layer *layer, int h, int w, int c); |
| | | void forward_maxpool_layer(const maxpool_layer layer, float *in); |
| | | void backward_maxpool_layer(const maxpool_layer layer, float *in, float *delta); |
| | | void forward_maxpool_layer(const maxpool_layer layer, float *input); |
| | | void backward_maxpool_layer(const maxpool_layer layer, float *input, float *delta); |
| | | |
| | | #endif |
| | | |