Midway through lots of fixes, checkpoint
float activate(float x, ACTIVATION a, float dropout)
{
    // Guard on dropout so rand() is never consumed when dropout is disabled.
    if(dropout && (float)rand()/RAND_MAX < dropout) return 0;
    switch(a){
        case LINEAR:
            // Inverted dropout: scale survivors by 1/(1-dropout) so the
            // expected activation matches the no-dropout case.
            return linear_activate(x)/(1-dropout);
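The 1/(1-dropout) factor above is the usual inverted-dropout rescaling: dropping a unit with probability p and dividing survivors by 1-p leaves the expected activation unchanged. A minimal standalone sketch of that property (the helper name and the Monte Carlo check are illustrative, not part of this codebase):

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical helper mirroring the activate() pattern above: drop with
 * probability p, otherwise rescale so E[drop_activate(x, p)] == x. */
float drop_activate(float x, float p)
{
    if(p && (float)rand()/RAND_MAX < p) return 0;
    return x/(1-p);
}

int main()
{
    int i;
    float sum = 0;
    srand(0);
    for(i = 0; i < 1000000; ++i) sum += drop_activate(1.0f, 0.5f);
    // Prints a value close to 1.0: dropout did not change the mean.
    printf("mean activation: %f\n", sum/1000000);
    return 0;
}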
void test_nist()
{
    srand(222222);
    network net = parse_network_cfg("cfg/nist.cfg");
    data train = load_categorical_data_csv("data/mnist/mnist_train.csv", 0, 10);
    data test = load_categorical_data_csv("data/mnist/mnist_test.csv", 0, 10); // assumed path, mirroring the train CSV
    normalize_data_rows(train);
    normalize_data_rows(test);
    //randomize_data(train);
    int count = 0;
    float lr = .0001;
    float momentum = .9;
    float decay = 0;
    int iters = 1000;
    while(++count <= 10){
        clock_t start = clock(), end;
        float loss = train_network_sgd(net, train, iters, lr, momentum, decay);
        end = clock();
        //float test_acc = network_accuracy(net, test);
        float test_acc = 0; // accuracy check disabled at this checkpoint
        printf("%d: Loss: %f, Test Acc: %f, Time: %lf seconds, LR: %f, Momentum: %f, Decay: %f\n", count, loss, test_acc, (float)(end-start)/CLOCKS_PER_SEC, lr, momentum, decay);
    }
}
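Since the accuracy check is stubbed out above, here is a minimal sketch of what such a check could look like, built only from calls that appear in this checkpoint (forward_network, get_predicted_class_network). Their exact semantics, and the 0 "train" flag, are assumptions; this is not the actual network_accuracy implementation.

float accuracy_sketch(network net, data d)
{
    int i;
    int correct = 0;
    for(i = 0; i < d.X.rows; ++i){
        // Assumes the third argument is a train flag; 0 disables dropout.
        forward_network(net, d.X.vals[i], 0);
        int class = get_predicted_class_network(net);
        // One-hot labels: y[class] is nonzero iff the prediction is right.
        if(d.y.vals[i][class]) ++correct;
    }
    return (float)correct/d.X.rows;
}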
    int i;
    for(i = 0; i < layer.outputs*layer.batch; ++i){
        layer.delta[i] *= gradient(layer.output[i], layer.activation);
        // Bias gradients accumulate per output neuron, hence i % layer.outputs.
        layer.bias_updates[i%layer.outputs] += layer.delta[i];
    }
    int m = layer.inputs;
    int k = layer.batch;
    int n = layer.outputs; // inferred: c below has layer.outputs columns

    float *a = input;
    float *b = layer.delta;
    float *c = layer.weight_updates;
    // weight_updates (inputs x outputs) += input^T (inputs x batch) * delta
    // (batch x outputs); A is transposed, so its leading dimension is m.
    gemm(1,0,m,n,k,1,a,m,b,n,1,c,n);

    m = layer.batch;
    k = layer.outputs;
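The gemm argument order used throughout this checkpoint appears to be (TA, TB, M, N, K, ALPHA, A, lda, B, ldb, BETA, C, ldc) with row-major storage; that is inferred from the call sites, not documented anywhere in these hunks. A small self-check under that assumption:

#include "mini_blas.h"
#include <stdio.h>

int main()
{
    // A is 2x3, B is 3x2, C is 2x2 and accumulates A*B since BETA = 1.
    float a[] = {1,2,3, 4,5,6};
    float b[] = {7,8, 9,10, 11,12};
    float c[] = {0,0, 0,0};
    gemm(0,0, 2,2,3, 1, a,3, b,2, 1, c,2);
    // Expected output: 58 64 / 139 154.
    printf("%.0f %.0f\n%.0f %.0f\n", c[0], c[1], c[2], c[3]);
    return 0;
}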
    layer->activation = activation;

    fprintf(stderr, "Convolutional Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n", h,w,c,n, out_h, out_w, n);
    srand(0);

    return layer;
}
            layer.size, layer.stride, layer.pad, b);
    bias_output(layer);
    // A holds the m x k filter matrix, B the k x n im2col matrix.
    gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
    /*
    int i;
    for(i = 0; i < m*n; ++i) printf("%f, ", layer.output[i]);
    printf("\n");
    */
    activate_array(layer.output, m*n, layer.activation, 0.);
}
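For reference, how the m, n, k above would be derived for a convolution lowered to gemm via im2col. The concrete layer sizes here are illustrative assumptions; only the shape relationships come from this code, and n spans all batch images because the batched im2col below lays each row out as batch*out_h*out_w columns.

#include <stdio.h>

int main()
{
    // Hypothetical layer: 28x28x1 input, 5x5 kernel, stride 1, no pad,
    // 16 filters, batch of 4.
    int batch = 4, c = 1, h = 28, w = 28, size = 5, stride = 1, filters = 16;
    int out_h = (h - size)/stride + 1;
    int out_w = (w - size)/stride + 1;
    // Filters form an m x k matrix, the im2col buffer a k x n matrix,
    // and the layer output the m x n product.
    int m = filters;
    int k = size*size*c;
    int n = out_h*out_w*batch;
    printf("gemm dims: m=%d n=%d k=%d\n", m, n, k);
    return 0;
}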
#include "mini_blas.h"
#include <stdio.h>

inline float im2col_get_pixel(float *im, int height, int width, int channels,
        int row, int col, int channel, int pad)
{
    // Assumed body, filled in from its call sites: shift by the padding and
    // treat reads that fall outside the image as zeros.
    row -= pad;
    col -= pad;
    if (row < 0 || col < 0 || row >= height || col >= width) return 0;
    return im[col + width*(row + channel*height)];
}

    int channels_col = channels * ksize * ksize;
    int im_size = height*width*channels;
    for (b = 0; b < batch; ++b) {
        for (c = 0; c < channels_col; ++c) {
            int w_offset = c % ksize;
            int h_offset = (c / ksize) % ksize;
            int c_im = c / ksize / ksize;
            for (h = 0; h < height_col; ++h) {
                for (w = 0; w < width_col; ++w) {
                    int im_row = h_offset + h * stride;
                    int im_col = w_offset + w * stride;
                    // Row c of the output matrix holds batch*height_col*width_col
                    // entries; this index lands in image b's block of that row.
                    int col_index = (c * height_col + h) * width_col + w + (batch-1) * c * height_col*width_col;
                    data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,
                            im_row, im_col, c_im, pad);
                }
            }
        }
        data_im += im_size;
        // Advance to the next image's block of columns: height_col*width_col,
        // not channels_col, so image b starts at offset b*height_col*width_col.
        data_col += height_col*width_col;
    }
}
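A quick standalone check of the batched indexing above: relative to the start of the buffer, the expression simplifies to c*(batch*height_col*width_col) + b*(height_col*width_col) + h*width_col + w, i.e. row c with a row stride covering all batch images. No darknet code is needed to verify the algebra:

#include <assert.h>
#include <stdio.h>

int main()
{
    int batch = 4, channels_col = 9, height_col = 3, width_col = 5;
    int hw = height_col*width_col;
    int b, c, h, w;
    for (b = 0; b < batch; ++b)
    for (c = 0; c < channels_col; ++c)
    for (h = 0; h < height_col; ++h)
    for (w = 0; w < width_col; ++w) {
        // Index as computed in im2col_cpu, plus the b*hw base offset from
        // advancing data_col once per image.
        int col_index = (c * height_col + h) * width_col + w
            + (batch-1) * c * hw + b * hw;
        // Same location, written as row c / image block b / pixel (h,w).
        int expected = c * batch * hw + b * hw + h * width_col + w;
        assert(col_index == expected);
    }
    printf("batched im2col index layout checks out\n");
    return 0;
}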
    for(i = 0; i < get_network_output_size(net)*net.batch; ++i){
        // Squared-error delta: the gradient that pushes each output toward
        // its target; summing delta^2 gives the batch loss.
        delta[i] = truth[i] - out[i];
        sum += delta[i]*delta[i];
    }

}
float train_network_batch(network net, data d, int n, float step, float momentum, float decay)
{
    int i,j;
    float sum = 0;
    int batch = 2;
    for(i = 0; i < n; ++i){
        // Accumulate gradients over a mini-batch of random examples...
        for(j = 0; j < batch; ++j){
            int index = rand()%d.X.rows;
            float *x = d.X.vals[index];
            float *y = d.y.vals[index];
            forward_network(net, x, 1);
            sum += backward_network(net, x, y);
        }
        // ...then apply one weight update per mini-batch.
        update_network(net, step, momentum, decay);
    }
    return (float)sum/(n*batch);
}
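A usage sketch for the revised train_network_batch, reusing only loaders and hyperparameters that appear in test_nist above (the step count of 100 is illustrative):

// Inside a harness like test_nist():
network net = parse_network_cfg("cfg/nist.cfg");
data train = load_categorical_data_csv("data/mnist/mnist_train.csv", 0, 10);
normalize_data_rows(train);
// 100 mini-batch updates; returns the mean per-example loss.
float avg_loss = train_network_batch(net, train, 100, .0001, .9, 0);
printf("avg loss: %f\n", avg_loss);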