Fixed batch stuff in conv layer
| | |
| | | |
| | | image edge = make_image((dog.h-size)/stride+1, (dog.w-size)/stride+1, n); |
| | | |
| | | |
| | | int i; |
| | | clock_t start = clock(), end; |
| | | for(i = 0; i < 1000; ++i){ |
| | | im2col_cpu(dog.data, 1, dog.c, dog.h, dog.w, size, stride, 0, matrix); |
| | | im2col_cpu(dog.data, dog.c, dog.h, dog.w, size, stride, 0, matrix); |
| | | gemm(0,0,n,mw,mh,1,filters,mh,matrix,mw,1,edge.data,mw); |
| | | } |
| | | end = clock(); |
| | |
| | | clock_t start = clock(), end; |
| | | float loss = train_network_sgd(net, train, iters, lr, momentum, decay); |
| | | end = clock(); |
| | | //float test_acc = network_accuracy(net, test); |
| | | float test_acc = 0; |
| | | float test_acc = network_accuracy(net, test); |
| | | //float test_acc = 0; |
| | | printf("%d: Loss: %f, Test Acc: %f, Time: %lf seconds, LR: %f, Momentum: %f, Decay: %f\n", count, loss, test_acc,(float)(end-start)/CLOCKS_PER_SEC, lr, momentum, decay); |
| | | |
| | | //printf("%5d Training Loss: %lf, Params: %f %f %f, ",count*1000, loss, lr, momentum, decay); |
| | |
| | | float *matrix = calloc(msize, sizeof(float)); |
| | | int i; |
| | | for(i = 0; i < 1000; ++i){ |
| | | im2col_cpu(test.data, 1, c, h, w, size, stride, 0, matrix); |
| | | im2col_cpu(test.data, c, h, w, size, stride, 0, matrix); |
| | | //image render = float_to_image(mh, mw, mc, matrix); |
| | | } |
| | | } |
| | |
| | | } |
| | | //This one might be too, can't remember. |
| | | void col2im_cpu(float* data_col, |
| | | const int batch, const int channels, const int height, const int width, |
| | | const int channels, const int height, const int width, |
| | | const int ksize, const int stride, int pad, float* data_im) |
| | | { |
| | | int c,h,w,b; |
| | | int c,h,w; |
| | | int height_col = (height - ksize) / stride + 1; |
| | | int width_col = (width - ksize) / stride + 1; |
| | | if (pad){ |
| | |
| | | pad = ksize/2; |
| | | } |
| | | int channels_col = channels * ksize * ksize; |
| | | int im_size = height*width*channels; |
| | | int col_size = height_col*width_col*channels_col; |
| | | for (b = 0; b < batch; ++b) { |
| | | for (c = 0; c < channels_col; ++c) { |
| | | int w_offset = c % ksize; |
| | | int h_offset = (c / ksize) % ksize; |
| | | int c_im = c / ksize / ksize; |
| | | for (h = 0; h < height_col; ++h) { |
| | | for (w = 0; w < width_col; ++w) { |
| | | int im_row = h_offset + h * stride; |
| | | int im_col = w_offset + w * stride; |
| | | double val = data_col[(c * height_col + h) * width_col + w]; |
| | | col2im_set_pixel(data_im, height, width, channels, |
| | | im_row, im_col, c_im, pad, val); |
| | | } |
| | | for (c = 0; c < channels_col; ++c) { |
| | | int w_offset = c % ksize; |
| | | int h_offset = (c / ksize) % ksize; |
| | | int c_im = c / ksize / ksize; |
| | | for (h = 0; h < height_col; ++h) { |
| | | for (w = 0; w < width_col; ++w) { |
| | | int im_row = h_offset + h * stride; |
| | | int im_col = w_offset + w * stride; |
| | | double val = data_col[(c * height_col + h) * width_col + w]; |
| | | col2im_set_pixel(data_im, height, width, channels, |
| | | im_row, im_col, c_im, pad, val); |
| | | } |
| | | } |
| | | data_im += im_size; |
| | | data_col+= col_size; |
| | | } |
| | | } |
| | | |
| | |
| | | layer->bias_updates_cl = cl_make_array(layer->bias_updates, n); |
| | | layer->bias_momentum_cl = cl_make_array(layer->bias_momentum, n); |
| | | |
| | | layer->col_image_cl = cl_make_array(layer->col_image, layer->batch*out_h*out_w*size*size*c); |
| | | layer->col_image_cl = cl_make_array(layer->col_image, layer.batch*out_h*out_w*size*size*c); |
| | | layer->delta_cl = cl_make_array(layer->delta, layer->batch*out_h*out_w*n); |
| | | layer->output_cl = cl_make_array(layer->output, layer->batch*out_h*out_w*n); |
| | | #endif |
| | |
| | | { |
| | | int out_h = convolutional_out_height(layer); |
| | | int out_w = convolutional_out_width(layer); |
| | | int i; |
| | | |
| | | bias_output(layer); |
| | | |
| | | int m = layer.n; |
| | | int k = layer.size*layer.size*layer.c; |
| | | int n = out_h*out_w*layer.batch; |
| | | int n = out_h*out_w; |
| | | |
| | | float *a = layer.filters; |
| | | float *b = layer.col_image; |
| | | float *c = layer.output; |
| | | im2col_cpu(in, layer.batch, layer.c, layer.h, layer.w, |
| | | layer.size, layer.stride, layer.pad, b); |
| | | bias_output(layer); |
| | | gemm(0,0,m,n,k,1,a,k,b,n,1,c,n); |
| | | |
| | | for(i = 0; i < layer.batch; ++i){ |
| | | im2col_cpu(in, layer.c, layer.h, layer.w, |
| | | layer.size, layer.stride, layer.pad, b); |
| | | gemm(0,0,m,n,k,1,a,k,b,n,1,c,n); |
| | | c += n*m; |
| | | in += layer.h*layer.w*layer.c; |
| | | b += k*n; |
| | | } |
| | | /* |
| | | int i; |
| | | for(i = 0; i < m*n; ++i) printf("%f, ", layer.output[i]); |
| | | printf("\n"); |
| | | */ |
| | | activate_array(layer.output, m*n, layer.activation, 0.); |
| | | activate_array(layer.output, m*n*layer.batch, layer.activation, 0.); |
| | | } |
| | | |
| | | #ifdef GPU |
| | |
| | | |
| | | void backward_convolutional_layer(convolutional_layer layer, float *delta) |
| | | { |
| | | int i; |
| | | int m = layer.n; |
| | | int n = layer.size*layer.size*layer.c; |
| | | int k = convolutional_out_height(layer)* |
| | | convolutional_out_width(layer)* |
| | | layer.batch; |
| | | gradient_array(layer.output, m*k, layer.activation, layer.delta); |
| | | convolutional_out_width(layer); |
| | | gradient_array(layer.output, m*k*layer.batch, layer.activation, layer.delta); |
| | | learn_bias_convolutional_layer(layer); |
| | | |
| | | float *a = layer.delta; |
| | | float *b = layer.col_image; |
| | | float *c = layer.filter_updates; |
| | | |
| | | gemm(0,1,m,n,k,1,a,k,b,k,1,c,n); |
| | | for(i = 0; i < layer.batch; ++i){ |
| | | gemm(0,1,m,n,k,1,a,k,b,k,1,c,n); |
| | | a += m*k; |
| | | b += k*n; |
| | | } |
| | | |
| | | if(delta){ |
| | | m = layer.size*layer.size*layer.c; |
| | | k = layer.n; |
| | | n = convolutional_out_height(layer)* |
| | | convolutional_out_width(layer)* |
| | | layer.batch; |
| | | convolutional_out_width(layer); |
| | | |
| | | a = layer.filters; |
| | | b = layer.delta; |
| | | c = layer.col_image; |
| | | |
| | | gemm(1,0,m,n,k,1,a,m,b,n,0,c,n); |
| | | |
| | | memset(delta, 0, layer.batch*layer.h*layer.w*layer.c*sizeof(float)); |
| | | col2im_cpu(c, layer.batch, layer.c, layer.h, layer.w, layer.size, layer.stride, layer.pad, delta); |
| | | |
| | | for(i = 0; i < layer.batch; ++i){ |
| | | gemm(1,0,m,n,k,1,a,m,b,n,0,c,n); |
| | | col2im_cpu(c, layer.c, layer.h, layer.w, layer.size, layer.stride, layer.pad, delta); |
| | | c += k*n; |
| | | delta += layer.h*layer.w*layer.c; |
| | | } |
| | | } |
| | | } |
| | | |
| | |
| | | |
| | | //From Berkeley Vision's Caffe! |
| | | //https://github.com/BVLC/caffe/blob/master/LICENSE |
| | | void im2col_cpu(float* data_im, |
| | | void im2col_cpu_batch(float* data_im, |
| | | const int batch, const int channels, const int height, const int width, |
| | | const int ksize, const int stride, int pad, float* data_col) |
| | | { |
| | |
| | | } |
| | | } |
| | | |
| | | //From Berkeley Vision's Caffe! |
| | | //https://github.com/BVLC/caffe/blob/master/LICENSE |
| | | void im2col_cpu(float* data_im, |
| | | const int channels, const int height, const int width, |
| | | const int ksize, const int stride, int pad, float* data_col) |
| | | { |
| | | int c,h,w; |
| | | int height_col = (height - ksize) / stride + 1; |
| | | int width_col = (width - ksize) / stride + 1; |
| | | if (pad){ |
| | | height_col = 1 + (height-1) / stride; |
| | | width_col = 1 + (width-1) / stride; |
| | | pad = ksize/2; |
| | | } |
| | | int channels_col = channels * ksize * ksize; |
| | | for (c = 0; c < channels_col; ++c) { |
| | | int w_offset = c % ksize; |
| | | int h_offset = (c / ksize) % ksize; |
| | | int c_im = c / ksize / ksize; |
| | | for (h = 0; h < height_col; ++h) { |
| | | for (w = 0; w < width_col; ++w) { |
| | | int im_row = h_offset + h * stride; |
| | | int im_col = w_offset + w * stride; |
| | | int col_index = (c * height_col + h) * width_col + w; |
| | | data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, |
| | | im_row, im_col, c_im, pad); |
| | | } |
| | | } |
| | | } |
| | | } |
| | | |
| | | |
| | | #ifdef GPU |
| | | |
| | |
| | | #endif |
| | | |
| | | void im2col_cpu(float* data_im, |
| | | const int batch, const int channels, const int height, const int width, |
| | | const int channels, const int height, const int width, |
| | | const int ksize, const int stride, int pad, float* data_col); |
| | | |
| | | void col2im_cpu(float* data_col, |
| | | const int batch, const int channels, const int height, const int width, |
| | | const int channels, const int height, const int width, |
| | | const int ksize, const int stride, int pad, float* data_im); |
| | | void test_blas(); |
| | | |
| | |
| | | //printf("%5.2f %5.2f, ", out[i], truth[i]); |
| | | //if(i == get_network_output_size(net)) printf("\n"); |
| | | delta[i] = truth[i] - out[i]; |
| | | //printf("%f, ", delta[i]); |
| | | //printf("%.10f, ", out[i]); |
| | | sum += delta[i]*delta[i]; |
| | | } |
| | | //printf("\n"); |