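Added BFLOPs (billions of floating-point operations) output for network configurations: each convolutional and max-pooling layer now prints its BFLOPs, followed by a network total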
| | |
| | | l.workspace_size = get_workspace_size(l); |
| | | l.activation = activation; |
| | | |
| | | fprintf(stderr, "conv %5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c); |
| | | //fprintf(stderr, "conv %5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c); |
| | | l.bflops = (2.0 * l.n * l.size*l.size*l.c * l.out_h*l.out_w) / 1000000000.; |
| | | fprintf(stderr, "conv %5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d %5.3f BF\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c, l.bflops); |
| | | |
| | | return l; |
| | | } |
| | |
| | | int tanh; |
| | | int *mask; |
| | | int total; |
| | | float bflops; |
| | | |
| | | int adam; |
| | | float B1; |
| | |
| | | l.output_gpu = cuda_make_array(l.output, output_size); |
| | | l.delta_gpu = cuda_make_array(l.delta, output_size); |
| | | #endif |
| | | fprintf(stderr, "max %d x %d / %d %4d x%4d x%4d -> %4d x%4d x%4d\n", size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c); |
| | | l.bflops = (l.size*l.size*l.c * l.out_h*l.out_w) / 1000000000.; |
| | | fprintf(stderr, "max %d x %d / %d %4d x%4d x%4d -> %4d x%4d x%4d %5.3f BF\n", size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c, l.bflops); |
| | | return l; |
| | | } |
| | | |
| | |
| | | params.time_steps = net.time_steps; |
| | | params.net = net; |
| | | |
| | | float bflops = 0; |
| | | size_t workspace_size = 0; |
| | | n = n->next; |
| | | int count = 0; |
| | |
| | | fprintf(stderr, "layer filters size input output\n"); |
| | | while(n){ |
| | | params.index = count; |
| | | fprintf(stderr, "%5d ", count); |
| | | fprintf(stderr, "%4d ", count); |
| | | s = (section *)n->val; |
| | | options = s->options; |
| | | layer l = {0}; |
| | |
| | | params.c = l.out_c; |
| | | params.inputs = l.outputs; |
| | | } |
| | | if (l.bflops > 0) bflops += l.bflops; |
| | | } |
| | | free_list(sections); |
| | | net.outputs = get_network_output_size(net); |
| | | net.output = get_network_output(net); |
| | | printf("Total BFLOPS %5.3f \n", bflops); |
| | | if(workspace_size){ |
| | | //printf("%ld\n", workspace_size); |
| | | #ifdef GPU |