| | |
| | | fprintf(stderr, "\n"); |
| | | l.outputs = outputs; |
| | | l.inputs = outputs; |
| | | l.delta = calloc(outputs*batch, sizeof(float)); |
| | | l.delta = calloc(outputs*batch, sizeof(float)); |
| | | l.output = calloc(outputs*batch, sizeof(float));; |
| | | #ifdef GPU |
| | | l.delta_gpu = cuda_make_array(0, outputs*batch); |
| | | l.output_gpu = cuda_make_array(0, outputs*batch); |
| | | l.delta_gpu = cuda_make_array(l.delta, outputs*batch); |
| | | l.output_gpu = cuda_make_array(l.output, outputs*batch); |
| | | #endif |
| | | return l; |
| | | } |
| | |
| | | float *delta = net.layers[index].delta; |
| | | int input_size = l.input_sizes[i]; |
| | | for(j = 0; j < l.batch; ++j){ |
| | | copy_cpu(input_size, l.delta + offset + j*l.outputs, 1, delta + j*input_size, 1); |
| | | axpy_cpu(input_size, 1, l.delta + offset + j*l.outputs, 1, delta + j*input_size, 1); |
| | | } |
| | | offset += input_size; |
| | | } |
| | |
| | | float *delta = net.layers[index].delta_gpu; |
| | | int input_size = l.input_sizes[i]; |
| | | for(j = 0; j < l.batch; ++j){ |
| | | copy_ongpu(input_size, l.delta_gpu + offset + j*l.outputs, 1, delta + j*input_size, 1); |
| | | axpy_ongpu(input_size, 1, l.delta_gpu + offset + j*l.outputs, 1, delta + j*input_size, 1); |
| | | } |
| | | offset += input_size; |
| | | } |