| | |
| | | cuda_free(l->output_gpu); |
| | | cuda_free(l->delta_gpu); |
| | | l->indexes_gpu = cuda_make_int_array(output_size); |
| | | l->output_gpu = cuda_make_array(0, output_size); |
| | | l->delta_gpu = cuda_make_array(0, output_size); |
| | | l->output_gpu = cuda_make_array(l->output, output_size); |
| | | l->delta_gpu = cuda_make_array(l->delta, output_size); |
| | | #endif |
| | | } |
| | | |
| | |
| | | int h = (l.h-1)/l.stride + 1; |
| | | int w = (l.w-1)/l.stride + 1; |
| | | int c = l.c; |
| | | memset(state.delta, 0, l.batch*l.h*l.w*l.c*sizeof(float)); |
| | | for(i = 0; i < h*w*c*l.batch; ++i){ |
| | | int index = l.indexes[i]; |
| | | state.delta[index] += l.delta[i]; |