#include "softmax_layer.h"
#include "cuda.h"
#include "blas.h"
}

#define BLOCK 256   /* threads per CUDA block used for kernel launches */
__global__ void forward_softmax_layer_kernel(int n, int batch, float *input, float *output)
{
    /* One thread per batch row; truncated body reconstructed as a standard max-subtracted softmax. */
    int b = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(b >= batch) return;
    float largest = -INFINITY, sum = 0;
    for(int i = 0; i < n; ++i) largest = fmaxf(input[i + b*n], largest);   /* max first, for numerical stability */
    for(int i = 0; i < n; ++i) sum += expf(input[i + b*n] - largest);
    for(int i = 0; i < n; ++i) output[i + b*n] = expf(input[i + b*n] - largest) / sum;
}
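
/* Sketch of the elided host-side launcher, assuming darknet's cuda_gridsize()
   and check_error() helpers from cuda.h and that state.input already resides
   on the GPU; it ignores any group handling the layer may support. */
extern "C" void forward_softmax_layer_gpu(const softmax_layer layer, network_state state)
{
    forward_softmax_layer_kernel<<<cuda_gridsize(layer.batch), BLOCK>>>(layer.inputs, layer.batch, state.input, layer.output_gpu);
    check_error(cudaPeekAtLastError());
}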
extern "C" void backward_softmax_layer_gpu(const softmax_layer layer, network_state state)
{
    /* axpy computes y += a*x, accumulating the layer delta into the network delta.
       A copy_ongpu() of the same vector immediately before this axpy would count
       the gradient twice, so only the accumulation remains. */
    axpy_ongpu(layer.batch*layer.inputs, 1, layer.delta_gpu, 1, state.delta, 1);
}
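
/* Hypothetical helper (not part of darknet's API): a single-row CPU reference,
   handy for sanity-checking kernel output; same max-subtracted softmax as above. */
static void softmax_cpu_ref(int n, const float *input, float *output)
{
    float largest = -INFINITY, sum = 0;
    for(int i = 0; i < n; ++i) largest = fmaxf(input[i], largest);
    for(int i = 0; i < n; ++i) sum += expf(input[i] - largest);
    for(int i = 0; i < n; ++i) output[i] = expf(input[i] - largest) / sum;
}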

/* This is if you want softmax w/o log-loss classification. You probably don't.