src/softmax_layer_kernels.cu
@@ -1,11 +1,13 @@ #include "cuda_runtime.h" #include "curand.h" #include "cublas_v2.h" extern "C" { #include "softmax_layer.h" #include "cuda.h" #include "blas.h" } #define BLOCK 256 __global__ void forward_softmax_layer_kernel(int n, int batch, float *input, float *output) { int b = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;