~speedprog/mtg/mtg_card_detector.git

			@@ -1,11 +1,13 @@
			#include "cuda_runtime.h"
			#include "curand.h"
			#include "cublas_v2.h"

			extern "C" {
			#include "softmax_layer.h"
			#include "cuda.h"
			#include "blas.h"
			}

			/* Block-size constant. Not referenced in the visible part of this file;
			 * presumably the threads-per-block value used for kernel launches -- TODO confirm. */
			#define BLOCK 256

			__global__ void forward_softmax_layer_kernel(int n, int batch, float input, float output)
			{
			int b = (blockIdx.x + blockIdx.ygridDim.x) blockDim.x + threadIdx.x;
			@@ -42,7 +44,7 @@

			/*
			 * Backward pass of the softmax layer on the GPU.
			 *
			 * layer: supplies batch, inputs and the delta_gpu buffer (the source).
			 * state: supplies state.delta (the destination).
			 *
			 * Adds layer.delta_gpu into state.delta over batch*inputs elements
			 * (alpha = 1, unit strides), assuming axpy_ongpu follows the BLAS axpy
			 * convention y += alpha*x -- confirm against blas.h.
			 *
			 * The earlier copy_ongpu call is intentionally dropped: running copy and
			 * then axpy over the same buffers would first overwrite whatever
			 * state.delta already held and then add the same values a second time,
			 * leaving 2*delta_gpu instead of an accumulated gradient.
			 */
			extern "C" void backward_softmax_layer_gpu(const softmax_layer layer, network_state state)
			{
				const int count = layer.batch*layer.inputs;
				axpy_ongpu(count, 1, layer.delta_gpu, 1, state.delta, 1);
			}

			/* This is if you want softmax w/o log-loss classification. You probably don't.