~speedprog/mtg/mtg_card_detector.git

			@@ -47,23 +47,31 @@
			l.outputs = l.out_h * l.out_w * l.out_c;
			l.inputs = l.w * l.h * l.c;

			l.filters = calloc(cnsizesizelocations, sizeof(float));
			l.filter_updates = calloc(cnsizesizelocations, sizeof(float));
			l.weights = calloc(cnsizesizelocations, sizeof(float));
			l.weight_updates = calloc(cnsizesizelocations, sizeof(float));

			l.biases = calloc(l.outputs, sizeof(float));
			l.bias_updates = calloc(l.outputs, sizeof(float));

			// float scale = 1./sqrt(sizesizec);
			float scale = sqrt(2./(sizesizec));
			for(i = 0; i < cnsizesize; ++i) l.filters[i] = scalerand_uniform(-1,1);
			for(i = 0; i < cnsizesize; ++i) l.weights[i] = scalerand_uniform(-1,1);

			l.col_image = calloc(out_hout_wsizesizec, sizeof(float));
			l.output = calloc(l.batchout_h out_w * n, sizeof(float));
			l.delta = calloc(l.batchout_h out_w * n, sizeof(float));

			l.forward = forward_local_layer;
			l.backward = backward_local_layer;
			l.update = update_local_layer;

			#ifdef GPU
			l.filters_gpu = cuda_make_array(l.filters, cnsizesizelocations);
			l.filter_updates_gpu = cuda_make_array(l.filter_updates, cnsizesizelocations);
			l.forward_gpu = forward_local_layer_gpu;
			l.backward_gpu = backward_local_layer_gpu;
			l.update_gpu = update_local_layer_gpu;

			l.weights_gpu = cuda_make_array(l.weights, cnsizesizelocations);
			l.weight_updates_gpu = cuda_make_array(l.weight_updates, cnsizesizelocations);

			l.biases_gpu = cuda_make_array(l.biases, l.outputs);
			l.bias_updates_gpu = cuda_make_array(l.bias_updates, l.outputs);
			@@ -97,7 +105,7 @@
			l.size, l.stride, l.pad, l.col_image);
			float output = l.output + il.outputs;
			for(j = 0; j < locations; ++j){
			float a = l.filters + jl.sizel.sizel.c*l.n;
			float a = l.weights + jl.sizel.sizel.c*l.n;
			float *b = l.col_image + j;
			float *c = output + j;

			@@ -130,7 +138,7 @@
			for(j = 0; j < locations; ++j){
			float a = l.delta + il.outputs + j;
			float *b = l.col_image + j;
			float c = l.filter_updates + jl.sizel.sizel.c*l.n;
			float c = l.weight_updates + jl.sizel.sizel.c*l.n;
			int m = l.n;
			int n = l.sizel.sizel.c;
			int k = 1;
			@@ -140,7 +148,7 @@

			if(state.delta){
			for(j = 0; j < locations; ++j){
			float a = l.filters + jl.sizel.sizel.c*l.n;
			float a = l.weights + jl.sizel.sizel.c*l.n;
			float b = l.delta + il.outputs + j;
			float *c = l.col_image + j;

			@@ -163,9 +171,9 @@
			axpy_cpu(l.outputs, learning_rate/batch, l.bias_updates, 1, l.biases, 1);
			scal_cpu(l.outputs, momentum, l.bias_updates, 1);

			axpy_cpu(size, -decay*batch, l.filters, 1, l.filter_updates, 1);
			axpy_cpu(size, learning_rate/batch, l.filter_updates, 1, l.filters, 1);
			scal_cpu(size, momentum, l.filter_updates, 1);
			axpy_cpu(size, -decay*batch, l.weights, 1, l.weight_updates, 1);
			axpy_cpu(size, learning_rate/batch, l.weight_updates, 1, l.weights, 1);
			scal_cpu(size, momentum, l.weight_updates, 1);
			}

			#ifdef GPU
			@@ -187,7 +195,7 @@
			l.size, l.stride, l.pad, l.col_image_gpu);
			float output = l.output_gpu + il.outputs;
			for(j = 0; j < locations; ++j){
			float a = l.filters_gpu + jl.sizel.sizel.c*l.n;
			float a = l.weights_gpu + jl.sizel.sizel.c*l.n;
			float *b = l.col_image_gpu + j;
			float *c = output + j;

			@@ -219,7 +227,7 @@
			for(j = 0; j < locations; ++j){
			float a = l.delta_gpu + il.outputs + j;
			float *b = l.col_image_gpu + j;
			float c = l.filter_updates_gpu + jl.sizel.sizel.c*l.n;
			float c = l.weight_updates_gpu + jl.sizel.sizel.c*l.n;
			int m = l.n;
			int n = l.sizel.sizel.c;
			int k = 1;
			@@ -229,7 +237,7 @@

			if(state.delta){
			for(j = 0; j < locations; ++j){
			float a = l.filters_gpu + jl.sizel.sizel.c*l.n;
			float a = l.weights_gpu + jl.sizel.sizel.c*l.n;
			float b = l.delta_gpu + il.outputs + j;
			float *c = l.col_image_gpu + j;

			@@ -252,16 +260,16 @@
			axpy_ongpu(l.outputs, learning_rate/batch, l.bias_updates_gpu, 1, l.biases_gpu, 1);
			scal_ongpu(l.outputs, momentum, l.bias_updates_gpu, 1);

			axpy_ongpu(size, -decay*batch, l.filters_gpu, 1, l.filter_updates_gpu, 1);
			axpy_ongpu(size, learning_rate/batch, l.filter_updates_gpu, 1, l.filters_gpu, 1);
			scal_ongpu(size, momentum, l.filter_updates_gpu, 1);
			axpy_ongpu(size, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1);
			axpy_ongpu(size, learning_rate/batch, l.weight_updates_gpu, 1, l.weights_gpu, 1);
			scal_ongpu(size, momentum, l.weight_updates_gpu, 1);
			}

			/*
			 * Hand the layer's weight and bias buffers to cuda_pull_array so the
			 * host-side arrays l.weights and l.biases are refreshed from
			 * l.weights_gpu and l.biases_gpu.
			 * NOTE(review): cuda_pull_array is defined elsewhere; presumably it is a
			 * device-to-host copy of `n` floats -- confirm against its definition.
			 *
			 * l: the local layer whose parameters are pulled (passed by value; only
			 *    the buffers its pointers refer to are written).
			 */
			void pull_local_layer(local_layer l)
			{
			    int locations = l.out_w*l.out_h;
			    /* size*size*c*n values for each of the out_w*out_h locations; the
			     * explicit products restore the operators missing from the original
			     * expression. */
			    int size = l.size*l.size*l.c*l.n*locations;
			    /* Only the renamed weights buffers are pulled; the old
			     * filters/filters_gpu pair is the pre-rename spelling of the same data. */
			    cuda_pull_array(l.weights_gpu, l.weights, size);
			    cuda_pull_array(l.biases_gpu, l.biases, l.outputs);
			}

			@@ -269,7 +277,7 @@
			{
			int locations = l.out_w*l.out_h;
			int size = l.sizel.sizel.cl.nlocations;
			cuda_push_array(l.filters_gpu, l.filters, size);
			cuda_push_array(l.weights_gpu, l.weights, size);
			cuda_push_array(l.biases_gpu, l.biases, l.outputs);
			}
			#endif