~speedprog/mtg/mtg_card_detector.git

			@@ -65,7 +65,7 @@

			// float scale = 1./sqrt(size*size*c);
			float scale = sqrt(2./(size*size*c));
			for(i = 0; i < c*n*size*size; ++i) l.filters[i] = 2*scale*rand_uniform() - scale;
			for(i = 0; i < c*n*size*size; ++i) l.filters[i] = scale*rand_uniform(-1, 1);
			int out_h = convolutional_out_height(l);
			int out_w = convolutional_out_width(l);
			l.out_h = out_h;
			@@ -86,9 +86,8 @@
			}

			l.mean = calloc(n, sizeof(float));
			l.spatial_mean = calloc(n*l.batch, sizeof(float));

			l.variance = calloc(n, sizeof(float));

			l.rolling_mean = calloc(n, sizeof(float));
			l.rolling_variance = calloc(n, sizeof(float));
			}
			@@ -114,12 +113,6 @@
			l.rolling_mean_gpu = cuda_make_array(l.mean, n);
			l.rolling_variance_gpu = cuda_make_array(l.variance, n);

			l.spatial_mean_gpu = cuda_make_array(l.spatial_mean, n*l.batch);
			l.spatial_variance_gpu = cuda_make_array(l.spatial_mean, n*l.batch);

			l.spatial_mean_delta_gpu = cuda_make_array(l.spatial_mean, n*l.batch);
			l.spatial_variance_delta_gpu = cuda_make_array(l.spatial_mean, n*l.batch);

			l.mean_delta_gpu = cuda_make_array(l.mean, n);
			l.variance_delta_gpu = cuda_make_array(l.variance, n);

			@@ -201,13 +194,25 @@
			#endif
			}

			void bias_output(float *output, float *biases, int batch, int n, int size)
			void add_bias(float *output, float *biases, int batch, int n, int size)
			{
			int i,j,b;
			for(b = 0; b < batch; ++b){
			for(i = 0; i < n; ++i){
			for(j = 0; j < size; ++j){
			output[(b*n + i)*size + j] = biases[i];
			output[(b*n + i)*size + j] += biases[i];
			}
			}
			}
			}

			void scale_bias(float *output, float *scales, int batch, int n, int size)
			{
			int i,j,b;
			for(b = 0; b < batch; ++b){
			for(i = 0; i < n; ++i){
			for(j = 0; j < size; ++j){
			output[(b*n + i)*size + j] *= scales[i];
			}
			}
			}
			@@ -229,7 +234,7 @@
			int out_w = convolutional_out_width(l);
			int i;

			bias_output(l.output, l.biases, l.batch, l.n, out_h*out_w);
			fill_cpu(l.outputs*l.batch, 0, l.output, 1);

			int m = l.n;
			int k = l.size*l.size*l.c;
			@@ -248,10 +253,16 @@
			}

			if(l.batch_normalize){
			mean_cpu(l.output, l.batch, l.n, l.out_h*l.out_w, l.mean);
			variance_cpu(l.output, l.mean, l.batch, l.n, l.out_h*l.out_w, l.variance);
			normalize_cpu(l.output, l.mean, l.variance, l.batch, l.n, l.out_h*l.out_w);
			if(state.train){
			mean_cpu(l.output, l.batch, l.n, l.out_h*l.out_w, l.mean);
			variance_cpu(l.output, l.mean, l.batch, l.n, l.out_h*l.out_w, l.variance);
			normalize_cpu(l.output, l.mean, l.variance, l.batch, l.n, l.out_h*l.out_w);
			} else {
			normalize_cpu(l.output, l.rolling_mean, l.rolling_variance, l.batch, l.n, l.out_h*l.out_w);
			}
			scale_bias(l.output, l.scales, l.batch, l.n, out_h*out_w);
			}
			add_bias(l.output, l.biases, l.batch, l.n, out_h*out_w);

			activate_array(l.output, m*n*l.batch, l.activation);
			}