~speedprog/mtg/mtg_card_detector.git

			@@ -44,7 +44,7 @@
			}
			mean = mean / size;
			for(i = 0; i < size; ++i){
			binary[fsize + i] = (weights[fsize + i] > 0) ? mean : -mean;
			binary[fsize + i] = (weights[fsize + i] > 0) ? mean: -mean;
			}
			}
			}
			@@ -688,6 +688,7 @@
			// t_input = calloc(t_intput_size, sizeof(float));
			// im2col_cpu_custom_transpose(state.input, l.c, l.h, l.w, l.size, l.stride, l.pad, t_input, new_ldb);
			//}
			//if (l.xnor && l.size == 3 && l.stride == 1 && l.pad == 1) {}
			//else
			im2col_cpu_custom(state.input, l.c, l.h, l.w, l.size, l.stride, l.pad, b);

			@@ -772,17 +773,25 @@
			*/

			/*
			if (l.size == 3 && l.stride == 1 && l.pad == 1) {
			if (l.size == 3 && l.stride == 1 && l.pad == 1)
			{
			//binarize_weights(l.weights, l.n, l.c*l.size*l.size, l.binary_weights);
			//printf("\n mean = %f \n", l.mean_arr[0]);

			convolution_2d(l.w, l.h, l.size, l.n, l.c, l.pad, l.stride,
			l.weights, state.input, l.output);
			//l.weights, state.input, l.output, l.mean_arr);
			l.binary_weights, state.input, l.output, l.mean_arr);
			}
			else {
			*/
			*/

			//size_t ldb_align = 256; // 256 bit for AVX2
			int ldb_align = l.lda_align;
			size_t new_ldb = k + (ldb_align - k%ldb_align);
			char *t_bit_input = NULL;
			size_t t_intput_size = binary_transpose_align_input(k, n, b, &t_bit_input, ldb_align);
			//char *t_bit_input = calloc(new_ldb * n, sizeof(char)); // for im2col_cpu_custom_transpose() only
			//float_to_bit(t_input, t_bit_input, new_ldb * n); // for im2col_cpu_custom_transpose() only

			gemm_nn_custom_bin_mean_transposed(m, n, k, 1, l.align_bit_weights, new_ldb, t_bit_input, new_ldb, c, n, l.mean_arr);