~speedprog/mtg/mtg_card_detector.git

parent: 0d6bb5d4 | patch | commit | ignore whitespace

Joseph Redmon

2013-12-05 b715671988a4f3e476586df52fa3bf052cce7f80

Works well on MNIST

12 files modified

	.gitignore	2 ●●●●● patch \| view \| raw \| blame \| history
	nist.cfg	10 ●●●●● patch \| view \| raw \| blame \| history
	src/activations.c	39 ●●●●● patch \| view \| raw \| blame \| history
	src/activations.h	2 ●●●●● patch \| view \| raw \| blame \| history
	src/connected_layer.c	4 ●●●●● patch \| view \| raw \| blame \| history
	src/convolutional_layer.c	19 ●●●●● patch \| view \| raw \| blame \| history
	src/maxpool_layer.c	2 ●●●●● patch \| view \| raw \| blame \| history
	src/network.c	8 ●●●●● patch \| view \| raw \| blame \| history
	src/softmax_layer.c	2 ●●●●● patch \| view \| raw \| blame \| history
	src/tests.c	86 ●●●●● patch \| view \| raw \| blame \| history
	src/utils.c	29 ●●●●● patch \| view \| raw \| blame \| history
	src/utils.h	3 ●●●●● patch \| view \| raw \| blame \| history

 .gitignore

@@ -1,6 +1,8 @@
*.o
*.dSYM
*.csv
*.out
mnist/
images/
opencv/
convnet/

 nist.cfg

@@ -2,7 +2,7 @@
width=28
height=28
channels=1
filters=4
filters=6
size=5
stride=1
activation=ramp
@@ -11,7 +11,7 @@
stride=2

[conv]
filters=12
filters=16
size=5
stride=1
activation=ramp
@@ -20,7 +20,7 @@
stride=2

[conv]
filters=10
filters=120
size=3
stride=1
activation=ramp
@@ -29,6 +29,10 @@
stride=2

[conn]
output = 80
activation=ramp

[conn]
output = 10
activation=ramp


 src/activations.c

@@ -8,15 +8,16 @@
{
    if (strcmp(s, "sigmoid")==0) return SIGMOID;
    if (strcmp(s, "relu")==0) return RELU;
    if (strcmp(s, "identity")==0) return IDENTITY;
    if (strcmp(s, "linear")==0) return LINEAR;
    if (strcmp(s, "ramp")==0) return RAMP;
    if (strcmp(s, "tanh")==0) return TANH;
    fprintf(stderr, "Couldn't find activation function %s, going with ReLU\n", s);
    return RELU;
}

double activate(double x, ACTIVATION a){
    switch(a){
        case IDENTITY:
        case LINEAR:
            return x;
        case SIGMOID:
            return 1./(1.+exp(-x));
@@ -24,12 +25,14 @@
            return x*(x>0);
        case RAMP:
            return x*(x>0) + .1*x;
        case TANH:
            return (exp(2*x)-1)/(exp(2*x)+1);
    }
    return 0;
}
double gradient(double x, ACTIVATION a){
    switch(a){
        case IDENTITY:
        case LINEAR:
            return 1;
        case SIGMOID:
            return (1.-x)*x;
@@ -37,35 +40,9 @@
            return (x>0);
        case RAMP:
            return (x>0) + .1;
        case TANH:
            return 1-x*x;
    }
    return 0;
}

/* Identity activation: hands the input back untouched. */
double identity_activation(double x)
{
    const double passthrough = x;
    return passthrough;
}
/* Gradient paired with the identity activation: always 1, whatever x is. */
double identity_gradient(double x)
{
    (void)x; /* the result is constant, so the argument is never consulted */
    return 1.0;
}

/*
 * ReLU activation, computed as x times the 0/1 outcome of (x > 0).
 * The product form is kept on purpose so the result for every input
 * is exactly what the multiplication yields.
 */
double relu_activation(double x)
{
    const int is_positive = (x > 0);
    return x * is_positive;
}
/* ReLU gradient: 1 when x is strictly positive, 0 otherwise. */
double relu_gradient(double x)
{
    const int is_positive = (x > 0);
    return is_positive;
}

double sigmoid_activation(double x)
{
    return 1./(1.+exp(-x));
}

/*
 * Returns x * (1 - x).
 * NOTE(review): this equals the sigmoid derivative only when x is already
 * the sigmoid's output rather than its input -- confirm against callers.
 */
double sigmoid_gradient(double x)
{
    const double complement = 1. - x;
    return x * complement;
}


 src/activations.h

@@ -2,7 +2,7 @@
#define ACTIVATIONS_H

typedef enum{
    SIGMOID, RELU, IDENTITY, RAMP
    SIGMOID, RELU, LINEAR, RAMP, TANH
}ACTIVATION;

ACTIVATION get_activation(char *s);

 src/connected_layer.c

@@ -8,7 +8,7 @@

connected_layer *make_connected_layer(int inputs, int outputs, ACTIVATION activation)
{
    printf("Connected Layer: %d inputs, %d outputs\n", inputs, outputs);
    fprintf(stderr, "Connected Layer: %d inputs, %d outputs\n", inputs, outputs);
    int i;
    connected_layer *layer = calloc(1, sizeof(connected_layer));
    layer->inputs = inputs;
@@ -29,7 +29,7 @@
    layer->biases = calloc(outputs, sizeof(double));
    for(i = 0; i < outputs; ++i)
        //layer->biases[i] = rand_normal()*scale + scale;
        layer->biases[i] = 1;
        layer->biases[i] = 0;

    layer->activation = activation;
    return layer;

 src/convolutional_layer.c

@@ -39,7 +39,7 @@
    layer->w = w;
    layer->c = c;
    layer->n = n;
    layer->edge = 0;
    layer->edge = 1;
    layer->stride = stride;
    layer->kernels = calloc(n, sizeof(image));
    layer->kernel_updates = calloc(n, sizeof(image));
@@ -47,10 +47,10 @@
    layer->biases = calloc(n, sizeof(double));
    layer->bias_updates = calloc(n, sizeof(double));
    layer->bias_momentum = calloc(n, sizeof(double));
    double scale = 20./(size*size*c);
    double scale = 2./(size*size);
    for(i = 0; i < n; ++i){
        //layer->biases[i] = rand_normal()*scale + scale;
        layer->biases[i] = 1;
        layer->biases[i] = 0;
        layer->kernels[i] = make_random_kernel(size, c, scale);
        layer->kernel_updates[i] = make_random_kernel(size, c, 0);
        layer->kernel_momentum[i] = make_random_kernel(size, c, 0);
@@ -63,7 +63,7 @@
        out_h = (layer->h - layer->size)/layer->stride+1;
        out_w = (layer->h - layer->size)/layer->stride+1;
    }
    printf("Convolutional Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n", h,w,c,n, out_h, out_w, n);
    fprintf(stderr, "Convolutional Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n", h,w,c,n, out_h, out_w, n);
    layer->output = calloc(out_h * out_w * n, sizeof(double));
    layer->delta  = calloc(out_h * out_w * n, sizeof(double));
    layer->upsampled = make_image(h,w,n);
@@ -124,15 +124,22 @@
    }
}

void learn_convolutional_layer(convolutional_layer layer, double *input)
void gradient_delta_convolutional_layer(convolutional_layer layer)
{
    int i;
    image in_image = double_to_image(layer.h, layer.w, layer.c, input);
    image out_delta = get_convolutional_delta(layer);
    image out_image = get_convolutional_image(layer);
    for(i = 0; i < out_image.h*out_image.w*out_image.c; ++i){
        out_delta.data[i] *= gradient(out_image.data[i], layer.activation);
    }
}

void learn_convolutional_layer(convolutional_layer layer, double *input)
{
    int i;
    image in_image = double_to_image(layer.h, layer.w, layer.c, input);
    image out_delta = get_convolutional_delta(layer);
    gradient_delta_convolutional_layer(layer);
    for(i = 0; i < layer.n; ++i){
        kernel_update(in_image, layer.kernel_updates[i], layer.stride, i, out_delta, layer.edge);
        layer.bias_updates[i] += avg_image_layer(out_delta, i);

 src/maxpool_layer.c

@@ -19,7 +19,7 @@

maxpool_layer *make_maxpool_layer(int h, int w, int c, int stride)
{
    printf("Maxpool Layer: %d x %d x %d image, %d stride\n", h,w,c,stride);
    fprintf(stderr, "Maxpool Layer: %d x %d x %d image, %d stride\n", h,w,c,stride);
    maxpool_layer *layer = calloc(1, sizeof(maxpool_layer));
    layer->h = h;
    layer->w = w;

 src/network.c

@@ -276,10 +276,10 @@
        }
        double mean = mean_array(output, n);
        double vari = variance_array(output, n);
        printf("Layer %d - Mean: %f, Variance: %f\n",i,mean, vari);
        fprintf(stderr, "Layer %d - Mean: %f, Variance: %f\n",i,mean, vari);
        if(n > 100) n = 100;
        for(j = 0; j < n; ++j) printf("%f, ", output[j]);
        if(n == 100)printf(".....\n");
        printf("\n");
        for(j = 0; j < n; ++j) fprintf(stderr, "%f, ", output[j]);
        if(n == 100)fprintf(stderr,".....\n");
        fprintf(stderr, "\n");
    }
}

 src/softmax_layer.c

@@ -5,7 +5,7 @@

softmax_layer *make_softmax_layer(int inputs)
{
    printf("Softmax Layer: %d inputs\n", inputs);
    fprintf(stderr, "Softmax Layer: %d inputs\n", inputs);
    softmax_layer *layer = calloc(1, sizeof(softmax_layer));
    layer->inputs = inputs;
    layer->output = calloc(inputs, sizeof(double));

 src/tests.c

@@ -15,7 +15,6 @@
void test_convolve()
{
    image dog = load_image("dog.jpg");
    //show_image_layers(dog, "Dog");
    printf("dog channels %d\n", dog.c);
    image kernel = make_random_image(3,3,dog.c);
    image edge = make_image(dog.h, dog.w, 1);
@@ -88,7 +87,7 @@
    image out_delta = get_convolutional_delta(layer);
    for(i = 0; i < out.h*out.w*out.c; ++i){
        out_delta.data[i] = 1;
        backward_convolutional_layer2(layer, test.data, in_delta.data);
        backward_convolutional_layer(layer, test.data, in_delta.data);
        image partial = copy_image(in_delta);
        jacobian2[i] = partial.data;
        out_delta.data[i] = 0;
@@ -156,7 +155,7 @@
    int count = 0;
        
    double avgerr = 0;
    while(1){
    while(++count < 100000000){
        double v = ((double)rand()/RAND_MAX);
        double truth = v*v;
        input[0] = v;
@@ -165,8 +164,7 @@
        double *delta = get_network_delta(net);
        double err = pow((out[0]-truth),2.);
        avgerr = .99 * avgerr + .01 * err;
        //if(++count % 100000 == 0) printf("%f\n", avgerr);
        if(++count % 1000000 == 0) printf("%f %f :%f AVG %f \n", truth, out[0], err, avgerr);
        if(count % 1000000 == 0) printf("%f %f :%f AVG %f \n", truth, out[0], err, avgerr);
        delta[0] = truth - out[0];
        learn_network(net, input);
        update_network(net, .001);
@@ -197,15 +195,16 @@
    }
}

double error_network(network net, matrix m, double *truth)
double error_network(network net, matrix m, double **truth)
{
    int i;
    int correct = 0;
    int k = get_network_output_size(net);
    for(i = 0; i < m.rows; ++i){
        forward_network(net, m.vals[i]);
        double *out = get_network_output(net);
        double err = truth[i] - out[0];
        if(fabs(err) < .5) ++correct;
        int guess = max_index(out, k);
        if(truth[i][guess]) ++correct;
    }
    return (double)correct/m.rows;
}
@@ -224,24 +223,35 @@

void test_nist()
{
    srand(999999);
    network net = parse_network_cfg("nist.cfg");
    matrix m = csv_to_matrix("images/nist_train.csv");
    matrix ho = hold_out_matrix(&m, 3000);
    matrix m = csv_to_matrix("mnist/mnist_train.csv");
    matrix test = csv_to_matrix("mnist/mnist_test.csv");
    double *truth_1d = pop_column(&m, 0);
    double **truth = one_hot(truth_1d, m.rows, 10);
    double *ho_truth_1d = pop_column(&ho, 0);
    double **ho_truth = one_hot(ho_truth_1d, ho.rows, 10);
    double *test_truth_1d = pop_column(&test, 0);
    double **test_truth = one_hot(test_truth_1d, test.rows, 10);
    int i,j;
    clock_t start = clock(), end;
    for(i = 0; i < test.rows; ++i){
        normalize_array(test.vals[i], 28*28);
        //scale_array(m.vals[i], 28*28, 1./255.);
        //translate_array(m.vals[i], 28*28, -.1);
    }
    for(i = 0; i < m.rows; ++i){
        normalize_array(m.vals[i], 28*28);
        //scale_array(m.vals[i], 28*28, 1./255.);
        //translate_array(m.vals[i], 28*28, -.1);
    }
    int count = 0;
    double lr = .0001;
    while(++count <= 3000000){
    double lr = .0005;
    while(++count <= 300){
        //lr *= .99;
        int index = 0;
        int correct = 0;
        for(i = 0; i < 1000; ++i){
        int number = 1000;
        for(i = 0; i < number; ++i){
            index = rand()%m.rows;
            normalize_array(m.vals[index], 28*28);
            forward_network(net, m.vals[index]);
            double *out = get_network_output(net);
            double *delta = get_network_delta(net);
@@ -260,19 +270,29 @@
        }
        print_network(net);
        image input = double_to_image(28,28,1, m.vals[index]);
        show_image(input, "Input");
        //show_image(input, "Input");
        image o = get_network_image(net);
        show_image_collapsed(o, "Output");
        //show_image_collapsed(o, "Output");
        visualize_network(net);
        cvWaitKey(100);
        cvWaitKey(10);
        //double test_acc = error_network(net, m, truth);
        //double valid_acc = error_network(net, ho, ho_truth);
        //printf("%f, %f\n", test_acc, valid_acc);
        fprintf(stderr, "%5d: %f %f\n",count, (double)correct/1000, lr);
        //if(valid_acc > .70) break;
        fprintf(stderr, "\n%5d: %f %f\n\n",count, (double)correct/number, lr);
        if(count % 10 == 0 && 0){
            double train_acc = error_network(net, m, truth);
            fprintf(stderr, "\nTRAIN: %f\n", train_acc);
            double test_acc = error_network(net, test, test_truth);
            fprintf(stderr, "TEST: %f\n\n", test_acc);
            printf("%d, %f, %f\n", count, train_acc, test_acc);
        }
        if(count % (m.rows/number) == 0) lr /= 2; 
    }
            double train_acc = error_network(net, m, truth);
            fprintf(stderr, "\nTRAIN: %f\n", train_acc);
            double test_acc = error_network(net, test, test_truth);
            fprintf(stderr, "TEST: %f\n\n", test_acc);
            printf("%d, %f, %f\n", count, train_acc, test_acc);
    end = clock();
    printf("Neural Net Learning: %lf seconds\n", (double)(end-start)/CLOCKS_PER_SEC);
    //printf("Neural Net Learning: %lf seconds\n", (double)(end-start)/CLOCKS_PER_SEC);
}

void test_kernel_update()
@@ -281,14 +301,14 @@
    double delta[] = {.1};
    double input[] = {.3, .5, .3, .5, .5, .5, .5, .0, .5};
    double kernel[] = {1,2,3,4,5,6,7,8,9};
    convolutional_layer layer = *make_convolutional_layer(3, 3, 1, 1, 3, 1, IDENTITY);
    convolutional_layer layer = *make_convolutional_layer(3, 3, 1, 1, 3, 1, LINEAR);
    layer.kernels[0].data = kernel;
    layer.delta = delta;
    learn_convolutional_layer(layer, input);
    print_image(layer.kernels[0]);
    print_image(get_convolutional_delta(layer));
    print_image(layer.kernel_updates[0]);
    

}

void test_random_classify()
@@ -311,15 +331,15 @@
            double *delta = get_network_delta(net);
            //printf("%f\n", out[0]);
            delta[0] = truth[index] - out[0];
           // printf("%f\n", delta[0]);
            // printf("%f\n", delta[0]);
            //printf("%f %f\n", truth[index], out[0]);
            learn_network(net, m.vals[index]);
            update_network(net, .00001);
        }
        double test_acc = error_network(net, m, truth);
        double valid_acc = error_network(net, ho, ho_truth);
        printf("%f, %f\n", test_acc, valid_acc);
        fprintf(stderr, "%5d: %f Valid: %f\n",count, test_acc, valid_acc);
        //double test_acc = error_network(net, m, truth);
        //double valid_acc = error_network(net, ho, ho_truth);
        //printf("%f, %f\n", test_acc, valid_acc);
        //fprintf(stderr, "%5d: %f Valid: %f\n",count, test_acc, valid_acc);
        //if(valid_acc > .70) break;
    }
    end = clock();
@@ -362,8 +382,8 @@
int main()
{
    //test_kernel_update();
    //test_nist();
    test_full();
    test_nist();
    //test_full();
    //test_random_preprocess();
    //test_random_classify();
    //test_parser();

 src/utils.c

@@ -180,6 +180,35 @@
    sigma = sqrt(variance_array(a,n));
}

/*
 * Shifts the first n entries of a by the offset s, in place.
 * Nothing is touched when n is zero or negative.
 */
void translate_array(double *a, int n, double s)
{
    int idx = 0;
    while (idx < n) {
        a[idx] = a[idx] + s;
        ++idx;
    }
}

/*
 * Multiplies the first n entries of a by the factor s, in place.
 * Nothing is touched when n is zero or negative.
 */
void scale_array(double *a, int n, double s)
{
    if (n <= 0) {
        return;
    }
    for (double *cursor = a, *stop = a + n; cursor != stop; ++cursor) {
        *cursor *= s;
    }
}
/*
 * Finds the position of the largest value among the first n entries of a.
 * Returns -1 when n is zero or negative. On ties the earliest index wins,
 * because a later entry must be strictly greater to replace the current best.
 */
int max_index(double *a, int n)
{
    if (n <= 0) {
        return -1;
    }

    int best = 0;
    for (int k = 1; k < n; ++k) {
        if (a[k] > a[best]) {
            best = k;
        }
    }
    return best;
}

double rand_normal()
{
    int i;

 src/utils.h

@@ -15,6 +15,9 @@
int count_fields(char *line);
double *parse_fields(char *line, int n);
void normalize_array(double *a, int n);
void scale_array(double *a, int n, double s);
void translate_array(double *a, int n, double s);
int max_index(double *a, int n);
double constrain(double a, double max);
double rand_normal();
double mean_array(double *a, int n);

			@@ -1,6 +1,8 @@
			*.o
			*.dSYM
			*.csv
			*.out
			mnist/
			images/
			opencv/
			convnet/

			@@ -2,7 +2,7 @@
			width=28
			height=28
			channels=1
			filters=4
			filters=6
			size=5
			stride=1
			activation=ramp
			@@ -11,7 +11,7 @@
			stride=2

			[conv]
			filters=12
			filters=16
			size=5
			stride=1
			activation=ramp
			@@ -20,7 +20,7 @@
			stride=2

			[conv]
			filters=10
			filters=120
			size=3
			stride=1
			activation=ramp
			@@ -29,6 +29,10 @@
			stride=2

			[conn]
			output = 80
			activation=ramp

			[conn]
			output = 10
			activation=ramp

			@@ -8,15 +8,16 @@
			{
			if (strcmp(s, "sigmoid")==0) return SIGMOID;
			if (strcmp(s, "relu")==0) return RELU;
			if (strcmp(s, "identity")==0) return IDENTITY;
			if (strcmp(s, "linear")==0) return LINEAR;
			if (strcmp(s, "ramp")==0) return RAMP;
			if (strcmp(s, "tanh")==0) return TANH;
			fprintf(stderr, "Couldn't find activation function %s, going with ReLU\n", s);
			return RELU;
			}

			double activate(double x, ACTIVATION a){
			switch(a){
			case IDENTITY:
			case LINEAR:
			return x;
			case SIGMOID:
			return 1./(1.+exp(-x));
			@@ -24,12 +25,14 @@
			return x*(x>0);
			case RAMP:
			return x*(x>0) + .1*x;
			case TANH:
			return (exp(2*x)-1)/(exp(2*x)+1);
			}
			return 0;
			}
			double gradient(double x, ACTIVATION a){
			switch(a){
			case IDENTITY:
			case LINEAR:
			return 1;
			case SIGMOID:
			return (1.-x)*x;
			@@ -37,35 +40,9 @@
			return (x>0);
			case RAMP:
			return (x>0) + .1;
			case TANH:
			return 1-x*x;
			}
			return 0;
			}

			double identity_activation(double x)
			{
			return x;
			}
			double identity_gradient(double x)
			{
			return 1;
			}

			double relu_activation(double x)
			{
			return x*(x>0);
			}
			double relu_gradient(double x)
			{
			return (x>0);
			}

			double sigmoid_activation(double x)
			{
			return 1./(1.+exp(-x));
			}

			double sigmoid_gradient(double x)
			{
			return x*(1.-x);
			}

			@@ -2,7 +2,7 @@
			#define ACTIVATIONS_H

			typedef enum{
			SIGMOID, RELU, IDENTITY, RAMP
			SIGMOID, RELU, LINEAR, RAMP, TANH
			}ACTIVATION;

			ACTIVATION get_activation(char *s);

			@@ -8,7 +8,7 @@

			connected_layer *make_connected_layer(int inputs, int outputs, ACTIVATION activation)
			{
			printf("Connected Layer: %d inputs, %d outputs\n", inputs, outputs);
			fprintf(stderr, "Connected Layer: %d inputs, %d outputs\n", inputs, outputs);
			int i;
			connected_layer *layer = calloc(1, sizeof(connected_layer));
			layer->inputs = inputs;
			@@ -29,7 +29,7 @@
			layer->biases = calloc(outputs, sizeof(double));
			for(i = 0; i < outputs; ++i)
			//layer->biases[i] = rand_normal()*scale + scale;
			layer->biases[i] = 1;
			layer->biases[i] = 0;

			layer->activation = activation;
			return layer;

			@@ -39,7 +39,7 @@
			layer->w = w;
			layer->c = c;
			layer->n = n;
			layer->edge = 0;
			layer->edge = 1;
			layer->stride = stride;
			layer->kernels = calloc(n, sizeof(image));
			layer->kernel_updates = calloc(n, sizeof(image));
			@@ -47,10 +47,10 @@
			layer->biases = calloc(n, sizeof(double));
			layer->bias_updates = calloc(n, sizeof(double));
			layer->bias_momentum = calloc(n, sizeof(double));
			double scale = 20./(size*size*c);
			double scale = 2./(size*size);
			for(i = 0; i < n; ++i){
			//layer->biases[i] = rand_normal()*scale + scale;
			layer->biases[i] = 1;
			layer->biases[i] = 0;
			layer->kernels[i] = make_random_kernel(size, c, scale);
			layer->kernel_updates[i] = make_random_kernel(size, c, 0);
			layer->kernel_momentum[i] = make_random_kernel(size, c, 0);
			@@ -63,7 +63,7 @@
			out_h = (layer->h - layer->size)/layer->stride+1;
			out_w = (layer->h - layer->size)/layer->stride+1;
			}
			printf("Convolutional Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n", h,w,c,n, out_h, out_w, n);
			fprintf(stderr, "Convolutional Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n", h,w,c,n, out_h, out_w, n);
			layer->output = calloc(out_h * out_w * n, sizeof(double));
			layer->delta = calloc(out_h * out_w * n, sizeof(double));
			layer->upsampled = make_image(h,w,n);
			@@ -124,15 +124,22 @@
			}
			}

			void learn_convolutional_layer(convolutional_layer layer, double *input)
			void gradient_delta_convolutional_layer(convolutional_layer layer)
			{
			int i;
			image in_image = double_to_image(layer.h, layer.w, layer.c, input);
			image out_delta = get_convolutional_delta(layer);
			image out_image = get_convolutional_image(layer);
			for(i = 0; i < out_image.h*out_image.w*out_image.c; ++i){
			out_delta.data[i] *= gradient(out_image.data[i], layer.activation);
			}
			}

			void learn_convolutional_layer(convolutional_layer layer, double *input)
			{
			int i;
			image in_image = double_to_image(layer.h, layer.w, layer.c, input);
			image out_delta = get_convolutional_delta(layer);
			gradient_delta_convolutional_layer(layer);
			for(i = 0; i < layer.n; ++i){
			kernel_update(in_image, layer.kernel_updates[i], layer.stride, i, out_delta, layer.edge);
			layer.bias_updates[i] += avg_image_layer(out_delta, i);

			@@ -19,7 +19,7 @@

			maxpool_layer *make_maxpool_layer(int h, int w, int c, int stride)
			{
			printf("Maxpool Layer: %d x %d x %d image, %d stride\n", h,w,c,stride);
			fprintf(stderr, "Maxpool Layer: %d x %d x %d image, %d stride\n", h,w,c,stride);
			maxpool_layer *layer = calloc(1, sizeof(maxpool_layer));
			layer->h = h;
			layer->w = w;

			@@ -276,10 +276,10 @@
			}
			double mean = mean_array(output, n);
			double vari = variance_array(output, n);
			printf("Layer %d - Mean: %f, Variance: %f\n",i,mean, vari);
			fprintf(stderr, "Layer %d - Mean: %f, Variance: %f\n",i,mean, vari);
			if(n > 100) n = 100;
			for(j = 0; j < n; ++j) printf("%f, ", output[j]);
			if(n == 100)printf(".....\n");
			printf("\n");
			for(j = 0; j < n; ++j) fprintf(stderr, "%f, ", output[j]);
			if(n == 100)fprintf(stderr,".....\n");
			fprintf(stderr, "\n");
			}
			}

			@@ -5,7 +5,7 @@

			softmax_layer *make_softmax_layer(int inputs)
			{
			printf("Softmax Layer: %d inputs\n", inputs);
			fprintf(stderr, "Softmax Layer: %d inputs\n", inputs);
			softmax_layer *layer = calloc(1, sizeof(softmax_layer));
			layer->inputs = inputs;
			layer->output = calloc(inputs, sizeof(double));

			@@ -15,7 +15,6 @@
			void test_convolve()
			{
			image dog = load_image("dog.jpg");
			//show_image_layers(dog, "Dog");
			printf("dog channels %d\n", dog.c);
			image kernel = make_random_image(3,3,dog.c);
			image edge = make_image(dog.h, dog.w, 1);
			@@ -88,7 +87,7 @@
			image out_delta = get_convolutional_delta(layer);
			for(i = 0; i < out.h*out.w*out.c; ++i){
			out_delta.data[i] = 1;
			backward_convolutional_layer2(layer, test.data, in_delta.data);
			backward_convolutional_layer(layer, test.data, in_delta.data);
			image partial = copy_image(in_delta);
			jacobian2[i] = partial.data;
			out_delta.data[i] = 0;
			@@ -156,7 +155,7 @@
			int count = 0;

			double avgerr = 0;
			while(1){
			while(++count < 100000000){
			double v = ((double)rand()/RAND_MAX);
			double truth = v*v;
			input[0] = v;
			@@ -165,8 +164,7 @@
			double *delta = get_network_delta(net);
			double err = pow((out[0]-truth),2.);
			avgerr = .99 * avgerr + .01 * err;
			//if(++count % 100000 == 0) printf("%f\n", avgerr);
			if(++count % 1000000 == 0) printf("%f %f :%f AVG %f \n", truth, out[0], err, avgerr);
			if(count % 1000000 == 0) printf("%f %f :%f AVG %f \n", truth, out[0], err, avgerr);
			delta[0] = truth - out[0];
			learn_network(net, input);
			update_network(net, .001);
			@@ -197,15 +195,16 @@
			}
			}

			double error_network(network net, matrix m, double *truth)
			double error_network(network net, matrix m, double **truth)
			{
			int i;
			int correct = 0;
			int k = get_network_output_size(net);
			for(i = 0; i < m.rows; ++i){
			forward_network(net, m.vals[i]);
			double *out = get_network_output(net);
			double err = truth[i] - out[0];
			if(fabs(err) < .5) ++correct;
			int guess = max_index(out, k);
			if(truth[i][guess]) ++correct;
			}
			return (double)correct/m.rows;
			}
			@@ -224,24 +223,35 @@

			void test_nist()
			{
			srand(999999);
			network net = parse_network_cfg("nist.cfg");
			matrix m = csv_to_matrix("images/nist_train.csv");
			matrix ho = hold_out_matrix(&m, 3000);
			matrix m = csv_to_matrix("mnist/mnist_train.csv");
			matrix test = csv_to_matrix("mnist/mnist_test.csv");
			double *truth_1d = pop_column(&m, 0);
			double **truth = one_hot(truth_1d, m.rows, 10);
			double *ho_truth_1d = pop_column(&ho, 0);
			double **ho_truth = one_hot(ho_truth_1d, ho.rows, 10);
			double *test_truth_1d = pop_column(&test, 0);
			double **test_truth = one_hot(test_truth_1d, test.rows, 10);
			int i,j;
			clock_t start = clock(), end;
			for(i = 0; i < test.rows; ++i){
			normalize_array(test.vals[i], 28*28);
			//scale_array(m.vals[i], 28*28, 1./255.);
			//translate_array(m.vals[i], 28*28, -.1);
			}
			for(i = 0; i < m.rows; ++i){
			normalize_array(m.vals[i], 28*28);
			//scale_array(m.vals[i], 28*28, 1./255.);
			//translate_array(m.vals[i], 28*28, -.1);
			}
			int count = 0;
			double lr = .0001;
			while(++count <= 3000000){
			double lr = .0005;
			while(++count <= 300){
			//lr *= .99;
			int index = 0;
			int correct = 0;
			for(i = 0; i < 1000; ++i){
			int number = 1000;
			for(i = 0; i < number; ++i){
			index = rand()%m.rows;
			normalize_array(m.vals[index], 28*28);
			forward_network(net, m.vals[index]);
			double *out = get_network_output(net);
			double *delta = get_network_delta(net);
			@@ -260,19 +270,29 @@
			}
			print_network(net);
			image input = double_to_image(28,28,1, m.vals[index]);
			show_image(input, "Input");
			//show_image(input, "Input");
			image o = get_network_image(net);
			show_image_collapsed(o, "Output");
			//show_image_collapsed(o, "Output");
			visualize_network(net);
			cvWaitKey(100);
			cvWaitKey(10);
			//double test_acc = error_network(net, m, truth);
			//double valid_acc = error_network(net, ho, ho_truth);
			//printf("%f, %f\n", test_acc, valid_acc);
			fprintf(stderr, "%5d: %f %f\n",count, (double)correct/1000, lr);
			//if(valid_acc > .70) break;
			fprintf(stderr, "\n%5d: %f %f\n\n",count, (double)correct/number, lr);
			if(count % 10 == 0 && 0){
			double train_acc = error_network(net, m, truth);
			fprintf(stderr, "\nTRAIN: %f\n", train_acc);
			double test_acc = error_network(net, test, test_truth);
			fprintf(stderr, "TEST: %f\n\n", test_acc);
			printf("%d, %f, %f\n", count, train_acc, test_acc);
			}
			if(count % (m.rows/number) == 0) lr /= 2;
			}
			double train_acc = error_network(net, m, truth);
			fprintf(stderr, "\nTRAIN: %f\n", train_acc);
			double test_acc = error_network(net, test, test_truth);
			fprintf(stderr, "TEST: %f\n\n", test_acc);
			printf("%d, %f, %f\n", count, train_acc, test_acc);
			end = clock();
			printf("Neural Net Learning: %lf seconds\n", (double)(end-start)/CLOCKS_PER_SEC);
			//printf("Neural Net Learning: %lf seconds\n", (double)(end-start)/CLOCKS_PER_SEC);
			}

			void test_kernel_update()
			@@ -281,14 +301,14 @@
			double delta[] = {.1};
			double input[] = {.3, .5, .3, .5, .5, .5, .5, .0, .5};
			double kernel[] = {1,2,3,4,5,6,7,8,9};
			convolutional_layer layer = *make_convolutional_layer(3, 3, 1, 1, 3, 1, IDENTITY);
			convolutional_layer layer = *make_convolutional_layer(3, 3, 1, 1, 3, 1, LINEAR);
			layer.kernels[0].data = kernel;
			layer.delta = delta;
			learn_convolutional_layer(layer, input);
			print_image(layer.kernels[0]);
			print_image(get_convolutional_delta(layer));
			print_image(layer.kernel_updates[0]);


			}

			void test_random_classify()
			@@ -311,15 +331,15 @@
			double *delta = get_network_delta(net);
			//printf("%f\n", out[0]);
			delta[0] = truth[index] - out[0];
			// printf("%f\n", delta[0]);
			// printf("%f\n", delta[0]);
			//printf("%f %f\n", truth[index], out[0]);
			learn_network(net, m.vals[index]);
			update_network(net, .00001);
			}
			double test_acc = error_network(net, m, truth);
			double valid_acc = error_network(net, ho, ho_truth);
			printf("%f, %f\n", test_acc, valid_acc);
			fprintf(stderr, "%5d: %f Valid: %f\n",count, test_acc, valid_acc);
			//double test_acc = error_network(net, m, truth);
			//double valid_acc = error_network(net, ho, ho_truth);
			//printf("%f, %f\n", test_acc, valid_acc);
			//fprintf(stderr, "%5d: %f Valid: %f\n",count, test_acc, valid_acc);
			//if(valid_acc > .70) break;
			}
			end = clock();
			@@ -362,8 +382,8 @@
			int main()
			{
			//test_kernel_update();
			//test_nist();
			test_full();
			test_nist();
			//test_full();
			//test_random_preprocess();
			//test_random_classify();
			//test_parser();

			@@ -180,6 +180,35 @@
			sigma = sqrt(variance_array(a,n));
			}

			void translate_array(double *a, int n, double s)
			{
			int i;
			for(i = 0; i < n; ++i){
			a[i] += s;
			}
			}

			void scale_array(double *a, int n, double s)
			{
			int i;
			for(i = 0; i < n; ++i){
			a[i] *= s;
			}
			}
			int max_index(double *a, int n)
			{
			if(n <= 0) return -1;
			int i, max_i = 0;
			double max = a[0];
			for(i = 1; i < n; ++i){
			if(a[i] > max){
			max = a[i];
			max_i = i;
			}
			}
			return max_i;
			}

			double rand_normal()
			{
			int i;

			@@ -15,6 +15,9 @@
			int count_fields(char *line);
			double *parse_fields(char *line, int n);
			void normalize_array(double *a, int n);
			void scale_array(double *a, int n, double s);
			void translate_array(double *a, int n, double s);
			int max_index(double *a, int n);
			double constrain(double a, double max);
			double rand_normal();
			double mean_array(double *a, int n);