~speedprog/mtg/mtg_card_detector.git

parent: 4bdf96bd | patch | commit | show whitespace

Joseph Redmon

2013-12-07 8c3694bc911bbeab63e75c18f920e0991a5fa877

Ensemble

8 files modified

	Makefile	4 ●●●●● patch \| view \| raw \| blame \| history
	src/data.c	12 ●●●●● patch \| view \| raw \| blame \| history
	src/data.h	2 ●●●●● patch \| view \| raw \| blame \| history
	src/matrix.c	18 ●●●●● patch \| view \| raw \| blame \| history
	src/matrix.h	2 ●●●●● patch \| view \| raw \| blame \| history
	src/network.c	48 ●●●●● patch \| view \| raw \| blame \| history
	src/network.h	3 ●●●●● patch \| view \| raw \| blame \| history
	src/tests.c	51 ●●●●● patch \| view \| raw \| blame \| history

 Makefile

@@ -1,12 +1,12 @@
CC=gcc
COMMON=-Wall `pkg-config --cflags opencv`
CFLAGS= $(COMMON) -O3 -ffast-math -flto
UNAME = $(shell uname)
ifeq ($(UNAME), Darwin)
COMMON += -isystem /usr/local/Cellar/opencv/2.4.6.1/include/opencv -isystem /usr/local/Cellar/opencv/2.4.6.1/include
else
COMMON += -march=native
CFLAGS += -march=native
endif
CFLAGS= $(COMMON) -O3 -ffast-math -flto
#CFLAGS= $(COMMON) -O0 -g 
LDFLAGS=`pkg-config --libs opencv` -lm
VPATH=./src/

 src/data.c

@@ -141,7 +141,7 @@
    }
}

data *cv_split_data(data d, int part, int total)
data *split_data(data d, int part, int total)
{
    data *split = calloc(2, sizeof(data));
    int i;
@@ -155,6 +155,12 @@
    train.X.rows = train.y.rows = d.X.rows - (end-start);
    train.X.cols = test.X.cols = d.X.cols;
    train.y.cols = test.y.cols = d.y.cols;

    train.X.vals = calloc(train.X.rows, sizeof(double*));
    test.X.vals = calloc(test.X.rows, sizeof(double*));
    train.y.vals = calloc(train.y.rows, sizeof(double*));
    test.y.vals = calloc(test.y.rows, sizeof(double*));

    for(i = 0; i < start; ++i){
        train.X.vals[i] = d.X.vals[i];
        train.y.vals[i] = d.y.vals[i];
@@ -164,8 +170,8 @@
        test.y.vals[i-start] = d.y.vals[i];
    }
    for(i = end; i < d.X.rows; ++i){
        train.X.vals[i-(start-end)] = d.X.vals[i];
        train.y.vals[i-(start-end)] = d.y.vals[i];
        train.X.vals[i-(end-start)] = d.X.vals[i];
        train.y.vals[i-(end-start)] = d.y.vals[i];
    }
    split[0] = train;
    split[1] = test;

 src/data.h

@@ -19,6 +19,6 @@
data load_categorical_data_csv(char *filename, int target, int k);
void normalize_data_rows(data d);
void randomize_data(data d);
data *cv_split_data(data d, int part, int total);
data *split_data(data d, int part, int total);

#endif

 src/matrix.c

@@ -13,6 +13,18 @@
    free(m.vals);
}

/*
 * Fraction of rows whose highest-scoring guess column is marked in truth.
 *
 * truth: row i holds a nonzero entry in every column that counts as a
 *        correct answer for example i.
 * guess: per-column scores; only its first truth.rows x truth.cols region is
 *        read, so it must be at least that large.
 *
 * Returns hits / truth.rows, or 0 when truth has no rows (avoids 0/0).
 */
double matrix_accuracy(matrix truth, matrix guess)
{
    /* Reading guess outside its bounds would be undefined behavior. */
    assert(guess.rows >= truth.rows && guess.cols >= truth.cols);
    if(truth.rows <= 0) return 0;

    int k = truth.cols;
    int i;
    int count = 0;
    for(i = 0; i < truth.rows; ++i){
        /* NOTE(review): max_index presumably returns the position of the
         * largest of the k scores - confirm against its definition. */
        int predicted = max_index(guess.vals[i], k);
        if(truth.vals[i][predicted]) ++count;
    }
    return (double)count/truth.rows;
}

void matrix_add_matrix(matrix from, matrix to)
{
    assert(from.rows == to.rows && from.cols == to.cols);
@@ -26,12 +38,14 @@

matrix make_matrix(int rows, int cols)
{
    int i;
    matrix m;
    m.rows = rows;
    m.cols = cols;
    m.vals = calloc(m.rows, sizeof(double *));
    int i;
    for(i = 0; i < m.rows; ++i) m.vals[i] = calloc(m.cols, sizeof(double));
    for(i = 0; i < m.rows; ++i){
        m.vals[i] = calloc(m.cols, sizeof(double));
    }
    return m;
}


 src/matrix.h

@@ -11,6 +11,8 @@

matrix csv_to_matrix(char *filename);
matrix hold_out_matrix(matrix *m, int n);
double matrix_accuracy(matrix truth, matrix guess);
void matrix_add_matrix(matrix from, matrix to);

double *pop_column(matrix *m, int c);


 src/network.c

@@ -174,18 +174,18 @@
        return (y[class]?1:0);
}

double train_network_sgd(network net, data d, double step, double momentum,double decay)
double train_network_sgd(network net, data d, int n, double step, double momentum,double decay)
{
    int i;
    int correct = 0;
    for(i = 0; i < d.X.rows; ++i){
    for(i = 0; i < n; ++i){
        int index = rand()%d.X.rows;
        correct += train_network_datum(net, d.X.vals[index], d.y.vals[index], step, momentum, decay);
        if((i+1)%10 == 0){
            printf("%d: %f\n", (i+1), (double)correct/(i+1));
        //if((i+1)%10 == 0){
        //    printf("%d: %f\n", (i+1), (double)correct/(i+1));
        //}
        }
    }
    return (double)correct/d.X.rows;
    return (double)correct/n;
}

void train_network(network net, data d, double step, double momentum, double decay)
@@ -269,6 +269,27 @@
    } 
}

/*
 * Runs input through net with forward_network and returns whatever pointer
 * get_network_output reports afterwards.
 * NOTE(review): the buffer is presumably owned by net and overwritten by the
 * next forward pass - confirm before holding on to it.
 */
double *network_predict(network net, double *input)
{
    forward_network(net, input);
    return get_network_output(net);
}

/*
 * Predicts every row of test.X and collects the outputs in a new matrix.
 *
 * The result has test.X.rows rows and get_network_output_size(net) columns.
 * It is allocated with make_matrix; callers in this file release it with
 * free_matrix.
 */
matrix network_predict_data(network net, data test)
{
    int width = get_network_output_size(net);
    matrix pred = make_matrix(test.X.rows, width);
    int row;
    for(row = 0; row < test.X.rows; ++row){
        /* Copy the scores out right away, before the next prediction runs. */
        double *scores = network_predict(net, test.X.vals[row]);
        double *dest = pred.vals[row];
        int col;
        for(col = 0; col < width; ++col){
            dest[col] = scores[col];
        }
    }
    return pred;
}

void print_network(network net)
{
    int i,j;
@@ -306,17 +327,12 @@
        fprintf(stderr, "\n");
    }
}

double network_accuracy(network net, data d)
{
    int i;
    int correct = 0;
    int k = get_network_output_size(net);
    for(i = 0; i < d.X.rows; ++i){
        forward_network(net, d.X.vals[i]);
        double *out = get_network_output(net);
        int guess = max_index(out, k);
        if(d.y.vals[i][guess]) ++correct;
    }
    return (double)correct/d.X.rows;
    matrix guess = network_predict_data(net, d);
    double acc = matrix_accuracy(d.y, guess);
    free_matrix(guess);
    return acc;
}


 src/network.h

@@ -24,8 +24,9 @@
void forward_network(network net, double *input);
void backward_network(network net, double *input, double *truth);
void update_network(network net, double step, double momentum, double decay);
double train_network_sgd(network net, data d, double step, double momentum,double decay);
double train_network_sgd(network net, data d, int n, double step, double momentum,double decay);
void train_network(network net, data d, double step, double momentum, double decay);
matrix network_predict_data(network net, data test);
double network_accuracy(network net, data d);
double *get_network_output(network net);
double *get_network_output_layer(network net, int i);

 src/tests.c

@@ -204,21 +204,57 @@
    int count = 0;
    double lr = .0005;
    while(++count <= 1){
        double acc = train_network_sgd(net, train, lr, .9, .001);
        printf("Training Accuracy: %lf", acc);
        double acc = train_network_sgd(net, train, 10000, lr, .9, .001);
        printf("Training Accuracy: %lf\n", acc);
        lr /= 2; 
    }
    /*
    double train_acc = network_accuracy(net, train);
    fprintf(stderr, "\nTRAIN: %f\n", train_acc);
    double test_acc = network_accuracy(net, test);
    fprintf(stderr, "TEST: %f\n\n", test_acc);
    printf("%d, %f, %f\n", count, train_acc, test_acc);
    */
    //end = clock();
    //printf("Neural Net Learning: %lf seconds\n", (double)(end-start)/CLOCKS_PER_SEC);
}

void test_ensemble()
{
    int i;
    srand(888888);
    data d = load_categorical_data_csv("mnist/mnist_train.csv", 0, 10);
    normalize_data_rows(d);
    randomize_data(d);
    data test = load_categorical_data_csv("mnist/mnist_test.csv", 0,10);
    normalize_data_rows(test);
    data train = d;
    /*
    data *split = split_data(d, 1, 10);
    data train = split[0];
    data test = split[1];
    */
    matrix prediction = make_matrix(test.y.rows, test.y.cols);
    int n = 30;
    for(i = 0; i < n; ++i){
        int count = 0;
        double lr = .0005;
        network net = parse_network_cfg("nist.cfg");
        while(++count <= 5){
            double acc = train_network_sgd(net, train, train.X.rows, lr, .9, .001);
            printf("Training Accuracy: %lf\n", acc);
            lr /= 2; 
        }
        matrix partial = network_predict_data(net, test);
        double acc = matrix_accuracy(test.y, partial);
        printf("Model Accuracy: %lf\n", acc);
        matrix_add_matrix(partial, prediction);
        acc = matrix_accuracy(test.y, prediction);
        printf("Current Ensemble Accuracy: %lf\n", acc);
        free_matrix(partial);
    }
    double acc = matrix_accuracy(test.y, prediction);
    printf("Full Ensemble Accuracy: %lf\n", acc);
}

void test_kernel_update()
{
    srand(0);
@@ -283,7 +319,7 @@
void test_split()
{
    data train = load_categorical_data_csv("mnist/mnist_train.csv", 0, 10);
    data *split = cv_split_data(train, 0, 13);
    data *split = split_data(train, 0, 13);
    printf("%d, %d, %d\n", train.X.rows, split[0].X.rows, split[1].X.rows);
}

@@ -291,7 +327,8 @@
int main()
{
    //test_kernel_update();
    test_split();
    //test_split();
    test_ensemble();
   // test_nist();
    //test_full();
    //test_random_preprocess();
@@ -307,6 +344,6 @@
    //test_convolutional_layer();
    //verify_convolutional_layer();
    //test_color();
    cvWaitKey(0);
    //cvWaitKey(0);
    return 0;
}

			@@ -1,12 +1,12 @@
			CC=gcc
			COMMON=-Wall `pkg-config --cflags opencv`
			CFLAGS= $(COMMON) -O3 -ffast-math -flto
			UNAME = $(shell uname)
			ifeq ($(UNAME), Darwin)
			COMMON += -isystem /usr/local/Cellar/opencv/2.4.6.1/include/opencv -isystem /usr/local/Cellar/opencv/2.4.6.1/include
			else
			COMMON += -march=native
			CFLAGS += -march=native
			endif
			CFLAGS= $(COMMON) -O3 -ffast-math -flto
			#CFLAGS= $(COMMON) -O0 -g
			LDFLAGS=`pkg-config --libs opencv` -lm
			VPATH=./src/

			@@ -141,7 +141,7 @@
			}
			}

			data *cv_split_data(data d, int part, int total)
			data *split_data(data d, int part, int total)
			{
			data *split = calloc(2, sizeof(data));
			int i;
			@@ -155,6 +155,12 @@
			train.X.rows = train.y.rows = d.X.rows - (end-start);
			train.X.cols = test.X.cols = d.X.cols;
			train.y.cols = test.y.cols = d.y.cols;

			train.X.vals = calloc(train.X.rows, sizeof(double*));
			test.X.vals = calloc(test.X.rows, sizeof(double*));
			train.y.vals = calloc(train.y.rows, sizeof(double*));
			test.y.vals = calloc(test.y.rows, sizeof(double*));

			for(i = 0; i < start; ++i){
			train.X.vals[i] = d.X.vals[i];
			train.y.vals[i] = d.y.vals[i];
			@@ -164,8 +170,8 @@
			test.y.vals[i-start] = d.y.vals[i];
			}
			for(i = end; i < d.X.rows; ++i){
			train.X.vals[i-(start-end)] = d.X.vals[i];
			train.y.vals[i-(start-end)] = d.y.vals[i];
			train.X.vals[i-(end-start)] = d.X.vals[i];
			train.y.vals[i-(end-start)] = d.y.vals[i];
			}
			split[0] = train;
			split[1] = test;

			@@ -19,6 +19,6 @@
			data load_categorical_data_csv(char *filename, int target, int k);
			void normalize_data_rows(data d);
			void randomize_data(data d);
			data *cv_split_data(data d, int part, int total);
			data *split_data(data d, int part, int total);

			#endif

			@@ -13,6 +13,18 @@
			free(m.vals);
			}

			/*
			 * Fraction of rows whose highest-scoring guess column is marked in truth.
			 *
			 * truth: row i holds a nonzero entry in every column that counts as a
			 *        correct answer for example i.
			 * guess: per-column scores; only its first truth.rows x truth.cols region
			 *        is read, so it must be at least that large.
			 *
			 * Returns hits / truth.rows, or 0 when truth has no rows (avoids 0/0).
			 */
			double matrix_accuracy(matrix truth, matrix guess)
			{
			    /* Reading guess outside its bounds would be undefined behavior. */
			    assert(guess.rows >= truth.rows && guess.cols >= truth.cols);
			    if(truth.rows <= 0) return 0;

			    int k = truth.cols;
			    int i;
			    int count = 0;
			    for(i = 0; i < truth.rows; ++i){
			        /* NOTE(review): max_index presumably returns the position of the
			         * largest of the k scores - confirm against its definition. */
			        int predicted = max_index(guess.vals[i], k);
			        if(truth.vals[i][predicted]) ++count;
			    }
			    return (double)count/truth.rows;
			}

			void matrix_add_matrix(matrix from, matrix to)
			{
			assert(from.rows == to.rows && from.cols == to.cols);
			@@ -26,12 +38,14 @@

			matrix make_matrix(int rows, int cols)
			{
			int i;
			matrix m;
			m.rows = rows;
			m.cols = cols;
			m.vals = calloc(m.rows, sizeof(double *));
			int i;
			for(i = 0; i < m.rows; ++i) m.vals[i] = calloc(m.cols, sizeof(double));
			for(i = 0; i < m.rows; ++i){
			m.vals[i] = calloc(m.cols, sizeof(double));
			}
			return m;
			}

			@@ -11,6 +11,8 @@

			matrix csv_to_matrix(char *filename);
			matrix hold_out_matrix(matrix *m, int n);
			double matrix_accuracy(matrix truth, matrix guess);
			void matrix_add_matrix(matrix from, matrix to);

			double *pop_column(matrix *m, int c);

			@@ -174,18 +174,18 @@
			return (y[class]?1:0);
			}

			double train_network_sgd(network net, data d, double step, double momentum,double decay)
			double train_network_sgd(network net, data d, int n, double step, double momentum,double decay)
			{
			int i;
			int correct = 0;
			for(i = 0; i < d.X.rows; ++i){
			for(i = 0; i < n; ++i){
			int index = rand()%d.X.rows;
			correct += train_network_datum(net, d.X.vals[index], d.y.vals[index], step, momentum, decay);
			if((i+1)%10 == 0){
			printf("%d: %f\n", (i+1), (double)correct/(i+1));
			//if((i+1)%10 == 0){
			// printf("%d: %f\n", (i+1), (double)correct/(i+1));
			//}
			}
			}
			return (double)correct/d.X.rows;
			return (double)correct/n;
			}

			void train_network(network net, data d, double step, double momentum, double decay)
			@@ -269,6 +269,27 @@
			}
			}

			/*
			 * Runs input through net with forward_network and returns whatever pointer
			 * get_network_output reports afterwards.
			 *
			 * Both the parameter and the return value are pointers: the caller below
			 * stores the result in a double * and indexes it.
			 * NOTE(review): the buffer is presumably owned by net and overwritten by
			 * the next forward pass - confirm before holding on to it.
			 */
			double *network_predict(network net, double *input)
			{
			    forward_network(net, input);
			    double *out = get_network_output(net);
			    return out;
			}

			/*
			 * Predicts every row of test.X and collects the outputs in a new matrix.
			 *
			 * The result has test.X.rows rows and get_network_output_size(net) columns.
			 * It is allocated with make_matrix; callers in this file release it with
			 * free_matrix.
			 */
			matrix network_predict_data(network net, data test)
			{
			    int width = get_network_output_size(net);
			    matrix pred = make_matrix(test.X.rows, width);
			    int row;
			    for(row = 0; row < test.X.rows; ++row){
			        /* Copy the scores out right away, before the next prediction runs. */
			        double *scores = network_predict(net, test.X.vals[row]);
			        double *dest = pred.vals[row];
			        int col;
			        for(col = 0; col < width; ++col){
			            dest[col] = scores[col];
			        }
			    }
			    return pred;
			}

			void print_network(network net)
			{
			int i,j;
			@@ -306,17 +327,12 @@
			fprintf(stderr, "\n");
			}
			}

			double network_accuracy(network net, data d)
			{
			int i;
			int correct = 0;
			int k = get_network_output_size(net);
			for(i = 0; i < d.X.rows; ++i){
			forward_network(net, d.X.vals[i]);
			double *out = get_network_output(net);
			int guess = max_index(out, k);
			if(d.y.vals[i][guess]) ++correct;
			}
			return (double)correct/d.X.rows;
			matrix guess = network_predict_data(net, d);
			double acc = matrix_accuracy(d.y, guess);
			free_matrix(guess);
			return acc;
			}

			@@ -24,8 +24,9 @@
			void forward_network(network net, double *input);
			void backward_network(network net, double *input, double *truth);
			void update_network(network net, double step, double momentum, double decay);
			double train_network_sgd(network net, data d, double step, double momentum,double decay);
			double train_network_sgd(network net, data d, int n, double step, double momentum,double decay);
			void train_network(network net, data d, double step, double momentum, double decay);
			matrix network_predict_data(network net, data test);
			double network_accuracy(network net, data d);
			double *get_network_output(network net);
			double *get_network_output_layer(network net, int i);

			@@ -204,21 +204,57 @@
			int count = 0;
			double lr = .0005;
			while(++count <= 1){
			double acc = train_network_sgd(net, train, lr, .9, .001);
			printf("Training Accuracy: %lf", acc);
			double acc = train_network_sgd(net, train, 10000, lr, .9, .001);
			printf("Training Accuracy: %lf\n", acc);
			lr /= 2;
			}
			/*
			double train_acc = network_accuracy(net, train);
			fprintf(stderr, "\nTRAIN: %f\n", train_acc);
			double test_acc = network_accuracy(net, test);
			fprintf(stderr, "TEST: %f\n\n", test_acc);
			printf("%d, %f, %f\n", count, train_acc, test_acc);
			*/
			//end = clock();
			//printf("Neural Net Learning: %lf seconds\n", (double)(end-start)/CLOCKS_PER_SEC);
			}

			/*
			 * Trains an ensemble of networks on the MNIST CSV files and prints accuracy
			 * on the test set.
			 *
			 * Each of the n models is parsed from "nist.cfg" and trained for five passes
			 * of train.X.rows SGD steps, halving the step size after every pass. Its
			 * test-set outputs are added into a running sum; accuracy is printed for the
			 * model alone and for the summed predictions so far.
			 */
			void test_ensemble()
			{
			    int i;
			    srand(888888);  /* fixed seed so runs are repeatable */
			    data d = load_categorical_data_csv("mnist/mnist_train.csv", 0, 10);
			    normalize_data_rows(d);
			    randomize_data(d);
			    data test = load_categorical_data_csv("mnist/mnist_test.csv", 0,10);
			    normalize_data_rows(test);
			    data train = d;
			    /* Alternative: hold out part of the training file instead of using the
			     * separate test file.
			    data *split = split_data(d, 1, 10);
			    data train = split[0];
			    data test = split[1];
			    */
			    /* Running sum of every model's outputs on the test set. */
			    matrix prediction = make_matrix(test.y.rows, test.y.cols);
			    int n = 30;
			    for(i = 0; i < n; ++i){
			        int count = 0;
			        double lr = .0005;
			        /* NOTE(review): net is never released; no network free routine is
			         * visible here - add one if it exists. */
			        network net = parse_network_cfg("nist.cfg");
			        while(++count <= 5){
			            double acc = train_network_sgd(net, train, train.X.rows, lr, .9, .001);
			            printf("Training Accuracy: %lf\n", acc);
			            lr /= 2;
			        }
			        matrix partial = network_predict_data(net, test);
			        double acc = matrix_accuracy(test.y, partial);
			        printf("Model Accuracy: %lf\n", acc);
			        matrix_add_matrix(partial, prediction);
			        acc = matrix_accuracy(test.y, prediction);
			        printf("Current Ensemble Accuracy: %lf\n", acc);
			        free_matrix(partial);
			    }
			    double acc = matrix_accuracy(test.y, prediction);
			    printf("Full Ensemble Accuracy: %lf\n", acc);
			    /* The accumulated sum was previously leaked. */
			    free_matrix(prediction);
			}

			void test_kernel_update()
			{
			srand(0);
			@@ -283,7 +319,7 @@
			void test_split()
			{
			data train = load_categorical_data_csv("mnist/mnist_train.csv", 0, 10);
			data *split = cv_split_data(train, 0, 13);
			data *split = split_data(train, 0, 13);
			printf("%d, %d, %d\n", train.X.rows, split[0].X.rows, split[1].X.rows);
			}

			@@ -291,7 +327,8 @@
			int main()
			{
			//test_kernel_update();
			test_split();
			//test_split();
			test_ensemble();
			// test_nist();
			//test_full();
			//test_random_preprocess();
			@@ -307,6 +344,6 @@
			//test_convolutional_layer();
			//verify_convolutional_layer();
			//test_color();
			cvWaitKey(0);
			//cvWaitKey(0);
			return 0;
			}