From 8c3694bc911bbeab63e75c18f920e0991a5fa877 Mon Sep 17 00:00:00 2001
From: Joseph Redmon <pjreddie@gmail.com>
Date: Sat, 07 Dec 2013 17:38:50 +0000
Subject: [PATCH] Ensemble
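
Rename cv_split_data to split_data and allocate the row-pointer arrays
for the split, expose matrix_accuracy and matrix_add_matrix in matrix.h,
give train_network_sgd an explicit iteration count, add network_predict
and network_predict_data, and add a test_ensemble that sums the test-set
predictions of 30 independently trained nets on MNIST.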

---
 src/network.c |   48 ++++++++++-----
 src/matrix.c  |   18 +++++
 src/matrix.h  |    2 
 src/network.h |    3 
 Makefile      |    4 
 src/data.c    |   12 +++-
 src/data.h    |    2 
 src/tests.c   |   53 +++++++++++++++--
 8 files changed, 109 insertions(+), 33 deletions(-)
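
A minimal sketch of how the new prediction/accuracy helpers compose
(illustrative only: eval_one_model is a hypothetical caller, and the
net and test data are assumed to be parsed, loaded, and normalized as
in test_ensemble):

    #include <stdio.h>
    #include "data.h"
    #include "matrix.h"
    #include "network.h"

    /* Score one already-trained net on an already-loaded test set. */
    void eval_one_model(network net, data test)
    {
        /* one row of raw class scores per test example */
        matrix pred = network_predict_data(net, test);
        /* fraction of rows whose argmax matches the one-hot truth row */
        double acc = matrix_accuracy(test.y, pred);
        printf("accuracy: %lf\n", acc);
        free_matrix(pred);
    }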

diff --git a/Makefile b/Makefile
index e1238d6..44c930f 100644
--- a/Makefile
+++ b/Makefile
@@ -1,12 +1,12 @@
 CC=gcc
 COMMON=-Wall `pkg-config --cflags opencv`
+CFLAGS= $(COMMON) -O3 -ffast-math -flto
 UNAME = $(shell uname)
 ifeq ($(UNAME), Darwin)
 COMMON += -isystem /usr/local/Cellar/opencv/2.4.6.1/include/opencv -isystem /usr/local/Cellar/opencv/2.4.6.1/include
 else
-COMMON += -march=native
+CFLAGS += -march=native
 endif
-CFLAGS= $(COMMON) -O3 -ffast-math -flto
 #CFLAGS= $(COMMON) -O0 -g 
 LDFLAGS=`pkg-config --libs opencv` -lm
 VPATH=./src/
diff --git a/src/data.c b/src/data.c
index b209197..0b396d7 100644
--- a/src/data.c
+++ b/src/data.c
@@ -141,7 +141,7 @@
     }
 }
 
-data *cv_split_data(data d, int part, int total)
+data *split_data(data d, int part, int total)
 {
     data *split = calloc(2, sizeof(data));
     int i;
@@ -155,6 +155,12 @@
     train.X.rows = train.y.rows = d.X.rows - (end-start);
     train.X.cols = test.X.cols = d.X.cols;
     train.y.cols = test.y.cols = d.y.cols;
+
+    train.X.vals = calloc(train.X.rows, sizeof(double*));
+    test.X.vals = calloc(test.X.rows, sizeof(double*));
+    train.y.vals = calloc(train.y.rows, sizeof(double*));
+    test.y.vals = calloc(test.y.rows, sizeof(double*));
+
     for(i = 0; i < start; ++i){
         train.X.vals[i] = d.X.vals[i];
         train.y.vals[i] = d.y.vals[i];
@@ -164,8 +170,8 @@
         test.y.vals[i-start] = d.y.vals[i];
     }
     for(i = end; i < d.X.rows; ++i){
-        train.X.vals[i-(start-end)] = d.X.vals[i];
-        train.y.vals[i-(start-end)] = d.y.vals[i];
+        train.X.vals[i-(end-start)] = d.X.vals[i];
+        train.y.vals[i-(end-start)] = d.y.vals[i];
     }
     split[0] = train;
     split[1] = test;
diff --git a/src/data.h b/src/data.h
index 3c16574..e887d0b 100644
--- a/src/data.h
+++ b/src/data.h
@@ -19,6 +19,6 @@
 data load_categorical_data_csv(char *filename, int target, int k);
 void normalize_data_rows(data d);
 void randomize_data(data d);
-data *cv_split_data(data d, int part, int total);
+data *split_data(data d, int part, int total);
 
 #endif
diff --git a/src/matrix.c b/src/matrix.c
index 5627b87..68e6f8d 100644
--- a/src/matrix.c
+++ b/src/matrix.c
@@ -13,6 +13,18 @@
     free(m.vals);
 }
 
+double matrix_accuracy(matrix truth, matrix guess)
+{
+    int k = truth.cols;
+    int i;
+    int count = 0;
+    for(i = 0; i < truth.rows; ++i){
+        int class = max_index(guess.vals[i], k);
+        if(truth.vals[i][class]) ++count;
+    }
+    return (double)count/truth.rows;
+}
+
 void matrix_add_matrix(matrix from, matrix to)
 {
     assert(from.rows == to.rows && from.cols == to.cols);
@@ -26,12 +38,14 @@
 
 matrix make_matrix(int rows, int cols)
 {
+    int i;
     matrix m;
     m.rows = rows;
     m.cols = cols;
     m.vals = calloc(m.rows, sizeof(double *));
-    int i;
-    for(i = 0; i < m.rows; ++i) m.vals[i] = calloc(m.cols, sizeof(double));
+    for(i = 0; i < m.rows; ++i){
+        m.vals[i] = calloc(m.cols, sizeof(double));
+    }
     return m;
 }
 
diff --git a/src/matrix.h b/src/matrix.h
index 182135a..098eb9e 100644
--- a/src/matrix.h
+++ b/src/matrix.h
@@ -11,6 +11,8 @@
 
 matrix csv_to_matrix(char *filename);
 matrix hold_out_matrix(matrix *m, int n);
+double matrix_accuracy(matrix truth, matrix guess);
+void matrix_add_matrix(matrix from, matrix to);
 
 double *pop_column(matrix *m, int c);
 
diff --git a/src/network.c b/src/network.c
index 29234da..34cd8b4 100644
--- a/src/network.c
+++ b/src/network.c
@@ -174,18 +174,18 @@
         return (y[class]?1:0);
 }
 
-double train_network_sgd(network net, data d, double step, double momentum,double decay)
+double train_network_sgd(network net, data d, int n, double step, double momentum,double decay)
 {
     int i;
     int correct = 0;
-    for(i = 0; i < d.X.rows; ++i){
+    for(i = 0; i < n; ++i){
         int index = rand()%d.X.rows;
         correct += train_network_datum(net, d.X.vals[index], d.y.vals[index], step, momentum, decay);
-        if((i+1)%10 == 0){
-            printf("%d: %f\n", (i+1), (double)correct/(i+1));
-        }
+        //if((i+1)%10 == 0){
+        //    printf("%d: %f\n", (i+1), (double)correct/(i+1));
+        //}
     }
-    return (double)correct/d.X.rows;
+    return (double)correct/n;
 }
 
 void train_network(network net, data d, double step, double momentum, double decay)
@@ -269,6 +269,27 @@
     } 
 }
 
+double *network_predict(network net, double *input)
+{
+    forward_network(net, input);
+    double *out = get_network_output(net);
+    return out;
+}
+
+matrix network_predict_data(network net, data test)
+{
+    int i,j;
+    int k = get_network_output_size(net);
+    matrix pred = make_matrix(test.X.rows, k);
+    for(i = 0; i < test.X.rows; ++i){
+        double *out = network_predict(net, test.X.vals[i]);
+        for(j = 0; j < k; ++j){
+            pred.vals[i][j] = out[j];
+        }
+    }
+    return pred;   
+}
+
 void print_network(network net)
 {
     int i,j;
@@ -306,17 +327,12 @@
         fprintf(stderr, "\n");
     }
 }
+
 double network_accuracy(network net, data d)
 {
-    int i;
-    int correct = 0;
-    int k = get_network_output_size(net);
-    for(i = 0; i < d.X.rows; ++i){
-        forward_network(net, d.X.vals[i]);
-        double *out = get_network_output(net);
-        int guess = max_index(out, k);
-        if(d.y.vals[i][guess]) ++correct;
-    }
-    return (double)correct/d.X.rows;
+    matrix guess = network_predict_data(net, d);
+    double acc = matrix_accuracy(d.y, guess);
+    free_matrix(guess);
+    return acc;
 }
 
diff --git a/src/network.h b/src/network.h
index 3614c52..2ffc76b 100644
--- a/src/network.h
+++ b/src/network.h
@@ -24,8 +24,9 @@
 void forward_network(network net, double *input);
 void backward_network(network net, double *input, double *truth);
 void update_network(network net, double step, double momentum, double decay);
-double train_network_sgd(network net, data d, double step, double momentum,double decay);
+double train_network_sgd(network net, data d, int n, double step, double momentum,double decay);
 void train_network(network net, data d, double step, double momentum, double decay);
+matrix network_predict_data(network net, data test);
 double network_accuracy(network net, data d);
 double *get_network_output(network net);
 double *get_network_output_layer(network net, int i);
diff --git a/src/tests.c b/src/tests.c
index d7d9389..0b9b5db 100644
--- a/src/tests.c
+++ b/src/tests.c
@@ -204,21 +204,57 @@
     int count = 0;
     double lr = .0005;
     while(++count <= 1){
-        double acc = train_network_sgd(net, train, lr, .9, .001);
-        printf("Training Accuracy: %lf", acc);
+        double acc = train_network_sgd(net, train, 10000, lr, .9, .001);
+        printf("Training Accuracy: %lf\n", acc);
         lr /= 2; 
     }
-    /*
     double train_acc = network_accuracy(net, train);
     fprintf(stderr, "\nTRAIN: %f\n", train_acc);
     double test_acc = network_accuracy(net, test);
     fprintf(stderr, "TEST: %f\n\n", test_acc);
     printf("%d, %f, %f\n", count, train_acc, test_acc);
-    */
     //end = clock();
     //printf("Neural Net Learning: %lf seconds\n", (double)(end-start)/CLOCKS_PER_SEC);
 }
 
+void test_ensemble()
+{
+    int i;
+    srand(888888);
+    data d = load_categorical_data_csv("mnist/mnist_train.csv", 0, 10);
+    normalize_data_rows(d);
+    randomize_data(d);
+    data test = load_categorical_data_csv("mnist/mnist_test.csv", 0, 10);
+    normalize_data_rows(test);
+    data train = d;
+    /*
+    data *split = split_data(d, 1, 10);
+    data train = split[0];
+    data test = split[1];
+    */
+    matrix prediction = make_matrix(test.y.rows, test.y.cols);
+    int n = 30;
+    for(i = 0; i < n; ++i){
+        int count = 0;
+        double lr = .0005;
+        network net = parse_network_cfg("nist.cfg");
+        while(++count <= 5){
+            double acc = train_network_sgd(net, train, train.X.rows, lr, .9, .001);
+            printf("Training Accuracy: %lf\n", acc);
+            lr /= 2; 
+        }
+        matrix partial = network_predict_data(net, test);
+        double acc = matrix_accuracy(test.y, partial);
+        printf("Model Accuracy: %lf\n", acc);
+        matrix_add_matrix(partial, prediction);
+        acc = matrix_accuracy(test.y, prediction);
+        printf("Current Ensemble Accuracy: %lf\n", acc);
+        free_matrix(partial);
+    }
+    double acc = matrix_accuracy(test.y, prediction);
+    printf("Full Ensemble Accuracy: %lf\n", acc);
+}
+
 void test_kernel_update()
 {
     srand(0);
@@ -283,7 +319,7 @@
 void test_split()
 {
     data train = load_categorical_data_csv("mnist/mnist_train.csv", 0, 10);
-    data *split = cv_split_data(train, 0, 13);
+    data *split = split_data(train, 0, 13);
     printf("%d, %d, %d\n", train.X.rows, split[0].X.rows, split[1].X.rows);
 }
 
@@ -291,8 +327,9 @@
 int main()
 {
     //test_kernel_update();
-    test_split();
-   // test_nist();
+    //test_split();
+    test_ensemble();
+    //test_nist();
     //test_full();
     //test_random_preprocess();
     //test_random_classify();
@@ -307,6 +344,6 @@
     //test_convolutional_layer();
     //verify_convolutional_layer();
     //test_color();
-    cvWaitKey(0);
+    //cvWaitKey(0);
     return 0;
 }

--
Gitblit v1.10.0