From 118bdd6f624a81c7b43689943485f8d70cbd944e Mon Sep 17 00:00:00 2001
From: Joseph Redmon <pjreddie@gmail.com>
Date: Fri, 14 Feb 2014 18:26:31 +0000
Subject: [PATCH] Training on VOC

---
 src/network.c             |   93 +++++++
 src/mini_blas.c           |    2 
 src/utils.h               |    1 
 src/network.h             |    1 
 Makefile                  |    4 
 src/connected_layer.c     |   30 ++
 src/connected_layer.h     |    3 
 src/data.c                |   25 +
 src/softmax_layer.c       |    5 
 src/data.h                |    9 
 /dev/null                 |   37 ---
 src/image.c               |  133 +++++++++-
 src/option_list.h         |    7 
 src/convolutional_layer.c |   10 
 src/option_list.c         |    8 
 src/activations.h         |    1 
 src/parser.c              |  187 ++++++++++----
 src/activations.c         |   19 +
 src/tests.c               |   81 +++++-
 src/image.h               |    3 
 src/utils.c               |    4 
 21 files changed, 501 insertions(+), 162 deletions(-)

diff --git a/Makefile b/Makefile
index fda7d88..4c1bb14 100644
--- a/Makefile
+++ b/Makefile
@@ -1,12 +1,12 @@
 CC=gcc
 COMMON=-Wall `pkg-config --cflags opencv`
-CFLAGS= $(COMMON) -O3 -ffast-math -flto
 UNAME = $(shell uname)
 ifeq ($(UNAME), Darwin)
 COMMON += -isystem /usr/local/Cellar/opencv/2.4.6.1/include/opencv -isystem /usr/local/Cellar/opencv/2.4.6.1/include
 else
-CFLAGS += -march=native
+COMMON += -march=native
 endif
+CFLAGS= $(COMMON) -Ofast -flto
 #CFLAGS= $(COMMON) -O0 -g 
 LDFLAGS=`pkg-config --libs opencv` -lm
 VPATH=./src/
diff --git a/connected.cfg b/connected.cfg
deleted file mode 100644
index dc2c073..0000000
--- a/connected.cfg
+++ /dev/null
@@ -1,8 +0,0 @@
-[conn]
-input=1690
-output = 10
-activation=relu
-
-[conn]
-output = 1
-activation=relu
diff --git a/convolutional.cfg b/convolutional.cfg
deleted file mode 100644
index 1612c9c..0000000
--- a/convolutional.cfg
+++ /dev/null
@@ -1,9 +0,0 @@
-[conv]
-width=200
-height=200
-channels=3
-filters=10
-size=15
-stride=16
-activation=relu
-
diff --git a/full.cfg b/full.cfg
deleted file mode 100644
index 78e938f..0000000
--- a/full.cfg
+++ /dev/null
@@ -1,17 +0,0 @@
-[conv]
-width=64
-height=64
-channels=3
-filters=10
-size=11
-stride=2
-activation=ramp
-
-[maxpool]
-stride=2
-
-[conn]
-output = 2
-activation=ramp
-
-[softmax]
diff --git a/nist.cfg b/nist.cfg
deleted file mode 100644
index 46e3223..0000000
--- a/nist.cfg
+++ /dev/null
@@ -1,30 +0,0 @@
-[conv]
-width=28
-height=28
-channels=1
-filters=20
-size=5
-stride=1
-activation=ramp
-
-[maxpool]
-stride=2
-
-[conv]
-filters=50
-size=5
-stride=1
-activation=ramp
-
-[maxpool]
-stride=2
-
-[conn]
-output = 500
-activation=ramp
-
-[conn]
-output = 10
-activation=ramp
-
-[softmax]
diff --git a/nist_basic.cfg b/nist_basic.cfg
deleted file mode 100644
index 7142735..0000000
--- a/nist_basic.cfg
+++ /dev/null
@@ -1,14 +0,0 @@
-[conv]
-width=28
-height=28
-channels=1
-filters=20
-size=11
-stride=1
-activation=linear
-
-[conn]
-output = 10
-activation=ramp
-
-[softmax]
diff --git a/src/activations.c b/src/activations.c
index cc923d0..c81d6aa 100644
--- a/src/activations.c
+++ b/src/activations.c
@@ -4,6 +4,25 @@
 #include <stdio.h>
 #include <string.h>
 
+char *get_activation_string(ACTIVATION a)
+{
+    switch(a){
+        case SIGMOID:
+            return "sigmoid";
+        case RELU:
+            return "relu";
+        case RAMP:
+            return "ramp";
+        case LINEAR:
+            return "linear";
+        case TANH:
+            return "tanh";
+        default:
+            break;
+    }
+    return "relu";
+}
+
 ACTIVATION get_activation(char *s)
 {
     if (strcmp(s, "sigmoid")==0) return SIGMOID;
diff --git a/src/activations.h b/src/activations.h
index fb2c54f..9474121 100644
--- a/src/activations.h
+++ b/src/activations.h
@@ -7,6 +7,7 @@
 
 ACTIVATION get_activation(char *s);
 
+char *get_activation_string(ACTIVATION a);
 float activate(float x, ACTIVATION a);
 float gradient(float x, ACTIVATION a);
 
diff --git a/src/connected_layer.c b/src/connected_layer.c
index 5f6631c..07fad69 100644
--- a/src/connected_layer.c
+++ b/src/connected_layer.c
@@ -19,23 +19,46 @@
     layer->delta = calloc(outputs, sizeof(float*));
 
     layer->weight_updates = calloc(inputs*outputs, sizeof(float));
+    layer->weight_adapt = calloc(inputs*outputs, sizeof(float));
     layer->weight_momentum = calloc(inputs*outputs, sizeof(float));
     layer->weights = calloc(inputs*outputs, sizeof(float));
-    float scale = 2./inputs;
+    float scale = 1./inputs;
     for(i = 0; i < inputs*outputs; ++i)
-        layer->weights[i] = rand_normal()*scale;
+        layer->weights[i] = scale*(rand_uniform());
 
     layer->bias_updates = calloc(outputs, sizeof(float));
+    layer->bias_adapt = calloc(outputs, sizeof(float));
     layer->bias_momentum = calloc(outputs, sizeof(float));
     layer->biases = calloc(outputs, sizeof(float));
     for(i = 0; i < outputs; ++i)
         //layer->biases[i] = rand_normal()*scale + scale;
-        layer->biases[i] = 0;
+        layer->biases[i] = 1;
 
     layer->activation = activation;
     return layer;
 }
 
+/*
+void update_connected_layer(connected_layer layer, float step, float momentum, float decay)
+{
+    int i;
+    for(i = 0; i < layer.outputs; ++i){
+        float delta = layer.bias_updates[i];
+        layer.bias_adapt[i] += delta*delta;
+        layer.bias_momentum[i] = step/sqrt(layer.bias_adapt[i])*(layer.bias_updates[i]) + momentum*layer.bias_momentum[i];
+        layer.biases[i] += layer.bias_momentum[i];
+    }
+    for(i = 0; i < layer.outputs*layer.inputs; ++i){
+        float delta = layer.weight_updates[i];
+        layer.weight_adapt[i] += delta*delta;
+        layer.weight_momentum[i] = step/sqrt(layer.weight_adapt[i])*(layer.weight_updates[i] - decay*layer.weights[i]) + momentum*layer.weight_momentum[i];
+        layer.weights[i] += layer.weight_momentum[i];
+    }
+    memset(layer.bias_updates, 0, layer.outputs*sizeof(float));
+    memset(layer.weight_updates, 0, layer.outputs*layer.inputs*sizeof(float));
+}
+*/
+
 void update_connected_layer(connected_layer layer, float step, float momentum, float decay)
 {
     int i;
@@ -65,6 +88,7 @@
     for(i = 0; i < layer.outputs; ++i){
         layer.output[i] = activate(layer.output[i], layer.activation);
     }
+    //for(i = 0; i < layer.outputs; ++i) if(i%(layer.outputs/10+1)==0) printf("%f, ", layer.output[i]); printf("\n");
 }
 
 void learn_connected_layer(connected_layer layer, float *input)
diff --git a/src/connected_layer.h b/src/connected_layer.h
index ce0181d..4b17c59 100644
--- a/src/connected_layer.h
+++ b/src/connected_layer.h
@@ -12,6 +12,9 @@
     float *weight_updates;
     float *bias_updates;
 
+    float *weight_adapt;
+    float *bias_adapt;
+
     float *weight_momentum;
     float *bias_momentum;
 
diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c
index cdfe9e1..6a103f6 100644
--- a/src/convolutional_layer.c
+++ b/src/convolutional_layer.c
@@ -41,8 +41,8 @@
     layer->biases = calloc(n, sizeof(float));
     layer->bias_updates = calloc(n, sizeof(float));
     layer->bias_momentum = calloc(n, sizeof(float));
-    float scale = 2./(size*size);
-    for(i = 0; i < c*n*size*size; ++i) layer->filters[i] = rand_normal()*scale;
+    float scale = 1./(size*size*c);
+    for(i = 0; i < c*n*size*size; ++i) layer->filters[i] = scale*(rand_uniform());
     for(i = 0; i < n; ++i){
         //layer->biases[i] = rand_normal()*scale + scale;
         layer->biases[i] = 0;
@@ -65,6 +65,7 @@
 
 void forward_convolutional_layer(const convolutional_layer layer, float *in)
 {
+    int i;
     int m = layer.n;
     int k = layer.size*layer.size*layer.c;
     int n = ((layer.h-layer.size)/layer.stride + 1)*
@@ -79,6 +80,11 @@
     im2col_cpu(in,  layer.c,  layer.h,  layer.w,  layer.size,  layer.stride, b);
     gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
 
+    for(i = 0; i < m*n; ++i){
+        layer.output[i] = activate(layer.output[i], layer.activation);
+    }
+    //for(i = 0; i < m*n; ++i) if(i%(m*n/10+1)==0) printf("%f, ", layer.output[i]); printf("\n");
+
 }
 
 void gradient_delta_convolutional_layer(convolutional_layer layer)
diff --git a/src/data.c b/src/data.c
index 2c5932b..035efa1 100644
--- a/src/data.c
+++ b/src/data.c
@@ -30,7 +30,7 @@
     }
 }
 
-data load_data_image_paths(char **paths, int n, char **labels, int k)
+data load_data_image_paths(char **paths, int n, char **labels, int k, int h, int w)
 {
     int i;
     data d;
@@ -40,7 +40,7 @@
     d.y = make_matrix(n, k);
 
     for(i = 0; i < n; ++i){
-        image im = load_image(paths[i]);
+        image im = load_image(paths[i], h, w);
         d.X.vals[i] = im.data;
         d.X.cols = im.h*im.w*im.c;
         fill_truth(paths[i], labels, k, d.y.vals[i]);
@@ -48,11 +48,11 @@
     return d;
 }
 
-data load_data_image_pathfile(char *filename, char **labels, int k)
+data load_data_image_pathfile(char *filename, char **labels, int k, int h, int w)
 {
     list *plist = get_paths(filename);
     char **paths = (char **)list_to_array(plist);
-    data d = load_data_image_paths(paths, plist->size, labels, k);
+    data d = load_data_image_paths(paths, plist->size, labels, k, h, w);
     free_list_contents(plist);
     free_list(plist);
     free(paths);
@@ -70,20 +70,20 @@
     }
 }
 
-data load_data_image_pathfile_part(char *filename, int part, int total, char **labels, int k)
+data load_data_image_pathfile_part(char *filename, int part, int total, char **labels, int k, int h, int w)
 {
     list *plist = get_paths(filename);
     char **paths = (char **)list_to_array(plist);
     int start = part*plist->size/total;
     int end = (part+1)*plist->size/total;
-    data d = load_data_image_paths(paths+start, end-start, labels, k);
+    data d = load_data_image_paths(paths+start, end-start, labels, k, h, w);
     free_list_contents(plist);
     free_list(plist);
     free(paths);
     return d;
 }
 
-data load_data_image_pathfile_random(char *filename, int n, char **labels, int k)
+data load_data_image_pathfile_random(char *filename, int n, char **labels, int k, int h, int w)
 {
     int i;
     list *plist = get_paths(filename);
@@ -92,8 +92,9 @@
     for(i = 0; i < n; ++i){
         int index = rand()%plist->size;
         random_paths[i] = paths[index];
+        if(i == 0) printf("%s\n", paths[index]);
     }
-    data d = load_data_image_paths(random_paths, n, labels, k);
+    data d = load_data_image_paths(random_paths, n, labels, k, h, w);
     free_list_contents(plist);
     free_list(plist);
     free(paths);
@@ -133,6 +134,14 @@
     }
 }
 
+void scale_data_rows(data d, float s)
+{
+    int i;
+    for(i = 0; i < d.X.rows; ++i){
+        scale_array(d.X.vals[i], d.X.cols, s);
+    }
+}
+
 void normalize_data_rows(data d)
 {
     int i;
diff --git a/src/data.h b/src/data.h
index e887d0b..e170974 100644
--- a/src/data.h
+++ b/src/data.h
@@ -10,14 +10,15 @@
 } data;
 
 
-data load_data_image_pathfile(char *filename, char **labels, int k);
 void free_data(data d);
-data load_data_image_pathfile(char *filename, char **labels, int k);
+data load_data_image_pathfile(char *filename, char **labels, int k, int h, int w);
 data load_data_image_pathfile_part(char *filename, int part, int total, 
-                                                char **labels, int k);
-data load_data_image_pathfile_random(char *filename, int n, char **labels, int k);
+                                    char **labels, int k, int h, int w);
+data load_data_image_pathfile_random(char *filename, int n, char **labels, 
+                                        int k, int h, int w);
 data load_categorical_data_csv(char *filename, int target, int k);
 void normalize_data_rows(data d);
+void scale_data_rows(data d, float s);
 void randomize_data(data d);
 data *split_data(data d, int part, int total);
 
diff --git a/src/image.c b/src/image.c
index 62ee5f7..460df3d 100644
--- a/src/image.c
+++ b/src/image.c
@@ -242,8 +242,107 @@
     return out;
 }
 
+// Returns a new image that is a cropped version (rectangular cut-out)
+// of the original image.
+IplImage* cropImage(const IplImage *img, const CvRect region)
+{
+    IplImage *imageCropped;
+    CvSize size;
 
-image load_image(char *filename)
+    if (img->width <= 0 || img->height <= 0
+            || region.width <= 0 || region.height <= 0) {
+        //cerr << "ERROR in cropImage(): invalid dimensions." << endl;
+        exit(1);
+    }
+
+    if (img->depth != IPL_DEPTH_8U) {
+        //cerr << "ERROR in cropImage(): image depth is not 8." << endl;
+        exit(1);
+    }
+
+    // Set the desired region of interest.
+    cvSetImageROI((IplImage*)img, region);
+    // Copy region of interest into a new iplImage and return it.
+    size.width = region.width;
+    size.height = region.height;
+    imageCropped = cvCreateImage(size, IPL_DEPTH_8U, img->nChannels);
+    cvCopy(img, imageCropped,NULL);  // Copy just the region.
+
+    return imageCropped;
+}
+
+// Creates a new image copy that is of a desired size. The aspect ratio will
+// be kept constant if 'keepAspectRatio' is true, by cropping undesired parts
+// so that only pixels of the original image are shown, instead of adding
+// extra blank space.
+// Remember to free the new image later.
+IplImage* resizeImage(const IplImage *origImg, int newHeight, int newWidth,
+        int keepAspectRatio)
+{
+    IplImage *outImg = 0;
+    int origWidth = 0;
+    int origHeight = 0;
+    if (origImg) {
+        origWidth = origImg->width;
+        origHeight = origImg->height;
+    }
+    if (newWidth <= 0 || newHeight <= 0 || origImg == 0
+            || origWidth <= 0 || origHeight <= 0) {
+        //cerr << "ERROR: Bad desired image size of " << newWidth
+        //  << "x" << newHeight << " in resizeImage().\n";
+        exit(1);
+    }
+
+    if (keepAspectRatio) {
+        // Resize the image without changing its aspect ratio,
+        // by cropping off the edges and enlarging the middle section.
+        CvRect r;
+        // input aspect ratio
+        float origAspect = (origWidth / (float)origHeight);
+        // output aspect ratio
+        float newAspect = (newWidth / (float)newHeight);
+        // crop width to be origHeight * newAspect
+        if (origAspect > newAspect) {
+            int tw = (origHeight * newWidth) / newHeight;
+            r = cvRect((origWidth - tw)/2, 0, tw, origHeight);
+        }
+        else {  // crop height to be origWidth / newAspect
+            int th = (origWidth * newHeight) / newWidth;
+            r = cvRect(0, (origHeight - th)/2, origWidth, th);
+        }
+        IplImage *croppedImg = cropImage(origImg, r);
+
+        // Call this function again, with the new aspect ratio image.
+        // Will do a scaled image resize with the correct aspect ratio.
+        outImg = resizeImage(croppedImg, newHeight, newWidth, 0);
+        cvReleaseImage( &croppedImg );
+
+    }
+    else {
+
+        // Scale the image to the new dimensions,
+        // even if the aspect ratio will be changed.
+        outImg = cvCreateImage(cvSize(newWidth, newHeight),
+                origImg->depth, origImg->nChannels);
+        if (newWidth > origImg->width && newHeight > origImg->height) {
+            // Make the image larger
+            cvResetImageROI((IplImage*)origImg);
+            // CV_INTER_LINEAR: good at enlarging.
+            // CV_INTER_CUBIC: good at enlarging.           
+            cvResize(origImg, outImg, CV_INTER_LINEAR);
+        }
+        else {
+            // Make the image smaller
+            cvResetImageROI((IplImage*)origImg);
+            // CV_INTER_AREA: good at shrinking (decimation) only.
+            cvResize(origImg, outImg, CV_INTER_AREA);
+        }
+
+    }
+    return outImg;
+}
+
+image load_image(char *filename, int h, int w)
 {
     IplImage* src = 0;
     if( (src = cvLoadImage(filename,-1)) == 0 )
@@ -251,10 +350,14 @@
         printf("Cannot load file image %s\n", filename);
         exit(0);
     }
+    cvShowImage("Orig", src);
+    IplImage *resized = resizeImage(src, h, w, 1);
+    cvShowImage("Sized", resized);
+    cvWaitKey(0);
+    cvReleaseImage(&src);
+    src = resized;
     unsigned char *data = (unsigned char *)src->imageData;
     int c = src->nChannels;
-    int h = src->height;
-    int w = src->width;
     int step = src->widthStep;
     image out = make_image(h,w,c);
     int i, j, k, count=0;;
@@ -363,14 +466,14 @@
         two_d_convolve(m, i, kernel, i, stride, out, channel, edge);
     }
     /*
-    int j;
-    for(i = 0; i < m.h; i += stride){
-        for(j = 0; j < m.w; j += stride){
-            float val = single_convolve(m, kernel, i, j);
-            set_pixel(out, i/stride, j/stride, channel, val);
-        }
-    }
-    */
+       int j;
+       for(i = 0; i < m.h; i += stride){
+       for(j = 0; j < m.w; j += stride){
+       float val = single_convolve(m, kernel, i, j);
+       set_pixel(out, i/stride, j/stride, channel, val);
+       }
+       }
+     */
 }
 
 void upsample_image(image m, int stride, image out)
@@ -422,10 +525,10 @@
         }
     }
     /*
-    for(i = 0; i < update.h*update.w*update.c; ++i){
-        update.data[i] /= (m.h/stride)*(m.w/stride);
-    }
-    */
+       for(i = 0; i < update.h*update.w*update.c; ++i){
+       update.data[i] /= (m.h/stride)*(m.w/stride);
+       }
+     */
 }
 
 void single_back_convolve(image m, image kernel, int x, int y, float val)
diff --git a/src/image.h b/src/image.h
index 72c4b2c..2c5d38a 100644
--- a/src/image.h
+++ b/src/image.h
@@ -33,13 +33,12 @@
 image make_random_kernel(int size, int c, float scale);
 image float_to_image(int h, int w, int c, float *data);
 image copy_image(image p);
-image load_image(char *filename);
+image load_image(char *filename, int h, int w);
 
 float get_pixel(image m, int x, int y, int c);
 float get_pixel_extend(image m, int x, int y, int c);
 void set_pixel(image m, int x, int y, int c, float val);
 
-
 image get_image_layer(image m, int l);
 
 void two_d_convolve(image m, int mc, image kernel, int kc, int stride, image out, int oc, int edge);
diff --git a/src/mini_blas.c b/src/mini_blas.c
index b9a4304..262798b 100644
--- a/src/mini_blas.c
+++ b/src/mini_blas.c
@@ -159,7 +159,7 @@
         gemm(TA,TB,m,n,k,1,a,k,b,n,1,c,n);
     }
     end = clock();
-    printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %lf ms\n",m,k,k,n, TA, TB, (double)(end-start)/CLOCKS_PER_SEC);
+    printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %lf ms\n",m,k,k,n, TA, TB, (float)(end-start)/CLOCKS_PER_SEC);
 }
 
 void test_blas()
diff --git a/src/network.c b/src/network.c
index 29e22e4..f7abf58 100644
--- a/src/network.c
+++ b/src/network.c
@@ -21,6 +21,77 @@
     return net;
 }
 
+void print_convolutional_cfg(FILE *fp, convolutional_layer *l)
+{
+    int i;
+    fprintf(fp, "[convolutional]\n"
+                "height=%d\n"
+                "width=%d\n"
+                "channels=%d\n"
+                "filters=%d\n"
+                "size=%d\n"
+                "stride=%d\n"
+                "activation=%s\n",
+                l->h, l->w, l->c,
+                l->n, l->size, l->stride,
+                get_activation_string(l->activation));
+    fprintf(fp, "data=");
+    for(i = 0; i < l->n; ++i) fprintf(fp, "%g,", l->biases[i]);
+    for(i = 0; i < l->n*l->c*l->size*l->size; ++i) fprintf(fp, "%g,", l->filters[i]);
+    fprintf(fp, "\n\n");
+}
+void print_connected_cfg(FILE *fp, connected_layer *l)
+{
+    int i;
+    fprintf(fp, "[connected]\n"
+                "input=%d\n"
+                "output=%d\n"
+                "activation=%s\n",
+                l->inputs, l->outputs,
+                get_activation_string(l->activation));
+    fprintf(fp, "data=");
+    for(i = 0; i < l->outputs; ++i) fprintf(fp, "%g,", l->biases[i]);
+    for(i = 0; i < l->inputs*l->outputs; ++i) fprintf(fp, "%g,", l->weights[i]);
+    fprintf(fp, "\n\n");
+}
+
+void print_maxpool_cfg(FILE *fp, maxpool_layer *l)
+{
+    fprintf(fp, "[maxpool]\n"
+                "height=%d\n"
+                "width=%d\n"
+                "channels=%d\n"
+                "stride=%d\n\n",
+                l->h, l->w, l->c,
+                l->stride);
+}
+
+void print_softmax_cfg(FILE *fp, softmax_layer *l)
+{
+    fprintf(fp, "[softmax]\n"
+                "input=%d\n\n",
+                l->inputs);
+}
+
+void save_network(network net, char *filename)
+{
+    FILE *fp = fopen(filename, "w");
+    if(!fp) file_error(filename);
+    int i;
+    for(i = 0; i < net.n; ++i)
+    {
+        if(net.types[i] == CONVOLUTIONAL)
+            print_convolutional_cfg(fp, (convolutional_layer *)net.layers[i]);
+        else if(net.types[i] == CONNECTED)
+            print_connected_cfg(fp, (connected_layer *)net.layers[i]);
+        else if(net.types[i] == MAXPOOL)
+            print_maxpool_cfg(fp, (maxpool_layer *)net.layers[i]);
+        else if(net.types[i] == SOFTMAX)
+            print_softmax_cfg(fp, (softmax_layer *)net.layers[i]);
+    }
+    fclose(fp);
+}
+
 void forward_network(network net, float *input)
 {
     int i;
@@ -64,7 +135,7 @@
         }
         else if(net.types[i] == CONNECTED){
             connected_layer layer = *(connected_layer *)net.layers[i];
-            update_connected_layer(layer, step, momentum, 0);
+            update_connected_layer(layer, step, momentum, decay);
         }
     }
 }
@@ -121,9 +192,11 @@
     float *out = get_network_output(net);
     int i, k = get_network_output_size(net);
     for(i = 0; i < k; ++i){
+        printf("%f, ", out[i]);
         delta[i] = truth[i] - out[i];
         sum += delta[i]*delta[i];
     }
+    printf("\n");
     return sum;
 }
 
@@ -173,25 +246,31 @@
 
 float train_network_datum(network net, float *x, float *y, float step, float momentum, float decay)
 {
-        forward_network(net, x);
-        int class = get_predicted_class_network(net);
-        float error = backward_network(net, x, y);
-        update_network(net, step, momentum, decay);
-        //return (y[class]?1:0);
-        return error;
+    forward_network(net, x);
+    //int class = get_predicted_class_network(net);
+    float error = backward_network(net, x, y);
+    update_network(net, step, momentum, decay);
+    //return (y[class]?1:0);
+    return error;
 }
 
 float train_network_sgd(network net, data d, int n, float step, float momentum,float decay)
 {
     int i;
     float error = 0;
+    int correct = 0;
     for(i = 0; i < n; ++i){
         int index = rand()%d.X.rows;
         error += train_network_datum(net, d.X.vals[index], d.y.vals[index], step, momentum, decay);
+        float *y = d.y.vals[index];
+        int class = get_predicted_class_network(net);
+        correct += (y[class]?1:0);
+        //printf("%d %f %f\n", i,net.output[0], d.y.vals[index][0]);
         //if((i+1)%10 == 0){
         //    printf("%d: %f\n", (i+1), (float)correct/(i+1));
         //}
     }
+    printf("Accuracy: %f\n",(float) correct/n);
     return error/n;
 }
 float train_network_batch(network net, data d, int n, float step, float momentum,float decay)
diff --git a/src/network.h b/src/network.h
index 17cc10b..a8b2860 100644
--- a/src/network.h
+++ b/src/network.h
@@ -40,6 +40,7 @@
 int get_predicted_class_network(network net);
 void print_network(network net);
 void visualize_network(network net);
+void save_network(network net, char *filename);
 
 #endif
 
diff --git a/src/option_list.c b/src/option_list.c
index 7902cd9..bb8b710 100644
--- a/src/option_list.c
+++ b/src/option_list.c
@@ -3,12 +3,6 @@
 #include <string.h>
 #include "option_list.h"
 
-typedef struct{
-    char *key;
-    char *val;
-    int used;
-} kvp;
-
 void option_insert(list *l, char *key, char *val)
 {
     kvp *p = malloc(sizeof(kvp));
@@ -47,7 +41,7 @@
 {
     char *v = option_find(l, key);
     if(v) return v;
-    fprintf(stderr, "%s: Using default '%s'\n", key, def);
+    if(def) fprintf(stderr, "%s: Using default '%s'\n", key, def);
     return def;
 }
 
diff --git a/src/option_list.h b/src/option_list.h
index 60e37fe..26cd36f 100644
--- a/src/option_list.h
+++ b/src/option_list.h
@@ -2,6 +2,13 @@
 #define OPTION_LIST_H
 #include "list.h"
 
+typedef struct{
+    char *key;
+    char *val;
+    int used;
+} kvp;
+
+
 void option_insert(list *l, char *key, char *val);
 char *option_find(list *l, char *key);
 char *option_find_str(list *l, char *key, char *def);
diff --git a/src/parser.c b/src/parser.c
index eeb6f93..cf35a94 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -23,6 +23,130 @@
 int is_softmax(section *s);
 list *read_cfg(char *filename);
 
+void free_section(section *s)
+{
+    free(s->type);
+    node *n = s->options->front;
+    while(n){
+        kvp *pair = (kvp *)n->val;
+        free(pair->key);
+        free(pair);
+        node *next = n->next;
+        free(n);
+        n = next;
+    }
+    free(s->options);
+    free(s);
+}
+
+convolutional_layer *parse_convolutional(list *options, network net, int count)
+{
+    int i;
+    int h,w,c;
+    int n = option_find_int(options, "filters",1);
+    int size = option_find_int(options, "size",1);
+    int stride = option_find_int(options, "stride",1);
+    char *activation_s = option_find_str(options, "activation", "sigmoid");
+    ACTIVATION activation = get_activation(activation_s);
+    if(count == 0){
+        h = option_find_int(options, "height",1);
+        w = option_find_int(options, "width",1);
+        c = option_find_int(options, "channels",1);
+    }else{
+        image m =  get_network_image_layer(net, count-1);
+        h = m.h;
+        w = m.w;
+        c = m.c;
+        if(h == 0) error("Layer before convolutional layer must output image.");
+    }
+    convolutional_layer *layer = make_convolutional_layer(h,w,c,n,size,stride, activation);
+    char *data = option_find_str(options, "data", 0);
+    if(data){
+        char *curr = data;
+        char *next = data;
+        for(i = 0; i < n; ++i){
+            while(*++next !='\0' && *next != ',');
+            *next = '\0';
+            sscanf(curr, "%g", &layer->biases[i]);
+            curr = next+1;
+        }
+        for(i = 0; i < c*n*size*size; ++i){
+            while(*++next !='\0' && *next != ',');
+            *next = '\0';
+            sscanf(curr, "%g", &layer->filters[i]);
+            curr = next+1;
+        }
+    }
+    option_unused(options);
+    return layer;
+}
+
+connected_layer *parse_connected(list *options, network net, int count)
+{
+    int i;
+    int input;
+    int output = option_find_int(options, "output",1);
+    char *activation_s = option_find_str(options, "activation", "sigmoid");
+    ACTIVATION activation = get_activation(activation_s);
+    if(count == 0){
+        input = option_find_int(options, "input",1);
+    }else{
+        input =  get_network_output_size_layer(net, count-1);
+    }
+    connected_layer *layer = make_connected_layer(input, output, activation);
+    char *data = option_find_str(options, "data", 0);
+    if(data){
+        char *curr = data;
+        char *next = data;
+        for(i = 0; i < output; ++i){
+            while(*++next !='\0' && *next != ',');
+            *next = '\0';
+            sscanf(curr, "%g", &layer->biases[i]);
+            curr = next+1;
+        }
+        for(i = 0; i < input*output; ++i){
+            while(*++next !='\0' && *next != ',');
+            *next = '\0';
+            sscanf(curr, "%g", &layer->weights[i]);
+            curr = next+1;
+        }
+    }
+    option_unused(options);
+    return layer;
+}
+
+softmax_layer *parse_softmax(list *options, network net, int count)
+{
+    int input;
+    if(count == 0){
+        input = option_find_int(options, "input",1);
+    }else{
+        input =  get_network_output_size_layer(net, count-1);
+    }
+    softmax_layer *layer = make_softmax_layer(input);
+    option_unused(options);
+    return layer;
+}
+
+maxpool_layer *parse_maxpool(list *options, network net, int count)
+{
+    int h,w,c;
+    int stride = option_find_int(options, "stride",1);
+    if(count == 0){
+        h = option_find_int(options, "height",1);
+        w = option_find_int(options, "width",1);
+        c = option_find_int(options, "channels",1);
+    }else{
+        image m =  get_network_image_layer(net, count-1);
+        h = m.h;
+        w = m.w;
+        c = m.c;
+        if(h == 0) error("Layer before convolutional layer must output image.");
+    }
+    maxpool_layer *layer = make_maxpool_layer(h,w,c,stride);
+    option_unused(options);
+    return layer;
+}
 
 network parse_network_cfg(char *filename)
 {
@@ -35,78 +159,29 @@
         section *s = (section *)n->val;
         list *options = s->options;
         if(is_convolutional(s)){
-            int h,w,c;
-            int n = option_find_int(options, "filters",1);
-            int size = option_find_int(options, "size",1);
-            int stride = option_find_int(options, "stride",1);
-            char *activation_s = option_find_str(options, "activation", "sigmoid");
-            ACTIVATION activation = get_activation(activation_s);
-            if(count == 0){
-                h = option_find_int(options, "height",1);
-                w = option_find_int(options, "width",1);
-                c = option_find_int(options, "channels",1);
-            }else{
-                image m =  get_network_image_layer(net, count-1);
-                h = m.h;
-                w = m.w;
-                c = m.c;
-                if(h == 0) error("Layer before convolutional layer must output image.");
-            }
-            convolutional_layer *layer = make_convolutional_layer(h,w,c,n,size,stride, activation);
+            convolutional_layer *layer = parse_convolutional(options, net, count);
             net.types[count] = CONVOLUTIONAL;
             net.layers[count] = layer;
-            option_unused(options);
-        }
-        else if(is_connected(s)){
-            int input;
-            int output = option_find_int(options, "output",1);
-            char *activation_s = option_find_str(options, "activation", "sigmoid");
-            ACTIVATION activation = get_activation(activation_s);
-            if(count == 0){
-                input = option_find_int(options, "input",1);
-            }else{
-                input =  get_network_output_size_layer(net, count-1);
-            }
-            connected_layer *layer = make_connected_layer(input, output, activation);
+        }else if(is_connected(s)){
+            connected_layer *layer = parse_connected(options, net, count);
             net.types[count] = CONNECTED;
             net.layers[count] = layer;
-            option_unused(options);
         }else if(is_softmax(s)){
-            int input;
-            if(count == 0){
-                input = option_find_int(options, "input",1);
-            }else{
-                input =  get_network_output_size_layer(net, count-1);
-            }
-            softmax_layer *layer = make_softmax_layer(input);
+            softmax_layer *layer = parse_softmax(options, net, count);
             net.types[count] = SOFTMAX;
             net.layers[count] = layer;
-            option_unused(options);
         }else if(is_maxpool(s)){
-            int h,w,c;
-            int stride = option_find_int(options, "stride",1);
-            //char *activation_s = option_find_str(options, "activation", "sigmoid");
-            if(count == 0){
-                h = option_find_int(options, "height",1);
-                w = option_find_int(options, "width",1);
-                c = option_find_int(options, "channels",1);
-            }else{
-                image m =  get_network_image_layer(net, count-1);
-                h = m.h;
-                w = m.w;
-                c = m.c;
-                if(h == 0) error("Layer before convolutional layer must output image.");
-            }
-            maxpool_layer *layer = make_maxpool_layer(h,w,c,stride);
+            maxpool_layer *layer = parse_maxpool(options, net, count);
             net.types[count] = MAXPOOL;
             net.layers[count] = layer;
-            option_unused(options);
         }else{
             fprintf(stderr, "Type not recognized: %s\n", s->type);
         }
+        free_section(s);
         ++count;
         n = n->next;
     }   
+    free_list(sections);
     net.outputs = get_network_output_size(net);
     net.output = get_network_output(net);
     return net;
diff --git a/src/softmax_layer.c b/src/softmax_layer.c
index 1e01bd2..79375de 100644
--- a/src/softmax_layer.c
+++ b/src/softmax_layer.c
@@ -36,8 +36,11 @@
     }
     for(i = 0; i < layer.inputs; ++i){
         sum += exp(input[i]-largest);
+        printf("%f, ", input[i]);
     }
-    sum = largest+log(sum);
+    printf("\n");
+    if(sum) sum = largest+log(sum);
+    else sum = largest-100;
     for(i = 0; i < layer.inputs; ++i){
         layer.output[i] = exp(input[i]-sum);
     }
diff --git a/src/tests.c b/src/tests.c
index 00cd1a1..09ec7b2 100644
--- a/src/tests.c
+++ b/src/tests.c
@@ -19,7 +19,7 @@
 
 void test_convolve()
 {
-    image dog = load_image("dog.jpg");
+    image dog = load_image("dog.jpg",300,400);
     printf("dog channels %d\n", dog.c);
     image kernel = make_random_image(3,3,dog.c);
     image edge = make_image(dog.h, dog.w, 1);
@@ -35,7 +35,7 @@
 
 void test_convolve_matrix()
 {
-    image dog = load_image("dog.jpg");
+    image dog = load_image("dog.jpg",300,400);
     printf("dog channels %d\n", dog.c);
     
     int size = 11;
@@ -64,7 +64,7 @@
 
 void test_color()
 {
-    image dog = load_image("test_color.png");
+    image dog = load_image("test_color.png", 300, 400);
     show_image_layers(dog, "Test Color");
 }
 
@@ -124,13 +124,13 @@
 
 void test_load()
 {
-    image dog = load_image("dog.jpg");
+    image dog = load_image("dog.jpg", 300, 400);
     show_image(dog, "Test Load");
     show_image_layers(dog, "Test Load");
 }
 void test_upsample()
 {
-    image dog = load_image("dog.jpg");
+    image dog = load_image("dog.jpg", 300, 400);
     int n = 3;
     image up = make_image(n*dog.h, n*dog.w, dog.c);
     upsample_image(dog, n, up);
@@ -141,7 +141,7 @@
 void test_rotate()
 {
     int i;
-    image dog = load_image("dog.jpg");
+    image dog = load_image("dog.jpg",300,400);
     clock_t start = clock(), end;
     for(i = 0; i < 1001; ++i){
         rotate_image(dog);
@@ -184,24 +184,39 @@
 void test_data()
 {
     char *labels[] = {"cat","dog"};
-    data train = load_data_image_pathfile_random("train_paths.txt", 101,labels, 2);
+    data train = load_data_image_pathfile_random("train_paths.txt", 101,labels, 2, 300, 400);
     free_data(train);
 }
 
 void test_full()
 {
     network net = parse_network_cfg("full.cfg");
-    srand(0);
-    int i = 0;
+    srand(2222222);
+    int i = 800;
     char *labels[] = {"cat","dog"};
     float lr = .00001;
     float momentum = .9;
     float decay = 0.01;
     while(i++ < 1000 || 1){
-        data train = load_data_image_pathfile_random("train_paths.txt", 1000, labels, 2);
-        train_network(net, train, lr, momentum, decay);
+        visualize_network(net);
+        cvWaitKey(100);
+        data train = load_data_image_pathfile_random("train_paths.txt", 1000, labels, 2, 256, 256);
+        image im = float_to_image(256, 256, 3,train.X.vals[0]);
+        show_image(im, "input");
+        cvWaitKey(100);
+        //scale_data_rows(train, 1./255.);
+        normalize_data_rows(train);
+        clock_t start = clock(), end;
+        float loss = train_network_sgd(net, train, 100, lr, momentum, decay);
+        end = clock();
+        printf("%d: %f, Time: %lf seconds, LR: %f, Momentum: %f, Decay: %f\n", i, loss, (float)(end-start)/CLOCKS_PER_SEC, lr, momentum, decay);
         free_data(train);
-        printf("Round %d\n", i);
+        if(i%100==0){
+            char buff[256];
+            sprintf(buff, "backup_%d.cfg", i);
+            //save_network(net, buff);
+        }
+        //lr *= .99;
     }
 }
 
@@ -218,7 +233,7 @@
     int count = 0;
     float lr = .0005;
     float momentum = .9;
-    float decay = 0.01;
+    float decay = 0.001;
     clock_t start = clock(), end;
     while(++count <= 100){
         //visualize_network(net);
@@ -227,7 +242,7 @@
         end = clock();
         printf("Time: %lf seconds\n", (float)(end-start)/CLOCKS_PER_SEC);
         start=end;
-        cvWaitKey(100);
+        //cvWaitKey(100);
         //lr /= 2; 
         if(count%5 == 0){
             float train_acc = network_accuracy(net, train);
@@ -235,7 +250,7 @@
             float test_acc = network_accuracy(net, test);
             fprintf(stderr, "TEST: %f\n\n", test_acc);
             printf("%d, %f, %f\n", count, train_acc, test_acc);
-            lr *= .5;
+            //lr *= .5;
         }
     }
 }
@@ -345,7 +360,38 @@
     int i;
     for(i = 0; i < 1000; ++i){
         im2col_cpu(test.data,  c,  h,  w,  size,  stride, matrix);
-        image render = float_to_image(mh, mw, mc, matrix);
+        //image render = float_to_image(mh, mw, mc, matrix);
+    }
+}
+
+void train_VOC()
+{
+    network net = parse_network_cfg("cfg/voc_backup_ramp_80.cfg");
+    srand(2222222);
+    int i = 0;
+    char *labels[] = {"aeroplane","bicycle","bird","boat","bottle","bus","car","cat","chair","cow","diningtable","dog","horse","motorbike","person","pottedplant","sheep","sofa","train","tvmonitor"};
+    float lr = .00001;
+    float momentum = .9;
+    float decay = 0.01;
+    while(i++ < 1000 || 1){
+        visualize_network(net);
+        cvWaitKey(100);
+        data train = load_data_image_pathfile_random("images/VOC2012/train_paths.txt", 1000, labels, 20, 300, 400);
+        image im = float_to_image(300, 400, 3,train.X.vals[0]);
+        show_image(im, "input");
+        cvWaitKey(100);
+        normalize_data_rows(train);
+        clock_t start = clock(), end;
+        float loss = train_network_sgd(net, train, 1000, lr, momentum, decay);
+        end = clock();
+        printf("%d: %f, Time: %lf seconds, LR: %f, Momentum: %f, Decay: %f\n", i, loss, (float)(end-start)/CLOCKS_PER_SEC, lr, momentum, decay);
+        free_data(train);
+        if(i%10==0){
+            char buff[256];
+            sprintf(buff, "cfg/voc_backup_ramp_%d.cfg", i);
+            save_network(net, buff);
+        }
+        //lr *= .99;
     }
 }
 
@@ -358,8 +404,9 @@
     //    test_im2row();
     //test_split();
     //test_ensemble();
-    test_nist();
+    //test_nist();
     //test_full();
+    train_VOC();
     //test_random_preprocess();
     //test_random_classify();
     //test_parser();
diff --git a/src/utils.c b/src/utils.c
index 41ee768..67a9ba1 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -216,6 +216,10 @@
     for(i = 0; i < 12; ++i) sum += (float)rand()/RAND_MAX;
     return sum-6.;
 }
+float rand_uniform()
+{
+    return (float)rand()/RAND_MAX;
+}
 
 float **one_hot_encode(float *a, int n, int k)
 {
diff --git a/src/utils.h b/src/utils.h
index 8185107..6fe0343 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -20,6 +20,7 @@
 int max_index(float *a, int n);
 float constrain(float a, float max);
 float rand_normal();
+float rand_uniform();
 float mean_array(float *a, int n);
 float variance_array(float *a, int n);
 float **one_hot_encode(float *a, int n, int k);
diff --git a/test.cfg b/test.cfg
deleted file mode 100644
index fdbcc10..0000000
--- a/test.cfg
+++ /dev/null
@@ -1,37 +0,0 @@
-[conv]
-width=200
-height=200
-channels=3
-filters=10
-size=15
-stride=16
-activation=relu
-
-#[maxpool]
-#stride=2
-
-#[conv]
-#filters=10
-#size=10
-#stride=4
-#activation=relu
-
-#[maxpool]
-#stride=2
-
-#[conv]
-#filters=10
-#size=10
-#stride=4
-#activation=relu
-
-#[maxpool]
-#stride=2
-
-[conn]
-output = 10
-activation=relu
-
-[conn]
-output = 1
-activation=relu

--
Gitblit v1.10.0